gpt4 book ai didi

r - 如何在具有多个条件的 R 中应用 RLE() 函数?

转载 作者:行者123 更新时间:2023-12-04 09:07:14 25 4
gpt4 key购买 nike

我有以下数据:

dat <- structure(list(year = c(1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1981L,
1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L,
1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L,
1981L, 1981L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L,
1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L,
1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L,
1984L, 1984L, 1984L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1986L, 1986L, 1986L, 1987L, 1987L, 1987L, 1987L, 1987L,
1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1988L, 1988L, 1988L,
1988L, 1988L, 1988L, 1988L, 1988L, 1988L, 1988L, 1988L, 1988L,
1988L, 1989L, 1989L, 1989L, 1989L, 1989L, 1989L, 1989L, 1990L,
1990L, 1990L, 1990L, 1990L, 1990L, 1990L, 1990L, 1990L, 1990L,
1990L, 1990L, 1990L, 1990L, 1990L, 1990L, 1990L, 1991L, 1991L,
1991L, 1991L, 1991L, 1991L, 1991L, 1991L, 1991L, 1991L, 1991L,
1991L, 1991L, 1991L, 1992L, 1992L, 1992L, 1992L, 1992L, 1992L,
1992L, 1992L, 1992L, 1993L, 1993L, 1993L, 1993L, 1993L, 1994L,
1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L,
1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L,
1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L,
1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1995L,
1995L, 1995L, 1995L, 1996L, 1996L, 1996L, 1996L, 1996L), mon = c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L), day = c(16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 18L, 19L, 20L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 29L, 30L, 31L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 28L, 29L,
30L, 1L, 2L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 17L, 18L,
19L, 20L, 28L, 29L, 30L, 1L, 2L), phase = c(6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 7L,
7L, 6L, 5L, 5L, 5L, 5L, 5L, 7L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 7L, 7L, 7L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 8L, 8L, 1L, 7L, 7L, 8L, 8L, 8L, 8L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 7L, 7L,
8L, 8L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L,
8L, 1L, 1L, 1L, 1L), Rainfall = c(23.2, 35.575, 37.4, 6.425,
10.275, 3.05, 50.075, 23.05, 2, 1.4, 3.325, 12.84, 0.68, 7.78,
0, 12.88, 91.48, 41.08, 4.48, 0, 0.26, 0, 2.32, 13.25, 64.5,
21.55, 82.175, 33.725, 48.95, 3.8, 16.875, 4.7, 7.7, 48.7, 25.275,
3.625, 0.075, 2.5, 0, 3.525, 0.725, 0.2, 0.625, 0.25, 2.85, 6.15,
10.675, 41.975, 24.975, 127.775, 86.225, 19.95, 1.725, 11.125,
0.075, 1.775, 5.825, 5.975, 18.125, 6.425, 3.725, 11.75, 13.975,
0.1, 1, 0.725, 4.775, 0.225, 2.625, 0.575, 13.375, 0, 0, 0, 0,
0, 0, 0, 0.825, 0, 0, 0.45, 2.2, 4.5, 0, 0, 0.05, 0, 0, 0.225,
0.975, 5.375, 9.1, 27.3, 47.7, 31.475, 4.8, 11.45, 3.15, 2.3,
14.975, 77.25, 112.225, 69.675, 27.625, 43.65, 34.85, 47.325,
65.725, 83.825, 29.525, 29.95, 12.575, 0, 3.2, 30.95, 26.25,
4.15, 0.025, 0.5, 0.375, 5.15, 3.525, 0, 0.55, 2.025, 10.525,
0.4, 0.05, 9.225, 0.2, 5.25, 0, 0.5, 1.3, 3.175, 7.825, 1.15,
3.475, 0.4, 0.1, 0.2, 4.275, 3.45, 0.075, 3.95, 23.525, 6.2,
5.7, 6.1, 4.975, 2.7, 0.95, 0, 0, 1.55, 37.525, 53.8, 26.275,
101.25, 81.825, 26.05, 6.4, 6.75, 0.65, 2.475, 2.7, 1.45, 0.775,
0.2, 5.8, 0.36, 0.02, 0.8, 2.64, 3.44, 26.8, 17.98, 3.88, 33.48,
8.08, 15.8, 11.52, 21.44, 31.18, 13.06, 12.92, 0.24, 4.48, 9.4,
4.24, 4.36, 2.34, 5.72, 16.56, 10.96, 24.12, 2.96, 28.48, 14.72,
6.32, 0, 0.3, 3.46, 0.62, 0.76, 0.46, 17.22, 10.92, 1.96, 2.92,
0, 0, 3.44, 3.86, 2.88, 0.72, 0, 0, 0.06, 1.62, 28.74, 0.64,
0, 0, 1.18, 0.42, 5.46, 3.56, 0.44, 0.48, 4.9, 1.48, 2.7, 19.94,
0.4, 7.28, 29.56, 8.72, 1.5, 2.32, 2.42, 4.62, 1.2, 13.88, 9.76,
26.32, 11, 23.8, 10.08, 17.04, 47.6, 15.22, 4.06, 60.3, 71.2,
16.54, 0.44, 0.68, 0, 0.88, 0.1, 0.04, 0.34, 0, 0.36)), row.names = c(NA,
266L), class = "data.frame")
这个数据有四列:年、月、日、阶段、降雨
我想要:
(1) 统计至少连续 3 天降雨量低于 5 毫米/天的案例数(长度 >= 3)
(2) 并且这些连续天的开始(第一次出现)应具有等于 1 的阶段。
我有以下不正确的脚本:
 dat2<-subset(dat, phase == 1)
countruns = function(x){
rainfall_rle_df <- data.frame(unclass(rle(x$Rainfall < 5)))
nrow(subset(rainfall_rle_df, lengths >= 3 & values == TRUE))
}

countruns(dat2)
这个脚本给了我 5 的值,但这是不正确的。整个数据的正确答案应该只有 4。
所以问题是这个脚本忽略了连续几天的开始阶段。
例如,上面数据中的 1996 年具有以下值:
 year   month day phase Rainfall
1996 6 28 8 0.100
1996 6 29 1 0.040
1996 6 30 1 0.340
1996 7 1 1 0.000
1996 7 2 1 0.360
起始阶段是第 8 阶段,因此不应将其计算在内。
满足上述条件的年份应该是1983年、1984年、1991年和1993年。
关于如何在 R 中执行此操作的任何建议?或者在 R 中有更好的方法来做到这一点?
我必须将脚本放在一个函数 countruns() 中。
我将不胜感激任何帮助!

最佳答案

我们使用 rleid 创建分组列(来自 data.table )按“年份”分组后,然后将组保留在 all 处'Rainfall' 的值小于 5,filter所在的团体first 'phase' 的值为 1,并在 n() 中进行频率计数( summarise )

library(dplyr)
library(data.table)
dat %>%
# // create a Date column as it is easier to check for consecutive days
mutate(Date = as.Date(paste(year, mon, day, sep="-"))) %>%
# // create a group with year
group_by(year) %>%
# // add more groups with rleid on logical vector
group_by(grp = rleid(Rainfall < 5),
# // checks for difference between adjacent Date
# // if the difference is greater than 1, cumsum increments by 1
grp2 = cumsum(c(0, abs(diff(Date))) > 1), .add = TRUE) %>%
# // filter groups where all Rainfall is less than 5 and number of rows >= 3
filter(all(Rainfall < 5), n() >= 3) %>%
# // filter the groups where the first value of phase is 1
filter(first(phase) == 1) %>%
# // get the frequency count
summarise(n = n(), .groups = 'drop') %>%
# // remove the columns that are not needed
select(-grp, -grp2)
# A tibble: 4 x 2
# year n
# <int> <int>
#1 1983 8
#2 1984 11
#3 1991 6
#4 1993 5

如果我们想把它包装在一个函数中, {{}} enquo + !! (当我们将不带引号的变量名作为参数传递给函数 esp 时)
f1 <- function(data, year, month, day, rainfall) {
data %>%
mutate(Date = as.Date(str_c({{year}}, {{month}},
{{day}}, sep="-"))) %>%
group_by({{year}}) %>%
group_by(grp = rleid({{rainfall}} < 5),
grp2 = cumsum(c(0, abs(diff(Date))) > 1),
.add = TRUE) %>%
filter(all({{rainfall}} < 5), n() >=3 ) %>%
filter(first(phase) == 1) %>%
summarise(n = n(), .groups = 'drop') %>%
select(-grp, -grp2)
}
- 应用功能
f1(dat, year, mon, day, Rainfall)
# A tibble: 4 x 2
# year n
# <int> <int>
#1 1983 8
#2 1984 11
#3 1991 6
#4 1993 5

关于r - 如何在具有多个条件的 R 中应用 RLE() 函数?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/63420370/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com