gpt4 book ai didi

r - 对于数据框的所有列,每 n 行的平均值和中值,同时将日期时间对象保留为索引

转载 作者:行者123 更新时间:2023-12-01 09:32:51 25 4
gpt4 key购买 nike

我在名为 ulyDataLefs60_12 的数据框中有这个相当复杂的数据:

  Year Day Hour Min  Sec. E1.S1 E1.S2 E1.S3 E1.S4 E1.S5 E1.S6 E1.S7 E1.S8 E2.S1 E2.S2 E2.S3 E2.S4
1 2000 122 0 1 38.01 3.31 0.662 0.662 2.65 1.32 0.000 3.310 1.32 1.980 1.980 0.662 0.000
2 2000 122 0 1 50.10 1.98 3.310 1.980 1.98 1.98 1.320 4.630 1.32 1.320 0.662 0.000 3.310
3 2000 122 0 2 2.19 1.98 1.320 3.970 1.98 1.32 0.662 0.662 3.97 1.320 0.662 1.320 0.662
4 2000 122 0 2 14.28 2.65 1.320 2.650 3.31 2.65 1.320 3.970 2.65 2.650 0.000 0.662 2.650
5 2000 122 0 2 26.38 3.97 6.620 0.662 3.31 3.31 4.630 5.290 1.98 0.000 0.000 1.980 0.662
6 2000 122 0 2 38.47 2.65 0.662 3.310 1.98 1.32 1.980 1.980 2.65 0.662 1.320 1.980 1.320
E2.S5 E2.S6 E2.S7 E2.S8 E3.S1 E3.S2 E3.S3 E3.S4 E3.S5 E3.S6 E3.S7 E3.S8 E4.S1 E4.S2 E4.S3 E4.S4
1 1.320 1.32 2.65 2.650 0.662 0.000 1.320 2.650 1.320 0.000 1.320 1.320 0.000 0.000 0.662 0.662
2 0.000 0.00 1.98 0.662 0.000 0.662 0.000 0.662 1.980 1.980 0.662 1.320 0.000 0.000 0.000 0.662
3 0.662 1.98 2.65 1.980 0.000 0.662 0.662 1.320 0.662 0.000 1.320 3.310 0.662 0.000 1.980 0.662
4 0.662 1.32 1.32 0.662 0.000 0.662 0.662 0.662 0.662 0.662 0.662 0.000 0.000 0.662 0.000 0.000
5 0.000 1.32 1.32 0.662 0.662 0.000 0.000 0.662 0.000 0.662 1.320 0.662 0.000 0.000 0.000 0.662
6 1.320 1.32 1.32 0.000 1.320 0.000 0.000 0.662 1.320 0.000 0.662 0.662 0.662 1.320 0.000 0.000
E4.S5 E4.S6 E4.S7 E4.S8 FP5.S1 FP5.S2 FP5.S3 FP5.S4 FP5.S5 FP5.S6 FP5.S7 FP5.S8 FP6.S1 FP6.S2
1 0.000 0.662 0.662 0.000 0.331 0 0.662 0.000 0.662 0 0.000 0.331 0 0.331
2 0.000 0.000 0.662 0.662 0.331 0 0.662 0.000 0.662 0 0.000 0.331 0 0.331
3 0.662 0.000 0.662 1.320 0.000 0 0.662 0.000 0.331 0 0.000 0.000 0 0.000
4 0.662 0.662 0.000 0.662 0.000 0 0.662 0.000 0.331 0 0.000 0.000 0 0.000
5 0.000 0.000 0.662 0.000 0.331 0 0.000 0.331 0.331 0 0.331 0.000 0 0.000
6 0.000 0.000 0.662 0.662 0.331 0 0.000 0.331 0.331 0 0.331 0.000 0 0.000
FP6.S3 FP6.S4 FP6.S5 FP6.S6 FP6.S7 FP6.S8 FP7.S1 FP7.S2 FP7.S3 FP7.S4 FP7.S5 FP7.S6 FP7.S7 FP7.S8
1 0.331 0.000 0.000 0.000 0 0.000 0 0.331 0.331 0.662 0 0.000 0.331 0.000
2 0.331 0.000 0.000 0.000 0 0.000 0 0.331 0.331 0.662 0 0.000 0.331 0.000
3 0.662 0.000 0.662 0.000 0 0.331 0 0.000 0.000 0.331 0 0.000 0.000 0.000
4 0.662 0.000 0.662 0.000 0 0.331 0 0.000 0.000 0.331 0 0.000 0.000 0.000
5 0.000 0.662 0.000 0.992 0 0.000 0 0.000 0.000 0.000 0 0.331 0.000 0.331
6 0.000 0.662 0.000 0.992 0 0.000 0 0.000 0.000 0.000 0 0.331 0.000 0.331
PA.LEFS60S1 PA.LEFS60S2 PA.LEFS60S3 PA.LEFS60S4 PA.LEFS60S5 PA.LEFS60S6 PA.LEFS60S7 PA.LEFS60S8
1 64.2 52.0 70.9 105.0 144 170 134 96.2
2 62.6 49.5 68.8 104.0 142 168 134 95.4
3 62.7 47.7 66.2 101.0 140 167 135 96.5
4 62.4 46.3 64.4 99.3 138 166 135 96.7
5 59.9 43.7 63.2 98.8 138 164 133 94.8
6 62.3 45.7 63.7 98.7 137 166 136 96.9
BX BY BZ Bmag....nT. X datetime
1 2.64 4.98 2.25 6.07 NA 2000-05-01 00:01:38
2 2.67 5.16 2.03 6.15 NA 2000-05-01 00:01:50
3 2.52 5.35 1.88 6.21 NA 2000-05-01 00:02:02
4 2.43 5.45 1.74 6.22 NA 2000-05-01 00:02:14
5 2.53 5.46 1.46 6.19 NA 2000-05-01 00:02:26
6 2.29 5.26 1.61 5.96 NA 2000-05-01 00:02:38

dput(head(ulyDataLefs60_12))
structure(list(Year = c(2000L, 2000L, 2000L, 2000L, 2000L, 2000L
), Day = c(122L, 122L, 122L, 122L, 122L, 122L), Hour = c(0L,
0L, 0L, 0L, 0L, 0L), Min = c(1L, 1L, 2L, 2L, 2L, 2L), Sec. = c(38.01,
50.1, 2.19, 14.28, 26.38, 38.47), E1.S1 = c(3.31, 1.98, 1.98,
2.65, 3.97, 2.65), E1.S2 = c(0.662, 3.31, 1.32, 1.32, 6.62, 0.662
), E1.S3 = c(0.662, 1.98, 3.97, 2.65, 0.662, 3.31), E1.S4 = c(2.65,
1.98, 1.98, 3.31, 3.31, 1.98), E1.S5 = c(1.32, 1.98, 1.32, 2.65,
3.31, 1.32), E1.S6 = c(0, 1.32, 0.662, 1.32, 4.63, 1.98), E1.S7 = c(3.31,
4.63, 0.662, 3.97, 5.29, 1.98), E1.S8 = c(1.32, 1.32, 3.97, 2.65,
1.98, 2.65), E2.S1 = c(1.98, 1.32, 1.32, 2.65, 0, 0.662), E2.S2 = c(1.98,
0.662, 0.662, 0, 0, 1.32), E2.S3 = c(0.662, 0, 1.32, 0.662, 1.98,
1.98), E2.S4 = c(0, 3.31, 0.662, 2.65, 0.662, 1.32), E2.S5 = c(1.32,
0, 0.662, 0.662, 0, 1.32), E2.S6 = c(1.32, 0, 1.98, 1.32, 1.32,
1.32), E2.S7 = c(2.65, 1.98, 2.65, 1.32, 1.32, 1.32), E2.S8 = c(2.65,
0.662, 1.98, 0.662, 0.662, 0), E3.S1 = c(0.662, 0, 0, 0, 0.662,
1.32), E3.S2 = c(0, 0.662, 0.662, 0.662, 0, 0), E3.S3 = c(1.32,
0, 0.662, 0.662, 0, 0), E3.S4 = c(2.65, 0.662, 1.32, 0.662, 0.662,
0.662), E3.S5 = c(1.32, 1.98, 0.662, 0.662, 0, 1.32), E3.S6 = c(0,
1.98, 0, 0.662, 0.662, 0), E3.S7 = c(1.32, 0.662, 1.32, 0.662,
1.32, 0.662), E3.S8 = c(1.32, 1.32, 3.31, 0, 0.662, 0.662), E4.S1 = c(0,
0, 0.662, 0, 0, 0.662), E4.S2 = c(0, 0, 0, 0.662, 0, 1.32), E4.S3 = c(0.662,
0, 1.98, 0, 0, 0), E4.S4 = c(0.662, 0.662, 0.662, 0, 0.662, 0
), E4.S5 = c(0, 0, 0.662, 0.662, 0, 0), E4.S6 = c(0.662, 0, 0,
0.662, 0, 0), E4.S7 = c(0.662, 0.662, 0.662, 0, 0.662, 0.662),
E4.S8 = c(0, 0.662, 1.32, 0.662, 0, 0.662), FP5.S1 = c(0.331,
0.331, 0, 0, 0.331, 0.331), FP5.S2 = c(0, 0, 0, 0, 0, 0),
FP5.S3 = c(0.662, 0.662, 0.662, 0.662, 0, 0), FP5.S4 = c(0,
0, 0, 0, 0.331, 0.331), FP5.S5 = c(0.662, 0.662, 0.331, 0.331,
0.331, 0.331), FP5.S6 = c(0, 0, 0, 0, 0, 0), FP5.S7 = c(0,
0, 0, 0, 0.331, 0.331), FP5.S8 = c(0.331, 0.331, 0, 0, 0,
0), FP6.S1 = c(0, 0, 0, 0, 0, 0), FP6.S2 = c(0.331, 0.331,
0, 0, 0, 0), FP6.S3 = c(0.331, 0.331, 0.662, 0.662, 0, 0),
FP6.S4 = c(0, 0, 0, 0, 0.662, 0.662), FP6.S5 = c(0, 0, 0.662,
0.662, 0, 0), FP6.S6 = c(0, 0, 0, 0, 0.992, 0.992), FP6.S7 = c(0,
0, 0, 0, 0, 0), FP6.S8 = c(0, 0, 0.331, 0.331, 0, 0), FP7.S1 = c(0,
0, 0, 0, 0, 0), FP7.S2 = c(0.331, 0.331, 0, 0, 0, 0), FP7.S3 = c(0.331,
0.331, 0, 0, 0, 0), FP7.S4 = c(0.662, 0.662, 0.331, 0.331,
0, 0), FP7.S5 = c(0, 0, 0, 0, 0, 0), FP7.S6 = c(0, 0, 0,
0, 0.331, 0.331), FP7.S7 = c(0.331, 0.331, 0, 0, 0, 0), FP7.S8 = c(0,
0, 0, 0, 0.331, 0.331), PA.LEFS60S1 = c(64.2, 62.6, 62.7,
62.4, 59.9, 62.3), PA.LEFS60S2 = c(52, 49.5, 47.7, 46.3,
43.7, 45.7), PA.LEFS60S3 = c(70.9, 68.8, 66.2, 64.4, 63.2,
63.7), PA.LEFS60S4 = c(105, 104, 101, 99.3, 98.8, 98.7),
PA.LEFS60S5 = c(144, 142, 140, 138, 138, 137), PA.LEFS60S6 = c(170,
168, 167, 166, 164, 166), PA.LEFS60S7 = c(134, 134, 135,
135, 133, 136), PA.LEFS60S8 = c(96.2, 95.4, 96.5, 96.7, 94.8,
96.9), BX = c(2.64, 2.67, 2.52, 2.43, 2.53, 2.29), BY = c(4.98,
5.16, 5.35, 5.45, 5.46, 5.26), BZ = c(2.25, 2.03, 1.88, 1.74,
1.46, 1.61), Bmag....nT. = c(6.07, 6.15, 6.21, 6.22, 6.19,
5.96), X = c(NA, NA, NA, NA, NA, NA), datetime = structure(list(
sec = c(38, 50, 2, 14, 26, 38), min = c(1L, 1L, 2L, 2L,
2L, 2L), hour = c(0L, 0L, 0L, 0L, 0L, 0L), mday = c(1L,
1L, 1L, 1L, 1L, 1L), mon = c(4L, 4L, 4L, 4L, 4L, 4L),
year = c(100L, 100L, 100L, 100L, 100L, 100L), wday = c(1L,
1L, 1L, 1L, 1L, 1L), yday = c(121L, 121L, 121L, 121L,
121L, 121L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
), class = c("POSIXlt", "POSIXt"))), .Names = c("Year", "Day",
"Hour", "Min", "Sec.", "E1.S1", "E1.S2", "E1.S3", "E1.S4", "E1.S5",
"E1.S6", "E1.S7", "E1.S8", "E2.S1", "E2.S2", "E2.S3", "E2.S4",
"E2.S5", "E2.S6", "E2.S7", "E2.S8", "E3.S1", "E3.S2", "E3.S3",
"E3.S4", "E3.S5", "E3.S6", "E3.S7", "E3.S8", "E4.S1", "E4.S2",
"E4.S3", "E4.S4", "E4.S5", "E4.S6", "E4.S7", "E4.S8", "FP5.S1",
"FP5.S2", "FP5.S3", "FP5.S4", "FP5.S5", "FP5.S6", "FP5.S7", "FP5.S8",
"FP6.S1", "FP6.S2", "FP6.S3", "FP6.S4", "FP6.S5", "FP6.S6", "FP6.S7",
"FP6.S8", "FP7.S1", "FP7.S2", "FP7.S3", "FP7.S4", "FP7.S5", "FP7.S6",
"FP7.S7", "FP7.S8", "PA.LEFS60S1", "PA.LEFS60S2", "PA.LEFS60S3",
"PA.LEFS60S4", "PA.LEFS60S5", "PA.LEFS60S6", "PA.LEFS60S7", "PA.LEFS60S8",
"BX", "BY", "BZ", "Bmag....nT.", "X", "datetime"), row.names = c(NA,
6L), class = "data.frame")

我想要的是每隔一定行数计算一次平均值和中位数。比方说,我想得到一个新的数据框,它不再包含所有这些原始值,而是包含所有列(或至少从 E1.S1 列开始的所有列)中每 5 行的平均值或中位数。

我先参考了这个示例:Calculate means of rows,它确实让我能够对数据框的单个列计算每 N 行的结果。

ulyDataLefs60_12_avg = colSums(matrix(ulyDataLefs60_12$E1.S1, nrow=5))

问题是我想使用的 R 函数 colSums 不适用于某些字段,即 datetime 字段(原因很明显),所以我无法把它一次性应用到所有列上,从而得到一个整齐的平均值数据框。

ulyDataLefs60_12_avg = colSums(matrix(ulyDataLefs60_12, nrow=5))
Error in colSums(matrix(ulyDataLefs60_12, nrow = 5)) :
'x' must be numeric

如果结果中能保留每组 5 行起始处的日期时间字段,我就很满意了(如果能得到这 5 个值所在区间中点的日期时间就更好了),但到目前为止我还没有找到能同时做到这两点的答案。

也许这是一件很容易做的事情,但它让我头疼。

最佳答案

对于此数据:

> dput(df)
df <- structure(list(Year = c(2000L, 2000L, 2000L, 2000L, 2000L, 2000L
), Day = c(122L, 122L, 122L, 122L, 122L, 122L), Hour = c(0L,
0L, 0L, 0L, 0L, 0L), Min = c(1L, 1L, 2L, 2L, 2L, 2L), Sec. = c(38.01,
50.1, 2.19, 14.28, 26.38, 38.47), E1.S1 = c(3.31, 1.98, 1.98,
2.65, 3.97, 2.65), E1.S2 = c(0.662, 3.31, 1.32, 1.32, 6.62, 0.662
), E1.S3 = c(0.662, 1.98, 3.97, 2.65, 0.662, 3.31), E1.S4 = c(2.65,
1.98, 1.98, 3.31, 3.31, 1.98), E1.S5 = c(1.32, 1.98, 1.32, 2.65,
3.31, 1.32), E1.S6 = c(0, 1.32, 0.662, 1.32, 4.63, 1.98), E1.S7 = c(3.31,
4.63, 0.662, 3.97, 5.29, 1.98), E1.S8 = c(1.32, 1.32, 3.97, 2.65,
1.98, 2.65), E2.S1 = c(1.98, 1.32, 1.32, 2.65, 0, 0.662), E2.S2 = c(1.98,
0.662, 0.662, 0, 0, 1.32), E2.S3 = c(0.662, 0, 1.32, 0.662, 1.98,
1.98), E2.S4 = c(0, 3.31, 0.662, 2.65, 0.662, 1.32)), .Names = c("Year",
"Day", "Hour", "Min", "Sec.", "E1.S1", "E1.S2", "E1.S3", "E1.S4",
"E1.S5", "E1.S6", "E1.S7", "E1.S8", "E2.S1", "E2.S2", "E2.S3",
"E2.S4"), class = "data.frame", row.names = c("1", "2", "3",
"4", "5", "6"))

这个有效:

# Tag each row with its chunk number (rows 1-5 -> 1, rows 6-10 -> 2, ...),
# split the data frame by that tag and take the column means of every chunk.
lapply(
  split(df, f = ceiling(seq_len(nrow(df)) / 5)),
  FUN = colMeans
)
# $`1`
# Year Day Hour Min Sec. E1.S1 E1.S2 E1.S3 E1.S4 E1.S5 E1.S6 E1.S7
# 2000.0000 122.0000 0.0000 1.6000 26.1920 2.7780 2.6464 1.9848 2.6460 2.1160 1.5864 3.5724
# E1.S8 E2.S1 E2.S2 E2.S3 E2.S4
# 2.2480 1.4540 0.6608 0.9248 1.4568
#
# $`2`
# Year Day Hour Min Sec. E1.S1 E1.S2 E1.S3 E1.S4 E1.S5 E1.S6 E1.S7 E1.S8
# 2000.000 122.000 0.000 2.000 38.470 2.650 0.662 3.310 1.980 1.320 1.980 1.980 2.650
# E2.S1 E2.S2 E2.S3 E2.S4
# 0.662 1.320 1.980 1.320
#

然后您可以绑定(bind)它们:

# Stack the per-chunk column-mean vectors into a matrix, one row per chunk.
do.call(
  what = rbind,
  args = lapply(
    split(df, f = ceiling(seq_len(nrow(df)) / 5)),
    FUN = colMeans
  )
)
# Year Day Hour Min Sec. E1.S1 E1.S2 E1.S3 E1.S4 E1.S5 E1.S6 E1.S7 E1.S8 E2.S1 E2.S2 E2.S3 E2.S4
# 1 2000 122 0 1.6 26.192 2.778 2.6464 1.9848 2.646 2.116 1.5864 3.5724 2.248 1.454 0.6608 0.9248 1.4568
# 2 2000 122 0 2.0 38.470 2.650 0.6620 3.3100 1.980 1.320 1.9800 1.9800 2.650 0.662 1.3200 1.9800 1.3200

注意:最好先检查您要取 mean 的所有列是否都是 integer 或 numeric 类型,方法是:

> sapply(df, class)
# Year Day Hour Min Sec. E1.S1 E1.S2 E1.S3 E1.S4 E1.S5 E1.S6 E1.S7
# "integer" "integer" "integer" "integer" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
# E1.S8 E2.S1 E2.S2 E2.S3 E2.S4
# "numeric" "numeric" "numeric" "numeric" "numeric"

编辑:按照 OP 的评论:

# chunk label for every row of dd: rows 1-5 -> 1, rows 6-10 -> 2, ...
idx <- ceiling(seq_len(nrow(dd)) / 5)
# do colMeans on all columns except last one.
res <- lapply(split(dd[-(ncol(dd))], idx), colMeans, na.rm = TRUE)
# assign first value of "datetime" in each 5-er group as names to list
# (the positions must be derived from dd itself, not from another object,
# so that there is exactly one name per chunk)
names(res) <- dd$datetime[seq(1, nrow(dd), by=5)]
# bind them to give a matrix
res <- do.call(rbind, res)

或者,如果您想得到一个以 datetime 作为其中一列的 data.frame:

# Chunk label for every row: rows 1-5 -> 1, rows 6-10 -> 2, ...
idx <- ceiling(seq_len(nrow(dd)) / 5)
# Per-chunk column means over every column except the last one,
# stacked into a matrix with one row per chunk.
res <- do.call(
  rbind,
  lapply(split(dd[-ncol(dd)], idx), colMeans, na.rm = TRUE)
)
res <- as.data.frame(res)
# Add the first "datetime" value of each 5-row chunk as a column.
res$datetime <- dd$datetime[seq(1, nrow(dd), by = 5)]

关于r - 对于数据框的所有列,每 n 行的平均值和中值,同时将日期时间对象保留为索引,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15389441/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com