gpt4 book ai didi

r - 为什么 rle 不接受一个因素作为输入?

转载 作者:行者123 更新时间:2023-12-04 19:12:58 27 4
gpt4 key购买 nike

我很难通过这个 rle data.frame 上的函数.函数在另一组上效果很好:

fgroup <- aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), function(x) rle(x)$values)

这会产生错误:
Error in rle(x) : 'x' must be an atomic vector

样本数据:
> dput(fevents2[1:20,])
structure(list(weeks = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7"), class = "factor"), A1M.Date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("2012-05-09", "2012-05-10", "2012-05-11",
"2012-05-14", "2012-05-15", "2012-05-17", "2012-05-18", "2012-05-21",
"2012-05-22", "2012-05-24", "2012-05-25", "2012-05-28", "2012-05-29",
"2012-05-30", "2012-05-31", "2012-06-04", "2012-06-05", "2012-06-07",
"2012-06-08", "2012-06-11", "2012-06-12", "2012-06-14", "2012-06-15",
"2012-06-18", "2012-06-19", "2012-06-21", "2012-06-22"), class = "factor"),
vv = structure(c(8L, 8L, 8L, 20L, 24L, 24L, 24L, 1L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 24L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "nil"), class = "factor"),
rv = structure(c(25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
10L, 10L, 22L, 22L, 22L, 25L, 10L, 22L, 22L, 22L, 22L, 25L
), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR",
"C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ",
"G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB",
"G VR", "G VV", "nil"), class = "factor"), ja = structure(c(12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 25L, 25L,
12L, 24L, 24L, 24L, 24L, 24L, 24L), .Label = c("C AA", "C AJ",
"C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR",
"G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
aa = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), bv = structure(c(25L, 11L, 11L, 11L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L,
23L, 23L, 23L, 23L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR",
"G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
aj = structure(c(7L, 7L, 7L, 25L, 25L, 25L, 25L, 25L, 9L,
9L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 25L, 25L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), vb = structure(c(1L, 1L, 1L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 2L,
25L, 2L, 2L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR",
"G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
rj = structure(c(5L, 5L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JR", "G RJ",
"G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
rr = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), vr = structure(c(5L, 5L, 5L, 25L, 25L,
7L, 7L, 7L, 7L, 7L, 25L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR",
"C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ",
"G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB",
"G VR", "G VV", "nil"), class = "factor"), bb = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G RJ", "G RR", "G RV",
"G VB", "G VR", "G VV", "nil"), class = "factor"), jr = structure(c(25L,
25L, 10L, 10L, 22L, 22L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 5L, 5L, 5L, 5L), .Label = c("C AA", "C AJ",
"C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR",
"G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor")),
.Names = c("weeks",
"A1M.Date", "vv", "rv", "ja", "aa", "bv", "aj", "vb", "rj", "rr",
"vr", "bb", "jr"), row.names = c(NA, 20L), class = "data.frame")

数据结构:
str(fevents2)
data.frame': 1430 obs. of 14 variables:
$ weeks : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
$ A1M.Date: Factor w/ 27 levels "2012-05-09","2012-05-10",..: 1 1 1 1 1 1 1 1 1 1 ...
$ vv : Factor w/ 24 levels "C AA","C AJ",..: 8 8 8 20 24 24 24 1 13 13 ..
$ rv : Factor w/ 25 levels "C AA","C AJ",..: 25 25 25 25 25 25 25 25 10 10 ...
$ ja : Factor w/ 25 levels "C AA","C AJ",..: 12 12 12 12 12 12 12 12 12 12 ...
$ aa : Factor w/ 25 levels "C AA","C AJ",..: 2 2 2 2 2 2 2 2 25 25 ...
$ bv : Factor w/ 25 levels "C AA","C AJ",..: 25 11 11 11 23 23 23 23 23 23 ...
$ aj : Factor w/ 25 levels "C AA","C AJ",..: 7 7 7 25 25 25 25 25 9 9 ...
$ vb : Factor w/ 25 levels "C AA","C AJ",..: 1 1 1 25 25 25 25 25 25 25 ...
$ rj : Factor w/ 23 levels "C AA","C AJ",..: 5 5 16 16 16 16 16 16 16 16 ...
$ rr : Factor w/ 25 levels "C AA","C AJ",..: 3 3 3 3 3 3 3 3 3 3 ...
$ vr : Factor w/ 25 levels "C AA","C AJ",..: 5 5 5 25 25 7 7 7 7 7 ...
$ bb : Factor w/ 24 levels "C AA","C AJ",..: 4 4 4 4 4 4 4 4 4 4 ...
$ jr : Factor w/ 25 levels "C AA","C AJ",..: 25 25 10 10 22 22 25 25 25 25 ...
NULL

我知道我有 factor s,但正在转换 factor转至 numeric
as.numeric(as.character(fevents2))

或者:
sapply(fevents2, function(x) as.numeric(as.character(x)))

不能解决我的问题:
Error in fevents3[, 3:14] : incorrect number of dimensions
In addition: Warning message:
In eval.with.vis(expr, envir, enclos) : NAs introduced by coercion

这是一个样本 data.frame上的 rle功能工作:
    dput(fevents[1:20,]
structure(list(weeks = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1), A1M.Date = c("2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09"), vv = c("C RR", "C RR",
"C RR", "G RR", "nil", "nil", "nil", "C AA", "G AA", "G AA",
"G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA",
"G AA", "nil"), rv = c("nil", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "C VB", "C VB", "G VB", "G VB", "G VB", "nil",
"G VB", "G VB", "G VB", "G VB", "G VB", "nil"), ja = c("C VV",
"C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV",
"C VV", "C VV", "nil", "nil", "G VV", "G VV", "G VV", "G VV",
"G VV", "G VV", "G VV"), aa = c("C AJ", "C AJ", "C AJ", "C AJ",
"C AJ", "C AJ", "C AJ", "C AJ", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "nil", "nil"), bv = c("nil",
"C VR", "C VR", "C VR", "G VR", "G VR", "G VR", "G VR", "G VR",
"G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR",
"G VR", "G VR", "G VR"), aj = c("C RJ", "C RJ", "C RJ", "nil",
"nil", "nil", "nil", "nil", "C RV", "C RV", "G RV", "G RV", "G RV",
"G RV", "G RV", "G RV", "G RV", "G RV", "nil", "nil"), vb = c("C AA",
"C AA", "C AA", "nil", "nil", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "nil", "C AJ", "nil", "C AJ",
"C AJ"), rj = c("C JR", "C JR", "G JR", "G JR", "G JR", "G JR",
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR",
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR"), rr = c("C BB",
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB",
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB",
"C BB", "C BB", "C BB"), vr = c("C JA", "C JA", "C JA", "nil",
"nil", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "nil", "C RJ",
"C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ"
), bb = c("C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV",
"C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV",
"C BV", "C BV", "C BV", "C BV", "C BV"), jr = c("nil", "nil",
"C VB", "C VB", "G VB", "G VB", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "C JA", "C JA", "C JA", "C JA"
)), .Names = c("weeks", "A1M.Date", "vv", "rv", "ja", "aa", "bv",
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), row.names = c(NA,
20L), class = "data.frame")

str(fevents)
'data.frame': 1430 obs. of 14 variables:
$ weeks : num 1 1 1 1 1 1 1 1 1 1 ...
$ A1M.Date: chr "2012-05-09" "2012-05-09" "2012-05-09" "2012-05-09" ...
$ vv : chr "C RR" "C RR" "C RR" "G RR" ...
$ rv : chr "nil" "nil" "nil" "nil" ...
$ ja : chr "C VV" "C VV" "C VV" "C VV" ...
$ aa : chr "C AJ" "C AJ" "C AJ" "C AJ" ...
$ bv : chr "nil" "C VR" "C VR" "C VR" ...
$ aj : chr "C RJ" "C RJ" "C RJ" "nil" ...
$ vb : chr "C AA" "C AA" "C AA" "nil" ...
$ rj : chr "C JR" "C JR" "G JR" "G JR" ...
$ rr : chr "C BB" "C BB" "C BB" "C BB" ...
$ vr : chr "C JA" "C JA" "C JA" "nil" ...
$ bb : chr "C BV" "C BV" "C BV" "C BV" ...
$ jr : chr "nil" "nil" "C VB" "C VB" ...

我发现了一个非常“不优雅”的解决方法。写作 data.frame将其归档为 CSV 并使用 stringsAsFactors = FALSE 导入它.这不是我想写在我的代码中的...一定有更简单的方法来重新排列 data.frame的结构取悦 rle ?

最佳答案

问题是一个因子 * 不是 * 原子向量,因为错误清楚地说明了这一点。要么首先将所有因子转换为字符(而不是将它们强制转换为数字!),要么在您应用的匿名函数中进行转换。

所以这实现了第二个想法,工作:

aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
function(x) rle(as.character(x))$values)

在时尚之后:
> aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
+ function(x) rle(as.character(x))$values)
weeks vv.1 vv.2 vv.3 vv.4 vv.5 vv.6 rv.1 rv.2 rv.3 rv.4 rv.5 rv.6 rv.7 ja.1
1 1 C RR G RR nil C AA G AA nil nil C VB G VB nil C VB G VB nil C VV
ja.2 ja.3 ja.4 aa.1 aa.2 bv.1 bv.2 bv.3 aj.1 aj.2 aj.3 aj.4 aj.5 vb.1 vb.2
1 nil C VV G VV C AJ nil nil C VR G VR C RJ nil C RV G RV nil C AA nil
vb.3 vb.4 vb.5 rj.1 rj.2 rr vr.1 vr.2 vr.3 vr.4 vr.5 bb jr.1 jr.2 jr.3
1 C AJ nil C AJ C JR G JR C BB C JA nil C RJ nil C RJ C BV nil C VB G VB
jr.4 jr.5
1 nil C JA

虽然我不确定你期望得到什么 - 这里只有一周时间,而且 aggregaterle把所有的值(value)观都粘在一起了。你想分开吗 $values对于 fevents2 中的每个变量你正在聚合?

另一件事:
as.numeric(as.character(fevents2))不可能工作,因为数据不是数字!并且您无法将这些功能应用于数据框并获得任何您想要的东西 - 如果它们起作用的话。
sapply()事情应该工作。这是一个版本,它检查每个变量是否是一个因素,如果是,则强制它:
fevents3 <- sapply(fevents2,
function(x) if(is.factor(x)) { as.character(x) } else { x })

但请注意 sapply()简化为一个矩阵,它将改变 aggregate()派发的方法:
> class(fevents3)
[1] "matrix"

相反也许
fevents3 <- lapply(fevents2,
function(x) if(is.factor(x)) { as.character(x) } else { x })
fevents3 <- data.frame(fevents3, stringsAsFactors = FALSE)

现在如果你想申请 rle()到拆分数据的每一列并保持单独怎么样
spl <- split(fevents3, list(weeks = fevents3[, 1]))
res <- lapply(spl, function(x) lapply(x[, 3:14], function(y) rle(y)$values))

这使
> res
$`1`
$`1`$vv
[1] "C RR" "G RR" "nil" "C AA" "G AA" "nil"

$`1`$rv
[1] "nil" "C VB" "G VB" "nil" "C VB" "G VB" "nil"

$`1`$ja
[1] "C VV" "nil" "C VV" "G VV"

$`1`$aa
[1] "C AJ" "nil"

$`1`$bv
[1] "nil" "C VR" "G VR"

$`1`$aj
[1] "C RJ" "nil" "C RV" "G RV" "nil"

$`1`$vb
[1] "C AA" "nil" "C AJ" "nil" "C AJ"

$`1`$rj
[1] "C JR" "G JR"

$`1`$rr
[1] "C BB"

$`1`$vr
[1] "C JA" "nil" "C RJ" "nil" "C RJ"

$`1`$bb
[1] "C BV"

$`1`$jr
[1] "nil" "C VB" "G VB" "nil" "C JA"

这与 aggregate() 的答案相同以上,但每个 rle()输出保持分开:
> unlist(res)
1.vv1 1.vv2 1.vv3 1.vv4 1.vv5 1.vv6 1.rv1 1.rv2 1.rv3 1.rv4 1.rv5
"C RR" "G RR" "nil" "C AA" "G AA" "nil" "nil" "C VB" "G VB" "nil" "C VB"
1.rv6 1.rv7 1.ja1 1.ja2 1.ja3 1.ja4 1.aa1 1.aa2 1.bv1 1.bv2 1.bv3
"G VB" "nil" "C VV" "nil" "C VV" "G VV" "C AJ" "nil" "nil" "C VR" "G VR"
1.aj1 1.aj2 1.aj3 1.aj4 1.aj5 1.vb1 1.vb2 1.vb3 1.vb4 1.vb5 1.rj1
"C RJ" "nil" "C RV" "G RV" "nil" "C AA" "nil" "C AJ" "nil" "C AJ" "C JR"
1.rj2 1.rr 1.vr1 1.vr2 1.vr3 1.vr4 1.vr5 1.bb 1.jr1 1.jr2 1.jr3
"G JR" "C BB" "C JA" "nil" "C RJ" "nil" "C RJ" "C BV" "nil" "C VB" "G VB"
1.jr4 1.jr5
"nil" "C JA"
> aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
+ function(x) rle(as.character(x))$values)
weeks vv.1 vv.2 vv.3 vv.4 vv.5 vv.6 rv.1 rv.2 rv.3 rv.4 rv.5 rv.6 rv.7 ja.1
1 1 C RR G RR nil C AA G AA nil nil C VB G VB nil C VB G VB nil C VV
ja.2 ja.3 ja.4 aa.1 aa.2 bv.1 bv.2 bv.3 aj.1 aj.2 aj.3 aj.4 aj.5 vb.1 vb.2
1 nil C VV G VV C AJ nil nil C VR G VR C RJ nil C RV G RV nil C AA nil
vb.3 vb.4 vb.5 rj.1 rj.2 rr vr.1 vr.2 vr.3 vr.4 vr.5 bb jr.1 jr.2 jr.3
1 C AJ nil C AJ C JR G JR C BB C JA nil C RJ nil C RJ C BV nil C VB G VB
jr.4 jr.5
1 nil C JA

[注意:这仅在此处成立,因为您显示的数据片段只有一周。我不记得怎么了 unlist(res))会看是否超过一周。]

关于r - 为什么 rle 不接受一个因素作为输入?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/13311457/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com