gpt4 book ai didi

r - 比较列表中的元素

转载 作者:行者123 更新时间:2023-12-04 11:15:12 25 4
gpt4 key购买 nike

我有一个包含 22 个列表的列表,其中包含两个向量。每个向量在第二个列表中的长度相等,但彼此之间的长度不相等。最后查看 dput 的输出。

我想通过 (i) 找到最长的向量,(ii) 将 2 个向量列表中的所有其他第一个向量与最长向量中的第一个向量进行比较,并用 NA 值填充任何遗漏来构建一个 data.frame。我想将生成的第二个向量(包括它们的 NA)组合到一个 data.frame 中。

(数据集是相对保留时间和面积百分比。我想将所有色谱图相互比较)。

我可以使用以下代码回答问题的第一部分 (i),

max(sapply(lapply(A, function(x) x[1,]),length))

这告诉我列表中的最后一个数据集是最长的。

现在我想让 NA 坚持使用动态时间扭曲 (dtw()) 包并应用这里给出的答案(第 3 个)dtw link但我发现它似乎只适用于相同长度的向量。

structure(list(chrom_a = structure(c(0.4663, 11.16, 0.4955, 0.82, 
0.8744, 4.92, 0.9101, 1.55, 1, 73.91, 1.2862, 5, 1.3198, 1.06,
1.4302, 1.57), .Dim = c(2L, 8L), .Dimnames = list(c("rel.ret",
"rel.area"), c("2", "3", "4", "5", "6", "7", "8", "9"))), chrom_b =structure(c(0.465,
8.6, 0.4938, 0.64, 0.8714, 5.1, 0.907, 1.49, 1, 76.22, 1.2813,
4.92, 1.3144, 1.03, 1.3704, 0.56, 1.4245, 1.45), .Dim = c(2L,
9L), .Dimnames = list(c("rel.ret", "rel.area"), c("3", "4", "5",
"6", "7", "8", "9", "10", "11"))), chrom_c = structure(c(0.4654,
7.58, 0.4943, 0.58, 0.8724, 5.12, 0.9082, 1.51, 1, 77.05, 1.2835,
4.97, 1.3168, 1.07, 1.3732, 0.57, 1.4272, 1.54), .Dim = c(2L,
9L), .Dimnames = list(c("rel.ret", "rel.area"), c("2", "3", "4",
"5", "6", "7", "8", "9", "10"))), chrom_d = structure(c(0.3841,
0.28, 0.4638, 6.07, 0.4926, 0.48, 0.7903, 0.29, 0.8694, 4.79,
0.9049, 1.38, 1, 67, 1.2783, 4.62, 1.3116, 0.95, 1.3681, 0.51,
1.4215, 13.29, 3.5744, 0.35), .Dim = c(2L, 12L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14"))), chrom_e = structure(c(0.5304,
4.68, 0.8727, 5.34, 0.9086, 1.57, 1, 79.92, 1.2838, 5.22, 1.3168,
1.13, 1.3742, 0.58, 1.4279, 1.57), .Dim = c(2L, 8L), .Dimnames = list(
c("rel.ret", "rel.area"), c("2", "3", "4", "5", "6", "7",
"8", "9"))), chrom_f = structure(c(0.3718, 0.36, 0.4628,
3.38, 0.4915, 0.32, 0.789, 0.42, 0.8685, 6.15, 0.904, 1.72, 0.9399,
0.34, 1, 74.63, 1.0651, 0.5, 1.1014, 0.35, 1.1266, 0.29, 1.1609,
0.41, 1.2108, 0.32, 1.2774, 6.06, 1.3103, 1.36, 1.3667, 0.96,
1.4201, 1.63, 3.5693, 0.48, 3.6497, 0.34), .Dim = c(2L, 19L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20", "21"))), chrom_g = structure(c(0.465, 1.44, 0.8708,
5.4, 0.9064, 1.53, 1, 82.64, 1.2805, 5.3, 1.3134, 1.16, 1.37,
0.55, 1.4239, 1.54, 3.5792, 0.44), .Dim = c(2L, 9L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11"))), chrom_h = structure(c(0.4644, 7.08, 0.4932,
0.55, 0.8708, 5.15, 0.9064, 1.49, 1, 77.57, 1.2805, 5.04, 1.3142,
1.08, 1.3704, 0.53, 1.4245, 1.51), .Dim = c(2L, 9L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11"))), chrom_i = structure(c(0.4641, 6.82, 0.493,
0.54, 0.8714, 5.24, 0.9075, 1.46, 1, 78.08, 1.2826, 4.91, 1.3164,
1.02, 1.3722, 0.51, 1.4266, 1.41), .Dim = c(2L, 9L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11"))), chrom_j = structure(c(74.173, 31.69,
0.3729, 0.25, 0.4637, 4.36, 0.4925, 0.35, 0.7904, 0.21, 0.8697,
3.9, 0.9055, 1.06, 1, 52.76, 1.279, 3.56, 1.3119, 0.69, 1.369,
0.3, 1.4224, 0.87), .Dim = c(2L, 12L), .Dimnames = list(c("rel.ret",
"rel.area"), c("2", "3", "4", "5", "6", "7", "8", "9", "10",
"11", "12", "13"))), chrom_k = structure(c(0.4661, 5.91, 0.8743,
5.23, 0.9105, 1.53, 1, 79.89, 1.2863, 5.11, 1.3822, 0.99, 1.0169,
1.33), .Dim = c(2L, 7L), .Dimnames = list(c("rel.ret", "rel.area"
), c("2", "3", "4", "5", "6", "7", "8"))), chrom_l = structure(logical(0), .Dim = c(2L,
0L), .Dimnames = list(c("rel.ret", "rel.area"), NULL)), chrom_m = structure(logical(0), .Dim = c(2L,
0L), .Dimnames = list(c("rel.ret", "rel.area"), NULL)), chrom_n = structure(c(0.3793,
0.37, 0.4604, 6.12, 0.4896, 0.49, 0.8709, 8.41, 0.9072, 1.46,
1, 76.51, 1.2844, 4.75, 1.3188, 0.83, 1.429, 1.06), .Dim = c(2L,
9L), .Dimnames = list(c("rel.ret", "rel.area"), c("3", "4", "5",
"6", "7", "8", "9", "10", "11"))), chrom_o = structure(c(0.1252,
43.23, 0.4625, 2.06, 0.8728, 3, 0.9094, 0.88, 1, 46.69, 1.2865,
2.91, 1.3209, 0.54, 1.4306, 0.7), .Dim = c(2L, 8L), .Dimnames = list(
c("rel.ret", "rel.area"), c("2", "3", "4", "5", "6", "7",
"8", "9"))), chrom_p = structure(c(0.1248, 31.52, 0.3796,
0.23, 0.4608, 1.18, 0.7891, 0.25, 0.8687, 3.88, 0.905, 1.04,
1, 55.93, 1.2808, 3.72, 1.3149, 0.8, 1.3709, 0.38, 1.4244, 1.06
), .Dim = c(2L, 11L), .Dimnames = list(c("rel.ret", "rel.area"
), c("2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"
))), chrom_q = structure(c(0.4618, 0.74, 0.8702, 5.49, 0.9062,
1.44, 1, 83.93, 1.2837, 5.21, 1.3181, 1.13, 1.3738, 0.53, 1.428,
1.53), .Dim = c(2L, 8L), .Dimnames = list(c("rel.ret", "rel.area"
), c("3", "4", "5", "6", "7", "8", "9", "10"))), chrom_r = structure(c(0.8715,
5.39, 0.9072, 1.4, 1, 84.41, 1.2821, 5.18, 1.3151, 1.14, 1.3722,
0.63, 1.4258, 1.46, 3.5918, 0.39), .Dim = c(2L, 8L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10"))), chrom_s = structure(c(0.3722, 0.38, 0.4636,
6.12, 0.492, 0.5, 0.7898, 0.42, 0.8687, 5.94, 0.9047, 1.69, 0.9403,
0.33, 1, 73.12, 1.0642, 0.44, 1.1033, 0.32, 1.1276, 0.25, 1.162,
0.33, 1.2787, 5.87, 1.3119, 1.32, 1.3685, 0.76, 1.422, 1.5, 3.5817,
0.42, 3.662, 0.3), .Dim = c(2L, 18L), .Dimnames = list(c("rel.ret",
"rel.area"), c("3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20"))), chrom_t = structure(c(0.3738,
0.37, 0.4651, 1.51, 0.7917, 0.34, 0.8711, 5.49, 0.9071, 1.51,
1, 81.91, 1.2834, 5.33, 1.3166, 1.13, 1.3736, 0.57, 1.4283, 1.41,
3.6102, 0.43), .Dim = c(2L, 11L), .Dimnames = list(c("rel.ret",
"rel.area"), c("3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13"))), chrom_u = structure(c(0.3844, 0.38, 0.4647, 2.2,
0.7909, 0.37, 0.8699, 5.99, 0.906, 1.63, 1, 79.36, 1.2817, 5.86,
1.3149, 1.23, 1.372, 0.67, 1.4257, 1.56, 3.6037, 0.45, 3.684,
0.3), .Dim = c(2L, 12L), .Dimnames = list(c("rel.ret", "rel.area"
), c("3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14"))), chrom_v = structure(c(0.3836, 0.45, 0.4644, 0.62, 0.7894,
0.58, 0.8685, 7.17, 0.9043, 1.85, 0.941, 0.43, 1, 73.3, 1.0663,
0.31, 1.0849, 0.23, 1.1035, 0.4, 1.1284, 0.31, 1.1625, 0.5, 1.2133,
0.36, 1.2381, 0.32, 1.2795, 7.2, 1.3126, 1.64, 1.37, 1.13, 1.423,
2.05, 3.5963, 0.71, 3.6776, 0.45), .Dim = c(2L, 20L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20", "21", "22")))), .Names = c("chrom_a", "chrom_b",
"chrom_c", "chrom_d", "chrom_e", "chrom_f", "chrom_g", "chrom_h",
"chrom_i", "chrom_j", "chrom_k", "chrom_l", "chrom_m", "chrom_n",
"chrom_o", "chrom_p", "chrom_q", "chrom_r", "chrom_s", "chrom_t",
"chrom_u", "chrom_v"))

最佳答案

我采用的方式与您最初采用的方式略有不同,但最终目标相同。我的假设是,您有一个色谱图列表,其中的峰具有保留时间和面积,并且您希望最终得到一种比较不同色谱图中的峰的方法。

首先转换为长data.table:

chroms_long <- rbindlist(lapply(chroms, function(x) {
as.data.table(t(x))
}))
chroms_long[, chrom := rep(names(chroms), lengths(chroms) / 2)]

然后使用层次聚类进行分组。此调用中的截止值 h 需要针对基础数据进行更改,但对于此数据集,0.015 似乎是正确的。

setkey(chroms_long, rel.ret, chrom)
chroms_long[, group := cutree(hclust(dist(rel.ret)), h = 0.015)]

如果您现在想为每个可能的色谱图/组创建一行:

setkey(chroms_long, chrom, group)
chroms_long <- chroms_long[CJ(unique(chrom), unique(group))]

如果您还想知道每组的平均保留时间:

chroms_long[, mean_rel.ret := mean(rel.ret, na.rm = TRUE), by = group]

如果你想要宽格式的输出:

dcast.data.table(chroms_long, chrom ~ group, value.var = "rel.area")

如果您希望将平均保留作为列标题而不是组数:

dcast.data.table(chroms_long[, .(chrom, col = sprintf("%0.2f", mean_rel.ret), rel.area)],
chrom ~ col, value.var = "rel.area")

如果你想恢复原来的格式:

sapply(unique(chroms_long$chrom),
function(x) t(data.matrix(chroms_long[x, .(rel.ret, rel.area)])),
simplify = FALSE)

请注意,这使用 sapply 来实现其 USE.NAMES 功能,而不是为了简化列表的输出。

关于r - 比较列表中的元素,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31369802/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com