gpt4 book ai didi

r - 对来自不同数据的元素进行分组

转载 作者:行者123 更新时间:2023-12-04 13:36:45 25 4
gpt4 key购买 nike

在我的工作中,我试图找出哪些基因通常会聚集在一起。所以我设置了一些实验,现在尝试分析数据。我已经写了一个很好的脚本来分析它,但仍然不够。

这次我想做的是分析几个表并确定哪些基因通常在一起 - 在同一个集群中。

这是我的数据:

第一张表:

    > dput(tbl_col_clu1[1:20,])
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), `20` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `52.5` = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `81` = c(0, 0, 0, 0,
0, 0, 0.64209043, 0, 0, 0, 0, 0, 0, 0, 0.636411741, 0.183490041,
0, 0, 0, 0), `110` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), `140.5` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `189` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.84958569, 0, 0, 0, 0, 0), `222.5` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0.37119221, 0, 0, 0, 1, 0, 0, 0, 0,
0), `278` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), `340` = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), `397` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `453.5` = c(0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `529` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `580` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `630.5` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `683.5` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `735.5` = c(0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `784` = c(0,
0, 0, 0, 0, 0, 0, 0.399952462, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0.959211661, 1), `832` = c(0, 0.1266780707, 0, 0, 0, 0, 0, 0.2132893016,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.959211661, 1), `882.5` = c(0,
0.12667807, 0, 0, 0, 1, 0, 0.08480435, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0.70163097), `926.5` = c(0, 1, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), `973` = c(0, 0.12621196, 0,
0, 0, 0, 0, 0.11813646, 0, 0, 0, 1, 0, 0, 0.59389934, 1, 0, 0,
0, 0), `1108` = c(0, 0.092444384, 0, 0, 0, 0, 0, 0.115758222,
0, 0, 0, 0.925835779, 0, 0, 1, 0.303482426, 0.848464317, 0, 0,
0), `1200` = c(0, 0.120055749, 0, 1, 0, 0, 0, 0.150055416, 0,
0, 0, 0.558015841, 0, 0, 0.796949668, 0.276321753, 1, 0, 0, 0
), Clusters = structure(c(1L, 64L, 45L, 102L, 11L, 77L, 170L,
55L, 59L, 316L, 316L, 98L, 90L, 77L, 232L, 178L, 101L, 50L, 51L,
51L), .Label = c("10", "10,13,15", "10,15", "10,15,16", "10,20,21,22,23,24",
"10,22,23,24", "11", "11,12,13,14,15", "11,12,13,14,15,16", "11,12,13,14,15,16,17",
"12", "12,13", "12,13,14", "12,13,14,15", "12,13,14,15,16", "12,13,14,15,16,17",
"12,13,14,15,16,17,18,19,20,21,22,23,24", "12,13,15", "12,13,17",
"13", "13,14", "13,14,15", "13,14,15,16", "13,14,15,16,17", "13,15",
"13,15,16,17", "14", "14,15", "14,15,16", "14,15,16,17", "14,15,16,17,18,19,20,21,22,23,24",
"14,19", "15", "15,16", "15,16,17", "15,16,17,18,19,20,21,22,23,24",
"15,16,17,19,20,21,22,23,24", "15,17", "15,17,24", "15,22,23,24",
"15,23", "15,24", "16", "16,17", "17", "17,18,19,20", "17,18,19,20,21,22,23,24",
"17,21,22,23,24", "18", "18,19", "18,19,20", "18,19,20,21", "18,19,20,21,22",
"18,19,20,21,22,23", "18,19,20,21,22,23,24", "18,19,21", "18,19,22,23",
"18,20", "19", "19,20", "19,20,21", "19,20,21,22", "19,20,21,22,23",
"19,20,21,22,23,24", "19,20,22", "19,20,22,23", "19,20,22,23,24",
"19,20,23", "19,21", "19,22", "19,23", "19,24", "2", "2,18,19,20",
"2,19,20", "2,3,4", "20", "20,21", "20,21,22", "20,21,22,23",
"20,21,22,23,24", "20,21,23", "20,22", "20,22,23", "20,22,23,24",
"20,22,24", "20,23", "20,23,24", "20,24", "21", "21,22", "21,22,23",
"21,22,23,24", "21,23,24", "21,24", "22", "22,23", "22,23,24",
"22,24", "23", "23,24", "24", "3", "3,10", "3,18,19,20", "3,18,19,20,21,22,23,24",
"3,19,20", "3,19,20,21", "3,19,20,22,23,24", "3,20,21,22,23,24",
"3,20,22,23,24", "3,21,23,24", "3,22,23,24", "3,22,24", "3,23",
"3,23,24", "3,24", "3,4", "3,4,10", "3,4,18,19", "3,4,18,19,20",
"3,4,18,19,20,21,22,23", "3,4,18,19,20,21,22,23,24", "3,4,19,20,21",
"3,4,21", "3,4,21,22,23", "3,4,21,22,23,24", "3,4,22,23", "3,4,22,23,24",
"3,4,22,24", "3,4,23,24", "3,4,24", "3,4,5", "3,4,5,10", "3,4,5,10,23,24",
"3,4,5,20", "3,4,5,22,23,24", "3,4,5,23,24", "3,4,5,24", "3,4,5,6",
"3,4,5,6,10", "3,4,5,6,20,22,23,24", "3,4,5,6,7", "3,4,5,6,7,10",
"3,4,5,6,7,24", "3,4,5,6,7,8", "3,4,5,6,7,8,10", "3,4,5,6,7,8,10,13",
"3,4,5,6,7,8,10,22,23,24", "3,4,5,6,7,8,12", "3,4,5,6,7,8,15",
"3,4,5,6,7,8,18,19,20,21,22,23,24", "3,4,5,6,7,8,22,23,24", "3,4,5,6,7,8,9,10",
"3,4,5,6,7,8,9,10,11,12", "3,4,5,6,7,8,9,10,11,12,13,14,15",
"3,4,5,6,7,8,9,10,11,12,13,14,15,16,17", "3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24",
"3,4,5,6,7,8,9,10,11,14,15", "3,4,5,6,7,8,9,10,19,20,21,22,23,24",
"3,4,5,6,7,8,9,10,22,23,24", "3,4,6", "3,4,6,7,20,21,22,23,24",
"3,4,7", "3,4,7,8", "3,5,6,7,8", "3,5,8", "3,7", "3,7,19,20,22,23",
"4", "4,10", "4,10,24", "4,18,19,20", "4,19,20", "4,20,21,22",
"4,20,21,22,23,24", "4,20,22,23,24", "4,22,23,24", "4,23,24",
"4,24", "4,5", "4,5,10", "4,5,10,21", "4,5,10,23,24", "4,5,19,20,21,22,23",
"4,5,19,20,22,23,24", "4,5,20,21,22,23,24", "4,5,20,22,23,24",
"4,5,22,23,24", "4,5,24", "4,5,6", "4,5,6,10", "4,5,6,10,20,22,23,24",
"4,5,6,19", "4,5,6,22,23,24", "4,5,6,7", "4,5,6,7,10", "4,5,6,7,19,20,21,22,23,24",
"4,5,6,7,22,23,24", "4,5,6,7,8", "4,5,6,7,8,10", "4,5,6,7,8,10,19,20,21,22,23,24",
"4,5,6,7,8,10,20,21,22,23,24", "4,5,6,7,8,10,21,22,23,24", "4,5,6,7,8,10,22,23,24",
"4,5,6,7,8,10,23,24", "4,5,6,7,8,15", "4,5,6,7,8,17,18,19,20,21,22,23,24",
"4,5,6,7,8,19,20", "4,5,6,7,8,19,20,21,22,23,24", "4,5,6,7,8,20,21,22,23,24",
"4,5,6,7,8,21,22,23,24", "4,5,6,7,8,22,23,24", "4,5,6,7,8,9,10",
"4,5,6,7,8,9,10,11,12", "4,5,6,7,8,9,10,11,12,13,14,15", "4,5,6,7,8,9,10,11,12,13,14,15,16,17",
"4,5,6,7,8,9,10,11,12,13,14,15,16,17,18", "4,5,6,7,8,9,10,12,13",
"4,5,6,7,8,9,14,15,16", "4,5,7,9", "4,5,8,22", "4,6", "4,6,7,22,23,24",
"4,6,7,23,24", "4,6,7,8,15,17", "4,6,7,8,23,24", "4,7", "4,7,20,21",
"4,7,21,22,23,24", "4,7,8", "4,7,8,22,23,24", "5", "5,10", "5,17",
"5,18,19,20,21,22,23", "5,19,20,21,22,23,24", "5,20", "5,22,23,24",
"5,24", "5,6", "5,6,10", "5,6,7", "5,6,7,10", "5,6,7,10,19",
"5,6,7,22,23,24", "5,6,7,8", "5,6,7,8,10", "5,6,7,8,10,15", "5,6,7,8,10,22,23,24",
"5,6,7,8,15", "5,6,7,8,18,19,20,21,22,23,24", "5,6,7,8,21,22,23,24",
"5,6,7,8,22,23,24", "5,6,7,8,9", "5,6,7,8,9,10", "5,6,7,8,9,10,11,12,13",
"5,6,7,8,9,10,11,12,13,14,15", "5,6,7,8,9,12", "5,6,7,8,9,13",
"5,7", "5,7,8", "5,8", "6", "6,10", "6,21,22,23", "6,22", "6,22,23,24",
"6,7", "6,7,10,17", "6,7,22,23,24", "6,7,23,24", "6,7,24", "6,7,8",
"6,7,8,10", "6,7,8,13,14,15,16,17", "6,7,8,15", "6,7,8,19,20",
"6,7,8,20,21,22,23,24", "6,7,8,21,22,23,24", "6,7,8,23,24", "6,7,8,9",
"6,7,8,9,10", "6,7,8,9,10,11,12", "6,7,8,9,10,11,12,13,14,15,16,17",
"6,7,8,9,10,15,16", "6,7,8,9,10,18,19,20,21,22,23,24", "6,7,8,9,15",
"6,8", "7", "7,15", "7,15,17", "7,16,18,21", "7,17", "7,19,20",
"7,19,20,21,22", "7,20,21,22,23,24", "7,20,22,23,24", "7,22,23,24",
"7,24", "7,8", "7,8,10", "7,8,10,22,23,24", "7,8,13,15", "7,8,14",
"7,8,15", "7,8,15,16", "7,8,15,23", "7,8,20", "7,8,22", "7,8,23",
"7,8,9", "7,8,9,10", "7,8,9,13", "7,8,9,15,16,17", "8", "8,10",
"8,15", "8,17", "8,22", "8,24", "8,9", "8,9,10", "9", "9,10,11,12,13,14,15,16,17"
), class = "factor")), .Names = c("10", "20", "52.5", "81", "110",
"140.5", "189", "222.5", "278", "340", "397", "453.5", "529",
"580", "630.5", "683.5", "735.5", "784", "832", "882.5", "926.5",
"973", "1108", "1200", "Clusters"), row.names = c("at1g01050.1",
"at1g01080.1", "at1g01090.1", "at1g01220.1", "at1g01320.2", "at1g01420.1",
"at1g01710.1", "at1g01800.1", "at1g01920.2", "at1g01940.1", "at1g01960.1",
"at1g02020.2", "at1g02100.2", "at1g02140.1", "at1g02150.1", "at1g02500.2",
"at1g02560.1", "at1g02880.3", "at1g02920.1", "at1g02930.2"), class = "data.frame")

第二个表:
> dput(tbl_col_clu2[1:20,])
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), `20` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `52.5` = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `81` = c(0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `110` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `140.5` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `189` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `222.5` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `278` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), `340` = c(0,
0, 0, 0, 0, 0, 0.583163048, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
1, 0.218194067), `397` = c(0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
0, 0.63953839, 0, 1, 0, 0, 0, 1), `453.5` = c(0, 0.66069369,
0, 0, 0, 1, 0.57541627, 1, 1, 0, 0, 0, 1, 0.64615661, 0, 0.45209671,
0, 0, 0, 0.17022498), `529` = c(0, 0.521435654, 0, 0, 1, 0, 0.175996209,
0, 0, 0, 1, 0, 0, 0, 0, 0.886059888, 0, 0, 0, 0.17022498), `580` = c(0,
0.437291195, 0, 0, 1, 0, 0.20731698, 0, 0, 0, 1, 0, 0, 0, 0,
0.719755907, 0, 0, 0, 0.033248127), `630.5` = c(0, 0.52204783,
0, 0, 0, 0, 0.48815538, 0, 0, 0, 0, 1, 0, 0, 0, 0.82709638, 0,
0, 0, 0.09539534), `683.5` = c(0, 0.52429838, 0, 0, 0, 0, 0.59605685,
0, 0, 0, 0, 0, 0, 0, 0, 0.27845748, 0.28224351, 0, 0, 0), `735.5` = c(1,
0.3768651, 0, 1, 0, 0, 0.51381348, 0, 0, 0, 0, 0, 0, 0, 0, 0.39914361,
0.22206677, 0, 0, 0), `784` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0), `832` = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.16189002, 0, 0, 0), `882.5` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `926.5` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), `973` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.86100786, 0, 0, 0, 0,
0), `1108` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), `1200` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), Clusters = structure(c(168L, 32L, 246L,
168L, 81L, 44L, 8L, 44L, 27L, 318L, 81L, 132L, 15L, 3L, 219L,
32L, 156L, 318L, 1L, 6L), .Label = c("10", "10,11", "10,11,12",
"10,11,12,13", "10,11,12,13,14", "10,11,12,13,14,15", "10,11,12,13,14,15,16",
"10,11,12,13,14,15,16,17", "10,11,12,13,14,15,16,17,18,19", "10,11,12,13,14,15,16,17,18,19,20",
"10,11,12,13,14,15,16,17,18,19,20,21", "10,11,12,13,14,16", "10,11,12,13,15,16,17,18,19,20,21",
"10,11,12,13,19", "10,12", "10,12,13", "10,12,13,14", "10,12,13,14,15",
"10,12,13,14,15,16,17", "10,12,13,15", "10,12,21", "10,13", "10,13,14",
"10,17,18", "10,20", "11", "11,12", "11,12,13", "11,12,13,14",
"11,12,13,14,15", "11,12,13,14,15,16", "11,12,13,14,15,16,17",
"11,12,13,14,15,16,17,18,19", "11,12,13,14,15,16,17,18,19,20",
"11,12,13,14,15,16,17,18,19,20,21,22,23", "11,12,13,14,15,16,17,18,19,20,21,22,23,24",
"11,12,13,14,15,16,17,18,19,21,22", "11,12,13,14,15,16,18", "11,12,13,17,18,19",
"11,12,14", "11,13", "11,13,14,15,16", "11,15", "12", "12,13",
"12,13,14", "12,13,14,15", "12,13,14,15,16", "12,13,14,15,16,17",
"12,13,14,15,16,17,18", "12,13,14,15,16,17,18,19", "12,13,14,15,16,17,18,19,20",
"12,13,14,15,16,17,18,19,20,21", "12,13,14,15,16,17,18,19,20,21,22",
"12,13,14,15,16,17,18,19,20,21,22,23", "12,13,14,15,16,17,18,19,20,21,22,23,24",
"12,13,14,15,16,17,18,19,23,24", "12,13,14,15,16,17,19", "12,13,14,15,16,17,19,20,21",
"12,13,14,15,16,17,21", "12,13,14,15,16,18", "12,13,14,15,17",
"12,13,14,16,17,19", "12,13,14,18", "12,13,15", "12,13,16", "12,13,16,17,18,19",
"12,13,16,19", "12,13,17", "12,13,21,22,23", "12,14", "12,14,15",
"12,14,15,16", "12,14,15,17,19", "12,15", "12,15,16,17", "12,16,17",
"12,20", "12,21,23", "13", "13,14", "13,14,15", "13,14,15,16",
"13,14,15,16,17", "13,14,15,16,17,18", "13,14,15,16,17,18,19",
"13,14,15,16,17,18,19,20", "13,14,15,16,17,18,19,20,21", "13,14,15,16,17,18,19,20,21,22",
"13,14,15,16,17,18,19,20,21,22,23", "13,14,15,16,17,18,19,20,21,22,23,24",
"13,14,15,16,17,18,19,21", "13,14,15,16,17,18,19,21,22,23", "13,14,15,16,17,19",
"13,14,15,16,17,21", "13,14,15,16,18,23", "13,14,17", "13,14,19,20,21,22,23",
"13,14,23,24", "13,15", "13,15,16", "13,15,16,18,19", "13,15,17",
"13,16,17", "13,17", "13,17,19", "13,19", "13,21", "14", "14,15",
"14,15,16", "14,15,16,17", "14,15,16,17,18", "14,15,16,17,18,19",
"14,15,16,17,18,19,20", "14,15,16,17,18,19,20,21", "14,15,16,17,18,19,20,21,22",
"14,15,16,17,18,19,20,21,22,23", "14,15,16,17,18,19,20,21,22,23,24",
"14,15,16,17,18,19,20,22,23,24", "14,15,16,17,19", "14,15,16,17,19,20",
"14,15,16,17,19,20,21", "14,15,16,17,22", "14,15,16,19", "14,15,17",
"14,15,19", "14,17", "14,17,18,19", "14,19", "14,21", "15", "15,16",
"15,16,17", "15,16,17,18", "15,16,17,18,19", "15,16,17,18,19,20",
"15,16,17,18,19,20,21", "15,16,17,18,19,20,21,22,23", "15,16,17,18,19,20,21,22,23,24",
"15,16,17,19", "15,16,17,19,20,21", "15,16,17,19,24", "15,16,17,20,21",
"15,16,17,21", "15,16,17,23", "15,16,18,19", "15,16,19,20", "15,17",
"15,18,19,20", "15,18,19,20,21", "15,19", "16", "16,17", "16,17,18",
"16,17,18,19", "16,17,18,19,20", "16,17,18,19,20,21", "16,17,18,19,20,21,22",
"16,17,18,19,20,21,22,23", "16,17,18,19,20,21,22,23,24", "16,17,19",
"16,17,19,20", "16,17,19,20,21", "16,17,19,21", "16,17,23", "16,19",
"17", "17,18", "17,18,19", "17,18,19,20", "17,18,19,20,21", "17,18,19,20,21,22",
"17,18,19,20,21,22,23", "17,18,19,20,21,22,23,24", "17,18,19,21",
"17,19", "17,19,20", "17,19,20,21", "17,19,20,21,22,23,24", "17,19,23",
"17,20,21", "17,20,21,23", "17,21,22", "17,23", "17,24", "18",
"18,19", "18,19,20", "18,19,20,21", "18,19,20,21,22", "18,19,20,21,22,23",
"18,19,20,21,22,23,24", "18,19,20,21,23", "18,20", "19", "19,20",
"19,20,21", "19,20,21,22", "19,20,21,22,23", "19,20,21,22,23,24",
"19,20,21,23,24", "19,20,22", "19,21", "19,22", "19,23", "2",
"2,17", "2,3,4,5,6", "2,3,4,5,6,7", "20", "20,21", "20,21,22",
"20,21,22,23", "20,21,22,23,24", "20,21,23", "20,21,23,24", "21",
"21,22", "21,22,23", "21,22,23,24", "21,23", "22", "22,23", "22,23,24",
"23", "23,24", "24", "3", "3,23,24", "3,4", "3,4,23,24", "3,4,5",
"3,4,5,6", "3,4,5,6,13,14,15,16,17,18,19,20,21,22,23,24", "3,4,5,6,7",
"3,4,5,6,7,8,9", "3,4,5,6,7,8,9,10,11,12,13,14,15,16,17", "3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24",
"3,4,5,6,7,8,9,20,21,22,23,24", "3,4,5,6,7,8,9,21,22,23,24",
"3,4,5,6,8,9", "3,4,5,7,8,9,15,16,17,18,19,20,21,22,23", "3,4,6,12,13,14,15,16,17,18,19,20,21,22,23,24",
"3,8,9,10,11,12,13,14,15,16,17,18,19,20", "4", "4,17,18,19,20,21,22,23,24",
"4,19,20,21,22,23,24", "4,21", "4,22,23,24", "4,5,17,18,19,20,21,22,23,24",
"4,5,21,22,23,24", "4,5,6", "4,5,6,22,23,24", "4,5,6,7,8,9",
"4,5,6,7,8,9,10", "4,5,6,7,8,9,10,15,16,17,18,19,20,21,22,23,24",
"4,5,6,7,8,9,12,13,14,15,16,17,18,19,20,21,22,23,24", "4,5,6,7,8,9,13",
"4,5,6,7,8,9,14,15,16,17,18,19,20,21,22,23,24", "4,5,6,7,8,9,17,18,19,20,21,22,23,24",
"4,5,6,7,8,9,19,20,21,22,23,24", "4,5,6,7,8,9,19,23,24", "4,5,6,7,8,9,23,24",
"4,5,7,8,9", "4,8,9,12,13,14,15,16,17,18,19,20,21,22,23,24",
"4,8,9,23,24", "5", "5,22,23", "5,6", "5,6,15,16,17,18,19,20,21,22,23,24",
"5,6,19,20,21,22,23,24", "5,6,24", "5,6,7", "5,6,7,8", "5,6,7,8,19,20,21,22,23,24",
"5,6,7,8,9", "5,6,7,8,9,10,11,12,13", "5,6,7,8,9,10,11,12,13,14,15,16,17",
"5,6,7,8,9,15,23,24", "5,6,9", "5,7", "5,8,9", "6", "6,15,16,17,18,19,20,21,22,23,24",
"6,19,20,21,22,23,24", "6,20,21,22,23,24", "6,21,22,23,24", "6,7",
"6,7,8", "6,7,8,9", "6,7,8,9,15,16,17,18,19,20,21,22,23,24",
"6,7,8,9,23,24", "6,7,9", "6,8,15,16,17,18,19,20,21,22,23", "6,8,9",
"6,9", "7", "7,14,24", "7,8,9", "7,8,9,10,11,12,13,14,15", "7,8,9,20,21,22,23,24",
"7,8,9,23,24", "7,9", "7,9,10", "8", "8,19,20,21", "8,19,20,21,22,23,24",
"8,9", "8,9,10,11,12,13,14,15,16,17", "8,9,10,17,18,19,20,21,22",
"8,9,12,13,14,15,16,17,18,19", "8,9,14,15,16,17,18,19,20,21,22,23,24",
"8,9,15,16,17,18,19,20,21,22", "8,9,19", "8,9,19,20,21,22,23",
"8,9,21,22", "9", "9,10", "9,10,11,12,13,14", "9,10,11,12,13,14,15,16",
"9,10,11,12,13,14,15,16,17", "9,10,11,12,13,14,15,16,17,18,19",
"9,10,11,12,13,14,15,16,17,18,19,20,21", "9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",
"9,10,11,12,13,14,15,16,17,19", "9,12", "9,12,13", "9,12,13,14",
"9,13", "9,13,14,15", "9,13,14,15,16,17", "9,13,14,15,18", "9,14",
"9,14,15,16", "9,15", "9,15,16,17", "9,16", "9,16,17,18,19,21,22",
"9,16,17,19", "9,17", "9,17,18", "9,19", "9,19,20", "9,19,20,21",
"9,19,21", "9,20", "9,20,21", "9,20,21,22", "9,21", "9,22", "9,23"
), class = "factor")), .Names = c("10", "20", "52.5", "81", "110",
"140.5", "189", "222.5", "278", "340", "397", "453.5", "529",
"580", "630.5", "683.5", "735.5", "784", "832", "882.5", "926.5",
"973", "1108", "1200", "Clusters"), row.names = c("at1g01050.1",
"at1g01080.1", "at1g01090.1", "at1g01220.1", "at1g01420.1", "at1g01470.1",
"at1g01800.1", "at1g01910.5", "at1g01920.2", "at1g01980.1", "at1g02020.2",
"at1g02100.2", "at1g02130.1", "at1g02140.1", "at1g02150.1", "at1g02500.2",
"at1g02560.1", "at1g02780.1", "at1g02880.3", "at1g02920.1"), class = "data.frame")

第三个表:
> dput(tbl_col_clu3[1:20,])
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), `33.95` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `58.66` = c(0, 0, 0, 0, 0.328143363,
0.552139556, 0.495919686, 0, 0, 0, 0, 0, 0, 0, 0, 0.416266322,
0.886125103, 1, 1, 0), `84.42` = c(0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0.327004551, 0, 0, 0, 0.956778355, 1, 0.175277617, 0.240402438,
0), `110.21` = c(0, 0, 0, 0, 0, 0.151581882, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0.091367379, 0.029316359, 0, 0), `134.16` = c(0.190968551,
0, 0, 0, 0, 0.164736594, 0, 0, 0, 0, 0, 0.650199285, 0, 0, 0,
0, 0.097800974, 0.007393484, 0, 0), `164.69` = c(0.5342874459,
0, 0.3619993464, 0, 0, 0.1891527151, 0, 0, 0, 0, 0, 0.4926963182,
0, 0, 0, 0, 0, 0, 0, 0), `199.1` = c(0.866134859, 0, 0.405387979,
0, 0, 0.274468991, 0, 0, 0, 0, 0, 0.352737127, 0.170514318, 0,
0, 0, 0, 0, 0, 0), `234.35` = c(1, 0, 0.446118481, 0, 0, 0.338427523,
0, 0, 0, 0, 0, 0.204601923, 0.343919727, 0, 0, 0, 0, 0, 0, 0),
`257.19` = c(0.732231652, 0, 0.666653103, 0, 0, 0.403078017,
0, 0, 0, 0, 0, 0.315665123, 1, 0, 0, 0, 0, 0, 0, 0), `361.84` = c(0.660960044,
0, 1, 0, 0, 0.202578329, 0, 0, 0, 0, 0, 0.320183046, 0.424361453,
0, 0, 0, 0, 0, 0, 0), `432.74` = c(0.47961801, 0, 0.48323321,
0, 0, 0.25926071, 0, 0, 0, 0, 0, 0.36362413, 0.43039587,
0, 0, 0, 0, 0, 0, 0), `506.34` = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.22943212, 0.19354376, 0, 0, 0, 0, 0, 0, 0), `581.46` = c(0,
0.52783556, 0, 1, 0, 0, 0, 0.64407392, 0, 0.70701938, 0,
0.2596209, 0.29757967, 0, 0, 0, 0, 0, 0, 0), `651.71` = c(0,
0.32678969, 0, 0.36428195, 0, 0, 0, 0.64951761, 0, 0.80866933,
1, 0.18614028, 0.21567888, 0.32813633, 0, 0, 0, 0, 0, 0),
`732.59` = c(0, 0.229023369, 0, 0.312832425, 0, 0, 0, 0.696041374,
0, 0.590471454, 0, 0.108699479, 0.187935709, 0.275177957,
0, 0, 0, 0, 0, 0.243080694), `817.56` = c(0, 0.25668583,
0, 0.4003249, 0, 0, 0, 0.53376606, 0, 0.85524485, 0, 0.22539659,
0.27977127, 0.55089774, 0, 0, 0, 0, 0, 1), `896.24` = c(0,
0.31675535, 0, 0.50882005, 0, 0, 0, 0.74705458, 0.12936306,
1, 0, 0.1949139, 0.21957859, 0.75063327, 0, 0, 0, 0, 0, 0.63346358
), `971.77` = c(0, 0.27811949, 0, 0.48419038, 0, 0, 0, 0.8563439,
0.39897143, 0.84491933, 0, 0.13935282, 0.17670128, 0.84111004,
0, 0, 0, 0, 0, 0), `1038.91` = c(0, 1, 0, 0.52506752, 0,
0, 0, 1, 1, 0.85617714, 0, 0.13507463, 0, 1, 0, 0, 0, 0,
0, 0), Clusters = structure(c(222L, 88L, 237L, 88L, 145L,
155L, 143L, 88L, 122L, 88L, 97L, 180L, 260L, 102L, 186L,
145L, 149L, 149L, 145L, 106L), .Label = c("10", "10,11",
"10,11,12", "10,11,12,13", "10,11,12,13,14", "10,11,12,13,14,15",
"10,11,12,13,14,15,16", "10,11,12,13,14,15,16,17,18", "10,11,12,13,14,15,16,17,18,19",
"10,11,12,13,14,15,16,17,18,19,20", "10,11,12,14", "10,11,12,14,15",
"10,11,12,14,15,16", "10,11,12,14,15,16,17,18", "10,11,12,14,15,16,17,18,19",
"10,11,12,14,15,16,17,18,19,20", "10,11,12,14,15,17,18,19",
"10,11,12,15,16,17", "10,11,14", "10,11,15", "10,11,15,16,17",
"10,11,16", "10,11,17", "10,11,20", "10,12", "10,14,15,16",
"10,14,15,16,17,18,19", "10,15", "10,15,16", "10,15,16,18",
"10,16,19", "10,18,19,20", "10,19", "10,19,20", "10,20",
"11", "11,12", "11,12,13", "11,12,13,14", "11,12,13,14,15",
"11,12,13,14,15,16", "11,12,13,14,15,16,17,18", "11,12,13,14,15,16,17,18,19",
"11,12,13,14,15,16,17,18,19,20", "11,12,13,14,15,16,18,19",
"11,12,14,15", "11,12,14,15,16,17", "11,12,14,15,16,17,18",
"11,12,14,15,16,17,18,19", "11,12,14,15,16,17,18,19,20",
"11,12,18", "11,12,19", "11,12,20", "12", "12,13", "12,13,14",
"12,13,14,15", "12,13,14,15,16", "12,13,14,15,16,17,18",
"12,13,14,15,16,17,18,19,20", "12,14", "12,14,15", "12,14,15,16",
"12,14,15,16,17", "12,14,15,16,17,18", "12,14,15,16,17,18,19",
"12,14,15,16,17,18,19,20", "12,14,15,16,20", "12,14,15,18,19,20",
"12,15", "12,16", "12,16,17,18", "12,18,19,20", "12,19,20",
"12,20", "13", "13,14", "13,14,15", "13,14,15,16,17,18,19,20",
"13,16", "13,20", "14", "14,15", "14,15,16", "14,15,16,17",
"14,15,16,17,18", "14,15,16,17,18,19", "14,15,16,17,18,19,20",
"14,15,16,18", "14,15,17", "14,15,18", "14,16", "14,16,17",
"14,16,17,18,19,20", "14,18,19,20", "14,19", "15", "15,16",
"15,16,17", "15,16,17,18", "15,16,17,18,19", "15,16,17,18,19,20",
"15,20", "16", "16,17", "16,17,18", "16,17,18,19", "16,17,18,19,20",
"16,17,18,20", "16,17,19", "16,18,19,20", "16,19,20", "17",
"17,18", "17,18,19", "17,18,19,20", "17,18,20", "17,19,20",
"17,20", "18", "18,19", "18,19,20", "19", "19,20", "2", "2,19,20",
"2,3", "2,3,4", "2,3,4,5", "2,3,4,5,11", "2,3,4,5,6", "2,3,4,5,6,7,8",
"2,3,4,5,6,7,8,11,12", "2,3,4,5,6,7,8,9", "2,3,4,5,6,7,8,9,10",
"2,3,4,5,6,7,8,9,10,11", "2,3,4,5,6,7,8,9,10,11,12", "2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20",
"2,4", "2,5", "2,5,6,7", "20", "3", "3,18", "3,4", "3,4,10",
"3,4,20", "3,4,5", "3,4,5,6", "3,4,5,6,7", "3,4,5,6,7,8",
"3,4,5,6,7,8,9", "3,4,5,6,7,8,9,10", "3,4,5,6,7,8,9,10,11",
"3,4,5,6,7,8,9,10,11,12", "3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",
"3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18", "3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20",
"3,4,8", "3,4,8,9", "3,5", "3,7", "3,9", "4", "4,5", "4,5,12,13",
"4,5,16", "4,5,6", "4,5,6,16,17,18,19,20", "4,5,6,20", "4,5,6,7",
"4,5,6,7,8", "4,5,6,7,8,10,11", "4,5,6,7,8,9", "4,5,6,7,8,9,10",
"4,5,6,7,8,9,10,11", "4,5,6,7,8,9,10,11,12", "4,5,6,7,8,9,10,11,12,13,14,15",
"4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19", "4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20",
"4,5,6,7,8,9,10,11,12,14,15,16,17,18,19,20", "4,5,6,7,8,9,16,17",
"4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20", "4,6,7", "4,7,13",
"5", "5,11,12,14,15,16,17,18,19", "5,14", "5,14,15,16", "5,16,19",
"5,17,18,19,20", "5,18", "5,6", "5,6,7", "5,6,7,10", "5,6,7,8",
"5,6,7,8,10", "5,6,7,8,9", "5,6,7,8,9,10", "5,6,7,8,9,10,11",
"5,6,7,8,9,10,11,12", "5,6,7,8,9,10,11,12,13", "5,6,7,8,9,10,11,12,13,14",
"5,6,7,8,9,10,11,12,13,14,15,16", "5,6,7,8,9,10,11,12,13,14,15,16,17,18",
"5,6,7,8,9,10,11,12,13,14,15,16,17,18,19", "5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20",
"5,6,7,8,9,16,17,18,19,20", "5,6,8", "5,7,8,9,10", "5,7,8,9,10,14,15,16,17,18",
"5,8", "6", "6,7", "6,7,16", "6,7,8", "6,7,8,10,11,12,15,16,17,18",
"6,7,8,19", "6,7,8,9", "6,7,8,9,10", "6,7,8,9,10,11", "6,7,8,9,10,11,12",
"6,7,8,9,10,11,12,13,14", "6,7,8,9,10,11,12,13,14,15,16,17",
"6,7,8,9,10,11,12,13,14,15,16,17,18,19", "6,7,8,9,10,11,12,13,14,15,16,17,18,19,20",
"6,7,8,9,10,11,12,14,15,16", "6,7,8,9,10,18,19", "7", "7,10,11,14,15",
"7,12", "7,8", "7,8,12", "7,8,9", "7,8,9,10", "7,8,9,10,11",
"7,8,9,10,11,12", "7,8,9,10,11,12,13", "7,8,9,10,11,12,13,14,15,16",
"7,8,9,10,11,12,13,14,15,16,17,18", "7,8,9,10,11,12,13,14,15,16,17,18,19",
"7,8,9,10,11,12,13,14,15,16,17,18,19,20", "7,8,9,10,11,12,14,15,16,17,18,19",
"7,8,9,10,11,12,14,15,16,17,18,19,20", "7,8,9,10,12,15,16,17,18",
"7,9,10,11,12,13,14,15,16,17,18,19,20", "8", "8,10", "8,10,20",
"8,14,15,16,17,18,19,20", "8,16,17", "8,9", "8,9,10", "8,9,10,11",
"8,9,10,11,12", "8,9,10,11,12,13,14", "8,9,10,11,12,13,14,15",
"8,9,10,11,12,13,14,15,16", "8,9,10,11,12,13,14,15,16,17,18",
"8,9,10,11,12,13,14,15,16,17,18,19", "8,9,10,11,12,13,14,15,16,17,18,19,20",
"8,9,10,11,12,14,15,16", "8,9,10,11,12,14,15,16,17,18,19,20",
"8,9,10,14,15,16,17,18,19,20", "8,9,17", "9", "9,10", "9,10,11",
"9,10,11,12", "9,10,11,12,13,14,15,16,17", "9,10,11,12,13,14,15,16,17,18",
"9,10,11,12,13,14,15,16,17,18,19", "9,10,11,12,13,14,15,16,17,18,19,20",
"9,10,11,12,14,15,16", "9,10,11,12,14,15,16,17,18", "9,10,11,12,14,15,16,17,18,19",
"9,10,11,12,14,15,16,17,18,19,20", "9,10,11,12,16,17,18,19,20",
"9,10,11,14,15,16,17", "9,10,12,14,15,16,17", "9,10,14,15",
"9,11,12", "9,11,12,14", "9,12,14", "9,20"), class = "factor")), .Names = c("10",
"33.95", "58.66", "84.42", "110.21", "134.16", "164.69", "199.1",
"234.35", "257.19", "361.84", "432.74", "506.34", "581.46", "651.71",
"732.59", "817.56", "896.24", "971.77", "1038.91", "Clusters"
), row.names = c("at1g01050.1", "at1g01080.1", "at1g01090.1",
"at1g01320.2", "at1g01470.1", "at1g01800.1", "at1g01910.5", "at1g01960.1",
"at1g01980.1", "at1g02150.1", "at1g02470.1", "at1g02500.2", "at1g02560.1",
"at1g02780.1", "at1g02816.1", "at1g02880.2", "at1g02920.1", "at1g02930.2",
"at1g03030.1", "at1g03090.2"), class = "data.frame")

最后一列( Clusters )和 row.names 对我们来说很重要。此列说明可以在哪些列中找到该基因的丰度。对我来说,基因具体位于哪个簇并不重要,重要的是哪些基因与它位于同一个簇中。

让我们举一个例子:
Those genes belong to the same cluster (cluster 5) in data1.
at1g09640.1
at1g07250.1
at1g08200.1
at1g09300.2 ##
at1g09490.2 ## Those
at1g09760.1 ##
at1g09780.1

如果我们分析其他数据集(data2)。我们可以看到其中一些基因可以再次被发现。也许是不同的集群(集群 20)左右,但它们在一起,这对我来说最重要。
at1g02880.3
at1g01220.1
at1g09300.2 ##
at1g09490.2 ## Those
at1g09760.1 ##
at1g02130.1

我有 15 个类似的数据集,我想能够问 R:告诉我可以在 15 个数据集中的 15 个或 15 个数据集中的 13 个中一起找到的基因等等......

有任何想法吗 ?

最佳答案

首先,您需要将这些逗号分隔的列表转换为列 - 以这种方式使用它们要容易得多。然后,您要查找哪些基因具有匹配的列。最后,您可以聚合以获得与其他基因匹配的基因总数。

请注意,结果中每一对基因会以两种顺序各出现一次(A-B 和 B-A),并且还包含每个基因与其自身的配对。此外,“Clusters”列会告诉您它们出现在完全相同的一组簇中的次数。

这将在 O(n^2) 时间内运行,这意味着分析的基因数量增加一倍,运行时间将变为原来的四倍。我的快速计时测试估计,在我的计算机上处理 15 个各 2300 行的数据帧需要 15 个小时。

library(plyr)

# Input tables to compare; the ldply() call further down passes each element
# of this list to get_comparison().
frame_list <- list(tbl_col_clu1, tbl_col_clu2, tbl_col_clu3)

# Expand a comma-separated cluster list into one indicator column per cluster.
#
# Args:
#   x: A data frame (used one row at a time by get_comparison()) with a
#      `Clusters` column holding a comma-separated list such as "3,4,10".
#      The column may be character or factor.
#
# Returns:
#   `x` with one additional column per cluster id found in the first row's
#   `Clusters` value, each filled with 1. Existing columns are left untouched.
turn_numbers_into_columns <- function(x) {
  # strsplit() rejects factors, so coerce first; only the first element is
  # used, matching the one-row-at-a-time usage.
  cluster_ids <- strsplit(as.character(x$Clusters), ",")[[1]]
  x[, cluster_ids] <- 1
  x
}

# Build the all-pairs comparison frame for a single input table.
#
# Args:
#   current_table: A data frame whose row names are gene ids and which has a
#     `Clusters` column of comma-separated cluster ids.
#
# Returns:
#   A data frame with one row per ordered (gene, gene2) pair, holding the
#   element-wise `==` comparison of the two genes' rows plus the `gene` and
#   `gene2` identifiers.
get_comparison <- function(current_table) {
  # Keep only the gene id and its cluster string.
  gene_clusters <- data.frame(
    "gene" = row.names(current_table),
    "Clusters" = as.character(current_table$Clusters),
    stringsAsFactors = FALSE)

  # One row per gene, with the cluster ids spread out into columns.
  membership <- adply(gene_clusters, 1, turn_numbers_into_columns)

  # Compare one gene's row (`left`) against every gene's row in `membership`.
  compare_against_all <- function(left) {
    compare_pair <- function(right) {
      matches <- as.data.frame(left == right)
      # The comparison clobbers the id column, so restore both ids.
      matches$gene <- left$gene
      matches$gene2 <- right$gene
      matches
    }
    ddply(membership, "gene", compare_pair)
  }

  # This is the slow part: a nested pass over every pair of genes.
  ddply(membership, "gene", compare_against_all)
}

# Run the pairwise comparison on every input table and stack the results.
combined_frame <- ldply(frame_list, get_comparison)

# Total the comparison flags for each (gene, gene2) pair over all tables.
# na.rm = TRUE makes sum() skip NA comparison results; TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
sum_frame <- aggregate(
  combined_frame[, !(names(combined_frame) %in% c("gene", "gene2"))],
  by = combined_frame[, c("gene", "gene2")],
  FUN = sum,
  na.rm = TRUE)

# Open the result in the interactive data viewer.
View(sum_frame)

如果您的各个数据集始终包含相同的基因集合和分组,则可以将所有内容转换为数组,数组的运算速度比数据帧快,可将运行时间缩短到原来的约六分之一。上面运行非常缓慢的那部分可以替换为类似下面的代码,它返回可以相加的三维数组。
# Array-based variant of the pairwise comparison.
# NOTE(review): assumes split_f has already been converted to an array with
# one gene per row -- confirm before substituting this for the ddply() version.
comparison_frame <- aaply(split_f, 1, function(x) {
  # Prints the row currently being compared.
  print(x)
  # Compare row x against every row y, shaping each result as a 1 x length(x)
  # array.
  aaply(split_f, 1, function(y) {
    array(x == y, c(1, length(x)))
  })
})

关于r - 对来自不同数据的元素进行分组,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/25117556/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com