gpt4 book ai didi

r - 使用 R 和 tidyverse 将 tidy 表转换为深度嵌套列表

转载 作者:行者123 更新时间:2023-12-02 05:16:05 24 4
gpt4 key购买 nike

我正在尝试使用 R/tidyverse 将整洁的表(例如下面的示例)转换为嵌套列表。使用一些 tidyverse 魔法,我能够将其转换为深度为三的嵌套列表,但我不知道如何将其嵌套得更深。

采用以下示例输入:

library(tidyverse)
library(stringi)

n_patient = 2
n_samples = 3
n_readgroup = 4
n_mate = 2

df = data.frame(patient = rep(rep(LETTERS[1:n_patient], n_samples),2),
sample = rep(rep(seq(1:n_samples), each = n_patient),2),
readgroup = rep(stri_rand_strings(n_patient * n_samples * n_readgroup, 6, '[A-Z]'),2),
mate = rep(1:n_mate, each = n_patient * n_samples * n_readgroup)) %>%
mutate(file = sprintf("%s.%s.%s_%s", patient, sample, readgroup, mate)) %>%
arrange(file)

json = df %>%
nest(-patient, .key = samples) %>%
mutate(samples = map(samples, nest, -sample, .key=readgroups))

jsonlite::toJSON(json, pretty = T)

例如看起来像这样

> head(df)
patient sample readgroup mate file
1 A 1 FCSDRJ 1 A.1.FCSDRJ_1
2 A 1 FCSDRJ 2 A.1.FCSDRJ_2
3 A 1 IAXDPR 1 A.1.IAXDPR_1
4 A 1 IAXDPR 2 A.1.IAXDPR_2
5 A 1 MLDBKZ 1 A.1.MLDBKZ_1
6 A 1 MLDBKZ 2 A.1.MLDBKZ_2

输出如下所示:

 [
{
"patient": "A",
"samples": [
{
"sample": 1,
"readgroups": [
{
"readgroup": "FCSDRJ",
"mate": 1,
"file": "A.1.FCSDRJ_1"
},
{
"readgroup": "FCSDRJ",
"mate": 2,
"file": "A.1.FCSDRJ_2"
},
{
"readgroup": "IAXDPR",
"mate": 1,
"file": "A.1.IAXDPR_1"
},
{
"readgroup": "IAXDPR",
"mate": 2,
"file": "A.1.IAXDPR_2"
},
{
"readgroup": "MLDBKZ",
"mate": 1,
"file": "A.1.MLDBKZ_1"
},
{
"readgroup": "MLDBKZ",
"mate": 2,
"file": "A.1.MLDBKZ_2"
},
{
"readgroup": "OMTWHK",
"mate": 1,
"file": "A.1.OMTWHK_1"
},
{
"readgroup": "OMTWHK",
"mate": 2,
"file": "A.1.OMTWHK_2"
}
]
},
{
"sample": 2,
"readgroups": [
{
"readgroup": "BHAEFA",
"mate": 1,
"file": "A.2.BHAEFA_1"
},
{
"readgroup": "BHAEFA",
"mate": 2,
"file": "A.2.BHAEFA_2"
},
{
"readgroup": "DIBRHT",
"mate": 1,
"file": "A.2.DIBRHT_1"
},
{
"readgroup": "DIBRHT",
"mate": 2,
"file": "A.2.DIBRHT_2"
},
{
"readgroup": "HHMOSV",
"mate": 1,
"file": "A.2.HHMOSV_1"
},
{
"readgroup": "HHMOSV",
"mate": 2,
"file": "A.2.HHMOSV_2"
},
{
"readgroup": "KJXTPN",
"mate": 1,
"file": "A.2.KJXTPN_1"
},
{
"readgroup": "KJXTPN",
"mate": 2,
"file": "A.2.KJXTPN_2"
}
]
},
{
"sample": 3,
"readgroups": [
{
"readgroup": "CHXJMM",
"mate": 1,
"file": "A.3.CHXJMM_1"
},
{
"readgroup": "CHXJMM",
"mate": 2,
"file": "A.3.CHXJMM_2"
},
{
"readgroup": "MDWRBS",
"mate": 1,
"file": "A.3.MDWRBS_1"
},
{
"readgroup": "MDWRBS",
"mate": 2,
"file": "A.3.MDWRBS_2"
},
{
"readgroup": "RHHKGK",
"mate": 1,
"file": "A.3.RHHKGK_1"
},
{
"readgroup": "RHHKGK",
"mate": 2,
"file": "A.3.RHHKGK_2"
},
{
"readgroup": "VVVJFD",
"mate": 1,
"file": "A.3.VVVJFD_1"
},
{
"readgroup": "VVVJFD",
"mate": 2,
"file": "A.3.VVVJFD_2"
}
]
}
]
},
{
"patient": "B",
"samples": [
{
"sample": 1,
"readgroups": [
{
"readgroup": "QAFCOS",
"mate": 1,
"file": "B.1.QAFCOS_1"
},
{
"readgroup": "QAFCOS",
"mate": 2,
"file": "B.1.QAFCOS_2"
},
{
"readgroup": "TJYYMQ",
"mate": 1,
"file": "B.1.TJYYMQ_1"
},
{
"readgroup": "TJYYMQ",
"mate": 2,
"file": "B.1.TJYYMQ_2"
},
{
"readgroup": "YMHWOI",
"mate": 1,
"file": "B.1.YMHWOI_1"
},
{
"readgroup": "YMHWOI",
"mate": 2,
"file": "B.1.YMHWOI_2"
},
{
"readgroup": "ZOMSBU",
"mate": 1,
"file": "B.1.ZOMSBU_1"
},
{
"readgroup": "ZOMSBU",
"mate": 2,
"file": "B.1.ZOMSBU_2"
}
]
},
{
"sample": 2,
"readgroups": [
{
"readgroup": "CZWHXP",
"mate": 1,
"file": "B.2.CZWHXP_1"
},
{
"readgroup": "CZWHXP",
"mate": 2,
"file": "B.2.CZWHXP_2"
},
{
"readgroup": "MIMMNH",
"mate": 1,
"file": "B.2.MIMMNH_1"
},
{
"readgroup": "MIMMNH",
"mate": 2,
"file": "B.2.MIMMNH_2"
},
{
"readgroup": "RCWMQY",
"mate": 1,
"file": "B.2.RCWMQY_1"
},
{
"readgroup": "RCWMQY",
"mate": 2,
"file": "B.2.RCWMQY_2"
},
{
"readgroup": "WDMLHE",
"mate": 1,
"file": "B.2.WDMLHE_1"
},
{
"readgroup": "WDMLHE",
"mate": 2,
"file": "B.2.WDMLHE_2"
}
]
},
{
"sample": 3,
"readgroups": [
{
"readgroup": "DWITMU",
"mate": 1,
"file": "B.3.DWITMU_1"
},
{
"readgroup": "DWITMU",
"mate": 2,
"file": "B.3.DWITMU_2"
},
{
"readgroup": "GCLWMA",
"mate": 1,
"file": "B.3.GCLWMA_1"
},
{
"readgroup": "GCLWMA",
"mate": 2,
"file": "B.3.GCLWMA_2"
},
{
"readgroup": "QZZKQB",
"mate": 1,
"file": "B.3.QZZKQB_1"
},
{
"readgroup": "QZZKQB",
"mate": 2,
"file": "B.3.QZZKQB_2"
},
{
"readgroup": "WJKGRB",
"mate": 1,
"file": "B.3.WJKGRB_1"
},
{
"readgroup": "WJKGRB",
"mate": 2,
"file": "B.3.WJKGRB_2"
}
]
}
]
}
]

这很棒,除了我还想按“mate”嵌套(理论上可以按任意数量的变量嵌套,没有任何深度限制)。有什么建议如何实现这一点吗?

谢谢!

最佳答案

第一步是注意您正在向后执行,这会提供与您相同的输出,但更简单:

json2 <- df %>% nest(-(1:2),.key=readgroups) %>% nest(-1,.key=samples)

然后我们可以扩展它:

json3 <- df %>% nest(-(1:3),.key=mate) %>% nest(-(1:2),.key=readgroups) %>% nest(-1,.key=samples)

jsonlite::toJSON(json3,pretty=T)

输出:

[
{
"patient": "A",
"samples": [
{
"sample": 1,
"readgroups": [
{
"readgroup": "FUPEYR",
"mate": [
{
"mate": 1,
"file": "A.1.FUPEYR_1"
},
{
"mate": 2,
"file": "A.1.FUPEYR_2"
}
...

如果有必要,概括它:

vars <- names(df)[-1] # or whatever variables you want to nest, order matters!
var_pairs <- map((length(vars)-1):1,~vars[.x:(.x+1)])
json4 <- reduce(var_pairs,~{nm<-.y[1];nest(.x,.y,.key=!!enquo(nm))},.init=df)

jsonlite::toJSON(json4,pretty=T)

输出:

[
{
"patient": "A",
"sample": [
{
"sample": 1,
"readgroup": [
{
"readgroup": "FUPEYR",
"mate": [
{
"mate": 1,
"file": "A.1.FUPEYR_1"
},
{
"mate": 2,
"file": "A.1.FUPEYR_2"
}
...

关于r - 使用 R 和 tidyverse 将 tidy 表转换为深度嵌套列表,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50477156/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com