gpt4 book ai didi

r - 合并两个数据集,保留所有行差异并添加相似行

转载 作者:行者123 更新时间:2023-12-05 08:28:46 25 4
gpt4 key购买 nike

我有两个数据集 Data 和 Data1。我想把它们合并:保留所有不同的行,同时在新表中把两个数据集共有的行的数值相加。有什么简单的工具可以做到吗?

head(Data)
contig position variantID refAllele altAllele refCount altCount totalCount lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
1 chr1 905373 . T C 2 4 6 0 0 6 0 0
2 chr1 911428 . C T 1 2 3 0 0 3 0 0
3 chr1 953279 . T C 146 126 272 0 0 273 1 0
4 chr1 962184 . T C 14 15 29 0 0 29 0 0
5 chr1 1024129 . T G 1 0 1 0 0 1 0 0
6 chr1 1039514 . C T 1 1 2 0 0 2 0 0
head(Data1)
contig position variantID refAllele altAllele refCount altCount totalCount lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
1 chr1 905373 . T C 2 3 5 0 0 5 0 0
2 chr1 933024 . C T 1 0 1 0 0 1 0 0
3 chr1 953279 . T C 122 124 246 0 0 248 2 0
4 chr1 962184 . T C 17 21 38 0 0 38 0 0
5 chr1 1022518 . G T 0 1 1 0 0 1 0 0
6 chr1 1024129 . T G 1 2 3 0 0 3 0 0

想要的输出示例

contig  position    variantID   refAllele   altAllele   refCount    altCount    totalCount lowMAPQDepth lowBaseQDepth   rawDepth    otherBases  improperPairs
1 chr1 905373 . T C 4 7 11 0 0 11 0 0
2 chr1 911428 . C T 1 2 3 0 0 3 0 0
3 chr1 933024 . C T 1 0 1 0 0 1 0 0
4 chr1 953279 . T C 268 250 518 0 0 521 3 0

正如我们所见,position 列中的位点 905373 在两个数据集中都存在,因此从 refCount 列开始的各数值列被相加。而位点 911428 和 933024 各自只出现在其中一个数据集中,但也被原样插入到了新数据集中。有没有不那么麻烦的方法来创建这个输出表?

# Example input table 1: six chr1 sites, one row per site, with the
# per-site count columns shown in the question. Built as a list of
# equal-length columns and tagged as a data.frame, keeping the same
# compact row-name encoding as the original dput() output.
Data <- structure(
  list(
    contig        = rep("chr1", 6L),
    position      = c(905373L, 911428L, 953279L, 962184L, 1024129L, 1039514L),
    variantID     = rep(".", 6L),
    refAllele     = c("T", "C", "T", "T", "T", "C"),
    altAllele     = c("C", "T", "C", "C", "G", "T"),
    refCount      = c(2L, 1L, 146L, 14L, 1L, 1L),
    altCount      = c(4L, 2L, 126L, 15L, 0L, 1L),
    totalCount    = c(6L, 3L, 272L, 29L, 1L, 2L),
    lowMAPQDepth  = integer(6L),
    lowBaseQDepth = integer(6L),
    rawDepth      = c(6L, 3L, 273L, 29L, 1L, 2L),
    otherBases    = c(0L, 0L, 1L, 0L, 0L, 0L),
    improperPairs = integer(6L)
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)

# Example input table 2: six chr1 sites with the same column layout as
# the first table. Some positions also occur in the first table, others
# appear only here. Built as a list of equal-length columns and tagged
# as a data.frame, keeping the original compact row-name encoding.
Data1 <- structure(
  list(
    contig        = rep("chr1", 6L),
    position      = c(905373L, 933024L, 953279L, 962184L, 1022518L, 1024129L),
    variantID     = rep(".", 6L),
    refAllele     = c("T", "C", "T", "T", "G", "T"),
    altAllele     = c("C", "T", "C", "C", "T", "G"),
    refCount      = c(2L, 1L, 122L, 17L, 0L, 1L),
    altCount      = c(3L, 0L, 124L, 21L, 1L, 2L),
    totalCount    = c(5L, 1L, 246L, 38L, 1L, 3L),
    lowMAPQDepth  = integer(6L),
    lowBaseQDepth = integer(6L),
    rawDepth      = c(5L, 1L, 248L, 38L, 1L, 3L),
    otherBases    = c(0L, 0L, 2L, 0L, 0L, 0L),
    improperPairs = integer(6L)
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)

最佳答案

这里有一个可能性:

# Show the first input table; its printed form follows as `#>` lines.
print(Data)
#> contig position variantID refAllele altAllele refCount altCount totalCount
#> 1 chr1 905373 . T C 2 4 6
#> 2 chr1 911428 . C T 1 2 3
#> 3 chr1 953279 . T C 146 126 272
#> 4 chr1 962184 . T C 14 15 29
#> 5 chr1 1024129 . T G 1 0 1
#> 6 chr1 1039514 . C T 1 1 2
#> lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
#> 1 0 0 6 0 0
#> 2 0 0 3 0 0
#> 3 0 0 273 1 0
#> 4 0 0 29 0 0
#> 5 0 0 1 0 0
#> 6 0 0 2 0 0

# Show the second input table; its printed form follows as `#>` lines.
print(Data1)
#> contig position variantID refAllele altAllele refCount altCount totalCount
#> 1 chr1 905373 . T C 2 3 5
#> 2 chr1 933024 . C T 1 0 1
#> 3 chr1 953279 . T C 122 124 246
#> 4 chr1 962184 . T C 17 21 38
#> 5 chr1 1022518 . G T 0 1 1
#> 6 chr1 1024129 . T G 1 2 3
#> lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
#> 1 0 0 5 0 0
#> 2 0 0 1 0 0
#> 3 0 0 248 2 0
#> 4 0 0 38 0 0
#> 5 0 0 1 0 0
#> 6 0 0 3 0 0

# Stack both tables row-wise, then collapse rows that share the same site
# key (contig, position, variantID, refAllele, altAllele). The `.` on the
# left-hand side of the formula stands for every remaining column; each of
# those is summed within its group. A key that occurs only once forms a
# single-row group, so its values pass through unchanged.
aggregate(
  . ~ contig + position + variantID + refAllele + altAllele,
  data = rbind(Data, Data1),
  FUN = sum
)
#> contig position variantID refAllele altAllele refCount altCount totalCount
#> 1 chr1 905373 . T C 4 7 11
#> 2 chr1 953279 . T C 268 250 518
#> 3 chr1 962184 . T C 31 36 67
#> 4 chr1 1024129 . T G 2 2 4
#> 5 chr1 911428 . C T 1 2 3
#> 6 chr1 933024 . C T 1 0 1
#> 7 chr1 1039514 . C T 1 1 2
#> 8 chr1 1022518 . G T 0 1 1
#> lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
#> 1 0 0 11 0 0
#> 2 0 0 521 3 0
#> 3 0 0 67 0 0
#> 4 0 0 4 0 0
#> 5 0 0 3 0 0
#> 6 0 0 1 0 0
#> 7 0 0 2 0 0
#> 8 0 0 1 0 0

关于r - 合并两个数据集,保留所有行差异并添加相似行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/75176056/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com