gpt4 book ai didi

r - 在数据框列表中动态设置唯一的列名 [R]

转载 作者:行者123 更新时间:2023-12-04 10:10:01 24 4
gpt4 key购买 nike

我试图把这里的代码推广到不止一个重复列的情况:rename list of dataframe columns to mimic joined suffixes

我有一个数据框列表,列表中不同的数据框之间存在相同的列名
我想使用与 reduce(left_join, suffix = c("_x", "_y"), by="inAll") 相同的模式
创建新名称

命名规则:

  • 名称相同但不是连接键的列,依次被赋予 _x、_y 后缀
  • 之后继续使用 _x_x、_y_y,依此类推
  • 如果包含该重复列的列表项数量为奇数,则最后一个不加后缀
  • 此规则应适用于除函数中指定的列( "inAll" )之外的所有列
    library(dplyr)
    library(purrr)
    library(stringr)

    # Example input: six data frames that all carry the key column `inAll`,
    # plus other column names that repeat across some of the data frames.
    dd <- list(
      data = list(
        ONE = data.frame(
          inAll = c(1.1, 1.2, 1.3),
          inAll_2 = c(1.4, 1.5, 1.6),
          inSome = c(1.7, 1.8, 1.9),
          only_one = c(1.10, 1.11, 1.12)
        ),
        TWO = data.frame(
          inAll = c(2.1, 2.2, 2.3),
          inAll_2 = c(2.4, 2.5, 2.6),
          inOthers = c(2.7, 2.8, 2.9)
        ),
        THREE = data.frame(
          inAll = c(3.1, 3.2, 3.3),
          inAll_2 = c(3.4, 3.5, 3.6)
        ),
        FOUR = data.frame(
          inAll = c(4.1, 4.2, 4.3),
          inAll_2 = c(4.4, 4.5, 4.6),
          inOthers = c(4.10, 4.11, 4.12),
          inSome = c(4.7, 4.8, 4.9)
        ),
        FIVE = data.frame(
          inAll = c(5.1, 5.2, 5.3),
          inAll_2 = c(5.4, 5.5, 5.6)
        ),
        SIX = data.frame(
          inAll = c(6.1, 6.2, 6.3),
          inAll_2 = c(6.4, 6.5, 6.6),
          inOthers = c(6.7, 6.8, 6.8)
        )
      )
    )

    期望输出
    # Desired result: same values as dd$data, with every repeated non-key
    # column renamed using the _x / _y / _x_x / _y_y ... suffix sequence.
    dd[["data2"]] <- list(
      ONE = data.frame(
        inAll = c(1.1, 1.2, 1.3),
        inAll_2_x = c(1.4, 1.5, 1.6),
        inSome_x = c(1.7, 1.8, 1.9),
        only_one = c(1.10, 1.11, 1.12)
      ),
      TWO = data.frame(
        inAll = c(2.1, 2.2, 2.3),
        inAll_2_y = c(2.4, 2.5, 2.6),
        inOthers_x = c(2.7, 2.8, 2.9)
      ),
      THREE = data.frame(
        inAll = c(3.1, 3.2, 3.3),
        inAll_2_x_x = c(3.4, 3.5, 3.6)
      ),
      FOUR = data.frame(
        inAll = c(4.1, 4.2, 4.3),
        inAll_2_y_y = c(4.4, 4.5, 4.6),
        inOthers_y = c(4.10, 4.11, 4.12),
        inSome_y = c(4.7, 4.8, 4.9)
      ),
      FIVE = data.frame(
        inAll = c(5.1, 5.2, 5.3),
        inAll_2_x_x_x = c(5.4, 5.5, 5.6)
      ),
      SIX = data.frame(
        inAll = c(6.1, 6.2, 6.3),
        inAll_2_y_y_y = c(6.4, 6.5, 6.6),
        inOthers = c(6.7, 6.8, 6.8)
      )
    )

    如何实现:

    对完全不同的想法持开放态度!!!
    # Build the suffixed names for every column that is not excluded.
    #
    # For each column name found in the data frames of `data` (other than the
    # names in `toExclude`), one new name is produced per occurrence, following
    # the suffix sequence `_x`, `_y`, `_x_x`, `_y_y`, ... When a name occurs an
    # odd number of times, the last occurrence keeps the plain, unsuffixed name.
    #
    # data:      list of data frames.
    # toExclude: character vector of column names to leave out.
    #
    # Returns a list with one character vector per remaining column name. The
    # list is sorted by, and named after, the original column name; each vector
    # has exactly one entry per occurrence of that column across `data`.
    new_names <- function(data, toExclude) {
      # All column names across the list, minus the excluded ones.
      all_names <- as.character(unlist(lapply(data, colnames), use.names = FALSE))
      kept <- all_names[!all_names %in% toExclude]

      # Number of occurrences of each distinct column name.
      distinct_names <- sort(unique(kept))
      occurrences <- tabulate(
        match(kept, distinct_names),
        nbins = length(distinct_names)
      )

      suffixed <- function(column, n) {
        # Integer division: only complete pairs receive _x / _y suffixes.
        n_pairs <- n %/% 2L
        suffixes <- strrep(
          rep(c("_x", "_y"), n_pairs),
          rep(seq_len(n_pairs), each = 2L)
        )
        # An odd occurrence count leaves one trailing name without a suffix.
        if (n %% 2L == 1L) {
          suffixes <- c(suffixes, "")
        }
        paste0(column, suffixes)
      }

      # Map() names the result after `distinct_names`, so each vector can be
      # looked up by its original column name.
      Map(suffixed, distinct_names, occurrences)

      # Still open: apply these names to the matching columns of `data`.
    }

    函数接收数据框列表和要排除的列;按如下方式调用时,应产生上面的期望输出:
    # Run new_names() on the example list, leaving the column "inAll" out of
    # the renaming.
    new_names(dd$data, "inAll")

    任何帮助,即使只是使用什么逻辑来获得我想要的最终结果也将不胜感激,谢谢!

    最佳答案

    这是一种做法:先从 list 中提取各数据框的列名,用 split 按相同的列名分组,根据每个名称重复出现的次数修改名称,然后用 relist 还原为原来的 list 结构,最后用 map2 把修改后的名称设置为原始 list 中各数据框的列名

    library(purrr)
    library(dplyr)
    library(stringr)
    inp <- dd$data
    # Column names per data frame; also serves as the skeleton for relist().
    lst1 <- map(inp, names)
    # The same names flattened into one character vector.
    nm1 <- unlist(lst1)
    # TRUE for every name other than the key column 'inAll'.
    i1 <- nm1 != 'inAll'
    # Gather the occurrences of each remaining name into one group.
    lst2 <- split(nm1[i1], nm1[i1])
    # Only names occurring more than once receive suffixes.
    i2 <- lengths(lst2) > 1
    lst2[i2] <- map(lst2[i2], function(occurrences) {
      # Number the occurrences in consecutive pairs: 1, 1, 2, 2, 3, ...
      pair_id <- (seq_along(occurrences) - 1) %/% 2 + 1
      # TRUE for members of a complete pair; a trailing odd occurrence is FALSE.
      paired <- pair_id %in% names(which(table(pair_id) == 2))
      n_pairs <- ceiling(sum(paired) / 2)
      # Pair k gets '_x' / '_y' repeated k times.
      suffixes <- strrep(
        rep(c('_x', '_y'), n_pairs),
        rep(seq_len(n_pairs), each = 2)
      )
      occurrences[paired] <- str_c(occurrences[paired], suffixes)
      occurrences
    })
    # Put the renamed values back at their original positions.
    nm1[i1] <- unsplit(lst2, nm1[i1])
    # Restore the per-data-frame structure and set the new column names.
    out2 <- map2(inp, relist(nm1, skeleton = lst1), set_names)

    - 检查 OP 的输出
    # Compare the expected result stored in dd$data2 with the computed out2.
    out <- dd$data2
    identical(out, out2)
    # Result reported for the comparison above:
    #[1] TRUE

    关于r - 在数据框列表中动态设置唯一的列名 [R],我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/61375746/

    24 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com