gpt4 book ai didi

r - 在不同的数据帧中创建相同的变量

转载 作者:行者123 更新时间:2023-12-05 06:23:34 25 4
gpt4 key购买 nike

我有 14 个具有相同列名和相同列数的数据框。我想在每个数据框中创建一个相同的变量 (hi130_eur=hi130/fixrate)。

我首先尝试了一个循环:

# Collect the 14 country data frames into one list so they can be processed together.
countries<-list(country2, country3, country4, country6, country8, country10, country11, country12,
country13, country14, country15, country51, country55, country57)

# First attempt: walk over the list and add hi130_eur (= hi130 / fixrate) to each element.
# NOTE(review): `countries[i]` (single bracket) returns a one-element list, not the
# data frame itself, so group_by() is handed a list -- consistent with the
# 'applied to an object of class "list"' error quoted right after this snippet.
# `countries[[i]]` is the form that extracts the data frame.
for(i in 1:length(countries)) {
countries[i] <- countries[i] %>% group_by(wave) %>% mutate(hi130_eur=hi130/fixrate)
}

其中 group_by 中的 wave 是每个 data.frame 中标识调查波数的列的名称。但是报如下错误:

Error in UseMethod("group_by_") : no applicable method for 'group_by_' applied to an object of class "list"

所以,我也通过以下方式尝试了 lapply:

# Second attempt: apply the same transformation to every data frame with lapply().
# NOTE(review): the value returned by lapply() is not assigned to anything here,
# so `countries` itself is left unchanged by this call.
lapply(countries, function(x) { 
# The new column is created under the name hi100_eur, not hi130_eur.
x<-x %>% group_by(wave) %>% mutate(hi100_eur=hi130/fixrate)
# `hi130_eur` is referenced as a standalone object, which was never created --
# consistent with the "not found" error reported below. The cbind() result is
# also not stored anywhere.
cbind(x, hi130_eur)
return(x)
})

但报告找不到 hi130_eur。
关于如何在每个数据框中正确创建相同变量的任何建议?

每个数据框的结构如下:

 structure(list(country = c(2L, 2L, 2L, 2L, 2L, 2L), wave = c(1L, 
1L, 1L, 1L, 1L, 1L), hid = c(7601L, 8401L, 10701L, 15701L, 15701L,
16501L), hg004 = c(1.07908, 1.47759, 1.24982, 0.94703001, 0.94703001,
0.92088997), hg005 = c(-9L, -9L, -9L, -9L, -9L, -9L), hg006 = c(-9L,
-9L, -9L, -9L, -9L, -9L), hg007 = c(-9L, -9L, -9L, -9L, -9L,
-9L), hg014 = c(1994L, 1994L, 1994L, 1994L, 1994L, 1994L), hd001 = c(1L,
1L, 1L, 3L, 3L, 1L), hd002 = c(1L, 1L, 1L, 2L, 2L, 1L), hd003 = c(1L,
1L, 1L, 2L, 2L, 1L), hd004 = c(1, 1, 1, 2.2, 2.2, 1), hd005 = c(1,
1, 1, 1.8, 1.8, 1), hd006 = c(2L, 1L, 3L, 8L, 8L, 2L), hd006a = c(2L,
3L, 4L, 11L, 11L, 5L), hd006b = c(1L, 2L, 3L, 11L, 11L, 3L),
hd007 = c(-8L, -8L, -8L, -8L, -8L, -8L), hd008 = c(-8L, -8L,
-8L, -8L, -8L, -8L), hd009 = c(-8L, -8L, -8L, -8L, -8L, -8L
), hd010 = c(-8L, -8L, -8L, -8L, -8L, -8L), hi001 = c(4L,
1L, 1L, 1L, 1L, 1L), hi020 = c(0.58311999, 0.57414001, 0.57635999,
0.6074, 0.6074, 0.56733), hi100 = c(97000L, 126550L, 90000L,
249500L, 249500L, 156166L), hi110 = c(13000L, 106000L, 90000L,
185500L, 185500L, 156166L), hi111 = c(13000L, 106000L, 90000L,
185500L, 185500L, 156166L), hi112 = c(0L, 0L, 0L, 0L, 0L,
0L), hi120 = c(0L, 0L, 0L, 0L, 0L, 0L), hi121 = c(0L, 0L,
0L, 0L, 0L, 0L), hi122 = c(0L, 0L, 0L, 0L, 0L, 0L), hi122g = c(0L,
0L, 0L, 0L, 0L, 0L), hi123 = c(0L, 0L, 0L, 0L, 0L, 0L), hi130 = c(84000L,
20550L, 0L, 64000L, 64000L, 0L), hi131 = c(84000L, 0L, 0L,
0L, 0L, 0L), hi132 = c(0L, 6150L, 0L, 0L, 0L, 0L), hi133 = c(0L,
0L, 0L, 64000L, 64000L, 0L), hi134 = c(0L, 0L, 0L, 0L, 0L,
0L), hi135 = c(0L, 0L, 0L, 0L, 0L, 0L), hi136 = c(0L, 0L,
0L, 0L, 0L, 0L), hi137 = c(0L, 0L, 0L, 0L, 0L, 0L), hi138 = c(0L,
14400L, 0L, 0L, 0L, 0L), ha005 = c(4L, 4L, 4L, 4L, 4L, 2L
), ha006 = c(2L, 2L, 2L, 3L, 3L, 3L), ha007 = c(3L, 3L, 3L,
4L, 4L, 4L), ha008 = c(1L, 1L, 1L, 1L, 1L, 1L), ha009 = c(1L,
1L, 1L, 1L, 1L, 1L), ha010 = c(1L, 1L, 1L, 1L, 1L, 1L), ha011 = c(1L,
1L, 1L, 1L, 1L, 1L), ha012 = c(2L, 1L, 1L, 1L, 1L, 1L), ha013 = c(2L,
1L, 2L, 2L, 2L, 1L), ha014 = c(1L, 2L, 2L, 1L, 1L, 2L), ha015b = c(-8L,
-8L, -8L, -8L, -8L, -8L), ha022 = c(2L, 2L, 2L, 2L, 2L, 2L
), ha023 = c(1L, 2L, 2L, 1L, 1L, 1L), ha024a = c(1L, -8L,
-8L, 1L, 1L, 1L), ha024b = c(1180L, -8L, -8L, 1000L, 1000L,
800L), ha025 = c(-8L, -8L, -8L, -8L, -8L, -8L), ha026 = c(-8L,
1L, 1L, -8L, -8L, -8L), ha027 = c(-8L, 3400L, 2150L, -8L,
-8L, -8L), ha029 = c(-8L, 1L, 2L, -8L, -8L, -8L), ha030 = c(-8L,
2L, 2L, -8L, -8L, -8L), ha031 = c(-8L, 1L, 1L, -8L, -8L,
-8L), ha032 = c(-8L, 2L, 2L, -8L, -8L, -8L), ha033 = c(-8L,
2L, 2L, -8L, -8L, -8L), ha034 = c(-8L, 2L, 2L, -8L, -8L,
-8L), ha035 = c(-8L, 2L, 2L, -8L, -8L, -8L), hl001 = c(-8L,
-8L, -8L, -8L, -8L, -8L), hl002 = c(-8L, -8L, -8L, -8L, -8L,
-8L), hl003 = c(-8L, -8L, -8L, -8L, -8L, -8L), pid = c(76101,
84101, 107101, 157101, 157102, 165101), pg002 = c(1.07218,
1.4762599, 1.26633, 0.94999999, 0.94999999, 0.87747997),
pg003 = c(1.07218, 1.4762599, 1.26633, 0.94999999, 0.94999999,
0.87747997), pd003 = c(42L, 70L, 22L, 33L, 33L, 46L), pd004 = c(1L,
1L, 2L, 2L, 1L, 2L), pe001 = c(7L, 1L, 7L, 1L, 1L, 1L), pe003 = c(5L,
1L, 3L, 1L, 1L, 1L), pe004 = c(-8L, 1L, -8L, 1L, 1L, 1L),
pe005 = c(-8L, 30L, -8L, 37L, 37L, 37L), pe005b = c(-8L,
-8L, -8L, -8L, -8L, -8L), pe005c = c(-8L, 1L, -8L, 1L, 1L,
1L), pe014 = c(-8L, -8L, -8L, 2L, 2L, -8L), pe015 = c(-8L,
-8L, -8L, 0L, 0L, -8L), pe024 = c(-8L, -8L, -8L, -8L, -8L,
-8L), pe025 = c(-8L, -8L, -8L, -8L, -8L, -8L), pe026 = c(-8L,
2L, -8L, 2L, 2L, 2L), pe030 = c(-8L, 2L, -8L, 2L, 2L, 2L),
pu001 = c(1L, 2L, 2L, 2L, 2L, 2L), pu004 = c(1L, -8L, 2L,
-8L, -8L, -8L), pu004a = c(1L, -8L, 2L, -8L, -8L, -8L), ps002 = c(20L,
-8L, 20L, -8L, -8L, -8L), ps006 = c(-8L, -8L, -8L, -8L, -8L,
-8L), pi001 = c(4L, 1L, 1L, 5L, 1L, 1L), pi100 = c(97000L,
126550L, 90000L, 118000L, 131500L, 156166L), pi110 = c(13000L,
106000L, 90000L, 54000L, 131500L, 156166L), pi112 = c(0L,
0L, 0L, 0L, 0L, 0L), pi121 = c(0L, 0L, 0L, 0L, 0L, 0L), pi122a = c(0L,
0L, 0L, 0L, 0L, 0L), pi123 = c(0L, 0L, 0L, 0L, 0L, 0L), pi130 = c(84000L,
20550L, 0L, 64000L, 0L, 0L), pi131 = c(84000L, 0L, 0L, 0L,
0L, 0L), pi132 = c(0L, 6150L, 0L, 0L, 0L, 0L), pi133 = c(0L,
0L, 0L, 64000L, 0L, 0L), pi134 = c(0L, 0L, 0L, 0L, 0L, 0L
), pi135 = c(0L, 0L, 0L, 0L, 0L, 0L), pi136 = c(0L, 0L, 0L,
0L, 0L, 0L), pi137a = c(0L, 0L, 0L, 0L, 0L, 0L), pi138a = c(0L,
14400L, 0L, 0L, 0L, 0L), pt022 = c(2L, 2L, 2L, 2L, 2L, 1L
), pt023 = c(-9L, 20L, 19L, 28L, 20L, 23L), pt024 = c(-8L,
-8L, -8L, -8L, -8L, -8L), ph006 = c(2L, 2L, 1L, 2L, 2L, 2L
), ph007 = c(0L, 0L, 2L, 0L, 0L, 0L), ph008 = c(-8L, -8L,
-8L, -8L, -8L, -8L), ph009 = c(-8L, -8L, -8L, -8L, -8L, -8L
), ph012 = c(-8L, -8L, -8L, -8L, -8L, -8L), ph013 = c(3L,
3L, 3L, 3L, 3L, 1L), ph022 = c(-8, -8, -8, -8, -8, -8), pr006 = c(4L,
4L, 4L, 1L, 1L, 2L), pr007 = c(-8L, -8L, -8L, 3L, 3L, -8L
), pr008 = c(-8L, -8L, -8L, -8L, -8L, 1L), weight_prsn = c(1.07908,
1.47759, 1.24982, 2.84109, 2.84109, 0.92088997), fixrate = c(1,
1, 1, 1, 1, 1), hi100_eur = c(97000, 126550, 90000, 249500,
249500, 156166), pareto = c(378337.646856238, 645715.836409999,
373027.903072689, 392088.525948343, 459127.982320312, 363013.212050222
), gpd = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA, 6L
), class = "data.frame")

最佳答案

考虑到评论,我决定重写我认为可以解决您问题的内容如下。

示例数据

您的数据集中有 124 个变量,但要展示您所遇到问题的潜在解决方案,country、wave、hi130 和 fixrate 这四列就足够了。因此,两个示例数据框对象(country1 和 country2)如下所示。

#dput(country1)
structure(list(country = c(2L, 2L, 2L, 2L, 2L, 2L), wave = c(1L,
1L, 1L, 1L, 1L, 1L), hi130 = c(84000L, 20550L, 0L, 64000L, 64000L,
0L), fixrate = c(1, 1, 1, 1, 1, 1)), class = "data.frame", row.names = c(NA,
6L))
# -------------------------------------------------------------------------
# country wave hi130 fixrate
# 1 2 1 84000 1
# 2 2 1 20550 1
# 3 2 1 0 1
# 4 2 1 64000 1
# 5 2 1 64000 1
# 6 2 1 0 1
# -------------------------------------------------------------------------
#dput(country2)
structure(list(country = c(1, 1, 1, 1, 1, 1), wave = c(2, 2,
2, 2, 2, 2), hi130 = c(0, 59800, 20440, 19181, 121213, 0), fixrate = c(3,
3, 3, 3, 3, 3)), class = "data.frame", row.names = c(NA, -6L))
# -------------------------------------------------------------------------
# country wave hi130 fixrate
# 1 1 2 0 3
# 2 1 2 59800 3
# 3 1 2 20440 3
# 4 1 2 19181 3
# 5 1 2 121213 3
# 6 1 2 0 3

函数

以下函数用于获取所需的输出。

library(dplyr)

#' Add a converted-amount column (`hi130_eur`) to a data frame
#'
#' @param df A data frame containing the grouping, amount and rate columns.
#' @param grp Character vector with the name(s) of the grouping column(s).
#' @param hi130 Name (string) of the column holding the amount to convert.
#' @param fixrate Name (string) of the column holding the conversion rate.
#' @return `df` as a tibble grouped by `grp`, with an extra column `hi130_eur`
#'   equal to the amount column divided by the rate column.
my_func <- function(df, grp = "wave", hi130 = "hi130", fixrate = "fixrate") {
  df %>%
    # group_by_() is deprecated; across(all_of()) is the supported way to
    # group by column names supplied as strings.
    group_by(across(all_of(grp))) %>%
    # Look the columns up through the names passed in. With bare `hi130 / fixrate`
    # the string arguments were silently ignored in favour of same-named columns.
    mutate(hi130_eur = .data[[hi130]] / .data[[fixrate]])
}
#Example usage
my_func(country1)
# -------------------------------------------------------------------------
my_func(country1)
# # A tibble: 6 x 5
# # Groups: wave [1]
# country wave hi130 fixrate hi130_eur
# <int> <int> <int> <dbl> <dbl>
# 1 2 1 84000 1 84000
# 2 2 1 20550 1 20550
# 3 2 1 0 1 0
# 4 2 1 64000 1 64000
# 5 2 1 64000 1 64000
# 6 2 1 0 1 0
#

应用于dataframe对象列表

以下是如何将函数应用于数据框对象列表。

# Add your data frame objects to the list below. Each element is named after
# its source object so that every result stays tied to the right data frame,
# even when the objects are not numbered consecutively.
countries <- list(country1 = country1, country2 = country2)

# Apply my_func to every data frame; lapply() keeps the element names,
# so df_list is named the same way as countries.
df_list <- lapply(countries, my_func)

您可以用 df_list 中的结果覆盖最初放入列表 countries 的各个数据框对象,如下所示。

# Write every processed data frame back to a variable in the current environment.
# Prefer the names carried by df_list, so that objects such as country2 or
# country51 are overwritten under their own names. Elements without a name fall
# back to the positional scheme country1, country2, ...
target_names <- names(df_list)
if (is.null(target_names)) {
  target_names <- rep("", length(df_list))
}
unnamed <- is.na(target_names) | target_names == ""
if (any(unnamed)) {
  target_names[unnamed] <- paste0("country", which(unnamed))
}
for (i in seq_along(df_list)) {
  assign(target_names[i], df_list[[i]])
}
# -------------------------------------------------------------------------
# country2
# # A tibble: 6 x 5
# # Groups: wave [1]
# country wave hi130 fixrate hi130_eur
# <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 2 0 3 0
# 2 1 2 59800 3 19933.
# 3 1 2 20440 3 6813.
# 4 1 2 19181 3 6394.
# 5 1 2 121213 3 40404.
# 6 1 2 0 3 0
#

您可以检查其他数据框对象;现在它们都有 hi130_eur 列了。也许还有更高效的方法来解决这个问题,但这是我目前能给出的方法。请参阅 ?assign 以了解它的作用。

如果您仍然遇到问题,请告诉我。

关于r - 在不同的数据帧中创建相同的变量,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58221418/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com