gpt4 book ai didi

r - 使用 map2_dfr 将数据 bind_cols 在一起

转载 作者:行者123 更新时间:2023-12-02 06:46:43 25 4
gpt4 key购买 nike

我有两个列表(df1 和 df2),它们各自包含名为 `5` 和 `6` 的两个子列表,如下所示:

df1:

$`5`
$`5`$`2016-01-01`
[,1]
[1,] -0.8357399
[2,] 0.7793535
[3,] 1.2425095
[4,] 1.0564501
[5,] 0.7351215
[6,] -0.1976808
[7,] 0.1692951
[8,] -0.4280740
[9,] -0.5507262
[10,] 1.0437385

$`5`$`2016-01-02`
[,1]
[1,] 0.68302631
[2,] 1.16508889
[3,] 1.04583836
[4,] 0.71979517
[5,] -0.14236742
[6,] 0.07792941
[7,] -0.41616267
[8,] -0.49082834
[9,] 1.09706485
[10,] 1.06911635

这是一些预测。我还有另一个列表,如下所示:

df2:

$`5`[[3]]
date wind temp
1 2010-01-03 1.387097 42.09677
2 2010-01-04 3.225806 45.16129
3 2010-01-05 4.192308 47.34615
4 2010-01-06 8.083333 39.83333
5 2010-01-07 8.774194 33.29032
6 2010-01-08 9.709677 32.25806
7 2010-01-09 12.419355 31.16129
8 2010-01-10 1.290323 27.54839
9 2010-01-11 2.258065 31.06452
10 2010-01-12 4.967742 33.83871

$`5`[[4]]
date wind temp
1 2010-01-04 3.225806 45.16129
2 2010-01-05 4.192308 47.34615
3 2010-01-06 8.083333 39.83333
4 2010-01-07 8.774194 33.29032
5 2010-01-08 9.709677 32.25806
6 2010-01-09 12.419355 31.16129
7 2010-01-10 1.290323 27.54839
8 2010-01-11 2.258065 31.06452
9 2010-01-12 4.967742 33.83871
10 2010-01-13 4.129032 40.70968

其中包含一些原始数据。我想将 df1 和 df2 结合起来,使结果看起来像这样:

$`5`[[3]]
date wind temp prediction
1 2010-01-03 1.387097 42.09677 -0.8357399
2 2010-01-04 3.225806 45.16129 0.7793535
3 2010-01-05 4.192308 47.34615 1.2425095
4 2010-01-06 8.083333 39.83333 1.0564501
5 2010-01-07 8.774194 33.29032 0.7351215
6 2010-01-08 9.709677 32.25806 -0.1976808
7 2010-01-09 12.419355 31.16129 0.1692951
8 2010-01-10 1.290323 27.54839 -0.4280740
9 2010-01-11 2.258065 31.06452 -0.5507262
10 2010-01-12 4.967742 33.83871 1.0437385

$`5`[[4]]
date wind temp prediction
1 2010-01-04 3.225806 45.16129 0.68302631
2 2010-01-05 4.192308 47.34615 1.16508889
3 2010-01-06 8.083333 39.83333 1.04583836
4 2010-01-07 8.774194 33.29032 0.71979517
5 2010-01-08 9.709677 32.25806 -0.14236742
6 2010-01-09 12.419355 31.16129 0.07792941
7 2010-01-10 1.290323 27.54839 -0.41616267
8 2010-01-11 2.258065 31.06452 -0.49082834
9 2010-01-12 4.967742 33.83871 1.09706485
10 2010-01-13 4.129032 40.70968 1.06911635

最后我想将所有这些列表以长格式绑定(bind)在一起,例如:

       date      wind     temp   prediction  ID
2010-01-03 1.387097 42.09677 -0.8357399 5
2010-01-04 3.225806 45.16129 0.7793535 5
2010-01-05 4.192308 47.34615 1.2425095 5
2010-01-06 8.083333 39.83333 1.0564501 5
2010-01-07 8.774194 33.29032 0.7351215 5
2010-01-08 9.709677 32.25806 -0.1976808 5
2010-01-09 12.419355 31.16129 0.1692951 5
2010-01-10 1.290323 27.54839 -0.4280740 5
2010-01-11 2.258065 31.06452 -0.5507262 5
2010-01-12 4.967742 33.83871 1.0437385 5
2010-01-04 3.225806 45.16129 0.68302631 6
2010-01-05 4.192308 47.34615 1.16508889 6
2010-01-06 8.083333 39.83333 1.04583836 6
2010-01-07 8.774194 33.29032 0.71979517 6
2010-01-08 9.709677 32.25806 -0.14236742 6
2010-01-09 12.419355 31.16129 0.07792941 6
2010-01-10 1.290323 27.54839 -0.41616267 6
2010-01-11 2.258065 31.06452 -0.49082834 6
2010-01-12 4.967742 33.83871 1.09706485 6
2010-01-13 4.129032 40.70968 1.06911635 6

这是一个大型的单一数据框,ID 列是列表名称。

我试过以下代码:

library(purrr)
map2_dfr(df1, df2, ~map2_dfr(.x, map(.y, "date"), ~cbind(.x, date = .y)))

这不符合我的要求。

# A tibble: 40 x 4
`2016-01-01` `2016-01-02` `2016-01-03` `2016-01-04`
<dbl> <dbl> <dbl> <dbl>
1 -0.836 0.683 1.12 0.935
2 0.779 1.17 1.02 0.754
3 1.24 1.05 0.776 -0.146
4 1.06 0.720 -0.188 0.160
5 0.735 -0.142 0.0884 -0.465
6 -0.198 0.0779 -0.283 -0.543
7 0.169 -0.416 -0.453 1.10
8 -0.428 -0.491 1.06 1.03
9 -0.551 1.10 1.03 0.976
10 1.04 1.07 1.06 1.29
# … with 30 more rows

数据:

df1 <- list(`5` = list(`2016-01-01` = structure(c(-0.835739850997925, 
0.779353499412537, 1.24250948429108, 1.0564501285553, 0.735121548175812,
-0.197680771350861, 0.169295132160187, -0.42807400226593, -0.550726175308228,
1.04373848438263), .Dim = c(10L, 1L)), `2016-01-02` = structure(c(0.683026313781738,
1.16508889198303, 1.04583835601807, 0.719795167446136, -0.142367422580719,
0.0779294073581696, -0.416162669658661, -0.490828335285187, 1.09706485271454,
1.06911635398865), .Dim = c(10L, 1L)), `2016-01-03` = structure(c(1.12009644508362,
1.01791954040527, 0.775721669197083, -0.187799870967865, 0.0883594155311584,
-0.283172011375427, -0.452982068061829, 1.06388020515442, 1.02800369262695,
1.06038355827332), .Dim = c(10L, 1L)), `2016-01-04` = structure(c(0.935240745544434,
0.753606081008911, -0.145884394645691, 0.160260230302811, -0.464599192142487,
-0.543198347091675, 1.10469722747803, 1.03225469589233, 0.976013004779816,
1.28949522972107), .Dim = c(10L, 1L))), `6` = list(`2016-01-01` = structure(c(-1.05108523368835,
0.831294775009155, 1.0468602180481, 1.4151725769043, 0.89024829864502,
-0.23750251531601, 0.0968895554542542, -0.447583615779877, -0.885086059570312,
1.16484880447388), .Dim = c(10L, 1L)), `2016-01-02` = structure(c(0.898594379425049,
1.03132367134094, 1.43809175491333, 1.05361354351044, -0.204488694667816,
0.0889829993247986, -0.42036372423172, -0.906104445457458, 1.16317582130432,
1.13032007217407), .Dim = c(10L, 1L)), `2016-01-03` = structure(c(0.99635636806488,
1.5137802362442, 0.92145836353302, -0.218990564346313, 0.147298634052277,
-0.466208696365356, -0.896591305732727, 1.18873286247253, 1.10375666618347,
1.59670341014862), .Dim = c(10L, 1L)), `2016-01-04` = structure(c(1.45108199119568,
0.860665202140808, -0.299971401691437, 0.0754360556602478, -0.460747301578522,
-0.947231769561768, 1.06433939933777, 1.13789772987366, 1.70162570476532,
2.04307699203491), .Dim = c(10L, 1L))))


df2 <- list(`5` = list(structure(list(date = structure(c(14610, 14611,
14612, 14613, 14614, 14615, 14616, 14617, 14618, 14619), class = "Date"),
wind = c(19.72, 3.19354838709677, 1.38709677419355, 3.2258064516129,
4.19230769230769, 8.08333333333333, 8.7741935483871, 9.70967741935484,
12.4193548387097, 1.29032258064516), temp = c(41.6, 41.9677419354839,
42.0967741935484, 45.1612903225806, 47.3461538461538, 39.8333333333333,
33.2903225806452, 32.258064516129, 31.1612903225806, 27.5483870967742
)), row.names = c(NA, 10L), class = "data.frame"), structure(list(
date = structure(c(14611, 14612, 14613, 14614, 14615, 14616,
14617, 14618, 14619, 14620), class = "Date"), wind = c(3.19354838709677,
1.38709677419355, 3.2258064516129, 4.19230769230769, 8.08333333333333,
8.7741935483871, 9.70967741935484, 12.4193548387097, 1.29032258064516,
2.25806451612903), temp = c(41.9677419354839, 42.0967741935484,
45.1612903225806, 47.3461538461538, 39.8333333333333, 33.2903225806452,
32.258064516129, 31.1612903225806, 27.5483870967742, 31.0645161290323
)), row.names = c(NA, 10L), class = "data.frame"), structure(list(
date = structure(c(14612, 14613, 14614, 14615, 14616, 14617,
14618, 14619, 14620, 14621), class = "Date"), wind = c(1.38709677419355,
3.2258064516129, 4.19230769230769, 8.08333333333333, 8.7741935483871,
9.70967741935484, 12.4193548387097, 1.29032258064516, 2.25806451612903,
4.96774193548387), temp = c(42.0967741935484, 45.1612903225806,
47.3461538461538, 39.8333333333333, 33.2903225806452, 32.258064516129,
31.1612903225806, 27.5483870967742, 31.0645161290323, 33.8387096774194
)), row.names = c(NA, 10L), class = "data.frame"), structure(list(
date = structure(c(14613, 14614, 14615, 14616, 14617, 14618,
14619, 14620, 14621, 14622), class = "Date"), wind = c(3.2258064516129,
4.19230769230769, 8.08333333333333, 8.7741935483871, 9.70967741935484,
12.4193548387097, 1.29032258064516, 2.25806451612903, 4.96774193548387,
4.12903225806452), temp = c(45.1612903225806, 47.3461538461538,
39.8333333333333, 33.2903225806452, 32.258064516129, 31.1612903225806,
27.5483870967742, 31.0645161290323, 33.8387096774194, 40.7096774193548
)), row.names = c(NA, 10L), class = "data.frame")), `6` = list(
structure(list(date = structure(c(14610, 14611, 14612, 14613,
14614, 14615, 14616, 14617, 14618, 14619), class = "Date"),
wind = c(19.72, 3.19354838709677, 1.38709677419355, 3.2258064516129,
4.19230769230769, 8.08333333333333, 8.7741935483871,
9.70967741935484, 12.4193548387097, 1.29032258064516),
temp = c(41.6, 41.9677419354839, 42.0967741935484, 45.1612903225806,
47.3461538461538, 39.8333333333333, 33.2903225806452,
32.258064516129, 31.1612903225806, 27.5483870967742)), row.names = c(NA,
10L), class = "data.frame"), structure(list(date = structure(c(14611,
14612, 14613, 14614, 14615, 14616, 14617, 14618, 14619, 14620
), class = "Date"), wind = c(3.19354838709677, 1.38709677419355,
3.2258064516129, 4.19230769230769, 8.08333333333333, 8.7741935483871,
9.70967741935484, 12.4193548387097, 1.29032258064516, 2.25806451612903
), temp = c(41.9677419354839, 42.0967741935484, 45.1612903225806,
47.3461538461538, 39.8333333333333, 33.2903225806452, 32.258064516129,
31.1612903225806, 27.5483870967742, 31.0645161290323)), row.names = c(NA,
10L), class = "data.frame"), structure(list(date = structure(c(14612,
14613, 14614, 14615, 14616, 14617, 14618, 14619, 14620, 14621
), class = "Date"), wind = c(1.38709677419355, 3.2258064516129,
4.19230769230769, 8.08333333333333, 8.7741935483871, 9.70967741935484,
12.4193548387097, 1.29032258064516, 2.25806451612903, 4.96774193548387
), temp = c(42.0967741935484, 45.1612903225806, 47.3461538461538,
39.8333333333333, 33.2903225806452, 32.258064516129, 31.1612903225806,
27.5483870967742, 31.0645161290323, 33.8387096774194)), row.names = c(NA,
10L), class = "data.frame"), structure(list(date = structure(c(14613,
14614, 14615, 14616, 14617, 14618, 14619, 14620, 14621, 14622
), class = "Date"), wind = c(3.2258064516129, 4.19230769230769,
8.08333333333333, 8.7741935483871, 9.70967741935484, 12.4193548387097,
1.29032258064516, 2.25806451612903, 4.96774193548387, 4.12903225806452
), temp = c(45.1612903225806, 47.3461538461538, 39.8333333333333,
33.2903225806452, 32.258064516129, 31.1612903225806, 27.5483870967742,
31.0645161290323, 33.8387096774194, 40.7096774193548)), row.names = c(NA,
10L), class = "data.frame")))

编辑:

我喜欢@tmfmnk 和@akrun 的两个解决方案。

(一些推理 - 主要是为了我稍后再回到这个问题)

在我的完整数据上,运行以下代码时会收到错误 Error: Argument 2 must be length 2995920, not 2998110:

map2_dfr(map(df1, ~ bind_rows(.) %>%
pivot_longer(everything(), values_to = "prediction") %>%
select(-name)),
map(df2, bind_rows),
bind_cols, .id = "ID")

我将错误缩小到以下事实:

map(df1, ~bind_rows(.) %>% 
pivot_longer(everything(), values_to = "prediction") %>%
select(-name))

会创建 2 个各含 2995920 个观测值的列表。我还单独运行了:

map(processed_analysis, ~bind_rows(.))

这会创建 2 个各含 2998110 个观测值的列表。这两个数字之差是 2190(我知道这个数字只对我自己有意义,但它很重要,因为 2190 正是嵌套列表中某一个子列表的长度,即其观测值的数量)。

2998110/2190 = 1369,其中 1369 是每个列表(本例中的 `5` 和 `6`)所包含的子列表个数。

@akrun 的解决方案:

imap_dfr(df2, ~ bind_rows(.x) %>%
mutate(ID = .y)) %>%
mutate(prediction = unlist(df1)) %>%
as_tibble

给我一个由 5996220 个观测值组成的数据框。这等于 2998110 + 2998110,即把 1369 个子列表 rbind 之后,列表 `5` 和 `6` 各自包含的观测值数量之和。

@akrun 方案得到的 2998110 与 @tmfmnk 方案得到的 2995920 相差 2190,这正好是列表 `5` 和 `6` 的 1369 个子列表中某一个的行数。

2998110/1369 = 2190 是正确的,而 2995920/1369 ≈ 2188.40 不是整数,与数据不一致。

我不明白为什么这两种解决方案似乎对此处提供的数据完美运行,但对我拥有的完整数据却略有不同。

最佳答案

一个选项可能是:

map2_dfr(map(df1, ~ unlist(.) %>%
enframe(value = "prediction") %>%
select(-name)),
map(df2, bind_rows),
bind_cols, .id = "ID")

ID prediction date wind temp
<chr> <dbl> <date> <dbl> <dbl>
1 5 -0.836 2010-01-01 19.7 41.6
2 5 0.779 2010-01-02 3.19 42.0
3 5 1.24 2010-01-03 1.39 42.1
4 5 1.06 2010-01-04 3.23 45.2
5 5 0.735 2010-01-05 4.19 47.3
6 5 -0.198 2010-01-06 8.08 39.8
7 5 0.169 2010-01-07 8.77 33.3
8 5 -0.428 2010-01-08 9.71 32.3
9 5 -0.551 2010-01-09 12.4 31.2
10 5 1.04 2010-01-10 1.29 27.5
# … with 70 more rows

关于r - 使用 map2_dfr 将数据 bind_cols 在一起,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59511933/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com