gpt4 book ai didi

r - 使用 dplyr 嵌套或分组两个变量,然后对数据执行 Cronbach's alpha 函数或其他统计

转载 作者:行者123 更新时间:2023-12-05 09:30:48 25 4
gpt4 key购买 nike

在心理学中,下面呈现的这种数据集很常见

original ds

我想先按所有年龄段(变量 quest)分组,再按所有量表(com_a4_1:com_a4_6、gm_a4_1:gm_a4_6 等)分组,然后对数据应用可靠性函数 (psych::alpha)。

我成功地创建了这个语法

# Cronbach's alpha per age interval (quest), pooling every "_a4_" item.
d %>%
  select(quest, contains("_a4_")) %>% # keep the age key plus all scale items
  group_by(quest) %>%                 # one group per age interval
  # Namespace-qualified on purpose: ggplot2 (attached by tidyverse) also
  # exports alpha() and masks psych::alpha() when it is loaded after psych.
  # `$total` is the one-row summary (raw_alpha, std.alpha, ...) for the group.
  do(psych::alpha(.)$total)

result

但是,我无法进一步按各量表的项目进行“子”嵌套。

按我的设想,我必须先对数据做长宽转换(pivot),然后再进行分组或嵌套。但是,我目前还没有成功。我的预期结果类似于下图,其中有“两层嵌套结果”:第一层按量表分组(例如:com_a4_1:com_a4_6),第二层按年龄分组(quest)。

expected output

下面是假数据和代码

library(psych)
library(tidyverse)

# Fake data: one row per respondent (41 rows). `quest` is the age interval
# (2, 4 or 6); the remaining columns are items named <scale>_a4_<item> for the
# scales com, gm, fm, cg and ps (items 1-6 each).
# Defined before the pipeline so the example runs top to bottom.
d <- structure(list(quest = c(6, 4, 2, 4, 2, 6, 2, 4, 2, 2, 4, 2,
6, 4, 4, 2, 2, 4, 2, 6, 2, 2, 4, 6, 6, 4, 4, 4, 2, 6, 4, 2, 6,
4, 6, 2, 2, 4, 6, 4, 2), com_a4_1 = c(10, 0, 10, 10, 5, 10, 5,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 0, 10,
10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10,
10, 10), com_a4_2 = c(10, 10, 5, 10, 10, 5, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 5, 10, 10, 10, 10, 5,
10, 10, 10, 5, 0, 10, 10, 10, 10, 0, 10, 10, 10, 10), com_a4_3 = c(10,
5, 0, 5, 10, 5, 5, 10, 10, 10, 10, 10, 5, 5, 10, 10, 5, 10, 10,
10, 10, 5, 5, 10, 10, 5, 5, 10, 10, 10, 10, 5, 10, 10, 10, 10,
0, 10, 5, 10, 10), com_a4_4 = c(10, 0, 0, 10, 5, 10, 10, 10,
10, 5, 5, 10, 10, 5, 10, 10, 5, 10, 10, 10, 10, 5, 10, 10, 10,
10, 0, 10, 5, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10),
com_a4_5 = c(10, 0, 0, 5, 0, 10, 5, 10, 10, 5, 10, 10, 0,
10, 10, 10, 0, 10, 5, 10, 0, 0, 10, 0, 10, 10, 10, 10, 5,
0, 10, 5, 5, 10, 10, 10, 0, 10, 10, 10, 10), com_a4_6 = c(5,
10, 0, 10, 10, 5, 10, 10, 10, 0, 10, 10, 5, 10, 10, 10, 10,
10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 5, 10,
5, 10, 5, 10, 0, 10, 5, 10, 10), gm_a4_1 = c(10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10), gm_a4_2 = c(10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 5, 5, 10, 10, 10, 0, 10, 10,
5, 10, 10, 5, 10, 10, 10, 10), gm_a4_3 = c(10, 10, 10, 10,
10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 0, 0, 10, 10, 10, 0, 10, 10, 10,
10, 10, 5, 10, 10, 10, 10), gm_a4_4 = c(0, 5, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5,
10, 10, 10, 10, 10, 0, 0, 10, 10, 10, 0, 10, 5, 5, 5, 10,
10, 10, 10, 10, 10), gm_a4_5 = c(10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 5, 10,
5, 10, 10, 10, 10), gm_a4_6 = c(0, 10, 5, 5, 10, 5, 5, 10,
10, 5, 10, 10, 0, 10, 10, 10, 5, 10, 5, 10, 10, 10, 10, 0,
10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 0, 10, 0, 10, 10,
10, 10), fm_a4_1 = c(10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 0, 10, 10, 0, 5,
10, 10, 10, 10, 5, 5, 10, 10, 5, 5, 10, 10, 10, 10, 10),
fm_a4_2 = c(10, 10, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 5,
10, 10, 5, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10), fm_a4_3 = c(0,
5, 10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 5, 10, 5, 5, 5,
10, 10, 5, 0, 10, 5, 10, 5, 10, 10, 0, 10, 10, 5, 10, 10,
10, 0, 10, 0, 10, 10, 10, 10), fm_a4_4 = c(10, 5, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 5, 10, 10, 10, 5, 10, 10, 10, 0, 10, 10, 10,
10, 10, 0, 10, 10, 10, 10), fm_a4_5 = c(0, 5, 10, 10, 10,
0, 10, 10, 10, 10, 10, 10, 0, 10, 10, 5, 10, 10, 5, 0, 10,
10, 10, 10, 10, 10, 5, 10, 10, 0, 5, 10, 0, 10, 0, 5, 5,
5, 10, 10, 10), fm_a4_6 = c(10, 5, 5, 0, 0, 5, 10, 10, 10,
0, 10, 10, 5, 10, 10, 10, 0, 10, 0, 10, 10, 0, 10, 10, 5,
0, 0, 10, 10, 10, 0, 10, 10, 5, 5, 10, 0, 0, 10, 10, 5),
cg_a4_1 = c(10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 5, 0,
10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 5, 10, 10, 10), cg_a4_2 = c(5,
10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 5, 10, 10, 10,
10, 10, 10, 5, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10), cg_a4_3 = c(10,
10, 5, 10, 10, 10, 10, 10, 10, 5, 10, 10, 5, 10, 10, 10,
5, 10, 10, 10, 10, 0, 10, 10, 5, 10, 5, 10, 10, 10, 5, 10,
10, 10, 10, 10, 5, 10, 10, 10, 10), cg_a4_4 = c(10, 10, 0,
5, 5, 5, 10, 10, 10, 5, 10, 10, 0, 5, 10, 10, 5, 10, 10,
10, 10, 0, 5, 10, 10, 5, 0, 0, 10, 10, 0, 10, 0, 10, 10,
5, 0, 5, 5, 10, 10), cg_a4_5 = c(5, 0, 0, 5, 0, 10, 5, 10,
10, 0, 10, 10, 10, 10, 5, 10, 0, 10, 0, 10, 0, 0, 10, 10,
5, 10, 5, 10, 5, 5, 5, 0, 10, 10, 5, 10, 0, 10, 10, 10, 10
), cg_a4_6 = c(0, 0, 5, 10, 10, 10, 10, 10, 0, 10, 5, 10,
10, 10, 5, 10, 10, 10, 10, 10, 5, 10, 10, 10, 10, 5, 5, 10,
5, 10, 0, 10, 10, 5, 5, 10, 5, 10, 10, 10, 10), ps_a4_1 = c(10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 5, 5, 10, 5, 10, 10, 10, 10), ps_a4_2 = c(0, 10,
10, 10, 5, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 5, 10, 5, 10, 10, 10, 5, 10, 10, 10, 5, 0, 10, 10, 10,
5, 0, 10, 5, 10, 10, 10, 10), ps_a4_3 = c(10, 0, 10, 5, 5,
10, 5, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
5, 10, 10, 10, 5, 10, 10, 10, 5, 10, 10, 10, 10, 5, 0, 5,
0, 10, 5, 10, 10), ps_a4_4 = c(10, 10, 10, 10, 5, 10, 5,
10, 10, 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 10,
10, 10, 10, 10, 10, 10, 5, 10, 5, 10, 10, 10, 10, 5, 5, 10,
10, 10, 10), ps_a4_5 = c(5, 5, 10, 5, 10, 5, 10, 10, 0, 0,
10, 10, 5, 10, 10, 10, 10, 10, 0, 10, 5, 5, 5, 10, 0, 10,
5, 10, 5, 0, 10, 10, 10, 10, 0, 5, 0, 5, 10, 10, 5), ps_a4_6 = c(5,
5, 0, 5, 0, 10, 0, 10, 5, 5, 10, 10, 5, 10, 10, 10, 0, 10,
5, 10, 5, 0, 5, 10, 5, 10, 5, 0, 5, 10, 0, 0, 10, 5, 0, 5,
0, 10, 10, 10, 10)), row.names = c(NA, -41L), class = "data.frame")

# Cronbach's alpha per age interval (quest), pooling every "_a4_" item.
d %>%
  select(quest, contains("_a4_")) %>% # get the data
  group_by(quest) %>%                 # group by all age interval
  # Namespace-qualified on purpose: library(tidyverse) attaches ggplot2 after
  # psych, and ggplot2's alpha() would otherwise mask psych::alpha().
  do(psych::alpha(.)$total)

最佳答案

我沿用了你把数据转换为更长格式的想法,使用 tidyr 中的 pivot_longer() 将量表分组放在行中,但将项目保留在列中。(每当我需要回忆如何执行此操作时,pivot_longer() 文档中的最后两个示例都是我的首选参考。)

但是,这取决于每个量表拥有相同数量的项目;我不确定当各量表的项目数不同时它会如何工作。

一旦数据变成更长的格式,就对 quest 和 scale 变量使用 nest_by() 进行嵌套,再用 mutate() 为每一行计算 alpha。

我没有在此处粘贴所有的警告和消息,但它们数量很多。如果不再需要 data 列,也可以在最后将其删除。

library(psych)
library(dplyr)
library(tidyr)

# Cronbach's alpha for every combination of age interval (quest) and scale.
d %>%
  # Split each item name into its scale prefix (e.g. "com_a4_") and its item
  # number. ".value" keeps the item numbers as columns, so every row holds one
  # respondent's answers on one scale.
  pivot_longer(
    cols = -quest,
    names_to = c("scale", ".value"),
    # Greedy prefix up to the last underscore, then the whole trailing number.
    # A single-character capture such as "(.)" would make items numbered 10
    # or above collide with items 1-9.
    names_pattern = "(.*_)(\\d+)$"
  ) %>%
  # One row per quest x scale; the item columns are nested in list-column `data`.
  nest_by(quest, scale) %>%
  # The unnamed data-frame result is unpacked into one column per statistic.
  # Namespace-qualified so the call cannot be masked by another alpha().
  mutate(psych::alpha(data)$total)

#> # A tibble: 15 x 12
#> # Rowwise: quest, scale
#> quest scale data raw_alpha std.alpha `G6(smc)` average_r `S/N` ase
#> <dbl> <chr> <list<t> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2 cg_a4_ [16 x 6] 0.619 0.594 0.728 0.226 1.46 0.141
#> 2 2 com_a~ [16 x 6] 0.810 0.808 0.881 0.412 4.20 0.0719
#> 3 2 fm_a4_ [16 x 6] 0.400 0.421 0.546 0.108 0.728 0.221
#> 4 2 gm_a4_ [16 x 6] 0.842 0.952 0.745 0.831 19.7 0.0592
#> 5 2 ps_a4_ [16 x 6] 0.684 0.753 0.870 0.337 3.05 0.123
#> 6 4 cg_a4_ [15 x 6] 0.677 0.696 0.807 0.276 2.29 0.126
#> 7 4 com_a~ [15 x 6] 0.673 0.613 0.842 0.209 1.58 0.110
#> 8 4 fm_a4_ [15 x 6] 0.669 0.714 0.811 0.294 2.50 0.124
#> 9 4 gm_a4_ [15 x 6] 0.811 0.759 0.873 0.386 3.15 0.0389
#> 10 4 ps_a4_ [15 x 6] 0.533 0.551 0.605 0.170 1.23 0.161
#> 11 6 cg_a4_ [10 x 6] -0.168 -0.00601 0.550 -0.00120 -0.00597 0.621
#> 12 6 com_a~ [10 x 6] -0.184 0.228 0.486 0.0686 0.295 0.644
#> 13 6 fm_a4_ [10 x 6] 0.508 0.542 0.727 0.191 1.18 0.248
#> 14 6 gm_a4_ [10 x 6] -0.075 -0.492 -0.0806 -0.0582 -0.330 0.398
#> 15 6 ps_a4_ [10 x 6] 0.844 0.879 0.903 0.592 7.26 0.0710
#> # ... with 3 more variables: mean <dbl>, sd <dbl>, median_r <dbl>

reprex package 创建于 2021-09-23 (v2.0.0)

关于r - 使用 dplyr 嵌套或分组两个变量,然后对数据执行 Cronbach's alpha 函数或其他统计,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/69302457/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com