gpt4 book ai didi

R:用表格列出多个分类变量及其频数

转载 作者:行者123 更新时间:2023-12-01 21:48:18 25 4
gpt4 key购买 nike

library("tidyverse")
library("papaja")

# Example data: 21 rows (row.names = c(NA, -21L)) and four columns.
#   investment_type      - factor with levels "angel", "pre_seed", "seed"
#   gender_d             - numeric 0/1 indicator
#   state_code_org       - factor whose 35 levels are two-letter codes
#   first_time_founder_d - numeric 0/1 indicator containing one NA
# structure() maps the `.Label` attribute to the factor's levels.
# NOTE(review): the layout looks like dput() output - confirm before
# editing the integer codes by hand.
df <- structure(list(investment_type = structure(c(3L, 3L, 3L, 3L,
3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L,
3L), .Label = c("angel", "pre_seed", "seed"), class = "factor"),
gender_d = c(1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
1, 1, 1, 1, 0, 1), state_code_org = structure(c(3L, 22L,
3L, 15L, 3L, 4L, 3L, 3L, 22L, 3L, 29L, 25L, 8L, 29L, 10L,
6L, 22L, 4L, 17L, 23L, 17L), .Label = c("AL", "AR", "CA",
"CO", "CT", "DC", "DE", "FL", "GA", "IL", "KS", "LA", "MA",
"MD", "MN", "MO", "NC", "NE", "NH", "NJ", "NV", "NY", "OH",
"OR", "PA", "RI", "SC", "TN", "TX", "UT", "VA", "VT", "WA",
"WI", "WY"), class = "factor"), first_time_founder_d = c(0,
1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, NA, 1, 0, 0, 1,
0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-21L))

# Recode the four categorical columns to display labels, drop rows with
# missing values, and let summary() tabulate the level counts per column.
#
# as.data.frame() turns the summary table into long form with the columns
# Var1, Var2 and Freq, which the clean-up steps further down consume.
df <- df %>%
  select(
    investment_type,
    state_code_org,
    gender_d,
    first_time_founder_d
  ) %>%
  mutate(
    # The 0/1 indicators are compared directly. The earlier
    # mutate_at(..., factor) pass used a superseded verb and was redundant,
    # because each column is rebuilt as a factor right here.
    gender_d = factor(ifelse(gender_d == 1, "Male", "Female")),
    first_time_founder_d = factor(
      ifelse(first_time_founder_d == 1, "Yes", "No")
    ),
    # factor() over the recoded strings keeps only labels that actually
    # occur, so unused levels never appear as rows in the summary.
    investment_type = factor(
      ifelse(
        investment_type == "angel",
        "Angel",
        ifelse(investment_type == "pre_seed", "Pre-Seed", "Seed")
      )
    )
  ) %>%
  drop_na() %>%
  summary() %>%
  as.data.frame()

# Clean up columns: drop Var1, rename the remaining columns, and replace
# the raw column names with display labels.
df <- df %>%
  select(-Var1) %>%
  rename(Variable = Var2, N = Freq) %>%
  mutate(
    # summary() may pad its column labels with leading blanks, so the
    # trimmed name is compared instead of mixing exact and substring
    # matches that only work for one particular padding.
    Variable = trimws(as.character(Variable)),
    Variable = factor(case_when(
      Variable == "investment_type" ~ "Investment Type",
      Variable == "state_code_org" ~ "State",
      Variable == "gender_d" ~ "Gender",
      TRUE ~ "First-Time Founder"
    ))
  ) %>%
  # Remove rows that contain NA in any column.
  drop_na()

# Split the "level:count" text held in N into a Level and an N column,
# strip surrounding blanks from the text columns, and store N as integer.
df <- df %>%
  separate(col = N, into = c("Level", "N"), sep = ":") %>%
  mutate(
    Variable = trimws(Variable),
    Level = trimws(Level),
    N = as.integer(trimws(N))
  )

# Order rows by variable and, within each variable, by descending count.
# arrange() ignores grouping by default, so the former group_by() had no
# effect on the order and only left `df` grouped for every later step.
df <- df %>%
  arrange(Variable, desc(N))

# Render `df` as an APA-style table with a caption and a table note.
# The stub_indents = list("1", "2") argument stays disabled.
apa_table(
  df,
  note = "Missing data is not shown.",
  caption = "Summary of categorical variables."
)

这是我现在得到的。

我愿意使用任何包——这恰好使用了 papaja。但它需要在带有 PDF 输出的 rmarkdown 中工作并符合 APA 风格。

我希望表格把重复的变量名合并(折叠)起来,使其不会重复出现多次,并把 State 变量的 "(Other)" 行移到 State 分组的底部。像这样(不同的数据集)作为例子:

最佳答案

这是使用 apa_table() 的另一种方法。

首先是一种更简单的方法来汇总您的数据:

library("dplyr")
library("tidyr")

# Example data: 21 rows (row.names = c(NA, -21L)) and four columns.
#   investment_type      - factor with levels "angel", "pre_seed", "seed"
#   gender_d             - numeric 0/1 indicator
#   state_code_org       - factor whose 35 levels are two-letter codes
#   first_time_founder_d - numeric 0/1 indicator containing one NA
# structure() maps the `.Label` attribute to the factor's levels.
# NOTE(review): the layout looks like dput() output - confirm before
# editing the integer codes by hand.
df <- structure(list(investment_type = structure(c(3L, 3L, 3L, 3L,
3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L,
3L), .Label = c("angel", "pre_seed", "seed"), class = "factor"),
gender_d = c(1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
1, 1, 1, 1, 0, 1), state_code_org = structure(c(3L, 22L,
3L, 15L, 3L, 4L, 3L, 3L, 22L, 3L, 29L, 25L, 8L, 29L, 10L,
6L, 22L, 4L, 17L, 23L, 17L), .Label = c("AL", "AR", "CA",
"CO", "CT", "DC", "DE", "FL", "GA", "IL", "KS", "LA", "MA",
"MD", "MN", "MO", "NC", "NE", "NH", "NJ", "NV", "NY", "OH",
"OR", "PA", "RI", "SC", "TN", "TX", "UT", "VA", "VT", "WA",
"WI", "WY"), class = "factor"), first_time_founder_d = c(0,
1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, NA, 1, 0, 0, 1,
0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-21L))

# Count how often each level of every categorical variable occurs and
# express each count as a percentage within its variable.
#
# Result columns:
#   name     - display label of the source variable (factor)
#   Variable - the level that was counted
#   N        - number of rows with that level
#   %        - share within the variable, formatted to one decimal place
factor_level_count <- df %>%
  mutate(
    gender_d = factor(
      gender_d,
      levels = c(0, 1),
      labels = c("Female", "Male")
    ),
    first_time_founder_d = factor(
      first_time_founder_d,
      levels = c(0, 1),
      labels = c("No", "Yes")
    ),
    investment_type = factor(
      investment_type,
      levels = c("angel", "pre_seed", "seed"),
      labels = c("Angel", "Pre-Seed", "Seed")
    )
  ) %>%
  # Rows with a missing value in any column are left out of every count.
  drop_na() %>%
  pivot_longer(cols = everything()) %>%
  count(name, value) %>%
  mutate(
    # These labels are printed in the table; the spelling of
    # "First-Time Founder" and "Investment Type" is corrected here.
    name = factor(
      name,
      levels = c(
        "first_time_founder_d",
        "gender_d",
        "investment_type",
        "state_code_org"
      ),
      labels = c("First-Time Founder", "Gender", "Investment Type", "State")
    )
  ) %>%
  group_by(name) %>%
  # printnum() is namespaced because papaja is only attached further down
  # in the script, after this statement has already run.
  mutate(percent = papaja::printnum(n / sum(n) * 100, digits = 1)) %>%
  # Grouping is only needed for the per-variable percentage.
  ungroup() %>%
  rename(Variable = value, N = n, "%" = percent)

现在您可以拆分 data.frame 并将它们重新组合成命名列表以获得 stub 缩进。

# Build a named list with one table per variable: split the counts on the
# `name` column, then drop that column (the first one) from every piece.
factor_level_count_list <- lapply(
  split(factor_level_count, f = factor_level_count$name, drop = TRUE),
  function(group_rows) group_rows[, -1]
)

library("papaja")

# Render the named list as one APA-style table.
apa_table(
  factor_level_count_list,
  caption = "Summary of categorical variables.",
  note = "Missing data is not shown.",
  # Right-align the last column.
  align = "llr",
  # Table style used when merging the list elements.
  merge_method = "indent",
  midrules = c(3, 6, 9)
)

enter image description here

关于r 表列出多个具有频率的分类变量,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60019195/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com