gpt4 book ai didi

r - R 中的嵌套表格输出

转载 作者:行者123 更新时间:2023-12-04 10:08:26 26 4
gpt4 key购买 nike

我正在努力确定如何生成如下所示的表格输出。 (我希望能够利用条件逻辑来对单元格进行着色,如 Excel 中生成的附加输出所示,但我很高兴能简单地了解如何在没有初学者着色的情况下生成输出。)

所需的输出(在 Excel 中生成)
image description

使用的整体流程:

  • 使用“gains”包提供的 MineThatData 数据集。
  • 完整数据集包括四个模型分数。只需保留“logistic.score”作为说明。
  • 根据模型分数(即,logistic.score)将训练样本(train = 1)中的每条记录分配给一个十分位数。
  • 使用训练样本中的分数范围将测试样本中的记录分配到十分位数
  • 按十分位数为每个采样组报告各种统计数据(包括转化率和每条记录的支出)。


  • 加载所需的包。
    library(gains)
    library(plyr)
    library(StatMeasures)
    library(sqldf)
    library(tables)
    full_dataset <- MineThatData

    reduced_dataset <- full_dataset[ ,
    c("conversion","spend","train","logistic.score")]

    reduced_dataset <- rename(reduced_dataset,c("logistic.score"="score"))

    reduced_dataset$score <- round(reduced_dataset$score, 8)

    summary(reduced_dataset)

    trainDF <- reduced_dataset[reduced_dataset$train == 1, ]
    testDF <- reduced_dataset[reduced_dataset$train == 0, ]

    trainDF$Decile <- decile(trainDF$score, decreasing = TRUE)

    summarize_results_by_decile <- function(Input_DF, Output_DF) {
    Output_DF <- sqldf("
    select
    case when train = 1 then 'Train' else 'Test' end as Sample
    ,Decile
    ,count(*) as Num_Records
    ,sum(conversion) as Num_Converters
    ,sum(spend) as Sum_Spend
    ,min(score) as Min_Score
    ,max(score) as Max_Score
    ,round(avg(conversion),4) as Pct_Response
    ,round(avg(spend),2) as Spend_per_Record
    from Input_DF
    group by Decile
    order by Decile
    ")

    temp_df <- sqldf("
    select
    case when train = 1 then 'Train' else 'Test' end as Sample
    ,11 as Decile
    ,count(*) as Num_Records
    ,sum(conversion) as Num_Converters
    ,sum(Spend) as Sum_Spend
    ,min(score) as Min_Score
    ,max(score) as Max_Score
    ,round(avg(conversion),4) as Pct_Response
    ,round(avg(spend),2) as Spend_per_Record
    from Input_DF
    ")

    Output_DF <- rbind(Output_DF , temp_df)

    Output_DF$Decile <- factor(Output_DF$Decile,
    labels =c("1","2","3","4","5","6","7","8","9","10","Total"))

    Output_DF$Pct_of_Records <- paste(format(round(Output_DF$Num_Records
    / temp_df$Num_Records * 100, 1), nsmall=1), "%", sep="")

    Output_DF$Pct_of_Converters <- paste(format(round(Output_DF$Num_Converters
    / temp_df$Num_Converters * 100, 1), nsmall=1), "%", sep="")

    Output_DF$Pct_of_Spend <- paste(format(round(Output_DF$Sum_Spend
    / temp_df$Sum_Spend * 100, 1), nsmall=1), '%', sep="")

    Output_DF$Num_Records <- format(Output_DF$Num_Records, big.mark = ",")

    Output_DF$Num_Converters <- format(Output_DF$Num_Converters,
    big.mark = ",")

    Output_DF$Sum_Spend <- paste("$" , sep="", format(Output_DF$Sum_Spend,
    big.mark = ","))

    Output_DF$Pct_Response <- paste(format(round(Output_DF$Pct_Response * 100,
    2), nsmall=2), "%", sep="")

    Output_DF$Spend_per_Record <- paste("$", sep="",
    format(Output_DF$Spend_per_Record, nsmall=2))

    return(Output_DF)
    }

    summary_results_train <- summarize_results_by_decile(trainDF,
    summary_results_train)

    Min_Decile_Scores <- t(subset(summary_results_train, select = Min_Score))

    assign_decile <- function(score_var, decile_var) {
    decile_var <- ifelse(score_var >= Min_Decile_Scores[1], 1,
    ifelse(score_var >= Min_Decile_Scores[2], 2,
    ifelse(score_var >= Min_Decile_Scores[3], 3,
    ifelse(score_var >= Min_Decile_Scores[4], 4,
    ifelse(score_var >= Min_Decile_Scores[5], 5,
    ifelse(score_var >= Min_Decile_Scores[6], 6,
    ifelse(score_var >= Min_Decile_Scores[7], 7,
    ifelse(score_var >= Min_Decile_Scores[8], 8,
    ifelse(score_var >= Min_Decile_Scores[9], 9, 10)))))))))

    return(decile_var)
    }

    验证用于在训练数据上分配十分位数分配的逻辑:
    trainDF$Replicate_Decile <- assign_decile(trainDF$score, 
    trainDF$Replicate_Decile)
    table(trainDF$Decile, trainDF$Replicate_Decile)
    trainDF$Replicate_Decile <- NULL
    testDF$Decile <- assign_decile(testDF$score, testDF$Decile)

    summary_results_test <- summarize_results_by_decile(testDF,
    summary_results_test)

    summary_results <- rbind(summary_results_train, summary_results_test)

    summary_results <- subset(summary_results, select = -c(Min_Score,Max_Score))

    这样做是为了重新排序要显示的列:
    summary_results <- summary_results[ ,c("Sample", "Decile", "Num_Records", 
    "Num_Converters", "Sum_Spend", "Pct_of_Records",
    "Pct_of_Converters","Pct_of_Spend","Pct_Response",
    "Spend_per_Record")]

    这样做是为了影响列名称的显示方式:
    summary_results <- rename(summary_results,
    c("Num_Records" = "# Records",
    "Num_Converters" = "# Converters",
    "Sum_Spend" = "Total Spend",
    "Pct_of_Records" = "% of Records",
    "Pct_of_Converters" = "% of Converters",
    "Pct_of_Spend" = "% of Spend",
    "Pct_Response" = "% Conversion",
    "Spend_per_Record" = "$ per Record"))
    print(summary_results[summary_results$Sample == 'Train', -1], row.names = FALSE)
    print(summary_results[summary_results$Sample == 'Test' , -1], row.names = FALSE)

    我能够在 R 中产生的输出
    image description

    除了这是我在 Stack Overflow 上的第一篇文章,我还是一个相对较新的 R 用户。我希望我的代码是可以理解的!在此先感谢您的帮助。

    最佳答案

    不太好和简洁的解决方案:

    library(gains)
    library(expss)

    full_dataset = MineThatData

    reduced_dataset = full_dataset[ ,
    c("conversion","spend","train","logistic.score")]
    reduced_dataset$score = round(reduced_dataset$logistic.score, 8)

    summary_fun = function(data){
    calc(data,
    list(
    "# Records" = NROW(data),
    "# Converters" = sum(conversion),
    "$ Spend" = sum(spend),
    "% Conversion" = round(mean(conversion),4)*100,
    "$ per Record" = round(mean(spend),2)
    )
    )
    }

    reduced_dataset %>%
    compute({
    decile_points = quantile(score[train==1],
    probs = seq(0,1,by = 0.1)
    )
    decile_points[length(decile_points)] = Inf
    # '11 - ' is needed make reverse order
    decile = 11 - as.integer(cut(score, decile_points, include.lowest = TRUE))
    rm(decile_points) # we don't need this in our dataset

    }) %>%
    # "|" to suppress variable label
    tab_rows("|" = decile, total(label = "Total")) %>%
    tab_cols(total(label = "|")) %>%
    tab_cells(sheet(conversion,
    spend)) %>%
    tab_subgroup(train==1) %>%
    tab_stat_fun_df(summary_fun, label = "Train") %>%
    tab_subgroup(train==0) %>%
    tab_stat_fun_df(summary_fun, label = "Test") %>%
    tab_pivot(stat_position = "inside_columns") %>%
    # calculate percent
    do_repeat(i = perl("Records|Converters|Spend"), {
    ..[gsub("#|\\$", "% of", .item_value, perl = TRUE)] = i/i[.N]*100
    }) %>%
    # move some columns to the end
    keep(!fixed("% Conversion") & !fixed("$ per Record"), other()) %>%
    # formating
    do_repeat(i = fixed("#"), {
    i = format(i, big.mark = ",")
    }) %>%
    do_repeat(i = fixed("$"), {
    i = paste0("$", format(i, big.mark = ","))
    }) %>%
    do_repeat(i = fixed("%"), {
    i = paste0(format(round(i, 1), nsmall=1), "%")
    }) %>%
    htmlTable()

    这给出了以下结果: table

    免责声明:我是“expss”包的作者。

    关于r - R 中的嵌套表格输出,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48573903/

    26 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com