r - 教程中的错误(使用 iml 包解释机器学习模型)-6ren

r - 教程中的错误(使用 iml 包解释机器学习模型)

转载作者：行者123 更新时间：2023-12-05 03:21:34

在标题为“复制要求”(https://uc-r.github.io/iml-pkg)的部分中尝试执行以下代码时出现以下错误:

#classification data
df <- rsample::attrition %>%
mutate_if(is.ordered, factor, ordered = FALSE) %>%
mutate(Attrition = recode(Attrition, "Yes" = "1", "No" = "0") %>% factor(levels = c("1", "0")))

> Error: 'attrition' is not an exported object from 'namespace:rsample'

使用以下代码解决了问题:

#data
library(modeldata)
data("attrition", package = "modeldata")
#classification data
df <- attrition %>%
mutate_if(is.ordered, factor, ordered = FALSE) %>%
mutate(Attrition = recode(Attrition, "Yes" = "1", "No" = "0") %>% factor(levels = c("1", "0")))

不幸的是，我在尝试执行以下代码后遇到了另一个错误(标题为“全局解释/特征重要性”的部分(https://uc-r.github.io/iml-pkg)):

#compute feature importance with specified loss metric
imp.glm <- FeatureImp$new(predictor.glm, loss = "mse")
imp.rf <- FeatureImp$new(predictor.rf, loss = "mse")
imp.gbm <- FeatureImp$new(predictor.gbm, loss = "mse")

> Error in [.data.frame(prediction, , self$class, drop = FALSE) : undefined columns selected

> Error in [.data.frame(prediction, , self$class, drop = FALSE) : undefined columns selected

> Error in [.data.frame(prediction, , self$class, drop = FALSE) : undefined columns selected

我用的是 R 4.2.0/Win10

最佳答案

教程中显示的参数需要稍作改动；而不是 class = "classification"，将其更改为 class = 2(根据 the docs )，示例按预期工作:

library(rsample)   # data splitting
library(ggplot2)   # allows extension of visualizations
library(dplyr)     # basic data transformation
library(h2o)       # machine learning modeling
#install.packages("iml")
library(iml)       # ML interprtation
#install.packages("modeldata")
library(modeldata)
library(R6)

h2o.no_progress()
h2o.init()
#>  Connection successful!
#> 
#> R is connected to the H2O cluster: 
#>     H2O cluster uptime:         9 minutes 18 seconds 
#>     H2O cluster timezone:       Australia/Melbourne 
#>     H2O data parsing timezone:  UTC 
#>     H2O cluster version:        3.36.0.1 
#>     H2O cluster version age:    6 months and 28 days !!! 
#>     H2O cluster name:           H2O_started_from_R_jared_mpb432 
#>     H2O cluster total nodes:    1 
#>     H2O cluster total memory:   1.58 GB 
#>     H2O cluster total cores:    4 
#>     H2O cluster allowed cores:  4 
#>     H2O cluster healthy:        TRUE 
#>     H2O Connection ip:          localhost 
#>     H2O Connection port:        54321 
#>     H2O Connection proxy:       NA 
#>     H2O Internal Security:      FALSE 
#>     H2O API Extensions:         Amazon S3, XGBoost, Algos, Infogram, AutoML, Core V3, TargetEncoder, Core V4 
#>     R Version:                  R version 4.1.3 (2022-03-10)

df <- modeldata::attrition %>% 
  mutate_if(is.ordered, factor, ordered = FALSE) %>%
  mutate(Attrition = recode(Attrition, "Yes" = "1", "No" = "0") %>%
           factor(levels = c("1", "0")))

# convert to h2o object
df.h2o <- as.h2o(df)

# create train, validation, and test splits
set.seed(123)
splits <- h2o.splitFrame(df.h2o, ratios = c(.7, .15), destination_frames = c("train","valid","test"))
names(splits) <- c("train","valid","test")

# variable names for resonse & features
y <- "Attrition"
x <- setdiff(names(df), y) 

# elastic net model 
glm <- h2o.glm(
  x = x, 
  y = y, 
  training_frame = splits$train,
  validation_frame = splits$valid,
  family = "binomial",
  seed = 123
)

# random forest model
rf <- h2o.randomForest(
  x = x, 
  y = y,
  training_frame = splits$train,
  validation_frame = splits$valid,
  ntrees = 1000,
  stopping_metric = "AUC",    
  stopping_rounds = 10,         
  stopping_tolerance = 0.005,
  seed = 123
)
#> Warning in .h2o.processResponseWarnings(res): early stopping is enabled but neither score_tree_interval or score_each_iteration are defined. Early stopping will not be reproducible!.

# gradient boosting machine model
gbm <-  h2o.gbm(
  x = x, 
  y = y,
  training_frame = splits$train,
  validation_frame = splits$valid,
  ntrees = 1000,
  stopping_metric = "AUC",    
  stopping_rounds = 10,         
  stopping_tolerance = 0.005,
  seed = 123
)
#> Warning in .h2o.processResponseWarnings(res): early stopping is enabled but neither score_tree_interval or score_each_iteration are defined. Early stopping will not be reproducible!.

# model performance
h2o.auc(glm, valid = TRUE)
#> [1] 0.7870935
## [1] 0.7870935
h2o.auc(rf, valid = TRUE)
#> [1] 0.7681021
## [1] 0.7681021
h2o.auc(gbm, valid = TRUE)
#> [1] 0.7468242
## [1] 0.7468242

features <- as.data.frame(splits$valid) %>% select(-Attrition)

# 2. Create a vector with the actual responses
response <- as.vector(as.numeric(splits$valid$Attrition))

# 3. Create custom predict function that returns the predicted values as a
#    vector (probability of purchasing in our example)
pred <- function(model, newdata)  {
  results <- as.data.frame(h2o.predict(model, as.h2o(newdata)))
  return(results[[3L]])
}

# example of prediction output
pred(glm, features) %>% head()
#> [1] 0.12243347 0.12887908 0.09674399 0.26008143 0.00672000 0.13741387

predictor.glm <- Predictor$new(
  model = glm, 
  data = features, 
  y = response, 
  predict.fun = pred,
  class = "classification"
)
predictor.glm$predict(features[1:10,])
#> Error in `[.data.frame`(prediction, , self$class, drop = FALSE): undefined columns selected
# class = "classification" doesn't make sense; from the docs:
### The class column to be returned in case of multiclass output.
### You can either use numbers, e.g. class=2 would take the 2nd column
### from the predictions, or the column name of the predicted class,
### e.g. class="dog".
# so, in this case, 'class = 2' should work as expected

predictor.glm <- Predictor$new(
  model = glm, 
  data = features,
  y = response,
  predict.function = pred,
  class = 2
)
predictor.glm$predict(features[1:10,])
#>            p1
#> 1  0.12243347
#> 2  0.12887908
#> 3  0.09674399
#> 4  0.26008143
#> 5  0.00672000
#> 6  0.13741387
#> 7  0.47917917
#> 8  0.11775822
#> 9  0.11316964
#> 10 0.22963757

predictor.rf <- Predictor$new(
  model = rf, 
  data = features, 
  y = response, 
  predict.fun = pred,
  class = 2
)

predictor.gbm <- Predictor$new(
  model = gbm, 
  data = features, 
  y = response, 
  predict.fun = pred,
  class = 2
)

imp.glm <- FeatureImp$new(predictor.glm, loss = "mse")
imp.rf <- FeatureImp$new(predictor.rf, loss = "mse")
imp.gbm <- FeatureImp$new(predictor.gbm, loss = "mse")

p1 <- plot(imp.glm) + ggtitle("GLM")
p2 <- plot(imp.rf) + ggtitle("RF")
p3 <- plot(imp.gbm) + ggtitle("GBM")

#gridExtra::grid.arrange(p1, p2, p3, nrow = 1)
p1

p2

p3

^{由 reprex package 创建于 2022-07-28 (v2.0.1)}

关于r - 教程中的错误(使用 iml 包解释机器学习模型)，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/72930868/

文章推荐： python - 提取动词短语中的平均单词数

文章推荐： linux - 最小的 x86_64 Hello World ELF 二进制文件是什么？

文章推荐： ios - 带有属性包装器的不可用属性

文章推荐： GatsbyJS - 如何使用 createPages 设置 gatsby-node.ts？

javascript - 使用 WebScriptEndpoint 使用 javascript 使用 WCF 服务
我在网上搜索但没有找到任何合适的文章解释如何使用 javascript 使用 WCF 服务，尤其是 WebScriptEndpoint。任何人都可以对此给出任何指导吗？谢谢最佳答案这是一篇关于
c - 没有结果!!使用 fork() 使用 dup2 使用 2 个管道运行 execlp()
我正在编写一个将运行 Linux 命令的 C 程序，例如: cat/etc/passwd | grep 列表 |剪切-c 1-5 我没有任何结果 *这里 parent 等待第一个 child (chi
python - 处理文件上传，使用 Pillow 调整大小，使用 SQLAlchemy 存储，使用 Flask 提供文件
所以我正在尝试处理文件上传，然后将该文件作为二进制文件存储到数据库中。在我存储它之后，我尝试在给定的 URL 上提供文件。我似乎找不到适合这里的方法。我需要使用数据库，因为我使用 Google 应用引
excel - 使用 IF 使用 VBA 在单元格中添加公式的问题
我正在尝试制作一个宏，将下面的公式添加到单元格中，然后将其拖到整个列中并在 H 列中复制相同的公式我想在 F 和 H 列中输入公式的数据 Range("F1").formula = "=IF(ISE
使用 OperatorPrecedenceParser 使用 FParsec 解析函数应用程序？
问题类似于this one ，但我想使用 OperatorPrecedenceParser 解析带有函数应用程序的表达式在 FParsec . 这是我的 AST: type Expression =
sql - 使用 sequelize 使用 where 查询编码计数
我想通过使用 sequelize 和 node.js 将这个查询更改为代码取决于在哪里 select COUNT(gender) as genderCount from customers where
bash - 使用 “let”分配Bash失败，使用 “/”
我正在使用GNU bash，版本5.0.3(1)-发行版(x86_64-pc-linux-gnu)，我想知道为什么简单的赋值语句会出现语法错误: #/bin/bash var1=/tmp
javascript - 使用 JavaScript 使用 FOR OF 数组循环时出现错误？
这里，为什么我的代码在 IE 中不起作用。我的代码适用于所有浏览器。没有问题。但是当我在 IE 上运行我的项目时，它发现错误。而且我的 jquery 类和 insertadjacentHTMl 也不
javascript - 使用 javascript 使用 for 属性更改表单标签内容
我正在尝试更改标签的innerHTML。我无权访问该表单，因此无法编辑 HTML。标签具有的唯一标识符是“for”属性。这是输入和标签的结构:
javascript - 使用 jquery 使用 .on() 将事件附加到页面上的动态插入按钮
我有一个页面，我可以在其中返回用户帖子，可以使用一些 jquery 代码对这些帖子进行即时评论，在发布新评论后，我在帖子下插入新评论以及删除按钮。问题是 Delete 按钮在新插入的元素上不起作用，
使用 awk 使用 sha1sum 进行散列
我有一个大约有 20 列的“管道分隔”文件。我只想使用 sha1sum 散列第一列，它是一个数字，如帐号，并按原样返回其余列。使用 awk 或 sed 执行此操作的最佳方法是什么？ Accounti
mysql - 使用 insert into 使用 mysql
我需要将以下内容插入到我的表中...我的用户表有五列 id、用户名、密码、名称、条目。 (我还没有提交任何东西到条目中，我稍后会使用 php 来做)但由于某种原因我不断收到这个错误:#1054 - U
jquery - 将输入字段值修剪为仅字母数字字符/使用 .使用 jQuery
所以我试图有一个输入字段，我可以在其中输入任何字符，但然后将输入的值小写，删除任何非字母数字字符，留下“。”而不是空格。例如，如果我输入: 地球的 70% 是水，-!*#$^^ & 30% 土地输
javascript - 使用 .innerHTML 使用 DOM
我正在尝试做一些我认为非常简单的事情，但出于某种原因我没有得到想要的结果？我是 javascript 的新手，但对 java 有经验，所以我相信我没有使用某种正确的规则。这是一个获取输入值、检查选择
php - 使用 angularjs 使用 where 子句从数据库获取数据
我想使用 angularjs 从 mysql 数据库加载数据。这就是应用程序的工作原理；用户登录，他们的用户名存储在 cookie 中。该用户名显示在主页上我想获取这个值并通过 angularjs
ios - 使用 UITableViewCell 使用 AutoLayout
我正在使用 autoLayout，我想在 UITableViewCell 上放置一个 UIlabel，它应该始终位于单元格的右侧和右侧的中心。这就是我想要实现的目标所以在这里你可以看到我正在谈论的
mysql - 使用 ElasticSearch 使用 or 和运算符搜索多个字段
我需要与 MySql 等效的 elasticsearch 查询。我的 sql 查询: SELECT DISTINCT t.product_id AS id FROM tbl_sup_price t
ios - 使用 Swift 使用 JSON
我正在实现代码以使用 JSON。 func setup() { if let flickrURL = NSURL(string: "https://api.flickr.com/
javascript - 使用 JavaScript 使用 for 循环声明变量
我尝试使用for循环声明变量，然后测试cols和rols是否相同。如果是，它将运行递归函数。但是，我在 javascript 中执行 do 时遇到问题。有人可以帮忙吗？现在，在比较 col.1 和
jquery - 使用 :after 使用 jquery 更改样式
我举了一个我正在处理的问题的简短示例。 HTML代码: 1 2 3 CSS 代码: .BB a:hover{ color: #000; } .BB > li:after {

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

r - 教程中的错误(使用 iml 包解释机器学习模型)