gpt4 book ai didi

R - 使用 speedglm 包中的summary()时出错

转载 作者:行者123 更新时间:2023-12-02 07:41:06 25 4
gpt4 key购买 nike

我正在使用 speedglm 来估计某些数据的逻辑回归模型。我创建了一个可重现的示例,它会生成与使用原始数据相同的错误。

library(speedglm)
n <- 10000
dtf <- data.frame( y = sample(c(0,1), n, 1),
x1 = as.factor(sample(c("a","b"), n, 1)),
x2 = rnorm(n, 30, 10))
m <- speedglm(y ~ x1 + x2, dtf, family=binomial())
summary(m)

输出如下:

Generalized Linear Model of class 'speedglm':

Call: speedglm(formula = y ~ x1 + x2, data = dtf, family = binomial())

Coefficients:
------------------------------------------------------------------
Error in data.frame(..., check.names = FALSE) :
arguments imply differing number of rows: 3, 0

我通过执行getS3method("summary", "speedglm")检查了summary.speedglm的源代码,并找到了生成错误的代码行,但这并没有帮助解决问题。

PS:代表数超过 1500 的人应该创建 speedglm 标签。

更新

Marco Enea,speedglm 的维护者,要求发布以下针对 summary.speedglmprint.summary.speedglm 的临时修复。

summary.speedglm <- function (object, correlation = FALSE, ...) 
{
if (!inherits(object, "speedglm"))
stop("object is not of class speedglm")
z <- object
var_res <- as.numeric(z$RSS/z$df)
dispersion <- if (z$family$family %in% c("poisson", "binomial")) 1 else var_res
if (z$method == "qr") {
z$XTX <- z$XTX[z$ok, z$ok]
}
inv <- solve(z$XTX, tol = z$tol.solve)
covmat <- diag(inv)
se_coef <- rep(NA, length(z$coefficients))
se_coef[z$ok] <- sqrt(dispersion * covmat)
if (z$family$family %in% c("binomial", "poisson")) {
z1 <- z$coefficients/se_coef
p <- 2 * pnorm(abs(z1), lower.tail = FALSE)
} else {
t1 <- z$coefficients/se_coef
p <- 2 * pt(abs(t1), df = z$df, lower.tail = FALSE)
}
ip <- !is.na(p)
p[ip] <- as.numeric(format(p[ip], digits = 3))
dn <- c("Estimate", "Std. Error")
if (z$family$family %in% c("binomial", "poisson")) {
format.coef <- if (any(na.omit(abs(z$coef)) < 1e-04))
format(z$coefficients, scientific = TRUE, digits = 4) else
round(z$coefficients, digits = 7)
format.se <- if (any(na.omit(se_coef) < 1e-04))
format(se_coef, scientific = TRUE, digits = 4) else round(se_coef, digits = 7)
format.pv <- if (any(na.omit(p) < 1e-04))
format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
param <- data.frame(format.coef, format.se, round(z1,
digits = 4), format.pv)
dimnames(param) <- list(names(z$coefficients), c(dn,
"z value", "Pr(>|z|)"))
} else {
format.coef <- if (any(abs(na.omit(z$coefficients)) <
1e-04))
format(z$coefficients, scientific = TRUE, digits = 4) else
round(z$coefficients, digits = 7)
format.se <- if (any(na.omit(se_coef) < 1e-04))
format(se_coef, scientific = TRUE, digits = 4) else
round(se_coef, digits = 7)
format.pv <- if (any(na.omit(p) < 1e-04))
format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
param <- data.frame(format.coef, format.se, round(t1,
digits = 4), format.pv)
dimnames(param) <- list(names(z$coefficients), c(dn,
"t value", "Pr(>|t|)"))
}
eps <- 10 * .Machine$double.eps
if (z$family$family == "binomial") {
if (any(z$mu > 1 - eps) || any(z$mu < eps))
warning("fitted probabilities numerically 0 or 1 occurred")
}
if (z$family$family == "poisson") {
if (any(z$mu < eps))
warning("fitted rates numerically 0 occurred")
}
keep <- match(c("call", "terms", "family", "deviance", "aic",
"df", "nulldev", "nulldf", "iter", "tol", "n", "convergence",
"ngoodobs", "logLik", "RSS", "rank"), names(object),
0)
ans <- c(object[keep], list(coefficients = param, dispersion = dispersion,
correlation = correlation, cov.unscaled = inv, cov.scaled = inv *
var_res))
if (correlation) {
ans$correl <- (inv * var_res)/outer(na.omit(se_coef),
na.omit(se_coef))
}
class(ans) <- "summary.speedglm"
return(ans)
}

print.summary.speedglm <- function (x, digits = max(3, getOption("digits") - 3), ...)
{
cat("Generalized Linear Model of class 'speedglm':\n")
if (!is.null(x$call))
cat("\nCall: ", deparse(x$call), "\n\n")
if (length(x$coef)) {
cat("Coefficients:\n")
cat(" ------------------------------------------------------------------",
"\n")
sig <- function(z){
if (!is.na(z)){
if (z < 0.001)
"***"
else if (z < 0.01)
"** "
else if (z < 0.05)
"* "
else if (z < 0.1)
". "
else " "
} else " "
}
options(warn=-1)
sig.1 <- sapply(as.numeric(as.character(x$coefficients[,4])),
sig)
options(warn=0)
est.1 <- cbind(format(x$coefficients, digits = digits),
sig.1)
colnames(est.1)[ncol(est.1)] <- ""
print(est.1)
cat("\n")
cat("-------------------------------------------------------------------",
"\n")
cat("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1",
"\n")
cat("\n")
}
else cat("No coefficients\n")
cat("---\n")
cat("null df: ", x$nulldf, "; null deviance: ", round(x$nulldev,
digits = 2), ";\n", "residuals df: ", x$df, "; residuals deviance: ",
round(x$deviance, digits = 2), ";\n", "# obs.: ", x$n,
"; # non-zero weighted obs.: ", x$ngoodobs, ";\n", "AIC: ",
x$aic, "; log Likelihood: ", x$logLik, ";\n", "RSS: ",
round(x$RSS, digits = 1), "; dispersion: ", x$dispersion,
"; iterations: ", x$iter, ";\n", "rank: ", round(x$rank,
digits = 1), "; max tolerance: ", format(x$tol, scientific = TRUE,
digits = 3), "; convergence: ", x$convergence, ".\n",
sep = "")
invisible(x)
if (x$correlation) {
cat("---\n")
cat("Correlation of Coefficients:\n")
x$correl[upper.tri(x$correl, diag = TRUE)] <- NA
print(x$correl[-1, -nrow(x$correl)], na.print = "", digits = 2)
}
}

根据 42' 建议,我还要添加以下内容:

environment(summary.speedglm) <- environment(speedglm)
environment(print.summary.speedglm) <- environment(speedglm)

最佳答案

print.summary.speedglm 函数有一个小错误。如果您更改此行:

sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|t|)")), sig))

到这一行:

 sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|z|)")), sig))

并且还运行:

environment(print.summary.speedglm) <- environment(speedglm)

您将不会再看到错误消息。

报告错误的正确方法是联系维护者(我会向他发送电子邮件):

maintainer('speedglm')
[1] "Marco Enea <emarco76@libero.it>"

关于R - 使用 speedglm 包中的summary()时出错,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34887295/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com