gpt4 book ai didi

r - 使用调查权重时,如何为 logit 模型生成边际效应?

转载 作者:行者123 更新时间:2023-12-04 11:39:56 28 4
gpt4 key购买 nike

我通常使用 mfx 包和 logitmfx 函数生成 logit 模型边际效应。但是,我使用的当前调查具有权重(由于某些人群的过度采样,这对样本中 DV 的比例有很大影响)并且 logitmfx 似乎没有任何方法可以包含权重。

我用 svyglm 拟合模型如下:

library(survey)

survey.design <- svydesign(ids = combined.survey$id,
weights = combined.survey$weight,
data = combined.survey)

vote.pred.1 <- svyglm(formula = turnout ~ gender + age.group +
education + income,
design = survey.design)
summary(vote.pred.1)

如何从这些结果中产生边际效应?

最佳答案

我有同样的问题。下面我修改了 mfx 包中的一个函数,以使用组织为调查对象的数据来计算边际效应。我没有做太多,主要是替换了“mean()”和类似的命令,这些命令旨在用调查包等效项来运行非调查数据。修改后的mfx代码,有运行示例的代码。

背景

Alan Fernihough 关于 mfx 包的详细信息:
https://cran.r-project.org/web/packages/mfx/mfx.pdf

github上mfx包的代码(我修改的文件是probitmfxest.r和probitmfx.r):
https://github.com/cran/mfx/tree/master/R

在 mfx 计算器中,我注释掉了许多内置于原始函数中的灵活性,这些函数处理关于集群和稳健 SE 的不同假设。我可能是错的,但我认为使用调查包中的回归估计命令 svyglm() 已经解决了这些问题。

边际效应计算器

 library(survey)

probitMfxEstSurv <-
function(formula,
design,
atmean = TRUE,
robust = FALSE,
clustervar1 = NULL,
clustervar2 = NULL,
start = NULL
# control = list() # this option is found in the original mfx package
){

if(is.null(formula)){
stop("model formula is missing")
}

for( i in 1:length(class(design))){
if(!((class(design)[i] %in% "survey.design2") | (class(design)[i] %in% "survey.design"))){
stop("design arguement must contain survey object")
}
}

# from Fernihough's original mfx function
# I dont think this is needed because the
# regression computed by the survey package should
# take care of stratification and robust SEs
# from the survey info
#
# # cluster sort part
# if(is.null(clustervar1) & !is.null(clustervar2)){
# stop("use clustervar1 arguement before clustervar2 arguement")
# }
# if(!is.null(clustervar1)){
# if(is.null(clustervar2)){
# if(!(clustervar1 %in% names(data))){
# stop("clustervar1 not in data.frame object")
# }
# data = data.frame(model.frame(formula, data, na.action=NULL),data[,clustervar1])
# names(data)[dim(data)[2]] = clustervar1
# data=na.omit(data)
# }
# if(!is.null(clustervar2)){
# if(!(clustervar1 %in% names(data))){
# stop("clustervar1 not in data.frame object")
# }
# if(!(clustervar2 %in% names(data))){
# stop("clustervar2 not in data.frame object")
# }
# data = data.frame(model.frame(formula, data, na.action=NULL),
# data[,c(clustervar1,clustervar2)])
# names(data)[c(dim(data)[2]-1):dim(data)[2]] = c(clustervar1,clustervar2)
# data=na.omit(data)
# }
# }

# fit the probit regression
fit = svyglm(formula,
design=design,
family = quasibinomial(link = "probit"),
x=T
)
# TS: summary(fit)

# terms needed
x1 = model.matrix(fit)
if (any(alias <- is.na(coef(fit)))) { # this conditional removes any vars with a NA coefficient
x1 <- x1[, !alias, drop = FALSE]
}

xm = as.matrix(svymean(x1,design)) # calculate means of x variables
be = as.matrix(na.omit(coef(fit))) # collect coefficients: be as in beta
k1 = length(na.omit(coef(fit))) # collect number of coefficients or x variables
xb = t(xm) %*% be # get the matrix product of xMean and beta, which is the model prediction at the mean
fxb = ifelse(atmean==TRUE, dnorm(xb), mean(dnorm(x1 %*% be))) # collect either the overall predicted mean, or the average of every observation's predictions

# get variances
vcv = vcov(fit)

# from Fernihough's original mfx function
# I dont think this is needed because the
# regression computed by the survey package should
# take care of stratification and robust SEs
# from the survey info
#
# if(robust){
# if(is.null(clustervar1)){
# # white correction
# vcv = vcovHC(fit, type = "HC0")
# } else {
# if(is.null(clustervar2)){
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL)
# } else {
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2)
# }
# }
# }
#
# if(robust==FALSE & is.null(clustervar1)==FALSE){
# if(is.null(clustervar2)){
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL)
# } else {
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2)
# }
# }

# set mfx equal to predicted mean (or other value) multiplied by beta
mfx = data.frame(mfx=fxb*be, se=NA)

# get standard errors
if(atmean){# fxb * id matrix - avg model prediction * (beta X xmean)
gr = as.numeric(fxb)*(diag(k1) - as.numeric(xb) *(be %*% t(xm)))
mfx$se = sqrt(diag(gr %*% vcv %*% t(gr)))
} else {
gr = apply(x1, 1, function(x){
as.numeric(as.numeric(dnorm(x %*% be))*(diag(k1) - as.numeric(x %*% be)*(be %*% t(x))))
})
gr = matrix(apply(gr,1,mean),nrow=k1)
mfx$se = sqrt(diag(gr %*% vcv %*% t(gr)))
}

# pick out constant and remove from mfx table
temp1 = apply(x1,2,function(x)length(table(x))==1)
const = names(temp1[temp1==TRUE])
mfx = mfx[row.names(mfx)!=const,]

# pick out discrete change variables
temp1 = apply(x1,2,function(x)length(table(x))==2)
disch = names(temp1[temp1==TRUE])

# calculate the disctrete change marginal effects and standard errors
if(length(disch)!=0){
for(i in 1:length(disch)){
if(atmean){
disx0 = disx1 = xm
disx1[disch[i],] = max(x1[,disch[i]])
disx0[disch[i],] = min(x1[,disch[i]])
# mfx equal to prediction @ x=1 minus prediction @ x=0
mfx[disch[i],1] = pnorm(t(be) %*% disx1) - pnorm(t(be) %*% disx0)
# standard errors
gr = dnorm(t(be) %*% disx1) %*% t(disx1) - dnorm(t(be) %*% disx0) %*% t(disx0)
mfx[disch[i],2] = sqrt(gr %*% vcv %*% t(gr))
} else {
disx0 = disx1 = x1
disx1[,disch[i]] = max(x1[,disch[i]])
disx0[,disch[i]] = min(x1[,disch[i]])
mfx[disch[i],1] = mean(pnorm(disx1 %*% be) - pnorm(disx0 %*% be))
# standard errors
gr = as.numeric(dnorm(disx1 %*% be)) * disx1 - as.numeric(dnorm(disx0 %*% be)) * disx0
avegr = as.matrix(colMeans(gr))
mfx[disch[i],2] = sqrt(t(avegr) %*% vcv %*% avegr)
}
}
}
mfx$discretechgvar = ifelse(rownames(mfx) %in% disch, 1, 0)
output = list(fit=fit, mfx=mfx)
return(output)
}



probitMfxSurv <-
function(formula,
design,
atmean = TRUE,
robust = FALSE,
clustervar1 = NULL,
clustervar2 = NULL,
start = NULL
# control = list() # this option is found in original mfx package
)
{
# res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start, control)
res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start)

est = NULL
est$mfxest = cbind(dFdx = res$mfx$mfx,
StdErr = res$mfx$se,
z.value = res$mfx$mfx/res$mfx$se,
p.value = 2*pt(-abs(res$mfx$mfx/res$mfx$se), df = Inf))
colnames(est$mfxest) = c("dF/dx","Std. Err.","z","P>|z|")
rownames(est$mfxest) = rownames(res$mfx)

est$fit = res$fit
est$dcvar = rownames(res$mfx[res$mfx$discretechgvar==1,])
est$call = match.call()
class(est) = "probitmfx"
est
}

示例
  # initialize sample data
nObs = 100
x1 = rbinom(nObs,1,.5)
x2 = rbinom(nObs,1,.3)
#x3 = rbinom(100,1,.9)
x3 = runif(nObs,0,.9)

id = 1:nObs
w1 = sample(c(10,50,100),nObs,replace=TRUE)
# dependnt variables
ystar = x1 + x2 - x3 + rnorm(nObs)
y = ifelse(ystar>0,1,0)
# set up data frame
data = data.frame(id, w1, x1, x2, x3, ystar, y)

# initialize survey
survey.design <- svydesign(ids = data$id,
weights = data$w1,
data = data)

mean(data$x2)
sd(data$x2)/(length(data$x2))^0.5
svymean(x=x2,design=survey.design)

probit = svyglm(y~x1 + x2 + x3, design=survey.design, family=quasibinomial(link='probit'))
summary(probit)

probitMfxSurv(formula = y~x1 + x2 + x3, design = survey.design)

关于r - 使用调查权重时,如何为 logit 模型生成边际效应?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/26468360/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com