- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我正在尝试使用ggplot2绘制一些Kaplan-Meier曲线,代码位于:https://github.com/kmiddleton/rexamples/blob/master/qplot_survival.R
我在不同的数据库中使用这段出色的代码获得了良好的结果。但是,在这种情况下,它给了我以下错误......就好像我的数据框中有空行:
错误信息:Error in if (nrow(layer_data) == 0) return() : argument is of length zero(参数长度为零)。
之前关于此类错误的问题似乎对我来说没有用,因为我的情况下数据和函数的类型并不相同。
我刚开始使用 R 做统计分析,也没有编程背景,所以我想这一定是我数据里的某个“愚蠢错误”,但我找不到它在哪里......看起来确实像是 ggplot2 找不到要绘制的行。能否请您帮帮我,给我一些线索或建议?
下面按顺序给出我的数据和所用的代码,可以直接粘贴到控制台运行(我是在 knitr 脚本中尝试的)。最后附上我的 sessionInfo:
library(splines)
library(survival)
library(abind)
library(ggplot2)
library(grid)
我创建了一个名为acbi30的数据框(真实数据):
# Raw columns for the 30-row example data set (one entry per patient).
# mort28day and daysurv are later passed to Surv() as status and time;
# levo is the grouping variable used for the stratified fit.
mort28day <- c(
  1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
  0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1
)
daysurv <- c(
  4, 29, 24, 29, 29, 29, 29, 19, 29, 29,
  29, 3, 9, 29, 15, 29, 29, 11, 29, 5,
  13, 20, 22, 29, 16, 21, 9, 29, 29, 15
)
levo <- c(
  0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0
)

# Assemble the data frame, keep a copy on disk, and print it.
acbi30 <- data.frame(mort28day, daysurv, levo)
save(acbi30, file = "acbi30.rda")
acbi30
然后,我粘贴以下命令以使用 ggplot2 创建函数:
# Survival response built from follow-up time and the status column.
t.Surv <- Surv(acbi30$daysurv, acbi30$mort28day)
# Overall (unstratified) Kaplan-Meier fit.
t.survfit <- survfit(t.Surv ~ 1, data = acbi30)
# Convert a survfit object into a plain data.frame that is easy to plot.
#
# Arguments:
#   f.survfit - a survfit fit; read through $time, $n.risk, $n.event,
#               $n.censor, $surv, $upper, $lower, $n and (optionally) $strata.
#               For stratified fits, f.survfit[i]$n must give the size of the
#               i-th stratum.
#
# Value:
#   A data.frame with columns time, n.risk, n.event, n.censor, surv, plus
#   upper/lower when the fit carries them, plus a factor column `strata` for
#   stratified fits (levels in the order of names(f.survfit$strata)).
#   Each curve is prefixed with two synthetic rows (time 0 and the first
#   observed time, both with surv = 1) so that a step plot starts at 1.
#   Stratified results are sorted by strata, then time, with row names reset.
createSurvivalFrame <- function(f.survfit) {
  # Columns copied straight from the fit. data.frame() silently drops NULL
  # arguments, so a fit without confidence limits yields no upper/lower.
  f.frame <- data.frame(
    time = f.survfit$time,
    n.risk = f.survfit$n.risk,
    n.event = f.survfit$n.event,
    n.censor = f.survfit$n.censor,
    surv = f.survfit$surv,
    upper = f.survfit$upper,
    lower = f.survfit$lower
  )

  strata_sizes <- f.survfit$strata
  strata_names <- names(strata_sizes)
  stratified <- length(strata_names) > 0

  if (stratified) {
    # One label per row. Explicit levels keep the column a factor in the
    # fit's own stratum order, whatever the R version's string defaults are.
    f.frame$strata <- factor(
      rep(strata_names, times = strata_sizes),
      levels = strata_names
    )
  }

  # Two leading rows for one curve, restricted to the columns f.frame has,
  # so rbind() cannot fail on a column-count mismatch.
  start_rows <- function(first_time, n_at_risk, stratum = NULL) {
    start <- data.frame(
      time = c(0, first_time),
      n.risk = rep(n_at_risk, 2),
      n.event = c(0, 0),
      n.censor = c(0, 0),
      surv = c(1, 1),
      upper = c(1, 1),
      lower = c(1, 1)
    )
    if (!is.null(stratum)) {
      start$strata <- factor(rep(stratum, 2), levels = strata_names)
    }
    start[names(f.frame)]
  }

  if (!stratified) {
    return(rbind(start_rows(f.frame$time[1], f.survfit$n), f.frame))
  }

  per_stratum <- lapply(seq_along(strata_names), function(i) {
    in_stratum <- f.frame$strata == strata_names[i]
    start_rows(f.frame$time[in_stratum][1], f.survfit[i]$n, strata_names[i])
  })

  # Start rows go first: order() leaves ties in input order, so each
  # synthetic row precedes the observed row that shares its time.
  f.frame <- do.call(rbind, c(per_stratum, list(f.frame)))
  f.frame <- f.frame[order(f.frame$strata, f.frame$time), ]
  rownames(f.frame) <- NULL
  f.frame
}
# Draw a Kaplan-Meier curve with ggplot2 from a survival data.frame.
#
# Arguments:
#   f.frame - data.frame with the columns referenced below: time, surv,
#             upper, lower, n.censor and, for stratified data, strata.
#   f.CI    - "default", TRUE or FALSE; selects whether the upper/lower
#             bounds are drawn. Note that "default" means "with CI" in the
#             unstratified branch but "without CI" in the stratified branch.
#   f.shape - point shape passed to geom_point() for the censoring marks.
# Value: the ggplot object built by whichever branch runs.
#
# NOTE(review): every geom_point() layer keeps only rows with n.censor == 1,
# so time points with any other censoring count get no mark, and the subset
# is empty when no row has exactly one censored observation.
# NOTE(review): two geom_step() calls below spell the argument `directions`
# (plural) while the others use `direction` - confirm which is intended.
qplot_survival <- function(f.frame, f.CI="default", f.shape=3){
# Choose the plotting commands depending on whether a strata column exists.
if("strata" %in% names(f.frame) == FALSE){
# Unstratified: confidence intervals are drawn when f.CI is "default" or TRUE.
if(f.CI=="default" | f.CI==TRUE ){
# Four layers: survival curve, upper bound, lower bound, censoring marks.
# Several censoring events at one time point are overplotted as one mark.
ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") + geom_step(aes(x=time,
y=upper), directions="hv", linetype=2) + geom_step(aes(x=time,y=lower), direction="hv", linetype=2) +
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
}
else {
# Survival curve and censoring marks only (no confidence intervals).
ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") +
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
}
}
else {
# Stratified: one coloured curve per stratum; CI omitted for "default"/FALSE.
if(f.CI=="default" | f.CI==FALSE){
# Without CI.
ggplot(data=f.frame, aes(group=strata, colour=strata)) + geom_step(aes(x=time, y=surv),
direction="hv") + geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
}
else {
# With CI; the bounds are drawn semi-transparent (alpha = 0.5).
ggplot(data=f.frame, aes(colour=strata, group=strata)) + geom_step(aes(x=time, y=surv),
direction="hv") + geom_step(aes(x=time, y=upper), directions="hv", linetype=2, alpha=0.5) +
geom_step(aes(x=time,y=lower), direction="hv", linetype=2, alpha=0.5) +
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
}
}
}
绘制全局生存曲线(95% CI):
它没有给出任何错误:
# Kaplan-Meier plot of overall survival, confidence bounds included.
t.survfit <- survfit(t.Surv ~ 1, data = acbi30)
t.survframe <- createSurvivalFrame(t.survfit)
# Print the fit, then draw the curve with shape 20 for the censoring marks.
t.survfit
qplot_survival(t.survframe, f.CI = TRUE, f.shape = 20)
绘制分层生存曲线:
给出上面提到的错误:
# Kaplan-Meier plot of survival stratified by levo.
t.survfit2 <- survfit(t.Surv ~ levo, data = acbi30)
t.survframe2 <- createSurvivalFrame(t.survfit2)
# Print the fit, then draw one curve per stratum.
t.survfit2
qplot_survival(t.survframe2, f.CI = TRUE, f.shape = 20)
不使用 ggplot2 绘制结果:
t.survframe2的结构对我来说似乎没问题,没有任何空行,所以这一定是qplot_survival读取我在t.survframe2中的数据的问题。制作一个简单的绘图不会返回任何错误:
# Inspect the stratified survival frame, then draw the base-graphics plot.
print(t.survframe2)
plot(t.survfit2)
我的数据框问题出在哪里?创建的函数可以很好地与其他数据集配合使用,但不适用于这个数据集...
提前谢谢您,
马雷维夫
session 信息:
sessionInfo()
R版本2.15.2 (2012-10-26)平台:i386-w64-mingw32/i386(32位)
locale:
[1] LC_COLLATE=Spanish_Spain.1252 LC_CTYPE=Spanish_Spain.1252
[3] LC_MONETARY=Spanish_Spain.1252 LC_NUMERIC=C
[5] LC_TIME=Spanish_Spain.1252
attached base packages:
[1] grid splines stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] ggplot2_0.9.3 abind_1.4-0 survival_2.36-14 knitr_0.8
loaded via a namespace (and not attached):
[1] colorspace_1.1-1 dichromat_1.2-4 digest_0.5.2
[4] evaluate_0.4.2 formatR_0.7 gtable_0.1.2
[7] labeling_0.1 MASS_7.3-22 munsell_0.4
[10] plyr_1.8 proto_0.3-9.2 RColorBrewer_1.0-5
[13] reshape2_1.2.1 scales_0.2.3 stringr_0.6.1
[16] tools_2.15.2
最佳答案
我对您的 qplot_survival() 函数做了一些整理。主要问题似乎出在 geom_point 的 data = 参数所用的子集条件上:在 t.survframe 和 t.survframe2 中,对 n.censor 制表(table)得到的取值只有 0、3 和 12,因此条件 n.censor == 1 筛不出任何行。把子集条件改为 n.censor > 0 之后,所有情况下我都能得到图形。另外,我看不出 f.CI = "default" 的意义,因此把默认值设为 TRUE,并相应修改了 if 条件。
# Plot a Kaplan-Meier curve from a survival data.frame with ggplot2.
#
# Arguments:
#   f.frame - data.frame with columns time, surv, upper, lower, n.censor and,
#             for stratified data, strata.
#   f.CI    - confidence bounds are drawn only when this is exactly TRUE.
#   f.shape - point shape for the censoring marks.
# Value: the assembled ggplot object.
qplot_survival <- function(f.frame, f.CI = TRUE, f.shape = 3) {
  stratified <- "strata" %in% names(f.frame)
  show_ci <- isTRUE(f.CI)

  # Rows with at least one censored observation; every variant marks them.
  # Several censoring events at one time point are overplotted as one mark.
  censored <- subset(f.frame, n.censor > 0)

  if (!stratified) {
    # Single curve: aesthetics are set per layer.
    km <- ggplot(data = f.frame) +
      geom_step(aes(x = time, y = surv), direction = "hv")
    if (show_ci) {
      km <- km +
        geom_step(aes(x = time, y = upper), direction = "hv", linetype = 2) +
        geom_step(aes(x = time, y = lower), direction = "hv", linetype = 2)
    }
    return(
      km + geom_point(data = censored, aes(x = time, y = surv), shape = f.shape)
    )
  }

  if (!show_ci) {
    # One coloured curve per stratum, no confidence bounds.
    return(
      ggplot(data = f.frame, aes(group = strata, colour = strata)) +
        geom_step(aes(x = time, y = surv), direction = "hv") +
        geom_point(data = censored, aes(x = time, y = surv), shape = f.shape)
    )
  }

  # Stratified with confidence bounds, drawn semi-transparent.
  km <- ggplot(data = f.frame, aes(x = time, colour = strata, group = strata))
  km <- km + geom_step(aes(y = surv), direction = "hv")
  km <- km + geom_step(aes(y = upper), direction = "hv", linetype = 2, alpha = 0.5)
  km <- km + geom_step(aes(y = lower), direction = "hv", linetype = 2, alpha = 0.5)
  km + geom_point(data = censored, aes(y = surv), shape = f.shape)
}
进行这些更改后,以下图对我来说都有效:
# Stratified data, with and then without confidence bounds.
qplot_survival(t.survframe2, f.CI = TRUE, f.shape = 20)
qplot_survival(t.survframe2, f.CI = FALSE, f.shape = 20)
# Unstratified data, with and then without confidence bounds.
qplot_survival(t.survframe, f.CI = TRUE, f.shape = 20)
qplot_survival(t.survframe, f.CI = FALSE, f.shape = 20)
一些评论:
- 我不确定 geom_point() 层是否真的有必要。
- 原代码在几处 geom_step() 调用中使用了 directions = "hv"。该参数名不是复数形式(应为 direction),上面已经改正。
- 从 survfit 对象(如 t.survfit)中提取感兴趣的列,一种方法类似于下面这样(存在分层时需要扩展 comps):
# Positions of the components to pull out of the survfit object.
# NOTE(review): these are positional indices into t.survfit - confirm they
# match the component order of the survival version in use.
comps <- c(2:6, 8, 10)
# Bind the selected components column-wise and label them by component name.
picked <- lapply(comps, function(j) t.survfit[[j]])
t.fit <- as.data.frame(do.call(cbind, picked))
names(t.fit) <- names(t.survfit)[comps]
关于r - ggplot2 绘制 Kaplan-Meier 图时在生存数据框中找到空行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/14160928/
我每天都进行回归分析。就我而言,这通常意味着估计连续和分类预测变量对各种结果的影响。生存分析可能是我执行的最常见的分析。此类分析通常以非常方便的方式出现在期刊中。下面是一个例子: 我想知道是否有人遇到
我是一名优秀的程序员,十分优秀!