I am trying to draw some Kaplan-Meier curves using ggplot2 and code found at: https://github.com/kmiddleton/rexamples/blob/master/qplot_survival.R
I had good results with this great code in a different database. However, in this case it gives me the following error... as if I had empty rows in my dataframe:
Error en if (nrow(layer_data) == 0) return() : argument is of length zero.
Previous questions about this type of error don't seem to be useful for me, as types of data and functions are not the same in my case.
I am rather new to the statistical analysis using R and I don't have programming background, so I think this must be a 'dumb bug' in my data, but I can't found where it is… It definitely seems that ggplot2 can't find rows to plot. Please, could you help me in any way, with clues, suggestions.. etc?
Here are my data and the code used, sequentially, ready for the console -I tried it in a knitr script-. At the end, I've posted my sessionInfo:
library(splines)
library(survival)
library(abind)
library(ggplot2)
library(grid)
I create a data frame called acbi30 (real data):
mort28day <- c(1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1)
daysurv <- c(4,29,24,29,29,29,29,19,29,29,29,3,9,29,15,29,29,11,29,5,13,20,22,29,16,21,9,29,29,15)
levo <- c(0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0)
acbi30 <- data.frame(mort28day, daysurv, levo)
save(acbi30, file="acbi30.rda")
acbi30
Then, I paste the following commands to create a function using ggplot2:
t.Surv <- Surv(acbi30$daysurv, acbi30$mort28day)
t.survfit <- survfit(t.Surv~1, data=acbi30)
#define custom function to create a survival data.frame#
createSurvivalFrame <- function(f.survfit){
#initialise frame variable#
f.frame <- NULL
#check if more then one strata#
if(length(names(f.survfit$strata)) == 0){
#create data.frame with data from survfit#
f.frame <- data.frame(time=f.survfit$time, n.risk=f.survfit$n.risk, n.event=f.survfit$n.event, n.censor = f.survfit
$n.censor, surv=f.survfit$surv, upper=f.survfit$upper, lower=f.survfit$lower)
#create first two rows (start at 1)#
f.start <- data.frame(time=c(0, f.frame$time[1]), n.risk=c(f.survfit$n, f.survfit$n), n.event=c(0,0),
n.censor=c(0,0), surv=c(1,1), upper=c(1,1), lower=c(1,1))
#add first row to dataset#
f.frame <- rbind(f.start, f.frame)
#remove temporary data#
rm(f.start)
}
else {
#create vector for strata identification#
f.strata <- NULL
for(f.i in 1:length(f.survfit$strata)){
#add vector for one strata according to number of rows of strata#
f.strata <- c(f.strata, rep(names(f.survfit$strata)[f.i], f.survfit$strata[f.i]))
}
#create data.frame with data from survfit (create column for strata)#
f.frame <- data.frame(time=f.survfit$time, n.risk=f.survfit$n.risk, n.event=f.survfit$n.event, n.censor = f.survfit
$n.censor, surv=f.survfit$surv, upper=f.survfit$upper, lower=f.survfit$lower, strata=factor(f.strata))
#remove temporary data#
rm(f.strata)
#create first two rows (start at 1) for each strata#
for(f.i in 1:length(f.survfit$strata)){
#take only subset for this strata from data#
f.subset <- subset(f.frame, strata==names(f.survfit$strata)[f.i])
#create first two rows (time: 0, time of first event)#
f.start <- data.frame(time=c(0, f.subset$time[1]), n.risk=rep(f.survfit[f.i]$n, 2), n.event=c(0,0),
n.censor=c(0,0), surv=c(1,1), upper=c(1,1), lower=c(1,1), strata=rep(names(f.survfit$strata)[f.i],
2))
#add first two rows to dataset#
f.frame <- rbind(f.start, f.frame)
#remove temporary data#
rm(f.start, f.subset)
}
#reorder data#
f.frame <- f.frame[order(f.frame$strata, f.frame$time), ]
#rename row.names#
rownames(f.frame) <- NULL
}
#return frame#
return(f.frame)
}
#define custom function to draw kaplan-meier curve with ggplot#
qplot_survival <- function(f.frame, f.CI="default", f.shape=3){
#use different plotting commands dependig whether or not strata's are given#
if("strata" %in% names(f.frame) == FALSE){
#confidence intervals are drawn if not specified otherwise#
if(f.CI=="default" | f.CI==TRUE ){
#create plot with 4 layers (first 3 layers only events, last layer only censored)#
#hint: censoring data for multiple censoring events at timepoint are overplotted#
#(unlike in plot.survfit in survival package)#
ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") + geom_step(aes(x=time,
y=upper), directions="hv", linetype=2) + geom_step(aes(x=time,y=lower), direction="hv", linetype=2) +
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
}
else {
#create plot without confidence intervals#
ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") +
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
}
}
else {
if(f.CI=="default" | f.CI==FALSE){
#without CI#
ggplot(data=f.frame, aes(group=strata, colour=strata)) + geom_step(aes(x=time, y=surv),
direction="hv") + geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
}
else {
#with CI (hint: use alpha for CI)#
ggplot(data=f.frame, aes(colour=strata, group=strata)) + geom_step(aes(x=time, y=surv),
direction="hv") + geom_step(aes(x=time, y=upper), directions="hv", linetype=2, alpha=0.5) +
geom_step(aes(x=time,y=lower), direction="hv", linetype=2, alpha=0.5) +
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
}
}
}
Plotting global survival curve (with 95% CI):
It doesn't give any errors:
# Kaplan-Meier plot, global survival (with CI)
t.survfit <- survfit(t.Surv~1, data=acbi30)
t.survframe <- createSurvivalFrame(t.survfit)
t.survfit
qplot_survival(t.survframe, TRUE, 20)
Plotting stratified survival curves:
Gives the error above mentioned:
# Kaplan-Meier plot, stratified survival
t.survfit2 <- survfit(t.Surv~levo, data=acbi30)
t.survframe2 <- createSurvivalFrame(t.survfit2)
t.survfit2
qplot_survival(t.survframe2, TRUE, 20)
Plotting the results without ggplot2:
The structure of t.survframe2 seems OK to me, without any empty rows, so it must be a problem of qplot_survival reading my data in t.survframe2. Making a simple plot doesn't return any error:
t.survframe2
plot(t.survfit2)
Where is the problem with my dataframe? The functions created work well with other datasets, but not with this one...
Thank you in advance,
Mareviv
Session info:
sessionInfo()
R version 2.15.2 (2012-10-26) Platform: i386-w64-mingw32/i386 (32-bit)
locale:
[1] LC_COLLATE=Spanish_Spain.1252 LC_CTYPE=Spanish_Spain.1252
[3] LC_MONETARY=Spanish_Spain.1252 LC_NUMERIC=C
[5] LC_TIME=Spanish_Spain.1252
attached base packages:
[1] grid splines stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] ggplot2_0.9.3 abind_1.4-0 survival_2.36-14 knitr_0.8
loaded via a namespace (and not attached):
[1] colorspace_1.1-1 dichromat_1.2-4 digest_0.5.2
[4] evaluate_0.4.2 formatR_0.7 gtable_0.1.2
[7] labeling_0.1 MASS_7.3-22 munsell_0.4
[10] plyr_1.8 proto_0.3-9.2 RColorBrewer_1.0-5
[13] reshape2_1.2.1 scales_0.2.3 stringr_0.6.1
[16] tools_2.15.2
I did a little cosmetic surgery on your qplot_survival() function. The main problem seemed to be your subset condition in the data = argument of geom_point; in both t.survframe and t.survframe2, a table of n.censor yielded values 0, 3 and 12. By changing the subset condition to n.censor > 0, I managed to get a plot in all cases. I also didn't see the point of f.CI = "default", so I set the default to TRUE and modified the if conditions accordingly.
qplot_survival <- function(f.frame, f.CI= TRUE, f.shape=3)
{
# use different plotting commands depending whether
# or not strata are given#
if(!("strata" %in% names(f.frame)))
{
#confidence intervals are drawn if not specified otherwise#
if( isTRUE(f.CI) )
{
# create plot with 4 layers (first 3 layers only events,
# last layer only censored)#
# hint: censoring data for multiple censoring events at
# timepoint are overplotted#
# (unlike in plot.survfit in survival package)#
ggplot(data=f.frame) +
geom_step(aes(x=time, y=surv), direction="hv") +
geom_step(aes(x=time, y=upper), direction ="hv", linetype=2) +
geom_step(aes(x=time,y=lower), direction="hv", linetype=2) +
geom_point(data=subset(f.frame, n.censor > 0),
aes(x=time, y=surv), shape=f.shape)
} else {
#create plot without confidence intervals#
ggplot(data=f.frame) +
geom_step(aes(x=time, y=surv), direction="hv") +
geom_point(data=subset(f.frame, n.censor > 0),
aes(x=time, y=surv), shape=f.shape)
}
} else {
if( !(isTRUE(f.CI)) ){
#without CI#
ggplot(data=f.frame, aes(group=strata, colour=strata)) +
geom_step(aes(x=time, y=surv), direction="hv") +
geom_point(data=subset(f.frame, n.censor > 0),
aes(x=time, y=surv), shape=f.shape)
} else {
#with CI (hint: use alpha for CI)#
ggplot(data=f.frame, aes(x = time, colour=strata, group=strata)) +
geom_step(aes(y=surv), direction="hv") +
geom_step(aes(y=upper), direction="hv",
linetype=2, alpha=0.5) +
geom_step(aes(y=lower), direction="hv",
linetype=2, alpha=0.5) +
geom_point(data=subset(f.frame, n.censor > 0),
aes(y=surv), shape=f.shape)
}
}
}
The following plots all worked for me after making these changes:
qplot_survival(t.survframe2, TRUE, 20)
qplot_survival(t.survframe2, FALSE, 20)
qplot_survival(t.survframe, TRUE, 20)
qplot_survival(t.survframe, FALSE, 20)
A couple of comments:
geom_point() layer is really necessary.directions = "hv" inside a geom_step() call. The argument is not pluralized and has been changed above.survfit object, say t.survfit, is something like this:(Expand comps when strata are present)
comps <- c(2:6, 8, 10);
t.fit <- as.data.frame(do.call(cbind, lapply(comps, function(j) t.survfit[[j]])))
names(t.fit) <- names(t.survfit)[comps]
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With