This file contains R code for the analyses in Chapter 6 of the book Dynamic Prediction in Clinical Survival Analysis (CRC Chapman & Hall) by Hans C. van Houwelingen and Hein Putter
R code written by Hein Putter (H.Putter@lumc.nl for comments/questions) The dynpred package is available from CRAN
Consistency with the book has been checked with - R version 2.14.0 - survival version 2.36-10 - dynpred version 0.1.1
Code
################################################################################################################################################################# Cox model with time-varying coefficients############################################################################################################################################################### Ed: because of timefix#coxph <- function(...) survival::coxph(..., control = coxph.control(timefix = FALSE))require(dynpred)data(ova, package="dynpred") # to aviod confusion with ova in package coxvcova$id <-1:nrow(ova)tt <- ova$tyears[ova$d==1]tt <-sort(unique(tt))# Cox model with time-fixed coefficients (see also Chapter 3)cfixed <-coxph(Surv(tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn,data = ova)cfixed# Prepare data for Cox regression analysis with time-varying coefficientsova2 <-survSplit(data=ova, cut=tt, end="tyears", start="Tstart", event="d")# The same Cox model is obtained in the longer data set ova2cfixed <-coxph(Surv(Tstart, tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn,data = ova2)cfixedova2$lnt <-log(ova2$tyears+1)ctime <-coxph(Surv(Tstart, tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn + FIGO:lnt + Diam:lnt + Broders:lnt + Ascites:lnt + Karn:lnt, data = ova2)ctimeanova(cfixed,ctime)loglik.fixed <- cfixed$loglik[2]loglik.timefull <- ctime$loglik[2]################################################################################## Figure 6.2: The time-varying prognostic indices Z(t) for each of the### patients in the ovarian cancer data################################################################################ First centermm <-model.matrix(Surv(tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn,data = ova)[,-1]mm <- mm -matrix(apply(mm,2,mean),nrow(mm),ncol(mm),byrow=TRUE)tseq <-seq(0,6,by=0.05)nseq <-length(tseq)ov <-cbind(ova[,-(3:7)],mm)ov <-as.data.frame(ov)ov2 <-survSplit(data=ov, cut=tt, end="tyears", start="Tstart", event="d")ov2$lnt <-log(ov2$tyears+1)# This gives the same model as ctime abovectime <-coxph(Surv(Tstart, tyears, d) ~ FIGOIV + Diam.1cm + Diam1.2cm + Diam2.5cm + Diam.5cm + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Karn + FIGOIV:lnt + Diam.1cm:lnt + Diam1.2cm:lnt + Diam2.5cm:lnt + Diam.5cm:lnt + Broders2:lnt + Broders3:lnt + Broders4:lnt + Brodersunknown:lnt + Ascitespresent:lnt + Ascitesunknown:lnt + Karn:lnt, data = ov2)ctimezfixed <-as.numeric(matrix(cfixed$coef,1,12) %*%t(mm))z1 <-as.numeric(matrix(ctime$coef[1:12],1,12) %*%t(mm))z2 <-as.numeric(matrix(ctime$coef[13:24],1,12) %*%t(mm))cor(zfixed,z1)z <-outer(z2,log(tseq+1),"*")z <- z +matrix(z1,nrow(z),ncol(z))apply(z,2,sd)apply(z,2,cor,y=z1)oldpar <-par(no.readonly=TRUE) # save graphical parameterslayout(matrix(c(1,2),1,2,byrow=TRUE), widths=c(3,1))par(mar=c(5,5,1,1))plot(tseq,z[1,],type="l",ylim=c(-3,3),xlab="Time in years",ylab="Prognostic index",col=8,lwd=0.5)for (i in2:nrow(z)) lines(tseq,z[i,],type="l",lwd=0.5,col=8)abline(h=0,lty=3)par(mar=c(5,0,1,1))yhist <-hist(zfixed, breaks=seq(-3,3,0.25), plot=FALSE)top <-max(yhist$counts)barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)par(oldpar) # reset graphical parameters# Step-wise procedure# Step 1ctime1 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Karn:lnt, data = ova2)anova(cfixed,ctime1)ctime1 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Broders:lnt, data = ova2)anova(cfixed,ctime1)ctime1 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + FIGO:lnt, data = ova2)anova(cfixed,ctime1)ctime1 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Ascites:lnt, data = ova2)anova(cfixed,ctime1)ctime1 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Diam:lnt, data = ova2)anova(cfixed,ctime1)# Karn selectedctime1 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Karn:lnt, data = ova2)loglik.time1 <- ctime1$loglik[2]# Step 2ctime2 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Karn:lnt + Broders:lnt, data = ova2)anova(ctime1,ctime2)ctime2 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Karn:lnt + FIGO:lnt, data = ova2)anova(ctime1,ctime2)ctime2 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Karn:lnt + Ascites:lnt, data = ova2)anova(ctime1,ctime2)ctime2 <-coxph(Surv(Tstart, tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam + Karn:lnt + Diam:lnt, data = ova2)anova(ctime1,ctime2)tseq <-seq(0,6,by=0.05)bseq <-matrix(NA,length(tseq),12)for (j in1:12) { bseq[,j] <- ctime$coef[j] +log(tseq+1)*ctime$coef[j+12]}################################################################################## Figure 6.1: The time-varying coefficients of the model of Table 6.1###############################################################################nms <-c("FIGO IV","Diameter <1 cm","Diameter 1-2 cm","Diameter 2-5 cm","Diameter >5 cm","Broders 2","Broders 3","Broders 4","Broders unknown","Ascites present","Ascites unknown","Karnofsky")vdist <- hdist <-0.2ltys <-c(1,1,2,3,4,1,2,3,4,1,2,2)ylim <-range(bseq)ylim[2] <- ylim[2]+0.2layout(matrix(1:4, 2, 2, byrow=TRUE),widths=c(10,10),heights=c(10,10))# topleft; KARN and FIGO (1 and 6)wh <-c(1,12)par(mar=c(vdist, 4, 3, hdist))plot(tseq,bseq[,1],type="n",ylim=ylim,xlab="",ylab="Regression coefficient",axes=FALSE)for (j in wh) lines(tseq,bseq[,j],type="l",lwd=2,lty=ltys[j])abline(h=0,lwd=1,lty=5)legend("topright",nms[wh],lwd=2,lty=ltys[wh],bty="n")axis(2); axis(3); box()# topright; Broders (6-9)wh <-6:9par(mar=c(vdist, hdist, 3, 4))plot(tseq,bseq[,1],type="n",ylim=ylim,xlab="",ylab="",axes=FALSE)for (j in wh) lines(tseq,bseq[,j],type="l",lwd=2,lty=ltys[j])abline(h=0,lwd=1,lty=5)legend("topright",nms[wh],lwd=2,lty=ltys[wh],bty="n")axis(3); axis(4); box()# bottomleft; Diameter (2-5)wh <-2:5par(mar=c(5, 4, vdist, hdist))plot(tseq,bseq[,1],type="n",ylim=ylim,xlab="Time (years)",ylab="Regression coefficient",axes=FALSE)for (j in wh) lines(tseq,bseq[,j],type="l",lwd=2,lty=ltys[j])abline(h=0,lwd=1,lty=5)legend("topright",nms[wh],lwd=2,lty=ltys[wh],bty="n")axis(1); axis(2); box()# bottomright; Ascites (10-11)wh <-10:11par(mar=c(5, hdist, vdist, 4))plot(tseq,bseq[,1],type="n",ylim=ylim,xlab="Time (years)",ylab="",axes=FALSE)for (j in wh) lines(tseq,bseq[,j],type="l",lwd=2,lty=ltys[j])abline(h=0,lwd=1,lty=5)legend("topright",nms[wh],lwd=2,lty=ltys[wh],bty="n")axis(1); axis(4); box()layout(matrix(1, 1, 1))par(oldpar) # reset graphical parameters################################################################################################################################################################# Dynamic prediction with time-varying effects################################################################################################################################################################################################################################################ Figure 6.3: Model-based failure functions (left) and dynamic fixed width### failure functions with w = 2 (right) for two patients with Karnofsky### scores 7 and 10, other covariates at mean values################################################################################ Recall cfixed and ctimecfixed <-coxph(Surv(Tstart, tyears, d) ~ FIGOIV + Diam.1cm + Diam1.2cm + Diam2.5cm + Diam.5cm + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Karn, data = ov2)cfixedctime <-coxph(Surv(Tstart, tyears, d) ~ FIGOIV + Diam.1cm + Diam1.2cm + Diam2.5cm + Diam.5cm + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Karn + FIGOIV:lnt + Diam.1cm:lnt + Diam1.2cm:lnt + Diam2.5cm:lnt + Diam.5cm:lnt + Broders2:lnt + Broders3:lnt + Broders4:lnt + Brodersunknown:lnt + Ascitespresent:lnt + Ascitesunknown:lnt + Karn:lnt, data = ov2)ctime# Again use ov2, centered version of ova2# Baselinendata <-data.frame(Karn =c(7,10)-mean(ova$Karn),Broders2 =0, Broders3 =0, Broders4 =0, Brodersunknown =0,FIGOIV =0, Ascitespresent =0, Ascitesunknown =0,Diam.1cm =0, Diam1.2cm =0, Diam2.5cm =0, Diam.5cm =0)sf <-survfit(cfixed, newdata=ndata, censor=FALSE)surva <- sf$surv[,1]survb <- sf$surv[,2]# Same ctime, now baseline (Karn=7 and =10 to be done later)ndata <-data.frame(Karn =0,Broders2 =0, Broders3 =0, Broders4 =0, Brodersunknown =0,FIGOIV =0, Ascitespresent =0, Ascitesunknown =0,Diam.1cm =0, Diam1.2cm =0, Diam2.5cm =0, Diam.5cm =0, lnt =0)sf0 <-survfit(ctime, newdata=ndata, censor=FALSE)tt <- sf0$time; nt <-length(tt)# H is now baseline centered at mean of covariatesH0 <-data.frame(time=sf0$time, H0=-log(sf0$surv))H0$h0 <-diff(c(0,H0$H0))f2 <-function(t) log(t+1)mma <- mmb <-rep(0,12)mma[12] <-7-mean(ova$Karn)z1 <-as.numeric(mma %*% ctime$coef[1:12])z2 <-as.numeric(mma %*% ctime$coef[13:24])survtda <-exp(-cumsum(exp(z1 + z2*f2(tt)) * H0$h0))mmb[12] <-10-mean(ova$Karn)z1 <-as.numeric(mmb %*% ctime$coef[1:12])z2 <-as.numeric(mmb %*% ctime$coef[13:24])survtdb <-exp(-cumsum(exp(z1 + z2*f2(tt)) * H0$h0))plot(c(0,tt),1-c(1,survtda),type="s",lwd=2,ylim=c(0,1),xlab="Time in years",ylab="Death probability")lines(c(0,tt),1-c(1,survtdb),type="s",lwd=2,lty=2)lines(c(0,tt),1-c(1,surva),type="s",lwd=2,col=8)lines(c(0,tt),1-c(1,survb),type="s",lwd=2,col=8,lty=2)legend("topleft",c("Karn=7, time-fixed","Karn=10, time-fixed","Karn=7, time-varying","Karn=10, time-varying"),lwd=2,col=c(8,8,1,1),lty=c(1,2,1,2),bty="n")Fwa <-Fwindow(data.frame(time=tt,surv=surva),w=2,variance=FALSE)Fwb <-Fwindow(data.frame(time=tt,surv=survb),w=2,variance=FALSE)Fwtda <-Fwindow(data.frame(time=tt,surv=survtda),w=2,variance=FALSE)Fwtdb <-Fwindow(data.frame(time=tt,surv=survtdb),w=2,variance=FALSE)Fwa <- Fwa[Fwa$time<=5,]Fwb <- Fwb[Fwb$time<=5,]Fwtda <- Fwtda[Fwtda$time<=5,]Fwtdb <- Fwtdb[Fwtdb$time<=5,]plot(Fwtda$time,Fwtda$Fw,type="s",lwd=2,xlim=c(0,5),ylim=c(0,1),xlab="Time in years",ylab="Death within window probability")lines(Fwtdb$time,Fwtdb$Fw,type="s",lwd=2,lty=2)lines(Fwa$time,Fwa$Fw,type="s",lwd=2,col=8)lines(Fwb$time,Fwb$Fw,type="s",lwd=2,col=8,lty=2)legend("topright",c("Karn=7, time-fixed","Karn=10, time-fixed","Karn=7, time-varying","Karn=10, time-varying"),lwd=2,col=c(8,8,1,1),lty=c(1,2,1,2),bty="n")################################################################################## Figure 6.4: Dynamic prediction error curves, with and without cross-validation,### of the time-fixed and the time-varying models################################################################################## Three basic models compared: null model, time-fixed model (Ch 3), and full### time-varying model. Because of time-varying aspects, standard calls of pecox### and pewcox are not possible (a direct call to pewcox with CV=TRUE will### cross-validate based on rows of the data, not on individuals). So instead we### construct (cross-validated) survival matrices ourselves and call pew# Recall cfixed and ctimew <-2mm <-model.matrix(Surv(tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn, data = ova)[,-1]ov <-cbind(ova[,-(3:7)],mm)ov <-as.data.frame(ov)n <-nrow(ov)ov$id <-1:nov2 <-survSplit(data=ov, cut=tt, end="tyears", start="Tstart", event="d")ov2$lnt <-log(ov2$tyears+1)ctimectime <-coxph(Surv(Tstart, tyears, d) ~ FIGOIV + Diam.1cm + Diam1.2cm + Diam2.5cm + Diam.5cm + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Karn + FIGOIV:lnt + Diam.1cm:lnt + Diam1.2cm:lnt + Diam2.5cm:lnt + Diam.5cm:lnt + Broders2:lnt + Broders3:lnt + Broders4:lnt + Brodersunknown:lnt + Ascitespresent:lnt + Ascitesunknown:lnt + Karn:lnt, data = ov2)ctime# Same ctime, now baselinendata <-data.frame(Karn=0,Broders2=0,Broders3=0,Broders4=0,Brodersunknown=0,FIGOIV=0,Ascitespresent=0,Ascitesunknown=0,Diam.1cm=0,Diam1.2cm=0,Diam2.5cm=0,Diam.5cm=0,lnt=0)sf0 <-survfit(ctime,newdata=ndata,censor=FALSE)tt <- sf0$time; nt <-length(tt)H0 <-data.frame(time=sf0$time,H0=-log(sf0$surv))H0$h0 <-diff(c(0,H0$H0))f2 <-function(t) log(t+1)surv <-matrix(NA,nt,n)for (i in1:nrow(ova)) { z1 <-as.numeric(mm[i,] %*% ctime$coef[1:12]) z2 <-as.numeric(mm[i,] %*% ctime$coef[13:24]) surv[,i] <-exp(-cumsum(exp(z1 + z2*f2(tt)) * H0$h0))}## Cross-validated versionCVsurv <-matrix(NA,nt,n)# This is to monitor progressm <-floor(log10(nrow(ova))) +1pre <-rep("\b", 2* m +1)for (i in1:nrow(ova)) {cat(pre, i, "/", nrow(ova), sep =""); flush.console() ov2mini <- ov2[ov2$id != i,] ctmini <-coxph(Surv(Tstart, tyears, d) ~ FIGOIV + Diam.1cm + Diam1.2cm + Diam2.5cm + Diam.5cm + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Karn + FIGOIV:lnt + Diam.1cm:lnt + Diam1.2cm:lnt + Diam2.5cm:lnt + Diam.5cm:lnt + Broders2:lnt + Broders3:lnt + Broders4:lnt + Brodersunknown:lnt + Ascitespresent:lnt + Ascitesunknown:lnt + Karn:lnt,data = ov2mini) sf0mini <-survfit(ctmini,newdata=ndata,censor=FALSE) H0mini <-data.frame(time=tt,H0=-log(evalstep(sf0mini$time,sf0mini$surv,tt,subst=w))) H0mini$h0 <-diff(c(0,H0mini$H0)) z1mini <-as.numeric(mm[i,] %*% ctmini$coef[1:12]) z2mini <-as.numeric(mm[i,] %*% ctmini$coef[13:24]) CVsurv[,i] <-exp(-cumsum(exp(z1mini + z2mini*f2(tt)) * H0mini$h0))}n <-nrow(ova)tt <-sort(unique(ova$tyears[ova$d==1]))ttw <-c(0,tt,tt-w)ttw <- ttw[ttw>=0]ttw <-sort(unique(ttw))ntw <-length(ttw)# Similar matrix containing censoring probabilitiescoxcens <-coxph(Surv(tyears, 1-d) ~1, data=ova)ycens <- coxcens[["y"]]p <-ncol(ycens)tcens <- ycens[,p-1]dcens <- ycens[,p]xcens <- coxcens$linear.predictorscoxcens <-coxph(Surv(tcens, dcens) ~ xcens)sfcens <-survfit(coxcens,newdata=data.frame(xcens=xcens),censor=FALSE)survcens <- sfcens$survtcens <- sfcens$timedynpe1td.KL.ova <-pew(time = ova$tyears, status = ova$d, tsurv = tt,survmat = surv, tcens = tcens, censmat = survcens, width = w, FUN ="KL",tout = ttw)dynpe1tdCV.KL.ova <-pew(time = ova$tyears, status = ova$d, tsurv = tt,survmat = CVsurv, tcens = tcens, censmat = survcens, width = w, FUN ="KL",tout = ttw)dynpe1td.Brier.ova <-pew(time = ova$tyears, status = ova$d, tsurv = tt,survmat = surv, tcens = tcens, censmat = survcens, width = w, FUN ="Brier",tout = ttw)dynpe1tdCV.Brier.ova <-pew(time = ova$tyears, status = ova$d, tsurv = tt,survmat = CVsurv, tcens = tcens, censmat = survcens, width = w, FUN ="Brier",tout = ttw)dynpe1td.KL.ova$Err[is.infinite(dynpe1td.KL.ova$Err)] <-NAdynpe1tdCV.KL.ova$Err[is.infinite(dynpe1tdCV.KL.ova$Err)] <-NAdynpe1td.Brier.ova$Err[is.infinite(dynpe1td.Brier.ova$Err)] <-NAdynpe1tdCV.Brier.ova$Err[is.infinite(dynpe1tdCV.Brier.ova$Err)] <-NA# Need to fix the NA's, use NAfix() from mstaterequire(mstate)dynpe1td.KL.ova$Err <- mstate:::NAfix(dynpe1td.KL.ova$Err)dynpe1tdCV.KL.ova$Err <- mstate:::NAfix(dynpe1tdCV.KL.ova$Err)dynpe1td.Brier.ova$Err <- mstate:::NAfix(dynpe1td.Brier.ova$Err)dynpe1tdCV.Brier.ova$Err <- mstate:::NAfix(dynpe1tdCV.Brier.ova$Err)# These were also calculated in Chapter 3dynpe1.KL <-pewcox(Surv(tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam,Surv(tyears, 1-d) ~1, data = ova, width =2, FUN ="KL")dynpe0.KL <-pewcox(Surv(tyears, d) ~1,Surv(tyears, 1-d) ~1, data = ova, width =2, FUN ="KL")dynpe1.Brier <-pewcox(Surv(tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam,Surv(tyears, 1-d) ~1, data = ova, width =2, FUN ="Brier")dynpe0.Brier <-pewcox(Surv(tyears, d) ~1,Surv(tyears, 1-d) ~1, data = ova, width =2, FUN ="Brier")dynpe.Brier <-data.frame(time=dynpe1.Brier$time, Err0=dynpe0.Brier$Err, Err1=dynpe1.Brier$Err)dynpe.Brier$ErrRed <- (dynpe.Brier$Err0-dynpe.Brier$Err1)/dynpe.Brier$Err0dynpe.KL <-data.frame(time=dynpe1.KL$time, Err0=dynpe0.KL$Err, Err1=dynpe1.KL$Err)dynpe.KL$ErrRed <- (dynpe.KL$Err0-dynpe.KL$Err1)/dynpe.KL$Err0# Show until 5 yearsdynpe1td.KL.ova <-subset(dynpe1td.KL.ova,time<=5)dynpe1tdCV.KL.ova <-subset(dynpe1tdCV.KL.ova,time<=5)dynpe.KL <-subset(dynpe.KL,time<=5)dynpe1td.Brier.ova <-subset(dynpe1td.Brier.ova,time<=5)dynpe1tdCV.Brier.ova <-subset(dynpe1tdCV.Brier.ova,time<=5)dynpe.Brier <-subset(dynpe.Brier,time<=5)# Finally, the actual plotplot(dynpe1td.KL.ova$time,dynpe1td.KL.ova$Err,type="s",xlim=c(0,5),ylim=c(0,0.9),lwd=2,xlab="Time in years",ylab="Prediction error",axes=FALSE)lines(dynpe1tdCV.KL.ova$time,dynpe1tdCV.KL.ova$Err,type="s",lwd=2,col=8)lines(dynpe.KL$time,dynpe.KL$Err0,type="s",lwd=2,lty=2)lines(dynpe.KL$time,dynpe.KL$Err1,type="s",lwd=2,lty=3)lines(dynpe1td.Brier.ova$time,dynpe1td.Brier.ova$Err,type="s",lwd=2)lines(dynpe1tdCV.Brier.ova$time,dynpe1tdCV.Brier.ova$Err,type="s",lwd=2,col=8)lines(dynpe.Brier$time,dynpe.Brier$Err0,type="s",lwd=2,lty=2)lines(dynpe.Brier$time,dynpe.Brier$Err1,type="s",lwd=2,lty=3)axis(1)axis(2,at=seq(0,0.8,by=0.1))box()text(0,0.72,"Kullback-Leibler",adj=0)text(0,0.275,"Breier",adj=0)legend("topright",c("Null model","Time-fixed","Time-varying","Time-varying (CV)"),lwd=2,lty=c(2,3,1,1),col=c(1,1,1,8),bty="n")################################################################################################################################################################# Models inspired by the frailty concept################################################################################################################################################################# Gamma frailty using coxph of the survival package (add + frailty(id) to formula)ova$id <-1:nrow(ova)cfrailty <-coxph(Surv(tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn +frailty(id), data = ova)cfrailtycfrailty <-coxph(Surv(tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn +frailty.gamma(id, eps =1e-05, method="em"), data = ova)cfrailty # sameloglik.frailty <- cfrailty$history[[1]]$c.loglik### Relaxed Burr modelX <-model.matrix(Surv(tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn, data = ova)[,-1]X <-t(t(X) -apply(X,2,mean)) # center covariatesdd <-data.frame(time=ova$tyears,status=ova$d)## Define simple function to fit relaxed Burr model## (returns minus the relaxed Burr partial likelihood)Burrpl <-function(pars,dd,X) {# pars contains first the beta's, then theta# uncomment the deb statements to monitor progress within optim() ord <-order(dd$time,-dd$status) dd <- dd[ord,] X <- X[ord,,drop=FALSE] p1 <-ncol(X) beta <- pars[1:p1]# deb(beta, method="cat") theta <-exp(pars[p1+1]) # only scalar for now# deb(theta, method="cat") Xbeta <-as.numeric(X %*% beta)# not optimized for speed d1 <- dd[dd$status==1,] wh <-which(dd$status==1) nt <-nrow(d1) res <-0for (i in1:nt) { dd$rat <-exp(Xbeta)/(1+theta*d1$time[i]*exp(Xbeta)) dd$rcsrat <-rev(cumsum(rev(dd$rat))) res <- res +log(dd$rat[wh[i]]) -log(dd$rcsrat[wh[i]]) }# deb(res, method="cat")return(-res)}bth <-c(0.4,rep(0,ncol(X)-1),0)opt <-optim(bth, Burrpl, method="BFGS", hessian=TRUE, dd=dd, X=X)# Parametersopt$par# Standard errorsround(sqrt(diag(solve(opt$hessian))),3)# The variance theta and its standard errorth <-exp(opt$par[13])th# Variance of log(theta) is diag(solve(opt$hessian))[13]# By delta method, SE of theta is theta*SE(log(theta))sqrt(diag(solve(opt$hessian))[13])*th################################################################################## Cure models################################################################################ Put file "semicure.s" in your working directory, then read in filesource("semicure.s")# Pure curesc1 <-semicure(Surv(tyears, d) ~1,cureform =~ Karn + Broders + FIGO + Ascites + Diam, data = ova)summary(sc1) # Note that semicure models probability of "uncure"# Cure + Coxsc2 <-semicure(Surv(tyears, d) ~ Karn + Broders + FIGO + Ascites + Diam,cureform =~ Karn + Broders + FIGO + Ascites + Diam, data = ova)summary(sc2)################################################################################################################################################################# Reduced rank models##############################################################################################################################################################require(coxvc)# Package coxvc fits the reduced rank models; careful, it also contains# a data set ova, with small differences (at least in the names of the time# and status variables). Be sure to take the ova data from dynpreddata(ova, package="dynpred")# Somehow, for the functions of coxvc to work, the names of 'time' and 'status'# need to be "time" and "death", and these need to be orderednames(ova)[1:2] <-c("time","death")ova <- ova[order(ova$time,-ova$death),]# Software works only with categorical covariates, so code explicitlymm <-model.matrix(Surv(time, death) ~ FIGO + Diam + Broders + Ascites + Karn, data=ova)[,-1]ova <-cbind(ova,mm)names(ova)[10:13] <-c("Diamless1","Diam1to2","Diam2to5","Diamgreater5")Ft <-cbind(rep(1, nrow(ova)), log(ova$time+1))# This is not in the book, but a rank=2 model with two time functions# (identity and log(t+1)) is of full rank, so this will retain the# time-varying model of Table 6.1fit.r21 <-coxvc(Surv(time, death) ~ FIGOIV + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Diamless1 + Diam1to2 + Diam2to5 + Diamgreater5 + Karn, Ft, rank =2, data=ova)fit.r21# Two-stage proceduremm <-model.matrix(Surv(Tstart, tyears, d) ~ FIGO + Diam + Broders + Ascites + Karn, data=ova2)[,-1]ova2$Xbeta <-as.numeric(mm %*% cfixed$coef)cfixed0 <-coxph(Surv(Tstart, tyears, d) ~ Xbeta, data = ova2)ctime3 <-coxph(Surv(Tstart, tyears, d) ~ Xbeta + Xbeta:lnt, data = ova2)loglik.twostate <- ctime3$loglik[2]anova(cfixed0,ctime3)## Rank 1 model with 1 and log(t+1)Ft <-cbind(rep(1, nrow(ova)), log(ova$time+1))fit.r11 <-coxvc(Surv(time, death) ~ Karn + Broders2 + Broders3 + Broders4 + Brodersunknown + FIGOIV + Ascitespresent + Ascitesunknown + Diamless1 + Diam1to2 + Diam2to5 + Diamgreater5, Ft, rank =1, data=ova)fit.r11# Rescale gamma's so that gamma_1 = 1gam11 <- fit.r11$gama[1]gam21 <- fit.r11$gama[2]gam21 <- gam21/gam11gam21 # -0.488gam11 <-1loglik.r11 <- fit.r11$logL# Not in the book; plotcoxvc plots survival or covariate effects. I got it to work# only if the data are ordered, names of time and status are "time" and "death", and# data is attachedattach(ova)plotcoxvc(fit.r11, fun="survival")plotcoxvc(fit.r11, fun="effects")## Three time functions, rank=1Ft <-cbind(rep(1, nrow(ova)), log(ova$time+1), log(ova$time+1)^2)fit.r12 <-coxvc(Surv(time, death) ~ Karn + Broders2 + Broders3 + Broders4 + Brodersunknown + FIGOIV + Ascitespresent + Ascitesunknown + Diamless1 + Diam1to2 + Diam2to5 + Diamgreater5, Ft, rank =1, data=ova)fit.r12loglik.r12 <- fit.r12$logL# plotcoxvc(fit.r12, fun="survival")# plotcoxvc(fit.r12, fun="effects")## Four time functions, rank=1Ft <-cbind(rep(1, nrow(ova)), log(ova$time+1), log(ova$time+1)^2, log(ova$time+1)^3)fit.r13 <-coxvc(Surv(time, death) ~ Karn + Broders2 + Broders3 + Broders4 + Brodersunknown + FIGOIV + Ascitespresent + Ascitesunknown + Diamless1 + Diam1to2 + Diam2to5 + Diamgreater5, Ft, rank =1, data=ova)fit.r13loglik.r13 <- fit.r13$logL# plotcoxvc(fit.r13, fun="survival")# plotcoxvc(fit.r13, fun="effects")################################################################################## Figure 6.5: Shape of the time variation of the rank=1 model for extended### bases###############################################################################gam12 <- fit.r12$gama[1]gam22 <- fit.r12$gama[2]gam32 <- fit.r12$gama[3]gam22 <- gam22/gam12gam32 <- gam32/gam12gam12 <-1gam13 <- fit.r13$gama[1]gam23 <- fit.r13$gama[2]gam33 <- fit.r13$gama[3]gam43 <- fit.r13$gama[4]gam23 <- gam23/gam13gam33 <- gam33/gam13gam43 <- gam43/gam13gam13 <-1tseq <-seq(0,6,by=0.05)plot(tseq,gam11 + gam21*log(tseq+1),type="l",lwd=2,ylim=c(-0.25,1),xlab="Time (years)",ylab="Regression coefficient")lines(tseq,gam12 + gam22*log(tseq+1) + gam32*log(tseq+1)^2,type="l",lwd=2,lty=2)lines(tseq,gam13 + gam23*log(tseq+1) + gam33*log(tseq+1)^2+ gam43*log(tseq+1)^3,type="l",lwd=2,lty=3)abline(h=0,lty=5,lwd=1)legend("topright",c("m=2","m=3","m=4"),lwd=2,lty=1:3,bty="n")## Rank 1 model with 1 and log(t+1) for single categorical covariateFt <-cbind(rep(1, nrow(ova)), log(ova$time+1))fit.r00 <-coxph(Surv(time, death) ~ Diam, data=ova)fit.r00fit.r11 <-coxvc(Surv(time, death) ~ Diamless1 + Diam1to2 + Diam2to5 + Diamgreater5, Ft, rank =1, data=ova)fit.r11fit.r11$gama[2]/fit.r11$gama[1] # -0.504fit.r00$loglikfit.r11$logL### Rank=2 modelsFt <-cbind(rep(1, nrow(ova)), log(ova$time+1))fit.r2 <-coxvc(Surv(time, death) ~ FIGOIV + Diamless1 + Diam1to2 + Diam2to5 + Diamgreater5 + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Karn, Ft, rank =2, data=ova)loglik.r21 <- fit.r2$logLFt <-cbind(rep(1, nrow(ova)), log(ova$time+1), log(ova$time+1)^2)fit.r2 <-coxvc(Surv(time, death) ~ FIGOIV + Diamless1 + Diam1to2 + Diam2to5 + Diamgreater5 + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Karn, Ft, rank =2, data=ova)loglik.r22 <- fit.r2$logLFt <-cbind(rep(1, nrow(ova)), log(ova$time+1), log(ova$time+1)^2, log(ova$time+1)^3)fit.r2 <-coxvc(Surv(time, death) ~ FIGOIV + Diamless1 + Diam1to2 + Diam2to5 + Diamgreater5 + Broders2 + Broders3 + Broders4 + Brodersunknown + Ascitespresent + Ascitesunknown + Karn, Ft, rank =2, data=ova)loglik.r23 <- fit.r2$logLloglik.r23 - loglik.r13################################################################################## Figure 6.6: Time-dependent regression effects for the rank = 2, m = 4 model################################################################################ Automatic default picture of effects (not shown in the book)plotcoxvc(fit.r2, fun="effects")f1 <-function(t) 1f2 <-function(t) log(t+1)f3 <-function(t) log(t+1)^2f4 <-function(t) log(t+1)^3tseq <-seq(0,6,by=0.05)nseq <-length(tseq)Ftseq <-cbind(rep(1, nseq), f2(tseq), f3(tseq), f4(tseq))bseq <- Ftseq %*%t(fit.r2$theta)nms <-c("FIGO IV","Diameter <1 cm","Diameter 1-2 cm","Diameter 2-5 cm","Diameter >5 cm","Broders 2","Broders 3","Broders 4","Broders unknown","Ascites present","Ascites unknown","Karnofsky")vdist <- hdist <-0.2ltys <-c(1,1,2,3,4,1,2,3,4,1,2,2)ylim <-range(bseq)ylim[2] <- ylim[2]+0.2layout(matrix(1:4, 2, 2, byrow=TRUE),widths=c(10,10),heights=c(10,10))# topleft; KARN and FIGO (1 and 12)wh <-c(1,12)par(mar=c(vdist, 4, 3, hdist))plot(tseq,bseq[,1],type="n",ylim=ylim,xlab="",ylab="Regression coefficient",axes=FALSE)for (j in wh) lines(tseq,bseq[,j],type="l",lwd=2,lty=ltys[j])abline(h=0,lty=5,lwd=1)legend("topright",nms[wh],lwd=2,lty=ltys[wh],bty="n")axis(2); axis(3); box()# topright; Broders (6-9)wh <-6:9par(mar=c(vdist, hdist, 3, 4))plot(tseq,bseq[,1],type="n",ylim=ylim,xlab="",ylab="",axes=FALSE)for (j in wh) lines(tseq,bseq[,j],type="l",lwd=2,lty=ltys[j])abline(h=0,lty=5,lwd=1)legend("topright",nms[wh],lwd=2,lty=ltys[wh],bty="n")axis(3); axis(4); box()# bottomleft; Diameter (2-5)wh <-2:5par(mar=c(5, 4, vdist, hdist))plot(tseq,bseq[,1],type="n",ylim=ylim,xlab="Time (years)",ylab="Regression coefficient",axes=FALSE)for (j in wh) lines(tseq,bseq[,j],type="l",lwd=2,lty=ltys[j])abline(h=0,lty=5,lwd=1)legend("topright",nms[wh],lwd=2,lty=ltys[wh],bty="n")axis(1); axis(2); box()# bottomright; Ascites (10-11)wh <-10:11par(mar=c(5, hdist, vdist, 4))plot(tseq,bseq[,1],type="n",ylim=ylim,xlab="Time (years)",ylab="",axes=FALSE)for (j in wh) lines(tseq,bseq[,j],type="l",lwd=2,lty=ltys[j])abline(h=0,lty=5,lwd=1)legend("topright",nms[wh],lwd=2,lty=ltys[wh],bty="n")axis(1); axis(4); box()par(oldpar) # reset graphical parameters### A summary of all the partial log-likelihoodsloglik.fixed # time-fixed- opt$value # relaxed Burrloglik.twostate # two-stageloglik.r11 # the reduced rank modelsloglik.r12loglik.r13loglik.r21 # the log-likelihood of the ful time-varying model (=loglik.timefull)loglik.r22loglik.r23## Comparisons mentioned in the textloglik.timefull-loglik.fixedloglik.time1-loglik.fixedloglik.r11 - loglik.twostateloglik.r13 - loglik.r11loglik.r23 - loglik.r13