This file contains R code for the analyses in Chapter 12 of the book Dynamic Prediction in Clinical Survival Analysis (CRC Chapman & Hall) by Hans C. van Houwelingen and Hein Putter
R code written by Hein Putter (H.Putter@lumc.nl for comments/questions) The dynpred package is available from CRAN
Consistency with the book has been checked with - R version 2.14.0 - survival version 2.36-10 - dynpred version 0.1.1
Code
library(penalized) # version 0.9-37library(Biobase) # see chap11.r for downloadlibrary(dynpred)data(nki)### This chapter continues from chapter 11### If chapter 11 is still in memory there is no need to run the following and### you can continue where it says --- Continue here ---# The van de Vijver data, not part of dynpred, available from the book website,# chapter11, should be placed in the working directoryload("VanDeVijver.Rdata")# Please make sure that the files "cvlassopred.txt" and "cvridgepred.txt",# available from the book website is placed in the working directorycvlasso <-read.table("cvlassopred.txt",header=TRUE,sep="\t")PI.lasso <- cvlasso$PI.lassoSurv.lasso <- cvlasso[,-(1:2)]cvridge <-read.table("cvridgepred.txt",header=TRUE,sep="\t")PI.ridge <- cvridge$PI.ridgeSurv.ridge <- cvridge[,-(1:2)]# Save the follow-up data of VanDeVijver in separate data set,# rename time and status variablesvdv <-pData(VanDeVijver)vdv$time <- vdv$survival.death.vdv$status <- vdv$event_death# Extract n and time points from the Surv.lasso objectn <-nrow(Surv.lasso)tt <-as.numeric(substr(dimnames(Surv.lasso)[[2]],start=6,stop=100))# New cross-validated PI's, append them to vdv datat0 <-5idxt0 <-max(which(tt<=t0))CVPI.lasso <-log(-log(Surv.lasso[,idxt0]))CVPI.lasso <- CVPI.lasso -mean(CVPI.lasso)CVPI.ridge <-log(-log(Surv.ridge[,idxt0]))CVPI.ridge <- CVPI.ridge -mean(CVPI.ridge)vdv$group.lasso <-as.numeric(cut(CVPI.lasso,c(-Inf,quantile(CVPI.lasso)[2:4],Inf)))vdv$group.ridge <-as.numeric(cut(CVPI.ridge,c(-Inf,quantile(CVPI.ridge)[2:4],Inf)))### Cross-validated clinical prognostic indicesCVPI.clin <-rep(NA,n)for (i in1:n) {# leave i out ci <-coxph(Surv(tyears,d) ~ chemotherapy + hormonaltherapy + typesurgery + histolgrade + vasc.invasion + diameter + posnodes + age + mlratio, data=nki[-i,], method="breslow") sfi <-survfit(ci, newdata=nki[i,]) summi <-summary(sfi, times=5) CVPI.clin[i] <-log(-log(summi$surv))}CVPI.clin <- CVPI.clin-mean(CVPI.clin)### --- Continue here ---vdv$CVPI.clin <- CVPI.clinvdv$CVPI.gen <- CVPI.ridge# keep only smaller partvdv <- vdv[,c(1,30:35)]################################################################################################################################################################# Table 12.1: Regression coefficients (standard errors) in the two-stage### Cox regression approach##############################################################################################################################################################tt <-sort(unique(vdv$time[vdv$status==1]))vdv2 <-survSplit(data=vdv, cut=tt, end="time", start="Tstart", event="status")vdv2$lnt <-log(vdv2$time+1)## Clinical cross-validated PI# Time fixed onlyclinfixed <-coxph(Surv(time, status) ~ CVPI.clin, data = vdv, method="breslow")clinfixed# Same model in longer dataclinfixed <-coxph(Surv(Tstart, time, status) ~ CVPI.clin, data = vdv2, method="breslow")clinfixeddata.frame(loglik0=clinfixed$loglik[1],loglik1=clinfixed$loglik[1],chisq=2*diff(clinfixed$loglik))# Time varyingclintime <-coxph(Surv(Tstart, time, status) ~ CVPI.clin + CVPI.clin:lnt, data = vdv2, method="breslow")clintimedata.frame(loglik0=clintime$loglik[1],loglik1=clintime$loglik[1],chisq=2*diff(clintime$loglik))anova(clinfixed,clintime)## Genetic cross-validated PI# Time fixed only, directly in longer datagenfixed <-coxph(Surv(Tstart, time, status) ~ CVPI.gen, data = vdv2, method="breslow")genfixeddata.frame(loglik0=genfixed$loglik[1],loglik1=genfixed$loglik[1],chisq=2*diff(genfixed$loglik))# Time varyinggentime <-coxph(Surv(Tstart, time, status) ~ CVPI.gen + CVPI.gen:lnt, data = vdv2, method="breslow")gentimedata.frame(loglik0=gentime$loglik[1],loglik1=gentime$loglik[1],chisq=2*diff(gentime$loglik))anova(genfixed,gentime)## Super learnerc3 <-coxph(Surv(time,status)~CVPI.clin+CVPI.gen,data=vdv,method="breslow") # from end Chapter 11vdv$CVPI.comb <- c3$coef[1]*vdv$CVPI.clin + c3$coef[2]*vdv$CVPI.genvdv2$CVPI.comb <- c3$coef[1]*vdv2$CVPI.clin + c3$coef[2]*vdv2$CVPI.gen# Time fixed onlycombfixed <-coxph(Surv(Tstart, time, status) ~ CVPI.comb, data = vdv2, method="breslow")combfixeddata.frame(loglik0=combfixed$loglik[1],loglik1=combfixed$loglik[1],chisq=2*diff(combfixed$loglik))# Time varyingcombtime <-coxph(Surv(Tstart, time, status) ~ CVPI.comb + CVPI.comb:lnt, data = vdv2, method="breslow")combtimedata.frame(loglik0=combtime$loglik[1],loglik1=combtime$loglik[1],chisq=2*diff(combtime$loglik))anova(combfixed,combtime)################################################################################################################################################################# Table 12.2: Univariate and multivariate Cox regression on clinical and### genomic cross-validated prognostic indices in different landmark data sets############################################################################################################################################################### Landmark data setsLMs <-seq(0,5,by=1)w <-5for (i in1:length(LMs)) { LM <- LMs[i]cat("\n\nLandmark time point:",LM,"\n\n") LMdata <-cutLM(data=vdv,outcome=list(time="time",status="status"),LM=LM,horizon=LM+w,covs=list(fixed=c("CVPI.clin","CVPI.gen","CVPI.comb"),timedep=NULL)) cox.clin <-coxph(Surv(LM,time,status)~CVPI.clin, data=LMdata, method="breslow")print(data.frame(B=round(cox.clin$coef,3),chisq=round(2*diff(cox.clin$loglik),3))) cox.gen <-coxph(Surv(LM,time,status)~CVPI.gen, data=LMdata, method="breslow")print(data.frame(B=round(cox.gen$coef,3),chisq=round(2*diff(cox.gen$loglik),3))) cox.comb <-coxph(Surv(LM,time,status)~CVPI.comb, data=LMdata, method="breslow")print(data.frame(B=round(cox.comb$coef,3),chisq=round(2*diff(cox.comb$loglik),3)))}################################################################################################################################################################# Table 12.3: Estimated regression coefficients (standard errors) for the### proportional baselines (ipl*) landmark super models without and with### (landmark-dependent) linear landmark interactions############################################################################################################################################################### Stacked landmark data set based on a finer gridLMs <-seq(0,7,by=0.1)w <-5LMdata <-cutLM(data=vdv,outcome=list(time="time",status="status"),LM=0,horizon=w,covs=list(fixed=c("CVPI.clin","CVPI.gen","CVPI.comb"),timedep=NULL))for (i in2:length(LMs)) { LM <- LMs[i] LMdata <-rbind(LMdata,cutLM(data=vdv,outcome=list(time="time",status="status"),LM=LM,horizon=LM+w,covs=list(fixed=c("CVPI.clin","CVPI.gen","CVPI.comb"),timedep=NULL)))}f1 <-function(t) 1f2 <-function(t) (t/7)# Explicitly code interactions of treatment with LMLMdata$clin1 <- LMdata$CVPI.clin*f1(LMdata$LM)LMdata$clin2 <- LMdata$CVPI.clin*f2(LMdata$LM)LMdata$gen1 <- LMdata$CVPI.gen*f1(LMdata$LM)LMdata$gen2 <- LMdata$CVPI.gen*f2(LMdata$LM)LMdata$comb1 <- LMdata$CVPI.comb*f1(LMdata$LM)LMdata$comb2 <- LMdata$CVPI.comb*f2(LMdata$LM)# ipl modeliplstrat <-coxph(Surv(LM,time,status) ~ clin1 + clin2 + gen1 + gen2 +strata(LM) +cluster(ID), data=LMdata, method="breslow")iplstrat# ipl* modelg1 <-function(t) (t/7)g2 <-function(t) (t/7)^2LMdata$LM1 <-g1(LMdata$LM)LMdata$LM2 <-g2(LMdata$LM)iplcov <-coxph(Surv(LM,time,status) ~ clin1 + clin2 + gen1 + gen2 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplcov# ipl* models, separate and combined, with and without interactionsiplclin1 <-coxph(Surv(LM,time,status) ~ clin1 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplclin1iplclin2 <-coxph(Surv(LM,time,status) ~ clin1 + clin2 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplclin2iplgen1 <-coxph(Surv(LM,time,status) ~ gen1 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplgen1iplgen2 <-coxph(Surv(LM,time,status) ~ gen1 + gen2 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplgen2iplcomb1 <-coxph(Surv(LM,time,status) ~ comb1 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplcomb1iplcomb2 <-coxph(Surv(LM,time,status) ~ comb1 + comb2 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplcomb2iplcov1 <-coxph(Surv(LM,time,status) ~ clin1 + gen1 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplcov1iplcov2 <-coxph(Surv(LM,time,status) ~ clin1 + clin2 + gen1 + gen2 + LM1 + LM2 +cluster(ID), data=LMdata, method="breslow")iplcov2################################################################################################################################################################# Figure 12.1: Dynamic fixed width predictions for without and with landmark### interactions##############################################################################################################################################################bhclin1 <-basehaz(iplclin1,centered=FALSE)bhclin2 <-basehaz(iplclin2,centered=FALSE)bhgen1 <-basehaz(iplgen1,centered=FALSE)bhgen2 <-basehaz(iplgen2,centered=FALSE)bhcomb1 <-basehaz(iplcomb1,centered=FALSE)bhcomb2 <-basehaz(iplcomb2,centered=FALSE)bhcov1 <-basehaz(iplcov1,centered=FALSE)bhcov2 <-basehaz(iplcov2,centered=FALSE)### Prediction plot, uses 25 and 75% quantilesclin.qs <-quantile(vdv$CVPI.clin,probs=c(0.25,0.75))clin.qsgen.qs <-quantile(vdv$CVPI.gen,probs=c(0.25,0.75))gen.qs# The corresponding quantiles of the super learner in the data# for these four individualsouter(0.495*clin.qs, 0.582*gen.qs, "+")Fn <-ecdf(vdv$CVPI.comb)Fn(outer(0.495*clin.qs, 0.582*gen.qs, "+"))## Dynamic predictions for clinical landmark-fixedsfclin1 <-survfit(iplclin1,newdata=data.frame(clin1=clin.qs,LM1=0,LM2=0),censor=FALSE)sfclin11 <-data.frame(time=sfclin1[1]$time,surv=sfclin1[1]$surv)sfclin11$Haz <--log(sfclin11$surv)sfclin11$haz <-diff(c(0,sfclin11$Haz))sfclin12 <-data.frame(time=sfclin1[2]$time,surv=sfclin1[2]$surv)sfclin12$Haz <--log(sfclin12$surv)sfclin12$haz <-diff(c(0,sfclin12$Haz))tpred <-c(sfclin11$time,sfclin11$time-w)tpred <-c(0,sort(tpred[tpred>0& tpred<=7]))npred <-length(tpred)Fwclin11 <- Fwclin12 <-rep(NA,npred)for (i in1:npred) { tp <- tpred[i] sfi1 <- sfclin11 sfi1$haz <- sfi1$haz*exp(iplclin1$coef["LM1"]*g1(tp)+iplclin1$coef["LM2"]*g2(tp)) sfi1$Haz <-cumsum(sfi1$haz) tmp <-approx(sfclin11$time, sfi1$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi1$Haz))$y Fwclin11[i] <-1-exp(-diff(tmp)) sfi2 <- sfclin12 sfi2$haz <- sfi2$haz*exp(iplclin1$coef["LM1"]*g1(tp)+iplclin1$coef["LM2"]*g2(tp)) sfi2$Haz <-cumsum(sfi2$haz) tmp <-approx(sfclin12$time, sfi2$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi2$Haz))$y Fwclin12[i] <-1-exp(-diff(tmp))}## Dynamic predictions for genomic landmark-fixedsfgen1 <-survfit(iplgen1,newdata=data.frame(gen1=gen.qs,LM1=0,LM2=0),censor=FALSE)sfgen11 <-data.frame(time=sfgen1[1]$time,surv=sfgen1[1]$surv)sfgen11$Haz <--log(sfgen11$surv)sfgen11$haz <-diff(c(0,sfgen11$Haz))sfgen12 <-data.frame(time=sfgen1[2]$time,surv=sfgen1[2]$surv)sfgen12$Haz <--log(sfgen12$surv)sfgen12$haz <-diff(c(0,sfgen12$Haz))tpred <-c(sfgen11$time,sfgen11$time-w)tpred <-c(0,sort(tpred[tpred>0& tpred<=7]))npred <-length(tpred)Fwgen11 <- Fwgen12 <-rep(NA,npred)for (i in1:npred) { tp <- tpred[i] sfi1 <- sfgen11 sfi1$haz <- sfi1$haz*exp(iplgen1$coef["LM1"]*g1(tp)+iplgen1$coef["LM2"]*g2(tp)) sfi1$Haz <-cumsum(sfi1$haz) tmp <-approx(sfgen11$time, sfi1$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi1$Haz))$y Fwgen11[i] <-1-exp(-diff(tmp)) sfi2 <- sfgen12 sfi2$haz <- sfi2$haz*exp(iplgen1$coef["LM1"]*g1(tp)+iplgen1$coef["LM2"]*g2(tp)) sfi2$Haz <-cumsum(sfi2$haz) tmp <-approx(sfgen12$time, sfi2$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi2$Haz))$y Fwgen12[i] <-1-exp(-diff(tmp))}## Dynamic predictions for super learner landmark-fixed# This will have 4 different predictionssfcomb1 <-survfit(iplcomb1,newdata=data.frame(comb1=as.vector(outer(0.495*clin.qs, 0.582*gen.qs, "+")),LM1=0,LM2=0),censor=FALSE)sfcomb11 <-data.frame(time=sfcomb1[1]$time,surv=sfcomb1[1]$surv)sfcomb11$Haz <--log(sfcomb11$surv)sfcomb11$haz <-diff(c(0,sfcomb11$Haz))sfcomb12 <-data.frame(time=sfcomb1[2]$time,surv=sfcomb1[2]$surv)sfcomb12$Haz <--log(sfcomb12$surv)sfcomb12$haz <-diff(c(0,sfcomb12$Haz))sfcomb13 <-data.frame(time=sfcomb1[3]$time,surv=sfcomb1[3]$surv)sfcomb13$Haz <--log(sfcomb13$surv)sfcomb13$haz <-diff(c(0,sfcomb13$Haz))sfcomb14 <-data.frame(time=sfcomb1[4]$time,surv=sfcomb1[4]$surv)sfcomb14$Haz <--log(sfcomb14$surv)sfcomb14$haz <-diff(c(0,sfcomb14$Haz))tpred <-c(sfcomb11$time,sfcomb11$time-w)tpred <-c(0,sort(tpred[tpred>0& tpred<=7]))npred <-length(tpred)Fwcomb11 <- Fwcomb12 <- Fwcomb13 <- Fwcomb14 <-rep(NA,npred)for (i in1:npred) { tp <- tpred[i] sfi1 <- sfcomb11 sfi1$haz <- sfi1$haz*exp(iplcomb1$coef["LM1"]*g1(tp)+iplcomb1$coef["LM2"]*g2(tp)) sfi1$Haz <-cumsum(sfi1$haz) tmp <-approx(sfcomb11$time, sfi1$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi1$Haz))$y Fwcomb11[i] <-1-exp(-diff(tmp)) sfi2 <- sfcomb12 sfi2$haz <- sfi2$haz*exp(iplcomb1$coef["LM1"]*g1(tp)+iplcomb1$coef["LM2"]*g2(tp)) sfi2$Haz <-cumsum(sfi2$haz) tmp <-approx(sfcomb12$time, sfi2$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi2$Haz))$y Fwcomb12[i] <-1-exp(-diff(tmp)) sfi3 <- sfcomb13 sfi3$haz <- sfi3$haz*exp(iplcomb1$coef["LM1"]*g1(tp)+iplcomb1$coef["LM2"]*g2(tp)) sfi3$Haz <-cumsum(sfi3$haz) tmp <-approx(sfcomb13$time, sfi3$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi3$Haz))$y Fwcomb13[i] <-1-exp(-diff(tmp)) sfi4 <- sfcomb14 sfi4$haz <- sfi4$haz*exp(iplcomb1$coef["LM1"]*g1(tp)+iplcomb1$coef["LM2"]*g2(tp)) sfi4$Haz <-cumsum(sfi4$haz) tmp <-approx(sfcomb14$time, sfi4$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi4$Haz))$y Fwcomb14[i] <-1-exp(-diff(tmp))}## Dynamic predictions for clinical landmark-dependentsfclin2 <-survfit(iplclin2,newdata=data.frame(clin1=clin.qs,clin2=0,LM1=0,LM2=0),censor=FALSE)sfclin21 <-data.frame(time=sfclin2[1]$time,surv=sfclin2[1]$surv)sfclin21$Haz <--log(sfclin21$surv)sfclin21$haz <-diff(c(0,sfclin21$Haz))sfclin22 <-data.frame(time=sfclin2[2]$time,surv=sfclin2[2]$surv)sfclin22$Haz <--log(sfclin22$surv)sfclin22$haz <-diff(c(0,sfclin22$Haz))tpred <-c(sfclin21$time,sfclin21$time-w)tpred <-c(0,sort(tpred[tpred>0& tpred<=7]))npred <-length(tpred)Fwclin21 <- Fwclin22 <-rep(NA,npred)for (i in1:npred) { tp <- tpred[i] sfi1 <- sfclin21 sfi1$haz <- sfi1$haz*exp(iplclin2$coef["clin2"]*clin.qs[1]*f2(tp)+iplclin2$coef["LM1"]*g1(tp)+iplclin2$coef["LM2"]*g2(tp)) sfi1$Haz <-cumsum(sfi1$haz) tmp <-approx(sfclin21$time, sfi1$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi1$Haz))$y Fwclin21[i] <-1-exp(-diff(tmp)) sfi2 <- sfclin22 sfi2$haz <- sfi2$haz*exp(iplclin2$coef["clin2"]*clin.qs[2]*f2(tp)+iplclin2$coef["LM1"]*g1(tp)+iplclin2$coef["LM2"]*g2(tp)) sfi2$Haz <-cumsum(sfi2$haz) tmp <-approx(sfclin22$time, sfi2$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi2$Haz))$y Fwclin22[i] <-1-exp(-diff(tmp))}## Dynamic predictions for genomic landmark-dependentsfgen2 <-survfit(iplgen2,newdata=data.frame(gen1=gen.qs,gen2=0,LM1=0,LM2=0),censor=FALSE)sfgen21 <-data.frame(time=sfgen2[1]$time,surv=sfgen2[1]$surv)sfgen21$Haz <--log(sfgen21$surv)sfgen21$haz <-diff(c(0,sfgen21$Haz))sfgen22 <-data.frame(time=sfgen2[2]$time,surv=sfgen2[2]$surv)sfgen22$Haz <--log(sfgen22$surv)sfgen22$haz <-diff(c(0,sfgen22$Haz))tpred <-c(sfgen21$time,sfgen21$time-w)tpred <-c(0,sort(tpred[tpred>0& tpred<=7]))npred <-length(tpred)Fwgen21 <- Fwgen22 <-rep(NA,npred)for (i in1:npred) { tp <- tpred[i] sfi1 <- sfgen21 sfi1$haz <- sfi1$haz*exp(iplgen2$coef["gen2"]*gen.qs[1]*f2(tp)+iplgen2$coef["LM1"]*g1(tp)+iplgen2$coef["LM2"]*g2(tp)) sfi1$Haz <-cumsum(sfi1$haz) tmp <-approx(sfgen21$time, sfi1$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi1$Haz))$y Fwgen21[i] <-1-exp(-diff(tmp)) sfi2 <- sfgen22 sfi2$haz <- sfi2$haz*exp(iplgen2$coef["gen2"]*gen.qs[2]*f2(tp)+iplgen2$coef["LM1"]*g1(tp)+iplgen2$coef["LM2"]*g2(tp)) sfi2$Haz <-cumsum(sfi2$haz) tmp <-approx(sfgen22$time, sfi2$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi2$Haz))$y Fwgen22[i] <-1-exp(-diff(tmp))}## Dynamic predictions for super learner landmark-dependent# This will have 4 different predictionscombs <-as.vector(outer(0.495*clin.qs, 0.582*gen.qs, "+"))sfcomb2 <-survfit(iplcomb2,newdata=data.frame(comb1=combs,comb2=0,LM1=0,LM2=0),censor=FALSE)sfcomb21 <-data.frame(time=sfcomb2[1]$time,surv=sfcomb2[1]$surv)sfcomb21$Haz <--log(sfcomb21$surv)sfcomb21$haz <-diff(c(0,sfcomb21$Haz))sfcomb22 <-data.frame(time=sfcomb2[2]$time,surv=sfcomb2[2]$surv)sfcomb22$Haz <--log(sfcomb22$surv)sfcomb22$haz <-diff(c(0,sfcomb22$Haz))sfcomb23 <-data.frame(time=sfcomb2[3]$time,surv=sfcomb2[3]$surv)sfcomb23$Haz <--log(sfcomb23$surv)sfcomb23$haz <-diff(c(0,sfcomb23$Haz))sfcomb24 <-data.frame(time=sfcomb2[4]$time,surv=sfcomb2[4]$surv)sfcomb24$Haz <--log(sfcomb24$surv)sfcomb24$haz <-diff(c(0,sfcomb24$Haz))tpred <-c(sfcomb21$time,sfcomb21$time-w)tpred <-c(0,sort(tpred[tpred>0& tpred<=7]))npred <-length(tpred)Fwcomb21 <- Fwcomb22 <- Fwcomb23 <- Fwcomb24 <-rep(NA,npred)for (i in1:npred) { tp <- tpred[i] sfi1 <- sfcomb21 sfi1$haz <- sfi1$haz*exp(iplcomb2$coef["comb2"]*combs[1]*f2(tp)+iplcomb2$coef["LM1"]*g1(tp)+iplcomb2$coef["LM2"]*g2(tp)) sfi1$Haz <-cumsum(sfi1$haz) tmp <-approx(sfcomb21$time, sfi1$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi1$Haz))$y Fwcomb21[i] <-1-exp(-diff(tmp)) sfi2 <- sfcomb22 sfi2$haz <- sfi2$haz*exp(iplcomb2$coef["comb2"]*combs[2]*f2(tp)+iplcomb2$coef["LM1"]*g1(tp)+iplcomb2$coef["LM2"]*g2(tp)) sfi2$Haz <-cumsum(sfi2$haz) tmp <-approx(sfcomb22$time, sfi2$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi2$Haz))$y Fwcomb22[i] <-1-exp(-diff(tmp)) sfi3 <- sfcomb23 sfi3$haz <- sfi3$haz*exp(iplcomb2$coef["comb2"]*combs[3]*f2(tp)+iplcomb2$coef["LM1"]*g1(tp)+iplcomb2$coef["LM2"]*g2(tp)) sfi3$Haz <-cumsum(sfi3$haz) tmp <-approx(sfcomb23$time, sfi3$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi3$Haz))$y Fwcomb23[i] <-1-exp(-diff(tmp)) sfi4 <- sfcomb24 sfi4$haz <- sfi4$haz*exp(iplcomb2$coef["comb2"]*combs[4]*f2(tp)+iplcomb2$coef["LM1"]*g1(tp)+iplcomb2$coef["LM2"]*g2(tp)) sfi4$Haz <-cumsum(sfi4$haz) tmp <-approx(sfcomb24$time, sfi4$Haz, c(tp,tp+w), method="constant",yleft=0, yright=max(sfi4$Haz))$y Fwcomb24[i] <-1-exp(-diff(tmp))}plot(tpred,Fwclin11,type="s",lwd=2,ylim=c(0,0.5),xlab="Prediction time (years)",ylab="Probability")lines(tpred,Fwclin12,type="s",lwd=2,lty=2)lines(tpred,Fwclin21,type="s",lwd=2,col="#646060")lines(tpred,Fwclin22,type="s",lwd=2,col="#646060",lty=2)legend("topright",c("High risk, landmark fixed","High risk, landmark dependent","Low risk, landmark fixed","Low risk, landmark dependent"),lwd=2,lty=c(2,2,1,1),col=c(1,"#646060",1,"#646060"),bty="n")title(main="Clinical")plot(tpred,Fwgen11,type="s",lwd=2,ylim=c(0,0.5),xlab="Prediction time (years)",ylab="Probability")lines(tpred,Fwgen12,type="s",lwd=2,lty=2)lines(tpred,Fwgen21,type="s",lwd=2,col="#646060")lines(tpred,Fwgen22,type="s",lwd=2,col="#646060",lty=2)legend("topright",c("High risk, landmark fixed","High risk, landmark dependent","Low risk, landmark fixed","Low risk, landmark dependent"),lwd=2,lty=c(2,2,1,1),col=c(1,"#646060",1,"#646060"),bty="n")title(main="Genomic")plot(tpred,Fwcomb11,type="s",lwd=2,ylim=c(0,0.5),xlab="Prediction time (years)",ylab="Probability")lines(tpred,Fwcomb12,type="s",lwd=2,lty=2)lines(tpred,Fwcomb13,type="s",lwd=2,lty=3)lines(tpred,Fwcomb14,type="s",lwd=2,lty=4)lines(tpred,Fwcomb21,type="s",lwd=2,col="#646060")lines(tpred,Fwcomb22,type="s",lwd=2,col="#646060",lty=2)lines(tpred,Fwcomb23,type="s",lwd=2,col="#646060",lty=3)lines(tpred,Fwcomb24,type="s",lwd=2,col="#646060",lty=4)legend("topright",c("High risk clinical, high risk genomic","High risk clinical, low risk genomic","Low risk clinical, high risk genomic","Low risk clinical, Low risk genomic"),lwd=2,lty=c(4,2,3,1),bty="n")legend("bottomleft",c("Landmark fixed","Landmark dependent"),lwd=2,col=c(1,"#646060"),bty="n")title(main="Super learner")################################################################################################################################################################# Figure 12.2: Kullback-Leibler dynamic (fixed width w = 5) prediction error### curves for the landmark supermodels without and with landmark interactions################################################################################################################################################################# Kullback-Leibler dynamic prediction error curves# Have to call pe for each landmark time point for each of the modelsLMs <-seq(0,7,by=0.025)w <-5pes <-matrix(NA,length(LMs),9)pes[,1] <- LMsfor (j in1:length(LMs)) { LM <- LMs[j]# deb(LM, method="cat")## Make predictions# Use landmark data set for estimating conditional censoring distr LMdataj <-cutLM(data=vdv,outcome=list(time="time",status="status"),LM=LM,horizon=LM+w,covs=list(fixed=c("CVPI.clin","CVPI.gen","CVPI.comb"),timedep=NULL))# Explicitly code interactions of treatment with LM and put in LMdataj LMdataj$clin1 <- LMdataj$CVPI.clin*f1(LMdataj$LM) LMdataj$clin2 <- LMdataj$CVPI.clin*f2(LMdataj$LM) LMdataj$gen1 <- LMdataj$CVPI.gen*f1(LMdataj$LM) LMdataj$gen2 <- LMdataj$CVPI.gen*f2(LMdataj$LM) LMdataj$comb1 <- LMdataj$CVPI.comb*f1(LMdataj$LM) LMdataj$comb2 <- LMdataj$CVPI.comb*f2(LMdataj$LM) LMdataj$LM1 <-g1(LMdataj$LM) LMdataj$LM2 <-g2(LMdataj$LM) ni <-nrow(LMdataj)# deb(ni, method="cat") sfcens <-survfit(Surv(time,status==0)~1,data=LMdataj) tcens <- sfcens$time censmat <-matrix(sfcens$surv,length(sfcens$surv),ni)# clin1 H0 <-diff(evalstep(time=bhclin1$time,stepf=bhclin1$hazard,newtime=c(LM,LM+w),subst=0)) B <- iplclin1$coef HR <-exp(B["clin1"] * LMdataj$clin1 + B["LM1"]*g1(LM) + B["LM2"]*g2(LM)) pes[j,2] <-pe(time=LMdataj$time, status=LMdataj$status,tsurv=LM+w, survmat=matrix(exp(-HR*H0),1,ni),tcens=tcens, censmat=censmat, tout=LM+w-0.00001)$Err# clin2 H0 <-diff(evalstep(time=bhclin2$time,stepf=bhclin2$hazard,newtime=c(LM,LM+w),subst=0)) B <- iplclin2$coef HR <-exp(B["clin1"]*LMdataj$clin1 + B["clin2"]*LMdataj$clin2*f2(LM) + B["LM1"]*g1(LM) + B["LM2"]*g2(LM)) pes[j,3] <-pe(time=LMdataj$time, status=LMdataj$status,tsurv=LM+w, survmat=matrix(exp(-HR*H0),1,ni),tcens=tcens, censmat=censmat, tout=LM+w-0.00001)$Err# gen1 H0 <-diff(evalstep(time=bhgen1$time,stepf=bhgen1$hazard,newtime=c(LM,LM+w),subst=0)) B <- iplgen1$coef HR <-exp(B["gen1"] * LMdataj$gen1 + B["LM1"]*g1(LM) + B["LM2"]*g2(LM)) pes[j,4] <-pe(time=LMdataj$time, status=LMdataj$status,tsurv=LM+w, survmat=matrix(exp(-HR*H0),1,ni),tcens=tcens, censmat=censmat, tout=LM+w-0.00001)$Err# gen2 H0 <-diff(evalstep(time=bhgen2$time,stepf=bhgen2$hazard,newtime=c(LM,LM+w),subst=0)) B <- iplgen2$coef HR <-exp(B["gen1"]*LMdataj$gen1 + B["gen2"]*LMdataj$gen2*f2(LM) + B["LM1"]*g1(LM) + B["LM2"]*g2(LM)) pes[j,5] <-pe(time=LMdataj$time, status=LMdataj$status,tsurv=LM+w, survmat=matrix(exp(-HR*H0),1,ni),tcens=tcens, censmat=censmat, tout=LM+w-0.00001)$Err# comb1 H0 <-diff(evalstep(time=bhcomb1$time,stepf=bhcomb1$hazard,newtime=c(LM,LM+w),subst=0)) B <- iplcomb1$coef HR <-exp(B["comb1"] * LMdataj$comb1 + B["LM1"]*g1(LM) + B["LM2"]*g2(LM)) pes[j,6] <-pe(time=LMdataj$time, status=LMdataj$status,tsurv=LM+w, survmat=matrix(exp(-HR*H0),1,ni),tcens=tcens, censmat=censmat, tout=LM+w-0.00001)$Err# comb2 H0 <-diff(evalstep(time=bhcomb2$time,stepf=bhcomb2$hazard,newtime=c(LM,LM+w),subst=0)) B <- iplcomb2$coef HR <-exp(B["comb1"]*LMdataj$comb1 + B["comb2"]*LMdataj$comb2*f2(LM) + B["LM1"]*g1(LM) + B["LM2"]*g2(LM)) pes[j,7] <-pe(time=LMdataj$time, status=LMdataj$status,tsurv=LM+w, survmat=matrix(exp(-HR*H0),1,ni),tcens=tcens, censmat=censmat, tout=LM+w-0.00001)$Err# cov1 H0 <-diff(evalstep(time=bhcov1$time,stepf=bhcov1$hazard,newtime=c(LM,LM+w),subst=0)) B <- iplcov1$coef HR <-exp(B["clin1"]*LMdataj$clin1 + B["gen1"]*LMdataj$gen1 + B["LM1"]*g1(LM) + B["LM2"]*g2(LM)) pes[j,8] <-pe(time=LMdataj$time, status=LMdataj$status,tsurv=LM+w, survmat=matrix(exp(-HR*H0),1,ni),tcens=tcens, censmat=censmat, tout=LM+w-0.00001)$Err# cov2 H0 <-diff(evalstep(time=bhcov2$time,stepf=bhcov2$hazard,newtime=c(LM,LM+w),subst=0)) B <- iplcov2$coef HR <-exp(B["clin1"]*LMdataj$clin1 + B["clin2"]*LMdataj$clin2*f2(LM) ++ B["gen1"]*LMdataj$gen1 + B["gen2"]*LMdataj$gen2*f2(LM) + B["LM1"]*g1(LM) + B["LM2"]*g2(LM)) pes[j,9] <-pe(time=LMdataj$time, status=LMdataj$status,tsurv=LM+w, survmat=matrix(exp(-HR*H0),1,ni),tcens=tcens, censmat=censmat, tout=LM+w-0.00001)$Err}pes <-as.data.frame(pes)names(pes) <-c("time","clin1","clin2","gen1","gen2","comb1","comb2","cov1","cov2")# The null model is easier, just a call to pewcoxKLw0 <-pewcox(Surv(time,status)~1, Surv(time,status==0)~1, width=w,data=vdv)pes$null <-evalstep(time=KLw0$time,stepf=KLw0$Err,newtime=LMs,subst=0)## Without landmark interactionsplot(pes$time,pes$null,type="s",lwd=2,lty=2,col="#646060",ylim=c(0.3,0.52),xlab="Time (years)",ylab="Prediction error")lines(pes$time,pes$clin1,type="s",lwd=2,lty=2,col=1)lines(pes$time,pes$gen1,type="s",lwd=2,lty=1,col="#646060")lines(pes$time,pes$comb1,type="s",lwd=2,lty=1,col=1)legend("topright",c("Null model","Clinical only","Genomic only","Super learner"),lwd=2,lty=c(2,2,1,1),col=c("#646060",1,"#646060",1),bty="n")## With landmark interactionsplot(pes$time,pes$null,type="s",lwd=2,lty=2,col="#646060",ylim=c(0.3,0.52),xlab="Time (years)",ylab="Prediction error")lines(pes$time,pes$clin2,type="s",lwd=2,lty=2,col=1)lines(pes$time,pes$gen2,type="s",lwd=2,lty=1,col="#646060")lines(pes$time,pes$comb2,type="s",lwd=2,lty=1,col=1)legend("topright",c("Null model","Clinical only","Genomic only","Super learner"),lwd=2,lty=c(2,2,1,1),col=c("#646060",1,"#646060",1),bty="n")################################################################################################################################################################# Figure 12.3: Kullback-Leibler dynamic (fixed width w = 5) prediction error### reduction curves for the landmark supermodels without and with landmark### interactions##############################################################################################################################################################pesr <- pespesr[,2:9] <- (pesr[,10]-pesr[,2:9])/pesr[,10]## Without landmark interactionsplot(pesr$time,pesr$clin1,type="s",lwd=2,lty=2,col=1,ylim=c(0,0.2),xlab="Time (years)",ylab="Prediction error reduction")lines(pesr$time,pesr$gen1,type="s",lwd=2,lty=1,col="#646060")lines(pesr$time,pesr$comb1,type="s",lwd=2,lty=1,col=1)legend("topright",c("Clinical only","Genomic only","Super learner"),lwd=2,lty=c(2,1,1),col=c(1,"#646060",1),bty="n")## With landmark interactionsplot(pesr$time,pesr$clin2,type="s",lwd=2,lty=2,col=1,ylim=c(0,0.2),xlab="Time (years)",ylab="Prediction error reduction")lines(pesr$time,pesr$gen2,type="s",lwd=2,lty=1,col="#646060")lines(pesr$time,pesr$comb2,type="s",lwd=2,lty=1,col=1)legend("topright",c("Clinical only","Genomic only","Super learner"),lwd=2,lty=c(2,1,1),col=c(1,"#646060",1),bty="n")################################################################################################################################################################# Ridge regression for a set of landmark time points, 0, ..., 5############################################################################################################################################################### Note, this is very time-consuming, so instead of running this part, the# second part, now commented out, could be run instead. Please make# sure that the file "cvlassopred.txt", available from the book website# is placed in the working directory### Run ridge regression on landmark data sets, calculate cross-validated### prognostic index for each individual in the landmark data set,### and append that to vdv dataLMs <-seq(0,5,by=1)w <-5vdvplus <- vdv[,1:3]vdvplus <-cbind(vdvplus,t(exprs(VanDeVijver)))for (i in1:length(LMs)) { LM <- LMs[i]cat("\n\nLandmark time point:",LM,"\n\n") LMdata <-cutLM(data=vdvplus,outcome=list(time="time",status="status"),LM=LM,horizon=LM+w,covs=list(fixed=names(vdvplus)[-(1:3)],timedep=NULL))deb(dim(LMdata), method="cat")print(LMdata[1:6,1:8]) optlam2.LM <-optL2(Surv(LM,time,status), LMdata[,4:4922], data=LMdata) lamridg.LM <- optlam2.LM$lambda # optimal lambda ridg.LM <-cvl(Surv(time,status), LMdata[,4:4922], lambda2=lamridg.LM, data=LMdata) Surv.ridge.LM <-as.matrix(ridg.LM$predictions) PI.ridge.LM <-linear.predictors(ridg.LM$ful) tt.LM <-as.numeric(substr(dimnames(Surv.ridge.LM)[[2]],start=1,stop=100)) idxLMw <-max(which(tt.LM<=LM+w)) CVPI.ridge.LM <-log(-log(Surv.ridge.LM[,idxLMw])) CVPI.ridge.LM <- CVPI.ridge.LM -mean(CVPI.ridge.LM) nm <-paste("CVPI.LM",LM,sep="") add <-rep(NA,n) add[match(LMdata$ID,vdv$ID)] <- CVPI.ridge.LM vdv <-cbind(vdv,add)names(vdv)[ncol(vdv)] <- nm}# Write result to tab-delimited text file (uncomment next line)# write.table(vdv,file="vdv.txt",sep="\t",row.names=FALSE,col.names=TRUE)# vdv <- read.table("vdv.txt",header=TRUE,sep="\t")# This did not make it in the book, was replaced by table with correlationspairs(vdv[,c(7,9:14)],pch=20,cex=0.5)################################################################################################################################################################# Table 12.4: Standard deviations and correlations of cross-validated### landmark specific genomic ridge predictors##############################################################################################################################################################round(apply(vdv[,c(7,9:14)],2,sd,na.rm=TRUE),2)round(cor(vdv[,c(7,9:14)],use="pairwise.complete.obs"),3)################################################################################################################################################################# Table 12.5: Comparison of model chi^2 for different approaches,### using the genomic data############################################################################################################################################################### Cox models on the landmark data setsLMs <-seq(0,5,by=1)w <-5for (i in1:length(LMs)) { LM <- LMs[i]cat("\n\nLandmark time point:",LM,"\n\n") LMdata <-cutLM(data=vdv,outcome=list(time="time",status="status"),LM=LM,horizon=LM+w,covs=list(fixed=paste("CVPI",c("gen",paste("LM",0:5,sep="")),sep="."),timedep=NULL))deb(dim(LMdata)) cgen <-coxph(Surv(LM,time,status)~CVPI.gen, data=LMdata, method="breslow")print(data.frame(chisq=round(2*diff(cgen$loglik),3))) form <-as.formula(paste("Surv(LM,time,status)",paste("CVPI.LM",LM,sep=""),sep="~")) cLM <-coxph(form, data=LMdata, method="breslow")print(data.frame(chisq=round(2*diff(cLM$loglik),3)))}# For last landmark data set, use CVPI.LM4cLM <-coxph(Surv(LM,time,status)~CVPI.LM4, data=LMdata, method="breslow")data.frame(chisq=round(2*diff(cLM$loglik),3))################################################################################################################################################################# Table 12.6: Comparison of model chi^2 for different approaches,### using the clinical data################################################################################################################################################################# First calculate the cross-validated prognostic indices for each LM data setLMs <-seq(0,5,by=1)w <-5for (i in1:length(LMs)) { LM <- LMs[i]cat("\n\nLandmark time point:",LM,"\n\n") LMdata <-cutLM(data=nki,outcome=list(time="tyears",status="d"),LM=LM,horizon=LM+w,covs=list(fixed=c("chemotherapy","hormonaltherapy","typesurgery","histolgrade","vasc.invasion","diameter","posnodes","age","mlratio"),timedep=NULL))deb(dim(LMdata))print(LMdata[1:6,1:8]) ni <-nrow(LMdata) CVPI.clin.LM <-rep(NA,ni)for (j in1:ni) {# leave i out ci <-coxph(Surv(tyears,d) ~ chemotherapy + hormonaltherapy + typesurgery + histolgrade + vasc.invasion + diameter + posnodes + age + mlratio, data=LMdata[-j,], method="breslow") sfi <-survfit(ci, newdata=LMdata[j,])# need to evaluate S at LM+w, which is always the last time point (due to administrative censoring),# hence has the lowest survival CVPI.clin.LM[j] <-log(-log(min(sfi$surv))) } CVPI.clin.LM <- CVPI.clin.LM-mean(CVPI.clin.LM) add <-rep(NA,n) nm <-paste("CVPIclin.LM",LM,sep="") add[match(LMdata$patnr,nki$patnr)] <- CVPI.clin.LM vdv <-cbind(vdv,add)names(vdv)[ncol(vdv)] <- nm}### Cox models on the landmark data setsLMs <-seq(0,5,by=1)w <-5for (i in1:length(LMs)) { LM <- LMs[i]cat("\n\nLandmark time point:",LM,"\n\n") LMdata <-cutLM(data=vdv,outcome=list(time="time",status="status"),LM=LM,horizon=LM+w,covs=list(fixed=c("CVPI.clin",paste("CVPIclin",paste("LM",0:5,sep=""),sep=".")),timedep=NULL))deb(dim(LMdata)) cclin <-coxph(Surv(LM,time,status)~CVPI.clin, data=LMdata, method="breslow")print(data.frame(chisq=round(2*diff(cclin$loglik),3))) form <-as.formula(paste("Surv(LM,time,status)",paste("CVPIclin.LM",LM,sep=""),sep="~")) cclinLM <-coxph(form, data=LMdata, method="breslow")print(data.frame(chisq=round(2*diff(cclinLM$loglik),3)))}### Clinical and genomic information combined# Cox models on the landmark data setsLMs <-seq(0,5,by=1)w <-5for (i in1:length(LMs)) { LM <- LMs[i]cat("\n\nLandmark time point:",LM,"\n\n") LMdata <-cutLM(data=vdv,outcome=list(time="time",status="status"),LM=LM,horizon=LM+w,covs=list(fixed=c("CVPI.clin",paste("CVPIclin",paste("LM",0:5,sep=""),sep="."),paste("CVPI",c("gen",paste("LM",0:5,sep="")),sep=".")),timedep=NULL))deb(dim(LMdata)) cboth <-coxph(Surv(LM,time,status)~CVPI.clin+CVPI.gen, data=LMdata, method="breslow")print(data.frame(chisq=round(2*diff(cboth$loglik),3))) form <-as.formula(paste("Surv(LM,time,status)",paste("CVPI.clin",paste("CVPI.LM",LM,sep=""),sep="+"),sep="~")) cbothLM <-coxph(form, data=LMdata, method="breslow")print(data.frame(chisq=round(2*diff(cbothLM$loglik),3)))}