This file contains R code for the analyses in Chapter 2 of the book Dynamic Prediction in Clinical Survival Analysis (CRC Chapman & Hall) by Hans C. van Houwelingen and Hein Putter
R code written by Hein Putter (H.Putter@lumc.nl for comments/questions) The dynpred package is available from CRAN
Consistency with the book has been checked with - R version 2.14.0 - survival version 2.36-10 - dynpred version 0.1.1
Figure 2.1: Nelson-Aalen estimates of Data Sets 6 and 2
Data set 6
Code
require(dynpred)data(ALL)year <-365.25# Define relapse-free survival as the earliest of relapse and death# Time to relapse (rel) and time to death (srv) are in days# RFS will be in yearsALL$rfs <-pmin(ALL$rel,ALL$srv)/365.25ALL$rfs.s <-pmax(ALL$rel.s,ALL$srv.s)c0 <-coxph(Surv(rfs,rfs.s) ~1, data=ALL, method="breslow")sf0 <-survfit(c0)sf0 <-data.frame(time=c(0,sf0$time),surv=c(1,sf0$surv))sf0$Haz <--log(sf0$surv)sf0 <- sf0[sf0$time<12,]plot(sf0$time,sf0$Haz,type="s",xlab="Time (years)",ylab="Cumulative hazard",lwd=2)
Figure 2.2: Effect of the hazard ratio on the death risk
Code
F1seq <-seq(0,1,by=0.005) # this will be 1-S1F2seq <- F1seq # this will be 1-S2; for HR=1 they are the sameplot(F1seq,F2seq,type="l",lwd=2,xlab="F1",ylab="F2")HRs <-c(1,1.2,1.5,2,3)for (i in1:length(HRs)) { HR <- HRs[i] F2seq <-1- (1-F1seq)^HRlines(F1seq,F2seq,lwd=2,lty=i)}legend("topleft",paste("HR =",HRs),lwd=2,lty=1:5,bty="n")
Figure 2.3: Comparing the effect of HR=1.5 (dotted versus solid) for different types of hazards
Code
tt <-seq(0,10,by=0.05)HR <-1.5# Hazard ratiooldpar <-par(no.readonly=TRUE) # save old graphical parameters# Two plotslayout(matrix(1:2, 1, 2),widths=c(10.25,9))# On the left, Weibull with shape=0.5, scale=5 (according to the# parametrization used in R); this is a decreasing hazardpar(mar=c(5, 4, 4, 0.1) +0.1)sig <-2# The cumulative hazard is obtained using -log(survival)H0 <--pweibull(tt, 1/sig, 5, lower =FALSE, log =TRUE)H1 <- H0 * HR# Survival is obtained using reverse relation exp(-cumulative hazard)plot(tt,exp(-H0),ylim=c(0,1),type="l",lwd=2,xlab="Time",ylab="Survival")lines(tt,exp(-H1),type="l",lwd=2,lty=2)title(main="Decreasing hazard")# On the right, Weibull with shape=2, scale=5 (according to the# parametrization used in R); this is an increasing hazardpar(mar=c(5, 0.1, 4, 1) +0.1)sig <-0.5H0 <--pweibull(tt, 1/sig, 5, lower =FALSE, log =TRUE)H1 <- H0 * HRplot(tt,exp(-H0),ylim=c(0,1),type="l",lwd=2,xlab="Time",ylab="Survival",axes=FALSE)lines(tt,exp(-H1),type="l",lwd=2,lty=2)axis(1)box()title(main="Increasing hazard")par(oldpar) # reset graphical parameters
Example: Breast cancer II
Read in data (and provide labels); both data (“bc.txt” in tab delimited txt format and syntax should be in working directory for this to work
Code
source("bc syntax.R")
Table 2.1: The Cox model for the EORTC breast cancer data (Data Set 5), including all risk factors
Code
cfull <-coxph(Surv(survyrs,survstat) ~ surgery + tusi + nodal + age50 + adjchem + tam + periop, data=bc, method="breslow")cfullXbeta <- bc$Xbeta <- cfull$linear.predictorsmean(bc$Xbeta) # is 0sd(bc$Xbeta)
Figure 2.4: Histogram of the prognostic index PI; in dark-grey patients at risk after eight years, in light-grey patients who died or were lost-to-follow-up within eight years
table(bc$risk8) # bc$risk8=0 means at risk# Those still at riskmean(bc$Xbeta[bc$risk8==0])sd(bc$Xbeta[bc$risk8==0])# Those no longer at riskmean(bc$Xbeta[bc$risk8==1])sd(bc$Xbeta[bc$risk8==1])
Figure 2.5: Predicted survival curves for different values of the prognostic index
Code
# Rerun a Cox model with the prognostic index# This should have an estimated coefficient of exactly 1 (check)cXbeta <-coxph(Surv(survyrs,survstat) ~ Xbeta, data=bc, method="breslow")# The survfit function from the survival library can take a "newdata"# argument, in which the value(s) of the covariate(s) is/are specifiednd <-data.frame(Xbeta=-2*sd(Xbeta))sf <-survfit(cXbeta,newdata=nd)plot(sf,lwd=2,conf.int=FALSE,mark.time=FALSE,lty=2,xlab="Time in years",ylab="Survival function")nd <-data.frame(Xbeta=-1*sd(Xbeta))sf <-survfit(cXbeta,newdata=nd)lines(sf,lwd=2,conf.int=FALSE,mark.time=FALSE,lty=3)nd <-data.frame(Xbeta=0)sf <-survfit(cXbeta,newdata=nd)lines(sf,lwd=2,conf.int=FALSE,mark.time=FALSE,lty=1)nd <-data.frame(Xbeta=sd(Xbeta))sf <-survfit(cXbeta,newdata=nd)lines(sf,lwd=2,conf.int=FALSE,mark.time=FALSE,lty=4)nd <-data.frame(Xbeta=2*sd(Xbeta))sf <-survfit(cXbeta,newdata=nd)lines(sf,lwd=2,conf.int=FALSE,mark.time=FALSE,lty=5)# Add Kaplan-Meier plotc.km <-coxph(Surv(survyrs,survstat) ~1, data=bc, method="breslow")sf <-survfit(c.km)lines(sf,lwd=2,conf.int=FALSE,mark.time=FALSE,col=8)legend("bottomleft",c("Mean-2sd","Mean-sd","Mean","Mean+sd","Mean+2sd","Kaplan-Meier"),lwd=2,col=c(1,1,1,1,1,8),lty=c(2,3,1,4,5,1),bty="n")
Figure 2.6: Dynamic prediction curves for different values of the prognostic index