pdf("g12.pdf");options(width=64) #setwd("C:\\Users\\kolassa\\Class542") setwd("~/Taught1/960-542/Data")
# Block 1 #******************************************************/ # Competing Risk models */ #******************************************************/ #*************************************************************/ # Data from http://lib.stat.cmu.edu/datasets/csb/ch15.dat */ # on lung cancer. Fields are id, institution, group, detect-*/ # ion method, cell type, stage, operated, three other fields,*/ # survival in days, result (0=alive, 1=dead from lung cancer,*/ # 2=dead from other causes. */ #*************************************************************/ fle<-read.table("ch15.dat",na.strings=".", stringsAsFactors=FALSE) names(fle)<-c("number","inst","gr","det","ct","st","op","a", "b","c","surv","status") # All-events survival curve library(survival)#For survfit fitall<-survfit(Surv(surv,status>0)~1,data=fle) #Crude rate for Lung Cancer fle$crudesurv<-fle$surv; fle$crudesurv[fle$status==2]<-Inf fitcrude<-survfit(Surv(crudesurv,status==1)~1,data=fle) #Net rate for Lung Cancer fitnet<-survfit(Surv(surv,status==1)~1,data=fle) #Aalen-Johansen Estimator for Lung Cancer library(PHInfiniteEstimates)#For aalenjohansen, compete.simulation out<-aalenjohansen(fle$surv,fle$status) plot(range(fle$surv),c(0,1),type="n", ylab="Survival", xlab="Time", main="Comparison of Competing Risk Ideas", sub="Lung cancer study") lines(fitall,conf.int=FALSE,lty=1) lines(fitcrude,conf.int=FALSE,lty=2) lines(fitnet,conf.int=FALSE,lty=3) lines(out$time,out$surv,lty=4,col=2) legend(#median(fle$surv),1, 0,.4, lty=1:4,col=c(1,1,1,2),legend=c("Aggregate", "Lung Cancer Crude","Lung Cancer Net","Aalen-Johansen")) compete.simulation() compete.simulation(sig=1) compete.simulation(sig=0) #************************************************************/ # Prostate data */ # From Andrews and Herzberg (1985), Data: A Collection of */ # Problems from Many Fields for the Student and Research */ # Worker, Table 46. Observations represent subjects in a */ # prostate cohort study, randomized to one of four dose */ # levels of diethylstilbestrol. Rx records dose in four */ # ordered categories, with 1 being placebo. Disease stage */ # is 3 or 4. monfol is months of followup. surv is 0 if */ # alive after 50 mo, and codes cause of death (prostate can-*/ # cer, cardiovascular disease, or other) otherwise. */ # http://lib.stat.cmu.edu/datasets/Andrews/T46.1 */ # The on-line version of the data set adds 3 fields before */ # the first field in the book. Variables of interest are */ # stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */ # online version, resp. The data file has more columns than */ # we are interested in. Put place-holding variables in for */ # variables not of interest between variables of interest. */ # Data were previously published by Byar and Corle (1977 */ # Chronic Disease) and Byar and Green (1980 Bull. Cancer). */ # Lower value of dichotomized dose begins with blank to */ # make it alphabetized before high. */ #************************************************************/ prostate<-read.table("T46.1")[,1:11] names(prostate)<-c("tableno","subtableno","lineno","patno", "stage","rx","beginmo","beginday","beginyear","monfol", "surv" ) prostate<-prostate[,c("stage","rx","monfol","surv")] prostate$dose<-c(" low","high")[1+(prostate$rx>1)] stage4<-prostate[prostate$stage==4,] prostate$crudetime<-prostate$monfol; prostate$crudetime[prostate$surv>1]<-Inf fitcrude<-survfit(Surv(crudetime,surv>0)~1,data=prostate) fitnet<-survfit(Surv(monfol,surv!=1)~1,data=prostate) prostate$pcrudetime<-prostate$monfol; prostate$pcrudetime[prostate$surv>2]<-Inf fitpcrude<-survfit(Surv(pcrudetime,surv>0)~1,data=prostate) aj<-aalenjohansen(prostate$monfol,prostate$surv) plot(c(0,max(prostate$monfol)),c(0,1), ylab="Survival", xlab="Time", main="Byar Data",type="n") lines(fitcrude,lty=1,conf.int=FALSE) lines(fitnet,lty=2,conf.int=FALSE) lines(fitpcrude,lty=3,conf.int=FALSE,col=2) lines(aj$time,aj$surv,lty=4) legend(#max(prostate$monfol)/2,1, 0,.4, lty=1:4,legend=c("Crude","Net","Partial","Aalen-Johansen")) # Block 2 library(survival)#For survfit #******************************************************/ # Competing Risk via Multi-Stage Models */ #******************************************************/ #Replicate competing risk analysis of last class using multi- #stage models. Split causes of death into first, all others. prostate$survf<-factor(pmin(prostate$surv,2)) #Survfit calculates the Aalen-Johansen estimator of the transition #cdf, rather than the survival curve. fitcr<-survfit(Surv(monfol,survf)~1,data=prostate) cat('\n Multi-stage model output for Competing Risk \n') fitcr plot(fitcr,conf.int=FALSE,col=c(1,2),xlab="Months of Followup", ylab="Probability",main="Cumulative Incidence for Prostate Data") legend(0,.4, # median(prostate$monfol),.1, legend=c("Cause 1","Other Causes"), lty=c(1,1,2,2),col=1:2) # Block 3 #*******************************************************/ # Multi-Stage Model Fit in More Generality */ #*******************************************************/ #************************************************************/ # Data from http://lib.stat.cmu.edu/datasets/csb/ch14.dat */ # Case Studies in Biometry, ch. 14, Interpretation of a */ # Leukemia Trial Stopped Early, Emerson, et al. Consider */ # age, treatment arm (0 for D, 1 for I), remission indicator*/ # and time of remission, transplant indicator and time */ # of transplant, end of study time and indicator of death, */ # fab (a measure of disease status, -1=>missing), gender */ # (1=M,0=F), white blood count (-1=>missing), kscore (a */ # measure of disease status), and chemotherapy rounds before*/ # remission). */ # See http://lib.stat.cmu.edu/datasets/csb/ch14.txt for */ # original variable list. */ #************************************************************/ emerson<-read.table("ch14.dat",na.strings=".", stringsAsFactors=FALSE,col.names=c("id","startdate","ttt", "sex","age","fab","kscore","wbc","x3","x4","b1","b2","d2", "crdate","lastdate","statusc","transc","trandate","b6"), colClasses=c(startdate="character",crdate="character", trandate="character",lastdate="character")) #Build 0-1 status variable for remission, transplant, and #eventual death. emerson$crind<-as.numeric(emerson$b2=="Y") emerson$transs<-as.numeric(emerson$transc=="Y") emerson$status<-as.numeric(emerson$statusc=="D") # Replace missingness indicator by standard R NA. emerson$fab[emerson$fab==-1]<-NA emerson$wbc[emerson$wbc==-1]<-NA # Create categorical variable for wbc, kscore emerson$wbcl<-1*(emerson$wbc<13.5) emerson$ksh<- (emerson$kscore>55)+ (emerson$kscore>85) # Replace event times coded as dates to times for(j in c("startdate","crdate","lastdate","trandate")) emerson[[j]]<-as.numeric( as.Date(as.character(emerson[[j]]),"%m%d%y")) emerson$crdate[emerson$crind==0]<-emerson$lastdate[ emerson$crind==0] # Replace times by elapsed times. emerson$crtime<-emerson$crdate-emerson$startdate; emerson$lasttime<-emerson$lastdate-emerson$startdate; emerson$ttime<-emerson$trandate-emerson$startdate; #Block from here until "Now for something completely different" #is identical to that done for time-dependent covariate. e1<-emerson e2<-emerson[emerson$transs==1,] #survival package has a tool tmerge to handle the next bit #down to but not including rbind. I don't find it so helpful. e1$entry<-0 e2$entry<-e2$ttime e1$lasttime[e1$transs==1]<-e1$ttime[e1$transs==1] # Non-transplant status is 0 if patient got transplant or # final status was 0, and 1 otherwise. e1$status<-(1-emerson$transs)*emerson$status ebig<-rbind(e1,e2) #And now for something completely different, #code censoring after transplant as 2 ebig$event<-ebig$status #First stage lines who eventually gets transplant should marked. ebig$event[(ebig$entry==0)&(ebig$transs==1)]<-2 ebig$event<-factor(ebig$event,0:2, labels=c("cens","death","trans")) msmfit<-survfit(Surv(entry,lasttime,event)~ttt,data=ebig,id=id) cat('\n General Multi-stage model output: Transplant, Death\n') print(msmfit) cat('\n General Multi-stage model transitions \n') print(msmfit$transitions) plot(msmfit,xlab="Days",ylab="Probability",col=c(1,1,2,2), lty=c(1,2,1,2),main="Probability of State", sub="Emerson transplant data") legend(0,.9,legend=as.vector(outer(msmfit$states[-1], levels(as.factor(emerson$ttt)),"paste")),col=c(1,2,1,2), lty=c(1,1,2,2)) # Block 4 #******************************************************/ # Multi-Stage Regression Model Fit in More Generality */ #******************************************************/ regfit<-coxph(Surv(entry,lasttime,event)~ttt,data=ebig,id=id) cat('\n Regression Model for Multi-State Transitions: Remission \n') print(regfit) #Now fit a model including remission instead of transplant. e1<-emerson e2<-emerson[emerson$crind==1,] #survival package has a tool tmerge to handle the next bit #down to but not including rbind. I don't find it so helpful. e1$entry<-0 e2$entry<-e2$crtime e1$lasttime[e1$crind==1]<-e1$crtime[e1$crind==1] #e1 status is 1 only if the subject had the event and no remisison e1$status<-(1-e1$crind)*e1$status remission<-rbind(e1,e2) #code remission as 2 remission$event<-remission$status #Anyone in first stage who eventually gets remission should be so marked. remission$event[(remission$entry==0)&(remission$crind==1)]<-2 remission$event<-factor(remission$event,0:2, labels=c("cens","death","remission")) msmfit<-survfit(Surv(entry,lasttime,event)~ttt, data=remission,id=id) print(msmfit) cat('\n General Multi-stage model transitions \n') print(msmfit$transitions) plot(msmfit, xlab="Days",ylab="Probability", col=c(1,1,2,2), lty=c(1,2,1,2),main="Transitions for Emerson remission data") legend(0,.9,legend=as.vector(outer(msmfit$states[-1], levels(as.factor(emerson$ttt)),"paste")), lty=c(1,1,2,2),col=c(1,2,1,2)) regfit<-coxph(Surv(entry,lasttime,event)~ttt+tt(trandate), data=remission,id=id,tt=function(trd) lasttime>trd) print(regfit)