pdf("g12.pdf");options(width=64) #setwd("C:\\Users\\kolassa\\Class542") setwd("~/Taught1/960-542/Data")
# Block 1
#******************************************************/
# Competing Risk models                               */
#******************************************************/
#*************************************************************/
# Data from http://lib.stat.cmu.edu/datasets/csb/ch15.dat    */
# on lung cancer.  Fields are id, institution, group, detect-*/
# ion method, cell type, stage, operated, three other fields,*/
# survival in days, result (0=alive, 1=dead from lung cancer,*/
# 2=dead from other causes.                                  */
#*************************************************************/
fle<-read.table("ch15.dat",na.strings=".",
   stringsAsFactors=FALSE)
names(fle)<-c("number","inst","gr","det","ct","st","op","a",
   "b","c","surv","status")

# All-events survival curve
library(survival)#For survfit
fitall<-survfit(Surv(surv,status>0)~1,data=fle)
#Crude rate for Lung Cancer
fle$crudesurv<-fle$surv;  fle$crudesurv[fle$status==2]<-Inf
fitcrude<-survfit(Surv(crudesurv,status==1)~1,data=fle)
#Net rate for Lung Cancer
fitnet<-survfit(Surv(surv,status==1)~1,data=fle)
#Aalen-Johansen Estimator for Lung Cancer
library(PHInfiniteEstimates)#For aalenjohansen, compete.simulation
out<-aalenjohansen(fle$surv,fle$status)
plot(range(fle$surv),c(0,1),type="n", 
   ylab="Survival", xlab="Time",
   main="Comparison of Competing Risk Ideas",
   sub="Lung cancer study")
lines(fitall,conf.int=FALSE,lty=1)
lines(fitcrude,conf.int=FALSE,lty=2)
lines(fitnet,conf.int=FALSE,lty=3)
lines(out$time,out$surv,lty=4,col=2)
legend(#median(fle$surv),1,
   0,.4,
   lty=1:4,col=c(1,1,1,2),legend=c("Aggregate",
  "Lung Cancer Crude","Lung Cancer Net","Aalen-Johansen"))
compete.simulation()
compete.simulation(sig=1)
compete.simulation(sig=0)
#************************************************************/
#  Prostate data                                            */
# From Andrews and Herzberg (1985), Data: A Collection of   */
# Problems from Many Fields for the Student and Research    */
# Worker, Table 46.  Observations represent subjects in a   */
# prostate cohort study, randomized to one of four dose     */
# levels of diethylstilbestrol.  Rx records dose in four    */
# ordered categories, with 1 being placebo.  Disease stage  */
# is 3 or 4.  monfol is months of followup.  surv is 0 if   */
# alive after 50 mo, and codes cause of death (prostate can-*/
# cer, cardiovascular disease, or other) otherwise.         */
# http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
# The on-line version of the data set adds 3 fields before  */
# the first field in the book.  Variables of interest are   */
# stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
# online version, resp. The data file has more columns than */
# we are interested in.  Put place-holding variables in for */
# variables not of interest between variables of interest.  */
# Data were previously published by Byar and Corle (1977    */
# Chronic Disease) and Byar and Green (1980 Bull. Cancer).  */
# Lower value of dichotomized dose begins with blank to    */
# make it alphabetized before high.                         */
#************************************************************/
prostate<-read.table("T46.1")[,1:11]
names(prostate)<-c("tableno","subtableno","lineno","patno",
   "stage","rx","beginmo","beginday","beginyear","monfol",
   "surv" )
prostate<-prostate[,c("stage","rx","monfol","surv")]
prostate$dose<-c(" low","high")[1+(prostate$rx>1)]
stage4<-prostate[prostate$stage==4,]
prostate$crudetime<-prostate$monfol;
 prostate$crudetime[prostate$surv>1]<-Inf
fitcrude<-survfit(Surv(crudetime,surv>0)~1,data=prostate)
fitnet<-survfit(Surv(monfol,surv!=1)~1,data=prostate)
prostate$pcrudetime<-prostate$monfol;
prostate$pcrudetime[prostate$surv>2]<-Inf
fitpcrude<-survfit(Surv(pcrudetime,surv>0)~1,data=prostate)
aj<-aalenjohansen(prostate$monfol,prostate$surv)
plot(c(0,max(prostate$monfol)),c(0,1),
   ylab="Survival", xlab="Time",
   main="Byar Data",type="n")
lines(fitcrude,lty=1,conf.int=FALSE)
lines(fitnet,lty=2,conf.int=FALSE)
lines(fitpcrude,lty=3,conf.int=FALSE,col=2)
lines(aj$time,aj$surv,lty=4)
legend(#max(prostate$monfol)/2,1,
  0,.4,
  lty=1:4,legend=c("Crude","Net","Partial","Aalen-Johansen"))
# Block 2
library(survival)#For survfit
#******************************************************/
# Competing Risk via Multi-Stage Models               */
#******************************************************/
#Replicate competing risk analysis of last class using multi-
#stage models.  Split causes of death into first, all others.
prostate$survf<-factor(pmin(prostate$surv,2))
#Survfit calculates the Aalen-Johansen estimator of the transition
#cdf, rather than the survival curve.
fitcr<-survfit(Surv(monfol,survf)~1,data=prostate)
cat('\n Multi-stage model output for Competing Risk  \n')
fitcr
plot(fitcr,conf.int=FALSE,col=c(1,2),xlab="Months of Followup",
  ylab="Probability",main="Cumulative Incidence for Prostate Data")
legend(0,.4,
#  median(prostate$monfol),.1,
   legend=c("Cause 1","Other Causes"),
   lty=c(1,1,2,2),col=1:2)
# Block 3
#*******************************************************/
#  Multi-Stage Model Fit in More Generality            */
#*******************************************************/
#************************************************************/
# Data from http://lib.stat.cmu.edu/datasets/csb/ch14.dat   */
# Case Studies in Biometry, ch. 14, Interpretation of a     */
# Leukemia Trial Stopped Early, Emerson, et al.  Consider   */
# age, treatment arm (0 for D, 1 for I), remission indicator*/
# and time of remission, transplant indicator and time      */
# of transplant, end of study time and indicator of death,  */
# fab (a measure of disease status, -1=>missing), gender    */
# (1=M,0=F), white blood count (-1=>missing), kscore (a     */
# measure of disease status), and chemotherapy rounds before*/
# remission).                                               */
# See http://lib.stat.cmu.edu/datasets/csb/ch14.txt for     */
# original variable list.                                   */
#************************************************************/
emerson<-read.table("ch14.dat",na.strings=".",
   stringsAsFactors=FALSE,col.names=c("id","startdate","ttt",
   "sex","age","fab","kscore","wbc","x3","x4","b1","b2","d2",
   "crdate","lastdate","statusc","transc","trandate","b6"),
   colClasses=c(startdate="character",crdate="character",
      trandate="character",lastdate="character"))
#Build 0-1 status variable for remission, transplant, and
#eventual death.
emerson$crind<-as.numeric(emerson$b2=="Y")
emerson$transs<-as.numeric(emerson$transc=="Y")
emerson$status<-as.numeric(emerson$statusc=="D")
# Replace missingness indicator by standard R NA.
emerson$fab[emerson$fab==-1]<-NA
emerson$wbc[emerson$wbc==-1]<-NA
# Create categorical variable for wbc, kscore
emerson$wbcl<-1*(emerson$wbc<13.5)
emerson$ksh<- (emerson$kscore>55)+ (emerson$kscore>85)
# Replace event times coded as dates to times
for(j in c("startdate","crdate","lastdate","trandate")) 
   emerson[[j]]<-as.numeric(
      as.Date(as.character(emerson[[j]]),"%m%d%y"))
emerson$crdate[emerson$crind==0]<-emerson$lastdate[
   emerson$crind==0]
# Replace times by elapsed times.
emerson$crtime<-emerson$crdate-emerson$startdate;
emerson$lasttime<-emerson$lastdate-emerson$startdate;
emerson$ttime<-emerson$trandate-emerson$startdate;
#Block from here until "Now for something completely different"
#is identical to that done for time-dependent covariate.
e1<-emerson
e2<-emerson[emerson$transs==1,]
#survival package has a tool tmerge to handle the next bit
#down to but not including rbind.  I don't find it so helpful.
e1$entry<-0
e2$entry<-e2$ttime 
e1$lasttime[e1$transs==1]<-e1$ttime[e1$transs==1]
# Non-transplant status is 0 if patient got transplant or
# final status was 0, and 1 otherwise.
e1$status<-(1-emerson$transs)*emerson$status
ebig<-rbind(e1,e2)
#And now for something completely different,
#code censoring after transplant as 2
ebig$event<-ebig$status
#First stage lines who eventually gets transplant should marked.
ebig$event[(ebig$entry==0)&(ebig$transs==1)]<-2
ebig$event<-factor(ebig$event,0:2,
  labels=c("cens","death","trans"))
msmfit<-survfit(Surv(entry,lasttime,event)~ttt,data=ebig,id=id)
cat('\n General Multi-stage model output: Transplant, Death\n')
print(msmfit)
cat('\n General Multi-stage model transitions  \n')
print(msmfit$transitions)
plot(msmfit,xlab="Days",ylab="Probability",col=c(1,1,2,2),
   lty=c(1,2,1,2),main="Probability of State",
   sub="Emerson transplant data")
legend(0,.9,legend=as.vector(outer(msmfit$states[-1],
   levels(as.factor(emerson$ttt)),"paste")),col=c(1,2,1,2),
   lty=c(1,1,2,2))
# Block 4
#******************************************************/
# Multi-Stage Regression Model Fit in More Generality */
#******************************************************/
regfit<-coxph(Surv(entry,lasttime,event)~ttt,data=ebig,id=id)
cat('\n Regression Model for Multi-State Transitions: Remission  \n')
print(regfit)
#Now fit a model including remission instead of transplant.
e1<-emerson
e2<-emerson[emerson$crind==1,]
#survival package has a tool tmerge to handle the next bit
#down to but not including rbind.  I don't find it so helpful.
e1$entry<-0
e2$entry<-e2$crtime 
e1$lasttime[e1$crind==1]<-e1$crtime[e1$crind==1]
#e1 status is 1 only if the subject had the event and no remisison
e1$status<-(1-e1$crind)*e1$status
remission<-rbind(e1,e2)
#code remission as 2
remission$event<-remission$status
#Anyone in first stage who eventually gets remission should be so marked.
remission$event[(remission$entry==0)&(remission$crind==1)]<-2
remission$event<-factor(remission$event,0:2,
  labels=c("cens","death","remission"))
msmfit<-survfit(Surv(entry,lasttime,event)~ttt,
  data=remission,id=id)
print(msmfit)
cat('\n General Multi-stage model transitions  \n')
print(msmfit$transitions)
plot(msmfit, xlab="Days",ylab="Probability", col=c(1,1,2,2), 
  lty=c(1,2,1,2),main="Transitions for Emerson remission data")
legend(0,.9,legend=as.vector(outer(msmfit$states[-1],
  levels(as.factor(emerson$ttt)),"paste")),
  lty=c(1,1,2,2),col=c(1,2,1,2))
regfit<-coxph(Surv(entry,lasttime,event)~ttt+tt(trandate),
  data=remission,id=id,tt=function(trd) lasttime>trd)
print(regfit)