R version 4.2.2 Patched (2022-11-10 r83330) -- "Innocent and Trusting"
Copyright (C) 2022 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> pdf("g08.pdf");options(width=64)
> #setwd("C:\\Users\\kolassa\\Class542")
> setwd("~/Taught1/960-542/Data")
> #**************************************************/
> #* Time-Dependent Covariates                      */
> #**************************************************/
> #************************************************************/
> # Data from http://lib.stat.cmu.edu/datasets/csb/ch14.dat   */
> # Case Studies in Biometry, ch. 14, Interpretation of a     */
> # Leukemia Trial Stopped Early, Emerson, et al.  Consider   */
> # age, treatment arm (0 for D, 1 for I), remission indicator*/
> # and time of remission, transplant indicator and time      */
> # of transplant, end of study time and indicator of death,  */
> # fab (a measure of disease status, -1=>missing), gender    */
> # (1=M,0=F), white blood count (-1=>missing), kscore (a     */
> # measure of disease status), and chemotherapy rounds before*/
> # remission).                                               */
> # See http://lib.stat.cmu.edu/datasets/csb/ch14.txt for     */
> # original variable list.                                   */
> #************************************************************/
> emerson<-read.table("ch14.dat",na.strings=".",
+    stringsAsFactors=FALSE,col.names=c("id","startdate","ttt",
+    "sex","age","fab","kscore","wbc","x3","x4","b1","b2","d2",
+    "crdate","lastdate","statusc","transc","trandate","b6"),
+    colClasses=c(startdate="character",crdate="character",
+       trandate="character",lastdate="character"))
> #Build 0-1 status variable for remission, transplant, and
> #eventual death.
> emerson$crind<-as.numeric(emerson$b2=="Y")
> emerson$transs<-as.numeric(emerson$transc=="Y")
> emerson$status<-as.numeric(emerson$statusc=="D")
> # Replace missingness indicator by standard R NA.
> emerson$fab[emerson$fab==-1]<-NA
> emerson$wbc[emerson$wbc==-1]<-NA
> # Create categorical variable for wbc, kscore
> emerson$wbcl<-1*(emerson$wbc<13.5)
> emerson$ksh<- (emerson$kscore>55)+ (emerson$kscore>85)
> # Replace event times coded as dates to times
> for(j in c("startdate","crdate","lastdate","trandate")) 
+    emerson[[j]]<-as.numeric(
+       as.Date(as.character(emerson[[j]]),"%m%d%y"))
> emerson$crdate[emerson$crind==0]<-emerson$lastdate[
+    emerson$crind==0]
> # Replace times by elapsed times.
> emerson$crtime<-emerson$crdate-emerson$startdate;
> emerson$lasttime<-emerson$lastdate-emerson$startdate;
> emerson$ttime<-emerson$trandate-emerson$startdate;
> cat(til1<-"Modeling time to death\n",
+   "Fixed-covariate Cox regression\n")
Modeling time to death
 Fixed-covariate Cox regression
> library(survival)#For coxph
> coxph(Surv(lasttime,status)~ttt,data=emerson)
Call:
coxph(formula = Surv(lasttime, status) ~ ttt, data = emerson)

        coef exp(coef) se(coef)      z      p
tttI -0.4983    0.6076   0.2187 -2.278 0.0227

Likelihood ratio test=5.25  on 1 df, p=0.02195
n= 130, number of events= 87 
> cat('\n Incorrect use of transplant status\n')

 Incorrect use of transplant status
> coxph(Surv(lasttime,status)~ttt+transs,data=emerson)
Call:
coxph(formula = Surv(lasttime, status) ~ ttt + transs, data = emerson)

          coef exp(coef) se(coef)      z      p
tttI   -0.4784    0.6198   0.2185 -2.189 0.0286
transs -1.0015    0.3673   0.4618 -2.169 0.0301

Likelihood ratio test=11.6  on 2 df, p=0.003026
n= 130, number of events= 87 
> cat('\n Modeling time to transplant  \n')

 Modeling time to transplant  
> coxph(Surv(ttime,transs)~ttt,data=emerson)
Call:
coxph(formula = Surv(ttime, transs) ~ ttt, data = emerson)

       coef exp(coef) se(coef)     z     p
tttI 0.4705    1.6008   0.5766 0.816 0.414

Likelihood ratio test=0.68  on 1 df, p=0.4082
n= 14, number of events= 14 
   (116 observations deleted due to missingness)
> #***********************************************/
> # Account for time-dependent transplant status.*/
> #***********************************************/
> #Break patient history into blocks in which transplant
> #status is constant.  Below e1 is the pre-transplant
> #behavior, and e2 is the post-transplant behavior.
> e1<-emerson
> e2<-emerson[emerson$transs==1,]
> e1$transi<-0
> e2$transi<-1
> #survival package has a tool tmerge to handle the next bit
> #down to but not including rbind.  I don't find it so helpful.
> e1$entry<-0
> e2$entry<-e2$ttime 
> e1$lasttime[emerson$transs==1]<-emerson$ttime[emerson$transs==1]
> # Non-transplant status is 0 if patient got transplant or
> # final status was 0, and 1 otherwise.
> e1$status<-(1-emerson$transs)*emerson$status
> emersonbig<-rbind(e1,e2)
> cat(til1,"Adding Transplant as Time-dependent covariate")
Modeling time to death
 Adding Transplant as Time-dependent covariate> coxph(Surv(entry,lasttime,status)~ttt+transi,data=emersonbig,
+   method='breslow')
Call:
coxph(formula = Surv(entry, lasttime, status) ~ ttt + transi, 
    data = emersonbig, method = "breslow")

          coef exp(coef) se(coef)      z      p
tttI   -0.4839    0.6164   0.2186 -2.214 0.0268
transi -0.7853    0.4560   0.4658 -1.686 0.0918

Likelihood ratio test=8.81  on 2 df, p=0.0122
n= 144, number of events= 87 
> #*************************************************************/
> #Add time since remission as a time-dependent covariate, and */
> #then add age as a fixed covariate as well.                  */
> #*************************************************************/
> #Building the counting-process data set here is more complicated
> #than before, since it takes potentially a different value at
> #each time point.
> #Loop through all of the possible event times, starting with
> #the earliest, and build a very big data set with every sub-
> #ject at risk at every time listed.  At the beginning, entry
> #time is 0.  This process is slow; track the time.
> begin<-Sys.time()
> start<-0
> #Before we do start the loop, the very big data set is empty.
> verybig<-NULL
> #Now loop through all the event times.  The current time at
> #each stage in the loop is tt.
> for(tt in sort(unique(emerson$lasttime))){
+ #The individuals at risk are those with times (either event 
+ #or censoring) at least tt.
+   tempdata<-emerson[emerson$lasttime>=tt,]
+ #The minimal data needed to apply the coxph model are entry,
+ #exit, and status.
+   tempdata$entry<-start
+ #Status is zero (no event) for everyone at every time, except
+ #for those having the event at time tt.
+   tempdata$status<-tempdata$status*(tempdata$lasttime==tt)
+   tempdata$lasttime<-tt
+ #Now constsruct some useful covariates: Indicator of whether the
+ #subject is in remission, time since remission (or zero if not
+ #in remission), and current age.
+   tempdata$cri<-(tempdata$crtime cat("Elapsed time",Sys.time()-begin,"\n")
Elapsed time 0.2114804 
> verybig0<-verybig
> #Do this more effeciently, by first calculating how many
> #records will comprise the counting process data set, 
> #setting aside sufficient space, and filling the space in.
> count<-0
> for(tt in sort(unique(emerson$lasttime))){
+    count<-count+sum(emerson$lasttime>=tt)
+ }
> verybig<-as.data.frame(array(NA,c(count,length(emerson)+1)))
> names(verybig)<-c(names(emerson),"entry")
> count<-0
> #At the beginning, entry time is 0.
> start<-0
> #Now loop through all the event times.  The current time at
> #each stage in the loop is tt.
> for(tt in sort(unique(emerson$lasttime))){
+    cat("tt=",tt)
+ #The individuals at risk are those with times (either event 
+ #or censoring) at least tt.
+   use<-emerson$lasttime>=tt
+   biguse<-count+seq(sum(use))
+   verybig[biguse,names(emerson)]<-emerson[use,]
+ #The minimal data needed to apply the coxph model are entry,
+ #exit, and status.
+   verybig$entry[biguse]<-start
+ #Status is zero (no event) for everyone at every time, except
+ #for those having the event at time tt.
+   verybig$status[biguse]<-verybig$status[biguse]*(emerson[use,"lasttime"]==tt)
+   verybig$lasttime[biguse]<-tt
+   start<-tt
+   count<-count+sum(use)
+ }
tt= 3tt= 6tt= 9tt= 15tt= 17tt= 18tt= 30tt= 32tt= 33tt= 44tt= 46tt= 60tt= 67tt= 81tt= 132tt= 136tt= 150tt= 166tt= 179tt= 183tt= 194tt= 202tt= 205tt= 213tt= 216tt= 232tt= 237tt= 238tt= 244tt= 256tt= 260tt= 268tt= 271tt= 272tt= 276tt= 277tt= 279tt= 287tt= 290tt= 297tt= 311tt= 313tt= 331tt= 333tt= 334tt= 336tt= 338tt= 339tt= 350tt= 351tt= 360tt= 361tt= 364tt= 365tt= 370tt= 381tt= 383tt= 391tt= 393tt= 395tt= 396tt= 410tt= 415tt= 416tt= 422tt= 434tt= 439tt= 443tt= 444tt= 448tt= 453tt= 458tt= 463tt= 469tt= 470tt= 473tt= 474tt= 475tt= 477tt= 488tt= 497tt= 502tt= 507tt= 527tt= 528tt= 541tt= 566tt= 582tt= 585tt= 592tt= 597tt= 598tt= 600tt= 605tt= 636tt= 637tt= 642tt= 678tt= 698tt= 721tt= 753tt= 823tt= 831tt= 854tt= 871tt= 876tt= 913tt= 921tt= 982tt= 993tt= 1090tt= 1198tt= 1218tt= 1349tt= 1352tt= 1424tt= 1448tt= 1479tt= 1536tt= 1559tt= 1585tt= 1800tt= 1848> verybig$cri<-(verybig$lasttime>verybig$crtime)*verybig$crind
> verybig$crt<-verybig$cri*(verybig$lasttime-verybig$crtime)
> verybig$fage<-verybig$age+verybig$lasttime/365.25
> #Apply coxph to this big data set.  Ties are set to Breslow,
> #in case you are trying parallel computations in SAS.
> coxph(Surv(entry,lasttime,status)~ttt+cri,data=verybig,
+   method='breslow')
Call:
coxph(formula = Surv(entry, lasttime, status) ~ ttt + cri, data = verybig, 
    method = "breslow")

        coef exp(coef) se(coef)      z        p
tttI -0.3285    0.7200   0.2239 -1.467    0.142
cri  -1.2496    0.2866   0.2494 -5.010 5.44e-07

Likelihood ratio test=27.72  on 2 df, p=9.58e-07
n= 7967, number of events= 87 
> # Results are identical for the large data set constructed
> # more slowly.
> coxph(Surv(entry,lasttime,status)~ttt+cri,data=verybig0,
+   method='breslow')
Call:
coxph(formula = Surv(entry, lasttime, status) ~ ttt + cri, data = verybig0, 
    method = "breslow")

        coef exp(coef) se(coef)      z        p
tttI -0.3285    0.7200   0.2239 -1.467    0.142
cri  -1.2496    0.2866   0.2494 -5.010 5.44e-07

Likelihood ratio test=27.72  on 2 df, p=9.58e-07
n= 7967, number of events= 87 
> # Argument tt seems to be a a function with first  argument 
> # referring to a data frame item, the time variable, and three 
> # dots.  R uses the three dot argument for arguments passed to a
> # function inside of tt.  From this I conjecture that coxph 
> # modifies tt before evaluating it; this modification must 
> # include adding additional commands using the ... .  If you add
> # the tt= argument, you'll get something, but it likely won't be 
> # what you want.
> coxph(Surv(lasttime,status)~ttt+tt(crdate),data=emerson,
+   tt=function(x,t,...) as.numeric(t>x),method='breslow')
Call:
coxph(formula = Surv(lasttime, status) ~ ttt + tt(crdate), data = emerson, 
    tt = function(x, t, ...) as.numeric(t > x), method = "breslow")

              coef exp(coef) se(coef)     z      p
tttI       -0.4986    0.6074   0.2187 -2.28 0.0226
tt(crdate)      NA        NA   0.0000    NA     NA

Likelihood ratio test=5.26  on 1 df, p=0.02185
n= 130, number of events= 87 
> #**********************************/
> #Add age as a fixed time variable.*/
> #**********************************/
> coxph(Surv(entry,lasttime,status)~ttt+age,
+   data=verybig,method='breslow')
Call:
coxph(formula = Surv(entry, lasttime, status) ~ ttt + age, data = verybig, 
    method = "breslow")

          coef exp(coef)  se(coef)      z      p
tttI -0.481032  0.618145  0.218696 -2.200 0.0278
age   0.016220  1.016352  0.008315  1.951 0.0511

Likelihood ratio test=9.04  on 2 df, p=0.01088
n= 7967, number of events= 87 
> #***********************************************************/
> #Age as a time--dependent covariate gives same estimates as*/
> #it does as a fixed covariate.  Baseline estimate changes. */
> #***********************************************************/
> coxph(Surv(entry,lasttime,status)~ttt+fage,
+   data=verybig,method='breslow')
Call:
coxph(formula = Surv(entry, lasttime, status) ~ ttt + fage, data = verybig, 
    method = "breslow")

          coef exp(coef)  se(coef)      z      p
tttI -0.481032  0.618145  0.218696 -2.200 0.0278
fage  0.016220  1.016352  0.008315  1.951 0.0511

Likelihood ratio test=9.04  on 2 df, p=0.01088
n= 7967, number of events= 87 
> coxph(Surv(lasttime,status)~ttt+tt(age),method='breslow',
+   tt=function(x,t,...) x+t/365.25, data=emerson)
Call:
coxph(formula = Surv(lasttime, status) ~ ttt + tt(age), data = emerson, 
    tt = function(x, t, ...) x + t/365.25, method = "breslow")

             coef exp(coef)  se(coef)      z      p
tttI    -0.481032  0.618145  0.218696 -2.200 0.0278
tt(age)  0.016220  1.016352  0.008315  1.951 0.0511

Likelihood ratio test=9.04  on 2 df, p=0.01088
n= 130, number of events= 87 
> #**********************************************************/
> # Data excerpted from NIH Multi Center AIDS Cohort Study. */
> # Cohort consists of gay and bisexual men.  Variables are */
> # health: initial health report                           */
> # life: finer self-reported happiness measure,            */
> # happy: self-reported level of happiness at enrollment   */
> # status: 1 for death, 0 ow,                              */
> # entry: age at entry,                                    */
> # time: time to death or loss to followup                 */
> # aaids: age at development of aids.                      */
> #**********************************************************/
> mcac<-read.table("mcac.dat")
> names(mcac)<- c("health","life","happy","status","entry",
+    "time","aaids")
> #******************************************************/
> # Restrict sample to individuals with reported value  */
> # for initial health, and prepare for counting-style  */
> # input to capture time-dependent aids status.        */
> #******************************************************/
> mcac<-mcac[mcac$health>0,]
> first<-mcac[mcac$entry second<-mcac[mcac$time>mcac$aaids,]
> first$aids<-0
> second$aids<-1
> first$status[first$aaids first$time<-first$aaids
> second$entry<-second$aaids
> bigmcac<-rbind(first,second)
> coxph(Surv(entry,time,status)~happy+aids,data=bigmcac)
Call:
coxph(formula = Surv(entry, time, status) ~ happy + aids, data = bigmcac)

           coef exp(coef)  se(coef)      z       p
happy  -0.24898   0.77960   0.08333 -2.988 0.00281
aids    6.09396 443.17126   0.14443 42.194 < 2e-16

Likelihood ratio test=2174  on 2 df, p=< 2.2e-16
n= 3724, number of events= 436 
> #*****************************************************/
> # Compare treatment of a categorical covariate as    */
> # a stratifier and as a linear factor.               */
> #*****************************************************/
> summary(coxph(Surv(crtime,crind)~ttt+strata(wbcl),
+    data=emerson))
Call:
coxph(formula = Surv(crtime, crind) ~ ttt + strata(wbcl), data = emerson)

  n= 129, number of events= 89 
   (1 observation deleted due to missingness)

       coef exp(coef) se(coef)     z Pr(>|z|)   
tttI 0.5698    1.7679   0.2176 2.618  0.00884 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

     exp(coef) exp(-coef) lower .95 upper .95
tttI     1.768     0.5656     1.154     2.708

Concordance= 0.59  (se = 0.028 )
Likelihood ratio test= 6.96  on 1 df,   p=0.008
Wald test            = 6.86  on 1 df,   p=0.009
Score (logrank) test = 7.03  on 1 df,   p=0.008

> summary(coxph(Surv(crtime,crind)~ttt+factor(wbcl),
+    data=emerson))
Call:
coxph(formula = Surv(crtime, crind) ~ ttt + factor(wbcl), data = emerson)

  n= 129, number of events= 89 
   (1 observation deleted due to missingness)

                coef exp(coef) se(coef)     z Pr(>|z|)   
tttI          0.5703    1.7688   0.2161 2.639  0.00833 **
factor(wbcl)1 0.3534    1.4239   0.2154 1.641  0.10084   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

              exp(coef) exp(-coef) lower .95 upper .95
tttI              1.769     0.5654    1.1580     2.702
factor(wbcl)1     1.424     0.7023    0.9336     2.172

Concordance= 0.614  (se = 0.032 )
Likelihood ratio test= 10.81  on 2 df,   p=0.004
Wald test            = 10.7  on 2 df,   p=0.005
Score (logrank) test = 10.99  on 2 df,   p=0.004

> #****************************************************/
> #Time dependent covariate for diagnostic purposes.  */
> #****************************************************/
> verybig$diagn<-verybig$lasttime*verybig$wbcl
> coxph(Surv(entry,lasttime,status)~wbcl+diagn,
+   data=verybig,method='breslow')
Call:
coxph(formula = Surv(entry, lasttime, status) ~ wbcl + diagn, 
    data = verybig, method = "breslow")

            coef  exp(coef)   se(coef)      z     p
wbcl  -0.5486476  0.5777306  0.3907036 -1.404 0.160
diagn  0.0007751  1.0007754  0.0009128  0.849 0.396

Likelihood ratio test=2.33  on 2 df, p=0.3117
n= 7912, number of events= 86 
   (55 observations deleted due to missingness)
> # Draw proportional hazards plot.
> regout<-basehaz(coxph(Surv(entry,lasttime,status)~strata(wbcl),
+   data=verybig,method='breslow'))
> plot(regout$time,regout$hazard,pch=as.numeric(regout$strata))
> #****************************************************/
> #Cox and Snell "residuals"                          */
> #****************************************************/
> library(survival)#For coxph
> out<-coxph(Surv(lasttime,status)~ttt,data=emerson)
> csr<-emerson$status-out$resid
> temp<-basehaz(coxph(Surv(csr,emerson$status)~1))
> plot(temp$time,temp$hazard,type="s",xlab="Time",
+   ylab="Fitted Hazard",main="Cox and Snell Plot")
>