R version 4.2.2 Patched (2022-11-10 r83330) -- "Innocent and Trusting"
Copyright (C) 2022 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> pdf("g04.pdf");options(width=64)
> #setwd("C:\\Users\\kolassa\\Class542")
> setwd("~/Taught1/960-542/Data")
> #Package below must be at least version 2.7.
> library(PHInfiniteEstimates)#For lrapproximations
> # Show four versions of the log rank statistic.
> # 1. Using exact log ranks with exact variance.
> # 2. Using sum of fractions approximation to log ranks with exact variance.
> # 3. Using survival analysis statistic with estimated standard error.
> # 4. The same as 3, except from a package.
> lrapproximations()
          True Log Rank Statistic 
                            0.025 
Approximation as Sums of Inverses 
                            0.001 
  Survival Analysis Approximation 
                            0.032 
                  Result from nph 
                            0.032 
> #*******************************************************/
> # Larynx cancer data from Klelin and Moeschberger. See */
> # class web page for link.  Cancers are graded on an   */
> # integer scale, with increasing numbers indicating    */
> # more sever.  Variables are stage, time to death, year*/
> # of diagnosis, and status.                            */
> #*******************************************************/
> larynx<-read.table("larynx.txt",header=TRUE)
> library(survival)#For survdiff
> #************************************************************/
> # Log Rank Testing.  Examine 3 log rank statistic examples. */
> # The first, using real data, compares stages 1 and 4, which*/
> # we expect to be very different in the manner that the log */
> # rank statistic should pick up.  The second will compare   */
> # stages 2 and 3, which should be similar.  Then, we make a */
> # fake group indicator fs which will split the groups in a  */
> # way that shouldn't be easily picked up by the log rank    */
> # statistic.  CIs for KM estimates will also be provided.   */
> #************************************************************/
> # Create a fake status variable indicating censoring early
> # and late.
> larynx$fs<- ((larynx$delta==0)&(larynx$time<4.4))|
+   ((larynx$delta==0)&(larynx$time>7.2))|
+   ((larynx$delta==1)&(larynx$time<1.0))|
+   ((larynx$delta==1)&(larynx$time>4.0))
> cat('\n Testing survival between two stages far apart  \n')

 Testing survival between two stages far apart  
> plot(survfit(Surv(time,delta)~stage,data=larynx,
+   subset=(stage==1)|(stage==4)),main="Two Stages Far Apart")
> #Two- and more-group tests are done by survdiff, using
> #the log rank test.  logrank_test of package coin claims
> #to do this as well, but gives different answers, and
> #survdiff agrees with SAS.
> # Putting an assignment in () prints the end result.
> (s1<-survdiff(Surv(time,delta)~stage,data=larynx,
+    subset=(stage==1)|(stage==4)))
Call:
survdiff(formula = Surv(time, delta) ~ stage, data = larynx, 
    subset = (stage == 1) | (stage == 4))

         N Observed Expected (O-E)^2/E (O-E)^2/V
stage=1 33       15    22.78      2.66      23.4
stage=4 13       11     3.22     18.81      23.4

 Chisq= 23.4  on 1 degrees of freedom, p= 1e-06 
> cat('\n Testing survival between two stages close together  \n')

 Testing survival between two stages close together  
> plot(survfit(Surv(time,delta)~stage,data=larynx,
+    subset=(stage==2)|(stage==3)), main="Two Similar Stages")
> (s2<-survdiff(Surv(time,delta)~stage,data=larynx,
+    subset=(stage==2)|(stage==3)))
Call:
survdiff(formula = Surv(time, delta) ~ stage, data = larynx, 
    subset = (stage == 2) | (stage == 3))

         N Observed Expected (O-E)^2/E (O-E)^2/V
stage=2 17        7     9.89     0.846      1.47
stage=3 27       17    14.11     0.593      1.47

 Chisq= 1.5  on 1 degrees of freedom, p= 0.2 
> cat('\n Testing survival for an artificial variable  \n')

 Testing survival for an artificial variable  
> plot(survfit(Surv(time,delta)~fs,data=larynx,
+    subset=(stage==1)|(stage==4)), main="Artificial Variable")
> (s3<-survdiff(Surv(time,delta)~fs,data=larynx,
+    subset=(stage==1)|(stage==4)))
Call:
survdiff(formula = Surv(time, delta) ~ fs, data = larynx, subset = (stage == 
    1) | (stage == 4))

          N Observed Expected (O-E)^2/E (O-E)^2/V
fs=FALSE 24       14     12.9    0.0946     0.203
fs=TRUE  22       12     13.1    0.0931     0.203

 Chisq= 0.2  on 1 degrees of freedom, p= 0.7 
> library(survival)#For survdiff
> #************************************************************/
> # Gehan Testing.                                            */
> #************************************************************/
> cat('\n Testing survival between two stages far apart\n')

 Testing survival between two stages far apart
> plot(survfit(Surv(time,delta)~fs,data=larynx,
+    subset=(stage==1)|(stage==4)),main="Two Dissimilar Stages")
> cat('Log rank version \n')
Log rank version 
> # We have already run the log rank test on stages 1 vs 4,
> # variable fs.
> print(s3)
Call:
survdiff(formula = Surv(time, delta) ~ fs, data = larynx, subset = (stage == 
    1) | (stage == 4))

          N Observed Expected (O-E)^2/E (O-E)^2/V
fs=FALSE 24       14     12.9    0.0946     0.203
fs=TRUE  22       12     13.1    0.0931     0.203

 Chisq= 0.2  on 1 degrees of freedom, p= 0.7 
> cat("Peto-Peto version \n")
Peto-Peto version 
> (s4<-survdiff(Surv(time,delta)~fs,data=larynx,
+    subset=(stage==1)|(stage==4),rho=1))
Call:
survdiff(formula = Surv(time, delta) ~ fs, data = larynx, subset = (stage == 
    1) | (stage == 4), rho = 1)

          N Observed Expected (O-E)^2/E (O-E)^2/V
fs=FALSE 24    10.18     9.71    0.0226    0.0641
fs=TRUE  22     8.49     8.96    0.0245    0.0641

 Chisq= 0.1  on 1 degrees of freedom, p= 0.8 
> cat("Gehan Wilcoxon Test \n")
Gehan Wilcoxon Test 
> library(PHInfiniteEstimates)#For gehan.wilcoxon.test
> gehan.wilcoxon.test(Surv(time,delta)~fs,
+    data=larynx[(larynx$stage==1)|(larynx$stage==4),])

	Gehan-Wilcoxon

data:  
= 0.073254, p-value = 0.7867
alternative hypothesis: two-sided

> #************************************************/
> # Log Rank Testing: Multiple groups             */
> #************************************************/
> cat('\n Testing with multiple categories  \n')

 Testing with multiple categories  
> library(survival)#For survdiff
> (fit2<-survdiff(Surv(time,delta)~stage,larynx))
Call:
survdiff(formula = Surv(time, delta) ~ stage, data = larynx)

         N Observed Expected (O-E)^2/E (O-E)^2/V
stage=1 33       15    22.57     2.537     4.741
stage=2 17        7    10.01     0.906     1.152
stage=3 27       17    14.08     0.603     0.856
stage=4 13       11     3.34    17.590    19.827

 Chisq= 22.8  on 3 degrees of freedom, p= 5e-05 
> #Explore calculation by removing one component of the statistic
> #vector, and the associated row and #column of the variance 
> #matrix, and showing that the results are the same.
> svec<-fit2$obs-fit2$exp
> for(jj in seq(length(fit2$exp))){
+    cat("jj",jj,"Statistic",
+       svec[-jj]%*%solve(fit2$var[-jj,-jj])%*%svec[-jj],"\n")
+ }
jj 1 Statistic 22.76276 
jj 2 Statistic 22.76276 
jj 3 Statistic 22.76276 
jj 4 Statistic 22.76276 
> #**********************************************/
> # Log Rank Testing: Multiple ordered groups   */
> #**********************************************/
> cat('\n Testing with multiple ordered categories.\n')

 Testing with multiple ordered categories.
> #survdiff doesn't do ordered test directly.
> #fit2 is the object testing differences between stages.
> #Build evenly-spaced scores.
> (scr<-seq(length(fit2$exp)))
[1] 1 2 3 4
> #fit2$obs-fit2$exp gives the log rank statistics before
> #adjusting for variance.
> #The ordered-category statistic is the sum of scores
> #times the group-wise statistic.
> stat<-sum(scr*(fit2$obs-fit2$exp))
> #The null variance of the statistic is estimated as
> #the estimated variance covariate matrix of separate
> #statistics, pre- and post-multiplied by scores.
> #The variance estimate is singular, but in this case
> #it doesn't matter.
> sd<-sqrt(t(scr)%*%fit2$var%*%scr)[1,1]
> z<-stat/sd
> (pval<-2*pnorm(-abs(z)))
[1] 0.0002000459
> #**************************************/
> # Stratified Log Rank Testing         */
> #**************************************/
> #***************************************************/
> #Time until tumor formation for rats, from Klein   */
> #and Moeschberger. Data on 50 litters of rats (3   */
> #rats per litter).  Variables represented in the   */
> #dataset are time, indicator of tumor development  */
> #(1=yes, 0=no), Treatment (1=treated with drug,    */
> #0=given placebo).  From Mantel et al. Cancer      */
> #Research 37 (1977): 3863-3868.                    */
> #***************************************************/
> rats<-read.table("rats.txt",header=TRUE)
> cat('\n Test of treatment for tumors in rats, Stratified  \n')

 Test of treatment for tumors in rats, Stratified  
> plot(survfit(Surv(time,tumor)~trt+strata(litter),rats))
> survdiff(Surv(time,tumor)~trt+strata(litter),rats)
Call:
survdiff(formula = Surv(time, tumor) ~ trt + strata(litter), 
    data = rats)

        N Observed Expected (O-E)^2/E (O-E)^2/V
trt=0 100       19     25.3      1.58      6.22
trt=1  50       21     14.7      2.73      6.22

 Chisq= 6.2  on 1 degrees of freedom, p= 0.01 
>