pdf("g04.pdf");options(width=64) #setwd("C:\\Users\\kolassa\\Class553") setwd("~/Taught1/960-553/Data")
# Block 1 #*************************************************/ #* Tests of homogeneity. */ #*************************************************/ #********************************************************/ #* Test whether the ball frequency is the same across */ #* all sets of balls. */ #********************************************************/ #********************************************************/ #* Lotto Example */ #* CA lotto data from Friedman Pisani and Purvis (1998).*/ #* Only one set of balls is used during each lottery */ #* drawing. Columns represent sets of balls. Rows rep-*/ #* resent numbers 0-9. Entries in table represent count*/ #* of how often each ball appears. Each set of balls */ #* was used 120 times, and so the expected number of */ #* times each ball comes up is 12. */ #********************************************************/ lotto<-read.table("lotto.dat") names(lotto)<-c("seta","setb","setc","setd") #R function test.chisq for contingency table testing #accepts data either as a matrix, or as two group #indicator variables. It won't accept the SAS proc #freq format of two indicator variables and a fre- #quency variable. To do this, you need to preprocess #process with xtabs, to be exhibited later. chisq.test(as.matrix(lotto)) #*************************************************************/ #* Prostate data */ #* From Andrews and Herzberg (1985), Data: A Collection of */ #* Problems from Many Fields for the Student and Research */ #* Worker, Table 46. Observations represent subjects in a */ #* prostate cohort study, randomized to one of four dose */ #* levels of diethylstilbestrol. Rx records dose in four */ #* ordered categories, with 1 being placebo. Disease stage */ #* is 3 or 4. monfol is months of followup. surv is 0 if */ #* alive after 50 mo, and codes cause of death otherwise. */ #* http://lib.stat.cmu.edu/datasets/Andrews/T46.1 */ #* The on-line version of the data set adds 3 fields before */ #* the first field in the book. Variables of interest are */ #* stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */ #* online version, resp. Causes of death are given by var- */ #* ious positive integers in surv; I recode these to 1. The */ #* data file has more columns than we are interested in. Put*/ #* place-holding variables in for variables not of interest */ #* between variables of interest. Data were previously pub- */ #* lished by Byar and Corle (1977 Chronic Disease) and Byar */ #* and Green (1980 Bull. Cancer). Lower value of dichoto- */ #* mized dose begins with blank to make it alphabetized */ #* before high. */ #*************************************************************/ # If we omit the ones past where the data of interest stops # in the "what" list, and use flush=T, R will ignore them. prostate<-read.table("T46.1")[,c(5,6,10,11)] dimnames(prostate)[[2]]<-c("stage","rx","monfol","surv") prostate$alive<-(prostate$surv==0)+0 prostate$dose<-c(" low","high")[1+(prostate$rx>1)] stage4<-prostate[prostate$stage==4,] #*************************************************************/ #* Causes of death are given by various positive integers in */ #* surv; I recode these to 1. */ #*************************************************************/ prostate$alive<-(prostate$surv==0)+0 #*******************************************************/ #* Test whether survival is the same across treatments.*/ #*******************************************************/ chisq.test(stage4$rx,stage4$alive) # One can score tables by inverse probability. fisher.test(stage4$rx,stage4$alive) # Block 2 #*************************************************/ #* Fisher's exact test, and odds ratio CI. */ #*************************************************/ # fisher.test does exact test and gives exact CI. fisher.test(stage4$dose,stage4$alive) # Block 3 #******************************************/ #* Tests of Trend. */ #******************************************/ # In case this example gets separated from the # the unordered calculation, reconstruct the matrix. (mat<-table(stage4$rx,stage4$alive)) prop.trend.test(mat[,1],mat[,1]+mat[,2]) # Block 4 #******************************************************/ #* Ordered Association tests when both dimensions may */ #* exceed 2. */ #******************************************************/ #R featuers a wide variety of tests associated with the #names Cochran, Mantel, and Haenszel. The only one I found #that does a general test of association between two #variables, both with three or more categories, is CMHtest. #The command below does the Pearson chi-square, the two #score tests for row and column ordered categories resp., #and the test for scorings along both dimensions. The #Pearson statistic that CMHtest produces is slightly smaller, #since it uses the hypergeometric variance, which substitutes #table total minus 1 for table total in the denoninator of #the variance. library(vcdExtra)#For CMHtest #*************************************************************/ #* Causes of death are given by various positive integers in */ #* surv; I recode these to 1. */ #*************************************************************/ prostate$alive<-(prostate$surv==0)+0 CMHtest(mat) rs<-apply(mat,1,sum); rs<-c(0,cumsum(rs[-length(rs)]))+rs/2 CMHtest(mat,rscore=rs) prop.trend.test(mat[,1],mat[,1]+mat[,2],score=rs) # Block 5 #********************************************************/ #* Testing in the presence of Confounding. */ #* Compare odds ratio estimates ignoring and controling */ #* for a third variable. */ #********************************************************/ #**************************************************************/ #* Death Penalty Example */ #* Death penalty data from Radelet, M. (1981), Racial */ #* Characteristics and Imposition of the Death Penalty */ #* American Sociological Review, 46, 918-927. Data reflect */ #* outcomes of murder cases in which race of victim and */ #* principle suspect are known. 1st column is indicator of */ #* relationship between victim and suspect (P for close */ #* relationship, N for not), Race of victim and defendant */ #* (both either B or W; other races deleted), number of cases,*/ #* number of indictments, and number of death sentences. */ #* Referenced by Moore (2000) Basic Practice of Statistics, p.*/ #* Following Moore, we will only use the non-primary cases. */ #* http://stat.rutgers.edu/~kolassa/960-553/floridadeath.dat */ #**************************************************************/ death1<-read.table("floridadeath.dat", col.names=c("rela","rv","rd","nc","ni","nd")) death2<-death1; death1$we<-death1$nd;death1$lc<-"Death" death2$we<-death1$nc-death1$nd;death2$lc<-"Live" death<-rbind(death1,death2) death$rdc<-c("White defend.","Black defend.")[(death$rd=="B")+1] death$rvc<-c("White victim","Black victim")[(death$rv=="B")+1] nonprim<-death[death$rela=="N",] (m<-xtabs(we~rdc+lc,nonprim)) chisq.test(m) # mantelhaen.test requires the stratifier variable to # be the last dimension. See mantelhaen.test document- # ation for nontabulated observations. (mm<-xtabs(we~rdc+lc+rvc,nonprim)) mantelhaen.test(mm,correct=F) #In this case, note large effect of continuity corr. mantelhaen.test(mm)