pdf("g04.pdf");options(width=64) #setwd("C:\\Users\\kolassa\\Class553") setwd("~/Taught1/960-553/Data")
# Block 1
#*************************************************/
#* Tests of homogeneity.                         */
#*************************************************/
#********************************************************/
#* Test whether the ball frequency is the same across   */
#* all sets of balls.                                   */
#********************************************************/
#********************************************************/
#*          Lotto Example                               */
#* CA lotto data from Friedman Pisani and Purvis (1998).*/
#* Only one set of balls is used during each lottery    */
#* drawing.  Columns represent sets of balls.  Rows rep-*/
#* resent numbers 0-9.  Entries in table represent count*/
#* of how often each ball appears.  Each set of balls   */
#* was used 120 times, and so the expected number of    */
#* times each ball comes up is 12.                      */
#********************************************************/
lotto<-read.table("lotto.dat") 
names(lotto)<-c("seta","setb","setc","setd") 
#R function test.chisq for contingency table testing 
#accepts data either as a matrix, or as two group    
#indicator variables.  It won't accept the SAS proc  
#freq format of two indicator variables and a fre-   
#quency variable.  To do this, you need to preprocess
#process with xtabs, to be exhibited later.          
chisq.test(as.matrix(lotto))                         
#*************************************************************/
#*  Prostate data                                            */
#* From Andrews and Herzberg (1985), Data: A Collection of   */
#* Problems from Many Fields for the Student and Research    */
#* Worker, Table 46.  Observations represent subjects in a   */
#* prostate cohort study, randomized to one of four dose     */
#* levels of diethylstilbestrol.  Rx records dose in four    */
#* ordered categories, with 1 being placebo.  Disease stage  */
#* is 3 or 4.  monfol is months of followup.  surv is 0 if   */
#* alive after 50 mo, and codes cause of death otherwise.    */
#* http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
#* The on-line version of the data set adds 3 fields before  */
#* the first field in the book.  Variables of interest are   */
#* stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
#* online version, resp.  Causes of death are given by var-  */
#* ious positive integers in surv; I recode these to 1.  The */
#* data file has more columns than we are interested in.  Put*/
#* place-holding variables in for variables not of interest  */
#* between variables of interest.  Data were previously pub- */
#* lished by Byar and Corle (1977 Chronic Disease) and Byar  */
#* and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
#* mized dose begins with blank to make it alphabetized      */
#* before high.                                              */
#*************************************************************/
# If we omit the ones past where the data of interest stops
# in the "what" list, and use flush=T, R will ignore them. 
prostate<-read.table("T46.1")[,c(5,6,10,11)]
dimnames(prostate)[[2]]<-c("stage","rx","monfol","surv")
prostate$alive<-(prostate$surv==0)+0                      
prostate$dose<-c(" low","high")[1+(prostate$rx>1)]         
stage4<-prostate[prostate$stage==4,]
#*************************************************************/
#* Causes of death are given by various positive integers in */
#* surv; I recode these to 1.                                */
#*************************************************************/
prostate$alive<-(prostate$surv==0)+0
#*******************************************************/
#* Test whether survival is the same across treatments.*/
#*******************************************************/
chisq.test(stage4$rx,stage4$alive)
# One can score tables by inverse probability.
fisher.test(stage4$rx,stage4$alive)
# Block 2
#*************************************************/
#* Fisher's exact test, and odds ratio CI.       */
#*************************************************/
# fisher.test does exact test and gives exact CI.
fisher.test(stage4$dose,stage4$alive)
# Block 3
#******************************************/
#* Tests of Trend.                        */
#******************************************/
# In case this example gets separated from the
# the unordered calculation, reconstruct the matrix.
(mat<-table(stage4$rx,stage4$alive))
prop.trend.test(mat[,1],mat[,1]+mat[,2])
# Block 4
#******************************************************/
#* Ordered Association tests when both dimensions may */
#* exceed 2.                                          */
#******************************************************/
#R featuers a wide variety of tests associated with the
#names Cochran, Mantel, and Haenszel.  The only one I found
#that does a general test of association between two
#variables, both with three or more categories, is CMHtest.
#The command below does the Pearson chi-square, the two
#score tests for row and column ordered categories resp.,
#and the test for scorings along both dimensions.  The 
#Pearson statistic that CMHtest produces is slightly smaller,
#since it uses the hypergeometric variance, which substitutes
#table total minus 1 for table total in the denoninator of
#the variance.
library(vcdExtra)#For CMHtest
#*************************************************************/
#* Causes of death are given by various positive integers in */
#* surv; I recode these to 1.                                */
#*************************************************************/
prostate$alive<-(prostate$surv==0)+0
CMHtest(mat)
rs<-apply(mat,1,sum); rs<-c(0,cumsum(rs[-length(rs)]))+rs/2
CMHtest(mat,rscore=rs)
prop.trend.test(mat[,1],mat[,1]+mat[,2],score=rs)
# Block 5
#********************************************************/
#* Testing in the presence of Confounding.              */
#* Compare odds ratio estimates ignoring and controling */
#* for a third variable.                                */
#********************************************************/
#**************************************************************/
#*          Death Penalty Example                             */
#* Death penalty data from Radelet, M. (1981), Racial         */
#* Characteristics and Imposition of the Death Penalty        */
#* American Sociological Review, 46, 918-927. Data reflect    */
#* outcomes of murder cases in which race of victim and       */
#* principle suspect are known.  1st column is indicator of   */
#* relationship between victim and suspect (P for close       */
#* relationship, N for not), Race of victim and defendant     */
#* (both either B or W; other races deleted), number of cases,*/
#* number of indictments, and number of death sentences.      */
#* Referenced by Moore (2000) Basic Practice of Statistics, p.*/
#* Following Moore, we will only use the non-primary cases.   */
#* http://stat.rutgers.edu/~kolassa/960-553/floridadeath.dat  */
#**************************************************************/
death1<-read.table("floridadeath.dat",
   col.names=c("rela","rv","rd","nc","ni","nd"))
death2<-death1; death1$we<-death1$nd;death1$lc<-"Death" 
death2$we<-death1$nc-death1$nd;death2$lc<-"Live"
death<-rbind(death1,death2)
death$rdc<-c("White defend.","Black defend.")[(death$rd=="B")+1]
death$rvc<-c("White victim","Black victim")[(death$rv=="B")+1]
nonprim<-death[death$rela=="N",]                       
(m<-xtabs(we~rdc+lc,nonprim))
chisq.test(m)
# mantelhaen.test requires the stratifier variable to
# be the last dimension. See mantelhaen.test document-
# ation for nontabulated observations.
(mm<-xtabs(we~rdc+lc+rvc,nonprim))
mantelhaen.test(mm,correct=F)
#In this case, note large effect of continuity corr.
mantelhaen.test(mm)