pdf("g02.pdf");options(width=64) #setwd("C:\\Users\\kolassa\\Class553") setwd("~/Taught1/960-553/Data")
# Block 1 #********************************************/ # Discrete Confidence Interval Construction */ #********************************************/ nn<-10 obsd<-5 epsilon<-.05 piv<-(1:9999)/10000 q<-cbind(qbinom(.025,10,piv),qbinom(.975,10,piv)) plot(range(piv),range(q),type="n",xlab="pi",ylab="Observed", main="Discrete Confidence Interval Example") for(ii in 1:2){ diffl<-diff(q[,ii])>.5 begin<-seq(length(piv))[c(TRUE,diffl)] end<-seq(length(piv))[c(diffl,TRUE)] for(jj in seq(length(end))){ segments(piv[begin[jj]],q[begin[jj],ii]+(2*ii-3)*epsilon, piv[end[jj]],q[end[jj],ii]+(2*ii-3)*epsilon) } } abline(h=obsd,lty=2) cie<-c(min(piv[q[,2]==obsd]),max(piv[q[,1]==obsd])) for(ii in c(1,2)) segments(cie[ii],obsd,cie[ii],0,lty=3) legend(0,10,lty=c(1:2,NA,3,NA),legend=c(".025, .975 quantiles", "Typical Observed","value","Confidence Interval", "End Points")) # Block 2 library(TheorStat)#For fun.coverageplot, fun.approxnormalci, wilsonpicture wilsonpicture() # Block 3 #******************************************************/ # Table of Endpoints for Binomial Confidence Interval */ #******************************************************/ coverageout<-fun.coverageplot(10) ciends<-coverageout$ciends coverageout$mincover show<-round(rbind(ciends[,1,1:6],ciends[,2,1:6]),3) for(j in 1:2) dimnames(show)[[1]][2*j+(-1:0)]<-paste( dimnames(ciends)[[2]][j],dimnames(show)[[1]][2*j+(-1:0)]) show # Block 4 #*****************************************************/ # Coverage of One-Sided Binomial Confidence Interval */ #*****************************************************/ ciends<-fun.coverageplot(10,alpha=.1, alternative="greater")$ciends coverageout<-fun.coverageplot(10,alternative="both", exactonly=TRUE,cm=.9) # Block 5 #****************************************************/ # Coverage of Two-Sided Binomial Confidence Interval*/ #****************************************************/ #Next line is very slow. allcoverout<-fun.allcover(10) # Block 6 #***************************************************/ # Confidence Region for Two Multinomial Parameters */ #***************************************************/ fun.ellipse<-function(mu,A,cc,n=1000){ pi<-4*atan(1) aaa<-rbind(cos(2*pi*(0:n)/n),sin(2*pi*(0:n)/n)) return(mu+solve(chol(A/cc))%*%aaa) } fun.cscont<-function(n,piv,alpha,lty){ mu<-piv*n if(!is.na(alpha)){ A<-(diag(1/piv)+array(1,c(2,2))/(1-sum(piv)))*2/n cc<-qchisq(1-alpha,2) eo<-fun.ellipse(mu,A,cc) lines(x=eo[1,],y=eo[2,],lty=lty) }else{ points(x=mu[1],y=mu[2],pch=lty) } } fun.threecat<-function(n,piv,alpha=NA){ if(!is.array(piv)) piv<-array(piv,c(1,2)) plot(c(0,n),c(0,n),type="n",xlab="X1",ylab="X2", sub=paste("alpha=",alpha, "3 category multinomial,",n," items"), main="Rejection Region for Chi-Square Tests") xv<-outer(0:n,rep(1,n+1)) yv<-outer(rep(1,n+1),0:n) sv<-(xv-n/3)^2/(n/3)+(yv-n/3)^2/(n/3)+(n-xv-yv-n/3)^2/(n/3) use<-xv+yv<=(n+.5) xv<-xv[use] yv<-yv[use] points(xv,yv) for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],alpha,jj+1) for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],NA ,jj+1) levec<-c("Pi",apply(round(piv,3),1,paste,collapse=",")) legend(.4*n,.95*n,legend=levec,pch=c(0,2:(dim(piv)[1]+1))) legend(.5*n,.75*n,legend=levec,lty=c(0,2:(dim(piv)[1]+1))) } #fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4)) fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4),.05) # Block 7 #******************************************************/ #* Confidence interval for real data */ #******************************************************/ #*************************************************************/ #* Prostate data */ #* From Andrews and Herzberg (1985), Data: A Collection of */ #* Problems from Many Fields for the Student and Research */ #* Worker, Table 46. Observations represent subjects in a */ #* prostate cohort study, randomized to one of four dose */ #* levels of diethylstilbestrol. Rx records dose in four */ #* ordered categories, with 1 being placebo. Disease stage */ #* is 3 or 4. monfol is months of followup. surv is 0 if */ #* alive after 50 mo, and codes cause of death otherwise. */ #* http://lib.stat.cmu.edu/datasets/Andrews/T46.1 */ #* The on-line version of the data set adds 3 fields before */ #* the first field in the book. Variables of interest are */ #* stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */ #* online version, resp. Causes of death are given by var- */ #* ious positive integers in surv; I recode these to 1. The */ #* data file has more columns than we are interested in. Put*/ #* place-holding variables in for variables not of interest */ #* between variables of interest. Data were previously pub- */ #* lished by Byar and Corle (1977 Chronic Disease) and Byar */ #* and Green (1980 Bull. Cancer). Lower value of dichoto- */ #* mized dose begins with blank to make it alphabetized */ #* before high. */ #*************************************************************/ # If we omit the ones past where the data of interest stops # in the "what" list, and use flush=T, R will ignore them. prostate<-read.table("T46.1")[,c(5,6,10,11)] dimnames(prostate)[[2]]<-c("stage","rx","monfol","surv") prostate$alive<-(prostate$surv==0)+0 prostate$dose<-c(" low","high")[1+(prostate$rx>1)] stage4<-prostate[prostate$stage==4,] #*********************************************************/ # Causes of death are given by various positive integers */ # in surv; I recode these to 1. */ #*********************************************************/ prostate$alive<-(prostate$surv==0)+0 library(Hmisc)#For binconf #Wilson and standard tests are given by binconf. binconf(sum(prostate$alive),length(prostate$alive), method="wilson") binconf(sum(prostate$alive),length(prostate$alive), method="asymptotic") #Exact confidence interval for the probability of survival. binom.test(sum(prostate$alive),length(prostate$alive)) # Block 8 #********************************************************/ #* Lotto Example */ #* CA lotto data from Friedman Pisani and Purvis (1998).*/ #* Only one set of balls is used during each lottery */ #* drawing. Columns represent sets of balls. Rows rep-*/ #* resent numbers 0-9. Entries in table represent count*/ #* of how often each ball appears. Each set of balls */ #* was used 120 times, and so the expected number of */ #* times each ball comes up is 12. */ #********************************************************/ lotto<-read.table("lotto.dat") names(lotto)<-c("seta","setb","setc","setd") #*****************************************************/ #* Perform the chi-square test of the null hypothesis*/ #* that each ball is equally likely on each draw. */ #*****************************************************/ lotto$setad<-(lotto$seta-12)^2/12 lotto$setbd<-(lotto$setb-12)^2/12 lotto$setcd<-(lotto$setc-12)^2/12 lotto$setdd<-(lotto$setd-12)^2/12 lotto # apply adds the contributions by set, only for those vari- # ables containing squared differences. chisq<-apply(as.matrix(lotto[, c("setad","setbd","setcd","setdd")]),2,"sum") # Use the chi-square cdf function to get the p value. Note # that lower.tail=F gives the upper tail. Lower tail is # the default. Add this column of p-values to the existing # vector of statistic values. chisq<-rbind(chisq,pchisq(chisq,9,lower.tail=F)) dimnames(chisq)[[1]]<-c("Statistic value","p-value") print(chisq) #The univarite goodness of fit tests takes equally-likely #categories as the default, and so the two following con- #structions are equivalent. chisq.test(lotto$seta) chisq.test(lotto$seta,p=rep(.1,10)) #*******************************************************/ #* Number of accidental deaths from falls, 1979 */ #* World Almanac and Book of Facts 1984 (New York: */ #* Newspaper Enterprise Association, 1984), and quoted */ #* in the Minitab Handbook, 3rd Edn. (Belmont, CA: Dux-*/ #* bury, 1994). Falls are by month. Columns are num- */ #* bers of falls, numbers of days in the month, and */ #* month abbreviation. Deaths may be influence by the */ #* weather; scorebelow scores months based on icyness. */ #*******************************************************/ falls<-as.data.frame(scan("falls.dat", what=list(falls=0,days=0,month=""))) falls$score<-(falls$month%in%c("Jan","Feb"))+ 2*(falls$month%in%c("Nov","Dec","Mar")) #********************************************************/ #* Test to see whether expected number of falls is prop-*/ #* ortional to the number of days in the month. In this*/ #* case the expected number of deaths from falls in a */ #* given month, under the null hypothesis, is the total */ #* number of deaths times the number of days in the */ #* month divided by the number of days in the year. */ #********************************************************/ falls$exp<-falls$days*sum(falls$falls)/sum(falls$days) tests<-list( pears=sum((falls$falls-falls$exp)^2/falls$exp)) tests$pearspv<-pchisq(tests$pears, length(falls$falls)-1,lower.tail=F) tests chisq.test(falls$falls,p=falls$days/sum(falls$days)) # Block 9 #*****************************************************/ #* Perform the test of trend on falls. */ #*****************************************************/ # Calculate the test of trend from Armitage (1955) ; tests<-falls; tests$score<-c(1,1,2,0,0,0,0,0,0,0,2,2) trendtests<-list( trend=sum(tests$score*(tests$falls-tests$exp)), v1=sum(tests$score^2*tests$exp), v2=sum(tests$score*tests$exp)) trendtests$trend<-trendtests$trend/ sqrt(trendtests$v1-trendtests$v2^2/sum(falls$falls)) trendtests$trendpv<-pchisq(trendtests$trend^2,1, lower.tail=F) trendtests