pdf("g02.pdf");options(width=64) #setwd("C:\\Users\\kolassa\\Class553") setwd("~/Taught1/960-553/Data")
# Block 1
#********************************************/
# Discrete Confidence Interval Construction */
#********************************************/
nn<-10
obsd<-5
epsilon<-.05
piv<-(1:9999)/10000
q<-cbind(qbinom(.025,10,piv),qbinom(.975,10,piv))
plot(range(piv),range(q),type="n",xlab="pi",ylab="Observed",
   main="Discrete Confidence Interval Example")
for(ii in 1:2){
   diffl<-diff(q[,ii])>.5
   begin<-seq(length(piv))[c(TRUE,diffl)]
   end<-seq(length(piv))[c(diffl,TRUE)]
   for(jj in seq(length(end))){
      segments(piv[begin[jj]],q[begin[jj],ii]+(2*ii-3)*epsilon,
      piv[end[jj]],q[end[jj],ii]+(2*ii-3)*epsilon)
   }
}
abline(h=obsd,lty=2)
cie<-c(min(piv[q[,2]==obsd]),max(piv[q[,1]==obsd]))
for(ii in c(1,2)) segments(cie[ii],obsd,cie[ii],0,lty=3)
legend(0,10,lty=c(1:2,NA,3,NA),legend=c(".025, .975 quantiles",
   "Typical Observed","value","Confidence Interval",
   "End Points"))
# Block 2
library(TheorStat)#For fun.coverageplot, fun.approxnormalci, wilsonpicture
wilsonpicture()
# Block 3
#******************************************************/
# Table of Endpoints for Binomial Confidence Interval */
#******************************************************/
coverageout<-fun.coverageplot(10)
ciends<-coverageout$ciends
coverageout$mincover
show<-round(rbind(ciends[,1,1:6],ciends[,2,1:6]),3)
for(j in 1:2)  dimnames(show)[[1]][2*j+(-1:0)]<-paste(
   dimnames(ciends)[[2]][j],dimnames(show)[[1]][2*j+(-1:0)])
show
# Block 4
#*****************************************************/
# Coverage of One-Sided Binomial Confidence Interval */
#*****************************************************/
ciends<-fun.coverageplot(10,alpha=.1,
   alternative="greater")$ciends
coverageout<-fun.coverageplot(10,alternative="both",
   exactonly=TRUE,cm=.9)
# Block 5
#****************************************************/
# Coverage of Two-Sided Binomial Confidence Interval*/
#****************************************************/
#Next line is very slow.
allcoverout<-fun.allcover(10)
# Block 6
#***************************************************/
# Confidence Region for Two Multinomial Parameters */
#***************************************************/
fun.ellipse<-function(mu,A,cc,n=1000){
   pi<-4*atan(1)
   aaa<-rbind(cos(2*pi*(0:n)/n),sin(2*pi*(0:n)/n))
   return(mu+solve(chol(A/cc))%*%aaa)
}
fun.cscont<-function(n,piv,alpha,lty){
   mu<-piv*n
   if(!is.na(alpha)){
      A<-(diag(1/piv)+array(1,c(2,2))/(1-sum(piv)))*2/n
      cc<-qchisq(1-alpha,2)
      eo<-fun.ellipse(mu,A,cc)
      lines(x=eo[1,],y=eo[2,],lty=lty)
   }else{
      points(x=mu[1],y=mu[2],pch=lty)
   }
}
fun.threecat<-function(n,piv,alpha=NA){
  if(!is.array(piv)) piv<-array(piv,c(1,2))
  plot(c(0,n),c(0,n),type="n",xlab="X1",ylab="X2",
     sub=paste("alpha=",alpha,
       "3 category multinomial,",n," items"),
     main="Rejection Region for Chi-Square Tests")
  xv<-outer(0:n,rep(1,n+1))
  yv<-outer(rep(1,n+1),0:n)
  sv<-(xv-n/3)^2/(n/3)+(yv-n/3)^2/(n/3)+(n-xv-yv-n/3)^2/(n/3)
  use<-xv+yv<=(n+.5)
  xv<-xv[use]
  yv<-yv[use]
  points(xv,yv)
  for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],alpha,jj+1)
  for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],NA   ,jj+1)
  levec<-c("Pi",apply(round(piv,3),1,paste,collapse=","))
  legend(.4*n,.95*n,legend=levec,pch=c(0,2:(dim(piv)[1]+1)))
  legend(.5*n,.75*n,legend=levec,lty=c(0,2:(dim(piv)[1]+1)))
}
#fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4))
fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4),.05)
# Block 7
#******************************************************/
#* Confidence interval for real data                  */
#******************************************************/
#*************************************************************/
#*  Prostate data                                            */
#* From Andrews and Herzberg (1985), Data: A Collection of   */
#* Problems from Many Fields for the Student and Research    */
#* Worker, Table 46.  Observations represent subjects in a   */
#* prostate cohort study, randomized to one of four dose     */
#* levels of diethylstilbestrol.  Rx records dose in four    */
#* ordered categories, with 1 being placebo.  Disease stage  */
#* is 3 or 4.  monfol is months of followup.  surv is 0 if   */
#* alive after 50 mo, and codes cause of death otherwise.    */
#* http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
#* The on-line version of the data set adds 3 fields before  */
#* the first field in the book.  Variables of interest are   */
#* stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
#* online version, resp.  Causes of death are given by var-  */
#* ious positive integers in surv; I recode these to 1.  The */
#* data file has more columns than we are interested in.  Put*/
#* place-holding variables in for variables not of interest  */
#* between variables of interest.  Data were previously pub- */
#* lished by Byar and Corle (1977 Chronic Disease) and Byar  */
#* and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
#* mized dose begins with blank to make it alphabetized      */
#* before high.                                              */
#*************************************************************/
# If we omit the ones past where the data of interest stops
# in the "what" list, and use flush=T, R will ignore them. 
prostate<-read.table("T46.1")[,c(5,6,10,11)]
dimnames(prostate)[[2]]<-c("stage","rx","monfol","surv")
prostate$alive<-(prostate$surv==0)+0                      
prostate$dose<-c(" low","high")[1+(prostate$rx>1)]         
stage4<-prostate[prostate$stage==4,]
#*********************************************************/
# Causes of death are given by various positive integers */
# in surv; I recode these to 1.                          */
#*********************************************************/
prostate$alive<-(prostate$surv==0)+0 
library(Hmisc)#For binconf
#Wilson and standard tests are given by binconf.
binconf(sum(prostate$alive),length(prostate$alive),
   method="wilson")
binconf(sum(prostate$alive),length(prostate$alive),
   method="asymptotic")
#Exact confidence interval for the probability of survival.
binom.test(sum(prostate$alive),length(prostate$alive))
# Block 8
#********************************************************/
#*          Lotto Example                               */
#* CA lotto data from Friedman Pisani and Purvis (1998).*/
#* Only one set of balls is used during each lottery    */
#* drawing.  Columns represent sets of balls.  Rows rep-*/
#* resent numbers 0-9.  Entries in table represent count*/
#* of how often each ball appears.  Each set of balls   */
#* was used 120 times, and so the expected number of    */
#* times each ball comes up is 12.                      */
#********************************************************/
lotto<-read.table("lotto.dat") 
names(lotto)<-c("seta","setb","setc","setd") 
#*****************************************************/
#* Perform the chi-square test of the null hypothesis*/
#* that each ball is equally likely on each draw.    */ 
#*****************************************************/
lotto$setad<-(lotto$seta-12)^2/12
lotto$setbd<-(lotto$setb-12)^2/12 
lotto$setcd<-(lotto$setc-12)^2/12 
lotto$setdd<-(lotto$setd-12)^2/12 
lotto        
# apply adds the contributions by set, only for those vari-
# ables containing squared differences.
chisq<-apply(as.matrix(lotto[,
      c("setad","setbd","setcd","setdd")]),2,"sum") 
# Use the chi-square cdf function to get the p value.  Note
# that lower.tail=F gives the upper tail.  Lower tail is
# the default.  Add this column of p-values to the existing
# vector of statistic values.
 chisq<-rbind(chisq,pchisq(chisq,9,lower.tail=F)) 
 dimnames(chisq)[[1]]<-c("Statistic value","p-value") 
 print(chisq)
#The univarite goodness of fit tests takes equally-likely
#categories as the default, and so the two following con-
#structions are equivalent.
chisq.test(lotto$seta) 
chisq.test(lotto$seta,p=rep(.1,10)) 
#*******************************************************/
#* Number of accidental deaths from falls, 1979        */
#* World Almanac and Book of Facts 1984 (New York:     */
#* Newspaper Enterprise Association, 1984), and quoted */
#* in the Minitab Handbook, 3rd Edn. (Belmont, CA: Dux-*/
#* bury, 1994).  Falls are by month.  Columns are num- */
#* bers of falls, numbers of days in the month, and    */
#* month abbreviation.  Deaths may be influence by the */
#* weather; scorebelow scores months based on icyness. */
#*******************************************************/
falls<-as.data.frame(scan("falls.dat",
   what=list(falls=0,days=0,month="")))
falls$score<-(falls$month%in%c("Jan","Feb"))+
   2*(falls$month%in%c("Nov","Dec","Mar"))
#********************************************************/
#* Test to see whether expected number of falls is prop-*/
#* ortional to the number of days in the month.  In this*/
#* case the expected number of deaths from falls in a   */
#* given month, under the null hypothesis, is the total */
#* number of deaths times the number of days in the     */
#* month divided by the number of days in the year.     */
#********************************************************/
falls$exp<-falls$days*sum(falls$falls)/sum(falls$days)
tests<-list(                                          
   pears=sum((falls$falls-falls$exp)^2/falls$exp))    
tests$pearspv<-pchisq(tests$pears,                    
   length(falls$falls)-1,lower.tail=F)                
tests                                                 
chisq.test(falls$falls,p=falls$days/sum(falls$days))  
# Block 9
#*****************************************************/
#* Perform the test of trend on falls.               */
#*****************************************************/
# Calculate the test of trend from Armitage (1955)        ;
tests<-falls; tests$score<-c(1,1,2,0,0,0,0,0,0,0,2,2)
trendtests<-list(                                     
   trend=sum(tests$score*(tests$falls-tests$exp)),    
   v1=sum(tests$score^2*tests$exp),                   
   v2=sum(tests$score*tests$exp))                     
trendtests$trend<-trendtests$trend/                   
  sqrt(trendtests$v1-trendtests$v2^2/sum(falls$falls))
trendtests$trendpv<-pchisq(trendtests$trend^2,1,      
                                      lower.tail=F)   
trendtests