R version 4.3.1 (2023-06-16) -- "Beagle Scouts"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> pdf("g02.pdf");options(width=64)
> #setwd("C:\\Users\\kolassa\\Class553")
> setwd("~/Taught1/960-553/Data")
> #********************************************/
> # Discrete Confidence Interval Construction */
> #********************************************/
> nn<-10
> obsd<-5
> epsilon<-.05
> piv<-(1:9999)/10000
> q<-cbind(qbinom(.025,10,piv),qbinom(.975,10,piv))
> plot(range(piv),range(q),type="n",xlab="pi",ylab="Observed",
+    main="Discrete Confidence Interval Example")
> for(ii in 1:2){
+    diffl<-diff(q[,ii])>.5
+    begin<-seq(length(piv))[c(TRUE,diffl)]
+    end<-seq(length(piv))[c(diffl,TRUE)]
+    for(jj in seq(length(end))){
+       segments(piv[begin[jj]],q[begin[jj],ii]+(2*ii-3)*epsilon,
+       piv[end[jj]],q[end[jj],ii]+(2*ii-3)*epsilon)
+    }
+ }
> abline(h=obsd,lty=2)
> cie<-c(min(piv[q[,2]==obsd]),max(piv[q[,1]==obsd]))
> for(ii in c(1,2)) segments(cie[ii],obsd,cie[ii],0,lty=3)
> legend(0,10,lty=c(1:2,NA,3,NA),legend=c(".025, .975 quantiles",
+    "Typical Observed","value","Confidence Interval",
+    "End Points"))
> library(TheorStat)#For fun.coverageplot, fun.approxnormalci, wilsonpicture
> wilsonpicture()
> #******************************************************/
> # Table of Endpoints for Binomial Confidence Interval */
> #******************************************************/
> coverageout<-fun.coverageplot(10)
> ciends<-coverageout$ciends
> coverageout$mincover
       two.sided
Exact  0.9611270
Normal 0.0099550
Wilson 0.8953561
> show<-round(rbind(ciends[,1,1:6],ciends[,2,1:6]),3)
> for(j in 1:2)  dimnames(show)[[1]][2*j+(-1:0)]<-paste(
+    dimnames(ciends)[[2]][j],dimnames(show)[[1]][2*j+(-1:0)])
> show
                 0      1      2     3     4     5
Exact Lower  0.000  0.003  0.025 0.067 0.122 0.187
Exact Upper  0.308  0.445  0.556 0.652 0.738 0.813
Normal Lower 0.000 -0.086 -0.048 0.016 0.096 0.190
Normal Upper 0.000  0.286  0.448 0.584 0.704 0.810
> #*****************************************************/
> # Coverage of One-Sided Binomial Confidence Interval */
> #*****************************************************/
> ciends<-fun.coverageplot(10,alpha=.1,
+    alternative="greater")$ciends
> coverageout<-fun.coverageplot(10,alternative="both",
+    exactonly=TRUE,cm=.9)
> #****************************************************/
> # Coverage of Two-Sided Binomial Confidence Interval*/
> #****************************************************/
> #Next line is very slow.
> allcoverout<-fun.allcover(10)
> #***************************************************/
> # Confidence Region for Two Multinomial Parameters */
> #***************************************************/
> fun.ellipse<-function(mu,A,cc,n=1000){
+    pi<-4*atan(1)
+    aaa<-rbind(cos(2*pi*(0:n)/n),sin(2*pi*(0:n)/n))
+    return(mu+solve(chol(A/cc))%*%aaa)
+ }
> fun.cscont<-function(n,piv,alpha,lty){
+    mu<-piv*n
+    if(!is.na(alpha)){
+       A<-(diag(1/piv)+array(1,c(2,2))/(1-sum(piv)))*2/n
+       cc<-qchisq(1-alpha,2)
+       eo<-fun.ellipse(mu,A,cc)
+       lines(x=eo[1,],y=eo[2,],lty=lty)
+    }else{
+       points(x=mu[1],y=mu[2],pch=lty)
+    }
+ }
> fun.threecat<-function(n,piv,alpha=NA){
+   if(!is.array(piv)) piv<-array(piv,c(1,2))
+   plot(c(0,n),c(0,n),type="n",xlab="X1",ylab="X2",
+      sub=paste("alpha=",alpha,
+        "3 category multinomial,",n," items"),
+      main="Rejection Region for Chi-Square Tests")
+   xv<-outer(0:n,rep(1,n+1))
+   yv<-outer(rep(1,n+1),0:n)
+   sv<-(xv-n/3)^2/(n/3)+(yv-n/3)^2/(n/3)+(n-xv-yv-n/3)^2/(n/3)
+   use<-xv+yv<=(n+.5)
+   xv<-xv[use]
+   yv<-yv[use]
+   points(xv,yv)
+   for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],alpha,jj+1)
+   for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],NA   ,jj+1)
+   levec<-c("Pi",apply(round(piv,3),1,paste,collapse=","))
+   legend(.4*n,.95*n,legend=levec,pch=c(0,2:(dim(piv)[1]+1)))
+   legend(.5*n,.75*n,legend=levec,lty=c(0,2:(dim(piv)[1]+1)))
+ }
> #fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4))
> fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4),.05)
> #******************************************************/
> #* Confidence interval for real data                  */
> #******************************************************/
> #*************************************************************/
> #*  Prostate data                                            */
> #* From Andrews and Herzberg (1985), Data: A Collection of   */
> #* Problems from Many Fields for the Student and Research    */
> #* Worker, Table 46.  Observations represent subjects in a   */
> #* prostate cohort study, randomized to one of four dose     */
> #* levels of diethylstilbestrol.  Rx records dose in four    */
> #* ordered categories, with 1 being placebo.  Disease stage  */
> #* is 3 or 4.  monfol is months of followup.  surv is 0 if   */
> #* alive after 50 mo, and codes cause of death otherwise.    */
> #* http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
> #* The on-line version of the data set adds 3 fields before  */
> #* the first field in the book.  Variables of interest are   */
> #* stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
> #* online version, resp.  Causes of death are given by var-  */
> #* ious positive integers in surv; I recode these to 1.  The */
> #* data file has more columns than we are interested in.  Put*/
> #* place-holding variables in for variables not of interest  */
> #* between variables of interest.  Data were previously pub- */
> #* lished by Byar and Corle (1977 Chronic Disease) and Byar  */
> #* and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
> #* mized dose begins with blank to make it alphabetized      */
> #* before high.                                              */
> #*************************************************************/
> # If we omit the ones past where the data of interest stops
> # in the "what" list, and use flush=T, R will ignore them. 
> prostate<-read.table("T46.1")[,c(5,6,10,11)]
> dimnames(prostate)[[2]]<-c("stage","rx","monfol","surv")
> prostate$alive<-(prostate$surv==0)+0                      
> prostate$dose<-c(" low","high")[1+(prostate$rx>1)]         
> stage4<-prostate[prostate$stage==4,]
> #*********************************************************/
> # Causes of death are given by various positive integers */
> # in surv; I recode these to 1.                          */
> #*********************************************************/
> prostate$alive<-(prostate$surv==0)+0 
> library(Hmisc)#For binconf
> #Wilson and standard tests are given by binconf.
> binconf(sum(prostate$alive),length(prostate$alive),
+    method="wilson")
  PointEst     Lower     Upper
 0.2964427 0.2583052 0.3376476
> binconf(sum(prostate$alive),length(prostate$alive),
+    method="asymptotic")
  PointEst     Lower     Upper
 0.2964427 0.2566509 0.3362344
> #Exact confidence interval for the probability of survival.
> binom.test(sum(prostate$alive),length(prostate$alive))

	Exact binomial test

data:  sum(prostate$alive) and length(prostate$alive)
number of successes = 150, number of trials = 506,
p-value < 2.2e-16
alternative hypothesis: true probability of success is not equal to 0.5
95 percent confidence interval:
 0.2569521 0.3383307
sample estimates:
probability of success 
             0.2964427 

> #********************************************************/
> #*          Lotto Example                               */
> #* CA lotto data from Friedman Pisani and Purvis (1998).*/
> #* Only one set of balls is used during each lottery    */
> #* drawing.  Columns represent sets of balls.  Rows rep-*/
> #* resent numbers 0-9.  Entries in table represent count*/
> #* of how often each ball appears.  Each set of balls   */
> #* was used 120 times, and so the expected number of    */
> #* times each ball comes up is 12.                      */
> #********************************************************/
> lotto<-read.table("lotto.dat") 
> names(lotto)<-c("seta","setb","setc","setd") 
> #*****************************************************/
> #* Perform the chi-square test of the null hypothesis*/
> #* that each ball is equally likely on each draw.    */ 
> #*****************************************************/
> lotto$setad<-(lotto$seta-12)^2/12
> lotto$setbd<-(lotto$setb-12)^2/12 
> lotto$setcd<-(lotto$setc-12)^2/12 
> lotto$setdd<-(lotto$setd-12)^2/12 
> lotto        
   seta setb setc setd      setad      setbd      setcd
1    13   22   12   16 0.08333333 8.33333333 0.00000000
2    11    8   10    7 0.08333333 1.33333333 0.33333333
3    16    7   14   12 1.33333333 2.08333333 0.33333333
4    11    8   10   14 0.08333333 1.33333333 0.33333333
5     5   19   11   15 4.08333333 4.08333333 0.08333333
6    12   20   10    5 0.00000000 5.33333333 0.33333333
7    12   10   20   10 0.00000000 0.33333333 5.33333333
8    19   11   12   21 4.08333333 0.08333333 0.00000000
9     5    6   12   11 4.08333333 3.00000000 0.00000000
10   16    9    9    9 1.33333333 0.75000000 0.75000000
        setdd
1  1.33333333
2  2.08333333
3  0.00000000
4  0.33333333
5  0.75000000
6  4.08333333
7  0.33333333
8  6.75000000
9  0.08333333
10 0.75000000
> # apply adds the contributions by set, only for those vari-
> # ables containing squared differences.
> chisq<-apply(as.matrix(lotto[,
+       c("setad","setbd","setcd","setdd")]),2,"sum") 
> # Use the chi-square cdf function to get the p value.  Note
> # that lower.tail=F gives the upper tail.  Lower tail is
> # the default.  Add this column of p-values to the existing
> # vector of statistic values.
>  chisq<-rbind(chisq,pchisq(chisq,9,lower.tail=F)) 
>  dimnames(chisq)[[1]]<-c("Statistic value","p-value") 
>  print(chisq)
                      setad        setbd     setcd       setdd
Statistic value 15.16666667 26.666666667 7.5000000 16.50000000
p-value          0.08645849  0.001587759 0.5852088  0.05714647
> #The univarite goodness of fit tests takes equally-likely
> #categories as the default, and so the two following con-
> #structions are equivalent.
> chisq.test(lotto$seta) 

	Chi-squared test for given probabilities

data:  lotto$seta
X-squared = 15.167, df = 9, p-value = 0.08646

> chisq.test(lotto$seta,p=rep(.1,10)) 

	Chi-squared test for given probabilities

data:  lotto$seta
X-squared = 15.167, df = 9, p-value = 0.08646

> #*******************************************************/
> #* Number of accidental deaths from falls, 1979        */
> #* World Almanac and Book of Facts 1984 (New York:     */
> #* Newspaper Enterprise Association, 1984), and quoted */
> #* in the Minitab Handbook, 3rd Edn. (Belmont, CA: Dux-*/
> #* bury, 1994).  Falls are by month.  Columns are num- */
> #* bers of falls, numbers of days in the month, and    */
> #* month abbreviation.  Deaths may be influence by the */
> #* weather; scorebelow scores months based on icyness. */
> #*******************************************************/
> falls<-as.data.frame(scan("falls.dat",
+    what=list(falls=0,days=0,month="")))
> falls$score<-(falls$month%in%c("Jan","Feb"))+
+    2*(falls$month%in%c("Nov","Dec","Mar"))
> #********************************************************/
> #* Test to see whether expected number of falls is prop-*/
> #* ortional to the number of days in the month.  In this*/
> #* case the expected number of deaths from falls in a   */
> #* given month, under the null hypothesis, is the total */
> #* number of deaths times the number of days in the     */
> #* month divided by the number of days in the year.     */
> #********************************************************/
> falls$exp<-falls$days*sum(falls$falls)/sum(falls$days)
> tests<-list(                                          
+    pears=sum((falls$falls-falls$exp)^2/falls$exp))    
> tests$pearspv<-pchisq(tests$pears,                    
+    length(falls$falls)-1,lower.tail=F)                
> tests                                                 
$pears
[1] 17.69244

$pearspv
[1] 0.08899695

> chisq.test(falls$falls,p=falls$days/sum(falls$days))  

	Chi-squared test for given probabilities

data:  falls$falls
X-squared = 17.692, df = 11, p-value = 0.089

> #*****************************************************/
> #* Perform the test of trend on falls.               */
> #*****************************************************/
> # Calculate the test of trend from Armitage (1955)        ;
> tests<-falls; tests$score<-c(1,1,2,0,0,0,0,0,0,0,2,2)
> trendtests<-list(                                     
+    trend=sum(tests$score*(tests$falls-tests$exp)),    
+    v1=sum(tests$score^2*tests$exp),                   
+    v2=sum(tests$score*tests$exp))                     
> trendtests$trend<-trendtests$trend/                   
+   sqrt(trendtests$v1-trendtests$v2^2/sum(falls$falls))
> trendtests$trendpv<-pchisq(trendtests$trend^2,1,      
+                                       lower.tail=F)   
> trendtests                                            
$trend
[1] -0.9653177

$v1
[1] 15460.91

$v2
[1] 8798.597

$trendpv
[1] 0.3343857

>