pdf("g10.pdf");options(width=64)
#********************************************************/
#* Yarn strength data from Example Q of Cox & Snell     */
#* (1981).  Variables represent strength of two types of*/
#* yarn collected from six different bobbins.           */
#********************************************************/
yarn<-as.data.frame(scan("yarn.dat",what=
    list(strength=0, bobbin=0,type="")))
#Calculate means by bobbin type.  split takes a vector of responses and
#a vector of group identifiers, and makes a list of vectors with one vector
#per group.  lapply applies a vector function to every element of a list.
lapply(split(yarn$strength,yarn$bobbin),"mean")
#Now build a regression model with dummy variables.  The == operator returns
#a logical.  When a logical is used as part of a mathematical expression,
#it is first forced to a number, with FALSE mapping to 0 and TRUE
#mapping to 1.
yarn$b1<-(yarn$bobbin==1)+0
yarn$b2<-(yarn$bobbin==2)+0
yarn$b3<-(yarn$bobbin==3)+0
yarn$b4<-(yarn$bobbin==4)+0
yarn$b5<-(yarn$bobbin==5)+0
yarn$b6<-(yarn$bobbin==6)+0
#Fit the model as independent means, by suppressing grand mean.
summary(yarnfit<-lm(strength~-1+b1+b2+b3+b4+b5+b6,data=yarn))
#Fit the model with a grand mean.  Note treatment of less than
#full rank design matrix.
summary(yarnfit2<-lm(strength~b1+b2+b3+b4+b5+b6,data=yarn))
#R creates the dummy variables automatically, using factor.
#factor takes a vector and creates a new vector of consecutive
#positive integer codes for the original variables.  R uses the 
#category coded as 1 for the reference.  In contrast, if dummy
#variables causing a design matrix of less than full rank are used,
#the last one is removed, forcing the last group to be taken as
#the reference variable.
yarn$bobbinf<-factor(as.character(yarn$bobbin))
summary(yarnfit3<-lm(strength~bobbinf,data=yarn))
#Ordering of the groups in a factor, and hence which group is the
#reference, can be specified explicitly:
yarn$bobbinf<-factor(as.character(yarn$bobbin),
  levels=c("6","1","2","3","4","5"))
summary(yarnfit4<-lm(strength~bobbinf,data=yarn))
#A version that gives the analysis of variance table directly:
summary(yarnfit5<-aov(strength~bobbinf,data=yarn))
#Note that aov gives some different diagnostic plots.
plot(yarnfit5)
#You can change an existing factor to have a different level
#coded as 1.  relevel seems to need for the labels to be character;
#everything else above appears to work ok without first making the
#bobbin labels a character.
yarn$bobbing<-relevel(yarn$bobbinf,"1")
summary(yarnfit6<-lm(strength~bobbing,data=yarn))
#Input data on the effectiveness of vitamin C on rat tooth growth.
library(datasets)# For data set ToothGrowth
#Effects of chemical interventions on biological systems typically are
#convex, in that doubling a dose typically has less than a doubling effect
#on response.  Let's try dose on a log scale.
with(ToothGrowth,plot(log(dose),len,pch=as.numeric(factor(supp))))
legend(0,10,pch=unique(as.numeric(factor(ToothGrowth$supp))),
   legend=levels( factor(ToothGrowth$supp)))
#It looks like VC outperforms OJ for low does but not for high doses.
summary(toothout<-lm(len~log(dose)+supp,data=ToothGrowth))
#Plot fitted values:
plot(log(ToothGrowth$dose),fitted(toothout),
   main="Tooth Growth Data",sub="Fit without Interaction")
anova(toothout)
#Data on pussom size by site, from Data Analysis and Graphics (DAAG).
#If not already installed, install with 
#install.packages("DAAG")
library(DAAG)# For data set fossum
data(fossum)
#Examine the dependence of possum size as a function of age, controlling for
#site.  In this observational study one might be concerned about 
#a relationship between site and age that would change the interpretation 
#of the age regression parameter.
boxplot(split(fossum$age,fossum$site),
   main="Possum Ages by Site")
fossumfit<-lm(formula = totlngth ~ pmin(age, 6)+factor(site) , data = fossum)
plot(fossumfit)
fossumfit2<-aov(totlngth ~ pmin(age, 6)+factor(site) , data = fossum)
summary(toothouti<-lm(len~log(dose)*supp,data=ToothGrowth))
with(ToothGrowth,plot(log(dose),fitted(toothouti),col=as.numeric(supp),
   main="Tooth Growth Data",sub="Fit with Interaction",
   pch=as.numeric(supp)))
legend(.1,15,col=1:2,levels(ToothGrowth$supp),pch=1:2)
anova(toothouti)
# R builds ANOVA tables, with one sum of squares for each variable.
# Variables as factors give tests of multiple parameters at once.
# This convention of one line per factor is why R does not give the
# standard regression anova table.
anova(yarnfit3)
#***********************************************************/
#* Chicken weightdata from Example K of Cox & Snell (1981).*/
#* Variables represent protein source, protein level, fish */
#* solubles level.  Two responses representing weight at   */
#* 16wk in g follow.                                       */
#***********************************************************/
temp1<-temp<-as.data.frame(scan("chicken.dat",what=
    list(source="", lev=0,fish=0,weight=0,w2=0)))
temp1$weight<-temp1$w2;temp$h<-0;temp1$h<-1
temp$w2<-NULL;temp1$w2<-NULL
chicken<-rbind(temp,temp1)
boxplot(split(chicken$weight,chicken$lev))
summary(lm(weight~lev,data=chicken))
anova(lm(weight~factor(lev),data=chicken))
summary(lm(weight~lev+I(lev^2),data=chicken))
anova(lm(weight~factor(lev)+factor(source),data=chicken))
anova(lm(weight~factor(source)+factor(lev),data=chicken))
#Import data on genetic variation in responses to fertilization and
#    simulated herbivory in _Arabidopsis_
library(lme4)# For data set AArabidopsis
#Fit a two-way ANOVA model.  Doing the analysis on the fourth root
#scale works ok, but not great.  Note that the design is unbalanced:
with(Arabidopsis,table(reg,status))
#The ANOVA table depends on the order in which variables are 
#entered.
anova(lm(total.fruits*(1/4)~reg+status,data=Arabidopsis))
anova(lm(total.fruits*(1/4)~status+reg,data=Arabidopsis))
#Remember we fit yarnfit5 via aov.
TukeyHSD(yarnfit5)
TukeyHSD(aov(totlngth ~ factor(site) , data = fossum))
#Give Scheffe method, but unfortunately applied only to group mean
#differences, which seems to be a less efficient approach to 
#the Tukey procedure.  If you haven't already done this, do
#install.packages("agricolae")
library(agricolae)# For scheffe.test
#This 
scheffe.test(yarnfit3,"bobbinf")
#Another version, this one allowing you to input the contrast vector.
#Again, if you omit the contrast vector, you get pairwise comparisons.
#install.packages("DescTools")
library(DescTools)# For ScheffeTest
c1<-(1:6)-3.5
c2<-c1^2-mean(c1^2)
ScheffeTest(yarnfit5,contrasts=cbind(c1,c2))
<\/pre>