# Open g04.pdf as the graphics device and limit printed output to 64 columns.
pdf("g04.pdf")
options(width=64)
# Examine the parent-child height data.  Data are from
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/T0HSJ1
# Documentation claims that these are formatted for Stata; fortunately R reads
# them just fine.  header=TRUE tells R that the first row contains column names.
galton<-read.table("galton-stata11.tab",header=TRUE)
# First examine daughters: keep the rows whose female indicator equals 1.
daughters<-galton[galton$female==1,]
# Families have multiple children.  Most of what we will do this semester will
# require observations to be independent, and so keep only the first daughter
# listed for each family.  duplicated() flags every repeat of a family
# identifier, so the selection does not rely on a family's rows being adjacent.
first<-!duplicated(daughters$family)
fd<-daughters[first,c("mother","father","height")]
# Mother's height multiplied by 2.54 (presumably inches to centimetres).  This
# column is an exact multiple of mother and is used below to show collinearity.
fd$mothercm<-fd$mother*2.54
# Regress mothercm on an intercept and centred mother using the projection
# Z (Z'Z)^{-1} Z', and print the residuals.  Columns are taken from fd
# explicitly instead of through attach(fd): attach() leaves a copy of fd on
# the search path that is never detached and that does not pick up columns
# added to fd afterwards.
Z<-cbind(1,fd$mother-mean(fd$mother))
nextz<-fd$mothercm-Z%*%solve(t(Z)%*%Z)%*%t(Z)%*%fd$mothercm
# mothercm is an exact multiple of mother, so these residuals should be zero
# apart from rounding error.
nextz
# Design matrix with an intercept, mother, and mothercm.
X<-cbind(1,fd$mother,fd$mothercm)
# Cross-product matrix X'X.
M<-t(X)%*%X
M
# The third column of X is 2.54 times the second, so X'X is singular and
# solve() is expected to stop with a "computationally singular" error.  try()
# still shows that error but lets the remainder of the script run when it is
# executed non-interactively; if the inverse does exist it is printed as usual.
try(solve(M))
# lm() does not fail on the same design; see how the redundant mothercm term
# is reported in the coefficient table.
summary(lm(height~mother+mothercm,data=fd))
# Rounding the rescaled height to a whole number means it is no longer an
# exact multiple of mother, although the two remain very highly correlated.
fd$mothercmround<-round(2.54*fd$mother)
# Reference fit using mother alone.
summary(lm(height~mother,data=fd))
# Fit with both mother and its rounded, rescaled version.
colinfit<-lm(height~mother+mothercmround,data=fd)
summary(colinfit)
# First install via install.packages("regclass")
library(regclass)# For VIF
# Variance inflation factors for the nearly collinear fit.
VIF(colinfit)
# Fit height on both parents' heights; the summary reports the residual
# standard error, and the matrix calculation below reproduces its square.
myfit<-lm(height~mother+father,data=fd)
summary(myfit)
VIF(myfit)
X<-cbind(1,fd$mother,fd$father)
# H is the identity minus the projection X (X'X)^{-1} X', i.e. the matrix that
# maps the response to its residuals.
H<-diag(nrow(X))-X%*%solve(t(X)%*%X)%*%t(X)
# Residual sum of squares y'Hy divided by (rows of X - columns of X): the
# residual variance estimate.
fd$height%*%H%*%fd$height/(nrow(X)-ncol(X))
# Same design matrix as for myfit: intercept, mother, father.
X<-cbind(1,fd$mother,fd$father)
# (X'X)^{-1}, which is proportional to the covariance matrix of the estimates.
varmat<-solve(t(X)%*%X)
# Least-squares coefficients (X'X)^{-1} X'y.
est<-varmat%*%t(X)%*%fd$height
# Quadratic form in the two slope estimates, weighted by the inverse of the
# slope block of (X'X)^{-1}; compare with the sums of squares in the ANOVA
# tables that follow.
slopes<-est[2:3]
slopes%*%solve(varmat[2:3,2:3])%*%slopes
# Three ANOVA tables for the regression of height on mother and father.
# Base R's table for myfit comes first.
# This R code doesn't give you quite what you might expect.
anova(myfit)
# Library rms must have been previously installed via
# install.packages("rms")
library(rms)# For ols
# Refit the same model with ols() so that the rms version of anova() is used.
anova(ols(height~mother+father,data=fd))
# This gives something closer to what is traditionally produced, except that
# there's no total line, and each of the separate explanatory variables is
# given.
library(NonparametricHeuristic)# For traditionalanova
traditionalanova(myfit)