pdf("g11.pdf");options(width=64)
#Examine the parent-child height data.  Data are from
#https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/T0HSJ1
#Documentation claims that these are formatted for Stata; fortunately R reads 
#them just fine.  Tell R that the first row contains column names.
galton<-read.table("galton-stata11.tab",header=TRUE)
#First examine daughters.
daughters<-galton[galton$female==1,]
#Families have multiple children.  Most of what we will do this semester will
#require observations to be independent, and so keep only the first daughter.
first<-c(TRUE,daughters$family[-1]!=daughters$family[-length(daughters$family)])
fd<-daughters[first,c("mother","father","height")]
fd$mcmround<-round(fd$mother*2.54)
summary(colinfit<-lm(height~mother+mcmround,data=fd,x=TRUE))
# Examine variance inflation factor.  As far as I can tell, VIF
# from package regclass gives the same results as vif from
# package car.
# First install via install.packages("regclass")
library(car)# For vif
vif(colinfit)
cor(colinfit$x[,-1])
# Exhibit residual standard deviation fit.
# I will use squareds later as well.
fd$m2<-fd$mother^2
fd$f2<-fd$father^2
summary(quadfit<-lm(height~mother+m2+father+f2,data=fd,x=TRUE))
vif(quadfit)
cor(quadfit$x[,-1])
# Columns of the vectors component are eigenvectors.
eigen(t(colinfit$x)%*%colinfit$x)
eigen(t(quadfit$x)%*%quadfit$x)
# Can often better address this if covariates are rescaled
ss<-apply(quadfit$x^2,2,sum)
quadfit$newx<-t(apply(quadfit$x,1,"/",sqrt(ss)))
cor(quadfit$newx[,-1])
eigen(t(quadfit$newx)%*%quadfit$newx)
ss<-apply(colinfit$x^2,2,sum)
colinfit$newx<-t(apply(colinfit$x,1,"/",sqrt(ss)))
cor(colinfit$newx[,-1])
eigen(t(colinfit$newx)%*%colinfit$newx)
fd$f2o<-resid(lm(f2~father,data=fd))
fd$m2o<-resid(lm(m2~mother,data=fd))
vif(lm(height ~ mother + m2 + father + f2, data = fd))
vif(lm(height ~ mother + m2o + father + f2o, data = fd))
library(MASS)# For lm.ridge
#lm.ridge does not penalize intercept.
ridge1<-lm.ridge(height~mother+mcmround, lambda=(0:100)/100,data=fd)
plot(ridge1)
ridge2<-lm.ridge(height~mother+I(mother^2)+father+I(father^2),
   lambda=(0:100)/100,data=fd)
plot(ridge2)
#More logical to apply to standardized covariates
select(ridge4<-lm.ridge(height~quadfit$newx[,-1],lambda=(0:10)/10,data=fd))
<\/pre>