#Send all graphics to g02.pdf and set the printed output width to 64.
pdf("g02.pdf")
options(width=64)
#Examine the parent-child height data.  Data are from
#https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/T0HSJ1
#The documentation says the file is formatted for Stata; fortunately R reads
#it just fine.  header=TRUE tells R that the first row contains column names.
galton<-read.table("galton-stata11.tab",header=TRUE)
#First examine daughters.
daughters<-galton[galton$female==1,]
#Families have multiple children.  Most of what we will do this semester will
#require observations to be independent, and so keep only the first daughter
#listed for each family.  duplicated() marks every repeat of a family
#identifier, so its negation is TRUE exactly once per family; unlike comparing
#each row with the one before it, this does not need the rows of a family to be
#adjacent, and it also works when there are no daughters at all.
first<-!duplicated(daughters$family)
fd<-daughters[first,c("mother","father","height")]
#Calculate by hand the estimates, their standard errors, and t tests for the
#daughter-mother regression.  Each expression is evaluated against the columns
#of fd with with() rather than attach(), so nothing is left on the search path
#if a line fails part-way through.
beta1hat<-with(fd,sum((mother-mean(mother))*(height-mean(height)))/
   sum((mother-mean(mother))^2))
beta0hat<-with(fd,mean(height)-beta1hat*mean(mother))
#Residual degrees of freedom: two parameters (intercept and slope) are estimated.
resdf<-nrow(fd)-2
sigmasqhat<-with(fd,sum((height-beta0hat-beta1hat*mother)^2))/resdf
se0<-with(fd,sqrt(sigmasqhat*(1/length(mother)+
   mean(mother)^2/sum((mother-mean(mother))^2))))
se1<-with(fd,sqrt(sigmasqhat/sum((mother-mean(mother))^2)))
#Each test below prints the two-sided p-value 2*P(T>|t|), where T has a t
#distribution on resdf degrees of freedom.  The upper tail is requested from
#pt() directly; the "1-" must not go inside the quantile argument.  These are
#the same quantities that summary() of an lm fit reports as Pr(>|t|).
#Test the null hypothesis that the line runs through 0,0
print(2*pt(abs(beta0hat-0)/se0,resdf,lower.tail=FALSE))
#Test the null hypothesis that the line has slope 0.
#This hypothesis implies that mother's height has no impact on daughter's height
print(2*pt(abs(beta1hat-0)/se1,resdf,lower.tail=FALSE))
#Test the null hypothesis that daughter's height goes up 1 for 1 with mom's height.
print(2*pt(abs(beta1hat-1)/se1,resdf,lower.tail=FALSE))
#Fit the daughter-on-mother regression once with lm() and reuse the fit below.
#Results are passed to print() explicitly so that they also appear when this
#file is run through source(), which does not auto-print top-level values.
momfit<-lm(height~mother,data=fd)
print(summary(momfit))
print(confint(momfit))
#Fitted value for average daughter's height if mom's height is 5 feet.
print(predict(momfit,newdata=data.frame(mother=60),interval="confidence"))
#Scatterplot of the data.  The columns are taken from fd directly instead of
#via attach(); the axis labels are given so they still read "mother"/"height".
plot.default(fd$mother,fd$height,xlab="mother",ylab="height",
   main="Confidence Interval for Fitted Value of Daughter Heights",
   sub="95% intervals.  Notice how they widen for values farther from mean")
#Mother's heights, 60 to 72 inches, at which the interval bands are drawn.
momgrid<-data.frame(mother=60:72)
#Solid lines: lower and upper confidence limits for the mean response.
pp<-predict(momfit,momgrid,interval="confidence")
for(j in c("lwr","upr")) lines(momgrid$mother,pp[,j])
#Predict new daughter's height if mom's height is 5 feet.
print(predict(momfit,newdata=data.frame(mother=60),interval="prediction"))
#Dashed lines: lower and upper prediction limits for a new observation.
pp<-predict(momfit,momgrid,interval="prediction")
for(j in c("lwr","upr")) lines(momgrid$mother,pp[,j],lty=2)
legend(66,59,legend=c("confidence","prediction"),lty=1:2)
#Fit the regression of daughter's height on mother's height with the line
#forced through the origin (the -1 in the formula removes the intercept).
originfit<-lm(height~mother-1,data=fd)
#Reproduce the slope, the residual variance and the slope's standard error by
#hand.  with() evaluates each expression against the columns of fd, so no
#attach()/detach() pair is needed.
print(beta1hat<-with(fd,sum(mother*height)/sum(mother^2)))
#Only the slope is estimated, so the residual sum of squares is divided by n-1.
print(sigmahatsq<-with(fd,sum((height-beta1hat*mother)^2))/(nrow(fd)-1))
print(se1<-sqrt(sigmahatsq/sum(fd$mother^2)))
#print() explicitly so the table also appears when the file is run via source().
print(summary(originfit))
#Close the PDF device opened at the top of the script so that g02.pdf is
#written out completely; invisible() hides the device number dev.off() returns.
invisible(dev.off())