R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> pdf("g02.pdf");options(width=64)
> #Examine the parent-child height data.  Data are from
> #https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/T0HSJ1
> #Documentation claims that these are formatted for Stata; fortunately R reads 
> #them just fine.  Tell R that the first row contains column names.
> galton<-read.table("galton-stata11.tab",header=TRUE)
> #First examine daughters.
> daughters<-galton[galton$female==1,]
> #Families have multiple children.  Most of what we will do this semester will
> #require observations to be independent, and so keep only the first daughter.
> first<-c(TRUE,daughters$family[-1]!=daughters$family[-length(daughters$family)])
> fd<-daughters[first,c("mother","father","height")]
> #Calculate the variance of the regression estimates for the daughter-mother
> #regression.
> attach(fd)
> beta1hat<-sum((mother-mean(mother))*(height-mean(height)))/sum(
+    (mother-mean(mother))^2)
> beta0hat<-mean(height)-beta1hat*mean(mother)
> sigmasqhat<-sum((height-beta0hat-beta1hat*mother)^2)/(length(mother)-2)
> se0<-sqrt(sigmasqhat*(1/length(mother)+mean(mother)^2/sum((mother-mean(mother))^2)))
> print(pt(abs(beta0hat-0)/se0,length(mother)-2))
[1] 1
> se1<-sqrt(sigmasqhat/sum((mother-mean(mother))^2))
> #Test the null hypothesis that the line runs through 0,0
> print(pt(1-abs(beta0hat-0)/se0,length(mother)-2))
[1] 7.973978e-14
> #Test the null hypothesis that the line has slope 0.
> #This hypothesis implies that mother's height has no impact on daughter's height
> print(pt(1-abs(beta1hat-0)/se1,length(mother)-2))
[1] 7.122407e-05
> #Test the null hypothesis that daughter's height goes up 1 for 1 with mom's height.
> print(pt(1-abs(beta1hat-1)/se1,length(mother)-2))
[1] 2.67195e-13
> detach(fd)
> summary(momfit<-lm(height~mother,data=fd))

Call:
lm(formula = height ~ mother, data = fd)

Residuals:
   Min     1Q Median     3Q    Max 
-7.708 -1.221  0.096  1.292  5.792 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 42.24854    4.67281   9.041 3.92e-16 ***
mother       0.35651    0.07284   4.894 2.32e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.179 on 166 degrees of freedom
Multiple R-squared:  0.1261,	Adjusted R-squared:  0.1208 
F-statistic: 23.95 on 1 and 166 DF,  p-value: 2.32e-06

> confint(lm(height~mother,data=fd))
                 2.5 %    97.5 %
(Intercept) 33.0227475 51.474325
mother       0.2126937  0.500318
> #Fitted value for average daughter's height if mom's height is 5 feet.
> predict(momfit, newdata=data.frame(mother=60),interval="confidence")
       fit      lwr      upr
1 63.63889 62.96096 64.31681
> attach(fd)
> plot.default(mother,height,
+    main="Confidence Interval for Fitted Value of Daughter Heights",
+    sub="95% intervals.  Notice how they widen for values farther from mean")
> detach(fd)
> pp<-predict(momfit,data.frame(mother=60:72),interval="confidence")
> for(j in 2:3) lines(60:72,pp[,j])
> #Predict new daughter's height if mom's height is 5 feet.
> predict(lm(height~mother,data=fd),
+    newdata=data.frame(mother=60),interval="predict")
       fit      lwr     upr
1 63.63889 59.28317 67.9946
> pp<-predict(momfit,data.frame(mother=60:72),interval="prediction")
> for(j in 2:3) lines(60:72,pp[,j],lty=2)
> legend(66,59,legend=c("confidence","prediction"),lty=1:2)
> originfit<-lm(height~mother-1,data=fd)
> attach(fd)
> print(beta1hat<-sum(mother*height)/sum(mother^2))
[1] 1.014652
> print(sigmahatsq<-sum((height-beta1hat*mother)^2/
+    (length(mother)-1)))
[1] 7.045438
> print(se1<-sqrt(sigmahatsq/sum(mother^2)))
[1] 0.00319221
> summary(originfit)

Call:
lm(formula = height ~ mother - 1, data = fd)

Residuals:
    Min      1Q  Median      3Q     Max 
-9.8330 -0.9817  0.0366  1.5586  7.1502 

Coefficients:
       Estimate Std. Error t value Pr(>|t|)    
mother 1.014652   0.003192   317.9   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.654 on 167 degrees of freedom
Multiple R-squared:  0.9983,	Adjusted R-squared:  0.9983 
F-statistic: 1.01e+05 on 1 and 167 DF,  p-value: < 2.2e-16

> detach(fd)
> 
<\/pre>