R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> pdf("g03.pdf");options(width=64)
> # Fisher's Z is in the package DescTools.  If not already installed, do
> #install.packages("DescTools")
> library(DescTools)# For CorCI
> #Examine the parent-child height data.  Data are from
> #https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/T0HSJ1
> #Documentation claims that these are formatted for Stata; fortunately R reads 
> #them just fine.  Tell R that the first row contains column names.
> galton<-read.table("galton-stata11.tab",header=TRUE)
> #First examine daughters.
> daughters<-galton[galton$female==1,]
> #Families have multiple children.  Most of what we will do this semester will
> #require observations to be independent, and so keep only the first daughter.
> first<-c(TRUE,daughters$family[-1]!=daughters$family[-length(daughters$family)])
> fd<-daughters[first,c("mother","father","height")]
> cat("Confidence Interval for Mother Daughter Height Cor. via Fisher Z\n")
Confidence Interval for Mother Daughter Height Cor. via Fisher Z
> CorCI(cor(fd$height,fd$mother),length(fd$mother),
+    conf.level=0.95,alternative="two.sided")
      cor    lwr.ci    upr.ci 
0.3551171 0.2152827 0.4806815 
> # This confidence interval differs slightly from that in SAS,;
> # in that SAS includes a small bias correction but R does not.;
> library(datasets)# For data set anscombe
> #Get documentation for the data set using ?anscombe
> summary(fit1<-lm(y1~x1,data=anscombe))

Call:
lm(formula = y1 ~ x1, data = anscombe)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.92127 -0.45577 -0.04136  0.70941  1.83882 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)   3.0001     1.1247   2.667  0.02573 * 
x1            0.5001     0.1179   4.241  0.00217 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.237 on 9 degrees of freedom
Multiple R-squared:  0.6665,	Adjusted R-squared:  0.6295 
F-statistic: 17.99 on 1 and 9 DF,  p-value: 0.00217

> summary(fit2<-lm(y2~x2,data=anscombe))

Call:
lm(formula = y2 ~ x2, data = anscombe)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9009 -0.7609  0.1291  0.9491  1.2691 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)    3.001      1.125   2.667  0.02576 * 
x2             0.500      0.118   4.239  0.00218 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.237 on 9 degrees of freedom
Multiple R-squared:  0.6662,	Adjusted R-squared:  0.6292 
F-statistic: 17.97 on 1 and 9 DF,  p-value: 0.002179

> summary(fit3<-lm(y3~x3,data=anscombe))

Call:
lm(formula = y3 ~ x3, data = anscombe)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.1586 -0.6146 -0.2303  0.1540  3.2411 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)   3.0025     1.1245   2.670  0.02562 * 
x3            0.4997     0.1179   4.239  0.00218 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.236 on 9 degrees of freedom
Multiple R-squared:  0.6663,	Adjusted R-squared:  0.6292 
F-statistic: 17.97 on 1 and 9 DF,  p-value: 0.002176

> summary(fit4<-lm(y4~x4,data=anscombe))

Call:
lm(formula = y4 ~ x4, data = anscombe)

Residuals:
   Min     1Q Median     3Q    Max 
-1.751 -0.831  0.000  0.809  1.839 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)   3.0017     1.1239   2.671  0.02559 * 
x4            0.4999     0.1178   4.243  0.00216 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.236 on 9 degrees of freedom
Multiple R-squared:  0.6667,	Adjusted R-squared:  0.6297 
F-statistic:    18 on 1 and 9 DF,  p-value: 0.002165

> par(mfrow=c(2,2))
> attach(anscombe)
> plot(x1,y1,main="Standard Case"); abline(fit1)
> plot(x2,y2,main="Linear Model Wrong"); abline(fit2)
> plot(x3,y3,main="Outlier"); abline(fit3)
> plot(x4,y4,main="Influential Observation"); abline(fit4)
> par(mfrow=c(1,1))
> detach(anscombe)
> attach(fd)
> # Build the 168 x 2 regression model with the first column all 1s and the 
> # second column the regressor of interest.
> X<-cbind(1,mother)
> # Transposing is done by t().  Multiplication is done by %*%.  The matrix 
> # inverse is calculated using solve().  This now gives the same parameter 
> # fit as above:
> solve(t(X)%*%X)%*%t(X)%*%height
             [,1]
       42.2485365
mother  0.3565058
> # Here we're using the matrix inverse to solve a system of equations with two
> # equations and two unknown.  R does this more efficiently as
> solve(t(X)%*%X,t(X)%*%height)
             [,1]
       42.2485365
mother  0.3565058
> # Compare with
> print(onefit<-lm(height~mother,data=fd))

Call:
lm(formula = height ~ mother, data = fd)

Coefficients:
(Intercept)       mother  
    42.2485       0.3565  

> # We can now add father's height
> X<-cbind(1,mother,father)
> solve(t(X)%*%X,t(X)%*%height)
             [,1]
       17.1080320
mother  0.3287695
father  0.3875316
> detach(fd)
> myfit<-lm(height~mother+father,data=fd)
> print(myfit)

Call:
lm(formula = height ~ mother + father, data = fd)

Coefficients:
(Intercept)       mother       father  
    17.1080       0.3288       0.3875  

> #Fit with mother and father
> myfit

Call:
lm(formula = height ~ mother + father, data = fd)

Coefficients:
(Intercept)       mother       father  
    17.1080       0.3288       0.3875  

> #Fit with just mother
> onefit

Call:
lm(formula = height ~ mother, data = fd)

Coefficients:
(Intercept)       mother  
    42.2485       0.3565  

> #Notice that the fit including father has mother's coefficient smaller.  
> #That's because taller women tend to be married to taller men, and so 
> #some of the increased height for daughters of taller mothers
> #gets ascribed to the fathers.
> #Perform the standardized regression.  Standardized the response explanatory variables.
> fd$stheight<-(fd$height-mean(fd$height))/sd(fd$height)
> fd$stmother<-(fd$mother-mean(fd$mother))/sd(fd$mother)
> fd$stfather<-(fd$father-mean(fd$father))/sd(fd$father)
> lm(stheight~stmother+stfather,data=fd)

Call:
lm(formula = stheight ~ stmother + stfather, data = fd)

Coefficients:
(Intercept)     stmother     stfather  
 -5.939e-16    3.275e-01    4.511e-01  

> # Shows that changing mother's height by 1 sd changes daughter height by 
> # approx one third sd.
> # Shows that changing father's height by 1 sd changes daughter height by 
> # approx one half sd.
> 
<\/pre>