R version 4.3.1 (2023-06-16) -- "Beagle Scouts"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> pdf("g09.pdf");options(width=64)
> #setwd("C:\\Users\\kolassa\\Class553")
> setwd("~/Taught1/960-553/Data")
> #****************************************************/
> # Housing data from Cox and Snell (1980), Example W,*/
> # on satisfaction with housing in Copenhagen.  Data */
> # was extracted from R package MASS.  Data may also */
> # be found at                                       */
> # www.stat.ucla.edu/data/cox-and-snell/exampleW.data*/
> #****************************************************/
> housing<-read.table("housing.dat",header=FALSE,
+   col.names=c("nn","sat","infl","type","contact","freq"))
> housing$nsat<-(housing$sat=="Medium")+2*(housing$sat=="High")
> housing$ninf<-(housing$infl=="Medium")+2*(housing$infl=="High")
> housing$ncont<-1*(housing$contact=="High")
> housing
   nn    sat   infl      type contact freq nsat ninf ncont
1   1    Low    Low     Tower     Low   21    0    0     0
2   2 Medium    Low     Tower     Low   21    1    0     0
3   3   High    Low     Tower     Low   28    2    0     0
4   4    Low Medium     Tower     Low   34    0    1     0
5   5 Medium Medium     Tower     Low   22    1    1     0
6   6   High Medium     Tower     Low   36    2    1     0
7   7    Low   High     Tower     Low   10    0    2     0
8   8 Medium   High     Tower     Low   11    1    2     0
9   9   High   High     Tower     Low   36    2    2     0
10 10    Low    Low Apartment     Low   61    0    0     0
11 11 Medium    Low Apartment     Low   23    1    0     0
12 12   High    Low Apartment     Low   17    2    0     0
13 13    Low Medium Apartment     Low   43    0    1     0
14 14 Medium Medium Apartment     Low   35    1    1     0
15 15   High Medium Apartment     Low   40    2    1     0
16 16    Low   High Apartment     Low   26    0    2     0
17 17 Medium   High Apartment     Low   18    1    2     0
18 18   High   High Apartment     Low   54    2    2     0
19 19    Low    Low    Atrium     Low   13    0    0     0
20 20 Medium    Low    Atrium     Low    9    1    0     0
21 21   High    Low    Atrium     Low   10    2    0     0
22 22    Low Medium    Atrium     Low    8    0    1     0
23 23 Medium Medium    Atrium     Low    8    1    1     0
24 24   High Medium    Atrium     Low   12    2    1     0
25 25    Low   High    Atrium     Low    6    0    2     0
26 26 Medium   High    Atrium     Low    7    1    2     0
27 27   High   High    Atrium     Low    9    2    2     0
28 28    Low    Low   Terrace     Low   18    0    0     0
29 29 Medium    Low   Terrace     Low    6    1    0     0
30 30   High    Low   Terrace     Low    7    2    0     0
31 31    Low Medium   Terrace     Low   15    0    1     0
32 32 Medium Medium   Terrace     Low   13    1    1     0
33 33   High Medium   Terrace     Low   13    2    1     0
34 34    Low   High   Terrace     Low    7    0    2     0
35 35 Medium   High   Terrace     Low    5    1    2     0
36 36   High   High   Terrace     Low   11    2    2     0
37 37    Low    Low     Tower    High   14    0    0     1
38 38 Medium    Low     Tower    High   19    1    0     1
39 39   High    Low     Tower    High   37    2    0     1
40 40    Low Medium     Tower    High   17    0    1     1
41 41 Medium Medium     Tower    High   23    1    1     1
42 42   High Medium     Tower    High   40    2    1     1
43 43    Low   High     Tower    High    3    0    2     1
44 44 Medium   High     Tower    High    5    1    2     1
45 45   High   High     Tower    High   23    2    2     1
46 46    Low    Low Apartment    High   78    0    0     1
47 47 Medium    Low Apartment    High   46    1    0     1
48 48   High    Low Apartment    High   43    2    0     1
49 49    Low Medium Apartment    High   48    0    1     1
50 50 Medium Medium Apartment    High   45    1    1     1
51 51   High Medium Apartment    High   86    2    1     1
52 52    Low   High Apartment    High   15    0    2     1
53 53 Medium   High Apartment    High   25    1    2     1
54 54   High   High Apartment    High   62    2    2     1
55 55    Low    Low    Atrium    High   20    0    0     1
56 56 Medium    Low    Atrium    High   23    1    0     1
57 57   High    Low    Atrium    High   20    2    0     1
58 58    Low Medium    Atrium    High   10    0    1     1
59 59 Medium Medium    Atrium    High   22    1    1     1
60 60   High Medium    Atrium    High   24    2    1     1
61 61    Low   High    Atrium    High    7    0    2     1
62 62 Medium   High    Atrium    High   10    1    2     1
63 63   High   High    Atrium    High   21    2    2     1
64 64    Low    Low   Terrace    High   57    0    0     1
65 65 Medium    Low   Terrace    High   23    1    0     1
66 66   High    Low   Terrace    High   13    2    0     1
67 67    Low Medium   Terrace    High   31    0    1     1
68 68 Medium Medium   Terrace    High   21    1    1     1
69 69   High Medium   Terrace    High   13    2    1     1
70 70    Low   High   Terrace    High    5    0    2     1
71 71 Medium   High   Terrace    High    6    1    2     1
72 72   High   High   Terrace    High   13    2    2     1
> #*****************************************************/
> # Polytomous Regression.  Baseline logit model first.*/
> #*****************************************************/
> cat('\n Baseline logit model  \n')

 Baseline logit model  
> #To make the numbers come close to those we get from SAS,
> #rename the categories to change the ordering. Resulting 
> #effect coefficients will be double those of SAS, since  
> #proc logistic by default uses the -1,1 parameterization.
> library(nnet)#For multinom; Fits the multinomial regression model.
> #Note below that the levels of sat are ordered,
> #but the ordering doesn't matter except to determine
> #which category is baseline.
> multinom(factor(sat)~contact,data=housing,weight=freq)
# weights:  9 (4 variable)
initial  value 1846.767257 
final  value 1821.875901 
converged
Call:
multinom(formula = factor(sat) ~ contact, data = housing, weights = freq)

Coefficients:
       (Intercept)  contactLow
Low     -0.2585741  0.21744689
Medium  -0.3878994 -0.03978849

Residual Deviance: 3643.752 
AIC: 3651.752 
> #**********************************************************/
> # Cumulative logit model.  First reuse a known technique. */
> # Simple model exhibits different parameterizations.      */
> #**********************************************************/
> cat('\n Binary response for comparison purpose  \n')

 Binary response for comparison purpose  
> housing$oksat<-housing$nsat>1
> glm(oksat~factor(contact),family=binomial,data=housing,
+         weight=freq)

Call:  glm(formula = oksat ~ factor(contact), family = binomial, data = housing, 
    weights = freq)

Coefficients:
       (Intercept)  factor(contact)Low  
           -0.3720             -0.1053  

Degrees of Freedom: 71 Total (i.e. Null);  70 Residual
Null Deviance:	    2259 
Residual Deviance: 2258 	AIC: 2262
> library(MASS)#For polr; Fits ordinal categorical regression models.
> #*************************************************************/
> # The following data set will not give us what we want, be-  */
> # cause it will order sat incorrectly.  Recode data to put   */
> # medium between high and low, and add descending to make    */
> # low the baseline.  First fit the simplest model.  This     */
> # should give logits of raw proportions.  Check this.        */
> #*************************************************************/
> #Note below that the levels of rsat are ordered.  Default
> #is alphabetical.  That's why I changed "High" to "Top".
> #You can also do this as an argument to as.factor.
> cat('\n Ordinal Logistic Regression with wrong order  \n')

 Ordinal Logistic Regression with wrong order  
> polr(factor(sat)~1,weights=freq,data=housing)
Call:
polr(formula = factor(sat) ~ 1, data = housing, weights = freq)

No coefficients

Intercepts:
  High|Low Low|Medium 
-0.4163832  1.0185074 

Residual Deviance: 3648.878 
AIC: 3652.878 
> cat('\n Ordinal Logistic Reg. with right order, empty model\n')

 Ordinal Logistic Reg. with right order, empty model
> # polr regresses with ordered response, and Allows various 
> # link functions.                                     
> polr(factor(nsat)~1,weights=freq,data=housing)
Call:
polr(formula = factor(nsat) ~ 1, data = housing, weights = freq)

No coefficients

Intercepts:
       0|1        1|2 
-0.6752324  0.4164405 

Residual Deviance: 3648.878 
AIC: 3652.878 
> cat('\n Ordinal Logist. Reg. with added covariates  \n')

 Ordinal Logist. Reg. with added covariates  
> polr(factor(nsat)~infl+type+contact,weights=freq,data=housing)
Call:
polr(formula = factor(nsat) ~ infl + type + contact, data = housing, 
    weights = freq)

Coefficients:
    inflLow  inflMedium  typeAtrium typeTerrace   typeTower 
 -1.2888275  -0.7224378   0.2061487  -0.5186378   0.5723756 
 contactLow 
 -0.3602933 

Intercepts:
       0|1        1|2 
-1.5728637 -0.3860317 

Residual Deviance: 3479.149 
AIC: 3495.149 
> housing$rsat<-as.character(housing$sat) 
> housing$rsat[housing$sat=="High"]<-"Top"
> housing$rsat<-as.factor(housing$rsat)
> (polr1<-polr(rsat~infl+type+contact,weights=freq,
+    data=housing))
Call:
polr(formula = rsat ~ infl + type + contact, data = housing, 
    weights = freq)

Coefficients:
    inflLow  inflMedium  typeAtrium typeTerrace   typeTower 
 -1.2888275  -0.7224378   0.2061487  -0.5186378   0.5723756 
 contactLow 
 -0.3602933 

Intercepts:
Low|Medium Medium|Top 
-1.5728637 -0.3860317 

Residual Deviance: 3479.149 
AIC: 3495.149 
> library(VGAM)#For vglm; Fits an ordinal baseline logit model and some variants.
> #Fit a separate set of parameters for each transition.  There
> #are ways to add constraints to keep parameters constant from
> #one level of response to the next.
> vglm(ordered(rsat)~infl+type+contact,
+    data=housing,family=cumulative,weight=freq)

Call:
vglm(formula = ordered(rsat) ~ infl + type + contact, family = cumulative, 
    data = housing, weights = freq)


Coefficients:
(Intercept):1 (Intercept):2     inflLow:1     inflLow:2 
  -1.49466789   -0.41887924    1.21915968    1.30774200 
 inflMedium:1  inflMedium:2  typeAtrium:1  typeAtrium:2 
   0.62669496    0.75794777   -0.40998457   -0.05488104 
typeTerrace:1 typeTerrace:2   typeTower:1   typeTower:2 
   0.47784805    0.58105634   -0.60115316   -0.53792605 
 contactLow:1  contactLow:2 
   0.43049306    0.29566435 

Degrees of Freedom: 144 Total; 130 Residual
Residual deviance: 3470.579 
Log-likelihood: -1735.289 
> vglm(ordered(rsat)~infl+type+contact,
+    data=housing,family=cratio,weight=freq)

Call:
vglm(formula = ordered(rsat) ~ infl + type + contact, family = cratio, 
    data = housing, weights = freq)


Coefficients:
(Intercept):1 (Intercept):2     inflLow:1     inflLow:2 
   1.50129002    1.02674966   -1.24303691   -0.96303213 
 inflMedium:1  inflMedium:2  typeAtrium:1  typeAtrium:2 
  -0.64223760   -0.64854782    0.43592249   -0.20772988 
typeTerrace:1 typeTerrace:2   typeTower:1   typeTower:2 
  -0.46475755   -0.44850968    0.61632292    0.32335345 
 contactLow:1  contactLow:2 
  -0.43014618   -0.09107075 

Degrees of Freedom: 144 Total; 130 Residual
Residual deviance: 3469.526 
Log-likelihood: -1734.763 
> vglm(ordered(rsat)~infl+type+contact,
+    data=housing,family=acat,weight=freq)

Call:
vglm(formula = ordered(rsat) ~ infl + type + contact, family = acat, 
    data = housing, weights = freq)


Coefficients:
(Intercept):1 (Intercept):2     inflLow:1     inflLow:2 
    0.1708698     1.0492138    -0.6649353    -0.9476957 
 inflMedium:1  inflMedium:2  typeAtrium:1  typeAtrium:2 
   -0.2185394    -0.6592284     0.5670590    -0.2394053 
typeTerrace:1 typeTerrace:2   typeTower:1   typeTower:2 
   -0.2308818    -0.4458142     0.4356887     0.2999430 
 contactLow:1  contactLow:2 
   -0.3608519    -0.1209751 

Degrees of Freedom: 144 Total; 130 Residual
Residual deviance: 3470.084 
Log-likelihood: -1735.042 

This is an adjacent categories model with 3 levels
> #**************************************************************/
> # Beetle data, from SAS documentation for complimentary log   */
> # log link, after reformatting to prepare data for treatment  */
> # by cloglog analysis.  Beetles were exposed to insecticides  */
> # and observed for 13 days.  The day on which the beetle died */
> # is recorded, with beetles lasting all 13 days being recorded*/ 
> # as 14.  Sex of the beetle (m=1, f=2), and concentrations of*./
> # insecticide, were also recorded.                            */
> #**************************************************************/
> beetledays<-read.table("beetledays.dat",header=TRUE)
> beetledays$sex<-c("M","F")[beetledays$sex]
> # Starting model with -1 forces there to be no intercept, and 
> # hence effects for each day are estimable.  Effects are
> # discrete hazards, which equal conditional probability of
> # failure at that time given survival up until tht time.
> # Baseline concentration is .20, measired in mg/cm^2.
> summary(glm(y~-1+factor(day)+sex+factor(conc), data=beetledays, 
+    weights=freq, family=binomial(link="cloglog")))

Call:
glm(formula = y ~ -1 + factor(day) + sex + factor(conc), family = binomial(link = "cloglog"), 
    data = beetledays, weights = freq)

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
factor(day)1      -4.9094     0.2565 -19.137  < 2e-16 ***
factor(day)2      -3.8503     0.1929 -19.964  < 2e-16 ***
factor(day)3      -3.3698     0.1779 -18.939  < 2e-16 ***
factor(day)4      -2.9518     0.1690 -17.464  < 2e-16 ***
factor(day)5      -3.4331     0.1994 -17.213  < 2e-16 ***
factor(day)6      -4.0470     0.2609 -15.512  < 2e-16 ***
factor(day)7      -4.9112     0.3928 -12.503  < 2e-16 ***
factor(day)8      -4.7281     0.3696 -12.792  < 2e-16 ***
factor(day)9      -6.0845     0.7154  -8.505  < 2e-16 ***
factor(day)10     -6.0651     0.7125  -8.512  < 2e-16 ***
factor(day)11     -6.0427     0.7141  -8.462  < 2e-16 ***
factor(day)12     -6.0298     0.7155  -8.427  < 2e-16 ***
factor(day)13     -6.0214     0.7155  -8.415  < 2e-16 ***
sexM               0.6185     0.1144   5.405 6.49e-08 ***
factor(conc)0.32   1.2599     0.1624   7.759 8.58e-15 ***
factor(conc)0.5    1.6341     0.1677   9.742  < 2e-16 ***
factor(conc)0.8    2.1101     0.1640  12.866  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 10509.4  on 462  degrees of freedom
Residual deviance:  1909.9  on 445  degrees of freedom
AIC: 1943.9

Number of Fisher Scoring iterations: 7

> #************************************************************/
> # Cf. the complimentary log log.  Odds ratios are no longer */
> # given, since they don't come naturally out of the model.  */
> # Intercepts have a completely different meaning and        */
> # won't be compared.  Linear parameters are multiplied by a */
> # constant, approx., and z statistics are similar.          */
> #************************************************************/
> polr1
Call:
polr(formula = rsat ~ infl + type + contact, data = housing, 
    weights = freq)

Coefficients:
    inflLow  inflMedium  typeAtrium typeTerrace   typeTower 
 -1.2888275  -0.7224378   0.2061487  -0.5186378   0.5723756 
 contactLow 
 -0.3602933 

Intercepts:
Low|Medium Medium|Top 
-1.5728637 -0.3860317 

Residual Deviance: 3479.149 
AIC: 3495.149 
> cat('\n Complimentary log log  \n')

 Complimentary log log  
> polr(factor(rsat)~infl+type+contact,weights=freq,data=housing,
+   method="cloglog")
Call:
polr(formula = factor(rsat) ~ infl + type + contact, data = housing, 
    weights = freq, method = "cloglog")

Coefficients:
    inflLow  inflMedium  typeAtrium typeTerrace   typeTower 
 -0.9153805  -0.5333315   0.1266692  -0.3352559   0.4071971 
 contactLow 
 -0.2092257 

Intercepts:
Low|Medium Medium|Top 
-1.5136135 -0.6620318 

Residual Deviance: 3484.053 
AIC: 3500.053 
>