dp.97 <- read.table("http://jgill.wustl.edu/data/dp.data",header=TRUE)
dp.out <- glm(EXECUTIONS ~ INCOME + PERPOVERTY + PERBLACK + log(VC100k96) + SOUTH
                                  + PROPDEGREE, family=poisson,data=dp.97)

ci.summary <- function (in.object, alpha = 0.05,rd=4)
{
  ci <- in.object$coef + qnorm(1-alpha/2) * t(c(-1,1) %o% summary(in.object)$coef[,2])
  out.mat <- round(cbind(in.object$coefficient, summary(in.object)$coef[,2], ci),rd)
  dimnames(out.mat)[[2]] <- c("Coefficient","Std. Error",
      paste(1-alpha,"CI Lower"),paste(1-alpha,"CI Upper"))
  out.mat
}

ci.summary(dp.out)

              Coefficient Std. Error 0.95 CI Lower 0.95 CI Upper
(Intercept)       -6.3068     4.1823      -14.5040        1.8903
INCOME             0.0003     0.0001        0.0002        0.0004
PERPOVERTY         0.0690     0.0799       -0.0877        0.2257
PERBLACK          -0.0950     0.0229       -0.1398       -0.0502
log(VC100k96)      0.2212     0.4427       -0.6465        1.0890
SOUTH              2.3099     0.4291        1.4689        3.1509
PROPDEGREE       -19.7022     4.4679      -28.4592      -10.9452

library(faraway)                           bliss
data(bliss)                                  dead alive conc
                                           1    2    28    0
                                           2    8    22    1
                                           3   15    15    2
                                           4   23     7    3
                                           5   27     3    4

modl <- glm(cbind(dead,alive) ~ conc, family=binomial, bliss);    summary(modl)
Deviance Residuals:                   1        2        3        4        5  
                                -0.4510   0.3597   0.0000   0.0643  -0.2045  
Coefficients:
            Estimate Std. Error z value Pr(>|z|)
(Intercept)  -2.3238     0.4179  -5.561 2.69e-08
conc          1.1619     0.1814   6.405 1.51e-10

(Dispersion parameter for binomial family taken to be 1)
    Null deviance: 64.76327  on 4  degrees of freedom
Residual deviance:  0.37875  on 3  degrees of freedom

df.residual(modl)
3

deviance(modl)
0.3787483

1-pchisq(deviance(modl),df.residual(modl))
0.9445968

pchisq(deviance(modl),df.residual(modl),lower.tail=FALSE)
0.9445968

anova(modl,test="Chi")
Analysis of Deviance Table
Model: binomial, link: logit
Response: cbind(dead, alive)
Terms added sequentially (first to last)

     Df Deviance Resid. Df Resid. Dev P(>|Chi|)
NULL                     4     64.763          
conc  1   64.385         3      0.379 1.024e-15

modl2 <- glm(cbind(dead,alive) ~ conc+I(conc^2), family=binomial,bliss)
summary(modl2)
Deviance Residuals: 
       1         2         3         4         5  
-0.19971   0.32415  -0.21850   0.01265   0.05132  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.49589    0.59869  -4.169 3.06e-05
conc         1.41018    0.61696   2.286   0.0223
I(conc^2)   -0.06117    0.14319  -0.427   0.6692

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 64.76327  on 4  degrees of freedom
Residual deviance:  0.19549  on 2  degrees of freedom

anova(modl,modl2,test="Chi")
Analysis of Deviance Table

Model 1: cbind(dead, alive) ~ conc
Model 2: cbind(dead, alive) ~ conc + I(conc^2)
  Resid. Df Resid. Dev Df Deviance P(>|Chi|)
1         3    0.37875                      
2         2    0.19549  1  0.18325    0.6686

anova(modl2,test="Chi")
Analysis of Deviance Table
Model: binomial, link: logit
Response: cbind(dead, alive)
Terms added sequentially (first to last)
          Df Deviance Resid. Df Resid. Dev P(>|Chi|)
NULL                          4     64.763          
conc       1   64.385         3      0.379 1.024e-15
I(conc^2)  1    0.183         2      0.195    0.6686

residuals(modl,"deviance")
residuals(modl,"pearson")
residuals(modl,"response")
bliss$dead/30 - fitted(modl)
residuals(modl,"working")

par(mfrow=c(1,3),mar=c(4,4,2,2),oma=c(2,2,2,2))
plot(Species ~ Area, gala)
plot(Species ~ log(Area), gala)
mu <- predict(modp,type="response")
z <- predict(modp)+(gala$Species-mu)/mu
plot(z ~ log(Area), gala,ylab="Linearized Response")

modpl <- glm(Species ~ log(Area) + log(Elevation) + log(Nearest) + log(Scruz+0.1)
                     + log(Adjacent), family=poisson, gala)
c(deviance(modp),deviance(modpl))

mu <- predict(modpl,type="response")
u <- (gala$Species-mu)/mu + coef(modpl)[2]*log(gala$Area)
par(mfrow=c(1,2),mar=c(4,4,2,2),oma=c(2,2,2,2))
plot(u ~ log(Area), gala,ylab="Partial Residual")
abline(0,coef(modpl)[2])
z <- predict(modpl)+(gala$Species-mu)/mu
plot(z ~ predict(modpl), xlab="Linear predictor", ylab="Linearized Response")

par(mfrow=c(1,2),mar=c(4,4,2,2),oma=c(2,2,2,2))
halfnorm(rstudent(modpl))
gali <- influence(modpl)
halfnorm(gali$hat)

par(mfrow=c(1,2),mar=c(4,4,2,2),oma=c(2,2,2,2))
halfnorm(cooks.distance(modpl))
plot(gali$coef[,5],ylab="Change in Scruz coef",xlab="Case no.")
modplr <- glm(Species ~ log(Area) + log(Elevation) + log(Nearest)
                      + log(Scruz+0.1) + log(Adjacent),
              family=poisson, gala, subset=-25)

cbind(coef(modpl),coef(modplr))
modpla <- glm(Species ~ log(Area)+log(Adjacent), family=poisson, gala)
dp <- sum(residuals(modpla,type="pearson")^2)/modpla$df.res
summary(modpla,dispersion=dp)

scotland.df <- read.table("http://jgill.wustl.edu/data/scotvote.dat",header=TRUE)
scottish.vote.glm <- glm((PerYesTax/100) ~ CouncilTax * PerClaimantFemale 
                                         + StdMortalityRatio + Active + GDP 
                                         + Percentage5to15,
                         family=Gamma, data=scotland.df)


\begin{Verbatim}[formatcom=\color{MyGreen}]
committee.dat <- read.table("http://jgill.wustl.edu/data/committee.dat",
                            header=TRUE,col.names=1)
attach(committee.dat)
committee.out <- glm.nb(BILLS104 ~ SIZE + SUBS * (log(STAFF)) + PRESTIGE + BILLS103)

resp <- resid(committee.out,type="response")
pears <- resid(committee.out,type="pearson")
working <- resid(committee.out,type="working")
devs <- resid(committee.out,type="deviance")

copper.data <- read.table("http://jgill.wustl.edu/data/copper.dat",
    header=TRUE)

copper.stage1 <- glm(COPPERPRICE ~ INCOMEINDEX + ALUMPRICE 
                   + INVENTORYINDEX + log(TIME), family=Gamma)

copper.stage2 <- glm(WORLDCONSUMPTION ~
        copper.stage1$fitted.values + INCOMEINDEX + ALUMPRICE,
        family=Gamma)

############################### SETUP ###############################################

fluid.scan <- scan("http://jgill.wustl.edu/data/fluid.data",skip=1)
fluid.mat <- matrix(fluid.scan,byrow=T,nrow=15459,ncol=21)
fluid.mat <- fluid.mat[,1:13]

dimnames(fluid.mat)[[2]] <- c("fluidvot","dist2","coalspec","mwcf","factor",
    "landmark", "freshyr1", "expert2","cj","cjstrat","dissent","unanimou","minority")

fluid.factors <- data.frame(fluidvot=fluid.mat[,1],dist2=fluid.mat[,2],
                 coalspec=fluid.mat[,3], mwcf=fluid.mat[,4],complexity=fluid.mat[,5],
                 landmark=fluid.mat[,6],freshyr1=fluid.mat[,7], expert2=fluid.mat[,8],
                 cj=fluid.mat[,9],cjstrat=fluid.mat[,10],dissent=fluid.mat[,11],
                 unanimou=fluid.mat[,12],minority=fluid.mat[,13])

\end{Verbatim}

\section*{Case Study: Analyzing Votes on the Supreme Court, code }
\begin{Verbatim}[formatcom=\color{MyGreen}]
############################### RUN THE KITCHEN SINK MODEL ##########################

kitchen.sink.fit <- glm(fluidvot ~ dist2 + coalspec + mwcf + complexity+ landmark + 
        freshyr1 + expert2 + cj + cjstrat + dissent + unanimou + minority,
        family=binomial(link=logit),data=fluid.factors)
summary.glm(kitchen.sink.fit)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-2.95255  -0.31468  -0.16436  -0.07645   3.76560  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.030356   0.066926 -45.279  < 2e-16
dist2        0.060781   0.001688  36.006  < 2e-16
coalspec    -0.458590   0.053830  -8.519  < 2e-16
mwcf        -0.336781   0.112812  -2.985  0.00283
complexity   0.112127   0.035784   3.133  0.00173
landmark     0.070948   0.154825   0.458  0.64677
freshyr1     0.266453   0.131643   2.024  0.04296
expert2     -0.152239   0.112687  -1.351  0.17670
cj          -0.088581   0.153582  -0.577  0.56410
cjstrat      0.165715   0.275034   0.603  0.54683
dissent      0.382112   0.070001   5.459 4.80e-08
unanimou    -0.142704   0.130013  -1.098  0.27238
minority     0.913227   0.184645   4.946 7.58e-07

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 8259.0  on 15458  degrees of freedom
Residual deviance: 5271.5  on 15446  degrees of freedom
AIC: 5297.5

Number of Fisher Scoring iterations: 7

8259.0 - 5271.5  = 2987.5 on df = 12
1-pchisq(2987.5,12) gives 0
\end{Verbatim}

\section*{Case Study: Analyzing Votes on the Supreme Court, code }
\begin{Verbatim}[formatcom=\color{MyGreen}]
anova(kitchen.sink.fit)
Analysis of Deviance Table
Model: binomial, link: logit             Response: fluidvot
Terms added sequentially (first to last)
              Df Deviance Resid. Df Resid. Dev
NULL                          15458     8259.0
dist2          1   2332.5     15457     5926.6
coalspec       1     50.1     15456     5876.5
mwcf           1      5.5     15455     5871.0
complexity     1      2.5     15454     5868.4
landmark       1  0.01139     15453     5868.4
freshyr1       1      3.8     15452     5864.6
expert2        1      0.3     15451     5864.2
cj             1      4.0     15450     5860.3
cjstrat        1     45.4     15449     5814.9
dissent        1    516.9     15448     5298.1
unanimou       1      1.9     15447     5296.2
minority       1     24.7     15446     5271.5

# BEWARE!  CHANGING ORDER SLIGHTLY (dist2 MOVED TO LAST):
anova(kitchen.sink.fit2)
              Df Deviance Resid. Df Resid. Dev
NULL                          15458     8259.0
coalspec       1    330.2     15457     7928.9
mwcf           1      3.0     15456     7925.9
complexity     1      6.7     15455     7919.2
landmark       1      0.2     15454     7919.0
freshyr1       1      3.2     15453     7915.8
expert2        1      0.1     15452     7915.7
cj             1      1.5     15451     7914.2
cjstrat        1     63.9     15450     7850.3
dissent        1    649.5     15449     7200.8
unanimou       1     27.7     15448     7173.1
minority       1      1.2     15447     7172.0
dist2          1   1900.5     15446     5271.5

correctly.fit.mat[,2][correctly.fit.mat[,2] < 0.5] <- 0
correctly.fit.mat[,2][correctly.fit.mat[,2] > 0.5] <- 1
table(correctly.fit.mat)

                       kitchen.sink.fit.fitted.values
fluid.factors.fluidvot       0     1    
                       0 14184   111
                       1   766   398

(14184 + 398)/(14184 + 111 + 766 + 398)
[1] 0.9432693

sum(diag(table(correctly.fit.mat))) / sum(table(correctly.fit.mat))
[1] 0.9432693

correctly.fit.mat <- data.frame(fluid.factors$fluidvot,kitchen.sink.fit$fitted.values)
mean(correctly.fit.mat[,2])
[1] 0.07529594

correctly.fit.mat[,2][correctly.fit.mat[,2] < mean(correctly.fit.mat[,2])] <- 0
correctly.fit.mat[,2][correctly.fit.mat[,2] > mean(correctly.fit.mat[,2])] <- 1
table(correctly.fit.mat)

                       kitchen.sink.fit.fitted.values
fluid.factors.fluidvot       0     1    
                       0 11549  2746
                       1   229   935

sum(diag(table(correctly.fit.mat))) / sum(table(correctly.fit.mat))
[1] 0.8075555

fluid.logit.fit1 <- glm(fluidvot ~ dist2 + coalspec + complexity+ freshyr1 +
        expert2 + dissent + minority +
        dist2:minority + complexity:minority + dissent:dist2,
        family=binomial(link=logit),data=fluid.factors)
summary.glm(fluid.logit.fit1)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-4.05747  -0.32187  -0.17542  -0.08674   3.63272  

Coefficients:
                     Estimate Std. Error z value Pr(>|z|)
(Intercept)         -3.029126   0.058573 -51.716  < 2e-16
dist2                0.053652   0.002125  25.245  < 2e-16
coalspec            -0.508543   0.054160  -9.390  < 2e-16
complexity           0.203519   0.040830   4.985 6.21e-07
freshyr1             0.252072   0.132876   1.897  0.05782
expert2             -0.144012   0.114785  -1.255  0.20961
dissent              0.405871   0.066298   6.122 9.25e-10
minority             0.723628   0.159244   4.544 5.52e-06
dist2:minority      -0.020005   0.007179  -2.786  0.00533
complexity:minority -0.254218   0.077374  -3.286  0.00102
dist2:dissent        0.017926   0.003265   5.490 4.01e-08

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 8259.0  on 15458  degrees of freedom
Residual deviance: 5210.7  on 15448  degrees of freedom
AIC: 5232.7

# COMPARE THE ABOVE TO THE KITCHEN SINK MODEL FIT:
    Null deviance: 8259.0  on 15458  degrees of freedom
Residual deviance: 5271.5  on 15446  degrees of freedom
AIC: 5297.5

# TEST MODEL IMPROVEMENT:
5271.5 - 5210.7 =  60.8 on df = 2
1-pchisq(60.8,2) gives 6.27276e-14

correctly.fit.mat <- data.frame(fluid.factors$fluidvot,fluid.logit.fit1$fitted.values)
correctly.fit.mat[,2][correctly.fit.mat[,2] < 0.5] <- 0
correctly.fit.mat[,2][correctly.fit.mat[,2] > 0.5] <- 1
table(correctly.fit.mat)

                      fluid.logit.fit1.fitted.values
fluid.factors.fluidvot 0     1    
                       0 14194   101
                       1   765   399

sum(diag(table(correctly.fit.mat))) / sum(table(correctly.fit.mat))
[1] 0.9439809

# FROM KITCHEN SINK MODEL: predicted correctly, naive: [1] 0.9432693

# CHI-SQUARE TEST OF PEARSON RESIDUALS:
pchisq(sum(residuals(fluid.logit.fit1,type="pearson")^2),15448)
[1] 1