dur <- c(0.833, 1.070, 1.234, 1.671, 2.065, 2.080, 2.114, 2.168, 2.274, 2.629, 2.637)
N <- c(38, 28, 27, 20, 17, 15, 15, 15, 15, 14, 12)
L <- qgamma(0.05,shape=sum(N),rate=sum(N*dur))
H <- qgamma(0.95,shape=sum(N),rate=sum(N*dur))
ruler <- seq(0,0.90,length=1000)
postscript("Class.Multilevel/Images/models.figure01.ps")
par(mfrow=c(1,1),mar=c(6,6,2,2),cex.axis=1,cex.lab=1.5,bg="slategray")
plot(ruler,seq(0,12,length=length(ruler)),type="n", ylim=c(-3,12.5),
xlab="Posterior Support",ylab="Posterior Density")
lines(ruler,dgamma(ruler,shape=sum(N),rate=sum(N*dur)),lwd=2.5); abline(h=0)
segments(L,0-.5,L,dgamma(L,shape=sum(N),rate=sum(N*dur)),lty=2)
segments(H,0-.5,H,dgamma(H,shape=sum(N),rate=sum(N*dur)),lty=2)
segments(L,0-.5,H,0-.5,lty=2)
segments((L+H)/2,0-.5,(L+H)/2,0-1.09,lty=2)
text((L+H)/2,-1.4,paste("90% Credible Interval, (",round(L,3),", ",
round(H,3),")",sep=""),cex=1.1)
segments(0,11,sum(N)/sum(N*dur),11,col="yellow3",lwd=4)
text(sum(N)/sum(N*dur)/2,11.5,"Effect Size",col="yellow3",cex=1.3)
dev.off()

library(nlme); library(arm)
trauma.out <- lmer(PTSD2 ~ CRIME + FSTPRIS + log(ARRESTS) + exp(TRAGE) + ALCO_1B
	+ (1 + AGE EDUCATE | ETHNIC), family=binomial(link="probit"),
		data=trauma.short.complete)
summary(trauma.out)


postscript("Class.Multilevel/essex_multilevel_A6.fig01.ps")				
beta <- 3; alpha <- 3*(1:5)
x <- runif(n=50,min=0,max=10)								
y <- jitter( rep(1,50) %o% alpha + x*beta, factor=3 )					
par(bg="slategray",mar=c(0,0,0,0))							
plot(c(0,max(x)),c(0,30),type="n",ylab="y",xaxt="n",yaxt="n")			
for (i in 1:ncol(y)) segments(0,alpha[i], max(x), 
    lm(y[,i] ~ x)$coef %*% c(1,beta),col=colors()[619+i], lwd=3 )
dev.off()

postscript("Class.Multilevel/essex_multilevel_A6.fig02.ps")				
alpha <- 3; beta <- 1.5*(1:5)
x <- runif(n=50,min=0,max=10)								
y <- jitter( alpha + x * (rep(1,50) %o% beta) , factor=3 )					
par(bg="slategray",mar=c(0,0,0,0))							
plot(c(0,max(x)),c(0,60),type="n",ylab="y",xaxt="n",yaxt="n")			
for (i in 1:ncol(y)) segments(0,alpha, max(x), 
lm(y[,i] ~ x)$coef %*% c(1,beta[i]),col=colors()[619+i], lwd=3 )
dev.off()

postscript("Class.Multilevel/essex_multilevel_A6.fig03.ps")				
alpha <- c(33,4,12,9,21); beta <- 1*(1:5)
x <- runif(n=50,min=0,max=10)								
y <- jitter( rep(1,50) %o% alpha + x * (rep(1,50) %o% beta) , factor=3 )					
par(bg="slategray",mar=c(0,0,0,0))							
plot(c(0,max(x)),c(0,50),type="n",ylab="y",xaxt="n",yaxt="n")			
for (i in 1:ncol(y)) segments(0,alpha[i], max(x), 
    lm(y[,i] ~ x)$coef %*% c(1,beta[i]),col=colors()[619+i], lwd=3 )
dev.off()

# SETUP IN R
srrs2 <- read.table("Class.Multilevel/Archive/examples/radon/srrs2.dat", header=TRUE, sep=",")
mn <- srrs2$state=="MN"		# SETS UP A LONG LIST OF TRUE/FALSE
radon <- srrs2$activity[mn]
log.radon <- log(ifelse (radon==0, .1, radon))
floor <- srrs2$floor[mn]        # 0 FOR BASEMENT, 1 FOR FIRST FLOOR
n <- length(radon)
y <- log.radon
x <- floor
# GET COUNTY INDEx VARIABLE (85 COUNTIES, 919 HOUSES)
county.name <- as.vector(srrs2$county[mn])
uniq <- unique(county.name)
J <- length(uniq)
county <- rep(NA, J)            # WILL MAP 919 -> 85
for (i in 1:J) county[county.name==uniq[i]] <- i

###### NO PREDICTORS FOR NOW
sample.size <- as.vector(table (county))
sample.size.jittered <- sample.size*exp(runif (J, -.1, .1))
ybarbar = mean(y)
cty.mns = tapply(y,county,mean)
cty.vars = tapply(y,county,var)
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)

postscript("Class.Multilevel/figure12.1.ps")
par(mfrow=c(1,2),mar=c(5,5,5,3),bg="lightgray")

# LEFT PANEL
plot(sample.size.jittered, cty.mns, cex.lab=.9, cex.axis=1, xlab="", ylab="",
      pch=20, log="x", cex=.5, ylim=c(0,3.2), yaxt="n", xaxt="n")
mtext(side=1,line=2.5,"Jittered Sample Size in County J")
mtext(side=2,line=2.5,"Sample Mean in County J")
mtext(side=3,line=1.5,"No-Pooling",cex=1.5)
axis(1, c(1,3,10,30,100), cex.axis=.9); axis(2, seq(0,3), cex.axis=.9)
for (j in 1:J) lines(rep(sample.size.jittered[j],2),
                      cty.mns[j] + c(-1,1)*cty.sds[j], lwd=.5)
abline(h=ybarbar,lty=2)
points(sample.size.jittered[36],cty.mns[36],cex=4,col="firebrick")

# RIGHT PANEL
plot(sample.size.jittered, mlm.radon.mean, cex.lab=.9, cex.axis=1,
      xlab="", ylab="", pch=20, log="x", cex=.5,
      ylim=c(0,3.2), yaxt="n", xaxt="n")
mtext(side=1,line=2.5,"Jittered Sample Size in County J")
mtext(side=2,line=2.5,"Estimated Population Mean in County J")
mtext(side=3,line=1.5,"Multilevel Model",cex=1.5)
axis(1, c(1,3,10,30,100), cex.axis=.9); axis(2, seq(0,3), cex.axis=.9)
for (j in 1:J) lines(rep(sample.size.jittered[j],2),
         mlm.radon.mean[j] + c(-1,1)*mlm.radon.sd[j], lwd=.5)
abline(h=mean(mlm.radon.mean))
points(sample.size.jittered[36],mlm.radon.mean[36],cex=4,col="forestgreen")
dev.off()

          Mean      SD   NaiveSE TimeseriesSE
sigma.y 0.7991 0.01956 0.0001956    0.0002304
a1      1.0602 0.25501 0.0025501    0.0031325
mu.a    1.3136 0.05011 0.0005011    0.0009859
sigma.a 0.3171 0.04888 0.0004888    0.0014624

lm.pooled <- lm(y ~ x)                 lm.unpooled <- lm(y ~ x + factor(county) - 1)
summary(lm.pooled)                     summary(lm.unpooled)

            Estimate      SE t-stat                     Estimate         SE  t-stat
(Intercept)  1.32674 0.02972 44.640
          x -0.61339 0.07284 -8.421    x                -0.72054    0.07352  -9.800
				       factor(county)1   0.84054    0.37866   2.220
				       factor(county)2   0.87482    0.10498   8.333
				       factor(county)3   1.52870    0.43946   3.479
				       :
				       factor(county)84  1.64535    0.20987   7.840
				       factor(county)85  1.18652    0.53487   2.218

RSE: 0.8226 on 917 df                  RSE: 0.7564 on 833 df
R-Squared: 0.0717, Adj.R-Sqr: 0.0707   R-Squared: 0.7671, Adj.R-Sqr: 0.7431 
F: 70.91, 1 and 917 df,  p < 2.2e-16   F: 31.91, 86 and 833 df, p < 2.2e-16 

# REPLICATE THE PLOT ON PAGE 256
postscript("Class.Multilevel/figure12.3a.ps")
par(mfrow=c(1,2),mar=c(5,5,5,3),bg="lightgray")
plot(sample.size.jittered, coef(lm.unpooled)[-1], cex.lab=.9, cex.axis=1, xlab="",
      ylab="", pch=20, log="x", cex=.5, ylim=c(0,3.2), yaxt="n", xaxt="n")
mtext(side=1,line=2.5,"Jittered Sample Size in County J")
mtext(side=2,line=2.5,"Estimated Intercept, No-Pooling")
mtext(side=3,line=1.5,"No-Pooling",cex=1.5)
axis(1, c(1,3,10,30,100), cex.axis=.9); axis(2, seq(0,3), cex.axis=.9)
for (j in 1:J) lines(rep(sample.size.jittered[j],2),
                      cty.mns[j] + c(-1,1)*cty.sds[j], lwd=.5)
hist(coef(lm.unpooled)[-1],col="light grey",breaks=10,main="", xlab="",ylab="",
          freq=TRUE,cex.axis=1.3)
mtext(outer=FALSE,side=1,line=2.6,"Histogram of Estimated Alpha Values")
mtext(outer=FALSE,side=3,line=2.5,cex=1.5, "Blue Line at Alpha Pooled",
      col="royalblue4")
abline(v=lm.pooled$coef[1],col="royalblue4",lwd=3)
dev.off()

library(lme4); library(arm); options(digits =  5)
M0 <- lmer(y ~ 1 + (1 | county))
summary(M0)

Random effects:
 Groups   Name        Variance Std.Dev.
 county   (Intercept) 0.095816 0.30954
 Residual             0.636620 0.79788
number of obs: 919, groups: county, 85

Fixed effects:
            Estimate Std. Error t value
(Intercept)  1.31258    0.04891   26.84

library(lme4); library(arm); options(digits =  5)
M0 <- lmer(y ~ 1 + (1 | county))
summary(M0)

Random effects:
 Groups   Name        Variance Std.Dev.
 county   (Intercept) 0.095816 0.30954 
 Residual             0.636620 0.79788 
number of obs: 919, groups: county, 85
			
Fixed effects:
            Estimate Std. Error t value
(Intercept)  1.31258    0.04891   26.84

M1 <- lmer(y ~ x + (1 | county))
summary(M1)

Random effects:
 Groups   Name     Variance Std.Dev.
 county   (Intercept) 0.108    0.328   
 Residual             0.571    0.756   
number of obs: 919, groups: county, 85
			
Fixed effects:
            Estimate Std. Error t value
(Intercept)   1.4616     0.0516   28.34
x            -0.6930     0.0704   -9.84

M1 <- lmer(y ~ x + (1 | county))
summary(M1)

Random effects:
 Groups   Name     Variance Std.Dev.
 county   (Intercept) 0.108    0.328   
 Residual             0.571    0.756
number of obs: 919, groups: county, 85
                         
Fixed effects:  
            Estimate Std. Error t value
(Intercept)   1.4616     0.0516   28.34
x            -0.6930     0.0704   -9.84

			coef(M1)
			   (Intercept)     x
			1         1.19 -0.69
			2         0.93 -0.69
			3         1.48 -0.69
			:
			83        1.57 -0.69
			84        1.59 -0.69
			85        1.39 -0.69

			ranef(M1)
			   (Intercept)
			1       -0.270
			2       -0.534
			3        0.018
			:
			83       0.110
			84       0.129
			85      -0.075

			ranef(M1)
			   (Intercept)
			1       -0.270
			2       -0.534
			3        0.018
			:
			83       0.110
			84       0.129
			85      -0.075

fixef(M1)["x"] + c(-1.96,1.96)*se.fixef(M1)["x"]
-0.83 -0.55

fixef(M1)["x"] + c(-1.96,1.96)*se.fixef(M1)["x"]
-0.83 -0.55
		
coef(M1)$county[55,1] + c(-1.96,1.96)*se.ranef(M1)$county[55]
1.1 2.0
		
fixef(M1)["x"] + c(-1.96,1.96)*se.fixef(M1)["x"]
-0.83 -0.55
		
coef(M1)$county[55,1] + c(-1.96,1.96)*se.ranef(M1)$county[55]
1.1 2.0
		
ranef(M1)$county[55,] + c(-1.96,1.96)*se.ranef(M1)$county[55]
-0.32  0.50

srrs2.fips <- srrs2$stfips*1000 + srrs2$cntyfips
cty <- read.table("Class.Multilevel/Data/cty.dat", header=T, sep=",")
usa.fips <- 1000*cty[,"stfips"] + cty[,"ctfips"]
usa.rows <- match (unique(srrs2.fips[mn]), usa.fips)
uranium <- cty[usa.rows,"Uppm"]
u <- log(uranium)
u.full <- u[county]

M2 <- lmer (y ~ x + u.full + (1 | county))
M2 <- lmer (y ~ x + + (1 + u.full | county))

fixef(M2)
(Intercept)           x      u.full 
    1.46576    -0.66824     0.72027 

# COMBINE THE AGGREGATE COUNTY LEVEL EFFECTS
a.hat.M2 <- fixef(M2)[1] + fixef(M2)[3]*u + as.vector(ranef(M2)$county)

# COLLECT THE VARIABLE FLOOR EFFECT
b.hat.M2 <- fixef(M2)[2]

M2 <- lmer (y ~ x + u.full + (1 | county))

fixef(M2)
(Intercept)           x      u.full 
    1.46576    -0.66824     0.72027 

# COMBINE THE AGGREGATE COUNTY LEVEL EFFECTS
a.hat.M2 <- fixef(M2)[1] + fixef(M2)[3]*u + as.vector(ranef(M2)$county)

# COLLECT THE VARIABLE FLOOR EFFECT
b.hat.M2 <- fixef(M2)[2]

        M1 <- lmer(y ~ x + (1 | county))
        a.hat.M1 <- fixef(M1)[1] + ranef(M1)$county #coef(M1)[[1]]["(Intercept)"]
        b.hat.M1 <- fixef(M1)[2]
        u.full <- u[county]
        M2 <- lmer(y ~ x + u.full + (1 | county))
        a.hat.M2 <- fixef(M2)[1] + fixef(M2)[3]*u + as.vector(ranef(M2)$county)
        b.hat.M2 <- fixef(M2)[2]
        x.jitter <- x + runif(n,-0.05,0.05)
        display8 <- c(2,84,24,26,19,55,45,71)
        y.range <- range(y[!is.na(match(county,display8))])
        postscript("Class.Multilevel/figure12.5.ps",horizontal=TRUE)
        par(mfrow=c(2,4),mar=c(3,2,2,1),oma=c(6,6,2,2),bg="lightgray")

for (j in display8){
    plot(x.jitter[county==j], y[county==j], xlim=c(-.05,1.05), ylim=y.range,
        xlab="", ylab="", cex.lab=1.6, cex.axis=1.5,
        pch=20, mgp=c(2,.7,0), xaxt="n", yaxt="n", cex.main=1.3)  #, main=uniq[j])
    axis(1, c(0,1), mgp=c(2,.7,0), cex.axis=1.5)
    axis(2, seq(-1,3,2), mgp=c(2,.7,0), cex.axis=1.5)
    curve(a.hat.M1[j,] + b.hat.M1*x, lty=3, lwd=2, col="forestgreen", add=TRUE)
    curve(a.hat.M2[j,] + b.hat.M2*x, lty=1, lwd=2, col="firebrick", add=TRUE)
    mtext(outer=FALSE,side=3,line=0.8,paste(uniq[j]),cex=1.1)
}
mtext(outer=TRUE,side=1,line=2.5,"Jittered Floor (x) Variable",cex=1.4)
mtext(outer=TRUE,side=2,line=2.5,"Log Radon Level",cex=1.4); dev.off()

        postscript("Class.Multilevel/figure12.6.ps")
        par(mfrow=c(1,1),mar=c(5,5,2,1),bg="lightgray")
        plot(u, t(a.hat.M2), cex.lab=1.1, cex.axis=1.1, pch=20, ylab="",xlab="",
            yaxt="n", xaxt="n", ylim=c(0.5,2))
        axis(1, seq(-1,1,.5), cex.axis=1.0)
        axis(2, seq(0,2.0,.5), cex.axis=1.0)
        mtext(side=1,cex=1.3,"County-Level Uranium",line=2.5)
        mtext(side=2,cex=1.3,"Estimated Regression Intercept",line=2.5)
        curve(fixef(M2)["(Intercept)"] + fixef(M2)["u.full"]*x, lwd=1,
            col="black", add=TRUE)
        for (j in 1:J)
            lines(rep(u[j],2), a.hat.M2[j,] + c(-1,1)*se.coef(M2)$county[j,],
                lwd=.5, col="gray10")
        dev.off()

y ~ 1 + x + (1 + x | county)

M3 <- lmer(y ~ 1 + x + (1 + x | county))
summary(M3)
Random effects:
 Groups   Name        Variance Std.Dev. Corr   
 county   (Intercept) 0.122    0.349           
          x           0.118    0.344    -0.337 
 Residual             0.557    0.746           
number of obs: 919, groups: county, 85

Fixed effects:
            Estimate Std. Error t value
(Intercept)   1.4628     0.0539   27.15
x            -0.6811     0.0876   -7.78

Correlation of Fixed Effects:       (Intr)   x -0.381

coef (M3)
   (Intercept)        x
1      1.14451 -0.54054
2      0.93338 -0.77089
3      1.47169 -0.66887
:
83     1.69429 -1.15131
84     1.59912 -0.73273
85     1.37879 -0.65315

		\begin{VM}
coef (M3)
   (Intercept)        x
1      1.14451 -0.54054
2      0.93338 -0.77089
3      1.47169 -0.66887
:
83     1.69429 -1.15131
84     1.59912 -0.73273
85     1.37879 -0.65315

fixef(M3)
(Intercept)           x 
    1.46277    -0.68109 

		ranef(M3)
		An object of class “ranef.lmer”
		[[1]]
		   (Intercept)          x
		1   -0.3182642  0.1405481
		2   -0.5293905 -0.0898034
		3    0.0089163  0.0122211
		:
		83   0.2315155 -0.4702222
		84   0.1363534 -0.0516471
		85  -0.0839834  0.0279326

		is.numeric(as.matrix(ranef(M3)[[1]]))
		[1] TRUE

		ranef(M3)
		An object of class “ranef.lmer”
		[[1]]
		   (Intercept)          x
		1   -0.3182642  0.1405481
		2   -0.5293905 -0.0898034
		3    0.0089163  0.0122211
		:
		83   0.2315155 -0.4702222
		84   0.1363534 -0.0516471
		85  -0.0839834  0.0279326

		is.numeric(as.matrix(ranef(M3)[[1]]))
		[1] TRUE

                fixef(M3)
                (Intercept)           x
                    1.46277    -0.68109

                ranef(M3)
                   (Intercept)          x
                85  -0.0839834  0.0279326

                fixef(M3)
                (Intercept)           x
                    1.46277    -0.68109

                ranef(M3)
                   (Intercept)          x
                85  -0.0839834  0.0279326

M4 <- lmer(y ~ x + u.full + x:u.full + (1 + x | county))
summary(M4)
Random effects:
Groups   Name        Variance Std.Dev. Corr  
 county   (Intercept) 0.0156   0.125          
 x           0.0941   0.307    0.409    0.409
 Residual             0.5617   0.749          
number of obs: 919, groups: county, 85

Fixed effects:
            Estimate Std. Error t value      Correlation of Fixed Effects:
(Intercept)   1.4686     0.0353    41.6             (Intr) x      u.full
x            -0.6710     0.0844    -7.9      x        -0.241              
u.full        0.8081     0.0907     8.9      u.full    0.207 -0.092       
x:u.full     -0.4195     0.2271    -1.8      x:u.full -0.093  0.173 -0.231

		   (Intercept)        x  u.full x:u.full
		1       1.4586 -0.64699 0.80806 -0.41946
		2       1.4958 -0.88908 0.80806 -0.41946
		3       1.4770 -0.64671 0.80806 -0.41946
		:
		85      1.4388 -0.70110 0.80806 -0.41946
		\end{VM}

(a.se.M4 <- se.coef(M4)$county[,1])
       1        2        3        4        5 
0.115104 0.088738 0.113913 0.107827 0.114849 
:
      81       82       83       84       85 
0.113973 0.123016 0.102748 0.104667 0.121417 


(b.se.M4 <- se.coef(M4)$county[,2])
      1       2       3       4       5       
0.27765 0.24657 0.25677 0.23350 0.27673
:
     81      82      83      84      85 
0.25710 0.30611 0.24339 0.27438 0.30546 
	
			(a.hat.M4 <- coef(M4)$county[,1] + coef(M4)$county[,3]*u)
			[1] 0.90183 0.81112 1.38532 1.04836 1.36381
 			[6] 1.76082 1.80116 1.72896 1.17220 1.54278
			[11] 1.02871 1.69382 0.89785 1.81452 1.40840
			[16] 1.03980 1.69203 0.98504 1.37662 1.69093
			[21] 1.62833 1.55381 1.78631 1.74244 1.73766
			[26] 1.36610 1.87382 1.14298 0.87602 0.93101
			[31] 1.75509 1.40663 1.60761 1.46626 0.73204
			[36] 1.81111 0.80251 1.00689 1.65030 1.88213
			[41] 1.83688 1.58336 1.48489 1.49105 1.50566
			[46] 1.45475 1.24872 1.35052 1.67190 1.80985
			[51] 1.71852 1.80635 1.64544 1.52360 1.35592
			[56] 1.36506 1.24744 1.86792 1.68286 1.67397
			[61] 1.14452 1.82848 1.78606 1.67482 1.86403
			[66] 1.36588 1.60164 0.94786 1.61865 0.91082
			[71] 1.53118 1.66351 1.84605 1.65053 1.46872
			[76] 1.88869 1.61307 0.94731 1.52746 1.32857
			[81] 1.72395 1.67474 1.77278 1.45802 1.72586

			(b.hat.M4 <- coef(M4)$county[,2] + coef(M4)$county[,4]*u)
 			[1] -0.35796 -0.53366 -0.59912 -0.35033 -0.56350
 			[6] -0.85411 -0.43575 -0.71334 -0.42378 -0.76886
			[11] -0.36401 -0.78178 -0.33904 -0.77142 -0.66199
			[16] -0.48011 -1.00850 -0.27916 -0.81425 -0.77257
			[21] -0.65545 -0.99096 -0.86368 -0.63147 -0.42185
			[26] -0.76403 -0.84279 -0.48522 -0.34061 -0.39440
			[31] -0.76366 -0.66763 -0.66642 -0.68630 -0.29324
			[36] -0.54770 -0.49519 -0.17535 -0.66512 -0.78201
			[41] -0.67906 -0.73999 -0.95435 -1.01596 -0.92309
			[46] -0.71455 -0.79364 -0.62319 -0.94192 -0.81929
			[51] -0.72530 -0.83096 -0.86356 -1.08360 -0.44362
			[56] -0.82123 -0.73802 -0.80408 -0.84303 -0.81096
			[61] -0.30246 -0.52019 -0.72368 -0.71358 -0.92401
			[66] -0.43552 -0.37705 -0.35267 -0.81248 -0.63459
			[71] -0.83835 -0.80751 -0.87154 -0.87757 -0.49857
			[76] -0.85706 -0.80950 -0.40586 -0.96665 -0.78664
			[81] -0.43324 -0.75457 -1.22833 -0.57739 -0.85013

postscript("Class.Multilevel/figure13.2.ps")
lower <- a.hat.M4 - a.se.M4
upper <- a.hat.M4 + a.se.M4
par(mfrow=c(1,2),mar=c(5,5,5,3),bg="lightgray",cex.lab=1.4,cex.axis=1.1)
plot(u, a.hat.M4, ylim=range(lower,upper), pch=20,
     xlab="County-Level Uranium", ylab=expression(alpha))
segments (u, lower, u, upper, lwd=.5, col="gray10")
curve(fixef(M4)[1] + fixef(M4)[3]*x, lwd=3, col="purple", add=TRUE)

lower <- b.hat.M4 - b.se.M4
upper <- b.hat.M4 + b.se.M4
plot (u, b.hat.M4, ylim=range(lower,upper), pch=20,
      xlab="County-Level Uranium", ylab=expression(beta))
segments (u, lower, u, upper, lwd=.5, col="gray10")
curve(fixef(M4)[2] + fixef(M4)[4]*x, lwd=3, col="purple", add=TRUE)
dev.off()

library(lme4); library(arm); options(digits =  5)
smoking <- read.table("http://jgill.wustl.edu/data/smoke_pub.dat",header=TRUE)
names(smoking)
[1] "newid"    "sex.1.F." "parsmk"   "wave"     "smkreg"  
smoking[1:12,]
   	newid sex.1.F. parsmk wave smkreg
	1      1        1      0    1      0
	2      1        1      0    2      0
	3      1        1      0    4      0
	4      1        1      0    5      0
	5      1        1      0    6      0
	6      2        0      0    1      0
	7      2        0      0    2      0
	8      2        0      0    3      0
	9      2        0      0    4      0
	10     2        0      0    5      0
	11     2        0      0    6      0
	12     3        1      0    1      0

\begin{VM}
lmer.out <- lmer(smkreg ~ wave + (1|newid), data=smoking, 
	            family=binomial(link=logit))
display(lmer.out)
glmer(formula = smkreg ~ wave + (1 | newid), data = smoking, 
    family = binomial(link = logit))
            coef.est coef.se
(Intercept) -6.41     0.27  
wave         0.21     0.05  

Error terms:
 Groups   Name        Std.Dev.
 newid    (Intercept) 4.17    
 Residual             1.00    
number of obs: 8730, groups: newid, 1760
AIC = 17774.7, DIC = 17769
deviance = 17768.7 
\end{VM}

lmer.out <- lmer(smkreg ~ wave + sex.1.F. + parsmk + (1|newid), data=smoking, 
	            family=binomial(link=logit))
display(lmer.out)
glmer(formula = smkreg ~ wave + sex.1.F. + parsmk + (1 | newid), 
    data = smoking, family = binomial(link = logit))
            coef.est coef.se
(Intercept) -6.60     0.30  
wave         0.25     0.04  
sex.1.F.     0.05     0.32  
parsmk       1.17     0.31  

Error terms:
 Groups   Name        Std.Dev.
 newid    (Intercept) 4.18    
 Residual             1.00    
number of obs: 8730, groups: newid, 1760
AIC = 3935.8, DIC = 3925.8
deviance = 3925.8 

lmer.out <- lmer(smkreg ~ wave + sex.1.F. + wave:sex.1.F. + (1|newid), data=smoking, 
	            family=binomial(link=logit))
display(lmer.out)
glmer(formula = smkreg ~ wave + sex.1.F. + wave:sex.1.F. + (1 | 
    newid), data = smoking, family = binomial(link = logit))
              coef.est coef.se
(Intercept)   -4.85     0.26  
wave           0.10     0.05  
sex.1.F.      -0.88     0.36  
wave:sex.1.F.  0.21     0.07  

Error terms:
 Groups   Name        Std.Dev.
 newid    (Intercept) 3.47    
 Residual             1.00    
number of obs: 8730, groups: newid, 1760
AIC = 4086.4, DIC = 4076.4
deviance = 4076.4 

"Indometh" <-
  structure(list(
   Subject = structure(ordered(c(1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), levels=1:6),
     class = c("ordered", "factor"),
     .Label = c("1", "4", "2", "5", "6", "3")),
   time = c(0.25, 0.5, 0.75, 1, 1.25, 2, 3, 4, 5, 6, 8, 0.25, 0.5,
     0.75, 1, 1.25, 2, 3, 4, 5, 6, 8, 0.25, 0.5, 0.75, 1, 1.25, 2, 3,
     4, 5, 6, 8, 0.25, 0.5, 0.75, 1, 1.25, 2, 3, 4, 5, 6, 8, 0.25,
     0.5, 0.75, 1, 1.25, 2, 3, 4, 5, 6, 8, 0.25, 0.5, 0.75, 1, 1.25,
     2, 3, 4, 5, 6, 8),
   conc = c(1.5, 0.94, 0.78, 0.48, 0.37, 0.19, 0.12, 0.11,
     0.08, 0.07, 0.05, 2.03, 1.63, 0.71, 0.7, 0.64, 0.36, 0.32, 0.2,
     0.25, 0.12, 0.08, 2.72, 1.49, 1.16, 0.8, 0.8, 0.39, 0.22, 0.12,
     0.11, 0.08, 0.08, 1.85, 1.39, 1.02, 0.89, 0.59, 0.4, 0.16, 0.11,
     0.1, 0.07, 0.07, 2.05, 1.04, 0.81, 0.39, 0.3, 0.23, 0.13, 0.11,
     0.08, 0.1, 0.06, 2.31, 1.44, 1.03, 0.84, 0.64, 0.42, 0.24, 0.17,
     0.13, 0.1, 0.09)),
row.names = 1:66,
class = c("nfnGroupedData", "nfGroupedData", "groupedData", "data.frame"),
formula = conc ~ time | Subject,
labels = list(x = "Time since drug administration",
  y = "Indomethacin concentration"),
units = list(x = "(hr)", y = "(mcg/ml)"))

Indometh

Grouped Data: conc ~ time | Subject
   Subject time conc
1        1 0.25 1.50
2        1 0.50 0.94
:	
65       6 6.00 0.10
66       6 8.00 0.09

names(Indometh)
"Subject" "time"    "conc"   

class(Indometh)
"nfnGroupedData" "nfGroupedData" "groupedData" "data.frame"    

formula(Indometh)
Subject ~ time + conc

library(nlme)
indo.pop.nls <- nls(conc ~ SSbiexp(time, A1, lrc1, A2, lrc2), data = Indometh) 
summary(indo.pop.nls) 
Parameters: 
     Estimate Std. Error t value Pr(>|t|)    
A1      2.773      0.253   10.95    4e-16 
lrc1    0.886      0.222    3.99  0.00018 
A2      0.607      0.267    2.27  0.02660 
lrc2   -1.092      0.409   -2.67  0.00966 
--- 
Residual standard error: 0.1745 on 62 degrees of freedom 

indo.term.lis <- nlsList(conc ~ SSbiexp(time, A1, lrc1, A2, lrc2), data = Indometh)
summary(indo.term.lis)

Coefficients:
   A1 
  Estimate Std. Error   t value     Pr(>|t|)
1 2.029277  0.2023875 10.026695 3.388285e-07
2 2.827673  0.2311604 12.232518 3.536510e-04
3 5.468312  1.8759966  2.914884 1.087070e-02
4 2.198132  0.3155032  6.967066 7.942588e-06
5 3.566103  0.3245732 10.987053 5.630248e-06
6 3.002250  0.3503106  8.570251 3.069467e-07
   lrc1 
   Estimate Std. Error  t value     Pr(>|t|)
1 0.5793887  0.2295508 2.524011 2.347391e-03
2 0.8013195  0.1803742 4.442540 5.193756e-02
3 1.7497936  0.3108862 5.628406 2.937754e-04
4 0.2423124  0.2427792 0.998077 1.402737e-01
5 1.0407660  0.1636874 6.358253 1.986106e-04
6 1.0882119  0.2564197 4.243870 3.504364e-05
   A2 
   Estimate Std. Error   t value     Pr(>|t|)
1 0.1915474  0.2037201 0.9402482 0.1269760002
2 0.4989175  0.1822390 2.7377104 0.1927034117
3 1.6757521  0.2814723 5.9535238 0.0002075745
4 0.2545223  0.3716832 0.6847828 0.2914158655
5 0.2914970  0.1592207 1.8307727 0.0811792830
6 0.9685230  0.2905245 3.3337056 0.0001646898

   lrc2 
    Estimate Std. Error   t value     Pr(>|t|)
1 -1.7877849  1.4495070 -1.233374 0.0573694854
2 -1.6353512  0.4779239 -3.421781 0.1146482023
3 -0.4122004  0.1680153 -2.453351 0.0232031740
4 -1.6026860  1.4786607 -1.083877 0.1138959965
5 -1.5068522  0.7133811 -2.112268 0.0511506022
6 -0.8731358  0.2715939 -3.214858 0.0002066297

Residual standard error: 0.0755502 on 42 degrees of freedom # 66-24

indo.nlme <- nlme( indo.term.lis, random = pdDiag(A1 + lrc1 + A2 + lrc2 ~ 1) )

summary(indo.nlme)

Data: Indometh 
        AIC       BIC   logLik
  -91.18562 -71.47873 54.59281

Random effects:
 Formula: list(A1 ~ 1, lrc1 ~ 1, A2 ~ 1, lrc2 ~ 1)
 Level: Subject
 Structure: Diagonal
             A1    lrc1      A2       lrc2 Residual
StdDev: 0.57141 0.15808 0.11160 8.2778e-06 0.081493

Fixed effects: list(A1 ~ 1, lrc1 ~ 1, A2 ~ 1, lrc2 ~ 1) 
        Value Std.Error DF t-value p-value
A1    2.82754   0.26401 57 10.7099   0e+00
lrc1  0.77362   0.11003 57  7.0313   0e+00
A2    0.46147   0.11281 57  4.0908   1e-04
lrc2 -1.34410   0.23108 57 -5.8167   0e+00

Standardized Within-Group Residuals:
     Min       Q1      Med       Q3      Max 
-3.17338 -0.35627 -0.12853  0.34232  3.00251 

Number of Observations: 66
Number of Groups: 6 
 
( be.fixed.terms <- as.vector(indo.nlme$coefficients$fixed) )

[1]  2.8275982  0.7732937  0.4610679 -1.3449124

( be.random.terms <- as.matrix(indo.nlme$coefficients$random$Subject) )

           A1         lrc1          A2          lrc2
1 -0.73966229  0.027813190 -0.11124141  6.553013e-17
2 -0.07054535  0.028667640  0.08711800 -1.855721e-16
3  0.80060558  0.004134323  0.06711258  4.906331e-17
4 -0.56554287 -0.231256957  0.01169103  7.287592e-17
5  0.41273288  0.198153725 -0.13174119  3.397749e-17
6  0.16241204 -0.027511921  0.07706099 -3.587477e-17

		connect1 <- url("http://jgill.wustl.edu/data/Pixel.rda")
		load(connect1);   close(connect1)
		Pixel[,"Side"] <- as.numeric( Pixel[,"Side"] )
		Pixel[1:10,]
		   Dog Side day  pixel
		1    1    2   0 1045.8
		2    1    2   1 1044.5
		3    1    2   2 1042.9
		4    1    2   4 1050.4
		5    1    2   6 1045.2
		6    1    2  10 1038.9
		7    1    2  14 1039.8
		8    2    2   0 1041.8
		9    2    2   1 1045.6
		10   2    2   2 1051.0

	postscript("Class.Multilevel/dogs1.ps")
	J = max(as.numeric(Pixel$Dog))
	day.range <- c(min(as.numeric(Pixel$day)), max(as.numeric(Pixel$day)))
	pixel.range <- c(min(as.numeric(Pixel$pixel)), max(as.numeric(Pixel$pixel)))
	par(oma=c(5,5,1,1),mar=c(0,0,0,0),mfrow=c(J/(J/2),J/2),bg="white")
	for (i in 1:J)  {
	    dog.i <- Pixel[Pixel["Dog"]==i,]
	    plot(dog.i[dog.i["Side"]==2,][,3:4], xlim=day.range, ylim=pixel.range,
	        pch=18,col="darkmagenta",yaxt="n",xaxt="n")
	    lines(lowess(dog.i[dog.i["Side"]==2,][,3:4], f=0.9), col="darkmagenta")
	    points(dog.i[dog.i["Side"]==1,][,3:4], pch=19,col="darkseagreen3")
	    lines(lowess(dog.i[dog.i["Side"]==1,][,3:4], f=0.9), col="darkseagreen3")
	    if (i > 5) axis(side=1,labels=seq(0,21,by=3),at=seq(0,21,by=3))
	    if (i == 1 | i==6) axis(side=2,labels=names(quantile(Pixel$pixel)[-1]),
	        at=quantile(Pixel$pixel)[-1])
	    if (i == 1)  { text(3,1150,"RIGHT",col="magenta")
	                   text(3,1140,"LEFT",col="darkseagreen3") }
	}
	dev.off()

fm1Pixel <- lmer(pixel ~ day + I(day^2) + (1 | Dog), data = Pixel)
summary(fm1Pixel)
   AIC   BIC logLik deviance REMLdev
 895.2 908.4 -442.6    886.9   885.2
Random effects:
 Groups   Name        Variance Std.Dev.
 Dog      (Intercept) 633.89   25.177  
 Residual             263.95   16.247  
Number of obs: 102, groups: Dog, 10

Fixed effects:
              Estimate Std. Error t value          Correlation of Fixed Effects:
(Intercept) 1074.16253    9.12496  117.72                   (Intr) day   
day            4.94201    1.03686    4.77          day      -0.426       
I(day^2)      -0.25047    0.05303   -4.72          I(day^2)  0.355 -0.945

fm2Pixel <- lmer(pixel ~ day + I(day^2) + Side + (1 | Dog), data = Pixel)
summary(fm2Pixel)
   AIC BIC logLik deviance REMLdev
 890.2 906 -439.1      884   878.2
Random effects:
 Groups   Name        Variance Std.Dev.
 Dog      (Intercept) 634.54   25.190  
 Residual             258.56   16.080  
Number of obs: 102, groups: Dog, 10

Fixed effects:
              Estimate Std. Error t value           Correlation of Fixed Effects:
(Intercept) 1082.28413   10.28304  105.25                   (Intr) day    I(d^2)
day            4.93811    1.02628    4.81          day      -0.374              
I(day^2)      -0.25029    0.05249   -4.77          I(day^2)  0.311 -0.945       
Side          -5.40196    3.18425   -1.70          Side     -0.464  0.000  0.000

anova(fm2Pixel,fm1Pixel)

fm1Pixel: pixel ~ day + I(day^2) + (1 | Dog)
fm2Pixel: pixel ~ day + I(day^2) + Side + (1 | Dog)
         Df     AIC     BIC  logLik  Chisq Chi Df Pr(>Chisq)
fm1Pixel  5  896.87  910.00 -443.44                         
fm2Pixel  6  895.94  911.69 -441.97 2.9352      1    0.08667

pchisq(2*as.numeric(logLik(fm2Pixel) - logLik(fm1Pixel)),1,lower.tail=FALSE)
0.03000196

fm3Pixel <- lmer(pixel ~ day + I(day^2) + (Side | Dog) + (-1 + day | Dog), 
                 data = Pixel); summary(fm3Pixel)

   AIC   BIC logLik deviance REMLdev
 843.4 864.4 -413.7    829.7   827.4
Random effects:
 Groups   Name        Variance  Std.Dev. Corr   
 Dog      (Intercept) 1937.2268 44.014          
          Side         565.3087 23.776   -0.746 
 Dog      day            3.1791  1.783          
 Residual               81.3617  9.020          
Number of obs: 102, groups: Dog, 10

Fixed effects:
             Estimate Std. Error t value          Correlation of Fixed Effects:
(Intercept) 1073.6530     9.7331  110.31                  (Intr) day   
day            6.2335     0.8655    7.20         day      -0.202       
I(day^2)      -0.3685     0.0340  -10.84         I(day^2)  0.195 -0.679

anova( fm3Pixel, fm2Pixel )

fm2Pixel: pixel ~ day + I(day^2) + Side + (1 | Dog)
fm3Pixel: pixel ~ day + I(day^2) + (Side | Dog) + (-1 + day | Dog)
         Df     AIC     BIC  logLik Chisq Chi Df Pr(>Chisq)
fm2Pixel  6  895.94  911.69 -441.97                        
fm3Pixel  8  845.69  866.69 -414.84 54.25      2  1.658e-12

fm4Pixel <- lmer(pixel ~ day + I(day^2) + Side + (Side | Dog) + (-1 + day | Dog), 
                 data = Pixel); summary(fm4Pixel)
 AIC   BIC logLik deviance REMLdev
 838 861.7   -410    828.2     820
Random effects:
 Groups   Name        Variance  Std.Dev. Corr   
 Dog      (Intercept) 1908.1242 43.6821         
          Side         544.6817 23.3384  -0.741 
 Dog      day            3.1794  1.7831         
 Residual               81.2434  9.0135         
Number of obs: 102, groups: Dog, 10

Fixed effects:
              Estimate Std. Error t value        Correlation of Fixed Effects:
(Intercept) 1086.41495   14.41456   75.37                (Intr) day    I(d^2)
day            6.23301    0.86512    7.20       day      -0.136              
I(day^2)      -0.36852    0.03398  -10.85       I(day^2)  0.131 -0.679       
Side          -9.19399    7.62640   -1.21       Side     -0.737  0.000  0.000

anova( fm4Pixel, fm3Pixel )

fm3Pixel: pixel ~ day + I(day^2) + (Side | Dog) + (-1 + day | Dog)
fm4Pixel: pixel ~ day + I(day^2) + Side + (Side | Dog) + (-1 + day | Dog)
         Df     AIC     BIC  logLik  Chisq Chi Df Pr(>Chisq)
fm3Pixel  8  845.69  866.69 -414.84                         
fm4Pixel  9  846.20  869.83 -414.10 1.4834      1     0.2232

# LAB 2 ExERCISE: PROBLEM 12.2 IN GH.  THE FOLLOWING CODE SETS UP THE PROBLEM, BUT IS MEANT ONLY
# TO GET YOU STARTED.  SEE PAGE 277 AND ExERCISE 11,4 FOR BACKGROUND)

################################### ATTACH APPROPRIATE LIBRARIES ###############################
library(lme4); library(arm); options(digits =  5)

############################# READ IN DATA FROM AN ExCELL FILE #################################
hiv.data <- read.csv("http://jgill.wustl.edu/data/allvar.csv")
attach.all (hiv.data)

################### FILTER TO "CONTROL" PATIENTS WITH INITIAL AGE FROM 1-5 YEARS ###############
ok <- treatmnt==1 & !is.na(CD4PCT) & (baseage>1 & baseage<5)
attach.all (hiv.data[ok,])

############################# SETUP OUTCOME, AGE, AND TIME MEASUREMENTS ########################
y <- sqrt (CD4PCT)			# OUTCOME OF INTEREST: THE SQUARE ROOT OF CD4 PERCENTAGE
age.baseline <- baseage         	# KID'S AGE IN YEARS AT THE BEGINNING OF THE STUDY
age.measurement <- visage      		# KID'S AGE IN YEARS AT THE TIME OF MEASUREMENT
treatment <- treatmnt			# BETTER SPELLING
time <- visage - baseage		# time GETS AGE IN STUDY MINUS AGE AT START OF STUDY

################# PATIENTS NO LONGER LISTED CONSECUTIVELY, SET UP NEW ID NUMBERS 1:J ###########
unique.pid <- unique (newpid)		# REMOVE REDUNDANT IDENTIFIERS FOR UNIQUE VECTOR
n <- length (y)				# SET n AS THE NUMBER CASES AT LOWER LEVEL 
J <- length (unique.pid)		# SET THE NUMBER OF GROUPS FROM UNIQUE IDENTIFIERS
person <- rep (NA, n)			# CREAT person VECTOR WITH ALL NA VALUES
for (j in 1:J){			     	# LOOP THROUGH GROUPS
  person[newpid==unique.pid[j]] <- j	# person VECTOR GETS UNIQUE IDENTIFIERS AT j
}					# CLOSE LOOP
patient.id <- person			# STATEMENT MISSING IN ORIGINAL FILE

########################## LOOK AT SOME SUMMARY PERSON-LEVEL STATISTICS ########################
number.of.measurements <- as.vector (table (patient.id))
patient.age.baseline <- baseage [age.baseline==age.measurement]

################################ RUN MULTILEVEL MODEL USING lmer ###############################
M1 <- lmer ( ~ + ( | ))







random.imp.vec <- function(V)  {
	gone <- is.na(V)
	there <- V[!gone]
	V[gone] <- sample(x=there,size=sum(gone),replace=TRUE)
	return(V)
}
x <- c(1,2,NA,4,5,NA)
random.imp.vec(x)
[1] 1 2 5 4 5 1

dust2.df <- read.table( "http://jgill.wustl.edu/data/dust2.asc",header=TRUE)
summary(dust2.df)
      cbr              dust            smoking            expo      
 Min.   :0.0000   Min.   :  0.900   Min.   :0.0000   Min.   : 3.00  
 1st Qu.:0.0000   1st Qu.:  1.945   1st Qu.:0.0000   1st Qu.:16.00  
 Median :0.0000   Median :  5.065   Median :1.0000   Median :25.00  
 Mean   :0.2343   Mean   :  4.822   Mean   :0.7392   Mean   :25.06  
 3rd Qu.:0.0000   3rd Qu.:  6.260   3rd Qu.:1.0000   3rd Qu.:33.00  
 Max.   :1.0000   Max.   : 24.000   Max.   :1.0000   Max.   :66.00  
                  NA's   :578.000                                   

sum(is.na(dust2.df))/prod(dim(dust2.df))
0.1159711

dust2.glm <- glm(cbr ~ dust+smoking+expo, family = binomial(link = logit),
	data=dust2.df,na.action=na.omit)  
summary(dust2.glm)
 
Coefficients:
             Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.442991   0.382028  -9.012  < 2e-16
dust         0.143878   0.037892   3.797 0.000146
smoking      0.835298   0.235169   3.552 0.000382
expo         0.037629   0.008409   4.475 7.64e-06

    Null deviance: 762.07  on 667  degrees of freedom
Residual deviance: 709.61  on 664  degrees of freedom
  (578 observations deleted due to missingness)
AIC: 717.6

dust2.df$dust <- random.imp.vec(dust2.df$dust)
dust2.glm <- glm(cbr ~ dust+smoking+expo, family = binomial(link = logit),
        data=dust2.df,na.action=na.fail)
summary(dust2.glm)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.154958   0.275542 -11.450  < 2e-16
dust         0.073567   0.027229   2.702 0.006896
smoking      0.674027   0.173820   3.878 0.000105
expo         0.040802   0.006169   6.614 3.74e-11

    Null deviance: 1356.8  on 1245  degrees of freedom
Residual deviance: 1286.4  on 1242  degrees of freedom
AIC: 1294.4

dust.df <- read.table( "http://jgill.wustl.edu/data/dust.asc",header=TRUE)
dust.glm <- glm(cbr ~ dust+smoking+expo, family = binomial(link = logit),
	data=dust.df);  
summary.glm(dust.glm)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -3.04785    0.24813 -12.283  < 2e-16 
dust         0.09189    0.02323   3.956 7.63e-05 
smoking      0.67683    0.17407   3.888 0.000101 
expo         0.04016    0.00620   6.476 9.40e-11 

    Null deviance: 1356.8  on 1245  degrees of freedom
Residual deviance: 1278.3  on 1242  degrees of freedom
AIC: 1286.3


invest.df <- read.table("http://jgill.wustl.edu/data/investment.dat",header=TRUE)
invest.df

   real.investment trend real.gnp real.interest      inflation
1            0.161     1    1.058          5.16           4.40
2            0.172     2    1.088          5.87           5.15
3            0.158     3    1.086          5.95           5.37
4            0.173     4    1.122          4.88           4.99
5            0.195     5    1.186          4.50           4.16
6            0.217     6    1.254          6.44           5.75
7            0.199     7    1.246          7.83           8.82
8            0.163     8    1.232          6.25           9.31
9            0.195     9    1.298          5.50           5.21
10           0.231    10    1.370          5.46           5.83
11           0.257    11    1.439          7.46           7.40
12           0.259    12    1.479         10.28           8.64
13           0.225    13    1.474         11.77           9.31
14           0.241    14    1.503         13.42           9.44
15           0.204    15    1.475         11.02           5.99

##### FIRST DO A STANDARD LINEAR MODEL ON THE FULL DATA #####
invest.lm <- lm(real.investment ~ trend + real.gnp + real.interest + inflation,
                data=invest.df);  summary(invest.lm)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.0102779 -0.0022946  0.0004119  0.0029377  0.0080418 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)
(Intercept)   -5.091e-01  5.513e-02  -9.234 3.28e-06
trend         -1.658e-02  1.972e-03  -8.409 7.59e-06
real.gnp       6.704e-01  5.500e-02  12.189 2.52e-07
real.interest -2.326e-03  1.219e-03  -1.908   0.0854
inflation     -9.401e-05  1.347e-03  -0.070   0.9458

Residual standard error: 0.006714 on 10 degrees of freedom
Multiple R-Squared: 0.9724,     Adjusted R-squared: 0.9614 
F-statistic: 88.19 on 4 and 10 DF,  p-value: 9.333e-08 

##### NOW DELIBERATELY DELETE SOME DATA TO SIMULATE MISSINGNESS #####

invest.missing.df <- invest.df
invest.missing.df[1,1] <- invest.missing.df[8,5]  <- invest.missing.df [9,3] <- 
			  invest.missing.df[13,4] <- NA

sum(is.na(invest.missing.df))/prod(dim(invest.missing.df))        
[1] 0.05333333

# FROM THE lm HELP FILE:
# na.action: a function which indicates what should happen when the data
#         contain `NA's.  The default is set by the `na.action' setting
#         of `options', and is `na.fail' if that is unset. The
#         ``factory-fresh'' default is `na.omit'.
#
# STRONGLY ADVISED: options(na.action="na.fail")

##### SECOND MODEL IS THE SAME SPECIFICATION BUT ON DATASET WITH MISSING #####
invest.missing.lm <- lm(real.investment ~ trend + real.gnp + real.interest 
                        + inflation, data=invest.missing.df,na.action=na.omit)
summary.lm(invest.missing.lm)

Coefficients:
                Estimate Std. Error t value Pr(>|t|)
(Intercept)   -0.4459603  0.0324644 -13.737 9.25e-06
trend         -0.0144363  0.0012779 -11.297 2.88e-05
real.gnp       0.6032710  0.0332504  18.143 1.80e-06
real.interest -0.0031976  0.0007746  -4.128  0.00616
inflation      0.0023307  0.0009748   2.391  0.05395

Residual standard error: 0.002866 on 6 degrees of freedom
  (4 observations deleted due to missingness)
Multiple R-Squared: 0.9959,     Adjusted R-squared: 0.9931 
F-statistic: 362.5 on 4 and 6 DF,  p-value: 2.79e-07 

##### USE MULTIPLE IMPUTATION FROM THE MICE PROGRAM #####

library(mice)

# m=10 SPECIFIES 10 IMPUTATIONS (DEFAULT=5)

m <- 10
imp.invest <- mice(invest.missing.df,m)

# WE CAN NOW USE THE complete FUNCTION TEN TIMES FOLLOWED BY 10 lm CALLS
imp.invest.array <- array(NA,c(dim(invest.df),m))
for (i in 1:m)  imp.invest.array[,,i] <- as.matrix(complete(imp.invest,i))

##### THE lm.mids FUNCTION DOES THIS IN ONE COMMAND #####

invest.mids <- lm.mids(real.investment ~ trend + real.gnp + real.interest 
	+ inflation,data=imp.invest)
pool(invest.mids)

Pooled coefficients:
  (Intercept)         trend      real.gnp real.interest    inflation 
 -0.387215446  -0.011858096   0.546190553  -0.005146479  0.003709159 

Fraction of information about the coefficients missing due to nonresponse: 
  (Intercept)         trend      real.gnp real.interest   inflation 
    0.1102147     0.1597954     0.1218865     0.1205782   0.1535351 
\end{VM}

# A FUNCTION TO CONVENIENTLY GET COEFICIENTS AND STANDARD ERRORS FROM A lm.mids 
# OBJECT, param=1 GIVES THE COEFFICIENT ESTIMATES, param=2 GIVES THE STANDARD ERRORS

lm.mids.vals <- function(obj,param) { 
    out.mat <- NULL
    for (i in 1:obj$call1$m) 
        out.mat <- rbind(out.mat,summary.lm(obj$analyses[[i]])$coef[,param])
    out.mat
}

# USE THIS FUNCTION TO GET THE REQUIRED VECTOR:

impute.coef.vec <- apply(lm.mids.vals(invest.mids,1),2,mean)

  (Intercept)         trend      real.gnp real.interest    inflation 
 -0.387215446  -0.011858096   0.546190553  -0.005146479  0.003709159 

# CALCULATE THE STANDARD TWO ERROR COMPONENTS BY HAND

( between.var <- apply(lm.mids.vals(invest.mids,1),2,var) )
  (Intercept)         trend      real.gnp real.interest     inflation 
 3.982399e-04  7.837802e-07  4.919964e-04  2.755307e-07  8.322247e-07 

( within.var <- apply(lm.mids.vals(invest.mids,2)^2,2,mean) )
  (Intercept)         trend      real.gnp real.interest     inflation 
 3.536579e-03  4.533230e-06  3.898968e-03  2.210504e-06  5.047015e-06 

# USE THESE TO COMPUTE THE FINAL STANDARD ERROR VECTOR

m <- 10
( impute.se.vec <- sqrt(within.var + ((m+1)/m)*between.var) )
  (Intercept)         trend      real.gnp real.interest    inflation 
  0.063044769   0.002322798   0.066634554   0.001585430  0.002441815 

# THE DEGREES OF FREEDOM FOR THE T-STATISTIC NEEDS TO BE ADJUSTED.
# SEE LITTLE AND RUBIN (1987), PAGE 257

( impute.df <- (m-1)*(1 + (1/(m+1)) * within.var/between.var)^2 )
  (Intercept)         trend      real.gnp real.interest inflation 
     29.39766      20.95260      26.63908      26.91547  21.65926 

# CREATE A REGRESSION TABLE:

out.table <- round( cbind( impute.coef.vec,impute.se.vec,
        impute.coef.vec/impute.se.vec, 
        1-pt(abs(impute.coef.vec/impute.se.vec),impute.df) ),6 )
dimnames(out.table) <- list( c("(intercept)","trend","real.gnp","real.interest",
        "inflation"), c("Estimate","Std. Error","t value","Pr(>|t|)") ) 

out.table
               Estimate Std. Error   t value Pr(>|t|)
(intercept)   -0.390189   0.062981 -6.195350 0.000001
trend         -0.011984   0.002325 -5.153349 0.000025
real.gnp       0.549227   0.066607  8.245852 0.000000
real.interest -0.005107   0.001576 -3.241617 0.001627
inflation      0.003673   0.002421  1.516784 0.071848

# NOT QUITE THE IMPROVEMENT WE HOPED FOR.  WHY?  TWO REASONS: (1) I  PARTICULARLY 
# CHOSE THE FOUR MISSING VALUES TO "DO THE MOST DAMAGE", AND (2) THIS IS A 
# RELATIVELY SMALL DATASET SO MI DOES NOT HAVE A LOT OF INFORMATION WITH WHICH
# TO BUILD THE POSTERIOR DISTRIBUTION OF THE MISSING DATA.  

# OR PUT THE PIECES TOGETHER WITH ONE SUMMARY FUNCTION
summary(pool(invest.mids))
                       est          se         t       df
(Intercept)   -0.387215446 0.063044769 -6.141912 8.880183
trend         -0.011858096 0.002322798 -5.105092 8.324535
real.gnp       0.546190553 0.066634554  8.196807 8.751254
real.interest -0.005146479 0.001585430 -3.246110 8.765773
inflation      0.003709159 0.002441815  1.519017 8.395695
                  Pr(>|t|)        lo 95        hi 95 
(Intercept)   1.801275e-04 -0.530126490 -0.244304402      
trend         8.169365e-04 -0.017178345 -0.006537846      
real.gnp      2.157312e-05  0.394797063  0.697584043      
real.interest 1.041540e-02 -0.008747632 -0.001545326      
inflation     1.654781e-01 -0.001875817  0.009294134      
\end{VM}

dust2.df <- read.table( "http://jgill.wustl.edu/data/dust2.asc",header=TRUE)
library(mice);  m <- 5;  imp.dust2 <- mice(dust2.df,m)
dust3.glm <- glm.mids(cbr ~ dust+smoking+expo, family = binomial(link = logit),
                      data=imp.dust2)
summary(pool(dust3.glm))
                    est          se         t         df     Pr(>|t|)       lo 95
(Intercept) -3.37686445 0.349777612 -9.654318  168.55060 0.000000e+00 -4.06737387
dust         0.12613589 0.051039923  2.471318   51.20283 1.682791e-02  0.02367882
smoking      0.64561295 0.180901959  3.568855 1014.44146 3.753238e-04  0.29062809
expo         0.04004765 0.006517554  6.144583  931.29865 1.186852e-09  0.02725686
                  hi 95 missing        fmi
(Intercept) -2.68635503      NA 0.36815695
dust         0.22859295     578 0.70018227
smoking      1.00059781       0 0.06132988
expo         0.05283845       0 0.07686594

dust.df <- read.table( "http://jgill.wustl.edu/data/dust.asc",header=TRUE)
dust.glm <- glm(cbr ~ dust+smoking+expo, family = binomial(link = logit),
        data=dust.df);
summary.glm(dust.glm)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.04785    0.24813 -12.283  < 2e-16
dust         0.09189    0.02323   3.956 7.63e-05
smoking      0.67683    0.17407   3.888 0.000101
expo         0.04016    0.00620   6.476 9.40e-11

    Null deviance: 1356.8  on 1245  degrees of freedom
Residual deviance: 1278.3  on 1242  degrees of freedom
AIC: 1286.3

harr <- read.table("http://jgill.wustl.edu/data/harrison3.txt",header=TRUE)
apply(apply(harr,2,is.na),2,sum)

harr.glm <- glm(NumberKilled ~ log(AgeofFirstAttacker) + log(as.numeric(Date)) +
                AttackerisChallenged + FirstAttackerisFemale + DeviceisCar + 
		TargetisCafe + TargetisMilitary + ResponsibleHamas, data=harr,
		na.action=na.omit)

summary(harr.glm)
                        Estimate Std. Error t value Pr(>|t|)
(Intercept)               12.195     12.793    0.95   0.3438
log(AgeofFirstAttacker)   -3.369      4.170   -0.81   0.4219
log(as.numeric(Date))      0.582      0.864    0.67   0.5026
AttackerisChallenged      -3.700      1.604   -2.31   0.0241
FirstAttackerisFemale      3.202      3.040    1.05   0.2960
DeviceisCar               -2.018      2.453   -0.82   0.4137
TargetisCafe               3.967      2.013    1.97   0.0527
TargetisMilitary          -5.444      2.495   -2.18   0.0325
ResponsibleHamas           5.363      1.649    3.25   0.0018

(Dispersion parameter for gaussian family taken to be 40.519)

    Null deviance: 4077.5  on 77  degrees of freedom
Residual deviance: 2795.8  on 69  degrees of freedom
  (25 observations deleted due to missingness)
AIC: 520.5

library(mice)

attach(harr)
harr2 <- cbind(NumberKilled, NumberInjured, AgeofFirstAttacker, Date, 
	       ResponsibleisMartyrs, AttackerisChallenged, FirstAttackerisFemale, 
	       ResponsibleisPIJ, TargetisBus, TargetisCheckpoint, DeviceisCar, 
	       TargetisCafe, TargetisMilitary, ResponsibleHamas)
detach(harr)

imp.harr <- mice(harr2,m=10)

harr.mids <- glm.mids(NumberKilled ~ log(AgeofFirstAttacker) + 
		      log(as.numeric(Date)) + AttackerisChallenged + 
		      FirstAttackerisFemale + DeviceisCar + TargetisCafe + 
		      TargetisMilitary + ResponsibleHamas, data=imp.harr)

cbind(summary(harr.glm)$coef[,1:2], summary(pool(harr.mids))[,1:2])

                        Estimate Std. Error       est      se
(Intercept)             12.19545   12.79263  9.873602 9.49898
log(AgeofFirstAttacker) -3.36921    4.16985 -2.061098 3.09964
log(as.numeric(Date))    0.58241    0.86409  0.067666 0.63617
AttackerisChallenged    -3.69990    1.60370 -3.131864 1.23538
FirstAttackerisFemale    3.20166    3.04025  2.815036 2.37085
DeviceisCar             -2.01778    2.45341 -0.177107 1.75987
TargetisCafe             3.96730    2.01286  4.893596 1.72607
TargetisMilitary        -5.44396    2.49457 -3.888428 1.48603
ResponsibleHamas         5.36309    1.64870  3.933507 1.26970
\end{VM}

harr <- read.table("http://jgill.wustl.edu/data/harrison4.txt",header=TRUE)
apply(harr[,-1],2,table)

$NumberKilled
 0  1  2  3  5  6  7  8  9 11 15 17 19 21 23 24 30 
44 13  9  8  3  2  3  2  2  3  4  3  1  3  1  1  1 

$NumberInjured
  0   1   2   3   4   5   6   8   9  11  13  14  16  17  20  21  22  26  27  30 
 28   1   5   4   4   3   1   2   2   2   1   1   1   1   3   1   1   1   1   5 
 40  42  47  50  52  57  58  59  60  65  69  86  90 100 102 120 130 150 188 
  3   1   1   7   1   1   1   2   5   1   1   1   1   3   1   1   1   2   1 

$TotalCasualties
  0   1   2   3   4   5   6   8   9  10  12  13  15  17  20  21  26  27  29  30 
 22   5   6   4   3   3   2   2   1   1   2   2   1   1   2   1   1   2   1   1 
 31  32  35  38  45  49  50  51  52  53  57  58  59  61  62  63  65  67  71  75 
  1   1   1   1   1   2   1   1   1   1   2   1   1   2   1   1   2   2   3   2 
 81  91  93 105 106 123 126 141 145 151 180 199 
  1   1   1   1   1   1   1   1   1   1   1   1 
\end{VM}

\section*{Terrorism Data}
\begin{VM}
$ResponsibleHamas                       $ResponsibleisMartyrs
 0  1                                    0  1 
59 44                                   78 25 

$ResponsibleisPIJ                       $ResponsibleisOther
 0  1                                    0  1 
79 24                                   99  4 

$TargetisMilitary                       $TargetisCivilian
 0  1                                    0  1 
76 10                                   10 76 

$TargetisBus                            $TargetisCafe
 0  1                                    0  1 
89 14                                   89 14 

$TargetisCheckpoint                     $TargetisResidence
 0  1                                     0   1 
87 16                                   102   1 
\end{VM}

\section*{Terrorism Data}
\begin{VM}
$TargetisOffshore                       $TargetisStore
  0   1                                   0  1 
101   2                                  96  7 

$TargetisStreet                         $TargetisTravelstop
 0  1                                     0  1 
71 32                                    88 15 

$DeviceisCar                            $DeviceisBoat
 0  1                                     0   1 
89 14                                   101   2 

$AttackisPrevented                      $AttackerisChallenged
  0   1                                  0  1 
101   2                                 63 40 

$FirstAttackerisMale                    $FirstAttackerisFemale
 0  1                                    0  1 
 7 92                                   92  7 

$AgeofFirstAttacker
16 17 18 19 20 21 22 23 24 25 26 27 29 31 43 45 48 
 1  8  7 10 15 11 10 12  2  3  2  1  3  1  1  1  1 

library(mgcv)
harr.gam <- gam(NumberKilled ~ te(log(AgeofFirstAttacker),log(Date),k=3) +
                AttackerisChallenged + FirstAttackerisFemale +
                DeviceisCar + TargetisCafe + TargetisMilitary +
                ResponsibleHamas, data=harr)

summary(harr.gam)

Parametric coefficients:
                      Estimate Std. Error t value Pr(>|t|)
(Intercept)              4.377      1.070   4.091  9.3e-05
AttackerisChallenged    -4.059      1.144  -3.549 0.000616
FirstAttackerisFemale    1.286      2.255   0.571 0.569737
DeviceisCar              1.204      1.677   0.718 0.474763
TargetisCafe             3.824      1.638   2.335 0.021752
TargetisMilitary        -4.772      1.384  -3.448 0.000860
ResponsibleHamas         4.027      1.184   3.400 0.001003

Approximate significance of smooth terms:
                                        edf Ref.df     F p-value
te(log(AgeofFirstAttacker),log(Date)) 5.613  5.613 3.794 0.00255


\section*{Book References to Note}
\begin{ohlist}
	\item   Little, Roderick J. A., and Donald B. Rubin.  2002.
               	\emph{Statistical Analysis with Missing Data.}  Second Edition.
                New York: John Wiley \& Sons.

	\item	Rubin, Donald.  1987.
                \emph{Multiple Imputation for Nonresponse in Surveys.}
                New York: John Wiley \& Sons.

	\item 	Schafer, Joseph L.
		{\em Analysis of Incomplete Multivariate Data}.
		Chapman \& Hall, 1997.
\end{ohlist}

Article References:

\bibitem        Beale, E. M. L. and Little, R. J. A.  (1975).
                \index{authorindex}{Beale, E. M. L.}
                \index{authorindex}{Little, R. J. A.}
                Missing Values in Multivariate Analysis.
                \emph{Journal of the Royal Statistical Society, Series B} {\bf 37}, 129-145.

\bibitem        Celeux, G., Forbes, F., Robert, C. P., and Titterington, D. M.  (2006).
                \index{authorindex}{Celeux, G.}%
                \index{authorindex}{Forbes, F.}%
                \index{authorindex}{Robert, C. P.}%
                \index{authorindex}{Titterington, D. M.}%
                Deviance Information Criteria for Missing Data Models.
                \emph{Bayesian Analysis} {\bf 1}, 1-24.

\bibitem        Celeux, G., Forbes, F., Robert, C. P., and Titterington, D. M.  (2006).
                \index{authorindex}{Celeux, G.}%
                \index{authorindex}{Forbes, F.}%
                \index{authorindex}{Robert, C. P.}%
                \index{authorindex}{Titterington, D. M.}%
                Deviance Information Criteria for Missing Data Models.
                \emph{Bayesian Analysis} {\bf 1}, 1-24.

\bibitem        Little, R. J. A., and Rubin, D. B.  (1983).
                \index{authorindex}{Little, R. J. A.}
                \index{authorindex}{Rubin, D. B.}
                On Jointly Estimating Parameters and Missing Data by Maximizing the Complete-Data Likelihood.
                \emph{The American Statistician} {\bf 37}, 218-220.

\bibitem        Little, R. J. A., and Rubin, D. B.  (2002).
                \index{authorindex}{Little, R. J. A.}
                \index{authorindex}{Rubin, D. B.}
                \emph{Statistical Analysis with Missing Data.} Second Edition.
                New York: John Wiley \& Sons.