###-*- R -*-
### Examples from: "An Introduction to Statistical Modelling"
### By Annette Dobson


## Plant Weight Data (Page 9)
ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2,10,20,labels=c("Ctl","Trt"))
weight <- c(ctl,trt)
anova(lm(weight~group))
summary(lm(weight~group-1))


## Birth Weight Data (Page 14)
tmp <- matrix(c(
40,2968,38,2795,40,3163,35,2925,36,2625,37,2847,
41,3292,40,3473,37,2628,38,3176,40,3421,38,2975,
40,3317,36,2729,40,2935,38,2754,42,3210,39,2817,
40,3126,37,2539,36,2412,38,2991,39,2875,40,3231
),nc=2,byrow=T)
age <- tmp[,1]
birthw <- tmp[,2]
sex <- gl(2,12,24,labels=c("M","F"))
plot(age,birthw,col=as.integer(sex))
summary(lm(birthw ~ sex + age), cor=T)
summary(lm(birthw ~ sex + age -1), cor=T)
summary(lm(birthw ~ sex + sex:age -1), cor=T)
summary(glm(birthw ~ sex + age, family=gaussian()))
summary(z <- glm(birthw ~ sex + age - 1, family=gaussian()))


## Poisson Regression Data (Page 42)
x <- c(-1,-1,0,0,0,0,1,1,1)
y <- c(2,3,6,7,8,9,10,12,15)
summary(glm(y~x,family=poisson(link="identity")))


## Calorie Data (Page 45)
tmp <- matrix(c(
33,33,100,14, 40,47, 92,15, 37,49,135,18, 27,35,144,12,
30,46,140,15, 43,52,101,15, 34,62, 95,14, 48,23,101,17,
30,32, 98,15, 38,42,105,14, 50,31,108,17, 51,61, 85,19,
30,63,130,19, 36,40,127,20, 41,50,109,15, 42,64,107,16,
46,56,117,18, 24,61,100,13, 35,48,118,18, 37,28,102,14),nc=4,byrow=T)
carb <- tmp[,1]
age <- tmp[,2]
wgt <- tmp[,3]
prot <- tmp[,4]
summary(lm(carb~age+wgt+prot))


## Extended Plant Data (Page 59)
ctl <-  c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trtA <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
trtB <- c(6.31,5.12,5.54,5.50,5.37,5.29,4.92,6.15,5.80,5.26)
group <- gl(3,10,30,labels=c("Ctl","A","B"))
weight <- c(ctl,trtA,trtB)
anova(lm(weight~group))
summary(lm(weight~group))


## Fictitious Anova Data (Page 64)
y <- c(6.8,6.6,5.3,6.1,7.5,7.4,7.2,6.5,7.8,9.1,8.8,9.1)
a <- gl(3,4,12)
b <- gl(2,2,12)
anova(z <- lm(y~a*b))


## Achievement Scores (Page 70)
y <- c(6,4,5,3,4,3,6,8,9,7,9,8,5,7,6,7,7,7,8,5,7)
x <- c(3,1,3,1,2,1,4,4,5,5,4,3,1,2,3,2,2,3,4,1,4)
m <- gl(3,7,21)
print(anova(z <- lm(y~x+m)))


## Beetle Data (Page 78)
tmp <- matrix(c(
1.6907,59, 6, 1.7242,60,13, 1.7552,62,18, 1.7842,56,28,
1.8113,63,52, 1.8369,59,53, 1.8610,62,61, 1.8839,60,60
),nc=3,byrow=T)
dose <- tmp[,1]
n <- tmp[,2]
x <- tmp[,3]
dead <- cbind(x, n-x)
summary(z <- glm(dead~dose,family=binomial(link=logit)))
summary(z <- glm(dead~dose,family=binomial(link=probit)))
summary(z <- glm(dead~dose,family=binomial(link=cloglog)))


## Anther Data (Page 84)
## Note that the proportions below are not exactly
## in accord with the sample sizes quoted below.
## In particular, the value 0.555 does not seem sensible.
n <- c(102,99,108,76,81,90)
p <- c(0.539,0.525,0.528,0.724,0.617,0.555)
# x <- round(n*p)
x <- n*p
y <- cbind(x,n-x)
f <- rep(c(40,150,350),2)
g <- gl(2,3,6)
summary(glm(y~g*f,family=binomial(link="logit")))
summary(glm(y~g+f,family=binomial(link="logit")))
summary(glm(y~f,family=binomial(link="logit")))


## Tumour Data (Page 92)
counts <- c(22,2,10,16,54,115,19,33,73,11,17,28)
type <- gl(4,3,12,labels=c("freckle","superficial","nodular","indeterminate"))
site <- gl(3,1,12,labels=c("head/neck","trunk","extremities"))
data.frame(counts,type,site)
summary(z <- glm(counts ~ type + site,family=poisson()))


## Randomized Controlled Trial (Page 93)
counts <- c(18,17,15,20,10,20,25,13,12)
outcome <- gl(3,1,9)
treatment <- gl(3,3,9)
summary(z <- glm(counts ~ outcome + treatment,family=poisson()))

## Peptic Ulcers and Blood Groups
counts <- c(579,4219,911,4578,246,3775,361,4532,291,5261,396,6598)
group <- gl(2,1,12,labels=c("cases","controls"))
blood <- gl(2,2,12,labels=c("A","O"))
city <- gl(3,4,12,labels=c("London","Manchester","Newcastle"))
cbind(codes(group),codes(blood),codes(city),counts)
summary(z1 <- glm(counts ~ group*city+blood+group:blood, family=poisson()))
summary(z2 <- glm(counts ~ group*city+blood, family=poisson()))
anova(z2,z1)
