###############################################################################
# One-way ANOVA: R code
###############################################################################
# Sleep data from ppt lectures: 3 groups, 6 obs per group (t=3, n=6)
sleep <- c(5.6,5.7,5.1,3.8,4.6,5.1,8.4,8.2,8.8,7.1,7.2,8,10.6,6.6,8,8,6.8,6.6)
# drug group codes must be converted to factor to be treated as categorical 
drugs <- factor(c(rep("placeb",6),rep("stand",6),rep("new",6)))
# fit linear model
sleep.fit=aov(sleep~drugs)
# ANOVA table
anova(sleep.fit)
# see fitted coefficients (compares with "new", 1st alphabetically...)
summary.lm(sleep.fit)
# reorder group levels to compare with placebo, and refit AOV
levels(drugs)
levels(drugs)=c("placeb","stand","new")
sleep.fit=aov(sleep~drugs)
summary.lm(sleep.fit)
# side-by-side boxplots of groups
plot(sleep~drugs)
## Check fit: 4 diagnostic plots produced by default
#  Top plots: check homosdeasticity & normality.
#  Bottom plots: check for influential points
par(mfrow=c(2,2))
plot(sleep.fit)
# Bartlett's test for homogeneity of group variances (assumes data is normal)
bartlett.test(sleep~drugs)
# Fligner-Killeen (median) test for homogeneity of group variances (nonparametric)
fligner.test(sleep~drugs)
# Shapiro-Wilk test of normality of the residuals
shapiro.test(sleep.fit$resid)
# Kruskal-Wallis test (nonparametric ANOVA)
kruskal.test(sleep,drugs)
#
### multiple pairwise comparisons:
# pairwise t-tests w/ Bonferroni adjustment
pairwise.t.test(sleep, g=drugs, p.adj = "bonf")
# Tukey's HSD yardstick (alpha=0.05, t=3, MSE=1.102, n=6)
alpha=0.05; mse=1.102; t=3; n=6;
qtukey(1-alpha,t,n*t-t)*sqrt(mse/n)
# all pairwise comparisons using Tukey 
TukeyHSD(wages.fit)
plot(TukeyHSD(wages.fit))
#
### contrast comparisons:
# contrast 1: mean of placebo to mean of standard & new:
l1=c(2,-1,-1)
# contrast 2: mean of standard to mean of new:
l2=c(0,1,-1)
# set contrasts and inspect them to check
contrasts(drugs)=cbind(l1,l2)
contrasts(drugs)
# re-fit AOV
sleep.cont=aov(sleep~drugs)
summary.lm(sleep.cont)
# the "Intercept" is the estimate of the overall mean, but
# the estimates and p-values for testing sig. of contrasts are NOT right! 


##########################################################################
# Example to show why experimentwise error rate control methods are needed.
# Simulate data vector (t=15 treatments, n=10 obs/treatment) all from N(0,1).
# This mimics an ANOVA with 15 groups with no differences between groups.
##########################################################################
set.seed(17)
y <- rnorm(150)
# vector of treatment labels
tgroup <- rep(1,10)
for (i in 2:15) tgroup <- c(tgroup,rep(i,10))
tgroup <- factor(tgroup)
# perform all pairwise t-tests at 0.05 level without any adjustment 
pairwise.t.test(y, g=tgroup, p.adj="none")
# Result: type I error occurred in 20 out of 105 comparisons (20% error rate).  
# Now perform all pairwise t-tests at 0.05 level with Bonferroni adjustment: 
pairwise.t.test(y, g=tgroup, p.adj="bonf")
# Result: observed type I error rate is 1 (predicted rate is 100*0.05/105=.05%).  


###############################################################################
# Two-way ANOVA (no interaction): R code
###############################################################################
# RCBD from ppt lectures: extraction of sulfur (sulf) from FL soils
#     Groups = 4 chemicals (chem); Blocks = 5 soil types (soil)
sulf <- c(5.07,4.43,7.09,4.48,3.31,2.74,2.32,2.35,2.54,2.09,1.09,2.70,2.34,
           2.07,4.38,3.85,4.71,5.29,5.70,4.98)
chem <- factor(rep(c("cac","nh4","ca2","h2o"),5))
soil <- factor(c(rep("Troop",4),rep("Lake",4),rep("Leon",4),rep("Chip",4),rep("Norf",4)))
rcbd.fit = aov(sulf~soil+chem)
# anova table
anova(rcbd.fit)
# profile plot
interaction.plot(chem,soil,sulf)
# Default diagnostics
par(mfrow=c(2,2)); plot(rcbd.fit)
# Friedman's (nonparametric) Test for differences in the groups
friedman.test(sulf, groups=chem, blocks=soil)
# means: for groups and for blocks
tapply(sulf,chem,mean)
tapply(sulf,soil,mean)


###############################################################################
# Two-way ANOVA (interaction): R code
###############################################################################
# Factorial experiment from ppt lectures, yield of fruit trees, 4 pesticides
# (factor A), 3 varieties of citrus tree (factor B), 2 replications per treatment.
fruit <- read.table("Data/fruit.txt",header=TRUE)
# fit two-way model with interaction
fruit.lm <- lm(yield ~ factor(A) + factor(B) + factor(A)*factor(B), data=fruit)
anova(fruit.lm)
# interaction not sig so drop it off
fruit2.lm <- lm(yield ~ factor(A) + factor(B), data=fruit)
# see the fitted coefficients (they only equal cell means for interaction model!?)
summary(fruit2.lm)
# profile plots
interaction.plot(fruit$A,fruit$B,fruit$yield)
interaction.plot(fruit$B,fruit$A,fruit$yield)


###############################################################################
# Three-way ANOVA: R code
##############################95 percent confidence interval:
#################################################
# Latin Square design from ppt lectures, weight of strawberries, 3 irrigation
# methods (factor C), 3 rows (factor A), 3 columns (factor B).
straw <- read.table("Data/latin_square.txt",header=TRUE)
# fit three-way model 
straw.lm <- lm(weight ~ factor(row) + factor(column) + factor(irrig), data=straw)
anova(straw.lm)