############################################################################### # One-way ANOVA: R code ############################################################################### # Sleep data from ppt lectures: 3 groups, 6 obs per group (t=3, n=6) sleep <- c(5.6,5.7,5.1,3.8,4.6,5.1,8.4,8.2,8.8,7.1,7.2,8,10.6,6.6,8,8,6.8,6.6) # drug group codes must be converted to factor to be treated as categorical drugs <- factor(c(rep("placeb",6),rep("stand",6),rep("new",6))) # fit linear model sleep.fit=aov(sleep~drugs) # ANOVA table anova(sleep.fit) # see fitted coefficients (compares with "new", 1st alphabetically...) summary.lm(sleep.fit) # reorder group levels to compare with placebo, and refit AOV levels(drugs) levels(drugs)=c("placeb","stand","new") sleep.fit=aov(sleep~drugs) summary.lm(sleep.fit) # side-by-side boxplots of groups plot(sleep~drugs) ## Check fit: 4 diagnostic plots produced by default # Top plots: check homosdeasticity & normality. # Bottom plots: check for influential points par(mfrow=c(2,2)) plot(sleep.fit) # Bartlett's test for homogeneity of group variances (assumes data is normal) bartlett.test(sleep~drugs) # Fligner-Killeen (median) test for homogeneity of group variances (nonparametric) fligner.test(sleep~drugs) # Shapiro-Wilk test of normality of the residuals shapiro.test(sleep.fit$resid) # Kruskal-Wallis test (nonparametric ANOVA) kruskal.test(sleep,drugs) # ### multiple pairwise comparisons: # pairwise t-tests w/ Bonferroni adjustment pairwise.t.test(sleep, g=drugs, p.adj = "bonf") # Tukey's HSD yardstick (alpha=0.05, t=3, MSE=1.102, n=6) alpha=0.05; mse=1.102; t=3; n=6; qtukey(1-alpha,t,n*t-t)*sqrt(mse/n) # all pairwise comparisons using Tukey TukeyHSD(wages.fit) plot(TukeyHSD(wages.fit)) # ### contrast comparisons: # contrast 1: mean of placebo to mean of standard & new: l1=c(2,-1,-1) # contrast 2: mean of standard to mean of new: l2=c(0,1,-1) # set contrasts and inspect them to check contrasts(drugs)=cbind(l1,l2) contrasts(drugs) # re-fit AOV sleep.cont=aov(sleep~drugs) summary.lm(sleep.cont) # the "Intercept" is the estimate of the overall mean, but # the estimates and p-values for testing sig. of contrasts are NOT right! ########################################################################## # Example to show why experimentwise error rate control methods are needed. # Simulate data vector (t=15 treatments, n=10 obs/treatment) all from N(0,1). # This mimics an ANOVA with 15 groups with no differences between groups. ########################################################################## set.seed(17) y <- rnorm(150) # vector of treatment labels tgroup <- rep(1,10) for (i in 2:15) tgroup <- c(tgroup,rep(i,10)) tgroup <- factor(tgroup) # perform all pairwise t-tests at 0.05 level without any adjustment pairwise.t.test(y, g=tgroup, p.adj="none") # Result: type I error occurred in 20 out of 105 comparisons (20% error rate). # Now perform all pairwise t-tests at 0.05 level with Bonferroni adjustment: pairwise.t.test(y, g=tgroup, p.adj="bonf") # Result: observed type I error rate is 1 (predicted rate is 100*0.05/105=.05%). ############################################################################### # Two-way ANOVA (no interaction): R code ############################################################################### # RCBD from ppt lectures: extraction of sulfur (sulf) from FL soils # Groups = 4 chemicals (chem); Blocks = 5 soil types (soil) sulf <- c(5.07,4.43,7.09,4.48,3.31,2.74,2.32,2.35,2.54,2.09,1.09,2.70,2.34, 2.07,4.38,3.85,4.71,5.29,5.70,4.98) chem <- factor(rep(c("cac","nh4","ca2","h2o"),5)) soil <- factor(c(rep("Troop",4),rep("Lake",4),rep("Leon",4),rep("Chip",4),rep("Norf",4))) rcbd.fit = aov(sulf~soil+chem) # anova table anova(rcbd.fit) # profile plot interaction.plot(chem,soil,sulf) # Default diagnostics par(mfrow=c(2,2)); plot(rcbd.fit) # Friedman's (nonparametric) Test for differences in the groups friedman.test(sulf, groups=chem, blocks=soil) # means: for groups and for blocks tapply(sulf,chem,mean) tapply(sulf,soil,mean) ############################################################################### # Two-way ANOVA (interaction): R code ############################################################################### # Factorial experiment from ppt lectures, yield of fruit trees, 4 pesticides # (factor A), 3 varieties of citrus tree (factor B), 2 replications per treatment. fruit <- read.table("Data/fruit.txt",header=TRUE) # fit two-way model with interaction fruit.lm <- lm(yield ~ factor(A) + factor(B) + factor(A)*factor(B), data=fruit) anova(fruit.lm) # interaction not sig so drop it off fruit2.lm <- lm(yield ~ factor(A) + factor(B), data=fruit) # see the fitted coefficients (they only equal cell means for interaction model!?) summary(fruit2.lm) # profile plots interaction.plot(fruit$A,fruit$B,fruit$yield) interaction.plot(fruit$B,fruit$A,fruit$yield) ############################################################################### # Three-way ANOVA: R code ##############################95 percent confidence interval: ################################################# # Latin Square design from ppt lectures, weight of strawberries, 3 irrigation # methods (factor C), 3 rows (factor A), 3 columns (factor B). straw <- read.table("Data/latin_square.txt",header=TRUE) # fit three-way model straw.lm <- lm(weight ~ factor(row) + factor(column) + factor(irrig), data=straw) anova(straw.lm)