####################################################### ### ANCOVA example (glue data from the ppt lectures) ####################################################### glue <- read.table("Data/glue.txt",header=TRUE) # convert formulation codes to factor variables for proper statistical handling glue$Formulation <- as.factor(glue$Formulation) # fit linear models: full, thickness only, formulation only full.lm <- lm(Strength ~ Formulation + Thickness, data=glue) thick.lm <- lm(Strength ~ Thickness, data=glue) formu.lm <- lm(Strength ~ Formulation, data=glue) # test for sig of formulation anova(thick.lm,full.lm) # test for sig of thickness anova(formu.lm,full.lm) # plot the data & add the fitted lines for each formulation plot(glue$Thickness,glue$Strength,type="n",xlab="Thickness",ylab="Strength") text(glue$Thickness,glue$Strength,as.character(glue$Formulation)) abline(58.92788,-0.95445,lty=1) # form 1 abline(58.92788+0.63466,-0.95445,lty=2) # form 2 abline(58.92788+0.87644,-0.95445,lty=3) # form 3 abline(58.92788+0.00911,-0.95445,lty=4) # form 4 legend(14.5,51,c("Formulation 1","Formulation 2","Formulation 3","Formulation 4"),lty=c(1,2,3,4)) # note however that due to no differences in formulations, it suffices to plot # the following line, obtained from thick.lm summary(thick.lm) abline(59.9294,-1.0044,col="blue") ####################################################### ### ANCOVA example (golf course speed data from ppt lectures) ####################################################### golf <- read.table("Data/Ott5thEdDataCh16/ch16-case_study.csv",header=TRUE,sep=",") region <- factor(golf$R) cult <- factor(golf$C) humid <- golf$HUM speed <- golf$SPEED # plot speed vs. humid #pdf(file="Plots/golf1.pdf",pointsize=3,width=6,height=5) plot(humid,speed,type="n",xlab="Humidity",ylab="Speed",main="Speed vs. Humidity by Cultivar (1=C1, 2=C2, 3=C3)") text(humid,speed,as.character(cult)) #dev.off() # Fit Models 1,2,3 model1 <- lm(speed ~ humid + region + cult + humid*cult) model2 <- lm(speed ~ humid + region + cult) model3 <- lm(speed ~ humid + region) # compare models 1 to 2 (test significance of different cult slopes) anova(model2,model1) # compare models 2 to 3 (test significance of cult) anova(model3,model2) # final model anova(model2) summary(model2) # plot speed vs. humid (add fitted cult lines) #pdf(file="Plots/golf2.pdf",pointsize=3,width=6,height=5) plot(humid,speed,type="n",xlab="Humidity",ylab="Speed",main="Speed vs. Humidity With Fitted Cultivar Lines") text(humid,speed,as.character(cult)) abline(8.421762,-0.022765,lty=1) # cult 1 abline(8.421762+0.917971,-0.022765,lty=2) # cult 2 abline(8.421762+1.885567,-0.022765,lty=3) # cult 3 legend(80,9.8,c("Cultivar 1","Cultivar 2","Cultivar 3"),lty=c(1,2,3)) #dev.off() # average region (block) effect mregion <- sum(c(-0.072989,-0.084832,-0.186642,0.340397,0.434006,0.433041,0.252458))/8 # adjusted (for region and humid) means of C1, C2, C3 muC1 <- 8.421762-0.022765*mean(humid)+mregion muC2 <- 8.421762+0.917971-0.022765*mean(humid)+mregion muC3 <- 8.421762+1.885567-0.022765*mean(humid)+mregion # print these means c(muC1,muC2,muC3) # Check model fit #pdf(file="Plots/golf3.pdf",pointsize=3,width=6,height=5) par(mfrow=c(2,1)) qqnorm(model2$res) plot(model2$fitted,model2$res); abline(0,0) par(mfrow=c(1,1)) #dev.off() # all pairwise Tukey comparisons among cult means (needs "multcomp" package) library(multcomp) summary(glht(model2, linfct=mcp(cult="Tukey")))