##################################################################### ### Poisson regression R code applied to horseshoe crabs data ###################################################################### library(mgcv) par(mfrow=c(2,2)) # crabs <- read.table("Data/HorseshoeCrabs.csv", header=T, sep=",") # plot width vs satellites (Fig 4.3 in Agresti's Intro to CDA book) pdf(file="Plots/crabs1.pdf",pointsize=3,width=6,height=5) plot.width <- aggregate(rep(1,nrow(crabs)), list(satellites=crabs$satellites, width=crabs$width), sum) plot.width$satellites <- as.numeric(as.vector(plot.width$satellites)) plot.width$width <- as.numeric(as.vector(plot.width$width)) plot(y=plot.width$satellites,x=plot.width$width,xlab="Width (cm)", ylab="Number of Satellites", bty="L", axes=F, type="n") axis(2, at=1:15) axis(1, at=seq(20,34,2)) text(y=plot.width$satellites,x=plot.width$width,labels=plot.width$x) dev.off() # # smooth plot of width vs satellites (Fig 4.4) pdf(file="Plots/crabs2.pdf",pointsize=3,width=6,height=5) crabs$width.fac <- cut(crabs$width, breaks=c(0,seq(23.25, 29.25),Inf)) plot.y <- aggregate(crabs$satellites, by=list(width=crabs$width.fac), mean)$x plot.x <- aggregate(crabs$width, by=list(width=crabs$width.fac), mean)$x plot(x=plot.x, y=plot.y, ylab="Number of Satellites", xlab="Width (cm)",bty="L", axes=F, type="p", pch=16) axis(2, at=0:5) axis(1, at=seq(20,34,2)) res<- gam(plot.y~s(plot.x, k=4, fx=TRUE), family=poisson(link=log)) lines(x=plot.x,y=res$fitted.values) dev.off() # # Now fit Poisson GLM (with log link) log.fit <- glm(satellites~width, family=poisson(link=log), data=crabs) summary(log.fit) # Poisson GLM (with identity link) id.fit <- glm(satellites~width, family=poisson(link=identity),data=crabs, start=coef(log.fit)) summary(id.fit) # # Comparison of fitted lines for Poisson GLM (with log vs. identity links) pdf(file="Plots/crabs3.pdf",pointsize=3,width=6,height=5) plot(x=plot.x, y=plot.y, ylab=expression(paste("Mean number of satellites,", {mu})), xlab="Width (cm)",bty="L",axes=F, type="p", pch=16) axis(2, at=0:5) axis(1, at=seq(20,34,2)) ind<-order(crabs$width) lines(x=crabs$width[ind],y=log.fit$fitted.values[ind]) lines(x=crabs$width[ind],y=id.fit$fitted.values[ind]) arrows(x0=23.5,y0=2.9,x1=23.5,y1=predict(log.fit,newdata=data.frame(width=23.5), type="response"), length=.2) text(x=23.5,y=3,"Log Link") arrows(x0=29.75,y0=3.1,x1=29.75,y1=predict(id.fit,newdata=data.frame(width=29.75), type="response"), length=.2) text(x=29.75,y=2.9,"Identity Link") dev.off()