### R tutorial: basics, descriptive statistics, graphical displays ###--------------------------------------------------------------- #The first thing you should do after downloading R is to click "file", "change dir...", #to something like "C:\R_Stuff". This is where R will automatically look in to load #workspaces, and save and read data files. #Let's now look at some data. Create CandyData.txt, a file with Candy weights (col 1) #and cals (col 2): 40 160 15 60 40 160 40 160 3 10 40 200 40 130 40 160 40 140 15 60 40 210 40 210 40 210 40 160 40 180 40 180 40 140 15 70 15 60 15 60 40 160 15 60 #Assign 1st col of a to vector weight, 2nd col to vector cals weight <- candydata$V1 cals <- candydata$V2 #Ways to import Excel file CandyData.xls (already with variable names): # (i) convert it to .txt format, and type the following candydata <- read.table("CandyData.txt") # (ii) convert it to .csv format, and type the following candydata <- read.csv("CandyData.csv") # now make the variables "weight" and "cals" available attach(candydata) # Look what happens when you type: cals #R displays the vector "cals". You can manipulate this vector and #access individual entries, e.g. cals[5] #gives you 10, the 5th entry in vector "cals". The following squares #each entry of "cals" and then adds up all elements: sum(cals^2) ### Summary stats for cals: summary(cals) ### Histogram, boxplot, stem & leaf plots for cals: hist(cals) boxplot(cals) stem(cals) ### Open R's online docs, look up "pie chart" help.start() pie(rep(1,24), col=rainbow(24), radius=0.9) #Typical of R graphics, we can get quite fancy, some Exs. pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12) names(pie.sales) <- c("Blueberry", "Cherry", "Apple", "Boston Cream", "Other", "Vanilla Cream") #now split screen into 4 regions: 2 x 2, and do some plotting... par(mfrow=c(2,2)) pie(pie.sales, col=c("purple","violetred1","green3","cornsilk","cyan","white"), main="color pie-chart of pie sales") pie(pie.sales, col=gray(seq(0.4,1.0,length=6)),main="B & W pie-chart of pie sales") dotchart(pie.sales, main="dot chart of pie sales") barplot(pie.sales, main="bar chart of pie sales") par(mfrow=c(1,1)) ### Open up the built-in dataset "cars" (speed & stop dist of cars) data(cars) cars # fit a regression with lm fit <- lm(cars$dist~cars$speed) # plot speed vs. stop dist plot(cars$speed,cars$dist,main="The car data",xlab="speed (mph)", ylab="stopping distance (ft)") # add a solid (lty=1) regression line to plot abline(fit,lty=1) # add a dotted (lty=2) smoothed lowess line lines(lowess(cars),lty=2) # add a legend, positioning top left corner of legend box at (5,120) legend(5,120,legend=c("regression line","smoothed line"),lty=c(1,2)) ### Time Series Plots # mean monthly Nottingham temperatures par(mfcol=c(2,2)) ts.plot(nottem,ylab="degrees F",xlab="Year",main="Mean monthly temperatures at Nottingham") boxplot(split(nottem, cycle(nottem)), names=month.abb, main="monthly boxplots for all 20 years") # monthly deaths from lung diseases in the UK data(UKLungDeaths) ts.plot(mdeaths,fdeaths,lty=c(1,2),main="Monthly deaths from lung diseases in the UK") legend(1978.5,2750,legend=c("males","females"),lty=c(1,2)) # a barplot variation on the above lung.deaths <- aggregate(ts.union(mdeaths,fdeaths),1) barplot(t(lung.deaths),names=seq(1974,1979),col=c(gray(.3),gray(.7)),main="aggregate yearly deaths",ylim=c(0,30000)) legend(locator(1),c("males","females"),fill=c(gray(.3),gray(.7))) #click to place legend par(mfrow=c(1,1)) ### multivariate displays ## Swiss provinces (47) fertility and socioeconomic indicators for 1888 (%) # pairwise scatterplots pairs(swiss) # smooth the plots pairs(swiss,panel=panel.smooth) # put histograms on the diagonal panel.hist <- function(x, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(usr[1:2], 0, 1.5) ) h <- hist(x, plot = FALSE) breaks <- h$breaks; nB <- length(breaks) y <- h$counts; y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...) } pairs(swiss, panel=panel.smooth, cex = 1, pch = 23, bg="light blue", diag.panel=panel.hist, cex.labels = 2, font.labels=2) # put (absolute) correlations on the upper panels, with size proportional to correlations panel.cor <- function(x, y, digits=2, prefix="", cex.cor) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y)) txt <- format(c(r, 0.123456789), digits=digits)[1] txt <- paste(prefix, txt, sep="") if(missing(cex.cor)) cex <- 0.8/strwidth(txt) text(0.5, 0.5, txt, cex = cex * r) } pairs(swiss, lower.panel=panel.smooth, upper.panel=panel.cor) # a cluster analysis (greater heights b/ween provinces denote greater dissimilarity) data(swiss); swiss.x <- as.matrix(swiss) h <- hclust(dist(swiss.x),method="single") plclust(h) ### Oh the things you can do... (these exs taken from the "image" & "persp" help files) #Ex 1: Rotated sinc function. x <- seq(-10, 10, length=50) y <- x f <- function(x,y) { r <- sqrt(x^2+y^2) 10 * sin(r)/r } z <- outer(x, y, f) z[is.na(z)] <- 1 par(bg = "white") persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue", xlab = "X", ylab = "Y", zlab = "Z") persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue", ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "X", ylab = "Y", zlab = "Z") #Ex 2: A contour plot data(volcano) x <- 10*(1:nrow(volcano)) y <- 10*(1:ncol(volcano)) image(x, y, volcano, col = terrain.colors(100), axes = FALSE) contour(x, y, volcano, levels = seq(90, 200, by=5), add = TRUE, col = "peru") axis(1, at = seq(100, 800, by = 100)) axis(2, at = seq(100, 600, by = 100)) box() title(main = "Maunga Whau Volcano", font.main = 4) #Ex 3: Same contour plot again z <- 2 * volcano # Exaggerate the relief x <- 10 * (1:nrow(z)) # 10 meter spacing (S to N) y <- 10 * (1:ncol(z)) # 10 meter spacing (E to W) persp(x, y, z, theta = 120, phi = 15, scale = FALSE, axes = FALSE) #Ex 4: Now something more complex # We border the surface, to make it more "slice like" # and color the top and sides of the surface differently. zmin <- min(z) - 20 z <- rbind(zmin, cbind(zmin, z, zmin), zmin) x <- c(min(x) - 1e-10, x, max(x) + 1e-10) y <- c(min(y) - 1e-10, y, max(y) + 1e-10) fill <- matrix("green3", nr = nrow(z)-1, nc = ncol(z)-1) fill[,1] <- "gray" fill[,ncol(fill)] <- "gray" fill[1,] <- "gray" fill[nrow(fill),] <- "gray" par(bg = "lightblue") persp(x, y, z, theta = 120, phi = 15, col = fill, scale = FALSE, axes = FALSE) title(main = "Maunga Whau\nOne of 50 Volcanoes in the Auckland Region.", font.main = 4) par(bg = "slategray") persp(x, y, z, theta = 135, phi = 30, col = fill, scale = FALSE, ltheta = -120, lphi = 15, shade = 0.65, axes = FALSE) persp(x, y, z, theta = 135, phi = 30, col = "green3", scale = FALSE, ltheta = -120, shade = 0.75, border = NA, box = FALSE)