rm(list=ls()) rm(list=ls()) # install.packages("RCurl") library(RCurl) # allows accessing data from URL dd <- read.csv(text=getURL("https://raw.githubusercontent.com/R4All/datasets/master/SoaySheepFitness.csv"))
These data are from the excellent Getting Started with R: An Introduction for Biologists (1st Edition) by Beckerman and Petchey.
head(dd) plot(fitness ~ body.size, data=dd, main="Weigth at birth and fitness in sheep", xlab="Weight of the mother at birth (pounds)", ylab="Number of lambs over lifetime") lm1 <- lm(fitness ~ body.size, data=dd) summary(lm1) # check the model assumptions (graphically): par(mfrow=c(3,2)) plot(lm1) hist(lm1$resid) # data are non-normal and non-linear. glm1 <- glm(fitness ~ body.size, data=dd, family="poisson"(link = "log")) summary(glm1)
The function glm() estimates both the model parameters and their Std. Errors on the link (log) scale. The statistical tests are run assuming that thew residuals follow not a Normal, but a Poisson distribution.
# compute SE using predict() preddata <- data.frame(body.size = seq(from=min(dd$body.size), to=max(dd$body.size), by=0.01 )) preds <- predict(glm1, newdata=preddata, type="response", se.fit=T) # type="link" would give predictions on the link-scale fit <- preds$fit # limits of 95% CI: lwr <- fit - 1.96*preds$se.fit upr <- fit + 1.96*preds$se.fit
To plot mean values with CI:
plot(fitness ~ body.size, data=dd, main="Weigth at birth and fitness in sheep", xlab="Weight of the mother at birth (pounds)", ylab="Number of lambs over lifetime" ) lines(preddata$body.size, preds$fit) lines(preddata$body.size, lwr, col="red") lines(preddata$body.size, upr, col="red")
