Licensed under the Creative Commons attribution-noncommercial license. Please share & remix noncommercially, mentioning its origin.
library(ggplot2); theme_set(theme_bw())
library(ggExtra)
library(cowplot)
library(dotwhisker)
\[ z = a + bx + cy + \epsilon \] or (more predictor variables) \[ y = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \ldots + \epsilon \] or (more flexible distribution syntax) \[ y \sim \textrm{Normal}(\beta_0 + \beta_1 x_1 + \beta_2 x_2 + \ldots, \sigma^2) \] or (more complex sets of predictors) \[ \begin{split} \boldsymbol{\mu} & = \mathbf{X} \mathbf{\beta} \\ y_i & \sim \textrm{Normal}(\mu_i,\sigma^2) \end{split} \]
MPG vs displacement for cars
cars_lm <- lm(log10(mpg)~log10(disp),mtcars)
(We’ll come back to how to judge this later)
dd <- data.frame(flavour=rep(c("chocolate","vanilla"),c(2,3)))
print(dd)
## flavour
## 1 chocolate
## 2 chocolate
## 3 vanilla
## 4 vanilla
## 5 vanilla
model.matrix(~flavour,dd)
## (Intercept) flavourvanilla
## 1 1 0
## 2 1 0
## 3 1 1
## 4 1 1
## 5 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$flavour
## [1] "contr.treatment"
chocolate
) used as default (use relevel()
or factor(...,levels=...)
to change default)response ~ predictor1 + predictor2 + ...
1+ ...
):
multiplies relevant columnsa*b
: main effect plus interactionsmodel.matrix(formula, data)
y~f
: 1-way ANOVAy~f+g
: 2-way ANOVA (additive)y~f*g
: 2-way ANOVA (with interaction)y~x
: univariate regressiony~f+x
: ANCOVA (parallel slopes)y~f*x
: ANCOVA (with interaction, non-parallel slopes)y~x1+x2
: multivariate regression (additive)y~x1*x2
: multiv. regression with interactionIf confused, (1) try to write out the equation; (2) model.matrix()
too many ways to change contrasts (globally via options()
; as attribute of factor; contrasts
argument in lm()
)
Data on ant colonies from Gotelli and Ellison (2004):
ants <- data.frame(
place=rep(c("field","forest"),c(6,4)),
colonies=c(12, 9, 12, 10,
9, 6, 4, 6, 7, 10))
aggregate(colonies~place,data=ants,FUN=mean)
## place colonies
## 1 field 9.666667
## 2 forest 6.750000
pr <- function(m) printCoefmat(coef(summary(m)),digits=3,signif.stars=FALSE)
pr(lm1 <- lm(colonies~place,data=ants))
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.667 0.958 10.09 8e-06
## placeforest -2.917 1.515 -1.92 0.09
pr(lm2 <- update(lm1, contrasts=list(place=contr.sum)))
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.208 0.758 10.83 4.7e-06
## place1 1.458 0.758 1.92 0.09
data(lizards,package="brglm")
mtcars_big <- lm(mpg~.,data=mtcars)
mtcars_big_sc <- lm(mpg~.,data=as.data.frame(scale(mtcars)))
dwfun <- function(.,title) { dwplot(.,order_vars=names(sort(coef(.))))+
geom_vline(xintercept=0,linetype=2)+
ggtitle(title) }
plot_grid(dwfun(mtcars_big,"unscaled"),dwfun(mtcars_big_sc,"scaled"))
par(mfrow=c(2,2))
plot(cars_lm)
id.n
argument)summary()
(\(t\) test)anova()
, car::Anova()
(\(F\) test)proportion data: discrete counts, \(0 \le x \le N\)
linear model doesn’t make sense
"gaussian"
), Gamma)glm
is Gaussianqlogis()
function (plogis()
is logistic/inverse-link)cbind(k,N-k)
not cbind(k,N)
weights=N
glm(p~...,data,weights=rep(N,nrow(data)))
plot
is still somewhat usefularm::binnedplot
)cor(observed,predict(model, type="response"))
)type="response"
to get data-scale predictions)DHARMa
packagelike LMs, but:
confint.glm()
computes likelihood profile CIsglm()
problemsfamily
(\(\to\) linear model); using glm()
for linear models (unnecessary)back-transforming SEs rather than CIs
ignoring random effects
aids <- read.csv("../data/aids.csv")
aids <- transform(aids, date=year+(quarter-1)/4)
print(gg0 <- ggplot(aids,aes(date,cases))+geom_point())
print(gg1 <- gg0 + geom_smooth(method="glm",colour="red",
method.args=list(family="quasipoisson")))
g1 <- glm(cases~date,aids,family=quasipoisson(link="log"))
summary(g1)
##
## Call:
## glm(formula = cases ~ date, family = quasipoisson(link = "log"),
## data = aids)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.7046 -0.7978 0.1218 0.6849 2.1217
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.023e+03 6.806e+01 -15.03 1.25e-11 ***
## date 5.168e-01 3.425e-02 15.09 1.16e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 2.354647)
##
## Null deviance: 677.26 on 19 degrees of freedom
## Residual deviance: 53.02 on 18 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
plot(g1)
)acf(residuals(g1)) ## check autocorrelation
print(gg2 <- gg1+geom_smooth(method="glm",formula=y~poly(x,2),
method.args=list(family="quasipoisson")))
print(gg2+scale_y_log10())
g2 <- update(g1,.~poly(date,2))
summary(g2)
##
## Call:
## glm(formula = cases ~ poly(date, 2), family = quasipoisson(link = "log"),
## data = aids)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3290 -0.9071 -0.0761 0.8985 2.3209
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.86859 0.05004 77.311 < 2e-16 ***
## poly(date, 2)1 3.82934 0.25162 15.219 2.46e-11 ***
## poly(date, 2)2 -0.68335 0.19716 -3.466 0.00295 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.657309)
##
## Null deviance: 677.264 on 19 degrees of freedom
## Residual deviance: 31.992 on 17 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
anova(g1,g2,test="F") ## for quasi-models specifically
## Analysis of Deviance Table
##
## Model 1: cases ~ date
## Model 2: cases ~ poly(date, 2)
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 18 53.020
## 2 17 31.992 1 21.028 12.688 0.002399 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(residuals(g2)) ## check autocorrelation
Gotelli, Nicholas J., and Aaron M. Ellison. 2004. A Primer of Ecological Statistics. Sunderland, MA: Sinauer.
Schielzeth, Holger. 2010. “Simple Means to Improve the Interpretability of Regression Coefficients.” Methods in Ecology and Evolution 1: 103–13. doi:10.1111/j.2041-210X.2010.00012.x.
Tiwari, Manjula, Karen A. Bjorndal, Alan B. Bolten, and Benjamin M. Bolker. 2006. “Evaluation of Density-Dependent Processes and Green Turtle Chelonia Mydas Hatchling Production at Tortuguero, Costa Rica.” Marine Ecology Progress Series 326: 283–93.
Wilkinson, G. N., and C. E. Rogers. 1973. “Symbolic Description of Factorial Models for Analysis of Variance.” Applied Statistics 22 (3): 392–99. doi:10.2307/2346786.