GLMM FAQ

19 Jul 2025 · linear mixed models

## search the names of all currently available packages for an
## "lme"/"lmm"-like pattern (the [^t] requires a following non-"t" character)
grep("l.?m[me][^t]", rownames(available.packages()), value = TRUE)
##  [1] "blmeco"              "buildmer"            "cellVolumeDist"     
##  [4] "climenv"             "climextRemes"        "curtailment"        
##  [7] "glmertree"           "glmm.hp"             "glmmEP"             
## [10] "glmmfields"          "glmmLasso"           "glmmML"             
## [13] "glmmPen"             "glmmrBase"           "glmmrOptim"         
## [16] "glmmSeq"             "glmmTMB"             "jlmerclusterperm"   
## [19] "lamme"               "limexhub"            "lme4"               
## [22] "lmeInfo"             "lmeresampler"        "lmerPerm"           
## [25] "lmerTest"            "lmeSplines"          "lmmot"              
## [28] "lmmpar"              "lrmest"              "lsmeans"            
## [31] "mailmerge"           "mlmm.gwas"           "multilevelmediation"
## [34] "mvglmmRank"          "nlmeU"               "nlmeVPC"            
## [37] "palmerpenguins"      "plsmmLasso"          "SherlockHolmes"     
## [40] "tglkmeans"           "trouBBlme4SolveR"    "vagalumeR"          
## [43] "vglmer"
library(lme4)
library(equatiomatic)
## random intercept and random Days slope for each Subject
fm1 <- lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy)
## pass the fitted model to equatiomatic to extract its equation
equatiomatic::extract_eq(fm1)
## Pearson chi-squared check for overdispersion.
##
## model: a fitted model that has df.residual() and
##   residuals(type="pearson") methods
## Returns a named numeric vector:
##   chisq  sum of squared Pearson residuals
##   ratio  chisq divided by the residual degrees of freedom
##   rdf    residual degrees of freedom
##   p      upper-tail chi-squared p-value of chisq on rdf df
overdisp_fun <- function(model) {
    resid_df <- df.residual(model)
    pearson_resid <- residuals(model, type = "pearson")
    chisq_stat <- sum(pearson_resid^2)
    c(chisq = chisq_stat,
      ratio = chisq_stat / resid_df,
      rdf = resid_df,
      p = pchisq(chisq_stat, df = resid_df, lower.tail = FALSE))
}
library(lme4)
library(glmmTMB)
set.seed(101)
## simulated data: one uniform predictor and a 10-level grouping factor
d <- data.frame(
    x = runif(1000),
    f = factor(sample(1:10, size = 1000, replace = TRUE))
)
## simulate a Poisson response from the formula and the given parameters
suppressMessages(
    d$y <- simulate(
        ~ x + (1 | f),
        family = poisson,
        newdata = d,
        newparams = list(theta = 1, beta = c(0, 2))
    )[[1]]
)
## fit with lme4 ...
m1 <- glmer(y ~ x + (1 | f), data = d, family = poisson)
overdisp_fun(m1)
##        chisq        ratio          rdf            p 
## 1035.9966326    1.0391140  997.0000000    0.1902294
## ... and the same model with glmmTMB
m2 <- glmmTMB(y ~ x + (1 | f), data = d, family = "poisson")
overdisp_fun(m2)
##        chisq        ratio          rdf            p 
## 1035.9961394    1.0391135  997.0000000    0.1902323
## Quasi-likelihood adjustment of a coefficient table: standard errors
##  are inflated by sqrt(phi), and the z statistics and two-sided normal
##  p-values are recomputed from the inflated standard errors.
## (You may also be able to extract the summary table via
##  broom::tidy or broom.mixed::tidy.)
##
## model: fitted model; only used by the default values of ctab and phi
## ctab:  coefficient table with "Estimate", "Std. Error", "z value"
##        and "Pr(>|z|)" columns
## phi:   overdispersion factor (default: "ratio" from overdisp_fun)
## Returns the adjusted table as a data frame.
quasi_table <- function(model, ctab = coef(summary(model)),
                        phi = overdisp_fun(model)["ratio"]) {
    adjusted <- within(as.data.frame(ctab), {
        `Std. Error` <- `Std. Error` * sqrt(phi)
        ## uses the already-inflated standard error from the line above
        `z value` <- Estimate / `Std. Error`
        `Pr(>|z|)` <- 2 * pnorm(abs(`z value`), lower.tail = FALSE)
    })
    adjusted
}
printCoefmat(quasi_table(m1), digits = 3)
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.2277     0.2700    0.84      0.4    
## x             2.0640     0.0528   39.11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## to use this with glmmTMB, we need to separate out the
##  conditional component of the summary
printCoefmat(
    quasi_table(m2, ctab = coef(summary(m2))[["cond"]]),
    digits = 3
)
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.2277     0.2700    0.84      0.4    
## x             2.0640     0.0528   39.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(broom.mixed)
library(dplyr)
## tidy()-based version of the quasi-likelihood adjustment: fixed-effect
##  standard errors are inflated by sqrt(phi), and the test statistic
##  and two-sided normal p-value are recomputed from them.
##
## model:      fitted model understood by tidy()
## phi:        overdispersion factor (default: "ratio" from overdisp_fun)
## conf.level: accepted but not used by the current implementation
## Returns the adjusted tidy() table.
tidy_quasi <- function(model, phi = overdisp_fun(model)["ratio"],
                       conf.level = 0.95) {
    fixed_tab <- tidy(model, effects = "fixed")
    mutate(fixed_tab,
           std.error = std.error * sqrt(phi),
           statistic = estimate / std.error,
           p.value = 2 * pnorm(abs(statistic), lower.tail = FALSE))
}
## adjusted fixed-effect table for the glmer fit (m1)
tidy_quasi(m1)
## # A tibble: 2 × 6
##   effect term        estimate std.error statistic p.value
##   <chr>  <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 fixed  (Intercept)    0.228    0.270      0.843   0.399
## 2 fixed  x              2.06     0.0528    39.1     0
## same for the glmmTMB fit (m2); its table has an extra `component` column
tidy_quasi(m2)
## # A tibble: 2 × 7
##   effect component term        estimate std.error statistic p.value
##   <chr>  <chr>     <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 fixed  cond      (Intercept)    0.228    0.270      0.843   0.399
## 2 fixed  cond      x              2.06     0.0528    39.1     0
library(sos)
## use sos::findFn to search package help pages for "corStruct"
findFn("corStruct")
Separation: TRUE 
Existence of maximum likelihood estimates
(Intercept)      height 
        Inf         Inf 
0: finite value, Inf: infinity, -Inf: -infinity
## refit `model` (a previously fitted lme4 model, not defined in this
## snippet) with lme4::allFit, which tries the available optimizers
modelfit.all <- lme4::allFit(model)
## summary of the set of refits, for comparing results across optimizers
ss <- summary(modelfit.all)
library(blme)
## template call (no data argument shown): residual prior set to the
## point value 1, no prior on the random-effects covariance
blmer(
    formula = y ~ 1 + (1 | group),
    weights = V,
    resid.prior = point(1.0),
    cov.prior = NULL
)
## lme: pass a very small `sigma` through the control list
nlme::lme(
    Reaction ~ Days,
    random = ~ 1 | Subject,
    data = lme4::sleepstudy,
    control = list(sigma = 1e-8)
)
library(nlme)
## Denominator degrees of freedom reported by anova() for an lme fit
## to the Orthodont data.
##
## formula: fixed-effects formula passed to lme()
## random:  random-effects formula passed to lme()
## Returns the "denDF" column of the anova table, named by its rows.
lmeDF <- function(formula = distance ~ age, random = ~ 1 | Subject) {
    fit <- lme(formula, random, data = Orthodont)
    anova_tab <- anova(fit)
    setNames(anova_tab[, "denDF"], rownames(anova_tab))
}
lmeDF()
## (Intercept)         age 
##          80          80
lmeDF(random = ~ age | Subject) ## wrong!
## (Intercept)         age 
##          80          80
## calcDenDF() is defined in a separate script
source("R/calcDenDF.R")
calcDenDF(~ age, "Subject", nlme::Orthodont)
## (Intercept)         age 
##          80          80
calcDenDF(~ age, data = nlme::Orthodont, random = ~ 1 | Subject)
## (Intercept)         age 
##          80          80
calcDenDF(~ age, data = nlme::Orthodont, random = ~ age | Subject) ## off by 1
## (Intercept)         age 
##          81          25
library(lme4)
## separate (uncorrelated) random intercept and Days slope terms, ML fit
m2 <- lmer(Reaction ~ Days + (1 | Subject) + (0 + Days | Subject),
           data = sleepstudy, REML = FALSE)
## random intercept only
m1 <- update(m2, . ~ Days + (1 | Subject))
## no random effects
m0 <- lm(Reaction ~ Days, data = sleepstudy)
anova(m2, m1, m0) ## two sequential tests
## Data: sleepstudy
## Models:
## m0: Reaction ~ Days
## m1: Reaction ~ Days + (1 | Subject)
## m2: Reaction ~ Days + (1 | Subject) + (0 + Days | Subject)
##    npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
## m0    3 1906.3 1915.9 -950.15   1900.3                          
## m1    4 1802.1 1814.8 -897.04   1794.1 106.214  1  < 2.2e-16 ***
## m2    5 1762.0 1778.0 -876.00   1752.0  42.075  1  8.782e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(RLRsim)
## compare m0 and m1
exactLRT(m1, m0)
## 
##  simulated finite sample distribution of LRT. (p-value based on 10000
##  simulated values)
## 
## data:  
## LRT = 106.21, p-value < 2.2e-16
## compare m1 and m2
## (REML refit of the full model, then versions with one
##  random-effect term removed from it)
mA <- update(m2, REML = TRUE)
m0B <- update(mA, . ~ . - (0 + Days | Subject))
m.slope <- update(mA, . ~ . - (1 | Subject))
exactRLRT(m = m.slope, mA = mA, m0 = m0B)
## 
##  simulated finite sample distribution of RLRT.
##  
##  (p-value based on 10000 simulated values)
## 
## data:  
## RLRT = 42.796, p-value < 2.2e-16
## pbkrtest comparison of m2 against m1
pb <- pbkrtest::PBmodcomp(m2, m1, seed = 101)
pb
## Bootstrap test; time: 14.57 sec; samples: 1000; extremes: 0;
## Requested samples: 1000 Used samples: 501 Extremes: 0
## large : Reaction ~ Days + (1 | Subject) + (0 + Days | Subject)
## Reaction ~ Days + (1 | Subject)
##          stat df   p.value    
## LRT    42.075  1 8.782e-11 ***
## PBtest 42.075     0.001992 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(nlme) 
fm1 <- lme(distance ~ age*Sex, random = ~ 1 + age | Subject,
           data = Orthodont)
plot(Orthodont,asp="fill") ## plot responses by individual
## note that expand.grid() orders factor levels by *order of
## appearance* -- must match levels(Orthodont$Sex). The design matrix
## built below is multiplied positionally against vcov(fm1), so a
## different level order would silently attach the standard errors to
## the wrong sex; taking the levels from the data keeps them aligned.
newdat <- expand.grid(age=c(8,10,12,14), Sex=levels(Orthodont$Sex))
## population-level (fixed effects only) predictions
newdat$pred <- predict(fm1, newdat, level = 0)

## [-2] drops response from formula
Designmat <- model.matrix(formula(fm1)[-2], newdat)
## variance of the predictions due to fixed-effect uncertainty
predvar <- diag(Designmat %*% vcov(fm1) %*% t(Designmat))
newdat$SE <- sqrt(predvar)
## SE2 additionally includes the residual variance (prediction intervals)
newdat$SE2 <- sqrt(predvar+fm1$sigma^2)

library(ggplot2)
pd <- position_dodge(width=0.4)
g0 <- ggplot(newdat,aes(x=age,y=pred,colour=Sex))+
   geom_point(position=pd)
cmult <- 2  ## could use 1.96 instead
## confidence intervals
g0 + geom_linerange(aes(ymin=pred-cmult*SE,ymax=pred+cmult*SE), position=pd)
## prediction intervals
g0 + geom_linerange(aes(ymin=pred-cmult*SE2,ymax=pred+cmult*SE2), position=pd)
library(lme4)
library(ggplot2)
## this section uses the MEMSS copy of the Orthodont data
data("Orthodont", package = "MEMSS")
fm1 <- lmer(distance ~ age * Sex + (age | Subject), data = Orthodont)
## prediction grid; the placeholder `distance` column is presumably there
## so that model.matrix(terms(fm1), ...) can find the response variable
newdat <- expand.grid(
    age = c(8, 10, 12, 14),
    Sex = c("Female", "Male"),
    distance = 0
)
## population-level predictions (re.form = NA: no random effects)
newdat$distance <- predict(fm1, newdat, re.form = NA)
mm <- model.matrix(terms(fm1), newdat)
## or newdat$distance <- mm %*% fixef(fm1)
## variance from fixed-effect uncertainty only
pvar1 <- diag(mm %*% tcrossprod(vcov(fm1), mm))
## plus the first element of the Subject covariance matrix
tvar1 <- pvar1 + VarCorr(fm1)$Subject[1]  ## must be adapted for more complex models
cmult <- 2 ## could use 1.96
## lower/upper limits: plo/phi use pvar1, tlo/thi use tvar1
newdat <- data.frame(
    newdat,
    plo = newdat$distance - cmult * sqrt(pvar1),
    phi = newdat$distance + cmult * sqrt(pvar1),
    tlo = newdat$distance - cmult * sqrt(tvar1),
    thi = newdat$distance + cmult * sqrt(tvar1)
)
## plot confidence
g0 <- ggplot(newdat, aes(x = age, y = distance, colour = Sex)) +
    geom_point()
g0 +
    geom_pointrange(aes(ymin = plo, ymax = phi)) +
    labs(title = "CI based on fixed-effects uncertainty ONLY")
## plot prediction
g0 +
    geom_pointrange(aes(ymin = tlo, ymax = thi)) +
    labs(title = "CI based on FE uncertainty + RE variance")
rm("Orthodont") ## clean up
library(glmmTMB)
data(Orthodont,package="nlme")
fm2 <- glmmTMB(distance ~ age*Sex + (age | Subject),
                data = Orthodont,
                family="gaussian")

## make prediction data frame
## expand.grid() orders factor levels by order of appearance, and the
## design matrix below is multiplied positionally against fixef() and
## vcov(); the Sex levels must therefore be in the same order as
## levels(Orthodont$Sex), so take them straight from the data
newdat <- expand.grid(age=c(8,10,12,14), Sex=levels(Orthodont$Sex))
## design matrix (fixed effects)
mm <- model.matrix(delete.response(terms(fm2)),newdat)
## linear predictor (for GLMMs, back-transform this with the
##  inverse link function, e.g. plogis() for binomial, beta;
##  exp() for Poisson, negative binomial)
newdat$distance <- drop(mm %*% fixef(fm2)[["cond"]])
## variance of the predictions due to fixed-effect uncertainty
predvar <- diag(mm %*% vcov(fm2)[["cond"]] %*% t(mm))
newdat$SE <- sqrt(predvar)
## SE2 additionally includes the residual variance (prediction intervals)
newdat$SE2 <- sqrt(predvar+sigma(fm2)^2)
library(ggplot2);  theme_set(theme_bw())
pd <- position_dodge(width=0.4)
## observed data; point size shows the number of overlapping observations
g0 <- ggplot(Orthodont,aes(x=age,y=distance,colour=Sex))+
    stat_sum(alpha=0.2,aes(size=..n..))+
    scale_size_continuous(breaks=1:4,range=c(2,5))
## add the predicted values
g1 <- g0+geom_line(data=newdat,position=pd)+
    geom_point(data=newdat,shape=17,size=3,position=pd)
## confidence intervals
g2 <- g1 + geom_linerange(data=newdat,
                          aes(ymin=distance-2*SE,ymax=distance+2*SE),
                          lwd=2, position=pd)
## prediction intervals
g2 + geom_linerange(data=newdat,
                    aes(ymin=distance-2*SE2,ymax=distance+2*SE2), position=pd)
## Warning: The dot-dot notation (`..n..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(n)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
library(lme4)
fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
## conditional modes; condVar = TRUE also attaches their conditional
## variance-covariance matrices as the "postVar" attribute
cV <- ranef(fm1, condVar = TRUE)
ranvar <- attr(cV[[1]], "postVar")
## conditional standard deviations for the first group
sqrt(diag(ranvar[,,1]))
## [1] 12.070857  2.304839
ng <- dim(ranvar)[3]  ## number of slices along the third dimension (groups)
np <- dim(ranvar)[2]  ## size of each group's square slice
## extract the diagonal of every group's slice: the index matrix has
## rows (i, i, g) with i = 1..np inside each group g = 1..ng, so the
## third column must run over the groups, each repeated np times
mm <- matrix(ranvar[cbind(rep(seq_len(np),ng),
                          rep(seq_len(np),ng),
                          rep(seq_len(ng),each=np))],
             byrow=TRUE,
             nrow=ng)
## row g of mm now holds the conditional variances for group g;
## add the fixed-effect intercept variance to the first column
vcov(fm1)[1,1]+mm[,1]
##  [1] 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807
##  [9] 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807
## [17] 192.2807 192.2807
## use sos::findFn to search package help pages for power-analysis tools
library(sos); findFn("{power analysis} mixed simulation")
## Crude R^2 analogue: the R-squared of a linear regression of the
## observed response on the fitted values of model `m`.
##
## m: fitted model with model.frame() and fitted() methods
## Returns a single number (the r.squared of that regression).
r2.corr.mer <- function(m) {
    observed <- model.response(model.frame(m))
    predicted <- fitted(m)
    summary(lm(observed ~ predicted))$r.squared
}
## one minus the ratio of residual variance to response variance
1 - var(residuals(m)) / var(model.response(model.frame(m)))
## squared correlation between the response and response-scale predictions
cor(model.response(model.frame(m)), predict(m, type = "response"))^2
## n.b. have to set up a 3D warn array first ...
withCallingHandlers(
    tryCatch(
        fun(n = nvec[j], tau = tauvec[i], ...),
        error = function(e) {
            ## store the error message and return the placeholder NA_ans
            warn[k, i, j] <<- paste("ERROR:", e$message)
            NA_ans
        }
    ),
    warning = function(w) {
        ## store the warning message, then muffle it so evaluation continues
        warn[k, i, j] <<- w$message
        invokeRestart("muffleWarning")
    }
)
## print the R version, platform and package versions of this session
sessionInfo()
## R Under development (unstable) (2025-07-18 r88431)
## Platform: x86_64-pc-linux-gnu
## Running under: Pop!_OS 22.04 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8    
##  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8   
##  [7] LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/Toronto
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggplot2_3.5.2       RLRsim_3.1-8        nlme_3.1-168       
##  [4] dplyr_1.1.4         broom.mixed_0.2.9.7 glmmTMB_1.1.11.9000
##  [7] equatiomatic_0.3.7  lme4_1.1-37.9000    Matrix_1.7-3       
## [10] Cairo_1.6-2         pander_0.6.6        knitr_1.50         
## [13] rmarkdown_2.29     
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        TMB_1.9.17          xfun_0.52          
##  [4] bslib_0.9.0         lattice_0.22-7      numDeriv_2016.8-1.1
##  [7] vctrs_0.6.5         tools_4.6.0         Rdpack_2.6.4       
## [10] generics_0.1.4      sandwich_3.1-1      parallel_4.6.0     
## [13] tibble_3.3.0        pkgconfig_2.0.3     RColorBrewer_1.1-3 
## [16] lifecycle_1.0.4     farver_2.1.2        compiler_4.6.0     
## [19] codetools_0.2-20    httpuv_1.6.16       htmltools_0.5.8.1  
## [22] sass_0.4.10         yaml_2.3.10         crayon_1.5.3       
## [25] later_1.4.2         pillar_1.11.0       furrr_0.3.1        
## [28] nloptr_2.2.1.9000   jquerylib_0.1.4     tidyr_1.3.1        
## [31] MASS_7.3-65         cachem_1.1.0        reformulas_0.4.1   
## [34] boot_1.3-31         multcomp_1.4-28     mime_0.13          
## [37] parallelly_1.45.0   tidyselect_1.2.1    digest_0.6.37      
## [40] mvtnorm_1.3-3       future_1.58.0       purrr_1.1.0        
## [43] listenv_0.9.1       labeling_0.4.3      forcats_1.0.0      
## [46] splines_4.6.0       fastmap_1.2.0       grid_4.6.0         
## [49] cli_3.6.5           magrittr_2.0.3      dichromat_2.0-0.1  
## [52] utf8_1.2.6          survival_3.8-3      TH.data_1.1-3      
## [55] broom_1.0.8         withr_3.0.2         scales_1.4.0       
## [58] promises_1.3.3      backports_1.5.0     estimability_1.5.1 
## [61] emmeans_1.11.2      globals_0.18.0      zoo_1.8-14         
## [64] coda_0.19-4.1       shiny_1.11.1        evaluate_1.0.4     
## [67] rbibutils_2.3       mgcv_1.9-3          rlang_1.1.6        
## [70] Rcpp_1.1.0          xtable_1.8-4        glue_1.8.0         
## [73] minqa_1.2.8         jsonlite_2.0.0      R6_2.6.1

formula	meaning
`(1|group)`	random group intercept
`(x|group)` = `(1+x|group)`	random slope of x within group with correlated intercept
`(0+x|group)` = `(-1+x|group)`	random slope of x within group: no variation in intercept
`(1|group) + (0+x|group)`	uncorrelated random intercept and random slope within group
`(1|site/block)` = `(1|site)+(1|site:block)`	intercept varying among sites and among blocks within sites (nested random effects)
`site+(1|site:block)`	fixed effect of sites plus random variation in intercept among blocks within sites
`(x|site/block)` = `(x|site)+(x|site:block)` = `(1 + x|site)+(1+x|site:block)`	slope and intercept varying among sites and among blocks within sites
`(x1|site)+(x2|block)`	two different effects, varying at different levels
`x*site+(x|site:block)`	fixed-effect variation of slope and intercept among sites, plus random variation of slope and intercept among blocks within sites
`(1|group1)+(1|group2)`	intercept varying among crossed random effects (e.g. site, year)

equation	formula
\(β_0 + β_{1}X_{i} + e_{si}\)	n/a (Not a mixed-effects model)
\((β_0 + b_{S,0s}) + β_{1}X_i + e_{si}\)	`~ X + (1|Subject)`
\((β_0 + b_{S,0s}) + (β_{1} + b_{S,1s}) X_i + e_{si}\)	`~ X + (1 + X|Subject)`
\((β_0 + b_{S,0s} + b_{I,0i}) + (β_{1} + b_{S,1s}) X_i + e_{si}\)	`~ X + (1 + X|Subject) + (1|Item)`
As above, but \(b_{S,0s}\), \(b_{S,1s}\) independent	`~ X + (1|Subject) + (0 + X|Subject) + (1|Item)`
\((β_0 + b_{S,0s} + b_{I,0i}) + β_{1}X_i + e_{si}\)	`~ X + (1|Subject) + (1|Item)`
\((β_0 + b_{I,0i}) + (β_{1} + b_{S,1s})X_i + e_{si}\)	`~ X + (0 + X|Subject) + (1|Item)`

Method	Advantages	Disadvantages	Packages
Penalized quasi-likelihood	Flexible, widely implemented	Likelihood inference may be inappropriate; biased for large variance or small means	PROC GLIMMIX (SAS), GLMM (GenStat), glmmPQL (R:MASS), ASREML-R
Laplace approximation	More accurate than PQL	Slower and less flexible than PQL	glmer (R:lme4,lme4a), glmm.admb (R:glmmADMB), INLA, glmmTMB, AD Model Builder, HLM
Gauss-Hermite quadrature	More accurate than Laplace	Slower than Laplace; limited to 2‑3 random effects	PROC NLMIXED (SAS), glmer (R:lme4, lme4a), glmmML (R:glmmML), xtlogit (Stata)
Markov chain Monte Carlo	Highly flexible, arbitrary number of random effects; accurate	Slow, technically challenging, Bayesian framework	MCMCglmm (R:MCMCglmm), rstanarm (R), brms (R), MCMCpack (R), WinBUGS/OpenBUGS (R interface: BRugs/R2WinBUGS), JAGS (R interface: rjags/R2jags), AD Model Builder (R interface: R2admb), glmm.admb (post hoc MCMC after Laplace fit) (R:glmmADMB)

GLMM FAQ

Ben Bolker and others

19 Jul 2025

Introduction

Other sources of help

References

linear mixed models

web/open

books (dead-tree/closed)

Model definition

Model specification

Should I treat factor xxx as fixed or random?

Nested or crossed?

(When) can I include a predictor as both fixed and random?

Model extensions

Overdispersion

Testing for overdispersion/computing overdispersion factor

Fitting models with overdispersion?

Underdispersion

Gamma GLMMs

Beta GLMMs

Zero-inflation

Count data

Continuous data

Probability density of \(x\) zero or infinite

Probability density of \(x\) positive and finite

Tests for zero-inflation

Spatial and temporal correlation models, heteroscedasticity (“R-side” models)

Penalization/handling complete separation

Non-Gaussian random effects

Estimation

What methods are available to fit (estimate) GLMMs?

Troubleshooting

Convergence warnings

Singular fits

Setting residual variances to a fixed value (zero or other)

Other problems/lme4 error messages

REML for GLMMs

Model diagnostics

Inference and confidence intervals

Testing hypotheses

What are the p-values listed by summary(glmerfit) etc.? Are they reliable?

Methods for testing single parameters

Tests of effects (i.e. testing that several parameters are simultaneously zero)

Is the likelihood ratio test reliable for mixed models?

Why doesn’t lme4 display denominator degrees of freedom/p values? What other options do I have?

Df alternatives:

Testing significance of random effects

Standard errors of variance estimates

P-values: MCMC and parametric bootstrap

Markov chain Monte Carlo sampling:

Status of mcmcsamp

Parametric bootstrap

Predictions and/or confidence (or prediction) intervals on predictions

lme

lme4

glmmTMB

Confidence intervals on conditional means/BLUPs/random effects

lme4

Power analysis

Model selection and averaging

Can I use AIC for mixed models? How do I count the number of degrees of freedom for a random effect?

Model summaries (goodness-of-fit, decomposition of variance, etc.)

How do I compute a coefficient of determination (\(R^2\)), or an analogue, for (G)LMMs?

Problem

Simple/crude solutions

Sophisticated solutions

Variable importance

Do I have to specify the levels of fixed effects in lmer?

Miscellaneous/procedural

Pronunciation of lmer/glmer/etc.

Storing information

Mixed modeling packages

Which R packages (functions) fit GLMMs?

Should I use aov(), nlme, or lme4, or some other package?

linear and nonlinear mixed models

GLMMs

Additive and generalized-additive mixed models

Hierarchical GLMs

diagnostic and modeling frameworks

Other problems/`lme4` error messages

What are the p-values listed by `summary(glmerfit)` etc.? Are they reliable?

Why doesn’t `lme4` display denominator degrees of freedom/p values? What other options do I have?

Pronunciation of `lmer`/`glmer`/etc.

Should I use `aov()`, `nlme`, or `lme4`, or some other package?