Heteroscedastic Linear Mixed Models with glmmTMB(dispformula=) and nlme::lme(weights=varIdent(form=))

Author

Lukas Graz

Published

December 4, 2024

Summary:

Example:

glmmTMB(y ~ trt + (1|id), data = D, dispformula = ~trt)

nlme::lme(y ~ trt, random = ~1|id, 
          weights = varIdent(form = ~1|trt))

Note: Before modelling heteroscedastic LMM try:

R Functions

Data Simulation

Code
library(simr)
library(glmmTMB) 
library(nlme)
library(lmerTest)
library(car)
library(ggplot2)


# Design Matrix
X <- data.frame(
  id  = as.factor(rep(1:40, each = 10)),
  trt = as.factor(rep(c("A", "B", "C", "D"), each = 100))
)

# Make it unbalanced
set.seed(123)
X <- X[-sample(101:300, 130, replace=FALSE),]
# xtabs(~id+trt,X)

# create NULL-lmer-model
model <- makeLmer(y ~ trt + (1|id), 
  fixef=  c(0,0,0,0),
  VarCorr = 9,
  data = X, 
  sigma = 1
  )

# heteroscedastic response
get_hetero_data <- function(n) {
  D <- X
  D$y <- simulate(model, 1)[[1]]
  grpCD <- D$trt %in% c("C", "D")
  D$y[grpCD] <- D$y[grpCD] + rnorm(sum(grpCD), 0, 4)
  return(D)
}

# homoscedastic response
get_homo_data <- function(n) {
  D <- X
  D$y <- simulate(model, 1)[[1]]
  return(D)
}

Analysis

analysis functions code
anova.glmmTMB <- glmmTMB:::Anova.glmmTMB

analysis <- function(nsim = 500, 
                    data_fun = get_hetero_data, 
                    model_fun = glmmTMB, 
                    disp = FALSE){
  # seed <- 123
  PVALs <- parallel::mclapply(1:nsim, \(seed){
      set.seed(seed)
      D <- data_fun()
      if(disp){
        if(identical(model_fun,nlme::lme)){
          fit <- lme(y ~ trt, random = ~1|id, data = D, weights = varIdent(form = ~1|trt))
        } else {
          fit <- model_fun(y ~ trt + (1|id), data = D,  dispformula = ~trt)
        }
      } else {
        if(identical(model_fun,nlme::lme)){
          fit <- lme(y ~ trt, random = ~1|id, data = D)
        } else {
          fit <- model_fun(y ~ trt + (1|id), data = D)
        }
      }
      A <- anova(fit)
      C <- if(identical(model_fun, glmmTMB)) coef(summary(fit))$cond else coef(summary(fit))
      c(anova=A["trt", ncol(A)], `trt(C-A)`=C["trtC", ncol(C)])
  }) |> simplify2array()
  print(knitr::kable(as.data.frame(rbind(
    `fraction(PVALs < .05)` = rowMeans(PVALs < 0.05),
    `binom.test(p = .05)` = apply(PVALs < 0.05, 1, function(x) binom.test(sum(x), nsim, 0.05)$p.value) |> signif(2)
  ))))
  
  PVALS_long <- tidyr::pivot_longer(as.data.frame(t(PVALs)), cols = everything(), names_to = "test", values_to = "PVALs")
  ggplot(PVALS_long, aes(PVALs, group=test, col=test)) + 
    stat_ecdf() + geom_abline(slope=1, intercept=0, col="black") +
    geom_vline(xintercept = 0.05, col="black", linetype="dashed") + scale_x_sqrt() +
     scale_y_sqrt() + ggtitle("ECDF of p-values (sqrt-sqrt scale)")
}

Visualize Data

Raw heteroscedastic Data

set.seed(123)
Dat_hetero <- get_hetero_data()
ggplot(Dat_hetero, aes(x=trt, y=y, group=id)) + geom_boxplot()

Raw homoscedastic Data

ggplot(get_homo_data(), aes(x=trt, y=y, group=id)) + geom_boxplot()

Simulated Data from glmmTMB with Dispersion

Dat_hetero$y_disp <- simulate(
  glmmTMB(y ~ 0+trt + (1|id), data = Dat_hetero, dispformula = ~trt), 1)[[1]]
ggplot(Dat_hetero, aes(x=trt, y=y_disp, group=id)) + geom_boxplot()

Simulated Data from glmmTMB without Dispersion

Dat_hetero$y_nodisp <- simulate(
  glmmTMB(y ~ 0+trt + (1|id), data = Dat_hetero                    ), 1)[[1]]
ggplot(Dat_hetero, aes(x=trt, y=y_nodisp, group=id)) + geom_boxplot()

Type I error rate (on heteroscedastic data)

glmmTMB(y ~ trt + (1|id), dispformula = ~trt)

analysis(data_fun = get_hetero_data, model_fun = glmmTMB, disp = TRUE)
anova trt(C-A)
fraction(PVALs < .05) 1.02e-01 0.072
binom.test(p = .05) 2.60e-06 0.030

glmmTMB(y ~ trt + (1|id))

analysis(data_fun = get_hetero_data, model_fun = glmmTMB)
anova trt(C-A)
fraction(PVALs < .05) 1.04e-01 0.09600
binom.test(p = .05) 9.00e-07 0.00002

lmerTest::lmer(y ~ trt + (1|id))

analysis(data_fun = get_hetero_data, model_fun = lmerTest::lmer)
anova trt(C-A)
fraction(PVALs < .05) 0.058 0.074
binom.test(p = .05) 0.410 0.018

lme(y~trt,random=~1|id,weights=varIdent(form=~1|trt))

analysis(data_fun = get_hetero_data, model_fun = nlme::lme, disp = TRUE)
anova trt(C-A)
fraction(PVALs < .05) 0.064 0.056
binom.test(p = .05) 0.150 0.540

lme(y ~ trt, random = ~1|id)

analysis(data_fun = get_hetero_data, model_fun = nlme::lme)
anova trt(C-A)
fraction(PVALs < .05) 0.058 0.074
binom.test(p = .05) 0.410 0.018

Type I error rate (on homoskedastic data)

glmmTMB(y ~ trt + (1|id), dispformula = ~trt)

analysis(data_fun = get_homo_data, model_fun = glmmTMB, disp = TRUE)
anova trt(C-A)
fraction(PVALs < .05) 0.112 0.082
binom.test(p = .05) 0.000 0.002

glmmTMB(y ~ trt + (1|id))

analysis(data_fun = get_homo_data, model_fun = glmmTMB)
anova trt(C-A)
fraction(PVALs < .05) 0.114 0.08600
binom.test(p = .05) 0.000 0.00064

lmerTest::lmer(y ~ trt + (1|id))

analysis(data_fun = get_homo_data, model_fun = lmerTest::lmer)
anova trt(C-A)
fraction(PVALs < .05) 0.07 0.054
binom.test(p = .05) 0.05 0.680

lme(y~trt,random=~1|id,weights=varIdent(form=~1|trt))

analysis(data_fun = get_homo_data, model_fun = nlme::lme, disp = TRUE)
anova trt(C-A)
fraction(PVALs < .05) 0.068 0.054
binom.test(p = .05) 0.080 0.680

lme(y ~ trt, random = ~1|id)

analysis(data_fun = get_homo_data, model_fun = nlme::lme)
anova trt(C-A)
fraction(PVALs < .05) 0.07 0.054
binom.test(p = .05) 0.05 0.680