Pairwise T-tests can be improved by including more groups
Author
Lukas Graz
Published
April 14, 2023
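TL;DR: Including more groups in the model makes the pairwise t-test more stable and slightly more powerful (especially for small sample sizes), because the residual variance is then estimated from all groups, which gives more degrees of freedom. However, be aware that this assumes homoscedasticity (i.e., equal variance in all groups).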
#' Simulate a one-way layout with increasing group means
#'
#' Group `g` (for g = 1, ..., n_groups) has mean `g`; every observation gets
#' independent normal noise with standard deviation 2, so all groups share the
#' same variance.
#'
#' @param n_per_group Number of observations in each group.
#' @param n_groups Number of groups.
#' @return A data frame with a factor column `group` and a numeric column `y`,
#'   with `n_groups * n_per_group` rows.
get_data <- function(n_per_group = 10, n_groups = 15) {
  # seq_len() (rather than 1:n_groups) stays empty when n_groups is 0
  grp <- factor(rep(seq_len(n_groups), each = n_per_group))
  data.frame(
    group = grp,
    y = rnorm(n_groups * n_per_group, sd = 2) + as.numeric(grp)
  )
}

# Illustrate the data. The RNG is seeded *before* drawing so that the plotted
# data set is reproducible.
set.seed(123)
data <- get_data()
boxplot(y ~ group, data)
# Pairwise t-test of group 2 vs. group 1, where the residual variance is
# estimated from the first 2, 3, ..., 15 groups.
n_groups_used <- 2:15
pvals_mat <- replicate(20, {
  data <- get_data()
  # vapply() guarantees exactly one numeric p-value per fitted model
  vapply(n_groups_used, function(i) {
    fit <- lm(y ~ group, data, subset = group %in% 1:i)
    coefficients(summary(fit))["group2", "Pr(>|t|)"]
  }, numeric(1))
})

# Plot results: each column of pvals_mat (one simulated data set) is drawn as
# one line; the x-axis shows the actual number of groups in the model.
matplot(n_groups_used, pvals_mat, type = "l",
        xlab = "Number of groups included in the model")
# Simulate power of the group 2 vs. group 1 comparison:
# model fitted on 2 groups vs. model fitted on all 5 groups.
n_sim <- 2000

# p-value of the group-2 coefficient when only groups 1..k enter the model
pval_group2 <- function(data, k) {
  fit <- lm(y ~ group, data, subset = group %in% seq_len(k))
  coefficients(summary(fit))["group2", "Pr(>|t|)"]
}

# Column 1: p-values using 2 groups; column 2: p-values using 5 groups
results <- matrix(NA_real_, nrow = n_sim, ncol = 2)
for (i in seq_len(n_sim)) {
  data <- get_data(n_per_group = 3, n_groups = 5)
  results[i, 1] <- pval_group2(data, 2)
  results[i, 2] <- pval_group2(data, 5)
}

# Power = share of simulations rejecting at the 5% level
power <- colMeans(results < 0.05)
names(power) <- c("power using 2 groups,", " 5 groups")
power