<- 200
nsim library(coin)
# functions
<- 40 # per group
n <- list(
laws norm = function(mean=0, sd=1) mean + sd * rnorm(n, 0, 1),
logn = function(mean=0, sd=1) mean + sd * (rlnorm(n, sdlog=1) - 1.65) / 1.95,
exp1 = function(mean=0, sd=1) mean + sd * (rexp(n) - 1),
exp2 = function(mean=0, sd=1) mean + sd * (rexp(n)^2 - 2) / 3.94,
emix = function(mean=0, sd=1) mean + sd * (c(rep(0, n/2), rexp(n/2)) - 0.5) / 0.84
)
Define a list of laws (i.e., distributions) with mean
and sd
.
set.seed(123)
options(digits=3)
<- function(){
get_data_all_laws <- lapply(names(laws), function(fname){
dat <- laws[[fname]]
f data.frame(y=f(), fun=fname)
}) <- do.call(rbind, dat)
dat
dat
}boxplot(y ~ fun, get_data_all_laws(), main= "Laws illustrated")
Verify mean and standard deviation
sapply(laws, function(f) mean(replicate(10000, mean(f(0, 1)))))
norm logn exp1 exp2 emix
1.55e-03 -2.89e-03 -1.66e-03 -8.53e-05 -1.32e-03
sapply(laws, function(f) mean(replicate(10000, mean(f(1, 2)))))
norm logn exp1 exp2 emix
1.003 1.002 0.998 0.994 0.997
sapply(laws, function(f) mean(replicate(10000, sd(f(0, 1)))))
norm logn exp1 exp2 emix
0.994 1.008 0.977 0.999 1.000
sapply(laws, function(f) mean(replicate(10000, sd(f(1, 2)))))
norm logn exp1 exp2 emix
1.99 2.01 1.96 1.99 2.00
Define the tests used
<- function(d) t.test(y ~ group,d)$p.value
pval_t <- function(d) coin::pvalue(coin::wilcox_test(y ~ group, d))
pval_w <- function(d) coin::pvalue(coin::median_test(y ~ group, d)) pval_m
# retuns data with f1() for group "A" and f2(mean2, sd2) for group "B"
<- function(f1, f2, mean2=0, sd2=1){
get_data <- rbind(
d data.frame(
y=f1(),
group="A"
),data.frame(
y=f2(mean2, sd2),
group="B"
))$group <- as.factor(d$group)
d
d
}
# get power of tests
<- function(f1, f2, nsim=1000, mean2=0, sd2=1) {
get_power <- replicate(nsim, get_data(f1, f2, mean2=mean2, sd2=sd2), simplify = FALSE)
data_list c(t = mean(sapply(data_list, pval_t) < 0.05),
w = mean(sapply(data_list, pval_w) < 0.05),
m = mean(sapply(data_list, pval_m) < 0.05))
}
# get all combinations of functions
<- expand.grid(names(laws), names(laws)) |> as.matrix()
fun_comb <- apply(fun_comb, 1, paste0, collapse="_")
rnames
<- function(nsim=1000, mean2=0, sd2=1) {
sim <- split(fun_comb, row(fun_comb))
fun_comb_list <- parallel::mclapply(fun_comb_list, function(f_names){
coverage <- laws[[f_names[1]]]
f1 <- laws[[f_names[2]]]
f2 get_power(f1, f2, nsim=nsim, mean2=mean2, sd2=sd2)
})<- do.call(rbind, coverage)
coverage rownames(coverage) <- rnames
colnames(coverage) <- paste0(
colnames(coverage), " ", as.character(mean2), " ", as.character(sd2))
|> as.data.frame()
coverage }
Simulation
set.seed(4321)
<- sim(nsim=nsim) null
set.seed(4321)
<- sim(nsim=nsim, sd2=2) s_diff
set.seed(4321)
<- sim(nsim=nsim, sd2=5) s_difff
set.seed(4321)
<- sim(nsim=nsim, mean2 = 0.1) mu_diff
set.seed(4321)
<- sim(nsim=nsim, mean2 = 0.2) mu_difff
<- cbind(
results
null,
s_diff,
s_difff,
mu_diff,
mu_difff
)* 100 results
t 0 1 w 0 1 m 0 1 t 0 2 w 0 2 m 0 2 t 0 5 w 0 5 m 0 5 t 0.1 1 w 0.1 1
norm_norm 4.0 3.5 5.5 3.0 4.5 4.0 2.0 6.5 4.0 8.5 7.5
logn_norm 5.0 11.5 28.5 6.0 7.5 17.0 5.0 8.0 12.5 10.5 20.5
exp1_norm 2.5 7.0 17.0 5.0 7.5 13.5 3.0 8.0 8.5 11.0 19.5
exp2_norm 9.0 15.0 47.5 6.5 7.0 26.5 3.0 7.5 15.5 10.0 29.0
emix_norm 2.5 11.0 49.5 3.5 8.0 25.5 5.5 10.5 9.5 8.0 19.5
norm_logn 5.5 13.5 29.5 8.0 42.5 59.5 10.5 75.0 74.5 3.0 4.5
logn_logn 3.0 5.5 4.5 4.0 54.0 32.0 9.0 70.5 63.0 8.0 17.0
exp1_logn 7.0 8.0 4.5 10.0 57.5 33.0 12.0 72.0 62.0 7.0 21.0
exp2_logn 2.5 8.0 7.5 6.5 55.5 36.0 10.0 73.5 66.5 9.5 13.0
emix_logn 2.0 3.0 11.0 7.0 61.0 0.5 8.0 74.5 15.5 1.0 29.5
norm_exp1 3.5 9.0 17.5 8.5 26.0 33.5 10.5 48.5 47.5 6.5 6.0
logn_exp1 3.5 6.0 5.5 6.0 35.5 18.0 6.5 45.0 39.0 6.0 6.0
exp1_exp1 3.5 5.5 5.5 6.0 28.5 13.0 8.5 54.5 46.0 7.5 10.0
m 0.1 1 t 0.2 1 w 0.2 1 m 0.2 1
norm_norm 3.5 15.5 14.0 10.0
logn_norm 44.5 21.5 40.5 63.0
exp1_norm 35.0 14.5 29.5 43.5
exp2_norm 64.0 18.0 43.5 76.5
emix_norm 59.0 13.0 37.5 76.5
norm_logn 12.0 11.0 8.5 9.5
logn_logn 9.0 16.5 55.0 27.0
exp1_logn 5.0 12.5 36.0 12.5
exp2_logn 21.5 19.5 47.0 56.5
emix_logn 32.5 14.0 93.5 58.5
norm_exp1 10.5 13.0 4.5 2.0
logn_exp1 9.0 12.0 18.5 16.5
exp1_exp1 8.5 14.0 35.5 17.0
[ reached 'max' / getOption("max.print") -- omitted 12 rows ]
Confidence intervals of ratios
<- function(ratio, nsim){
prop <- prop.test(round(ratio*nsim), nsim)$conf.int[1:2]
confint_ names(confint_) <- c("lower", "upper")
c(
ratio= ratio,
confint_
)}sapply(c(0:4/40, 3:10/20), prop, nsim) |> as.data.frame() * 100
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
ratio 0.00 2.500 5.00 7.50 10.00 15.0 20.0 25.0 30.0 35.0 40.0 45.0 50.0
lower 0.00 0.924 2.56 4.41 6.37 10.5 14.8 19.3 23.8 28.5 33.2 38.0 43.1
upper 2.35 6.056 9.27 12.30 15.23 20.9 26.4 31.7 36.9 42.1 47.2 52.2 56.9
this gives an idea of the uncertainty of a ratio given 200 simulations
redo analysis but with groupwise equal medians
Define a list of laws (i.e., distributions) with median
and sd
# functions
<- 40 # per group
n # mean == to keep notation consistent
<- list(
laws norm = function(mean=0, sd=1) mean + sd * rnorm(n, 0, 1)
logn = function(mean=0, sd=1) mean + sd * (rlnorm(n, sdlog=1) - 1.02) / 1.95
,exp1 = function(mean=0, sd=1) mean + sd * (rexp(n) - 0.706)
,exp2 = function(mean=0, sd=1) mean + sd * (rexp(n)^2 - 0.523) / 3.94
,emix = function(mean=0, sd=1) mean + sd * (c(rep(0, n/2), rexp(n/2)) -0.025) / 0.84
, )
verify median and standard deviation
sapply(laws, function(f) mean(replicate(10000, median(f(0, 1)))))
norm logn exp1 exp2 emix
-0.000142 -0.000216 0.002144 0.000204 0.000246
sapply(laws, function(f) mean(replicate(10000, median(f(1, 2)))))
norm logn exp1 exp2 emix
1.000 1.001 0.998 1.000 0.999
sapply(laws, function(f) mean(replicate(10000, sd(f(0, 1)))))
norm logn exp1 exp2 emix
0.996 1.006 0.978 1.010 1.003
sapply(laws, function(f) mean(replicate(10000, sd(f(1, 2)))))
norm logn exp1 exp2 emix
1.99 2.00 1.95 1.99 2.01
boxplot(y ~ fun, get_data_all_laws(), main= "Equal Medians (in expectation)")
set.seed(4321)
<- sim(nsim=nsim) null_median
set.seed(4321)
<- sim(nsim=nsim, sd2=2) s_diff_median
set.seed(4321)
<- sim(nsim=nsim, sd2=5) s_difff_median
set.seed(4321)
<- sim(nsim=nsim, mean2 = 0.1) mu_diff_median
set.seed(4321)
<- sim(nsim=nsim, mean2 = 0.2) mu_difff_median
<- cbind(
results_median
null_median,
s_diff_median,
s_difff_median,
mu_diff_median,
mu_difff_median
) * 100 results_median
t 0 1 w 0 1 m 0 1 t 0 2 w 0 2 m 0 2 t 0 5 w 0 5 m 0 5 t 0.1 1 w 0.1 1
norm_norm 2.0 2.0 2.5 5.0 6.5 5.5 9.5 11.5 11.5 7.5 7.0
logn_norm 26.0 16.0 5.0 11.5 16.5 11.0 5.0 8.5 9.0 14.0 8.0
exp1_norm 21.5 10.5 6.5 13.0 12.0 8.0 3.0 6.0 8.5 10.5 6.5
exp2_norm 27.5 19.5 7.0 18.0 17.0 10.5 5.5 10.0 13.5 18.5 12.0
emix_norm 75.0 53.0 1.0 30.5 27.5 2.5 12.0 15.5 4.5 55.5 35.5
norm_logn 22.5 11.0 6.0 34.5 13.0 3.5 43.0 9.0 7.5 33.5 27.5
logn_logn 3.5 5.5 3.5 7.0 7.5 6.0 23.5 11.5 10.0 6.5 15.5
exp1_logn 6.0 9.0 6.5 15.5 8.0 5.5 25.5 10.5 8.0 7.5 20.5
exp2_logn 3.0 16.5 5.5 4.5 14.5 6.5 21.5 14.5 11.5 6.0 3.5
emix_logn 26.5 64.5 0.0 4.5 33.0 0.5 17.0 17.0 1.0 9.0 25.0
norm_exp1 27.0 14.5 8.5 39.5 9.0 3.5 46.5 10.0 14.5 44.5 30.0
logn_exp1 4.0 8.0 4.0 13.0 16.5 10.5 21.5 10.5 13.5 8.5 6.0
exp1_exp1 2.5 3.0 5.5 16.0 7.5 8.5 26.5 9.5 7.5 7.0 9.5
m 0.1 1 t 0.2 1 w 0.2 1 m 0.2 1
norm_norm 6.5 12.5 11.5 11.0
logn_norm 6.0 6.5 8.5 11.5
exp1_norm 7.5 7.5 6.5 13.0
exp2_norm 7.5 5.0 5.0 20.5
emix_norm 3.5 34.0 16.5 3.0
norm_logn 10.0 64.5 53.5 17.5
logn_logn 9.5 16.0 51.0 24.5
exp1_logn 9.0 15.5 50.0 24.5
exp2_logn 17.5 13.0 24.0 37.0
emix_logn 1.0 2.5 1.0 3.0
norm_exp1 8.5 57.0 41.5 13.0
logn_exp1 7.5 10.5 13.5 13.5
exp1_exp1 5.0 17.0 30.0 15.5
[ reached 'max' / getOption("max.print") -- omitted 12 rows ]