############################################################################
#
# Open Online R Stream (https://www.wvbauer.com/doku.php/live_streams)
#
# By: Wolfgang Viechtbauer (https://www.wvbauer.com)
# Date: 2021-02-18
#
# Topic(s):
# - simulating data
# - simulation-based power analysis
# - other types of simulation studies
#
# last updated: 2021-02-18
#
############################################################################

# simulate 100 values from a normal distribution with true mean = 0 and true SD = 1
# https://en.wikipedia.org/wiki/Normal_distribution
# note: mean=0, sd=1 are the defaults, so they could be left out

x <- rnorm(100, mean=0, sd=1)

# observed mean and SD
mean(x)
sd(x)

# histogram of the values
hist(x)

# simulate 100 values from a chi-square distribution with 5 degrees of freedom
# https://en.wikipedia.org/wiki/Chi-square_distribution

x <- rchisq(100, df=5)
hist(x)

# simulate 100 values from a Bernoulli distribution (e.g., flip a coin once)
# https://en.wikipedia.org/wiki/Bernoulli_distribution

x <- rbinom(100, size=1, prob=0.5)
x
table(x)

# simulate 100 values from a binomial distribution (e.g., flip a coin 5 times)
# https://en.wikipedia.org/wiki/Binomial_distribution

x <- rbinom(100, size=5, prob=0.5)
x
table(x)

# as 'size' gets large, a binomial distribution looks more and more like a
# normal distribution

x <- rbinom(10000, size=50, prob=0.5)
hist(x, breaks=30)

# gamma and beta distributions
# https://en.wikipedia.org/wiki/Gamma_distribution
# https://en.wikipedia.org/wiki/Beta_distribution

x <- rgamma(100, shape=1, scale=5)
hist(x)

x <- rbeta(100, shape1=3, shape2=3)
hist(x)

# look at the help file for the distributions in the stats package
help(distributions)

# repeatedly generate 10 values from a standard normal distribution
rnorm(10)
rnorm(10)
rnorm(10)

# set the seed of the random number generator; then things become reproducible

set.seed(1234)
rnorm(10)
rnorm(10)
rnorm(10)

set.seed(1234)
rnorm(10)
rnorm(10)
rnorm(10)

############################################################################

# an example of a two-independent-samples t-test
# H0: mu1 = mu2 (the true means are the same in the two groups)

x1 <- c(2,4,2,3,4,5,2,3,4)
x2 <- c(3,2,4,5,6,5,4,6,5,3,4,7,6)

t.test(x1, x2, var.equal = TRUE)

# now let's do a simulation to examine the power of Student's t-test when
# n1 = n2 = 70
# mu1 = 0, mu2 = 0.5
# sigma = 1

iters <- 10000

pval <- rep(NA, iters)

for (i in 1:iters) {
   x1 <- rnorm(70, mean=0, sd=1)
   x2 <- rnorm(70, mean=0.5, sd=1)
   res <- t.test(x1, x2, var.equal = TRUE)
   pval[i] <- res$p.value
}

round(mean(pval <= .05), digits=2) # this gives the 'empirical rejection rate'

# strictly speaking, we don't need to run a simulation; for such simple
# cases, we have equations to compute the exact power; for example:

power.t.test(n=70, delta=0.5, sd=1)

# we can also use this function to determine n for a desired power

power.t.test(delta=0.5, sd=1, power=.80)
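
# side note: the empirical rejection rate above is itself just an estimate
# based on 'iters' simulated datasets, so it comes with Monte Carlo error; a
# quick sketch of its standard error, using the standard binomial formula
# sqrt(p*(1-p)/iters) and reusing 'pval' and 'iters' from the simulation
# above (the object names p.rej and mc.se are just for this sketch)

p.rej <- mean(pval <= .05)                 # empirical rejection rate
mc.se <- sqrt(p.rej * (1 - p.rej) / iters) # Monte Carlo standard error
round(c(power = p.rej, mc.se = mc.se), digits=4)

# with iters = 10000 and a rejection rate around .80, the standard error is
# roughly 0.004, so the estimated power is accurate to about +/- 0.01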

# but this function cannot do power calculations for Welch's t-test;
# with our simulation approach, we can!

iters <- 10000

pval <- rep(NA, iters)

for (i in 1:iters) {
   x1 <- rnorm(70, mean=0, sd=1)
   x2 <- rnorm(70, mean=0.5, sd=1)
   res <- t.test(x1, x2, var.equal = FALSE)
   pval[i] <- res$p.value
}

round(mean(pval <= .05), digits=2)

# apparently, we end up with essentially the same power for this scenario

############################################################################

# now let's do a simulation study to examine how robust Student's t-test is
# to violations of the assumption of equal variances in the two groups

iters <- 10000

pval.s <- rep(NA, iters)
pval.w <- rep(NA, iters)

for (i in 1:iters) {
   x1 <- rnorm(30, mean=0, sd=5)
   x2 <- rnorm(70, mean=0, sd=1)
   res <- t.test(x1, x2, var.equal = TRUE)
   pval.s[i] <- res$p.value
   res <- t.test(x1, x2, var.equal = FALSE)
   pval.w[i] <- res$p.value
}

# since the true means are identical here, these empirical rejection rates
# estimate the actual Type I error rates of the two tests (which should be
# close to the nominal .05)

round(mean(pval.s <= .05), digits=2)
round(mean(pval.w <= .05), digits=2)

############################################################################

# a more complex case of a simulation to determine power:
# n = 100 subjects, half in group 0, the other half in group 1, are
# measured tp = 3 times

n  <- 100
tp <- 3

dat <- data.frame(id   = rep(1:n, each=tp),
                  time = 1:tp,
                  grp  = rep(c(0,1), each=n/2*tp))
dat

# add a random intercept (ui) for each subject

dat$ui <- rep(rnorm(n, mean=0, sd=1), each=tp)
dat

# add a random error (eij) to each measurement

dat$eij <- rnorm(n*tp, mean=0, sd=1)
dat

# simulate the observed values; the true means are constant over time in
# group 0 and increase by 0.4 per time point in group 1

dat$yij <- dat$grp * dat$time * 0.4 + dat$ui + dat$eij
dat

# in essence, we are assuming this for the average in the two groups over time
#
# |                    -----   }
# |             -----          }  group = 1
# |      -----                 }
# |
# |      -----------------     }  group = 0
# |
# +----------------------------
#      time1   time2   time3

# turn id, grp, and time into factors

dat$id   <- factor(dat$id)
dat$grp  <- factor(dat$grp)
dat$time <- factor(dat$time)

# analyze these data with a repeated-measures ANOVA with one
# between-subjects factor (grp) and one within-subjects factor (time)

res <- aov(yij ~ time + grp + time:grp + Error(id/time), data=dat)
summary(res)

# analyze these data with a mixed-effects / multilevel model (using a
# relatively simple 'random intercepts' model)

library(nlme)

res <- lme(yij ~ time + grp + time:grp, random = ~ 1 | id, data=dat)
summary(res)
anova(res)

# the p-value for the interaction is exactly the same as with aov()

# now let's do a simulation to examine the power to detect the interaction

iters <- 1000

n  <- 120
tp <- 3

dat <- data.frame(id   = rep(1:n, each=tp),
                  time = 1:tp,
                  grp  = rep(c(0,1), each=n/2*tp))

# create factor versions of id, grp, and time (the numeric versions are
# kept, since they are needed to simulate the data within the loop)

dat$id.fac   <- factor(dat$id)
dat$grp.fac  <- factor(dat$grp)
dat$time.fac <- factor(dat$time)

pval <- rep(NA, iters)

for (i in 1:iters) {
   dat$ui  <- rep(rnorm(n, mean=0, sd=1), each=tp)
   dat$eij <- rnorm(n*tp, mean=0, sd=1)
   dat$yij <- dat$grp * dat$time * 0.4 + dat$ui + dat$eij
   res <- aov(yij ~ time.fac + grp.fac + time.fac:grp.fac + Error(id.fac/time.fac), data=dat)
   # extract the p-value for the interaction from the within-subjects stratum
   pval[i] <- summary(res)[[2]][[1]][["Pr(>F)"]][2]
}

# empirical rejection rate for the grp:time interaction
round(mean(pval <= .05), digits=2)

############################################################################
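
# as a possible extension (a sketch, not part of the analyses above): since
# the interaction p-value from lme() matched the one from aov(), the same
# power simulation can be run with the mixed-effects model instead, reusing
# 'dat', 'n', 'tp', and 'iters' from above; in anova(res), row 4 corresponds
# to the time.fac:grp.fac interaction

library(nlme)

pval <- rep(NA, iters)

for (i in 1:iters) {
   dat$ui  <- rep(rnorm(n, mean=0, sd=1), each=tp)
   dat$eij <- rnorm(n*tp, mean=0, sd=1)
   dat$yij <- dat$grp * dat$time * 0.4 + dat$ui + dat$eij
   res <- lme(yij ~ time.fac + grp.fac + time.fac:grp.fac, random = ~ 1 | id.fac, data=dat)
   pval[i] <- anova(res)[4, "p-value"] # p-value for the interaction
}

# the empirical rejection rate should be essentially identical to the one
# from the aov() approach above
round(mean(pval <= .05), digits=2)

############################################################################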