############################################################################ # Code to go along with the talk "An overview of recent updates to the metafor # package" by Wolfgang Viechtbauer (https://www.wvbauer.com) presented at the # 2023 Annual Conference of the Society for Research Synthesis Methodology on # 2023-06-06 ############################################################################ # install the metafor package #install.packages("metafor") # load the metafor package library(metafor) ############################################################################ ### standard random-effects and mixed-effects meta-regression models # look at the BCG dataset (Colditz et al., 1994) dat.bcg # tpos - number of TB positive cases in the treated (vaccinated) group # tneg - number of TB negative cases in the treated (vaccinated) group # cpos - number of TB positive cases in the control (non-vaccinated) group # cneg - number of TB negative cases in the control (non-vaccinated) group # # these variables denote the values in 2x2 tables of the form: # # TB+ TB- # +------+------+ # treated | tpos | tneg | # +------+------+ # control | cpos | cneg | # +------+------+ # # year - publication year of the study # ablat - absolute latitude of the study location (in degrees) # alloc - method of treatment allocation (random, alternate, or systematic assignment) # calculate log risk ratios and corresponding sampling variances # note: slab for adding 'study labels' to the dataset dat <- escalc(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, slab=paste(author, ", ", year, sep=""), data=dat.bcg) dat # fit a random-effects model (log risk ratios and variances as input) res <- rma(yi, vi, data=dat) res # predicted pooled risk ratio and corresponding CI/PI predict(res, transf=exp, digits=2) # forest plot forest(res, atransf=exp, at=log(c(.05, .25, 1, 4)), xlim=c(-16,6), ilab=cbind(tpos, tneg, cpos, cneg), ilab.xpos=c(-9.5,-8,-6,-4.5), header="Author(s) and Year", shade="zebra") text(c(-9.5,-8,-6,-4.5), 15, c("TB+", "TB-", "TB+", "TB-"), font=2) text(c(-8.75,-5.25), 16, c("Vaccinated", "Control"), font=2) # funnel plot funnel(res, ylim=c(0,0.8), las=1, digits=c(0,1)) # fit a mixed-effects meta-regression model with absolute latitude as # moderator (and using the Knapp & Hartung adjustment for tests/CIs) res <- rma(yi, vi, mods = ~ ablat, data=dat, test="knha") res # bubble plot (with points outside of the prediction interval labeled) regplot(res, mod="ablat", pi=TRUE, xlab="Absolute Latitude", xlim=c(0,60), predlim=c(0,60), transf=exp, refline=1, legend=TRUE, label="piout", labsize=0.9, bty="l", las=1, digits=1) ############################################################################ ### binomial-normal models with RRs/RDs # fit a random-effects model (using ML estimation) rma(yi, vi, data=dat, method="ML") # fit a binomial-normal model using a log (not logit!) link rma.glmm(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, data=dat) # cross-check results using GLMMadaptive (instead of lme4) rma.glmm(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, data=dat, control=list(package="GLMMadaptive")) ############################################################################ ### location-scale models for meta-analysis # data from 48 studies examining the effectiveness of 'writing-to-learn' # interventions on academic achievement (Bangert-Drowns et al., 2004) dat <- dat.bangertdrowns2004 # look at a subset of the data dat[c(1:6,46:48),c(1:3,13:16)] # yi - standardized mean differences (positive values indicate a higher # mean level of academic achievement in the intervention group) # vi - corresponding sampling variances # subject - subject matter on which the students were tested # collapse subjects down to three main groups: Math, Sci(ence), and Soc(ial Science) dat$subject <- factor(ifelse(dat$subject %in% c("Algebra", "Calculus", "Math", "Math in Science", "Statistics"), "Math", ifelse(dat$subject %in% c("Chemistry", "Comp Science and Chemistry", "Biology", "Earth Science", "Natural Resources", "Nursing", "Science"), "Sci", "Soc"))) # fit a meta-analytic location-scale model res <- rma(yi, vi, mods = ~ subject + ni, scale = ~ subject + ni, data=dat) res # the scale part of the model: # # log(tau^2) = -3.1022 + 2.2330*subjectSci + 0.4011*subjectSoc + -0.0054*ni # predicted value of tau^2 for Math, Sci, and Soc when ni=120 (~ mean n) Xnew <- cbind(c(0,1,0),c(0,0,1),120) pred <- predict(res, newscale=Xnew, transf=exp, digits=3) pred$slab <- c("Math", "Sci", "Soc") pred # so we see much greater heterogeneity in the effectiveness of such # interventions in studies focused on science subjects # the location part of the model: # # E(SMD) = 0.3443 + -0.0798*subjectSci + -0.1087*subjectSoc + -0.0006*ni # predicted value of E(SMD) for Math, Sci, and Soc when ni=120 (~ mean n) pred <- predict(res, newmods=Xnew, newscale=Xnew, digits=2) pred$slab <- c("Math", "Sci", "Soc") pred # so we see relatively little differences in the (average) effectiveness of # such interventions across the different subject categories ############################################################################ ### variance inflation factors for meta-regression models # look at a subset of the data dat[c(1:6,46:48),c(1:3,5,9:11,13,15:16)] # fit a mixed-effects meta-regression model res <- rma(yi, vi, mods = ~ length + info + pers + imag + subject, data=dat) res # compute variance inflation factors (VIFs) vif(res) # simulate the distribution of the VIFs under independence # (by randomly reshuffling the columns of the model matrix) sav <- vif(res, sim=TRUE, seed=1234) sav # 'prop' indicates the proportion of simulated values that are smaller than # the actually observed VIF value for a given model coefficient (large values # indicate that the VIF if large) # plot the distribution of the VIFs load("vif.rdata") # results based on sim=10000 plot(sav, xlim=c(1,2), breaks=seq(1,100,by=0.02), layout=c(2,3)) # show the correlation matrix for the predictors #install.packages("corrplot") library(corrplot) par(mfrow=c(1,1)) corrplot.mixed(cor(model.matrix(res)[,-1])) ############################################################################ ### construction of the var-cov matrix of dependent effect sizes # results from 17 studies on the association between recidivism and # mental health in delinquent juveniles (Assink & Wibbelink, 2016) dat <- dat.assink2016 # look at the data for studies 1 and 12 dat[dat$study %in% c(1,12),] # study - study identifier # esid - effect size within study identifier # yi - standardized mean differences (positive values indicate a higher # prevalence of recidivism in the group with a mental health disorder) # vi - corresponding sampling variances # deltype - type of delinquent behavior (general, overt, or covert) # most studies provided multiple effect size estimates table(dat$study) # construct an approximate var-cov matrix assuming a correlation of 0.7 for # effect sizes corresponding to the same type of delinquent behavior and a # correlation of 0.5 for effect sizes corresponding to different types of # delinquent behavior within studies V <- vcalc(vi, cluster=study, type=deltype, obs=esid, data=dat, rho=c(0.7,0.5)) # examine the part of V corresponding to study 1 blsplit(V, cluster=dat$study, round, 4)[[1]] blsplit(V, cluster=dat$study, cov2cor)[[1]] # examine the part of V corresponding to study 12 blsplit(V, cluster=dat$study, round, 4)[[12]] blsplit(V, cluster=dat$study, cov2cor)[[12]] # fit a multilevel random-effects model to the data using this approximate V # matrix (using a t-distribution for tests/CIs with the 'containment' method # for computing the degrees of freedom) res <- rma.mv(yi, V, random = ~ 1 | study/esid, data=dat, test="t", dfs="contain") res # vcalc() can deal with dependency in effect sizes: # - measuring the same construct with different scales # - measuring different constructs # - measuring the same/different constructs at multiple timepoints # - due to reuse of data from a shared (control) group # use cluster-robust inference methods (i.e., robust variance estimation) robust(res, cluster=study, clubSandwich=TRUE) # with clubSandwich=TRUE, robust() makes use of the methods implemented in the # clubSandwich package for improved inferences using the CR2 estimator of the # var-cov matrix of the fixed effects and using a Satterthwaite approximation # for the degrees of freedom ############################################################################ ### selection models for meta-analysis # results from 37 studies on the risk of lung cancer in women exposed # to environmental tobacco smoke (ETS) from their smoking spouse dat <- dat.hackshaw1998 dat # yi - log odds ratios (positive values indicate an increased risk of cancer in # exposed women compared to women not exposed to ETS from their spouse) # vi - corresponding sampling variances # fit a random-effects model (using ML estimation) res <- rma(yi, vi, data=dat, method="ML") res # estimated odds ratio with CI/PI predict(res, transf=exp, digits=2) # funnel plot funnel(res, ylim=c(0,0.8), las=1, digits=c(0,1)) # fit a step function selection model sel1 <- selmodel(res, type="stepfun", steps=c(.025,.1,.5,1)) sel1 # plot the selection function plot(sel1, ylim=c(0,1), bty="l") # fit a negative-exponential selection function model sel2 <- selmodel(res, type="negexp") sel2 plot(sel2, add=TRUE, col="red") # truncated distribution selection model sel3 <- selmodel(res, type="trunc") sel3 # 0.3818 is the relative likelihood of selection for effect sizes below 0 # (compared to effect sizes above 0) conditional on the estimated (average) # effect size (and the amount of heterogeneity) # selection model assuming truncation at the minimum effect size estimate sel4 <- selmodel(res, type="trunc", steps=min(dat$yi)) sel4 # trim and fill method taf <- trimfill(res) taf funnel(taf, ylim=c(0,0.8), las=1, digits=c(0,1)) abline(v=min(dat$yi), lwd=3) # force the relative likelihood of selection for effect sizes below the # minimum effect size estimate to essentially 0 (as assumed by trim and fill) sel5 <- selmodel(res, type="trunc", steps=min(dat$yi), delta=0.001) sel5 # truncated distribution selection model with estimated truncation point sel6 <- selmodel(res, type="truncest") sel6 funnel(sel6, ylim=c(0,0.8), las=1, digits=c(0,1)) abline(v=sel6$delta[2], lwd=3) # the 'truncest' model is experimental; the likelihood surface is often quite # rugged with multiple local optima, so optimization is difficult ############################################################################ ### multivariate meta-analysis and fitting regression models based on ### estimated var-cov matrix of the random effects # results from 17 studies examining overall and disease-free survival in # neuroblastoma patients with amplified vs normal MYC-N protein levels dat <- dat.riley2003[1:34,] dat # yi - log hazard ratio in those with amplified versus normal MYC-N protein levels # vi - corresponding sampling variance # outcome - DFS = disease-free survival, OS = overall survival # construct an approximate var-cov matrix, assuming a correlation of 0.8 for # the pairs of log hazard ratios within studies V <- vcalc(vi, cluster=study, type=outcome, rho=0.8, data=dat) # fit a bivariate model to the data res <- rma.mv(yi, V, mods = ~ outcome - 1, random = ~ outcome | study, struct="UN", data=dat) res # cross-check results using cluster-robust inference methods robust(res, cluster=study, clubSandwich=TRUE) # var-cov matrix of the random effects res$G # fit a regression model based on this var-cov matrix, predicting the overall # survival log hazard ratio from the disease-free survival log hazard ratio matreg(y=2, x=1, R=res$G, cov=TRUE, means=coef(res), n=res$g.levels.comb.k) # fit the bivariate model again, but now also obtain the var-cov matrix of the # estimates in res$G res <- rma.mv(yi, V, mods = ~ outcome - 1, random = ~ outcome | study, struct="UN", data=dat, cvvc="varcov") # var-cov matrix of the random effects and the var-cov matrix of these estimates res$G res$vvc # now use res$vvc as the var-cov matrix of the estimates in res$G matreg(y=2, x=1, R=res$G, cov=TRUE, means=coef(res), V=res$vvc) ############################################################################ ### the metadat package # the metafor package used to contain around 60 datasets; these datasets have # been moved to a separate data package called 'metadat' that also contains # additional datasets contributed by various people (now 87 datasets) help(package="metadat") # metadat is useful for teaching purposes, illustrating/testing meta-analytic # methods, and validating published analyses # # instructions for contributing additional datasets can be found in the docs: # # https://wviechtb.github.io/metadat/ # the datsearch() function can help to find datasets datsearch() ############################################################################