############################################################################

# Code to go along with the talk "An overview of recent updates to the metafor
# package" by Wolfgang Viechtbauer (https://www.wvbauer.com) presented at the
# 2023 Annual Conference of the Society for Research Synthesis Methodology on
# 2023-06-06

############################################################################

# install the metafor package
#install.packages("metafor")

# load the metafor package
library(metafor)

############################################################################

### standard random-effects and mixed-effects meta-regression models

# look at the BCG dataset (Colditz et al., 1994)
dat.bcg

# tpos - number of TB positive cases in the treated (vaccinated) group
# tneg - number of TB negative cases in the treated (vaccinated) group
# cpos - number of TB positive cases in the control (non-vaccinated) group
# cneg - number of TB negative cases in the control (non-vaccinated) group
#
# these variables denote the values in 2x2 tables of the form:
#
#           TB+    TB-
#         +------+------+
# treated | tpos | tneg |
#         +------+------+
# control | cpos | cneg |
#         +------+------+
#
# year  - publication year of the study
# ablat - absolute latitude of the study location (in degrees)
# alloc - method of treatment allocation (random, alternate, or systematic assignment)

# calculate log risk ratios and corresponding sampling variances
# note: slab for adding 'study labels' to the dataset
dat <- escalc(measure="RR", ai=tpos, bi=tneg,
                            ci=cpos, di=cneg,
              slab=paste(author, ", ", year, sep=""), data=dat.bcg)
dat

# fit a random-effects model (log risk ratios and variances as input)
res <- rma(yi, vi, data=dat)
res

# predicted pooled risk ratio and corresponding CI/PI
predict(res, transf=exp, digits=2)

# forest plot
forest(res, atransf=exp, at=log(c(.05, .25, 1, 4)), xlim=c(-16,6),
       ilab=cbind(tpos, tneg, cpos, cneg), ilab.xpos=c(-9.5,-8,-6,-4.5),
       header="Author(s) and Year", shade="zebra")
text(c(-9.5,-8,-6,-4.5), 15, c("TB+", "TB-", "TB+", "TB-"), font=2)
text(c(-8.75,-5.25),     16, c("Vaccinated", "Control"), font=2)

# funnel plot
funnel(res, ylim=c(0,0.8), las=1, digits=c(0,1))

# fit a mixed-effects meta-regression model with absolute latitude as
# moderator (and using the Knapp & Hartung adjustment for tests/CIs)
res <- rma(yi, vi, mods = ~ ablat, data=dat, test="knha")
res

# bubble plot (with points outside of the prediction interval labeled)
regplot(res, mod="ablat", pi=TRUE, xlab="Absolute Latitude",
        xlim=c(0,60), predlim=c(0,60), transf=exp, refline=1, legend=TRUE,
        label="piout", labsize=0.9, bty="l", las=1, digits=1)

############################################################################

### binomial-normal models with RRs/RDs

# fit a random-effects model (using ML estimation)
rma(yi, vi, data=dat, method="ML")

# fit a binomial-normal model using a log (not logit!) link
rma.glmm(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, data=dat)

# cross-check results using GLMMadaptive (instead of lme4)
rma.glmm(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, data=dat,
         control=list(package="GLMMadaptive"))

############################################################################

### location-scale models for meta-analysis

# data from 48 studies examining the effectiveness of 'writing-to-learn'
# interventions on academic achievement (Bangert-Drowns et al., 2004)
dat <- dat.bangertdrowns2004

# look at a subset of the data
dat[c(1:6,46:48),c(1:3,13:16)]

# yi      - standardized mean differences (positive values indicate a higher
#           mean level of academic achievement in the intervention group)
# vi      - corresponding sampling variances
# subject - subject matter on which the students were tested

# collapse subjects down to three main groups: Math, Sci(ence), and Soc(ial Science)
dat$subject <- factor(ifelse(dat$subject %in% c("Algebra", "Calculus", "Math", "Math in Science", "Statistics"), "Math",
                      ifelse(dat$subject %in% c("Chemistry", "Comp Science and Chemistry", "Biology", "Earth Science", "Natural Resources", "Nursing", "Science"), "Sci", "Soc")))

# fit a meta-analytic location-scale model
res <- rma(yi, vi, mods = ~ subject + ni, scale = ~ subject + ni, data=dat)
res

# the scale part of the model:
#
# log(tau^2) = -3.1022 + 2.2330*subjectSci + 0.4011*subjectSoc + -0.0054*ni

# predicted value of tau^2 for Math, Sci, and Soc when ni=120 (~ mean n)
Xnew <- cbind(c(0,1,0),c(0,0,1),120)
pred <- predict(res, newscale=Xnew, transf=exp, digits=3)
pred$slab <- c("Math", "Sci", "Soc")
pred

# so we see much greater heterogeneity in the effectiveness of such
# interventions in studies focused on science subjects

# the location part of the model:
#
# E(SMD) = 0.3443 + -0.0798*subjectSci + -0.1087*subjectSoc + -0.0006*ni

# predicted value of E(SMD) for Math, Sci, and Soc when ni=120 (~ mean n)
pred <- predict(res, newmods=Xnew, newscale=Xnew, digits=2)
pred$slab <- c("Math", "Sci", "Soc")
pred

# so we see relatively little differences in the (average) effectiveness of
# such interventions across the different subject categories

############################################################################

### variance inflation factors for meta-regression models

# look at a subset of the data
dat[c(1:6,46:48),c(1:3,5,9:11,13,15:16)]

# fit a mixed-effects meta-regression model
res <- rma(yi, vi, mods = ~ length + info + pers + imag + subject, data=dat)
res

# compute variance inflation factors (VIFs)
vif(res)

# simulate the distribution of the VIFs under independence
# (by randomly reshuffling the columns of the model matrix)
sav <- vif(res, sim=TRUE, seed=1234)
sav

# 'prop' indicates the proportion of simulated values that are smaller than
# the actually observed VIF value for a given model coefficient (large values
# indicate that the VIF if large)

# plot the distribution of the VIFs
load("vif.rdata") # results based on sim=10000
plot(sav, xlim=c(1,2), breaks=seq(1,100,by=0.02), layout=c(2,3))

# show the correlation matrix for the predictors
#install.packages("corrplot")
library(corrplot)
par(mfrow=c(1,1))
corrplot.mixed(cor(model.matrix(res)[,-1]))

############################################################################

### construction of the var-cov matrix of dependent effect sizes

# results from 17 studies on the association between recidivism and
# mental health in delinquent juveniles (Assink & Wibbelink, 2016)
dat <- dat.assink2016

# look at the data for studies 1 and 12
dat[dat$study %in% c(1,12),]

# study   - study identifier
# esid    - effect size within study identifier
# yi      - standardized mean differences (positive values indicate a higher
#           prevalence of recidivism in the group with a mental health disorder)
# vi      - corresponding sampling variances
# deltype - type of delinquent behavior (general, overt, or covert)

# most studies provided multiple effect size estimates
table(dat$study)

# construct an approximate var-cov matrix assuming a correlation of 0.7 for
# effect sizes corresponding to the same type of delinquent behavior and a
# correlation of 0.5 for effect sizes corresponding to different types of
# delinquent behavior within studies
V <- vcalc(vi, cluster=study, type=deltype, obs=esid, data=dat, rho=c(0.7,0.5))

# examine the part of V corresponding to study 1
blsplit(V, cluster=dat$study, round, 4)[[1]]
blsplit(V, cluster=dat$study, cov2cor)[[1]]

# examine the part of V corresponding to study 12
blsplit(V, cluster=dat$study, round, 4)[[12]]
blsplit(V, cluster=dat$study, cov2cor)[[12]]

# fit a multilevel random-effects model to the data using this approximate V
# matrix (using a t-distribution for tests/CIs with the 'containment' method
# for computing the degrees of freedom)
res <- rma.mv(yi, V, random = ~ 1 | study/esid, data=dat, test="t", dfs="contain")
res

# vcalc() can deal with dependency in effect sizes:
# - measuring the same construct with different scales
# - measuring different constructs
# - measuring the same/different constructs at multiple timepoints
# - due to reuse of data from a shared (control) group

# use cluster-robust inference methods (i.e., robust variance estimation)
robust(res, cluster=study, clubSandwich=TRUE)

# with clubSandwich=TRUE, robust() makes use of the methods implemented in the
# clubSandwich package for improved inferences using the CR2 estimator of the
# var-cov matrix of the fixed effects and using a Satterthwaite approximation
# for the degrees of freedom

############################################################################

### selection models for meta-analysis

# results from 37 studies on the risk of lung cancer in women exposed
# to environmental tobacco smoke (ETS) from their smoking spouse
dat <- dat.hackshaw1998
dat

# yi - log odds ratios (positive values indicate an increased risk of cancer in
#      exposed women compared to women not exposed to ETS from their spouse)
# vi - corresponding sampling variances

# fit a random-effects model (using ML estimation)
res <- rma(yi, vi, data=dat, method="ML")
res

# estimated odds ratio with CI/PI
predict(res, transf=exp, digits=2)

# funnel plot
funnel(res, ylim=c(0,0.8), las=1, digits=c(0,1))

# fit a step function selection model
sel1 <- selmodel(res, type="stepfun", steps=c(.025,.1,.5,1))
sel1

# plot the selection function
plot(sel1, ylim=c(0,1), bty="l")

# fit a negative-exponential selection function model
sel2 <- selmodel(res, type="negexp")
sel2
plot(sel2, add=TRUE, col="red")

# truncated distribution selection model
sel3 <- selmodel(res, type="trunc")
sel3

# 0.3818 is the relative likelihood of selection for effect sizes below 0
# (compared to effect sizes above 0) conditional on the estimated (average)
# effect size (and the amount of heterogeneity)

# selection model assuming truncation at the minimum effect size estimate
sel4 <- selmodel(res, type="trunc", steps=min(dat$yi))
sel4

# trim and fill method
taf <- trimfill(res)
taf
funnel(taf, ylim=c(0,0.8), las=1, digits=c(0,1))
abline(v=min(dat$yi), lwd=3)

# force the relative likelihood of selection for effect sizes below the
# minimum effect size estimate to essentially 0 (as assumed by trim and fill)
sel5 <- selmodel(res, type="trunc", steps=min(dat$yi), delta=0.001)
sel5

# truncated distribution selection model with estimated truncation point
sel6 <- selmodel(res, type="truncest")
sel6
funnel(sel6, ylim=c(0,0.8), las=1, digits=c(0,1))
abline(v=sel6$delta[2], lwd=3)

# the 'truncest' model is experimental; the likelihood surface is often quite
# rugged with multiple local optima, so optimization is difficult

############################################################################

### multivariate meta-analysis and fitting regression models based on
### estimated var-cov matrix of the random effects

# results from 17 studies examining overall and disease-free survival in
# neuroblastoma patients with amplified vs normal MYC-N protein levels
dat <- dat.riley2003[1:34,]
dat

# yi      - log hazard ratio in those with amplified versus normal MYC-N protein levels
# vi      - corresponding sampling variance
# outcome - DFS = disease-free survival, OS = overall survival

# construct an approximate var-cov matrix, assuming a correlation of 0.8 for
# the pairs of log hazard ratios within studies
V <- vcalc(vi, cluster=study, type=outcome, rho=0.8, data=dat)

# fit a bivariate model to the data
res <- rma.mv(yi, V, mods = ~ outcome - 1, random = ~ outcome | study,
              struct="UN", data=dat)
res

# cross-check results using cluster-robust inference methods
robust(res, cluster=study, clubSandwich=TRUE)

# var-cov matrix of the random effects
res$G

# fit a regression model based on this var-cov matrix, predicting the overall
# survival log hazard ratio from the disease-free survival log hazard ratio
matreg(y=2, x=1, R=res$G, cov=TRUE, means=coef(res), n=res$g.levels.comb.k)

# fit the bivariate model again, but now also obtain the var-cov matrix of the
# estimates in res$G
res <- rma.mv(yi, V, mods = ~ outcome - 1, random = ~ outcome | study,
              struct="UN", data=dat, cvvc="varcov")

# var-cov matrix of the random effects and the var-cov matrix of these estimates
res$G
res$vvc

# now use res$vvc as the var-cov matrix of the estimates in res$G
matreg(y=2, x=1, R=res$G, cov=TRUE, means=coef(res), V=res$vvc)

############################################################################

### the metadat package

# the metafor package used to contain around 60 datasets; these datasets have
# been moved to a separate data package called 'metadat' that also contains
# additional datasets contributed by various people (now 87 datasets)

help(package="metadat")

# metadat is useful for teaching purposes, illustrating/testing meta-analytic
# methods, and validating published analyses
#
# instructions for contributing additional datasets can be found in the docs:
#
# https://wviechtb.github.io/metadat/

# the datsearch() function can help to find datasets

datsearch()

############################################################################