############################################################################ # Experience Sampling Methodology Course # Data Analysis Exercise # # the dataset corresponding to this exercise is available here: # # https://www.wvbauer.com/lib/exe/fetch.php/suppl:viechtbauer2022_data_esmda_example.txt # # or here (in case my website is not accessible): # # https://www.kuleuven.be/samenwerking/real/real-book/viechtbauer2022_data_esmda_example # # the filename for the dataset is: viechtbauer2022_data_esmda_example.txt # # put the dataset and this R script into some folder/directory of your choice # on your computer ############################################################################ ### reading in the dataset # before we can read in the data with the following command, the 'working # directory' must be set to the same place where the dataset (and this R # script) are located on your computer; in principle, this can be done with # the setwd() command; in RStudio, you can also use the 'Files' tab (which # should be part of the bottom right pane) where you can navigate to the # location of the dataset, then click on the 'More' button (which has a little # gear symbol next to it), and then choose 'Set As Working Directory' (note # that this actually runs the setwd() command with the chosen location) # read in the dataset dat <- read.table("viechtbauer2022_data_esmda_example.txt", header=TRUE, sep="\t", na.strings="", as.is=TRUE) # note: as long as you do *not* get an error message (e.g., "cannot open file # 'viechtbauer2022_data_esmda_example.txt': No such file or directory"), then # the data have been read in # examine the first six rows of the dataset head(dat) # you can also use the View() command to inspect the dataset View(dat) # first familiarize yourself with the variables in the dataset (see also the # description of the variables in esm_data_analysis_exercise.pdf) ############################################################################ ### install the esmpack package # for some of the data management steps below, we will make use of the R # package 'esmpack' (which is available via GitHub); to install this package, # first install the 'remotes' package and then you can use install_github() to # install it # install the 'remotes' package install.packages("remotes") # install the 'esmpack' package (version 0.1-17) remotes::install_github("wviechtb/esmpack@v0.1-17") # load the 'esmpack' package library(esmpack) ############################################################################ ### some data inspection # compute response delays dat$delay <- dat$resptime - dat$beeptime # barplot of the delay values barplot(table(dat$delay), xlab="Delay in Minutes") # mean, SD, and range of the delay values round(mean(dat$delay, na.rm=TRUE), digits=1) round(sd(dat$delay, na.rm=TRUE), digits=1) range(dat$delay, na.rm=TRUE) # compute the number of beeps that each subject responded to (i.e., the number # of non-missing response times within subjects) respfreq <- calc.nomiss(resptime, id, data=dat) # histogram of the number of beeps responded to (response frequency) hist(respfreq, xlab="Response Frequency", main="", right=FALSE, breaks=25) # mean, SD, and range of the compliance rates (percentage of beeps responded to) round(mean(respfreq) / 60 * 100, digits=1) round(sd(respfreq) / 60 * 100, digits=1) round(range(respfreq / 60 * 100), digits=1) # compute variables posaff and negaff (average of the 3 PA and 6 NA items) dat$posaff <- combitems(c("mood_cheerf", "mood_relaxed", "mood_satisfi"), data=dat) dat$negaff <- combitems(c("mood_irritat", "mood_anxious", "mood_down", "mood_guilty", "mood_insecur", "mood_lonely"), data=dat) # mean and SD of the posaff variable round(mean(dat$posaff, na.rm=TRUE), digits=2) round(sd(dat$posaff, na.rm=TRUE), digits=2) # frequency table of the event pleasantness variable table(dat$eventpl, useNA="ifany") # mean, SD, and range of the age of the subjects round(mean(get.timeinvar(age, id, data=dat)), digits=1) round(sd(get.timeinvar(age, id, data=dat)), digits=1) round(range(get.timeinvar(age, id, data=dat)), digits=1) # number of female/male subjects table(get.timeinvar(sex, id, data=dat)) # number of subjects in the three status groups table(get.timeinvar(status, id, data=dat)) # for reasons to be covered later, we want to create a 'lagged' version of the # posaff variable (i.e., the posaff value at beep 1 will be the posaff value # of this lagged variable at beep 2, the value at beep 2 will be the value at # beep 3, and so on); however, we do not want to use the value at beep 10 as # the value for beep 11 since we would then be lagging this variable across # the night (the same goes for beep 20, 30, ...); also, we do not want to use # the value of beep 60 as the value for beep 1 of the following subject, since # we would then be lagging this variable across subjects # create a lagged version of the posaff variable (without lagging across the # night and across subjects) dat$lposaff <- lagvar(posaff, id=id, obs=obs, day=day, data=dat) # inspect the dataset to make sure that this was done correctly ############################################################################ ### visualizing the data of a single subject # select data from a single subject (and only rows where resptime is not missing) dat1 <- subset(dat, id == "c119" & !is.na(resptime)) # plot positive affect (posaff) versus time (tothours) for this subject plot(posaff ~ tothours, data=dat1, type="o", pch=19, xlim=c(0,144), ylim=c(1,7.2), xlab="Time (in Hours)", ylab="Positive Affect") abline(v=0:6*24, lty="dotted") # add dotted vertical lines at midnight text(12+0:5*24, 7, pos=3, paste("Day ", 1:6)) # add day number text # plot positive affect (posaff) versus event pleasantness (eventpl) for this subject plot(posaff ~ eventpl, data=dat1, pch=19, xlim=c(-3,3), ylim=c(1,7), xlab="Pleasantness of Event", ylab="Positive Affect") # one issue: multiple points may overlap; can deal with this by 'jittering' # the points so that multiple points at the same location are separated plot(jitter(posaff) ~ jitter(eventpl), data=dat1, pch=19, xlim=c(-3.5,3.5), ylim=c(1,7), xlab="Pleasantness of Event", ylab="Positive Affect") # if you like, you can try this with some other subjects and variables ############################################################################ ### analyzing the data of a single subject # examine the (linear) relationship between posaff and eventpl for the subject res <- lm(posaff ~ eventpl, data=dat1) summary(res) # add the regression line to the previous scatterplot abline(res, lwd=3) # Q: What is the interpretation of the intercept? # # A: The intercept is the estimated average positive affect (on a 1-7 scale) # for beeps where the most important event since the previous beep was rated # as neutral (eventpl=0). For subject c119, this value is equal to 4.42. # Q: What is the interpretation of the slope? # # A: The slope is the estimated change in the average positive affect for a # one-unit increase in event pleasantness. For subject c119, for beeps where # event pleasantness was rated as 1, the average positive affect is estimated # to be 0.15 points higher compared to beeps where event pleasantness was # rated as 0. And for beeps where event pleasantness was rated as 3, this # implies that the average positive affect is on average 3*0.15 = 0.45 points # higher compared to beeps where event pleasantness was rated as 0. # the model above assumes that all observations are independent; however, # since the data were collected from the same subject over time, this # assumption may not be appropriate; one way of modeling the 'autocorrelation' # in the outcome variable (posaff) is to include its 'lagged' version as a # predictor in the model res <- lm(posaff ~ eventpl + lposaff, data=dat1) summary(res) # the coefficient for 'lposaff' indicates how positive affect at one beep is # related to positive affect from the previous beep (on the same day) ############################################################################ ### analyzing the data of a group of subjects # load the nlme package (for fitting mixed-effects models) library(nlme) # fit the 'empty' model res <- lme(posaff ~ 1, random = ~ 1 | id, data=dat, na.action=na.omit) summary(res) # Q: What is the interpretation of the intercept in this model? # # A: It is the estimated average level of positive affect, averaged across # subjects and repeated assessments therein. # Q: Which source of variability in the positive affect ratings is larger? # Differences between subjects or differences between repeated assessments # within subjects? # # A: Under 'Random effects:', the output shows the estimated standard # deviation of the between-person differences (0.96) and the within-person # differences (1.01). The latter is a bit larger. # show the variances for these two sources of variability VarCorr(res) # Q: How strong is the correlation of observations from the same subject? # # A: To answer this, we need to compute the intraclass correlation coefficient, # which is equal to 0.92 / (0.92 + 1.01) = 0.48 for these data. # fit the random intercepts and slopes model to examine the association between # positive affect and event pleasantness res <- lme(posaff ~ eventpl, random = ~ eventpl | id, data=dat, na.action=na.omit) summary(res) # Q: What can we conclude about the relationship between these two variables? # # A: The estimated average slope is equal to 0.19, which differs significantly # from 0 (p < .001). Hence, averaged across subjects, there is a positive # relationship between event pleasantness and positive affect. # fit the model that allows the average intercept and slope to differ across groups res <- lme(posaff ~ status*eventpl, random = ~ eventpl | id, data=dat, na.action=na.omit) summary(res) # Fixed effects: # (Intercept) = estimated average PA when event pleasantness is 0 in the control group # eventpl = estimated average slope for event pleasantness in the control group # statusdepressed = estimated difference between the depressed and control group with respect to the intercept # statuspsychotic = estimated difference between the psychosis and control group with respect to the intercept # eventpl:statusdepressed = estimated difference between the depressed and control group with respect to the slope # eventpl:statuspsychotic = estimated difference between the psychosis and control group with respect to the slope # Q: What is the average intercept for subjects in the depression group? # # A: To answer this, we need to add the statusdepressed coefficient to the # intercept, that is, 4.97 + -1.22 = 3.75. # Q: What is the average slope for subjects in the depression group? # # A: To answer this, we need to add the statusdepressed:eventpl coefficient to # the eventpl coefficient, that is, 0.14 + 0.12 = 0.26. # a different way of parameterizing the same model (directly provides the # intercepts and the slopes for the three 'status' groups) res <- lme(posaff ~ status + eventpl:status - 1, random = ~ eventpl | id, data=dat, na.action=na.omit) summary(res) # Fixed effects: # statuscontrol = estimated average PA when event pleasantness is 0 in the control group # statusdepressed = estimated average PA when event pleasantness is 0 in the depression group # statuspsychotic = estimated average PA when event pleasantness is 0 in the psychosis group # statuscontrol:eventpl = estimated average slope for event pleasantness in the control group # statusdepressed:eventpl = estimated average slope for event pleasantness in the depression group # statuspsychotic:eventpl = estimated average slope for event pleasantness in the psychosis group # show the variances for the random effects VarCorr(res) ############################################################################