############################################################################

# Experience Sampling Methodology Course
# Data Analysis Exercise
#
# the dataset corresponding to this exercise is available here:
#
# https://www.wvbauer.com/lib/exe/fetch.php/suppl:viechtbauer2022_data_esmda_example.txt
#
# or here (in case my website is not accessible):
#
# https://www.kuleuven.be/samenwerking/real/real-book/viechtbauer2022_data_esmda_example
#
# the filename for the dataset is: viechtbauer2022_data_esmda_example.txt
#
# put the dataset and this R script into some folder/directory of your choice
# on your computer

############################################################################

### reading in the dataset

# before we can read in the data with the following command, the 'working
# directory' must be set to the same place where the dataset (and this R
# script) are located on your computer; in principle, this can be done with
# the setwd() command; in RStudio, you can also use the 'Files' tab (which
# should be part of the bottom right pane) where you can navigate to the
# location of the dataset, then click on the 'More' button (which has a little
# gear symbol next to it), and then choose 'Set As Working Directory' (note
# that this actually runs the setwd() command with the chosen location)

# read in the dataset
dat <- read.table("viechtbauer2022_data_esmda_example.txt", header=TRUE,
                  sep="\t", na.strings="", as.is=TRUE)

# note: as long as you do *not* get an error message (e.g., "cannot open file
# 'viechtbauer2022_data_esmda_example.txt': No such file or directory"), then
# the data have been read in

# examine the first six rows of the dataset
head(dat)

# you can also use the View() command to inspect the dataset
View(dat)

# first familiarize yourself with the variables in the dataset (see also the
# description of the variables in esm_data_analysis_exercise.pdf)

############################################################################

### install the esmpack package

# for some of the data management steps below, we will make use of the R
# package 'esmpack' (which is available via GitHub); to install this package,
# first install the 'remotes' package and then you can use install_github() to
# install it

# install the 'remotes' package
install.packages("remotes")

# install the 'esmpack' package (version 0.1-17)
remotes::install_github("wviechtb/esmpack@v0.1-17")

# load the 'esmpack' package
library(esmpack)

############################################################################

### some data inspection

# compute response delays
dat$delay <- dat$resptime - dat$beeptime

# barplot of the delay values
barplot(table(dat$delay), xlab="Delay in Minutes")

# mean, SD, and range of the delay values
round(mean(dat$delay, na.rm=TRUE), digits=1)
round(sd(dat$delay, na.rm=TRUE), digits=1)
range(dat$delay, na.rm=TRUE)

# compute the number of beeps that each subject responded to (i.e., the number
# of non-missing response times within subjects)
respfreq <- calc.nomiss(resptime, id, data=dat)

# histogram of the number of beeps responded to (response frequency)
hist(respfreq, xlab="Response Frequency", main="", right=FALSE, breaks=25)

# mean, SD, and range of the compliance rates (percentage of beeps responded to)
round(mean(respfreq) / 60 * 100, digits=1)
round(sd(respfreq) / 60 * 100, digits=1)
round(range(respfreq / 60 * 100), digits=1)

# compute variables posaff and negaff (average of the 3 PA and 6 NA items)
dat$posaff <- combitems(c("mood_cheerf", "mood_relaxed", "mood_satisfi"), data=dat)
dat$negaff <- combitems(c("mood_irritat", "mood_anxious", "mood_down", "mood_guilty", "mood_insecur", "mood_lonely"), data=dat)

# mean and SD of the posaff variable
round(mean(dat$posaff, na.rm=TRUE), digits=2)
round(sd(dat$posaff, na.rm=TRUE), digits=2)

# frequency table of the event pleasantness variable
table(dat$eventpl, useNA="ifany")

# mean, SD, and range of the age of the subjects
round(mean(get.timeinvar(age, id, data=dat)), digits=1)
round(sd(get.timeinvar(age, id, data=dat)), digits=1)
round(range(get.timeinvar(age, id, data=dat)), digits=1)

# number of female/male subjects
table(get.timeinvar(sex, id, data=dat))

# number of subjects in the three status groups
table(get.timeinvar(status, id, data=dat))

# for reasons to be covered later, we want to create a 'lagged' version of the
# posaff variable (i.e., the posaff value at beep 1 will be the posaff value
# of this lagged variable at beep 2, the value at beep 2 will be the value at
# beep 3, and so on); however, we do not want to use the value at beep 10 as
# the value for beep 11 since we would then be lagging this variable across
# the night (the same goes for beep 20, 30, ...); also, we do not want to use
# the value of beep 60 as the value for beep 1 of the following subject, since
# we would then be lagging this variable across subjects

# create a lagged version of the posaff variable (without lagging across the
# night and across subjects)
dat$lposaff <- lagvar(posaff, id=id, obs=obs, day=day, data=dat)

# inspect the dataset to make sure that this was done correctly

############################################################################

### visualizing the data of a single subject

# select data from a single subject (and only rows where resptime is not missing)
dat1 <- subset(dat, id == "c119" & !is.na(resptime))

# plot positive affect (posaff) versus time (tothours) for this subject
plot(posaff ~ tothours, data=dat1, type="o", pch=19, xlim=c(0,144),
     ylim=c(1,7.2), xlab="Time (in Hours)", ylab="Positive Affect")
abline(v=0:6*24, lty="dotted") # add dotted vertical lines at midnight
text(12+0:5*24, 7, pos=3, paste("Day ", 1:6)) # add day number text

# plot positive affect (posaff) versus event pleasantness (eventpl) for this subject
plot(posaff ~ eventpl, data=dat1, pch=19, xlim=c(-3,3),
     ylim=c(1,7), xlab="Pleasantness of Event", ylab="Positive Affect")

# one issue: multiple points may overlap; can deal with this by 'jittering'
# the points so that multiple points at the same location are separated
plot(jitter(posaff) ~ jitter(eventpl), data=dat1, pch=19, xlim=c(-3.5,3.5),
     ylim=c(1,7), xlab="Pleasantness of Event", ylab="Positive Affect")

# if you like, you can try this with some other subjects and variables

############################################################################

### analyzing the data of a single subject

# examine the (linear) relationship between posaff and eventpl for the subject
res <- lm(posaff ~ eventpl, data=dat1)
summary(res)

# add the regression line to the previous scatterplot
abline(res, lwd=3)

# Q: What is the interpretation of the intercept?
#
# A: The intercept is the estimated average positive affect (on a 1-7 scale)
# for beeps where the most important event since the previous beep was rated
# as neutral (eventpl=0). For subject c119, this value is equal to 4.42.

# Q: What is the interpretation of the slope?
#
# A: The slope is the estimated change in the average positive affect for a
# one-unit increase in event pleasantness. For subject c119, for beeps where
# event pleasantness was rated as 1, the average positive affect is estimated
# to be 0.15 points higher compared to beeps where event pleasantness was
# rated as 0. And for beeps where event pleasantness was rated as 3, this
# implies that the average positive affect is on average 3*0.15 = 0.45 points
# higher compared to beeps where event pleasantness was rated as 0.

# the model above assumes that all observations are independent; however,
# since the data were collected from the same subject over time, this
# assumption may not be appropriate; one way of modeling the 'autocorrelation'
# in the outcome variable (posaff) is to include its 'lagged' version as a
# predictor in the model

res <- lm(posaff ~ eventpl + lposaff, data=dat1)
summary(res)

# the coefficient for 'lposaff' indicates how positive affect at one beep is
# related to positive affect from the previous beep (on the same day)

############################################################################

### analyzing the data of a group of subjects

# load the nlme package (for fitting mixed-effects models)
library(nlme)

# fit the 'empty' model
res <- lme(posaff ~ 1, random = ~ 1 | id, data=dat, na.action=na.omit)
summary(res)

# Q: What is the interpretation of the intercept in this model?
#
# A: It is the estimated average level of positive affect, averaged across
# subjects and repeated assessments therein.

# Q: Which source of variability in the positive affect ratings is larger?
# Differences between subjects or differences between repeated assessments
# within subjects?
#
# A: Under 'Random effects:', the output shows the estimated standard
# deviation of the between-person differences (0.96) and the within-person
# differences (1.01). The latter is a bit larger.

# show the variances for these two sources of variability
VarCorr(res)

# Q: How strong is the correlation of observations from the same subject?
#
# A: To answer this, we need to compute the intraclass correlation coefficient,
# which is equal to 0.92 / (0.92 + 1.01) = 0.48 for these data.

# fit the random intercepts and slopes model to examine the association between
# positive affect and event pleasantness
res <- lme(posaff ~ eventpl, random = ~ eventpl | id, data=dat, na.action=na.omit)
summary(res)

# Q: What can we conclude about the relationship between these two variables?
#
# A: The estimated average slope is equal to 0.19, which differs significantly
# from 0 (p < .001). Hence, averaged across subjects, there is a positive
# relationship between event pleasantness and positive affect.

# fit the model that allows the average intercept and slope to differ across groups
res <- lme(posaff ~ status*eventpl, random = ~ eventpl | id, data=dat, na.action=na.omit)
summary(res)

# Fixed effects:
# (Intercept)             = estimated average PA when event pleasantness is 0 in the control group
# eventpl                 = estimated average slope for event pleasantness in the control group
# statusdepressed         = estimated difference between the depressed and control group with respect to the intercept
# statuspsychotic         = estimated difference between the psychosis and control group with respect to the intercept
# eventpl:statusdepressed = estimated difference between the depressed and control group with respect to the slope
# eventpl:statuspsychotic = estimated difference between the psychosis and control group with respect to the slope

# Q: What is the average intercept for subjects in the depression group?
#
# A: To answer this, we need to add the statusdepressed coefficient to the
# intercept, that is, 4.97 + -1.22 = 3.75.

# Q: What is the average slope for subjects in the depression group?
#
# A: To answer this, we need to add the statusdepressed:eventpl coefficient to
# the eventpl coefficient, that is, 0.14 + 0.12 = 0.26.

# a different way of parameterizing the same model (directly provides the
# intercepts and the slopes for the three 'status' groups)
res <- lme(posaff ~ status + eventpl:status - 1, random = ~ eventpl | id, data=dat, na.action=na.omit)
summary(res)

# Fixed effects:
# statuscontrol           = estimated average PA when event pleasantness is 0 in the control group
# statusdepressed         = estimated average PA when event pleasantness is 0 in the depression group
# statuspsychotic         = estimated average PA when event pleasantness is 0 in the psychosis group
# statuscontrol:eventpl   = estimated average slope for event pleasantness in the control group
# statusdepressed:eventpl = estimated average slope for event pleasantness in the depression group
# statuspsychotic:eventpl = estimated average slope for event pleasantness in the psychosis group

# show the variances for the random effects
VarCorr(res)

############################################################################