############################################################################
# Open Online R Stream (https://www.wvbauer.com/doku.php/live_streams)
#
# By: Wolfgang Viechtbauer (https://www.wvbauer.com)
# Date: 2021-03-18
#
# Topic(s):
# - fundamentals of regression modeling with R (part 1)
#
# last updated: 2021-04-08
############################################################################
# read in the data
dat <- read.delim("data_survey_edit.txt", na.strings="", as.is=TRUE)
# look at first 6 rows of the dataset
head(dat)
# to make things a bit easier below, keep only participants with complete data
# on the variables pss, source, rses, and sex
dat <- dat[complete.cases(dat[c("pss", "source", "rses", "sex")]),]
# collapse the source variable down to a smaller number of levels
table(dat$source)
dat$source <- ifelse(dat$source %in% c("children", "family", "friendships", "spouse/partner"), "interpers", dat$source)
dat$source <- ifelse(dat$source %in% c("health/illness", "lack of time", "life in general", "money/finances"), "other", dat$source)
table(dat$source)
############################################################################
# fit a regression model with pss as outcome and source as predictor
res <- lm(pss ~ source, data=dat)
summary(res)
# coding is like this:
# sourceother = 1 if source = "other" and 0 otherwise
# sourcework = 1 if source = "work" and 0 otherwise
# hence, if source = "interpers", then sourceother = 0 and sourcework = 0
# so, "interpers" is the so-called reference level
# sourceother is the difference between "other" and "interpers"
# sourcework is the difference between "work" and "interpers"
# these are 'contrasts'
# F-statistics tests that both of these contrasts are 0 (i.e, it is testing
# the null hypothesis that the average PSS value is the same in the 3 groups)
# change the reference level to "work" (note: relevel() only work on factors,
# so we must explicitly turn the source variable into a factor before we can
# use the relevel() function on it)
res <- lm(pss ~ relevel(factor(source), ref="work"), data=dat)
summary(res)
# [...]interpers is the difference between "interpers" and "work"
# [...]other is the difference between "other" and "work"
# note: F-statistic is the same, so it does not matter which reference level
# we use if we want to test if there are differences between the groups
# same model but remove the intercept
res <- lm(pss ~ 0 + source, data=dat)
#same as: res <- lm(pss ~ source - 1, data=dat)
summary(res)
# coding is like this:
# sourceinterps = 1 if source = "interpers" and 0 otherwise
# sourceother = 1 if source = "other" and 0 otherwise
# sourcework = 1 if source = "work" and 0 otherwise
# so the coefficients are the average PSS values for the 3 groups
# the F-statistic is testing whether the average PSS value is 0 in all 3 groups
# (this is a somewhat meaningless thing to test here, since PSS values range
# from 10 to 50 and so we know a priori that the mean PSS value could never be
# 0 in any of the groups anyway)
# fit of the two models is the same, they are just 'parameterized' differently
res <- lm(pss ~ source, data=dat)
logLik(res)
sigma(res)
res <- lm(pss ~ 0 + source, data=dat)
logLik(res)
sigma(res)
# (install if necessary and) load the 'car' package
#install.packages("car")
library(car)
# fit model without the intercept again
res <- lm(pss ~ 0 + source, data=dat)
summary(res)
# using linearHypothesis(), we can test linear contrasts we are interested in
# test if 1*sourceinterpers + -1*sourceother + 0*sourcework is significant
linearHypothesis(res, hypothesis.matrix=c(1,-1,0))
# test the other two pairwise contrasts
linearHypothesis(res, hypothesis.matrix=c(1,0,-1))
linearHypothesis(res, hypothesis.matrix=c(0,1,-1))
# note: the p-values for these contrasts are the same as we obtained earlier
# (when we fitted the models with the intercept)
# can also other contrasts that may be of interest
# 1*sourceinterpers - 1/2*sourceother - 1/2*sourcework
# 1*sourceinterpers - (sourceother + sourcework) / 2
# (i.e., test if the average PSS value of the "interpers" group is different
# from the average of the PSS values of the "other" and "work" groups, or more
# precisely, the average of the two averages of these two groups)
linearHypothesis(res, hypothesis.matrix=c(1,-1/2,-1/2))
# fit the model with the intercept
res <- lm(pss ~ source, data=dat)
summary(res)
linearHypothesis(res, hypothesis.matrix=c(0,1,-1))
# (mean_other - mean_interpers) - (mean_work - mean_interpers)
# mean_other - mean_interpers - mean_work + mean_interpers
# mean_other - mean_work
# so this is the same as the contrast above where we test other vs work
############################################################################