# This code will check that you have all of the packages you need to run this script, and install them if they are missing:
list.of.packages <- c("ordinal",
"tidyverse",
"sjPlot",
"scales",
"ggthemes",
"brms",
"tidybayes")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
# Loading the packages that we need
library(ordinal)
library(tidyverse)
library(sjPlot)
library(scales)
library(ggthemes)
# Ordinal models intro----
# Loading in the simulated data from Chapter 8 of Garcia (2021)
load("data/rc_hls.Rdata")
# Checking out the structure of the data
str(rc_hls)
summary(rc_hls)
# Plotting the raw data
rc = as_tibble(rc_hls)
rcProp = rc %>%
group_by(L1, Condition, Certainty3) %>%
count() %>%
group_by(L1, Condition) %>%
mutate(P = n / sum(n),
Certainty3 = factor(Certainty3, levels = c("Not certain", "Neutral", "Certain")),
Condition = factor(Condition, levels = c("NoBreak", "Low", "High")),
Text = factor(ifelse(Certainty3 == "Certain", 1, 0)))
ggplot(data = rcProp, aes(x = Condition, y = P, fill = Certainty3)) +
geom_col(width = 0.5, color = "black") +
facet_grid(~L1) +
geom_text(aes(label = str_c(P*100, "%"), color = Text),
position = position_stack(vjust = 0.5),
fontface = "plain", size = 3) +
theme_classic() +
theme(legend.position = "bottom") +
scale_fill_brewer(palette = 3, direction = 1) +
scale_y_reverse(labels = percent_format()) +
scale_color_manual(values = c("black", "white"), guide = "none") +
labs(fill = "Certainty:",
x = "Condition",
y = NULL) +
coord_flip()
# Fitting an additive ordinal model
clm_fit_addtive <- clm(Certainty3 ~
Condition +
L1,
data = rc_hls)
# Calling summary() to inspect the model
summary(clm_fit_addtive)
# Fitting our ordinal model with an interaction between Condition & L1
clm_fit_interaction <- clm(Certainty3 ~
Condition +
L1 +
Condition:L1,
data = rc_hls)
# Inspecting model
summary(clm_fit_interaction)
# Testing contribution of interaction to model fit using LRT
anova(clm_fit_addtive, clm_fit_interaction)
# Plotting the interaction with package sjPlot
p <- plot_model(clm_fit_interaction,
type = "pred",
terms = c("Condition", "L1"),
colors = c("black", "grey50"))
p +
theme_base(base_size = 18) +
ggtitle("") +
theme(axis.text.x = element_text(angle = 310,
hjust = 0)) +
ylab("Predicted probability of response")
# Manually plotting predicted values
predictedProbs2 = as_tibble(predict(clm_fit_interaction,
newdata = tibble(Condition = rep(c("NoBreak",
"Low",
"High"),
times = 2),
L1 = rep(c("English", "Spanish"),
each = 3)),
type = "prob")$fit)
# Add L1 and Condition columns to predictions:
pred_fit_clm2 = predictedProbs2 %>%
mutate(L1 = rep(c("English", "Spanish"), each = 3),
Condition = rep(c("NoBreak", "Low", "High"), times = 2))
# Wide-to-long transform tibble:
longPredictions = pred_fit_clm2 %>%
pivot_longer(names_to = "Certainty",
values_to = "Probability",
cols = `Not certain`:Certain)
# Plot model s predictions (load scales package first):
ggplot(data = longPredictions, aes(x = Certainty, y = Probability)) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "gray80") +
geom_line(aes(group = L1, linetype = L1)) +
geom_point(aes(fill = L1), shape = 21, size = 3) +
facet_grid(~Condition, labeller = "label_both") +
coord_cartesian(ylim = c(0,1)) +
scale_y_continuous(labels = percent_format()) +
scale_x_discrete(limits = c("Not certain", "Neutral", "Certain")) +
scale_fill_manual(values = c("white", "black"), guide = "none") +
scale_linetype_manual(values = c("dashed", "solid")) +
theme_classic() +
labs(linetype = "L1:") +
theme(legend.position = c(0.9,0.8),
legend.background = element_rect(color = "black"),
axis.text.x = element_text(angle = 25, hjust = 1))
# Loading in the matched guise data from Gabriella Licata hosted by Annie Helms @
# https://github.com/anniehelms/machine_learning_for_linguistics/blob/master/data/matched_guise.csv
mg_dat <- read.csv("data/matched_guise.csv")
# Coercing the Feminine voice rating to an ordered factor
mg_dat$Feminine_voice <- ordered(mg_dat$Feminine_voice)
# Getting a table of responses by GuiseGender
table(mg_dat$Feminine_voice, mg_dat$GuiseGender)
# Fitting the ordinal model
mg_fit_1 <- clmm(Feminine_voice ~
GuiseGender +
(1|Participant),
data = mg_dat)
# Inspecting our model that failed to fit
summary(mg_fit_1)
# Collapsing categories 2-6 into one bin
mg_dat$Feminine_voice_collapsed <- mg_dat$Feminine_voice
mg_dat$Feminine_voice_collapsed[mg_dat$Feminine_voice_collapsed > 1 & mg_dat$Feminine_voice_collapsed <= 6] <- 6
# Fitting an ordinal model with three categories instead of 7
mg_fit_2 <- clmm(Feminine_voice_collapsed ~
GuiseGender +
(1|Participant),
data = mg_dat,
nAGQ = 10)
# Inspecting our model fit to the collapsed middle categories
summary(mg_fit_2)
# Bayesian models----
# This section, which may not have been covered live during the workshop, shows us how to fit Bayesian ordinal models that allow for unequal variances
library(brms)
# Bayesian models----
# This section, which may not have been covered live during the workshop, shows us how to fit Bayesian ordinal models that allow for unequal variances
library(brms)
# We're going to take another rating category: Friendly
mg_dat$Friendly <- ordered(mg_dat$Friendly)
# This is the formula for a model with equal variance, it should look familiar
equal_variance_formula <- bf(
Friendly ~
GuiseLanguage +
GuiseGender +
GuiseLanguage:GuiseGender +
(1|Participant)
)
# This is the formula for a model with unequal variance. We allow the 'disc' parameter to vary by the same variables as the rating.
unequal_variance_formula <- bf(
Friendly ~
GuiseLanguage +
GuiseGender +
GuiseLanguage:GuiseGender +
(1|Participant)) +
lf(disc ~
GuiseLanguage +
GuiseGender +
GuiseLanguage:GuiseGender +
(1|Participant)
)
# Priors for unequal variance model
cumulative_prior_unequal <- c(
prior(normal(0,3), class = "Intercept"),
prior(normal(0,1), class = "Intercept", dpar = "disc"),
prior(normal(0,1), class = "b"),
prior(exponential(1), class = "sd")
)
# Fitting the model with equal variances
Friendly_ordinal_equal <- brm(equal_variance_formula,
data = mg_dat,
family = cumulative(),
prior = cumulative_prior_equal,
cores = 4,
save_pars = save_pars(all = TRUE),
control = list(adapt_delta = 0.95))
