#Set seed so your answers are all the same
set.seed(9)
# Sample Per class room people
n1 <- 20; n2 <- 20; n3 <- 20; n4 <- 20
N<-n1+n2+n3+n4 # Total N
# Uniform distrobution of proportion of time per classroom
X1 <- runif(n1, 0, .35)
X2 <- runif(n2, .3, .55)
X3 <- runif(n3, .5, .75)
X4 <- runif(n4, .7,1.0)
# noise per classroom
e1 <- rnorm(n1, 0, sd=2.5)
e2 <- rnorm(n2, 0, sd=2.5)
e3 <- rnorm(n3, 0, sd=2.5)
e4 <- rnorm(n4, 0, sd=2.5)
# Intercepts per classroom
B0.1 <- 80
B0.2 <- 70
B0.3 <- 60
B0.4 <- 50
# Same slope per classroom
B1=10
# Our equation to create Y for each classroom
Y1 = B1*scale(X1,scale=F) + B0.1 + e1
Y2 = B1*scale(X2,scale=F) + B0.2 + e2
Y3 = B1*scale(X3,scale=F) + B0.3 + e3
Y4 = B1*scale(X4,scale=F) + B0.4 + e4
# Merge classrooms into 1 data.frame
Math.Data<-data.frame(Math=c(Y1,Y2,Y3,Y4),ActiveTime=c(X1,X2,X3,X4),
Classroom=c(rep("C1",n1),rep("C2",n2),rep("C3",n3),rep("C4",n4)),
StudentID=as.factor(1:N))
library(ggplot2)
theme_set(theme_bw())
ClassRoom.Plot.1 <-ggplot(data = Math.Data,
aes(x = ActiveTime, y=Math))+ #scaffold
geom_point()+ # add layer of scatterplot
geom_smooth(method = "lm", se = TRUE)+ # add regression line
xlab("Proportion of Time Engaged in Active Learning")+
ylab("Math Score") # add labels
ClassRoom.Plot.1 #call plot
library(stargazer)
Class.All<-lm(Math~ActiveTime, data = Math.Data)
stargazer(Class.All,type="latex",
intercept.bottom = FALSE, single.row=TRUE,
star.cutoffs=c(.05,.01,.001), notes.append = FALSE,
header=FALSE)
ClassRoom.Plot.2 <-ggplot(data = Math.Data,
aes(x = ActiveTime, y=Math))+
geom_point(aes(colour = Classroom, shape=Classroom))+ # we add color by cluster
geom_smooth(method = "lm", se = TRUE)+
xlab("Proportion of Time Engaged in Active Learning")+
ylab("Math Score")+ # add labels
theme(legend.position = "top")
ClassRoom.Plot.2
ClassRoom.Plot <-ggplot(data = Math.Data,
aes(x = ActiveTime, y=Math))+
geom_point(aes(colour = Classroom, shape=Classroom))+
geom_smooth(method = "lm", se = TRUE, aes(group = Classroom))+# we add group level
xlab("Proportion of Time Engaged in Active Learning")+ylab("Math Score")+ # add labels
theme(legend.position = "top")
ClassRoom.Plot
\[ Y_i = \beta_1X_1 +\beta_0 + \epsilon\]
Within each Group/Cluster (Students = \(i\) within Classroom 1 = \(j\), and we would replace \(j\) with \(g\) for Classroom 2 and so forth for each classroom!) \[y_{ij} = B_{1j}X_{ij} + B_{0j} + r_{ij} \]
Where \(B_{0j}\) = intercept in group j
Where \(B_{ij}\) = slope of students in group j
Where \(r_{ij}\) = residuals of each i (student) within group j
\[B_{0j} = \gamma_{00}+u_{0j} \]
\[B_{1j} = \gamma_{10}+u_{1j} \]
The degree of clustering is measured via the inter-class correlation ICC
The proportion of total variance that is between the groups \[ ICC = \frac{\tau}{\tau + \sigma^2}\]
\(\tau\) = variance in a variable due to differences between a group
\(\sigma^2\) = total variance across groups
We would fit this on the null model (no level 1 predictors)
library(lme4)
Model.Null<-lmer(Math ~1+(1|Classroom),
data=Math.Data, REML=FALSE)
summary(Model.Null)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: Math ~ 1 + (1 | Classroom)
## Data: Math.Data
##
## AIC BIC logLik deviance df.resid
## 408.6 415.7 -201.3 402.6 77
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.34755 -0.72263 0.01011 0.73079 2.21975
##
## Random effects:
## Groups Name Variance Std.Dev.
## Classroom (Intercept) 126.884 11.264
## Residual 6.668 2.582
## Number of obs: 80, groups: Classroom, 4
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 64.70 5.64 11.47
ranef(Model.Null)
## $Classroom
## (Intercept)
## C1 14.959676
## C2 5.307215
## C3 -5.190059
## C4 -15.076832
##
## with conditional variances for "Classroom"
sjstats
packagesjstats::icc(Model.Null)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.950
## Unadjusted ICC: 0.950
Model.1<-lmer(Math ~ActiveTime+(1|Classroom),
data=Math.Data, REML=FALSE)
summary(Model.1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: Math ~ ActiveTime + (1 | Classroom)
## Data: Math.Data
##
## AIC BIC logLik deviance df.resid
## 399.3 408.8 -195.6 391.3 76
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.13111 -0.77650 -0.04481 0.61218 2.50097
##
## Random effects:
## Groups Name Variance Std.Dev.
## Classroom (Intercept) 200.00 14.142
## Residual 5.61 2.368
## Number of obs: 80, groups: Classroom, 4
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 58.862 7.261 8.107
## ActiveTime 11.269 3.140 3.589
##
## Correlation of Fixed Effects:
## (Intr)
## ActiveTime -0.224
library(effects)
Results.Model.1<-Effect(c("ActiveTime"),Model.1,
xlevels=list(ActiveTime=seq(0,1,.2)))
Results.Model.1<-as.data.frame(Results.Model.1)
Final.Fixed.Plot.1 <-ggplot(data = Results.Model.1,
aes(x = ActiveTime, y =fit))+
geom_line(size=2)+
coord_cartesian(xlim = c(0, 1),ylim = c(0, 100))+
geom_ribbon(aes(ymin=fit-se, ymax=fit+se),alpha=.2)+
xlab("Proportion of Time Engaged in Active Learning")+
ylab("Math Score")+
theme(legend.position = "none")
Final.Fixed.Plot.1
#Fixed only, removed random effects
Math.Data$Model.1.Fitted<-predict(Model.1, re.form=NA)
ClassRoom.Plot.Bad <-ggplot(data = Math.Data)+
coord_cartesian(xlim = c(0, 1),ylim = c(0, 100))+
geom_point( aes(x = ActiveTime, y=Math,
colour = Classroom, shape=Classroom))+
geom_line(aes(x = ActiveTime, y=Model.1.Fitted))+
xlab("Proportion of Time Engaged in Active Learning")+
ylab("Math Score")+
theme(legend.position = "top")
ClassRoom.Plot.Bad
\[ R^2_{Marginal} = \frac{\sigma^2_{fixed}} {\sigma^2_{fixed}+\sigma^2_{random}+\sigma^2_{residual}} \]
library(performance)
r2_nakagawa(Model.1)
## # R2 for Mixed Models
##
## Conditional R2: 0.974
## Marginal R2: 0.044
The Marginal \(R^2\) is terrible.
Often you cannot simply add a scatterplot. However, there are ways around this (but you end up plotting every classroom)
There is a fast and dirty method (but it will not work well when you have lots of control variables)
# Fixed + Random effects
Math.Data$Model.1.Fitted.Random<-predict(Model.1)
ClassRoom.Plot.Better <-ggplot(data = Math.Data)+
coord_cartesian(xlim = c(0, 1),ylim = c(0, 100))+
geom_point(aes(x = ActiveTime, y=Math,
colour = Classroom, shape=Classroom))+
geom_line(aes(x = ActiveTime, y=Model.1.Fitted.Random,
colour = Classroom))+
xlab("Proportion of Time Engaged in Active Learning")+
ylab("Math Score")+
theme(legend.position = "top")
ClassRoom.Plot.Better
This is what the model basically fit (controlling for the random effect)!
a more accurate way to graph it is show the data in its clusters
ClassRoom.Plot.Best <-ggplot(data = Math.Data, group=Classroom)+
facet_wrap(~Classroom)+
coord_cartesian(xlim = c(0, 1),ylim = c(25, 100))+
geom_point(aes(x = ActiveTime, y=Math,
colour = Classroom, shape=Classroom))+
geom_line(aes(x = ActiveTime, y=Model.1.Fitted.Random))+
xlab("Proportion of Time Engaged in Active Learning")+
ylab("Math Score")+
theme(legend.position = "top")
ClassRoom.Plot.Best
\[ R^2_{conditional} = \frac{\sigma^2_{fixed}+\sigma^2_{random}} {\sigma^2_{fixed}+\sigma^2_{random}+\sigma^2_{residual}} \]
r2_nakagawa(Model.1)
## # R2 for Mixed Models
##
## Conditional R2: 0.974
## Marginal R2: 0.044
anova(Model.Null,Model.1)
## Data: Math.Data
## Models:
## Model.Null: Math ~ 1 + (1 | Classroom)
## Model.1: Math ~ ActiveTime + (1 | Classroom)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## Model.Null 3 408.59 415.74 -201.30 402.59
## Model.1 4 399.27 408.80 -195.64 391.27 11.321 1 0.0007662 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Math.Data$ActiveTime.C<-scale(Math.Data$ActiveTime, center=TRUE, scale=FALSE)
Model.1.GM<-lmer(Math ~ActiveTime.C+(1|Classroom),
data=Math.Data, REML=FALSE)
summary(Model.1.GM)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: Math ~ ActiveTime.C + (1 | Classroom)
## Data: Math.Data
##
## AIC BIC logLik deviance df.resid
## 399.3 408.8 -195.6 391.3 76
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.13111 -0.77650 -0.04481 0.61218 2.50097
##
## Random effects:
## Groups Name Variance Std.Dev.
## Classroom (Intercept) 200.00 14.142
## Residual 5.61 2.368
## Number of obs: 80, groups: Classroom, 4
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 64.704 7.076 9.144
## ActiveTime.C 11.269 3.140 3.589
##
## Correlation of Fixed Effects:
## (Intr)
## ActiveTim.C 0.000
Math.Data$Model.1.Fitted.Random.GM<-predict(Model.1.GM) #plots fixed Random effects
ClassRoom.Plot.Better <-ggplot()+
coord_cartesian(xlim = c(-.5,.5),ylim = c(0, 100))+
geom_point(data = Math.Data, aes(x = ActiveTime.C, y=Math,
colour = Classroom, shape=Classroom))+
geom_line(data = Math.Data, aes(x = ActiveTime.C, y=Model.1.Fitted.Random.GM,
colour = Classroom))+
xlab("Proportion of Time Engaged in Active Learning\n(Grand Mean Centered)")+
ylab("Math Score")+
theme(legend.position = "top")
ClassRoom.Plot.Better
### Rescale IV
library(dplyr)
Math.Data<-Math.Data %>%
group_by(Classroom) %>%
mutate(ActiveTime.Class.Centered = ActiveTime - mean(ActiveTime))
Model.1.GC<-lmer(Math ~ActiveTime.Class.Centered+(1|Classroom),
data=Math.Data, REML=FALSE)
summary(Model.1.GC)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: Math ~ ActiveTime.Class.Centered + (1 | Classroom)
## Data: Math.Data
##
## AIC BIC logLik deviance df.resid
## 397.4 406.9 -194.7 389.4 76
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.16057 -0.78325 -0.04672 0.62451 2.50992
##
## Random effects:
## Groups Name Variance Std.Dev.
## Classroom (Intercept) 126.937 11.267
## Residual 5.606 2.368
## Number of obs: 80, groups: Classroom, 4
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 64.704 5.640 11.473
## ActiveTime.Class.Centered 11.990 3.159 3.795
##
## Correlation of Fixed Effects:
## (Intr)
## ActvTm.Cl.C 0.000
Math.Data$Model.1.Fitted.Random.GC<-predict(Model.1.GC) #plots fixed Random effects
ClassRoom.Plot.Better <-ggplot()+
coord_cartesian(xlim = c(-.3,.3),ylim = c(0, 100))+
geom_point(data = Math.Data, aes(x = ActiveTime.Class.Centered, y=Math,
colour = Classroom, shape=Classroom))+
geom_line(data = Math.Data, aes(x = ActiveTime.Class.Centered, y=Model.1.Fitted.Random.GC,
colour = Classroom))+
xlab("Proportion of Time Engaged in Active Learning\n(Centered per Classroom)")+
ylab("Math Score")+
theme(legend.position = "top")
ClassRoom.Plot.Better