\[Y = B_1X + B_2Z + B_0\]
set.seed(42)
n <- 200
# Uniform distribution of Ages (7 to 17 years)
X <- runif(n, 7, 17)
Xc <- scale(X, scale = FALSE)  # mean-center only; no rescaling
# Normal distribution of IQ (mean 100, SD 15)
Z <- rnorm(n, 100, 15)
Zc <- scale(Z, scale = FALSE)
# Our equation to create Y: Age and IQ main effects, an Age x IQ
# interaction, an intercept of 100, and normal noise (SD = 15)
Y <- 3 * Xc + .2 * Zc + .4 * Xc * Zc + 100 + rnorm(n, sd = 15)
# Build our data frame
Reading.Data <- data.frame(Age = X, IQ = Z, ReadingSpeed = Y)
library(GGally)
# Pairs plot with loess smooths in the lower triangle
DiagPlot <- ggpairs(Reading.Data,
                    lower = list(continuous = "smooth"))
DiagPlot

# Create mean-centered predictors. scale() returns an n x 1 matrix;
# [, 1] extracts it back to a plain numeric vector.
Reading.Data$Age.C <- scale(Reading.Data$Age, center = TRUE, scale = FALSE)[, 1]
Reading.Data$IQ.C <- scale(Reading.Data$IQ, center = TRUE, scale = FALSE)[, 1]
# Fit the main-effects model and the interaction model on centered predictors
Centered.Read.1 <- lm(ReadingSpeed ~ Age.C + IQ.C, data = Reading.Data)
Centered.Read.2 <- lm(ReadingSpeed ~ Age.C * IQ.C, data = Reading.Data)
# Render both models side by side as an HTML table
library(stargazer)
stargazer(Centered.Read.1, Centered.Read.2, type = "html",
          column.labels = c("Main Effects", "Interaction"),
          intercept.bottom = FALSE, single.row = TRUE,
          notes.append = FALSE, header = FALSE)
| Dependent variable: | ||
| ReadingSpeed | ||
| Main Effects | Interaction | |
| (1) | (2) | |
| Constant | 99.441*** (1.616) | 99.363*** (1.009) |
| Age.C | 2.287*** (0.555) | 2.412*** (0.346) |
| IQ.C | 0.213* (0.112) | 0.234*** (0.070) |
| Age.C:IQ.C | 0.402*** (0.023) | |
| Observations | 200 | 200 |
| R2 | 0.095 | 0.649 |
| Adjusted R2 | 0.086 | 0.644 |
| Residual Std. Error | 22.858 (df = 197) | 14.267 (df = 196) |
| F Statistic | 10.326*** (df = 2; 197) | 120.907*** (df = 3; 196) |
| Note: | p<0.1; p<0.05; p<0.01 | |
# Nested-model comparison: does adding the Age.C x IQ.C interaction
# significantly improve fit over the main-effects model?
ChangeInR<-anova(Centered.Read.1,Centered.Read.2)
knitr::kable(ChangeInR, digits=4)
| Res.Df | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|
| 197 | 102930.36 | NA | NA | NA | NA |
| 196 | 39893.42 | 1 | 63036.93 | 309.7062 | 0 |
library(ggfortify)
# Residuals-vs-fitted and normal Q-Q diagnostic plots (which = 1:2)
# for the main-effects model ...
autoplot(Centered.Read.1, which = 1:2, label.size = 1) + theme_bw()

# ... and for the interaction model
autoplot(Centered.Read.2, which = 1:2, label.size = 1) + theme_bw()

library(effects)
# Predicted ReadingSpeed across centered Age (-3 to +3) at three fixed
# values of centered IQ (-15, 0, +15 — i.e. +/- 1 SD on the generating scale).
# Note: effect() takes the term as a single string; the original wrapped it
# in a redundant c().
Inter.1a <- effect("Age.C*IQ.C", Centered.Read.2,
                   xlevels = list(Age.C = seq(-3, 3, 1),
                                  IQ.C = c(-15, 0, 15)))
knitr::kable(summary(Inter.1a)$effect, digits = 4)
plot(Inter.1a, multiline = TRUE)
| -15 | 0 | 15 | |
|---|---|---|---|
| -3 | 106.7108 | 92.1283 | 77.5458 |
| -2 | 103.0896 | 94.5398 | 85.9900 |
| -1 | 99.4684 | 96.9513 | 94.4343 |
| 0 | 95.8471 | 99.3629 | 102.8786 |
| 1 | 92.2259 | 101.7744 | 111.3229 |
| 2 | 88.6047 | 104.1859 | 119.7671 |
| 3 | 84.9835 | 106.5974 | 128.2114 |

# Probe the interaction at the observed quartiles (0/25/50/75/100%)
# of centered IQ instead of arbitrary fixed values
IQ.Quantile <- quantile(Reading.Data$IQ.C, probs = c(0, .25, .50, .75, 1))
IQ.Quantile <- round(IQ.Quantile, 2)
Inter.1b <- effect("Age.C*IQ.C", Centered.Read.2,
                   xlevels = list(Age.C = seq(-3, 3, 1),
                                  IQ.C = IQ.Quantile))
plot(Inter.1b, multiline = TRUE)

# Probe the interaction at mean - 1 SD, mean, and mean + 1 SD of centered IQ.
# (The mean of a centered variable is ~0, so these are approximately -SD, 0, +SD.)
# Computed in one vectorized expression instead of three repeated mean()/sd() calls.
IQ.SD <- mean(Reading.Data$IQ.C) + c(-1, 0, 1) * sd(Reading.Data$IQ.C)
IQ.SD <- round(IQ.SD, 2)
Inter.1c <- effect("Age.C*IQ.C", Centered.Read.2,
                   xlevels = list(Age.C = seq(-3, 3, 1),
                                  IQ.C = IQ.SD))
plot(Inter.1c, multiline = TRUE)

# Toy demonstration: with identical raw X and Z, the product XZ rises
# monotonically with X, so the interaction term is collinear with the
# main effects.
X <- seq(0, 10, by = 2)
Z <- seq(0, 10, by = 2)
XZ <- X * Z
plot(XZ)

# After mean-centering, the product is symmetric around the middle of the
# range and no longer tracks X itself.
X.C <- X - mean(X)
Z.C <- Z - mean(Z)
XZ.C <- X.C * Z.C
plot(XZ.C)

Correlations: \(r_{x_c,z_c}\) = 1, \(r_{x_c,xz_c}\) = 0, \(r_{z_c,xz_c}\) = 0
Let’s apply this to our class example
See below where I manually multiply the values and you can see a very strong positive slope
Left plot = Raw Score, Right plot = Centered Score


# The same two models fit on the raw (uncentered) predictors for comparison
Uncentered.Read.1 <- lm(ReadingSpeed ~ IQ + Age, data = Reading.Data)
Uncentered.Read.2 <- lm(ReadingSpeed ~ IQ * Age, data = Reading.Data)
| Dependent variable: | ||
| ReadingSpeed | ||
| Main Effects | Interaction | |
| (1) | (2) | |
| Constant | 50.328*** (13.134) | 534.570*** (28.711) |
| IQ | 0.213* (0.112) | -4.681*** (0.287) |
| Age | 2.287*** (0.555) | -37.512*** (2.288) |
| IQ:Age | 0.402*** (0.023) | |
| Observations | 200 | 200 |
| R2 | 0.095 | 0.649 |
| Adjusted R2 | 0.086 | 0.644 |
| Residual Std. Error | 22.858 (df = 197) | 14.267 (df = 196) |
| F Statistic | 10.326*** (df = 2; 197) | 120.907*** (df = 3; 196) |
| Note: | p<0.1; p<0.05; p<0.01 | |
# Variance inflation factors for the uncentered interaction model.
# NOTE(review): vif() comes from the car package, which is never loaded
# explicitly in this file — presumably attached as a dependency of another
# library; confirm.
vif(Uncentered.Read.2)
## IQ Age IQ:Age
## 16.70086 43.64111 59.58237
# After centering, the product term is nearly uncorrelated with the main
# effects, so the VIFs drop to ~1
vif(Centered.Read.2)
## Age.C IQ.C Age.C:IQ.C
## 1.000440 1.000316 1.000716
set.seed(42)
# Simulate 250 people
n <- 250
# Training (uniform, low to high)
X <- runif(n, -10, 10)
# Normal distribution of Challenge Difficulty
Z <- rnorm(n, 0, 15)
# Our equation to create Y: Training and Challenge main effects,
# a Training x Challenge interaction, intercept 16, noise SD 50
Y <- 2.2 * X + 2.46 * Z + .75 * X * Z + 16 + rnorm(n, sd = 50)
# Build our data frame
Skill.Data <- data.frame(Training = X, Challenge = Z, Performance = Y)
# Run our regressions: main effects only, then with the interaction
Skill.Model.1 <- lm(Performance ~ Training + Challenge, data = Skill.Data)
Skill.Model.2 <- lm(Performance ~ Training * Challenge, data = Skill.Data)

| Dependent variable: | ||
| Performance | ||
| Main Effects | Interaction | |
| (1) | (2) | |
| Constant | 14.238*** (4.909) | 16.173*** (3.268) |
| Training | 0.895 (0.843) | 1.542*** (0.562) |
| Challenge | 2.855*** (0.354) | 2.600*** (0.236) |
| Training:Challenge | 0.762*** (0.043) | |
| Observations | 250 | 250 |
| R2 | 0.210 | 0.652 |
| Adjusted R2 | 0.204 | 0.648 |
| Residual Std. Error | 77.510 (df = 247) | 51.571 (df = 246) |
| F Statistic | 32.867*** (df = 2; 247) | 153.486*** (df = 3; 246) |
| Note: | p<0.1; p<0.05; p<0.01 | |
We can use the rockchalk library to visualize our
interaction and test our simple slopes.
library(rockchalk)
# Plot the simple slope of Training at mean - 1 SD, mean, and mean + 1 SD
# of Challenge, then test each simple slope. testSlopes() also reports the
# Johnson-Neyman interval (values of Challenge where the Training slope is
# NOT significant — see output below).
m1ps <- plotSlopes(Skill.Model.2, modx = "Challenge", plotx = "Training", n=3, modxVals="std.dev")
m1psts <- testSlopes(m1ps)
round(m1psts$hypotests,4)
## Values of Challenge OUTSIDE this interval:
## lo hi
## -3.5050807 -0.5730182
## cause the slope of (b1 + b2*Challenge)Training to be statistically significant
## "Challenge" slope Std. Error t value Pr(>|t|)
## (m-sd) -14.50 -9.5013 0.8131 -11.6848 0.0000
## (m) -0.61 1.0771 0.5611 1.9196 0.0561
## (m+sd) 13.28 11.6554 0.8282 14.0737 0.0000

# Same simple-slopes analysis, but probing Training's slope at the five
# quantiles (0/25/50/75/100%) of Challenge instead of +/- 1 SD
m1pQ <- plotSlopes(Skill.Model.2, modx = "Challenge", plotx = "Training", n=5, modxVals="quantile")
m1pQts <- testSlopes(m1pQ)
round(m1pQts$hypotests,4)
## Values of Challenge OUTSIDE this interval:
## lo hi
## -3.5050807 -0.5730182
## cause the slope of (b1 + b2*Challenge)Training to be statistically significant
## "Challenge" slope Std. Error t value Pr(>|t|)
## 0% -40.4989 -29.3016 1.7993 -16.2847 0.0000
## 25% -10.5190 -6.4694 0.6990 -9.2555 0.0000
## 50% -0.7985 0.9335 0.5610 1.6640 0.0974
## 75% 8.8382 8.2726 0.6994 11.8278 0.0000
## 100% 36.8939 29.6394 1.7214 17.2183 0.0000

set.seed(42)
n <- 250
# Fear of Health: mean-centered uniform on [1, 9]
X <- scale(runif(n, 1, 9), scale = FALSE)
# Centered Self-Efficacy: mean-centered uniform on [0, 15]
Z <- scale(runif(n, 0, 15), scale = FALSE)
# Our equation to create Y: linear and quadratic Health terms, each
# moderated by Self-Efficacy, plus intercept 35 and noise (SD = 10)
Y <- -.05*X - .20*X^2 + .15*X*Z + .22*(X^2)*Z+ 35 + rnorm(n, sd=10)
# Build our data frame
Smoke.Data<-data.frame(Smoking=Y,Health=X,SelfEfficacy=Z)
# Run our regressions with orthogonal polynomials (poly default) ...
Smoke.Model.1<-lm(Smoking~poly(Health,2)+SelfEfficacy,Smoke.Data)
Smoke.Model.2<-lm(Smoking~poly(Health,2)*SelfEfficacy,Smoke.Data)
# ... and with raw (non-orthogonal) polynomials for comparison.
# NOTE: raw=T is kept literally because the text "raw = T" appears in the
# coefficient labels of the rendered tables below.
Smoke.Model.1.R<-lm(Smoking~poly(Health,2, raw=T)+SelfEfficacy,Smoke.Data)
Smoke.Model.2.R<-lm(Smoking~poly(Health,2, raw=T)*SelfEfficacy,Smoke.Data)
| Dependent variable: | ||
| Smoking | ||
| Main Effects | Interaction | |
| (1) | (2) | |
| Constant | 33.263*** (0.680) | 33.493*** (0.621) |
| poly(Health, 2)1 | 1.010 (10.786) | 3.083 (9.813) |
| poly(Health, 2)2 | -9.037 (10.766) | -6.505 (9.795) |
| SelfEfficacy | 1.188*** (0.155) | 1.189*** (0.141) |
| poly(Health, 2)1:SelfEfficacy | 3.042 (2.218) | |
| poly(Health, 2)2:SelfEfficacy | 16.361*** (2.288) | |
| Observations | 250 | 250 |
| R2 | 0.197 | 0.341 |
| Adjusted R2 | 0.187 | 0.328 |
| Residual Std. Error | 10.759 (df = 246) | 9.781 (df = 244) |
| F Statistic | 20.058*** (df = 3; 246) | 25.297*** (df = 5; 244) |
| Note: | p<0.1; p<0.05; p<0.01 | |
| Dependent variable: | ||
| Smoking | ||
| Main Effects Non-Ortho | Interaction Non-Ortho | |
| (1) | (2) | |
| Constant | 33.909*** (1.027) | 33.958*** (0.934) |
| poly(Health, 2, raw = T)1 | 0.009 (0.294) | 0.070 (0.268) |
| poly(Health, 2, raw = T)2 | -0.119 (0.142) | -0.086 (0.129) |
| SelfEfficacy | 1.188*** (0.155) | 0.020 (0.217) |
| poly(Health, 2, raw = T)1:SelfEfficacy | 0.116* (0.060) | |
| poly(Health, 2, raw = T)2:SelfEfficacy | 0.216*** (0.030) | |
| Observations | 250 | 250 |
| R2 | 0.197 | 0.341 |
| Adjusted R2 | 0.187 | 0.328 |
| Residual Std. Error | 10.759 (df = 246) | 9.781 (df = 244) |
| F Statistic | 20.058*** (df = 3; 246) | 25.297*** (df = 5; 244) |
| Note: | p<0.1; p<0.05; p<0.01 | |
This works much like the effects package as I showed you above and last week.
SmokeInterPlot <- plotCurves(Smoke.Model.2,
modx = "SelfEfficacy", plotx = "Health",
n=3,modxVals="std.dev")

plotCurves(Smoke.Model.1, plotx = "Health")

# Linear-only versions of the smoking models (no quadratic Health term)
Smoke.Model.1.L <- lm(Smoking ~ Health + SelfEfficacy, data = Smoke.Data)
Smoke.Model.2.L <- lm(Smoking ~ Health * SelfEfficacy, data = Smoke.Data)
| Dependent variable: | ||
| Smoking | ||
| Main Effects | Interaction | |
| (1) | (2) | |
| Constant | 33.263*** (0.680) | 33.334*** (0.680) |
| Health | 0.028 (0.293) | 0.042 (0.292) |
| SelfEfficacy | 1.193*** (0.155) | 1.207*** (0.155) |
| Health:SelfEfficacy | 0.097 (0.066) | |
| Observations | 250 | 250 |
| R2 | 0.194 | 0.201 |
| Adjusted R2 | 0.188 | 0.192 |
| Residual Std. Error | 10.752 (df = 247) | 10.727 (df = 246) |
| F Statistic | 29.770*** (df = 2; 247) | 20.666*** (df = 3; 246) |
| Note: | p<0.1; p<0.05; p<0.01 | |
# Nested-model comparison: does the Health x SelfEfficacy interaction
# improve the linear model? (Output below: F = 2.18, p = .14 — it does not.)
ChangeInR.Smoke<-anova(Smoke.Model.1.L,Smoke.Model.2.L)
knitr::kable(ChangeInR.Smoke, digits=4)
| Res.Df | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|
| 247 | 28557.09 | NA | NA | NA | NA |
| 246 | 28306.63 | 1 | 250.4599 | 2.1766 | 0.1414 |
# Simple slopes of Health at mean +/- 1 SD of SelfEfficacy for the linear model
SmokeInterPlot.Linear <- plotSlopes(Smoke.Model.2.L, modx = "SelfEfficacy", plotx = "Health", n=3, modxVals="std.dev")

# Side-by-side HTML table comparing the linear-interaction and
# polynomial-interaction models
stargazer(
  Smoke.Model.2.L, Smoke.Model.2,
  type = "html",
  column.labels = c("Linear", "Poly"),
  intercept.bottom = FALSE,
  single.row = TRUE,
  notes.append = FALSE,
  header = FALSE
)
| Dependent variable: | ||
| Smoking | ||
| Linear | Poly | |
| (1) | (2) | |
| Constant | 33.334*** (0.680) | 33.493*** (0.621) |
| Health | 0.042 (0.292) | |
| poly(Health, 2)1 | 3.083 (9.813) | |
| poly(Health, 2)2 | -6.505 (9.795) | |
| SelfEfficacy | 1.207*** (0.155) | 1.189*** (0.141) |
| Health:SelfEfficacy | 0.097 (0.066) | |
| poly(Health, 2)1:SelfEfficacy | 3.042 (2.218) | |
| poly(Health, 2)2:SelfEfficacy | 16.361*** (2.288) | |
| Observations | 250 | 250 |
| R2 | 0.201 | 0.341 |
| Adjusted R2 | 0.192 | 0.328 |
| Residual Std. Error | 10.727 (df = 246) | 9.781 (df = 244) |
| F Statistic | 20.666*** (df = 3; 246) | 25.297*** (df = 5; 244) |
| Note: | p<0.1; p<0.05; p<0.01 | |
# Nested-model comparison of the linear vs. polynomial interaction models.
# The reduced (linear) model is listed first so the added Df and Sum of Sq
# are positive; the original order anova(full, reduced) produced Df = -2
# and a negative Sum of Sq in the table below.
ChangeInR.Smoke.2 <- anova(Smoke.Model.2.L, Smoke.Model.2)
knitr::kable(ChangeInR.Smoke.2, digits = 4)
| Res.Df | RSS | Df | Sum of Sq | F | Pr(>F) |
|---|---|---|---|---|---|
| 244 | 23341.19 | NA | NA | NA | NA |
| 246 | 28306.63 | -2 | -4965.442 | 25.9534 | 0 |
# Residuals-vs-fitted and Q-Q diagnostics for the polynomial interaction model ...
autoplot(Smoke.Model.2, which = 1:2, label.size = 1) + theme_bw()

# ... and for the linear interaction model
autoplot(Smoke.Model.2.L, which = 1:2, label.size = 1) + theme_bw()
