Lazy Contrasts

Phillip M. Alday

Last update: 20 September 2019

Contrasts matter even without interaction

Chicken food

data("chickwts")
head(chickwts)
##   weight      feed
## 1    179 horsebean
## 2    160 horsebean
## 3    136 horsebean
## 4    227 horsebean
## 5    217 horsebean
## 6    168 horsebean

Default contrast functions are horrible at names

Treatment (dummy) coding

summary(lm(weight ~ 1 + feed, data=chickwts))
## 
## Call:
## lm(formula = weight ~ 1 + feed, data = chickwts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -123.909  -34.413    1.571   38.170  103.091 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    323.583     15.834  20.436  < 2e-16 ***
## feedhorsebean -163.383     23.485  -6.957 2.07e-09 ***
## feedlinseed   -104.833     22.393  -4.682 1.49e-05 ***
## feedmeatmeal   -46.674     22.896  -2.039 0.045567 *  
## feedsoybean    -77.155     21.578  -3.576 0.000665 ***
## feedsunflower    5.333     22.393   0.238 0.812495    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 54.85 on 65 degrees of freedom
## Multiple R-squared:  0.5417, Adjusted R-squared:  0.5064 
## F-statistic: 15.36 on 5 and 65 DF,  p-value: 5.936e-10

Sum (effect) coding

contrasts(chickwts$feed) <- contr.sum(levels(chickwts$feed))
summary(lm(weight ~ 1 + feed, data=chickwts))
## 
## Call:
## lm(formula = weight ~ 1 + feed, data = chickwts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -123.909  -34.413    1.571   38.170  103.091 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  259.131      6.543  39.602  < 2e-16 ***
## feed1         64.452     14.490   4.448 3.47e-05 ***
## feed2        -98.931     15.601  -6.341 2.48e-08 ***
## feed3        -40.381     14.490  -2.787  0.00697 ** 
## feed4         17.778     15.005   1.185  0.24042    
## feed5        -12.703     13.641  -0.931  0.35519    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 54.85 on 65 degrees of freedom
## Multiple R-squared:  0.5417, Adjusted R-squared:  0.5064 
## F-statistic: 15.36 on 5 and 65 DF,  p-value: 5.936e-10

car provides better names and a nice reminder of what contrast you’re using

Treatment (dummy) coding

suppressPackageStartupMessages(library("car"))
contrasts(chickwts$feed) <- contr.Treatment(levels(chickwts$feed))
summary(lm(weight ~ 1 + feed, data=chickwts))
## 
## Call:
## lm(formula = weight ~ 1 + feed, data = chickwts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -123.909  -34.413    1.571   38.170  103.091 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        323.583     15.834  20.436  < 2e-16 ***
## feed[T.horsebean] -163.383     23.485  -6.957 2.07e-09 ***
## feed[T.linseed]   -104.833     22.393  -4.682 1.49e-05 ***
## feed[T.meatmeal]   -46.674     22.896  -2.039 0.045567 *  
## feed[T.soybean]    -77.155     21.578  -3.576 0.000665 ***
## feed[T.sunflower]    5.333     22.393   0.238 0.812495    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 54.85 on 65 degrees of freedom
## Multiple R-squared:  0.5417, Adjusted R-squared:  0.5064 
## F-statistic: 15.36 on 5 and 65 DF,  p-value: 5.936e-10

Sum (effect) coding

contrasts(chickwts$feed) <- contr.Sum(levels(chickwts$feed))
summary(lm(weight ~ 1 + feed, data=chickwts))
## 
## Call:
## lm(formula = weight ~ 1 + feed, data = chickwts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -123.909  -34.413    1.571   38.170  103.091 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        259.131      6.543  39.602  < 2e-16 ***
## feed[S.casein]      64.452     14.490   4.448 3.47e-05 ***
## feed[S.horsebean]  -98.931     15.601  -6.341 2.48e-08 ***
## feed[S.linseed]    -40.381     14.490  -2.787  0.00697 ** 
## feed[S.meatmeal]    17.778     15.005   1.185  0.24042    
## feed[S.soybean]    -12.703     13.641  -0.931  0.35519    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 54.85 on 65 degrees of freedom
## Multiple R-squared:  0.5417, Adjusted R-squared:  0.5064 
## F-statistic: 15.36 on 5 and 65 DF,  p-value: 5.936e-10

Interaction makes everything harder

Tenure-Track Faculty in the US

suppressPackageStartupMessages(library("carData"))
data(Salaries)
head(Salaries)
##        rank discipline yrs.since.phd yrs.service  sex salary
## 1      Prof          B            19          18 Male 139750
## 2      Prof          B            20          16 Male 173200
## 3  AsstProf          B             4           3 Male  79750
## 4      Prof          B            45          39 Male 115000
## 5      Prof          B            40          41 Male 141500
## 6 AssocProf          B             6           6 Male  97000

Contrasts matter for ANOVA!

Treatment (dummy) coding

contrasts(Salaries$rank) <- contr.Treatment(levels(Salaries$rank))
contrasts(Salaries$sex) <- contr.Treatment(levels(Salaries$sex))
contrasts(Salaries$discipline) <- contr.Treatment(levels(Salaries$discipline))
Anova(lm(salary ~ 1 + discipline * rank * sex, data=Salaries),type=3)
## Anova Table (Type III tests)
## 
## Response: salary
##                         Sum Sq  Df F value    Pr(>F)    
## (Intercept)         3.1916e+10   1 61.5463 4.309e-14 ***
## discipline          3.4557e+08   1  0.6664  0.414816    
## rank                6.0927e+09   2  5.8746  0.003068 ** 
## sex                 8.0354e+06   1  0.0155  0.901000    
## discipline:rank     3.5697e+08   2  0.3442  0.709011    
## discipline:sex      1.7226e+06   1  0.0033  0.954069    
## rank:sex            3.4341e+08   2  0.3311  0.718322    
## discipline:rank:sex 1.3239e+08   2  0.1277  0.880195    
## Residuals           1.9965e+11 385                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sum (effect) coding

contrasts(Salaries$rank) <- contr.Sum(levels(Salaries$rank))
contrasts(Salaries$sex) <- contr.Sum(levels(Salaries$sex))
contrasts(Salaries$discipline) <- contr.Sum(levels(Salaries$discipline))
Anova(lm(salary ~ 1 + discipline * rank * sex, data=Salaries),type=3)
## Anova Table (Type III tests)
## 
## Response: salary
##                         Sum Sq  Df   F value    Pr(>F)    
## (Intercept)         1.1569e+12   1 2230.9109 < 2.2e-16 ***
## discipline          8.5575e+09   1   16.5023 5.891e-05 ***
## rank                5.5310e+10   2   53.3296 < 2.2e-16 ***
## sex                 7.3907e+08   1    1.4252    0.2333    
## discipline:rank     5.4277e+08   2    0.5233    0.5930    
## discipline:sex      3.6989e+08   1    0.7133    0.3989    
## rank:sex            2.3139e+08   2    0.2231    0.8001    
## discipline:rank:sex 1.3239e+08   2    0.1277    0.8802    
## Residuals           1.9965e+11 385                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Type I

anova(lm(salary ~ 1 + discipline * rank * sex, data=Salaries))
## Analysis of Variance Table
## 
## Response: salary
##                      Df     Sum Sq    Mean Sq  F value    Pr(>F)    
## discipline            1 8.8508e+09 8.8508e+09  17.0679 4.427e-05 ***
## rank                  2 1.5281e+11 7.6405e+10 147.3408 < 2.2e-16 ***
## sex                   1 6.9407e+08 6.9407e+08   1.3384    0.2480    
## discipline:rank       2 5.2587e+08 2.6293e+08   0.5070    0.6027    
## discipline:sex        1 4.2147e+08 4.2147e+08   0.8128    0.3679    
## rank:sex              2 2.1849e+08 1.0925e+08   0.2107    0.8101    
## discipline:rank:sex   2 1.3239e+08 6.6196e+07   0.1277    0.8802    
## Residuals           385 1.9965e+11 5.1856e+08                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Type II

Anova(lm(salary ~ 1 + discipline * rank * sex, data=Salaries),type=2)
## Anova Table (Type II tests)
## 
## Response: salary
##                         Sum Sq  Df  F value    Pr(>F)    
## discipline          1.8475e+10   1  35.6269 5.428e-09 ***
## rank                1.4524e+11   2 140.0446 < 2.2e-16 ***
## sex                 7.5876e+08   1   1.4632    0.2272    
## discipline:rank     4.7483e+08   2   0.4578    0.6330    
## discipline:sex      4.6197e+08   1   0.8909    0.3458    
## rank:sex            2.1849e+08   2   0.2107    0.8101    
## discipline:rank:sex 1.3239e+08   2   0.1277    0.8802    
## Residuals           1.9965e+11 385                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Type III

Anova(lm(salary ~ 1 + discipline * rank * sex, data=Salaries),type=3)
## Anova Table (Type III tests)
## 
## Response: salary
##                         Sum Sq  Df   F value    Pr(>F)    
## (Intercept)         1.1569e+12   1 2230.9109 < 2.2e-16 ***
## discipline          8.5575e+09   1   16.5023 5.891e-05 ***
## rank                5.5310e+10   2   53.3296 < 2.2e-16 ***
## sex                 7.3907e+08   1    1.4252    0.2333    
## discipline:rank     5.4277e+08   2    0.5233    0.5930    
## discipline:sex      3.6989e+08   1    0.7133    0.3989    
## rank:sex            2.3139e+08   2    0.2231    0.8001    
## discipline:rank:sex 1.3239e+08   2    0.1277    0.8802    
## Residuals           1.9965e+11 385                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Thinking about effects (and not significance) is the key to understanding

Estimating effects

suppressPackageStartupMessages(library("effects"))

pay <- lm(salary ~ 1 + discipline * rank * sex, data=Salaries)
pay.eff <- allEffects(pay)
summary(pay.eff)
##  model: salary ~ 1 + discipline * rank * sex
## 
##  discipline*rank*sex effect
## , , sex = Female
## 
##           rank
## discipline AsstProf AssocProf     Prof
##          A 72933.33  72128.50 109631.9
##          B 84189.80  99435.67 131836.2
## 
## , , sex = Male
## 
##           rank
## discipline AsstProf AssocProf     Prof
##          A 74269.61  85048.86 120619.3
##          B 84647.08 101621.53 133518.4
## 
## 
##  Lower 95 Percent Confidence Limits
## , , sex = Female
## 
##           rank
## discipline AsstProf AssocProf      Prof
##          A 54654.83  49742.00  93802.23
##          B 64166.71  81157.17 117677.74
## 
## , , sex = Male
## 
##           rank
## discipline AsstProf AssocProf     Prof
##          A 63716.51  75503.23 116582.2
##          B 77383.94  93706.71 129513.7
## 
## 
##  Upper 95 Percent Confidence Limits
## , , sex = Female
## 
##           rank
## discipline  AsstProf AssocProf     Prof
##          A  91211.83   94515.0 125461.5
##          B 104212.89  117714.2 145994.7
## 
## , , sex = Male
## 
##           rank
## discipline AsstProf AssocProf     Prof
##          A 84822.71   94594.5 124656.3
##          B 91910.22  109536.4 137523.0

As a dataframe

as.data.frame(pay.eff[["discipline:rank:sex"]])
##    discipline      rank    sex       fit        se     lower     upper
## 1           A  AsstProf Female  72933.33  9296.619  54654.83  91211.83
## 2           B  AsstProf Female  84189.80 10183.936  64166.71 104212.89
## 3           A AssocProf Female  72128.50 11385.986  49742.00  94515.00
## 4           B AssocProf Female  99435.67  9296.619  81157.17 117714.17
## 5           A      Prof Female 109631.87  8051.108  93802.23 125461.52
## 6           B      Prof Female 131836.20  7201.130 117677.74 145994.66
## 7           A  AsstProf   Male  74269.61  5367.405  63716.51  84822.71
## 8           B  AsstProf   Male  84647.08  3694.102  77383.94  91910.22
## 9           A AssocProf   Male  85048.86  4855.001  75503.23  94594.50
## 10          B AssocProf   Male 101621.53  4025.554  93706.71 109536.35
## 11          A      Prof   Male 120619.26  2053.280 116582.22 124656.31
## 12          B      Prof   Male 133518.36  2036.787 129513.74 137522.98

Plotting effects I: using plot()

See also ?plot.eff

plot(pay.eff,
     multiline=TRUE,
     ci.style='auto',
     z.var='sex',
     main='Effects as modelled',
     ylab='salary (USD)')

Plotting effects II: using ggplot2

suppressPackageStartupMessages(library("ggplot2"))
pay.eff.df <- as.data.frame(pay.eff[["discipline:rank:sex"]])

ggplot(pay.eff.df, aes(x=rank,y=fit,ymax=upper,ymin=lower,color=sex, group=sex)) +
  geom_pointrange() + 
  geom_line() + 
  facet_grid(~discipline) + 
  theme_light() + 
  labs(title="Effects as modelled",y='salary (USD)')

Posthoc testing and pairwise comparison

Estimated marginal means

Make sure to check out the extensive documentation!

suppressPackageStartupMessages(library("emmeans"))
chicks <- lm(weight ~ 1 + feed, data=chickwts)
chicks.em <- emmeans(chicks, ~ feed)
summary(chicks.em)
##  feed      emmean   SE df lower.CL upper.CL
##  casein       324 15.8 65      292      355
##  horsebean    160 17.3 65      126      195
##  linseed      219 15.8 65      187      250
##  meatmeal     277 16.5 65      244      310
##  soybean      246 14.7 65      217      276
##  sunflower    329 15.8 65      297      361
## 
## Confidence level used: 0.95

Plotting

plot(chicks.em)

Pairwise differences

pairs(chicks.em)
##  contrast              estimate   SE df t.ratio p.value
##  casein - horsebean      163.38 23.5 65  6.957  <.0001 
##  casein - linseed        104.83 22.4 65  4.682  0.0002 
##  casein - meatmeal        46.67 22.9 65  2.039  0.3325 
##  casein - soybean         77.15 21.6 65  3.576  0.0084 
##  casein - sunflower       -5.33 22.4 65 -0.238  0.9999 
##  horsebean - linseed     -58.55 23.5 65 -2.493  0.1413 
##  horsebean - meatmeal   -116.71 24.0 65 -4.870  0.0001 
##  horsebean - soybean     -86.23 22.7 65 -3.797  0.0042 
##  horsebean - sunflower  -168.72 23.5 65 -7.184  <.0001 
##  linseed - meatmeal      -58.16 22.9 65 -2.540  0.1277 
##  linseed - soybean       -27.68 21.6 65 -1.283  0.7933 
##  linseed - sunflower    -110.17 22.4 65 -4.920  0.0001 
##  meatmeal - soybean       30.48 22.1 65  1.379  0.7391 
##  meatmeal - sunflower    -52.01 22.9 65 -2.271  0.2207 
##  soybean - sunflower     -82.49 21.6 65 -3.823  0.0039 
## 
## P value adjustment: tukey method for comparing a family of 6 estimates

Plotting

plot(pairs(chicks.em))

Understanding interactions through conditioning

pay <- lm(salary ~ 1 + discipline * rank * sex, data=Salaries)
pay.em <- emmeans(pay, ~ sex | rank)
## NOTE: Results may be misleading due to involvement in interactions
summary(pay.em)
## rank = AsstProf:
##  sex    emmean   SE  df lower.CL upper.CL
##  Female  78562 6895 385    65006    92117
##  Male    79458 3258 385    73053    85864
## 
## rank = AssocProf:
##  sex    emmean   SE  df lower.CL upper.CL
##  Female  85782 7350 385    71332   100233
##  Male    93335 3153 385    87135    99535
## 
## rank = Prof:
##  sex    emmean   SE  df lower.CL upper.CL
##  Female 120734 5401 385   110115   131353
##  Male   127069 1446 385   124226   129912
## 
## Results are averaged over the levels of: discipline 
## Confidence level used: 0.95

Pairwise differences

pairs(pay.em)
## rank = AsstProf:
##  contrast      estimate   SE  df t.ratio p.value
##  Female - Male     -897 7626 385 -0.118  0.9064 
## 
## rank = AssocProf:
##  contrast      estimate   SE  df t.ratio p.value
##  Female - Male    -7553 7998 385 -0.944  0.3455 
## 
## rank = Prof:
##  contrast      estimate   SE  df t.ratio p.value
##  Female - Male    -6335 5591 385 -1.133  0.2579 
## 
## Results are averaged over the levels of: discipline

Plotting

plot(pairs(pay.em))

emmeans isn’t a substitute for thinking

boom.

(pairs(emmeans(pay, ~  sex * discipline | rank)))
## rank = AsstProf:
##  contrast            estimate    SE  df t.ratio p.value
##  Female,A - Male,A      -1336 10735 385 -0.124  0.9993 
##  Female,A - Female,B   -11256 13789 385 -0.816  0.8467 
##  Female,A - Male,B     -11714 10004 385 -1.171  0.6456 
##  Male,A - Female,B      -9920 11512 385 -0.862  0.8245 
##  Male,A - Male,B       -10377  6516 385 -1.593  0.3840 
##  Female,B - Male,B       -457 10833 385 -0.042  1.0000 
## 
## rank = AssocProf:
##  contrast            estimate    SE  df t.ratio p.value
##  Female A - Male A     -12920 12378 385 -1.044  0.7237 
##  Female A - Female B   -27307 14699 385 -1.858  0.2482 
##  Female A - Male B     -29493 12077 385 -2.442  0.0710 
##  Male A - Female B     -14387 10488 385 -1.372  0.5179 
##  Male A - Male B       -16573  6307 385 -2.628  0.0441 
##  Female B - Male B      -2186 10131 385 -0.216  0.9964 
## 
## rank = Prof:
##  contrast            estimate    SE  df t.ratio p.value
##  Female A - Male A     -10987  8309 385 -1.322  0.5492 
##  Female A - Female B   -22204 10802 385 -2.056  0.1697 
##  Female A - Male B     -23886  8305 385 -2.876  0.0220 
##  Male A - Female B     -11217  7488 385 -1.498  0.4397 
##  Male A - Male B       -12899  2892 385 -4.460  0.0001 
##  Female B - Male B      -1682  7484 385 -0.225  0.9960 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

Session Info

sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=nl_NL.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=nl_NL.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=nl_NL.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] emmeans_1.3.4 ggplot2_3.1.1 effects_4.1-1 car_3.0-3     carData_3.0-2
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5  xfun_0.7          reshape2_1.4.3   
##  [4] purrr_0.3.2       mitools_2.4       splines_3.6.1    
##  [7] haven_2.1.0       lattice_0.20-38   colorspace_1.4-1 
## [10] htmltools_0.3.6   yaml_2.2.0        survival_2.44-1.1
## [13] rlang_0.4.0       pillar_1.4.1      nloptr_1.2.1     
## [16] withr_2.1.2       glue_1.3.1        foreign_0.8-71   
## [19] DBI_1.0.0         readxl_1.3.1      plyr_1.8.4       
## [22] stringr_1.4.0     munsell_0.5.0     gtable_0.3.0     
## [25] cellranger_1.1.0  zip_2.0.2         mvtnorm_1.0-10   
## [28] coda_0.19-2       evaluate_0.14     labeling_0.3     
## [31] knitr_1.23        rio_0.5.16        forcats_0.4.0    
## [34] curl_3.3          Rcpp_1.0.1        xtable_1.8-4     
## [37] scales_1.0.0      abind_1.4-5       lme4_1.1-21      
## [40] hms_0.4.2         digest_0.6.19     stringi_1.4.3    
## [43] openxlsx_4.1.0.1  dplyr_0.8.2       survey_3.36      
## [46] grid_3.6.1        tools_3.6.1       magrittr_1.5     
## [49] lazyeval_0.2.2    tibble_2.1.3      crayon_1.3.4     
## [52] pkgconfig_2.0.2   MASS_7.3-51.4     Matrix_1.2-17    
## [55] data.table_1.12.2 estimability_1.3  assertthat_0.2.1 
## [58] minqa_1.2.4       rmarkdown_1.13    R6_2.4.0         
## [61] boot_1.3-23       nnet_7.3-12       nlme_3.1-141     
## [64] compiler_3.6.1