Status

Estimated and checked against the book:

m5.1
m5.2
m5.3
m5.4
m5.5
m5.6
m5.7
m5.8
m5.9

Warning: there are small numerical differences, probably due to the difference between quap and Stan.

Libraries

library(tidyverse)
library(tidybayes)
library(rstan)
library(patchwork)
options(mc.cores = 4)

Section 5.1: Spurious association

Plausible regression lines implied by the priors:

We will estimate a series of regression models with a constant \(\alpha\) and regression coefficients \(\beta_k\), and these priors:

\[\alpha \sim N(0, .2)\] \[\beta_k \sim N(0, .5)\]

To see if these priors make sense, we can plot a few of the regression lines implied by these priors. To do this, we draw random numbers from the distributions above, and we plot the corresponding regression lines:

a = rnorm(50, 0, .2)
b = rnorm(50, 0, .5)
p <- ggplot()
for (i in 1:50) {
    p <- p + geom_abline(slope = b[i], intercept = a[i])
}
p + xlim(-3, 3) + ylim(-3, 3) +
    labs(x = 'Median age marriage (std)',
         y = 'Divorce rate (std)')

stan_program <- '
data {
  int<lower=1> n;   // number of observations
  int<lower=1> K;   // number of regressors (including constant)
  vector[n] Divorce;      // outcome
  matrix[n, K] X;   // regressors
}
parameters {
  real<lower=0,upper=50> sigma;    // scale
  vector[K] b;                     // coefficients (including constant)
}
transformed parameters {
  vector[n] mu;                    // location
  mu = X * b;
}
model {
  Divorce ~ normal(mu, sigma);    // probability model
  sigma ~ exponential(1);   // prior for scale
  b[1] ~ normal(0, 0.2);    // prior for intercept
  for (i in 2:K) {          // priors for coefficients
    b[i] ~ normal(0, 0.5);
  }
}
generated quantities {
  vector[n] yhat;           // predicted outcome
  for (i in 1:n) yhat[i] = normal_rng(mu[i], sigma);
}
'

standardize <- function(x) as.vector(scale(x))
WaffleDivorce <- read.csv('data/WaffleDivorce.csv', sep = ';') %>%
                 mutate(across(c(Divorce, Marriage, MedianAgeMarriage), standardize))
 
stan_data <- WaffleDivorce %>%
             compose_data(K = 2, 
                          y = Divorce,
                          X = model.matrix(~MedianAgeMarriage, .)) 

m5.1 <- stan(model_code = stan_program, data = stan_data)

stan_data <- WaffleDivorce %>%
             compose_data(K = 2, 
                          y = Divorce,
                          X = model.matrix(~Marriage, .)) 
m5.2 <- stan(model_code = stan_program, data = stan_data)

stan_data <- WaffleDivorce %>%
             compose_data(K = 3, 
                          y = Divorce,
                          X = model.matrix(~MedianAgeMarriage + Marriage, .)) 
m5.3 <- stan(model_code = stan_program, data = stan_data)

res5.1 <- m5.1 %>%
          spread_draws(b[i]) %>%
          mean_qi %>%
          mutate(i = c('Intercept', 'bA'),
                 Model = 'm5.1')
res5.2 <- m5.2 %>%
          spread_draws(b[i]) %>%
          mean_qi %>%
          mutate(i = c('Intercept', 'bM'),
                 Model = 'm5.2')
res5.3 <- m5.3 %>%
          spread_draws(b[i]) %>%
          mean_qi %>%
          mutate(i = c('Intercept', 'bA', 'bM'),
                 Model = 'm5.3')
res <- bind_rows(res5.1, res5.2, res5.3) %>%
       filter(i != 'Intercept')

ggplot(res, aes(x = b, y = Model, xmin = .lower, xmax = .upper)) +
    geom_vline(xintercept = 0, linetype = 'dotted') +
    geom_pointrange() +
    facet_grid(i ~ .) + 
    vincent::theme_vab()

Figure 5.5:

datplot <- m5.3 %>%
           gather_draws(mu[i]) %>% 
           mean_qi %>%
           rename(Predicted = .value)
datplot$Observed <- WaffleDivorce$Divorce

ggplot(datplot, aes(Observed, Predicted,
                    ymin = .lower, ymax = .upper)) +
    geom_pointrange() +
    geom_abline(intercept = 0, slope = 1, linetype = 'dashed') +
    labs(x = 'Observed divorce', y = 'Predicted divorce',
         title = 'Posterior predictive plot') +
    coord_fixed()

stan_program <- '
data {
  int<lower=1> n;   // number of observations
  int<lower=1> K;   // number of regressors (including constant)
  vector[n] y;      // outcome
  matrix[n, K] X;   // regressors
}
parameters {
  real<lower=0,upper=50> sigma;    // scale
  vector[K] b;                     // coefficients (including constant)
}
transformed parameters {
  vector[n] mu;                    // location
  mu = X * b;
}
model {
  y ~ normal(mu, sigma);    // probability model
  sigma ~ exponential(1);   // prior for scale
  b[1] ~ normal(0, 0.2);    // prior for intercept
  for (i in 2:K) {          // priors for coefficients
    b[i] ~ normal(0, 0.5);
  }
}
generated quantities {
  vector[n] yhat;           // predicted outcome
  for (i in 1:n) yhat[i] = normal_rng(mu[i], sigma);
}
'

stan_data <- WaffleDivorce %>%
             compose_data(K = 2, 
                          y = Marriage,
                          X = model.matrix(~MedianAgeMarriage, .)) 


m5.4 <- stan(model_code = stan_program, data = stan_data)

summary(m5.4, c('b', 'sigma'))$summary

##                mean     se_mean         sd       2.5%         25%           50%
## b[1]  -0.0001416567 0.001443138 0.08926377 -0.1746425 -0.05933226  0.0007328729
## b[2]  -0.6927474174 0.001881463 0.10018504 -0.8893169 -0.75989116 -0.6916729271
## sigma  0.7117689026 0.001375089 0.07330929  0.5851715  0.66020161  0.7046342284
##               75%      97.5%   n_eff      Rhat
## b[1]   0.05837478  0.1734897 3825.91 0.9995852
## b[2]  -0.62560113 -0.4957644 2835.40 1.0006305
## sigma  0.75675301  0.8689842 2842.21 1.0005442

Section 5.2: Masked relationship

dat <- read.csv('data/milk.csv', sep = ';') %>%
       mutate(K = standardize(kcal.per.g),
              N = standardize(neocortex.perc),
              M = standardize(log(mass))) %>%
       select(K, N, M) %>%
       drop_na
stan_data <- dat %>% compose_data

stan_program <- '
data {
    int n;
    vector[n] K;
    vector[n] N;
}
parameters {
    real a;
    real<lower=0> sigma;
    real bN;
}
model {
    vector[n] mu;
    mu = a + bN * N;
    K ~ normal(mu, sigma);
    a ~ normal(0, .2);
    bN ~ normal(0, 0.5);
    sigma ~ exponential(1);
}
'

m5.5 <- stan(model_code = stan_program, data = stan_data)

# m5.6
stan_program <- '
data {
    int n;
    vector[n] K;
    vector[n] M;
}
parameters {
    real a;
    real<lower=0> sigma;
    real bM;
}
model {
    vector[n] mu;
    mu = a + bM * M;
    K ~ normal(mu, sigma);
    a ~ normal(0, .2);
    bM ~ normal(0, 0.5);
    sigma ~ exponential(1);
}
'

m5.6 <- stan(model_code = stan_program, data = stan_data)

# m5.7
stan_program <- '
data {
    int n;
    vector[n] K;
    vector[n] N;
    vector[n] M;
}
parameters {
    real a;
    real<lower=0> sigma;
    real bM;
    real bN;
}
model {
    vector[n] mu;
    mu = a + bN * N + bM * M;
    K ~ normal(mu, sigma);
    a ~ normal(0, .2);
    bN ~ normal(0, 0.5);
    bM ~ normal(0, 0.5);
    sigma ~ exponential(1);
}
'

m5.7 <- stan(model_code = stan_program, data = stan_data)

plot(rethinking::coeftab(m5.5, m5.6, m5.7), pars = c('bM', 'bN'))

Section 5.3: Categorical variables

stan_program <- '
data {
  int<lower=1> n;
  vector[n] height;
  int sex[n];
}
parameters {
  real<lower=0,upper=50> sigma;
  vector[2] a;
}
transformed parameters {
  vector[n] mu;
  mu = a[sex];
}
model {
  height ~ normal(mu, sigma);
  sigma ~ uniform(0, 50);
  a[1] ~ normal(178, 20);
  a[2] ~ normal(178, 20);
}
generated quantities{
  real diff_fm;
  diff_fm = a[1] - a[2];
}
'

stan_data <- read.csv('data/Howell1.csv', sep = ';') %>%
             mutate(sex = ifelse(male == 1, 'Male', 'Female')) %>%
             compose_data

m5.8 <- stan(model_code = stan_program, data = stan_data)

summary(m5.8, c('diff_fm', 'sigma', 'a'))$summary

##               mean    se_mean        sd      2.5%        25%        50%
## diff_fm  -7.635007 0.03899825 2.3412006 -12.16659  -9.278128  -7.619819
## sigma    27.404630 0.01420678 0.8267147  25.88770  26.837593  27.374585
## a[1]    134.931610 0.02533222 1.5821872 131.94598 133.861086 134.910072
## a[2]    142.566617 0.02722007 1.7025623 139.30433 141.414317 142.555314
##                75%     97.5%    n_eff      Rhat
## diff_fm  -6.042655  -3.03303 3604.019 0.9996041
## sigma    27.955164  29.07944 3386.259 0.9999177
## a[1]    135.986472 138.02607 3900.939 0.9998798
## a[2]    143.717716 145.84471 3912.259 0.9995529

stan_program <- '
data {
  int n;
  vector[n] K;
  int clade[n];
}
parameters {
  real<lower=0,upper=50> sigma;
  vector[4] a;
}
transformed parameters {
  vector[n] mu;
  mu = a[clade];
}
model {
  K ~ normal(mu, sigma);
  sigma ~ uniform(0, 50);
  a ~ normal(0, .5);
}
'

stan_data <- read.csv('data/milk.csv', sep = ';') %>%
             mutate(K = as.vector(scale(kcal.per.g))) %>%
             select(K, clade) %>%
             compose_data

m5.9 <- stan(model_code = stan_program, data = stan_data)

summary(m5.9, 'a')$summary

##            mean     se_mean        sd        2.5%        25%        50%
## a[1] -0.4584094 0.003692173 0.2385145 -0.89689774 -0.6218417 -0.4655657
## a[2]  0.3485229 0.003507431 0.2466251 -0.14341270  0.1893011  0.3521133
## a[3]  0.6307122 0.003876439 0.2798779  0.07832889  0.4411852  0.6319814
## a[4] -0.5381555 0.004496708 0.3011391 -1.13292156 -0.7431130 -0.5376123
##             75%      97.5%    n_eff      Rhat
## a[1] -0.3025000 0.02129167 4173.165 1.0001576
## a[2]  0.5101727 0.81949315 4944.205 0.9992070
## a[3]  0.8261168 1.16735302 5212.801 0.9997222
## a[4] -0.3389208 0.05664975 4484.818 1.0004357

Statistical Rethinking 2: Chapter 5

Vincent Arel-Bundock

Status

Libraries

Section 5.1: Spurious association

Section 5.2: Masked relationship

Section 5.3: Categorical variables