Status

Estimated and checked against the book:

Libraries

library(tidyverse)
library(tidybayes)
library(rstan)
library(rethinking)
options(mc.cores = 4)

Was the R number generator different when the book was written? I know it changed recently.

Section 11.1: Binomial regression

chimpanzees <- read.csv('data/chimpanzees.csv', sep = ';') %>%
               mutate(treatment = factor(1 + prosoc_left + 2 * condition),
                      actor = factor(actor))

stan_data <- compose_data(chimpanzees,
                          n_actor = n_distinct(actor),
                          n_treatment = n_distinct(treatment),
                          n_predictions = n_actor * n_treatment)

stan_program <- '
data {
  int<lower=1> n;                       
  int<lower=0, upper=1> pulled_left[n];
}
parameters {
  real a;
}
transformed parameters{
  real<lower=0, upper=1> p;
  p = inv_logit(a);
}
model {
  a ~ normal(1, 10);
  pulled_left ~ binomial(1, p);
}
'

m11.1 <- stan(model_code = stan_program, data = stan_data)
m11.1
## Inference for Stan model: 0d32a53f4416b3d692f12e641f7e57ce.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##         mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## a       0.32    0.00 0.09    0.15    0.26    0.32    0.38    0.50  1531    1
## p       0.58    0.00 0.02    0.54    0.56    0.58    0.59    0.62  1530    1
## lp__ -343.47    0.02 0.69 -345.45 -343.64 -343.20 -343.02 -342.97  1635    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:47:23 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
stan_program <- '
data {
  int<lower=1> n;                       
  int<lower=1> n_treatment;
  int treatment[n];
  int<lower=0, upper=1> pulled_left[n];
}
parameters {
  real a;
  real b[n_treatment];
}
model {
  vector[n] p;
  for (i in 1:n) {
      p[i] = inv_logit(a + b[treatment[i]]);
  }
  pulled_left ~ binomial(1, p);
  a ~ normal(0, 1.5);
  b ~ normal(0, 10);
}
'

m11.2 <- stan(model_code = stan_program, data = stan_data, iter = 5000)
m11.2
## Inference for Stan model: 542bceea740a1a0cacf7886bab8d15db.
## 4 chains, each with iter=5000; warmup=2500; thin=1; 
## post-warmup draws per chain=2500, total post-warmup draws=10000.
## 
##         mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## a       0.03    0.04 1.36   -2.73   -0.87    0.04    0.95    2.64   984    1
## b[1]    0.16    0.04 1.37   -2.49   -0.76    0.16    1.07    2.94   999    1
## b[2]    0.63    0.04 1.38   -2.02   -0.31    0.63    1.55    3.45   992    1
## b[3]   -0.12    0.04 1.38   -2.74   -1.05   -0.13    0.78    2.67   993    1
## b[4]    0.53    0.04 1.37   -2.14   -0.41    0.52    1.43    3.31   993    1
## lp__ -340.01    0.03 1.57 -343.95 -340.82 -339.68 -338.86 -337.96  2581    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:47:51 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
stan_program <- '
data {
  int<lower=1> n;                       
  int<lower=1> n_treatment;
  int treatment[n];
  int<lower=0, upper=1> pulled_left[n];
}
parameters {
  real a;
  real b[n_treatment];
}
model {
  vector[n] p;
  for (i in 1:n) {
      p[i] = inv_logit(a + b[treatment[i]]);
  }
  pulled_left ~ binomial(1, p);
  a ~ normal(0, 1.5);
  b ~ normal(0, 0.5);
}
'

m11.3 <- stan(model_code = stan_program, data = stan_data)
m11.3
## Inference for Stan model: 389c3a48fcf1f316a2b5b9f37cb43289.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##         mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## a       0.31    0.01 0.26   -0.22    0.14    0.32    0.49    0.81  1045    1
## b[1]   -0.10    0.01 0.29   -0.65   -0.29   -0.10    0.10    0.46  1281    1
## b[2]    0.31    0.01 0.29   -0.26    0.11    0.31    0.51    0.89  1316    1
## b[3]   -0.36    0.01 0.29   -0.92   -0.56   -0.37   -0.17    0.23  1317    1
## b[4]    0.22    0.01 0.29   -0.35    0.02    0.22    0.42    0.80  1230    1
## lp__ -340.74    0.05 1.64 -344.85 -341.58 -340.39 -339.53 -338.59  1249    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:48:15 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
stan_program <- '
data {
  int n;                       
  int n_treatment;                       
  int n_actor;
  int n_predictions;
  int actor[n];
  int pulled_left[n];
  int treatment[n];
}
parameters {
  real a[n_actor];
  real b[n_treatment];
}
transformed parameters{
  vector[n] p;
  for (i in 1:n) {
      p[i] = a[actor[i]] + b[treatment[i]];
      p[i] = inv_logit(p[i]);
  }
}
model {
  a ~ normal(0, 1.5);
  b ~ normal(0, 0.5);
  pulled_left ~ binomial(1, p);
}
generated quantities {
  real db13;
  real db24;
  db13 = b[1] - b[3];
  db24 = b[2] - b[4];
}
'

m11.4 <- stan(model_code = stan_program, data = stan_data)
summary(m11.4, c('db13', 'db24', 'a', 'b'))$summary
##             mean     se_mean        sd        2.5%         25%         50%
## db13  0.33978708 0.002752338 0.2737174 -0.20213968  0.15547749  0.33999884
## db24  0.10675078 0.002812325 0.2684722 -0.42099594 -0.07712545  0.10794224
## a[1] -0.47414059 0.008513451 0.3258588 -1.10438153 -0.70017191 -0.47837755
## a[2]  3.87262474 0.013531678 0.7621662  2.54814102  3.33943808  3.81853055
## a[3] -0.77078069 0.008958458 0.3347200 -1.43663966 -0.99571161 -0.76866744
## a[4] -0.77022171 0.008471211 0.3329607 -1.43757493 -0.99207514 -0.76615290
## a[5] -0.47228561 0.009158546 0.3266721 -1.11857169 -0.69070446 -0.47064538
## a[6]  0.45250904 0.009143663 0.3334373 -0.20345227  0.22436392  0.45729835
## a[7]  1.94065766 0.009469053 0.4180691  1.16273368  1.64969575  1.93128967
## b[1] -0.02102044 0.008084390 0.2784338 -0.56828780 -0.20690326 -0.02767337
## b[2]  0.50032127 0.008479632 0.2769592 -0.04658395  0.31601074  0.50133166
## b[3] -0.36080752 0.008484236 0.2834964 -0.93779783 -0.55529992 -0.35840625
## b[4]  0.39357050 0.007987089 0.2832683 -0.16710179  0.20795255  0.39521477
##             75%      97.5%    n_eff      Rhat
## db13  0.5196912  0.8822937 9890.112 1.0002163
## db24  0.2941861  0.6336666 9113.129 0.9995266
## a[1] -0.2539259  0.1533291 1465.034 1.0018820
## a[2]  4.3529708  5.5067614 3172.459 1.0011609
## a[3] -0.5492308 -0.1183743 1396.037 1.0022991
## a[4] -0.5479805 -0.1056836 1544.880 1.0025301
## a[5] -0.2548355  0.1587154 1272.246 1.0020621
## a[6]  0.6766061  1.1251589 1329.805 1.0019349
## a[7]  2.2139860  2.7991523 1949.319 0.9998115
## b[1]  0.1639017  0.5346906 1186.177 1.0026502
## b[2]  0.6888201  1.0429767 1066.787 1.0035372
## b[3] -0.1607954  0.1893780 1116.528 1.0030303
## b[4]  0.5826269  0.9391196 1257.821 1.0023122
datplot <- m11.4 %>% 
           spread_draws(a[actor]) %>%
           mean_qi() %>%
           mutate(actor = factor(actor, 7:1))
ggplot(datplot, aes(a, actor, xmin = .lower, xmax = .upper)) +
    geom_vline(xintercept = 0, linetype = 'dashed') +
    geom_pointrange()

datplot <- m11.4 %>% 
           spread_draws(b[treatment]) %>%
           mean_qi() %>%
           mutate(treatment = c("R/N", "L/N", "R/P", "L/P")[treatment])
ggplot(datplot, aes(b, treatment, xmin = .lower, xmax = .upper)) +
    geom_vline(xintercept = 0, linetype = 'dashed') +
    geom_pointrange()

datplot <- m11.4 %>% 
           gather_draws(db13, db24) %>%
           median_qi()
ggplot(datplot, aes(.value, .variable, xmin = .lower, xmax = .upper)) +
  geom_vline(linetype = 'dashed', xintercept = 0) +
  geom_pointrange() + 
  labs(x = 'Value', y = '')

tmp <- chimpanzees %>% mutate(i = 1:n())
datplot <- m11.4 %>% 
           spread_draws(p[i]) %>%
           mean_qi() %>%
           left_join(tmp, by = 'i')  %>%
           mutate(treatment = c('R/N', 'L/N', 'R/P', 'L/P')[treatment],
                  treatment = factor(treatment, levels = c('R/N', 'L/N', 'R/P', 'L/P')))

ggplot(datplot, aes(treatment, p, ymin = .lower, ymax = .upper, label = treatment)) +
  geom_text() +
  geom_hline(linetype = 'dashed', yintercept = 0.5) +
  geom_linerange(alpha = .1) + 
  facet_grid(~ actor) +
  labs(x = 'Value', y = '') +
  theme_classic()

stan_data <- read.csv('data/chimpanzees.csv', sep = ';') %>%
             mutate(treatment = factor(1 + prosoc_left + 2 * condition),
                    side = prosoc_left + 1,
                    cond = condition + 1) %>%
             compose_data
stan_data$n_actor <- n_distinct(stan_data$actor)
stan_data$n_side <- n_distinct(stan_data$side)
stan_data$n_treatment <- n_distinct(stan_data$treatment)
stan_data$n_cond <- n_distinct(stan_data$cond)

stan_program <- '
data {
  int n;
  int n_actor;
  int n_treatment;
  int n_side;
  int treatment[n];
  int side[n];
  int actor[n];
  int pulled_left[n];
}
parameters {
  real a[n_actor];
  real bc[n_treatment];
  real bs[n_side];
}
model {
  vector[n] p;
  a ~ normal(0, 1.5);
  bs ~ normal(0, 0.5);
  bc ~ normal(0, 0.5);
  for (i in 1:n) {
      p[i] = inv_logit(a[actor[i]] + bc[treatment[i]] + bs[side[i]]);
  }
  pulled_left ~ binomial(1, p);
}
'
m11.5 <- stan(model_code = stan_program, data = stan_data)
m11.5
## Inference for Stan model: 4ffa7ac322f82107a6ec9447ab65c223.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##          mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## a[1]    -0.59    0.01 0.40   -1.38   -0.86   -0.59   -0.32    0.21  1253    1
## a[2]     3.82    0.02 0.79    2.44    3.27    3.79    4.31    5.52  2264    1
## a[3]    -0.89    0.01 0.41   -1.71   -1.16   -0.89   -0.62   -0.05  1207    1
## a[4]    -0.88    0.01 0.42   -1.68   -1.17   -0.88   -0.59   -0.05  1360    1
## a[5]    -0.58    0.01 0.41   -1.41   -0.86   -0.58   -0.30    0.20  1448    1
## a[6]     0.35    0.01 0.42   -0.48    0.06    0.35    0.63    1.14  1447    1
## a[7]     1.84    0.01 0.48    0.92    1.50    1.83    2.16    2.79  1780    1
## bc[1]    0.14    0.01 0.34   -0.56   -0.09    0.15    0.36    0.80  2299    1
## bc[2]    0.25    0.01 0.34   -0.43    0.02    0.25    0.48    0.93  2300    1
## bc[3]   -0.21    0.01 0.35   -0.89   -0.44   -0.20    0.03    0.47  2225    1
## bc[4]    0.14    0.01 0.34   -0.53   -0.10    0.14    0.36    0.82  2269    1
## bs[1]   -0.08    0.01 0.38   -0.83   -0.33   -0.07    0.18    0.67  1787    1
## bs[2]    0.40    0.01 0.38   -0.35    0.16    0.41    0.66    1.12  1372    1
## lp__  -268.65    0.06 2.61 -274.57 -270.22 -268.33 -266.72 -264.67  1683    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:49:23 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
chimpanzees <- read.csv('data/chimpanzees.csv', sep = ';') %>%
               mutate(treatment = factor(1 + prosoc_left + 2 * condition),
                      actor = factor(actor),
                      side = prosoc_left + 1,
                      cond = condition + 1) %>%
               group_by(treatment, actor, side, cond) %>%
               summarize(left_pulls = sum(pulled_left))
stan_data <- compose_data(chimpanzees)

stan_program <- '
data {
  int n;
  int n_actor;
  int n_treatment;
  int treatment[n];
  int actor[n];
  int left_pulls[n];
}
parameters {
  real a[n_actor];
  real b[n_treatment];
}
model {
  vector[n] p;
  a ~ normal(0, 1.5);
  b ~ normal(0, 0.5);
  for (i in 1:n) {
      p[i] = inv_logit(a[actor[i]] + b[treatment[i]]);
  }
  left_pulls ~ binomial(18, p);
}
'
m11.6 <- stan(model_code = stan_program, data = stan_data)
m11.6
## Inference for Stan model: ced38b76d37e0991c969d2c1efb01352.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##         mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## a[1]   -0.45    0.01 0.33   -1.10   -0.68   -0.45   -0.23    0.19  1195 1.01
## a[2]    3.88    0.01 0.75    2.59    3.35    3.83    4.35    5.55  3892 1.00
## a[3]   -0.75    0.01 0.33   -1.40   -0.98   -0.74   -0.52   -0.12  1300 1.00
## a[4]   -0.75    0.01 0.34   -1.39   -0.98   -0.75   -0.53   -0.10  1181 1.00
## a[5]   -0.46    0.01 0.33   -1.12   -0.68   -0.46   -0.23    0.20  1329 1.00
## a[6]    0.47    0.01 0.34   -0.17    0.24    0.46    0.69    1.16  1197 1.00
## a[7]    1.95    0.01 0.42    1.14    1.67    1.95    2.23    2.81  1875 1.00
## b[1]   -0.04    0.01 0.28   -0.59   -0.23   -0.03    0.15    0.53  1143 1.01
## b[2]    0.49    0.01 0.28   -0.07    0.30    0.49    0.68    1.04  1088 1.01
## b[3]   -0.38    0.01 0.29   -0.94   -0.57   -0.38   -0.18    0.18  1200 1.00
## b[4]    0.38    0.01 0.28   -0.17    0.19    0.38    0.56    0.94  1068 1.00
## lp__ -268.38    0.06 2.41 -274.01 -269.75 -268.07 -266.62 -264.70  1616 1.00
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:49:51 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
UCBadmit <- read.csv('data/UCBadmit.csv', sep = ';') %>%
            rename(rejection = reject,
                   applicant_gender = applicant.gender) %>%
            mutate(ratio = admit / applications)
stan_data <- compose_data(UCBadmit)

stan_program_A <- '
data {
  int n;
  int n_dept;
  int admit[n];
  int applications[n];
  int applicant_gender[n];
}
parameters {
  real a[2];
}
transformed parameters {
  vector[n] p;
  for (i in 1:n) {
    p[i] = inv_logit(a[applicant_gender[i]]);
  }
}
model {
  a ~ normal(0, 1.5);
  for (i in 1:n) {
    admit[i] ~ binomial(applications[i], p[i]);
  }
}
'

stan_program_B <- '
data {
  int n;
  int n_dept;
  int admit[n];
  int applications[n];
  int applicant_gender[n];
  int dept[n];
}
parameters {
  real a[2];
  real b[n_dept];
}
transformed parameters {
  vector[n] p;
  for (i in 1:n) {
    p[i] = inv_logit(a[applicant_gender[i]] + b[dept[i]]);
  }
}
model {
  a ~ normal(0, 1.5);
  b ~ normal(0, 1.5);
  for (i in 1:n) {
    admit[i] ~ binomial(applications[i], p[i]);
  }
}
'

m11.7 <- stan(model_code = stan_program_A, data = stan_data)
m11.8 <- stan(model_code = stan_program_B, data = stan_data)
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#tail-ess
tabA <- UCBadmit %>%
        mutate(`Model A` = summary(m11.7, 'p')$summary[, 1])
tabB <- UCBadmit %>%
        mutate(`Model B` = summary(m11.8, 'p')$summary[, 1])
tab <- left_join(tabA, tabB) %>%
       rename(Observed = ratio) %>% 
       pivot_longer(cols = c(6:8))
## Joining, by = c("dept", "applicant_gender", "admit", "rejection", "applications", "ratio")
ggplot(tab, aes(applicant_gender, value, color = name, shape = applicant_gender), alpha = .8) +
    geom_point(size = 4) +
    facet_grid(. ~ dept) +
    labs(x = '', y = 'Probability of admission', color = '', shape = '')

summary(m11.7, c('a'))$summary
##            mean      se_mean         sd       2.5%        25%        50%
## a[1] -0.8288174 0.0008623703 0.05071067 -0.9307523 -0.8625765 -0.8273782
## a[2] -0.2199545 0.0006190380 0.03798484 -0.2967999 -0.2444081 -0.2197677
##             75%      97.5%    n_eff      Rhat
## a[1] -0.7939883 -0.7345118 3457.889 0.9995449
## a[2] -0.1944101 -0.1465911 3765.183 1.0008260
summary(m11.8, c('a', 'b'))$summary
##            mean    se_mean        sd         2.5%        25%        50%
## a[1] -0.4114608 0.02927185 0.5258947 -1.488974603 -0.7522778 -0.3916633
## a[2] -0.5069791 0.02911166 0.5260336 -1.589490657 -0.8542305 -0.4904988
## b[1]  1.0877493 0.02908198 0.5288973  0.037398841  0.7355244  1.0729910
## b[2]  1.0430005 0.02886238 0.5312410  0.007296877  0.6916325  1.0295871
## b[3] -0.1724636 0.02916085 0.5271089 -1.218096153 -0.5211557 -0.1861541
## b[4] -0.2056184 0.02912147 0.5276633 -1.240926966 -0.5561663 -0.2197746
## b[5] -0.6486266 0.02960073 0.5305610 -1.672269772 -1.0108539 -0.6629060
## b[6] -2.2043918 0.02912214 0.5342614 -3.268600724 -2.5603734 -2.2215414
##              75%      97.5%    n_eff     Rhat
## a[1] -0.07012997  0.6172907 322.7731 1.005186
## a[2] -0.16384130  0.5257637 326.5075 1.004974
## b[1]  1.43793009  2.1566946 330.7461 1.004632
## b[2]  1.39023209  2.1179640 338.7810 1.004729
## b[3]  0.17702100  0.8999195 326.7385 1.005075
## b[4]  0.14198578  0.8756693 328.3123 1.004961
## b[5] -0.30510684  0.4419948 321.2667 1.004869
## b[6] -1.84619253 -1.1359632 336.5588 1.005312

Section 11.2: Poisson regression

kline <- read.csv('data/kline.csv', sep = ';') %>%
         mutate(P = as.vector(scale(log(population))))
stan_data <- compose_data(kline)

stan_program <- '
data {
  int n;
  int total_tools[n];
}
parameters {
  real a;
}
model {
  vector[n] lambda;
  a ~ normal(3, 0.5);
  for (i in 1:n) {
    lambda[i] = exp(a);
  }
  total_tools ~ poisson(lambda);
}
'
m11.9 <- stan(model_code = stan_program, data = stan_data)

stan_program <- '
data {
  int n;
  int n_contact;
  int contact[n];
  real P[n];
  real Pnew[100];
  int total_tools[n];
}
parameters {
  real a[n_contact];
  real b[n_contact];
}
model {
  vector[n] lambda;
  for (i in 1:n) {
    lambda[i] = exp(a[contact[i]] + b[contact[i]] * P[i]);
  }
  a ~ normal(3, 0.5);
  b ~ normal(0, 0.2);
  total_tools ~ poisson(lambda);
}
generated quantities {
  real yhat[100, 2];
  for (i in 1:100) {
    for (j in 1:2) {
      yhat[i, j] = exp(a[j] + b[j] * Pnew[i]);
    }
  }
}
'
stan_data$Pnew <- seq(min(stan_data$P), max(stan_data$P), length.out = 100)
m11.10 <- stan(model_code = stan_program, data = stan_data)

datplot <- m11.10 %>%
           gather_draws(yhat[idx, contact]) %>%
           median_qi() %>%
           left_join(tibble(idx = 1:100, 'Population' = stan_data$Pnew), by = 'idx') %>%
           mutate(contact = ifelse(contact == 1, 'Low', 'High')) %>%
           rename(Tools = .value)

ggplot(datplot, aes(Population, Tools, linetype = contact, ymax = .upper, ymin = .lower, fill = contact)) +
    geom_ribbon(alpha = .2) +
    geom_line() +
    xlab('log population (std)')

stan_program <- '
data {
  int n;
  int n_contact;
  int contact[n];
  real population[n];
  int total_tools[n];
}
parameters {
  real a[n_contact];
  real<lower=0> b[n_contact];
  real<lower=0> g;
}
model {
  vector[n] lambda;
  for (i in 1:n) {
    lambda[i] = exp(a[contact[i]]) * population[i]^b[contact[i]] / g;
  }
  a ~ normal(1, 1);
  b ~ exponential(1);
  g ~ exponential(1);
  total_tools ~ poisson(lambda);
}
'

m11.11 <- stan(model_code = stan_program, data = stan_data)
m11.11
## Inference for Stan model: 90f6e2e00d5cca2ed41805ce7f861f47.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##        mean se_mean   sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
## a[1]   0.96    0.02 0.85  -0.69   0.39   0.94   1.52   2.69  1576    1
## a[2]   0.89    0.02 0.68  -0.53   0.44   0.91   1.36   2.17  1426    1
## b[1]   0.28    0.00 0.11   0.07   0.21   0.29   0.36   0.49  1147    1
## b[2]   0.26    0.00 0.03   0.19   0.24   0.26   0.28   0.33  2011    1
## g      1.13    0.02 0.76   0.22   0.58   0.93   1.46   3.08  1757    1
## lp__ 910.75    0.06 1.75 906.45 909.85 911.13 912.03 912.99   821    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:52:10 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Section 11.2.2

old <- data.frame('monastery' = 0,
                  'exposure' = 1,
                  'y' = rpois(30, 1.5))
new <- data.frame('monastery' = 1,
                  'exposure' = 7,
                  'y' = rpois(4, 0.5 * 7))
monasteries <- rbind(old, new) %>%
               mutate(log_days = log(exposure))
stan_data <- compose_data(monasteries)

stan_program <- '
data {
  int<lower=1> n;
  real<lower=0> log_days[n];
  int<lower=0, upper=1> monastery[n];
  int y[n];
}
parameters {
  real a;
  real b;
}
model {
  real lambda[n];
  for (i in 1:n) {
    lambda[i] = exp(log_days[i] + a + b * monastery[i]);
  }
  y ~ poisson(lambda);
  a ~ normal(0, 1);
  b ~ normal(0, 1);
}
generated quantities {
  real Old;
  real New;
  Old = exp(a);
  New = exp(a + b);
}
'

m11.12 <- stan(model_code = stan_program, data = stan_data)

datplot <- m11.12 %>% 
           gather_draws(Old, New)

ggplot(datplot, aes(.value, .variable)) + 
    stat_halfeye() +
    labs(x = 'Posterior distribution of monastic productivity', y = '')

Section 11.3: Multinomial and categorical models

Results for models m11.13 and m11.14 differ from the book but are identical to those obtained by running the rethink replication code. Perhaps something changed in R random number generator between the publication of the book and my replication.

# simulate career choices among 500 individuals
N <- 500             # number of individuals
income <- c(1,2,5)   # expected income of each career
score <- 0.5*income  # scores for each career, based on income

# next line converts scores to probabilities
p <- softmax(score[1],score[2],score[3])

# now simulate choice
# outcome career holds event type values, not counts
career <- rep(NA,N)  # empty vector of choices for each individual

# sample chosen career for each individual
set.seed(34302)
for ( i in 1:N ) career[i] <- sample( 1:3 , size=1 , prob=p )
stan_data <- list( N=N , K=3 , career=career , career_income=income )

stan_program <- "
data{
    int N; // number of individuals
    int K; // number of possible careers
    int career[N]; // outcome
    vector[K] career_income;
}
parameters{
    vector[K-1] a; // intercepts
    real<lower=0> b; // association of income with choice
}
model{
    vector[K] p;
    vector[K] s;
    a ~ normal( 0 , 1 );
    b ~ normal( 0 , 0.5 );
    s[1] = a[1] + b*career_income[1];
    s[2] = a[2] + b*career_income[2];
    s[3] = 0; // pivot
    p = softmax( s );
    career ~ categorical( p );
}
"

m11.13 <- stan(model_code = stan_program, data = stan_data)
m11.13
## Inference for Stan model: 0e84d6929514bfa13b114806ec70babd.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##         mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## a[1]   -2.14    0.01 0.18   -2.51   -2.26   -2.13   -2.02   -1.82   647    1
## a[2]   -1.79    0.01 0.25   -2.37   -1.93   -1.75   -1.61   -1.40   501    1
## b       0.13    0.01 0.11    0.00    0.04    0.10    0.20    0.41   497    1
## lp__ -375.07    0.04 1.19 -378.07 -375.66 -374.77 -374.19 -373.66  1117    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:53:03 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
## R code 11.59
N <- 500
# simulate family incomes for each individual
family_income <- runif(N)
# assign a unique coefficient for each type of event
b <- c(-2,0,2)
career <- rep(NA,N)  # empty vector of choices for each individual
for ( i in 1:N ) {
    score <- 0.5*(1:3) + b*family_income[i]
    p <- softmax(score[1],score[2],score[3])
    career[i] <- sample( 1:3 , size=1 , prob=p )
}
stan_data <- list( N=N , K=3 , career=career , family_income=family_income )

stan_program <- "
data{
    int N; // number of observations
    int K; // number of outcome values
    int career[N]; // outcome
    real family_income[N];
}
parameters{
    vector[K-1] a; // intercepts
    vector[K-1] b; // coefficients on family income
}
model{
    vector[K] p;
    vector[K] s;
    a ~ normal(0,1.5);
    b ~ normal(0,1);
    for ( i in 1:N ) {
        for ( j in 1:(K-1) ) s[j] = a[j] + b[j]*family_income[i];
        s[K] = 0; // the pivot
        p = softmax( s );
        career[i] ~ categorical( p );
    }
}
"

m11.14 <- stan(model_code = stan_program, data = stan_data)
m11.14
## Inference for Stan model: 819ceb0404bec67ef7ae3da5fedce917.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##         mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## a[1]   -1.65    0.01 0.30   -2.27   -1.83   -1.65   -1.45   -1.08  1574    1
## a[2]   -0.70    0.00 0.21   -1.10   -0.84   -0.70   -0.56   -0.31  1957    1
## b[1]   -1.94    0.01 0.58   -3.13   -2.31   -1.94   -1.55   -0.81  1807    1
## b[2]   -1.32    0.01 0.37   -2.05   -1.58   -1.31   -1.06   -0.59  1961    1
## lp__ -343.03    0.04 1.40 -346.51 -343.74 -342.70 -342.00 -341.28  1508    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:53:39 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Section 11.3.3

UCBadmit <- read.csv('data/UCBadmit.csv', sep = ';') %>%
            rename(rejection = reject) %>%
            janitor::clean_names() %>%
            mutate(ratio = admit / applications)
stan_data <- compose_data(UCBadmit)
stan_program <- '
data {
  int n;
  int admit[n];
  int applications[n];
}
parameters {
  real a;
}
transformed parameters {
  real p;
  p = inv_logit(a);
}
model {
  for (i in 1:n) {
    admit[i] ~ binomial(applications[i], p);
  }
}
'
m_binom <- stan(model_code = stan_program, data = stan_data)

stan_program <- '
data {
  int n;
  int admit[n];
  int rejection[n];
  int applications[n];
}
parameters {
  real lambda[2];
  real a[2];
}
model {
  for (i in 1:n) {
    admit[i] ~ poisson(exp(a[1]));
    rejection[i] ~ poisson(exp(a[2]));
  }
}
generated quantities {
  real p;
  p = exp(a[1]) / (exp(a[1]) + exp(a[2]));
}
'
m_pois <- stan(model_code = stan_program, data = stan_data)
m_binom
## Inference for Stan model: 295c8b28771b80fcd1629f146277499f.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##          mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff
## a       -0.46    0.00 0.03    -0.52    -0.48    -0.46    -0.44    -0.40  1404
## p        0.39    0.00 0.01     0.37     0.38     0.39     0.39     0.40  1403
## lp__ -3022.68    0.02 0.72 -3024.75 -3022.84 -3022.39 -3022.22 -3022.17  1796
##      Rhat
## a    1.01
## p    1.01
## lp__ 1.00
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:54:05 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
m_pois
## Inference for Stan model: 895af4c3a1a30404d88a350926015dca.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##                    mean      se_mean           sd          2.5%           25%
## lambda[1] -7.860539e+18 1.082718e+19 1.655410e+19 -4.415856e+19 -8.162739e+18
## lambda[2] -9.925381e+18 8.372722e+18 1.292629e+19 -3.401829e+19 -2.030827e+19
## a[1]       4.990000e+00 0.000000e+00 2.000000e-02  4.940000e+00  4.970000e+00
## a[2]       5.440000e+00 0.000000e+00 2.000000e-02  5.400000e+00  5.430000e+00
## p          3.900000e-01 0.000000e+00 1.000000e-02  3.700000e-01  3.800000e-01
## lp__       1.930218e+04 2.000000e-02 1.010000e+00  1.929943e+04  1.930184e+04
##                     50%          75%        97.5% n_eff Rhat
## lambda[1]  3.230409e+17 2.066441e+18 7.437527e+18     2 3.92
## lambda[2] -2.192035e+18 4.064475e+16 5.044414e+18     2 2.88
## a[1]       4.990000e+00 5.000000e+00 5.030000e+00  4050 1.00
## a[2]       5.440000e+00 5.460000e+00 5.480000e+00  4206 1.00
## p          3.900000e-01 3.900000e-01 4.000000e-01  4168 1.00
## lp__       1.930249e+04 1.930289e+04 1.930315e+04  1720 1.00
## 
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 16:54:34 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).