Estimated and checked against the book:
library(tidyverse)
library(tidybayes)
library(rstan)
library(rethinking)
library(patchwork)
options(mc.cores = 4)
dat <- read.csv('data/reedfrogs.csv', sep = ';')
stan_data <- list(n = nrow(dat),
surv = dat$surv,
dens = dat$density,
tank = 1:nrow(dat))
stan_program <- '
data {
int n;
int surv[n];
int dens[n];
int tank[n];
}
parameters {
real a[n];
}
transformed parameters {
vector[n] p;
for (i in 1:n) {
p[i] = inv_logit(a[i]);
}
}
model {
a ~ normal(0, 1.5);
for (i in 1:n) {
surv[i] ~ binomial(dens[i], p[i]);
}
}
'
m13.1 <- stan(model_code = stan_program, data = stan_data)
stan_program <- '
data {
int n;
int surv[n];
int dens[n];
int tank[n];
}
parameters {
real abar;
real<lower=0> sigma;
real a[n];
}
transformed parameters {
vector[n] p;
for (i in 1:n) {
p[i] = inv_logit(a[i]);
}
}
model {
a ~ normal(abar, sigma);
sigma ~ exponential(1);
for (i in 1:n) {
surv[i] ~ binomial(dens[i], p[i]);
}
}
'
m13.2 <- stan(model_code = stan_program, data = stan_data)
datplot <- tibble(Tank = 1:nrow(dat),
Density = dat$density,
'Observed' = dat$propsurv,
`No pooling` = summary(m13.1, 'p')$summary[, 1],
`Partial pooling` = summary(m13.2, 'p')$summary[, 1]) %>%
mutate(Size = case_when(Density == 10 ~ 'Small',
Density == 25 ~ 'Medium',
Density == 35 ~ 'Large'),
Size = factor(Size, c('Small', 'Medium', 'Large'))) %>%
pivot_longer(-c(Tank, Size, Density))
ggplot(datplot, aes(Tank, value, color = name)) +
geom_point(size = 2) +
facet_grid(. ~ Size, scales = 'free_x') +
ylab('Survival proportion')
post <- m13.2 %>%
spread_draws(abar, sigma)
datplot <- post %>%
sample_n(1000)
# side-by-side plots
par(mfrow = c(1, 2))
# log odds plot for 1000 samples
plot(NULL, xlim = c(-3, 4), ylim = c(0, .35),
xlab = 'Log-odds survive', ylab = 'Density')
for (i in 1:nrow(datplot)) {
curve(dnorm(x, datplot$abar[i], datplot$sigma[i]), add = TRUE,
col = adjustcolor('black', alpha = .02))
}
# density plot
sim_tanks <- rnorm(1:nrow(post), post$abar, post$sigma)
sim_tanks <- density(rethinking::inv_logit(sim_tanks))
plot(sim_tanks, lwd = 2, adj = .1, xlab = 'Probability survive', main = '')
Results are different because of different random seeds. This model replicates the same results produced by the book’s replication code:
set.seed(5005)
a_bar <- 1.5
sigma <- 1.5
nponds <- 60
Ni <- as.integer(rep(c(5, 10, 25, 35), each = 15))
a_pond <- rnorm(nponds, mean = a_bar, sd = sigma)
dsim <- data.frame(pond = 1:nponds,
Ni = Ni,
true_a = a_pond)
dsim$Si <- rbinom(nponds, prob = logistic(dsim$true_a), size = dsim$Ni)
dsim$p_nopool <- dsim$Si / dsim$Ni
stan_data <- compose_data(dsim)
stan_data$n_pond <- n_distinct(stan_data$pond)
stan_program <- "
data {
int n;
int n_pond;
int Si[n];
int Ni[n];
int pond[n];
}
parameters {
real a_pond[n_pond];
real a_bar;
real<lower=0> sigma;
}
model {
vector[n] p;
for (i in 1:n) {
p[i] = inv_logit(a_pond[pond[i]]);
}
Si ~ binomial(Ni, p);
a_pond ~ normal(a_bar, sigma);
a_bar ~ normal(0, 1.5);
sigma ~ exponential(1);
}
"
m13.3 <- stan(model_code = stan_program, data = stan_data)
summary(m13.3, c('a_bar', 'sigma'))$summary
## mean se_mean sd 2.5% 25% 50% 75%
## a_bar 1.677641 0.004988298 0.2594287 1.19984 1.499170 1.670012 1.845382
## sigma 1.690228 0.006543893 0.2421692 1.28234 1.523387 1.664936 1.831343
## 97.5% n_eff Rhat
## a_bar 2.210685 2704.775 1.0000900
## sigma 2.232501 1369.511 0.9995666
# Section 13.3: More than one type of cluster
```r
dat <- read.csv('data/chimpanzees.csv', sep = ';')
stan_data <- list(actor = dat$actor,
treatment = as.integer(1 + dat$prosoc_left + 2 * dat$condition),
pulled_left = dat$pulled_left,
block_id = dat$block,
n = nrow(dat),
n_block_id = length(unique(dat$block)))
stan_data$n_treatment <- length(unique(stan_data$treatment))
stan_program <- '
data {
int n;
int n_block_id;
int n_treatment;
int actor[n];
int treatment[n];
int pulled_left[n];
int block_id[n];
}
parameters {
vector[n] a;
vector[n_block_id] g;
vector[n_treatment] b;
real a_bar;
real<lower=0> sigma_a;
real<lower=0> sigma_g;
}
transformed parameters {
vector[n] p;
for (i in 1:n) {
p[i] = inv_logit(a[actor[i]] + g[block_id[i]] + b[treatment[i]]);
}
}
model {
a_bar ~ normal(0, 1.5);
a ~ normal(a_bar, sigma_a);
b ~ normal(0, 0.5);
g ~ normal(0, sigma_g);
sigma_a ~ exponential(1);
sigma_g ~ exponential(1);
pulled_left ~ binomial(1, p);
}
'
m13.4 <- stan(model_code = stan_program, data = stan_data)
summary(m13.4, c('a_bar', 'sigma_a', 'sigma_g'))$summary
## mean se_mean sd 2.5% 25% 50%
## a_bar 0.6279560 0.099775930 0.7686732 -0.83242805 0.08726087 0.6536166
## sigma_a 1.9355276 0.096569561 0.5347570 1.17111691 1.52730306 1.8561061
## sigma_g 0.1976989 0.006913039 0.1559670 0.01483376 0.08003368 0.1623560
## 75% 97.5% n_eff Rhat
## a_bar 1.1714803 2.0812933 59.35153 1.071970
## sigma_a 2.2414448 3.2084119 30.66426 1.119689
## sigma_g 0.2758551 0.5916084 509.01135 1.011938
stan_program <- '
data {
int n;
int n_block_id;
int n_treatment;
int actor[n];
int treatment[n];
int pulled_left[n];
int block_id[n];
}
parameters {
vector[n] a;
vector[n_block_id] g;
vector[n_treatment] b;
real a_bar;
real<lower=0> sigma_a;
real<lower=0> sigma_g;
}
transformed parameters {
vector[n] p;
p = inv_logit(a[actor] + b[treatment]);
}
model {
a_bar ~ normal(0, 1.5);
a ~ normal(a_bar, sigma_a);
b ~ normal(0, 0.5);
g ~ normal(0, sigma_g);
sigma_a ~ exponential(1);
sigma_g ~ exponential(1);
pulled_left ~ binomial(1, p);
}
'
m13.5 <- stan(model_code = stan_program, data = stan_data)
summary(m13.5, c('a_bar', 'sigma_a', 'sigma_g', 'g', 'b'))$summary
## mean se_mean sd 2.5% 25% 50%
## a_bar 0.621705510 0.06053770 0.6977167 -0.7995618 0.21990084 0.619654079
## sigma_a 2.015227507 0.11573790 0.6978566 0.9894752 1.55658727 1.879400992
## sigma_g 1.133640675 0.06482669 1.0708670 0.0840966 0.35697657 0.770195317
## g[1] 0.009626296 0.06963477 1.6583759 -3.3752465 -0.44003623 -0.008427721
## g[2] 0.044190643 0.05024876 1.5429640 -3.0250355 -0.43037323 0.011704894
## g[3] -0.038756205 0.03962733 1.5206989 -3.5767996 -0.45923786 -0.004879280
## g[4] 0.015332581 0.05697611 1.6160114 -3.3363745 -0.45252943 -0.008170620
## g[5] 0.053181143 0.04891856 1.5564916 -3.2999856 -0.43409482 0.011093468
## g[6] -0.021208698 0.04094730 1.5077386 -3.4294240 -0.41903540 -0.003890265
## b[1] -0.136123045 0.01110652 0.2949365 -0.7265220 -0.33672130 -0.136362455
## b[2] 0.386322339 0.01066053 0.2947023 -0.2006513 0.19039094 0.391691517
## b[3] -0.485751217 0.01053308 0.2961797 -1.0516117 -0.69005035 -0.488400062
## b[4] 0.273556937 0.01004639 0.2927892 -0.3132304 0.08003825 0.272033860
## 75% 97.5% n_eff Rhat
## a_bar 1.01564163 2.10747420 132.83315 1.012547
## sigma_a 2.36372120 3.55842949 36.35643 1.133076
## sigma_g 1.57749714 3.95942729 272.87472 1.012249
## g[1] 0.42093896 3.58001205 567.17052 1.008396
## g[2] 0.45666467 3.54196408 942.88957 1.005944
## g[3] 0.43073833 3.19697334 1472.64080 1.000983
## g[4] 0.43910812 3.54418021 804.45786 1.002594
## g[5] 0.46695512 3.80083863 1012.38606 1.003197
## g[6] 0.41333714 3.17897071 1355.81865 1.001834
## b[1] 0.06894413 0.42568977 705.18113 1.006666
## b[2] 0.58201961 0.95885228 764.20428 1.004416
## b[3] -0.28142622 0.07514089 790.67818 1.004827
## b[4] 0.47122084 0.85266357 849.35709 1.005281
stan_program <- '
data {
int n;
int n_block_id;
int n_treatment;
int actor[n];
int treatment[n];
int pulled_left[n];
int block_id[n];
}
parameters {
vector[n] a;
vector[n_block_id] g;
vector[n_treatment] b;
real a_bar;
real<lower=0> sigma_a;
real<lower=0> sigma_g;
real<lower=0> sigma_b;
}
transformed parameters {
vector[n] p;
p = inv_logit(a[actor] + g[block_id] + b[treatment]);
}
model {
a_bar ~ normal(0, 1.5);
a ~ normal(a_bar, sigma_a);
b ~ normal(0, sigma_b);
g ~ normal(0, sigma_g);
sigma_a ~ exponential(1);
sigma_g ~ exponential(1);
sigma_b ~ exponential(1);
pulled_left ~ binomial(1, p);
}
'
m13.6 <- stan(model_code = stan_program, data = stan_data)
summary(m13.6, c('a_bar', 'sigma_a', 'sigma_g', 'sigma_b', 'g', 'b'))$summary
## mean se_mean sd 2.5% 25% 50%
## a_bar 0.67985017 0.054046530 0.6249269 -0.4506771 0.265036368 0.659364724
## sigma_a 1.83168735 0.097500555 0.4918348 1.0070336 1.488080848 1.755785581
## sigma_g 0.20463892 0.006863296 0.1665183 0.0162256 0.083866518 0.164558257
## sigma_b 0.55989178 0.010184523 0.3397434 0.1395033 0.333152729 0.487383985
## g[1] -0.16177941 0.006499418 0.2198654 -0.7090949 -0.269778040 -0.100849392
## g[2] 0.03736735 0.004229374 0.1704680 -0.2964037 -0.047025886 0.019107627
## g[3] 0.05295697 0.004366830 0.1672716 -0.2454297 -0.033771471 0.024493938
## g[4] 0.00902523 0.003499595 0.1636575 -0.3247880 -0.068934909 0.002424974
## g[5] -0.02696743 0.003922287 0.1701420 -0.4116770 -0.104861050 -0.013821175
## g[6] 0.11028467 0.005351688 0.1870683 -0.1726436 -0.004549283 0.067318265
## b[1] -0.12179687 0.014595249 0.3295397 -0.8013278 -0.311678677 -0.103420797
## b[2] 0.36804014 0.015174929 0.3416145 -0.2602960 0.152370214 0.343252708
## b[3] -0.44717470 0.014796165 0.3492230 -1.1873417 -0.648761148 -0.427666875
## b[4] 0.25935278 0.014786734 0.3389653 -0.3921577 0.049705893 0.239243415
## 75% 97.5% n_eff Rhat
## a_bar 1.048493279 2.0643574 133.69735 1.034146
## sigma_a 2.116404263 2.9246040 25.44629 1.125715
## sigma_g 0.275491368 0.6338896 588.65138 1.008316
## sigma_b 0.692990503 1.4293469 1112.80940 1.007362
## g[1] -0.009072732 0.1346409 1144.36623 1.001396
## g[2] 0.116403220 0.4397712 1624.55143 1.001809
## g[3] 0.130444261 0.4548793 1467.27535 1.000025
## g[4] 0.084334775 0.3672708 2186.93748 1.000576
## g[5] 0.055694433 0.3183257 1881.67327 1.000459
## g[6] 0.198141844 0.5815002 1221.85205 1.002773
## b[1] 0.067806364 0.5157927 509.79142 1.007543
## b[2] 0.556772850 1.1112616 506.77979 1.007666
## b[3] -0.219592558 0.1769222 557.06698 1.007507
## b[4] 0.452973307 0.9824236 525.49185 1.007221
stan_program <- '
data {
int n;
}
parameters {
real v;
real x;
}
model {
x ~ normal(0, exp(v));
v ~ normal(0, 3);
}
'
stan_data <- list(n = 1)
m13.7 <- stan(model_code = stan_program, data = stan_data)
m13.7
## Inference for Stan model: 3fc6b3452631d026bac5f892a404f75c.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## v 1.80 0.83 2.12 -1.93 0.08 1.76 3.22 6.24 7 1.41
## x 5.28 15.51 229.89 -207.66 -2.69 -0.25 2.80 128.15 220 1.02
## lp__ -2.70 0.96 2.74 -8.83 -4.37 -2.29 -0.54 1.50 8 1.33
##
## Samples were drawn using NUTS(diag_e) at Sat Aug 1 13:53:40 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
stan_program <- '
data {
int n;
}
parameters {
real v;
real z;
}
model {
z ~ normal(0, 1);
v ~ normal(0, 3);
}
generated quantities {
real x;
x = z * exp(v);
}
'
m13.7nc <- stan(model_code = stan_program, data = stan_data, iter = 5000)
m13.7nc
## Inference for Stan model: e496e07a007728e871bf4286c9b15b58.
## 4 chains, each with iter=5000; warmup=2500; thin=1;
## post-warmup draws per chain=2500, total post-warmup draws=10000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## v 0.06 0.03 2.98 -5.83 -1.92 0.08 2.07 5.83 8892 1
## z -0.01 0.01 1.01 -1.99 -0.70 -0.02 0.68 1.96 9337 1
## x -13.08 10.53 1014.98 -103.79 -0.65 0.00 0.54 96.06 9285 1
## lp__ -1.00 0.01 0.99 -3.63 -1.39 -0.70 -0.29 -0.03 4880 1
##
## Samples were drawn using NUTS(diag_e) at Sat Aug 1 13:53:59 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
stan_program <- '
data {
int n;
int n_block_id;
int n_treatment;
int n_actor;
int actor[n];
int treatment[n];
int pulled_left[n];
int block_id[n];
}
parameters {
vector[n_treatment] b;
vector[n_actor] z;
vector[n_block_id] x;
real<lower=0> sigma_a;
real<lower=0> sigma_g;
real abar;
}
transformed parameters {
vector[n] a;
vector[n] g;
vector[n] p;
for (i in 1:n) {
a[i] = abar + z[actor[i]] * sigma_a;
g[i] = x[block_id[i]] * sigma_g;
p[i] = a[i] + g[i] + b[treatment[i]];
p[i] = inv_logit(p[i]);
}
}
model {
pulled_left ~ binomial(1, p);
b ~ normal(0, 0.5);
z ~ normal(0, 1);
x ~ normal(0, 1);
abar ~ normal(0, 1.5);
sigma_a ~ exponential(1);
sigma_g ~ exponential(1);
}
'
dat <- read.csv('data/chimpanzees.csv', sep = ';')
stan_data <- list(actor = dat$actor,
treatment = as.integer(1 + dat$prosoc_left + 2 * dat$condition),
pulled_left = dat$pulled_left,
block_id = dat$block,
n = nrow(dat),
n_block_id = length(unique(dat$block)),
n_actor = length(unique(dat$actor)))
stan_data$n_treatment <- length(unique(stan_data$treatment))
m13.4nc <- stan(model_code = stan_program, data = stan_data)
summary(m13.4nc, c('b', 'abar', 'sigma_a', 'sigma_g'))$summary
## mean se_mean sd 2.5% 25% 50%
## b[1] -0.1236039 0.006415408 0.3023752 -0.714193719 -0.32792293 -0.1214269
## b[2] 0.4061395 0.006500596 0.2998786 -0.173465405 0.20550126 0.4026018
## b[3] -0.4697239 0.006431459 0.3036545 -1.076468405 -0.67390900 -0.4675339
## b[4] 0.2884508 0.006459666 0.2986416 -0.291113672 0.08600717 0.2871792
## abar 0.6310772 0.025100615 0.7483470 -0.848866106 0.14592308 0.6268470
## sigma_a 2.0200782 0.019116403 0.6799616 1.081792319 1.54476451 1.8993854
## sigma_g 0.2016138 0.004093072 0.1752049 0.007663319 0.07833709 0.1625573
## 75% 97.5% n_eff Rhat
## b[1] 0.07801207 0.4728825 2221.487 1.000304
## b[2] 0.60782302 0.9905772 2128.064 1.001214
## b[3] -0.26589599 0.1249911 2229.156 1.000840
## b[4] 0.49279478 0.8828212 2137.374 1.001113
## abar 1.10589006 2.1144049 888.868 1.007331
## sigma_a 2.34695027 3.6712288 1265.192 1.002481
## sigma_g 0.27410070 0.6325778 1832.288 1.000037