Estimated and checked against the book:
Warning: In the book, models m7.1 to m7.6 are estimated using quap
rather than Stan
. My estimates are similar for models m7.1 to 7.3, but diverge somewhat for the others. Also, my compatibility intervals for \(\mu\) are very different. Not sure what the problem is.
library(tidyverse)
library(tidybayes)
library(rstan)
library(patchwork)
options(mc.cores = 4)
d <- tibble(sppnames = c("afarensis", "africanus", "habilis", "boisei", "rudolfensis", "ergaster", "sapiens"),
brainvolcc = c( 438 , 452 , 612, 521, 752, 871, 1350 ),
masskg = c( 37.0 , 35.5 , 34.5 , 41.5 , 55.5 , 61.0 , 53.5 )) %>%
mutate(brain_std = brainvolcc / max(brainvolcc),
mass_std1 = (masskg - mean(masskg)) / sd(masskg),
mass_std2 = mass_std1^2,
mass_std3 = mass_std1^3,
mass_std4 = mass_std1^4,
mass_std5 = mass_std1^5,
mass_std6 = mass_std1^6)
dpred <- tibble(mass_std1 = seq(min(d$mass_std1), max(d$mass_std1), length.out = 100),
mass_std2 = mass_std1^2,
mass_std3 = mass_std1^3,
mass_std4 = mass_std1^4,
mass_std5 = mass_std1^5,
mass_std6 = mass_std1^6)
prepare_data <- function(formula) {
out <- compose_data(d,
X = model.matrix(formula, d),
Xpred = model.matrix(formula, dpred),
y = d$brain_std,
k = ncol(X))
return(out)
}
dat <- list(~ mass_std1,
~ mass_std1 + mass_std2,
~ mass_std1 + mass_std2 + mass_std3,
~ mass_std1 + mass_std2 + mass_std3 + mass_std4,
~ mass_std1 + mass_std2 + mass_std3 + mass_std4 + mass_std5,
~ mass_std1 + mass_std2 + mass_std3 + mass_std4 + mass_std5 + mass_std6)
dat <- lapply(dat, prepare_data)
stan_program <- '
data {
int<lower=1> n; // number of observations
int<lower=1> k; // number of regressors
vector[n] y; // outcome
matrix[n, k] X; // regressors
matrix[100, k] Xpred; // new data for prediction
}
parameters {
real log_sigma;
vector[k] b;
}
transformed parameters {
vector[n] mu; // location
mu = X * b;
}
model {
y ~ normal(mu, exp(log_sigma));
log_sigma ~ normal(0, 1);
b[1] ~ normal(0.5, 1);
for (i in 2:k) b[i] ~ normal(0, 10);
}
generated quantities {
vector[100] brain_predicted;
vector[100] mass;
brain_predicted = Xpred * b * 1350;
for (i in 1:100) {
mass[i] = (Xpred[i, 2] * 10.90489) + 45.5;
}
}
'
plot_posterior_predictions <- function(model) {
datplot <- model %>%
spread_draws(brain_predicted[i], mass[i]) %>%
median_qi() %>%
drop_na
ggplot(datplot, aes(mass, brain_predicted)) +
geom_ribbon(aes(ymin = brain_predicted.lower, ymax = brain_predicted.upper), alpha = .2) +
geom_line() +
geom_point(data = d, aes(masskg, brainvolcc))
}
mod <- plt <- list()
for (i in seq_along(dat)) {
mod[[i]] <- stan(model_code = stan_program, data = dat[[i]])
plt[[i]] <- plot_posterior_predictions(mod[[i]])
}
(plt[[1]] + plt[[2]]) / (plt[[3]] + plt[[4]]) / (plt[[5]] + plt[[6]])