# Status

Estimated and checked against the book:

• m7.1
• m7.2
• m7.3
• m7.4
• m7.5
• m7.6

Warning: In the book, models m7.1 to m7.6 are estimated using quap rather than Stan. My estimates are similar for models m7.1 to 7.3, but diverge somewhat for the others. Also, my compatibility intervals for $$\mu$$ are very different. Not sure what the problem is.

# Libraries

library(tidyverse)
library(tidybayes)
library(rstan)
library(patchwork)
options(mc.cores = 4)

# Section 7.1: The problem with parameters

d <- tibble(sppnames = c("afarensis", "africanus", "habilis", "boisei", "rudolfensis", "ergaster", "sapiens"),
brainvolcc = c( 438 , 452 , 612, 521, 752, 871, 1350 ),
masskg = c( 37.0 , 35.5 , 34.5 , 41.5 , 55.5 , 61.0 , 53.5 )) %>%
mutate(brain_std = brainvolcc / max(brainvolcc),
mass_std1 = (masskg - mean(masskg)) / sd(masskg),
mass_std2 = mass_std1^2,
mass_std3 = mass_std1^3,
mass_std4 = mass_std1^4,
mass_std5 = mass_std1^5,
mass_std6 = mass_std1^6)
dpred <- tibble(mass_std1 = seq(min(d$mass_std1), max(d$mass_std1), length.out = 100),
mass_std2 = mass_std1^2,
mass_std3 = mass_std1^3,
mass_std4 = mass_std1^4,
mass_std5 = mass_std1^5,
mass_std6 = mass_std1^6)

prepare_data <- function(formula) {
out <- compose_data(d,
X = model.matrix(formula, d),
Xpred = model.matrix(formula, dpred),
y = d\$brain_std,
k = ncol(X))
return(out)
}

dat <- list(~ mass_std1,
~ mass_std1 + mass_std2,
~ mass_std1 + mass_std2 + mass_std3,
~ mass_std1 + mass_std2 + mass_std3 + mass_std4,
~ mass_std1 + mass_std2 + mass_std3 + mass_std4 + mass_std5,
~ mass_std1 + mass_std2 + mass_std3 + mass_std4 + mass_std5 + mass_std6)
dat <- lapply(dat, prepare_data)

stan_program <- '
data {
int<lower=1> n;        // number of observations
int<lower=1> k;        // number of regressors
vector[n] y;           // outcome
matrix[n, k] X;        // regressors
matrix[100, k] Xpred;  // new data for prediction
}
parameters {
real log_sigma;
vector[k] b;
}
transformed parameters {
vector[n] mu;                    // location
mu = X * b;
}
model {
y ~ normal(mu, exp(log_sigma));
log_sigma ~ normal(0, 1);
b[1] ~ normal(0.5, 1);
for (i in 2:k) b[i] ~ normal(0, 10);
}
generated quantities {
vector[100] brain_predicted;
vector[100] mass;
brain_predicted = Xpred * b * 1350;
for (i in 1:100) {
mass[i] = (Xpred[i, 2] * 10.90489) + 45.5;
}
}
'

plot_posterior_predictions <- function(model) {
datplot <- model %>%
median_qi() %>%
drop_na
ggplot(datplot, aes(mass, brain_predicted)) +
geom_ribbon(aes(ymin = brain_predicted.lower, ymax = brain_predicted.upper), alpha = .2) +
geom_line() +
geom_point(data = d, aes(masskg, brainvolcc))
}

mod <- plt <- list()
for (i in seq_along(dat)) {
mod[[i]] <- stan(model_code = stan_program, data = dat[[i]])
plt[[i]] <- plot_posterior_predictions(mod[[i]])
}

(plt[[1]] + plt[[2]]) / (plt[[3]] + plt[[4]]) /  (plt[[5]] + plt[[6]])