# Status

Estimated and checked against the book:

• m4.1
• m4.2
• m4.3
• m4.4
• m4.5
• m4.6
• m4.7

# Libraries

``````library(tidyverse)
library(tidybayes)
library(rstan)
library(patchwork)
options(mc.cores = 4)``````

# Section 4.3: Gaussian model of height

``````stan_program <- '
data {
int<lower=1> n;
vector[n] height;
}
parameters {
real mu;
real<lower=0,upper=50> sigma;
}
model {
height ~ normal(mu, sigma);
sigma ~ uniform(0, 50);
mu ~ normal(178, 20);
}
'
stan_data <- read.csv('data/Howell1.csv', sep = ';') %>%
filter(age >= 18) %>%
compose_data
m4.1 <- stan(model_code = stan_program, data = stan_data)``````
``m4.1``
``````## Inference for Stan model: 3afed02242edd49a51565886ad1f3a81.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
##          mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu     154.61    0.01 0.41  153.81  154.32  154.61  154.89  155.42  3152    1
## sigma    7.77    0.01 0.29    7.23    7.58    7.77    7.96    8.40  3311    1
## lp__  -895.74    0.02 1.00 -898.51 -896.11 -895.44 -895.02 -894.77  1864    1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 10:13:50 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).``````
``````stan_program <- '
data {
int<lower=1> n;
vector[n] height;
}
parameters {
real mu;
real<lower=0,upper=50> sigma;
}
model {
height ~ normal(mu, sigma);
sigma ~ uniform(0, 50);
mu ~ normal(178, .1);
}
'
m4.2 <- stan(model_code = stan_program, data = stan_data)``````
``m4.2``
``````## Inference for Stan model: 4da6de81b997aa275cd318e37107868e.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
##           mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff
## mu      177.86    0.00 0.10   177.66   177.80   177.86   177.93   178.07  2559
## sigma    24.58    0.02 0.91    22.91    23.95    24.56    25.17    26.44  2917
## lp__  -1301.58    0.02 0.99 -1304.14 -1301.97 -1301.27 -1300.87 -1300.61  1890
##       Rhat
## mu       1
## sigma    1
## lp__     1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 10:14:10 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).``````

# Section 4.4: Linear prediction

``````stan_program <- '
data {
int<lower=1> n;
real xbar;
vector[n] height;
vector[n] weight;
}
parameters {
real<lower=0,upper=50> sigma;
real<lower=0> b;
real a;
}
model {
vector[n] mu;
mu = a + b * (weight - xbar);
height ~ normal(mu, sigma);
a ~ normal(178, 20);
b ~ lognormal(0, 1);
sigma ~ uniform(0, 50);
}
'

stan_data\$xbar <- mean(stan_data\$weight)

m4.3 <- stan(model_code = stan_program, data = stan_data)``````
``m4.3``
``````## Inference for Stan model: 6498fd004acf063dd7fe23beea1f8a87.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
##         mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## sigma    5.1    0.00 0.19    4.74    4.97    5.09    5.23    5.48  4508    1
## b        0.9    0.00 0.04    0.82    0.87    0.90    0.93    0.98  3950    1
## a      154.6    0.00 0.27  154.06  154.42  154.60  154.79  155.12  3688    1
## lp__  -748.2    0.02 1.17 -751.16 -748.76 -747.92 -747.34 -746.83  2267    1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 10:14:32 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).``````

# Section 4.5: Curves from lines

``````stan_program <- '
data {
int<lower=1> n;   // number of observations
int<lower=1> K;   // number of coefficients (including intercept)
vector[n] height;      // outcome
matrix[n, K] X;   // regressors
}
parameters {
real<lower=0,upper=50> sigma;
vector[K] b;
}
transformed parameters {
vector[n] mu;
mu = X * b;
}
model {
height ~ normal(mu, sigma);
b[1] ~ normal(178, 20);
b[2] ~ lognormal(0, 1);
if (K > 2) {
for (i in 3:K) {
b[i] ~ normal(0, 10);
}
}
sigma ~ uniform(0, 50);
}
generated quantities {
vector[n] muhat;
for (i in 1:n) {
muhat[i] = normal_rng(mu[i], sigma);
}
}
'

dat <- read.csv('data/Howell1.csv', sep = ';') %>%
mutate(weight = as.vector(scale(weight)))
datpred <- data.frame(weight = seq(min(dat\$weight), max(dat\$weight), length.out = 100))
stan_data <- compose_data(dat)

stan_data\$X <- model.matrix(~weight, dat)
stan_data\$K <- ncol(stan_data\$X)
m4.4 <- stan(model_code = stan_program, data = stan_data)

stan_data\$X <- model.matrix(~weight + I(weight^2), dat)
stan_data\$K <- ncol(stan_data\$X)
m4.5 <- stan(model_code = stan_program, data = stan_data)

stan_data\$X <- model.matrix(~weight + I(weight^2) + I(weight^3), dat)
stan_data\$K <- ncol(stan_data\$X)
m4.6 <- stan(model_code = stan_program, data = stan_data)``````
``````plot_predictions <- function(mod) {
pred <- mod %>%
mean_qi %>%
mutate(Weight = stan_data\$weight,
Height = stan_data\$height) %>%
arrange(Weight)
ggplot(pred) +
geom_ribbon(aes(Weight, ymin = muhat.lower, ymax = muhat.upper), alpha = .2) +
geom_ribbon(aes(Weight, ymin = mu.lower, ymax = mu.upper), alpha = .2, fill = 'red') +
geom_line(aes(Weight, mu)) +
geom_point(aes(Weight, Height), shape = 1, color = 'dodgerblue4', alpha = .5) +
ylab('Height')
}

p1 <- plot_predictions(m4.4) + ggtitle('Linear')
p3 <- plot_predictions(m4.6) + ggtitle('Cubic')

# combine plots with the `patchwork` package
p1 + p2 + p3``````

#### Cherry blossom splines

``````library(splines)

dat <- read.csv('data/cherry_blossoms.csv', sep = ';') %>%
filter(!is.na(doy))
num_knots <- 15
knot_list <- quantile(dat\$year, probs = seq(0, 1, length.out = num_knots))
B <- bs(dat\$year,
knot = knot_list[-c(1, num_knots)],
degree = 3,
intercept = TRUE)
class(B) <- 'matrix'
stan_data <- dat %>% compose_data(B = B,
k = ncol(B),
w = rep(0, ncol(B)))

stan_program <- '
data {
int n;
int k;
int doy[n];
matrix[n, k] B;
}
parameters {
real a;
vector[k] w;
real<lower=0> sigma;
}
transformed parameters {
vector[n] mu;
mu = a + B * w;
}
model {
for (i in 1:n) {
doy[i] ~ normal(mu[i], sigma);
}
a ~ normal(100, 10);
w ~ normal(0, 10);
sigma ~ exponential(1);
}
'

m4.7 <- stan(model_code = stan_program, data = stan_data)``````
``````datplot <- m4.7 %>%
gather_draws(mu[i]) %>%
mean_qi
datplot\$`Day in year` <- dat\$doy
datplot\$Year <- dat\$year

ggplot(datplot, aes(Year, `Day in year`)) +
geom_point(alpha = .4, color = 'blue') +
geom_ribbon(aes(Year, .value, ymin = .lower, ymax = .upper), alpha = .2) +
geom_line(aes(Year, .value))``````