The models below are estimated with Stan and checked against the results reported in the book:
library(tidyverse)
library(tidybayes)
library(rstan)
library(patchwork)
options(mc.cores = 4)
stan_program <- '
data {
    int<lower=1> n;
    vector[n] height;
}
parameters {
    real mu;
    real<lower=0,upper=50> sigma;
}
model {
    height ~ normal(mu, sigma);
    sigma ~ uniform(0, 50);
    mu ~ normal(178, 20);
}
'
stan_data <- read.csv('data/Howell1.csv', sep = ';') %>%
    filter(age >= 18) %>%
    compose_data
m4.1 <- stan(model_code = stan_program, data = stan_data)
m4.1
## Inference for Stan model: 3afed02242edd49a51565886ad1f3a81.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu 154.61 0.01 0.41 153.81 154.32 154.61 154.89 155.42 3152 1
## sigma 7.77 0.01 0.29 7.23 7.58 7.77 7.96 8.40 3311 1
## lp__ -895.74 0.02 1.00 -898.51 -896.11 -895.44 -895.02 -894.77 1864 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 10:13:50 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
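As a quick check of the tidybayes workflow (an extra step, not part of the book's code), the posterior draws can be summarized directly:
m4.1 %>%
    spread_draws(mu, sigma) %>%
    mean_qi()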
stan_program <- '
data {
    int<lower=1> n;
    vector[n] height;
}
parameters {
    real mu;
    real<lower=0,upper=50> sigma;
}
model {
    height ~ normal(mu, sigma);
    sigma ~ uniform(0, 50);
    mu ~ normal(178, .1); // much narrower prior on mu than in m4.1 (sd 0.1 vs 20)
}
'
m4.2 <- stan(model_code = stan_program, data = stan_data)
m4.2
## Inference for Stan model: 4da6de81b997aa275cd318e37107868e.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
##             mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
## mu        177.86    0.00 0.10   177.66   177.80   177.86   177.93   178.07  2559    1
## sigma      24.58    0.02 0.91    22.91    23.95    24.56    25.17    26.44  2917    1
## lp__    -1301.58    0.02 0.99 -1304.14 -1301.97 -1301.27 -1300.87 -1300.61  1890    1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 10:14:10 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
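The narrow prior dominates the data: the posterior mean of mu barely moves from 178. A quick visual comparison of the two posteriors (an extra check, not in the book):
mu_draws <- bind_rows(
    spread_draws(m4.1, mu) %>% mutate(model = 'm4.1'),
    spread_draws(m4.2, mu) %>% mutate(model = 'm4.2'))
ggplot(mu_draws, aes(mu, fill = model)) +
    geom_density(alpha = .5)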
stan_program <- '
data {
    int<lower=1> n;
    real xbar;
    vector[n] height;
    vector[n] weight;
}
parameters {
    real<lower=0,upper=50> sigma;
    real<lower=0> b;
    real a;
}
model {
    vector[n] mu;
    mu = a + b * (weight - xbar);
    height ~ normal(mu, sigma);
    a ~ normal(178, 20);
    b ~ lognormal(0, 1);
    sigma ~ uniform(0, 50);
}
'
stan_data$xbar <- mean(stan_data$weight)
m4.3 <- stan(model_code = stan_program, data = stan_data)
m4.3
## Inference for Stan model: 6498fd004acf063dd7fe23beea1f8a87.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## sigma 5.1 0.00 0.19 4.74 4.97 5.09 5.23 5.48 4508 1
## b 0.9 0.00 0.04 0.82 0.87 0.90 0.93 0.98 3950 1
## a 154.6 0.00 0.27 154.06 154.42 154.60 154.79 155.12 3688 1
## lp__ -748.2 0.02 1.17 -751.16 -748.76 -747.92 -747.34 -746.83 2267 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 10:14:32 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
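Because the predictor is centered, a is the expected height at the mean weight. As an extra check (not in the book's code), the posterior expected height at, say, 50 kg can be computed directly from the draws:
m4.3 %>%
    spread_draws(a, b) %>%
    mutate(mu_at_50 = a + b * (50 - stan_data$xbar)) %>%
    mean_qi(mu_at_50)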
stan_program <- '
data {
    int<lower=1> n;      // number of observations
    int<lower=1> K;      // number of coefficients (including intercept)
    vector[n] height;    // outcome
    matrix[n, K] X;      // regressors
}
parameters {
    real<lower=0,upper=50> sigma;
    vector[K] b;
}
transformed parameters {
    vector[n] mu;
    mu = X * b;
}
model {
    height ~ normal(mu, sigma);
    b[1] ~ normal(178, 20);
    b[2] ~ lognormal(0, 1);
    if (K > 2) {
        for (i in 3:K) {
            b[i] ~ normal(0, 10);
        }
    }
    sigma ~ uniform(0, 50);
}
generated quantities {
    vector[n] muhat;
    for (i in 1:n) {
        muhat[i] = normal_rng(mu[i], sigma);
    }
}
'
dat <- read.csv('data/Howell1.csv', sep = ';') %>%
    mutate(weight = as.vector(scale(weight)))
datpred <- data.frame(weight = seq(min(dat$weight), max(dat$weight), length.out = 100))
stan_data <- compose_data(dat)
stan_data$X <- model.matrix(~weight, dat)
stan_data$K <- ncol(stan_data$X)
m4.4 <- stan(model_code = stan_program, data = stan_data)
stan_data$X <- model.matrix(~weight + I(weight^2), dat)
stan_data$K <- ncol(stan_data$X)
m4.5 <- stan(model_code = stan_program, data = stan_data)
stan_data$X <- model.matrix(~weight + I(weight^2) + I(weight^3), dat)
stan_data$K <- ncol(stan_data$X)
m4.6 <- stan(model_code = stan_program, data = stan_data)
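Each of these fits carries n-length mu and muhat vectors, so printing the whole object is unwieldy; restricting print() to the coefficients is easier to read (a convenience, not in the book):
print(m4.6, pars = c('b', 'sigma'))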
plot_predictions <- function(mod) {
    pred <- mod %>%
        spread_draws(mu[i], muhat[i]) %>%
        mean_qi %>%
        mutate(Weight = stan_data$weight,
               Height = stan_data$height) %>%
        arrange(Weight)
    ggplot(pred) +
        geom_ribbon(aes(Weight, ymin = muhat.lower, ymax = muhat.upper), alpha = .2) +
        geom_ribbon(aes(Weight, ymin = mu.lower, ymax = mu.upper), alpha = .2, fill = 'red') +
        geom_line(aes(Weight, mu)) +
        geom_point(aes(Weight, Height), shape = 1, color = 'dodgerblue4', alpha = .5) +
        ylab('Height')
}
p1 <- plot_predictions(m4.4) + ggtitle('Linear')
p2 <- plot_predictions(m4.5) + ggtitle('Quadratic')
p3 <- plot_predictions(m4.6) + ggtitle('Cubic')
# combine plots with the `patchwork` package
p1 + p2 + p3
library(splines)
dat <- read.csv('data/cherry_blossoms.csv', sep = ';') %>%
    filter(!is.na(doy))
num_knots <- 15
knot_list <- quantile(dat$year, probs = seq(0, 1, length.out = num_knots))
B <- bs(dat$year,
        knots = knot_list[-c(1, num_knots)],
        degree = 3,
        intercept = TRUE)
class(B) <- 'matrix'
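It can help to look at the basis functions themselves before estimating the model (a quick visual check, not required for the fit):
as.data.frame(B) %>%
    mutate(Year = dat$year) %>%
    pivot_longer(-Year, names_to = 'Basis', values_to = 'Value') %>%
    ggplot(aes(Year, Value, group = Basis)) +
    geom_line(alpha = .5)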
stan_data <- dat %>% compose_data(B = B,
                                  k = ncol(B),
                                  w = rep(0, ncol(B)))
stan_program <- '
data {
    int n;
    int k;
    int doy[n];
    matrix[n, k] B;
}
parameters {
    real a;
    vector[k] w;
    real<lower=0> sigma;
}
transformed parameters {
    vector[n] mu;
    mu = a + B * w;
}
model {
    for (i in 1:n) {
        doy[i] ~ normal(mu[i], sigma);
    }
    a ~ normal(100, 10);
    w ~ normal(0, 10);
    sigma ~ exponential(1);
}
'
m4.7 <- stan(model_code = stan_program, data = stan_data)
datplot <- m4.7 %>%
    gather_draws(mu[i]) %>%
    mean_qi
datplot$`Day in year` <- dat$doy
datplot$Year <- dat$year
ggplot(datplot, aes(Year, `Day in year`)) +
    geom_point(alpha = .4, color = 'blue') +
    geom_ribbon(aes(Year, .value, ymin = .lower, ymax = .upper), alpha = .2) +
    geom_line(aes(Year, .value))
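The posterior spline weights can also be summarized directly (an extra check, not in the book's code):
m4.7 %>%
    spread_draws(w[i]) %>%
    mean_qi()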