Estimated and checked against the book:
library(tidyverse)
library(tidybayes)
library(rstan)
library(patchwork)
options(mc.cores = 4)
set.seed(909)
N <- 100
stan_data <- tibble(height = rnorm(N,10,2),
leg_prop = runif(N,0.4,0.5),
leg_left = leg_prop*height + rnorm( N , 0 , 0.02 ),
leg_right = leg_prop*height + rnorm( N , 0 , 0.02 )) %>%
compose_data
stan_program <- '
data {
int n;
vector[n] height;
vector[n] leg_left;
vector[n] leg_right;
}
parameters {
real a;
real bl;
real br;
real<lower=0> sigma;
}
model {
vector[n] mu;
mu = a + bl * leg_left + br * leg_right;
height ~ normal(mu, sigma);
a ~ normal(10, 100);
bl ~ normal(2, 10);
br ~ normal(2, 10);
sigma ~ exponential(1);
}
'
m6.1 <- stan(model_code = stan_program, data = stan_data)
m6.1
## Inference for Stan model: 6a21a0fb5e106f7ac61d55274e661840.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## a 0.97 0.01 0.30 0.36 0.78 0.97 1.17 1.55 2004 1
## bl 0.25 0.06 2.57 -4.85 -1.45 0.29 2.00 5.23 1650 1
## br 1.75 0.06 2.58 -3.25 -0.01 1.70 3.44 6.82 1648 1
## sigma 0.64 0.00 0.05 0.55 0.60 0.63 0.66 0.73 2293 1
## lp__ -5.22 0.04 1.52 -9.08 -5.91 -4.85 -4.11 -3.41 1304 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:50:15 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
# Figure 6.2
datplot <- m6.1 %>%
spread_draws(bl, br) %>%
mutate(bl_br_sum = bl + br)
p1 <- ggplot(datplot, aes(br, bl)) + geom_point(alpha = .1)
p2 <- ggplot(datplot, aes(bl_br_sum)) + geom_density()
p1 + p2
stan_program <- '
data {
int n;
vector[n] height;
vector[n] leg_left;
vector[n] leg_right;
}
parameters {
real a;
real bl;
real<lower=0> sigma;
}
model {
vector[n] mu;
mu = a + bl * leg_left;
height ~ normal(mu, sigma);
a ~ normal(10, 100);
bl ~ normal(2, 10);
sigma ~ exponential(1);
}
'
m6.2 <- stan(model_code = stan_program, data = stan_data)
m6.2
## Inference for Stan model: 9894da73a0f125d7189dd60c63773ccc.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## a 0.98 0.01 0.29 0.42 0.79 0.99 1.18 1.56 1234 1
## bl 2.00 0.00 0.06 1.87 1.95 1.99 2.04 2.12 1276 1
## sigma 0.63 0.00 0.05 0.55 0.60 0.63 0.66 0.73 1722 1
## lp__ -4.87 0.03 1.20 -7.91 -5.41 -4.58 -3.99 -3.48 1585 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:50:40 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
dat <- read.csv('data/milk.csv', sep = ';') %>%
mutate(K = as.vector(scale(kcal.per.g)),
F = as.vector(scale(perc.fat)),
L = as.vector(scale(perc.lactose)))
stan_program <- '
data {
int n;
int k;
matrix[n, k] X;
vector[n] y;
}
parameters {
vector[k] b;
real<lower=0> sigma;
}
model {
vector[n] mu;
mu = X * b;
y ~ normal(mu, sigma);
b ~ normal(0, 0.5);
sigma ~ exponential(1);
}
'
stan_data <- compose_data(dat,
y = K,
X = model.matrix(~F, dat),
k = ncol(X))
m6.3 <- stan(model_code = stan_program, data = stan_data)
stan_data <- compose_data(dat,
y = K,
X = model.matrix(~L, dat),
k = ncol(X))
m6.4 <- stan(model_code = stan_program, data = stan_data)
stan_data <- compose_data(dat,
y = K,
X = model.matrix(~F + L, dat),
k = ncol(X))
m6.5 <- stan(model_code = stan_program, data = stan_data)
m6.3
## Inference for Stan model: 57d226675d4bd9c6ae3ce51f63a1d8e3.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## b[1] 0.00 0.00 0.09 -0.19 -0.06 0.00 0.06 0.18 3570 1
## b[2] 0.86 0.00 0.09 0.66 0.80 0.86 0.92 1.04 3227 1
## sigma 0.49 0.00 0.07 0.37 0.44 0.48 0.53 0.65 2732 1
## lp__ 4.02 0.04 1.38 0.36 3.45 4.38 5.02 5.54 1558 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:51:07 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
m6.4
## Inference for Stan model: 57d226675d4bd9c6ae3ce51f63a1d8e3.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## b[1] 0.00 0.00 0.08 -0.15 -0.05 0.00 0.05 0.15 3649 1
## b[2] -0.90 0.00 0.08 -1.05 -0.95 -0.90 -0.85 -0.75 3482 1
## sigma 0.41 0.00 0.06 0.32 0.37 0.41 0.45 0.54 2707 1
## lp__ 8.81 0.03 1.25 5.58 8.25 9.13 9.73 10.26 1673 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:51:07 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
m6.5
## Inference for Stan model: 57d226675d4bd9c6ae3ce51f63a1d8e3.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## b[1] 0.00 0.00 0.08 -0.14 -0.05 0.00 0.05 0.15 2911 1
## b[2] 0.25 0.00 0.20 -0.13 0.12 0.25 0.38 0.65 1893 1
## b[3] -0.67 0.00 0.20 -1.06 -0.80 -0.67 -0.54 -0.28 1939 1
## sigma 0.41 0.00 0.06 0.32 0.37 0.41 0.45 0.54 2282 1
## lp__ 9.15 0.04 1.47 5.51 8.39 9.48 10.22 11.01 1437 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:51:08 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## R code 6.13
set.seed(71)
# number of plants
N <- 100
# simulate initial heights
h0 <- rnorm(N,10,2)
# assign treatments and simulate fungus and growth
treatment <- rep( 0:1 , each=N/2 )
fungus <- rbinom( N , size=1 , prob=0.5 - treatment*0.4 )
h1 <- h0 + rnorm(N, 5 - 3*fungus)
# compose a clean data frame
d <- data.frame( h0=h0 , h1=h1 , treatment=treatment , fungus=fungus )
stan_data <- compose_data(d)
stan_program <- '
data {
int n;
vector[n] h1;
vector[n] h0;
}
parameters {
real p;
real<lower=0> sigma;
}
model {
vector[n] mu;
mu = h0 * p;
h1 ~ normal(mu, sigma);
sigma ~ exponential(1);
p ~ lognormal(0, 0.25);
}
'
m6.6 <- stan(model_code = stan_program, data = stan_data)
m6.6
## Inference for Stan model: c3b555e32b2b63ee697970fdcfef4113.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## p 1.43 0.00 0.02 1.39 1.41 1.43 1.44 1.46 3335 1
## sigma 1.83 0.00 0.13 1.61 1.74 1.82 1.91 2.09 2690 1
## lp__ -112.83 0.02 0.98 -115.47 -113.20 -112.53 -112.14 -111.88 2137 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:51:31 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
stan_program <- '
data {
int n;
vector[n] h1;
vector[n] h0;
vector[n] treatment;
vector[n] fungus;
}
parameters {
real bt;
real bf;
real a;
real<lower=0> sigma;
}
model {
vector[n] mu;
vector[n] p;
p = a + bt * treatment + bf * fungus;
mu = h0 .* p;
h1 ~ normal(mu, sigma);
a ~ lognormal(0, 0.25);
bt ~ normal(0, 0.5);
bf ~ normal(0, 0.5);
sigma ~ exponential(1);
}
'
m6.7 <- stan(model_code = stan_program, data = stan_data)
m6.7
## Inference for Stan model: ad0eb54949ddc84fc0c13bafb5bcabf9.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## bt 0.00 0.00 0.03 -0.06 -0.02 0.00 0.02 0.06 1736 1
## bf -0.27 0.00 0.04 -0.34 -0.29 -0.27 -0.24 -0.20 2137 1
## a 1.48 0.00 0.03 1.43 1.47 1.48 1.50 1.53 1654 1
## sigma 1.45 0.00 0.11 1.25 1.37 1.44 1.51 1.68 2826 1
## lp__ -89.85 0.03 1.41 -93.39 -90.56 -89.53 -88.80 -88.09 1822 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:51:54 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
stan_program <- '
data {
int n;
vector[n] h1;
vector[n] h0;
vector[n] treatment;
vector[n] fungus;
}
parameters {
real bt;
real a;
real<lower=0> sigma;
}
model {
vector[n] mu;
vector[n] p;
p = a + bt * treatment;
mu = h0 .* p;
h1 ~ normal(mu, sigma);
a ~ lognormal(0, 0.25);
bt ~ normal(0, 0.5);
sigma ~ exponential(1);
}
'
m6.8 <- stan(model_code = stan_program, data = stan_data)
m6.8
## Inference for Stan model: 6184ea605a71870fc61fbbc528db9e32.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## bt 0.08 0.00 0.03 0.02 0.06 0.08 0.11 0.15 1936 1
## a 1.38 0.00 0.03 1.33 1.36 1.38 1.40 1.43 2020 1
## sigma 1.79 0.00 0.13 1.56 1.70 1.78 1.87 2.06 2508 1
## lp__ -110.47 0.03 1.19 -113.54 -111.02 -110.17 -109.59 -109.09 1673 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:52:30 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
# remotes::install_github('rmcelreath/rethinking')
library(rethinking)
stan_data <- sim_happiness(seed = 1977, N_years = 1000) %>%
filter(age > 17) %>%
mutate(age = (age - 18) / (65 - 18),
married = married + 1) %>%
compose_data
stan_program <- '
data {
int n;
vector[n] happiness;
vector[n] age;
int married[n];
}
parameters {
real a[2];
real bA;
real sigma;
}
model {
vector[n] mu;
for (i in 1:n) {
mu[i] = a[married[i]] + bA * age[i];
}
happiness ~ normal(mu, sigma);
a ~ normal(0, 1);
bA ~ normal(0, 2);
sigma ~ exponential(1);
}
'
m6.9 <- stan(model_code = stan_program, data = stan_data)
m6.9
## Inference for Stan model: e94e7c77147afa14e7669ca2febb0e9f.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## a[1] -0.23 0.00 0.06 -0.36 -0.28 -0.24 -0.19 -0.11 1475 1
## a[2] 1.26 0.00 0.08 1.09 1.20 1.26 1.32 1.43 1500 1
## bA -0.75 0.00 0.11 -0.97 -0.83 -0.75 -0.67 -0.53 1332 1
## sigma 0.99 0.00 0.02 0.95 0.98 0.99 1.01 1.04 2630 1
## lp__ -474.82 0.04 1.46 -478.48 -475.53 -474.48 -473.77 -473.01 1705 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:52:59 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
stan_program <- '
data {
int n;
vector[n] happiness;
vector[n] age;
int married[n];
}
parameters {
real a;
real bA;
real sigma;
}
model {
vector[n] mu;
for (i in 1:n) {
mu[i] = a + bA * age[i];
}
happiness ~ normal(mu, sigma);
a ~ normal(0, 1);
bA ~ normal(0, 2);
sigma ~ exponential(1);
}
'
m6.10 <- stan(model_code = stan_program, data = stan_data)
m6.10
## Inference for Stan model: 66129038c498a75608907a5db88dfcd0.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## a 0.00 0.00 0.07 -0.14 -0.05 0.00 0.05 0.15 1583 1
## bA -0.01 0.00 0.13 -0.26 -0.09 -0.01 0.08 0.24 1515 1
## sigma 1.22 0.00 0.03 1.16 1.20 1.22 1.24 1.27 2371 1
## lp__ -668.86 0.03 1.19 -671.89 -669.46 -668.57 -667.97 -667.47 1414 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:53:21 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## R code 6.25
N <- 200 # number of grandparent-parent-child triads
b_GP <- 1 # direct effect of G on P
b_GC <- 0 # direct effect of G on C
b_PC <- 1 # direct effect of P on C
b_U <- 2 # direct effect of U on P and C
## R code 6.26
set.seed(1)
U <- 2*rbern( N , 0.5 ) - 1
G <- rnorm( N )
P <- rnorm( N , b_GP*G + b_U*U )
C <- rnorm( N , b_PC*P + b_GC*G + b_U*U )
d <- data.frame( C=C , P=P , G=G , U=U )
stan_data <- compose_data(d)
stan_program <- '
data {
int n;
vector[n] C;
vector[n] P;
vector[n] G;
}
parameters {
real a;
real b_PC;
real b_GC;
real<lower=0> sigma;
}
model {
vector[n] mu;
mu = a + b_PC * P + b_GC * G;
C ~ normal(mu, sigma);
sigma ~ exponential(1);
b_PC ~ normal(0, 1);
b_GC ~ normal(0, 1);
}
'
m6.11 <- stan(model_code = stan_program, data = stan_data)
stan_program <- '
data {
int n;
vector[n] C;
vector[n] P;
vector[n] G;
vector[n] U;
}
parameters {
real a;
real b_PC;
real b_GC;
real b_U;
real<lower=0> sigma;
}
model {
vector[n] mu;
mu = a + b_PC * P + b_GC * G + b_U * U;
C ~ normal(mu, sigma);
sigma ~ exponential(1);
b_PC ~ normal(0, 1);
b_GC ~ normal(0, 1);
b_U ~ normal(0, 1);
}
'
m6.12 <- stan(model_code = stan_program, data = stan_data)
m6.11
## Inference for Stan model: 05d80ceefb44ad92d033a1a4fe0aa6be.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## a -0.12 0.00 0.10 -0.32 -0.19 -0.12 -0.05 0.08 3880 1
## b_PC 1.79 0.00 0.05 1.70 1.76 1.79 1.82 1.88 3722 1
## b_GC -0.84 0.00 0.11 -1.04 -0.91 -0.84 -0.77 -0.62 3860 1
## sigma 1.43 0.00 0.07 1.30 1.38 1.43 1.48 1.58 3717 1
## lp__ -174.38 0.03 1.44 -177.94 -175.08 -174.07 -173.33 -172.60 1847 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:53:42 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
m6.12
## Inference for Stan model: 63f14fe8921051e80ae787d3b155dee6.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## a -0.12 0.00 0.07 -0.27 -0.17 -0.12 -0.07 0.02 2946 1
## b_PC 1.01 0.00 0.07 0.88 0.97 1.01 1.06 1.15 1453 1
## b_GC -0.04 0.00 0.10 -0.24 -0.11 -0.04 0.03 0.15 1757 1
## b_U 1.99 0.00 0.16 1.68 1.89 2.00 2.10 2.29 1619 1
## sigma 1.04 0.00 0.05 0.94 1.00 1.03 1.07 1.14 3151 1
## lp__ -110.47 0.04 1.67 -114.49 -111.34 -110.10 -109.25 -108.30 1588 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jul 30 11:54:05 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).