Estimated and checked against the book:
library(tidyverse)
library(tidybayes)
library(bayesplot)
library(rstan)
library(patchwork)
options(mc.cores = parallel::detectCores())
rugged <- read.csv('data/rugged.csv', sep = ';') %>%
mutate(log_gdp = log(rgdppc_2000),
log_gdp_std = log_gdp / mean(log_gdp, na.rm = TRUE),
rugged_std = rugged / max(rugged, na.rm = TRUE),
region = ifelse(cont_africa == 1, 'Africa', 'Not Africa')) %>%
select(country, log_gdp_std, rugged_std, region) %>%
drop_na
stan_data <- compose_data(rugged)
stan_program <- '
data {
int<lower=1> n; // number of observations
vector[n] log_gdp_std; // outcome
vector[n] rugged_std; // regressor
int region[n]; // africa indicator
}
parameters {
real<lower=0> sigma;
vector[2] a;
vector[2] b;
}
model {
vector[n] mu;
for (i in 1:n) {
mu[i] = a[region[i]] + b[region[i]] * (rugged_std[i] - 0.215);
}
a ~ normal(1, 0.1);
b ~ normal(0, 0.3);
sigma ~ exponential(1);
log_gdp_std ~ normal(mu, sigma);
}
'
m9.1 <- stan(model_code = stan_program, data = stan_data)
m9.1
## Inference for Stan model: b01cd93ce6fd8681789fca5db2e21b98.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## sigma 0.11 0.00 0.01 0.10 0.11 0.11 0.12 0.12 3988 1
## a[1] 0.89 0.00 0.02 0.86 0.88 0.89 0.91 0.93 3931 1
## a[2] 1.04 0.00 0.01 1.02 1.04 1.04 1.05 1.06 4456 1
## b[1] 0.17 0.00 0.09 -0.02 0.11 0.17 0.23 0.34 3965 1
## b[2] -0.18 0.00 0.07 -0.32 -0.23 -0.18 -0.13 -0.03 3969 1
## lp__ 285.08 0.04 1.65 281.06 284.19 285.42 286.33 287.25 1849 1
##
## Samples were drawn using NUTS(diag_e) at Sat Aug 1 13:47:34 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
mcmc_trace(m9.1)
set.seed(11)
stan_data <- data.frame(y = c(-1, 1)) %>%
compose_data
stan_program <- '
data {
int<lower=1> n;
real y[n];
}
parameters {
real alpha;
real<lower=0> sigma;
}
transformed parameters {
real mu;
mu = alpha;
}
model {
y ~ normal(mu, sigma);
alpha ~ normal(0, 1000);
sigma ~ exponential(0.0001);
}
'
m9.2 <- stan(model_code = stan_program, data = stan_data)
mcmc_trace(m9.2, c('alpha', 'sigma'))
More information in priors:
stan_program <- '
data {
int<lower=1> n;
real y[n];
}
parameters {
real alpha;
real<lower=0> sigma;
}
transformed parameters {
real mu;
mu = alpha;
}
model {
y ~ normal(mu, sigma);
alpha ~ normal(0, 10);
sigma ~ exponential(1);
}
'
m9.3 <- stan(model_code = stan_program, data = stan_data)
mcmc_trace(m9.3, c('alpha', 'sigma'))
Non-identifiable:
set.seed(41)
dat <- data.frame(y = rnorm(100)) %>%
compose_data
stan_program <- '
data {
int<lower=1> n;
real y[n];
}
parameters {
real alpha[2];
real<lower=0> sigma;
}
model {
real mu;
mu = alpha[1] + alpha[2];
y ~ normal(mu, sigma);
alpha[1] ~ normal(0, 1000);
alpha[2] ~ normal(0, 1000);
sigma ~ exponential(1);
}
'
m9.4 <- stan(model_code = stan_program, data = stan_data)
mcmc_trace(m9.4)
m9.4
## Inference for Stan model: 00fa0bf02cee0cf9124d2e44c149837e.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## alpha[1] 57.45 50.21 691.83 -1333.76 -408.50 69.97 543.81 1411.59 190
## alpha[2] -57.41 50.21 691.81 -1411.82 -544.36 -70.14 407.82 1333.93 190
## sigma 1.74 0.04 0.85 0.80 1.16 1.49 2.06 4.13 438
## lp__ -3.75 0.06 1.44 -7.46 -4.37 -3.35 -2.69 -2.15 549
## Rhat
## alpha[1] 1.00
## alpha[2] 1.00
## sigma 1.01
## lp__ 1.00
##
## Samples were drawn using NUTS(diag_e) at Sat Aug 1 13:48:39 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Weakly regularized priors:
model <- '
data {
int<lower=1> n;
real y[n];
}
parameters {
real alpha[2];
real<lower=0> sigma;
}
transformed parameters {
real mu;
mu = alpha[1] + alpha[2];
}
model {
y ~ normal(mu, sigma);
alpha[1] ~ normal(0, 10);
alpha[2] ~ normal(0, 10);
sigma ~ exponential(1);
}
'
m9.5 <- stan(model_code = model, data = dat, control = list(adapt_delta = 0.99), iter = 10000)
mcmc_trace(m9.5)
m9.5
## Inference for Stan model: 5559ff6a3c8d17e90d6b4581d5f51dff.
## 4 chains, each with iter=10000; warmup=5000; thin=1;
## post-warmup draws per chain=5000, total post-warmup draws=20000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## alpha[1] 0.32 0.10 7.01 -13.35 -4.39 0.38 5.00 13.92 4646 1
## alpha[2] -0.13 0.10 7.00 -13.77 -4.82 -0.21 4.55 13.53 4648 1
## sigma 1.03 0.00 0.07 0.90 0.98 1.03 1.08 1.19 5708 1
## mu 0.19 0.00 0.10 -0.01 0.12 0.19 0.26 0.39 17196 1
## lp__ -54.77 0.02 1.21 -57.86 -55.31 -54.46 -53.88 -53.39 4499 1
##
## Samples were drawn using NUTS(diag_e) at Sat Aug 1 13:49:11 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).