Estimated and checked against book:
TODO:
library(tidyverse)
library(tidybayes)
library(rstan)
library(patchwork)
library(rethinking)
options(mc.cores = 4)
d <- read.csv('data/WaffleDivorce.csv', sep = ';')
stan_data <- list(
D_obs = standardize( d$Divorce ),
D_sd = d$Divorce.SE / sd( d$Divorce ),
M = standardize( d$Marriage ),
A = standardize( d$MedianAgeMarriage ),
N = nrow(d)
)
stan_program <- "
data {
int N;
vector[N] D_obs;
vector[N] D_sd;
vector[N] M;
vector[N] A;
}
parameters {
vector[N] D_true;
real a;
real bA;
real bM;
real<lower=0> sigma;
}
model {
vector[N] mu;
mu = a + bA * A + bM * M;
D_true ~ normal(mu, sigma);
D_obs ~ normal(D_true, D_sd);
a ~ normal(0, .2);
bA ~ normal(0, .5);
bM ~ normal(0, .5);
sigma ~ exponential(1);
}
"
m15.1 <- stan(model_code = stan_program, data = stan_data)
stan_data <- list(
D_obs = standardize( d$Divorce ),
D_sd = d$Divorce.SE / sd( d$Divorce ),
M_obs = standardize( d$Marriage ),
M_sd = d$Marriage.SE / sd( d$Marriage ),
A = standardize( d$MedianAgeMarriage ),
N = nrow(d)
)
stan_program <- "
data {
int N;
vector[N] D_obs;
vector[N] D_sd;
vector[N] M_obs;
vector[N] M_sd;
vector[N] A;
}
parameters {
vector[N] D_true;
real a;
real bA;
real bM;
real<lower=0> sigma;
real M_true[N];
}
model {
vector[N] mu;
for (i in 1:N) {
mu[i] = a + bA * A[i] + bM * M_true[i];
}
D_true ~ normal(mu, sigma);
D_obs ~ normal(D_true, D_sd);
M_true ~ normal(0, 1);
M_obs ~ normal(M_true, M_sd);
a ~ normal(0, .2);
bA ~ normal(0, .5);
bM ~ normal(0, .5);
sigma ~ exponential(1);
}
"
m15.2 <- stan(model_code = stan_program, data = stan_data)
datpost <- m15.2 %>%
spread_draws(M_true[i], D_true[i]) %>%
mean_qi %>%
select(Marriage = M_true, Divorce = D_true)
datreal <- tibble(Marriage = stan_data$M_obs, Divorce = stan_data$D_obs)
ggplot(datpost) +
geom_point(aes(Marriage, Divorce), color = 'blue') +
geom_point(data = datreal, aes(Marriage, Divorce), color = 'black')
## R code 15.7
N <- 500
A <- rnorm(N)
M <- rnorm(N,-A)
D <- rnorm(N,A)
A_obs <- rnorm(N,A)
## R code 15.8
N <- 100
S <- rnorm( N )
H <- rbinom( N , size=10 , inv_logit(S) )
## R code 15.9
D <- rbern( N ) # dogs completely random
Hm <- H
Hm[D==1] <- NA
## R code 15.10
D <- ifelse( S > 0 , 1 , 0 )
Hm <- H
Hm[D==1] <- NA
set.seed(501)
N <- 1000
X <- rnorm(N)
S <- rnorm(N)
H <- rbinom( N , size=10 , inv_logit( 2 + S - 2*X ) )
D <- ifelse( X > 1 , 1 , 0 )
Hm <- H
Hm[D==1] <- NA
## R code 15.12
stan_data <- list(
H = H,
S = S,
n = length(H))
stan_program <- "
data{
int n;
int H[n];
vector[n] S;
}
parameters {
real a;
real bS;
}
model {
vector[n] p;
p = a + bS * S;
p = inv_logit(p);
H ~ binomial(10, p);
bS ~ normal(0, .5);
a ~ normal(0, 1);
}
"
m15.3 <- stan(model_code = stan_program, data = stan_data)
m15.3
## Inference for Stan model: 3e53364a2ddc82d55911684e4b84e241.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## a 1.11 0.00 0.02 1.06 1.10 1.11 1.13 1.16 2426
## bS 0.69 0.00 0.03 0.64 0.67 0.69 0.71 0.74 2971
## lp__ -5365.58 0.02 1.01 -5368.34 -5365.96 -5365.28 -5364.85 -5364.60 1684
## Rhat
## a 1
## bS 1
## lp__ 1
##
## Samples were drawn using NUTS(diag_e) at Sun Aug 2 09:49:08 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
stan_data <- list(H = H[D == 0], S = S[D == 0], n = length(H[D == 0]))
stan_program <- "
data{
int n;
int H[n];
vector[n] S;
}
parameters {
real a;
real bS;
}
model {
vector[n] p;
p = a + bS * S;
p = inv_logit(p);
H ~ binomial(10, p);
bS ~ normal(0, .5);
a ~ normal(0, 1);
}
"
m15.4 <- stan(model_code = stan_program, data = stan_data)
m15.4
## Inference for Stan model: 3e53364a2ddc82d55911684e4b84e241.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## a 1.80 0.00 0.03 1.73 1.77 1.80 1.82 1.86 1862
## bS 0.83 0.00 0.03 0.76 0.81 0.83 0.85 0.89 1947
## lp__ -3333.33 0.02 1.04 -3336.08 -3333.71 -3333.00 -3332.61 -3332.35 1879
## Rhat
## a 1
## bS 1
## lp__ 1
##
## Samples were drawn using NUTS(diag_e) at Sun Aug 2 09:49:10 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Stan cannot handle NA
s. Instead, we replace NA
s by Inf
, and we check for positive infinity in a loop to construct a B_merge
vector. I’m sure there is a more computationally efficient (but convoluted?) way to achieve the same goal, but this hack is transparent and easy to understand.
stan_data <- read.csv('data/milk.csv', sep = ';') %>%
mutate(neocortex.prop = neocortex.perc / 100,
logmass = log(mass),
K = standardize(kcal.per.g),
M = standardize(logmass),
B = standardize(neocortex.prop),
B = ifelse(is.na(B), Inf, B)) %>%
compose_data
stan_program <- "
data {
int n;
vector[n] K;
vector[n] M;
vector[n] B;
}
parameters {
real nu;
real a;
real bB;
real bM;
real<lower=0> sigma_B;
real<lower=0> sigma;
vector[n] B_miss;
}
transformed parameters {
vector[n] B_merge;
for (i in 1:n) {
if (B[i] == positive_infinity()) {
B_merge[i] = B_miss[i];
} else {
B_merge[i] = B[i];
}
}
}
model {
vector[n] mu;
[sigma, sigma_B] ~ exponential(1);
[a, nu, bB, bM] ~ normal(0, 0.5);
B_merge ~ normal(nu, sigma_B);
for (i in 1:n) {
mu[i] = a + bB * B_merge[i] + bM * M[i];
}
K ~ normal(mu, sigma);
}
"
m15.5 <- stan(model_code = stan_program, data = stan_data,
iter = 5000)
summary(m15.5, c('nu', 'a', 'bM', 'bB', 'sigma_B', 'sigma'))$summary
## mean se_mean sd 2.5% 25% 50%
## nu -0.04792583 0.002119570 0.2126788 -4.743518e-01 -0.18413717 -0.04522333
## a 0.02956887 0.001661338 0.1643427 -2.890587e-01 -0.08194702 0.02992069
## bM -0.53798035 0.002331181 0.2062019 -9.260217e-01 -0.67682781 -0.53980049
## bB 0.48667072 0.003041412 0.2374374 -2.177106e-05 0.33445201 0.49488402
## sigma_B 1.01879179 0.002347955 0.1780872 7.367049e-01 0.89084468 0.99598673
## sigma 0.84667699 0.001727142 0.1424338 6.059020e-01 0.74546746 0.83240041
## 75% 97.5% n_eff Rhat
## nu 0.09227741 0.3626698 10068.224 0.9998259
## a 0.14062201 0.3425337 9785.544 0.9999384
## bM -0.40417323 -0.1208925 7824.082 1.0000497
## bB 0.64732829 0.9323277 6094.635 1.0000110
## sigma_B 1.12042561 1.4361986 5752.889 1.0006955
## sigma 0.93275157 1.1697471 6800.955 1.0001070