Github: https://github.com/quangtiencs/bayesian-cognitive-modeling-with-turing.jl
$$ \theta \sim \text{Beta}(1, 1) $$ $$ k \sim \text{Binomial}(\theta, n) $$
using DynamicPPL, Turing
using StatsPlots, Random
using LaTeXStrings
using CSV
using DataFrames
using SpecialFunctions
# Observed data: k successes out of n Bernoulli trials.
k = 5
n = 10
# Single-rate inference: uniform Beta(1, 1) prior on the success rate theta,
# binomial likelihood for the observed count k.
# NOTE(review): the trial count `n` is read from the enclosing global scope,
# not passed as an argument — confirm this coupling is intended.
@model function BinomialModel(k)
# theta: success rate, uniform prior on [0, 1].
theta ~ Beta(1, 1)
# Likelihood: k successes in n trials.
k ~ Binomial(n, theta)
end
# HMC settings: 1,000 draws, leapfrog step size ϵ, τ leapfrog steps per iteration.
iterations = 1_000
ϵ = 0.05
τ = 10
chain = sample(BinomialModel(k), HMC(ϵ, τ), iterations)
# Posterior of theta: histogram overlaid with a kernel-density estimate.
# Axis label and limits are set as plot attributes up front instead of via
# the xlabel!/xlims! mutators.
p = histogram(chain[:theta];
    normalize=true, xlabel="Rate", xlims=(0, 1.0),
    label=L"\mathtt{histogram \quad posterior} \quad \theta")
density!(chain[:theta]; lw=5,
    label=L"\mathtt{density \quad posterior} \quad \theta")
# Display the chain summary as the cell value.
chain
Iterations = 1:1:1000
Number of chains = 1
Samples per chain = 1000
Wall duration = 2.66 seconds
Compute duration = 2.66 seconds
parameters = theta
internals = lp, n_steps, is_accept, acceptance_rate, log_density,
hamiltonian_energy, hamiltonian_energy_error,
step_size, nom_step_size
Summary Statistics
parameters mean std naive_se mcse ess rhat e ⋯
Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯
theta 0.5080 0.1457 0.0046 0.0116 173.1098 1.0001 ⋯
1 column omitted
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
theta 0.2236 0.4085 0.5102 0.6150 0.7763
# Two independent datasets: k1 of n1 and k2 of n2 successes.
k1 = 5
n1 = 10
k2 = 7
n2 = 10
# Two-rate model (difference of rates): independent uniform priors on theta1
# and theta2. The returned NamedTuple exposes delta = theta2 - theta1 so it
# can be recovered per-draw via generated_quantities.
# NOTE(review): n1/n2 are read from global scope — confirm intended.
@model function BinomialModel2(k1, k2)
theta1 ~ Beta(1, 1)
theta2 ~ Beta(1, 1)
k1 ~ Binomial(n1, theta1)
k2 ~ Binomial(n2, theta2)
# Derived quantity (not a random variable in the chain).
delta = theta2 - theta1
return (delta=delta,)
end
# Sample the joint posterior, then re-run the model at each posterior draw to
# collect the returned delta values.
iterations = 1000
ϵ = 0.05
τ = 10
m = BinomialModel2(k1, k2)
chain = sample(m, HMC(ϵ, τ), iterations)
# Keep only the :parameters section (drop sampler internals) before replay.
chains_params = Turing.MCMCChains.get_sections(chain, :parameters)
# NOTE(review): generated_quantities is renamed `returned` in newer Turing
# releases — confirm against the pinned Turing version.
quantities = generated_quantities(m, chains_params)
delta = map(x -> x[:delta], quantities);
# Posterior of the rate difference next to the chain trace/density panels.
# Label fix: the model defines delta = theta2 - theta1 (see BinomialModel2),
# but the original labels read θ1 - θ2 — operands were swapped.
delta_plot = histogram(delta, label=L"\delta \leftarrow \theta_2 - \theta_1",
    normalize=true, bottom_margin=10Plots.mm)
density!(delta, label=L"\delta \leftarrow \theta_2 - \theta_1", lw=5)
xlabel!("Difference in Rates")
ylabel!("Posterior Density")
chain_plot = plot(chain, size=(500, 300))
plot(delta_plot, chain_plot, size=(800, 300),
    left_margin=10Plots.mm, bottom_margin=10Plots.mm)
# Same data as the two-rate example, now modeled with one shared rate.
k1 = 5
n1 = 10
k2 = 7
n2 = 10
# Pooled model: both observed counts are explained by a single common rate.
# NOTE(review): n1/n2 are read from global scope — confirm intended.
@model function BinomialModel3(k1, k2)
theta ~ Beta(1, 1)
k1 ~ Binomial(n1, theta)
k2 ~ Binomial(n2, theta)
end
# Same HMC settings as the previous sections.
iterations = 1000
ϵ = 0.05
τ = 10
m = BinomialModel3(k1, k2)
chain = sample(m, HMC(ϵ, τ), iterations)
Iterations = 1:1:1000
Number of chains = 1
Samples per chain = 1000
Wall duration = 0.41 seconds
Compute duration = 0.41 seconds
parameters = theta
internals = lp, n_steps, is_accept, acceptance_rate, log_density,
hamiltonian_energy, hamiltonian_energy_error,
step_size, nom_step_size
Summary Statistics
parameters mean std naive_se mcse ess rhat e ⋯
Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯
theta 0.5911 0.1016 0.0032 0.0056 396.0361 0.9990 ⋯
1 column omitted
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
theta 0.3834 0.5232 0.5898 0.6632 0.7856
# Trace and density panels for the pooled rate.
plot(chain, size=(600, 300))
# Sparse data: a single observation of k = 1 successes in n = 15 trials.
k = 1
n = 15
# Model with both prior and posterior sampling of the rate: theta_prior is
# never conditioned on data, so its chain values are draws from the prior,
# while theta is conditioned through the likelihood on k.
# `k` is a vector here so observations can later be replaced by `missing`
# for posterior predictive draws via `predict`.
# NOTE(review): `n` is read from global scope — confirm intended.
@model function BinomialModel4(k)
theta ~ Beta(1, 1)
# Prior-only copy of the rate; intentionally unused in the likelihood.
theta_prior ~ Beta(1, 1)
for i in eachindex(k)
k[i] ~ Binomial(n, theta)
end
end
iterations = 1_000
ϵ = 0.05
τ = 10
# Wrap the scalar observation in a vector to match the model's interface.
m = BinomialModel4([k])
chain = sample(m, HMC(ϵ, τ), iterations)
# Equivalent manual construction of the posterior predictive:
# posterior_predictive_k = [rand(Binomial(n, e)) for e in chain[:theta] |> collect]
# Rebuild the model with the single observation left `missing` so `predict`
# samples k[1] from the posterior predictive distribution.
# (The original wrote `length(1)`, which is just 1 — the length of a scalar.)
m_test = BinomialModel4(Vector{Union{Missing, Int32}}(undef, 1))
posterior_predictive = predict(m_test, chain)
Iterations = 1:1:1000
Number of chains = 1
Samples per chain = 1000
parameters = k[1]
internals =
Summary Statistics
parameters mean std naive_se mcse ess rhat
Symbol Float64 Float64 Float64 Float64 Float64 Float64
k[1] 1.6230 1.6216 0.0513 0.1182 183.7714 0.9995
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
k[1] 0.0000 0.0000 1.0000 2.0000 6.0000
# Top panel: prior vs posterior density of the rate.
# Bottom panel: prior vs posterior predictive distribution of the count.
posterior_predictive_k = collect(posterior_predictive[:"k[1]"])
# One prior predictive draw per posterior sample: rate from Beta(1, 1),
# then a binomial count at that rate.
prior_predictive_k = [rand(Binomial(n, rand(Beta(1, 1)))) for _ in 1:length(chain[:theta])]

p1 = density(chain[:theta]; lw=2, label=L"\mathtt{posterior}")
plot!(Beta(1, 1); lw=2, label=L"\mathtt{prior}")
xlims!(0, 1.0)
xlabel!("Rate")
ylabel!("Density")

p2 = histogram(posterior_predictive_k; normalize=true, alpha=0.5,
    bins=length(unique(posterior_predictive_k)) - 1,
    label=L"\mathtt{posterior}", size=(600, 250))
xlims!(0, 15)
histogram!(prior_predictive_k; normalize=true, alpha=0.5,
    bins=length(unique(prior_predictive_k)) - 1,
    label=L"\mathtt{prior}")
xlabel!("Success Count")
ylabel!("Mass")
plot(p1, p2; layout=(2, 1))
# Maximally disagreeing datasets: 0/10 successes vs 10/10 successes.
k1 = 0
n1 = 10
k2 = 10
n2 = 10
# Pooled-rate model for two count vectors. Unlike the earlier models, the
# trial counts n1/n2 are explicit arguments, and the observations are
# vectors so they can be replaced by `missing` for `predict`.
@model function BinomialModel5(k1, k2, n1, n2)
theta ~ Beta(1, 1)
for i in eachindex(k1)
k1[i] ~ Binomial(n1, theta)
end
for i in eachindex(k2)
k2[i] ~ Binomial(n2, theta)
end
end
iterations = 1_000
ϵ = 0.05
τ = 10
# Wrap the scalar counts in vectors to match the model's interface.
m = BinomialModel5([k1], [k2], n1, n2)
chain = sample(m, HMC(ϵ, τ), iterations)
# Missing observations -> `predict` draws them from the posterior predictive.
m_test = BinomialModel5(Vector{Union{Missing, Int32}}(undef, 1), Vector{Union{Missing, Int32}}(undef, 1), n1, n2)
posterior_predictive = predict(m_test, chain)
Iterations = 1:1:1000
Number of chains = 1
Samples per chain = 1000
parameters = k1[1], k2[1]
internals =
Summary Statistics
parameters mean std naive_se mcse ess rhat
Symbol Float64 Float64 Float64 Float64 Float64 Float64
k1[1] 4.9900 1.8808 0.0595 0.0525 847.5483 0.9990
k2[1] 5.0620 1.8616 0.0589 0.0730 684.7230 0.9990
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
k1[1] 1.0000 4.0000 5.0000 6.0000 8.0000
k2[1] 1.0000 4.0000 5.0000 6.0000 9.0000
# Manual equivalents of the posterior predictive draws used below:
# postprekk1 = [rand(Binomial(n1, e)) for e in chain[:theta] |> collect]
# postprekk2 = [rand(Binomial(n2, e)) for e in chain[:theta] |> collect]
postprekk1 = posterior_predictive[:"k1[1]"] |> collect
postprekk2 = posterior_predictive[:"k2[1]"] |> collect
# Left panel: posterior density of the shared rate.
p1 = density(chain[:theta], lw=2, label=false)
ylabel!("Density")
xlabel!("Rate")
# Right panel: joint posterior predictive as a 2-D histogram, with the
# actually observed (k1, k2) marked in blue.
p2 = histogram2d(postprekk1, postprekk2,
fc=:hot, background="black",
xlabel="success count 1", ylabel="success count 2"
)
scatter!([k1], [k2], markersize=10, color="blue", label=false)
# NOTE(review): both layout cells use the label `a`, and the widths sum to
# 0.74 (leaving horizontal slack) — confirm both are intended.
l = @layout [a{0.3w} a{0.44w}]
plot(p1, p2, layout=l, size=(800, 300),
bottom_margin=10Plots.mm, left_margin=10Plots.mm)
# Survey-return data: k[i] returns from each of m mailings; both the return
# rate and the unknown number of surveys sent (total_n ≤ nmax) are inferred.
k = [16, 18, 22, 25, 27]
nmax = 500
# NOTE(review): `m` is overwritten with the model object below, so this
# length is never used afterwards — confirm that is intended.
m = length(k);
# Joint model over a continuous rate and a discrete trial count.
# NOTE(review): `nmax` is read from global scope — confirm intended.
@model function BinomialModel6(k)
theta ~ Beta(1, 1)
# Discrete unknown: number of surveys actually sent.
total_n ~ DiscreteUniform(1, nmax)
for i in eachindex(k)
k[i] ~ Binomial(total_n, theta)
end
end
# SMC is used because total_n is discrete (HMC cannot sample it); four
# chains are run in parallel via MCMCThreads.
iterations = 10_000
nchains = 4
burnin = 2_000
# ϵ and τ are leftovers from the HMC template; SMC() does not use them.
ϵ = 0.05
τ = 10
m = BinomialModel6(k)
# NOTE(review): `burnin` is forwarded as a keyword to `sample`; recent
# AbstractMCMC versions spell this `discard_initial` — confirm it is honored.
chain = sample(m, SMC(), MCMCThreads(), iterations, nchains, burnin=burnin)
Iterations = 1:1:10000
Number of chains = 4
Samples per chain = 10000
Wall duration = 45.84 seconds
Compute duration = 45.73 seconds
parameters = theta, total_n
internals = lp, weight
Summary Statistics
parameters mean std naive_se mcse ess rhat ⋯
Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯
theta 0.1906 0.1461 0.0007 0.0033 1922.7755 1.0022 ⋯
total_n 191.5686 128.4510 0.6423 3.3487 1495.5883 1.0040 ⋯
1 column omitted
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
theta 0.0449 0.0775 0.1396 0.2593 0.5640
total_n 38.0000 83.0000 155.0000 279.0000 471.0000
# Trace/density panels for theta and total_n across the four chains.
plot(chain, size=(800, 400), bottom_margin=10Plots.mm, left_margin=10Plots.mm)
# Flatten the multi-chain draws into plain vectors for post-processing.
posttheta = chain[:theta] |> vec |> collect
postntotal = chain[:total_n] |> vec |> collect
# Find the posterior draw with the highest joint log-likelihood of the
# observed counts k (the best-fitting (total_n, theta) pair among the draws).
maximum_likelihood = -Inf64
idx = 1
for i in eachindex(postntotal)
    # Declare the accumulators global: without this, assignment inside a
    # top-level `for` is ambiguous soft scope and raises UndefVarError when
    # the file is run as a non-interactive script.
    global maximum_likelihood, idx
    # Binomial log-pmf written via loggamma so it accepts the Float64-valued
    # total_n draws stored in the chain; summed over all observed counts.
    log_likelihood = sum(
        loggamma(postntotal[i] + 1) - loggamma(j + 1) - loggamma(postntotal[i] - j + 1) +
        j * log(posttheta[i]) + (postntotal[i] - j) * log(1 - posttheta[i])
        for j in k
    )
    if log_likelihood > maximum_likelihood
        maximum_likelihood = log_likelihood
        idx = i
    end
end
# Joint posterior of (total_n, theta) with marginal histograms.
p = marginalhist(postntotal, posttheta,
fc=:Oranges_8, bins=100, size=(500, 500))
# Green cross: posterior means; red dot: highest-likelihood draw found above.
# subplots[2] is the central joint panel produced by marginalhist.
scatter!(p.subplots[2], [mean(postntotal)], [mean(posttheta)], markersize=10, color="green", markershape=:xcross, label=false)
scatter!(p.subplots[2], [postntotal[idx]], [posttheta[idx]], markersize=10, color="red", label=false)
xlabel!(p.subplots[2], "Number of Surveys")
ylabel!(p.subplots[2], "Rate of Return")