[playground] Julia Turing.jl : Bayesian Cognitive Modeling - Inferences with binomials

Posted at — Mar 18, 2023

Github: https://github.com/quangtiencs/bayesian-cognitive-modeling-with-turing.jl

3.1 Inferring a rate

$$ \theta \sim \text{Beta}(1, 1) $$ $$ k \sim \text{Binomial}(n, \theta) $$


using DynamicPPL, Turing
using StatsPlots, Random
using LaTeXStrings
using CSV
using DataFrames
using SpecialFunctions

# Observed data for section 3.1: k successes out of n Bernoulli trials.
k, n = 5, 10

# Infer the rate theta of a binomial process from k observed successes.
# NOTE(review): the trial count `n` is not an argument — the model closes
# over the global `n` defined above (n = 10); confirm this is intended.
@model function BinomialModel(k)
    theta ~ Beta(1, 1)      # uniform prior on the rate
    k ~ Binomial(n, theta)  # likelihood of the observed success count
end

# HMC settings: step size ϵ and τ leapfrog steps per iteration.
iterations = 1_000
ϵ, τ = 0.05, 10

chain = sample(BinomialModel(k), HMC(ϵ, τ), iterations)

# Posterior of theta: histogram overlaid with a kernel density estimate.
p = histogram(chain[:theta];
    normalize=true, label=L"\mathtt{histogram \quad posterior} \quad \theta")
density!(chain[:theta];
    lw=5, label=L"\mathtt{density \quad posterior} \quad \theta")
xlims!(0, 1.0)
xlabel!("Rate")

svg


chain

Iterations        = 1:1:1000
Number of chains  = 1
Samples per chain = 1000
Wall duration     = 2.66 seconds
Compute duration  = 2.66 seconds
parameters        = theta
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, 
                    hamiltonian_energy, hamiltonian_energy_error, 
                    step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse        ess      rhat   e ⋯
      Symbol   Float64   Float64    Float64   Float64    Float64   Float64     ⋯

       theta    0.5080    0.1457     0.0046    0.0116   173.1098    1.0001     ⋯
                                                                1 column omitted

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5% 
      Symbol   Float64   Float64   Float64   Float64   Float64 

       theta    0.2236    0.4085    0.5102    0.6150    0.7763

3.2 Difference between two rates


# Observed data for section 3.2: two independent binomial experiments.
k1, n1 = 5, 10
k2, n2 = 7, 10

# Two independent rates; delta = theta2 - theta1 is returned as a generated
# quantity and extracted later via generated_quantities.
# NOTE(review): the trial counts n1 and n2 are read from the globals above.
@model function BinomialModel2(k1, k2)
    theta1 ~ Beta(1, 1)  # uniform prior on rate 1
    theta2 ~ Beta(1, 1)  # uniform prior on rate 2
    k1 ~ Binomial(n1, theta1)
    k2 ~ Binomial(n2, theta2)
    delta = theta2 - theta1  # deterministic difference of rates
    return (delta=delta,)
end

# Sample with HMC, then pull the generated quantity `delta` out per draw.
iterations = 1_000
ϵ, τ = 0.05, 10

m = BinomialModel2(k1, k2)
chain = sample(m, HMC(ϵ, τ), iterations)
chains_params = Turing.MCMCChains.get_sections(chain, :parameters)
quantities = generated_quantities(m, chains_params)
delta = [q[:delta] for q in quantities];

# Posterior of the rate difference. The model defines delta = theta2 - theta1,
# so label it accordingly (the original labels had the operands reversed).
delta_plot = histogram(delta, label=L"\delta \leftarrow \theta_2 - \theta_1",
    normalize=true, bottom_margin=10Plots.mm)
density!(delta, label=L"\delta \leftarrow \theta_2 - \theta_1", lw=5)
xlabel!("Difference in Rates")
ylabel!("Posterior Density")

# Trace/density panel for the sampled parameters, shown next to delta.
chain_plot = plot(chain, size=(500, 300))
plot(delta_plot, chain_plot, size=(800, 300),
    left_margin=10Plots.mm, bottom_margin=10Plots.mm)

svg

3.3 Inferring a common rate


# Observed data for section 3.3: two experiments assumed to share one rate.
k1, n1 = 5, 10
k2, n2 = 7, 10

# Infer a single rate theta shared by both binomial observations.
# NOTE(review): n1 and n2 are read from the globals defined above.
@model function BinomialModel3(k1, k2)
    theta ~ Beta(1, 1)  # uniform prior on the common rate
    k1 ~ Binomial(n1, theta)
    k2 ~ Binomial(n2, theta)
end

# Sample the common-rate model with HMC (same settings as section 3.1).
iterations = 1_000
ϵ, τ = 0.05, 10

m = BinomialModel3(k1, k2)
chain = sample(m, HMC(ϵ, τ), iterations)
Iterations        = 1:1:1000
Number of chains  = 1
Samples per chain = 1000
Wall duration     = 0.41 seconds
Compute duration  = 0.41 seconds
parameters        = theta
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, 
                    hamiltonian_energy, hamiltonian_energy_error, 
                    step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse        ess      rhat   e ⋯
      Symbol   Float64   Float64    Float64   Float64    Float64   Float64     ⋯

       theta    0.5911    0.1016     0.0032    0.0056   396.0361    0.9990     ⋯
                                                                1 column omitted

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5% 
      Symbol   Float64   Float64   Float64   Float64   Float64 

       theta    0.3834    0.5232    0.5898    0.6632    0.7856

# Trace and density panels for the common rate.
plot(chain; size=(600, 300))

svg

3.4 Prior and posterior prediction


# Observed data for section 3.4: a single success out of 15 trials.
k, n = 1, 15


# Rate inference plus prior/posterior comparison.
# theta receives the likelihood below; theta_prior has no likelihood attached,
# so its chain draws are samples from the Beta(1, 1) prior itself.
# NOTE(review): the trial count `n` is the global defined above.
@model function BinomialModel4(k)
    theta ~ Beta(1, 1)
    theta_prior ~ Beta(1, 1)
    
    for i in eachindex(k)
        k[i] ~ Binomial(n, theta)
    end
end

# HMC settings: step size ϵ and τ leapfrog steps per iteration.
iterations = 1_000
ϵ, τ = 0.05, 10

m = BinomialModel4([k])
chain = sample(m, HMC(ϵ, τ), iterations)

# Posterior predictive: re-instantiate the model with the observation marked
# missing and let `predict` draw k[1] for each posterior sample.
# Equivalent to: [rand(Binomial(n, e)) for e in chain[:theta] |> collect].
# Fix: the original sized the vector with `length(1)` — the length of a
# scalar, which is 1 but reads as a mistake; state the length explicitly.
m_test = BinomialModel4(Vector{Union{Missing, Int32}}(undef, 1))
posterior_predictive = predict(m_test, chain)
Iterations        = 1:1:1000
Number of chains  = 1
Samples per chain = 1000
parameters        = k[1]
internals         = 

Summary Statistics
  parameters      mean       std   naive_se      mcse        ess      rhat 
      Symbol   Float64   Float64    Float64   Float64    Float64   Float64 

        k[1]    1.6230    1.6216     0.0513    0.1182   183.7714    0.9995

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5% 
      Symbol   Float64   Float64   Float64   Float64   Float64 

        k[1]    0.0000    0.0000    1.0000    2.0000    6.0000

# Posterior predictive success counts drawn by `predict` above.
posterior_predictive_k = collect(posterior_predictive[:"k[1]"])

# Prior predictive: a fresh rate from the Beta(1, 1) prior for each posterior
# sample, then a binomial count using that rate.
prior_predictive_k = [rand(Binomial(n, rand(Beta(1, 1)))) for _ in 1:length(chain[:theta])]


# Top panel: posterior density of the rate against the flat Beta(1, 1) prior.
p1 = density(chain[:theta], label=L"\mathtt{posterior}",  lw=2)
plot!(Beta(1,1), label=L"\mathtt{prior}", lw=2)
xlims!(0, 1.0)
xlabel!("Rate")
ylabel!("Density")

# Bottom panel: prior vs posterior predictive distributions of the count.
# NOTE(review): `bins = length(unique(...)) - 1` would be 0 if every draw
# coincided — unlikely here, but worth confirming if the data change.
p2 = histogram(posterior_predictive_k, label=L"\mathtt{posterior}", 
    normalize=true, bins=length(unique(posterior_predictive_k))-1, alpha=0.5, size=(600, 250))
xlims!(0, 15)
histogram!(prior_predictive_k, label=L"\mathtt{prior}", normalize=true, 
    bins=length(unique(prior_predictive_k))-1, alpha=0.5)
xlabel!("Success Count")
ylabel!("Mass")

plot(p1, p2; layout=(2,1))

svg

3.5 Posterior Predictive


# Observed data for section 3.5: maximally disagreeing experiments
# (0/10 and 10/10 successes) sharing a single rate.
k1, n1 = 0, 10
k2, n2 = 10, 10

# Common rate theta for two binomial data sets; the trial counts are passed
# as arguments here (unlike the earlier models, which read them from globals).
@model function BinomialModel5(k1, k2, n1, n2)
    theta ~ Beta(1, 1)  # uniform prior on the shared rate
    
    for i in eachindex(k1)
        k1[i] ~ Binomial(n1, theta)
    end
    
    for i in eachindex(k2)
        k2[i] ~ Binomial(n2, theta)
    end
end

# HMC settings, then sample and draw posterior predictives for both counts.
iterations = 1_000
ϵ, τ = 0.05, 10

m = BinomialModel5([k1], [k2], n1, n2)
chain = sample(m, HMC(ϵ, τ), iterations)

# Mark both observations missing so `predict` samples k1[1] and k2[1].
m_test = BinomialModel5(Vector{Union{Missing, Int32}}(undef, 1),
    Vector{Union{Missing, Int32}}(undef, 1), n1, n2)
posterior_predictive = predict(m_test, chain)
Iterations        = 1:1:1000
Number of chains  = 1
Samples per chain = 1000
parameters        = k1[1], k2[1]
internals         = 

Summary Statistics
  parameters      mean       std   naive_se      mcse        ess      rhat 
      Symbol   Float64   Float64    Float64   Float64    Float64   Float64 

       k1[1]    4.9900    1.8808     0.0595    0.0525   847.5483    0.9990
       k2[1]    5.0620    1.8616     0.0589    0.0730   684.7230    0.9990

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5% 
      Symbol   Float64   Float64   Float64   Float64   Float64 

       k1[1]    1.0000    4.0000    5.0000    6.0000    8.0000
       k2[1]    1.0000    4.0000    5.0000    6.0000    9.0000

# Posterior predictive success counts from `predict` (equivalent to drawing
# rand(Binomial(nᵢ, θ)) for each posterior sample of θ).
postprekk1 = collect(posterior_predictive[:"k1[1]"])
postprekk2 = collect(posterior_predictive[:"k2[1]"])

# Left panel: posterior density of the shared rate.
p1 = density(chain[:theta],  lw=2, label=false)
ylabel!("Density")
xlabel!("Rate")
# Right panel: joint posterior predictive of (k1, k2) as a 2-D histogram.
p2 = histogram2d(postprekk1, postprekk2, 
    fc=:hot, background="black", 
    xlabel="success count 1", ylabel="success count 2"
)

# Mark the actually observed pair (k1, k2) = (0, 10) on the heat map.
scatter!([k1], [k2], markersize=10, color="blue", label=false)


# NOTE(review): both layout cells are named `a`; distinct names (a, b) are
# the usual convention — confirm the duplicate name is intentional.
l = @layout [a{0.3w} a{0.44w}]
plot(p1, p2, layout=l, size=(800, 300), 
    bottom_margin=10Plots.mm, left_margin=10Plots.mm)

svg

3.6 Joint distributions


# Observed return counts from five surveys and the upper bound on the
# (unknown) number of surveys sent out.
k = [16, 18, 22, 25, 27]
nmax = 500
m = length(k);  # number of observations (NOTE: `m` is rebound to the model below)

# Joint inference of the rate theta and the unknown trial count total_n.
# total_n is discrete, which is why SMC (not HMC) is used to sample below.
# NOTE(review): `nmax` is the global upper bound defined above.
@model function BinomialModel6(k)
    theta ~ Beta(1, 1)

    total_n ~ DiscreteUniform(1, nmax)

    for i in eachindex(k)
        k[i] ~ Binomial(total_n, theta)
    end
end

# Four SMC chains in parallel via threads, discarding `burnin` initial draws.
iterations = 10_000
nchains, burnin = 4, 2_000
ϵ, τ = 0.05, 10  # NOTE(review): not used by SMC; kept for parity with earlier sections

m = BinomialModel6(k)
chain = sample(m, SMC(), MCMCThreads(), iterations, nchains; burnin=burnin)
Iterations        = 1:1:10000
Number of chains  = 4
Samples per chain = 10000
Wall duration     = 45.84 seconds
Compute duration  = 45.73 seconds
parameters        = theta, total_n
internals         = lp, weight

Summary Statistics
  parameters       mean        std   naive_se      mcse         ess      rhat  ⋯
      Symbol    Float64    Float64    Float64   Float64     Float64   Float64  ⋯

       theta     0.1906     0.1461     0.0007    0.0033   1922.7755    1.0022  ⋯
     total_n   191.5686   128.4510     0.6423    3.3487   1495.5883    1.0040  ⋯
                                                                1 column omitted

Quantiles
  parameters      2.5%     25.0%      50.0%      75.0%      97.5% 
      Symbol   Float64   Float64    Float64    Float64    Float64 

       theta    0.0449    0.0775     0.1396     0.2593     0.5640
     total_n   38.0000   83.0000   155.0000   279.0000   471.0000
# Trace and density panels for theta and total_n across all four chains.
plot(chain; size=(800, 400), bottom_margin=10Plots.mm, left_margin=10Plots.mm)

svg


# Flatten the multi-chain draws into plain vectors, one entry per sample.
posttheta = collect(vec(chain[:theta]))
postntotal = collect(vec(chain[:total_n]))

# Find the joint posterior draw (theta, total_n) with the highest binomial
# log-likelihood of the observed counts k — the max-likelihood draw.
#
# Fixes over the original loop:
#   * the accumulator started as the Int 0 and was promoted to Float64 on
#     the first += (type-unstable);
#   * reassigning `maximum_likelihood`/`idx` inside a top-level `for` relies
#     on soft-scope global assignment (warns in script files);
#   * `findmax` replaces the hand-rolled running maximum.
# log C(N, j) is expanded via loggamma (SpecialFunctions).
binom_loglike(θ, N) = sum(
    loggamma(N + 1) - loggamma(j + 1) - loggamma(N - j + 1) +
    j * log(θ) + (N - j) * log(1 - θ)
    for j in k
)

maximum_likelihood, idx = findmax(
    [binom_loglike(posttheta[i], postntotal[i]) for i in eachindex(postntotal)]
)

# Joint posterior of (total_n, theta) with marginal histograms.
p = marginalhist(postntotal, posttheta, 
    fc=:Oranges_8, bins=100, size=(500, 500))
# Green cross: posterior means; red dot: the max-likelihood draw found above.
scatter!(p.subplots[2], [mean(postntotal)], [mean(posttheta)], markersize=10, color="green", markershape=:xcross, label=false)
scatter!(p.subplots[2], [postntotal[idx]], [posttheta[idx]], markersize=10, color="red", label=false)
xlabel!(p.subplots[2], "Number of Surveys")
ylabel!(p.subplots[2], "Rate of Return")

svg