(playground) Hierarchy Risk Parity

Author

quangtiencs

Published

December 24, 2024

Disclaimer

This post explores the HRP Algorithm. The views expressed here do not endorse any particular investment portfolio.

Portfolio diversification is a risk management technique that addresses systematic and unsystematic risks.

Hierarchical Risk Parity is an asset allocation technique that assigns weights based on a hierarchical structure. HRP uses three steps to build a diversified portfolio: (1) hierarchical tree clustering of assets from their correlation-based distances, (2) quasi-diagonalization, which reorders the covariance matrix so that similar assets sit next to each other, and (3) recursive bisection, which splits the ordered assets top-down and allocates more weight to the lower-risk half at each split.

In this tutorial, we will run the HRP algorithm on a selection of Viet Nam stocks.

Based on the source code: Stefan Jansen - Machine Learning For Trading

:D my graphviz

G AAA AAA 75 AAA--75 ACB ACB 69 ACB--69 ASM ASM 79 ASM--79 BID BID 70 BID--70 BMP BMP 112 BMP--112 BVH BVH 78 BVH--78 BWE BWE 102 BWE--102 CII CII 67 CII--67 CTD CTD 91 CTD--91 CTG CTG CTG--70 DBC DBC 72 DBC--72 DCM DCM 65 DCM--65 DGC DGC 77 DGC--77 DGW DGW DGW--77 DHC DHC 103 DHC--103 DPM DPM DPM--65 EIB EIB 117 EIB--117 EVF EVF EVF--103 GAS GAS 89 GAS--89 GEX GEX 73 GEX--73 GMD GMD 83 GMD--83 HAG HAG 107 HAG--107 HCM HCM 63 HCM--63 HDB HDB 81 HDB--81 HDG HDG HDG--79 HHV HHV 84 HHV--84 HPG HPG 76 HPG--76 HSG HSG 64 HSG--64 LPB LPB 71 LPB--71 MBB MBB 66 MBB--66 NKG NKG NKG--64 NT2 NT2 95 NT2--95 PAN PAN PAN--67 PC1 PC1 88 PC1--88 PHR PHR PHR--72 PLX PLX PLX--89 PNJ PNJ 108 PNJ--108 POW POW POW--75 PPC PPC 104 PPC--104 PTB PTB PTB--83 PVD PVD 74 PVD--74 PVT PVT PVT--74 REE REE 106 REE--106 SAB SAB SAB--78 SBT SBT 93 SBT--93 SCS SCS 109 SCS--109 SSI SSI SSI--63 STB STB STB--71 TCB TCB TCB--66 TCH TCH 86 TCH--86 TLG TLG TLG--102 TPB TPB 85 TPB--85 VCB VCB 99 VCB--99 VCG VCG 96 VCG--96 VHC VHC 87 VHC--87 VHM VHM 80 VHM--80 VIB VIB VIB--69 VIC VIC 97 VIC--97 VIX VIX VIX--73 VND VND 68 VND--68 VNM VNM 105 VNM--105 VPB VPB VPB--85 VRE VRE VRE--80 63--68 64--76 65--87 66--81 82 67--82 113 68--113 90 69--90 70--99 110 71--110 72--84 73--82 74--88 75--93 100 76--100 92 77--92 101 78--101 79--91 80--97 81--90 82--86 98 83--98 94 84--94 85--100 86--96 87--92 88--106 89--101 90--110 91--94 92--98 93--95 94--107 95--104 96--113 97--108 116 98--116 99--112 111 100--111 101--105 102--109 103--111 115 104--115 119 105--119 106--116 107--115 114 108--114 118 109--118 110--117 121 111--121 112--114 120 113--120 114--119 115--120 116--118 117--121 123 118--123 122 119--122 120--123 121--122 124 122--124 123--124

Config

# --- HRP configuration ---
cfg_hrp_correlation = "kendall"  # pearson, kendall
cfg_hrp_clustering = "ward"  # linkage method passed to scipy.cluster.hierarchy.linkage
cfg_hrp_plot_cmap = "BrBG"  # diverging colormap for the correlation heatmaps

Run

import numpy as np
import pandas as pd
import duckdb

from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import dendrogram

import matplotlib.pyplot as plt
import seaborn as sns
import arviz as az

# Plot styling: ggplot theme with compact tick labels (dense ticker axes).
plt.style.use("ggplot")
plt.rcParams["xtick.labelsize"] = 8
plt.rcParams["ytick.labelsize"] = 8

from vector_field.config import FilePathConfig
# Open the project's DuckDB middleware database (path from project config).
conn = duckdb.connect(FilePathConfig.middleware_database_file)
# Load ~5 years of forward-filled daily close prices for VN100 symbols
# (filtered to 5 <= P/E <= 25) from the DuckDB database, pivot to one
# column per ticker, and index the frame by trading date.
prices = (
    conn.sql(
        """
WITH PRICES_VN100 AS (
    SELECT SYMBOL, "DATE", "CLOSE"
    FROM FORWARD_FILL_STOCK_PRICES_DAY_TRADING
    WHERE TRUE
    AND SYMBOL IN (
        SELECT SYMBOL FROM STATUS_SYMBOL
        WHERE VN_100
        AND PRICE_TO_EARNINGS BETWEEN 5 AND 25
    --     AND PRICE_TO_BOOK <
    )
    AND "DATE" >= '2024-12-20'::DATE - 5*365
    QUALIFY COUNT(*) OVER(PARTITION BY SYMBOL) >= 5*365 -- because ffill
)

SELECT *
FROM (
    PIVOT PRICES_VN100 ON "SYMBOL" USING FIRST("CLOSE") GROUP BY "DATE"
)
WHERE "DATE" IN (SELECT "DATE" FROM VALID_DATE) -- only valid date
ORDER BY "DATE" ASC
"""
    )
    .df()
    .set_index("DATE")
)
print(prices.columns, len(prices.columns))
Index(['AAA', 'ACB', 'ASM', 'BID', 'BMP', 'BVH', 'BWE', 'CII', 'CTD', 'CTG',
       'DBC', 'DCM', 'DGC', 'DGW', 'DHC', 'DPM', 'EIB', 'EVF', 'GAS', 'GEX',
       'GMD', 'HAG', 'HCM', 'HDB', 'HDG', 'HHV', 'HPG', 'HSG', 'LPB', 'MBB',
       'NKG', 'NT2', 'PAN', 'PC1', 'PHR', 'PLX', 'PNJ', 'POW', 'PPC', 'PTB',
       'PVD', 'PVT', 'REE', 'SAB', 'SBT', 'SCS', 'SSI', 'STB', 'TCB', 'TCH',
       'TLG', 'TPB', 'VCB', 'VCG', 'VHC', 'VHM', 'VIB', 'VIC', 'VIX', 'VND',
       'VNM', 'VPB', 'VRE'],
      dtype='object') 63
prices.head()
AAA ACB ASM BID BMP BVH BWE CII CTD CTG ... VCG VHC VHM VIB VIC VIX VND VNM VPB VRE
DATE
2019-12-23 10.219 7.879 4.316 30.906 28.630 64.069 18.831 19.555 39.616 13.712 ... 17.920 28.792 61.753 4.812 102.844 1.574 2.909 78.066 6.487 32.9
2019-12-24 10.018 7.844 4.247 31.009 28.387 61.934 18.831 19.466 37.766 13.745 ... 17.854 28.464 61.679 4.812 102.667 1.547 2.889 78.463 6.470 32.9
2019-12-25 10.462 7.913 4.134 31.216 28.357 61.133 18.792 19.997 39.473 13.745 ... 17.854 28.573 61.679 4.922 102.667 1.547 2.899 78.066 6.554 32.8
2019-12-26 10.301 7.809 4.159 30.526 28.539 60.510 18.639 19.732 39.260 13.745 ... 17.854 28.792 62.047 4.894 102.578 1.547 2.889 78.068 6.604 33.3
2019-12-27 10.422 7.844 4.228 31.907 28.053 61.222 18.524 19.466 38.335 13.779 ... 17.920 28.245 62.269 4.867 102.400 1.547 2.889 78.869 6.604 33.4

5 rows × 63 columns

# Month-end prices -> simple monthly returns; drop the all-NaN first row,
# then drop any ticker column that still has missing months.
monthly_returns = prices.resample("ME").last().pct_change().dropna(how="all")
monthly_returns = monthly_returns.dropna(axis=1)
# monthly_returns.columns.names = ["Ticker"]
# monthly_returns.info()
cov = monthly_returns.cov()  # sample covariance, used for cluster variances
corr = monthly_returns.corr(cfg_hrp_correlation)  # rank correlation for clustering
corr.columns.names = ["Ticker"]
def get_distance_matrix(corr):
    """Map a correlation matrix to a metric distance matrix.

    Every entry satisfies 0 <= d[i, j] <= 1: perfectly correlated
    pairs get distance 0, perfectly anti-correlated pairs get 1.
    """
    half_complement = (1 - corr) / 2
    return np.sqrt(half_complement)
# Cluster the assets hierarchically from their correlation distances and
# draw the resulting dendrogram.
distance_matrix = get_distance_matrix(corr)
columns = list(distance_matrix.columns)
# squareform converts the symmetric square matrix to the condensed form
# that scipy's linkage expects.
linkage_matrix = linkage(squareform(distance_matrix), cfg_hrp_clustering)
fig, axes = plt.subplots(figsize=(14, 7))
dendrogram(
    linkage_matrix,
    leaf_font_size=10,
    labels=columns,  # plain list of tickers (previously computed but unused)
    distance_sort=True,
    ax=axes,
)
plt.show()

def quasi_diagonalize(link):
    """Sort clustered assets by distance (quasi-diagonalization).

    Walks the scipy linkage matrix top-down, repeatedly replacing each
    cluster id (>= num_items) with its two constituents until only
    original item indices remain, preserving the hierarchy's
    left-to-right leaf order.

    Parameters
    ----------
    link : np.ndarray
        Linkage matrix of shape (num_items - 1, 4) as returned by
        scipy.cluster.hierarchy.linkage.

    Returns
    -------
    list[int]
        Original item indices ordered so similar assets are adjacent.
    """
    link = link.astype(int)  # only the integer id/count columns are used
    sort_idx = pd.Series([link[-1, 0], link[-1, 1]])
    num_items = link[-1, 3]  # number of original items
    while sort_idx.max() >= num_items:
        sort_idx.index = list(range(0, sort_idx.shape[0] * 2, 2))  # make space
        df0 = sort_idx[sort_idx >= num_items]  # entries that are still clusters
        i = df0.index
        j = df0.values - num_items
        sort_idx[i] = link[j, 0]  # left child
        df0 = pd.Series(link[j, 1], index=i + 1)  # right child, interleaved
        # public API; Series._append is private and Series.append was
        # removed in pandas 2.0
        sort_idx = pd.concat([sort_idx, df0])
        sort_idx = sort_idx.sort_index()  # restore left-to-right order
        sort_idx.index = list(range(sort_idx.shape[0]))  # re-index
    return sort_idx.tolist()
sorted_idx = quasi_diagonalize(linkage_matrix)  # leaf order from the hierarchy
sorted_tickers = corr.index[sorted_idx].tolist()
fig, axes = plt.subplots(ncols=2, figsize=(18, 8))

# Left panel: correlation matrix in the original ticker order.
sns.heatmap(
    corr,
    center=0,
    cmap=cfg_hrp_plot_cmap,
    ax=axes[0],
    xticklabels=True,
    yticklabels=True,
)
axes[0].set_title("Correlation Matrix")

# Right panel: the same matrix reordered by the cluster leaf order,
# which makes the block-diagonal structure visible.
clustered_assets = corr.loc[sorted_tickers, sorted_tickers]  # reorder
sns.heatmap(
    clustered_assets,
    center=0,
    cmap=cfg_hrp_plot_cmap,
    ax=axes[1],
    xticklabels=True,
    yticklabels=True,
)
axes[1].set_title("Clustered Correlation")
fig.tight_layout()
plt.show()

def get_inverse_var_pf(cov, **kargs):
    """Return inverse-variance portfolio weights (normalized to sum to 1)."""
    inverse_variances = 1 / np.diag(cov)
    return inverse_variances / inverse_variances.sum()


def get_cluster_var(cov, cluster_items):
    """Variance of the inverse-variance portfolio restricted to one cluster.

    Slices `cov` down to `cluster_items`, weights each asset inversely
    to its variance, and returns the quadratic form w' C w as a float.
    """
    sub_cov = cov.loc[cluster_items, cluster_items]
    # inverse-variance weights on the sub-matrix (helper inlined)
    raw = 1 / np.diag(sub_cov)
    w = raw / raw.sum()
    return (w @ sub_cov @ w).item()


def get_hrp_allocation(cov, tickers):
    """Compute top-down HRP weights via recursive bisection.

    Starts with all capital in a single cluster covering every asset and
    repeatedly splits each cluster in half, shifting weight toward the
    half with the lower cluster variance. The order of `tickers`
    (quasi-diagonalized) determines where the splits fall.
    """
    weights = pd.Series(1.0, index=tickers)
    active = [tickers]  # one cluster holding all assets

    while active:
        # bisect every multi-asset cluster into its two halves
        next_level = []
        for cluster in active:
            if len(cluster) <= 1:
                continue
            mid = int(len(cluster) / 2)
            next_level.append(cluster[:mid])
            next_level.append(cluster[mid:])
        active = next_level

        # adjacent entries are sibling halves of the same parent cluster
        for left, right in zip(active[::2], active[1::2]):
            left_var = get_cluster_var(cov, left)
            right_var = get_cluster_var(cov, right)
            # allocate more weight to the lower-variance half
            alpha = 1 - left_var / (left_var + right_var)
            weights[left] *= alpha
            weights[right] *= 1 - alpha
    return weights
# Run HRP on the quasi-diagonalized ticker order and rank weights descending.
hrp_allocation = get_hrp_allocation(cov, sorted_tickers)
hrp_allocation = hrp_allocation.sort_values(ascending=False)
# It's done :D
# title = "Hierarchical Risk Parity - Portfolio Allocation"
# hrp_allocation.sort_values(ascending=False).plot.bar(figsize=(15, 4), title=title)
# sns.despine()
# plt.tight_layout()

References

  1. Marcos Lopez de Prado. Advances in Financial Machine Learning
  2. ML for Trading - 2nd Edition. https://github.com/stefan-jansen/machine-learning-for-trading/tree/main
  3. Matlab - Hierarchical Risk Parity Portfolio. https://www.mathworks.com/help/finance/create-hierarchical-risk-parity-portfolio.html
  4. Hierarchical Risk Parity https://developer.nvidia.com/blog/hierarchical-risk-parity-on-rapids-an-ml-approach-to-portfolio-allocation/