(playground) Hierarchy Risk Parity

Author

quangtiencs

Published

December 24, 2024

Disclaimer

This post explores the HRP Algorithm. The views expressed here do not endorse any particular investment portfolio.

Portfolio diversification is a risk management technique that addresses systematic and unsystematic risks.

Hierarchical Risk Parity is an asset allocation technique that assigns weights based on a hierarchical structure. HRP uses three steps to build a diversified portfolio: (1) hierarchical tree clustering of assets from their correlation-based distances, (2) quasi-diagonalization, which reorders the covariance matrix so that similar assets sit next to each other, and (3) recursive bisection, which splits the ordered assets top-down and allocates more weight to the lower-risk half at each split.

In this tutorial, we will run the HRP algorithm on a selection of Viet Nam stocks.

Based on the source code: Stefan Jansen - Machine Learning For Trading

:D my graphviz

G AAA AAA 75 AAA--75 ACB ACB 69 ACB--69 ASM ASM 79 ASM--79 BID BID 70 BID--70 BMP BMP 112 BMP--112 BVH BVH 78 BVH--78 BWE BWE 102 BWE--102 CII CII 67 CII--67 CTD CTD 91 CTD--91 CTG CTG CTG--70 DBC DBC 72 DBC--72 DCM DCM 65 DCM--65 DGC DGC 77 DGC--77 DGW DGW DGW--77 DHC DHC 103 DHC--103 DPM DPM DPM--65 EIB EIB 117 EIB--117 EVF EVF EVF--103 GAS GAS 89 GAS--89 GEX GEX 73 GEX--73 GMD GMD 83 GMD--83 HAG HAG 107 HAG--107 HCM HCM 63 HCM--63 HDB HDB 81 HDB--81 HDG HDG HDG--79 HHV HHV 84 HHV--84 HPG HPG 76 HPG--76 HSG HSG 64 HSG--64 LPB LPB 71 LPB--71 MBB MBB 66 MBB--66 NKG NKG NKG--64 NT2 NT2 95 NT2--95 PAN PAN PAN--67 PC1 PC1 88 PC1--88 PHR PHR PHR--72 PLX PLX PLX--89 PNJ PNJ 108 PNJ--108 POW POW POW--75 PPC PPC 104 PPC--104 PTB PTB PTB--83 PVD PVD 74 PVD--74 PVT PVT PVT--74 REE REE 106 REE--106 SAB SAB SAB--78 SBT SBT 93 SBT--93 SCS SCS 109 SCS--109 SSI SSI SSI--63 STB STB STB--71 TCB TCB TCB--66 TCH TCH 86 TCH--86 TLG TLG TLG--102 TPB TPB 85 TPB--85 VCB VCB 99 VCB--99 VCG VCG 96 VCG--96 VHC VHC 87 VHC--87 VHM VHM 80 VHM--80 VIB VIB VIB--69 VIC VIC 97 VIC--97 VIX VIX VIX--73 VND VND 68 VND--68 VNM VNM 105 VNM--105 VPB VPB VPB--85 VRE VRE VRE--80 63--68 64--76 65--87 66--81 82 67--82 113 68--113 90 69--90 70--99 110 71--110 72--84 73--82 74--88 75--93 100 76--100 92 77--92 101 78--101 79--91 80--97 81--90 82--86 98 83--98 94 84--94 85--100 86--96 87--92 88--106 89--101 90--110 91--94 92--98 93--95 94--107 95--104 96--113 97--108 116 98--116 99--112 111 100--111 101--105 102--109 103--111 115 104--115 119 105--119 106--116 107--115 114 108--114 118 109--118 110--117 121 111--121 112--114 120 113--120 114--119 115--120 116--118 117--121 123 118--123 122 119--122 120--123 121--122 124 122--124 123--124

Config

# --- HRP configuration ---
cfg_hrp_correlation = "kendall"  # pearson, kendall
cfg_hrp_clustering = "ward"  # linkage method passed to scipy.cluster.hierarchy.linkage
cfg_hrp_plot_cmap = "BrBG"  # diverging colormap for the correlation heatmaps

Run

import numpy as np
import pandas as pd
import duckdb

from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import dendrogram

import matplotlib.pyplot as plt
import seaborn as sns
import arviz as az

# Plot styling: ggplot theme with compact tick labels (dense ticker axes).
plt.style.use("ggplot")
plt.rcParams["xtick.labelsize"] = 8
plt.rcParams["ytick.labelsize"] = 8

from vector_field.config import FilePathConfig
# Open the project's DuckDB middleware database (path from project config).
conn = duckdb.connect(FilePathConfig.middleware_database_file)
# Load ~5 years of forward-filled daily close prices for VN100 symbols
# (filtered to 5 <= P/E <= 25) from the DuckDB database, pivot to one
# column per ticker, and index the frame by trading date.
prices = (
    conn.sql(
        """
WITH PRICES_VN100 AS (
    SELECT SYMBOL, "DATE", "CLOSE"
    FROM FORWARD_FILL_STOCK_PRICES_DAY_TRADING
    WHERE TRUE
    AND SYMBOL IN (
        SELECT SYMBOL FROM STATUS_SYMBOL
        WHERE VN_100
        AND PRICE_TO_EARNINGS BETWEEN 5 AND 25
    --     AND PRICE_TO_BOOK <
    )
    AND "DATE" >= '2024-12-20'::DATE - 5*365
    QUALIFY COUNT(*) OVER(PARTITION BY SYMBOL) >= 5*365 -- because ffill
)

SELECT *
FROM (
    PIVOT PRICES_VN100 ON "SYMBOL" USING FIRST("CLOSE") GROUP BY "DATE"
)
WHERE "DATE" IN (SELECT "DATE" FROM VALID_DATE) -- only valid date
ORDER BY "DATE" ASC
"""
    )
    .df()
    .set_index("DATE")
)
print(prices.columns, len(prices.columns))
Index(['AAA', 'ACB', 'ASM', 'BID', 'BMP', 'BVH', 'BWE', 'CII', 'CTD', 'CTG',
       'DBC', 'DCM', 'DGC', 'DGW', 'DHC', 'DPM', 'EIB', 'EVF', 'GAS', 'GEX',
       'GMD', 'HAG', 'HCM', 'HDB', 'HDG', 'HHV', 'HPG', 'HSG', 'LPB', 'MBB',
       'NKG', 'NT2', 'PAN', 'PC1', 'PHR', 'PLX', 'PNJ', 'POW', 'PPC', 'PTB',
       'PVD', 'PVT', 'REE', 'SAB', 'SBT', 'SCS', 'SSI', 'STB', 'TCB', 'TCH',
       'TLG', 'TPB', 'VCB', 'VCG', 'VHC', 'VHM', 'VIB', 'VIC', 'VIX', 'VND',
       'VNM', 'VPB', 'VRE'],
      dtype='object') 63
prices.head()
AAA ACB ASM BID BMP BVH BWE CII CTD CTG ... VCG VHC VHM VIB VIC VIX VND VNM VPB VRE
DATE
2019-12-23 10.219 7.879 4.316 30.906 28.630 64.069 18.831 19.555 39.616 13.712 ... 17.920 28.792 61.753 4.812 102.844 1.574 2.909 78.066 6.487 32.9
2019-12-24 10.018 7.844 4.247 31.009 28.387 61.934 18.831 19.466 37.766 13.745 ... 17.854 28.464 61.679 4.812 102.667 1.547 2.889 78.463 6.470 32.9
2019-12-25 10.462 7.913 4.134 31.216 28.357 61.133 18.792 19.997 39.473 13.745 ... 17.854 28.573 61.679 4.922 102.667 1.547 2.899 78.066 6.554 32.8
2019-12-26 10.301 7.809 4.159 30.526 28.539 60.510 18.639 19.732 39.260 13.745 ... 17.854 28.792 62.047 4.894 102.578 1.547 2.889 78.068 6.604 33.3
2019-12-27 10.422 7.844 4.228 31.907 28.053 61.222 18.524 19.466 38.335 13.779 ... 17.920 28.245 62.269 4.867 102.400 1.547 2.889 78.869 6.604 33.4

5 rows × 63 columns

# Month-end prices -> simple monthly returns; drop the all-NaN first row,
# then drop any ticker column that still has missing months.
monthly_returns = prices.resample("ME").last().pct_change().dropna(how="all")
monthly_returns = monthly_returns.dropna(axis=1)
# monthly_returns.columns.names = ["Ticker"]
# monthly_returns.info()
cov = monthly_returns.cov()  # sample covariance, used for cluster variances
corr = monthly_returns.corr(cfg_hrp_correlation)  # rank correlation for clustering
corr.columns.names = ["Ticker"]
def get_distance_matrix(corr):
    """Map a correlation matrix to a metric distance matrix.

    Every entry satisfies 0 <= d[i, j] <= 1: perfectly correlated
    pairs get distance 0, perfectly anti-correlated pairs get 1.
    """
    half_complement = (1 - corr) / 2
    return np.sqrt(half_complement)
# Cluster the assets hierarchically from their correlation distances and
# draw the resulting dendrogram.
distance_matrix = get_distance_matrix(corr)
columns = list(distance_matrix.columns)
# squareform converts the symmetric square matrix to the condensed form
# that scipy's linkage expects.
linkage_matrix = linkage(squareform(distance_matrix), cfg_hrp_clustering)
fig, axes = plt.subplots(figsize=(14, 7))
dendrogram(
    linkage_matrix,
    leaf_font_size=10,
    labels=columns,  # plain list of tickers (previously computed but unused)
    distance_sort=True,
    ax=axes,
)
plt.show()

def quasi_diagonalize(link):
    """Sort clustered assets by distance (quasi-diagonalization).

    Walks the scipy linkage matrix top-down, repeatedly replacing each
    cluster id (>= num_items) with its two constituents until only
    original item indices remain, preserving the hierarchy's
    left-to-right leaf order.

    Parameters
    ----------
    link : np.ndarray
        Linkage matrix of shape (num_items - 1, 4) as returned by
        scipy.cluster.hierarchy.linkage.

    Returns
    -------
    list[int]
        Original item indices ordered so similar assets are adjacent.
    """
    link = link.astype(int)  # only the integer id/count columns are used
    sort_idx = pd.Series([link[-1, 0], link[-1, 1]])
    num_items = link[-1, 3]  # number of original items
    while sort_idx.max() >= num_items:
        sort_idx.index = list(range(0, sort_idx.shape[0] * 2, 2))  # make space
        df0 = sort_idx[sort_idx >= num_items]  # entries that are still clusters
        i = df0.index
        j = df0.values - num_items
        sort_idx[i] = link[j, 0]  # left child
        df0 = pd.Series(link[j, 1], index=i + 1)  # right child, interleaved
        # public API; Series._append is private and Series.append was
        # removed in pandas 2.0
        sort_idx = pd.concat([sort_idx, df0])
        sort_idx = sort_idx.sort_index()  # restore left-to-right order
        sort_idx.index = list(range(sort_idx.shape[0]))  # re-index
    return sort_idx.tolist()
sorted_idx = quasi_diagonalize(linkage_matrix)  # leaf order from the hierarchy
sorted_tickers = corr.index[sorted_idx].tolist()
fig, axes = plt.subplots(ncols=2, figsize=(18, 8))

# Left panel: correlation matrix in the original ticker order.
sns.heatmap(
    corr,
    center=0,
    cmap=cfg_hrp_plot_cmap,
    ax=axes[0],
    xticklabels=True,
    yticklabels=True,
)
axes[0].set_title("Correlation Matrix")

# Right panel: the same matrix reordered by the cluster leaf order,
# which makes the block-diagonal structure visible.
clustered_assets = corr.loc[sorted_tickers, sorted_tickers]  # reorder
sns.heatmap(
    clustered_assets,
    center=0,
    cmap=cfg_hrp_plot_cmap,
    ax=axes[1],
    xticklabels=True,
    yticklabels=True,
)
axes[1].set_title("Clustered Correlation")
fig.tight_layout()
plt.show()

def get_inverse_var_pf(cov, **kargs):
    """Return inverse-variance portfolio weights (normalized to sum to 1)."""
    inverse_variances = 1 / np.diag(cov)
    return inverse_variances / inverse_variances.sum()


def get_cluster_var(cov, cluster_items):
    """Variance of the inverse-variance portfolio restricted to one cluster.

    Slices `cov` down to `cluster_items`, weights each asset inversely
    to its variance, and returns the quadratic form w' C w as a float.
    """
    sub_cov = cov.loc[cluster_items, cluster_items]
    # inverse-variance weights on the sub-matrix (helper inlined)
    raw = 1 / np.diag(sub_cov)
    w = raw / raw.sum()
    return (w @ sub_cov @ w).item()


def get_hrp_allocation(cov, tickers):
    """Compute top-down HRP weights via recursive bisection.

    Starts with all capital in a single cluster covering every asset and
    repeatedly splits each cluster in half, shifting weight toward the
    half with the lower cluster variance. The order of `tickers`
    (quasi-diagonalized) determines where the splits fall.
    """
    weights = pd.Series(1.0, index=tickers)
    active = [tickers]  # one cluster holding all assets

    while active:
        # bisect every multi-asset cluster into its two halves
        next_level = []
        for cluster in active:
            if len(cluster) <= 1:
                continue
            mid = int(len(cluster) / 2)
            next_level.append(cluster[:mid])
            next_level.append(cluster[mid:])
        active = next_level

        # adjacent entries are sibling halves of the same parent cluster
        for left, right in zip(active[::2], active[1::2]):
            left_var = get_cluster_var(cov, left)
            right_var = get_cluster_var(cov, right)
            # allocate more weight to the lower-variance half
            alpha = 1 - left_var / (left_var + right_var)
            weights[left] *= alpha
            weights[right] *= 1 - alpha
    return weights
# Run HRP on the quasi-diagonalized ticker order and rank weights descending.
hrp_allocation = get_hrp_allocation(cov, sorted_tickers)
hrp_allocation = hrp_allocation.sort_values(ascending=False)
# It's done :D
# title = "Hierarchical Risk Parity - Portfolio Allocation"
# hrp_allocation.sort_values(ascending=False).plot.bar(figsize=(15, 4), title=title)
# sns.despine()
# plt.tight_layout()

References

  1. Marcos Lopez de Prado. Advances in Financial Machine Learning
  2. ML for Trading - 2nd Edition. https://github.com/stefan-jansen/machine-learning-for-trading/tree/main
  3. Matlab - Hierarchical Risk Parity Portfolio. https://www.mathworks.com/help/finance/create-hierarchical-risk-parity-portfolio.html
  4. Hierarchical Risk Parity https://developer.nvidia.com/blog/hierarchical-risk-parity-on-rapids-an-ml-approach-to-portfolio-allocation/