"""
=============================================================================
PART 6: WALK-FORWARD BACKTEST (LONG-ONLY)
=============================================================================
"""
import pandas as pd
import numpy as np
import warnings
import os
import time
from tqdm import tqdm
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns
warnings.filterwarnings('ignore')
np.random.seed(42)
import logging
logging.getLogger("hmmlearn").setLevel(logging.CRITICAL)
logging.getLogger("statsmodels").setLevel(logging.CRITICAL)
logging.getLogger("arch").setLevel(logging.CRITICAL)
# =============================================================================
# CONFIGURATION
# =============================================================================
DATA_PATH = 'report_data/stock_prices.csv'  # long-format daily prices: date, ticker, OHLCV
HORIZON = 63                 # forecast / holding horizon in trading days (~1 quarter)
REBALANCE_FREQ = 63          # rebalance every N trading days (matches HORIZON)
MIN_COMPOSITE_SCORE = 0.0    # only buy names whose composite score exceeds this
TOP_K = 5                    # maximum number of simultaneous long positions
WARMUP_DAYS = 252            # minimum history required before a ticker is scored
BACKTEST_START = '2020-07-01'  # first eligible rebalance date
RISK_FREE_ANNUAL = 0.03      # annual risk-free rate used in Sharpe / Sortino
ML_RETRAIN_EVERY = 4         # retrain ML models every N rebalance periods
INITIAL_CAPITAL = 1_000_000  # starting equity used for reporting
SLIPPAGE = 0.001             # per-side slippage (0.10%)
COMMISSION = 0.0005          # per-side commission (0.05%)
ROUND_TRIP_COST = 2 * SLIPPAGE + 2 * COMMISSION  # full buy+sell cost per position
# Suppress all print statements during backtest
VERBOSE = False
# =============================================================================
# TECHNICAL SCORE
# =============================================================================
def compute_tech_score(hist):
    """Technical-analysis score in [-5, 5] for the latest bar of one ticker.

    Combines SMA trend alignment, the 50/200 cross, a 14-day RSI regime,
    volume confirmation, Bollinger-band position/width and MACD-histogram
    momentum, computed from the 'adj_close' and 'volume' columns of `hist`.
    """
    close = hist['adj_close']
    volume = hist['volume']
    last_px = float(close.iloc[-1])
    # Simple moving averages at three horizons
    sma20 = float(close.rolling(20).mean().iloc[-1])
    sma50 = float(close.rolling(50).mean().iloc[-1])
    sma200 = float(close.rolling(200).mean().iloc[-1])
    # Today's volume relative to its 20-day average
    vol_ratio = float(volume.iloc[-1] / (volume.rolling(20).mean().iloc[-1] + 1e-10))
    # 14-day RSI (simple-moving-average variant)
    chg = close.diff()
    avg_gain = chg.where(chg > 0, 0).rolling(14).mean()
    avg_loss = (-chg.where(chg < 0, 0)).rolling(14).mean()
    rsi_series = 100 - (100 / (1 + avg_gain / (avg_loss + 1e-10)))
    rsi_now = float(rsi_series.iloc[-1])
    rsi_prev = float(rsi_series.iloc[-2]) if len(rsi_series) > 1 else rsi_now
    # Bollinger bands: %B position and band width now vs 5 bars ago
    band_std = close.rolling(20).std()
    band_mid = close.rolling(20).mean()
    band_hi = band_mid + 2 * band_std
    band_lo = band_mid - 2 * band_std
    pct_b = float(((close - band_lo) / (band_hi - band_lo + 1e-10)).iloc[-1])
    width_now = float(((band_hi - band_lo) / (band_mid + 1e-10)).iloc[-1])
    width_prev = float(((band_hi - band_lo) / (band_mid + 1e-10)).iloc[-6]) if len(close) > 6 else width_now
    # MACD histogram now and one bar ago
    ema_fast = close.ewm(span=12, adjust=False).mean()
    ema_slow = close.ewm(span=26, adjust=False).mean()
    macd_line = ema_fast - ema_slow
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    hist_now = float((macd_line - signal_line).iloc[-1])
    hist_prev = float((macd_line - signal_line).iloc[-2]) if len(close) > 1 else hist_now
    day_ret = float(close.pct_change().iloc[-1])
    total = 0.0
    # Price vs short/medium moving averages
    if not (np.isnan(sma20) or np.isnan(sma50)):
        if last_px > sma20 and last_px > sma50:
            total += 1.0
        elif last_px < sma20 and last_px < sma50:
            total -= 1.0
    # Golden / death cross
    if not (np.isnan(sma50) or np.isnan(sma200)):
        total += 1.0 if sma50 > sma200 else -1.0
    # RSI regimes: healthy momentum, mild momentum, oversold bounce, overbought
    if not (np.isnan(rsi_now) or np.isnan(rsi_prev)):
        if 55 < rsi_now <= 70:
            total += 2.0
        elif 50 < rsi_now <= 55:
            total += 1.0
        elif rsi_now < 30 and rsi_now > rsi_prev:
            total += 2.0
        elif rsi_now > 70:
            total -= 2.0
    # High-volume days confirm the direction of the daily move
    if not np.isnan(vol_ratio) and vol_ratio > 1.2:
        if day_ret > 0:
            total += 1.5
        elif day_ret < 0:
            total -= 1.5
    # Bollinger breakout (expanding bands) vs oversold vs breakdown
    if not any(np.isnan([pct_b, width_now, width_prev])):
        if pct_b > 0.8 and width_now > width_prev:
            total += 1.0
        elif pct_b < 0.2 and rsi_now < 30:
            total += 0.5
        elif pct_b < 0.2 and width_now > width_prev:
            total -= 1.0
    # MACD histogram rising above zero / falling below zero
    if not (np.isnan(hist_now) or np.isnan(hist_prev)):
        if hist_now > 0 and hist_now > hist_prev:
            total += 1.0
        elif hist_now < 0 and hist_now < hist_prev:
            total -= 1.0
    # Raw total spans roughly [-7.5, +7.5]; halve and clip into [-5, 5]
    return float(np.clip(total / 2.0, -5, 5))
# =============================================================================
# QUANTITATIVE SCORE
# =============================================================================
def compute_quant_score(hist):
    """Quantitative risk/return score for one ticker (sum of five rules).

    Scores trailing-year Sharpe, trailing-year max drawdown, 3-month
    momentum, and a Monte-Carlo GBM simulation over the holding horizon
    (probability of profit and 95% VaR). Reads the module constants
    RISK_FREE_ANNUAL and HORIZON.
    """
    px = hist['adj_close']
    daily = px.pct_change().dropna()
    total = 0.0
    # Rule 1: trailing-year Sharpe ratio
    if len(daily) >= 252:
        yr = daily.tail(252)
        mean_ann = yr.mean() * 252
        vol_ann = yr.std() * np.sqrt(252)
        sr = (mean_ann - RISK_FREE_ANNUAL) / (vol_ann + 1e-10)
        if sr > 1.0:
            total += 1.0
        elif sr < 0:
            total -= 1.0
    # Rule 2: trailing-year max drawdown (shallow is rewarded)
    if len(px) >= 252:
        yr_px = px.tail(252)
        dd = ((yr_px / yr_px.expanding().max()) - 1).min()
        if dd > -0.15:
            total += 1.0
        elif dd < -0.25:
            total -= 1.0
    # Rule 3: 3-month price momentum
    if len(px) >= 63:
        m3 = (px.iloc[-1] / px.iloc[-63]) - 1
        if m3 > 0.05:
            total += 1.0
        elif m3 < -0.05:
            total -= 1.0
    # Rules 4-5: GBM Monte-Carlo over HORIZON days (daily mu/sigma plugged
    # into the lognormal terminal-return formula)
    if len(daily) >= 252:
        yr = daily.tail(252)
        mu, sigma = yr.mean(), yr.std()
        rng = np.random.RandomState(42)  # Local seed for reproducibility
        z = rng.randn(5000)
        sims = np.exp((mu - 0.5 * sigma ** 2) * HORIZON + sigma * np.sqrt(HORIZON) * z) - 1
        prob_pos = float(np.mean(sims > 0))
        var_95 = float(np.percentile(sims, 5))
        if prob_pos > 0.60:
            total += 1.0
        elif prob_pos < 0.40:
            total -= 1.0
        if var_95 > -0.15:
            total += 1.0
        elif var_95 < -0.25:
            total -= 1.0
    return float(total)
# =============================================================================
# TIME SERIES SCORE (ARIMA + GARCH)
# =============================================================================
from statsmodels.tsa.arima.model import ARIMA as _ARIMA
from arch import arch_model as _arch_model
from statsmodels.tsa.stattools import adfuller
def compute_ts_score(hist, train_window=252):
    """Time-series score in [-3, 3] from an ARIMA forecast and GARCH vol.

    Steps:
      1. Holdout gate: fit ARIMA(1,d,0) on `train_window` minus the last
         21 days and require MAPE <= 15% on those 21 days, else return 0.
      2. Forecast the HORIZON-day price with ARIMA and annualize the
         implied return.
      3. Forecast HORIZON-day volatility with GARCH(1,1), falling back to
         the sample std on failure.
    Reads the module constant HORIZON.
    """
    prices = hist['adj_close']
    if len(prices) < train_window + 21:
        return 0.0
    train = prices.tail(train_window)
    rets = train.pct_change().dropna() * 100  # percent units for GARCH scaling
    cur = float(prices.iloc[-1])
    # --- 1. Holdout validation ---
    try:
        tr_arr = train.iloc[:-21].values
        ho_arr = train.iloc[-21:].values
        pv = adfuller(tr_arr)[1]
        d = 0 if pv < 0.05 else 1  # difference once unless already stationary
        fit_ho = _ARIMA(tr_arr, order=(1, d, 0)).fit()
        # BUG FIX: forecast() returns a plain ndarray when the model was fit
        # on an ndarray; the old `fc_ho.values` raised AttributeError, which
        # the bare except swallowed, silently forcing mape = 0.20 (so the
        # function always returned 0.0).
        fc_ho = np.asarray(fit_ho.forecast(steps=21))
        mape = float(np.mean(np.abs((ho_arr - fc_ho) / (np.abs(ho_arr) + 1e-10))))
    except Exception:
        mape = 0.20  # pessimistic default: fails the gate below
    if mape > 0.15:
        return 0.0
    # --- 2. ARIMA price forecast over the holding horizon ---
    try:
        pv = adfuller(train.values)[1]
        d = 0 if pv < 0.05 else 1
        fit = _ARIMA(train.values, order=(1, d, 0)).fit()
        # Same ndarray issue as above: index positionally, not via .iloc
        fc_price = float(np.asarray(fit.forecast(steps=HORIZON))[-1])
        fc_return = (fc_price / cur) - 1
    except Exception:
        fc_return = 0.0
    # --- 3. GARCH(1,1) annualized volatility forecast ---
    try:
        gfit = _arch_model(rets, vol='Garch', p=1, q=1, dist='normal').fit(disp='off')
        gfc = gfit.forecast(horizon=HORIZON)
        tot_v = float(gfc.variance.iloc[-1, :HORIZON].sum())
        gvol = np.sqrt(tot_v) * np.sqrt(252 / HORIZON) / 100  # back to decimal units
    except Exception:
        gvol = float(rets.std()) * np.sqrt(252) / 100
    # --- Scoring: reward expected return, penalize forecast volatility ---
    score = 0.0
    ann = fc_return * (252 / HORIZON)
    if ann > 0.08:
        score += 2.0
    elif ann > 0.03:
        score += 1.0
    elif ann < -0.08:
        score -= 2.0
    elif ann < -0.03:
        score -= 1.0
    if gvol < 0.20:
        score += 1.0
    elif gvol >= 0.40:
        score -= 1.0
    elif gvol >= 0.30:
        score -= 0.5
    return float(np.clip(score, -3, 3))
# =============================================================================
# MACHINE LEARNING SCORE (HMM + CLASSIFICATION)
# =============================================================================
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from hmmlearn import hmm as _hmm
# Absolute forward-return threshold that defines an "extreme move" event label
EXTREME_THR = 0.05
# Feature columns fed to both ML models (must match the _featurize output)
FEAT_COLS = [
    'r1', 'r5', 'r10', 'r21',                                     # multi-horizon returns
    'sma_ratio10', 'sma_ratio20', 'sma_ratio50', 'sma_ratio200',  # price distance from SMAs
    'x10_20', 'x20_50', 'x50_200',                                # SMA crossover flags
    'rsi14', 'macd_h', 'bb_p',                                    # oscillators
    'v10', 'v21', 'v63', 'vratio',                                # realized volatilities
    'vol_r', 'hl_r', 'dhi', 'dlo', 'streak',                      # volume / range / streak
    'hmm_st', 'hmm_bear', 'hmm_side', 'hmm_bull', 'hmm_chg'       # HMM regime features
]
def _rsi(c, p=14):
d = c.diff()
g = d.where(d > 0, 0).rolling(p).mean()
l = (-d.where(d < 0, 0)).rolling(p).mean()
return 100 - (100 / (1 + g / (l + 1e-10)))
def _macd_h(c):
return (c.ewm(span=12).mean() - c.ewm(span=26).mean()).pipe(
lambda x: x - x.ewm(span=9).mean())
def _bb_pct(c, p=20):
m = c.rolling(p).mean()
s = c.rolling(p).std()
return (c - (m - 2 * s)) / (4 * s + 1e-10)
def _fit_hmm_labels(ret, vol, vr, n=3):
    """Fit a Gaussian HMM on (return, volatility, volume-ratio) observations.

    Returns
    -------
    (labels, probs) : labels[i] in {0..n-1} with states re-ranked by mean
        return (0 = lowest/bear, n-1 = highest/bull); probs is the matching
        (len, n) posterior matrix. Falls back to an all-"sideways" labelling
        when there is too little data or no covariance type converges.
    """
    X = np.column_stack([ret, vol, vr])
    X = X[~np.isnan(X).any(axis=1)]  # drop rows with any missing feature

    def _neutral():
        # Everything labelled "sideways" (state 1) with full confidence.
        return np.ones(len(X), dtype=int), np.column_stack([
            np.zeros(len(X)), np.ones(len(X)), np.zeros(len(X))
        ])

    if len(X) < 50:
        return _neutral()
    # Try progressively different covariance structures until one converges.
    for cov in ['diag', 'spherical', 'full']:
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore')
                m = _hmm.GaussianHMM(n_components=n, covariance_type=cov,
                                     n_iter=60, random_state=42, tol=1e-2)
                m.fit(X)
                st = m.predict(X)
                pr = m.predict_proba(X)
                # Relabel states so index order = ascending mean return.
                rank = np.argsort(m.means_[:, 0])
                mp = {rank[i]: i for i in range(n)}
                lab = np.array([mp[s] for s in st])
                rep = np.zeros_like(pr)
                for o, nw in mp.items():
                    rep[:, nw] = pr[:, o]
                return lab, rep
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # propagate instead of being swallowed.
            continue
    return _neutral()
def _featurize(tdf):
    """Build the per-ticker ML feature frame (superset of FEAT_COLS).

    Expects `tdf` with 'adj_close', 'volume', 'high', 'low' columns, sorted
    by date. Returns a DataFrame indexed like `tdf` with return, trend,
    oscillator, volatility, range/streak and HMM-regime features.
    """
    c = tdf['adj_close']
    v = tdf['volume']
    d = pd.DataFrame(index=tdf.index)
    # Multi-horizon simple returns
    d['r1'] = c.pct_change()
    d['r5'] = c.pct_change(5)
    d['r10'] = c.pct_change(10)
    d['r21'] = c.pct_change(21)
    # Annualized 21-day realized volatility and volume vs 20-day average
    d['v21'] = d['r1'].rolling(21).std() * np.sqrt(252)
    d['vol_r'] = v / (v.rolling(20).mean() + 1e-10)
    for w in [10, 20, 50, 200]:
        d[f'sma{w}'] = c.rolling(w).mean()
    for w in [10, 20, 50, 200]:
        # Relative distance of price from each SMA
        d[f'sma_ratio{w}'] = c / (d[f'sma{w}'] + 1e-10) - 1
    # SMA crossover flags (1.0 when the faster average sits above the slower)
    d['x10_20'] = (d['sma10'] > d['sma20']).astype(float)
    d['x20_50'] = (d['sma20'] > d['sma50']).astype(float)
    d['x50_200'] = (d['sma50'] > d['sma200']).astype(float)
    # Oscillators
    d['rsi14'] = _rsi(c)
    d['macd_h'] = _macd_h(c)
    d['bb_p'] = _bb_pct(c)
    # Short / long realized volatility and their ratio
    d['v10'] = d['r1'].rolling(10).std() * np.sqrt(252)
    d['v63'] = d['r1'].rolling(63).std() * np.sqrt(252)
    d['vratio'] = d['v10'] / (d['v63'] + 1e-10)
    # Close position within the daily high-low range
    d['hl_r'] = (c - tdf['low']) / (tdf['high'] - tdf['low'] + 1e-10)
    # Distance from the trailing 252-day high / low
    d['dhi'] = c / (c.rolling(252).max() + 1e-10) - 1
    d['dlo'] = c / (c.rolling(252).min() + 1e-10) - 1
    # Signed length of the current run of consecutive up/down days
    dir_ = np.sign(d['r1'])
    d['streak'] = dir_.groupby((dir_ != dir_.shift()).cumsum()).cumcount() + 1
    d['streak'] *= dir_
    # HMM regime features: only rows where return/vol/volume-ratio are all present
    valid = ~d[['r1', 'v21', 'vol_r']].isna().any(axis=1)
    d['hmm_st'] = np.nan
    d['hmm_bear'] = np.nan
    d['hmm_side'] = np.nan
    d['hmm_bull'] = np.nan
    if valid.sum() > 50:
        lab, rep = _fit_hmm_labels(d.loc[valid, 'r1'].values,
                                   d.loc[valid, 'v21'].values,
                                   d.loc[valid, 'vol_r'].values)
        d.loc[valid, 'hmm_st'] = lab.astype(float)
        d.loc[valid, 'hmm_bear'] = rep[:, 0]
        d.loc[valid, 'hmm_side'] = rep[:, 1]
        d.loc[valid, 'hmm_bull'] = rep[:, 2]
    else:
        # Too little history: assume the neutral/sideways regime throughout
        d.loc[valid, 'hmm_st'] = 1.0
        d.loc[valid, 'hmm_bear'] = 0.0
        d.loc[valid, 'hmm_side'] = 1.0
        d.loc[valid, 'hmm_bull'] = 0.0
    # 1.0 on days where the inferred regime changed
    d['hmm_chg'] = d['hmm_st'].diff().abs().gt(0).astype(float)
    # Forward fill NaN values from HMM
    d[['hmm_st', 'hmm_bear', 'hmm_side', 'hmm_bull', 'hmm_chg']] = \
        d[['hmm_st', 'hmm_bear', 'hmm_side', 'hmm_bull', 'hmm_chg']].ffill().fillna(0)
    return d
def _build_panel_dataset(panel_hist):
    """Assemble the cross-ticker feature/label panel for the ML models.

    For each ticker: features from _featurize plus
      - future_ret : forward HORIZON-day return, price[t+H] / price[t] - 1
      - y_event    : 1 when |future_ret| > EXTREME_THR
      - y_dir      : 1 = extreme up, 0 = extreme down, -1 = not extreme
    Rows with missing features or an unresolvable future return are dropped.
    """
    rows = []
    for tk, tdf in panel_hist.groupby('ticker'):
        tdf = tdf.sort_values('date').copy()
        feat = _featurize(tdf)
        # BUG FIX: the original used pct_change(-HORIZON), which computes
        # price[t] / price[t+H] - 1 — the INVERSE of the forward return —
        # so the y_dir up/down labels were sign-inverted. Use the true
        # forward return instead.
        feat['future_ret'] = tdf['adj_close'].shift(-HORIZON) / tdf['adj_close'] - 1
        feat['y_event'] = (feat['future_ret'].abs() > EXTREME_THR).astype(int)
        feat['y_dir'] = -1
        feat.loc[feat['future_ret'] > EXTREME_THR, 'y_dir'] = 1
        feat.loc[feat['future_ret'] < -EXTREME_THR, 'y_dir'] = 0
        feat['ticker'] = tk
        feat['date'] = tdf['date'].values
        rows.append(feat)
    ds = pd.concat(rows).reset_index(drop=True)
    return ds.dropna(subset=FEAT_COLS + ['y_event', 'future_ret'])
class MLModels:
    """Two-stage extreme-move classifier.

    Stage 1 (`m1`): logistic regression estimating P(|forward HORIZON-day
    return| > EXTREME_THR). Stage 2 (`m2`): XGBoost classifier estimating
    P(up | extreme move), trained only on rows labelled as extreme events.
    `trained` gates score(); each stage has its own StandardScaler.
    """
    def __init__(self):
        # Scalers / models start unset; score() is neutral until fit succeeds.
        self.sc1 = self.sc2 = self.m1 = self.m2 = None
        self.trained = False
    def fit(self, panel_hist):
        """Fit both stages on history up to the current rebalance date.

        Returns early (leaving `trained` False) when there is too little
        data or a degenerate label distribution at either stage.
        """
        ds = _build_panel_dataset(panel_hist)
        if len(ds) < 300:
            return
        # Train only on the earliest 80% of dates (embargoes the most
        # recent data, whose forward labels overlap the evaluation point).
        dates = sorted(ds['date'].unique())
        cutoff = dates[int(len(dates) * 0.80)]
        tr = ds[ds['date'] <= cutoff].copy()
        X1 = tr[FEAT_COLS].values
        y1 = tr['y_event'].values
        if len(np.unique(y1)) < 2:
            return
        self.sc1 = StandardScaler()
        X1s = self.sc1.fit_transform(X1)
        self.m1 = LogisticRegression(max_iter=500, C=0.5,
                                     class_weight='balanced', random_state=42)
        self.m1.fit(X1s, y1)
        # Stage 2: only event rows with a resolved direction label
        ev = tr[tr['y_event'] == 1]
        ok = ev['y_dir'] != -1
        X2 = ev.loc[ok, FEAT_COLS].values
        y2 = ev.loc[ok, 'y_dir'].values
        if len(np.unique(y2)) < 2 or len(y2) < 30:
            return
        self.sc2 = StandardScaler()
        X2s = self.sc2.fit_transform(X2)
        self.m2 = XGBClassifier(n_estimators=80, max_depth=4,
                                learning_rate=0.05, subsample=0.8,
                                colsample_bytree=0.8, use_label_encoder=False,
                                eval_metric='logloss', random_state=42, verbosity=0)
        self.m2.fit(X2s, y2)
        self.trained = True
    def score(self, ticker_hist):
        """Score the latest bar of one ticker.

        Returns (ml_score, p_event, p_up); ml_score = 6 * p_event *
        (p_up - 0.5), bounded in [-3, 3]. Neutral (0.0, 0.5, 0.5) when
        untrained or any feature is missing for the latest row.
        """
        if not self.trained:
            return 0.0, 0.5, 0.5
        feat = _featurize(ticker_hist.sort_values('date').copy())
        row = feat.iloc[-1]
        if row[FEAT_COLS].isna().any():
            return 0.0, 0.5, 0.5
        X = row[FEAT_COLS].values.reshape(1, -1)
        p_ev = float(self.m1.predict_proba(self.sc1.transform(X))[0, 1])
        p_up = float(self.m2.predict_proba(self.sc2.transform(X))[0, 1])
        # Signed score: magnitude from event probability, sign from direction
        return float(6 * p_ev * (p_up - 0.5)), p_ev, p_up
def composite_score_at(hist, ml_models):
    """Compute all four component scores for one ticker at a point in time.

    Returns a dict with the components, their sum ('composite') and the ML
    probabilities, or None when history is too short, any component is NaN,
    or any scorer raises.
    """
    if len(hist) < max(WARMUP_DAYS, 200):
        return None
    try:
        t_sc = compute_tech_score(hist)
        q_sc = compute_quant_score(hist)
        ts_sc = compute_ts_score(hist)
        ml_sc, p_event, p_up = ml_models.score(hist)
        if any(np.isnan([t_sc, q_sc, ts_sc, ml_sc])):
            return None
        return {
            'tech': t_sc, 'quant': q_sc, 'ts': ts_sc, 'ml': ml_sc,
            'composite': t_sc + q_sc + ts_sc + ml_sc,
            'p_event': p_event, 'p_up': p_up,
        }
    except Exception:
        return None
# =============================================================================
# WALK-FORWARD ENGINE
# =============================================================================
def run_walk_forward(df):
    """Run the walk-forward long-only backtest.

    Parameters
    ----------
    df : DataFrame
        Long-format daily prices with at least 'date', 'ticker',
        'adj_close' columns (one row per ticker per day).

    Returns
    -------
    (trades_df, port_df, scores_df) :
        trades_df  - one row per rebalance period (selection, turnover,
                     cost drag, period returns for strategy and benchmark)
        port_df    - daily NAV / return series for strategy and benchmark
        scores_df  - component scores for every (date, ticker) evaluated
    """
    df = df.sort_values(['ticker', 'date']).copy()
    all_dates = sorted(df['date'].unique())
    tickers = sorted(df['ticker'].unique())
    # Wide price matrix: rows = dates, columns = tickers
    price_px = df.pivot(index='date', columns='ticker', values='adj_close')
    start_dt = pd.Timestamp(BACKTEST_START)
    # Rebalance on every REBALANCE_FREQ-th trading day on/after the start date
    rebal_dates = [d for i, d in enumerate(all_dates)
                   if d >= start_dt and i % REBALANCE_FREQ == 0]
    all_scores = []
    all_trades = []
    ml_models = MLModels()
    prev_long_t = set()  # previous period's holdings, for turnover
    print(f"\n{'='*68}")
    print(f" WALK-FORWARD BACKTEST (LONG-ONLY)")
    print(f" Period : {rebal_dates[0].date()} to {rebal_dates[-1].date()}")
    print(f" Windows : {len(rebal_dates)} rebalances x {HORIZON}-day horizon")
    print(f" Target : Top-{TOP_K} stocks")
    print(f" Rule : Buy ONLY if Composite Score > {MIN_COMPOSITE_SCORE}")
    print(f" Costs : Slippage {SLIPPAGE:.2%} | Commission {COMMISSION:.2%}")
    print(f" ML : Retrain every {ML_RETRAIN_EVERY} periods")
    print(f"{'='*68}\n")
    # Use tqdm with minimal output
    pbar = tqdm(total=len(rebal_dates), desc="Backtest Progress",
                bar_format='{l_bar}{bar:20}{r_bar}')
    for i, rebal_dt in enumerate(rebal_dates):
        # Exit date = HORIZON-th tradable day after the rebalance date;
        # the final period may be truncated to whatever data remains.
        valid_exit_dates = [d for d in all_dates if d > rebal_dt and d in price_px.index]
        if len(valid_exit_dates) < HORIZON:
            if i + 1 == len(rebal_dates):
                next_dt = valid_exit_dates[-1] if valid_exit_dates else None
            else:
                pbar.update(1)
                continue
        else:
            next_dt = valid_exit_dates[min(HORIZON - 1, len(valid_exit_dates) - 1)]
        if next_dt is None or next_dt not in price_px.index or rebal_dt not in price_px.index:
            pbar.update(1)
            continue
        # Only data up to the rebalance date is visible (no look-ahead)
        panel_hist = df[df['date'] <= rebal_dt].copy()
        # ML Retrain (silent)
        if i % ML_RETRAIN_EVERY == 0:
            try:
                # Keep the previous models if retraining fails or degenerates
                backup = (ml_models.sc1, ml_models.sc2, ml_models.m1,
                          ml_models.m2, ml_models.trained)
                ml_models.fit(panel_hist)
                if not ml_models.trained:
                    (ml_models.sc1, ml_models.sc2, ml_models.m1,
                     ml_models.m2, ml_models.trained) = backup
            except:
                # NOTE(review): bare except silently ignores retrain failures
                # (including KeyboardInterrupt) — consider narrowing.
                pass
        # Score every ticker with sufficient history as of rebal_dt
        period_scores = {}
        for tk in tickers:
            hist = panel_hist[panel_hist['ticker'] == tk].sort_values('date')
            res = composite_score_at(hist, ml_models)
            if res:
                period_scores[tk] = res
                all_scores.append({'date': rebal_dt, 'ticker': tk, **res})
        if not period_scores:
            pbar.update(1)
            continue
        # Rank by composite score; hold at most TOP_K names above the floor
        s_ser = pd.Series({k: v['composite'] for k, v in period_scores.items()}).sort_values(ascending=False)
        long_t = [t for t in s_ser.index if s_ser[t] > MIN_COMPOSITE_SCORE][:TOP_K]
        # Period returns: equal-weight across selected names
        p0 = price_px.loc[rebal_dt]
        p1 = price_px.loc[next_dt]
        hr = (p1 / p0) - 1
        hr_filled = hr.reindex(tickers).fillna(0)
        l_ret = float(hr[long_t].mean()) if long_t else 0.0
        # Unfilled slots sit in cash (earning nothing)
        cash_weight = (TOP_K - len(long_t)) / TOP_K
        # Turnover = fraction of the TOP_K book replaced this period
        # (first period counts as 100% — the book is built from cash)
        curr_long_t = set(long_t)
        if prev_long_t:
            intersection = len(curr_long_t & prev_long_t)
            turnover = 1.0 - (intersection / TOP_K)
        else:
            turnover = 1.0
        cost_drag = turnover * ROUND_TRIP_COST
        p_ret = l_ret * (1 - cash_weight) - cost_drag
        # Benchmark: equal-weight mean of all tickers' period returns
        bm_ret = float(hr_filled.mean())
        prev_long_t = curr_long_t
        all_trades.append({
            'rebal_date': rebal_dt,
            'exit_date': next_dt,
            'long_tickers': long_t,
            'cash_weight': cash_weight,
            'turnover': turnover,
            'cost_drag': cost_drag,
            'strat_ret': p_ret,
            'bm_ret': bm_ret,
        })
        pbar.update(1)
    pbar.close()
    trades_df = pd.DataFrame(all_trades)
    scores_df = pd.DataFrame(all_scores)
    if trades_df.empty:
        return trades_df, pd.DataFrame(), scores_df
    # ---- Build daily NAV series by compounding within each holding window ----
    # NOTE(review): the daily strategy NAV below does NOT subtract cost_drag,
    # so it will not exactly reconcile with trades_df['strat_ret'] — confirm
    # whether that is intended.
    daily_rows = []
    bm_nav = 1.0
    str_nav = 1.0
    for _, row in trades_df.iterrows():
        win = [d for d in all_dates if row['rebal_date'] <= d <= row['exit_date']]
        if len(win) < 2:
            continue
        for j in range(1, len(win)):
            d0, d1 = win[j - 1], win[j]
            if d0 not in price_px.index or d1 not in price_px.index:
                continue
            # NOTE(review): this benchmark is (mean price_t1 / mean price_t0) - 1,
            # a price-weighted index return, whereas the per-period bm_ret above
            # is the equal-weight mean of individual returns — the two differ;
            # confirm which definition is intended.
            bm_p0 = price_px.loc[d0, tickers].mean(skipna=True)
            bm_p1 = price_px.loc[d1, tickers].mean(skipna=True)
            if pd.isna(bm_p0) or pd.isna(bm_p1) or bm_p0 == 0:
                daily_bm = 0.0
            else:
                daily_bm = (bm_p1 / bm_p0) - 1
            lt = row['long_tickers']
            if lt:
                lt_p0 = price_px.loc[d0, lt].mean(skipna=True)
                lt_p1 = price_px.loc[d1, lt].mean(skipna=True)
                if pd.isna(lt_p0) or pd.isna(lt_p1) or lt_p0 == 0:
                    lr = 0.0
                else:
                    lr = (lt_p1 / lt_p0) - 1
            else:
                lr = 0.0
            cw = row['cash_weight']
            dr = lr * (1 - cw)  # scale by invested fraction; cash earns 0
            bm_nav *= (1 + daily_bm)
            str_nav *= (1 + dr)
            daily_rows.append({'date': d1, 'strat_nav': str_nav,
                               'bm_nav': bm_nav, 'strat_ret': dr,
                               'bm_ret': daily_bm})
    # Windows can share boundary dates; keep the first occurrence of each
    port_df = (pd.DataFrame(daily_rows)
               .drop_duplicates('date')
               .set_index('date')
               .sort_index())
    return trades_df, port_df, scores_df
# =============================================================================
# PERFORMANCE ANALYTICS
# =============================================================================
def perf_stats(returns, label, initial_capital=1_000_000):
    """Summarize a daily-return series into annualized performance metrics.

    Returns a dict of display-formatted strings (plus the raw observation
    count); emits 'N/A' placeholders when fewer than 10 observations exist.
    Uses the module constant RISK_FREE_ANNUAL for Sharpe/Sortino.
    """
    clean = returns.dropna()
    n_obs = len(clean)
    if n_obs < 10:
        return {'Label': label, 'Ann. Return': 'N/A', 'Ann. Volatility': 'N/A',
                'Sharpe': 'N/A', 'Sortino': 'N/A', 'Max Drawdown': 'N/A',
                'Calmar': 'N/A', 'Win Rate': 'N/A', 'N Obs': n_obs}
    # Geometric annualization of the total compounded return
    ann_ret = (1 + clean).prod() ** (252 / n_obs) - 1
    ann_vol = clean.std() * np.sqrt(252)
    sharpe_v = (ann_ret - RISK_FREE_ANNUAL) / (ann_vol + 1e-10)
    growth = (1 + clean).cumprod()
    final_equity = initial_capital * growth.iloc[-1]
    max_dd = float(((growth / growth.cummax()) - 1).min())
    calmar_v = ann_ret / (-max_dd + 1e-10)
    downside = clean[clean < 0]
    if len(downside) > 0:
        sortino_v = (ann_ret - RISK_FREE_ANNUAL) / (downside.std() * np.sqrt(252) + 1e-10)
    else:
        # NOTE(review): 0 (the worst score) when there are NO losing days
        # looks inverted — confirm intent before changing.
        sortino_v = 0
    win_rate = float((clean > 0).mean())
    return {'Label': label,
            'Final Equity': f"${final_equity:,.0f}",
            'Ann. Return': f"{ann_ret:.2%}",
            'Ann. Volatility': f"{ann_vol:.2%}",
            'Sharpe': f"{sharpe_v:.3f}",
            'Sortino': f"{sortino_v:.3f}",
            'Max Drawdown': f"{max_dd:.2%}",
            'Calmar': f"{calmar_v:.3f}",
            'Win Rate': f"{win_rate:.2%}",
            'N Obs': n_obs}
# =============================================================================
# MAIN
# =============================================================================
if __name__ == '__main__':
    t0 = time.time()
    print("Loading data...")
    # Long-format daily prices; 'date' parsed to Timestamps
    df = pd.read_csv(DATA_PATH, parse_dates=['date'])
    print(f" {len(df):,} rows | {df['ticker'].nunique()} tickers | "
          f"{df['date'].min().date()} to {df['date'].max().date()}")
    trades_df, port_df, scores_df = run_walk_forward(df)
    if trades_df.empty or port_df.empty:
        print("Not enough data for backtest.")
        raise SystemExit(1)
    # Daily-frequency stats: strategy vs equal-weight benchmark
    strat_stats = perf_stats(port_df['strat_ret'], 'Composite Strategy (Long Only)')
    bm_stats = perf_stats(port_df['bm_ret'], 'Equal-Weight Benchmark')
    print("\n" + "=" * 65)
    print("PERFORMANCE SUMMARY")
    print("=" * 65)
    summary_df = pd.DataFrame([strat_stats, bm_stats]).set_index('Label')
    print(summary_df.to_string())
    # Per-rebalance-period diagnostics (net of cost drag for the strategy)
    s = trades_df['strat_ret']
    b = trades_df['bm_ret']
    print(f"\n Rebalance periods : {len(trades_df)}")
    print(f" Win rate : {(s > 0).mean():.1%} (BM: {(b > 0).mean():.1%})")
    print(f" Avg period return : {s.mean():.2%} (BM: {b.mean():.2%})")
    print(f" Best / Worst : {s.max():.2%} / {s.min():.2%}")
    print(f" Mean alpha/period : {(s - b).mean():.2%}")
    print(f" Avg turnover : {trades_df['turnover'].mean():.1%}")
    print(f" Avg cost drag : {trades_df['cost_drag'].mean():.4%}")
    print(f" Total runtime : {(time.time() - t0) / 60:.1f} min")