查看全集:💎Quantopia量化分析56讲
协方差衡量两个随机变量的联合变化程度。数学表达式为:$\mathrm{Cov}(X, Y) = E\big[(X - E[X])(Y - E[Y])\big]$
X和Y的协方差为正时,二者倾向于同向变动;为负时,二者倾向于反向变动;为零时,二者没有线性关系。
示例计算:
import numpy as np

# Simulate Y = 5X + noise so the two series have a clearly positive covariance.
X = np.random.normal(size=1000)
epsilon = np.random.normal(0, 3, 1000)
Y = 5*X + epsilon

# Manual sample covariance. Dividing by N - 1 (instead of taking the plain
# mean, which divides by N) matches the default normalisation of np.cov,
# so the two printed numbers agree.
cov_manual = np.sum((X - X.mean()) * (Y - Y.mean())) / (len(X) - 1)
print(f"手动计算协方差: {cov_manual:.2f}")

# np.cov returns the 2x2 covariance matrix of (X, Y); entry [0, 1] is Cov(X, Y).
cov_numpy = np.cov(X, Y)[0, 1]
print(f"NumPy计算协方差: {cov_numpy:.2f}")
对于N个资产,协方差矩阵是N×N的对称矩阵:$\Sigma_{ij} = \mathrm{Cov}(R_i, R_j)$,其中$R_i$为第$i$个资产的收益率,对角线元素$\Sigma_{ii}$即该资产收益率的方差。
# Example: covariance matrix of four stocks
import yfinance as yf

symbols = ['AAPL', 'MSFT', 'GS', 'SBUX']

# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, in which case no 'Adj Close' column is returned.
# The trailing [symbols] pins the column order to `symbols`, so anything that
# later labels results positionally with `symbols` lines up with the data.
data = yf.download(
    symbols, start='2022-01-01', end='2023-01-01', auto_adjust=False
)['Adj Close'][symbols]

# Daily simple returns; the first row is NaN after pct_change and is dropped.
returns = data.pct_change().dropna()
cov_matrix = returns.cov()
print("协方差矩阵:\n", cov_matrix)
当资产数量P > 观测数T时,样本协方差矩阵的秩至多为T−1,因而是奇异的(不可逆),估计误差也很大;即使P略小于T(如下例中P=50、T=60),估计噪声依然明显:
import seaborn as sns

# NOTE(review): `plt` is used below, but no `import matplotlib.pyplot as plt`
# is visible in this file -- confirm it is imported before this snippet runs.

# Fix the seed so the simulated panel is reproducible.
np.random.seed(42)
P = 50  # number of assets
T = 60  # number of observations
returns = np.random.randn(T, P)  # one row per observation, one column per asset

# Sample covariance across assets: np.cov treats rows as variables by
# default, so the (T, P) panel is transposed first.
sample_cov = np.cov(returns.T)

# Histogram of every entry of the sample covariance matrix.
plt.figure(figsize=(10, 6))
sns.histplot(sample_cov.ravel(), bins=50, kde=True)
plt.title("样本协方差分布 (P=50, T=60)");
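将样本协方差矩阵向目标矩阵收缩:$\hat{\Sigma}_{\text{shrink}} = \delta F + (1 - \delta) S$
其中:$S$为样本协方差矩阵,$F$为结构化的目标矩阵(例如对角矩阵或缩放后的单位矩阵),$\delta \in [0, 1]$为收缩系数。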
自动计算最优δ,平衡偏差与方差:
from sklearn.covariance import LedoitWolf

# Fit the Ledoit-Wolf estimator on the returns panel and keep its
# covariance estimate.
lw = LedoitWolf()
lw.fit(returns)
lw_cov = lw.covariance_

# Report the shrinkage coefficient chosen by the estimator.
print(f"最优收缩系数: {lw.shrinkage_:.2f}")
# Split the observations into a training window and a test window.
train, test = returns[:40], returns[40:]

# Two covariance estimates built from the training window only.
sample_train_cov = np.cov(train.T)
lw_train_cov = LedoitWolf().fit(train).covariance_

# Sample covariance of the test window, used as the comparison target.
test_cov = np.cov(test.T)

# Frobenius-norm distance of each training estimate from the test covariance.
sample_error = np.linalg.norm(sample_train_cov - test_cov, ord='fro')
lw_error = np.linalg.norm(lw_train_cov - test_cov, ord='fro')

print(f"样本协方差误差: {sample_error:.2f}")
print(f"Ledoit-Wolf误差: {lw_error:.2f}")
# Track the estimation error over many months
import pandas as pd

# NOTE(review): `plt` is used at the end of this snippet, but no
# `import matplotlib.pyplot as plt` is visible in this file -- confirm it is
# imported before this snippet runs.

errors = []

# Month-START stamps: each window boundary is the first day of a month, so a
# 6-month training window [months[i-6], months[i]) is followed immediately by
# the test month [months[i], months[i] + 1 month) with no gap or overlap.
# (Month-end stamps would shorten the training window to five months and skip
# one month between training and test.)
months = pd.date_range(start='2020-01-01', end='2023-01-01', freq='MS')

for i in range(6, len(months)):
    train_range = months[i-6:i]
    test_month = months[i]

    # Fetch prices. yfinance treats `end` as exclusive, so the training
    # download stops right where the test month begins.
    # auto_adjust=False keeps the 'Adj Close' column available on recent
    # yfinance releases; [symbols] pins the column order to `symbols`.
    train_data = yf.download(
        symbols, start=train_range[0], end=test_month, auto_adjust=False
    )['Adj Close'][symbols]
    test_data = yf.download(
        symbols,
        start=test_month,
        end=test_month + pd.DateOffset(months=1),
        auto_adjust=False,
    )['Adj Close'][symbols]

    # Daily simple returns for each window
    train_returns = train_data.pct_change().dropna()
    test_returns = test_data.pct_change().dropna()

    # Covariance estimates from the training window, and the test-window
    # sample covariance used as the comparison target
    lw_cov = LedoitWolf().fit(train_returns).covariance_
    sample_cov = train_returns.cov()
    true_cov = test_returns.cov()

    # Frobenius-norm error of each estimate against the test-window covariance
    errors.append({
        'month': test_month,
        'LW': np.linalg.norm(lw_cov - true_cov.values, 'fro'),
        'Sample': np.linalg.norm((sample_cov - true_cov).values, 'fro'),
    })

# Visualise the results
error_df = pd.DataFrame(errors).set_index('month')
error_df.plot(figsize=(12, 6))
plt.ylabel('Frobenius范数误差');
from scipy.optimize import minimize
def portfolio_variance(weights, cov_matrix):
    """Return the portfolio variance w' * Sigma * w.

    Args:
        weights: Portfolio weights, one per asset. Any array-like is
            accepted (list, tuple, ndarray); it is flattened to 1-D.
        cov_matrix: Square covariance matrix of the asset returns, as any
            array-like whose size matches ``weights``.

    Returns:
        The scalar quadratic form ``weights @ cov_matrix @ weights``.
    """
    # Convert up front so plain lists work too (they have no `.T`).
    w = np.asarray(weights, dtype=float).ravel()
    sigma = np.asarray(cov_matrix, dtype=float)
    return w @ sigma @ w
import pandas as pd

# NOTE(review): `plt` is used at the end of this snippet, but no
# `import matplotlib.pyplot as plt` is visible in this file -- confirm it is
# imported before this snippet runs.

n_assets = len(symbols)
initial_weights = np.ones(n_assets) / n_assets

# Label weights with the covariance matrix's own column order rather than
# with `symbols`, so each weight is attached to the asset it was optimised for.
asset_names = list(cov_matrix.columns)

# Optimise with each covariance estimate.
# NOTE(review): `lw_cov` is whatever was assigned last before this snippet;
# presumably an estimate for the same assets, in the same order, as
# `cov_matrix` -- confirm.
candidates = [(cov_matrix.values, "样本协方差"), (lw_cov, "Ledoit-Wolf")]
optimal_weights = {}
# The loop variable is deliberately NOT called `cov_matrix`, so the DataFrame
# of that name survives the loop and the snippet can be re-run.
for cov_estimate, label in candidates:
    res = minimize(
        portfolio_variance,
        initial_weights,
        args=(cov_estimate,),
        bounds=[(0, 1)] * n_assets,  # long-only weights
        constraints={'type': 'eq', 'fun': lambda w: np.sum(w) - 1},  # fully invested
    )
    optimal_weights[label] = pd.Series(res.x, index=asset_names)
    print(f"\n使用{label}的权重分布:")
    print(optimal_weights[label].round(3))

# Weights produced by the two optimisations above
sample_weights = optimal_weights["样本协方差"]  # from the sample covariance
lw_weights = optimal_weights["Ledoit-Wolf"]  # from the Ledoit-Wolf estimate

# Portfolio returns over the backtest period.
# auto_adjust=False keeps the 'Adj Close' column available on recent
# yfinance releases.
backtest_data = yf.download(
    symbols, start='2023-01-01', end='2024-01-01', auto_adjust=False
)['Adj Close']
returns = backtest_data.pct_change().dropna()
# The weights are Series indexed by ticker, so the multiplication aligns on
# column labels instead of relying on positional order.
sample_portfolio = (returns * sample_weights).sum(axis=1)
lw_portfolio = (returns * lw_weights).sum(axis=1)

# Plot cumulative returns
cumulative_returns = pd.DataFrame({
    '样本协方差': (1 + sample_portfolio).cumprod(),
    'Ledoit-Wolf': (1 + lw_portfolio).cumprod(),
})
cumulative_returns.plot(figsize=(12, 6))
plt.title("组合累计收益对比");
# Exercise 1: reference code skeleton
class CustomShrinkage:
    """Skeleton of a shrinkage covariance estimator with a selectable target.

    Supported ``target_type`` values:
        'diagonal': the sample covariance with off-diagonal entries zeroed.
        'identity': the identity matrix scaled by the average sample variance.

    After ``fit`` the following attributes are available:
        sample_cov_: sample covariance matrix of the fitted data.
        target_: shrinkage target matrix built from ``sample_cov_``.
    """

    def __init__(self, target_type='diagonal'):
        self.target_type = target_type

    def fit(self, X):
        """Compute the sample covariance of ``X`` and the shrinkage target.

        Args:
            X: 2-D array-like with one row per observation and one column
                per variable.

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: if ``target_type`` is not a supported value.
        """
        sample_cov = np.cov(X, rowvar=False)
        n_features = sample_cov.shape[0]

        # Build the shrinkage target
        if self.target_type == 'diagonal':
            target = np.diag(np.diag(sample_cov))
        elif self.target_type == 'identity':
            target = np.eye(n_features) * np.trace(sample_cov) / n_features
        else:
            # Fail loudly instead of leaving `target` undefined.
            raise ValueError(
                f"Unknown target_type {self.target_type!r}; "
                "expected 'diagonal' or 'identity'."
            )

        # Keep the intermediate matrices so the remaining step can use them.
        self.sample_cov_ = sample_cov
        self.target_ = target

        # TODO (exercise): compute the optimal shrinkage coefficient...
        return self