通过估计AHBS模型和BS模型的期权定价差异,来比较两个模型的定价效率。
数据:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import openpyxl
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import copy
from scipy.stats import norm
# Plot configuration: select the SimHei font so Chinese labels display
# properly, and make the minus sign display properly on the axes.
plt.rcParams.update({
    'font.sans-serif': ['simhei'],
    'axes.unicode_minus': False,
})
class AHBS_Model():
    """Ad-hoc Black-Scholes (AHBS) option pricing model.

    The implied volatility backed out of the Black-Scholes (BS) formula is
    regressed linearly on functions of the underlying price and the strike.
    The fitted value of that regression (the "AHBS volatility") is then fed
    back into the BS formula to price the option.  Model 1 is the plain BS
    benchmark, models 2-4 are the regression variants (see ``AHBS_fit``).
    """

    # Category labels as they appear in the data columns '期权虚实种类'
    # (moneyness: deep ITM, ITM, ATM, OTM, deep OTM) and '期权时间种类'
    # (maturity: long, medium, short).  They also define the layout of the
    # error tables produced by AHBS_main.
    _MONEYNESS_CLASSES = ['深度实值', '实值', '平值', '虚值', '深度虚值']
    _MATURITY_CLASSES = ['长期期权', '中期期权', '短期期权']

    def __init__(self, data_path):
        """Load the option data and prepare the output folders.

        Args:
            data_path: Path of the Excel workbook holding the option data.
        """
        self.data_ = pd.read_excel(data_path)
        self.make_files()
        # One [a, b, c, model, maturity class, moneyness class] row per fit.
        self.summary = []

    def make_files(self):
        """Create the output folders and an empty error workbook.

        Everything is created under ``<cwd>/AHBS文件``.  Paths are built with
        ``os.path.join`` instead of hard-coded backslashes so that they are
        valid on every platform (and free of invalid escape sequences).
        """
        root = os.path.join(os.getcwd(), 'AHBS文件')
        # Folder for generated pictures.
        self.path_picture = os.path.join(root, 'AHBS图片')
        os.makedirs(self.path_picture, exist_ok=True)
        # Folder for generated data.
        self.path_data = os.path.join(root, 'AHBS数据')
        os.makedirs(self.path_data, exist_ok=True)
        # Start from an empty workbook; AHBS_main appends one sheet per model.
        self.path_error = os.path.join(self.path_data, 'AHBS_error.xlsx')
        pd.DataFrame().to_excel(self.path_error)
        self.wb = openpyxl.load_workbook(self.path_error)

    @staticmethod
    def _select_category(data, out_in_class, time_class):
        """Return an independent copy of the rows of one moneyness/maturity cell."""
        mask = (data['期权虚实种类'] == out_in_class) & (data['期权时间种类'] == time_class)
        # .copy() so that adding columns later never touches the source frame.
        return data.loc[mask].copy()

    def _load_bs_data(self, out_in_class, time_class):
        """Build the data set of model 1 (plain BS) for one category.

        Reads 'option_data_init.xlsx', 'option_data.xlsx' and 'ETF50.xlsx'
        from the working directory and attaches to every option row the
        'sigma' value that 'ETF50.xlsx' holds for the row's trade date.
        """
        option_data_init = pd.read_excel('option_data_init.xlsx')
        data_xy = self._select_category(pd.read_excel('option_data.xlsx'),
                                        out_in_class, time_class)
        # Trade dates are aligned on the original row labels.
        data_xy['trade_date'] = option_data_init['trade_date']
        data_xy.index = data_xy['trade_date']
        ETF50 = pd.read_excel('ETF50.xlsx')
        ETF50.index = ETF50['trade_date']
        # Index alignment performs the lookup of sigma by trade date.
        data_xy['sigma'] = ETF50['sigma']
        data_xy.index = range(len(data_xy))
        return data_xy

    def _add_regressors(self, data_xy):
        """Add the regressors of models 2-4 to *data_xy*; return their names.

        Raises:
            ValueError: if ``self.model_number`` is not 2, 3 or 4.
        """
        strike = data_xy['exercise_price']
        if self.model_number == 2:
            # sigma = a + b*(S/K) + c*(S/K)**2
            moneyness = data_xy['ETF50'] / strike
            data_xy['S_K'] = moneyness
            data_xy['S_K_2'] = moneyness ** 2
            return ['S_K', 'S_K_2']
        if self.model_number == 3:
            # sigma = a + b*K + c*K**2
            data_xy['K_2'] = strike ** 2
            return ['exercise_price', 'K_2']
        if self.model_number == 4:
            # sigma = a + b*x + c*x**2 with x = ln(K) / time.
            # NOTE(review): the original comment described x as
            # ln(K)/sqrt(t) while the code divides by t; the code's
            # behaviour is kept here -- confirm which one is intended.
            scaled_log_strike = np.log(strike) / data_xy['time']
            data_xy['ln_K_t'] = scaled_log_strike
            data_xy['ln_K_t_2'] = scaled_log_strike ** 2
            return ['ln_K_t', 'ln_K_t_2']
        raise ValueError('unknown model_number: %r (expected 1, 2, 3 or 4)'
                         % (self.model_number,))

    def AHBS_fit(self, out_in_class, time_class, train_size=0.8, random_state=None):
        """Fit the selected model on one moneyness/maturity category.

        ``self.model_number`` (set by ``AHBS_main``) selects the model:

        * 1 -- plain BS priced with the 'sigma' column taken from ETF50.xlsx
        * 2 -- sigma = a + b*(S/K) + c*(S/K)**2
        * 3 -- sigma = a + b*K + c*K**2
        * 4 -- sigma = a + b*x + c*x**2, x = ln(K)/time

        For models 2-4 the implied volatility is regressed on the model's
        regressors using the training rows only, and the fitted volatility of
        every row is stored in column 'AHBS波动率'.  For model 1 the option
        price is stored directly in column 'ABHS期权定价'.

        Args:
            out_in_class: moneyness class, one of 深度实值, 实值, 平值, 虚值, 深度虚值.
            time_class: maturity class, one of 长期期权, 中期期权, 短期期权.
            train_size: share of the rows used as training set.
            random_state: forwarded to ``train_test_split``; pass an int for
                a reproducible split.

        Sets ``self.data_xy``, ``self.X_train``, ``self.X_test``,
        ``self.Y_train``, ``self.Y_test`` and ``self.model`` (``None`` for
        model 1).  Rows of ``self.data_xy`` are tagged through the columns
        '训练集' / '测试集' (NaN where the row is not in that set).

        Raises:
            ValueError: if ``self.model_number`` is not 1, 2, 3 or 4.
        """
        is_plain_bs = self.model_number == 1
        if is_plain_bs:
            data_xy = self._load_bs_data(out_in_class, time_class)
            # Nothing is regressed for model 1; the split only tags the rows.
            feature_cols = ['close']
        else:
            data_xy = self._select_category(self.data_, out_in_class, time_class)
            feature_cols = self._add_regressors(data_xy)

        X_train, X_test, Y_train, Y_test = train_test_split(
            data_xy[feature_cols], data_xy['implied_volatility'],
            train_size=train_size, random_state=random_state)

        if is_plain_bs:
            # No regression here: clear any model left over from an earlier fit.
            self.model = None
            data_xy['ABHS期权定价'] = self.BS_price(
                data_xy['ETF50'], data_xy['exercise_price'], data_xy['shibor'],
                data_xy['sigma'], data_xy['time'])
        else:
            model = LinearRegression()
            model.fit(X_train, Y_train)
            data_xy['AHBS波动率'] = model.predict(data_xy[feature_cols])
            self.model = model

        # Tag the rows by index alignment.  X_train / X_test are deliberately
        # left untouched so that they stay valid inputs for model.predict.
        data_xy['训练集'] = pd.Series('训练集', index=X_train.index)
        data_xy['测试集'] = pd.Series('测试集', index=X_test.index)

        self.data_xy = data_xy
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test

    def model_summary(self):
        """Append the coefficients of the current regression to ``self.summary``.

        The stored row is [intercept a, first slope b, second slope c,
        model label, maturity class, moneyness class].
        """
        a = self.model.intercept_
        b = self.model.coef_[0]
        c = self.model.coef_[1]
        self.summary.append([a, b, c, '模型' + str(self.model_number),
                             self.time_class, self.out_in_class])

    def plot_test(self, start, end):
        """Plot predicted against actual implied volatility on the test set.

        Args:
            start: first test-set position to plot.
            end: position after the last test-set position to plot.

        Raises:
            ValueError: if the last fit was model 1, which has no regression.
        """
        model = self.model
        if model is None:
            raise ValueError('plot_test needs a fitted regression (models 2-4)')
        Y_pred = model.predict(self.X_test)
        positions = range(len(Y_pred))[start:end]
        plt.figure(figsize=(14, 8))
        plt.plot(positions, Y_pred[start:end], 'b', label="预测数据")
        # .iloc: Y_test keeps the shuffled row labels, so slice by position.
        plt.plot(positions, self.Y_test.iloc[start:end], 'r', label="实际数据")
        plt.legend()
        plt.show()

    def BS_price(self, S, K, r, AHBS_sigma, time):
        """Return the Black-Scholes price of a call option.

        Args:
            S: price of the underlying.
            K: exercise price.
            r: risk-free rate.
            AHBS_sigma: volatility.
            time: time to maturity.

        Scalars and array-likes (e.g. pandas Series) are both accepted.
        """
        vol_sqrt_t = AHBS_sigma * np.sqrt(time)
        d_1 = (np.log(S / K) + (r + AHBS_sigma ** 2 / 2) * time) / vol_sqrt_t
        d_2 = d_1 - vol_sqrt_t
        return S * norm.cdf(d_1) - K * np.exp(-1 * r * time) * norm.cdf(d_2)

    def AHBS_price(self):
        """Price every row of ``self.data_xy`` with its AHBS volatility.

        The result is stored in column 'ABHS期权定价'.
        """
        data_xy = self.data_xy
        data_xy['ABHS期权定价'] = self.BS_price(
            data_xy['ETF50'], data_xy['exercise_price'], data_xy['shibor'],
            data_xy['AHBS波动率'], data_xy['time'])
        self.data_xy = data_xy

    @staticmethod
    def _pricing_errors(rows):
        """Return (mean relative error, mean squared error) of model vs. 'close'."""
        diff = rows['ABHS期权定价'] - rows['close']
        # skipna=False: a missing price must surface as NaN rather than be
        # silently dropped from the average.
        return ((diff / rows['close']).mean(skipna=False),
                (diff ** 2).mean(skipna=False))

    def AHBS_error(self):
        """Compute the pricing errors of the current fit.

        Returns:
            [MPE_in, MSE_in, MPE_out, MSE_out]: mean relative error and mean
            squared error on the training rows ("in") and on the test rows
            ("out").  An empty set yields NaN.
        """
        data_xy = self.data_xy
        in_sample = data_xy.loc[data_xy['训练集'] == '训练集']
        out_sample = data_xy.loc[data_xy['测试集'] == '测试集']
        MPE_in, MSE_in = self._pricing_errors(in_sample)
        MPE_out, MSE_out = self._pricing_errors(out_sample)
        return [MPE_in, MSE_in, MPE_out, MSE_out]

    def AHBS_main(self, save_excel=True, model_number=1):
        """Fit one model on every category and collect the pricing errors.

        Args:
            save_excel: when true, write the four error tables to the sheet
                '模型<model_number>' of AHBS_error.xlsx.
            model_number: model to estimate (1, 2, 3 or 4, see ``AHBS_fit``).

        Returns:
            (data_MPE_in, data_MSE_in): in-sample mean relative error and
            mean squared error, indexed by moneyness class with one column
            per maturity class.

        The coefficient summary accumulated so far is always written to
        'AHBS模型系数汇总.xlsx' in the data folder.
        """
        self.model_number = model_number
        # Order matches the list returned by AHBS_error.
        tables = [pd.DataFrame(columns=self._MATURITY_CLASSES,
                               index=self._MONEYNESS_CLASSES)
                  for _ in range(4)]
        data_MPE_in, data_MSE_in, data_MPE_out, data_MSE_out = tables

        for out_in_class in self._MONEYNESS_CLASSES:
            for time_class in self._MATURITY_CLASSES:
                self.out_in_class = out_in_class
                self.time_class = time_class
                self.AHBS_fit(out_in_class, time_class)
                if model_number != 1:
                    # Model 1 is already priced inside AHBS_fit and has no
                    # regression coefficients to record.
                    self.AHBS_price()
                    self.model_summary()
                # Evaluate once and fan the four figures out to their tables.
                for table, value in zip(tables, self.AHBS_error()):
                    table.loc[out_in_class, time_class] = value

        summary = pd.DataFrame(self.summary,
                               columns=['a', 'b', 'c', '模型', '期权时间种类', '期权虚实种类'])
        summary.to_excel(os.path.join(self.path_data, 'AHBS模型系数汇总.xlsx'))

        if save_excel:
            sheet_name = '模型' + str(model_number)
            # (table, startrow, startcol, label shown in the table's corner)
            layout = (
                (data_MPE_in, 1, 1, '内部MPE'),
                (data_MSE_in, 1, 6, '内部MSE'),
                (data_MPE_out, 9, 1, '外部MPE'),
                (data_MSE_out, 9, 6, '外部MSE'),
            )
            # Append mode keeps the sheets of previously estimated models;
            # 'overlay' lets the four tables share one sheet (pandas >= 1.4).
            with pd.ExcelWriter(self.path_error, engine='openpyxl', mode='a',
                                if_sheet_exists='overlay') as writer:
                for table, startrow, startcol, label in layout:
                    table.to_excel(writer, sheet_name=sheet_name,
                                   startrow=startrow, startcol=startcol,
                                   index_label=label)
        return data_MPE_in, data_MSE_in
if __name__=='__main__':
    data_path = 'option_data.xlsx'  # location of the option data workbook
    AHBS = AHBS_Model(data_path)
    # Estimate models 1 through 4 in turn, saving each error table to Excel.
    for model_number in (1, 2, 3, 4):
        AHBS.AHBS_main(True, model_number)