股票收盘价预测:用前 9 天的数据来预测第 10 天的收盘价。

 

# 导入库
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error # 评价指标
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM,GRU
from keras import optimizers
import keras

import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")#忽略一些警告 不影响运行
%matplotlib inline


# Load the quote history from the GBK-encoded CSV and preview the first rows.
csv_path = "600519.csv"
data = pd.read_csv(csv_path, encoding='gbk')
data.head()

一个本科生的毕设:股票收盘价的预测_tensorflow

# Inspect the available columns.
data.columns

# Columns that are not used as model inputs; they are removed in place.
unused_columns = [
    '股票代码', '名称', '日期', '涨跌额', '涨跌幅',
    '成交金额', '总市值', '流通市值', '成交量', '换手率',
]
data.drop(columns=unused_columns, inplace=True)

# Preview the remaining features.
data.head()

# Pairwise correlation coefficients of the remaining columns.
data.corr()

print(data["收盘价"])

 

# Correlation analysis: a scatter plot of two columns and a heatmap of all.

# Configure the font *before* drawing so the Chinese title and column labels
# are rendered with it, and keep the minus sign displayable with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Scatter plot of closing price against previous close.
plt.figure(figsize=(16, 8))
plt.title("相关性图")
# Data vectors are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
sns.scatterplot(x=list(data["收盘价"]), y=list(data["前收盘"]))
plt.show()

# Heatmap of the correlation matrix, drawn on the axes created here.
# plt.subplots returns (figure, axes), so both are unpacked.
fig, ax = plt.subplots(figsize=(20, 16))
ax = sns.heatmap(data.corr(), vmax=.8, square=True, annot=True, ax=ax)

# Build the supervised samples: within every 10-row chunk the first 9 rows are
# the features and column 0 of the 10th row is the regression target.
# NOTE(review): this assumes the rows are in ascending date order and that
# column 0 is the closing price - confirm against the CSV.
WINDOW = 10      # rows per sample: 9 feature rows + 1 target row
MAX_ROWS = 4740  # upper bound on the rows consumed, kept from the original

data = np.array(data)
print(data[0])
print(data.shape)

# Use only complete chunks, so data shorter than MAX_ROWS no longer produces
# a ragged last window or an IndexError on the missing target row.
usable_rows = min(len(data), MAX_ROWS) // WINDOW * WINDOW

train_x = []  # features: one (9, n_columns) array per sample
train_y = []  # labels: one scalar per sample
for start in range(0, usable_rows, WINDOW):
    target_row = start + WINDOW - 1
    train_x.append(data[start:target_row])
    train_y.append(data[target_row][0])

print(train_y[0:10])
print(len(train_y))

print(len(train_x))
print(train_x[0])

# "Normalisation" helper (currently a plain array conversion).
def data_guiyihua(data):
    """Return *data* as a new NumPy array without rescaling it.

    Despite its name ("guiyihua" means normalisation), this function does not
    scale the values: min-max scaling is deliberately left disabled, so the
    values come back unchanged.

    Args:
        data: Any array-like object accepted by ``np.array``.

    Returns:
        numpy.ndarray: A copy of *data* as an array.
    """
    return np.array(data)


# Convert the sample lists to NumPy arrays.
train_x = data_guiyihua(train_x)
train_y = data_guiyihua(train_y)

# Hold out 20% of the samples; the remaining 80% are used for training.
# A fixed random_state keeps the split identical between runs, so weights
# saved in one run are always evaluated on the same held-out samples.
x_train, x_test, y_train, y_test = train_test_split(
    np.array(train_x), np.array(train_y), test_size=0.2, random_state=42
)

# Shape the arrays as (samples, 9 time steps, 5 features) and (samples, 1).
# The sample count is inferred (-1) instead of being hard-coded.
x_train = x_train.reshape(-1, 9, 5)
y_train = y_train.reshape(-1, 1)
print(x_train[0])

print(x_train.shape)
print(y_train.shape)

x_test = x_test.reshape(-1, 9, 5)
y_test = y_test.reshape(-1, 1)

print(x_test.shape)
print(y_test.shape)

LSTM 模型

def create_model_1():
    """Build and compile the LSTM regressor for (9, 5) input windows.

    The network is LSTM(100) -> Dense(32) -> Dense(1), compiled with
    mean-squared-error loss and the 'adam' optimizer.

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    model = keras.models.Sequential([
        keras.layers.LSTM(100, activation='relu', input_shape=(9, 5)),
        keras.layers.Dense(32, activation='relu'),  # fully connected layer
        # Linear output for regression: a ReLU here can get stuck emitting 0
        # with zero gradient when its pre-activation starts out negative.
        keras.layers.Dense(1, activation='linear'),
    ])

    # Regression loss and optimizer.
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
# NOTE(review): `op` is not passed to the model below - create_model_1()
# compiles with the 'adam' optimizer - so this object has no effect on training.
op = optimizers.RMSprop(lr=0.01)
model1 = create_model_1()
model1.summary()

# Train for 200 epochs. Validation uses the held-out split rather than the
# training data itself, so the reported val_loss reflects unseen samples.
model1.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=200,
    batch_size=32,
    verbose=2,
    shuffle=True,
)
model1.save_weights('lstmmoxing')  # persist the trained weights

from sklearn.metrics import mean_squared_error #均方误差
from sklearn.metrics import mean_absolute_error #平方绝对误差
from sklearn.metrics import r2_score#R square
# Evaluate the LSTM on the held-out samples.

# Rebuild the same architecture and restore the saved weights into it.
model_jiazai_1 = create_model_1()
model_jiazai_1.load_weights('lstmmoxing')
y1_pred_lstm = model_jiazai_1.predict(x_test)

# Print each true closing price next to its prediction.
for actual, predicted in zip(y_test, y1_pred_lstm):
    print("真实收盘价:", actual)
    print("预测收盘价:", predicted)
    print("-----------------------")

# Mean squared error, mean absolute error and R^2, in that order.
for score in (mean_squared_error, mean_absolute_error, r2_score):
    print(score(y_test, y1_pred_lstm))


一个本科生的毕设:股票收盘价的预测_损失函数_02

LightGBM(lgb)算法

一个本科生的毕设:股票收盘价的预测_损失函数_03

import pandas as pd # 数据科学计算工具
import numpy as np # 数值计算工具
import matplotlib.pyplot as plt # 可视化
import seaborn as sns # matplotlib的高级API
from sklearn.model_selection import StratifiedKFold #交叉验证
from sklearn.model_selection import GridSearchCV #网格搜索
from sklearn.model_selection import train_test_split #将数据集分开成训练集和测试集
from xgboost import XGBClassifier
import lightgbm as lgb


# Flatten every sample window into one feature row for the tree model.
# The sample count is taken from the arrays instead of being hard-coded.
x_train_1 = x_train.reshape(len(x_train), -1)
x_test_1 = x_test.reshape(len(x_test), -1)
print(x_train_1[3])

# Flatten the targets to 1-D vectors.
y_train_1 = y_train.ravel()
y_test_1 = y_test.ravel()
print(y_test_1)

# Wrap the arrays in LightGBM datasets; the evaluation set references the
# training set.
lgb_train = lgb.Dataset(x_train_1, y_train_1)
lgb_eval = lgb.Dataset(x_test_1, y_test_1, reference=lgb_train)

params = {
    # NOTE(review): a tree of depth 3 has at most 2**3 = 8 leaves, so
    # num_leaves=50 cannot be reached - confirm which limit is intended.
    'max_depth': 3,
    'boosting_type': 'gbdt',
    'objective': 'regression',
    # A list (not a set) keeps the metric order deterministic.
    'metric': ['l2', 'l1'],
    'num_leaves': 50,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 1,
}

# Early stopping is requested through the callback: the
# `early_stopping_rounds` keyword of lgb.train was removed in LightGBM 4.0.
# NOTE(review): early stopping is driven by the same samples that are later
# used for the final scores, which makes those scores optimistic.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=300,
    valid_sets=[lgb_eval],
    callbacks=[lgb.early_stopping(stopping_rounds=5)],
)


 

# Predict with the best iteration found during training.
y1_pred_lgb = gbm.predict(x_test_1, num_iteration=gbm.best_iteration)

# Print each true closing price next to its prediction.
for actual, predicted in zip(y_test_1, y1_pred_lgb):
    print("真实收盘价:", actual)
    print("预测收盘价:", predicted)
    print("-----------------------")

# Mean squared error, mean absolute error and R^2, in that order.
for score in (mean_squared_error, mean_absolute_error, r2_score):
    print(score(y_test_1, y1_pred_lgb))