代码部分如下所示:
#!/usr/bin/env python
# coding: utf-8
# ## 导包
# In[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pandas_profiling as ppf #探索性数据分析(EDA)
import warnings##忽略警告
warnings.filterwarnings('ignore')
get_ipython().run_line_magic('matplotlib', 'inline')
plt.style.use('ggplot')
# In[2]:
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.preprocessing import LabelEncoder#标签编码
from sklearn.preprocessing import RobustScaler, StandardScaler#去除异常值与数据标准化
from sklearn.pipeline import Pipeline, make_pipeline#构建管道
from scipy.stats import skew#偏度
from sklearn.impute import SimpleImputer
# ## 读取并查看原数据
# In[3]:
train = pd.read_csv(r"G:\study类\大三上\机器学习\课程设计\datas\train.csv") #将数据读取进来
# In[4]:
test = pd.read_csv(r"G:\study类\大三上\机器学习\课程设计\datas\test.csv") #将数据读取进来
# In[5]:
train.head()#默认显示前五行
# In[6]:
test.head()
从中可以看出还是有很多数据需要处理的
# ## 数据探索性分析 pandas_profiling
# In[7]:
ppf.ProfileReport(train)
# In[8]:
train.YearBuilt#显示这一列的数据
# In[9]:
train.SalePrice
# ## 通过箱型图查看异常值,离群点
# In[10]:
plt.figure(figsize=(12,8))
sns.boxplot(train.YearBuilt, train.SalePrice)
# ## 通过散点图来观察存在线型的关系
# In[11]:
plt.figure(figsize=(12,6))
plt.scatter(x=train.TotalBsmtSF, y=train.SalePrice)
plt.xlabel("TotalBsmtSF", fontsize=13)
plt.ylabel("SalePrice", fontsize=13)
plt.ylim(0,800000)
# In[12]:
train.drop(train[(train["TotalBsmtSF"]>5000)].index,inplace=True)
# In[13]:
plt.figure(figsize=(12,6))
plt.scatter(x=train.TotalBsmtSF, y=train.SalePrice)
plt.xlabel("TotalBsmtSF", fontsize=13)
plt.ylabel("SalePrice", fontsize=13)
plt.ylim(0,800000)
# In[14]:
plt.figure(figsize=(12,6))
plt.scatter(x=train.GrLivArea, y=train.SalePrice)
plt.xlabel("GrLivArea", fontsize=13)
plt.ylabel("SalePrice", fontsize=13)
plt.ylim(0,800000)
# ## 把太偏离线性的那些数据给去掉,把对应的索引给删掉
# In[15]:
train.drop(train[(train["GrLivArea"]>4000)&(train["SalePrice"]<300000)].index,inplace=True)
删除后的图像
# In[16]:
plt.figure(figsize=(12,6))
plt.scatter(x=train.GrLivArea, y=train.SalePrice)
plt.xlabel("GrLivArea", fontsize=13)
plt.ylabel("SalePrice", fontsize=13)
plt.ylim(0,800000)
# ### 把test数据也做相同的处理
# In[17]:
full = pd.concat([train,test],ignore_index=True)
# ### 因为ID列和索引值都相同,故这里把ID列给删掉
# In187]:
full.drop("Id",axis=1,inplace=True)
# In[19]:
full.head()#查看删除列之后的值
# In[20]:
full.info()#查看删除后的数据信息
# # 数据清洗--空值的填充、删除
# #### 查看缺失值,并且缺失的个数要从高到低排序
# In[21]:
miss = full.isnull().sum() #统计出空值的个数pd.set_option('display.max_rows', None)
# In[22]:
miss[miss>0]
# In[23]:
miss[miss>0].sort_values(ascending=True) #由低到高进行排序
# In[24]:
full.info() #查看数据信息
# ## 空值的填充与删除
# 对字符类型的进行填充
# In[25]:
cols1 = ["PoolQC" , "MiscFeature", "Alley", "Fence", "FireplaceQu", "GarageQual", "GarageCond", "GarageFinish", "GarageYrBlt", "GarageType", "BsmtExposure", "BsmtCond", "BsmtQual", "BsmtFinType2", "BsmtFinType1", "MasVnrType"]
for col in cols1:
full[col].fillna("None",inplace=True)
# In[26]:
full.head()
# 对数值类型的进行填充
# In[27]:
cols=["MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2", "BsmtFinSF1", "GarageArea"]
for col in cols:
full[col].fillna(0, inplace=True)
# 对lotfrontage的空值使用其均值进行填充
# In[28]:
full["LotFrontage"].fillna(np.mean(full["LotFrontage"]),inplace=True)
# 对下面的列使用众数进行填充
# In[29]:
cols2 = ["MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional", "Electrical", "KitchenQual", "SaleType","Exterior1st", "Exterior2nd"]
for col in cols2:
full[col].fillna(full[col].mode()[0], inplace=True)
# 查看是否还有未填充好的数据
# In[30]:
full.isnull().sum()[full.isnull().sum()>0]
发现只有test的没有标签列,故已经把数据中的空值处理好了
# ## 数据预处理--把字符变成数值型
# In[31]:
full["MSZoning"].mode()[0]
# In[32]:
pd.set_option('display.max_rows', None) # 设置显示最大行,不然有一些数据会以“...”显示,不能看到部分数据
full.MSZoning
从上面可以发现有一些数据,比如31行:C(all),需要把这些数据转换成字符串的形式,将一些数字特征转换为类别特征,使用LabelEncoder来实现
# In[33]:
for col in cols2:
full[col]=full[col].astype(str)##astype来进行数据转换成字符串类型
# In[34]:
lab = LabelEncoder() #对不连续的数字或者文本进行编号
# #### 把下列内容字符型转换为数字型
# In[35]:
full["Alley"] = lab.fit_transform(full.Alley)
full["PoolQC"] = lab.fit_transform(full.PoolQC)
full["MiscFeature"] = lab.fit_transform(full.MiscFeature)
full["Fence"] = lab.fit_transform(full.Fence)
full["FireplaceQu"] = lab.fit_transform(full.FireplaceQu)
full["GarageQual"] = lab.fit_transform(full.GarageQual)
full["GarageCond"] = lab.fit_transform(full.GarageCond)
full["GarageFinish"] = lab.fit_transform(full.GarageFinish)
full["GarageYrBlt"] = full["GarageYrBlt"].astype(str)
full["GarageYrBlt"] = lab.fit_transform(full.GarageYrBlt)
full["GarageType"] = lab.fit_transform(full.GarageType)
full["BsmtExposure"] = lab.fit_transform(full.BsmtExposure)
full["BsmtCond"] = lab.fit_transform(full.BsmtCond)
full["BsmtQual"] = lab.fit_transform(full.BsmtQual)
full["BsmtFinType2"] = lab.fit_transform(full.BsmtFinType2)
full["BsmtFinType1"] = lab.fit_transform(full.BsmtFinType1)
full["MasVnrType"] = lab.fit_transform(full.MasVnrType)
full["BsmtFinType1"] = lab.fit_transform(full.BsmtFinType1)
# In[36]:
full.head()
# 将一些未转换的列继续转换为数字型
# In[37]:
full["MSZoning"] = lab.fit_transform(full.MSZoning)
full["BsmtFullBath"] = lab.fit_transform(full.BsmtFullBath)
full["BsmtHalfBath"] = lab.fit_transform(full.BsmtHalfBath)
full["Utilities"] = lab.fit_transform(full.Utilities)
full["Functional"] = lab.fit_transform(full.Functional)
full["Electrical"] = lab.fit_transform(full.Electrical)
full["KitchenQual"] = lab.fit_transform(full.KitchenQual)
full["SaleType"] = lab.fit_transform(full.SaleType)
full["Exterior1st"] = lab.fit_transform(full.Exterior1st)
full["Exterior2nd"] = lab.fit_transform(full.Exterior2nd)
# In[38]:
full.head()
# #### 发现还有一些列是字符型,未能完全转换为数字型
# In[39]:
full.drop("SalePrice",axis=1,inplace=True)##删除这一列,以便后面进行操作
# #### 从结果可以看出,行和列变得很多了
# #### 可以看到所有数据都显示为数字型了
# In[40]:
##自己写一个转换函数
class labelenc(BaseEstimator, TransformerMixin):
def __init__(self):
pass
def fit(self,X,y=None):
return self
def transform(self,X):
lab=LabelEncoder()
X["YearBuilt"] = lab.fit_transform(X["YearBuilt"])
X["YearRemodAdd"] = lab.fit_transform(X["YearRemodAdd"])
X["GarageYrBlt"] = lab.fit_transform(X["GarageYrBlt"])
X["BldgType"] = lab.fit_transform(X["BldgType"])
return X
# In[41]:
#写一个转换函数
class skew_dummies(BaseEstimator, TransformerMixin):
def __init__(self,skew=0.5):#偏度
self.skew = skew
def fit(self,X,y=None):
return self
def transform(self,X):
X_numeric=X.select_dtypes(exclude=["object"])#而是去除了包含了对象数据类型,取出来绝大部分是数值型,取出字符类型的数据
skewness = X_numeric.apply(lambda x: skew(x))#匿名函数,做成字典的形式
skewness_features = skewness[abs(skewness) >= self.skew].index#通过条件来涮选出skew>=0.5的索引的条件,取到了全部数据,防止数据的丢失
X[skewness_features] = np.log1p(X[skewness_features])#求对数,进一步让他更符合正态分布
X = pd.get_dummies(X)##一键独热,独热编码
return X
# In[42]:
from scipy.stats import norm
from scipy import stats
def get_dist_data(series):
sns.distplot(series, fit=norm);
fig = plt.figure()
res = stats.probplot(series, plot=plt)
#价格
print("Skewness: %f" % series.skew())
print("Kurtosis: %f" % series.kurt())
get_dist_data(train_df['SalePrice'])
# In[43]:
#价格对数化处理
log_SalePrice = np.log(train_df['SalePrice'] + 1)
get_dist_data(log_SalePrice)
# In[44]:
#对数化处理
plot_no = 0
plt.figure(figsize=(18, 60))
for feature in skewed.index:
plt.subplot(12, 4, plot_no + 1)
sns.distplot(df[feature], kde = True, fit=norm, color = "purple")
plt.title("Before", fontsize = 20)
plt.subplot(12, 4, plot_no + 2)
sns.distplot(df_temp[feature], kde = True, fit=norm, color = "green")
plt.title("After", fontsize = 20)
plot_no += 2
plt.tight_layout()
# In[45]:
# 构建管道
pipe = Pipeline([#构建管道
('labenc', labelenc()),
('skew_dummies', skew_dummies(skew=2)),
])
# In[46]:
# 保存原来的数据以备后用,为了防止写错
full2 = full.copy()
# In[47]:
pipeline_data = pipe.fit_transform(full2)
# In[48]:
pipeline_data.shape
# In[49]:
pipeline_data.head()
# In[50]:
from sklearn.linear_model import Lasso #运用算法来进行训练以得到特征的重要性
lasso=Lasso(alpha=0.001)
lasso.fit(X_scaled,y_log)
# In[51]:
FI_lasso = pd.DataFrame({"Feature Importance":lasso.coef_}, index=pipeline_data.columns) #索引和重要性做成dataframe形式
# In[52]:
FI_lasso.sort_values("Feature Importance",ascending=False)#由高到低进行排序
# In[53]:
#可视化
FI_lasso[FI_lasso["Feature Importance"]!=0].sort_values("Feature Importance").plot(kind="barh",figsize=(15,25))#barh:把x,y轴反转
plt.xticks(rotation=90)
plt.show()#画图显示
# ## 得到特征重要性图之后就可以进行特征选择与重做
# In[54]:
class add_feature(BaseEstimator, TransformerMixin):#定义转换函数
def __init__(self,additional=1):
self.additional = additional
def fit(self,X,y=None):
return self
def transform(self,X):
if self.additional==1:
X["TotalHouse"] = X["TotalBsmtSF"] + X["1stFlrSF"] + X["2ndFlrSF"]
X["TotalArea"] = X["TotalBsmtSF"] + X["1stFlrSF"] + X["2ndFlrSF"] + X["GarageArea"]
else:
X["TotalHouse"] = X["TotalBsmtSF"] + X["1stFlrSF"] + X["2ndFlrSF"]
X["TotalArea"] = X["TotalBsmtSF"] + X["1stFlrSF"] + X["2ndFlrSF"] + X["GarageArea"]
X["+_TotalHouse_OverallQual"] = X["TotalHouse"] * X["OverallQual"]
X["+_GrLivArea_OverallQual"] = X["GrLivArea"] * X["OverallQual"]
X["+_oMSZoning_TotalHouse"] = X["oMSZoning"] * X["TotalHouse"]
X["+_oMSZoning_OverallQual"] = X["oMSZoning"] + X["OverallQual"]
X["+_oMSZoning_YearBuilt"] = X["oMSZoning"] + X["YearBuilt"]
X["+_oNeighborhood_TotalHouse"] = X["oNeighborhood"] * X["TotalHouse"]
X["+_oNeighborhood_OverallQual"] = X["oNeighborhood"] + X["OverallQual"]
X["+_oNeighborhood_YearBuilt"] = X["oNeighborhood"] + X["YearBuilt"]
X["+_BsmtFinSF1_OverallQual"] = X["BsmtFinSF1"] * X["OverallQual"]
X["-_oFunctional_TotalHouse"] = X["oFunctional"] * X["TotalHouse"]
X["-_oFunctional_OverallQual"] = X["oFunctional"] + X["OverallQual"]
X["-_LotArea_OverallQual"] = X["LotArea"] * X["OverallQual"]
X["-_TotalHouse_LotArea"] = X["TotalHouse"] + X["LotArea"]
X["-_oCondition1_TotalHouse"] = X["oCondition1"] * X["TotalHouse"]
X["-_oCondition1_OverallQual"] = X["oCondition1"] + X["OverallQual"]
X["Bsmt"] = X["BsmtFinSF1"] + X["BsmtFinSF2"] + X["BsmtUnfSF"]
X["Rooms"] = X["FullBath"]+X["TotRmsAbvGrd"]
X["PorchArea"] = X["OpenPorchSF"]+X["EnclosedPorch"]+X["3SsnPorch"]+X["ScreenPorch"]
X["TotalPlace"] = X["TotalBsmtSF"] + X["1stFlrSF"] + X["2ndFlrSF"] + X["GarageArea"] + X["OpenPorchSF"]+X["EnclosedPorch"]+X["3SsnPorch"]+X["ScreenPorch"]
return X
# In[55]:
pipe = Pipeline([#把后面的东西加到管道里面来
('labenc', labelenc()),
('add_feature', add_feature(additional=2)),
('skew_dummies', skew_dummies(skew=4)),
])
# In[56]:
pipe
# In[57]:
n_train=train.shape[0]#训练集的行数
X = pipeline_data[:n_train]#取出处理之后的训练集
test_X = pipeline_data[n_train:]#取出n_train后的数据作为测试集
y= train.SalePrice
X_scaled = StandardScaler().fit(X).transform(X)#做转换
y_log = np.log(train.SalePrice)##这里要注意的是,更符合正态分布
#得到测试集
test_X_scaled = StandardScaler().fit_transform(test_X)
# ## 模型的构建
# #### 线性回归
# In[58]:
from sklearn.tree import DecisionTreeRegressor#导入模型
# In[59]:
model = DecisionTreeRegressor()
# In[60]:
model1 =model.fit(X_scaled,y_log)
# ## 前期比较简单的处理得到结果,并没有进行模型的堆叠
# In[61]:
#predict = modexp.predict(test_x)
# In[62]:
# result=pd.DataFrame({'Id':test.Id, 'SalePrice':predict})
# result.to_csv("submission1.csv",index=False)
# In[63]:
# predict = np.exp(model1.predict(test_X_scaled))#np.exp是对上面的对数变换之后的反变换
# In[64]:
# result=pd.DataFrame({'Id':test.Id, 'SalePrice':predict})
# result.to_csv("submission.csv",index=False)
# ## 模型的堆叠与集成并且选择最优参数,模型和评估方式
# In[65]:
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold#交叉验证,网格搜索,k折验证
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.linear_model import ElasticNet, SGDRegressor, BayesianRidge
from sklearn.kernel_ridge import KernelRidge
from xgboost import XGBRegressor
# In[66]:
#定义交叉验证的策略,以及评估函数
def rmse_cv(model,X,y):
rmse = np.sqrt(-cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=5))#交叉验证
return rmse
# In[67]:
models = [LinearRegression(),Ridge(),Lasso(alpha=0.01,max_iter=10000),RandomForestRegressor(),GradientBoostingRegressor(),SVR(),LinearSVR(),
ElasticNet(alpha=0.001,max_iter=10000),SGDRegressor(max_iter=1000,tol=1e-3),BayesianRidge(),KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5),
ExtraTreesRegressor(),XGBRegressor()]#这里也是列表
# In[68]:
names = ["LR", "Ridge", "Lasso", "RF", "GBR", "SVR", "LinSVR", "Ela","SGD","Bay","Ker","Extra","Xgb"]#列表
for name, model in zip(names, models):
score = rmse_cv(model, X_scaled, y_log)
print("{}: {:.6f}, {:.4f}".format(name,score.mean(),score.std()))
# In[69]:
#定义交叉方式,先指定模型后指定参数,方便测试多个模型,网格交叉验证
class grid():
def __init__(self,model):
self.model = model#导入模型
#所有模型进行验证5次
def grid_get(self,X,y,param_grid):#网格参数一般做出字典的格式
grid_search = GridSearchCV(self.model,param_grid,cv=5, scoring="neg_mean_squared_error")
grid_search.fit(X,y)
print(grid_search.best_params_, np.sqrt(-grid_search.best_score_))
grid_search.cv_results_['mean_test_score'] = np.sqrt(-grid_search.cv_results_['mean_test_score'])
print(pd.DataFrame(grid_search.cv_results_)[['params','mean_test_score','std_test_score']])
# In[70]:
grid(Lasso()).grid_get(X_scaled,y_log,{'alpha': [0.0004,0.0005,0.0007,0.0006,0.0009,0.0008],'max_iter':[10000]})
# In[71]:
grid(Ridge()).grid_get(X_scaled,y_log,{'alpha':[35,40,45,50,55,60,65,70,80,90]})
# In[72]:
grid(SVR()).grid_get(X_scaled,y_log,{'C':[11,12,13,14,15],'kernel':["rbf"],"gamma":[0.0003,0.0004],"epsilon":[0.008,0.009]})#支持向量机回归
# In[73]:
param_grid={'alpha':[0.2,0.3,0.4,0.5], 'kernel':["polynomial"], 'degree':[3],'coef0':[0.8,1,1.2]}#定义好的参数,用字典来表示
grid(KernelRidge()).grid_get(X_scaled,y_log,param_grid)
# In[74]:
grid(ElasticNet()).grid_get(X_scaled,y_log,{'alpha':[0.0005,0.0008,0.004,0.005],'l1_ratio':[0.08,0.1,0.3,0.5,0.7],'max_iter':[10000]})
# In[75]:
#定义加权平均值,就相当于自己写fit_transform()
class AverageWeight(BaseEstimator, RegressorMixin):
def __init__(self,mod,weight):
self.mod = mod#模型的个数
self.weight = weight#权重
def fit(self,X,y):
self.models_ = [clone(x) for x in self.mod]
for model in self.models_:
model.fit(X,y)
return self
def predict(self,X):
w = list()
pred = np.array([model.predict(X) for model in self.models_])
# 针对于每一个数据点,单一的模型是乘以权重,然后加起来
for data in range(pred.shape[1]):#取列数
single = [pred[model,data]*weight for model,weight in zip(range(pred.shape[0]),self.weight)]
w.append(np.sum(single))
return w
# In[76]:
#指定每一个算法的参数
lasso = Lasso(alpha=0.0005,max_iter=10000)
ridge = Ridge(alpha=60)
svr = SVR(gamma= 0.0004,kernel='rbf',C=13,epsilon=0.009)
ker = KernelRidge(alpha=0.2 ,kernel='polynomial',degree=3 , coef0=0.8)
ela = ElasticNet(alpha=0.005,l1_ratio=0.08,max_iter=10000)
bay = BayesianRidge()
# In[77]:
#6个权重
w1 = 0.02
w2 = 0.2
w3 = 0.25
w4 = 0.3
w5 = 0.03
w6 = 0.2
# In[78]:
weight_avg = AverageWeight(mod = [lasso,ridge,svr,ker,ela,bay],weight=[w1,w2,w3,w4,w5,w6])
# In[79]:
rmse_cv(weight_avg,X_scaled,y_log), rmse_cv(weight_avg,X_scaled,y_log).mean()#计算出交叉验证的均值
# ## 模型的堆叠
# In[80]:
class stacking(BaseEstimator, RegressorMixin, TransformerMixin):
def __init__(self,mod,meta_model):
self.mod = mod
self.meta_model = meta_model#元模型
self.kf = KFold(n_splits=5, random_state=42, shuffle=True)#5折的划分
#数据集平均分成5份
def fit(self,X,y):
self.saved_model = [list() for i in self.mod]#用模型来进行拟合
oof_train = np.zeros((X.shape[0], len(self.mod)))
for i,model in enumerate(self.mod):#返回的是索引和模型本身
for train_index, val_index in self.kf.split(X,y):##返回的是数据本省
renew_model = clone(model)#模型的复制
renew_model.fit(X[train_index], y[train_index])#对数据进行训练
self.saved_model[i].append(renew_model)#把模型添加进去
oof_train[val_index,i] = renew_model.predict(X[val_index])#用来预测验证集
self.meta_model.fit(oof_train,y)#元模型
return self
def predict(self,X):
whole_test = np.column_stack([np.column_stack(model.predict(X) for model in single_model).mean(axis=1)
for single_model in self.saved_model]) #得到的是整个测试集
return self.meta_model.predict(whole_test)#返回的是利用元模型来对整个测试集进行预测
#预测,使整个测试集
def get_oof(self,X,y,test_X):
oof = np.zeros((X.shape[0],len(self.mod)))#初始化为0
test_single = np.zeros((test_X.shape[0],5))#初始化为0
test_mean = np.zeros((test_X.shape[0],len(self.mod)))
for i,model in enumerate(self.mod):#i是模型
for j, (train_index,val_index) in enumerate(self.kf.split(X,y)):#j是所有划分好的的数据
clone_model = clone(model)#克隆模块,把模型复制一下
clone_model.fit(X[train_index],y[train_index])#把分割好的数据进行训练
oof[val_index,i] = clone_model.predict(X[val_index])#对验证集进行预测
test_single[:,j] = clone_model.predict(test_X)#对测试集进行预测
test_mean[:,i] = test_single.mean(axis=1)#测试集算好均值
return oof, test_mean
# In[81]:
#经过预处理之后才能放到堆叠的模型里面去计算
a = SimpleImputer().fit_transform(X_scaled)#x
b = SimpleImputer().fit_transform(y_log.values.reshape(-1,1)).ravel()#y
# a = Imputer().fit_transform(X_scaled)#相当于x
# b = Imputer().fit_transform(y_log.values.reshape(-1,1)).ravel()#相当于y
# In[82]:
stack_model = stacking(mod=[lasso,ridge,svr,ker,ela,bay],meta_model=ker)#定义了第一层的和第二层的模型
# In[83]:
print(rmse_cv(stack_model,a,b))#运用了评估函数
print(rmse_cv(stack_model,a,b).mean())
# In[84]:
X_train_stack, X_test_stack = stack_model.get_oof(a,b,test_X_scaled)#将数据进行变换
# In[85]:
X_train_stack.shape, a.shape
# In[86]:
X_train_add = np.hstack((a,X_train_stack))
X_test_add = np.hstack((test_X_scaled,X_test_stack))
X_train_add.shape, X_test_add.shape
# In[87]:
print(rmse_cv(stack_model,X_train_add,b))
print(rmse_cv(stack_model,X_train_add,b).mean())
# In[88]:
stack_model = stacking(mod=[lasso,ridge,svr,ker,ela,bay],meta_model=ker)
# In[89]:
stack_model.fit(a,b)#模型进行训练
# In[90]:
pred = np.exp(stack_model.predict(test_X_scaled))#进行预测
# In[91]:
result=pd.DataFrame({'Id':test.Id, 'SalePrice':pred})
result.to_csv("submission3.csv",index=False)