交叉验证模型

K折交叉验证详解:

cv：指定使用哪种交叉验证方法（例如 KFold 对象）
scoring：指定模型的评估指标（例如 "accuracy"）
cv_results.mean()：计算各折得分的平均值，作为模型的总体得分

# load libraries
from sklearn import datasets, metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Load the handwritten-digits dataset bundled with scikit-learn.
digits = datasets.load_digits()

# Pull out the feature matrix and the target vector in one step.
features, target = digits.data, digits.target

# Build the two pipeline stages: a standardizer and a logistic-regression
# classifier.
standardizer = StandardScaler()
logit = LogisticRegression()

# Chain the stages so that every fit standardizes the features first and
# then trains the classifier on the standardized values.
pipeline = make_pipeline(standardizer, logit)

# 10-fold splitter; rows are shuffled before splitting, and the fixed seed
# makes the fold assignment reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=1)

# Run k-fold cross-validation; the result holds one score per fold.
cv_results = cross_val_score(
    pipeline,            # estimator evaluated on each fold
    features,            # feature matrix
    target,              # target vector
    cv=kf,               # cross-validation splitting strategy
    scoring="accuracy",  # evaluation metric (a score, not a loss)
    n_jobs=-1,           # use all available CPU cores
)

# Average the per-fold scores to get a single overall score.
# NOTE: as a bare expression this value is only displayed in an interactive
# session or notebook; when run as a script the result is discarded.
cv_results.mean()