机器学习-支持向量机-sklearn

原创

高万禄 2021-07-13 18:21:15 ©著作权

文章标签 跟我一起学《深度学习》 python 机器学习 svm 支持向量机 文章分类 机器学习人工智能

©著作权归作者所有：来自51CTO博客作者高万禄的原创作品，请联系作者获取转载授权，否则将追究法律责任

支持向量机

SVM(Support Vector Machine),适合用于中小型复杂数据集的分类。
支持向量机有三宝

间隔
对偶
核技巧

大间隔分类(Large margin classification)

from sklearn.svm import SVC
from sklearn import datasets
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Load the iris dataset and keep only the two petal features.
iris = datasets.load_iris()
X = iris["data"][:, [2, 3]]  # petal length, petal width
y = iris["target"]  # class labels

# Boolean mask over the samples: True where the label is 0 (setosa) or
# 1 (versicolor), False otherwise.
setosa_or_versicolor = np.isin(y, (0, 1))
print(len(setosa_or_versicolor))

# Rows where the mask is True are kept; the remaining class is dropped.
X, y = X[setosa_or_versicolor], y[setosa_or_versicolor]
print(len(X))

# SVM classifier model: linear kernel with C set to infinity for the
# large-margin example.
# NOTE(review): recent scikit-learn releases may reject an infinite C during
# parameter validation -- confirm against the installed version; a very large
# finite value is the usual substitute.
svm_clf = SVC(kernel="linear", C=float("inf"))
svm_clf.fit(X, y)

150
100
SVC(C=inf, kernel='linear')

模型使用

def plot_svc_decision_boundary(svm_clf, xmin, xmax):
    """Draw the decision boundary, both margin lines and the support vectors.

    Everything is drawn on the current matplotlib axes, and the support
    vectors are also printed to stdout.

    Args:
        svm_clf: Fitted classifier exposing ``coef_``, ``intercept_`` and
            ``support_vectors_``. Only the first row of ``coef_`` and its
            first two entries are read, so a two-feature linear model is
            assumed.
        xmin: Left end of the x-range over which the lines are drawn.
        xmax: Right end of the x-range over which the lines are drawn.
    """
    weights = svm_clf.coef_[0]
    bias = svm_clf.intercept_[0]

    # On the decision boundary w0*x0 + w1*x1 + b = 0, hence
    # x1 = -w0/w1 * x0 - b/w1. This divides by w1, so w1 must be non-zero.
    xs = np.linspace(xmin, xmax, 200)
    boundary = -weights[0]/weights[1] * xs - bias/weights[1]

    # Vertical distance between the boundary and each margin line.
    offset = 1/weights[1]

    # Highlight the support vectors with large markers.
    support = svm_clf.support_vectors_
    plt.scatter(support[:, 0], support[:, 1], s=180, facecolors='#FFAAAA')

    # Solid line for the boundary, dashed lines for the upper and lower margins.
    lines = (
        (boundary, "k-"),
        (boundary + offset, "k--"),
        (boundary - offset, "k--"),
    )
    for values, fmt in lines:
        plt.plot(xs, values, fmt, linewidth=2)

    print(support)  # output the support vectors
# Scatter the two classes, each with its own marker and legend entry
# (versicolor is drawn first, so it is listed first in the legend).
for class_id, fmt, class_name in (
    (1, "bs", "Iris versicolor"),
    (0, "yo", "Iris setosa"),
):
    members = y == class_id
    plt.plot(X[members, 0], X[members, 1], fmt, label=class_name)

plt.xlabel("Petal length", fontsize=14)
plt.ylabel("Petal width", fontsize=14)
plt.legend(loc="upper left", fontsize=14)

# Overlay the boundary, margins and support vectors, then fix the view window.
plot_svc_decision_boundary(svm_clf, 0, 5.5)
plt.axis([0, 5.5, 0, 2])
plt.show()

[[1.9 0.4] [3. 1.1]]
机器学习-支持向量机-sklearn_svm

特征尺度敏感性(Sensitivity to feature scales)

# Without feature scaling: the two columns are used exactly as given.
Xs = np.array([[1, 50], [5, 20], [3, 80], [5, 60]], dtype=np.float64)
ys = np.array([0, 0, 1, 1])

# fit() returns the estimator itself, so construction and fitting can be chained.
svm_clf = SVC(kernel="linear", C=100).fit(Xs, ys)
plot_svc_decision_boundary(svm_clf, 0, 6)

# Class 1 as blue circles, class 0 as magenta squares.
for class_id, fmt in ((1, "bo"), (0, "ms")):
    members = ys == class_id
    plt.plot(Xs[members, 0], Xs[members, 1], fmt)
plt.show()

# With feature scaling: standardize the columns, then refit the same classifier.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(Xs)
svm_clf.fit(X_scaled, ys)
plot_svc_decision_boundary(svm_clf, -2, 2)

# Class 1 as blue circles, class 0 as magenta squares.
for class_id, fmt in ((1, "bo"), (0, "ms")):
    members = ys == class_id
    plt.plot(X_scaled[members, 0], X_scaled[members, 1], fmt)
plt.show()

[[ 1. 50.] [ 5. 60.]]
机器学习-支持向量机-sklearn_机器学习_02
[[-1.50755672 -0.11547005] [ 0.90453403 -1.5011107 ] [ 0.90453403 0.34641016]]
机器学习-支持向量机-sklearn_svm_03

软间隔分类

如果你的 SVM 模型过拟合，可以尝试通过降低超参数 C 来对其进行正则化（C 越小，间隔越宽，允许的间隔违例也越多）。

import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

iris = datasets.load_iris()
X = iris["data"][:, [2, 3]]  # petal length, petal width
# Binary target: 1.0 where the original label equals 2, 0.0 otherwise.
y = (iris["target"] == 2).astype(np.float64)

# Standardize the features, then fit a linear SVM that uses the hinge loss.
pipeline_steps = [
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=100, loss="hinge")),
]
svm_clf = Pipeline(pipeline_steps)
svm_clf.fit(X, y)

# Predict the class of a single sample (petal length 5.5, petal width 1.7).
svm_clf.predict([[5.5, 1.7]])