支持向量机

支持向量机(Support Vector Machine,SVM)特别适合用于中小型复杂数据集的分类。
支持向量机有三宝

  • 间隔
  • 对偶
  • 核技巧

大间隔分类(Large margin classification)

from sklearn.svm import SVC
from sklearn import datasets
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Load the iris dataset and keep two features only: petal length and petal width.
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]
y = iris["target"]

# Boolean mask: True where the label is 0 or 1, False for every other label.
setosa_or_versicolor = np.logical_or(y == 0, y == 1)
print(len(setosa_or_versicolor))

# Keep only the samples selected by the mask (True rows stay, False rows go).
X, y = X[setosa_or_versicolor], y[setosa_or_versicolor]
print(len(X))

# SVM classifier model: linear kernel with C set to infinity.
svm_clf = SVC(C=float("inf"), kernel="linear")
svm_clf.fit(X, y)

150
100
SVC(C=inf, kernel='linear')

模型使用

def plot_svc_decision_boundary(svm_clf, xmin, xmax):
    """Plot a fitted linear SVM's decision boundary, gutters and support vectors.

    Draws on the current matplotlib axes and prints the support vectors.

    Args:
        svm_clf: Fitted estimator exposing ``coef_``, ``intercept_`` and
            ``support_vectors_``. Only the first two weights / feature
            columns are used.
        xmin: Left end of the x0 range over which the lines are drawn.
        xmax: Right end of the x0 range over which the lines are drawn.

    Note:
        Every line is expressed as x1 = f(x0) and divides by the second
        weight, so a boundary with ``coef_[0][1] == 0`` cannot be drawn.
    """
    weights = svm_clf.coef_[0]
    bias = svm_clf.intercept_[0]

    # On the decision boundary w0*x0 + w1*x1 + b = 0,
    # which rearranges to x1 = -(w0/w1)*x0 - b/w1.
    x0 = np.linspace(xmin, xmax, 200)
    slope = -weights[0]/weights[1]
    offset = bias/weights[1]
    boundary = slope * x0 - offset

    # The gutters are the boundary shifted along x1 by +/- 1/w1.
    shift = 1/weights[1]
    upper_gutter = boundary + shift
    lower_gutter = boundary - shift

    # Highlight the support vectors with large markers.
    support_vectors = svm_clf.support_vectors_
    plt.scatter(
        support_vectors[:, 0],
        support_vectors[:, 1],
        s=180,
        facecolors='#FFAAAA',
    )

    # Solid line for the boundary, dashed lines for the two gutters.
    for line, fmt in ((boundary, "k-"), (upper_gutter, "k--"), (lower_gutter, "k--")):
        plt.plot(x0, line, fmt, linewidth=2)

    # Echo the support vectors.
    print(support_vectors)
# Plot each class with its own marker; label 1 is drawn first so it is also
# listed first in the legend.
is_versicolor = y == 1
is_setosa = y == 0
plt.plot(X[is_versicolor, 0], X[is_versicolor, 1], "bs", label="Iris versicolor")
plt.plot(X[is_setosa, 0], X[is_setosa, 1], "yo", label="Iris setosa")

plt.xlabel("Petal length", fontsize=14)
plt.ylabel("Petal width", fontsize=14)
plt.legend(loc="upper left", fontsize=14)

# Overlay the fitted boundary, then fix the visible window and render.
plot_svc_decision_boundary(svm_clf, 0, 5.5)
plt.axis([0, 5.5, 0, 2])
plt.show()

[[1.9 0.4] [3. 1.1]]
机器学习-支持向量机-sklearn_svm

特征尺度敏感性(Sensitivity to feature scales)

# Without feature scaling: the second feature spans a much larger range
# than the first one.
Xs = np.array([[1, 50], [5, 20], [3, 80], [5, 60]], dtype=np.float64)
ys = np.array([0, 0, 1, 1])

svm_clf = SVC(C=100, kernel="linear")
svm_clf.fit(Xs, ys)
plot_svc_decision_boundary(svm_clf, 0, 6)

# Label-1 points as blue circles, label-0 points as magenta squares.
ones = ys == 1
zeros = ys == 0
plt.plot(Xs[ones, 0], Xs[ones, 1], "bo")
plt.plot(Xs[zeros, 0], Xs[zeros, 1], "ms")
plt.show()

# With feature scaling: standardize the same toy data, then refit the
# existing classifier on the scaled features.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(Xs)

svm_clf.fit(X_scaled, ys)
plot_svc_decision_boundary(svm_clf, -2, 2)

# Label-1 points as blue circles, label-0 points as magenta squares.
ones = ys == 1
zeros = ys == 0
plt.plot(X_scaled[ones, 0], X_scaled[ones, 1], "bo")
plt.plot(X_scaled[zeros, 0], X_scaled[zeros, 1], "ms")
plt.show()

[[ 1. 50.] [ 5. 60.]]
机器学习-支持向量机-sklearn_机器学习_02
[[-1.50755672 -0.11547005] [ 0.90453403 -1.5011107 ] [ 0.90453403 0.34641016]]
机器学习-支持向量机-sklearn_svm_03

软间隔分类

如果你的 SVM 模型过拟合,可以尝试通过降低超参数 C 来对其进行正则化:C 越小,间隔越宽,但允许的间隔违例也越多。

import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Reload iris and keep petal length and petal width (columns 2 and 3).
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]

# Binary target: 1.0 where the original label is 2, 0.0 otherwise.
y = (iris["target"] == 2).astype(np.float64)

# Standardize the features, then fit a linear SVM with hinge loss.
svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(loss="hinge", C=100)),
])
svm_clf.fit(X, y)

# Classify a single sample with petal length 5.5 and petal width 1.7.
svm_clf.predict([[5.5, 1.7]])

array([1.])