The SMO (Sequential Minimal Optimization) algorithm, proposed by Platt in 1998, solves the dual problem of the SVM, a convex quadratic programming problem. Below, SMO is implemented in Python. A plain SVM is only a binary classifier, so the one-vs-one strategy is used to handle the multi-class case.
**One vs One**
The idea is to train one SVM between every pair of classes, so k classes require k(k-1)/2 SVMs.
To classify an unknown sample, run it through all of these classifiers; the class that receives the most votes is the predicted class.
This is also how LIBSVM implements multi-class classification.
Suppose there are four classes A, B, C, D. During training, the samples of the pairs A,B; A,C; A,D; B,C; B,D; C,D are used as training sets, producing six trained classifiers. At test time, the sample is fed to each of the six classifiers, the votes are tallied, and the final result is obtained.
The voting works as follows:
A = B = C = D = 0;
(A,B)-classifier: if A wins, A = A + 1; otherwise, B = B + 1;
(A,C)-classifier: if A wins, A = A + 1; otherwise, C = C + 1;
…
(C,D)-classifier: if C wins, C = C + 1; otherwise, D = D + 1;
The decision is Max(A, B, C, D).
Remark: this approach works well, but with many classes the number of models, n(n-1)/2, grows quadratically, which gets quite costly.
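As a minimal sketch of this voting scheme in code (the `predict(a, b, x)` below is a hypothetical pairwise classifier standing in for the SVMs trained later, not part of the implementation that follows):

from itertools import combinations

def ovo_predict(predict, k, x):
    # one vote from each of the k(k-1)/2 pairwise classifiers
    votes = [0] * k
    for a, b in combinations(range(k), 2):
        votes[predict(a, b, x)] += 1
    # the class with the most votes wins
    return votes.index(max(votes))

# e.g. with a dummy pairwise rule:
# ovo_predict(lambda a, b, x: b if x > 0 else a, 4, 1.0)  -> 3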
# SMO implementation
from numpy import *

# Helper: randomly pick an integer in the range [0, m) that is not equal to i
def selectJrand(i, m):
    j = i
    # the returned random integer must not equal i
    while (j == i):
        j = int(random.uniform(0, m))
    return j
# Helper: clip an alpha value that is greater than H or less than L,
# i.e. enforce the box constraint L <= alpha <= H
def clipAlpha(aj, H, L):
    if aj > H:
        aj = H
    if L > aj:
        aj = L
    return aj
# Helper: for a given alpha index k, compute and return the error E_k
def calcEk(oS, k):
    fXk = float(multiply(oS.alphas, oS.labelMat).T * oS.K[:, k] + oS.b)
    Ek = fXk - float(oS.labelMat[k])
    return Ek
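What calcEk returns is the difference between the decision function and the true label:

$$E_k = f(x_k) - y_k = \sum_{i=1}^{m} \alpha_i y_i K(x_i, x_k) + b - y_k$$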
# Helper: choose the second alpha so that each optimization step takes the largest step size
def selectJ(i, oS, Ei):
    maxK = -1; maxDeltaE = 0; Ej = 0
    # mark Ei as valid (already computed) in the error cache
    oS.eCache[i] = [1, Ei]
    # indices of all entries with a valid error cache
    validEcacheList = nonzero(oS.eCache[:, 0].A)[0]
    if (len(validEcacheList)) > 1:
        # loop over the valid entries to find the Ej giving the largest step
        for k in validEcacheList:
            if k == i: continue  # skip i itself, computing it would be a waste of time
            Ek = calcEk(oS, k)
            deltaE = abs(Ei - Ek)
            if (deltaE > maxDeltaE):
                maxK = k; maxDeltaE = deltaE; Ej = Ek
        return maxK, Ej
    else:
        # first pass: no valid cache entries yet,
        # so pick a random alpha and compute its Ej
        j = selectJrand(i, oS.m)
        Ej = calcEk(oS, j)
        return j, Ej
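In other words, this second-choice heuristic approximates the step size by the error difference and picks, over the cached errors,

$$j = \arg\max_{k \neq i} \lvert E_i - E_k \rvert$$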
# Helper: after an alpha has been optimized, update its entry in the error cache
def updateEk(oS, k):
    Ek = calcEk(oS, k)
    oS.eCache[k] = [1, Ek]
# Kernel transformation: X is the data matrix, A a single row vector,
# and kTup a tuple describing the kernel (name, parameter)
def kernelTrans(X, A, kTup):
    # build a column vector of kernel evaluations
    m, n = shape(X)
    K = mat(zeros((m, 1)))
    # two kernel types are implemented
    if kTup[0] == 'lin': K = X * A.T
    elif kTup[0] == 'rbf':
        for j in range(m):
            deltaRow = X[j, :] - A
            K[j] = deltaRow * deltaRow.T
        # element-wise exponentiation of the squared distances
        K = exp(K / (-1 * kTup[1] ** 2))
    # unrecognized kernel type
    else: raise NameError('Houston We Have a Problem -- That Kernel is not recognized')
    return K
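The two kernels computed above are

$$K_{\mathrm{lin}}(x, z) = x^{\top} z, \qquad K_{\mathrm{rbf}}(x, z) = \exp\!\left(-\frac{\lVert x - z \rVert^2}{\sigma^2}\right)$$

where kTup[1] plays the role of σ. Note that this RBF variant divides by σ² rather than the more common 2σ², so σ here is not directly comparable to parameters of other libraries.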
# Container for the data and parameters
class optStruct:
    def __init__(self, dataMatIn, classLabels, C, toler, kTup):
        self.X = dataMatIn
        self.labelMat = classLabels
        self.C = C
        self.tol = toler
        self.m = shape(dataMatIn)[0]
        self.alphas = mat(zeros((self.m, 1)))
        self.b = 0
        self.eCache = mat(zeros((self.m, 2)))  # first column is a valid flag
        # precompute the full m x m kernel matrix
        self.K = mat(zeros((self.m, self.m)))
        for i in range(self.m):
            self.K[:, i] = kernelTrans(self.X, self.X[i, :], kTup)
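The constructor precomputes the full kernel matrix once, so the optimization routine below can read kernel values by index instead of recomputing them:

$$K_{jk} = K(x_j, x_k), \qquad K \in \mathbb{R}^{m \times m}$$

This trades O(m²) memory for speed, which is fine for small datasets but becomes prohibitive as m grows.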
# Inner optimization routine: tries to optimize the pair (alpha_i, alpha_j)
def innerL(i, oS):
    # compute the error Ei for alpha_i
    Ei = calcEk(oS, i)
    # check whether alpha_i violates the KKT conditions (within tolerance)
    # and can therefore be optimized
    if ((oS.labelMat[i]*Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or ((oS.labelMat[i]*Ei > oS.tol) and (oS.alphas[i] > 0)):
        # choose the second alpha
        j, Ej = selectJ(i, oS, Ei)
        alphaIold = oS.alphas[i].copy(); alphaJold = oS.alphas[j].copy()
        # compute the bounds L and H that keep both alphas in [0, C]
        if (oS.labelMat[i] != oS.labelMat[j]):
            L = max(0, oS.alphas[j] - oS.alphas[i])
            H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
        else:
            L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
            H = min(oS.C, oS.alphas[j] + oS.alphas[i])
        if L == H: print("L==H"); return 0
        # eta (negative of the second derivative along the constraint line)
        # determines the optimal change of alpha[j]
        eta = 2.0 * oS.K[i, j] - oS.K[i, i] - oS.K[j, j]
        if eta >= 0: print("eta>=0"); return 0
        oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
        oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
        updateEk(oS, j)  # keep the error cache in sync
        if (abs(oS.alphas[j] - alphaJold) < 0.00001): print("j not moving enough"); return 0
        # update alpha[i] by the same amount as alpha[j], in the opposite direction
        oS.alphas[i] += oS.labelMat[j]*oS.labelMat[i]*(alphaJold - oS.alphas[j])
        updateEk(oS, i)
        # compute and set the threshold b
        b1 = oS.b - Ei - oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i, i] - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[i, j]
        b2 = oS.b - Ej - oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i, j] - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[j, j]
        if (0 < oS.alphas[i]) and (oS.C > oS.alphas[i]): oS.b = b1
        elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]): oS.b = b2
        else: oS.b = (b1 + b2) / 2.0
        # return 1 if a pair of alphas was changed
        return 1
    else: return 0
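For reference, the updates in innerL are the standard SMO equations. Here η is defined as the negative of the usual second-derivative term (so a step is taken only when η < 0), α_j moves along the error difference and is clipped to [L, H], and α_i compensates so the equality constraint Σᵢ αᵢyᵢ = const still holds:

$$\eta = 2K_{ij} - K_{ii} - K_{jj}, \qquad \alpha_j^{\mathrm{new}} = \alpha_j - \frac{y_j (E_i - E_j)}{\eta}$$

$$\alpha_j^{\mathrm{clipped}} = \min(H, \max(L, \alpha_j^{\mathrm{new}})), \qquad \alpha_i^{\mathrm{new}} = \alpha_i + y_i y_j (\alpha_j - \alpha_j^{\mathrm{clipped}})$$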
# The complete Platt SMO algorithm
def smoP(dataMatIn, classLabels, C, toler, maxIter, kTup=('lin', 0)):
    # initialize an optStruct holding the input parameters
    oS = optStruct(mat(dataMatIn), mat(classLabels).transpose(), C, toler, kTup)
    # variables controlling loop termination
    iter = 0
    entireSet = True; alphaPairsChanged = 0
    # main loop: exit when the maximum number of iterations is reached,
    # or when a full pass over the set changes no alphas
    while (iter < maxIter) and ((alphaPairsChanged > 0) or (entireSet)):
        alphaPairsChanged = 0
        if entireSet:
            # pass over all possible alphas
            for i in range(oS.m):
                alphaPairsChanged += innerL(i, oS)
                #print("fullSet, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged))
            iter += 1
        else:
            # pass over the non-bound alphas only (0 < alpha < C)
            nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i, oS)
                #print("non-bound, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged))
            iter += 1
        # alternate between full-set passes and non-bound passes
        if entireSet: entireSet = False
        elif (alphaPairsChanged == 0): entireSet = True
        #print("iteration number: %d" % iter)
    # return the threshold b and the alphas
    return oS.b, oS.alphas
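As a quick sanity check, here is a minimal, made-up usage sketch of smoP on two synthetic Gaussian clusters (the data and parameter values are illustrative only, not tuned):

import numpy as np
np.random.seed(0)
X0 = np.random.randn(20, 2) - 2          # cluster for class -1
X1 = np.random.randn(20, 2) + 2          # cluster for class +1
toyData = np.vstack((X0, X1)).tolist()
toyLabels = [-1.0] * 20 + [1.0] * 20
b, alphas = smoP(toyData, toyLabels, 0.6, 0.001, 40, ('lin', 0))
# threshold and indices of the support vectors (non-zero alphas)
print(b, nonzero(alphas.A > 0)[0])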
# Training routine
def train(dataset, target, kTup=('rbf', 50)):
    # read in the training data
    #X_train, X_test, y_train, y_test = train_test_split(dataset, target, test_size=.4, random_state=1)
    dataArr, labelArr = dataset, target
    # run Platt SMO to train the SVM and obtain b and the alphas
    b, alphas = smoP(dataArr, labelArr, 1, 0.001, 2500, kTup)
    # build matrix copies of the data
    datMat = mat(dataArr); labelMat = mat(labelArr).transpose()
    # indices of the non-zero alphas
    svInd = nonzero(alphas.A > 0)[0]
    # extract the support vectors and their class labels
    sVs = datMat[svInd]
    labelSV = labelMat[svInd]
    print("there are %d Support Vectors" % shape(sVs)[0])
    m, n = shape(datMat)
    errorCount = 0
    # loop over the training data
    for i in range(m):
        # classify with the kernel and get the prediction
        kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
        predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
        # compare the prediction with the true class and count errors
        if sign(predict) != sign(labelArr[i]): errorCount += 1
    # report the training error rate
    print("the training error rate is: %f" % (float(errorCount)/m))
    return b, alphas
'''# read in the test data (disabled; kept from an earlier train/test split experiment)
dataArr, labelArr = X_test, y_test
errorCount = 0
datMat = mat(dataArr); labelMat = mat(labelArr).transpose()
m, n = shape(datMat)
# loop over the test data
for i in range(m):
    # classify with the kernel and get the prediction
    kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
    predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
    # compare the prediction with the true class and count errors
    if sign(predict) != sign(labelArr[i]): errorCount += 1
# report the test error rate
print("the test error rate is: %f" % (float(errorCount)/m))'''
'''Key code'''
# One-vs-one is used for the multi-class problem; the dataset has three classes,
# so there are 3 pairwise classifiers: 1-2, 1-3, 2-3
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
D = np.array([[1, 2], [1, 3], [2, 3]])   # the three class pairings
A = B = C = 0                            # vote counters (re-initialized per test sample below)
dictclass = {}                           # stores the fitted result of each of the 3 classifiers
X_train, X_test, y_train, y_test = train_test_split(dataset, target, test_size=.4, random_state=1)
trainset, trainlabel = X_train, y_train
for a, c in D:
    # select the samples belonging to classes a and c
    index = np.concatenate((np.nonzero(trainlabel == a)[0], np.nonzero(trainlabel == c)[0]))
    target11 = np.array([trainlabel[k] for k in index])
    # relabel: class a becomes +1, class c becomes -1
    target1 = np.array([-1 if k != a else 1 for k in target11])
    trainset1 = np.array([trainset[k] for k in index])
    print(target1.shape, trainset1.shape)  #(3132,)
    b, alphas = train(trainset1, target1)
    # also keep the pair's training subset and its +1/-1 labels,
    # since the alphas index into this subset and are needed at test time
    dictclass[str(a) + str(c)] = (b, alphas, trainset1, target1)
# evaluate on the test set
testset, testlabel = X_test, y_test
testset = mat(testset)
testlabel = mat(testlabel).transpose()
# one-vs-one prediction over the test set
m = testset.shape[0]
corr = 0
for i in range(m):
    # vote counters for classes 1, 2 and 3
    A = B = C = 0
    for key, value in dictclass.items():
        b, alphas, pairset, pairlabel = value
        # indices of the non-zero alphas (the support vectors of this pairwise classifier)
        svInd = nonzero(alphas.A > 0)[0]
        # the support vectors and their +1/-1 labels come from the pair's own training subset
        sVs = np.mat(pairset)[svInd]
        labelSV = np.mat(pairlabel).transpose()[svInd]
        #print(sVs.shape, testset[i, :].shape)
        kernelEval = kernelTrans(sVs, testset[i, :], kTup=('rbf', 50))
        #print(multiply(labelSV, alphas[svInd]).shape)
        predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
        # predict > 0 means the first class of the pair wins, predict < 0 the second
        if predict > 0:
            if key == '12' or key == '13': A += 1
            if key == '23': B += 1
        elif predict < 0:
            if key == '12': B += 1
            if key == '23' or key == '13': C += 1
    # the predicted class is the one with the most votes (A -> 1, B -> 2, C -> 3)
    pred = 1 + int(np.argmax([A, B, C]))
    if pred == testlabel[i]: corr += 1
print('accuracy:', corr / m)
# For comparison, the classification performance of sklearn's SVC and LinearSVC
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
names = ['SVC', 'LinearSVC']
classifiers = [SVC(C=3, gamma=0.001, decision_function_shape='ovo', class_weight={1: 7, 2: 1.83, 3: 3.17}, random_state=1),
               LinearSVC(C=2, class_weight={1: 7, 2: 1.83, 3: 3.17}, random_state=1)]
X = dataset
y = target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)  # split the dataset
print('test set size:', X_test.shape, y_test.shape)
for name, clf in zip(names, classifiers):
    print(name, ':')
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print(score)
On the dataset used here, sklearn's SVC and the one-vs-one SVM built on this SMO implementation perform essentially the same, both reaching an accuracy of around 0.55.
If you think anything in this article is wrong, feel free to leave a comment!