代码结构
😸本代码主要由 4
个 python
文件和 3
个文件夹组成。其中,dataset.py
用以加载数据集(MNIST
或 cifar10
);hog.py
实现了 HOG
算法;svm.py
实现 SVM
算法;main.py
用来测试分类效果。文件夹 model
用来存储训练好的 SVM
模型;feat
文件夹存放 HOG
特征;data
文件夹存放读取后的数据集信息。
dataset.py
文件可通过参数kind
指定读取训练集还是测试集的数据,通过参数reload
来指定是否重新获取数据,通过path
来指明数据集的根目录。hog.py
中类HogDescriptor
提供函数get_feat
来获取HOG
特征,通过参数data
指定处理的数据,通过kind
指定数据的类型(如:mnist.train
为MNIST
的训练集),通过reload
参数指明是否重新获取HOG
特征。svm.py
中多分类SVM
是借助于sklearn
库来实现的,该文件主要定义了4
个类,分别是SMO
算法求解器类、带核函数缓存机制的Solver
、二分类线性SVM
和多分类线性SVM。
main.py
根据前面3
个文件中定义的类来处理主要逻辑,内容包括用以处理分类的classification
函数和主体逻辑main
函数
HOG
✍️HOG
特征检测算法,最早是由法国研究员 Dalal
等在 CVPR-2005
上提出来的一种解决人体目标检测的图像描述子,是一种用于表征图像局部梯度方向和梯度强度分布特性的描述符。其主要思想是在边缘具体位置未知的情况下,边缘方向的分布也可以很好的表示行人目标的外形轮廓。HOG
特征检测算法主要包含以下几个步骤:
- 颜色空间归一化:主要包括图像灰度化和 Gamma 矫正(对应代码见 hog.py 中的 get_feat 函数,其中 gamma 取 0.5);
- 梯度计算:对经过颜色空间归一化后的图像,求取其梯度幅值及梯度方向。可通过 Sobel 算子分别在水平和垂直方向进行计算,然后进行融合;
- 梯度直方图计算:当梯度方向不是恰好落在某一个 bin 上时,需获取距离该方向最近的 2 个 bin(超过 bin 个数时取模),然后根据其与这两个 bin 的距离来获取相应权重 weight 并为直方图赋值;
- 重叠块归一化:block 默认由 4 个 cell(2×2)组成,其移动步长为 1 个 cell,并且采用 L2 范数来处理归一化(L2-norm)。
SVM
✍️支持向量机 SVM
(support vector machines)是一种二分类模型。其策略是间隔最大化,这等价于正则化的合页损失函数最小化问题。在学习算法上采用序列最小最优化算法 SMO
。SVM
通常可分为线性可分支持向量机、线性支持向量机、非线性支持向量机。线性多分类 SVM LinearSVC
继承自二分类线性 SVM BiLinearSVC
,并通过 sklearn
的 multiclass
模块实现了多分类。其主要包含用以训练模型的fit函数,用以决策的 decision_function
函数,用来预测的 predict
函数以及用来评估的 score
函数
Dataset
😿本代码中只实现了 CIFAR10
和 MNIST
数据集的加载。其中,函数 _unpickle
用来解压 CIFAR10
数据集,函数 load_cifar10
用来加载 CIFAR10
数据集,函数 load_mnist
用来加载 MNIST
数据集。
代码
😶🌫️建议先调试一下 dataset.py
能不能正确获取数据(路径要自己修改),再调试一下 hog.py
能不能提取特征,接着再调试 svm.py
进行分类(处理 CIFAR10
时建议直接调用接口,精度可达 50%
左右,不过依赖于前面提取的 HOG
特征)。在调试时直接注释 main.py
中相应代码即可
# dataset.py
import os
import numpy as np
import pickle
import struct
class DatasSet():
    """Load CIFAR-10 / MNIST from their raw files and cache them as ``.npy``.

    Decoded arrays are stored as ``<name>/<dataset>_<kind>_data.npy`` and
    ``<name>/<dataset>_<kind>_label.npy``. Later calls read those files
    instead of the raw data unless ``reload=True`` is passed.
    """

    def __init__(self, name='data'):
        self.name = name  # directory that receives the cached .npy files

    def _unpickle(self, file):
        """Return the dict stored in one pickled CIFAR-10 batch file."""
        # NOTE: unpickling can execute arbitrary code; only open trusted files.
        with open(file, 'rb') as fo:
            batch = pickle.load(fo, encoding='bytes')
        return batch

    def _cache_files(self, kind):
        """Return the (data, label) cache file names for the current root."""
        return f'{self.root}_{kind}_data.npy', f'{self.root}_{kind}_label.npy'

    def _load_cache(self, kind, reload):
        """Return cached ``(images, labels)`` or ``None`` when unavailable."""
        data_file, label_file = self._cache_files(kind)
        if reload or not (os.path.exists(data_file) and os.path.exists(label_file)):
            return None
        images = np.load(data_file)
        labels = np.load(label_file)
        print(f'Get data from {self.root}_{kind}_*.npy')
        return images, labels

    def _save_cache(self, kind, images, labels):
        """Write the arrays to the cache, creating its directory if needed."""
        data_file, label_file = self._cache_files(kind)
        cache_dir = os.path.dirname(data_file)
        if cache_dir:
            # np.save does not create missing directories on its own.
            os.makedirs(cache_dir, exist_ok=True)
        np.save(data_file, images)
        np.save(label_file, labels)

    def load_cifar10(self, kind='train', path='./', reload=False):
        """Load the CIFAR-10 python batches.

        Parameters
        ----------
        kind : str
            ``'train'`` reads ``data_batch_1`` .. ``data_batch_5``,
            ``'test'`` reads ``test_batch``.
        path : str
            Directory containing the raw batch files.
        reload : bool
            When True, ignore the cache and decode the raw files again.

        Returns
        -------
        images : numpy.ndarray of shape (n, 3, 1024)
        labels : numpy.ndarray of shape (n, 1)

        Raises
        ------
        ValueError
            If the raw files must be read and ``kind`` is not supported.
        """
        self.root = f'{self.name}/cifar10'
        cached = self._load_cache(kind, reload)
        if cached is not None:
            return cached

        if kind == 'test':
            batch_files = ['test_batch']
        elif kind == 'train':
            batch_files = [f'data_batch_{idx}' for idx in range(1, 6)]
        else:
            raise ValueError(
                f"Unsupported CIFAR-10 kind {kind!r}; expected 'train' or 'test'")

        image_parts, label_parts = [], []
        for batch_file in batch_files:
            data = self._unpickle(os.path.join(path, batch_file))
            # -1 lets numpy infer the number of images in the batch.
            image_parts.append(np.reshape(data[b'data'], (-1, 3, 32 * 32)))
            label_parts.append(np.reshape(data[b'labels'], (-1, 1)))
        images = np.concatenate(image_parts)
        labels = np.concatenate(label_parts)

        self._save_cache(kind, images, labels)
        return images, labels

    def load_mnist(self, kind='train', path='./', reload=False):
        """Load MNIST from the uncompressed idx files.

        Parameters
        ----------
        kind : str
            File-name prefix of the idx files, e.g. ``'train'`` or ``'t10k'``.
        path : str
            Directory containing ``<kind>-labels-idx1-ubyte`` and
            ``<kind>-images-idx3-ubyte``.
        reload : bool
            When True, ignore the cache and decode the raw files again.

        Returns
        -------
        images : numpy.ndarray of shape (n, rows * cols), dtype uint8
        labels : numpy.ndarray of shape (n,), dtype uint8

        Raises
        ------
        ValueError
            If the image file and the label file disagree on the item count.
        """
        self.root = f'{self.name}/mnist'
        cached = self._load_cache(kind, reload)
        if cached is not None:
            return cached

        labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte')
        images_path = os.path.join(path, f'{kind}-images-idx3-ubyte')

        with open(labels_path, 'rb') as lbpath:
            # Header: magic number and item count, both big-endian uint32.
            _magic, _count = struct.unpack('>II', lbpath.read(8))
            labels = np.fromfile(lbpath, dtype=np.uint8)
        with open(images_path, 'rb') as imgpath:
            # Header: magic number, image count, rows, columns.
            _magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
            pixels = np.fromfile(imgpath, dtype=np.uint8)
        if num != len(labels):
            raise ValueError(
                f'{images_path} holds {num} images but {labels_path} holds '
                f'{len(labels)} labels')
        images = pixels.reshape(len(labels), rows * cols)

        self._save_cache(kind, images, labels)
        return images, labels
# hog.py
import cv2
import numpy as np
import math
import os
from tqdm import tqdm
class HogDescriptor():
    """Histogram-of-oriented-gradients feature extractor.

    Parameters
    ----------
    pixels_per_cell : int
        Side length, in pixels, of one square cell.
    bin_size : int
        Number of orientation bins covering 180 degrees.

    Raises
    ------
    TypeError
        If either parameter is not a plain ``int``.
    ValueError
        If either parameter is not positive.
    """

    def __init__(self, pixels_per_cell=4, bin_size=9):
        for label, value in (('bin_size', bin_size),
                             ('pixels_per_cell', pixels_per_cell)):
            if not isinstance(value, int) or isinstance(value, bool):
                raise TypeError(f'{label} should be integer, got {value!r}')
            if value <= 0:
                raise ValueError(f'{label} should be positive, got {value!r}')
        self.pixels_per_cell = pixels_per_cell
        self.bin_size = bin_size
        # Width of one bin in degrees. True division keeps the bins evenly
        # spaced even when bin_size does not divide 180; for divisors of 180
        # the binning is identical to the former integer width.
        self.angle_unit = 180 / self.bin_size

    def _extract(self, image):
        """Return the flattened HOG vector of one 2-D grayscale image."""
        height, width = image.shape
        # Per-pixel gradient magnitude and direction (degrees).
        gradient_magnitude, gradient_angle = self._global_gradient(image)
        gradient_magnitude = abs(gradient_magnitude)

        # One orientation histogram per cell.
        step = self.pixels_per_cell
        n_rows, n_cols = height // step, width // step
        cell_gradient_vector = np.zeros((n_rows, n_cols, self.bin_size))
        for i in range(n_rows):
            for j in range(n_cols):
                rows = slice(i * step, (i + 1) * step)
                cols = slice(j * step, (j + 1) * step)
                cell_gradient_vector[i, j] = self._cell_gradient(
                    gradient_magnitude[rows, cols], gradient_angle[rows, cols])

        # A block is 2x2 cells and slides by one cell; each block is
        # L2-normalised independently.
        hog_vector = []
        for i in range(n_rows - 1):
            for j in range(n_cols - 1):
                block_vector = np.concatenate((
                    cell_gradient_vector[i, j],
                    cell_gradient_vector[i, j + 1],
                    cell_gradient_vector[i + 1, j],
                    cell_gradient_vector[i + 1, j + 1],
                ))
                magnitude = np.linalg.norm(block_vector)
                if magnitude != 0:  # leave all-zero blocks untouched
                    block_vector = block_vector / magnitude
                hog_vector.append(block_vector)
        return np.array(hog_vector).ravel()  # flatten to a 1-D feature

    def _global_gradient(self, image):
        """Return per-pixel gradient magnitude and angle (degrees) via Sobel."""
        gradient_values_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
        gradient_values_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5)
        gradient_magnitude, gradient_angle = cv2.cartToPolar(
            gradient_values_x, gradient_values_y, angleInDegrees=True)
        return gradient_magnitude, gradient_angle

    def _cell_gradient(self, cell_magnitude, cell_angle):
        """Return the orientation histogram (list of ``bin_size``) of a cell.

        Each pixel's magnitude is split between its two nearest bins; the
        closer bin receives the larger share.
        """
        orientation_centers = [0] * self.bin_size
        # Row-major traversal, one (magnitude, angle) pair per pixel.
        for strength, angle in zip(np.ravel(cell_magnitude), np.ravel(cell_angle)):
            lower, upper, offset = self._get_closest_bins(angle)
            share = offset / self.angle_unit
            orientation_centers[lower] += strength * (1 - share)
            orientation_centers[upper] += strength * share
        return orientation_centers

    def _get_closest_bins(self, gradient_angle):
        """Return ``(lower_bin, upper_bin, offset)`` for an angle in degrees.

        ``offset`` is the distance in degrees from the start of the lower
        bin. Bin indices wrap modulo ``bin_size``, so angles of 180 degrees
        and above fold back onto the same set of bins.
        """
        idx = int(gradient_angle / self.angle_unit)
        weight = gradient_angle % self.angle_unit
        return idx % self.bin_size, (idx + 1) % self.bin_size, weight

    def get_feat(self, data, kind='mnist.train', reload=False):
        """Return the HOG features of every image in ``data``.

        Parameters
        ----------
        data : iterable of numpy.ndarray
            For ``cifar10`` each item is reshaped from (3, 1024) to a
            32x32 RGB image; otherwise each item is reshaped to (28, 28).
        kind : str
            ``'<dataset>.<split>'``, e.g. ``'mnist.train'``.
        reload : bool
            When True, recompute even if a cached feature file exists.

        Returns
        -------
        numpy.ndarray of dtype float64, one row per image.
        """
        parts = kind.split('.')
        data_name, data_type = parts[0], parts[1]
        feat_file = f'feat/{data_name}_{data_type}_feat.npy'
        # NOTE(review): the cache name does not encode pixels_per_cell or
        # bin_size, so pass reload=True after changing either of them.
        if os.path.exists(feat_file) and not reload:
            feat = np.load(feat_file)
            print(f'get feature from {feat_file}')
            return feat

        feat = []
        for image in tqdm(data):
            if data_name == 'cifar10':
                image = image.T.reshape((32, 32, 3))
                # The reshape above yields R, G, B channel order, so the
                # RGB conversion code is the matching one.
                image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
                # Gamma correction with gamma = 0.5, rescaled to 0..255.
                peak = float(np.max(image))
                if peak > 0:
                    image = np.power(image / peak, 0.5) * 255
                else:
                    # All-black image: skip the 0/0 division.
                    image = image.astype('float64')
            else:
                image = image.reshape((28, 28))
            feat.append(self._extract(image))
        feat = np.array(feat, dtype='float64')

        os.makedirs('feat', exist_ok=True)  # np.save needs the folder to exist
        np.save(feat_file, feat)
        print(f'{data_type} features are extracted and saved at folder feat')
        return feat
# svm.py,来自 https://github.com/Kaslanarian/PySVM
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from functools import lru_cache
# SMO 算法求解器
class Solver:
    r'''SMO solver that iteratively solves

    .. math:: \min_{\pmb\alpha}\quad&\frac12\pmb\alpha^T\pmb Q\pmb\alpha+\pmb p^T\pmb\alpha\\
        \text{s.t.}\quad&\pmb y^T\pmb\alpha=0\\
        &0\leq\alpha_i\leq C,i=1,\cdots,l

    Parameters
    ----------
    Q : numpy.ndarray or None
        The :math:`\pmb Q` matrix; ``None`` when a subclass supplies rows
        through :meth:`get_Q` instead;
    p : numpy.ndarray
        The :math:`\pmb p` vector;
    y : numpy.ndarray
        The :math:`\pmb y` vector (entries +1 / -1);
    C : float
        The box-constraint upper bound :math:`C`;
    tol : float, default=1e-5
        Tolerance used by the working-set selection.
    '''
    def __init__(self, Q: np.ndarray, p: np.ndarray, y: np.ndarray,
                 C: float, tol: float = 1e-5) -> None:
        problem_size = p.shape[0]
        assert problem_size == y.shape[0]
        if Q is not None:
            assert problem_size == Q.shape[0]
            assert problem_size == Q.shape[1]
        self.Q, self.P, self.y = Q, p, y
        self.C, self.tol, self.alpha = C, tol, np.zeros(problem_size)
        # -y * grad f(alpha); with alpha = 0 the gradient equals p.
        self.neg_y_grad = -y * p

    def working_set_select(self):
        r'''Select the working pair with the first-order rule:

        .. math:: \pmb{I}_{up}(\pmb\alpha)&=\{t|\alpha_t<C,y_t=1\text{ or }\alpha_t>0,y_t=-1\}\\
            \pmb{I}_{low}(\pmb\alpha)&=\{t|\alpha_t<C,y_t=-1\text{ or }\alpha_t>0,y_t=1\}\\
            i&\in\arg\max_{t}\{-y_t\nabla_tf(\pmb\alpha)|t\in\pmb{I}_{up}(\pmb\alpha)\}\\
            j&\in\arg\min_{t}\{-y_t\nabla_tf(\pmb\alpha)|t\in\pmb{I}_{low}(\pmb\alpha)\}\\

        Returns
        -------
        (i, j) : the selected indices, or ``(-1, -1)`` when either index set
        is empty or the violation is below ``tol`` (i.e. converged).
        '''
        below_cap = self.alpha < self.C
        above_zero = self.alpha > 0
        positive = self.y > 0
        negative = self.y < 0

        Iup = np.flatnonzero((below_cap & positive) | (above_zero & negative))
        Ilow = np.flatnonzero((below_cap & negative) | (above_zero & positive))
        # argmax/argmin raise on empty input; test explicitly instead of
        # hiding every possible error behind a bare except.
        if Iup.size == 0 or Ilow.size == 0:
            return -1, -1

        i = Iup[np.argmax(self.neg_y_grad[Iup])]
        j = Ilow[np.argmin(self.neg_y_grad[Ilow])]
        if self.neg_y_grad[i] - self.neg_y_grad[j] < self.tol:
            return -1, -1
        return i, j

    def update(self, i: int, j: int, func=None):
        '''Update the pair (alpha_i, alpha_j) while keeping both inside
        [0, C] and preserving the equality constraint.

        See <https://welts.xyz/2021/07/11/libsmo/>.

        Parameters
        ----------
        i, j : int
            Indices returned by :meth:`working_set_select`.
        func : callable, optional
            Forwarded to :meth:`get_Q`; unused by this base class.

        Returns
        -------
        (delta_i, delta_j) : the changes applied to alpha_i and alpha_j.
        '''
        Qi, Qj = self.get_Q(i, func), self.get_Q(j, func)
        yi, yj = self.y[i], self.y[j]
        alpha_i, alpha_j = self.alpha[i], self.alpha[j]

        quad_coef = Qi[i] + Qj[j] - 2 * yi * yj * Qi[j]
        if quad_coef <= 0:
            # Guard against a non-positive curvature before dividing.
            quad_coef = 1e-12

        if yi * yj == -1:
            # Opposite labels: alpha_i - alpha_j stays constant.
            delta = (self.neg_y_grad[i] * yi +
                     self.neg_y_grad[j] * yj) / quad_coef
            diff = alpha_i - alpha_j
            self.alpha[i] += delta
            self.alpha[j] += delta

            # Clip at the lower bound 0.
            if diff > 0:
                if self.alpha[j] < 0:
                    self.alpha[j] = 0
                    self.alpha[i] = diff
            else:
                if self.alpha[i] < 0:
                    self.alpha[i] = 0
                    self.alpha[j] = -diff

            # Clip at the upper bound C.
            if diff > 0:
                if self.alpha[i] > self.C:
                    self.alpha[i] = self.C
                    self.alpha[j] = self.C - diff
            else:
                if self.alpha[j] > self.C:
                    self.alpha[j] = self.C
                    self.alpha[i] = self.C + diff
        else:
            # Same labels: alpha_i + alpha_j stays constant.
            delta = (self.neg_y_grad[j] * yj -
                     self.neg_y_grad[i] * yi) / quad_coef
            pair_sum = self.alpha[i] + self.alpha[j]
            self.alpha[i] -= delta
            self.alpha[j] += delta

            if pair_sum > self.C:
                if self.alpha[i] > self.C:
                    self.alpha[i] = self.C
                    self.alpha[j] = pair_sum - self.C
            else:
                if self.alpha[j] < 0:
                    self.alpha[j] = 0
                    self.alpha[i] = pair_sum

            if pair_sum > self.C:
                if self.alpha[j] > self.C:
                    self.alpha[j] = self.C
                    self.alpha[i] = pair_sum - self.C
            else:
                if self.alpha[i] < 0:
                    self.alpha[i] = 0
                    self.alpha[j] = pair_sum

        delta_i = self.alpha[i] - alpha_i
        delta_j = self.alpha[j] - alpha_j
        # Keep -y * grad in sync with the new alpha.
        self.neg_y_grad -= self.y * (delta_i * Qi + delta_j * Qj)
        return delta_i, delta_j

    def calculate_rho(self) -> float:
        r'''Compute the bias term

        .. math:: \rho=\dfrac{\sum_{i:0<\alpha_i<C}y_i\nabla_if(\pmb\alpha)}{|\{i\vert0<\alpha_i<C\}|}

        When no multiplier lies strictly between 0 and C, rho is the
        midpoint of its feasible interval:

        .. math:: -M(\pmb\alpha)&=\max\{y_i\nabla_if(\pmb\alpha)|\alpha_i=0,y_i=-1\text{ or }\alpha_i=C,y_i=1\}\\
            -m(\pmb\alpha)&=\min\{y_i\nabla_if(\pmb\alpha)|\alpha_i=0,y_i=1\text{ or }\alpha_i=C,y_i=-1\}\\
            \rho&=-\dfrac{M(\pmb\alpha)+m(\pmb\alpha)}{2}

        Raises
        ------
        ValueError
            From numpy, when the fallback is needed and one of the two
            index sets is empty.
        '''
        sv = np.logical_and(
            self.alpha > 0,
            self.alpha < self.C,
        )
        if sv.sum() > 0:
            return -np.average(self.neg_y_grad[sv])

        at_zero = self.alpha == 0
        at_cap = self.alpha == self.C
        positive = self.y > 0
        negative = self.y < 0
        # I_low restricted to bounded multipliers.
        low_id = (at_zero & negative) | (at_cap & positive)
        # I_up restricted to bounded multipliers.
        up_id = (at_zero & positive) | (at_cap & negative)
        # neg_y_grad holds -y * grad, so M is its minimum over I_low and m
        # its maximum over I_up. The earlier code took the opposite
        # extremes and could return a rho outside the feasible interval.
        M = self.neg_y_grad[low_id].min()
        m = self.neg_y_grad[up_id].max()
        return -(M + m) / 2

    def get_Q(self, i: int, func=None):
        r'''Return the i-th row of the stored Q matrix.

        ``func`` is accepted only for signature compatibility with
        subclasses and is ignored here.
        '''
        return self.Q[i]
# 带核函数缓存机制的 Solver:使用 LRU 缓存来计算 Q 矩阵,从而不需要计算 Q 矩阵,从而带来存储的问题
class SolverWithCache(Solver):
    r'''Solver that never materialises Q: rows are produced on demand by a
    caller-supplied function and kept in an LRU cache, trading
    recomputation for memory.

    Parameters
    ----------
    p : numpy.ndarray
        The :math:`\pmb p` vector;
    y : numpy.ndarray
        The :math:`\pmb y` vector;
    C : float
        The box-constraint upper bound :math:`C`;
    tol : float, default=1e-5
        Tolerance used by the working-set selection;
    cache_size : int, default=256
        Maximum number of Q rows kept in the LRU cache.

    See also
    --------
    Solver
    '''
    cache_size = 256  # class-level default, kept for backward compatibility

    def __init__(self,
                 p: np.ndarray,
                 y: np.ndarray,
                 C: float,
                 tol: float = 1e-5,
                 cache_size: int = 256) -> None:
        super().__init__(None, p, y, C, tol)
        self.cache_size = cache_size
        # Per-instance cache: it honours the requested size and is released
        # together with the solver. A class-level lru_cache on the method
        # always used 256 entries and kept every solver (and the training
        # data captured by ``func``) alive.
        self._row_cache = lru_cache(maxsize=cache_size)(self._compute_row)

    @staticmethod
    def _compute_row(i, func):
        """Compute one row of Q by delegating to ``func``."""
        return func(i)

    # working_set_select, update and calculate_rho are inherited unchanged
    # from Solver.

    def get_Q(self, i, func):
        """Return ``func(i)``, served from the LRU cache when available."""
        return self._row_cache(i, func)
# 二分类线性 SVM,该类被多分类 LinearSVC 继承,所以不需要使用它
class BiLinearSVC(BaseEstimator):
    r'''Binary linear SVM; it serves as the base estimator of the
    multi-class ``LinearSVC``.

    It solves the dual problem

    .. math:: \min_{\pmb\alpha}\quad&\dfrac12\pmb\alpha^\top Q\pmb\alpha-\pmb{e}^\top\pmb{\alpha}\\
        \text{s.t.}\quad& \pmb{y}^\top\pmb\alpha=0,\\
        &0\leqslant\alpha_i\leqslant C,i=1,\cdots ,l

    to obtain the decision boundary

    .. math:: f(\pmb x)=\sum_{i=1}^ly_i\alpha_i\pmb x_i^T\pmb x-\rho

    Parameters
    ----------
    C : float, default=1
        Regularisation parameter;
    max_iter : int, default=1000
        Maximum number of SMO iterations;
    tol : float, default=1e-5
        Tolerance of the SMO algorithm;
    cache_size : int, default=256
        LRU cache size; 0 disables the cache and builds the full Q matrix.
    '''
    def __init__(self,
                 C: float = 1.,
                 max_iter: int = 1000,
                 tol: float = 1e-5,
                 cache_size: int = 256) -> None:
        super().__init__()
        self.C = C
        self.max_iter = max_iter
        self.tol = tol
        self.cache_size = cache_size

    def fit(self, X: np.ndarray, y: np.ndarray):
        '''Train the model.

        Parameters
        ----------
        X : np.ndarray
            Training features, shape (n_samples, n_features);
        y : np.array
            Training labels; 1 is the positive class, every other value is
            treated as negative.

        Returns
        -------
        self
        '''
        X, y = np.array(X), np.array(y, dtype=float)
        y[y != 1] = -1  # map every non-positive label to -1
        n_samples, self.n_features = X.shape
        p = -np.ones(n_samples)
        w = np.zeros(self.n_features)

        def func(i):
            # i-th row of Q, where Q[a, b] = y[a] * y[b] * <x_a, x_b>.
            return y * np.matmul(X, X[i]) * y[i]

        # ``func`` is defined before the branch because solver.update()
        # receives it in both cases.
        if self.cache_size == 0:
            Q = y.reshape(-1, 1) * y * np.matmul(X, X.T)
            solver = Solver(Q, p, y, self.C, self.tol)
        else:
            solver = SolverWithCache(p, y, self.C, self.tol, self.cache_size)

        for _ in range(self.max_iter):
            i, j = solver.working_set_select()
            if i < 0:  # no violating pair left: converged
                break
            delta_i, delta_j = solver.update(i, j, func)
            # Maintain w = sum_k alpha_k * y_k * x_k incrementally.
            w += delta_i * y[i] * X[i] + delta_j * y[j] * X[j]
        else:
            # Reached only when the loop was never interrupted by ``break``.
            print("LinearSVC did not converge within {} iterations".format(
                self.max_iter))

        self.coef_ = (w, solver.calculate_rho())
        return self

    def decision_function(self, X: np.ndarray) -> np.ndarray:
        '''Return the signed decision value ``w . x - rho`` per sample.'''
        return np.matmul(self.coef_[0], np.array(X).T) - self.coef_[-1]

    def predict(self, X: np.ndarray) -> np.ndarray:
        '''Return predicted labels: 1 where the decision value is >= 0, else 0.'''
        return (self.decision_function(np.array(X)) >= 0).astype(int)

    def score(self, X: np.ndarray, y: np.ndarray) -> float:
        '''Return the accuracy of ``predict(X)`` against ``y``.'''
        return accuracy_score(y, self.predict(X))
# 多分类线性 SVM,使用 sklearn 的 multiclass 模块实现了多分类
class LinearSVC(BiLinearSVC):
    r'''Multi-class linear SVM: a ``BiLinearSVC`` wrapped by one of
    sklearn's multiclass meta-estimators.

    Parameters
    ----------
    C : float, default=1
        Regularisation parameter;
    max_iter : int, default=2000
        Maximum number of SMO iterations;
    tol : float, default=1e-5
        Tolerance of the SMO algorithm;
    cache_size : int, default=256
        LRU cache size; 0 disables the cache and builds the full Q matrix;
    multiclass : {"ovr", "ovo"}, default="ovr"
        Multi-class strategy: one-vs-rest or one-vs-one;
    n_jobs : int, default=None
        Passed to the sklearn meta-estimator as its ``n_jobs`` argument.
    '''
    def __init__(self,
                 C: float = 1.,
                 max_iter: int = 2000,
                 tol: float = 1e-5,
                 cache_size: int = 256,
                 multiclass: str = "ovr",
                 n_jobs=None) -> None:
        super().__init__(C, max_iter, tol, cache_size)
        self.multiclass = multiclass
        self.n_jobs = n_jobs

        binary_estimator = BiLinearSVC(C, max_iter, tol, cache_size)
        strategies = {
            "ovo": OneVsOneClassifier,
            "ovr": OneVsRestClassifier,
        }
        # An unknown strategy name surfaces as KeyError from this lookup.
        self.multiclass_model = strategies[multiclass](
            estimator=binary_estimator,
            n_jobs=n_jobs,
        )

    def fit(self, X: np.ndarray, y: np.ndarray):
        '''Fit the wrapped multiclass model.

        Parameters
        ----------
        X : np.ndarray
            Training features;
        y : np.array
            Training labels.

        Return
        ------
        self : LinearSVC
        '''
        self.multiclass_model.fit(X, y)
        return self

    def decision_function(self, X: np.ndarray):
        '''Return the decision values of the wrapped multiclass model.'''
        return self.multiclass_model.decision_function(X)

    def predict(self, X: np.ndarray):
        '''Return the labels predicted by the wrapped multiclass model.'''
        return self.multiclass_model.predict(X)

    def score(self, X: np.ndarray, y: np.ndarray):
        '''Return the wrapped multiclass model's score on ``(X, y)``.'''
        return self.multiclass_model.score(X, y)
# main.py
import joblib
import argparse
from dataset import DatasSet
from hog import HogDescriptor
from svm import LinearSVC
# *_feat 为 HOG 特征,*_labels 为标签,save_name 为模型保存名称,model 指明加载的模型位置
def classification(train_feat, train_labels, test_feat, test_labels, save_name='default', model=None):
    """Train (or load) the SVM, classify the test set and report accuracy.

    Parameters
    ----------
    train_feat, train_labels
        HOG features and labels used for training.
    test_feat, test_labels
        HOG features and labels used for evaluation.
    save_name : str or None
        A newly trained model is saved as ``model/<save_name>.m``;
        ``None`` skips saving.
    model : str or None
        Path of a previously saved model. If the file exists it is loaded;
        if it is missing (for example on the first run) a new model is
        trained instead of failing.

    Returns
    -------
    float
        Fraction of test samples classified correctly (0.0 for an empty
        test set).
    """
    import os  # function-scope import: this script does not import os itself

    if model is not None and os.path.exists(model):
        print(f'Load model {model}')
        # NOTE: joblib.load unpickles the file; only load trusted models.
        clf = joblib.load(model)
    else:
        if model is not None:
            print(f'Model {model} not found, training a new one')
        print('Training the SVM...')
        clf = LinearSVC()
        clf.fit(train_feat, train_labels)
        if save_name is not None:
            os.makedirs('model', exist_ok=True)  # joblib.dump needs the folder
            print(f'Save model {save_name} at folder model')
            joblib.dump(clf, f'model/{save_name}.m')

    print('Classifying the images...')
    pred_y = clf.predict(test_feat)
    total = test_labels.shape[0]
    correct = sum(1 for predicted, expected in zip(pred_y, test_labels)
                  if predicted == expected)
    accuracy = correct / total if total else 0.0
    print("Accuracy rate:", accuracy)
    return accuracy
def main(args):
    """Run the HOG + SVM pipeline: load data, extract features, classify.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``kind`` ('mnist' or 'cifar10'), ``no_model``,
        ``cells``, ``bins`` and ``reload``. An optional ``data_root``
        attribute overrides the built-in dataset directory.

    Raises
    ------
    ValueError
        If ``args.kind`` names an unsupported dataset.
    """
    kind = args.kind
    # Fail early with a clear message instead of an UnboundLocalError later.
    if kind not in ('mnist', 'cifar10'):
        raise ValueError(
            f"Unsupported dataset kind {kind!r}; expected 'mnist' or 'cifar10'")
    model = None if args.no_model else f'model/{kind}.m'
    custom_root = getattr(args, 'data_root', None)

    dataset = DatasSet()
    hog = HogDescriptor(pixels_per_cell=args.cells, bin_size=args.bins)

    if kind == 'mnist':
        # Root directory of the raw idx files.
        data_root = custom_root or 'E:\\code\\pytorch\\test\\data\\MNIST\\raw'
        train_x, train_y = dataset.load_mnist(kind='train', path=data_root, reload=False)
        test_x, test_y = dataset.load_mnist('t10k', path=data_root)
    else:
        # Root directory of the python batch files.
        data_root = custom_root or 'E:\\code\\pytorch\\test\\data\\cifar-10-batches-py'
        train_x, train_y = dataset.load_cifar10('train', path=data_root)
        test_x, test_y = dataset.load_cifar10('test', path=data_root)

    train_feat = hog.get_feat(data=train_x, kind=f'{kind}.train', reload=args.reload)
    test_feat = hog.get_feat(data=test_x, kind=f'{kind}.test', reload=args.reload)
    train_y, test_y = train_y.flatten(), test_y.flatten()  # labels as 1-D arrays

    classification(train_feat=train_feat, train_labels=train_y,
                   test_feat=test_feat, test_labels=test_y,
                   save_name=f'{kind}', model=model)
if __name__ == '__main__':
    # Command-line options of the HOG + SVM demo.
    parser = argparse.ArgumentParser()
    # ``choices`` rejects unsupported datasets during argument parsing.
    parser.add_argument('--kind', type=str, default='mnist', choices=('mnist', 'cifar10'),
                        help='Kind of the dataset, it can be mnist and cifar10')
    parser.add_argument('--cells', type=int, default=4, help='Pixels per cell')
    parser.add_argument('--bins', type=int, default=9, help='Number of the bins')
    parser.add_argument('--reload', action='store_true', help='Reload the feat')
    parser.add_argument('--no_model', action='store_true', help='Do not Use the pretrained SVM model')
    main(parser.parse_args())