Multiclass Logistic Regression

Before discussing multiclass logistic regression, we first need to understand logistic regression. Logistic regression is a classification model in machine learning: despite the word "regression" in its name, it performs classification. Put simply, it applies the sigmoid function to the output of a linear model, turning the result into a binary classification. Multiclass logistic regression instead applies the softmax function to the output, with the number of classes defined by the model itself.

As shown in the figure below, the yellow nodes are the input features and the green nodes are the output classes; multiclass logistic regression simply adds a softmax on top of the green nodes' outputs to normalize them into probabilities:

[Figure: yellow input-feature nodes fully connected to green output-class nodes, with a softmax applied to the outputs]
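To make the relationship concrete, here is a tiny illustrative sketch (added for clarity; it uses the same mxnet.ndarray API as the code below):

from mxnet import ndarray as nd

# Softmax turns arbitrary scores into a probability distribution.
scores = nd.array([[2.0, 1.0, 0.1]])
exp = nd.exp(scores)
probs = exp / exp.sum(axis=1, keepdims=True)
print(probs)   # roughly [[0.66 0.24 0.10]]; each row sums to 1

# With two classes, softmax reduces to the sigmoid: softmax([z, 0])[0] == sigmoid(z).
z = nd.array([1.5])
print(nd.exp(z) / (nd.exp(z) + 1))   # same as 1 / (1 + exp(-1.5))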

Multiclass Logistic Regression from Scratch

Code:

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: yuquanle
# 2017/10/14
# Multiclass logistic regression, following Mu Li's tutorial
# This example classifies an MNIST-like dataset: MNIST contains handwritten digits,
# while this dataset (FashionMNIST) contains clothing items.

from mxnet import gluon
from mxnet import ndarray as nd



# Scale pixel values to [0, 1] and cast to float32
def transform(data, label):
    return data.astype('float32')/255, label.astype('float32')

mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)
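# mnist_train holds 60,000 training examples and mnist_test 10,000 test examples;
# each image is a 28x28x1 float array with pixel values scaled to [0, 1].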


# Clothing names corresponding to the labels
def get_text_labels(label):
    text_labels = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'
    ]
    return [text_labels[int(i)] for i in label]


# Data loading
batch_size = 256
# gluon.data's DataLoader yields one batch at a time
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

# Initialize parameters
num_inputs = 784
num_outputs = 10

W = nd.random_normal(shape=(num_inputs, num_outputs))
b = nd.random_normal(shape=num_outputs)
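# Shapes: W is (784, 10), one column of weights per class; b is (10,), one bias per class.
# Note (added): unit-variance normal initialization is fairly large for this model;
# the Gluon version below starts from much smaller default weights.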

params = [W, b]

for param in params:
    param.attach_grad()

# Define the model
# In multiclass classification the output is a probability for each class, and these
# probabilities sum to 1; this is implemented with the softmax function.
def softmax(X):
    exp = nd.exp(X)
    partition = exp.sum(axis=1, keepdims=True)
    return exp / partition
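# Note (added): this naive softmax can overflow for large scores; a numerically stable
# variant subtracts the row max first: nd.exp(X - X.max(axis=1, keepdims=True)).
# Quick sanity check -- every row should sum to 1:
# print(softmax(nd.random_normal(shape=(2, 5))).sum(axis=1))   # expect [1. 1.]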

def net(X):
    return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b)

# Cross-entropy loss function
# We need a loss function defined on probability outputs. The most common choice is
# cross-entropy, which measures the discrepancy between the predicted distribution and
# the true one; minimizing it is equivalent to maximizing the probability assigned to
# the correct label.

def cross_entropy(yhat, y):
    return - nd.pick(nd.log(yhat), y)
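# nd.pick selects, for each row of nd.log(yhat), the entry indexed by the true label
# in y, so the loss is the negative log-probability assigned to the correct class.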

# Compute accuracy
# Given probability outputs, take the class with the highest predicted probability as
# the prediction, then compare with the true label to see whether it is correct.
def accuracy(output, label):
    return nd.mean(output.argmax(axis=1)==label).asscalar()

def evaluate_accuracy(data_iterator, net):
    acc = 0
    for data, label in data_iterator:
        output = net(data)
        acc = acc + accuracy(output, label)
    return acc/len(data_iterator)
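# Note (added): len(data_iterator) is the number of batches, so this averages
# per-batch accuracies; the estimate is slightly biased if the last batch is smaller.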

# Sanity check before training (should be around 0.1 for ten random classes):
# print(evaluate_accuracy(test_data, net))

# utils.py ships with the tutorial's companion code; add its directory to the path if needed:
# import sys
# sys.path.append('..')
from utils import SGD
from mxnet import autograd

learning_rate = 0.1
epochs = 5
for epoch in range(epochs):
    train_loss = 0
    train_acc = 0
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = cross_entropy(output, label)
        loss.backward()
        # Average the gradient over the batch so the learning rate is less sensitive to batch size
        SGD(params, learning_rate / batch_size)

        train_loss = train_loss + nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    # Evaluate on the test set at the end of each epoch
    test_acc = evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc))


# Predict labels for new samples
# After training, W and b are fixed; prediction is just feeding data through the net
# and reading off the label.
data, label = mnist_test[0:9]
print('true labels')
print(get_text_labels(label))
predicted_labels = net(data).argmax(axis=1)
print('predicted labels')
print(get_text_labels(predicted_labels.asnumpy()))
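A note on the `SGD` helper imported from `utils` above: it is part of the tutorial's companion code. For reference, a minimal sketch of what such a helper is assumed to look like (an in-place update using the gradients filled in by loss.backward(); not necessarily the module's verbatim source):

def SGD(params, lr):
    # In-place SGD step: param <- param - lr * param.grad
    for param in params:
        param[:] = param - lr * param.grad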

Result:
Epoch 0. Loss: 3.614154, Train acc 0.441933, Test acc 0.596094
Epoch 1. Loss: 1.931394, Train acc 0.625044, Test acc 0.651074
Epoch 2. Loss: 1.598601, Train acc 0.673343, Test acc 0.694531
Epoch 3. Loss: 1.420518, Train acc 0.701335, Test acc 0.711719
Epoch 4. Loss: 1.308131, Train acc 0.718661, Test acc 0.726855
true labels
['t-shirt', 'trouser', 'pullover', 'pullover', 'dress', 'pullover', 'bag', 'shirt', 'sandal']
predicted labels
['shirt', 'trouser', 'pullover', 't-shirt', 'dress', 'shirt', 'bag', 'coat', 'sandal']


Multiclass Logistic Regression Using Gluon

Code:

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: yuquanle
# 2017/10/15
# Multiclass logistic regression, following Mu Li's tutorial
# This example classifies an MNIST-like dataset: MNIST contains handwritten digits,
# while this dataset (FashionMNIST) contains clothing items.

# Clothing names corresponding to the labels
def get_text_labels(label):
    text_labels = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'
    ]
    return [text_labels[int(i)] for i in label]

# Implementation using gluon, mxnet's high-level abstraction package
from mxnet import gluon
from mxnet import ndarray as nd


batch_size = 256

# Scale pixel values to [0, 1] and cast to float32
def transform(data, label):
    return data.astype('float32')/255, label.astype('float32')

mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)

train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

# Define and initialize the model
# There is no need to specify each layer's input size; gluon infers it automatically.
net = gluon.nn.Sequential()
with net.name_scope():
    # The Flatten layer reshapes the input into a batch_size x ? matrix
    net.add(gluon.nn.Flatten())
    # 10 output nodes
    net.add(gluon.nn.Dense(10))
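    # The Dense layer outputs one raw score (logit) per class; the softmax itself is
    # applied inside the loss function defined below.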
net.initialize()

# Softmax and cross-entropy loss, fused into a single op for numerical stability
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Optimization
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

# Training
from mxnet import autograd
import utils

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
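        # step(batch_size) scales the gradients by 1/batch_size before updating,
        # mirroring learning_rate / batch_size in the from-scratch version.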
        trainer.step(batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    # Evaluate on the test set at the end of each epoch
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc))
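
The results below also print true vs. predicted labels for a few test samples. That step is not shown in the listing above; it presumably reuses the same prediction snippet as the from-scratch version, e.g.:

data, label = mnist_test[0:9]
print('true labels')
print(get_text_labels(label))
predicted_labels = net(data).argmax(axis=1)
print('predicted labels')
print(get_text_labels(predicted_labels.asnumpy()))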

Result:
Epoch 0. Loss: 0.791282, Train acc 0.745268, Test acc 0.802637
Epoch 1. Loss: 0.575680, Train acc 0.808965, Test acc 0.820605
Epoch 2. Loss: 0.530466, Train acc 0.823908, Test acc 0.830273
Epoch 3. Loss: 0.505710, Train acc 0.830430, Test acc 0.836816
Epoch 4. Loss: 0.490304, Train acc 0.834707, Test acc 0.836816
true labels
['t-shirt', 'trouser', 'pullover', 'pullover', 'dress', 'pullover', 'bag', 'shirt', 'sandal']
predicted labels
['t-shirt', 'trouser', 'pullover', 'shirt', 'coat', 'shirt', 'bag', 'shirt', 'sandal']


The experiments show that with only 5 training epochs, a few samples are still misclassified. Note also that after the same number of epochs the Gluon version reaches markedly higher accuracy (test accuracy about 0.84 vs. 0.73), likely because the fused SoftmaxCrossEntropyLoss is more numerically stable than the hand-written softmax-then-log, and because Gluon initializes with smaller weights than nd.random_normal.