概念笔记:




MATLAB逻辑回归预测模型_MATLAB逻辑回归预测模型


MATLAB逻辑回归预测模型_scala_02


MATLAB逻辑回归预测模型_scala_03


MATLAB逻辑回归预测模型_ci_04


MATLAB逻辑回归预测模型_scala_05


多分类逻辑回归代码实现:


"""
多类分类
使用逻辑回归来识别手写数字0-9
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat  # 用于读取matlab数据文件

# 准备数据
data = loadmat('ex3data1.mat')


# print(data)
# print(data['X'].shape, data['y'].shape)  # (5000, 400) (5000, 1)


# 第一个任务:将逻辑回归实现完全向量化

# sigmoid函数
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


# 代价函数
def cost(theta, X, y, learningRate):
    """
    :param theta: 参数, shape [1 + feature_num, ]
    :param X: 输入特征, shape [sample_num, 1 + feature_num]
    :param y: 输出类别, shape [sample_num, ]
    :param learningRate: 正则化参数, scalar
    :return: whole_cost: 损失值, scalar
    """
    # 将theta、X、y转为numpy矩阵
    theta = np.array(theta)  # shape [1 + feature_num, ]
    X = np.array(X)  # shape [sample_num, 1 + feature_num]
    y = np.array(y)  # shape [sample_num, ]

    # 根据公式计算损失函数(不含正则化)
    predict = sigmoid(X @ theta.T)  # shape [sample_num, ]
    cross_cost = -y * np.log(predict) - (1 - y) * np.log(1 - predict)
    # cross_cost: shape [sample_num, ]

    # 根据公式计算损失函数中的正则化部分
    reg = (learningRate / (2 * len(X))) * np.sum(np.power(theta[1:], 2))  # scalar

    # 将上述结果相加
    whole_cost = np.sum(cross_cost) / len(X) + reg  # scalar

    return whole_cost


# theta = np.zeros([1 + 400, ])
# X = np.insert(data['X'], 0, values=np.ones([5000, ]), axis=1)
# y_0 = np.array([1 if label == 1 else 0 for label in data['y'].ravel()]).reshape([len(X), ])
# print(cost(theta, X, y_0, 1))


# 向量化的梯度下降法
def gradient(theta, X, y, learningRate):
    """
    :param theta: 参数, shape [1 + feature_num, ]
    :param X: 输入特征, shape [sample_num, 1 + feature_num]
    :param y: 输出类别, shape [sample_num, ]
    :param learningRate: 学习率, scalar
    :return: dtheta: 梯度,shape [1 + feature_num, ]
    """
    # 将theta、X、y转为numpy矩阵
    theta = np.array(theta)  # shape [1 + feature_num, ]
    X = np.array(X)  # shape [sample_num, 1 + feature_num]
    y = np.array(y)  # shape [sample_num, ]

    # 获得要更新的参数量
    parameters = int(theta.ravel().shape[0])  # ravel降维打击,将所有元素降到一维

    # 计算预测的误差
    predict = sigmoid(np.dot(X, theta.T))  # shape [sample_num, ]
    error = predict - y  # shape [sample_num, ]

    # 根据error计算梯度
    dtheta = np.dot(error, X).reshape(theta.shape) / len(X) + (learningRate / len(X)) * theta
    # dtheta: shape [1 + feature_num, ]

    # 由于b不需要正则化,需要重置b的梯度
    dtheta[0] -= (learningRate / len(X)) * theta[0]

    return dtheta  # shape [1 + feature_num, ]


# 基于上面定义的代价函数和梯度函数,接下来构建模型训练的函数。
# 该任务有10个可能的类,并且逻辑回归一次只能在2类间进行分类,
# 采用的解决办法是训练10个分类器,每个分类器在“是类别i”和“不是类别i”之间进行分类,i=1...10
# 把10个分类器的训练过程集成在一个函数中,该函数计算10个分类器最终权重,返回为10*(1+feature_num)的numpy数组
from scipy.optimize import minimize


def one_vs_all(X, y, num_labels, learning_rate):
    """
    :param X: 输入特征, shape [sample_num, feature_num]
    :param y: 输出类别, shape [sample_num, ]
    :param num_labels: 标签数, scalar
    :param learning_rate: 学习率, scalar
    :return: all_theta, shape [num_labels, 1 + feature_num]
    """
    rows = X.shape[0]  # sample_num
    params = X.shape[1]  # feature_num

    # 初始化权重参数为0
    all_theta = np.zeros([num_labels, 1 + params])

    # 在X的最左边插入一列1,表示x0
    X = np.insert(X, 0, values=np.ones([rows, ]), axis=1)  # shape [sample_num, 1 + feature_num]

    # 训练阶段,训练10个分类器
    for i in range(1, num_labels + 1):
        theta = np.zeros([1 + params, ])  # shape [1 + feature_num, ]
        y_i = np.array([1 if label[0] == i else 0 for label in y]).reshape([rows, ])  # shape [sample_num, ]

        # 最小化目标函数(代价函数)
        # X:shape [sample_num, 1 + feature_num], y_i:shape [sample_num, ]
        fmin = minimize(fun=cost, x0=theta, args=(X, y_i, learning_rate), method='TNC', jac=gradient)
        # scipy.minimize会自动把theta转为[1+feature_num,]的shape,所以gradient的输出也需要是[1+feature_num,]
        all_theta[i - 1, :] = fmin.x

    return all_theta  # shape [num_labels, 1 + feature_num]


all_theta = one_vs_all(X=data['X'], y=data['y'], num_labels=10, learning_rate=1)


# print(all_theta)


# 使用训练完毕的分类器预测每个图像的标签。需要计算每个类的类概率,输出对应样本的最高概率的类标签
def predict_all(X, all_theta):
    """
    :param X: 输入, shape: [sample_num, feature_num]
    :param all_theta: 10个分类器的参数矩阵, shape [10, 1 + feature_num]
    :return: h_argmax: 所有样本预测结果, shape [sample_num, 1]
    """
    # 获取输入矩阵的维度信息
    rows = X.shape[0]  # sample_num
    params = X.shape[1]  # feature_num
    num_labels = all_theta.shape[0]  # num_labels

    # 把矩阵X加入一列0元素
    # X = np.insert(X, [0], values=np.ones([rows, 1]), axis=1)
    X = np.insert(X, 0, values=np.ones([rows, ]), axis=1)  # shape [sample_num, 1 + feature_num]

    # 把矩阵X和all_theta转换为numpy矩阵
    X = np.array(X).reshape([rows, 1 + params])  # shape [sample_num, 1 + feature_num]
    all_theta = np.array(all_theta).reshape([num_labels, 1 + params])  # shape [num_labels, 1 + feature_num]

    # 计算样本属于每一类的概率
    h = sigmoid(np.dot(X, all_theta.T))  # shape [sample_num, num_labels]

    # 找到每个样本中预测概率最大的值对应的索引
    h_argmax = np.argmax(h, axis=1)  # shape [sample_num, 1]

    # 索引+1才是对应的预测标签
    h_argmax = h_argmax + 1  # shape [sample_num, 1]

    return h_argmax  # shape [sample_num, 1]


y_pred = predict_all(data['X'], all_theta)  # shape [sample_num, ]
correct = [1 if a == b else 0 for (a, b) in zip(y_pred, data['y'])]  # shape [sample_num, ]
accuracy = (sum(map(int, correct))) / float(len(correct))
print('accuracy = {}%'.format(accuracy * 100))