用梯度上升算法进行Logistic回归

$w = w + \alpha \nabla f(w)$（其中 $\alpha$ 为学习率，对应代码中的 `alpha`）

对应代码如下

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_classification


# Build a 2-class, 2-feature synthetic dataset; every feature is informative.
data_1, labels = make_classification(n_samples=400, n_features=2, n_informative=2,
                                     n_redundant=0, n_repeated=0, n_classes=2,
                                     random_state=42)
# Prepend a bias column of ones so weights[0] acts as the intercept.
# Derive the row count from the data instead of hard-coding 400, so the
# sample size only has to be changed in one place.
data_0 = np.ones((data_1.shape[0], 1))
data = np.hstack((data_0, data_1))


def sigmoid(inX):
    """Logistic (sigmoid) activation: maps any real input into (0, 1)."""
    return 1.0 / (1.0 + np.exp(-inX))


def grad_ascent(features=None, targets=None, alpha=0.001, iters=100):
    """Fit logistic-regression weights by full-batch gradient ascent.

    Maximizes the log-likelihood via the update
    ``w <- w + alpha * X^T (y - sigmoid(X w))``.

    Args:
        features: (m, n) design matrix whose first column is the bias of
            ones; defaults to the module-level ``data``.
        targets: length-m array of 0/1 class labels; defaults to the
            module-level ``labels``.
        alpha: learning rate (step size).
        iters: number of full-batch iterations.

    Returns:
        (n, 1) ndarray of fitted weights.
    """
    if features is None:
        features = data
    if targets is None:
        targets = labels
    # Plain ndarrays + '@' replace the deprecated np.mat / np.matrix API.
    X = np.asarray(features, dtype=float)
    y = np.asarray(targets, dtype=float).reshape(-1, 1)  # column vector
    n = X.shape[1]
    weights = np.ones((n, 1))  # initialize all weights to 1
    for _ in range(iters):
        # Predicted probabilities (sigmoid inlined: 1 / (1 + e^(-Xw))).
        h = 1.0 / (1.0 + np.exp(-(X @ weights)))
        error = y - h  # gradient of the log-likelihood w.r.t. Xw
        weights = weights + alpha * (X.T @ error)
    return weights


def plotBestFit(weights):
    """Scatter both classes and draw the fitted decision boundary."""
    dataArr = np.array(data)  # (m, 3): bias column + two features
    # Split samples by class with boolean masks instead of an index loop.
    pos = np.asarray(labels).astype(int) == 1  # positive samples
    neg = ~pos                                  # negative samples
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Positives as red squares, negatives as green dots.
    ax.scatter(dataArr[pos, 1], dataArr[pos, 2], s=30, c='red', marker='s')
    ax.scatter(dataArr[neg, 1], dataArr[neg, 2], s=30, c='green')
    # Boundary: w0 + w1*x + w2*y = 0  =>  y = (-w0 - w1*x) / w2.
    x = np.arange(-2.0, 2.0, 0.01)
    y = (-weights[0] - weights[1] * x) / weights[2]
    plt.axis([-4, 4, -4, 4])
    ax.plot(x, np.ravel(y))
    plt.title('BestFit')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


if __name__ == '__main__':
    # Train the classifier, then visualize the decision boundary.
    fitted_weights = grad_ascent()
    plotBestFit(fitted_weights)