• Logistic Regression
  • 1. Initialization init(n_iter=100, eta=0.0001, gd='bgd')
  • 2. Logistic (sigmoid) function sigmoid(fx)
  • 3. Training fit(TrainData)
  • 4. Batch gradient descent
  • 5. Stochastic gradient descent
  • 6. Computing the loss costFunction()
  • 7. Predicting the class of a single sample predict(testData)
  • 8. Computing accuracy accuracy(TestData)
  • 9. Plotting the decision boundary plotDecisionBoundary(TrainData)
• Plotting loss-function contours and a 3D surface
  • 1. Computing losses for the recorded weights calCost(data,warr)
  • 2. Plotting the contour map plotContour()
  • 3. Plotting the 3D surface plot3D()


Logistic Regression

I spent a week writing logistic regression, gradually adding SGD, L2 and L1 regularization, plotting the decision boundary for two-feature data, and plotting contours of the loss function.
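
All the snippets below are methods of a single class. Here is a minimal skeleton with the imports they rely on; the class name LogisticRegression and this exact import list are my assumptions, they are not spelled out in the post:

import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D  # needed on older matplotlib to register the '3d' projection


class LogisticRegression:
    # the __init__, sigmoid, fit, ... methods from the sections below go here
    pass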

1. Initialization init(n_iter=100, eta=0.0001, gd='bgd')

The logistic regression class is initialized with defaults of 100 iterations, a learning rate of 0.0001, batch gradient descent for training, no decision-boundary plotting, and no regularization (L2 and L1 can be selected through the regular parameter; for L1, the loss-function term was written first and the training update was added later).
Training can be switched to stochastic gradient descent, and the changing decision boundary can be plotted during training (for data with more than two features, only the plane spanned by the first two features is shown).

# Initialization
def __init__(self, n_iter=100, eta=0.0001, gd='bgd', plot=False, regular='none', lamda=1, showcost=False):
    self.n_iter = n_iter      # number of iterations
    self.eta = eta            # learning rate
    self.gd = gd              # training method ('bgd' or 'sgd')
    self.plot = plot          # plot the decision boundary as it changes
    self.regular = regular    # regularization ('none', 'l1' or 'l2')
    self.lamda = lamda        # regularization strength
    self.showcost = showcost  # print iteration count and cost during training

2. Logistic (sigmoid) function sigmoid(fx)

The logistic (log-odds) function squashes the prediction into the range (0, 1); it is also monotonic and differentiable.

# Logistic (log-odds) function
def sigmoid(self,fx):
    return 1/(1+np.exp(-1 * fx))
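
For large negative fx, np.exp(-fx) overflows and numpy emits a RuntimeWarning. A hardened variant (a sketch, not the original code) clips the logit first:

def sigmoid(self, fx):
    # clip the logit to a range where np.exp cannot overflow a float64
    fx = np.clip(fx, -500, 500)
    return 1 / (1 + np.exp(-fx))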

3. Training fit(TrainData)

The loss function is $L(w) = -\frac{1}{m}\sum_{i=1}^{m}\big(y_i\ln[h(x_i)] + (1-y_i)\ln[1-h(x_i)]\big)$.
Taking the partial derivative gives $\frac{\partial L(w)}{\partial w} = -\frac{1}{m}\sum_{i=1}^{m}[y_i - h(x_i)]\,x_i$.
The weights are updated as $w = w - \alpha\,\frac{1}{m}\sum_{i=1}^{m}[h(x_i) - y_i]\,x_i$.
(Training can use either batch or stochastic gradient descent, and the plot parameter set at initialization controls whether the changing decision boundary is displayed.)

# Training
def fit(self, TrainData):

    self.datanum = len(TrainData)               # number of training samples
    self.featnum = len(TrainData.columns) - 1   # number of features
    self.label = np.array(TrainData.label)      # ground-truth labels
    self.data = np.array(TrainData[TrainData.columns.tolist()[0:self.featnum]])  # datanum×featnum
    self.data_T = self.data.transpose()         # featnum×datanum
    self.weights = np.zeros((1, self.featnum))  # initial weights, 1×featnum
    self.warr = []                              # weights recorded at every iteration
    self.carr = []                              # cost history
    # batch gradient descent
    if self.gd == 'bgd':
        self.BatchGradientDescent()
    # stochastic gradient descent
    if self.gd == 'sgd':
        self.StochasitcGradientDescent()

With L2 regularization the objective becomes $E(w) = L(w) + \lambda\sum_{j=1}^{n} w_j^2$, with derivative $\frac{\partial E(w)}{\partial w} = -\frac{1}{m}\sum_{i=1}^{m}[y_i - h(x_i)]\,x_i + 2\lambda\sum_{j=1}^{n} w_j$.
With L1 regularization it becomes $E(w) = L(w) + \lambda\sum_{j=1}^{n}|w_j|$, with derivative $\frac{\partial E(w)}{\partial w} = -\frac{1}{m}\sum_{i=1}^{m}[y_i - h(x_i)]\,x_i + \sum_{j=1}^{n} k_j\lambda$, where $k_j = 1$ if $w_j > 0$, $k_j = -1$ if $w_j < 0$, and $k_j = 0$ if $w_j = 0$.
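
The per-weight loops in the training code below compute exactly these penalty gradients; for reference, the same thing can be written in vectorized form (a sketch, assuming self.weights has shape 1×featnum):

if self.regular == 'l2':
    # 2*lambda*sum(w_j), broadcast to every weight (matches the L2 formula above)
    penalty = 2 * self.lamda * self.weights.sum() * np.ones((1, self.featnum))
elif self.regular == 'l1':
    # lambda * sign(w_j); np.sign already returns 0 at w_j == 0
    penalty = self.lamda * np.sign(self.weights)
else:
    penalty = np.zeros((1, self.featnum))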

4. Batch gradient descent

def BatchGradientDescent(self):
    for n in range(self.n_iter):
        # sigmoid([1×featnum] × [featnum×datanum]) --> [1×datanum]
        # predictions, h(x) = 1/(1+e^(-wx))
        hx = self.sigmoid(np.dot(self.weights, self.data_T))

        # [1×datanum] - [1×datanum] --> [1×datanum]
        # error term h(x) - y
        loss = hx - self.label

        if self.regular == 'l2':
            # gradient of the L2 penalty, added to every weight, shape [1×featnum]
            penalty = np.ones((1, self.featnum)) * 2 * self.lamda * self.weights.sum()

        elif self.regular == 'l1':
            # gradient of the L1 penalty: lambda * sign(w_j) for each weight
            penalty = []
            for w in self.weights[0]:
                if w != 0:
                    p = self.lamda * w / np.abs(w)
                else:
                    p = 0
                penalty.append(p)
            penalty = np.array(penalty)
        elif self.regular == 'none':
            penalty = np.zeros((1, self.featnum))

        # ([1×datanum] × [datanum×featnum] + [1×featnum]) / datanum --> [1×featnum]
        # ([h(x)-y]x + R'(w)) / m
        gradient = (np.dot(loss, self.data) + penalty) / self.datanum

        # [1×featnum] - eta*[1×featnum] --> [1×featnum]
        self.weights = self.weights - self.eta * gradient
        self.warr.append(self.weights[0].tolist())
        # print iteration count and cost
        if n % 100 == 0 and self.showcost:
            self.cost = self.costFunction()
            print("iteration", n, "cost:", self.cost)

5. Stochastic gradient descent

def StochasitcGradientDescent(self):
    for n in range(self.n_iter):
        # pick a random sample index x
        x = random.randint(0, self.datanum - 1)
        datax = self.data[x]             # [1×featnum]
        datax_T = datax.transpose()      # [featnum×1]

        # sigmoid([1×featnum] × [featnum×1]) --> [1×1]
        hxx = self.sigmoid(np.dot(self.weights, datax_T))

        # [1×1] - [1×1] --> [1×1]
        lossx = hxx - self.label[x]

        if self.regular == 'l2':
            # gradient of the L2 penalty, added to every weight, shape [1×featnum]
            penalty = np.ones((1, self.featnum)) * 2 * self.lamda * self.weights.sum()

        elif self.regular == 'l1':
            # gradient of the L1 penalty: lambda * sign(w_j) for each weight
            penalty = []
            for w in self.weights[0]:
                if w != 0:
                    p = self.lamda * w / np.abs(w)
                else:
                    p = 0
                penalty.append(p)
            penalty = np.array(penalty)

        elif self.regular == 'none':
            penalty = np.zeros((1, self.featnum))

        # ([1×1] × [1×featnum] + [1×featnum]) --> [1×featnum]
        gradientx = (lossx * datax) + penalty

        # [1×featnum] - eta*[1×featnum] --> [1×featnum]
        self.weights = self.weights - self.eta * gradientx
        self.warr.append(self.weights[0].tolist())
        # stop early once the cost drops below 0.1
        if self.costFunction() < 0.1:
            break
        # print iteration count and cost
        if n % 100 == 0 and self.showcost:
            self.cost = self.costFunction()
            print("iteration", n, "cost:", self.cost)

6. Computing the loss costFunction()

# Compute the loss
def costFunction(self):
    # sigmoid([1×featnum] × [featnum×datanum]) --> [1×datanum]
    h = self.sigmoid(np.dot(self.weights, self.data_T))

    # loss: J(w) = L(w) + lamda*R(w)
    # L(w) = -1/datanum * [y*ln(h(x)) + (1-y)*ln(1-h(x))]

    # R(w) = ||w||_1, sum of absolute weights
    if self.regular == 'l1':
        # lamda*1 --> [1×1]
        C = self.lamda * np.abs(self.weights).sum()  # L1 penalty

        # ([1×datanum]×[datanum×1]) + ([1×datanum]×[datanum×1]) --> [1×1]
        cost = (-1 / self.datanum) * (np.dot(np.log(h), self.label) + np.dot(np.log(1 - h), 1 - self.label)) + C

    # R(w) = ||w||_2^2, sum of squared weights
    elif self.regular == 'l2':
        # lamda * ([1×featnum] × [featnum×1]) --> [1×1]
        C = self.lamda * np.dot(self.weights, self.weights.transpose())  # L2 penalty
        # ([1×datanum]×[datanum×1]) + ([1×datanum]×[datanum×1]) --> [1×1]
        cost = (-1 / self.datanum) * (np.dot(np.log(h), self.label) + np.dot(np.log(1 - h), 1 - self.label)) + C

    elif self.regular == 'none':
        cost = (-1 / self.datanum) * (np.dot(np.log(h), self.label) + np.dot(np.log(1 - h), 1 - self.label))

    return cost
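
One practical caveat: once the model becomes confident, h can reach exactly 0 or 1 and np.log returns -inf, so the printed cost turns into inf or nan. A small hardening sketch (not in the original); the unregularized branch would become:

# clip predicted probabilities away from 0 and 1 so the logs stay finite
eps = 1e-12
h = np.clip(h, eps, 1 - eps)
cost = (-1 / self.datanum) * (np.dot(np.log(h), self.label)
                              + np.dot(np.log(1 - h), 1 - self.label))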

7. Predicting the class of a single sample predict(testData)

The sample is classified as 1 when $w \cdot x > 0$ and as 0 when $w \cdot x < 0$.

# Predict a single sample
def predict(self,testData):
    flag = np.dot(self.weights,testData)
    if flag > 0:
        pred = 1
    else:
        pred = 0
    return pred
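
predict expects the test features as a featnum×1 column vector (the shape built by accuracy below), so that np.dot(self.weights, testData) yields a 1×1 result. A tiny usage sketch, reusing the model_lr instance trained above with made-up feature values:

# classify one two-feature sample; reshape it into a column vector first
sample = np.array([1.5, -0.3]).reshape(-1, 1)
print(model_lr.predict(sample))  # prints 1 or 0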

8. Computing accuracy accuracy(TestData)

# Accuracy on a data set
def accuracy(self,TestData):
    num = 0
    for i in range(len(TestData)):
        temp = np.array(TestData.iloc[i][0:len(TestData.columns)-1]).reshape(len(TestData.columns)-1,1)
        if self.predict(temp)==TestData.label.tolist()[i]:
            num = num + 1
    return num/len(TestData)
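
Continuing the sketch from the training section, accuracy can be checked on a held-out set built the same way (again assuming the LogisticRegression class name and the feature-columns-then-'label' layout):

# a separate test set drawn from the same two synthetic clusters
test_pos = np.random.randn(20, 2) + 2
test_neg = np.random.randn(20, 2) - 2
TestData = pd.DataFrame(np.vstack([test_pos, test_neg]), columns=['x1', 'x2'])
TestData['label'] = [1] * 20 + [0] * 20

print(model_lr.accuracy(TestData))  # fraction of correctly classified samples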

9. Plotting the decision boundary plotDecisionBoundary(TrainData)

def plotDecisionBoundary(self, TrainData):
    fig = plt.figure(figsize=(10, 8))
    plt.xlim(-4, 4)   # x-axis range
    plt.ylim(-4, 4)   # y-axis range
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title('decision boundary')
    x1 = np.arange(-4, 4, 1)
    x2 = -1 * self.weights[0][0] / self.weights[0][1] * x1
    plt.scatter(TrainData[TrainData.columns[0]], TrainData[TrainData.columns[1]], c=TrainData['label'], s=30)
    plt.plot(x1, x2)
    plt.show()

Plotting loss-function contours and a 3D surface

For data with only two features, the loss can be visualized as a function of the two weights, both as a contour map and as a 3D surface.

1. Computing losses for the recorded weights calCost(data,warr)

def calCost(self, data, warr):
    datanum = len(data)
    featnum = len(data.columns) - 1
    label = np.array(data.label)  # ground-truth labels
    data = np.array(data[data.columns.tolist()[0:featnum]])
    self.w1 = []
    self.w2 = []
    for i in range(len(warr)):
        self.w1.append(warr[i][0])
        self.w2.append(warr[i][1])
    # keep only the first 1000 recorded weights so the grid does not get too large
    if len(warr) > 1000:
        self.w1 = self.w1[0:1000]
        self.w2 = self.w2[0:1000]

    self.xd = np.array(self.w1).min()  # lower x limit
    self.xu = np.array(self.w1).max()  # upper x limit
    self.yd = np.array(self.w2).min()  # lower y limit
    self.yu = np.array(self.w2).max()  # upper y limit
    # turn w1, w2 into a grid
    self.w1, self.w2 = np.meshgrid(np.array(self.w1), np.array(self.w2))
    self.cost = 0
    for i in range(datanum):
        fx = self.w1 * data[i][0] + self.w2 * data[i][1]
        hx = 1 / (1 + np.exp(-1 * fx))
        self.cost = self.cost - (label[i] * np.log(hx) + (1 - label[i]) * np.log(1 - hx)) / datanum
        if i % 100 == 0:
            print("sample", i, "of", datanum, ", please wait")
    self.regular_cost = np.abs(self.w1) + np.abs(self.w2)

2. Plotting the contour map plotContour()

def plotContour(self):
    # self.calCost(data, warr)
    plt.figure(figsize=(8, 8))
    plt.xlim(self.xd, self.xu)
    plt.ylim(self.yd, self.yu)
    # contours of the original loss L(w)
    P = plt.contour(self.w1, self.w2, self.cost, 10)
    plt.clabel(P, alpha=0.75, cmap='jet', inline=1, fontsize=10)
    # contours of the regularization term R(w)
    C = plt.contour(self.w1, self.w2, self.regular_cost, 1)
    plt.clabel(C, alpha=0.75, cmap='jet', inline=1, fontsize=10)
    plt.show()

3. Plotting the 3D surface plot3D()

def plot3D(self):
    #self.calCost(data,warr)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(self.w1,self.w2,self.cost,rstride=3,cstride=3,cmap=cm.jet)
    plt.show()
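
Putting the three visualization methods together (a sketch that reuses the model_lr instance and TrainData from the earlier usage example; it only makes sense for two-feature data):

# compute the loss over the grid of weights visited during training, then plot
model_lr.calCost(TrainData, model_lr.warr)
model_lr.plotContour()
model_lr.plot3D()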

I found that training with SGD + L1 does not converge: the loss keeps oscillating and the accuracy swings widely, and I am not sure whether there is a bug in my implementation. The contour plot also looks odd even though it renders; I will try the same visualization on other models later.