## 对数几率（Logistic）回归 - 数据集 + 代码实现

```python
import numpy as np
import h5py
def load_data(file_name):
    '''
    Load tab-separated training data from a text file.

    Each non-blank line holds two float features followed by an optional
    integer label: ``x1<TAB>x2[<TAB>label]``.  A line with fewer than three
    columns contributes a feature row but no label row, so the two returned
    arrays can differ in length when the file has unlabeled lines.

    NOTE(review): the ``def`` line and the per-line loop header were missing
    from this listing; the name ``load_data`` is an assumption -- TODO confirm
    against the original script.

    :param file_name: (string) path of the data file
    :return: feature_mat (ndarray, float) one row of two features per line,
             label_mat (ndarray, int) one single-element row per labeled line
    '''
    feature_data = []
    lable_data = []
    # The context manager closes the file even if a line fails to parse.
    with open(file_name) as fr:
        for line in fr:
            if not line.strip():
                # Blank lines carry no sample; skip them instead of crashing.
                continue
            line_arr = line.rstrip('\r\n').split('\t')
            feature_data.append([float(value) for value in line_arr[:2]])
            if len(line_arr) < 3:
                continue
            lable_data.append([int(line_arr[2])])
    feature_mat = np.array(feature_data, dtype=float)
    label_mat = np.array(lable_data, dtype=int)
    return feature_mat, label_mat

def propagate(w, b, X, Y):
    '''
    Run one forward/backward pass of logistic regression.

    Computes the sigmoid activations for X, the loss against Y, and the
    gradients of that loss with respect to w and b.

    NOTE(review): the ``grads = {`` opener and the return statement were
    missing from this listing.  Returning ``(grads, cost)`` is inferred from
    the training loop, which needs ``cost``, ``dW`` and ``db`` each
    iteration -- TODO confirm the order against the original script.

    :param w: weights; must have the same shape as ``X.T . (A - Y)``
    :param b: bias term added to every sample
    :param X: input features, one sample per row
    :param Y: labels, one row per sample
    :return: grads (dict with keys "dw" and "db"), cost (0-d loss value)
    '''
    m = Y.shape[0]
    A = sig(np.dot(X, w) + b)
    cost = error_rate(A, Y)
    dW = 1.0 / m * np.dot(X.T, (A - Y))
    db = 1.0 / m * np.sum(A - Y)
    # Internal sanity checks on the shapes/dtypes produced above.
    assert (dW.shape == w.shape)
    assert (db.dtype == float)
    assert (cost.shape == ())
    grads = {
        "dw": dW,
        "db": db
    }
    return grads, cost

def error_rate(h, lable_data):
    '''
    Compute the mean binary cross-entropy loss.

    :param h: (ndarray) predicted probabilities
    :param lable_data: (ndarray) true labels; its first dimension is the
                       sample count used for averaging
    :return: cost, the averaged loss squeezed to a 0-d value
    '''
    m = lable_data.shape[0]
    # A saturated sigmoid yields exactly 0.0 or 1.0; log(0) is -inf and
    # 0 * -inf is nan, which would poison the loss.  Keep the predictions
    # strictly inside (0, 1) before taking logs.
    eps = np.finfo(float).eps
    h = np.clip(h, eps, 1 - eps)
    cost = -1.0 / m * np.sum(
        lable_data * np.log(h) + (1 - lable_data) * np.log(1 - h)
    )
    return np.squeeze(cost)

def sig(x):
    '''
    Element-wise sigmoid function, 1 / (1 + exp(-x)).

    Evaluated as exp(-log(1 + exp(-x))) via ``np.logaddexp`` so that large
    negative inputs do not overflow inside ``exp``.

    :param x: scalar or ndarray (e.g. feature_data * w + b)
    :return: sigmoid of x, same shape as x
    '''
    return np.exp(-np.logaddexp(0, -x))

def shuffle(feature, label):
    '''
    Shuffle features and labels with the same random row permutation.

    :param feature: the input data, 2-D, one sample per row
    :param label: the labels, 2-D, one row per sample
    :return: shuffle_feature, shuffle_label -- the inputs with their rows
             reordered identically
    :raises ValueError: if feature and label have different row counts
    '''
    m = feature.shape[0]
    if label.shape[0] != m:
        # Otherwise a longer label array would be silently truncated and the
        # feature/label pairing lost.
        raise ValueError(
            "feature and label must have the same number of rows"
        )
    permutation = np.random.permutation(m)
    shuffle_feature = feature[permutation, :]
    shuffle_label = label[permutation, :]
    return shuffle_feature, shuffle_label

def lr_train_bgd(feature_data, lable_data, maxCycle, alpha):
    '''
    Train a logistic-regression model with batch gradient descent.

    The weights start from a standard-normal draw and the bias from 0; every
    iteration uses the whole data set.  The current loss is printed every 50
    iterations.

    NOTE(review): the statement that computes ``cost``/``dW``/``db`` was
    missing from this listing.  It is restored as a call to ``propagate``,
    assumed to return ``(grads, cost)`` with gradient keys "dw" and "db" --
    TODO confirm against the original script.

    :param feature_data: (ndarray) sample data, one sample per row
    :param lable_data: (ndarray) label data, one row per sample
    :param maxCycle: (int) number of gradient-descent iterations
    :param alpha: (float) learning rate
    :return: w (ndarray) learned weights, b learned bias
    '''
    w = np.random.randn(feature_data.shape[1], 1)
    b = 0
    for i in range(maxCycle):
        grads, cost = propagate(w, b, feature_data, lable_data)
        if i % 50 == 0:
            print(cost)
        w = w - alpha * grads["dw"]
        b = b - alpha * grads["db"]
    return w, b

def save_model(name, w, b):
    '''
    Save the model parameters to an HDF5 file.

    The file is opened in write mode and receives two datasets, 'w' and 'b'.

    :param name: (string) path of the HDF5 file to write
    :param w: weights, stored as dataset 'w'
    :param b: bias, stored as dataset 'b'
    '''
    # The context manager closes the file even if create_dataset raises.
    with h5py.File(name, 'w') as f:
        f.create_dataset('w', data=w)
        f.create_dataset('b', data=b)

if __name__ == "__main__":
    # 1. Load data
    # NOTE(review): the statement that reads the training set into
    # feature_data / lable_data is missing from this listing, so the shuffle
    # below raises NameError until it is restored -- TODO confirm against
    # the original script.
    feature_data, lable_data = shuffle(feature_data, lable_data)
    # 2. Train the model
    print("------2. train------")
    w, b = lr_train_bgd(feature_data, lable_data, 1000000, 0.0001)
    # 3. Save the final model
    print("------3. save model------")
    save_model("weight", w, b)

```

```python
import numpy as np
import h5py
import train as at
import matplotlib.pyplot as plt
def load_model(name):
    '''
    Load the parameters 'w' and 'b' from an HDF5 file.

    NOTE(review): the ``def`` line was missing from this listing; the name
    ``load_model`` is an assumption -- TODO confirm against the original
    script.

    :param name: h5py filename (string)
    :return: w, b -- the datasets 'w' and 'b' as numpy arrays
    '''
    # The datasets are copied into arrays before the file closes; the
    # context manager closes it even if a dataset is missing.
    with h5py.File(name, 'r') as W_file:
        w = np.array(W_file['w'])
        b = np.array(W_file['b'])
    return w, b

def predict(w, b, feature_data):
    '''
    Classify samples with a trained logistic-regression model.

    :param w: weights
    :param b: bias
    :param feature_data: input features, one sample per row
    :return: boolean array, True where the sigmoid output exceeds 0.5
    '''
    probability = at.sig(np.dot(feature_data, w) + b)
    return probability > 0.5

if __name__ == "__main__":
# 1. 导入数据