"""
@author Rakers
@date 2020.04.14
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
# Select the sans-serif family and make SimHei its preferred font
# (presumably so the Chinese plot titles used in this file render -- confirm).
plt.rcParams.update({
    'font.family': ['sans-serif'],
    'font.sans-serif': ['SimHei'],
})
# 手动写KMeans聚类算法
def RakersKMeans(data, n_clusters=3):
    """Cluster the rows of ``data`` with a hand-written k-means (Lloyd's algorithm).

    The first ``n_clusters`` rows of ``data`` are used as the initial centres,
    so the result is deterministic for a given input. Each iteration assigns
    every sample to its nearest centre (Euclidean distance, ties go to the
    lowest centre index) and then moves each centre to the per-feature mean of
    its members. Iteration stops once the centres no longer change.

    Args:
        data: Array-like of shape (n_samples, n_features).
        n_clusters: Number of clusters; must satisfy 1 <= n_clusters <= n_samples.

    Returns:
        A 1-D ``numpy.int32`` array of length n_samples. Labels are numbered in
        reverse, i.e. a sample belonging to centre ``j`` gets the label
        ``n_clusters - 1 - j``; this numbering is kept for backward
        compatibility with existing callers.

    Raises:
        ValueError: If ``data`` is not 2-D, contains non-finite values, or
            ``n_clusters`` is outside ``[1, n_samples]``.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2:
        raise ValueError("data must be a 2-D array of shape (n_samples, n_features)")
    n = len(data)
    k = n_clusters
    if k < 1 or k > n:
        raise ValueError("n_clusters must be between 1 and the number of samples")
    if not np.isfinite(data).all():
        # NaN/inf would produce NaN centres that never compare equal,
        # so the convergence test below could never succeed.
        raise ValueError("data must contain only finite values")

    # Copy so the centres never alias (or mutate) the caller's array.
    center = data[:k, :].copy()
    while True:
        # (n, k) matrix of Euclidean distances from every sample to every centre.
        diff = data[:, np.newaxis, :] - center[np.newaxis, :, :]
        dist = np.sqrt((diff ** 2).sum(axis=2))
        labels = np.argmin(dist, axis=1)

        # Build the new centres in a separate array; comparing against an
        # alias of the old centres would always report convergence.
        center_new = center.copy()
        for j in range(k):
            members = data[labels == j]
            # An empty cluster keeps its previous centre instead of becoming NaN.
            if len(members) > 0:
                # axis=0 gives one mean per feature rather than a single scalar.
                center_new[j, :] = members.mean(axis=0)

        if np.array_equal(center, center_new):
            break
        center = center_new

    # Reverse the label numbering (kept from the original implementation).
    return (k - 1 - labels).astype(np.int32)
if __name__ == "__main__":
    # Demo: cluster the iris data set with the hand-written k-means and with
    # scikit-learn's KMeans, saving one scatter plot per result.
    import os

    IMAGE_DIR = "../images"

    def _save_scatter(title, features, feature_names, colors):
        """Plot the first two feature columns coloured by ``colors`` and save as PNG."""
        # Start from a fresh figure so earlier scatters are not drawn underneath.
        plt.figure()
        plt.title(title)
        plt.scatter(features[:, 0], features[:, 1], c=colors)
        # The axes show feature columns, so label them with feature names
        # (the original used the species names from 'target_names').
        plt.xlabel(feature_names[0])
        plt.ylabel(feature_names[1])
        plt.savefig(IMAGE_DIR + "/" + title + ".png")
        plt.close()

    # Make sure the output directory exists before the first savefig call.
    os.makedirs(IMAGE_DIR, exist_ok=True)

    # Load the data.
    print('获取数据')
    iris = load_iris()
    data = iris['data']
    target = iris['target']
    feature_names = iris['feature_names']
    print('获取数据完成')

    # Ground-truth classes.
    print('真实类\n', list(target))
    print('真实聚类图')
    _save_scatter('真实聚类图', data, feature_names, target)

    # NOTE(review): the messages below say "petal length", but per scikit-learn's
    # documented iris feature order column 0 is sepal length. The runtime strings
    # (and therefore the output file names) are intentionally left unchanged.
    first_column = data[:, 0].reshape(-1, 1)

    # Hand-written k-means on the first feature column only.
    print('自主编写K-means算法 ,以鸢尾花花瓣长度数据做聚类')
    pre_y = RakersKMeans(first_column, n_clusters=3)
    print('自主编写K-means算法测试类\n', list(pre_y))
    _save_scatter('自主编写K-means算法 ,以鸢尾花花瓣长度数据做聚类', data, feature_names, pre_y)

    # scikit-learn KMeans on the first feature column only.
    print('用sklearn.cluster.KMeans，鸢尾花花瓣长度数据做聚类')
    model = KMeans(n_clusters=3)
    model.fit(first_column)
    pre_y = model.predict(first_column)
    print('KMeans算法测试类\n', list(pre_y))
    _save_scatter('用sklearn.cluster.KMeans，鸢尾花花瓣长度数据做聚类', data, feature_names, pre_y)

    # scikit-learn KMeans on all feature columns.
    print('用sklearn.cluster.KMeans，鸢尾花完整数据做聚类')
    model = KMeans(n_clusters=3)
    model.fit(data)
    pre_y = model.predict(data)
    print('KMeans算法测试类\n', list(pre_y))
    _save_scatter('用sklearn.cluster.KMeans，鸢尾花完整数据做聚类', data, feature_names, pre_y)