结果
均值漂移(Mean Shift):这些“模式”对应于数据分布中局部密度最大(local maxima)的点群。
均值漂移算法的优点是不需要事先确定簇的数量。
代码
# -*- coding: utf-8 -*-
# Mean shift clustering demo: estimate a kernel bandwidth from the data, fit
# scikit-learn's MeanShift, report how many clusters were found, and plot each
# cluster together with its centroid.
from itertools import cycle

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth

import utilities

# Load the data. The plotting code below indexes columns 0 and 1, so X is
# expected to be a 2-D array with at least two feature columns.
X = utilities.load_data('data_multivar.txt')

# Estimate the bandwidth, using every sample (n_samples=len(X)) rather than
# a subsample.
bandwidth = estimate_bandwidth(X, quantile=0.1, n_samples=len(X))

# Run mean shift and collect the per-sample labels and the cluster centers.
meanshift_estimator = MeanShift(bandwidth=bandwidth, bin_seeding=True)
meanshift_estimator.fit(X)
labels = meanshift_estimator.labels_
centroids = meanshift_estimator.cluster_centers_
num_clusters = len(np.unique(labels))

# %-formatting inside a print() call emits the same text on Python 2 and 3.
print("Number of clusters in input data = %d" % num_clusters)

###########################################################
# Plot the points and the centroids
plt.figure()

# Marker shapes for the different clusters. They are cycled so that clusters
# beyond the fourth are still drawn instead of being silently dropped.
markers = '.*xv'

for i, marker in zip(range(num_clusters), cycle(markers)):
    # Plot the points that belong to the current cluster.
    in_cluster = labels == i
    plt.scatter(X[in_cluster, 0], X[in_cluster, 1], marker=marker, color='k')

    # Plot the centroid of the current cluster.
    centroid = centroids[i]
    plt.plot(centroid[0], centroid[1], marker='o', markerfacecolor='k',
             markeredgecolor='k', markersize=15)

plt.title('Clusters and their centroids')
plt.show()