python核密度分析项目 python 核密度_python 密度聚类 使用

python核密度分析项目 python 核密度_python 聚类_02

python核密度分析项目 python 核密度_python 密度聚类 使用_03

python核密度分析项目 python 核密度_python核密度分析项目_04

python核密度分析项目 python 核密度_python 聚类_05

python核密度分析项目 python 核密度_python核密度分析项目_06

python核密度分析项目 python 核密度_sklearn 聚类_07

# Author: Gael Varoquaux
# License: BSD 3-Clause or CC-0
#
# NOTE: this snippet had lost all of its line breaks, which turned the whole
# script into a single comment followed by a syntax error. It is restored here
# as runnable code.

import matplotlib.pyplot as plt
import numpy as np

from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances

# Fix the global NumPy seed so every run draws the same noise.
np.random.seed(0)

# Generate waveform data: each sample has ``n_features`` points evaluated on
# an evenly spaced grid ``t`` covering [0, pi].
n_features = 2000
t = np.pi * np.linspace(0, 1, n_features)


def sqr(x):
    """Return a square wave: the element-wise sign (-1, 0 or +1) of cos(x)."""
    return np.sign(np.cos(x))


# 30 noisy samples for each of three (phase, amplitude) settings; the position
# of the setting in the list is used as the class label.
X = []
y = []
for i, (phi, a) in enumerate([(.5, .15), (.5, .6), (.3, .2)]):
    for _ in range(30):
        phase_noise = .01 * np.random.normal()
        amplitude_noise = .04 * np.random.normal()
        additional_noise = 1 - 2 * np.random.rand(n_features)
        # Make the noise sparse: zero everything with magnitude below .997
        additional_noise[np.abs(additional_noise) < .997] = 0

        X.append(12 * ((a + amplitude_noise)
                 * (sqr(6 * (t + phi + phase_noise)))
                 + additional_noise))
        y.append(i)

X = np.array(X)
y = np.array(y)

n_clusters = 3

labels = ('Waveform 1', 'Waveform 2', 'Waveform 3')

# Plot the ground-truth labelling
plt.figure()
plt.axes([0, 0, 1, 1])
for l, c, n in zip(range(n_clusters), 'rgb', labels):
    lines = plt.plot(X[y == l].T, c=c, alpha=.5)
    # Label only the first curve so the legend has one entry per class.
    lines[0].set_label(n)

plt.legend(loc='best')

plt.axis('tight')
plt.axis('off')
plt.suptitle("Ground truth", size=20)


# Plot the normalised average inter-class distances for each metric
for metric in ("cosine", "euclidean", "cityblock"):
    avg_dist = np.zeros((n_clusters, n_clusters))
    plt.figure(figsize=(5, 4.5))
    for i in range(n_clusters):
        for j in range(n_clusters):
            avg_dist[i, j] = pairwise_distances(X[y == i], X[y == j],
                                                metric=metric).mean()
    # Rescale so the largest entry becomes 1.
    avg_dist /= avg_dist.max()
    for i in range(n_clusters):
        for j in range(n_clusters):
            # imshow draws avg_dist[i, j] at x=j (column), y=i (row), so the
            # annotation is placed at (j, i) to sit on its own cell.
            plt.text(j, i, '%5.3f' % avg_dist[i, j],
                     verticalalignment='center',
                     horizontalalignment='center')

    plt.imshow(avg_dist, interpolation='nearest', cmap=plt.cm.gnuplot2,
               vmin=0)
    plt.xticks(range(n_clusters), labels, rotation=45)
    plt.yticks(range(n_clusters), labels)
    plt.colorbar()
    plt.suptitle("Interclass %s distances" % metric, size=18)
    plt.tight_layout()


# Plot the clustering results for each metric
for metric in ("cosine", "euclidean", "cityblock"):
    try:
        model = AgglomerativeClustering(n_clusters=n_clusters,
                                        linkage="average", metric=metric)
    except TypeError:
        # scikit-learn < 1.2 only accepts the old ``affinity`` keyword.
        model = AgglomerativeClustering(n_clusters=n_clusters,
                                        linkage="average", affinity=metric)
    model.fit(X)
    plt.figure()
    plt.axes([0, 0, 1, 1])
    for l, c in zip(np.arange(model.n_clusters), 'rgbk'):
        plt.plot(X[model.labels_ == l].T, c=c, alpha=.5)
    plt.axis('tight')
    plt.axis('off')
    plt.suptitle("AgglomerativeClustering(affinity=%s)" % metric, size=20)


plt.show()
# 作者: Gael Varoquaux
# 许可证: BSD 3-Clause or CC-0

import matplotlib.pyplot as plt
import numpy as np

from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances

# Fix the global NumPy seed so every run draws the same noise.
np.random.seed(0)

# Generate waveform data: each sample has ``n_features`` points evaluated on
# an evenly spaced grid ``t`` covering [0, pi].
n_features = 2000
t = np.linspace(0, 1, n_features) * np.pi


def sqr(x):
    """Return a square wave: the element-wise sign (-1, 0 or +1) of cos(x)."""
    cosine = np.cos(x)
    return np.sign(cosine)

# Build the data set: 30 noisy samples for each of three (phase, amplitude)
# settings. The position of the setting in the list is used as the class label.
waveform_params = [(.5, .15), (.5, .6), (.3, .2)]
samples = []
targets = []
for class_id, (phase, amplitude) in enumerate(waveform_params):
    for _ in range(30):
        # The order of the three random draws is kept fixed so that a seeded
        # run keeps producing the same data.
        phase_jitter = .01 * np.random.normal()
        amplitude_jitter = .04 * np.random.normal()
        spikes = 1 - 2 * np.random.rand(n_features)
        # Make the noise sparse: zero everything with magnitude below .997
        spikes[np.abs(spikes) < .997] = 0

        square_wave = sqr(6 * (t + phase + phase_jitter))
        samples.append(
            12 * ((amplitude + amplitude_jitter) * square_wave + spikes)
        )
        targets.append(class_id)

X = np.array(samples)
y = np.array(targets)

# Number of clusters, and one display name per true waveform class.
n_clusters = 3
labels = ('Waveform 1', 'Waveform 2', 'Waveform 3')

# Plot the ground-truth labelling: one colour per true class.
plt.figure()
plt.axes([0, 0, 1, 1])
for class_id, color, name in zip(range(n_clusters), 'rgb', labels):
    class_curves = plt.plot(X[y == class_id].T, c=color, alpha=.5)
    # Label only the first curve so the legend has one entry per class.
    class_curves[0].set_label(name)

plt.legend(loc='best')

plt.axis('tight')
plt.axis('off')
plt.suptitle("Ground truth", size=20)


# Plot the normalised average inter-class distances, one figure per metric.
for metric in ("cosine", "euclidean", "cityblock"):
    avg_dist = np.zeros((n_clusters, n_clusters))
    plt.figure(figsize=(5, 4.5))
    for i in range(n_clusters):
        for j in range(n_clusters):
            # Mean over all pairs (sample of class i, sample of class j).
            avg_dist[i, j] = pairwise_distances(X[y == i], X[y == j],
                                                metric=metric).mean()
    # Rescale so the largest entry becomes 1.
    avg_dist /= avg_dist.max()
    for i in range(n_clusters):
        for j in range(n_clusters):
            # imshow draws avg_dist[i, j] at x=j (column), y=i (row), so the
            # annotation is placed at (j, i) to sit on its own cell. The old
            # (i, j) placement was transposed and only looked right because
            # the matrix is symmetric.
            plt.text(j, i, '%5.3f' % avg_dist[i, j],
                     verticalalignment='center',
                     horizontalalignment='center')

    plt.imshow(avg_dist, interpolation='nearest', cmap=plt.cm.gnuplot2,
               vmin=0)
    plt.xticks(range(n_clusters), labels, rotation=45)
    plt.yticks(range(n_clusters), labels)
    plt.colorbar()
    plt.suptitle("Interclass %s distances" % metric, size=18)
    plt.tight_layout()


# Plot the clustering results, one figure per metric.
for metric in ("cosine", "euclidean", "cityblock"):
    # ``affinity`` was deprecated in scikit-learn 1.2 and removed in 1.4 in
    # favour of ``metric``. Try the current keyword first and fall back to the
    # old one on releases that do not know ``metric`` yet.
    try:
        model = AgglomerativeClustering(n_clusters=n_clusters,
                                        linkage="average", metric=metric)
    except TypeError:
        model = AgglomerativeClustering(n_clusters=n_clusters,
                                        linkage="average", affinity=metric)
    model.fit(X)
    plt.figure()
    plt.axes([0, 0, 1, 1])
    # One colour per predicted cluster; zip stops at the shorter sequence.
    for l, c in zip(np.arange(model.n_clusters), 'rgbk'):
        plt.plot(X[model.labels_ == l].T, c=c, alpha=.5)
    plt.axis('tight')
    plt.axis('off')
    plt.suptitle("AgglomerativeClustering(affinity=%s)" % metric, size=20)


# Display every figure created above.
plt.show()


脚本的总运行时间:(0分钟1.512秒)

估计的内存使用量: 9 MB


python核密度分析项目 python 核密度_python核密度分析项目_08

下载Python源代码: plot_agglomerative_clustering_metrics.py

下载Jupyter notebook源代码: plot_agglomerative_clustering_metrics.ipynb

python核密度分析项目 python 核密度_sklearn 聚类_09