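# PCA手写数字降维_特征值 (PCA dimensionality reduction of handwritten digits - eigenvalues)

# PCA手写数字降维_特征值_02 (PCA dimensionality reduction of handwritten digits - eigenvalues, 02)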

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from mpl_toolkits.mplot3d import Axes3D

def get_data():
    """Load the scikit-learn handwritten-digits dataset.

    Returns:
        tuple: ``(x_data, y_data)`` where ``x_data`` is the ``data``
        attribute and ``y_data`` the ``target`` attribute of the object
        returned by ``load_digits()``.
    """
    digits = load_digits()
    return digits.data, digits.target

def zeroMean(dataMat):
    """Centre every column of ``dataMat`` on zero.

    Args:
        dataMat: 2-D array-like with one sample per row; the mean is taken
            along axis 0 (i.e. per column).

    Returns:
        tuple: ``(newData, meanVal)`` where ``newData`` is ``dataMat`` with
        the per-column mean subtracted and ``meanVal`` is that mean, so the
        original data can be restored as ``newData + meanVal``.
    """
    meanVal = np.mean(dataMat, axis=0)
    newData = dataMat - meanVal
    return newData, meanVal
def draw(data):
    """Show a scatter plot of the first two columns of ``data``.

    Args:
        data: array indexed as ``data[:, 0]`` (x-axis values) and
            ``data[:, 1]`` (y-axis values).

    The call blocks in ``plt.show()`` until the figure window is closed
    (when an interactive matplotlib backend is in use).
    """
    plt.scatter(data[:, 0], data[:, 1])
    plt.show()

def pca(dataMat, top):
    """Reduce ``dataMat`` from n features to its ``top`` principal components.

    Args:
        dataMat: 2-D array with one sample per row and one feature per
            column.
        top: number of leading principal components to keep.

    Returns:
        tuple: ``(lowDDataMat, reconMat)``, both ``np.matrix`` instances
        (kept for backward compatibility with existing callers):

        * ``lowDDataMat`` - the centred data projected onto the ``top``
          eigenvectors with the largest eigenvalues, one column per
          component, ordered by decreasing eigenvalue.
        * ``reconMat`` - the projection mapped back into the original
          feature space with the column means added back.
    """
    newData, meanVal = zeroMean(dataMat)

    # rowvar=False: columns are variables (features), rows are observations.
    # atleast_2d keeps the single-feature case (0-d covariance) working.
    covMat = np.atleast_2d(np.cov(newData, rowvar=False))

    # A covariance matrix is symmetric, so use eigh: it always yields real
    # eigenvalues and orthonormal eigenvectors, whereas the general solver
    # eig may return complex pairs for degenerate spectra.
    eigVals, eigVects = np.linalg.eigh(covMat)

    # Indices of the `top` largest eigenvalues, largest first.
    topIndices = np.argsort(eigVals)[::-1][:top]

    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0; with a
    # matrix operand `*` below is a matrix product, not element-wise.
    n_eigVect = np.asmatrix(eigVects[:, topIndices])

    lowDDataMat = newData * n_eigVect
    reconMat = (lowDDataMat * n_eigVect.T) + meanVal
    return lowDDataMat, reconMat

def main():
    """Train an MLP on the digits data and plot PCA projections of it.

    Steps:
        1. Load the data via ``get_data()``.
        2. Fit an ``MLPClassifier`` on the training part of a
           ``train_test_split`` (the held-out part is not used here).
        3. Predict a label for every sample in the full data set.
        4. Show a 2-D and then a 3-D scatter plot of the PCA-reduced data,
           coloured by the predicted label.

    Progress markers are printed before and after the run. Each
    ``plt.show()`` call displays one figure.
    """
    print("----------ing-------------")
    x_data, y_data = get_data()

    # Only the training split is needed; no evaluation is performed here.
    x_train, _x_test, y_train, _y_test = train_test_split(x_data, y_data)
    mlp = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500)
    mlp.fit(x_train, y_train)

    # The predictions depend only on the fitted model and x_data, so they
    # are computed once and reused to colour both plots.
    predictions = mlp.predict(x_data)

    # 2-D projection.
    lowDDataMat, _ = pca(x_data, 2)
    points = np.asarray(lowDDataMat)  # plain ndarray so [:, i] is 1-D
    plt.scatter(points[:, 0], points[:, 1], c=predictions)
    plt.show()

    # 3-D projection.
    lowDDataMat, _ = pca(x_data, 3)
    points = np.asarray(lowDDataMat)
    ax = plt.figure().add_subplot(111, projection='3d')
    ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=predictions, s=10)
    plt.show()
    print("----------end-------------")

# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

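# PCA手写数字降维_3d_03 (PCA dimensionality reduction of handwritten digits - 3D, 03)


# PCA手写数字降维_特征值_04 (PCA dimensionality reduction of handwritten digits - eigenvalues, 04)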