// pca.h
#include<math.h>
#include<fstream>
#include<iostream>
#include<stdio.h>
#include<iomanip>
#include<math.h>
using namespace std;
// Container for a raw data set handed back by getdata(): its dimensions
// plus a pointer to the values.
struct sourcedata
{
    int m;         // row count (main prints this as the number of rows)
    int n;         // column count (main prints this as the number of columns)
    double **data; // the values; presumably m row pointers of n doubles each -- TODO confirm against getdata()
};

// Alias kept so that both spellings used by callers (sourcedata / SourceData) remain valid.
typedef struct sourcedata SourceData;
// Principal component analysis helper.
//
// Only the interface is visible here (the definitions live in pca.cpp), so the
// notes below restate the original author's comments and what the calling code
// in main demonstrates; anything beyond that is marked as an assumption.
class PCA
{
public:
    // m is the number of rows, n the number of columns of the data set.
    PCA(int m, int n);

    // Loads the external data set stored in `file`.
    SourceData getdata(const char *file);

    // Standardizes the data in `a` (main passes the raw data loaded by getdata).
    void standarddata(double **a);

    // Product of two vectors (presumably the inner product over `columns` or
    // `rows` elements -- TODO confirm in pca.cpp).
    double product(double *a, double *b);

    // Exchanges the values of x and y.
    void swap(double &x, double &y);

    // Computes the covariance matrix of `a`; main feeds the result to jcb().
    double **matrixproduct(double **a);

    // Sorts the eigenvalues in A; v holds the matching eigenvectors
    // (presumably reordered alongside A -- TODO confirm).
    void selectionsort(double *A, double **v);

    // Orthogonalizes the eigenvectors stored in v.
    void zhengjiao(double **v);

    // Solves for eigenvalues and eigenvectors. main reads the eigenvalues from
    // the diagonal of `a` afterwards and the eigenvectors from `v`; `eps` is
    // described by the caller as the termination precision of the Jacobi
    // method. `jt` is presumably the iteration limit -- TODO confirm.
    int jcb(double **a, double **v, double eps, int jt);

    // Extracts the principal components for the extraction ratio `getratio`.
    // main uses the return value as the reduced dimension and prints B as the
    // cumulative contribution rates of the eigenvalues in A.
    int selectcharactor(double *A, double getratio, double *B);

    // Computes the feature points after reduction to t dimensions from the
    // data x and the eigenvectors v.
    double **getProject(int t, double **x, double **v);

    // Saves the projected feature points `project` (t dimensions) to `projectfile`.
    void saveProject(const char *projectfile, double **project, int t);

    ~PCA(){}

private:
    int rows;    // presumably the m passed to the constructor -- TODO confirm
    int columns; // presumably the n passed to the constructor -- TODO confirm
};
// kpca.h
#include<math.h>
#include<fstream>
#include<iostream>
#include<stdio.h>
#include<iomanip>
#include<math.h>
#include<stdlib.h>
#include<time.h>
using namespace std;
// Kernel principal component analysis helper.
//
// Only the interface is visible here (the definitions live in kpca.cpp), so the
// notes below restate the original author's comments and what the calling code
// in main demonstrates; anything beyond that is marked as an assumption.
// Requires SourceData to be declared before this class.
class KPCA
{
public:
    // main passes the data set's row count as m and column count as n.
    KPCA(int m, int n);

    // Loads the external data set stored in `file`.
    SourceData getdata(const char *file);

    // Generates a random integer from n1 to n2.
    int randdef(int n1, int n2);

    // Obtains the parameter of the Gaussian radial basis function by
    // maximizing the feature extraction efficiency over random samples.
    // main passes the data, its row/column counts, the number of random
    // samples as `l`, and 100 / 800 as `left` / `right` (presumably the search
    // interval for the parameter -- TODO confirm).
    double getvar(double **testdata, int m, int n, int l, double left, double right);

    // Product of two vectors of `size` elements (presumably the inner product
    // -- TODO confirm).
    double product(double *a, double *b, int size);

    // Kernel function evaluated on x and y with parameter `var`; `sign`
    // presumably selects the kernel type (main uses 1 together with the
    // Gaussian parameter) -- TODO confirm.
    double kernel(double var, double *x, double *y, int sign);

    // Builds the kernel matrix for the data `a`.
    double **getkernelmatrix(double **a, double var, int sign);

    // Returns the corrected kernel matrix derived from K (presumably the
    // centered kernel matrix -- TODO confirm).
    double **modifykernelmatrix(double **K);

    // Solves for the eigenvalues and eigenvectors of the matrix `a`. main
    // prints the return value as the iteration count, treats -1 as failure,
    // and reads the eigenvalues from the diagonal of `a` afterwards.
    int jcb(double **a, double **v, double eps, int jt);

    // Orthogonalizes the eigenvectors stored in v.
    void zhengjiao(double **v);

    // Exchanges the values of x and y.
    void swap(double &x, double &y);

    // Selection sort of the eigenvalues A and eigenvectors v.
    void selectionsort(double *A, double **v);

    // Saves the eigenvalues A and eigenvectors v to `vectorfile`.
    void saveeigenvectors(double A[], double **v, const char *vectorfile);

    // Extracts features for the extraction ratio `getratio`. main uses the
    // return value as the number of retained components and prints B as the
    // cumulative contribution rates.
    int selectcharactor(double *A, double getratio, double *B);

    // Computes the projection onto the first t components; main passes the
    // corrected kernel matrix as x.
    double **getProject(int t, double **x, double **v);

    // Saves the projection `project` (t components) to `projectfile`.
    void saveProject(const char *projectfile, double **project, int t);

    ~KPCA(){}

private:
    int rows;    // presumably the m passed to the constructor -- TODO confirm
    int columns; // presumably the n passed to the constructor -- TODO confirm
};
// Note: pca.cpp and kpca.cpp are not included here for reasons of length.
// main.cpp
#include"pca.h"
#include"kpca.h"
// Allocates a size x size matrix as an array of row pointers, zero-filled.
static double **allocSquare(int size)
{
    double **mat = new double*[size];
    for (int r = 0; r < size; r++)
        mat[r] = new double[size]();
    return mat;
}

// Releases a matrix that was allocated row by row with new[] (see allocSquare).
static void freeRows(double **mat, int rowCount)
{
    if (!mat)
        return;
    for (int r = 0; r < rowCount; r++)
        delete[] mat[r];
    delete[] mat;
}

// Runs the PCA demo: loads test1.txt, reduces it and writes pcaproject.txt.
// eps is the termination precision of the Jacobi method, getratio the
// eigenvalue extraction ratio. Returns true when the projection was saved.
static bool runPca(double eps, double getratio)
{
    cout << "-----------------------pca------------------------" << endl;
    const char *File = "test1.txt";             // raw data file
    const char *projectfile = "pcaproject.txt"; // output file for the reduced data

    PCA pca(2, 3);                     // temporary object, only used to load the data
    sourcedata pp = pca.getdata(File); // load the external data
    double **x = pp.data;
    const int m = pp.m; // rows
    const int n = pp.n; // columns
    cout << "数据的行数为" << m << ",数据的列数为 " << n << endl;
    if (!x || m <= 0 || n <= 0)
    {
        // Nothing usable was loaded; allocating with a non-positive size would fail.
        cout << "error" << endl;
        return false;
    }

    double *A = new double[n]();  // eigenvalues
    double *B = new double[n]();  // cumulative contribution rates
    double **v = allocSquare(n);  // eigenvectors
    bool saved = false;

    PCA testpca(m, n);
    testpca.standarddata(x);               // standardize the raw data
    double **c = testpca.matrixproduct(x); // covariance matrix
    // NOTE(review): assumes PCA::jcb reports failure with -1, as the KPCA
    // variant does -- confirm in pca.cpp.
    if (testpca.jcb(c, v, eps, 100) == -1)
    {
        cout << "不能求得特征值和特征向量" << endl;
    }
    else
    {
        for (int k = 0; k < n; k++)
            A[k] = c[k][k];                                      // eigenvalues sit on the diagonal
        testpca.zhengjiao(v);                                    // orthogonalize the eigenvectors
        testpca.selectionsort(A, v);                             // sort eigenvalues and eigenvectors
        const int t = testpca.selectcharactor(A, getratio, B);   // t = dimension after reduction
        cout << "PCA降维后的维数:" << t << endl;
        cout << "排序后提取的特征值及对应的特征向量" << endl;
        for (int i = 0; i < t; i++) // retained eigenvalues
            printf("%13.7e ", A[i]);
        printf("\n\n");
        for (int i = 0; i < n; i++) // retained eigenvectors
        {
            for (int j = 0; j < t; j++)
                printf("%13.7e ", v[i][j]);
            printf("\n");
        }
        cout << "特征值的累计贡献率为" << endl;
        for (int i = 0; i < n; i++)
            cout << B[i] << " ";
        cout << endl;
        cout << "当提取效率是" << getratio << "时提取了前" << t << "个分量" << endl;
        if (t >= 1 && t <= n)
        {
            double **Project = testpca.getProject(t, x, v); // reduced feature points
            testpca.saveProject(projectfile, Project, t);   // write them to the text file
            saved = true;
        }
        else
        {
            // Do not save: there is no projection to write in this case.
            cout << "error" << endl;
        }
    }

    // Only buffers allocated in this function are released; how the matrices
    // returned by the PCA methods were allocated is not visible from here.
    delete[] A;
    delete[] B;
    freeRows(v, n);
    return saved;
}

// Runs the KPCA demo: loads test2.txt, writes eigen.txt and kpcaproject.txt.
// eps is the termination precision of the Jacobi method, getratio the
// eigenvalue extraction ratio. Returns true when the projection was saved.
static bool runKpca(double eps, double getratio)
{
    cout << endl<< "----------------------kpca------------------------" << endl;
    const int l = 50;                              // number of randomly drawn samples
    const char *File2 = "test2.txt";
    const char *eigenvectors = "eigen.txt";        // eigenvalue / eigenvector output file
    const char *projectfile2 = "kpcaproject.txt";  // output file for the reduced data

    KPCA kpca(3, 2);                           // temporary object, only used to load the data
    SourceData pdata = kpca.getdata(File2);    // load the external data
    double **x = pdata.data;
    const int m = pdata.m;
    const int n = pdata.n;
    if (!x || m <= 0 || n <= 0)
    {
        // Nothing usable was loaded; allocating with a non-positive size would fail.
        cout << "error" << endl;
        return false;
    }

    double *A = new double[m](); // eigenvalues
    double *B = new double[m](); // cumulative contribution rates
    bool saved = false;

    KPCA testkpca(m, n);
    // Gaussian kernel parameter, chosen via the extraction efficiency on random samples.
    const double gaussparameter = testkpca.getvar(x, m, n, l, 100, 800);
    cout << "高斯核参数: " << gaussparameter << endl;
    double **K = testkpca.getkernelmatrix(x, gaussparameter, 1); // kernel matrix
    double **KL = testkpca.modifykernelmatrix(K);                // corrected kernel matrix

    double **c = allocSquare(m); // working copy of KL, overwritten by jcb
    double **v = allocSquare(m); // eigenvectors
    for (int a = 0; a < m; a++)
        for (int j = 0; j < m; j++)
            c[a][j] = KL[a][j];

    const int iterations = testkpca.jcb(c, v, eps, 10000); // eigenvalues and eigenvectors
    cout << "计算特征值的迭代次数为" << iterations << endl;
    if (iterations == -1)
    {
        // Stop here: continuing would use eigenvalues that were never computed.
        cout << "不能求得特征值和特征向量" << endl;
    }
    else
    {
        for (int a = 0; a < m; a++)
            A[a] = c[a][a];                                      // eigenvalues sit on the diagonal
        testkpca.zhengjiao(v);                                   // orthogonalize the eigenvectors
        testkpca.saveeigenvectors(A, v, eigenvectors);
        testkpca.selectionsort(A, v);                            // sort eigenvalues and eigenvectors
        const int t = testkpca.selectcharactor(A, getratio, B);  // number of retained components
        cout << "特征值的累计贡献率是" << endl;
        for (int a = 0; a < m; a++)
            cout << B[a] << " ";
        cout << endl;
        cout << "当提取效率为" << getratio << "时提取了前" << t << "个分量" << endl;
        if (t >= 1 && t <= m)
        {
            double **Project = testkpca.getProject(t, KL, v); // reduced feature points
            testkpca.saveProject(projectfile2, Project, t);   // write them to the text file
            saved = true;
        }
        else
        {
            // Do not save: there is no projection to write in this case.
            cout << "error" << endl;
        }
    }

    // Only buffers allocated in this function are released; how the matrices
    // returned by the KPCA methods were allocated is not visible from here.
    delete[] A;
    delete[] B;
    freeRows(c, m);
    freeRows(v, m);
    return saved;
}

// Entry point: runs the PCA demo followed by the KPCA demo.
// Returns 0 when both projections were saved, 1 otherwise.
int main()
{
    const double eps = 0.000001; // termination precision of the Jacobi method
    const double getratio = 0.9; // eigenvalue extraction ratio

    const bool pcaOk = runPca(eps, getratio);
    const bool kpcaOk = runKpca(eps, getratio);
    return (pcaOk && kpcaOk) ? 0 : 1;
}