python数学建模模板
- 工具使用模板
- 部分算法模板
工具使用模板
# -*- coding: utf-8 -*-
# @Time : 2021/9/5 15:03
# @Author : hzh
# @File : good_tools.py
# @Software : PyCharm
import matplotlib.pyplot as plt
import math
import numpy as np
import numpy.linalg as LA
from sympy import *
import sympy as sp
import scipy
from scipy.integrate import quad
from scipy.optimize import fsolve,fminbound,fmin,minimize
from scipy.optimize import linprog,curve_fit
from sklearn.preprocessing import minmax_scale,scale
import matplotlib.pyplot as plt
import cvxpy as cp
# 1.sympy数学符号计算
def fuhaojisuan():
#1.如果要引入变量 可以用
x,y,z=symbols('x y z')
#2.然后定义表达式
expr1 = y**2+sin(y)*cos(x)+sin(z)
#3.输出表达式
print("expr1=",expr1)
#4.带入特值
print("x=1,y=1,z=1时,expr1=",expr1.subs({x:1,y:1,z:1}))
#5.Sympy做符号函数画图参见P91
#6.求极限
print(limit(sin(x)/x,x,0))
print(limit(pow(1+1/x,x),x,oo))
#7.求导数
z = sin(x)+x**2*exp(y)
print("关于x的二阶偏导数为",diff(z,x,2))
#8.验证级数的求和
k,n= symbols('k n')
print(summation(k**2,(k,1,n)))
print(factor(summation(k**2,(k,1,n))))
print(summation(1/k**2,(k,1,oo)))
#9.泰勒展开
y = sin(x)
for k in range(3,8,2):
print(y.series(x,0,k))
plot(y,series(y,x,0,3).removeO(),series(y,x,0,5).removeO(),series(y,x,0,7).removeO(),(x,0,2),xlabel='x',ylabel='y')
#10.不定积分和定积分
print(integrate(sin(2*x),(x,0,pi)))
print(integrate(sin(x)/x,(x,0,oo)))
#11.求解代数方程(方程组)符号解
x,y=symbols('x y')
print(solve(x**3-1,x))
print(solve((x-2)**2*(x-1)**3,x))
print(roots((x-2)**2*(x-1)**3,x))#roots可以得到重根的信息
print(solve([x**2+y**2-1,x-y],[x,y]))#求解方程组用列表
#12.求微分方程的符号解(验证我们的微分方程有没有解错)
x = symbols('x')
y = symbols('y',cls=Function)
eq1 = diff(y(x),x,2)-5*diff(y(x),x)+6*y(x)#y"-5y'+6y=0
eq2 = diff(y(x),x,2)-5*diff(y(x),x)+6*y(x)-x*exp(2*x)
print("齐次方程的解为:",dsolve(eq1,y(x)))
print("非齐次方程的解为:",dsolve(eq2,y(x)))
# 2.数值计算
def shuzhijisuan():
#1.数值导数
def diff(f, dx=1E-8):#求一次导数
return lambda x: (f(x + dx) - f(x - dx)) / (2 * dx)
def diff2(f, dx=1E-8):#求二次导数推导过程见P99
return lambda x: (f(x + dx) + f(x - dx) - f(x) * 2) / (dx * dx)
#2.数值积分
#法一:定义梯形公式的函数
def trapezoid(f,n,a,b):
xi = np.linspace(a,b,n)
h = (b-a)/(n-1)
return h*(np.sum(f(xi))-(f(a)+f(b))/2)
#法二:定义辛普森公式的函数
def simpson(f,n,a,b):
xi,h =np.linspace(a,b,2*n+1),(b-a)/(2.0*n)
xe = [f(xi[i]) for i in range(len(xi)) if i%2==0]
xo = [f(xi[i]) for i in range(len(xi)) if i%2!=0]
return h*(2*np.sum(xe)+4*np.sum(xo)-f(a)-f(b))/3.0
a=0;b=1;n=1000
f=lambda x:np.sin(np.sqrt(np.cos(x)+x**2))
print("梯形法求得的积分为:",trapezoid(f,n,a,b))
print("辛普森公式法求得的积分为:",simpson(f,n,a,b))
#法三;使用scipy.integrate.quad模块函数
print("Scipy求得的积分为:",quad(f,a,b))
#3.非线性方程(组)数值解
#法一:二分法求根
def binary_search(f, eps, a, b):
c = (a + b) / 2
while np.abs(f(c)) > eps:
if f(a) * f(c) < 0:
b = c
else:
a = c
c = (a + b) / 2
return c
#法二:牛顿迭代法求根
def newton_iter(f, eps, x0, dx=1E-8):
def diff(f, dx=dx):
return lambda x: (f(x + dx) - f(x - dx)) / (2 * dx)
df = diff(f, dx)
x1 = x0 - f(x0) / df(x0)
while np.abs(x1 - x0) >= eps:
x1, x0 = x1 - f(x1) / df(x1), x1
return x1
f=lambda x:x**3+1.1*x**2+0.9*x-1.4
print("二分法求得的根为:",binary_search(f,1E-6,0,1))
print("牛顿迭代法求得的根为:",newton_iter(f,1E-6,0))
#法三:直接调用scipy.optimize.fsolve
print("直接调用Scipy求得的根为:",fsolve(f,0))
#4.求一元函数的极值点
f=lambda x:np.exp(x)*np.cos(2*x)
x0 = fminbound(f,0,3)#求区间[0,3]上的最小值
print("fminbound求得的极小点为:{},极小值为:{}".format(x0,f(x0)))
x0 = fmin(f,0)#求0附近的一个极小值点
print("fmin求得的极小点为:{},极小值为:{}".format(x0,f(x0)))
#5.多元函数的极值点
f=lambda x:100*(x[1]-x[0]**2)**2+(1-x[0])**2
x0 = minimize(f,[2.0,2.0])#求(2,2)附近的极小值
print("minimize求得的极小点为:{},极小值为:{}".format(x0.x,x0.fun))
# 3.线性代数
def xianxingdaishu():
#1.矩阵的运算,sympy符号形式使用
A=sp.Matrix([[1],[2],[3]])
B=sp.Matrix([[4],[5],[6]])
print("A的模为:",A.norm())
print("A的模的浮点数为:",A.norm().evalf())
print("A的转置矩阵为:",A.T)
print("A和B的点乘为:",A.dot(B))
print("A和B的叉乘为:",A.cross(B))
A = sp.Matrix(np.arange(1,17).reshape(4,4))
B = sp.eye(4)#单位矩阵
print("A的行列式为:",sp.det(A))
print("A的秩为:",A.rank())
print("A的转置矩阵为:",A.transpose())
print("A+B的逆矩阵为:",(A+B).inv())
print("A的平方为:",A**2)
print("A,B的乘积为:",A*B)
print("横连矩阵为:",A.row_join(B))
print("纵连矩阵为:",A.col_join(B))
print("A1为",A[0:2,0:2])
A2=A.copy()
A2.row_del(3)
print("A2为",A2)
print("A的行最简形以及所在的列为:\n",A.rref())
print("A的特征值为:",A.eigenvals())
print("A的特征向量为:",A.eigenvects())
#2.矩阵的运算,numpy数值形式使用,大多数用这个
a=np.arange(1,4)
b=np.arange(4,7)
print("a的二范数为:",np.linalg.norm(a))
print("a点乘b=",a.dot(b))
print("a,b的内积=",np.inner(a,b))
print("a叉乘b=",np.cross(a,b))
A = np.arange(1,17).reshape(4,4)
B = np.eye(4)#单位矩阵
print("A的行列式为:",np.linalg.det(A))
print("A的秩为:",np.linalg.matrix_rank(A))
print("A的转置矩阵为:",A.transpose())#等价于A.T
print("A+B的逆矩阵为:",np.linalg.inv(A+B))
print("A的平方为:",A.dot(A))
print("A,B的乘积为:",A.dot(B))
print("横连矩阵为:",np.c_[A,B])#与sympy的c,r相反
print("纵连矩阵为:",np.r_[A,B])
print("A1为:\n",A[0:2,0:2])
A2=A.copy()
A2=np.delete(A2,3,axis=0)#删除第四行,axis=0表示沿行增加的轴方向
print("A2为:\n",A2)
A = np.array([[1,-5,2,-3],[5,3,6,-1],[2,4,2,1]])
print("A的零空间(即基础解系)为:",scipy.linalg.null_space(A))
A = np.array([[0,-2,2],[-2,-3,4],[2,4,3]])
values,vectors=np.linalg.eig(A)
print("A的特征值为:",values)
print("A的特征向量为:",vectors)
# 4.线性/非线性规划
def guihua():
#1.使用scipy.optimize.linprog解线性规划
#scipy中线性规划的标准形为
# min z = c.T*x
# A*x <= b
# Aeq *x = beq
# Lb<=x<=UB
def biaozhun(c,A,b,Aeq,beq,Lb,Ub):
bounds = tuple(zip(Lb,Ub))
res = linprog(c,A,b,Aeq,beq,bounds)#默认每个决策变量的下界为0,上界为正无穷
# res = linprog(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,bounds=None, method='interior-point', callback=None,options=None, x0=None)#默认有的方法
print(res.fun)#显示目标函数的最小值
print(res.x)#显示最优解
print(res.slack)#为0时说明是紧约束,即约束条件实际上是等式约束,用于判断是否在边界取得极值
#python灵敏度分析做的不好,求参数变化时一般要带入模型重新计算
#2.整数规划有关的,推荐使用cvxpy模块 import cvxpy as cp 详细见P199
c = np.array([[4,8,7,15,12],[7,9,17,14,10],[6,9,12,8,7],[6,7,12,8,7],[6,9,12,10,6]])
x = cp.Variable((5,5),integer=True)
obj = cp.Minimize(cp.sum(cp.multiply(c,x)))
con = [0<=x,x<=1,cp.sum(x,axis=0,keepdims=True)==1,cp.sum(x,axis=1,keepdims=True)==1]
prob = cp.Problem(obj,con)
prob.solve(solver='GLPK_MI')
print("最优值为:",prob.value)
print("最优解为:\n",x.value)
#3.非整数非线性规划:使用scipy.optimize.minimize函数求解
#例题详细见P206
def obj2(x):
x1,x2,x3 =x
return (2+x1)/(1+x2)-3*x1+4*x3
LB=[0.1]*3
UB=[0.9]*3
bound = tuple(zip(LB,UB))
res = minimize(obj2,np.ones(3),bounds = bound)
print(res.fun,'\n',res.success,'\n',res.x)
#输出最优值,求解状态,最优解
#P209的飞行管理例题很精彩值得看
# 5.插值
def chazhi():
#1.拉格朗日插值
# x,y是两个具有相同长度的Numpy数组
def h(x,y,a):
s = 0.0
for i in range(len(y)):
t = y[i]
for j in range(len(y)):
if i!=j:
t*=(a-x[j])/(x[i]-x[j])
s += t
return s
#2.其余插值见P216 分段线性插值,分段二次插值,牛顿插值,样条插值
#3.scipy.interpolate实现插值
#一维插值
from scipy.interpolate import interp1d
x = np.arange(0,25,2)
y = np.array([12,9,9,10,18,24,28,27,25,20,18,15,13])
xnew = np.linspace(0,24,500)
f1 = interp1d(x,y)
y1 = f1(xnew)
f2 = interp1d(x,y,'cubic')
y2 = f2(xnew)
plt.subplot(121)
plt.plot(xnew,y1)
plt.subplot(122)
plt.plot(xnew,y2)
plt.savefig("figure1.png",dpi=500)
plt.show()
#二维插值:同理使用interp2d或griddate
#明天写
# 6.拟合
def nihe():
#1.线性采用最小二乘拟合
def two_match(X, Y):
return np.pinv(X * X.T) * X.T * Y
#2.非线性的拟合方式
#选择相对合适的拟合函数f
#将函数f带入代价函数中,形成非线性函数的极小化问题
#调用 scipy.optimize.curve_fit函数
y = lambda x,a,b,c:a*x**2+b*x+c
x0 = np.arange(0,1.1,0.1)
y0 = np.array([-0.447,1.978,3.28,6.16,7.08,7.34,7.66,9.56,9.48,9.30,11.2])
popt,pcov = curve_fit(y,x0,y0)
print("拟合的参数值为:",popt)
print("预测值分别为:",y(np.array([0.25,0.35]),*popt))
# 7.多元分析
def duoyuan():
#1.求某一类样本的均值向量和离差矩阵
def get_uL(a):
n,m = a.shape
u = np.sum(a,axis = 0)/n
L = np.zeros((n,m))
for i in range(n):
L = L+(a[i,:]-u)*(a[i,:]-u).T
return u,L
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(get_uL(a))
# 2.矩阵/特征向量 标准正交化
def orthonormalization(a):
b = np.zeros(a.shape)
for i in range(len(a)):
b[i] = a[i]
for j in range(0, i):
b[i] -= np.dot(a[i], b[j]) / np.dot(b[j], b[j]) * b[j]
for i in range(len(b)):
b[i] = b[i] / np.sqrt(np.dot(b[i], b[i]))
return b
# 3.主成分分析(降低维度,减少变量个数) a是输入的样本n*m的数据,lada是要求的积累贡献率
def myPCA(a, lada=0.85):
b = scale(a)
n, m = b.shape
r = np.zeros((m, m))
for i in range(m):
for j in range(m):
# print(b[:,i],'\n',b[:,j],'\n',i,j)
r[i, j] = b[:, i].T @ b[:, j] / n
e, u = LA.eig(r)
u = orthonormalization(u)
print(r, '\n', e, '\n', u)
sum = np.sum(e) * lada
x = 0
for i in range(len(e)): # 保留主要的维度
x = x + e[i]
if x >= sum:
e = e[:i + 1]
u = u[:i + 1, :]
break
print(e, '\n', u, '\n')
return r, e, u
# 4.因子分析(分解为若干因子的线性组合,找出不同变量的公共因子)(就是用新的意义的变量来表示原来的变量)
def factor_analysis(a, lada=0.85):
r, e, u = myPCA(a, lada) # 利用主成分分析得到 相关系数矩阵,及其特征值和特征向量
# print(e,'\n',u)
e = np.sqrt(e)
print(e)
A = np.array([np.array(e[i] * u[i, :]) for i in range(len(e))]).T # 求因子载荷矩阵
d = np.diagonal(r - A @ A.T) # 估计特殊因子的方差
print(A, '\n', d)
return A, d
# 8.灵敏度分析
def lingming():
X = np.array(
[[1, 7, 26], [1, 1, 29], [1, 11, 56], [1, 11, 51], [1, 7, 52], [1, 11, 55], [1, 3, 71], [1, 1, 31], [1, 2, 54],
[1, 21, 47], [1, 1, 40], [1, 11, 66], [1, 10, 68]])
Y = np.array([78.5, 74.3, 104.3, 87.6, 95.9, 109.2, 102.7, 72.5, 93.1, 115.9, 83.8, 113.3, 109.4]).T
# 1.最小二乘得到b,y
n, m = X.shape
def two_match(X, Y):
return LA.pinv(X.T @ X) @ X.T @ Y
b = two_match(X, Y)
y = X @ b
# 2.回归方程显著性检验
def xianzhuxin(X,Y,y):
def get_sse(Y, y):
return np.sum(np.multiply(Y - y, Y - y))
def get_sst(Y):
u = np.sum(Y) / n
return np.sum(np.multiply(Y - u, Y - u))
def get_ssr(Y, y):
u = np.sum(Y) / n
return np.sum(np.multiply(y - u, y - u))
sse = get_sse(Y,y)
sst = get_sst(Y)
ssr = sst-sse#这两种方法一样
ssr = get_ssr(Y,y)
F = (ssr/m)/(sse/(n-m-1))
from scipy.stats import f
alpha = 0.05
sa=f.isf(q=alpha, dfn=m, dfd=n-m-1)#sa为上a分位
# print(b,F,sa,F>sa)
return F<sa
#a=f.ppf(q=alpha, dfn=m, dfd=n) #单侧左分位点
# b=f.isf(q=alpha, dfn=m, dfd=n) #单侧右分位点
# print('单侧左、右分位点:a=%.4f, b=%.4f'%(a, b))
# a, b=f.interval(1-alpha, dfn=m, dfd=n) #双侧分位点
# print('双侧左、右分位点:a=%.4f, b=%.4f'%(a, b))
print(xianzhuxin(X,Y,y))
# 3.相关系数:相关系数是用以反映变量之间相关关系密切程度的统计指标(检验是否有多重线性关系,影响结果准确,如果有,岭回归解决)
def xgxs(X,Y,y):
def var(Y):#求方差
u = np.sum(Y)/n
return np.sum(np.multiply(Y-u,Y-u))
def cov(Y,y):#求协方差
u = np.sum(Y)/n
return np.sum(np.multiply(Y-u,y-u))
vY = var(Y)
vy = var(y)
# print(Y,y)
# print(cov(Y,y)/np.sqrt(vY*vy))
return cov(Y,y)/np.sqrt(vY*vy)
print(xgxs(X,Y,y))
# 4.相关指数:相关指数表示一元多项式回归方程拟合度的高低,或者说表示一元多项式回归方程估测的可靠程度的高低。
def xgzs(X,Y,y):
def get_sse(Y, y):
return np.sum(np.multiply(Y - y, Y - y))
def get_sst(Y):
u = np.sum(Y) / n
return np.sum(np.multiply(Y - u, Y - u))
sse = get_sse(Y, y)
sst = get_sst(Y)
r = np.sqrt(1-sse/sst)
return r
print(xgzs(X,Y,y))
# 5.相对误差法 误差积累频率 中值误差 0.6745
def xdwc(X,Y,y):
k = (Y-y)/Y
e0 = 0.6745*np.sqrt(np.sum(k*k))
return e0
print(xdwc(X,Y,y))
# 6.计算灵敏度,判断单个变量对结果的灵敏度
def lingmingdu(f,x,dx = 0.01):
y0 = []
for i in range(-100,100):
y0.append(f(x+dx*i))
x0 = [i for i in range(-100,100)]
plot(x0,y0)
# 7.蒙特卡罗 计算机随机模拟 判断变量对结果的灵敏度
# x是一维参数组成的向量,v是允许的方差组成的向量
def monte_carlo(f,x,v,N=10000):
N =10000
mu = x
cov = np.diag(v)
a = np.random.multivariate_normal(mu,cov,size = N)
y = np.array([f(a[i,:]) for i in range(N)])
# 9.智能算法
def zhineng():
# 1.模拟退火求最小值,s为初始状态,以列表的形式保存各个参数,cost为s的代价,越小越好,change为改变s状态的方式,T是温度,af是降温系数,esp是结束条件
def GASA(s, cost, change, T=1, af=0.999, esp=0.1 ** 30):
while (T < esp):
c = cost(s)
s0 = change(s)
c0 = cost(s)
if c0 < c:
s = s0
else:
if math.exp(-(c0 - c) / T) >= np.random.rand(1):
s = s0
T = T * af
return s, cost(s)
# 2.遗传算法 s为初始种群,以列表的形式保存各个个体,各个个体又以列表的形式保存各自参数,cost为s对环境的适应度,越小越适应,change为s中期交叉与随机变异的方式,G为进化的代数
def genetic(s, cost, change, G=10):
n = len(s)
for g in range(G):
s0 = change(s)
w0 = [cost(i) for i in s0]
x = np.argsort(w0)
s = s0[x[:n]]
return s[0], cost[s[0]]
if __name__ == '__main__':
lingming()
部分算法模板
# -*- coding: utf-8 -*-
# @Time : 2021/9/4 15:02
# @Author : hzh
# @File : model.py
# @Software : PyCharm
import matplotlib.pyplot as plt
import pandas as pd
import math
import numpy as np
from numpy.linalg import *
from matplotlib.pyplot import *
from sklearn.preprocessing import minmax_scale,scale
#1.读取2020年A题附件,返回第一列和第二列组合元素(x,y)的元素为元组的列表
def read():
data = pd.read_excel("附件.xlsx",header=None)#可以用参数usecols选择提取哪些列,更多详细见P60
data = data.values#提取其中的数据为ndarray的形式
return tuple(zip(list(data[0])[1:],list(data[1])[1:]))
#2.画折现图,并做相关处理
def plot_line_pic(x,y1,y2,x_label='时间/s',y_label={'温度/℃'},mytitle='电路板炉温曲线',label1 = '原始数据',label2 = '拟合曲线'):
rc('font',size=16)
# re('text',usetex=True)#安装了Latex的两个宏包才可以使用,没有就注释掉
rc('font',family='SimHei')
plot(x,y1, '--b',label = label1,linewidth=1.6, markeredgecolor='k', markersize=8)
plot(x, y2, '--r',label = label2,linewidth=1.6, markeredgecolor='k', markersize=8)
xlabel(x_label, fontweight='bold', fontsize=12)
ylabel(y_label, fontweight='bold', fontsize=12)
title(mytitle, fontweight='bold', fontsize=12)
legend()
plt.savefig('figure1.png')
show()
#3.保存数据到csv,x是向量列表[x1,x2...],y是列表[y1,y2,...],保存为两列的数据
def savedata(x,y):
pdata = pd.DataFrame(np.vstack((np.array(x),np.array(y))).T,columns = ['时间(s)','温度(摄氏度)'])
pdata.to_csv('Result1.csv', index = 0,encoding='utf_8_sig')#每行前面索引要去掉,中文显示编码
#4.二分法求根,f为函数,eps为允许的误差,a,b为根的所在区间
def binary_search(f,eps,a,b):
c = (a+b)/2
while np.abs(f(c))>eps:
if f(a)*f(c)<0: b=c
else : a=c
c = (a+b)/2
return c
#求数值导数的函数
def diff(f,dx=1E-8):
return lambda x:(f(x+dx)-f(x-dx))/(2*dx)
#5.牛顿迭代法求根,f为函数,eps为允许的误差,求x0附近的零点
def newton_iter(f,eps,x0,dx=1E-8):
def diff(f, dx = dx):
return lambda x: (f(x + dx) - f(x - dx)) / (2 * dx)
df = diff(f,dx)
x1 = x0-f(x0)/df(x0)
while np.abs(x1-x0)>=eps:
x1,x0 = x1 - f(x1)/df(x1),x1
return x1
#6.矩阵规格化
def normalization1(a):
# amax = np.argmax(a)#最大值下标索引,暂时没用
amax = np.max(a,axis=0)#按行获取最大值
amax = np.tile(amax,(a.shape[0],1))#扩展成n*m大小
amin = np.min(a,axis=0)#按行获取最小值
amin = np.tile(amin,(a.shape[0],1))
b = (a-amin)/(amax-amin)
b1 = minmax_scale(a)#调用函数规格化
print(amax,'\n',amin,'\n',b,'\n\n',b1)
return b#可参考P265
#7.矩阵标准化
def normalization2(a):
n = a.shape[0]
u = np.sum(a,axis=0)/n
u = np.tile(u,(n,1))
s = np.sqrt(np.sum(np.multiply(a-u,a-u),axis=0)/(n-0))
s = np.tile(s,(n,1))
b = (a-u)/s
b1 = scale(a,axis=0)#调用函数标准化
print(u,'\n',s,'\n',b,'\n',b1)
return b
#8.检验级比是否符合使用灰色GM(1,1)标准
def check_gray11(x):
n = len(x)
jibi = x[:-1]/x[1:] #求级比
bd1 = [jibi.min(),jibi.max()] #求级比的范围
bd2 = [np.exp(-2/(n+1)),np.exp(2/n+1)]
if bd2[0]<=bd1[0] and bd1[1]<=bd2[1]:return True
else:return False
#9.最小二乘拟合 XA=Y 求A
def two_match(X, Y):
return np.linalg.pinv(X.T @ X) @ X.T @ Y
#10.模拟退火求最小值,s为初始状态,以列表的形式保存各个参数,cost为s的代价,越小越好,change为改变s状态的方式,T是温度,af是降温系数,esp是结束条件
def GASA(s,cost,change,T=1,af=0.999,esp = 0.1**30):
while(T<esp):
c = cost(s)
s0 = change(s)
c0 = cost(s0)
if c0<c:
s =s0
else:
if math.exp(-(c0-c)/T)>=np.random.rand(1):
s = s0
T = T*af
return s,cost(s)
#11.遗传算法 s为初始种群,以列表的形式保存各个个体,各个个体又以列表的形式保存各自参数,cost为s对环境的适应度,越小越适应,change为s中期交叉与随机变异的方式,G为进化的代数
def genetic(s,cost,change,G=10):
n = len(s)
for g in range(G):
s0 = change(s)
w0 = [cost(i) for i in s0]
x = np.argsort(w0)
s = s0[x[:n]]
return s[0],cost[s[0]]
#12.熵值法 (根据评价矩阵对系统整体的影响来确定指标权重系数)
def entropy(b):
n = b.shape[0]
cs = b.sum(axis=0)#逐列求和
p = 1/cs*b
e = -(p*np.log(p)).sum(axis=0)/np.log(n)#计算熵值
g = 1-e
w = g/sum(g)#计算权重
F = p@w
return F
#13.主成分分析(降低维度,减少变量个数) a是输入的样本n*m的数据,lada是要求的积累贡献率
def myPCA(a,lada=0.85):
b = scale(a)
n,m = b.shape
r = np.zeros((m,m))
for i in range(m):
for j in range(m):
# print(b[:,i],'\n',b[:,j],'\n',i,j)
r[i,j] = b[:,i].T@b[:,j]/n
e,u = eig(r)
u = orthonormalization(u)
print(r,'\n',e,'\n',u)
sum = np.sum(e)*lada
x = 0
for i in range(len(e)):#保留主要的维度
x = x+e[i]
if x>=sum:
e = e[:i+1]
u = u[:i+1,:]
break
print(e,'\n',u,'\n')
return r,e,u
#14.矩阵/特征向量 标准正交化
def orthonormalization(a):
b = np.zeros(a.shape)
for i in range(len(a)):
b[i] = a[i]
for j in range(0, i):
b[i] -= np.dot(a[i], b[j]) / np.dot(b[j], b[j]) * b[j]
for i in range(len(b)):
b[i] = b[i] / np.sqrt(np.dot(b[i], b[i]))
return b
#15.因子分析(分解为若干因子的线性组合,找出不同变量的公共因子)(就是用新的意义的变量来表示原来的变量)
def factor_analysis(a,lada=0.85):
r,e,u= myPCA(a,lada)#利用主成分分析得到 相关系数矩阵,及其特征值和特征向量
# print(e,'\n',u)
e = np.sqrt(e)
print(e)
A = np.array([np.array(e[i]*u[i,:]) for i in range(len(e))]).T#求因子载荷矩阵
d = np.diagonal(r-A@A.T)#估计特殊因子的方差
print(A,'\n',d)
return A,d
if __name__ == '__main__':
# a = np.matrix([[123,324,234,5,23],[2352,321,23,324,523],[235,26,89,7687,223],[3215,236,23,3461,234]])
# print(type(a),a)
# X_train = np.array([[1., 1., 100.],[10., 1., 1.],[1., 1., 1.]])
a2 = np.array([[149.5,69.5,38.5],[162.5,77,55.5],[162.7,78.5,50.8],[162.2,87.5,65.5],[156.5,74.5,49.0],
[156.1,74.5,45.5],[172.0,76.5,51.0],[173.2,81.5,59.5],[159.5,74.5,43.5],[157.7,79,53.5]])
x = [1,2,3]
y1 = [1,2,3]
y2 = [3,4,5]
plot_line_pic(x,y1,y2)
# normalization2(X_train)