Python 关联Data python 关联图

转载

laojean 2023-08-31 07:45:45

文章标签 Python 关联Data python 可视化数据坐标轴 文章分类 Python 后端开发

python画图

关联图 correlation
偏差图 deviation
排序图 Ranking
分布图 Distribution
组成图 Composition
时间序列Time Series
变化图 Change
分组 Groups

关联图 correlation

典型的关联图有：折线图、散点图、相关矩阵

1.散点图 scatter

# Sample data: ten random 2-D points plus a 0/1 class label for each point.
x1 = np.random.randn(10, 2)
x2 = np.array([0, 1, 0, 1, 1, 1, 0, 1, 0, 1])
# Create the canvas.
plt.figure(figsize=(10, 6),  # figure size
           dpi=80,           # figure resolution
           facecolor='w',    # background colour
           edgecolor='k')    # border colour
# Draw the points; `c=x2` colours each point by its class label.
points = plt.scatter(x1[:, 0], x1[:, 1], s=50, c=x2)
# Decorate the existing figure.
# plt.gca().set(xlim=(), ylim=())  # would control the axis ranges
plt.xticks(fontsize=12)  # font size of the tick labels
plt.yticks(fontsize=12)
plt.xlabel('数据', fontsize=12)  # axis title and its font size
# The second title belongs to the y axis; calling xlabel twice would simply
# overwrite the first x-axis title and leave the y axis unlabelled.
plt.ylabel('范围', fontsize=12)
# Build the legend from the scatter itself (one entry per distinct label in
# `c`); a bare plt.legend() finds no labelled artists and draws nothing.
plt.legend(*points.legend_elements())
# Show the figure.
plt.show()

Python 关联Data python 关联图_数据

偏差图 deviation

典型的偏差图有：发散型条形图、面积图…

# These two lines only name the APIs; both are called without arguments here.
plt.hlines() # horizontal lines, used further down to draw horizontal bars
plt.vlines() # vertical lines, used further down to draw vertical bars

# Ten random values, sorted ascending and then centred on their mean so the
# sample splits into a negative part and a positive part.
x = np.sort(np.random.randn(10))
x = x - x.mean()

# One colour per value: red above zero, blue otherwise.
colors = ['red' if value > 0 else 'blue' for value in x]

plt.figure(figsize=(8, 4))
# One horizontal line per value, running from 0 to that value.
plt.hlines(y=range(10), xmin=0, xmax=x, linewidth=3, color=colors)
plt.grid(linestyle='--', alpha=0.5)  # dashed, semi-transparent grid
# plt.legend()  # would show the legend / labels
plt.show()

Python 关联Data python 关联图_可视化_02

排序图 Ranking

典型的排序图有：柱状图、坡度图、哑铃图

# Bar positions and bar heights.
X = [1, 2, 3, 4, 5, 6]
y = [22, 31, 24, 51, 19, 25]

# Thick green vertical lines from 0 up to each height act as the bars.
plt.vlines(x=X, ymin=0, ymax=y, linewidth=10, colors='g', linestyles='solid')

# Put a tick at every bar position and label it with the bar's height,
# rotated 30 degrees and right-aligned.
plt.xticks(ticks=X, labels=y, rotation=30, ha='right', fontsize=12)

Python 关联Data python 关联图_python_03

patches.Rectangle 绘制长方形

# `patches` is used below but was never imported in this snippet.
import matplotlib.patches as patches

# NOTE: X and y are not defined in this snippet; they are expected to exist
# already (positions and heights of the bars).
fig, ax = plt.subplots(figsize=(16, 10), facecolor='white', dpi=80)
ax.vlines(x=X, ymin=0, ymax=y, color='firebrick', alpha=0.7, linewidth=20)

# Two translucent rectangles added to the figure itself, not to the axes.
p1 = patches.Rectangle(
    (0.57, 0.03),                # (x, y) of the rectangle's lower-left corner
    width=0.33,                  # extent parallel to the x axis
    height=0.07,                 # extent parallel to the y axis
    alpha=0.1,                   # transparency
    facecolor='green',           # fill colour
    transform=fig.transFigure,   # interpret the numbers in figure coordinates
                                 # (0-1 across the whole figure), not data units
)
p2 = patches.Rectangle(
    (0.124, 0.03),
    width=0.446,
    height=0.07,
    alpha=0.1,
    facecolor='red',
    transform=fig.transFigure,
)
fig.add_artist(p1)
fig.add_artist(p2)
plt.show()

Python 关联Data python 关联图_数据_04

注：ax.add_patch作用的对象是ax,也就是坐标轴内的范围和标题/坐标轴的名称，但不包含这些背后的画布
ax.add_artist 功能同上
fig 没有 add_patch 方法，
需要改用 fig.add_artist 把图形添加到整个画布上

分布图 Distribution

典型的分布图有：直方图、密度图、箱型图…
1、密度图 Density Plot
1>密度图是直方图的一种变形
2>密度图表示在不同取值区间里的概率密度
3>密度曲线下的总面积为1
4>密度图的纵坐标用于比较不同类别之间的相对值

# 1000 standard-normal samples drawn as a filled density curve.
data = np.random.randn(1000)
sns.kdeplot(y=data,          # passing the data as `y` puts the density on the
                             # horizontal axis (replaces deprecated vertical=True)
            fill=True,       # shade the area under the curve
                             # (replaces deprecated shade=True)
            color='b',
            alpha=0.5,
            linewidth=8,
            linestyle='-.')

Python 关联Data python 关联图_python_05

2.直方图

# Draw 1000 samples with np.random.randn and plot them with plt.hist,
# passing no options besides the data.
data=np.random.randn(1000)
plt.hist(data)

Python 关联Data python 关联图_可视化_06

3.直方密度曲线图 Density Curves with Histogram

# Histogram with a density curve on top.
# `hist`, `hist_kws` and `kde_kws` are parameters of the deprecated
# `distplot`; `displot` does not accept them. The same picture is built here
# from the two current functions, both drawing on the current axes.
x = np.random.randn(100)
sns.histplot(x=x,
             bins=20,          # number of bins
             stat='density',   # normalise bars so they share a scale with the KDE
             color='g',
             alpha=0.4)
sns.kdeplot(x=x,
            color='r',
            linestyle='--',
            linewidth=3,
            alpha=0.7)

Python 关联Data python 关联图_坐标轴_07

组成图 Composition

典型的组成图有：饼图、树形图、华夫饼图…
1.华夫饼图

#绘图基本设置，适用于所有图
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import warnings; warnings.filterwarnings(action='once')

# Font sizes shared by the rc settings below.
large = 22
med = 16
small = 12
# 'axes.titlesize' used to appear twice in this dict (large, then med); in a
# dict literal the last entry wins, so only the effective value is kept.
params = {'legend.fontsize': med,
          'figure.figsize': (16, 10),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large}
plt.rcParams.update(params)
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later dropped the old names; use whichever this installation provides.
plt.style.use('seaborn-v0_8-whitegrid'
              if 'seaborn-v0_8-whitegrid' in plt.style.available
              else 'seaborn-whitegrid')
sns.set_style("white")
%matplotlib inline

from pywaffle import Waffle  #导入华夫饼库中的华夫饼模块

# Waffle chart built through plt.figure with PyWaffle's figure class.
data={'Demo':32,'Re':24,'Libe':8} # three categories and their values
f = plt.figure(
			figsize =(10,6),
			FigureClass=Waffle,# figure class to instantiate
			rows=5,# number of rows in the waffle grid (per the PyWaffle API)
			values=data,# the values to plot, keyed by category name
			colors=('b','r','g'),
			legend={'loc':'upper right','bbox_to_anchor':(1.5,1)})# loc: legend position; bbox_to_anchor: finer control of that position

Python 关联Data python 关联图_坐标轴_08

时间序列Time Series

时间序列图是使用折线图绘制的，并由此衍生出了很多模型：
自回归模型（AR模型）、移动平均模型（MA模型）、自回归滑动平均模型（ARMA模型）、ARIMA模型
1、时间序列图 Time Series Plot

第一步：将文本型数据转为日期型数据

1>在导入数据集的时候设置 parse_dates

# Parse the 'date' column into datetimes while loading. The column name must
# match the one the later snippets read (df.date / df['date']).
df = pd.read_csv('数据路径', parse_dates=['date'])

2>数据导入之后更改数据类型 （pd.to_datetime()）

# Load the CSV without date parsing, then convert the 'date' column afterwards.
df1 = pd.read_csv('数据路径')
df1  # bare expression, presumably to display the frame in a notebook cell
df1['date']=pd.to_datetime(df1.date)  # overwrite the column with pd.to_datetime's result

3>数据导入之后转换数据类型    datetime.datetime.strptime()

# Load the CSV, then parse each 'date' value individually with strptime.
df2 = pd.read_csv('数据路径')
df2  # bare expression, presumably to display the frame in a notebook cell
import datetime as dt
# The format string expects values shaped like YYYY-MM-DD.
df2['date'] = df2['date'].apply(lambda x:dt.datetime.strptime(x,'%Y-%m-%d'))

第二步：绘制时间序列图

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Tick every 12th row position, labelled with the year of every 12th 'date'
# value (uses the .dt accessor, so 'date' must already be a datetime column).
plt.xticks(range(df.shape[0])[::12],df.date.dt.year[::12])
plt.plot(df.value,color='b')  # line plot of the 'value' column in blue
plt.grid(alpha=0.5)# semi-transparent grid lines

Python 关联Data python 关联图_python_09

刻度标签稀疏化

# Thin out the x ticks: keep every 12th index as a tick position.
xtick_location = df.index.tolist()[::12]
# Label each kept tick with the first four characters of its date, i.e. the
# year. str() makes this work whether 'date' holds 'YYYY-MM-DD' strings or
# parsed timestamps (slicing a timestamp directly raises TypeError).
xtick_labels = [str(x)[:4] for x in df.date.tolist()[::12]]
# Apply the tick positions and labels.
plt.xticks(ticks=xtick_location,
           labels=xtick_labels,
           rotation=0,
           fontsize=12,
           horizontalalignment='center',  # options: left / right / center
           alpha=0.7)                     # label transparency

获取当前子图，并设定上边框透明度为0.3，弱化上边框

plt.gca().spines['top'].set_alpha(0.3)  # fade the top border of the current axes
plt.gca().spines['bottom'].set_alpha(0)# alpha 0: the bottom border becomes fully transparent

变化图 Change

1、带副坐标轴的折线图

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import warnings; warnings.filterwarnings(action='once')

# Font sizes shared by the rc settings below.
large = 22
med = 16
small = 12
# 'axes.titlesize' used to appear twice in this dict (large, then med); in a
# dict literal the last entry wins, so only the effective value is kept.
params = {'legend.fontsize': med,
          'figure.figsize': (16, 10),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large}
plt.rcParams.update(params)
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later dropped the old names; use whichever this installation provides.
plt.style.use('seaborn-v0_8-whitegrid'
              if 'seaborn-v0_8-whitegrid' in plt.style.available
              else 'seaborn-whitegrid')
sns.set_style("white")
%matplotlib inline
import matplotlib.pyplot as plt
# Line chart with a secondary y axis: two columns of the economics dataset
# plotted against the same dates, each with its own y scale.
plt.rcParams['font.sans-serif']=['Simhei']  # presumably so the Chinese labels below render
plt.rcParams['axes.unicode_minus']=False  # presumably so minus signs still render with that font
# Load the dataset (read over the network from the URL below).
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/economics.csv")
df.head()
x = df['date'] # x values
y1 = df['psavert']
y2 = df['unemploy']
# One row, one column: a single figure holding a single subplot.
fig,ax1 = plt.subplots(1,1,figsize=(16,9),dpi=80)
ax1.plot(x,y1,color='tab:red')

ax2 = ax1.twinx()  # second axes sharing the same x axis
ax2.plot(x,y2,color='tab:gray')

# Left axis: labels and ticks coloured to match the red line.
ax1.set_xlabel('年份',fontsize=12)
ax1.tick_params(axis='x',rotation=0,labelsize=12)# tick-related settings
ax1.set_ylabel('数据',color='tab:red',fontsize =12)
ax1.tick_params(axis='y',rotation=0,labelcolor='tab:red')
ax1.grid(alpha=0.4)

# Right axis: labels and ticks coloured to match the gray line.
ax2.set_ylabel('人数',color='tab:gray',fontsize=12)
ax2.tick_params(axis='y',labelcolor='tab:gray')
ax2.set_xticks(np.arange(0,len(x),30))# x tick positions: every 30th point
ax2.set_xticklabels(x[::30],rotation=90,fontdict={'fontsize':10})  # every 30th date as the label
fig.tight_layout()
plt.show()

Python 关联Data python 关联图_数据_10

2、不堆积的面积图

# Area chart of one column of the economics dataset with reference lines.
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/economics.csv")
x = df['date'].values.tolist()
y1 = df['psavert'].values.tolist()
y2 = df['uempmed'].values.tolist()
mycolors = ['tab:red', 'tab:blue', 'tab:green', 'tab:orange', 'tab:brown', 'tab:grey', 'tab:pink', 'tab:olive']
columns = ['数据', '中位数']
# NOTE(review): y2, columns[1] and the remaining colours are defined but never
# drawn, and the label below is never shown because no legend is requested.
# Confirm whether a second fill_between and plt.legend() were intended.
plt.fill_between(x, y1=y1, y2=0,  # fill between the y1 curve and the line y=0
                 label=columns[0],
                 alpha=0.5,
                 color=mycolors[0],
                 linewidth=2)
# Faint dashed horizontal reference lines every 2.5 units.
for y in np.arange(2.5, 30.0, 2.5):
    plt.hlines(y, xmin=0, xmax=len(x), colors='black', alpha=0.3, linestyles='--', lw=0.5)
# A line holding only ';' is a SyntaxError; show the figure explicitly instead.
plt.show()

Python 关联Data python 关联图_坐标轴_11

分组 Groups

1、安德鲁斯曲线 Andrews Curve

from pandas.plotting import andrews_curves #安德鲁斯曲线
# Parameter overview for andrews_curves. The argument names are placeholders
# that this snippet does not define, so it is not runnable as written.
andrews_curves(frame  # the DataFrame to plot
              ,class_column  # column that holds the class / grouping labels
              ,ax            # axes object to draw on, default None
              ,samples      # number of points sampled along each curve, default 200
              ,color        # colours
              ,colormap)    # colormap

#示例
import pandas as pd 
import matplotlib.pyplot as plt
from pandas.plotting import andrews_curves  #安德鲁斯曲线
from sklearn.datasets import load_iris   #鸢尾花数据集
# Load the iris dataset and assemble one DataFrame: feature columns first,
# then the class label in a 'species' column.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.DataFrame({'species': iris.target})
data = pd.concat((X, y), axis='columns')
# One Andrews curve per row, coloured by species.
andrews_curves(frame=data, class_column='species', colormap='Set1');

Python 关联Data python 关联图_数据_12

2、平行坐标 Parallel Coordinates

import pandas as pd
from pandas.plotting import parallel_coordinates   #平行坐标

# Parameter overview for parallel_coordinates. `class_column` is a placeholder
# name that this snippet does not define, so it is not runnable as written.
parallel_coordinates(data                  # the DataFrame to plot
                     , class_column        # name of the column holding the class labels
                     , color=None          # colours
                     , colormap=None       # colormap
                     , axvlines=True       # whether to draw a vertical line at each coordinate
                     , sort_labels=False)  # sort the class labels before colours are assigned

#示例
from sklearn.datasets import load_iris
# Load the iris dataset and assemble one DataFrame: feature columns first,
# then the class label in a 'species' column.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.DataFrame({'species': iris.target})
data = pd.concat((X, y), axis='columns')

# One polyline per row across the feature axes, coloured by species.
parallel_coordinates(frame=data, class_column='species', colormap='Set2');

Python 关联Data python 关联图_坐标轴_13