python dataframe后行与前一行比较 dataframe 第一行

转载

mob64ca14173efa 2023-12-27 11:02:37

文章标签 pandas 第一行 pandas 读取所有表头 pandas打印某一列 pandas批量修改列值 mvc 文章分类 Python 后端开发

Python Data Analysis Library 或 pandas 是基于NumPy 的一种工具，该工具是为了解决数据分析任务而创建的。简而言之是关于数据如何进行处理的库。

pandas数据读取

import pandas

# Read the data with pandas. This file has no header row, so header=None is
# passed; when it is omitted, the first row is used as the header by default.
id_prop = pandas.read_csv("id_prop.csv", header=None)

# DataFrame is the core pandas structure, comparable to a matrix.
print(type(id_prop))

# Data type of each column that was read in.
print(id_prop.dtypes)

# Print the first three rows; with no argument, head() returns the first 5.
print(id_prop.head(3))

# Show the last 4 rows.
print(id_prop.tail(4))

# Column names tell which indicator each column holds. This data has no
# column names, so the columns are labelled 0 and 1 automatically.
print(id_prop.columns)

# Dimensions of the table: rows are samples, columns are attributes.
print(id_prop.shape)

输出：

*************

0 object

1 float64

dtype: object

*************

0 1

0 mp-1025629 1.2732

1 mp-1026909 1.3755

2 mp-1027145 1.2888

*************

0 1

53672 mvc-9140 1.6519

53673 mvc-9141 0.8644

53674 mvc-9143 1.8924

53675 mvc-9147 1.1522

*************

Int64Index([0, 1], dtype='int64')

*************

(53676, 2)

pandas索引与计算

# pandas data is accessed through its accessors rather than bare indexing.
import pandas

id_prop = pandas.read_csv("id_prop.csv")

# Fetch the first row of the loaded data. Rows cannot be taken with a plain
# index; use .loc instead.
print(id_prop.loc[0])
print("*************")

# Common data types in a DataFrame:
#   object   - string values
#   int      - integer values
#   float    - floating-point values
#   datetime - time values
#   bool     - boolean values
# The data type of every column can be listed with:
#   print(id_prop.dtypes)

# Slice: take the rows labelled 3, 4, 5 and 6 (.loc includes both endpoints).
print(id_prop.loc[3:6])
print("*************")

# A column can be fetched by its name from the header row -- a header line
# with the names 0 and 1 was added as the first line of the data file.
first_line = id_prop["0"]
print(first_line)
print("*************")

# To fetch two columns, put the column names in a list and index the
# DataFrame with that list.
columns = ["0", "1"]
line1_2 = id_prop[columns]
print(line1_2)
print("*************")

# Collect all column names of the data into a list.
col_names = id_prop.columns.tolist()
print(col_names)

# Once a column is selected, * multiplies and / divides; both operate on
# every value in the column at once.
# Adding a column: id_prop["new_column_name"] = values
# (the number of rows must match).

输出：

0 mp-1025629

1 1.2732

Name: 0, dtype: object

*************

0 1

3 mp-1028422 0.6334

4 mp-1028589 0.9929

5 mp-1031256 1.1689

6 mp-1030165 0.8038

*************

0 mp-1025629

1 mp-1026909

2 mp-1027145

3 mp-1028422

4 mp-1028589

...

53671 mvc-913

53672 mvc-9140

53673 mvc-9141

53674 mvc-9143

53675 mvc-9147

Name: 0, Length: 53676, dtype: object

*************

0 1

0 mp-1025629 1.2732

1 mp-1026909 1.3755

2 mp-1027145 1.2888

3 mp-1028422 0.6334

4 mp-1028589 0.9929

... ... ...

53671 mvc-913 0.4539

53672 mvc-9140 1.6519

53673 mvc-9141 0.8644

53674 mvc-9143 1.8924

53675 mvc-9147 1.1522

[53676 rows x 2 columns]

*************

['0', '1']

pandas数据预处理

import pandas

id_prop = pandas.read_csv("id_prop.csv")

# Sort by the values of one column, ascending by default. With inplace=True
# the DataFrame itself is reordered and the call returns None; without it,
# sort_values returns a new sorted DataFrame and leaves id_prop unchanged.
id_prop.sort_values("1", inplace=True)
print(id_prop["1"])
print("*************")

# Sort from largest to smallest by setting ascending=False.
id_prop.sort_values("1", inplace=True, ascending=False)
print(id_prop["1"])

输出：

12594 0.0001

33181 0.0001

17654 0.0001

37641 0.0001

16652 0.0001

...

12953 9.0594

17985 9.3431

31560 9.3707

51254 9.7218

51402 10.0380

Name: 1, Length: 53676, dtype: float64

*************

51402 10.0380

51254 9.7218

31560 9.3707

17985 9.3431

12953 9.0594

...

27406 0.0001

9782 0.0001

46305 0.0001

47859 0.0001

12594 0.0001

Name: 1, Length: 53676, dtype: float64

# Handling missing values. Continues from the previous snippet, which loaded
# the DataFrame id_prop.
import pandas

# Take the first column.
first_line = id_prop["0"]

# Test each entry for a missing value: True where missing, False otherwise.
# (The module is imported as `pandas`, so the `pd` alias is not available.)
null = pandas.isnull(first_line)

# Use the boolean column as an index to keep only the entries flagged True;
# everything kept here is a missing value.
null_true = first_line[null]

# Number of missing values.
null_count = len(null_true)
print(null_count)

# Drop the rows that contain missing values with dropna:
#   new_data = id_prop.dropna(axis=0, subset=["some_label", "another_label"])

# See also pivot_table:
#   pandas.pivot_table(data, values=None, index=None, columns=None,
#                      aggfunc='mean', fill_value=None, margins=False,
#                      dropna=True, margins_name='All', observed=False)

# Locating a single value:
#   id_prop.loc[row_label, "column_label"]

axis=0 表示跨行（沿行索引方向，自上而下），axis=1 表示跨列（沿列标签方向，从左到右）。

axis=0 表示沿着行标签（索引值）向下，对每一列执行方法。

axis=1 表示沿着列标签横向，对每一行执行方法。

pandas自定义函数

# Custom operations can be wrapped in a function and run with apply().
# Continues from the previous snippets, which loaded the DataFrame id_prop.
import pandas


def hundredth_row(column):
    """Return the entry of *column* stored under index label 99.

    .loc is label-based, so this is the 100th record only while the index
    still follows the original 0-based row order.
    """
    return column.loc[99]


# apply() calls the function once per column of the DataFrame. The result is
# bound to its own name so the function is not shadowed by its output.
hundredth_values = id_prop.apply(hundredth_row)
print(hundredth_values)


def not_null_count(column):
    """Return how many entries of *column* are missing.

    Despite its name, this counts the missing values, not the present ones.
    """
    # The module is imported as `pandas`; the `pd` alias is not defined.
    column_null = pandas.isnull(column)
    return len(column[column_null])


# Number of missing values in each column.
column_null_count = id_prop.apply(not_null_count)
print(column_null_count)

输出：

0 mp-28361

1 3.7051 #第100个值

dtype: object

0 0

1 0 #两列都没有缺失值

dtype: int64

下面是Numpy和Pandas的速查表

python dataframe后行与前一行比较 dataframe 第一行_mvc_02