本篇文章简单介绍一下python对csv文件的常见操作,由于代码注释写的很清楚,所以文字解释相对较少。
第一种:利用python基本模块操作
'''
利用python普通模块是实现读写csv文件
步骤1:取出A文件的第一行,去除空格,换行符等符号
步骤2:将A文件的第一行保存到一个列表中,然后写入到B文件中
步骤3:依次循环A文件后面的各行,然后写入到B文件中
'''
import sys
input_file='E:\\studytest\\checking.csv'
output_file='E:\\studytest\\checking111.csv'
with open(input_file,'r',newline='') as filereader:
with open(output_file,'w',newline='') as filewrite:
header=filereader.readline()
header=header.strip()
header_list=header.split(',')
print(header_list)
filewrite.write(','.join(map(str,header_list))+'\n')
for row in filereader:
row=row.strip()
row_list=row.split(',')
print(row_list)
filewrite.write(','.join(map(str,row_list))+'\n')
第二种:利用python中csv模块操作:
import sys
import csv
input_file='E:\\studytest\\checking.csv'
output_file='E:\\studytest\\checking11122.csv'
# input_file=sys.argv[1]
# output_file=sys.argv[2]
with open(input_file,'r',newline='') as csv_in_file:
with open(output_file,'w',newline='') as csv_out_file:
filereader=csv.reader(csv_in_file,delimiter=',')#将csv文件的每行以列表的形式返回
filewriter=csv.writer(csv_out_file,delimiter=',')#创建一个写入对象,delimiter是默认分隔符
for row_list in filereader:
print(row_list)
filewriter.writerow(row_list)
第三种:利用python中csv模块筛选特定的行
'''
利用python中csv模块读写文件(筛选特定的行)
固定格式:
for row in filereader:
***if value in row meets some business tule or set of rules:***
do something
else:
do something else
'''
import sys
import csv
input_file='E:\\studytest\\data.csv'
output_file='E:\\studytest\\data111.csv'
# input_file=sys.argv[1]
# output_file=sys.argv[2]
with open(input_file,'r',newline='') as csv_in_file:
with open(output_file,'w',newline='') as csv_out_file:
filereader=csv.reader(csv_in_file)#将csv文件的每行以列表的形式返回
filewriter=csv.writer(csv_out_file)#创建一个写入对象,delimiter是默认分隔符
#先处理第一行
header=next(filereader)
filewriter.writerow(header)
#再处理剩余所有行
for row_list in filereader:
supplier=str(row_list[0]).strip()
cost=str(row_list[3]).strip("$").replace(',','')
if supplier == 'Supplier Z' or float(cost)>600.00:
filewriter.writerow(row_list)
第四种:利用python中csv模块筛选特定的列:
'''
利用python中csv模块读写文件(筛选特定的列)
'''
import sys
import csv
input_file='E:\\studytest\\data.csv'
output_file='E:\\studytest\\data_column.csv'
# input_file=sys.argv[1]
# output_file=sys.argv[2]
'''第一种:根据列索引值选出特定的列'''
my_columns = [0,3]#限定我们需要的特定列
with open(input_file,'r',newline='') as csv_in_file:
with open(output_file,'w',newline='') as csv_out_file:
filereader=csv.reader(csv_in_file)#将csv文件的每行以列表的形式返回
filewriter=csv.writer(csv_out_file)#创建一个写入对象,delimiter是默认分隔符
#处理所有行
for row_list in filereader:#循环每一行
row_list_output = []
for index_value in my_columns:
row_list_output.append(row_list[index_value])#取出特定列的数据,并封装成列表
filewriter.writerow(row_list_output)#写入文件
'''
利用python中csv模块读写文件(筛选特定的列)
'''
import sys
import csv
input_file='E:\\studytest\\data.csv'
output_file='E:\\studytest\\data_column.csv'
# input_file=sys.argv[1]
# output_file=sys.argv[2]
'''第二种:根据列标题选出特定的列
注:方法和第一种的区别在在于,先根据列标题找出列索引,然后在运用第一种方法读写文件,好处是列标题是固定的
'''
my_columns = ['Invoice Number','Purchase Date']
my_columns_index = []
with open(input_file,'r',newline='') as csv_in_file:
with open(output_file,'w',newline='') as csv_out_file:
filereader=csv.reader(csv_in_file)#将csv文件的每行以列表的形式返回
filewriter=csv.writer(csv_out_file)#创建一个写入对象,delimiter是默认分隔符
header=next(filereader,None)
for index_value in range(len(header)):
if header[index_value] in my_columns:
my_columns_index.append(index_value)
filewriter.writerow(my_columns)
for row_list in filereader:
row_list_output = []
for index_value in my_columns_index:
row_list_output.append(row_list[index_value])
filewriter.writerow(row_list_output)
第五种:选取连续的行
import sys
import csv
'''
选取连续的行
'''
input_file='E:\\studytest\\data.csv'
output_file='E:\\studytest\\data_other1.csv'
row_counter = 0
with open(input_file,'r',newline='') as csv_in_file:
with open(output_file,'w',newline='') as csv_out_file:
filereader=csv.reader(csv_in_file)
filewriter=csv.writer(csv_out_file)
for row in filereader:
#选取3-15行
if row_counter >= 3 and row_counter <= 15:
filewriter.writerow([value.strip() for value in row])
row_counter += 1
第六种:添加标题列:
import sys
import csv
'''
添加标题行
'''
input_file='E:\\studytest\\data_notitle.csv'
output_file='E:\\studytest\\data_other2.csv'
with open(input_file,'r',newline='') as csv_in_file:
with open(output_file,'w',newline='') as csv_out_file:
filereader=csv.reader(csv_in_file)
filewriter=csv.writer(csv_out_file)
header_list = ['Title1','Title2','Title3','Title4','Title5']
filewriter.writerow(header_list)
for row in filereader:
filewriter.writerow(row)
第七种:对多个csv文件操作:
import sys
import csv
'''
读取多个CSV文件
1、文件记数和行列记数
'''
import glob
import os
#存放多个文件的文件夹目录,文件命令格式相似
# input_path='E:\\studytest\\multile'
# file_counter = 0
# #循环文件夹中所有文件
# for input_file in glob.glob(os.path.join(input_path,'data*')):
# row_counter = 1
# with open(input_file,'r',newline='') as csv_in_file:
# filereader = csv.reader(csv_in_file)
# header=next(filereader,None)
# for row in filereader:
# row_counter += 1
# print('{0!s}:\t{1:d} rows \t{2:d} columns'.format(os.path.basename(input_file),row_counter,len(header)))
# file_counter += 1;
# print('Number of files:{0:d}'.format(file_counter))
'''
2、将多个文件的数据放到一个文件中
'''
# input_path='E:\\studytest\\multile'
# output_file='E:\\studytest\\multile\\count_data.csv'
# first_file = True
# for input_file in glob.glob(os.path.join(input_path,'data*')):
# print(os.path.basename(input_file))
# with open(input_file,'r',newline='') as csv_in_file:
# with open(output_file,'a',newline='') as csv_out_file:
# filereader = csv.reader(csv_in_file)
# filewriter = csv.writer(csv_out_file)
# #如果是第一次,就全部添加
# if first_file:
# for row in filereader:
# filewriter.writerow(row)
# first_file = False
# #如果不是第一次,只添加除了标题外的其他行
# else:
# header = next(filereader,None)
# for row in filereader:
# filewriter.writerow(row)
'''
3、计算每个文件中值的总和和平均数
'''
input_path='E:\\studytest\\multile'
output_file='E:\\studytest\\multile\\count_average_data.csv'
#构造结果文件的标题列
output_header_list=['file_name','total_sales','average_sales']
csv_out_file=open(output_file,'a',newline='')
filewriter=csv.writer(csv_out_file)
filewriter.writerow(output_header_list)
#循环文件夹中每个文件
for input_file in glob.glob(os.path.join(input_path,'data*')):
with open(input_file,'r',newline='') as csv_in_file:
#读取文件
filereader = csv.reader(csv_in_file)
#构造插入数据格式['file_name','total_sales','average_sales']
output_list=[]
#插入文件名
output_list.append(os.path.basename(input_file))
header = next(filereader)
total_sales = 0.0
number_of_sales = 0.0
#循环除标题行的每一行
for row in filereader:
#获得总销售额
sale_count = row[3]
total_sales += float(str(sale_count).strip('$').replace(',',''))
#获取销售条数
number_of_sales += 1
#获取平均销售额
average_sales = '{0:.2f}'.format(total_sales / number_of_sales)
#插入总销售额
output_list.append(total_sales)
#插入平均销售额
output_list.append(average_sales)
#插入结果问价
filewriter.writerow(output_list)
csv_out_file.close()
由于是基础,对比图片就不贴出来了,大家使用代码的时候,只需要将涉及到文件名和路径的参数改一下就行了。