目录
前言
一、openpyxl简介
二、实现
1.数据筛选
2.数据整理
总结
前言
手动筛选和整理大量的数据,耗时非常多。自学做了个脚本工具对txt文件进行信息筛选、整理及绘制参数变化曲线
一、openpyxl简介
openpyxl,Python库的一个模块,可实现xlsx格式文档读写操作。
二、实现
1.数据筛选
以IDxxx为关键字,采用非贪婪模式筛选包含指定信息的数据行,并以IDxxx为名称,以txt格式存储搜索到的结果。使用非贪婪模式是为了加快搜索速度。
示例:
数据格式 2020.12.31 ID1 X10 Y20 Z30,以空格为分隔符。为了在筛选ID1时,不会错误地把ID11的数据也选出来,搜索时使用ID1加上空格作为关键字,matchObj = re.match('.*?ID' + str(value) + ' ', line)。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 导入字体、边框、颜色以及对齐方式相关库
import openpyxl #从excle里读/写数据
from openpyxl.styles import Font, Border, Side, PatternFill, colors, Alignment
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.chart import (
Series,
LineChart,
Reference,
)
from datetime import datetime
import time
import re
import os
import shutil #删除目录
inFileName = "log.txt"
def add_line_chart(title, wss, min_col, min_row, max_col, max_row, sheet):
c1 = LineChart()
c1.title = title # 图的标题
c1.style = 12 # 线条的style
c1.y_axis.title = 'Battery/V' # y坐标的标题
if 'IDC' not in title:
c1.x_axis.number_format = 'h:mm:ss.000' # 规定日期格式 这是月,年格式
c1.x_axis.majorTimeUnit = "Seconds" # 规定日期间隔 注意days;Months大写
c1.x_axis.title = "Time" # x坐标的标题
data = Reference(wss, min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row) # 图像的数据 起始行、起始列、终止行、终止列
c1.add_data(data, titles_from_data=True, from_rows=True)
dates = Reference(wss, min_col=2, min_row=1, max_col=max_col)
c1.set_categories(dates)
wss.add_chart(c1, sheet) # 将图表添加到 sheet中
def set_cells(cells,type,color=None):
aligncenter = Alignment(horizontal='center',vertical='center',wrap_text=True) #居中
sidestyle = Side(border_style='thin')
border = Border(left=sidestyle,right=sidestyle,top=sidestyle,bottom=sidestyle)
for i in cells:
for j in i:
if(type=='font'):
j.font = Font(name='Times New Roman', size=12, bold=False, italic=False, color=colors.BLACK)
elif(type=='bold'):
j.style='Pandas'
elif(type=='center'):
j.alignment = aligncenter
elif(type=='border'):
j.border=border
elif(type=='color'):
j.fill = PatternFill("solid", fgColor=color) #solid为样式
def main():
print('----------------------------------')
print('log analysis start:')
print(' Log file name is [%s]\r\n' %(inFileName))
with open(inFileName) as log:
line = log.readline();
folder_dir = os.getcwd() + '\\NODE_DATA_build'+ datetime.now().strftime("%Y%m%d") + '\\'
#判断当前路径是否存在,没有则创建NODE_DATA_buildXXXXXXXX文件夹
if not os.path.exists(folder_dir):
os.makedirs(folder_dir)
else:
while True:
try:
prompt = int(input('''NODE_DATA already exist, so?
\r\n 1.Cover the old folder\r\n 2.Create a new folder\r\n 3.Exit
\r\n Please enter a number(1-3):''').strip())
if prompt not in [1, 2, 3]:
print('[invalid]')
continue
if prompt == 1:
shutil.rmtree(folder_dir)
os.makedirs(folder_dir)
break
if prompt == 2:
folder_dir = os.getcwd() + '\\NODE_DATA_build'+ datetime.now().strftime("%Y%m%d_%H%M%S") + '\\'
os.makedirs(folder_dir)
break
if prompt == 3:
print('[Exit, see you next time]')
exit()
except ValueError as e:
print('[Fail, enter number please [%s]]' % e)
exit()
except (KeyboardInterrupt, EOFError):
print('[End, see you next time]')
exit()
#ws=wb['Sheet']
#ws.title='ID1' #修改名为Sheet1工作表名称
#日志按设备ID分类存储
while line:
for value in range(1,255):
matchObj = re.match('.*?ID' + str(value) + ' ', line);
if matchObj:
outFileName = folder_dir + 'ID' + str(value) + '.txt'
document = open(outFileName,"a+");
document.write(line);
document.close();
break;
line = log.readline();
log.close();
print('end')
print('----------------------------------')
if __name__ == '__main__':
main();
2.数据整理
将信息按一定规律筛选出来后,再对各txt文件中的数据进行整理到Excel表中、绘图。(可根据实际使用情况考虑是否存储中间数据。)
#按ID将数据存入Excel表中
xlsx = openpyxl.Workbook()
for value in range(1,255):
if os.path.exists(folder_dir + 'ID' + str(value) + '.txt'):
sheet = xlsx.create_sheet('ID' + str(value),0)
#先写表头 1行 1列
columns = ' ABCDEFGHIJKLMN'
x = 1
sheet.column_dimensions[columns[x]].width = 14
sheet.cell(row=1, column = x, value="Info 1")
x += 1
sheet.column_dimensions[columns[x]].width = 14
sheet.cell(row=1, column = x, value="Info 2")
document = open(folder_dir + 'ID' + str(value) + '.txt',"r+")
line = document.readline()
x = 2
while line:
line = line.strip()
line = line.strip('\n')
line = re.split(r'[;,\s]\s*', line)
length = len(line)
for i in range(length):
#去掉首字母
while line[i][0].isalpha():
line[i] = line[i][1:]
#类型转换
if i < 2 or i > 12:
sheet.cell(row=x, column= i + 1, value = str(line[i]))
elif i >= 4 and i <= 5:
sheet.cell(row=x, column= i + 1, value = float(line[i]))
else:
sheet.cell(row=x, column= i + 1, value = int(line[i]))
line = document.readline();
x += 1 #另起一行
rownum=str(sheet.max_row) #总行数
letter=get_column_letter(sheet.max_column) #最后一列的字母
cells = sheet['A1:'+letter+rownum] #全部单元格范围
set_cells(cells,'center') #单元格居中
set_cells(cells,'font') #单元格字体
# 将整个表的行高设置为25
sheet.row_dimensions.height = 25
sheet.row_dimensions[1].height = 50
#sheet.column_dimensions.width = 30
# 构建LineChart对象,绘制曲线
chart = LineChart()
data = Reference(worksheet=sheet, min_row=1, max_row=rownum, min_col=5, max_col=5)
#按列读取数据,第一列非标题列
chart.add_data(data, from_rows=False, titles_from_data=True)
sheet.add_chart(chart, 'O2')
cats = Reference(worksheet=sheet, min_col=1, max_col=1, min_row=2, max_row=rownum)
# chart为 chart = LineChart() 产生的对象
chart.set_categories(cats)
# chart为折线图对象
chart.x_axis.title = "X"
chart.y_axis.title = "Y"
# 设置折线图样式
chart.style = 24
document.close();
del xlsx['Sheet']
xlsx.save(folder_dir + 'report.xlsx') #保存表格
总结
openpyxl模块功能比较强大,可根据需要查询接口说明。此脚本也是参考别人的贴,然后一个一个小部分组合,实现自己想要的功能。比较粗糙,仅供参考