Python文本速度 python处理文本数据

转载

jacksky 2023-06-19 21:40:17

文章标签 Python文本速度 python excel 数据 ide 文章分类 Python 后端开发

前言

一、openpyxl简介

二、实现

1.数据筛选

2.数据整理

总结

前言

手动筛选和整理大量的数据，耗时非常多。自学做了个脚本工具对txt文件进行信息筛选、整理及绘制参数变化曲线

一、openpyxl简介

openpyxl，Python库的一个模块，可实现xlsx格式文档读写操作。

二、实现

1.数据筛选

以IDxxx为关键字，采用非贪婪模式筛选包含指定信息的数据行，并以IDxxx为名称，以txt格式存储搜索到的结果。使用非贪婪模式是为了加快搜索速度。

示例：

数据格式 2020.12.31 ID1 X10 Y20 Z30，以空格为分隔符。为了在筛选ID1时，不会错误地把ID11的数据也选出来，搜索时使用ID1加上空格作为关键字，matchObj = re.match('.*?ID' + str(value) + ' ', line)。

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# 导入字体、边框、颜色以及对齐方式相关库
import openpyxl #从excle里读/写数据
from openpyxl.styles import Font, Border, Side, PatternFill, colors, Alignment
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter 
from openpyxl.chart import (
    Series,
    LineChart,
    Reference,
)

from datetime import datetime
import time
import re
import os
import shutil   #删除目录

inFileName = "log.txt" 

def add_line_chart(title, wss, min_col, min_row, max_col, max_row, sheet):
    c1 = LineChart()
    c1.title = title    # 图的标题
    c1.style = 12       # 线条的style
    c1.y_axis.title = 'Battery/V'                   # y坐标的标题
    if 'IDC' not in title:
        c1.x_axis.number_format = 'h:mm:ss.000'     # 规定日期格式  这是月,年格式
        c1.x_axis.majorTimeUnit = "Seconds"         # 规定日期间隔 注意days；Months大写
    c1.x_axis.title = "Time"                        # x坐标的标题
    data = Reference(wss, min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row)  # 图像的数据 起始行、起始列、终止行、终止列
    c1.add_data(data, titles_from_data=True, from_rows=True)
    dates = Reference(wss, min_col=2, min_row=1, max_col=max_col)
    c1.set_categories(dates)
    wss.add_chart(c1, sheet)                        # 将图表添加到 sheet中
    
    
    

def set_cells(cells,type,color=None):
    aligncenter = Alignment(horizontal='center',vertical='center',wrap_text=True)  #居中
    sidestyle = Side(border_style='thin')
    border = Border(left=sidestyle,right=sidestyle,top=sidestyle,bottom=sidestyle)
    for i in cells:
        for j in i:
            if(type=='font'):
                j.font = Font(name='Times New Roman', size=12, bold=False, italic=False, color=colors.BLACK)
            elif(type=='bold'):
                j.style='Pandas'
            elif(type=='center'):
                j.alignment = aligncenter
            elif(type=='border'):
                j.border=border
            elif(type=='color'):
                j.fill = PatternFill("solid", fgColor=color)  #solid为样式

def main():
    print('----------------------------------')
    print('log analysis start:')
    print('  Log file name is [%s]\r\n' %(inFileName))
    
    with open(inFileName) as log:
        line = log.readline();
        
        folder_dir = os.getcwd() + '\\NODE_DATA_build'+ datetime.now().strftime("%Y%m%d") + '\\'

        #判断当前路径是否存在，没有则创建NODE_DATA_buildXXXXXXXX文件夹
        if not os.path.exists(folder_dir):     
            os.makedirs(folder_dir)
        else:
            while True:
                try:
                    prompt = int(input('''NODE_DATA already exist, so?
                    \r\n  1.Cover the old folder\r\n  2.Create a new folder\r\n  3.Exit
                    \r\n  Please enter a number(1-3):''').strip())
                    if prompt not in [1, 2, 3]:
                        print('[invalid]')
                        continue
                    if prompt == 1:
                        shutil.rmtree(folder_dir)
                        os.makedirs(folder_dir)
                        break
                    if prompt == 2:
                        folder_dir = os.getcwd() + '\\NODE_DATA_build'+ datetime.now().strftime("%Y%m%d_%H%M%S") + '\\'
                        os.makedirs(folder_dir)
                        break
                    if prompt == 3:
                        print('[Exit, see you next time]')
                        exit()
                except ValueError as e:
                    print('[Fail, enter number please [%s]]' % e)
                    exit()
                except (KeyboardInterrupt, EOFError):
                    print('[End, see you next time]')
                    exit()

        #ws=wb['Sheet']
        #ws.title='ID1'                        #修改名为Sheet1工作表名称

        #日志按设备ID分类存储
        while line:
            for value in range(1,255):
                matchObj = re.match('.*?ID' + str(value) + ' ', line);
                if  matchObj:
                    outFileName = folder_dir + 'ID' + str(value) + '.txt'
                    document = open(outFileName,"a+");
                    document.write(line);
                    document.close();
                    break;
                    
            line = log.readline();
    log.close();
        
    print('end')
    print('----------------------------------')

if __name__ == '__main__':
    main();

2.数据整理

将信息按一定规律筛选出来后，再对各txt文件中的数据进行整理到Excel表中、绘图。（可根据实际使用情况考虑是否存储中间数据。）

#按ID将数据存入Excel表中
    xlsx = openpyxl.Workbook()
    for value in range(1,255):
        if os.path.exists(folder_dir + 'ID' + str(value) + '.txt'):
            sheet = xlsx.create_sheet('ID' + str(value),0)

            #先写表头  1行 1列
            columns = ' ABCDEFGHIJKLMN'
            x = 1
            sheet.column_dimensions[columns[x]].width = 14
            sheet.cell(row=1, column = x, value="Info 1")
            x += 1
            sheet.column_dimensions[columns[x]].width = 14
            sheet.cell(row=1, column = x, value="Info 2")

            
            document = open(folder_dir + 'ID' + str(value) + '.txt',"r+")
            line = document.readline()
            x = 2
            while line:
                line = line.strip()
                line = line.strip('\n')
                line = re.split(r'[;,\s]\s*', line)
                length = len(line)
                for i in range(length):
                    #去掉首字母
                    while line[i][0].isalpha():
                        line[i] = line[i][1:]
                    #类型转换
                    if i < 2 or i > 12:
                        sheet.cell(row=x, column= i + 1, value = str(line[i]))
                    elif i >= 4 and i <= 5:
                        sheet.cell(row=x, column= i + 1, value = float(line[i]))
                    else:
                        sheet.cell(row=x, column= i + 1, value = int(line[i]))
                
                line = document.readline();
                x += 1  #另起一行
                
            rownum=str(sheet.max_row)                   #总行数
            letter=get_column_letter(sheet.max_column)  #最后一列的字母
            cells = sheet['A1:'+letter+rownum]          #全部单元格范围
            set_cells(cells,'center')                   #单元格居中
            set_cells(cells,'font')                     #单元格字体
            
            # 将整个表的行高设置为25
            sheet.row_dimensions.height = 25
            sheet.row_dimensions[1].height = 50
            #sheet.column_dimensions.width = 30
            
            
            # 构建LineChart对象，绘制曲线
            chart = LineChart()
            data = Reference(worksheet=sheet, min_row=1, max_row=rownum, min_col=5, max_col=5)
            #按列读取数据，第一列非标题列
            chart.add_data(data, from_rows=False, titles_from_data=True)
            sheet.add_chart(chart, 'O2')
            cats = Reference(worksheet=sheet, min_col=1, max_col=1, min_row=2, max_row=rownum)
            # chart为 chart = LineChart() 产生的对象
            chart.set_categories(cats)
            
            # chart为折线图对象
            chart.x_axis.title = "X"
            chart.y_axis.title = "Y"
            # 设置折线图样式
            chart.style = 24
            
            document.close();
    
    del xlsx['Sheet']
    xlsx.save(folder_dir + 'report.xlsx')   #保存表格