在Python环境下,利用百度AI开放平台文字识别技术,批量对表格类图片进行识别,生成Excel文件,并下载到指定文件夹内。
工具/原料
Python 3 for Windows
百度智能云账号/百度账号
OCR Python SDK
一、运行环境搭建
1安装Python 3。请自行下载安装,记住安装路径,后面需要用。
2注册并登录百度AI开放平台(http://ai.baidu.com/)控制台。
3创建文字识别应用,并记下该应用的AppID、API Key和Secret Key。
4安装OCR Python SDK。1、如果已安装pip,执行pip install baidu-aip即可。2、如果已安装setuptools,执行python setup.py install即可。下面我们使用方法1安装SDK。
5执行pip install baidu-aip安装OCR Python SDK。1、快捷键win+r,打开运行窗口;输入cmd,并确定,打开MS-DOS;2、使用cd命令,进入Python安装目录下Scripts文件夹(pip.exe在这个文件夹里面);3、执行pip install baidu-aip等待安装完毕(如因网络问题未成功,多执行几次即可)。
END
二、编写Python代码
1打开代码编辑器编写代码,可以用WINDOWS自带的记事本编写,保存后将".txt"后缀改成".py"。这里我们用Python自带的IDLE编写。
2运行流程:1、选择图片所在文件夹;2、选择输出文件夹;3、调用百度文字识别功能进行分析;4、查询是否处理完毕,如果处理完毕,获取识别成功后生成的".xls"文件链接地址,并下载。
3参考代码:(Python对代码行缩进要求比较高,复制下列的代码请注意这一点,参考上图)
# ----------------------------------------
# Python环境下百度Ocr表格批量识别
# Email:fryflying@outlook.com
# ----------------------------------------
import os #加载操作系统模块
from aip import AipOcr #调用百度Ocr模块
import requests #调用反馈模块
import time #调用时间模块
import tkinter as tk #调用GUI图形模块
from tkinter import filedialog
# Credentials of your own Baidu AI application. Each value may be supplied
# through an environment variable so the real keys do not have to be stored
# in this file; the literals are the placeholders used when the variable is
# not set.
APP_ID = os.environ.get('BAIDU_OCR_APP_ID', '1******5')
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', 'c****************************S')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY', 'N**************************************G')
# Create the OCR client from the three credentials.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
def get_file_content(filePath):
    """Return the complete content of the file at ``filePath`` as bytes.

    The file is opened in binary mode, so the data is returned unmodified.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
def file_download(url, file_path):
    """Download ``url`` and write the response body to ``file_path``.

    Raises:
        requests.HTTPError: when the server answers with an error status, so
            that an error page is never saved as if it were the spreadsheet.
        requests.Timeout: when the server does not answer within 30 seconds.
    """
    r = requests.get(url, timeout=30)
    # Fail before the output file is created if the download did not succeed.
    r.raise_for_status()
    with open(file_path, 'wb') as f:
        f.write(r.content)
# Hide the empty Tk root window; only the two folder dialogs are shown.
root = tk.Tk()
root.withdraw()

# askdirectory returns an empty value when the dialog is cancelled. Stop in
# that case: appending '/' to it would make the script scan the root folder.
data_dir = filedialog.askdirectory(title='请选择图片文件夹')
if not data_dir:
    raise SystemExit('未选择图片文件夹,程序退出。')
result_dir = filedialog.askdirectory(title='请选择输出文件夹')
if not result_dir:
    raise SystemExit('未选择输出文件夹,程序退出。')
data_dir += '/'
result_dir += '/'

num = 0  # number of spreadsheets downloaded so far
for name in os.listdir(data_dir):
    image_path = os.path.join(data_dir, name)
    if not os.path.isfile(image_path):
        continue  # sub-folders cannot be read as an image
    # splitext removes only the last extension, so "a.v1.jpg" and "a.v2.jpg"
    # do not both end up as "a.xls".
    base_name = os.path.splitext(name)[0]
    print('{0}: {1} 正在处理:'.format(num+1, base_name))
    image = get_file_content(image_path)
    res = client.tableRecognitionAsync(image)  # submit the recognition job
    if 'result' not in res:
        # NOTE(review): presumably an error payload (error_code/error_msg)
        # from the service; print it and go on with the next image.
        print(res)
        continue
    req_id = res['result'][0]['request_id']
    # Recognition runs on the server: query once per second, 10 times at most.
    for count in range(10):
        res = client.getTableRecognitionResult(req_id)
        status = res.get('result', {}).get('ret_msg')
        print(status)
        if status == '已完成':
            break  # finished; the reply now carries the download address
        time.sleep(1)
    else:
        # Never reported as finished: there is no valid address to download.
        print('{0}: {1} 识别超时,已跳过。'.format(num+1, base_name))
        continue
    url = res['result']['result_data']
    xls_name = base_name + '.xls'
    file_download(url, os.path.join(result_dir, xls_name))
    num = num + 1
    print('{0}: {1} 下载完成。'.format(num, xls_name))
    time.sleep(1)
运行程序:
方法一:直接双击"***.py"运行文件;
方法二:右键"***.py"使用IDLE打开,按F5执行;(此方法方便调试)
三、测试结果
准备待测试图片
执行过程
执行结果
END
注意事项
百度表格文字识别每天50次免费,已经识别过的表格可不限次下载(保存代码里面获取的URL地址即可);
图片大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式,推荐使用JPG格式。
图片中的表格四个角都要有,否则输出结果会缺行或缺列。
图片中的表格线条横平竖直最好,如果是斜的,识别率会大大降低,扫描的时候需要摆正,如果是拍照,推荐使用扫描全能王APP。
python-使用百度AipOcr实现表格文字图片识别
代码运行环境:win10 python3.7
需要aip库,使用pip install baidu-aip即可
(1)目的
通过百度AipOcr库,来实现识别图片中的表格,并输出为表格文件。
(2)实现
# encoding: utf-8
import os
import requests
import time
import tkinter as tk
from tkinter import filedialog
from aip import AipOcr
# Credentials of your own Baidu AI Cloud application. Each value may be
# supplied through an environment variable so the real keys do not have to be
# stored in this file; the literals are the placeholders used when the
# variable is not set.
APP_ID = os.environ.get('BAIDU_OCR_APP_ID', '1*****')
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', 'L**************')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY', '8**************')
# Create the AipOcr client from the three credentials.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
def get_file_content(filePath):
    """Read the image stored at ``filePath`` and return its bytes.

    Binary mode is used so that the content is returned exactly as stored.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
def file_download(url, file_path):
    """Fetch ``url`` over HTTP and store the response body in ``file_path``.

    Raises:
        requests.HTTPError: if the response has an error status; nothing is
            written in that case.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    r = requests.get(url, timeout=30)
    # Do not save an error page under the spreadsheet's name.
    r.raise_for_status()
    with open(file_path, 'wb') as f:
        f.write(r.content)
def _wait_for_table_url(ocr_client, request_id, attempts=10, interval=1):
    """Poll a recognition job and return the spreadsheet URL, or ``None``.

    ``ocr_client.getTableRecognitionResult(request_id)`` is queried up to
    ``attempts`` times, pausing ``interval`` seconds after every reply that is
    not yet reported as finished. ``None`` is returned when the job never
    finishes within that budget.
    """
    for _ in range(attempts):
        reply = ocr_client.getTableRecognitionResult(request_id)
        # A reply without 'result' is treated like "not finished yet".
        result = reply.get('result', {})
        print(result.get('ret_msg'))
        if result.get('ret_msg') == '已完成':
            return result['result_data']
        time.sleep(interval)
    return None


if __name__ == "__main__":
    # Hide the empty Tk root window; only the folder dialogs are shown.
    root = tk.Tk()
    root.withdraw()
    # A cancelled dialog returns an empty value; stop instead of scanning '/'.
    data_dir = filedialog.askdirectory(title='请选择图片文件夹')
    result_dir = filedialog.askdirectory(title='请选择输出文件夹') if data_dir else ''
    if not data_dir or not result_dir:
        raise SystemExit('未选择文件夹,程序退出。')
    num = 0  # number of spreadsheets downloaded so far
    for name in os.listdir(data_dir):
        image_path = os.path.join(data_dir, name)
        if not os.path.isfile(image_path):
            continue  # skip sub-folders
        # Strip only the last extension so dotted names stay distinct.
        base_name = os.path.splitext(name)[0]
        print ('{0} : {1} 正在处理:'.format(num+1, base_name))
        image = get_file_content(image_path)
        res = client.tableRecognitionAsync(image)
        if 'result' not in res:
            # NOTE(review): presumably an error payload from the service.
            print(res)
            continue
        req_id = res['result'][0]['request_id']
        url = _wait_for_table_url(client, req_id)
        if url is None:
            print ('{0} : {1} 识别超时,已跳过。'.format(num+1, base_name))
            continue
        xls_name = base_name + '.xls'
        file_download(url, os.path.join(result_dir, xls_name))
        num += 1
        print ('{0} : {1} 下载完成。'.format(num, xls_name))
        time.sleep(1)
(3)实现效果
识别的表格图片为:
实现的效果为(注:表格的格式人为调整过,但内容没人为修改):
可以看出,识别的精度还是很高的,只有“Fellow”识别为了“Fel1low”。
(4)其它
百度API批量识别表格
安装OCR Python SDK
支持Python版本:2.7.+ ,3.+
安装使用Python SDK有如下方式:
如果已安装pip,执行pip install baidu-aip即可。
如果已安装setuptools,执行python setup.py install即可。
新建AipOcr
参考如下代码新建一个AipOcr:
from aip import AipOcr
""" 你的 APPID AK SK """
# Placeholders: replace the three strings with the AppID, API Key and
# Secret Key of your own application.
APP_ID = '你的 App ID'
API_KEY = '你的 Api Key'
SECRET_KEY = '你的 Secret Key'
# Create the AipOcr client from the three credentials.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
简单代码实现
import os
from aip import AipOcr
import requests
import time
""" 你的 APPID AK SK """
# Each credential may be supplied through an environment variable so the real
# keys do not have to be stored in this file; the literals are the
# placeholders used when the variable is not set.
APP_ID = os.environ.get('BAIDU_OCR_APP_ID', '1530xxxx')
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', 'fXp9dxxxxxxxxxxxxxxxxxxx')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY', 'Ltvxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
# Create the AipOcr client from the three credentials.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
""" 读取图片 """
def get_file_content(filePath):
    """Return the bytes of the image file at ``filePath`` (binary read)."""
    with open(filePath, 'rb') as fp:
        return fp.read()
"""文件下载"""
def file_download(url, name):
    """Download ``url`` and save it as ``name`` + '.xls'.

    ``name`` is used as given, so a name without a directory part is written
    relative to the current working directory.

    Raises:
        requests.HTTPError: if the response has an error status; no file is
            written in that case.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    res = requests.get(url, timeout=30)
    # Do not store an error page under the spreadsheet's name.
    res.raise_for_status()
    with open(name+'.xls', 'wb') as f:
        f.write(res.content)
IMAGE_DIR = './规划情况表'  # folder that holds the table images
MAX_POLLS = 30  # upper bound on status queries per image

filelist = os.listdir(IMAGE_DIR)
for file in filelist:
    image = get_file_content(os.path.join(IMAGE_DIR, file))
    res = client.tableRecognitionAsync(image)
    if 'result' not in res:
        # NOTE(review): presumably an error payload from the service; report
        # it and continue with the next image instead of raising KeyError.
        print(file + ' failed: ' + str(res))
        continue
    requestId = res['result'][0]['request_id']
    # Bounded polling: a job that never finishes must not hang the script.
    for _ in range(MAX_POLLS):
        time.sleep(3)
        res2 = client.getTableRecognitionResult(requestId)
        msg = res2.get('result', {}).get('ret_msg')
        if msg == '已完成':
            url = res2['result']['result_data']
            # splitext handles extensions of any length (e.g. ".jpeg").
            file_download(url, os.path.splitext(file)[0])
            print(file + ' down')
            break
        time.sleep(1)
    else:
        print(file + ' timed out')
利用Python和百度开放API接口,写一个文字或表格识别软件
代码中GUI绘制用的是PYQT5及相关的模块,代码测试好后用pyinstaller打包成exe使用
打开后输入app_id,api_key,api_secret后选择文件,然后选择图片开始识别,推荐使用高精度的,准确率较高,但是每日免费次数较少
成品自带我自己的免费接口,等哪天我想起来打开百度开放API的时候就关了吧
表格识别因为百度接口原因只能在识别结果网址中下载打开
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'ocr.ui'
#
# Created by: PyQt5 UI code generator 5.11.3
# WARNING! All changes made in this file will be lost!
import sys
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import *
from aip import AipOcr
def get_file(filePath):
    """Return the content of the file at ``filePath`` as bytes."""
    with open(filePath, 'rb') as fp:
        return fp.read()
def message(Title,Text):
    """Show a modal message box with a single confirm button.

    Args:
        Title: text of the window title.
        Text: message displayed in the box.
    """
    # The local is named ``box`` so that it does not shadow this function.
    box = QMessageBox()
    box.addButton(QPushButton('确定'), QMessageBox.YesRole)
    box.setWindowTitle(Title)
    box.setText(Text)
    box.exec_()
class Ui_MainWindow(object):
    """Widgets and button handlers of the "Simple OCR" main window.

    ``setupUi`` builds the window. The image to recognise is chosen either as
    a local file (``choosefile``) or as a web address (``seturl``);
    ``self.mod`` records which one is active: 0 = nothing chosen,
    1 = local file, 2 = web address. ``start`` runs the recognition mode
    selected with the radio buttons and shows the outcome in the result pane.
    """

    # Help text shown in the result pane at start-up and after "clear".
    _HELP_TEXT = ("识别前请填入app_id、api_key和secret_key\r\n"
                  "填入完成后选择本地文件或者输入网址\r\n"
                  "文字识别完成后会显示在这里\r\n"
                  "图片识别完成后是一个链接,打开下载到本地用Excel打开即可\r\n")

    def setupUi(self, MainWindow):
        """Create, lay out and wire all widgets of ``MainWindow``."""
        self.mod = 0
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1024,720)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.gridLayout = QtWidgets.QGridLayout(self.centralwidget)
        self.gridLayout.setObjectName("gridLayout")
        # Result pane (left column).
        self.textEdit = QtWidgets.QTextEdit(self.centralwidget)
        self.textEdit.setObjectName("result")
        self.gridLayout.addWidget(self.textEdit,0,0,50,1)
        # Path of the chosen file; it is filled in by choosefile only, so the
        # field is made read-only (the original called the getter
        # isReadOnly(), which has no effect).
        self.textEdit_2 = QtWidgets.QTextEdit(self.centralwidget)
        self.gridLayout.addWidget(self.textEdit_2,0,1,1,3)
        self.textEdit_2.setObjectName("filepath")
        self.textEdit_2.setReadOnly(True)
        # Web address input.
        self.textEdit_3 = QtWidgets.QTextEdit(self.centralwidget)
        self.gridLayout.addWidget(self.textEdit_3,3,1,1,3)
        self.textEdit_3.setObjectName("url")
        self.pushButton = QtWidgets.QPushButton(self.centralwidget)
        self.gridLayout.addWidget(self.pushButton,2,2,1,1)
        self.pushButton.setObjectName("choose_filepath")
        self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
        self.gridLayout.addWidget(self.pushButton_2,49,3,1,1)
        self.pushButton_2.setObjectName("start_OCR")
        self.pushButton_3 = QtWidgets.QPushButton(self.centralwidget)
        self.gridLayout.addWidget(self.pushButton_3,4,2,1,1)
        self.pushButton_3.setObjectName("set_url")
        self.pushButton_4 = QtWidgets.QPushButton(self.centralwidget)
        self.gridLayout.addWidget(self.pushButton_4,49,1,1,1)
        # Was "set_url", duplicating the object name of pushButton_3.
        self.pushButton_4.setObjectName("clear")
        # Credential inputs with their labels.
        self.textEdit_4 = QtWidgets.QTextEdit(self.centralwidget)
        self.gridLayout.addWidget(self.textEdit_4,5,1,1,2)
        self.textEdit_4.setObjectName("app_id")
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.gridLayout.addWidget(self.label,5,3,1,1)
        self.label.setObjectName("set_app_id")
        self.textEdit_5 = QtWidgets.QTextEdit(self.centralwidget)
        self.gridLayout.addWidget(self.textEdit_5,6,1,1,2)
        # Was "app_id", duplicating the object name of textEdit_4.
        self.textEdit_5.setObjectName("api_key")
        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.gridLayout.addWidget(self.label_2,6,3,1,1)
        self.label_2.setObjectName("set_api_key")
        self.textEdit_6 = QtWidgets.QTextEdit(self.centralwidget)
        self.gridLayout.addWidget(self.textEdit_6,7,1,1,2)
        self.textEdit_6.setObjectName("secret_key")
        self.label_3 = QtWidgets.QLabel(self.centralwidget)
        self.gridLayout.addWidget(self.label_3,7,3,1,1)
        self.label_3.setObjectName("set_secret")
        # Recognition mode selectors; the first one is selected by default.
        self.radiobutton = QtWidgets.QRadioButton(self.centralwidget)
        self.gridLayout.addWidget(self.radiobutton,8,1,1,1)
        self.radiobutton.setChecked(True)
        self.radiobutton_2 = QtWidgets.QRadioButton(self.centralwidget)
        self.gridLayout.addWidget(self.radiobutton_2,8,2,1,1)
        self.radiobutton_3 = QtWidgets.QRadioButton(self.centralwidget)
        self.gridLayout.addWidget(self.radiobutton_3,9,1,1,1)
        self.radiobutton_4 = QtWidgets.QRadioButton(self.centralwidget)
        self.gridLayout.addWidget(self.radiobutton_4,9,2,1,1)
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)
        # Fonts and initial texts.
        self.pushButton.setFont(QtGui.QFont("华文新魏", 15))
        self.pushButton_2.setFont(QtGui.QFont("华文新魏", 15))
        self.pushButton_3.setFont(QtGui.QFont("华文新魏", 15))
        self.pushButton_4.setFont(QtGui.QFont("华文新魏", 15))
        self.textEdit.setFont(QtGui.QFont("", 10))
        self.textEdit.setText(self._HELP_TEXT)
        self.textEdit_2.setFont(QtGui.QFont("华文新魏", 10))
        self.textEdit_2.setText("等待选择文件")
        self.textEdit_3.setFont(QtGui.QFont("华文新魏", 10))
        self.textEdit_3.setText("等待输入网址")
        self.textEdit_4.setFont(QtGui.QFont("华文新魏", 10))
        self.textEdit_4.setText("")#app_id
        self.textEdit_5.setFont(QtGui.QFont("华文新魏", 10))
        self.textEdit_5.setText("")#api_key
        self.textEdit_6.setFont(QtGui.QFont("华文新魏", 10))
        self.textEdit_6.setText("")#api_secert
        self.label.setText("APP_ID")
        self.label.setFont(QtGui.QFont("华文新魏",15))  # font
        self.label.setAlignment(QtCore.Qt.AlignCenter)  # centred
        self.label_2.setText("API_KEY")
        self.label_2.setFont(QtGui.QFont("华文新魏", 15))  # font
        self.label_2.setAlignment(QtCore.Qt.AlignCenter)  # centred
        self.label_3.setText("SECRET")
        self.label_3.setFont(QtGui.QFont("华文新魏", 15))  # font
        self.label_3.setAlignment(QtCore.Qt.AlignCenter)  # centred
        self.radiobutton.setText("文字识别")
        self.radiobutton.setFont(QtGui.QFont("华文新魏", 13))
        self.radiobutton_2.setText("文字识别(高精度)")
        self.radiobutton_2.setFont(QtGui.QFont("华文新魏", 13))
        self.radiobutton_3.setText("表格识别")
        self.radiobutton_3.setFont(QtGui.QFont("华文新魏", 13))
        self.radiobutton_4.setText("表格识别(高精度)")
        self.radiobutton_4.setFont(QtGui.QFont("华文新魏", 13))
        # Button handlers.
        self.pushButton.clicked.connect(self.choosefile)
        self.pushButton_2.clicked.connect(self.start)
        self.pushButton_3.clicked.connect(self.seturl)
        self.pushButton_4.clicked.connect(self.clear)

    def retranslateUi(self, MainWindow):
        """Set the window title and the button captions."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "Simple OCR"))
        self.pushButton.setText(_translate("MainWindow", "选择文件"))
        self.pushButton_2.setText(_translate("MainWindow", "开始识别"))
        self.pushButton_3.setText(_translate("MainWindow","输入网址"))
        self.pushButton_4.setText(_translate("MainWindow", "清空"))

    def choosefile(self):
        """Let the user pick a local image and make it the active input."""
        filename, filetype = QFileDialog.getOpenFileName(directory="C:\\Users\\Administrator\\Desktop",filter="JPG Files (*.jpg);;PNG Files (*.png);;BMP Files (*.bmp)")
        if not filename:
            # Dialog cancelled: keep the previous selection instead of
            # switching to file mode with an empty path.
            return
        self.filename, self.filetype = filename, filetype
        self.textEdit_2.setText(self.filename)
        # The original called str.replace and discarded the result; convert
        # the separators to the platform's native form for real.
        self.filepath = QtCore.QDir.toNativeSeparators(self.filename)
        self.mod = 1
        self.textEdit_3.setText("等待输入网址")

    def start(self):
        """Run the recognition mode selected by the radio buttons."""
        self.app_id = self.textEdit_4.toPlainText().strip()
        self.api_key = self.textEdit_5.toPlainText().strip()
        self.secert = self.textEdit_6.toPlainText().strip()
        # The input fields start out empty, so "missing" means an empty
        # string (the original compared against placeholder texts that are
        # never put into the fields).
        if not (self.app_id and self.api_key and self.secert):
            message('警告','请输入正确的信息')
            return
        self.client = AipOcr(self.app_id,self.api_key,self.secert)
        if self.radiobutton.isChecked():
            # Text recognition: local file or web address.
            self._recognize_text(self.client.basicGeneral, self.client.basicGeneralUrl)
        elif self.radiobutton_2.isChecked():
            # High-accuracy text recognition: local file only.
            self._recognize_text(self.client.basicAccurate, None)
        elif self.radiobutton_3.isChecked():
            # Table recognition: local file only.
            self._recognize_table()
        elif self.radiobutton_4.isChecked():
            message('警告','该模式不可用')

    def _recognize_text(self, recognize_file, recognize_url):
        """Recognise text from the active input and display it.

        ``recognize_file`` is called with the image bytes in file mode;
        ``recognize_url`` is called with the address in web mode, or is
        ``None`` when the selected mode does not accept web images.
        """
        if self.mod == 1:
            self.img = get_file(self.filepath)
            self.result = recognize_file(self.img)
        elif self.mod == 2:
            if recognize_url is None:
                message('警告','该模式不支持网络图片\r\n请选择本地图片')
                return
            self.result = recognize_url(self.url)
        else:
            message('警告','请选择文件或输入网址')
            return
        self._show_words()

    def _show_words(self):
        """Show the recognised lines of ``self.result`` in the result pane."""
        if "words_result" not in self.result:
            # NOTE(review): presumably an error reply (error_code/error_msg)
            # from the service; show it instead of failing with KeyError.
            message('警告', '识别失败: ' + str(self.result.get("error_msg", self.result)))
            return
        self.words = self.result["words_result"]
        self.content = "".join(word["words"] + "\r\n" for word in self.words)
        self.textEdit.setText("")
        self.textEdit.insertPlainText(self.content)

    def _recognize_table(self):
        """Recognise a table from the chosen file and show its download URL."""
        if self.mod == 1:
            self.img = get_file(self.filepath)
            reply = self.client.tableRecognition(self.img)
            if "result" not in reply:
                # NOTE(review): presumably an error reply from the service.
                message('警告', '识别失败: ' + str(reply.get("error_msg", reply)))
                return
            self.result = reply["result"]
            self.result_data = self.result["result_data"]
            self.textEdit.setText("网址为: "+ self.result_data + "\r\n请下载")
        elif self.mod == 2:
            message('警告','该模式不支持网络图片\r\n请选择本地图片')
        else:
            message('警告','请选择文件或输入网址')

    def seturl(self):
        """Make the typed web address the active input."""
        url = self.textEdit_3.toPlainText().strip()
        # Reject both the untouched placeholder and an empty field.
        if not url or url == "等待输入网址":
            message('警告','请输入网址')
            return
        self.url = url
        self.mod = 2
        self.textEdit_2.setText("等待选择文件")

    def clear(self):
        """Reset the result pane to the initial help text."""
        self.textEdit.setText(self._HELP_TEXT)
if __name__ == '__main__':
    # Build the application object, the main window and its UI, then run the
    # Qt event loop and exit with its return code.
    app = QtWidgets.QApplication(sys.argv)
    mainwindow = QtWidgets.QMainWindow()
    ui = Ui_MainWindow()
    ui.setupUi(mainwindow)
    mainwindow.show()
    sys.exit(app.exec_())