import os
import pytesseract
from PIL import Image

# Batch OCR script: run Tesseract over every JPEG in ./pic and append the
# recognised text of each image, in file-name order, to out.txt.

# Point pytesseract at the default Windows install location of Tesseract.
# Only override when that executable actually exists; otherwise leave
# pytesseract's own default in place so a `tesseract` binary on PATH still
# works (e.g. on Linux/macOS or a non-default Windows install).
tesseract_exe = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(tesseract_exe):
    pytesseract.pytesseract.tesseract_cmd = tesseract_exe

# Collect the JPEG files in the picture directory.
# - The extension check is case-insensitive so that 'IMG_0001.JPG' and
#   '.jpeg' files are not silently skipped.
# - os.listdir() returns names in arbitrary order, so sort them to make the
#   order of the text in out.txt reproducible.
pic_dir = './pic'
pic_files = sorted(
    name for name in os.listdir(pic_dir)
    if name.lower().endswith(('.jpg', '.jpeg'))
)

# Recognise the text of each image and write it to the output file.
with open('out.txt', 'w', encoding='utf-8') as f:
    for pic_file in pic_files:
        img_path = os.path.join(pic_dir, pic_file)
        try:
            with Image.open(img_path) as img:
                text = pytesseract.image_to_string(img, lang='eng')
        except Exception as e:
            # Deliberately broad: this is a best-effort batch job, so one
            # unreadable image or OCR failure is reported and skipped rather
            # than aborting the whole run.
            print(f"图片 {img_path} 处理出错:{e}")
        else:
            f.write(text)

print('文字提取完成')