自动标注生词

原创
一切从心 2023-12-18 19:13:25 ©著作权
©著作权归作者所有：来自51CTO博客作者一切从心的原创作品，请联系作者获取转载授权，否则将追究法律责任
import multiprocessing
import os
import re
import time
import sqlite3
import ebooklib
from ebooklib import epub
import mobi
import PyPDF2
import docx
from docx import Document
from bs4 import BeautifulSoup
import html2text
import tkinter as tk
import tkinter.scrolledtext
from tkinter import ttk, filedialog, messagebox
import Pmw
from tqdm import tqdm
import threading
import shutil
from collections import Counter
import asyncio
import timeit
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
from queue import Queue

win = tk.Tk() # create the main application window
win.title('安远英语')
win.geometry('1100x700')
win.resizable(False, False)  # fixed-size window in both directions
# Timestamps taken once at start-up: iTime is embedded in output file names,
# iDay is appended to the names of per-book database tables.
iTime = time.strftime('%Y_%m_%d_%H%M%S',time.localtime())
iDay = time.strftime('%Y%m%d',time.localtime())
# One notebook with five tabs; each tab is a plain Frame that the rest of the
# file uses as the parent for its widgets.
notebook = ttk.Notebook()
Generation = tk.Frame()
Power = tk.Frame()
Import = tk.Frame()
Exam = tk.Frame()
Note = tk.Frame()
# Tabs appear in the order they are added here.
notebook.add(Generation, text='生成')
notebook.add(Power, text='力量')
notebook.add(Import, text='导入')
notebook.add(Exam, text='考场')
notebook.add(Note, text='批注')
notebook.pack(padx=10, pady=5, fill=tk.BOTH, expand=True)

#------------------ the two main display panes
# Both panes are created without an explicit parent and positioned with
# absolute coordinates.
leftWindow = tk.scrolledtext.ScrolledText(width=60, height=23, font='1', foreground='blue')
leftWindow.place(x=20, y=125)
rightWindow = tkinter.scrolledtext.ScrolledText(width=40, height=20,font='1',foreground='blue')
rightWindow.place(x=650,y=185)

#------------------登录数据库标准函数
def loginDB():
    """Return the names of all tables in ``anyuandic.db``, sorted by name.

    A new connection is opened for every call and is always closed again,
    even when the query raises.

    Returns:
        list[str]: the ``name`` column of ``sqlite_master`` for every table.
    """
    conn = sqlite3.connect('anyuandic.db')
    try:
        rows = conn.execute(
            "select name from sqlite_master where type = 'table' order by name"
        ).fetchall()
    finally:
        # Read-only query: nothing to commit, but the handle must not leak.
        conn.close()
    return [row[0] for row in rows]



#------------------保存批注书籍
# Widgets on the Note tab for choosing where annotated books are saved.
posBookLb=tk.Label(Note,text='请选择保存批注书籍目录')
posBookLb.place(x=20,y=565)
posBookpathTx=tk.Text(Note,width=50, height=1) # shows the currently selected directory
posBookpathTx.place(x=20,y=625)
posBookpathTx.insert(tk.END, 'I:/Ayon')  # default output directory
def selPosbookDir():
    """Let the user pick the output directory and show it in ``posBookpathTx``.

    When the dialog is cancelled (an empty result) the path that is already
    displayed is kept instead of being wiped.
    """
    SavePath=filedialog.askdirectory()
    if not SavePath:
        return
    posBookpathTx.delete('1.0', tk.END)
    posBookpathTx.insert(tk.END, SavePath)
posBookpathBt=ttk.Button(Note,text='浏览保存书文件夹', command=selPosbookDir)
posBookpathBt.place(x=20,y=590)

#------------------书籍解析成TXT格式
class Books:
    """Convert an e-book file (pdf, epub, txt or mobi) to plain text.

    Attributes:
        file: path of the book file.
        bookname: name of the book as supplied by the caller.
        format: file extension without the dot, e.g. ``'pdf'``.
    """

    def __init__(self,oriBooks,oribookname,oribookformat):
        self.file = oriBooks
        self.bookname = oribookname
        self.format = oribookformat

    def mybook(self):
        """Return the full text of the book as one string.

        Raises:
            ValueError: if ``self.format`` is not one of pdf/epub/txt/mobi
                (previously this case silently returned ``None``).
        """
        fmt = str(self.format).lower()
        if fmt == 'pdf':
            return self._read_pdf()
        if fmt == 'epub':
            return self._read_epub()
        if fmt == 'txt':
            with open(self.file, 'r', encoding='utf-8') as txtFile:
                return txtFile.read()
        if fmt == 'mobi':
            return self._read_mobi()
        raise ValueError('unsupported book format: %r' % (self.format,))

    def _read_pdf(self):
        """Extract the text of every PDF page, advancing a progress bar per page."""
        pdfPb = tkinter.ttk.Progressbar(win)  # progress bar for the PDF read
        pdfPb.place(x=250, y=37)
        pageTexts = []
        with open(self.file, 'rb') as pdfFile:
            pdfReader = PyPDF2.PdfReader(pdfFile)
            pageNo = len(pdfReader.pages)
            pdfPb['value'] = 0
            pdfPb['maximum'] = pageNo
            for index in range(pageNo):
                pageTexts.append(pdfReader.pages[index].extract_text())
                pdfPb['value'] += 1
                win.update()  # keep the window responsive while reading
        return ''.join(pageTexts)

    def _read_epub(self):
        """Convert every document item of the epub to text and concatenate them.

        The text is accumulated in memory. The earlier implementation
        appended to ``<bookname>.txt`` in the working directory and deleted
        it afterwards, which picked up stale content from, and then removed,
        any existing file of that name.
        """
        chapters = []
        for item in epub.read_epub(self.file).get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue
            soup = BeautifulSoup(item.get_content(), 'xml')
            for anchor in soup.find_all('a'):
                del anchor['href']  # drop link targets, keep the link text
            for image in soup.find_all('img'):
                image.decompose()
            chapters.append(html2text.html2text(str(soup)))
        return ''.join(chapters)

    def _read_mobi(self):
        """Extract the mobi to a temporary HTML file and convert it to text."""
        tempDir, tempBook = mobi.extract(self.file)
        try:
            with open(tempBook, 'r', encoding='utf-8') as mobiReader:
                return html2text.html2text(mobiReader.read())
        finally:
            shutil.rmtree(tempDir)  # remove the extraction directory even on failure

#------------------从转换的TXT书籍中分析出词汇表
class VacabAnalysis:
    """Turn a book's text into a word-frequency list and wire it into the GUI.

    Attributes:
        book: the book text.
        bookname: book name; combined with ``iDay`` it names the database table.
        bookformat: original file format, used in output file names.
    """

    def __init__(self,txtBook,oribookname,oribookformat):
        self.book = txtBook
        self.bookname = oribookname
        self.bookformat = oribookformat

    @staticmethod
    def _row(number, word, width, tail):
        """Format one numbered line: number padded to 6, word padded to *width*."""
        number = str(number)
        return ('【' + number + '】' + (6 - len(number)) * ' '
                + word + (width - len(word)) * ' ' + tail + '\n')

    def pureWords(self):
        """Count the words, store them in the database and build the GUI controls.

        Steps:
          1. Count lower-cased, whitespace-separated tokens that consist only
             of ``a-z`` and sort them by descending frequency.
          2. (Re)create table ``<bookname><iDay>`` in ``anyuandic.db``.
          3. Split the distinct frequencies into 25 levels and create a slider
             that lists the words of, and up to, the selected level.
          4. List all words in the left pane and offer buttons that save the
             full list, or the words missing from ``AYDic``, as .docx files.

        NOTE(review): geBookSelector calls this from a worker thread while it
        creates Tk widgets — confirm that is safe on the target platform.
        """
        words = self.book.lower().split()  # lower-case so capitalised words are counted too
        # Counter keys are already unique; keep only pure a-z tokens.
        sortedTwords = sorted(
            (item for item in Counter(words).items() if re.match(r'^[a-z]+$', item[0])),
            key=lambda item: item[1], reverse=True)
        UniWordRank = sorted({count for _, count in sortedTwords})  # distinct frequencies, ascending

        # ---- store the list in the database ------------------------------
        tableName = self.bookname + iDay
        quotedTable = '"' + tableName.replace('"', '""') + '"'
        conn = sqlite3.connect('anyuandic.db')
        try:
            # Replace today's table for this book if it already exists. The
            # old check looked for `bookname` (without the date suffix) and
            # dropped that unrelated table instead.
            conn.execute('drop table if exists ' + quotedTable)
            conn.execute('create table ' + quotedTable + ' (Word TEXT,Frequency TEXT);')
            conn.executemany('INSERT INTO ' + quotedTable + ' VALUES (?, ?)', sortedTwords)
            conn.commit()
        finally:
            conn.close()
        AnyuanDB.CboxUpdate()
        AnyuanDB.cbox.set(tableName)

        # ---- frequency levels --------------------------------------------
        MyRank = ['博二', '博一', '研三', '研二', '研一', '大四', '大三', '大二', '大一',
                  '高三', '高二', '高一', '初三', '初二', '初一',
                  '小六', '小五', '小四', '小三', '小二', '小一',
                  '大班', '中班', '小班', '婴儿']
        # FiRank[k] is the lowest frequency belonging to level k. With fewer
        # distinct frequencies than levels everything falls into one level.
        step = len(UniWordRank) // len(MyRank)
        if step:
            FiRank = [UniWordRank[k * step] for k in range(len(MyRank))]
        else:
            FiRank = UniWordRank[:1]
        MyScale = {x for x in range(1,26)}
        totalCount = sum(count for _, count in sortedTwords)
        VacabPower = tk.StringVar()
        VacabDetails = tk.StringVar()
        # The two result labels are created once; ValueCheck only updates
        # their variables (a new pair used to be created on every slider move).
        tk.Label(Import,width=60, height=1, bg='SystemButtonFace', anchor='w', font=('微软雅黑',10), foreground='blue',
                 textvariable=VacabPower).place(x=650, y=515)
        tk.Label(Import,width=60, height=1, bg='SystemButtonFace', anchor='w', font=('微软雅黑',16), foreground='red',
                 textvariable=VacabDetails).place(x=0, y=20)

        def ValueCheck(value):
            """Slider callback: list the words of the selected level and above."""
            NewValue = min(MyScale, key=lambda x: abs(x - float(value)))
            level = int(NewValue)
            rightWindow.delete('1.0', tk.END)
            leftWindow.delete('1.0', tk.END)

            if FiRank:
                # Clamp the index so short books cannot raise IndexError.
                lower = FiRank[min(level, len(FiRank)) - 1]
                # The top level has no upper bound.
                upper = FiRank[level] if level < len(FiRank) else None
            else:
                lower, upper = 0, None
            TotalWords = [x for x in sortedTwords if x[1] >= lower]
            # Strict upper bound: a word whose frequency opens the next level
            # belongs to that level only.
            SecWords = [x for x in TotalWords if upper is None or x[1] < upper]

            leftWindow.insert(tk.END, ''.join(
                VacabAnalysis._row(no, x[0], 15, str(x[1]) + '次')
                for no, x in enumerate(SecWords, 1)))
            rightWindow.insert(tk.END, ''.join(
                VacabAnalysis._row(no, x[0], 15, str(x[1]) + '次')
                for no, x in enumerate(TotalWords, 1)))

            covered = sum(x[1] for x in TotalWords)
            Power = covered / totalCount if totalCount else 0

            VacabPower.set(str(MyRank[level-1])+'水平的词汇量' + str(len(TotalWords)) + '，掌握的话'
                           '可以读懂本书' + '%.f%%' % (Power * 100) + '的内容！')
            VacabDetails.set('【'+str(MyRank[level-1])+'专有词汇：'+str(len(SecWords))+'个】')
            slider.set(NewValue)

        slider = tk.Scale(Import,label='博二     博一    研三    研二     研一    大四    大三     大二     大一    高三     高二'
                                '    高一    初三     初二     初一    小六    小五     小四    小三    小二     小一'
                         '     大班     中班    小班    婴儿', from_=min(MyScale), to=max(MyScale), orient=tk.HORIZONTAL,
                          length=1040, showvalue=True, tickinterval=1, resolution=1, troughcolor='green',
                          command=ValueCheck)
        slider.place(x=10, y=570)

        # ---- full word list: left pane and Word document -------------------
        VacabDoc = Document()
        lines = []
        for no, x in enumerate(sortedTwords, 1):
            lines.append(VacabAnalysis._row(no, x[0], 20, str(x[1])))
            VacabDoc.add_paragraph().add_run(x[0])
        leftWindow.insert(tk.END, ''.join(lines))

        VacabOut = tk.Label(Generation,text='保存书籍词汇表或者和总表差异部分表')
        VacabOut.place(x=650, y=565)
        VacabOutPath = tk.Text(Generation,width=50, height=1)
        VacabOutPath.place(x=650, y=625)
        VacabOutPath.insert(tk.END, r'E:/python/output')

        def outputDir():
            """Return the chosen output directory, creating it when missing."""
            Path = VacabOutPath.get(1.0, tk.END).strip('\n')
            if Path:
                os.makedirs(Path, exist_ok=True)
            return Path

        def VacabPath():
            """Pick the output directory; a cancelled dialog keeps the old one."""
            SavePath = filedialog.askdirectory()
            if not SavePath:
                return
            VacabOutPath.delete('1.0', tk.END)
            VacabOutPath.insert(tk.END, SavePath)

        def VacabSave():
            """Save the full word list as a .docx file in the output directory."""
            Path = outputDir()
            VacabDoc.save(Path + '/' + self.bookname + '_' + self.bookformat + '_Vocab' + '_' + iTime + '.docx')

        BrowseVacabOut = ttk.Button(Generation,text='选择保存文件夹...', command=VacabPath)
        BrowseVacabOut.place(x=650, y=590)
        VacabOutSave = ttk.Button(Generation,text='保存书词表', command=VacabSave)
        VacabOutSave.place(x=760, y=590)

        def DVacabSave():
            """Show and save the words of this book that are not in ``AYDic``."""
            DVacabDoc = Document()
            conn = sqlite3.connect('anyuandic.db')
            try:
                known = {row[0] for row in conn.execute('select * from AYDic')}
            finally:
                conn.close()
            nVacab = [x[0] for x in sortedTwords if x[0] not in known]
            rightWindow.delete(1.0,tk.END)
            lines = []
            for no, word in enumerate(nVacab, 1):
                lines.append(VacabAnalysis._row(no, word, 20, ''))
                DVacabDoc.add_paragraph().add_run(word)
            rightWindow.insert(tk.END, ''.join(lines))
            Path = outputDir()
            DVacabDoc.save(Path + '/'+ self.bookname + '_' + self.bookformat + '_DiffVocab'+ '_'+ iTime+'.docx')

        DVacabOutSave = ttk.Button(Generation,text='保存暗词表', command=DVacabSave)
        DVacabOutSave.place(x=860, y=590)


#第一部 词汇的生成
def geBookSelector():
    """Ask for a book file, convert it to text and run the vocabulary analysis.

    The book name is the file's base name without extension, with every run
    of non-word characters replaced by ``_`` so it can be used in a table
    name; the format is the lower-cased extension. Both used to be taken from
    the first regex match in the full path, which broke on directories or
    file names containing dots.

    Returns:
        The book text, or ``None`` when the dialog is cancelled.
    """
    geBookPath = tk.Text(Generation, width=70, height=1)  # shows the selected file path
    geBookPath.place(x=115, y=65)
    geBookPath.insert(tk.END, 'E:/python/Generation')
    # askopenfilename returns only the path, so no file handle is left open.
    geBooks = filedialog.askopenfilename(filetypes=[('book', '*.txt *.pdf *.epub *.mobi')])
    if not geBooks:
        return None
    geBookPath.delete('1.0', tk.END)
    geBookPath.insert(tk.END, geBooks)
    stem, extension = os.path.splitext(os.path.basename(geBooks))
    bookname = re.sub(r'\W+', '_', stem)
    bookformat = extension.lstrip('.').lower()
    mybook=Books(geBooks,bookname,bookformat).mybook()  # extract the book text
    VacabAnalysis(mybook,bookname,bookformat).pureWords()  # build the vocabulary analysis
    return mybook
def GeBookSelector():
    """Button callback: run ``geBookSelector`` on a background daemon thread."""
    worker = threading.Thread(target=geBookSelector, daemon=True)
    worker.start()


# Button on the Generation tab that starts the book analysis.
wordpowerSBook = ttk.Button(Generation, command=GeBookSelector, text='选择要分析书')
wordpowerSBook.place(y=60, x=10)


def _split_text(text, num_parts=8):
    """Split *text* into at most *num_parts* consecutive chunks at whitespace.

    The whitespace positions are divided into *num_parts* groups of nearly
    equal size; every chunk after the first starts at the first whitespace
    position of its group. The chunks always join back to *text* exactly.
    Text with fewer whitespace characters than *num_parts* yields fewer
    chunks; text without any whitespace yields a single chunk.
    """
    spaces = [i for i, char in enumerate(text) if char.isspace()]
    size, extra = divmod(len(spaces), num_parts)
    starts = []
    position = 0
    for part in range(num_parts):
        count = size + 1 if part < extra else size
        if count:  # an empty group contributes no boundary
            starts.append(spaces[position])
        position += count
    if not starts:
        return [text]
    starts[0] = 0  # the first chunk begins at the start of the text
    bounds = starts + [len(text)]  # the last chunk runs to the very end
    return [text[a:b] for a, b in zip(bounds, bounds[1:])]


def noteBook():
    """Ask for a book, convert it to text and split it into up to eight chunks.

    Each chunk is also written to ``<index>.txt`` in the working directory.

    Returns:
        list[str]: the chunks in order; empty when the dialog is cancelled.
    """
    oBookPath = tk.Text(Note, width=50, height=1)  # shows the selected file path
    oBookPath.place(x=115, y=65)
    oBookPath.insert(tk.END, 'E:/python/OriBook')
    bookLabel = tk.Label(Note, text='要批注书籍')
    bookLabel.place(x=20, y=65)
    # askopenfilename returns only the path, so no file handle is left open.
    oriBooks = filedialog.askopenfilename(filetypes=[('book', '*.txt *.pdf *.epub *.mobi')])
    if not oriBooks:
        return []
    oBookPath.delete('1.0', tk.END)
    oBookPath.insert(tk.END, oriBooks)
    # Derive name and format from the base name only, so dots elsewhere in
    # the path cannot be mistaken for the extension.
    stem, extension = os.path.splitext(os.path.basename(oriBooks))
    bookname = re.sub(r'\W+', '_', stem)
    bookformat = extension.lstrip('.').lower()
    mybook=Books(oriBooks,bookname,bookformat).mybook()  # extract the book text

    ym = _split_text(mybook, 8)
    for index, chunk in enumerate(ym):
        with open(f'{index}.txt','w',encoding='utf-8') as f:
            f.write(chunk)
    return ym

# Button on the Note tab (label reads "split into eight") that runs noteBook.
splitBook=tk.Button(Note, text='一分为八',command=noteBook,background='purple', foreground='white')
splitBook.place(x=100, y=30)


#-----------------导入及分析书籍词汇-----------------------------------------------
# Widgets on the Power tab for choosing the word list to import.
SelVabLb = tk.Label(Power,text='请选择需要导入数据库的词汇表')
SelVabLb.place(x=10, y=10)
OriVocabPath = tk.Text(Power, width=50, height=1) # shows the path of the selected file
OriVocabPath.place(x=10,y=70)
OriVocabPath.insert(tk.END, 'E:/python/OriVocab')  # placeholder shown until a file is chosen

def GetBiVocab():
    """Import a two-column word list from a .docx file into ``anyuandic.db``.

    The document's paragraphs are read as alternating pairs (paragraphs
    0, 2, 4, ... go to column ``English``, paragraphs 1, 3, 5, ... to column
    ``Chinese``) and stored in a new table named ``<file name>_Dic``, where
    listed punctuation in the file name is replaced by ``_`` and spaces are
    removed. If that table already exists nothing is imported. Afterwards the
    row count of every table that existed before the import is listed in the
    right pane.
    """
    # askopenfilename returns only the path; a .docx must not be opened as text.
    stfile = filedialog.askopenfilename(filetypes=[('document', '*.docx')])
    if not stfile:
        return
    OriVocabPath.delete('1.0', tk.END)
    OriVocabPath.insert(tk.END, stfile)
    stfilet1 = stfile.split('/')[-1].split('.')[0]
    table = str.maketrans("@-+[]【】《》^…&$#%!~`。，()（）", len("@-+[]【】《》^…&$#%!~`。，()（）") * '_')
    bookname = stfilet1.translate(table).replace(' ', '')
    TextList = [paragraph.text for paragraph in docx.Document(stfile).paragraphs]
    MyDataBase = list(zip(TextList[0::2], TextList[1::2]))

    def quote(name):
        return '"' + name.replace('"', '""') + '"'

    # The duplicate check must use the name that is actually created.
    MyTable = bookname + '_Dic'
    conn = sqlite3.connect('anyuandic.db')
    try:
        cur = conn.cursor()
        TableNamet = cur.execute("select name from sqlite_master where type = 'table' order by name").fetchall()
        TableName = [x[0] for x in TableNamet]

        if MyTable not in TableName:
            cur.execute('create table ' + quote(MyTable) + ' (English TEXT,Chinese TEXT);')
            progressbarOne = tkinter.ttk.Progressbar(win,length=500)
            progressbarOne.place(x=110, y=700)
            progressbarOne['value'] = 0
            progressbarOne['maximum'] = len(MyDataBase)
            pbar = tqdm(total=len(MyDataBase))
            # Placeholders keep entries containing quotes from breaking the SQL.
            insertSQL = 'INSERT INTO ' + quote(MyTable) + ' VALUES (?, ?)'
            for x in MyDataBase:
                cur.execute(insertSQL, x)
                pbar.update(1)
                progressbarOne['value'] += 1
                win.update()
            pbar.close()
            rightWindow.delete(1.0,tk.END)
            rightWindow.insert(tk.END,'词汇表已经导入完成。')
        else:
            rightWindow.delete(1.0,tk.END)
            rightWindow.insert(tk.END,'请勿使用和现有词汇表相同的名字。')
        # List the number of rows in each table.
        for x in TableName:
            rowCount = cur.execute('select count(*) from ' + quote(x)).fetchone()[0]
            rightWindow.insert(tk.END,x + '词汇量:' + str(rowCount)+'\n')
        conn.commit()
    finally:
        conn.close()
    AnyuanDB.CboxUpdate()

def getBiVocabNt():
    """Button callback: run ``GetBiVocab`` on a background daemon thread."""
    worker = threading.Thread(target=GetBiVocab, daemon=True)
    worker.start()


# Button on the Power tab that starts the word-list import.
BrowVocabBt = ttk.Button(Power, command=getBiVocabNt, text='浏览导入词汇表路径')
BrowVocabBt.place(y=35, x=10)

class AnyuanDB: # database combobox and the actions that work on the selected table
    """Combobox listing the tables of ``anyuandic.db`` plus table-level actions.

    The class body runs once at import time: it reads the table names, builds
    the combobox and creates the action buttons. All methods are static and
    address the shared widgets through ``AnyuanDB.<name>``.

    Inside the class body the callbacks are passed as ``<name>.__func__``
    because a ``staticmethod`` object itself is only callable on
    Python 3.10 and later.
    """

    # Read the table names once to fill the combobox.
    conn = sqlite3.connect('anyuandic.db')
    cur = conn.cursor()
    TableName = cur.execute("select name from sqlite_master where type = 'table' order by name").fetchall()
    DicTables = [x[0] for x in TableName]
    conn.close()
    values=['sqlite3 database']
    cbox = ttk.Combobox(win,width=55,values=values)
    cbox.place(x=650, y=135)
    cbox['value'] = DicTables
    cboxname = tk.Label(text='书籍&个人词汇表')
    cboxname.place(x=650, y=108)

    @staticmethod
    def _quote(name):
        """Return *name* as a double-quoted SQL identifier."""
        return '"' + str(name).replace('"', '""') + '"'

    @staticmethod
    def CboxUpdate():
        """Reload the combobox entries from the database."""
        AnyuanDB.cbox['value'] = loginDB()

    @staticmethod
    def ShowVacab(event):
        """Show the selected table: structure in the right pane, rows in the left.

        Args:
            event: Tk event (unused); ``None`` when called directly.
        """
        selected = AnyuanDB.cbox.get()
        if not selected:
            return
        quoted = AnyuanDB._quote(selected)
        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            TabStru = cur.execute('PRAGMA table_info(' + quoted + ')').fetchall()
            iVacab = cur.execute('select * from ' + quoted).fetchall()
        finally:
            conn.close()
        rightWindow.delete('1.0', tk.END)
        rightWindow.insert('insert', '***词汇表详情***\n')
        rightWindow.insert('insert', '词数:'+str(len(iVacab))+'\n')
        rightWindow.insert('insert', '字段:类型\n')
        for x in TabStru:
            rightWindow.insert('insert', x[1]+':'+x[2]+'\n')
        leftWindow.delete('1.0', tk.END)
        # Build the listing first and insert it in one call.
        lines = []
        for number, row in enumerate(iVacab, 1):
            number = str(number)
            first = str(row[0])
            lines.append('[' + number + ']' + (8 - len(number)) * ' '
                         + first + (30 - len(first)) * ' ' + str(row[1]) + '\n')
        leftWindow.insert('insert', ''.join(lines))
    cbox.bind("<<ComboboxSelected>>", ShowVacab.__func__)  # show the table when it is selected

    @staticmethod
    def DelTab():
        """Drop the selected table after confirmation and select its neighbour.

        ``AYDic`` is never dropped. A selection that is not an existing table
        is ignored. After the drop the previous entry is selected (or the
        first one when the first entry was dropped).
        """
        result = tk.messagebox.askyesnocancel(title='数据表删除提示', message='删除不可恢复，确认要继续吗？')
        if not result:
            return
        target = AnyuanDB.cbox.get()
        if target == 'AYDic':
            rightWindow.delete(1.0,tk.END)
            rightWindow.insert(tk.END,'不能删除安远词典！')
            return
        tableName = loginDB()
        if target not in tableName:
            return
        lostNo = tableName.index(target)
        conn = sqlite3.connect('anyuandic.db')
        try:
            conn.execute('drop table ' + AnyuanDB._quote(target))
            conn.commit()
        finally:
            conn.close()
        AnyuanDB.CboxUpdate()
        if AnyuanDB.cbox['values']:
            AnyuanDB.cbox.current(max(lostNo - 1, 0))
            AnyuanDB.ShowVacab(event=None)
        else:
            AnyuanDB.cbox.set('')

    DelTable = tk.Button(Generation,text='删除词汇表', command=DelTab.__func__, background='green', foreground='white')
    DelTable.place(x=960, y=40)

    @staticmethod
    def newTab():
        """Create ``<combobox text>_diy`` from the rows shown in the left pane.

        Lines of the left pane that consist of exactly three whitespace-
        separated fields contribute their second and third field as one row.
        Nothing is created when a table with the typed name, or with the
        ``_diy`` name, already exists.
        """
        typedName = AnyuanDB.cbox.get()
        diyTabName = typedName + '_diy'
        existing = loginDB()
        if typedName in existing or diyTabName in existing:
            rightWindow.delete(1.0,tk.END)
            rightWindow.insert(tk.END,'已经存在同名词汇表！')
            return
        OriIDic = leftWindow.get(1.0, tk.END).split('\n')
        MidIDic = [re.findall(r'\S+', x) for x in OriIDic if x != '']
        FiIDic = [(x[1], x[2]) for x in MidIDic if len(x) == 3]
        quoted = AnyuanDB._quote(diyTabName)
        conn = sqlite3.connect('anyuandic.db')
        try:
            conn.execute('create table ' + quoted + ' (English TEXT,Chinese TEXT);')
            conn.executemany('INSERT INTO ' + quoted + ' VALUES (?, ?)', FiIDic)
            conn.commit()
        finally:
            conn.close()
        AnyuanDB.CboxUpdate()
        AnyuanDB.cbox.set(diyTabName)  # select the table that was actually created

    newTable = tk.Button(Generation,text='新建词汇表', command=newTab.__func__, background='green', foreground='white')
    newTable.place(x=700, y=40)

    @staticmethod
    def GUIDesBook(file_tuple):
        """Annotate one piece of text with the given word explanations.

        Args:
            file_tuple: ``(ivacabt, file)`` where ``ivacabt`` is a list of
                ``[word, explanation]`` pairs and ``file`` is the text.

        Every occurrence of a word surrounded by single spaces gets
        ``(explanation)`` appended. The result is appended to
        ``<output dir>/<selected table>_<iTime>.txt``.

        Returns:
            tuple: ``(ivacabt, annotated text)``; ``ivacabt`` is not modified.

        NOTE(review): main() runs this inside multiprocessing workers while
        it uses Tk widgets created in the parent process — confirm this
        works on the target platform.
        NOTE(review): runs of four spaces are turned into line breaks only
        when a word is *not* found in the text — confirm that is intended.
        """
        ivacabt, file = file_tuple
        rightWindow.delete(1.0, tk.END)
        for word, meaning in ivacabt:
            rightWindow.insert(tk.END, word + ':' + str(meaning) + '\n')
            padded = ' ' + word + ' '  # match whole words delimited by spaces
            if padded in file:
                file = file.replace(padded, ' ' + word + '(' + meaning + ') ')
            elif '    ' in file:
                file = file.replace('    ', '\r\n')

        outPath = (posBookpathTx.get(1.0,tk.END).strip('\n') +'/'+ AnyuanDB.cbox.get()+'_'+iTime+'.txt')
        with open(outPath, 'a+', encoding='utf-8') as myfile:
            myfile.write(file)
        return ivacabt,file

    @staticmethod
    def main():
        """Annotate a book with the explanations of the words in the left pane.

        Collects the pure a-z tokens shown in the left pane, looks them up in
        ``AYDic``, lets the user choose a book (``noteBook``) and annotates
        its chunks in a process pool.
        """
        FiIDic = set()
        for token in leftWindow.get(1.0,tk.END).split(' '):
            FiIDic.update(re.findall(r'^[a-z]+$', token))
        conn = sqlite3.connect('anyuandic.db')
        try:
            ivacab = conn.execute('select * from AYDic').fetchall()
        finally:
            conn.close()
        ivacabt = [[x[0], x[1]] for x in ivacab if x[0] in FiIDic]

        filenames = noteBook()
        if not filenames:
            return
        processes = multiprocessing.cpu_count()
        with multiprocessing.Pool(processes) as p:  # the with-block shuts the pool down
            p.map(AnyuanDB.GUIDesBook, [(ivacabt, filename) for filename in filenames])

    @staticmethod
    def GeneralVacab():
        """Merge the rows of all tables except ``General`` into table ``General``.

        When ``General`` does not exist it is created with every distinct row.
        Otherwise only rows that are not yet in ``General`` are added and
        listed in the right pane. The earlier code re-inserted the rows that
        were already present, and its index arithmetic skipped the table after
        ``General`` while reading ``General`` itself.
        """
        existed = False
        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            TableNamet = cur.execute("select name from sqlite_master where type = 'table' order by name").fetchall()
            DicTables = [x[0] for x in TableNamet]
            SumNow = set()
            for name in DicTables:
                if name != 'General':
                    SumNow |= set(cur.execute('select * from ' + AnyuanDB._quote(name)).fetchall())

            if 'General' not in DicTables:
                cur.execute('create table General (Word TEXT,Frequency TEXT);')
                cur.executemany('INSERT INTO General VALUES (?, ?)', SumNow)
            else:
                existed = True
                SumPast = set(cur.execute('select * from General').fetchall())
                SumNew = SumNow - SumPast
                rightWindow.delete(1.0,tk.END)
                for x in SumNew:
                    rightWindow.insert(1.0, str(x)+'\n')
                cur.executemany('INSERT INTO General VALUES (?, ?)', SumNew)
            conn.commit()
        finally:
            conn.close()
        AnyuanDB.CboxUpdate()
        AnyuanDB.cbox.set('General')
        if existed:
            AnyuanDB.ShowVacab(event=None)

    @staticmethod
    def GeneralVacabNt():
        """Button callback: run ``GeneralVacab`` on a background daemon thread."""
        thread = threading.Thread(target=AnyuanDB.GeneralVacab, args=())
        thread.daemon = True
        thread.start()
    GeneralVacabBt = tk.Button(Generation,text='词汇汇总', command=GeneralVacabNt.__func__, background='green', foreground='white')
    GeneralVacabBt.place(x=800, y=40)
    GeneralVacabBtTips = Pmw.Balloon(win)  # tooltip object attached to the main window
    GeneralVacabBtTips.bind(GeneralVacabBt, '主要是汇总所有书籍的词汇到一张名叫General的词汇表！')

    @staticmethod
    def generalDic():
        """Rebuild ``AYDic`` as the union of itself and every ``*_Dic*`` table."""
        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            TableNamet = cur.execute("select name from sqlite_master where type = 'table' order by name").fetchall()
            DicTables = [x[0] for x in TableNamet if '_Dic' in x[0]]
            AYDics = set(cur.execute('select * from AYDic').fetchall())
            for Dic in DicTables:
                AYDics |= set(cur.execute('select * from ' + AnyuanDB._quote(Dic)).fetchall())
            cur.execute('drop table AYDic')
            cur.execute('create table AYDic (English TEXT, Chinese TEXT);')
            cur.executemany('INSERT INTO AYDic VALUES (?, ?)', AYDics)
            conn.commit()
        finally:
            conn.close()
        AnyuanDB.CboxUpdate()
        AnyuanDB.cbox.set('AYDic')
        rightWindow.delete(1.0,tk.END)
        rightWindow.insert(tk.END,'词典已经合并完成生成全新的AYDic词典！')

    @staticmethod
    def generalDicNt():
        """Button callback: run ``generalDic`` on a background daemon thread."""
        thread = threading.Thread(target=AnyuanDB.generalDic, args=())
        thread.daemon = True
        thread.start()

    # These two names are rebound to the second button and its tooltip.
    GeneralVacabBt = tk.Button(Generation,text='词典汇总', command=generalDicNt.__func__, background='green', foreground='white')
    GeneralVacabBt.place(x=880, y=40)
    GeneralVacabBtTips = Pmw.Balloon(win)  # tooltip object attached to the main window
    GeneralVacabBtTips.bind(GeneralVacabBt, '主要是汇总所有词语解释到总词典AYDic！')



#____________________________手动处理词汇_______________________________________________________________
class WordProcess:
    @staticmethod # static methods in this class are used as plain functions
    def KeepWords(event = None):
        """Keep the current word: move the highlight to the next line.

        Args:
            event: Tk event when triggered by a key binding (unused).
        """
        # A Text index has the form "line.column"; only the line is needed.
        row = int(leftWindow.index('insert').split('.')[0])
        nextRow = str(row + 1)
        # tag_delete takes tag names only, so drop the 'start' tag as a whole;
        # it is re-created on the next line below.
        leftWindow.tag_delete('start')
        leftWindow.mark_set('insert', nextRow + '.0')
        leftWindow.tag_add('start', nextRow + '.0', nextRow + '.end')
        leftWindow.tag_configure('start', background='OliveDrab1', foreground='black')
        leftWindow.see('insert')
    # Maps a table name to the list of line numbers (as strings) that were
    # deleted from the left pane while that table was selected.
    DelRecords = {}
    @staticmethod
    def DelWords(event = None): # event=None is required so the method can be bound to a key
        """Delete the current line of the left pane and record its line number.

        Only tables whose name contains ``_indiv`` or ``_diy`` may be edited;
        for any other table a message is shown instead. The previous
        condition ``'_indiv' or ...`` was always true, so that message could
        never appear.

        Args:
            event: Tk event when triggered by a key binding (unused).
        """
        table = AnyuanDB.cbox.get()
        if not ('_indiv' in table or '_diy' in table):
            rightWindow.delete(1.0,tk.END)
            rightWindow.insert(1.0,'对不起，书籍主词汇表不允许删除！')
            return
        # A Text index has the form "line.column"; only the line is needed.
        row = leftWindow.index('insert').split('.')[0]
        WordProcess.DelRecords.setdefault(table, []).append(row)
        nextRow = str(int(row) + 1)
        # Only the line's content is removed; the line itself stays, so the
        # numbering of the following lines does not change.
        leftWindow.delete(row + '.0', row + '.end')
        leftWindow.mark_set('insert', nextRow + '.0')
        leftWindow.tag_add('start', nextRow + '.0', nextRow + '.end')
        leftWindow.tag_configure('start', background='OliveDrab1', foreground='black')
        leftWindow.see('insert')
    @staticmethod
    def getIndivDic():
        """Create ``<selected table>_indiv`` from the rows shown in the left pane.

        Lines of the left pane with exactly three whitespace-separated fields
        contribute their second and third field as one ``(Word, Frequency)``
        row. Nothing is created when the personal table already exists or
        when the selected table is itself a listed personal table.
        """
        current = AnyuanDB.cbox.get()
        listed = AnyuanDB.cbox['values']
        if current + '_indiv' in listed:
            rightWindow.delete(1.0,tk.END)
            rightWindow.insert(tk.END,'o(*￣︶￣*)o\n*该书的个人词汇表已经存在。')
            return
        if '_indiv' in current and current in listed:
            rightWindow.delete(1.0,tk.END)
            rightWindow.insert(tk.END,'o(*￣︶￣*)o\n*您已经点的就是该书的个人词汇表！')
            return

        OriIDic = leftWindow.get(1.0,tk.END).split('\n')
        MidIDic = [re.findall(r'\S+',x) for x in OriIDic if x !='']
        FiIDic = [(x[1],x[2]) for x in MidIDic if len(x)==3]
        MyTable = current + '_indiv'
        quoted = '"' + MyTable.replace('"', '""') + '"'
        # The connection is opened only when needed and closed on every path.
        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            TableNamet = cur.execute("select name from sqlite_master where type = 'table' order by name").fetchall()
            if MyTable in [x[0] for x in TableNamet]:
                return
            cur.execute('create table ' + quoted + ' (Word TEXT, Frequency TEXT);')
            cur.executemany('INSERT INTO ' + quoted + ' VALUES (?, ?)', FiIDic)
            conn.commit()
        finally:
            conn.close()
        AnyuanDB.CboxUpdate()
        AnyuanDB.cbox.set(MyTable)
        AnyuanDB.ShowVacab(event=None)

    @staticmethod
    def getBatchIndivDic():
        """Create a ``<table>_indiv`` copy for every table that lacks one.

        Tables whose name contains ``_indiv`` are skipped, as are tables for
        which the ``_indiv`` copy already exists. The earlier code removed
        entries from the list while iterating over it and derived the base
        name with ``str.strip``, which strips characters rather than the
        suffix.
        """
        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            TableNamet = cur.execute("select name from sqlite_master where type = 'table' order by name").fetchall()
            TableName = [x[0] for x in TableNamet]
            existing = set(TableName)
            for table in TableName:
                if '_indiv' in table or table + '_indiv' in existing:
                    continue
                source = '"' + table.replace('"', '""') + '"'
                target = '"' + (table + '_indiv').replace('"', '""') + '"'
                cur.execute('create table ' + target + ' as select * from ' + source + ';')
            conn.commit()
        finally:
            conn.close()
        AnyuanDB.CboxUpdate()

    @staticmethod
    def diffWords():
        """Create a ``<table>_indiv`` copy for every table that lacks one.

        NOTE(review): the original comment describes this method as finding
        words missing from the AYDic dictionary, but the body only creates
        ``_indiv`` copies, exactly like ``getBatchIndivDic`` — confirm the
        intended behaviour.

        Tables whose name contains ``_indiv`` are skipped, as are tables for
        which the ``_indiv`` copy already exists. The earlier code removed
        entries from the list while iterating over it and derived the base
        name with ``str.strip``, which strips characters rather than the
        suffix.
        """
        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            TableNamet = cur.execute("select name from sqlite_master where type = 'table' order by name").fetchall()
            TableName = [x[0] for x in TableNamet]
            existing = set(TableName)
            for table in TableName:
                if '_indiv' in table or table + '_indiv' in existing:
                    continue
                source = '"' + table.replace('"', '""') + '"'
                target = '"' + (table + '_indiv').replace('"', '""') + '"'
                cur.execute('create table ' + target + ' as select * from ' + source + ';')
            conn.commit()
        finally:
            conn.close()
        AnyuanDB.CboxUpdate()

    @staticmethod
    def purifier():
        """Delete entries of 20 or more characters from the selected table.

        Only tables whose name contains ``_indiv`` are processed; the removed
        words are listed in the right pane. Rows are matched on the table's
        first column, whatever its name — the earlier code hard-coded
        ``English`` although ``getIndivDic`` creates that column as ``Word``.
        """
        table = AnyuanDB.cbox.get()
        if '_indiv' not in table:
            rightWindow.delete(1.0,tk.END)
            rightWindow.insert(1.0,'o(*￣︶￣*)o\n*仅仅个人词汇库（带indiv后缀）支持净化！')
            return
        quotedTable = '"' + table.replace('"', '""') + '"'
        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            cur.execute('select * from ' + quotedTable)
            # description holds one 7-tuple per column; item 0 is the column name.
            wordColumn = '"' + cur.description[0][0].replace('"', '""') + '"'
            iVacab = cur.fetchall()
            deleteSQL = 'delete from ' + quotedTable + ' where ' + wordColumn + ' = ?'
            rightWindow.delete(1.0, tk.END)
            rightWindow.insert(1.0,'已经优化掉的超长单词（>=20）名单:\n')
            for x in iVacab:
                if len(x[0])>=20:
                    rightWindow.insert(tk.END, x[0]+'\n')
                    cur.execute(deleteSQL, (x[0],))
            conn.commit()
        finally:
            conn.close()
    @staticmethod
    def synDB():  # Keep the GUI data and the database in sync (2023-11-13)
        """Delete from the database the words the user removed in the GUI.

        Two modes, selected by ``WordProcess.DelRecords``:

        * empty -- the numbered lines still present in ``leftWindow`` are
          compared with the rows of the table selected in ``AnyuanDB.cbox``;
          rows whose number is no longer shown are deleted.
        * non-empty -- for every table name recorded there, rows whose
          1-based position is listed in ``DelRecords[table]`` are deleted,
          but only for tables whose name contains ``_indiv`` or ``_diy``;
          other tables just get a "read-only" message.

        Deleted words are listed in ``rightWindow``, ``leftWindow`` is
        reloaded from the selected table, and ``DelRecords`` is cleared once
        the changes are committed.
        """
        def quote(name):
            # Identifiers cannot be bound as parameters; quote them instead.
            return '"' + name.replace('"', '""') + '"'

        def reload_left(cur):
            # Redraw leftWindow as "[n]  word  meaning" from the selected table.
            leftWindow.delete(1.0, tk.END)
            rows = cur.execute('select * from ' + quote(AnyuanDB.cbox.get())).fetchall()
            for number, row in enumerate(rows, 1):
                leftWindow.insert('insert', '[' + str(number) + ']' +
                                  (8 - len(str(number))) * ' ' +
                                  str(row[0]) + (30 - len(str(row[0]))) * ' ' + str(row[1]))
                leftWindow.insert('insert', '\n')

        def delete_words(cur, table, words):
            # List the removed words and delete them with bound parameters, so
            # words containing quotes cannot break the statement.
            rightWindow.delete(1.0, tk.END)
            rightWindow.insert(1.0, '**已删除单词汇总**\n')
            delete_sql = 'delete from ' + quote(table) + ' where Word = ?;'
            for word in words:
                rightWindow.insert(tk.END, str(word) + '\n')
                cur.execute(delete_sql, (word,))

        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            if not WordProcess.DelRecords:
                book = AnyuanDB.cbox.get()
                db_rows = cur.execute('select * from ' + quote(book)).fetchall()
                gui_lines = leftWindow.get(1.0, tk.END).split('\n')
                tokenised = [re.findall(r'\S+', line) for line in gui_lines if line != '']
                # Only lines with exactly three tokens ("[n]", word, meaning)
                # count as still present.
                # NOTE(review): a line whose meaning contains whitespace has
                # more than three tokens and is therefore treated as removed
                # -- confirm this is intended.
                shown = {int(tokens[0].strip('[]')) for tokens in tokenised if len(tokens) == 3}
                # Pair rows with their position directly; list.index() is
                # quadratic and returns the wrong position for duplicate rows.
                removed = [row[0] for number, row in enumerate(db_rows, 1)
                           if number not in shown]
                delete_words(cur, book, removed)
                reload_left(cur)
            else:
                for bookdb in WordProcess.DelRecords:
                    # Each substring needs its own ``in`` test; a bare
                    # non-empty string is always truthy.
                    if '_indiv' in bookdb or '_diy' in bookdb:
                        numbered = enumerate(
                            cur.execute('select * from ' + quote(bookdb)).fetchall(), 1)
                        removed = [row[0] for number, row in numbered
                                   if str(number) in WordProcess.DelRecords[bookdb]]
                        delete_words(cur, bookdb, removed)
                        reload_left(cur)
                    else:
                        rightWindow.delete(1.0, tk.END)
                        rightWindow.insert(1.0, '书籍词汇表不允许修改，只能删除！')
            conn.commit()
        finally:
            # Release the connection even when a statement fails.
            conn.close()
        WordProcess.DelRecords.clear()

    @staticmethod
    def sinoDic():  # Translate a personal word table (2023-11-14)
        """List the words of the selected ``_indiv`` table that ``AYDic`` lacks.

        Reads the table selected in ``AnyuanDB.cbox`` and the ``AYDic`` table
        (loaded as a dict, so it is expected to have two columns).  For an
        ``_indiv`` table, every word that is not a key of ``AYDic`` is written
        to ``leftWindow`` and to a new Word document while a progress bar
        advances; widgets for choosing a folder and saving the document are
        then created.  For any other table only a hint is shown.

        NOTE(review): ``sinoDicNt`` runs this on a worker thread while it
        touches Tk widgets -- confirm that is safe for the Tk build in use.
        """
        book = AnyuanDB.cbox.get()
        conn = sqlite3.connect('anyuandic.db')
        try:
            cur = conn.cursor()
            # Identifiers cannot be bound as parameters; quote them instead.
            iVacab = cur.execute(
                'select * from "' + book.replace('"', '""') + '"').fetchall()
            AYVacab = dict(cur.execute('select * from AYDic').fetchall())
        finally:
            # Only reads are needed; close the connection right away.
            conn.close()

        if '_indiv' not in book:
            rightWindow.delete('1.0', tk.END)
            rightWindow.insert(tk.END, '请选择对应个人生词表进行汉化.')
            return

        uniWords = [row[0] for row in iVacab if row[0] not in AYVacab]
        total = len(uniWords)
        VacabDoc = Document()  # New Word document that collects the unknown words
        leftWindow.delete('1.0', tk.END)
        rightWindow.delete('1.0', tk.END)
        rightWindow.insert(tk.END, '本书让安远词典为难的单词数：' + str(total))

        # Progress bar style with a text label drawn on top of the bar.
        style = ttk.Style(win)
        style.layout('text.Horizontal.TProgressbar',
                     [('Horizontal.Progressbar.trough',
                       {'children': [('Horizontal.Progressbar.pbar',
                                      {'side': 'left', 'sticky': 'ns'})],
                        'sticky': 'nswe'}),
                      ('Horizontal.Progressbar.label', {'sticky': ''})])

        progressbarOne = ttk.Progressbar(win, style='text.Horizontal.TProgressbar', length=500)
        progressbarOne.place(x=110, y=700)
        progressbarOne['value'] = 0
        progressbarOne['maximum'] = total
        # enumerate() gives each word its own number; list.index() is
        # quadratic and repeats the first position for duplicate words.
        for number, word in enumerate(uniWords, 1):
            leftWindow.insert(tk.END, '【' + str(number) + '】' +
                              (6 - len(str(number))) * ' '
                              + word + (20 - len(word)) * ' ' + '\n')
            VacabDoc.add_paragraph().add_run(word)
            # Show the completed share so the label ends at 100%.
            style.configure('text.Horizontal.TProgressbar',
                            text='{:.0%}'.format(number / total))
            progressbarOne['value'] += 1
            win.update()

        VacabOutPath = tk.Text(width=50, height=1)
        VacabOutPath.place(x=650, y=610)
        VacabOutPath.insert(tk.END, r'E:/python/output')

        def VacabPath():
            # Let the user pick the output folder and show it in the text box.
            SavePath = filedialog.askdirectory()
            VacabOutPath.delete('1.0', tk.END)
            VacabOutPath.insert(tk.END, SavePath)

        def VacabSave():
            # Name the file after the table the list was built from, not after
            # whatever the combobox shows when the button is clicked.
            Path = VacabOutPath.get(1.0, tk.END).strip('\n')
            VacabDoc.save(Path + '/' + iTime + '-' + book + '_Vocabulary.docx')

        BrowseVacabOut = ttk.Button(text='选择保存文件夹...', command=VacabPath)
        BrowseVacabOut.place(x=650, y=575)
        VacabOutSave = ttk.Button(text='保存书籍词汇表', command=VacabSave)
        VacabOutSave.place(x=760, y=575)

def sinoDicNt():
    """Start ``WordProcess.sinoDic`` on a daemon thread and return at once."""
    worker = threading.Thread(target=WordProcess.sinoDic, daemon=True)
    worker.start()

#---------------- Five-colour action buttons --------------
# Shared colour schemes: white text on a coloured background.
_BROWN = {'background': 'brown', 'foreground': 'white'}
_PURPLE = {'background': 'purple', 'foreground': 'white'}
_BLUE = {'background': 'blue', 'foreground': 'white'}
_GREEN = {'background': 'green', 'foreground': 'white'}
_RED = {'background': 'red', 'foreground': 'white'}

# Brown: write GUI edits back to the database / list untranslated words.
synDb = tk.Button(Generation, text='保存更改', command=WordProcess.synDB, **_BROWN)
synDb.place(x=100, y=575)
# synDb is rebound to the second button; the first one is already placed.
synDb = tk.Button(Generation, text='汉化个人生词表', command=sinoDicNt, **_BROWN)
synDb.place(x=100, y=610)

# Purple: drop over-long words from a personal table.
purifier = tk.Button(Generation, text='净化词库', command=WordProcess.purifier, **_PURPLE)
purifier.place(x=180, y=575)

# Blue: personal word-table generation.
IndiDic = tk.Button(Generation, text='生成个人生词表', command=WordProcess.getIndivDic, **_BLUE)
IndiDic.place(x=260, y=575)
batchIndiDic = tk.Button(Generation, text='批量生成个人生词表',
                         command=WordProcess.getBatchIndivDic, **_BLUE)
batchIndiDic.place(x=260, y=610)
# NOTE(review): this button runs getBatchIndivDic, the same command as the
# button above, although WordProcess.diffWords exists -- confirm the wiring.
diffWords = tk.Button(Generation, text='词典缺解释词',
                      command=WordProcess.getBatchIndivDic, **_BLUE)
diffWords.place(x=400, y=610)

# Green / red: mark the current word as known / unknown.
OldWords = tk.Button(Generation, text='熟词<CtrlR>', command=WordProcess.DelWords, **_GREEN)
OldWords.place(x=400, y=575)
NewWords = tk.Button(Generation, text='生词<ShiftR>', command=WordProcess.KeepWords, **_RED)
NewWords.place(x=500, y=575)

# Borderless button on the Import tab.
wordpowerBt = tk.Button(Import, background='SystemButtonFace', border=0)
wordpowerBt.place(x=500, y=10)

# Right Ctrl / right Shift call the same handlers as the green / red buttons.
win.bind('<Control_R>', WordProcess.DelWords)
win.bind('<Shift_R>', WordProcess.KeepWords)

if __name__ == '__main__':
    # One-click annotation button on the Note tab.
    GUIDesBoot = tk.Button(
        Note,
        text='一键批注',
        command=AnyuanDB.main,
        background='purple',
        foreground='white',
    )
    GUIDesBoot.place(x=20, y=30)

    # Balloon tooltip attached to the button.
    GUIDesBootTips = Pmw.Balloon(win)
    GUIDesBootTips.bind(GUIDesBoot, '通过个性化的indiv词汇表批注输出书籍！')

    # Hand control to the Tk event loop.
    win.mainloop()