# Prerequisite (run in a shell, not in Python): pip install python-Levenshtein
"""Score candidate POI entries against each source record and export to Excel.

Reads the first sheet of ``<file_name>.xlsx``, where every row is expected to
hold exactly nine columns (dbid, area_code, ref_area_type_code, city,
district, address, city_street, name_, BDpoi_list).  ``BDpoi_list`` packs
several candidates separated by ``|-|``; each candidate is ``name|--|address``
or just ``name``.  For every candidate the pair ``[name_, full address]`` is
compared with ``[candidate name, candidate address]`` via
``Levenshtein.seqratio`` and one output row is written per candidate, ordered
by ascending ratio within each source record.

The result is saved as ``<file_name>-Levenshtein<yymmddHHMMSS>.xlsx``.
"""
import time

import Levenshtein as Le
import xlrd
from openpyxl import Workbook

# Not referenced anywhere in the visible part of the script.
target_city_list = []

file_name = 'DB任务_csv_py_wholeCSV-加百度170825095914'
FEXCEL = '%s%s' % (file_name, '.xlsx')

# NOTE(review): xlrd 2.0+ only reads legacy .xls files; opening this .xlsx
# input presumably relies on an older xlrd (< 2.0) being installed -- confirm.
data = xlrd.open_workbook(FEXCEL)
table = data.sheets()[0]
nrows, ncols = table.nrows, table.ncols

# Output workbook; the first row is the header.
wb = Workbook()
worksheet = wb.active
file_title_str = 'dbid, area_code, ref_area_type_code, city, district, address, city_street, name_, BDpoi_list, BD_name, BD_addr, seqratio_res'
file_title_l = file_title_str.replace(' ', '').split(',')
worksheet.append(file_title_l)

# '|-|' separates candidates; '|--|' separates a candidate's name and address.
BDpoi_list_tag, BDpoi_list_tagb = '|-|', '|--|'

for row_index in range(nrows):
    # Unpacking raises ValueError if a row does not have exactly nine cells.
    (dbid, area_code, ref_area_type_code, city, district, address,
     city_street, name_, BDpoi_list) = table.row_values(row_index)

    # Skip the header row of the input sheet.
    if dbid == 'dbid':
        continue

    addr_ = '%s%s%s%s' % (city, district, address, city_street)
    reference_pair = [name_, addr_]

    scored_rows = []
    for candidate in BDpoi_list.split(BDpoi_list_tag):
        parts = candidate.split(BDpoi_list_tagb)
        BD_name = parts[0]
        # The address is taken only when the candidate splits into exactly
        # two pieces; any other shape leaves it empty.
        BD_addr = parts[1] if len(parts) == 2 else ''
        seqratio_res = Le.seqratio(reference_pair, [BD_name, BD_addr])
        scored_rows.append((
            dbid, area_code, ref_area_type_code, city, district, address,
            city_street, name_, BDpoi_list, BD_name, BD_addr, seqratio_res,
        ))

    # sorted() is stable, so candidates with equal ratios keep their original
    # order while the rows come out by ascending ratio.
    for scored in sorted(scored_rows, key=lambda row: row[-1]):
        worksheet.append(scored)

# Timestamp the output name so repeated runs do not overwrite each other.
localtime_ = time.strftime("%y%m%d%H%M%S")
file_name = '%s%s%s' % (file_name, '-Levenshtein', localtime_)
file_name_save = '%s%s' % (file_name, '.xlsx')
wb.save(file_name_save)