import re

# Markers that open and close one subscriber record in the dump file.
_RECORD_START = "<SUBBEGIN"
_RECORD_END = "<SUBEND"

# Per-field patterns. They are applied with ``.match()`` to a single stripped
# field, so they are anchored at the start of the field and cannot match in
# the middle of some other field's value.
_IMSI_FIELD = re.compile(r"IMSI=(\d+)")
_MSISDN_FIELD = re.compile(r"MSISDN=(\d+)")
# OPTGPRS=<index>-<id>-...: capture the whole second component so that
# "33" is not confused with "330" and "0" is not confused with "05".
_OPTGPRS_FIELD = re.compile(r"OPTGPRS=\d+-(\d+)")

# A subscriber qualifies only when OPTGPRS entries with both ids are present.
_REQUIRED_IDS = frozenset({"0", "33"})


def _summarize_record(fields):
    """Return ``'IMSI=...;MSISDN=...'`` for a qualifying record, else ``None``.

    :param fields: stripped field lines of one record (markers excluded)
    """
    imsi = None
    msisdn = None
    seen_ids = set()
    for field in fields:
        found = _OPTGPRS_FIELD.match(field)
        if found:
            seen_ids.add(found.group(1))
            continue
        found = _IMSI_FIELD.match(field)
        if found:
            if imsi is None:
                imsi = found.group(1)
            continue
        found = _MSISDN_FIELD.match(field)
        if found and msisdn is None:
            msisdn = found.group(1)

    # Skip records that lack either identifier instead of emitting an
    # empty or partial entry.
    if imsi is None or msisdn is None:
        return None
    if not _REQUIRED_IDS <= seen_ids:
        return None
    return 'IMSI={};MSISDN={}'.format(imsi, msisdn)


def process(input_data):
    """Extract IMSI and MSISDN of every subscriber that has both OPTGPRS id 0 and id 33.

    The input is a text file made of records such as::

        <SUBBEGIN
            IMSI=1243560615528273;
            MSISDN=986768559232;
            VLRLIST=10;
            OPTGPRS=3-33-504-241-33-NONE-0-NONE-00000000-65535-0-0-PS_APN-NONE-65535-1;
            OPTGPRS=1-0-504-241-33-NONE-0-NONE-00000000-65535-0-0-PS_APN-NONE-65535-1;
            CHARGE_GLOBAL=3;
        <SUBEND

    Every record in the file is examined. Lines outside a
    ``<SUBBEGIN`` ... ``<SUBEND`` pair and a trailing record without
    ``<SUBEND`` are ignored.

    :param input_data: path of the subscriber information file
    :return: list of strings, one per qualifying subscriber, each formatted
        like ``IMSI=1243560615528273;MSISDN=986768559232``
    """
    result = []
    current = None  # field lines of the record being read, or None outside one
    with open(input_data) as handle:
        for raw_line in handle:
            # strip() also removes '\r' and spaces, so CRLF or space-indented
            # files are parsed the same way as tab-indented ones.
            line = raw_line.strip()
            if line == _RECORD_START:
                current = []
            elif line == _RECORD_END:
                if current is not None:
                    entry = _summarize_record(current)
                    if entry is not None:
                        result.append(entry)
                current = None
            elif current is not None and line:
                current.append(line)
    return result


if __name__ == '__main__':
    for entry in process('input_data.txt'):
        print(entry)