import re
def process(input_data):
    """Extract the IMSI/MSISDN of every subscriber that has both a 0 and a 33 entry.

    The input file is a sequence of subscriber records. Each record starts
    with a ``<SUBBEGIN`` line and ends with a ``<SUBEND`` line, for example::

        <SUBBEGIN
            IMSI=1243560615528273;
            MSISDN=986768559232;
            VLRLIST=10;
            OPTGPRS=3-33-504-241-33-NONE-0-NONE-00000000-65535-0-0-PS_APN-NONE-65535-1;
            OPTGPRS=1-0-504-241-33-NONE-0-NONE-00000000-65535-0-0-PS_APN-NONE-65535-1;
            CHARGE_GLOBAL=3;
        <SUBEND

    A record is selected when it contains at least one ``OPTGPRS`` field whose
    second dash-separated value is exactly ``33`` and at least one whose
    second value is exactly ``0``.

    Lines outside a ``<SUBBEGIN``/``<SUBEND`` pair are ignored, as is a record
    that is never terminated. A selected record in which no IMSI...MSISDN
    span can be found contributes nothing to the result.

    :param input_data: path of the subscriber information file
    :return: list with one string per selected record, in file order, each
        formatted like ``IMSI=1243560615528273;MSISDN=986768559232``;
        empty when no record qualifies
    """
    # Raw-string patterns. The trailing (?!\d) stops "33" from also matching
    # values such as "330", and \d+ accepts multi-digit leading indexes.
    has_33 = re.compile(r'OPTGPRS=\d+-33(?!\d)')
    has_0 = re.compile(r'OPTGPRS=\d+-0(?!\d)')
    id_pair = re.compile(r'IMSI=.+MSISDN=\d+')

    result = []
    record = None  # fields of the record being read; None while outside a record
    with open(input_data) as source:
        for raw_line in source:
            line = raw_line.strip()
            if line == '<SUBBEGIN':
                record = []
            elif line == '<SUBEND':
                if record is not None:
                    # Fields already end with ';', so joining them without a
                    # separator yields 'IMSI=...;MSISDN=...;VLRLIST=...;...'.
                    text = ''.join(record)
                    if has_33.search(text) and has_0.search(text):
                        found = id_pair.search(text)
                        if found:
                            result.append(found.group(0))
                record = None
            elif record is not None:
                record.append(line)
    return result

if __name__ == '__main__':
    # Emit each extracted "IMSI=...;MSISDN=..." entry on its own line instead
    # of discarding the list returned by process().
    for entry in process('input_data.txt'):
        print(entry)