#-*-coding:utf-8-*-
import re
# Mail-list clean-up script.
#
# Runs three stages over hard-coded files, writing the result of each stage
# to its own output file:
#   1. drop every line of the source list that also appears in the bounce list
#   2. drop duplicate lines (first occurrence wins)
#   3. drop lines matching the "@oauth...com" pattern
bounce_path = 'd:/test/mail/bounce_list.txt'
source_path = 'd:/test/mail/828-820.txt'
unbounced_path = 'd:/test/mail/ok1'
deduped_path = 'd:/test/mail/ok-sort'
final_path = 'd:/test/mail/ok-ok'


def _read_lines(path):
    """Return the lines of *path*, each ending in a single newline.

    A final line without a trailing newline is given one, so that it
    compares equal to the same entry elsewhere and is not glued to the
    following line once the list has been sorted and written back out.
    """
    with open(path, 'r') as handle:
        return [line.rstrip('\n') + '\n' for line in handle]


def _write_lines(path, lines):
    """Write *lines* (already newline-terminated) to *path*, replacing it."""
    with open(path, 'w') as handle:
        handle.writelines(lines)


# Stage 1: remove from the source list every line present in the bounce list.
# A set makes each membership test O(1) instead of scanning a list.
bounced = set(_read_lines(bounce_path))
unbounced = [
    line for line in sorted(_read_lines(source_path))
    if line not in bounced
]
for line in unbounced:
    # The line keeps its own newline, so the output is double-spaced,
    # exactly as the original `print` statement produced.
    print(line)
_write_lines(unbounced_path, unbounced)

# Stage 2: filter out duplicate lines, keeping the first occurrence.
lines_seen = set()
deduped = []
for line in unbounced:
    if line not in lines_seen:
        lines_seen.add(line)
        deduped.append(line)
_write_lines(deduped_path, deduped)

# Stage 3: filter out lines matching the keyword pattern.
# NOTE(review): the '.' before 'com' is unescaped and therefore matches any
# character; it was probably meant to be r'\.com' -- confirm before changing.
oauth_pattern = re.compile('@oauth.*.com$')
remaining = [line for line in deduped if not oauth_pattern.search(line)]
for line in remaining:
    print(line)
_write_lines(final_path, remaining)