1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 import poplib
4 import email
5 from datetime import datetime, timezone, timedelta
6 import time
7 import os
8 from email.parser import Parser
9 from email.header import decode_header
10 from email.utils import parseaddr
11
# Mailbox address, password and POP3 server address.
# NOTE(review): the name `email` below rebinds the `email` package imported
# at the top of the file; consider renaming it together with its users.

email = ''
password = '' # authorization code used to log in from third-party mail clients (163 mailboxes require one)
pop3_server = 'pop.126.com'
17
18
# Example: read the charset declared by the message itself and use it to decode the subject:
# mail_encode = decode_header(mail.get("Subject"))[0][1]
# mail_title = decode_data(decode_header(mail.get("Subject"))[0][0], mail_encode)
22
def decode_data(bytes, added_encode=None):
    """Decode raw bytes to text by trying a list of candidate encodings.

    Candidates are tried in order: ``added_encode`` first (when given), then
    UTF-8, GBK and GB2312. The first one that decodes the whole input wins.

    :param bytes: the data to decode. The name shadows the builtin but is
        kept so existing keyword callers keep working. A ``str`` is returned
        unchanged, because ``email.header.decode_header`` yields ``str`` for
        headers that contain no encoded-words.
    :param added_encode: optional charset name to try before the defaults,
        typically the charset declared by the message itself.
    :return: the decoded ``str``, or ``None`` when no candidate fits.
    """
    if isinstance(bytes, str):
        # Already text: nothing to decode.
        return bytes

    candidates = ['UTF-8', 'GBK', 'GB2312']
    if added_encode:
        candidates = [added_encode] + candidates

    for encoding in candidates:
        try:
            return str(bytes, encoding=encoding)
        except (ValueError, LookupError, TypeError):
            # ValueError covers UnicodeDecodeError (wrong charset),
            # LookupError an unknown charset name, TypeError a non-bytes
            # input. In every case fall through to the next candidate.
            continue
    return None
44
45
def decode_str(s):
    """Decode an RFC 2047 encoded header value into readable text.

    Every fragment returned by ``decode_header`` is decoded and the results
    are concatenated, so headers that mix encoded-words with plain text (or
    use several encoded-words) are returned in full instead of being cut
    after the first fragment.

    :param s: raw header value, e.g. ``'=?utf-8?b?5Lit5paH?='``.
    :return: the decoded header as ``str``.
    """
    fragments = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            try:
                # Fragments without a declared charset are plain ASCII text.
                value = value.decode(charset or 'ascii')
            except (LookupError, UnicodeDecodeError):
                # Unknown or wrong charset label: keep what is readable
                # rather than failing the whole header.
                value = value.decode('utf-8', errors='replace')
        fragments.append(value)
    return ''.join(fragments)
51
52
def get_att(msg, save_dir='G:\\3、hobby\\2、python\\电子发票整理\\下载文件\\'):
    """Save every attachment of a parsed message to disk.

    Walks all MIME parts of ``msg``; each part that carries a filename and a
    decodable payload is written, in binary mode, into ``save_dir``.

    :param msg: a parsed ``email.message.Message``.
    :param save_dir: directory the attachments are written to; it is created
        when missing. Defaults to the directory this script has always used.
    :return: list of the (decoded) file names that were saved.
    """
    saved_names = []

    for part in msg.walk():
        raw_name = part.get_filename()
        if not raw_name:
            continue

        payload = part.get_payload(decode=True)
        if payload is None:
            # Multipart containers have no payload of their own to save.
            continue

        filename = decode_str(raw_name)  # make the attachment name readable
        # The name comes from the sender: drop any directory components so
        # it cannot escape save_dir (both separator styles are handled).
        filename = os.path.basename(filename.replace('\\', '/'))
        if filename in ('', '.', '..'):
            continue
        print(filename)

        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        # Binary mode is required: attachments are arbitrary bytes.
        with open(os.path.join(save_dir, filename), 'wb') as att_file:
            att_file.write(payload)
        saved_names.append(filename)

    return saved_names
76
77
def parse_mail_time(mail_datetime):
    """Parse the value of a mail ``Date`` header.

    Parsing is delegated to ``email.utils.parsedate_tz``, which is
    locale-independent and accepts an optional weekday, any zone suffix
    (``+0800``, ``-0500``, ``GMT``, a trailing ``(CST)`` comment) and a
    zone that was cut short by the caller.

    :param mail_datetime: header text such as
        ``'Mon, 20 Jul 2020 10:30:00 +0800'``.
    :return: a naive ``datetime`` holding the wall-clock time written in the
        header; the zone offset is dropped, not applied.
    :raises ValueError: when the text cannot be parsed as a date.
    """
    # Function-scope import: the module only imports `parseaddr` from
    # email.utils at the top.
    from email.utils import parsedate_tz

    try:
        fields = parsedate_tz(mail_datetime)
        if fields is not None:
            # First six fields are year, month, day, hour, minute, second.
            return datetime(*fields[:6])
    except (TypeError, ValueError, AttributeError, IndexError):
        # Non-string input or out-of-range fields: report it uniformly below.
        pass

    raise ValueError("邮件时间格式解析错误")
101
102
def main():
    """Download the attachments of every mail dated within 2020.

    Connects to ``pop3_server``, logs in with ``email`` / ``password``,
    walks the mailbox from the newest message to the oldest and saves the
    attachments of each message whose ``Date`` header falls between
    2020-01-01 and 2020-12-31 (inclusive). A message that cannot be
    processed is reported and skipped; the session is always closed.
    """
    # Inclusive bounds, compared as YYYYMMDD strings.
    first_day = '20200101'
    last_day = '20201231'

    # NOTE(review): poplib.POP3 sends the credentials unencrypted; use
    # poplib.POP3_SSL if the server offers it.
    server = poplib.POP3(pop3_server)
    try:
        server.set_debuglevel(1)
        # Print the server's welcome banner.
        print(server.getwelcome().decode('utf-8'))
        # Authenticate.
        server.user(email)
        server.pass_(password)
        # Message count and mailbox size.
        print('Messages: %s. Size: %s' % server.stat())
        # list() returns one entry per message, e.g. [b'1 82923', b'2 2184', ...]
        resp, mails, octets = server.list()
        print(mails)

        # POP3 message numbers start at 1; iterate newest first.
        for i in range(len(mails), 0, -1):
            try:
                resp, lines, octets = server.retr(i)
                # `lines` holds the raw message, one bytes object per line.
                msg_content = b'\r\n'.join(lines).decode("utf8", "ignore")
                msg = Parser().parsestr(msg_content)

                raw_date = msg.get("Date")
                if raw_date is None:
                    print('邮件 %d 缺少 Date 头, 已跳过' % i)
                    continue
                date1 = parse_mail_time(raw_date[0:30])
                print(date1)
                date2 = date1.strftime("%Y%m%d")

                # Both bounds must hold; the previous `|` test was always true.
                if first_day <= date2 <= last_day:
                    f_list = get_att(msg)
                    if f_list:
                        print("下载成功")
            except Exception as exc:
                # One bad message must not abort the run, but it must not be
                # hidden either. KeyboardInterrupt is deliberately not caught.
                print('邮件 %d 处理失败: %r' % (i, exc))
    finally:
        server.quit()


if __name__ == '__main__':
    main()