- cookie & session
- 由于http协议是无状态的(服务器不会记住之前的请求),人们为了弥补这个缺憾,所采用的一种补充机制
- cookie是发放给用户(即http浏览器)的一段信息,session是保存在服务器上的对应的另一半信息,用来记录用户信息
- cookie和session的区别
- 存放位置不同
- cookie不安全
- session会保存在服务器上一定时间,会过期
- 单个cookie保存数据不超过4k, 很多浏览器限制一个站点最多保存20个
- session的存放位置
- 存在服务器端
- 一般情况,session是放在内存中或者数据库中
- 没有cookie登录 案例v11,可以看到,没使用cookie则反馈网页为未登录状态
(注意:如果html 文件乱码,open 方法加上encoding="utf-8")
from urllib import request,parse
import chardet
if __name__ == '__main__':
    # Case v11: request the profile page without any cookie and save what
    # the server returns.
    url = "http://www.renren.com/966226407/profile"
    # The context manager closes the HTTP response once the body is read.
    with request.urlopen(url) as rsp:
        html = rsp.read()
    cs = chardet.detect(html)
    # The "encoding" entry can be None when detection fails; because the key
    # is still present, dict.get's default would not apply, so fall back
    # explicitly to UTF-8.
    res = html.decode(cs.get("encoding") or "utf-8")
    print(res)
    with open("webrenren.html", 'w', encoding="utf-8") as f:
        f.write(res)
- 使用cookie登录
- 1.直接把cookie复制下来,然后手动放入请求头, 案例 v12
from urllib import request
if __name__ == '__main__':
    # Case v12: send a cookie copied by hand in the request headers.
    url = 'http://www.renren.com/966226407/profile'
    headers = {
        'Cookie':'JSESSIONID=xxx; wp_fold=0; anonymid=xxx; depovince=GW; jebecookies=xxx|||||; _r01_=1; ick_login=xxx; t=xxx; societyguester=xxx; id=xxx; xnsid=xxx'
    }
    req = request.Request(url=url, headers=headers)
    # Close the response as soon as the body has been read and decoded.
    with request.urlopen(req) as res:
        html = res.read().decode()
    with open("welcome.html", "w", encoding="UTF-8") as f:
        f.write(html)
- 2.http模块包含一些关于cookie的模块,通过他们我们可以自动使用cookie
- CookieJar
- 管理存储cookie,向传出的http请求添加cookie,
- cookie存储在内存中,CookieJar实例回收后cookie将消失
- FileCookieJar(filename, delayload=None, policy=None):
- 使用文件管理cookie
- filename是保存cookie的文件
- MozillaCookieJar(filename, delayload=None, policy=None):
- 创建与Mozilla浏览器cookies.txt兼容的FileCookieJar实例
- LWPCookieJar(filename, delayload=None, policy=None):
- 创建与libwww-perl标准兼容的Set-Cookie3格式的FileCookieJar实例
- 他们的关系是: CookieJar-->FileCookieJar-->MozillaCookieJar & LWPCookieJar
- 利用cookiejar访问人人,
- 自动使用cookie登录,大致流程是
- 打开登录页面后自动通过用户名密码登录
- 自动提取反馈回来的cookie
- 利用提取的cookie登录隐私页面
- 案例v13
from urllib import request,parse
from http import cookiejar
# Transport handlers for plain HTTP and for HTTPS requests
http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()
# Cookie jar instance, wrapped in a handler that manages cookies
cookie = cookiejar.CookieJar()
cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)
# Combine all three handlers into the opener used by the functions below
opener = request.build_opener(
    http_handler,
    https_handler,
    cookie_handler,
)
# Sample output: opener..type...info <class 'urllib.request.OpenerDirector'>
print("opener..type...info", type(opener))
# Sample output: opener...........info <urllib.request.OpenerDirector object at 0x000001A165BD9208>
print("opener...........info", opener)
def login():
    """Perform the initial login with username and password.

    Posts the (masked) credentials to the login URL through the module-level
    ``opener`` to obtain the cookie credentials, prints the response type and
    object, and writes the returned page to ``login.html``.

    :return: None
    """
    url = "http://www.renren.com/PLogin.do"
    data = {
        "email": "136****1021",
        "password": "********",
    }
    # POST bodies must be bytes: url-encode the form, then encode to bytes.
    data = parse.urlencode(data).encode()
    req = request.Request(url=url, data=data)
    # Use the opener rather than request.urlopen so the opener's handlers
    # process the exchange; the context manager closes the response.
    with opener.open(req) as rsp:
        print("login...type->rsp", type(rsp))  # <class 'http.client.HTTPResponse'>
        print("login.........->rsp", rsp)
        html = rsp.read().decode()
    with open("login.html", "w", encoding="utf-8") as f:
        f.write(html)
def getHomePage():
    """Fetch the profile page through the module-level ``opener``.

    The decoded page is written to ``logintohomepage.html``.

    :return: None
    """
    url = "http://www.renren.com/966226407/profile"
    # The context manager closes the response once the body has been read.
    with opener.open(url) as rsp:
        html = rsp.read().decode()
    with open("logintohomepage.html", "w", encoding="utf-8") as f:
        f.write(html)
if __name__ == '__main__':
    # Log in first, then request the profile page with the same opener.
    login()
    getHomePage()
- handler是Handler的实例,常用参看案例代码
- 用来处理复杂请求
# Build the cookie handler around the cookie jar
cookie_handler = request.HTTPCookieProcessor(cookie)
# Create the HTTP request handler
http_handler = request.HTTPHandler()
# Create the HTTPS request handler
https_handler = request.HTTPSHandler()
- 创立handler后,使用opener打开,打开后相应的业务由相应的handler处理
- cookie作为一个变量,打印出来, 案例 v14
#打印cookie
from urllib import request,parse
from http import cookiejar
# Cookie jar instance; it is printed after login in this example
cookie = cookiejar.CookieJar()
# Handlers: one for HTTP, one for HTTPS, one that manages the cookie jar
http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()
cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)
# Build the opener from the three handlers
opener = request.build_opener(*[http_handler, https_handler, cookie_handler])
# Sample output: opener..type...info <class 'urllib.request.OpenerDirector'>
print("opener..type...info", type(opener))
# Sample output: opener...........info <urllib.request.OpenerDirector object at 0x000001A165BD9208>
print("opener...........info", opener)
def login():
    """Perform the initial login with username and password.

    Posts the (masked) credentials to the login URL through the module-level
    ``opener`` to obtain the cookie credentials, and prints the response type
    and object. The response body is not used in this example, so it is not
    read.

    :return: None
    """
    url = "http://www.renren.com/PLogin.do"
    data = {
        "email": "136****1021",
        "password": "********",
    }
    # POST bodies must be bytes: url-encode the form, then encode to bytes.
    data = parse.urlencode(data).encode()
    req = request.Request(url=url, data=data)
    # The context manager closes the response after it has been inspected.
    with opener.open(req) as rsp:
        print("login...type->rsp", type(rsp))  # <class 'http.client.HTTPResponse'>
        print("login.........->rsp", rsp)
if __name__ == '__main__':
    login()
    print(type(cookie))  # <class 'http.cookiejar.CookieJar'>
    # Sample output (abbreviated):
    # cookie---><CookieJar[<Cookie _de=... for .renren.com/>, ..., <Cookie JSESSIONID=... for zhibo.renren.com/>]>
    print("cookie--->{}".format(cookie))
    # List every attribute name available on each cookie in the jar.
    for item in cookie:
        for i in dir(item):
            print(i)
- cookie的属性
- name: 名称
- value: 值
- domain:可以访问此cookie的域名
- path: 可以访问此cookie的页面路径
- expires:过期时间
- size: 大小
- Http字段
# Print name, value, path, domain and expiry of every cookie in the jar.
for item in cookie:
    print("name :",item.name," ,value :", item.value,", path :",item.path," ,domain..",item.domain,"...",item.expires)
>>>name : _de ,value : xxxxx42Fxxxxxxxxxx45765xxxxx , path : / ,domain.. .renren.com ... 1567526036
>>>name : anonymid ,value : xxtmxxxxxx9mi4m , path : / ,domain.. .renren.com ... 1694102036
>>>name : first_login_flag ,value : 1 , path : / ,domain.. .renren.com ... None
>>>name : id ,value : 849xxxx63 , path : / ,domain.. .renren.com ... None
>>>name : ln_hurl ,value : http://head.xiaonei.com/photos/0/0/men_main.gif , path : / ,domain.. .renren.com ... 1539014036
>>>name : ln_uact ,value : 136****1021 , path : / ,domain.. .renren.com ... 1539014036
>>>name : loginfrom ,value : syshome , path : / ,domain.. .renren.com ... None
>>>name : p ,value : xxxx5121587xxxxxxxxxxxc5410633 , path : / ,domain.. .renren.com ... None
>>>name : societyguester ,value : xxxx536xxxe4fbxxxeb0xxe6x , path : / ,domain.. .renren.com ... None
>>>name : t ,value : df704536b0c5ba5e4fb05a2eeb02ce6c3 , path : / ,domain.. .renren.com ... None
>>>name : xnsid ,value : 461c3954 , path : / ,domain.. .renren.com ... None
>>>name : t ,value : xxxxdf01xxedbxx934a04xx3fx , path : /xtalk/ ,domain.. .renren.com ... None
>>>name : JSESSIONID ,value : axxiDbxxJexx45xx , path : / ,domain.. zhibo.renren.com ... None
- cookie的保存-FileCookieJar, 案例v15
"""
cookie 保存
"""
from urllib import request,parse
from http import cookiejar
# File in which the cookies are stored
filename = "cookie.txt"
# File-backed jar (Mozilla format) used in place of a plain CookieJar
cookie = cookiejar.MozillaCookieJar(filename)
# Handlers: one for HTTP, one for HTTPS, one that manages the cookie jar
http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()
cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)
# Build the opener from the three handlers
opener = request.build_opener(
    http_handler,
    https_handler,
    cookie_handler,
)
def login():
    """Log in with username and password, then save the cookies to file.

    Posts the (masked) credentials to the login URL through the module-level
    ``opener`` and calls ``save`` on the module-level ``cookie`` jar.

    :return: None
    """
    url = "http://www.renren.com/PLogin.do"
    data = {
        "email": "136****1021",
        "password": "********",
    }
    data = parse.urlencode(data)
    req = request.Request(url=url, data=data.encode())
    # The response body is not needed here; open the URL and close the
    # response straight away.
    with opener.open(req):
        pass
    # Save the cookies to file, keeping entries that are marked to be
    # discarded or that have already expired.
    cookie.save(ignore_discard=True, ignore_expires=True)
if __name__ == '__main__':
    # Logging in also saves the cookies to the cookie file.
    login()
- cookie的读取, 案例v16
"""
cookie 读取
"""
from urllib import request, parse
from http import cookiejar
# File holding the previously saved cookies
filename = "cookie.txt"
cookie = cookiejar.MozillaCookieJar(filename)
# Load from `filename` rather than repeating the literal path, so the jar and
# the load call cannot drift apart; entries marked to be discarded or already
# expired are loaded too.
cookie.load(filename, ignore_discard=True, ignore_expires=True)
# Handlers: one that manages the loaded cookies, one for HTTP, one for HTTPS
cookie_handler = request.HTTPCookieProcessor(cookie)
http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()
# Build the opener from the three handlers
opener = request.build_opener(http_handler, https_handler, cookie_handler)
def getHomePage():
    """Fetch the profile page through the module-level ``opener``.

    The decoded page is written to ``readcookie.html``.

    :return: None
    """
    url = "http://www.renren.com/966226407/profile"
    # The context manager closes the response once the body has been read.
    with opener.open(url) as rsp:
        html = rsp.read().decode()
    with open("readcookie.html", "w", encoding="utf-8") as r:
        r.write(html)
if __name__ == '__main__':
    # Request the profile page using the cookies loaded from file.
    getHomePage()