方法一通过python的requests包:

import requests

url = "https://fanyi.baidu.com"
res = requests.get(url)
ck = res.cookies  # the cookie jar attached to the response
print(ck)
print(type(ck))
print(ck.keys())  # every cookie name in the jar, returned as a list
print(ck.items())  # every (name, value) pair in the jar, returned as a list
# 输出
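<RequestsCookieJar[<Cookie BAIDUID=3A6AD66348038CBCB3BB6927F4A2CD77:FG=1 for .baidu.com/>, <Cookie locale=zh for .baidu.com/>]>
<class 'requests.cookies.RequestsCookieJar'>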
['BAIDUID','locale']
[('BAIDUID','3A6AD66348038CBCB3BB6927F4A2CD77:FG=1'),('locale','zh')]

获取cookies是通过response的cookies 属性,可以通过res.cookies["cookie_name"]的方式获取。

并且需要注意的是,这个是一个RequestsCookieJar的实例,也就是说,在requests的操作里的cookie一般都是包装到了RequestsCookieJar这个类里。

看源码:

class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
    """Cookie jar that can also be used through a dict-like interface.

    Only the accessor methods quoted from the requests source are shown
    here; the helpers they call (``_find_no_duplicates``,
    ``remove_cookie_by_name``, ``morsel_to_cookie``, ``create_cookie``)
    are defined elsewhere in that library.
    """

    def get(self, name, default=None, domain=None, path=None):
        """Return the value found for ``name``, or ``default`` on KeyError.

        ``domain`` and ``path`` are forwarded to the lookup helper.
        """
        try:
            return self._find_no_duplicates(name, domain, path)
        except KeyError:
            return default

    def set(self, name, value, **kwargs):
        """Store a cookie and return it; a ``None`` value removes it instead.

        ``kwargs`` are passed to ``create_cookie``; its ``domain`` and
        ``path`` entries are also used when removing a cookie.
        """
        # support client code that unsets cookies by assignment of a None value:
        if value is None:
            remove_cookie_by_name(
                self, name, domain=kwargs.get('domain'), path=kwargs.get('path')
            )
            return

        if isinstance(value, Morsel):
            c = morsel_to_cookie(value)
        else:
            c = create_cookie(name, value, **kwargs)
        self.set_cookie(c)
        return c

    def iterkeys(self):
        """Yield the name of every cookie in the jar."""
        for cookie in iter(self):
            yield cookie.name

    def keys(self):
        """Return the cookie names as a list."""
        return list(self.iterkeys())

    def itervalues(self):
        """Yield the value of every cookie in the jar."""
        for cookie in iter(self):
            yield cookie.value

    def values(self):
        """Return the cookie values as a list."""
        return list(self.itervalues())

    def iteritems(self):
        """Yield a ``(name, value)`` tuple for every cookie in the jar."""
        for cookie in iter(self):
            yield cookie.name, cookie.value

    def items(self):
        """Return the ``(name, value)`` tuples as a list."""
        return list(self.iteritems())

里面包括了很多的方法,可以根据需要进行使用,通常就是get, set, keys, items等,和字典很像!

重点

如果需要在请求中添加cookie,可以实例化一个RequestsCookieJar的类,然后把值set进去,最后在get,post方法里面指定cookies参数就行了,如下:

import requests
from requests.cookies import RequestsCookieJar

url = "http://fanyi.baidu.com/v2transapi"
# Build a jar by hand and put the cookie to send into it.
cookie_jar = RequestsCookieJar()
cookie_jar.set("BAIDUID", "B1CCDD4B4BC886BF99364C72C8AE1C01:FG=1", domain="baidu.com")
# Hand the jar to the request through the ``cookies`` argument.
res = requests.get(url, cookies=cookie_jar)
print(res.status_code)
# A printed 200 means the request succeeded.

方法二使用python的cookielib包:

try:  # Python 2 module names
    import cookielib
    import urllib2
except ImportError:  # Python 3 renamed both modules
    import http.cookiejar as cookielib
    import urllib.request as urllib2

loginUrl = "https://fanyi.baidu.com"
cj = cookielib.CookieJar()
# Route requests through an opener whose cookie processor fills ``cj``.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
res = urllib2.urlopen(loginUrl)
opener.close()
for index, cookie in enumerate(cj):
    # Formatted so the text is identical on Python 2 and Python 3.
    print('[ %s ] %s' % (index, cookie))
输出:
[ 0 ] <Cookie BAIDUID=... for .baidu.com/>
[ 1 ] <Cookie locale=zh for .baidu.com/>

方法三使用python的httplib包:

try:  # Python 2 module name
    import httplib
except ImportError:  # Python 3 renamed it to http.client
    import http.client as httplib

cj = ''
header = {
    'Host': 'fanyi.baidu.com',
    'Accept-Language': 'zh-CN',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip,deflate',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'User-Agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/28.0.1500.71 Chrome/28.0.1500.71 Safari/537.36',
}
# NOTE (from the original author): using httplib.HTTPConnection('fanyi.baidu.com')
# instead makes res.status come back as a 301 redirect, because the site
# is served over HTTPS.
con = httplib.HTTPSConnection('fanyi.baidu.com')
# Request fanyi.baidu.com/v2transapi
con.request(method='GET', url='/v2transapi', headers=header)
res = con.getresponse()
res.read()  # the body content is only available after calling read()
con.close()
print(res.status)  # HTTP status code

# Look the header up once; it is None when the server set no cookie.
set_cookie = res.getheader('Set-Cookie')
if set_cookie is not None:
    print(res.getheaders())  # every response header
    print(set_cookie)  # the full Set-Cookie header value
    # Keep only the text before the first ';' (the first name=value pair).
    cj = set_cookie.split(';')[0]
    print(cj)
else:
    print('got no cookie')
    exit()
# 输出
200
[('content-length','57'),('content-encoding','gzip'),('set-cookie','locale=zh; expires=Sat, 01-Feb-2020 03:26:10 GMT; path=/; domain=.baidu.com, BAIDUID=F3B1D486AEEF5CB69BCDBF801064CBEE:FG=1; expires=Mon, 06-Apr-20 03:26:10 GMT; max-age=31536000; path=/; domain=.baidu.com; version=1'),('vary','Accept-Encoding'),('server','Apache'),('date','Sun, 07 Apr 2019 03:26:10 GMT'),('p3p','CP=" OTI DSP COR IVA OUR IND COM "'),('content-type','application/json')]
locale=zh; expires=Sat, 01-Feb-2020 03:26:10 GMT; path=/; domain=.baidu.com, BAIDUID=F3B1D486AEEF5CB69BCDBF801064CBEE:FG=1; expires=Mon, 06-Apr-20 03:26:10 GMT; max-age=31536000; path=/; domain=.baidu.com; version=1
locale=zh

方法四使用python的selenium包:

用的比较少的selenium包,用于模拟登陆并获取cookie。

import random
import time
from urllib import request

import requests
from lxml import etree
from selenium import webdriver

driver = webdriver.Chrome(executable_path=r'/Applications/Google Chrome.app/chromedriver')
driver.get('http://www.renren.com/PLogin.do')
time.sleep(2)
driver.find_element_by_id('email').clear()
driver.find_element_by_id('email').send_keys('myusername')  # type the user name
driver.find_element_by_id('password').clear()
driver.find_element_by_id('password').send_keys('mypassword')  # type the password

# Save the captcha image locally so it can be looked at and typed in.
img_url = 'http://icode.renren.com/getcode.do?t=web_login&rnd=' + str(random.random())
request.urlretrieve(img_url, 'renren_yzm.jpg')
try:
    driver.find_element_by_id('icode').clear()
    # If a captcha is required, type it by hand or pass the image to a
    # captcha-solving service.
    img_res = input('输入验证码:')
    driver.find_element_by_id('icode').send_keys(img_res)
except Exception:
    # Best effort: carry on with the login if the captcha step fails.
    # Catching Exception (not a bare except) lets Ctrl-C during input()
    # still stop the script.
    pass
driver.find_element_by_id('autoLogin').click()  # tick "auto login"
driver.find_element_by_id('login').click()  # submit the login form
time.sleep(3)

cookie_items = driver.get_cookies()  # cookies held by the browser session
# Flatten the cookie records into a {name: value} mapping.
post = {cookie['name']: cookie['value'] for cookie in cookie_items}
print(post['t'])  # the cookie renren.com uses to keep the session logged in
driver.quit()  # shut selenium down
# ------------------------------------------------------------
url = 'http://www.renren.com/265025131/profile'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Cookie': 't=' + post['t'],
}
# Reuse the login cookie in a plain requests call.
response = requests.get(url, headers=headers)
print('-' * 50)
html = etree.HTML(response.text)
title = html.xpath('//title/text()')
print('目前得到的页面信息', title)
print(response.url)

还有很多方法可以实现获取cookie和设置cookie,有待学习研究。