这里用到的库有两个:requests 和 re。
import requests
import re
登录时首先要创建一个 session 来维持会话;发送 POST 请求时还需要带上请求头 headers。
def _hidden_field_value(html, field):
    """Return the ``value`` attribute that follows ``name="<field>"`` in *html*.

    Raises:
        IndexError: if no such field is present in *html* (same exception
            type the previous ``re.findall(...)[0]`` lookup produced, now
            with a descriptive message).
    """
    match = re.search(r'name="%s" value="*(.*?)"' % re.escape(field), html)
    if match is None:
        raise IndexError(
            'field "%s" not found in the login page' % field)
    return match.group(1)


def postLoginsession(username="17861121396", password="wwx958260!!!"):
    """Log in through the CSDN passport page and return the session cookies.

    The login page is first fetched with GET so that the ``lt`` and
    ``execution`` values embedded in it can be sent back together with the
    credentials in the POST request.

    Args:
        username: account name to submit.
        password: password to submit.

    Returns:
        The cookie jar of the session used for the login.

    Raises:
        IndexError: if the ``lt`` or ``execution`` value cannot be found in
            the fetched page.
    """
    # NOTE(review): the default credentials are kept only so existing callers
    # that pass no arguments keep working; real credentials should not live
    # in source code -- pass them in from configuration instead.

    # The same URL serves the login page (GET) and accepts the login (POST).
    url = "https://passport.csdn.net/account/login"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

    session = requests.session()
    # Match on the raw page text: running it through repr() first would
    # escape backslashes and non-ASCII characters inside the captured values.
    page = session.get(url, headers=headers).text

    post_data = {
        "username": username,
        "password": password,
        "lt": _hidden_field_value(page, "lt"),
        "execution": _hidden_field_value(page, "execution"),
        "_eventId": "submit"}
    session.post(url, headers=headers, data=post_data)

    # Return the session's jar rather than the POST response's cookies: the
    # response only carries cookies set by that final response, while the
    # session jar also keeps those set by the initial GET and any redirects.
    return session.cookies
登录成功后,用返回的 cookie 重新构造一个 session,再通过 session.get 访问页面,即可得到登录状态下的页面内容。
def getMycsdnsession(cookie):
    """Create a new requests session that uses *cookie* as its cookie jar.

    Args:
        cookie: cookie jar to attach to the new session.

    Returns:
        The newly created session with its ``cookies`` attribute set to
        *cookie*.
    """
    restored = requests.session()
    # The supplied jar replaces the fresh session's own jar outright.
    restored.cookies = cookie
    return restored
def getHtml_str(session, url="https://blog.csdn.net/efheoihfe"):
    """Fetch *url* through *session* and return the response body as text.

    Args:
        session: object with a ``get(url, headers=...)`` method whose result
            exposes a ``text`` attribute (e.g. a requests session).
        url: page to fetch; defaults to the blog page this script was
            originally written for.

    Returns:
        The ``text`` attribute of the response.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    response = session.get(url, headers=headers)
    return response.text
if __name__ == '__main__':
    # Log in, rebuild a session from the returned cookies, fetch the page
    # with that session, and print the resulting HTML.
    print(getHtml_str(getMycsdnsession(postLoginsession())))