今天来讲一下对请求头的处理的一些例子,注释啥的都在下面。

  1. 模拟浏览器登录->处理cookie(这种情况下是需要你登录之后才能获得你想要的信息,有些网站你可以在登录之后用F12->network里面去找它关于login的一些文件里面的参数能够看到它的账号和密码,一般学校的网站好像可以)
# 登录 -> 得到cookie
# 带着cookie 去请求到书架url -> 书架上的内容

# 必须得把上面的两个操作连起来
# 我们可以使用session进行请求 -> session你可以认为是一连串的请求. 在这个过程中的cookie不会丢失
import requests

# # 会话
# session = requests.session()
# data = {
#     "loginName": "18614075987",
#     "password": "q6035945"
# }
#
# # 1. 登录
# url = "https://passport.17k.com/ck/user/login"
# session.post(url, data=data)
# # print(resp.text)
# # print(resp.cookies)  # 看cookie
#
# # 2. 拿书架上的数据
# # 刚才的那个session中是有cookie的
# resp = session.get('https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919')
#
# print(resp.json())

resp = requests.get("https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919", headers={
    "Cookie":"GUID=bbb5f65a-2fa2-40a0-ac87-49840eae4ad1; c_channel=0; c_csc=web; UM_distinctid=17700bddefba54-0a914fc70f1ede-326d7006-1fa400-17700bddefc9e3; Hm_lvt_9793f42b498361373512340937deb2a0=1614327827; accessToken=avatarUrl%3Dhttps%253A%252F%252Fcdn.static.17k.com%252Fuser%252Favatar%252F16%252F16%252F64%252F75836416.jpg-88x88%253Fv%253D1610625030000%26id%3D75836416%26nickname%3D%25E9%25BA%25BB%25E8%25BE%25A3%25E5%2587%25A0%25E4%25B8%259D%26e%3D1629888002%26s%3D63b8b7687fc8a717; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2275836416%22%2C%22%24device_id%22%3A%2217700ba9c71257-035a42ce449776-326d7006-2073600-17700ba9c728de%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22bbb5f65a-2fa2-40a0-ac87-49840eae4ad1%22%7D; Hm_lpvt_9793f42b498361373512340937deb2a0=1614336271"
})
print(resp.text)
  1. 防盗链处理->抓取梨视频
# 1. 拿到contId
# 2. 拿到videoStatus返回的json. ->  srcURL
# 3. srcURL里面的内容进行修整
# 4. 下载视频
import requests

# 拉取视频的网址
url = "https://www.pearvideo.com/video_1721605"
contId = url.split("_")[1]

videoStatusUrl = f"https://www.pearvideo.com/videoStatus.jsp?contId={contId}"

headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36",
    # 防盗链: 溯源, 当前本次请求的上一级是谁
    "Referer": url
}

resp = requests.get(videoStatusUrl, headers=headers)
dic = resp.json()

srcUrl = dic['videoInfo']['videos']['srcUrl']
systemTime = dic['systemTime']
srcUrl = srcUrl.replace(systemTime, f"cont-{contId}")

# 下载视频
with open("a.mp4", mode="wb") as f:
    f.write(requests.get(srcUrl).content)
  1. 代理 -> 防⽌被封IP(这个东西还是不错的)
# 原理. 通过第三方的一个机器去发送请求
import requests


# 218.60.8.83:3129
proxies = {
    "https": "https://218.60.8.83:3129"
}

resp = requests.get("https://www.baidu.com", proxies=proxies)
resp.encoding = 'utf-8'
print(resp.text)

下面的这一段代码是以网易云音乐为例子做的综合性的训练。

# 1. 找到未加密的参数                       # window.arsea(参数, xxxx,xxx,xxx)
# 2. 想办法把参数进行加密(必须参考网易的逻辑), params  => encText, encSecKey => encSecKey
# 3. 请求到网易. 拿到评论信息

# 需要安装pycrypto:   pip install pycrypto
from Crypto.Cipher import AES
from base64 import b64encode
import requests
import json


url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="

# 请求方式是POST
data = {
    "csrf_token": "",
    "cursor": "-1",
    "offset": "0",
    "orderType": "1",
    "pageNo": "1",
    "pageSize": "20",
    "rid": "R_SO_4_1325905146",
    "threadId": "R_SO_4_1325905146"
}

# 服务于d的
f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
g = "0CoJUm6Qyw8W8jud"
e = "010001"
i = "d5bpgMn9byrHNtAh"  # 手动固定的. -> 人家函数中是随机的

def get_encSecKey():  # 由于i是固定的. 那么encSecText就是固定的.  c()函数的结果就是固定的
    return "1b5c4ad466aabcfb713940efed0c99a1030bce2456462c73d8383c60e751b069c24f82e60386186d4413e9d7f7a9c7cf89fb06e40e52f28b84b8786b476738a12b81ac60a3ff70e00b085c886a6600c012b61dbf418af84eb0be5b735988addafbd7221903c44d027b2696f1cd50c49917e515398bcc6080233c71142d226ebb"


# 把参数进行加密
def get_params(data):  # 默认这里接收到的是字符串
    first = enc_params(data, g)
    second = enc_params(first, i)
    return second  # 返回的就是params


# 转化成16的倍数, 位下方的加密算法服务
def to_16(data):
    pad = 16 - len(data) % 16
    data += chr(pad) * pad
    return data


# 加密过程
def enc_params(data, key):
    iv = "0102030405060708"
    data = to_16(data)
    aes = AES.new(key=key.encode("utf-8"), IV=iv.encode('utf-8'), mode=AES.MODE_CBC)  # 创建加密器
    bs = aes.encrypt(data.encode("utf-8"))  # 加密, 加密的内容的长度必须是16的倍数
    return str(b64encode(bs), "utf-8")  # 转化成字符串返回,


# 处理加密过程
"""
    function a(a = 16) {  # 随机的16位字符串
        var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
        for (d = 0; a > d; d += 1)  # 循环16次
            e = Math.random() * b.length,  # 随机数 1.2345
            e = Math.floor(e),  # 取整  1 
            c += b.charAt(e);  # 去字符串中的xxx位置 b
        return c
    }
    function b(a, b) {  # a是要加密的内容, 
        var c = CryptoJS.enc.Utf8.parse(b) #  # b是秘钥
          , d = CryptoJS.enc.Utf8.parse("0102030405060708")
          , e = CryptoJS.enc.Utf8.parse(a)  # e是数据
          , f = CryptoJS.AES.encrypt(e, c, {  # c 加密的秘钥
            iv: d,  # 偏移量
            mode: CryptoJS.mode.CBC  # 模式: cbc
        });
        return f.toString()
    }
    function c(a, b, c) {   # c里面不产生随机数
        var d, e;
        return setMaxDigits(131),
        d = new RSAKeyPair(b,"",c),
        e = encryptedString(d, a)
    }
    function d(d, e, f, g) {  d: 数据,   e: 010001, f: 很长, g: 0CoJUm6Qyw8W8jud
        var h = {}  # 空对象
          , i = a(16);  # i就是一个16位的随机值, 把i设置成定值 
        h.encText = b(d, g)  # g秘钥
        h.encText = b(h.encText, i)  # 返回的就是params  i也是秘钥
        h.encSecKey = c(i, e, f)  # 得到的就是encSecKey, e和f是定死的 ,如果此时我把i固定, 得到的key一定是固定的
        return h
    }
    
    两次加密: 
    数据+g => b => 第一次加密+i => b = params
"""

# 发送请求. 得到评论结果
resp = requests.post(url, data={
    "params": get_params(json.dumps(data)),
    "encSecKey": get_encSecKey()
})

print(resp.text)