import pandas as pd
import requests
# Fetch the blog.csdn.net hot-rank list page by page and assemble it into a
# single pandas DataFrame named ``hot_rank``.
#
# NOTE(review): in the original text the three URL literals were truncated
# ('"h', '"htders)' and '"httpsot-rank"'), which left the script unparseable.
# The values below are reconstructed from the surviving fragments and from the
# "authority" header -- confirm them against the live site before relying on
# this script.
RANK_PAGE_URL = "https://blog.csdn.net/rank/list"
HOT_RANK_API_URL = "https://blog.csdn.net/phoenix/web/blog/hot-rank"

# Seconds to wait for each HTTP call; without a timeout, requests can block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10

# Browser-like request headers sent with every call.
headers = {
    "authority": "blog.csdn.net",
    "accept": "application/json, text/plain, */*",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    "sec-fetch-site": "same-origin",
    "sec-fetch-mode": "cors",
    "sec-fetch-dest": "empty",
    "referer": RANK_PAGE_URL,
    "accept-language": "zh-CN,zh;q=0.9",
}

# Load the ranking page once through the session before calling the JSON
# endpoint (presumably to pick up cookies the API expects -- TODO confirm).
session = requests.session()
session.get(RANK_PAGE_URL, headers=headers, timeout=REQUEST_TIMEOUT)

params = {
    "page": "",
    "pageSize": "25",
}

# One DataFrame per fetched page, concatenated afterwards.
result = []
for i in range(4):  # requests pages 0 through 3, 25 entries each
    params["page"] = str(i)
    res = session.get(
        HOT_RANK_API_URL,
        params=params,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error instead of hitting an opaque JSON/KeyError
    # further down.
    res.raise_for_status()
    df = pd.DataFrame(res.json()["data"])[[
        "userName", "nickName", "articleTitle", "viewCount",
        "commentCount", "favorCount", "articleDetailUrl", "hotRankScore",
    ]]
    result.append(df)

hot_rank = pd.concat(result, ignore_index=True)

# Convert the four counter columns to integers in one call.
hot_rank = hot_rank.astype({
    "commentCount": int,
    "favorCount": int,
    "viewCount": int,
    "hotRankScore": int,
})

# Shift the 0-based row index to 1-based and promote it to the first column,
# which becomes the rank column after the rename below.
hot_rank.index += 1
hot_rank.reset_index(inplace=True)
hot_rank.columns = ["排名", "csdnid", "昵称", "标题", "阅读", "评论", "收藏", "链接", "热度"]

# Bare expression kept from the original: it displays the table when this is
# the last statement of a notebook cell and is a no-op in a plain script.
hot_rank