Two weeks ago my IP got banned, so crawling carries some risk and you should proceed with caution. Fortunately, proxy IPs can help (a minimal proxy sketch follows the script below).

# coding=utf-8
import requests
from bs4 import BeautifulSoup

# Running total of page views + comment counts (renamed to avoid shadowing the built-in sum)
total = 0
# 20 is the number of pages; set it to match your own blog
for i in range(20):
    print("Page", i + 1)
    # The base URL was omitted in the original; substitute your own article-list URL
    url = "https://example.com/article/list/" + str(i + 1)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"}
    html = requests.get(url, headers=headers)
    soup = BeautifulSoup(html.text, features="html.parser")
    # Each "read-num" span holds text like "阅读数:123" or "评论数:45"
    for span in soup.find_all("span", "read-num"):
        num = span.string
        total += int(num.split(":")[1])
print(total)
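
Since the opening note mentions falling back to proxy IPs after the ban, here is a minimal sketch of routing the same request through a proxy via the proxies argument of requests.get; the address 127.0.0.1:8888 is only a placeholder for whatever proxy you actually have, and the timeout value is an assumption.

# Minimal sketch, assuming you have a working HTTP proxy; the address is a placeholder
proxies = {
    "http": "http://127.0.0.1:8888",
    "https": "http://127.0.0.1:8888",
}
html = requests.get(url, headers=headers, proxies=proxies, timeout=10)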