1.get请求搜狗搜索_处理一个UA反爬

import requests

# GET a Sogou search-results page. The site rejects requests whose
# User-Agent looks like a script, so we send a browser UA header.
content = input("请输入你要搜索的内容:")
url = f"https://www.sogou.com/web?query={content}"
headers = {
    # Impersonate a regular desktop Chrome browser (UA anti-bot bypass).
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
}
# Fix: use the response as a context manager so the underlying
# connection is released (the original never closed it).
with requests.get(url, headers=headers) as resp:
    print(resp.text)

2.post请求百度翻译调用

import requests

# POST a word to Baidu Translate's suggestion endpoint and print each
# suggestion dict from the JSON response's "data" list.
url = "https://fanyi.baidu.com/sug"
data = {
    "kw": input("请输入要翻译的汉字:")
}
# Fix: close the response when done (the original leaked it) and fail
# loudly on an HTTP error instead of crashing inside .json().
with requests.post(url, data=data) as resp:
    resp.raise_for_status()
    for suggestion in resp.json()["data"]:
        print(suggestion)

3.豆瓣电影排行榜名单获取1.0

import requests

# Fetch one page of a Douban movie chart via its JSON API and print
# each movie's title and first rating value.
url = "https://movie.douban.com/j/chart/top_list"
params = {
    "type": "13",            # chart category id (site-defined)
    "interval_id": "100:90", # rating-percentile interval
    "action": "",
    "start": "0",
    "limit": "20",
}
headers = {
    # Browser UA header to bypass the site's anti-bot check.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
}
# Fix: close the response, parse the JSON exactly once (the original
# called resp.json() twice per iteration), and drop the redundant
# manual index `j` — the loop variable already is each movie dict.
with requests.get(url, params=params, headers=headers) as resp:
    movies = resp.json()
for movie in movies:
    print("电影名称:" + movie["title"], "评分:" + movie["rating"][0])

4.豆瓣top250排行榜数据获取2.0

import requests
import re

# Scrape Douban Top250 (10 pages of 25): extract each movie's title,
# director, and review count with a regex, print them, and append one
# CSV-ish line per movie to doubantop250.csv.
headers = {
    # Browser UA header to bypass the site's anti-bot check.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
}

# Fix: compile the pattern once; the original recompiled it on every
# page inside the loop.
obj = re.compile(r'<div class="item">.*?<span class="title">(?P<nume>.*?)</span>.*?'
                 r'<div class="bd">.*?导演: (?P<daoyan>.*?) .*?<div class="star">.*?'
                 r"</span>.*?<span>(?P<pingjia>.*?)</span>", re.S)

j = 0  # running movie number across all pages
# Fix: the original `while 1` fetched two pages past the end of the
# list (start=250 and start=275) before its break fired; the Top250
# list is exactly range(0, 250, 25). Also: open the file with `with`
# and close every response (the original closed only the last one).
with open('doubantop250.csv', mode='w', encoding='utf-8') as f:
    for start in range(0, 250, 25):
        url = f"https://movie.douban.com/top250?start={start}&filter="
        with requests.get(url, headers=headers) as resp:
            page = resp.text
        for m in obj.finditer(page):
            j += 1
            nume = m.group("nume")
            daoyan = m.group('daoyan')
            pingjia = m.group('pingjia')
            print(f"电影名称{j}:" + nume, "导演:" + daoyan, "评价数量:" + pingjia)
            f.write(f"电影名称{j}:{nume}, 导演: {daoyan}, 评价数量: {pingjia}\n")

print('完成')

5.优美图库图片爬取

import requests
from bs4 import BeautifulSoup

# Download every gallery cover image from one umeituku tag page:
# the listing's <a class="TypeBigPics"> links lead to detail pages whose
# <div class="ImageBody"><img src=...> holds the actual image URL.
url = "http://www.umeituku.com/tags/siwa.htm"
with requests.get(url) as resp:
    resp.encoding = "utf-8"
    listing_html = resp.text

soup = BeautifulSoup(listing_html, "html.parser")
links = soup.find_all("a", attrs={"class": "TypeBigPics"})
count = 0
for link in links:
    detail_url = link.get("href")
    name = link.text
    # Fix: close every detail-page response inside the loop — the
    # original's single resp2.close() after the loop only closed the
    # last one, and the image download response was never closed.
    with requests.get(detail_url) as detail_resp:
        detail_resp.encoding = "utf-8"
        detail_html = detail_resp.text
    image_body = BeautifulSoup(detail_html, "html.parser").find("div", attrs={"class": "ImageBody"})
    image_url = image_body.find("img").get("src")
    with requests.get(image_url) as image_resp:
        image_bytes = image_resp.content
    # NOTE(review): destination directory is hard-coded and must exist.
    with open(rf"C:\Users\xiazhimiao\Desktop\swmn\{name}.jpg", mode="wb") as f:
        f.write(image_bytes)
    # Fix: increment before printing so the first download reports 1,
    # not 0 (the original printed the pre-increment counter).
    count += 1
    print(f"下载完毕{count}")

重要声明

本博客记录内容仅用来学习,无非法用途,如有不当之处,麻烦联系V:xia_12_13,删除内容。

Python爬虫学习经历_爬虫