import requests
import csv
import re
import random
from time import sleep
# GraphQL endpoint that the search request is POSTed to.
url = "https://www.kuaishou.com/graphql"

# HTTP headers sent with every request to ``url``. The values describe a
# Chrome 116 client on Windows issuing a same-origin CORS request from the
# site's search page. Long values are split over several adjacent literals
# purely for readability; the resulting strings are unchanged.
headers = {
    "Cookie": (
        "kpf=PC_WEB; clientid=3; "
        "did=web_ab9aa1b9b464249f614aafce6cfa8c55; "
        "kpn=KUAISHOU_VISION"
    ),
    "Host": "www.kuaishou.com",
    # The searchKey query parameter is percent-encoded.
    "Referer": (
        "https://www.kuaishou.com/search/video"
        "?searchKey=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80"
    ),
    # Client-hint headers.
    "Sec-Ch-Ua": (
        '"Chromium";v="116", '
        '"Not)A;Brand";v="24", '
        '"Google Chrome";v="116"'
    ),
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "Windows",
    # Fetch-metadata headers.
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "Origin": "https://www.kuaishou.com",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.0.0 Safari/537.36"
    ),
}
# Search term sent as the "keyword" GraphQL variable.
SEARCH_KEYWORD = "阳光"

# Values sent, one request each, as the "pcursor" GraphQL variable.
SEARCH_PAGES = range(0, 1)

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` may block indefinitely.
REQUEST_TIMEOUT = 10

# GraphQL document for the "visionSearchPhoto" operation. It never changes
# between requests, so it is built once here instead of once per page. The
# literal is split at fragment boundaries for readability only.
SEARCH_QUERY = (
    "fragment photoContent on PhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\n"
    "fragment recoPhotoFragment on recoPhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\n"
    "fragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n ...recoPhotoFragment\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\n"
    "query visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n searchSessionId\n pcursor\n aladdinBanner {\n imgUrl\n link\n __typename\n }\n __typename\n }\n}\n"
)

# Characters removed from a caption before it is used as a file name: the
# ones Windows forbids in file names (\ / : * ? " < > |), line breaks, and
# '-', '_' and space. The hyphen is escaped on purpose: written as "\n-_" it
# forms a range from U+000A to U+005F, which also deletes every digit and
# uppercase ASCII letter.
_UNSAFE_CAPTION_CHARS = re.compile(r'[\\/:*?"<>|\r\n\-_ ]')


def clean_caption(caption, max_length=20):
    """Return *caption* reduced to a short stem usable as a file name.

    Args:
        caption: Caption text of a video; ``None`` or an empty string is
            accepted and yields an empty string.
        max_length: Maximum number of characters kept after cleaning.

    Returns:
        The caption with unsafe characters removed, cut to *max_length*.
    """
    if not caption:
        return ''
    return _UNSAFE_CAPTION_CHARS.sub('', caption)[:max_length]


def fetch_feeds(keyword, pcursor):
    """POST one search request and return the feed entries of its response.

    Args:
        keyword: Search term placed in the "keyword" variable.
        pcursor: Cursor value; it is sent as a string.

    Returns:
        The value found at ``data -> visionSearchPhoto -> feeds`` in the
        JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no answer arrives within ``REQUEST_TIMEOUT``.
        KeyError: If the response lacks one of the keys listed above.
    """
    payload = {
        "operationName": "visionSearchPhoto",
        "variables": {
            "keyword": keyword,
            "pcursor": str(pcursor),
            "page": "search",
        },
        "query": SEARCH_QUERY,
    }
    resp = requests.post(url, headers=headers, json=payload,
                         timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    resp.raise_for_status()
    return resp.json()['data']['visionSearchPhoto']['feeds']


def main():
    """Print the caption of every search hit and derive its file stem."""
    for page in SEARCH_PAGES:
        for info in fetch_feeds(SEARCH_KEYWORD, page):
            # Random 1-2 s pause per item; presumably throttles the
            # per-video download below once it is enabled.
            sleep(random.uniform(1, 2))
            photo = info['photo']
            photo_url = photo['photoUrl']
            caption = photo['caption']
            print(caption)
            caption = clean_caption(caption)
            # TODO: the download step is still disabled. It fetches
            # photo_url and writes the bytes to "<caption>.mp4".
            # photo_url_mp4 = requests.get(photo_url).content
            # with open(caption + '.mp4', 'wb') as f:
            #     f.write(photo_url_mp4)
            # print(caption+'下载完成')


# Run only when executed as a script, so importing this module does not
# fire network requests.
if __name__ == "__main__":
    main()
# TODO: continue tomorrow.