前言

Python实现自动采集快手视频数据,自动评论,自动点赞,自动关注_爬虫

今天的这个脚本,是一个别人发的外包,交互界面的代码就不在这里说了,但是可以分享下自动评论、自动点赞、采集评论和视频的数据是如何实现的

开发环境

python 3.8 运行代码
pycharm 2021.2 辅助敲代码
requests 第三方模块

原理:

模拟客户端,向服务器发送请求

代码实现

1. 请求伪装

def __init__(self):
self.headers = {
'content-type': 'application/json',
'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_ea128125517a46bd491ae9ccb255e242; client_key=65890b29; didv=1646739254078; _bl_uid=pCldq3L00L61qCzj6fytnk2wmhz5; userId=270932146; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABH2BHihXp4liEYWMBFv9aguyfs8BsbINQIWqgoDw0SimMkpXwM7PKpKdJcZbU12QOyeKFaG4unV5EUkkEswL0HnA8_A9z2ujLlKN__gRsxU2B5kIYgirTDPiVJ3uPN1sU9mqvog3auoNJxDdbKjVeFNK1wQ5HTM_yUvYvmWOx9iC8IKcvnmo9YnG_J9ske-t-wiCWMgSCA25HN6MRqCMxuhoSnIqSq99L0mk4jolsseGdcwiNIiC8rjheuewIA1Bk3LwkNIYikU2zobcuvgAiBbMnBuDixygFMAE; kuaishou.server.web_ph=55c7e6b2033ea94a3447ea98082642cd6f1a',
'Host': 'www.kuaishou.com',
'Origin': 'https://www.kuaishou.com',
'Referer': 'https://www.kuaishou.com/search/video?searchKey=%E9%BB%91%E4%B8%9D',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36',
}
self.url = 'https://www.kuaishou.com/graphql'

2. 获取搜索内容的方法

def get_search(self, keyword, pcursor):
"""
:param keyword: 关键词
:param pcursor: 页码
:return: 搜索到的作品
"""
json = {
'operationName': "visionSearchPhoto",
'query': "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp \n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n searchSessionId\n pcursor\n aladdinBanner {\n imgUrl\n link\n __typename\n }\n __typename\n }\n}\n",
'variables': {'keyword': keyword, 'pcursor': pcursor, 'page': "search"}
}
response = requests.post(url=self.url, json=json, headers=self.headers)
json_data = response.json()
print(json_data)
return json_data

3. 获取作品评论

def get_comments(self, photoId, pcursor):
"""
:param photoId: 作品id
:param pcursor: 页码
:return: 评论内容
"""
json = {
'operationName': "commentListQuery",
'query': "query commentListQuery($photoId: String, $pcursor: String) { visionCommentList(photoId: $photoId, pcursor: $pcursor) {\n commentCount\n rootComments {\n commentId\n authorId\n authorName\n content\n headurl\n timestamp\n likedCount\n realLikedCount\n liked\n status\n subCommentCount\n subCommentsPcursor\n subComments {\n commentId\n authorId\n authorName\n content\n headurl\n timestamp\n likedCount\n realLikedCount\n liked\n status\n replyToUserName\n replyTo\n __typename\n }\n __typename\n }\n __typename\n }\n}\n",
'variables': {'photoId': photoId, 'pcursor': pcursor}
}
response = requests.post(url=self.url, json=json, headers=self.headers)
json_data = response.json()
print(json_data)
return json_data

4. 自动评论

def post_comment(self, content, photoAuthorId, photoId):
"""
:param content: 评论内容
:param photoAuthorId: 该作品的作者id
:param photoId: 作品id
:return: 有没有成功
"""
json = {
'operationName': "visionAddComment",
'query': "mutation visionAddComment($photoId: String, $photoAuthorId: String, $content: String, $replyToCommentId: ID, $replyTo: ID, $expTag: String) { (photoId: $photoId, photoAuthorId: $photoAuthorId, content: $content, replyToCommentId: $replyToCommentId, replyTo: $replyTo, expTag: $expTag) {\n result\n commentId\n content\n timestamp\n status\n __typename\n }\n}\n",
'variables': {
'content': content,
'expTag': "1_a/2005158523885162817_xpcwebsearchxxnull0",
'photoAuthorId': photoAuthorId,
'photoId': photoId
}
}
response = requests.post(url=self.url, json=json, headers=self.headers)
json_data = response.json()
print(json_data)
return json_data

5. 点赞操作

def is_like(self, photoId, photoAuthorId):
"""
:param photoId: 作品id
:param photoAuthorId: 该作品的作者id
:return: 有没有成功
"""
json = {
'operationName': "visionVideoLike",
'query': "mutation visionVideoLike($photoId: String, $photoAuthorId: String, $cancel: Int, $expTag: String) {\n visionVideoLike(photoId: $photoId, photoAuthorId: $photoAuthorId, cancel: $cancel, expTag: $expTag) {\n result\n __typename\n }\n}",
'variables': {
'cancel': 0,
'expTag': "1_a/2005158523885162817_xpcwebsearchxxnull0",
'photoAuthorId': photoAuthorId,
'photoId': photoId
}
}
response = requests.post(url=self.url, json=json, headers=self.headers)
json_data = response.json()
print(json_data)
return json_data

6. 关注操作

def is_follow(self, touid):
"""
:param touid: 用户id
:return:
"""
json = {
'operationName': "visionFollow",
'query': "mutation visionFollow($touid: String, $ftype: Int, $followSource: Int, $expTag: String) {\n visionFollow(touid: $touid, ftype: $ftype, followSource: $followSource, expTag: $expTag) {\n followStatus\n hostName\n error_msg\n __typename\n }\n}\n",
'variables': {
'expTag': "1_a/2005158523885162817_xpcwebsearchxxnull0",
'followSource': 3,
'ftype': 1,
'touid': touid
}
}
response = requests.post(url=self.url, json=json, headers=self.headers)
json_data = response.json()
print(json_data)
return json_data

7. 获取创作者信息

def get_userInfo(self, userId):
"""

:param userId: 用户ID
:return: 用户信息
"""
json = {
'operationName': "visionProfile",
'query': "query visionProfile($userId: String) {\n visionProfile(userId: $userId) {\n hostName\n userProfile {\n ownerCount {\n fan\n photo\n follow\n photo_public\n __typename\n }\n profile {\n gender\n user_name\n user_id\n headurl\n user_text\n user_profile_bg_url\n __typename\n }\n isFollowing\n __typename\n }\n __typename\n }\n}\n",
'variables': {'userId': userId}
}
response = requests.post(url=self.url, json=json, headers=self.headers)
json_data = response.json()
print(json_data)
return json_data

8. 获取创作者视频

def get_video(self, userId, pcursor):
"""
:param userId: 用户id
:param pcursor: 页码
:return: 作品
"""
json = {
'operationName': "visionProfilePhotoList",
'query': "fragment photoContent on PhotoEntity {\n duration\n caption\n likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n",
'variables': {'userId': userId, 'pcursor': pcursor, 'page': "profile"}
}
response = requests.post(url=self.url, json=json, headers=self.headers)
json_data = response.json()
print(json_data)
return json_data

9. 调用函数

if __name__ == '__main__':
kuaishou = KuaiShou()

kuaishou.get_comments('3xzry7secwhunai', '')

kuaishou.post_comment('爱你', '3xgz9zaku7hig96', '3xydesqbvtrvcuq')

kuaishou.is_like('3xydesqbvtrvcuq', '3xgz9zaku7hig96')

kuaishou.is_follow('3xxhfqquuachnje')

kuaishou.get_userInfo('3xxhfqquuachnje')

kuaishou.get_video('3xxhfqquuachnje', '')


视频教程链接地址:​​https://www.bilibili.com/video/BV1g541197gS?share_source=copy_web​