from bs4 import BeautifulSoup #引用BeautifulSoup库
import requests #引用requests
import os #os
import pandas as pd
import csv
import codecs
import re
import xlwt #excel操作
import time
import json
from urllib import parse
from selenium import webdriver
# Scrapes all videos from one Kuaishou user's profile page (no pagination)
# by POSTing the visionProfilePhotoList GraphQL query, then downloading each
# video file into ./liutao/video/.
cookies = {
    'did': 'web_79a2acd64d164a82990e9e48130c0268',
    'didv': '1637038675000',
    'kpf': 'PC_WEB',
    'kpn': 'KUAISHOU_VISION',
    'clientid': '3',
    'client_key': '65890b29',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
    'Content-Type': 'application/json',
}
# Raw GraphQL request body. NOTE(review): generated with a tool such as apipost
# and pasted verbatim to avoid escaping mistakes — do not hand-edit.
data = '{ "operationName": "visionProfilePhotoList", "variables": { "userId": "3x3zj6vu8dwariy", "pcursor": "", "page": "profile" }, "query": "query visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\\n result\\n llsid\\n webPageArea\\n feeds {\\n type\\n author {\\n id\\n name\\n following\\n headerUrl\\n headerUrls {\\n cdn\\n url\\n __typename\\n }\\n __typename\\n }\\n tags {\\n type\\n name\\n __typename\\n }\\n photo {\\n id\\n duration\\n caption\\n likeCount\\n realLikeCount\\n coverUrl\\n coverUrls {\\n cdn\\n url\\n __typename\\n }\\n photoUrls {\\n cdn\\n url\\n __typename\\n }\\n photoUrl\\n liked\\n timestamp\\n expTag\\n animatedCoverUrl\\n stereoType\\n videoRatio\\n profileUserTopPhoto\\n __typename\\n }\\n canAddComment\\n currentPcursor\\n llsid\\n status\\n __typename\\n }\\n hostName\\n pcursor\\n __typename\\n }\\n}\\n" }'

# Output directory for the downloaded .mp4 files.
_OUTPUT_DIR = './liutao/video/'


def _sanitize_title(title):
    """Return *title* with filesystem-unsafe characters replaced by '_'."""
    return re.sub(r'[\/\\\:;\*#¥%$!@^……&()\?\"\<\>\|]', '_', title)


def _download_feed(feed):
    """Download the video of a single feed entry and save it as <caption>.mp4.

    *feed* is one element of data['visionProfilePhotoList']['feeds'] from the
    GraphQL response; its 'photo' dict carries the caption and the direct URL.
    """
    new_title = _sanitize_title(feed['photo']['caption'])
    photoUrl = feed['photo']['photoUrl']
    # Example of a resolved photoUrl (CDN link with signed pkey/tag params):
    # https://v2.kwaicdn.com/upic/2021/12/21/20/BMjAyMTEyMjEyMDE1MTFfMTE1OTYxMDYyM182MzI0NjQzODAwMV8xXzM=_b_Bab5f65132c16fc17452d4acb7d135c71.mp4
    # ?pkey=...&tag=...&clientCacheKey=...
    content = requests.get(photoUrl, timeout=60).content
    with open(_OUTPUT_DIR + new_title + '.mp4', mode='wb') as f:
        f.write(content)
    print(new_title, '爬取成功!!!')


def main():
    """Fetch the profile's feed list and download every video in it."""
    # Fix: create the output directory up front — open() in _download_feed
    # would otherwise raise FileNotFoundError on a fresh checkout.
    os.makedirs(_OUTPUT_DIR, exist_ok=True)
    response = requests.post('https://www.kuaishou.com/graphql',
                             headers=headers, cookies=cookies, data=data,
                             timeout=30)
    data_json = response.json()
    data_list = data_json['data']['visionProfilePhotoList']['feeds']
    # Fix: loop variable renamed from 'data' to 'feed' — the original shadowed
    # the module-level GraphQL payload string.
    for feed in data_list:
        _download_feed(feed)
        time.sleep(1)  # throttle: one request per second to avoid bans
    print("wancheng")


if __name__ == '__main__':
    main()
python 爬取kuaishou主页视频无分页
原创
©著作权归作者所有:来自51CTO博客作者小楚留香的原创作品,请联系作者获取转载授权,否则将追究法律责任
上一篇:公共类tp6
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
python写入分页符 python分页爬取
我们以abckg网址为例演示。首先爬取详情页。另外一种解析内容页: 然后是爬取分页:还有一种方法就是设置一个方法循环爬取:
python写入分页符 python scrapy框架 分页 爬取详情页 -
Yearning 监控慢sql
一、 ZooKeeper 简介 概念: 顾名思义 zookeeper 就是动物园管理员,他是用来管 hadoop(大象)、Hive(蜜蜂)、pig(小猪)的管理员, Apache Hbase 和 Apache Solr 的分布式集群都用到了 zookeeper;Zookeeper:是一个分布式的、开源的程序协调服务,是 hadoop 项目下的一个子项目。他提供的主要功能包括:配置管理、名字服务、分
Yearning 监控慢sql zookeeper 连接慢 zookeeper设置临时节点失效时间 zookeeper 数据 -
python栅格掩膜提取
GDAL(Geospatial Data Abstraction Library)是一个在X/MIT许可协议下的开源栅格空间数据转换库。它利用抽象数据模型来表达所支持的各种文件格式。它还有一系列命令行工具来进行数据转换和处理。 Python的GDAL
python栅格掩膜提取 python arcgis 笔记 开发语言