# Video scraping practice 1 (视频采集实战1)
import requests
from bs4 import BeautifulSoup
def get_url():
    """Scrape the 699pic video search page and download every video on it.

    Fetches the hard-coded search-results page, selects every
    ``div.video-box`` node, reads the ``data-original`` attribute of the
    first ``<video>`` tag inside it and hands the resulting URL to
    ``savefile`` together with a sequential name ("1", "2", ...).

    Boxes without a ``<video>`` tag or without a ``data-original``
    attribute are skipped instead of aborting the whole run.
    """
    # Address of the search-results page to scrape.
    url = 'http://699pic.com/video-sousuo-0-2-1-200-0-0.html?sem=1&sem_kid=126640&sem_type=2'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    # ``headers`` must be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, so passing the dict positionally
    # sends the User-Agent as a query-string parameter, not as a header.
    # The timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(url, headers=headers, timeout=30)
    r.encoding = 'UTF-8'
    # print(r.text)  # Handy while learning: shows that the page source was fetched.

    # Every div with class "video-box" wraps one video preview.
    all_video = BeautifulSoup(r.text, 'lxml').select('div.video-box')

    num = 1
    for video in all_video:
        video_tags = video.select('video')
        source = video_tags[0].attrs.get('data-original') if video_tags else None
        if not source:
            # No <video> tag or no data-original attribute: nothing to download.
            continue

        video_url = source.strip()
        if video_url.startswith('//'):
            # Protocol-relative URL: prepend the scheme to make it absolute.
            video_url = 'https:' + video_url
        print("video_url=", video_url)

        savefile(video_url, str(num))
        num += 1
def savefile(video_url, name, save_dir='E://spider_study//视频'):
    """Download one video and store it as ``<save_dir>/<name>.mp4``.

    Args:
        video_url: Absolute URL of the video file to download.
        name: File name to use, without the ``.mp4`` extension.
        save_dir: Directory the file is written to. Defaults to the path
            the script originally hard-coded; it is created if missing.

    Returns:
        None. Progress is reported with ``print``. When the server answers
        with an error status, nothing is written and a failure message is
        printed instead.
    """
    import os  # Local import: keeps this function self-contained.

    print("开始下载···")
    video = requests.get(video_url, timeout=30)
    if not video.ok:
        # Do not save an HTTP error page under a .mp4 name.
        print('视频下载失败', video.status_code, video_url)
        return

    os.makedirs(save_dir, exist_ok=True)
    file_path = os.path.join(save_dir, name + '.mp4')
    # 'wb' rather than 'ab': appending on a re-run would glue a second copy
    # of the video onto the existing file and corrupt it. The ``with`` block
    # closes the handle even if the write fails.
    with open(file_path, 'wb') as f:
        f.write(video.content)
    print('视频下载完成', video_url)
# Script entry point: start scraping only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    get_url()