项目场景:
通过 Python 的 selenium 包在 B 站上传视频并发布,视频内容来源于 YouTube 网站。
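依赖包下载:
pip install loguru selenium ddddocr opencv-python numpy requests -i https://pypi.douban.com/simple
(下方代码实际导入的是 ddddocr、cv2(opencv-python)和 requests;paddlehub 的导入已被注释,仅在改用 PaddleHub OCR 方案时才需要额外安装 paddlehub。)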
整体流程:
1. 下载视频
2. 本地存储
3. 视频上传
4. 视频描述信息填写
5. 发布视频
- 下载视频 & 本地存储
import requests
import os
- 视频上传(包含B站登录验证)
import base64
import time
import os
from loguru import logger
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import numpy as np
# import paddlehub as hub
import ddddocr
import cv2 as cv
def base64_to_img(base64_code):
    """Decode a base64-encoded image file into an OpenCV array.

    :param base64_code: base64 text of an encoded image file (for example
        the PNG returned by Selenium's ``screenshot_as_base64``).
    :return: the decoded image as a 3-channel BGR ``numpy.ndarray``.
    :raises ValueError: if the decoded bytes are not an image OpenCV can read.
    """
    img_data = base64.b64decode(base64_code)
    # np.fromstring's binary mode is deprecated; frombuffer is the replacement
    # and wraps the bytes without copying them.
    img_array = np.frombuffer(img_data, dtype=np.uint8)
    # imdecode expects an IMREAD_* flag. The original passed a cvtColor
    # conversion code (cv.COLOR_RGB2BGR), which only worked by coincidence of
    # its integer value.
    img = cv.imdecode(img_array, cv.IMREAD_COLOR)
    if img is None:
        # imdecode signals undecodable data with None instead of raising.
        raise ValueError("图片解码失败")
    return img
def click_locxy(dr, x, y, left_click=True):
    """Move the pointer by (x, y), click there, then move it back.

    :param dr: the browser (WebDriver) the actions are performed on
    :param x: horizontal offset passed to ``move_by_offset``
    :param y: vertical offset passed to ``move_by_offset``
    :param left_click: True for a left click, otherwise a right
        (context) click
    """
    chain = ActionChains(dr).move_by_offset(x, y)
    chain = chain.click() if left_click else chain.context_click()
    chain.perform()
    # Undo the move so the pointer is back where it was before this call.
    ActionChains(dr).move_by_offset(-x, -y).perform()
class BiliBili:
    """Drive a Chrome session that logs in to Bilibili and uploads a video.

    Creating an instance opens Chrome, loads ``base_url`` and either logs in
    automatically (solving the click captcha with ddddocr) or waits for the
    user to log in by hand. Call ``close()`` when finished.
    """

    def __init__(self, base_url='https://www.bilibili.com/', user_number="", user_password="", auto_login=False):
        """
        :param base_url: Bilibili home page URL
        :param user_number: login account
        :param user_password: login password
        :param auto_login: True to log in automatically, False to let the
            user log in manually in the opened browser
        """
        self.base_url = base_url
        self.user_number = user_number
        self.user_password = user_password
        # Browser setup
        self.browser = webdriver.Chrome()
        self.browser.get(self.base_url)
        if auto_login:
            # Recogniser that reads the text inside an image.
            self.ocr_detector = ddddocr.DdddOcr()
            # Detector that returns the bounding boxes of targets in an image;
            # needed because classification alone yields no coordinates.
            self.ocr_locator = ddddocr.DdddOcr(det=True)
            while True:
                try:
                    self._login()
                    logger.success("登录成功!")
                    break
                except Exception:
                    # `except Exception` (not a bare except) so that Ctrl-C can
                    # still interrupt this otherwise endless retry loop.
                    logger.exception("登录失败,刷新页面重新登录......")
                    self.browser.refresh()
        else:
            # Manual login: block until the user confirms on the console.
            login_success = input("请确认是否登录成功,若成功则输入成功:")
            logger.info(f"执行手动登录成功:{login_success}")

    @staticmethod
    def _encode_png(img):
        """Encode an OpenCV image array as PNG bytes (the input ddddocr expects)."""
        ok, buffer = cv.imencode('.png', np.ascontiguousarray(img))
        if not ok:
            raise ValueError("图片编码失败")
        return buffer.tobytes()

    @staticmethod
    def _plan_clicks(verify_code, code_positions):
        """Return the (x, y) click targets for each character of ``verify_code``, in order.

        :param verify_code: the characters that must be clicked, in order
        :param code_positions: dicts with ``text``, ``x`` and ``y`` keys
        :return: list of ``(x, y)`` tuples, one per character
        :raises ValueError: if ``verify_code`` is empty or a character has no
            unused matching entry in ``code_positions``
        """
        if not verify_code:
            raise ValueError('验证失败')
        # Work on a copy so each detected box is used at most once and the
        # caller's list is left untouched.
        remaining = list(code_positions)
        targets = []
        for code in verify_code:
            for idx, c_p in enumerate(remaining):
                if c_p['text'] == code:
                    targets.append((c_p['x'], c_p['y']))
                    del remaining[idx]
                    break
            else:
                raise ValueError('验证失败')
        return targets

    def _ocr(self, img: np.ndarray):
        """
        Locate the text targets in an image and recognise each of them.

        :param img: image as an OpenCV array
        :return: list of dicts ``{'text': str, 'x': int, 'y': int}`` where
            (x, y) is the centre of the detected box in image pixels
        :raises ValueError: if nothing could be recognised
        """
        height, width = img.shape[:2]
        # ddddocr works on encoded image bytes, not on raw pixel buffers.
        boxes = self.ocr_locator.detection(self._encode_png(img))
        ocr_res = []
        for x1, y1, x2, y2 in boxes:
            # Clamp to the image so a box touching the border cannot produce
            # negative (wrapping) slice indices.
            x1, y1 = max(int(x1), 0), max(int(y1), 0)
            x2, y2 = min(int(x2), width), min(int(y2), height)
            if x2 <= x1 or y2 <= y1:
                continue
            text = self.ocr_detector.classification(self._encode_png(img[y1:y2, x1:x2]))
            if text:
                ocr_res.append({'text': text, 'x': (x1 + x2) // 2, 'y': (y1 + y2) // 2})
        if not ocr_res:
            raise ValueError("文字检测结果为空")
        return ocr_res

    def _captcha_verification(self):
        """
        Solve the click captcha: read the prompt text, find the same
        characters in the captcha image and click them in order. Refreshes
        the captcha and retries until one attempt is submitted.
        """
        logger.debug("等待刷新图片验证码......")
        time.sleep(10)
        # Captcha root element
        root_element = self.browser.find_element(By.CLASS_NAME, "geetest_widget")
        # Button that reloads the captcha
        reload_button = root_element.find_element(By.CLASS_NAME, "geetest_refresh")
        # Button that submits the clicked answer
        confirm_button = root_element.find_element(By.CLASS_NAME, "geetest_commit")
        logger.debug("刷新图片验证码成功!")
        while True:
            # Image showing the characters that have to be clicked
            verify_img_element = root_element.find_element(By.CLASS_NAME, "geetest_tip_img")
            verify_img = base64_to_img(verify_img_element.screenshot_as_base64)
            # Captcha image that contains the clickable characters
            img_element = root_element.find_element(By.CLASS_NAME, "geetest_item_wrap")
            img = base64_to_img(img_element.screenshot_as_base64)
            # Position of the captcha image on the page
            x, y = img_element.location['x'], img_element.location['y']
            try:
                # The prompt is read as one string from the whole image.
                verify_code = self.ocr_detector.classification(self._encode_png(verify_img))
                logger.info(f'验证文字内容:{verify_code}')
                # Characters inside the captcha image with their coordinates
                code_positions = self._ocr(img)
                logger.info(f'验证码图片内容:{code_positions}')
                # Resolve every character before clicking anything, so a
                # failed match does not leave a half-clicked captcha.
                # NOTE(review): offsets are screenshot pixels added to page
                # coordinates; assumes a device pixel ratio of 1 - confirm.
                for dx, dy in self._plan_clicks(verify_code, code_positions):
                    click_locxy(self.browser, x + dx, y + dy)
                    time.sleep(1)
                confirm_button.click()
                break
            except Exception as e:
                logger.error(f"点击验证码失败:{e},重新刷新验证码......")
                # Recognition failed: load a new captcha and try again.
                reload_button.click()
                time.sleep(5)

    def _login(self):
        """
        Log in automatically: open the login dialog, fill in account and
        password, submit, then solve the captcha.

        :return: True once the captcha answer has been submitted
        """
        # Open the login dialog
        logger.debug('开始打开登录界面......')
        time.sleep(5)
        self.browser.find_element(By.CLASS_NAME, "header-login-entry").click()
        logger.debug('打开登录界面成功!')
        # Locate the login form
        logger.debug("开始获取登录表单......")
        time.sleep(5)
        login_form = self.browser.find_element(By.CLASS_NAME, "login-pwd-wp")
        logger.debug("获取登录表单成功!")
        # Fill in account and password
        logger.debug("开始输入账号密码......")
        form_items = login_form.find_element(By.CLASS_NAME, "tab__form").find_elements(By.CLASS_NAME, "form__item")
        for login_item in form_items:
            inputs = login_item.find_element(By.TAG_NAME, 'input')
            inputs.click()
            time.sleep(1)
            inputs.clear()
            # The accessible name tells the account and password fields apart.
            if inputs.accessible_name == '请输入账号':
                inputs.send_keys(self.user_number)
            elif inputs.accessible_name == '请输入密码':
                inputs.send_keys(self.user_password)
        logger.debug("输入账号密码成功!")
        # Click the login button
        logger.debug("点击登录按钮......")
        time.sleep(2)
        login_form.find_element(By.CLASS_NAME, "btn_wp").find_element(By.CLASS_NAME, 'btn_primary ').click()
        # Captcha check
        self._captcha_verification()
        return True

    def to_upload_windows(self):
        """
        Open the video upload page and switch the driver to its window.
        """
        logger.debug("切换至视频上传界面......")
        upload = self.browser.find_element(By.CLASS_NAME, 'header-upload-entry')
        upload.click()
        # Switch to the most recently opened window handle.
        self.browser.switch_to.window(self.browser.window_handles[-1])
        logger.debug("切换成功!")

    def publish_video(self, path_mp4: str, param_dict: dict = None):
        """
        Upload a local video file and wait until the page reports completion.

        :param path_mp4: path of the local video file
        :param param_dict: form parameters for the video description;
            currently unused because filling in the form is not implemented
        :return: True if the upload finished, False if any step raised
        """
        try:
            logger.debug("上传视频")
            self.browser.find_element(By.XPATH, '//input[@type="file" and contains(@accept,"mp4")]').send_keys(path_mp4)
            # Poll for the "upload finished" marker. find_elements returns an
            # empty list while it is absent, so only genuine driver failures
            # raise and reach the handler below instead of looping forever.
            while True:
                time.sleep(3)
                if self.browser.find_elements(By.XPATH, '//*[text()="上传完成"]'):
                    break
                logger.debug("视频还在上传中···")
            logger.success("视频已上传完成!")
        except Exception as e:
            logger.error(f"视频上传失败:{e}")
            return False
        # TODO: fill in the video description form (param_dict).
        return True

    def close(self):
        """
        Shut the browser down.
        """
        # quit() ends the whole driver session; close() would only close the
        # current tab and leave the first window and the driver running.
        self.browser.quit()
        logger.debug("关闭页面成功")
if __name__ == '__main__':
    # Example run: log in automatically, open the upload page, upload one file.
    bilibili = BiliBili(auto_login=True)
    try:
        bilibili.to_upload_windows()
        bilibili.publish_video('D:\\Desktop\\02.mp4')
    finally:
        # Always release the browser, even if navigation or the upload raised.
        bilibili.close()