项目场景:

项目场景:通过 Python 的 selenium 包在B站上传视频并发布,视频内容来源于 YouTube 网站。

依赖包下载:

pip install loguru selenium paddlehub ddddocr opencv-python requests numpy -i https://pypi.douban.com/simple

整体流程:






下载视频

本地存储

视频上传

视频描述信息填写

发布视频

  1. 下载视频 & 本地存储
import requests
import os
  2. 视频上传(包含B站登录验证)
import base64
import time
import os

from loguru import logger
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import numpy as np
# import paddlehub as hub
import ddddocr
import cv2 as cv


def base64_to_img(base64_code):
    """
    Decode a base64-encoded image into an OpenCV image array.

    :param base64_code: base64 data of an encoded image file
    :return: the image produced by ``cv.imdecode``
    """
    # Base64 -> raw bytes of the encoded image file
    img_data = base64.b64decode(base64_code)
    # View the bytes as a uint8 array; np.frombuffer replaces the deprecated
    # binary mode of np.fromstring
    img_array = np.frombuffer(img_data, dtype=np.uint8)
    # imdecode expects an IMREAD_* flag; COLOR_RGB2BGR is a cvtColor
    # conversion code and does not belong here
    img = cv.imdecode(img_array, cv.IMREAD_COLOR)
    return img


def click_locxy(dr, x, y, left_click=True):
    """
    Click at an offset from the current pointer position, then move back.

    :param dr: browser driver
    :param x: horizontal offset to move the pointer by
    :param y: vertical offset to move the pointer by
    :param left_click: True for a left click, otherwise a right (context) click
    """
    pointer = ActionChains(dr).move_by_offset(x, y)
    pointer = pointer.click() if left_click else pointer.context_click()
    pointer.perform()
    # Undo the move so the next call starts from the same pointer position
    ActionChains(dr).move_by_offset(-x, -y).perform()


class BiliBili:
    """
    Selenium helper that logs in to Bilibili and uploads a local video.

    Creating an instance opens a Chrome window, loads ``base_url`` and runs
    the login step (automatic or manual) before returning.
    """

    def __init__(self, base_url='https://www.bilibili.com/', user_number="", user_password="", auto_login=False):
        """
        :param base_url: Bilibili site URL
        :param user_number: login account
        :param user_password: login password
        :param auto_login: True to fill in the login form and click through the
            captcha automatically; False to wait for the user to log in by hand
        """
        self.base_url = base_url
        self.user_number = user_number
        self.user_password = user_password

        # Browser setup
        self.browser = webdriver.Chrome()
        self.browser.get(self.base_url)

        # Login
        if auto_login:
            # Text recogniser used by the captcha step.
            # Previously: hub.Module(name="chinese_ocr_db_crnn_server", enable_mkldnn=True)
            self.ocr_detector = ddddocr.DdddOcr()
            # Retry until a login attempt goes through
            while True:
                try:
                    self._login()
                    logger.success("登录成功!")
                    break
                except Exception as e:
                    # Exception (not a bare except) so Ctrl+C / SystemExit can
                    # still stop the retry loop; the cause is logged as well
                    logger.error(f"登录失败:{e},刷新页面重新登录......")
                    self.browser.refresh()
                    continue
        else:
            # Manual login: block until the user confirms on the console
            login_success = input("请确认是否登录成功,若成功则输入成功:")
            logger.info(f"执行手动登录成功:{login_success}")

    def _ocr(self, img: np.ndarray):
        """
        Run the text recogniser on an image and return the ``data`` entry of
        its first result.

        :param img: image as a numpy array
        :return: ``ocr_res[0]['data']`` of the recogniser output
        :raises ValueError: if the output is empty or its first item has no 'data'
        """
        # Previously: self.ocr_detector.recognize_text(images=[img], use_gpu=self.use_gpu)
        # NOTE(review): img.tobytes() is the raw pixel buffer, not an encoded
        # image file, and the result handling below appears to follow the
        # shape of the previous recogniser (a list whose first item has a
        # 'data' key). Confirm both against the ddddocr API before relying on
        # automatic login.
        ocr_res = self.ocr_detector.classification(img.tobytes())
        # Reject an empty / unexpected result
        if len(ocr_res) < 1 or 'data' not in ocr_res[0]:
            raise ValueError(f"文字检测结果为空")
        else:
            ocr_res = ocr_res[0]['data']
        return ocr_res

    def _captcha_verification(self):
        """
        Solve the click-the-characters captcha shown after submitting the login form.

        Reads the prompt text and the captcha picture, clicks the prompt
        characters in order and confirms. On any failure the captcha is
        refreshed and the attempt repeated.
        """
        logger.debug("等待刷新图片验证码......")
        time.sleep(10)
        # Root element of the captcha widget
        root_element = self.browser.find_element(By.CLASS_NAME, "geetest_widget")
        # Button that reloads the captcha
        reload_button = root_element.find_element(By.CLASS_NAME, "geetest_refresh")
        # Button that submits the clicks
        confirm_button = root_element.find_element(By.CLASS_NAME, "geetest_commit")
        logger.debug("刷新图片验证码成功!")

        while True:
            # Screenshot of the prompt (the characters to click)
            verify_img_element = root_element.find_element(By.CLASS_NAME, "geetest_tip_img")
            verify_img = base64_to_img(verify_img_element.screenshot_as_base64)

            # Screenshot of the captcha picture itself
            img_element = root_element.find_element(By.CLASS_NAME, "geetest_item_wrap")
            img = base64_to_img(img_element.screenshot_as_base64)
            # Position of the captcha picture on the page
            x, y = img_element.location['x'], img_element.location['y']

            try:
                # Characters to click, in order
                verify_code = self._ocr(verify_img)[0]['text']
                logger.info(f'验证文字内容:{verify_code}')

                # Characters found in the captcha picture with their coordinates
                code_positions = self._ocr(img)
                logger.info(f'验证码图片内容:{code_positions}')

                # Click the characters in prompt order
                success_count = 0
                for code in verify_code:
                    for c_p in code_positions:
                        if c_p['text'] == code:
                            click_locxy(self.browser, x + c_p['x'], y + c_p['y'])
                            success_count += 1
                            time.sleep(1)
                            # One click per prompt character: without this a
                            # character recognised at several positions was
                            # clicked and counted more than once
                            break
                if success_count == len(verify_code):
                    # Every prompt character was clicked: submit
                    confirm_button.click()
                    break
                else:
                    raise ValueError('验证失败')
            except Exception as e:
                logger.error(f"点击验证码失败:{e},重新刷新验证码......")
                # Load a new captcha and try again
                reload_button.click()
                time.sleep(5)

    def _login(self):
        """
        Log in automatically: open the login dialog, type the credentials,
        submit and solve the captcha.

        :return: True once the captcha step has returned
        """
        # Open the login dialog
        logger.debug('开始打开登录界面......')
        time.sleep(5)
        self.browser.find_element(By.CLASS_NAME, "header-login-entry").click()
        logger.debug('打开登录界面成功!')

        # Locate the login form
        logger.debug("开始获取登录表单......")
        time.sleep(5)
        login_form = self.browser.find_element(By.CLASS_NAME, "login-pwd-wp")
        logger.debug("获取登录表单成功!")

        # Type account and password; each field is recognised by its accessible name
        logger.debug("开始输入账号密码......")
        for login_item in login_form.find_element(By.CLASS_NAME, "tab__form").find_elements(By.CLASS_NAME, "form__item"):
            inputs = login_item.find_element(By.TAG_NAME, 'input')
            inputs.click()
            time.sleep(1)
            inputs.clear()
            if inputs.accessible_name == '请输入账号':
                inputs.send_keys(self.user_number)
            elif inputs.accessible_name == '请输入密码':
                inputs.send_keys(self.user_password)
        logger.debug("输入账号密码成功!")

        # Submit the form
        logger.debug("点击登录按钮......")
        time.sleep(2)
        login_form.find_element(By.CLASS_NAME, "btn_wp").find_element(By.CLASS_NAME, 'btn_primary ').click()
        # Captcha check
        self._captcha_verification()
        return True

    def to_upload_windows(self):
        """
        Open the video upload page and switch the driver to the newest window.
        """
        logger.debug("切换至视频上传界面......")
        upload = self.browser.find_element(By.CLASS_NAME, 'header-upload-entry')
        upload.click()
        self.browser.switch_to.window(self.browser.window_handles[-1])
        # Report success only after the window switch has actually happened
        logger.debug("切换成功!")

    def publish_video(self, path_mp4: str, param_dict: dict = None):
        """
        Upload a local video file on the upload page and wait until the page
        shows that the upload is complete.

        :param path_mp4: path of the local video file (relative paths are
            resolved against the current working directory)
        :param param_dict: form parameters for the video description (not used yet)
        :return: True when the upload completed, False when it failed
        """
        # The file input is given an absolute path; a missing file is
        # reported right away instead of failing inside the driver
        video_path = os.path.abspath(path_mp4)
        if not os.path.isfile(video_path):
            logger.error(f"视频上传失败:文件不存在 {video_path}")
            return False

        # Upload the video
        try:
            logger.debug("上传视频")
            self.browser.find_element(By.XPATH, '//input[@type="file" and contains(@accept,"mp4")]').send_keys(video_path)
            # Wait for the upload to finish. find_elements returns an empty
            # list while the marker is absent, so real driver errors (for
            # example a closed window) are no longer mistaken for "still
            # uploading" and reach the except below instead of looping forever
            while True:
                time.sleep(3)
                if self.browser.find_elements(By.XPATH, '//*[text()="上传完成"]'):
                    break
                logger.debug("视频还在上传中···")
            logger.success("视频已上传完成!")
        except Exception as e:
            logger.error(f"视频上传失败:{e}")
            return False

        # TODO: fill in the video description form
        return True

    def close(self):
        """
        Shut the browser down.
        """
        # quit() ends the driver session and closes every window; close()
        # only closed the current window and left the first window and the
        # driver running
        self.browser.quit()
        logger.debug("关闭页面成功")


if __name__ == '__main__':
    # Demo run: log in automatically, open the upload page, upload one file
    bilibili = BiliBili(auto_login=True)
    try:
        bilibili.to_upload_windows()
        bilibili.publish_video('D:\\Desktop\\02.mp4')
    finally:
        # Release the browser even when one of the steps above raises
        bilibili.close()