概述
项目需要从网站上自动下载大量文件。
- 欲使用PyQt5显示网站,并进行模拟网站input填充,button或链接的点击,链接的下载等。PyQt5提供了QWebEngineView来显示html字符串或加载某网址显示web页面,同时提供了runJavaScript()方法来执行JavaScript,与web页面进行交互,且python有lxml包,可进行xpath操作。但想实现类似selenium的web自动化操作,依然较为困难,原因是QWebEngineView虽然集成了Chromium内核,但和浏览器差异较大,无法通过Qt进行随意操作。
- 最终,不得不选择使用selenium与Qt程序分开使用,完成该任务。
pyqt5的使用
安装
创建虚拟环境,安装相关包。
# Create the virtual environment
conda create --name pyqt5
# Activate the virtual environment
conda activate pyqt5
# Install the PyQt5-related packages (Python itself is installed into the new environment first)
conda install python=3.10
conda install pyqt
conda install PyQtWebEngine
# Package used for XPath operations
conda install lxml
试验代码
main.py
import sys
from PyQt5.QtWidgets import QApplication
from main_widget import MainWidget
if __name__ == '__main__':
    # A QApplication must exist before any widget is constructed.
    application = QApplication(sys.argv)

    # Build and display the main window.
    window = MainWidget()
    window.show()

    # Run the Qt event loop and hand its return code back to the shell.
    exit_code = application.exec_()
    sys.exit(exit_code)
main_widget.py
from PyQt5.QtCore import QUrl
from PyQt5.QtGui import QPalette, QColor
from PyQt5.QtWidgets import QWidget, QVBoxLayout, QLabel, QComboBox, QPushButton, QHBoxLayout
from lxml import etree
from web_engine_view import WebEngineView
class MainWidget(QWidget):
    """Main window: a toolbar row, a thin separator line and an embedded web view.

    The toolbar holds a station combo box and a "start download" button.
    Clicking the button fetches the HTML of the currently loaded page,
    rewrites the value of the search input with lxml and loads the modified
    HTML back into the web view.
    """

    def __init__(self):
        super().__init__()
        # Both attributes are populated by init_widget().
        self.webview = None
        self.cmd_station = None
        self.init_widget()

    def init_widget(self):
        """Create the child widgets, lay them out and connect the signals."""
        # top
        height = 25
        lb_station = QLabel("场站:")
        self.cmd_station = QComboBox()
        self.cmd_station.setFixedSize(200, height)
        btn_start_download = QPushButton("开始下载")
        btn_start_download.setFixedSize(100, height)
        top_layout = QHBoxLayout()
        # Stretches on both sides keep the controls centred horizontally.
        top_layout.addStretch()
        top_layout.addWidget(lb_station)
        top_layout.addWidget(self.cmd_station)
        top_layout.addWidget(btn_start_download)
        top_layout.addStretch()
        top_layout.setContentsMargins(0, 5, 0, 0)
        # split line: a 1px-high label used as a horizontal separator
        lb_slit_line = QLabel()
        lb_slit_line.setFixedHeight(1)
        lb_slit_line.setStyleSheet("background-color: rgba(129,127,104,0.5);")
        # web view
        self.webview = WebEngineView()
        self.webview.load(QUrl('https://www.bilibili.com'))
        # main layout
        main_layout = QVBoxLayout()
        main_layout.addLayout(top_layout)
        main_layout.addWidget(lb_slit_line)
        main_layout.addWidget(self.webview)
        main_layout.setContentsMargins(1, 1, 1, 1)
        self.setLayout(main_layout)
        # connects
        btn_start_download.clicked.connect(self.slot_btn_start_download)
        # White window background. QPalette.Window is the current name of
        # the obsolete QPalette.Background role.
        palette = QPalette()
        palette.setColor(QPalette.Window, QColor(255, 255, 255))
        self.setPalette(palette)
        self.showMaximized()

    def slot_btn_start_download(self):
        """Request the HTML of the current page.

        The HTML is delivered asynchronously to web_page_html().
        """
        # Alternative kept for reference: drive the page directly with JavaScript.
        # self.webview.page().runJavaScript("""
        # document.querySelector('.nav-search-input').value='ddddd';
        # document.querySelector('.nav-search-input').placeholder='aaaaa';
        # //document.querySelector('.nav-search-btn').click();
        # """)
        self.webview.page().toHtml(self.web_page_html)

    # def js_callback(self, result):
    #     print('js_callback: ', result)

    def web_page_html(self, html_str):
        """Callback for toHtml(): patch the search input and reload the page.

        Args:
            html_str: HTML source of the page currently shown in the web view.
        """
        print(html_str)
        print('**********************')
        if not html_str:
            # Nothing to parse; avoid handing an empty document to lxml.
            return
        encoding = 'utf-8'
        tree_html = etree.HTML(html_str.encode(encoding), etree.HTMLParser(encoding=encoding))
        if tree_html is None:
            # NOTE(review): lxml may yield None for documents it cannot
            # build a tree from -- confirm against the lxml version in use.
            return
        elements = tree_html.xpath('//input[@class="nav-search-input"]')
        if not elements:
            # The page may not have finished loading, or may not contain the
            # search box at all. Indexing an empty list here would raise
            # inside a Qt callback, which aborts a PyQt5 application.
            print('nav-search-input not found in page')
            return
        elements[0].set('value', 'asdadfasf')
        new_html = etree.tostring(tree_html, method='html', encoding='utf-8')
        print(new_html)
        # NOTE(review): setHtml() is called without a base URL, so relative
        # resources in the page will not resolve; Qt also documents a size
        # limit for setHtml() content -- confirm this is acceptable.
        self.webview.setHtml(new_html.decode(encoding))
web_engine_view.py
from PyQt5.QtWebEngineWidgets import QWebEngineView
class WebEngineView(QWebEngineView):
    """QWebEngineView that opens new-window links in itself and prints URL changes."""

    def __init__(self):
        super(WebEngineView, self).__init__()
        # Report every navigation through slot_url_changed().
        url_changed_signal = self.urlChanged
        url_changed_signal.connect(self.slot_url_changed)

    def createWindow(self, type):
        # Returning this same view makes links that would open a new window
        # load their target page into the current QWebEngineView instead.
        current_view = self
        return current_view

    def slot_url_changed(self, url):
        """Print the URL the view has just navigated to."""
        print('url: ', url)