pip install lxml cssselect
代码示例
from lxml import etree
from lxml.cssselect import CSSSelector

# Minimal HTML document that both queries below run against.
text = """
<html>
<head>
<title>这是标题</title>
</head>
<body>
<div>这是内容</div>
</body>
</html>"""

# Parse the markup with lxml's HTML parser; the result is the root element.
html = etree.HTML(text)

# Query with XPath: select every <title> element anywhere in the document.
titles = html.xpath("//title")
for title in titles:
    print(title.text)

# Query with a CSS selector.
# Elements produced by etree.HTML are plain etree elements and do not have a
# .cssselect() method (that method belongs to lxml.html elements), so the
# selector is compiled with CSSSelector and then applied to the tree.
# NOTE: lxml.cssselect requires the separate "cssselect" package to be installed.
titles = CSSSelector("title")(html)
for title in titles:
    print(title.text)