BeautifulSoup库
基本介绍:BeautifulSoup库是Python的第三方库,是一个用于解析HTML/XML文档的解析库
1. 基本使用
from bs4 import BeautifulSoup #导入BeautifulSoup库 (注意:库名区分大小写)(BeautifulSoup库在bs4中)
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# Echo the raw markup exactly as it was written above.
print(html)

# Re-indented rendering of the parsed tree; the tags left unclosed in the
# sample (form, div, body) come back closed and wrapped in <html>.
pretty_markup = soup.prettify()
print(pretty_markup)

# Text inside the first <a> tag of the document.
first_link = soup.a
print(first_link.string)
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
<html>
<body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">
360首页
</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">
清空
</a>
<input id="keyword" name="q" type="text" value="百度"/>
百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span>
<input class="s_btn" type="submit" value="搜索"/>
</form>
</div>
</body>
</html>
360首页
2. 标签选择器
处理速度快,但是有一定局限性
from bs4 import BeautifulSoup #导入BeautifulSoup库 (注意:库名区分大小写)(BeautifulSoup库在bs4中)
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# Attribute-style access by tag name yields a tag object.
body_tag = soup.body
# The whole <body> element, markup included.
print(body_tag)
# The Python type of that object (not one of the tag's HTML attributes).
print(type(body_tag))

# Only the first matching tag is returned: the first <div> ...
print(soup.div)
# ... and the first <span>.
print(soup.span)
<body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div></body>
<class 'bs4.element.Tag'>
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span>
2.1. 获取标签名称
from bs4 import BeautifulSoup #导入BeautifulSoup库 (注意:库名区分大小写)(BeautifulSoup库在bs4中)
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# .name holds the tag's own name; here it is read from the first <div>.
first_div = soup.div
print(first_div.name)
div
2.2. 获取标签属性
from bs4 import BeautifulSoup #导入BeautifulSoup库 (注意:库名区分大小写)(BeautifulSoup库在bs4中)
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

first_link = soup.a
# Subscripting a tag reads one of its attributes: the title of the first <a>.
print(first_link['title'])
# The same value can also be read through the tag's .attrs.
print(first_link.attrs['title'])
到360搜索首页
到360搜索首页
2.3. 获取标签的内容
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# .string gives the text enclosed by the first <a> tag.
first_link = soup.a
print(first_link.string)
360首页
2.4. 标签的嵌套选择
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# Tag-name access can be chained: take the first <span>, then the first <a>
# inside that span, and print the link's text.
span_tag = soup.span
link_in_span = span_tag.a
print(link_in_span.string)
清空
2.5. 子节点和子孙节点
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser and show the repaired tree.
soup = BeautifulSoup(html, 'lxml')
print(soup.prettify())

# .contents: the direct children of <body> as a list
# (whitespace text nodes such as '\n' are included).
print(soup.body.contents)

# .children: the same direct children, but exposed as an iterator,
# so printing it only shows the iterator object.
print(soup.body.children)
for i, child in enumerate(soup.body.children):
    # Number and print each direct child of <body>.
    print(i, child)

# .descendants: a generator over every nested node below <body>
# (children, grandchildren, ...), not just the direct children.
print(soup.body.descendants)
for i, child in enumerate(soup.body.descendants):
    # Number and print each descendant of <body>.
    print(i, child)
<html>
<body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">
360首页
</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">
清空
</a>
<input id="keyword" name="q" type="text" value="百度"/>
百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span>
<input class="s_btn" type="submit" value="搜索"/>
</form>
</div>
</body>
</html>
['\n', <div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>, '\n', <div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>]
<list_iterator object at 0x00000000056F0710>
0
1 <div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
2
3 <div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>
<generator object descendants at 0x0000000004CA5BA0>
0
1 <div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
2
3 <a data-i="home" title="到360搜索首页">360首页</a>
4 360首页
5
6
7 <div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>
8
9 <form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form>
10
11 <span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span>
12
13 <a href="javascript:;" title="清空">清空</a>
14 清空
15
16 <input id="keyword" name="q" type="text" value="百度"/>
17 百度
18 <input class="src" name="src" type="hidden" value="srp"/>
19
20 <input name="fr" type="hidden" value="se7_newtab_new"/>
21
22 <input name="psid" type="hidden" value=""/>
23
24 <input class="s_btn" type="submit" value="搜索"/>
25
2.6. 父节点和祖先节点
from bs4 import BeautifulSoup
html = '''<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

first_link = soup.a
# .parent is the single element that directly contains the first <a>.
print(first_link.parent)
# .parents is a generator, so printing it only shows the generator object.
print(first_link.parents)
# Materialise the generator to list every ancestor, numbered from the
# nearest one outward.
numbered_ancestors = list(enumerate(first_link.parents))
print(numbered_ancestors)
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<generator object parents at 0x000000000549ED58>
[(0, <div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>), (1, <body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div></body>), (2, <html><body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div></body></html>), (3, <html><body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div></body></html>)]
2.7. 标签的兄弟节点
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

first_link = soup.a
# Siblings that come before the first <a> under the same parent.
earlier_siblings = list(enumerate(first_link.previous_siblings))
print(earlier_siblings)
# Siblings that come after the first <a> under the same parent.
later_siblings = list(enumerate(first_link.next_siblings))
print(later_siblings)
[(0, '\n')]
[(0, '\n')]
3. 标准选择器
3.1. find(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找第一个满足条件的标签
3.2. find_all(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的所有标签
3.3. find_parent(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的父节点(只返回最近的一个)
3.4. find_parents(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找所有满足条件的祖先节点
3.5. find_previous_sibling(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的前面一个兄弟节点
3.6. find_previous_siblings(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的前面所有兄弟节点
3.7. find_next_sibling(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的后面一个兄弟节点
3.8. find_next_siblings(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的后面所有兄弟节点
3.9. find_all_next(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的后面所有节点
3.10. find_next(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的后面一个节点
3.11. find_all_previous(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的前面所有节点
3.12. find_previous(name,attrs,recursive,text,kwargs)
可以根据标签名,属性,内容在HTML中查找满足条件的前面一个节点
12个find的用法相同,以其中的find_all为例进行讲解
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# find_all returns every <input> tag in the document.
input_tags = soup.find_all('input')
print(input_tags)
# Each element of the result is itself a tag object.
print(type(input_tags[0]))
[<input id="keyword" name="q" type="text" value="百度"/>, <input class="src" name="src" type="hidden" value="srp"/>, <input name="fr" type="hidden" value="se7_newtab_new"/>, <input name="psid" type="hidden" value=""/>, <input class="s_btn" type="submit" value="搜索"/>]
<class 'bs4.element.Tag'>
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# Find every <span> tag in the document and print the matches.
print(soup.find_all('span'))

# find_all can be called again on each result to search only inside it.
for span in soup.find_all('span'):
    # Print the <input> tags nested in this particular <span>.
    print(span.find_all('input'))
[<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span>]
[<input id="keyword" name="q" type="text" value="百度"/>, <input class="src" name="src" type="hidden" value="srp"/>, <input name="fr" type="hidden" value="se7_newtab_new"/>, <input name="psid" type="hidden" value=""/>]
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# Filter by HTML attribute through the attrs dict: every tag with id="head".
head_matches = soup.find_all(attrs={'id': 'head'})
print(head_matches)
# Every tag whose HTML attribute name="f2"; the filter is passed through
# attrs here rather than as a keyword argument.
f2_matches = soup.find_all(attrs={'name': 'f2'})
print(f2_matches)
[<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>]
[<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form>]
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest-align" >
<a href="javascript:;" title="清空">ba清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# Attribute filters can be given as keyword arguments: every tag with id="head".
id_matches = soup.find_all(id='head')
print(id_matches)
# Every tag with class="form". The keyword is spelled class_ because class
# is a reserved word in Python.
class_matches = soup.find_all(class_='form')
print(class_matches)
[<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">ba清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>]
[<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">ba清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
<input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form>]
4. CSS选择器
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<a title="到360搜索首页" data-i="home">360首页</a>
</div>
<div id="head">
<form name="f2" class="form" action="s" method="get">
<span id="suggest" >
<a href="javascript:;" title="清空">ba清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# '#header'     -> tags with id="header"
print(soup.select('#header'))
# '.s_btn'      -> tags with class="s_btn"
print(soup.select('.s_btn'))
# ' input'      -> all <input> tags
print(soup.select(' input'))
# 'input.src'   -> <input> tags that also carry class="src"
print(soup.select('input.src'))
# '#head input' -> <input> tags nested inside the tag with id="head"
print(soup.select('#head input'))

# select can be called again on each result to search only inside it.
for container in soup.select('#head'):
    # Within this id="head" element, find the tags with class="src".
    print(container.select('.src'))
[<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>]
[<input class="s_btn" type="submit" value="搜索"/>]
[<input id="keyword" name="q" type="text" value="百度"/>, <input class="src" name="src" type="hidden" value="srp"/>, <input name="fr" type="hidden" value="se7_newtab_new"/>, <input name="psid" type="hidden" value=""/>, <input class="s_btn" type="submit" value="搜索"/>]
[<input class="src" name="src" type="hidden" value="srp"/>]
[<input id="keyword" name="q" type="text" value="百度"/>, <input class="src" name="src" type="hidden" value="srp"/>, <input name="fr" type="hidden" value="se7_newtab_new"/>, <input name="psid" type="hidden" value=""/>, <input class="s_btn" type="submit" value="搜索"/>]
[<input class="src" name="src" type="hidden" value="srp"/>]