BeautifulSoup库

基本介绍:BeautifulSoup 是 Python 的第三方库,是一个用于解析 HTML/XML 文档并从中提取数据的解析库

1. 基本使用



from bs4 import BeautifulSoup  #导入BeautifulSoup库 (注意:库名区分大小写)(BeautifulSoup库在bs4中)
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml') # parse the HTML string with the lxml parser ("make the soup")
print(html) # print the raw, unparsed HTML string
print(soup.prettify())  # print the parsed tree re-indented, with the missing <html> wrapper and closing tags filled in
print(soup.a.string) # print the text content of the first <a> tag
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">

<html>
 <body class="abv-115-control" link="#1024ee">
  <div id="header">
   <a data-i="home" title="到360搜索首页">
    360首页
   </a>
  </div>
  <div id="head">
   <form action="s" class="form" method="get" name="f2">
    <span id="suggest-align">
     <a href="javascript:;" title="清空">
      清空
     </a>
     <input id="keyword" name="q" type="text" value="百度"/>
     百度
     <input class="src" name="src" type="hidden" value="srp"/>
     <input name="fr" type="hidden" value="se7_newtab_new"/>
     <input name="psid" type="hidden" value=""/>
    </span>
    <input class="s_btn" type="submit" value="搜索"/>
   </form>
  </div>
 </body>
</html>
360首页



2. 标签选择器

处理速度快,但是有一定局限性:通过 soup.标签名 的方式只能取到第一个匹配的标签



from bs4 import BeautifulSoup  #导入BeautifulSoup库 (注意:库名区分大小写)(BeautifulSoup库在bs4中)
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml') # parse the HTML string with the lxml parser ("make the soup")
print (soup.body)  # print the whole <body> tag
print(type(soup.body))  # print the Python type of the <body> tag object (bs4.element.Tag)
print(soup.div)  # print the first <div> tag in full
print(soup.span)  # print the first <span> tag in full
<body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div></body>
<class 'bs4.element.Tag'>
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span>



2.1. 获取标签名称



from bs4 import BeautifulSoup  #导入BeautifulSoup库 (注意:库名区分大小写)(BeautifulSoup库在bs4中)
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml') # parse the HTML string with the lxml parser ("make the soup")
print(soup.div.name)  # print the name of the first <div> tag, i.e. "div"
div



2.2. 获取标签属性



from bs4 import BeautifulSoup  #导入BeautifulSoup库 (注意:库名区分大小写)(BeautifulSoup库在bs4中)
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml') # parse the HTML string with the lxml parser ("make the soup")
print(soup.a['title'])  # print the value of the title attribute of the first <a> tag
print(soup.a.attrs['title'])  # same value, read through the tag's attrs dictionary
到360搜索首页
到360搜索首页



2.3. 获取标签的内容



from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser ("make the soup")
print(soup.a.string)  # print the text content of the first <a> tag
360首页



2.4. 标签的嵌套选择



from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(soup.span.a.string)  # print the text of the first <a> tag nested inside the first <span> tag
清空



2.5. 子节点和子孙节点



from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(soup.prettify())  # print the parsed tree, re-indented
print(soup.body.contents)  # print all direct children of <body> as a list
print(soup.body.children)  # .children is an iterator, so this prints the iterator object rather than the nodes
for i,child in enumerate(soup.body.children):  # walk the direct children of <body>, printing each with its index
    print(i,child)
print(soup.body.descendants)  # .descendants is a generator, so this prints the generator object
for i,child in enumerate(soup.body.descendants):   # walk every descendant of <body> (children, grandchildren, ...), printing each with its index
    print(i,child)
<html>
 <body class="abv-115-control" link="#1024ee">
  <div id="header">
   <a data-i="home" title="到360搜索首页">
    360首页
   </a>
  </div>
  <div id="head">
   <form action="s" class="form" method="get" name="f2">
    <span id="suggest-align">
     <a href="javascript:;" title="清空">
      清空
     </a>
     <input id="keyword" name="q" type="text" value="百度"/>
     百度
     <input class="src" name="src" type="hidden" value="srp"/>
     <input name="fr" type="hidden" value="se7_newtab_new"/>
     <input name="psid" type="hidden" value=""/>
    </span>
    <input class="s_btn" type="submit" value="搜索"/>
   </form>
  </div>
 </body>
</html>
['\n', <div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>, '\n', <div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>]
<list_iterator object at 0x00000000056F0710>
0

1 <div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
2

3 <div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>
<generator object descendants at 0x0000000004CA5BA0>
0

1 <div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
2

3 <a data-i="home" title="到360搜索首页">360首页</a>
4 360首页
5

6

7 <div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>
8

9 <form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form>
10

11 <span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span>
12

13 <a href="javascript:;" title="清空">清空</a>
14 清空
15

16 <input id="keyword" name="q" type="text" value="百度"/>
17 百度

18 <input class="src" name="src" type="hidden" value="srp"/>
19

20 <input name="fr" type="hidden" value="se7_newtab_new"/>
21

22 <input name="psid" type="hidden" value=""/>
23

24 <input class="s_btn" type="submit" value="搜索"/>
25



2.6. 父节点和祖先节点



from bs4 import BeautifulSoup
html = '''<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(soup.a.parent)  # print the direct parent of the first <a> tag
print(soup.a.parents)  # .parents is a generator, so this prints the generator object
print(list(enumerate(soup.a.parents)))  # print every ancestor of the first <a> tag, nearest first, with its index
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<generator object parents at 0x000000000549ED58>
[(0, <div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>), (1, <body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div></body>), (2, <html><body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div></body></html>), (3, <html><body class="abv-115-control" link="#1024ee">
<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>
<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div></body></html>)]



2.7. 标签的兄弟节点



from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(list(enumerate(soup.a.previous_siblings)))  # sibling nodes located before the first <a> tag (text nodes count as siblings)
print(list(enumerate(soup.a.next_siblings)))  # sibling nodes located after the first <a> tag
[(0, '\n')]
[(0, '\n')]



3. 标准选择器

3.1. find(name,attrs,recursive,text,kwargs)

可以根据标签名,属性,内容在HTML中查找第一个满足条件的标签

3.2. find_all(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的所有标签



3.3. find_parent(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的父节点(只返回最近的一个)



3.4. find_parents(name,attrs,recursive,text,kwargs)

可以根据标签名,属性,内容在HTML中查找所有满足条件的祖先节点

3.5. find_previous_sibling(name,attrs,recursive,text,kwargs)

可以根据标签名,属性,内容在HTML中查找满足条件的前面一个兄弟节点

3.6. find_previous_siblings(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的前面所有兄弟节点



3.7. find_next_sibling(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的后面一个兄弟节点



3.8. find_next_siblings(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的后面所有兄弟节点



3.9. find_all_next(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的后面所有节点



3.10. find_next(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的后面一个节点



3.11. find_all_previous(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的前面所有节点



3.12. find_previous(name,attrs,recursive,text,kwargs)



可以根据标签名,属性,内容在HTML中查找满足条件的前面一个节点



以上12个find方法的用法基本相同,下面以其中的find_all为例进行讲解



from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(soup.find_all('input'))   # find every <input> tag and print the resulting list
print(type(soup.find_all('input')[0]))   # print the type of the first match (bs4.element.Tag)
[<input id="keyword" name="q" type="text" value="百度"/>, <input class="src" name="src" type="hidden" value="srp"/>, <input name="fr" type="hidden" value="se7_newtab_new"/>, <input name="psid" type="hidden" value=""/>, <input class="s_btn" type="submit" value="搜索"/>]
<class 'bs4.element.Tag'>
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(soup.find_all('span'))  # find every <span> tag and print the resulting list
for span in soup.find_all('span'):   # iterate over the <span> tags
    print(span.find_all('input'))    # search inside each <span> for <input> tags and print them
[<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span>]
[<input id="keyword" name="q" type="text" value="百度"/>, <input class="src" name="src" type="hidden" value="srp"/>, <input name="fr" type="hidden" value="se7_newtab_new"/>, <input name="psid" type="hidden" value=""/>]
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(soup.find_all(attrs = {'id':'head'}))  # find all tags whose id attribute is "head"
print(soup.find_all(attrs = {'name':'f2'}))  # find all tags whose name attribute is "f2"
[<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>]
[<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form>]
from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">ba清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
                <input type="hidden" name="fr" value="se7_newtab_new">
                <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(soup.find_all(id='head'))  # find all tags with id="head"
print(soup.find_all(class_='form'))  # find all tags with class "form"
# (Note: `class` is a reserved word in Python, so the keyword argument is spelled class_.)
[<div id="head">
<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">ba清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form></div>]
[<form action="s" class="form" method="get" name="f2">
<span id="suggest-align">
<a href="javascript:;" title="清空">ba清空</a>
<input id="keyword" name="q" type="text" value="百度"/>百度
        <input class="src" name="src" type="hidden" value="srp"/>
<input name="fr" type="hidden" value="se7_newtab_new"/>
<input name="psid" type="hidden" value=""/>
</span><input class="s_btn" type="submit" value="搜索"/>
</form>]



4. CSS选择器



from bs4 import BeautifulSoup
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
                <a  title="到360搜索首页" data-i="home">360首页</a>
                </div>
                <div id="head">
                <form name="f2" class="form" action="s" method="get">
        <span id="suggest" >
        <a href="javascript:;"  title="清空">ba清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
soup = BeautifulSoup(html,'lxml')  # parse the HTML string with the lxml parser
print(soup.select('#header'))  # all tags with id="header"
print(soup.select('.s_btn'))   # all tags with class "s_btn"
print(soup.select(' input'))  # all <input> tags
print(soup.select('input.src')) # <input> tags that also have class "src"
print(soup.select('#head input'))  # <input> tags nested anywhere inside the tag with id="head"
for input_ in soup.select('#head'):  # iterate over the tags with id="head" ...
    print(input_.select('.src'))  # ... and search within each one for tags with class "src"
[<div id="header">
<a data-i="home" title="到360搜索首页">360首页</a>
</div>]
[<input class="s_btn" type="submit" value="搜索"/>]
[<input id="keyword" name="q" type="text" value="百度"/>, <input class="src" name="src" type="hidden" value="srp"/>, <input name="fr" type="hidden" value="se7_newtab_new"/>, <input name="psid" type="hidden" value=""/>, <input class="s_btn" type="submit" value="搜索"/>]
[<input class="src" name="src" type="hidden" value="srp"/>]
[<input id="keyword" name="q" type="text" value="百度"/>, <input class="src" name="src" type="hidden" value="srp"/>, <input name="fr" type="hidden" value="se7_newtab_new"/>, <input name="psid" type="hidden" value=""/>, <input class="s_btn" type="submit" value="搜索"/>]
[<input class="src" name="src" type="hidden" value="srp"/>]