- urllib.error
     - Causes of URLError:
         - no network connection
         - failed to connect to the server
         - the specified server is unknown
         - URLError is a subclass of OSError
         - Case V07
from urllib import request, error
"""
Demonstrates catching URLError
"""
if __name__ == '__main__':
    url = 'http://'
    try:
        req = request.Request(url)
        rsp = request.urlopen(req,timeout=5)
        rsp = rsp.read()
        html = rsp.decode()
        print(html)
    except error.URLError as u:
        print("URLERROR..{}".format(u))
        print("URLError..{}".format(u.reason))
    except Exception as e:
        print(e)
>>>URLERROR..<urlopen error timed out>
>>>URLError..timed out
     - HTTPError is a subclass of URLError
         - Case v08
     
from urllib import request, error
"""
Demonstrates catching HTTPError
"""
if __name__ == '__main__':
    # fill in a URL that returns a 404 here to trigger HTTPError
    url = ''
    try:
        req = request.Request(url)
        rsp = request.urlopen(req)
        rsp = rsp.read()
        html = rsp.decode()
        print(html)
    except error.HTTPError as h:
        print("HTTPERROR..{}".format(h))
        print("HTTPError...{}".format(h.reason))
    except error.URLError as u:
        print("URLERROR..{}".format(u))
        print("URLError..{}".format(u.reason))
    except Exception as e:
        print(e)
>>> HTTPERROR..HTTP Error 404: Not Found
>>> HTTPError...Not Found
     - Differences between the two:
         - HTTPError corresponds to an error status code in the HTTP response: it is raised when the returned code is 400 or above
         - URLError generally corresponds to a network problem, including problems with the URL itself
         - Class hierarchy: OSError > URLError > HTTPError (see the sketch below)
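
A small sketch of this hierarchy, assuming http://httpbin.org/status/404 is reachable (a public test service that simply returns whatever status code is requested): because HTTPError is a subclass of URLError, its except clause must come first, and the caught object also passes isinstance checks for URLError and OSError.

from urllib import request, error

if __name__ == '__main__':
    try:
        # assumption: httpbin.org/status/404 is reachable and answers with a 404
        request.urlopen("http://httpbin.org/status/404", timeout=5)
    except error.HTTPError as h:
        # an HTTPError is also a URLError and an OSError
        print(isinstance(h, error.URLError))   # True
        print(isinstance(h, OSError))          # True
        # and it additionally carries the HTTP status code and reason
        print(h.code, h.reason)                # e.g. 404 NOT FOUND
    except error.URLError as u:
        # reached only for non-HTTP failures, e.g. no network connection
        print(u.reason)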
 - UserAgent
     - UserAgent: the user agent string, UA for short; it is part of the request headers, and servers use it to identify the client making the request
     - Common UA values are listed below; you can copy and paste one directly, or capture one by inspecting a request made from a browser
               
1.Android
      Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19
      Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30
      Mozilla/5.0 (Linux; U; Android 2.2; en-gb; GT-P1000 Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1
   2.Firefox
      Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0
      Mozilla/5.0 (Android; Mobile; rv:14.0) Gecko/14.0 Firefox/14.0
   3.Google Chrome
      Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36
      Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19
   4.iOS
      Mozilla/5.0 (iPad; CPU OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3
      Mozilla/5.0 (iPod; U; CPU like Mac OS X; en) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/3A101a Safari/419.3
     - A UA can be set in two ways:
         - via the headers dict passed to Request
         - via the add_header method
         - Case v09
         
from urllib import request, error
"""
Visit a site while disguising the request with a custom UserAgent
"""
if __name__ == '__main__':
    try:
        url = "http://www.baidu.com"
        """
        1.使用head 
        """
        # headers = {}
        # headers['UserAgent'] ='Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
        # req = request.Request(url, headers=headers)
        """
        2.使用add_header 方法
        """
        req = request.Request(url)
        req.add_header("UserAgent",'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36')
        rsp = request.urlopen(req)
        html = rsp.read().decode()
        print(len(html))
        print(html)
    except error.HTTPError as h:
        print("HTTPERROR..{}".format(h.reason))
    except error.URLError as u:
        print("URLERROR...{}".format(u.reason))
    except Exception as e:
        print("Exception..{}".format(e)) - ProxyHandler处理(代理服务器)
 - ProxyHandler (handling proxy servers)
     - Using proxy IPs is a common crawling technique
     - Where to get proxy server addresses:
         - 
         - www.goubanjia.com
     - A proxy is used to hide who is really making the request; a single proxy also must not hit one site too frequently, so you need a large pool of them (see the rotation sketch after case v10)
     - Basic usage steps:
         1. Set the proxy address
         2. Create a ProxyHandler
         3. Create an Opener
         4. Install the Opener
     - Case v10 (replace the IP address with a working one)
"""
使用代理访问百度
"""
from  urllib import  request,error
from  urllib import  request,error
"""
访问一个网站,更改自己的UserAgent进行伪装
"""
if __name__ == '__main__':
    try:
        url = "http://www.ithome.com"
        # 1. set the proxy ip address
        proxy = {'http': '39.135.24.12:80'}
        # 2. create a ProxyHandler from the proxy mapping
        proxy_handler = request.ProxyHandler(proxy)
        # 3. create an opener that uses the handler
        opener = request.build_opener(proxy_handler)
        # 4. install the opener so that urlopen routes through the proxy
        request.install_opener(opener)
        rsp = request.urlopen(url)
        html = rsp.read().decode()
        print(len(html))
        print(html)
    except error.HTTPError as h:
        print("HTTPERROR..{}".format(h.reason))
    except error.URLError as u:
        print("URLERROR...{}".format(u.reason))
    except Exception as e:
        print("Exception..{}".format(e))