1、代码实现

#-*- encoding: utf-8 -*-
'''
Created on 2019/12/06 14:46
Copyright (c) 2019/12/06, Google Copy right
@author: com
'''
import urllib2, urllib,cookielib,threading,gzip,sys

from selenium.webdriver import PhantomJS
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

class RequestUtil:
    '''
    HTTP helper built on urllib2, with an optional PhantomJS-backed page fetch.

    GET and POST requests made through ``http_get_request`` and
    ``http_post_request`` are serialized by a re-entrant lock. The cookies
    collected by the first GET that finds ``self.cookies`` empty are stored
    as a ``name=value;name=value`` string and sent with later POSTs.
    '''

    # User-Agent string sent with every request made by this class.
    __browserAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'

    def __init__(self):
        '''
        Start with an empty cookie string, a re-entrant lock and no driver.
        '''
        self.cookies = ''
        self._lock = threading.RLock()
        # Set by create_phandomjs(); None until a driver has been created.
        self.driver = None

    def _open(self, target, data=None, timeout=''):
        '''
        Call urllib2.urlopen, forwarding ``timeout`` only when one was given.

        :param target: URL string or urllib2.Request to open
        :param data: request body, or None for a request without a body
        :param timeout: timeout handed to urlopen; '' means "not supplied"
        :return: the response object returned by urllib2.urlopen
        '''
        if timeout == '':
            return urllib2.urlopen(target, data)
        return urllib2.urlopen(target, data, timeout=timeout)

    def _request_for(self, requested_url, response, request, headers):
        '''
        Return a Request that matches the URL the response actually came from.

        When the response URL differs from the requested one, a new Request
        is built for the response URL with the same headers; otherwise the
        original Request is returned unchanged.
        '''
        if requested_url != response.url:
            return urllib2.Request(url=response.url, headers=headers)
        return request

    def http_get_request(self, url, referer, timeout=''):
        '''
        Send a GET request and return the response together with its Request.

        A fresh cookie jar and opener are created and installed globally
        (``urllib2.install_opener``) on every call. If ``self.cookies`` is
        still empty, it is filled from the cookies received by this request.
        The request advertises ``Accept-encoding: gzip`` and the body is not
        decompressed here.

        :param url: address to fetch
        :param referer: value of the Referer header
        :param timeout: timeout handed to urlopen; '' means use the default
        :return: tuple ``(response, request)``
        '''
        # ``with`` guarantees the lock is released even when urlopen raises;
        # a manual acquire/release pair would leave it held on failure.
        with self._lock:
            cookie_jar = cookielib.CookieJar()
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar),
                                          SmartRedirectHandler())
            urllib2.install_opener(opener)
            headers = {'User-Agent': self.__browserAgent,
                       'Referer': referer,
                       'Cache-Control': 'max-age=0',
                       'Accept': '*/*',
                       'Connection': 'Keep-Alive',
                       'Accept-encoding': 'gzip'}
            req = urllib2.Request(url=url, headers=headers)
            response = self._open(req, timeout=timeout)

            # Remember cookies only once: later calls keep the first set.
            if self.cookies == '':
                self.cookies = ';'.join(
                    item.name + '=' + item.value for item in cookie_jar)

            req = self._request_for(url, response, req, headers)
            return (response, req)

    def http_post_request(self, url, datas, referer, timeout=''):
        '''
        Send a form-encoded POST and return the response with its Request.

        ``datas`` is encoded with ``urllib.urlencode`` and the stored cookie
        string is sent in the Cookie header. The request advertises
        ``Accept-encoding: gzip`` and the body is not decompressed here.

        :param url: address to post to
        :param datas: form fields accepted by urllib.urlencode
        :param referer: value of the Referer header
        :param timeout: timeout handed to urlopen; '' means use the default
        :return: tuple ``(response, request)``
        '''
        # ``with`` guarantees the lock is released even when urlopen raises.
        with self._lock:
            postdata = urllib.urlencode(datas)
            headers = {'User-Agent': self.__browserAgent,
                       'Referer': referer,
                       'Content-Type': 'application/x-www-form-urlencoded',
                       'Cache-Control': 'no-cache',
                       'Accept': '*/*',
                       'Connection': 'Keep-Alive',
                       'Accept-encoding': 'gzip',
                       'Cookie': self.cookies}
            req = urllib2.Request(url=url, data=postdata, headers=headers)
            response = self._open(req, timeout=timeout)
            req = self._request_for(url, response, req, headers)
            return (response, req)

    def http_get(self, url, refer='https://www.baidu.com'):
        '''
        GET ``url`` with a 60 second timeout; see ``http_get_request``.
        '''
        return self.http_get_request(url, refer, 60)

    def http_post(self, url, datas, refer='https://www.baidu.com'):
        '''
        POST ``datas`` to ``url`` with a 60 second timeout; see
        ``http_post_request``.
        '''
        return self.http_post_request(url, datas, refer, 60)

    def http_post_request2(self, url, datas, timeout=''):
        '''
        POST a ready-made body to ``url`` and return the response body.

        No custom headers, cookies or locking are involved; ``datas`` is
        passed unchanged to ``urllib2.urlopen`` as the request body.

        :param url: address to post to
        :param datas: request body handed straight to urlopen
        :param timeout: timeout handed to urlopen; '' means use the default
        :return: the data read from the response
        '''
        response = self._open(url, datas, timeout=timeout)
        try:
            return response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()

    def http_post2(self, url, datas):
        '''
        POST ``datas`` to ``url`` with a 300 second timeout; see
        ``http_post_request2``.
        '''
        return self.http_post_request2(url, datas, 300)

    def create_phandomjs(self, service_args, caps, timeout=30):
        '''
        Start a PhantomJS driver and store it in ``self.driver``.

        Any driver created earlier by this object is quit first so that its
        PhantomJS process is not left running.

        :param service_args: command line arguments for the PhantomJS service
        :param caps: desired capabilities for the driver
        :param timeout: value used for the page-load timeout, the script
            timeout and the implicit wait
        '''
        self.close_phandomjs()
        self.driver = PhantomJS(desired_capabilities=caps, service_args=service_args)
        self.driver.set_page_load_timeout(timeout)
        self.driver.set_script_timeout(timeout)
        self.driver.implicitly_wait(timeout)

    def close_phandomjs(self):
        '''
        Quit the PhantomJS driver if one exists; failures are ignored.
        '''
        driver = getattr(self, 'driver', None)
        if driver is None:
            return
        try:
            driver.quit()
        except Exception:
            # Best-effort shutdown: a driver that is already gone is fine.
            pass

    def http_get_phandomjs(self, url, refer='https://www.baidu.com', timeout=1000):
        '''
        Load ``url`` in PhantomJS and return the resulting page source.

        The driver stays open after the call; use ``close_phandomjs`` to
        shut it down.

        :param url: address to load
        :param refer: value of the Referer custom header
        :param timeout: used both as PhantomJS ``resourceTimeout`` and as the
            driver timeouts set by ``create_phandomjs``
        :return: ``self.driver.page_source`` after the page has loaded
        '''
        # NOTE(review): the same number is used for PhantomJS resourceTimeout
        # (milliseconds per the PhantomJS docs) and for the Selenium driver
        # timeouts (seconds) - confirm that this is intended.
        caps = dict(DesiredCapabilities.PHANTOMJS)
        caps['browserName'] = 'chrome'
        caps["phantomjs.page.settings.resourceTimeout"] = timeout
        caps["phantomjs.page.settings.loadImages"] = False
        caps["phantomjs.page.settings.userAgent"] = (self.__browserAgent)
        caps["phantomjs.page.customHeaders.Referer"] = (refer)

        service_args = [
            '--load-images=no',
            '--disk-cache=yes',
            '--cookies-file=',
        ]

        self.create_phandomjs(timeout=timeout, service_args=service_args, caps=caps)
        self.driver.get(url)
        # self.driver.save_screenshot('hainiu.png')
        return self.driver.page_source

class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    '''
    Redirect handler that records the redirect status code on the response.

    For 301 and 302 responses the redirect is followed by the base class and
    the original status code is stored on the result as ``status``.
    '''

    def http_error_301(self, req, fp, code, msg, headers):
        '''
        Follow a 301 redirect and tag the result with ``code``.

        :return: the base handler's result, or None when it produced none
        '''
        result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, headers)
        # The base handler returns None when the response carries no
        # Location/URI header; there is nothing to tag in that case.
        if result is not None:
            result.status = code
        return result

    def http_error_302(self, req, fp, code, msg, headers):
        '''
        Follow a 302 redirect and tag the result with ``code``.

        :return: the base handler's result, or None when it produced none
        '''
        result = urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
        # The base handler returns None when the response carries no
        # Location/URI header; there is nothing to tag in that case.
        if result is not None:
            result.status = code
        return result

if __name__ == '__main__':
    # Python 2 only: make utf-8 the default codec for implicit str/unicode
    # conversions used below.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    r = RequestUtil()

    # Demo: render one page with PhantomJS and print its source, re-encoded
    # for the local file system encoding.
    try:
        html = r.http_get_phandomjs('https://mil.news.sina.com.cn/china/2019-12-06/doc-iihnzahi5616327.shtml')
        html = html.decode('utf-8').encode(sys.getfilesystemencoding())
        print(html)
    finally:
        # Always shut the PhantomJS process down, even if the fetch,
        # re-encoding or printing fails.
        r.close_phandomjs()