方式:
1.手动登入,获取cookie
2.使用 http.cookiejar 库(Python 2 中名为 cookielib)和 HTTPCookieProcessor 处理器
"""Fetch a login-protected page by replaying a cookie copied from a browser.

Created on 2018-07-13

@author: sss
"""
import random
import ssl
import urllib.parse
import urllib.request

# Login endpoint, kept for reference only:
# url = 'https://www.ctguqmx.com/account/ajax/login_process/'
url = "http://www.ctguqmx.com/people/%E4%BD%99%E6%9D%BE"

# Skip verification of unverified SSL certificates.
# NOTE(review): this relies on a private ssl helper and disables certificate
# checks entirely; only acceptable for throw-away experiments.
context = ssl._create_unverified_context()

# User-Agent pool; one entry is picked at random for each request.
ua_list = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0)like Gecko",
    "Mozilla/5.0 (Macintosh; U; PPC Mac OS X ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS ",
]

# Cookie header value copied from a logged-in browser session.
# The original value had a stray "If-Modified-Since: ..." header pasted onto
# its end; that is a separate HTTP header, not cookie data, so it is dropped.
# NOTE(review): a hard-coded session cookie is a secret and will expire.
cookie = (
    'mcb__Session=giq2h72grr3643fcobi88el8t4; '
    'Hm_lvt_bad1d0a80942d2554f202adea54e0555=1530434463,1530703915,1531411239,1531451926; '
    'mcb__user_login=2M7Kkb9knW5lU51m2dae05ejlZ6Vw45dR-3NFgP1gm2Q1cem1bDVqJWQv2SZlZyTmWiVaJWeZ2SbkWRmyZuTZZXHx2XGncyYaWqbnA..; '
    'Hm_lpvt_bad1d0a80942d2554f202adea54e0555=1531452044'
)

# Form fields sent in the request body.
formdata = {
    "user_name": "name",
    "password": "password",
}


def build_request(page_url=url):
    """Return the Request that replays the saved cookie against *page_url*.

    A User-Agent is chosen at random from ``ua_list`` on every call.
    """
    headers = {
        # The header name uses a hyphen; 'User_Agent' is a different,
        # meaningless header and leaves urllib's default User-Agent in place.
        'User-Agent': random.choice(ua_list),
        'Cookie': cookie,
    }
    # NOTE(review): supplying a body turns this into a POST. The cookie alone
    # should be enough for a plain GET of the page -- confirm before removing.
    data = urllib.parse.urlencode(formdata).encode(encoding='utf_8')
    return urllib.request.Request(page_url, headers=headers, data=data)


def fetch_page():
    """Send the request and print the raw response body."""
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(build_request(), context=context) as response:
        print(response.read())
    print('finish!')


if __name__ == '__main__':
    fetch_page()
"""Log in to qmx with an account and crawl the personal page.

Created on 2018-07-14

@author: sss
"""
# Import the submodules explicitly: a bare ``import urllib`` only exposes
# ``urllib.request`` / ``urllib.parse`` when some other module happens to have
# imported them already.
import http.cookiejar
import urllib.parse
import urllib.request

USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"

# The login URL must end with a trailing slash.
LOGIN_URL = 'https://www.ctguqmx.com/account/ajax/login_process/'
# Personal home page; replace the x's with your own profile name.
PROFILE_URL = 'https://www.ctguqmx.com/people/xxxxxxxx'
# NOTE(review): the original script also requests this address after the
# download and discards the response; its purpose is not evident from the code.
PORTAL_URL = 'http://210.42.38.26:84/jwc_glxt/Login.aspx?xttc=1'
OUTPUT_PATH = 'qmx_ys_person.html'

# Account name and password required by the login form.
LOGIN_FORM = {
    "user_name": "name",
    "password": "password",
}


def make_cookie_opener():
    """Return an opener that stores and resends cookies via a CookieJar."""
    # The CookieJar instance holds the cookies received from the server.
    cookiejar = http.cookiejar.CookieJar()
    # HTTPCookieProcessor is the handler that reads/writes that jar.
    handler = urllib.request.HTTPCookieProcessor(cookiejar)
    opener = urllib.request.build_opener(handler)
    # addheaders takes a list of (name, value) tuples sent with every request.
    opener.addheaders = [('User-Agent', USER_AGENT)]
    return opener


def make_login_request():
    """Return the POST request carrying the url-encoded login form."""
    postdata = urllib.parse.urlencode(LOGIN_FORM).encode(encoding='utf_8')
    # Supplying ``data`` makes urllib send this as a POST, not a GET.
    return urllib.request.Request(LOGIN_URL, data=postdata)


def login_and_download():
    """Log in, save the personal page to OUTPUT_PATH, then hit PORTAL_URL."""
    opener = make_cookie_opener()

    # After this request the opener's cookie jar holds the session cookies.
    with opener.open(make_login_request()) as login_response:
        print(login_response.read().decode('utf-8'))

    print('访问登入后界面:')
    # The opener now carries the login cookies, so the page behind the login
    # can be requested directly.
    with opener.open(PROFILE_URL) as response:
        html = response.read()
    with open(OUTPUT_PATH, 'wb') as out_file:
        out_file.write(html)
    # print(html)
    print('个人界面下载完毕!')

    # Kept from the original script; the response body is not used.
    with opener.open(PORTAL_URL):
        pass
    print('finish!')


if __name__ == '__main__':
    login_and_download()