python requests包的基本使用

转载

qq5aa0d81e43aed 2019-10-26 14:52:43

Requests 安装requests模块： D:\Install\Python36>pip3 install requests

请求方式 #coding:utf-8 import requests requests.get("http://www.baidu.com") requests.post("http://www.baidu.com") #requests.put("http请求") #requests.delete("http请求") #requests.head("http请求") #requests.options("http请求")

带参数GET请求 第一种 #coding:utf-8 import requests

response = requests.get("http://httpbin.org/get?name=gemey&age=22") print(response.text) 第二种 #coding:utf-8 import requests

d = { 'name': 'tom', 'age': 20 } response = requests.get('http://httpbin.org/get', params=d) print(response.text)

get请求也可以跟data参数，与params的区别是： 1、params直接将 d 参数拼接到url，而使用data类似post请求的data使用不会拼接到url； 2、对类似flask服务端而言，params参数通过request.args读取传入参数，而data参数通过request.data读取，post请求同样处理 3、headers的Content-Type传输格式对服务端获取请求数据，也有影响，如果传输json格式，　　heads={'Content-Type':'application/json'},服务端可以通过request.json或者request.get_json(),直接转化为字典操作；不设置headers 可能只有request.data获取数据，不能直接转换dict 4、服务端如果返回的json串，中文乱码，则可以设置json.dumps(d,ensure_ascii=False) 或者flask的app设置 app.config['JSON_AS_ASCII'] = False

response = requests.get('http://httpbin.org/get', data=d)

带请求头 #coding:utf-8 import requests heads = {}

heads['User-Agent'] = 'Mozilla/5.0 '
'(Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 '
'(KHTML, like Gecko) Version/5.1 Safari/534.50' response = requests.get('http://www.baidu.com',headers=heads)

POST请求 #coding:utf-8 import requests

data = {'name':'tom','age':'22'}

response = requests.post('http://httpbin.org/post', data=data).status_code #需要加cookie使用下面的请求模式，get请求格式一致 #response = requests.post('http://httpbin.org/post', data=data, cookies=cookies).status_code

print (response)

#持续保持同一会话（使用相同的 session） #coding:utf-8 import requests

session = requests.Session()#保持同一会话的核心 session.get('http://httpbin.org/cookies/set/number/12345') response = session.get('http://httpbin.org/cookies') print(response.text)

#证书取消验证 #coding:utf-8 import requests from requests.packages import urllib3

urllib3.disable_warnings() #从urllib3中消除警告 response = requests.get('https://www.12306.cn',verify=False) #证书验证设为FALSE print(response.status_code)

#超时异常
import urllib.request response=urllib.request.urlopen('http://httpbin.org/get',timeout=0.1) #设置超时时间为0.1秒,将抛出异常 print(response.read()) #运行结果 #urllib.error.URLError: <urlopen error timed out>

import urllib.request from urllib import error import socket try: response=urllib.request.urlopen('http://httpbin.org/get',timeout=0.1) print(response.read()) except error.URLError as e: if isinstance(e.reason, socket.timeout): # 判断对象是否为类的实例 print(e.reason) # 返回错误信息

#运行结果 #timed out 服务返回结果用法 #coding:utf-8 import requests

response = requests.get('http://www.baidu.com') print (response.status_code) # 打印状态码 print (response.url) # 打印请求url print (response.headers) # 打印头信息 print (response.cookies) # 打印cookie信息 print (response.text) #以文本形式打印网页源码 print (response.content) #以字节流形式打印

上传文件

import requests url = 'http://httpbin.org/post' files = {'file': (r'e:\a.py', open(r'e:\a.py', 'rb'), 'application/python', {'Expires': '0'})} r = requests.post(url, files=files) r.text 返回json格式结果处理

#coding:utf-8 import requests

response = requests.get('http://httpbin.org/get') print (response.text) #返回的是Unicode型的数据 print (response.json()) #response.json()方法同json.loads(response.text) print (u"对返回结果进行json处理之后的类型是: ") print (type(response.json()))

Urllib urllib是python内置的HTTP请求库，无需安装即可使用，它包含了4个模块： request：它是最基本的http请求模块，用来模拟发送请求 error：异常处理模块，如果出现错误可以捕获这些异常 parse：一个工具模块，提供了许多URL处理方法，如：拆分、解析、合并等 robotparser：主要用来识别网站的robots.txt文件，然后判断哪些网站可以爬

URL编码 #coding:utf-8 from urllib import parse print (parse.quote('http://www.baidu.com')) #未编码斜杠 print (parse.quote_plus('http://www.baidu.com')) #编码斜杠

URL参数拼接及get、post请求 #coding:utf-8 import urllib import urllib.parse import urllib.request param={'spam':1,'eggs':2,'bacon':0} print (u"初始参数") print (param) params= urllib.parse.urlencode(param) print (u"编码后的参数") print (params) url="http://python.org/query?%s" % params print (u"最终get请求") print ('urllib.request.urlopen("http://python.org/query?%s" % params)') print (u"最终post请求方式") print ('urllib.request.urlopen("http://python.org/query",params)')

服务器返回结果

# coding: utf-8

import urllib import urllib.request

response = urllib.request.urlopen('http://www.baidu.com') print (response.getcode()) #打印状态码信息其方法和response.getcode() 一样都是打印当前response的状态码 print (response.headers) #打印出响应的头部信息，内容有服务器类型，时间、文本内容、连接状态等等 print (response.headers['Server']) #这种拿到响应头的方式需要加上参数，指定你想要获取的头部中那一条数据 print (response.geturl()) #获取响应的url print (response.readline()) #读取html页面第一行 print (response.fileno()) #文件描述符 print (response.read()) #使用read()方法得到响应体内容，这时是一个字节流bytes，看到明文还需要decode为charset格式

带有header 发送数据 #! /usr/bin/env python3

import urllib.parse

import urllib.request

url = 'http://httpbin.org/post'

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

values = { 'act' : 'login', 'login[email]' : 'abc@abc.com', 'login[password]' : '123456' }

headers = { 'User-Agent' : user_agent }

data = urllib.parse.urlencode(values)

req = urllib.request.Request(url, data.encode("utf-8"), headers)

response = urllib.request.urlopen(req)

the_page = response.read()

print(the_page.decode("utf8"))

异常处理 #! /usr/bin/env python3

from urllib.request import Request, urlopen

from urllib.error import URLError, HTTPError

req = Request('http://www.python.org/')

try: response = urlopen(req) except HTTPError as e: print("The (www.python.org)server couldn't fulfill the request.") print('Error code: ', e.code) except URLError as e: print('We failed to reach a server.') print('Reason: ', e.reason) else: print("good!") print(response.read().decode("utf8"))

设定超时时间 import socket

import urllib.request

# timeout in seconds

#timeout = 0.1 #超时的 timeout = 2 socket.setdefaulttimeout(timeout)

# this call to urllib.request.urlopen now uses the default timeout

# we have set in the socket module

req = urllib.request.Request('http://www.python.org/')

a = urllib.request.urlopen(req).read()

服务返回结果保存为html

# coding: utf-8

import urllib import urllib.request

result=urllib.request.urlretrieve('https://www.cnblogs.com/sysu-blackbear/p/3629420.html',filename="e://pic//baidu.html") print (u"网页保存文件地址地址: ",result[0]) print (u"网页内容: ",result[1]) urllib.request.urlcleanup() #清除urllib.urlretrieve产生的缓存

下载一张图片方法1： from urllib.request import urlretrieve urlretrieve("http://pic1.win4000.com/pic/b/20/b42b4ca4c5_250_350.jpg", r"e:\pic\1.jpg")

方法2： from urllib.request import urlretrieve import urllib

imgPath="http://pic1.win4000.com/pic/7/8a/1e81de5511_250_350.jpg" pic_content = (urllib.request.urlopen(imgPath)).read() f = open(r'e:\pic\img2.jpg', 'wb') f.write(pic_content) f.close()

Urllib2:(python3以后已经合并成了urllib库) python 3.x中urllib库和urllib2库合并成了urllib库。其中urllib2.urlopen()变成了urllib.request.urlopen() urllib2.Request()变成了urllib.request.Request()

提问和评论都可以，用心的回复会被更多人看到评论

发布评论

相关文章

官方博客	全部文章	热门标签	班级博客
了解我们	网站地图	意见反馈

鸿蒙开发者社区	51CTO学堂
51CTO	软考资讯