Basic usage:


# High-performance coroutine crawler
import asyncio

# async def fun(url):
#     print(f"Requesting {url}")
#     print(f"{url} request finished")
#     return f"{url} is done"
# f = fun("http://www.baidu.com")


# How do you use a loop?
# loop = asyncio.get_event_loop()
# loop.run_until_complete(f)

# How about a task?
# loop = asyncio.get_event_loop()
# task = loop.create_task(f)
# loop.run_until_complete(task)

# How about a future?
# loop = asyncio.get_event_loop()
# task = asyncio.ensure_future(f)
# loop.run_until_complete(task)

# Callback function
# def callback(task):
#     print(task.result())
# Bind the callback
# loop = asyncio.get_event_loop()
# task = loop.create_task(f)
# task.add_done_callback(callback)
# loop.run_until_complete(task)

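On modern Python (3.7+), the same coroutine-plus-callback pattern can be written without managing the loop by hand. A minimal runnable sketch (reusing the fun coroutine from the block above):

import asyncio

async def fun(url):
    print(f"Requesting {url}")
    print(f"{url} request finished")
    return f"{url} is done"

def callback(task):
    # Runs once the task finishes; result() re-raises any exception
    print(task.result())

async def main():
    task = asyncio.create_task(fun("http://www.baidu.com"))
    task.add_done_callback(callback)
    await task

asyncio.run(main())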

An example of crawling multiple URLs:


# ============= The main event =============
# Fetching multiple URLs concurrently
import asyncio
import time

import aiohttp
import requests  # only needed for the commented-out synchronous version

headers = {
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Referer': 'http://www.baidu.com/',
    'Connection': 'keep-alive',
}
start = time.time()
# The same mp3 URL eleven times over, just to have several downloads to run
arr = ["http://music.163.com/song/media/outer/url?id=1820550501.mp3"] * 11
async def downsong(url):
    print(f"{url} started")
    # Remember: if blocking, synchronous code appears inside a coroutine,
    # the async model breaks down
    # time.sleep(3)
    # await asyncio.sleep(3)  # use this instead

    # res = requests.get(url, headers=headers)  # blocking, don't do this
    # print(res.text)

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            pass
            # page_text = await response.text()
            # print(page_text)
    print(f"{url} finished")

stasks = []
loop = asyncio.get_event_loop()
for url in arr:
    f = downsong(url)
    task = loop.create_task(f)
    stasks.append(task)

loop.run_until_complete(asyncio.wait(stasks))
end = time.time()
print(end - start)

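The version above opens a fresh ClientSession for every URL. aiohttp's documentation recommends sharing one session so the connection pool is reused; a hedged sketch of the same download with a single session and asyncio.gather (fetch and main are my names, and headers/arr refer to the block above):

import asyncio
import aiohttp

async def fetch(session, url):
    # All requests share one session and its connection pool
    async with session.get(url, headers=headers) as response:
        return await response.read()

async def main(urls):
    async with aiohttp.ClientSession() as session:
        # gather preserves input order and returns one result per URL
        return await asyncio.gather(*(fetch(session, url) for url in urls))

results = asyncio.run(main(arr))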

Basic usage of aiohttp


# Note: asynchronous HTTP requests need the aiohttp module
# pip install aiohttp

# The basic aiohttp pattern:
import asyncio
import aiohttp

async def get_page(url):
    async with aiohttp.ClientSession() as session:
        # get()/post() accept headers, params/data, proxy='http://ip:port'
        async with session.get(url) as response:
            # text() returns the response body as a string
            # read() returns the response body as bytes
            # json() returns a parsed JSON object
            # Note: always await before reading the response data
            page_text = await response.text()
            print(page_text)

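The keyword arguments mentioned in the comments are passed per request. A small usage sketch (httpbin.org is used here only as an echo endpoint of my choosing):

import asyncio
import aiohttp

async def get_json(url):
    async with aiohttp.ClientSession() as session:
        # params is appended to the query string;
        # a proxy would be passed the same way: proxy='http://ip:port'
        async with session.get(url, params={'q': 'python'}) as response:
            return await response.json()

print(asyncio.run(get_json("http://httpbin.org/get")))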

Basic usage of asyncio.gather


import asyncio


async def a():
    await asyncio.sleep(3)
    print('Resuming a')


async def b():
    await asyncio.sleep(3)
    print('In b')


async def main():
    # Both coroutines sleep concurrently, so this takes ~3s, not ~6s
    await asyncio.gather(a(), b())


if __name__ == '__main__':
    asyncio.run(main())
    print("done")


In short, everything runs in a single thread, and all of the I/O waits are multiplexed through coroutines.
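The earlier warning about synchronous calls is worth seeing concretely: time.sleep would block that single thread, while await asyncio.sleep hands control back to the event loop so other coroutines can run. A small demonstration sketch:

import asyncio
import time

async def job(n):
    # await yields to the event loop, so all three jobs sleep concurrently;
    # time.sleep(1) here instead would serialize them
    await asyncio.sleep(1)
    print(f"job {n} done")

async def main():
    start = time.time()
    await asyncio.gather(*(job(n) for n in range(3)))
    print(f"took {time.time() - start:.1f}s")  # ~1s, not ~3s

asyncio.run(main())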

 

One thing is still unsolved: I don't know how to write the downloaded data to a local file.
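One common answer is the third-party aiofiles package (pip install aiofiles), which runs the blocking file operations in a worker thread so writes don't stall the loop. A hedged sketch combining it with the download coroutine above (save_song and the file name are mine):

import asyncio
import aiohttp
import aiofiles

async def save_song(url, path):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            data = await response.read()  # binary body, e.g. the mp3

    # aiofiles offloads the blocking write so the event loop stays free
    async with aiofiles.open(path, 'wb') as f:
        await f.write(data)

asyncio.run(save_song(
    "http://music.163.com/song/media/outer/url?id=1820550501.mp3",
    "song.mp3"))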

 

1-1: Program error:

There is no current event loop in thread 'Thread-1'


# Change:
loop = asyncio.get_event_loop()
# to:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

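The root cause is that asyncio only creates an event loop automatically in the main thread; a worker thread has to build and register its own. A minimal sketch of the fix in context:

import asyncio
import threading

def worker():
    # get_event_loop() would raise here: this thread has no loop yet
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(asyncio.sleep(0.1))
    loop.close()

t = threading.Thread(target=worker)
t.start()
t.join()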

 

-----------------------------------------------------------------------------------------------------------------------------------------