开启并发进程:
方式一:
import time
from multiprocessing import Process
def task(name):
    """Report that ``name`` started, idle for five seconds, then report it finished."""
    print('%s is running' % name)
    time.sleep(5)
    print('%s done' % name)


if __name__ == '__main__':
    # Way 1: hand a plain function to Process as its target.
    child = Process(target=task, kwargs={'name': '子进程1'})
    child.start()
    print('main Process')
View Code
方式二:
# -*- coding: utf-8 -*-
import time
from multiprocessing import Process
class MyProcess(Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def __init__(self, name):
        """Initialise the base Process, then label it with ``name``."""
        super().__init__()
        self.name = name

    def run(self):
        """Print a start message, sleep five seconds, print a done message."""
        label = self.name
        print('%s is runing ' % label)
        time.sleep(5)
        print('%s done' % label)
if __name__ == '__main__':
    # Way 2: instantiate the Process subclass and start it.
    child = MyProcess('子进程1')
    child.start()
    print('this is main process')
View Code
socket通信多用户同时操作(多进程方式)
服务端:
import socket
from multiprocessing import Process
def talk(conn):
    """Echo every message received on ``conn`` back in upper case.

    Serves the connection until the peer disconnects (cleanly or
    abruptly), then closes ``conn``.

    Args:
        conn: a connected stream socket.
    """
    try:
        while True:
            data = conn.recv(1024)
            if not data:
                # recv() returns b'' once the peer has closed its end;
                # without this check the loop would never terminate.
                break
            # sendall() keeps writing until the whole reply has been sent.
            conn.sendall(data.upper())
    except ConnectionError:
        # Abrupt disconnect (reset / broken pipe): handled like a clean close.
        pass
    finally:
        conn.close()
    print('客户端端开...')
def server_xxx(ip, port):
    """Accept TCP clients forever, serving each one in its own process.

    After every accepted connection, prints the total number of
    connections so far and how many worker processes are still alive.

    Args:
        ip: address to bind to.
        port: TCP port to listen on.
    """
    total = 0
    client_list = []
    server1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server1.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server1.bind((ip, port))
        server1.listen(2)
        print('服务器已经启动....')
        while True:
            conn, addr = server1.accept()
            total += 1
            print('有客户端连接上来了....')
            # One worker process per client, named p1, p2, ...
            worker = Process(target=talk, name='p' + str(total), args=(conn,))
            worker.start()
            # The worker holds its own handle to the connection; drop the
            # parent's copy so descriptors do not pile up in this process.
            conn.close()
            client_list.append(worker)
            # Keep only workers that are still running, so the list does
            # not grow without bound.
            client_list = [p for p in client_list if p.is_alive()]
            print('总的连接数:%s,alive的连接数:%s' % (total, len(client_list)))
    finally:
        # Reached on KeyboardInterrupt or any error: release the listener.
        server1.close()
if __name__ == '__main__':
    # Listen on localhost, port 8080.
    server_xxx(ip='localhost', port=8080)
View Code
客户端:
# -*- coding: utf-8 -*-
import socket
# Interactive echo client: send each non-empty input line to the server
# and print the reply.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
with client:  # the socket is closed however the loop ends
    client.connect(('localhost', 8080))
    while True:
        cmd = input('>>:').strip()
        if not cmd:
            # Nothing was sent, so there is no reply to wait for.
            continue
        client.sendall(cmd.encode('utf-8'))
        res = client.recv(1024)
        if not res:
            # b'' means the server closed the connection.
            break
        print(res.decode('utf-8'))
View Code
互斥锁:
# -*- coding: utf-8 -*-
import time
from multiprocessing import Process, Lock
def task(name, mutex):
    """Print three numbered lines for ``name`` while holding ``mutex``.

    Args:
        name: label printed at the start of each line.
        mutex: lock held for the whole three-line sequence.
    """
    mutex.acquire()
    try:
        print('%s 1' % name)
        time.sleep(1)
        print('%s 2' % name)
        time.sleep(1)
        print('%s 3' % name)
    finally:
        # Release even if the body raises, so other holders are not
        # blocked forever.
        mutex.release()
if __name__ == '__main__':
    # A single lock is shared by all three child processes.
    mutex = Lock()
    for index in range(3):
        label = '进程%s' % index
        Process(target=task, name=str(index), args=(label, mutex)).start()
View Code
互斥锁模拟抢票流程
import json
import os
import time
from multiprocessing import Process, Lock
# First run only: create the ticket file holding three tickets.
if not os.path.exists('db.txt'):
    db_json = dict(count=3)
    with open('db.txt', mode='w', encoding='utf-8') as f:
        json.dump(db_json, f)
def search(name, delay=1):
    """Print the remaining ticket count stored in ``db.txt``.

    Args:
        name: user label used in the printed message.
        delay: seconds to sleep before reading the file (default 1).
    """
    time.sleep(delay)
    # ``with`` closes the file handle instead of leaving it to the GC.
    with open('db.txt', 'r', encoding='utf-8') as f:
        dic = json.load(f)
    print('用户【%s】查到的剩余票数为:%s' % (name, dic['count']))
def get(name, query_delay=1, pay_delay=3):
    """Try to buy one ticket: decrement ``count`` in ``db.txt`` if positive.

    Prints a success or failure message for ``name``.

    Args:
        name: user label used in the printed message.
        query_delay: seconds to sleep before reading the file (default 1).
        pay_delay: seconds to sleep between decrementing and writing
            the file back (default 3).
    """
    time.sleep(query_delay)
    with open('db.txt', 'r', encoding='utf-8') as f:
        dic = json.load(f)
    if dic['count'] > 0:
        dic['count'] -= 1
        time.sleep(pay_delay)
        # Closing the file via ``with`` guarantees the new count is
        # flushed to disk before the next buyer reads it.
        with open('db.txt', 'w', encoding='utf-8') as f:
            json.dump(dic, f)
        print('%s购票成功!' % name)
    else:
        print('%s购票失败!' % name)
# Function executed by each child process.
def task(name, mutex):
    """Look up the remaining tickets, then try to buy one under ``mutex``.

    Args:
        name: user label passed to ``search`` and ``get``.
        mutex: lock that serialises the purchase step.
    """
    search(name)
    # The lock makes get() run one process at a time: only one buyer may
    # read-modify-write the ticket file at once, which keeps the stored
    # count consistent.
    mutex.acquire()
    try:
        get(name)
    finally:
        # Release even if get() raises, so other buyers are not blocked.
        mutex.release()
if __name__ == '__main__':
    # Ten buyers compete, all sharing one lock.
    mutex = Lock()
    for index in range(10):
        buyer = '用户%s' % index
        Process(target=task, args=(buyer, mutex)).start()
View Code
生产者消费者模型:
1、程序中有两类角色
一类负责生产数据(生产者)
一类负责处理数据(消费者)
2、引入生产者消费者模型为了解决的问题是
平衡生产者与消费者之间的速度差
程序解开耦合
3、如何实现生产者消费者模型
生产者<--->队列<--->消费者
import time
from multiprocessing import Queue, Process
def producer(q, p_name, food_name):
    """Put ten numbered items named after ``food_name`` onto ``q``.

    Each item is ``food_name`` followed by its index 0-9; a message is
    printed before every put.
    """
    for serial in range(10):
        item = food_name + str(serial)
        time.sleep(0.5)
        print('生产者[%s]生产了[%s][%s]号' % (p_name, food_name, serial))
        q.put(item)
def consumer(q, c_name):
    """Take items from ``q`` until a ``None`` end marker arrives.

    Every real item costs one second and is reported with ``c_name``.
    """
    while True:
        item = q.get()
        if item is None:
            # End marker: stop consuming.
            print('数据处理完毕,准备结束!')
            break
        time.sleep(1)
        print('消费者[%s]吃了[%s]' % (c_name, item))
if __name__ == '__main__':
    q = Queue()  # shared queue between producers and consumers
    # Three producers generate data in parallel.
    p1, p2, p3 = (
        Process(target=producer, args=(q, tag, food))
        for tag, food in (('1号', '苹果'), ('2号', '蔬菜'), ('3号', '牛奶'))
    )
    # Two consumers process the data.
    c1, c2 = (
        Process(target=consumer, args=(q, eater))
        for eater in ('吃货1号', '吃货2号')
    )
    for proc in (p1, c1, p2, c2, p3):
        proc.start()
    for proc in (p1, p2, p3):
        proc.join()
    # Only after every producer has finished: enqueue one None end marker
    # per consumer (two consumers, so two markers).
    for _ in (c1, c2):
        q.put(None)
    print('main process')
View Code
作了解的JoinableQueue:
import time
from multiprocessing import JoinableQueue, Process
def producer(q, p_name, food_name):
    """Put ten numbered items onto ``q``, then wait on ``q.join()``.

    Each item is ``food_name`` followed by its index 0-9; a message is
    printed before every put.
    """
    for serial in range(10):
        item = food_name + str(serial)
        time.sleep(0.1)
        print('生产者[%s]生产了[%s][%s]号' % (p_name, food_name, serial))
        q.put(item)
    # Block until every queued item has been marked done by a consumer.
    q.join()
def consumer(q, c_name):
    """Consume items from ``q``, acknowledging each with ``task_done()``.

    Stops when a ``None`` end marker is received.

    Args:
        q: a queue that supports ``get()`` and ``task_done()``.
        c_name: label used in the printed message.
    """
    while True:
        res = q.get()
        if res is None:
            # The end marker was obtained with get() too, so it must be
            # acknowledged; otherwise q.join() could never return.
            q.task_done()
            break
        time.sleep(1)
        print('消费者[%s]吃了[%s]' % (c_name, res))
        # Acknowledge only after the item is fully processed: producers
        # waiting in q.join() are released once everything is handled.
        q.task_done()
if __name__ == '__main__':
    q = JoinableQueue()  # shared queue
    # Three producers generate data in parallel.
    producers = [
        Process(target=producer, args=(q, tag, food))
        for tag, food in (('1号', '苹果'), ('2号', '蔬菜'), ('3号', '牛奶'))
    ]
    # Two daemon consumers process the data.
    consumers = [
        Process(target=consumer, args=(q, eater), daemon=True)
        for eater in ('吃货1号', '吃货2号')
    ]
    for proc in producers + consumers:
        proc.start()
    for proc in producers:
        proc.join()
    print('main process')
View Code
线程:
开启方式一:
import time
from threading import Thread
def task(name):
    """Report that ``name`` started, idle for five seconds, then report it finished."""
    print('%s is running' % name)
    time.sleep(5)
    print('%s done' % name)


if __name__ == '__main__':
    # Way 1: hand a plain function to Thread as its target.
    worker = Thread(target=task, kwargs={'name': '线程1'})
    worker.start()
    print('main Process')
View Code
开启方式二:
# -*- coding: utf-8 -*-
import time
from threading import Thread
class MyThread(Thread):
    """Thread subclass whose work is defined by overriding ``run``."""

    def __init__(self, name):
        """Initialise the base Thread, then label it with ``name``."""
        super().__init__()
        self.name = name

    def run(self):
        """Print a start message, sleep five seconds, print a done message."""
        label = self.name
        print('%s is runing ' % label)
        time.sleep(5)
        print('%s done' % label)
if __name__ == '__main__':
    # Way 2: instantiate the Thread subclass and start it.
    worker = MyThread('子线程1')
    worker.start()
    print('this is main process')
View Code
线程的其他属性:
import time
from threading import Thread, currentThread, enumerate
def task():
    """Print the current thread's name before and after a two-second pause."""
    print('%s is ruuning' % currentThread().name)
    time.sleep(2)
    print('%s is done' % currentThread().name)


if __name__ == '__main__':
    worker = Thread(target=task, name='子线程1')
    worker.start()
    # Other things worth trying at this point: assigning worker.name to
    # rename the thread, worker.join(), worker.is_alive(), renaming the
    # main thread through currentThread(), and threading.active_count().
    # NOTE: ``enumerate`` here is threading.enumerate (see the import at
    # the top of this snippet), which shadows the builtin of that name.
    print(enumerate())
View Code
线程的互斥锁
import time
from threading import Thread, Lock
n = 100


# Threads share their process's memory, so several threads updating the
# same value at once can corrupt it.
def task():
    """Decrement the module-level counter ``n`` by one while holding ``mutex``."""
    global n
    mutex.acquire()
    try:
        temp = n
        time.sleep(0.1)  # pause between the read and the write-back
        n = temp - 1
    finally:
        # Release even if the body raises, so waiting threads are not
        # blocked forever.
        mutex.release()
if __name__ == '__main__':
    # Threads share the process's memory, so the lock does not need to be
    # passed to them as an argument.
    mutex = Lock()
    t_list = [Thread(target=task) for _ in range(100)]
    for worker in t_list:
        worker.start()
    # Wait for every thread to finish before reading the result.
    for worker in t_list:
        worker.join()
    print('main', n)
View Code
死锁
import time
from threading import Thread, Lock
mutexA = Lock()
mutexB = Lock()


class MyThread(Thread):
    """Deadlock demonstration: ``f1`` and ``f2`` take two locks in opposite order.

    ``f1`` locks A then B; ``f2`` locks B then A. With several threads
    running, one can hold B inside ``f2`` while another holds A inside
    ``f1``, and each then waits for the other forever. The ordering is
    deliberately left as is, because it is what this example illustrates.
    """

    def run(self):
        self.f1()
        self.f2()

    def f1(self):
        """Acquire A, then B; release in reverse order."""
        mutexA.acquire()
        print('%s拿到了A锁' % self.name)
        mutexB.acquire()
        print('%s拿到B锁' % self.name)
        mutexB.release()
        mutexA.release()

    def f2(self):
        """Acquire B, then (after a short pause) A; release in reverse order."""
        mutexB.acquire()
        # The messages name the lock that was actually acquired
        # (previously A and B were swapped here).
        print('%s拿到了B锁' % self.name)
        time.sleep(0.1)
        mutexA.acquire()
        print('%s拿到A锁' % self.name)
        mutexA.release()
        mutexB.release()
if __name__ == '__main__':
    # Launch ten threads that all contend for the same two locks.
    for _ in range(10):
        MyThread().start()
View Code
递归锁:
# 递归锁:可以连续acquire多次,每acquire一次计数器+1,只有计数为0时,才能被抢到acquire
import time
from threading import Thread, RLock
# Both names refer to the SAME re-entrant lock.
mutexB = mutexA = RLock()


class MyThread(Thread):
    """Same locking sequence as the deadlock example, but on one RLock.

    Because ``mutexA`` and ``mutexB`` are a single re-entrant lock, the
    thread that holds it can acquire it again, so the nested acquires
    in ``f1`` and ``f2`` cannot block the holder itself.
    """

    def run(self):
        self.f1()
        self.f2()

    def f1(self):
        """Acquire via A, then via B; release in reverse order."""
        mutexA.acquire()
        print('f1 中%s拿到了A锁' % self.name)
        mutexB.acquire()
        print('f1 中%s拿到B锁' % self.name)
        mutexB.release()
        print('f1 中%s释放B锁' % self.name)
        mutexA.release()
        print('f1 中%s释放A锁' % self.name)

    def f2(self):
        """Acquire via B, pause three seconds, then via A; release in reverse order."""
        mutexB.acquire()
        # The messages name the lock that was actually acquired
        # (previously A and B were swapped here).
        print('f2 中%s拿到了B锁' % self.name)
        time.sleep(3)
        mutexA.acquire()
        print('f2 中%s拿到A锁' % self.name)
        mutexA.release()
        print('f2 中%s释放A锁' % self.name)
        mutexB.release()
        print('f2 中%s释放B锁' % self.name)
if __name__ == '__main__':
    # Launch ten threads that all use the shared re-entrant lock.
    for _ in range(10):
        MyThread().start()
View Code
信号量:
信号量也是一把锁,可以指定信号量为5,对比互斥锁同一时间只能有一个任务抢到锁去执行,信号量同一时间可以有5个任务拿到锁去执行,如果说互斥锁是合租房屋的人去抢一个厕所,那么信号量就相当于一群路人争抢公共厕所,公共厕所有多个坑位,这意味着同一时间可以有多个人上公共厕所,但公共厕所容纳的人数是一定的,这便是信号量的大小
import threading
import time
from threading import Thread, Semaphore
def func():
    """Hold one slot of the semaphore ``sm`` for three seconds."""
    # ``with`` returns the slot even if the body raises, so a failing
    # thread cannot permanently shrink the semaphore.
    with sm:
        print('%s get sm' % threading.current_thread().name)
        time.sleep(3)
if __name__ == '__main__':
    # At most five of the 23 threads can hold the semaphore at once.
    sm = Semaphore(5)
    for _ in range(23):
        Thread(target=func).start()
View Code
加锁解锁的另一种写法:
import time
from threading import Thread, Lock
n = 100


# Threads share their process's memory, so several threads updating the
# same value at once can corrupt it.
def task():
    """Print the shared counter ``n`` and decrement it by one under ``mutex``."""
    global n
    # ``with`` acquires the lock on entry and releases it on exit.
    with mutex:
        snapshot = n
        print(n)
        time.sleep(0.1)
        n = snapshot - 1
if __name__ == '__main__':
    # Threads share the process's memory, so the lock does not need to be
    # passed to them as an argument.
    mutex = Lock()
    t_list = [Thread(target=task) for _ in range(100)]
    for worker in t_list:
        worker.start()
    # Wait for every thread to finish before reading the result.
    for worker in t_list:
        worker.join()
    print('main', n)
View Code
Event:
线程的一个关键特性是每个线程都是独立运行且状态不可预测。如果程序中的其他线程需要通过判断某个线程的状态来确定自己下一步的操作,这时线程同步问题就会变得非常棘手。为了解决这些问题,我们需要使用threading库中的Event对象。Event对象包含一个可由线程设置的信号标志,它允许线程等待某些事件的发生。在初始情况下,Event对象中的信号标志被设置为假。如果有线程等待一个Event对象,而这个Event对象的标志为假,那么这个线程将会被一直阻塞直至该标志为真。一个线程如果将一个Event对象的信号标志设置为真,它将唤醒所有等待这个Event对象的线程。如果一个线程等待一个已经被设置为真的Event对象,那么它将忽略这个事件,继续执行。
import threading
import time
from threading import Thread, Event
# from threading import Event
#
# event.isSet():返回event的状态值;
#
# event.wait():如果 event.isSet()==False将阻塞线程;
#
# event.set(): 设置event的状态值为True,所有阻塞池的线程激活进入就绪状态, 等待操作系统调度;
#
# event.clear():恢复event的状态值为False。
def conn_mysql():
    """Wait for ``event`` to be set, giving up after three attempts.

    Each attempt waits up to three seconds on the event. Prints a
    success message once the event is set, or a give-up message after
    the third unsuccessful attempt.
    """
    # Use the thread's name in both messages (the attempt message used
    # to print the whole Thread object).
    me = threading.current_thread().name
    count = 1
    while not event.is_set():
        if count > 3:
            print('have tried too many times ')
            return
        print('【%s】第%s次尝试链接' % (me, count))
        event.wait(3)
        count += 1
    print('【%s】链接成功!' % me)
def check_mysql():
    """Announce the check, sleep 14 seconds, then set ``event``."""
    checker = threading.current_thread().name
    print('[%s]正在检查mysql' % checker)
    time.sleep(14)
    event.set()
if __name__ == '__main__':
    event = Event()
    # Two threads wait for the event; a third one sets it.
    conn1, conn2 = Thread(target=conn_mysql), Thread(target=conn_mysql)
    check = Thread(target=check_mysql)
    for worker in (conn1, conn2, check):
        worker.start()
View Code
定时器实现20s更新验证码:
import random
from threading import Timer
class Code(object):
    """Verification code that regenerates itself on a timer."""

    def __init__(self):
        """Generate the first code and schedule periodic refreshes."""
        self.make_cache()

    def make_cache(self, interval=20):
        """Store and print a fresh code, then schedule the next refresh.

        Args:
            interval: seconds until the next refresh (default 20).
        """
        self.cache = self.make_code()
        print(self.cache)
        # Pass ``interval`` on to the next run; without it every refresh
        # after the first silently fell back to the default.
        self.t = Timer(interval, self.make_cache, args=(interval,))
        # Daemon timer: the refresh chain must not keep the interpreter
        # alive once the main thread has ended.
        self.t.daemon = True
        self.t.start()

    def make_code(self, n=4):
        """Return ``n`` random characters, each a digit or an upper-case letter."""
        return ''.join(
            random.choice([str(random.randint(0, 9)), chr(random.randint(65, 90))])
            for _ in range(n)
        )

    def check(self):
        """Prompt repeatedly and report whether each entry matches the current code.

        The entry is upper-cased before the comparison.
        """
        while True:
            code = input('输入你的验证码:').strip()
            if code.upper() == self.cache:
                print('正确!')
            else:
                print('错误!')
# Guarded so that importing this module does not start the timer or
# block on input().
if __name__ == '__main__':
    obj = Code()
    obj.check()
View Code
线程queue
import queue
# 先进先出队列
# FIFO queue with room for three entries.
q = queue.Queue(maxsize=3)
for entry in ('asd', 'asd2', 'asd3'):
    q.put(entry)
# The queue is full now: q.put('asd4', block=True, timeout=5) would block
# for up to five seconds, and q.put('asd4', block=False) would not wait.
q.get_nowait()  # same as q.get(block=False); q.get() would block instead

# LIFO queue: last in, first out, i.e. a stack.
q = queue.LifoQueue(maxsize=3)
for entry in (1, 2):
    q.put(entry)
for _ in range(2):
    print(q.get())

# Priority queue: the smaller the number, the higher the priority.
q = queue.PriorityQueue(maxsize=3)
for entry in ((8, 'ssss'), (4, '44444'), (9, '9999')):
    q.put(entry)
for _ in range(3):
    print(q.get())
View Code
进程池和线程池
用法一样,使用场景不同:线程池适用于I/O密集型应用,如socket、web、爬虫;进程池适用于计算密集型应用,可以利用多核计算的优势,如金融软件。
进程池和线程池的目的都是为了限制开启进程(线程)并发的最大数
import os
import random
import time
from concurrent.futures import ThreadPoolExecutor
# from concurrent.futures import ProcessPoolExecutor # 进程和线程的用法一样,但是使用场景不同
from threading import currentThread
def task():
    """Print the worker thread's name and the process id, then sleep 1-3 seconds."""
    print('name:[%s],pid:[%s] is running ' % (currentThread().name, os.getpid()))
    time.sleep(random.randint(1, 3))


if __name__ == '__main__':
    # Leaving the ``with`` block performs shutdown(wait=True): no new
    # work is accepted and execution continues only after every
    # submitted task has finished.
    with ThreadPoolExecutor(5) as pool:
        for _ in range(10):
            pool.submit(task)
    print('main')
View Code
map 取代 for + submit
import os
import random
import time
from concurrent.futures import ThreadPoolExecutor
# from concurrent.futures import ProcessPoolExecutor # 进程和线程的用法一样,但是使用场景不同
from threading import currentThread
def task(n):
    """Print the worker thread's name and the process id, then sleep 1-3 seconds.

    ``n`` is the item handed over by ``map``; it is not used.
    """
    print('name:[%s],pid:[%s] is running ' % (currentThread().name, os.getpid()))
    time.sleep(random.randint(1, 3))


if __name__ == '__main__':
    # Leaving the ``with`` block performs shutdown(wait=True): no new
    # work is accepted and execution continues only after every
    # submitted task has finished.
    with ThreadPoolExecutor(max_workers=5) as pool:
        # map() replaces the explicit loop of pool.submit(task, i) calls.
        pool.map(task, range(10))
    print('main')
View Code
同步:提交任务后,原地等待任务执行结果,拿到结果后再往下执行下一行代码。结果:程序串行
异步:提交任务后不等待任务执行完毕。
这样便需要异步回调,以爬虫来解释就是,同时爬取多个网站的时候,要先下载网络上的内容,再解析得到自己想要的结果,所以在下载(读取网页内容)后回调解析的命令
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import requests
def get_page(url, timeout=10):
    """Download ``url`` and return its text.

    Args:
        url: address to fetch.
        timeout: seconds passed to ``requests.get`` so that a stalled
            server cannot block a pool worker forever (default 10).

    Returns:
        ``{'url': url, 'text': <body>}`` when the status code is 200,
        otherwise ``None``.
    """
    print('线程 [%s] 正在下载[%s]' % (current_thread().name, url))
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        return {'url': url, 'text': response.text}
    return None
def parse_page(res):
    """Done-callback: append the downloaded page's URL and size to ``db.txt``.

    Args:
        res: the finished future; ``res.result()`` is expected to be
            either ``None`` or a dict with ``'url'`` and ``'text'`` keys.
    """
    try:
        page = res.result()
    except Exception as exc:
        # result() re-raises whatever the download raised; report it
        # here instead of letting the callback itself blow up.
        print('下载失败: %s' % exc)
        return
    if page is None:
        # The download returned nothing, so there is nothing to parse.
        return
    print('线程[%s]正在解析[%s]' % (current_thread().name, page['url']))
    parse_res = 'url[%s] 大小[%s]\n' % (page['url'], len(page['text']))
    with open('db.txt', 'a', encoding='utf-8') as f:
        f.write(parse_res)
if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.python.org',
        'https://www.openstack.org',
        'https://help.github.com/',
        'http://www.sina.com.cn/',
    ]
    p = ThreadPoolExecutor(3)
    for url in urls:
        download = p.submit(get_page, url)
        # parse_page receives the future object itself and has to call
        # .result() on it to obtain the download's return value.
        download.add_done_callback(parse_page)
View Code
小练习:线程池方式实现socket并行并且限制并行数量
服务端
import socket
# from threading import Thread
from concurrent.futures import ThreadPoolExecutor
def talk(conn):
    """Echo every message received on ``conn`` back in upper case.

    Serves the connection until the peer disconnects (cleanly or
    abruptly), then closes ``conn``.

    Args:
        conn: a connected stream socket.
    """
    try:
        while True:
            data = conn.recv(1024)
            if not data:
                # recv() returns b'' once the peer has closed its end;
                # without this check the loop would never terminate and
                # the pool worker would never be freed.
                break
            # sendall() keeps writing until the whole reply has been sent.
            conn.sendall(data.upper())
    except ConnectionError:
        # Abrupt disconnect (reset / broken pipe): handled like a clean close.
        pass
    finally:
        conn.close()
    print('客户端端开...')
def server_xxx(ip, port):
    """Accept TCP clients forever, handing each connection to ``pool``.

    Args:
        ip: address to bind to.
        port: TCP port to listen on.
    """
    server1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server1.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server1.bind((ip, port))
        server1.listen(2)
        print('服务器已经启动....')
        while True:
            conn, addr = server1.accept()
            print('有客户端连接上来了....')
            # The pool decides when a worker thread picks this up.
            pool.submit(talk, conn)
    finally:
        # Reached on KeyboardInterrupt or any error: release the listener
        # (a close() placed after the endless loop would never run).
        server1.close()
if __name__ == '__main__':
    # Three worker threads for client connections.
    pool = ThreadPoolExecutor(max_workers=3)
    server_xxx(ip='localhost', port=8080)
View Code
客户端
# -*- coding: utf-8 -*-
import socket
# Interactive echo client: send each non-empty input line to the server
# and print the reply.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
with client:  # the socket is closed however the loop ends
    client.connect(('localhost', 8080))
    while True:
        cmd = input('>>:').strip()
        if not cmd:
            # Nothing was sent, so there is no reply to wait for.
            continue
        client.sendall(cmd.encode('utf-8'))
        res = client.recv(1024)
        if not res:
            # b'' means the server closed the connection.
            break
        print(res.decode('utf-8'))
View Code
协程:单线程下并发(遇到IO阻塞就切换任务操作)
生成器(yield)方式
greenlet模块和gevent模块,其中gevent模块和monkey.patch_all() 即可标记所有IO操作
IO模型:
(前言:)
简单版本:
到目前为止,已经将四个IO Model都介绍完了。现在回过头来回答最初的那几个问题:blocking和non-blocking的区别在哪,synchronous IO和asynchronous IO的区别在哪。
先回答最简单的这个:blocking vs non-blocking。前面的介绍中其实已经很明确的说明了这两者的区别。调用blocking IO会一直block住对应的进程直到操作完成,而non-blocking IO在kernel还在准备数据的情况下会立刻返回。
再说明synchronous IO和asynchronous IO的区别之前,需要先给出两者的定义。Stevens给出的定义(其实是POSIX的定义)是这样子的:
A synchronous I/O operation causes the requesting process to be blocked until that I/O operation completes;
An asynchronous I/O operation does not cause the requesting process to be blocked;
两者的区别就在于synchronous IO做”IO operation”的时候会将process阻塞。按照这个定义,四个IO模型可以分为两大类,
之前所述的blocking IO,non-blocking IO,IO multiplexing都属于synchronous IO这一类,而asynchronous I/O属于后一类。
有人可能会说,non-blocking IO并没有被block啊。这里有个非常“狡猾”的地方,定义中所指的”IO operation”是指真实的IO操作,
就是例子中的recvfrom这个system call。non-blocking IO在执行recvfrom这个system call的时候,如果kernel的数据没有准备好,
这时候不会block进程。但是,当kernel中数据准备好的时候,recvfrom会将数据从kernel拷贝到用户内存中,这个时候进程是被block了,
在这段时间内,进程是被block的。而asynchronous IO则不一样,当进程发起IO 操作之后,就直接返回再也不理睬了,直到kernel发送一个信号,
告诉进程说IO完成。在这整个过程中,进程完全没有被block。