关于多线程操作。

对于IO密集型操作(如访问网站、写入磁盘这类需要花时间等待响应的操作),时间主要耗在等待上,增加CPU数量几乎不能提高效率。

对于CPU密集型操作,如这个格式转换,可以通过多个cpu同时去进行。

但是对于python来讲,由于存在GIL(全局解释器锁),同一时刻只有一个python线程能被解释器执行。所以python的多线程实际上只适合用于IO密集型操作。

1 #coding:utf8
 2 import csv
 3 from xml.etree.ElementTree import Element,ElementTree
 4 import requests
 5 from StringIO import StringIO
 6 from test_retractxml import pretty
 7 
 8 def download(url):
 9     #IO操作很慢,因为不能直接得到数据。如这步:是发送请求,等待数据,在等待的过程中让出CPU,自己睡眠。
10     response = requests.get(url,timeout=3)
11     if response.ok:
12         return StringIO(response.content)
13 
def csvToxml(scsv, fxml):
    """Read CSV data from ``scsv`` and write it to ``fxml`` as XML.

    The first CSV row supplies the element names. Every following row becomes
    a ``Row`` element under a ``Data`` root, with one child element per column.
    """
    # This part is CPU-bound work; several CPUs could do it at the same time.
    rows = csv.reader(scsv)
    # Spaces are stripped from the header names before they are used as tags.
    tags = [name.replace(' ', '') for name in next(rows)]

    root = Element('Data')
    for record in rows:
        row_elem = Element('Row')
        root.append(row_elem)
        for tag, value in zip(tags, record):
            cell = Element(tag)
            cell.text = value
            row_elem.append(cell)

    # pretty() comes from test_retractxml; presumably it indents the tree in
    # place -- confirm against that module.
    pretty(root)
    ElementTree(root).write(fxml)
32     
33     
34 def handle(sid):
35     print 'Download ...(%d)' % sid
36     url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz'
37     url %= str(sid).rjust(6,'0')
38     rf = download(url)
39     if rf is None:return
40     
41     print 'Convert to XML...(%d)' % sid
42     fname = str(sid).rjust(6,'0')+'.xml'
43     with open(fname,'wb') as wf:
44         csvToxml(rf, wf)
45         
46 from threading import Thread
47 
48 '''
49 t = Thread(target=handle,args=(1,))
50 t.start()
51 
52 print 'main thread'
53 '''
class MyThread(Thread):
    """Thread that runs ``handle`` for a single stock id."""

    def __init__(self, sid):
        super(MyThread, self).__init__()
        # Stock id passed to handle() when the thread runs.
        self.sid = sid

    def run(self):
        """Thread body: download and convert the data for ``self.sid``."""
        handle(self.sid)
61 
62 threads = []
63 for i in xrange(1,11):
64     t = MyThread(i)
65     threads.append(t)
66     t.start()
67     
68 for t in threads:
69     t.join()
70     
71 print 'main thread'
72 #t.join()#阻塞函数,保证主线程在所有子线程结束后再退出
73 
74 
75 '''
76     #这是串行的方法
77     for sid in xrange(1,11):
78         print 'Download ...(%d)' % sid
79         url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz'
80         url %= str(sid).rjust(6,'0')
81         rf = download(url)
82         if rf is None:continue
83         
84         print 'Convert to XML...(%d)' % sid
85         fname = str(sid).rjust(6,'0')+'.xml'
86         with open(fname,'wb') as wf:
87             csvToxml(rf, wf)
88 '''

 

线程间通信可以用全局变量,但是不够安全;更好的做法是用Queue.Queue来存放要传递的内容,Queue是线程安全的队列。

1 #coding:utf8
 2 import requests
 3 import csv
 4 from xml.etree.ElementTree import Element,ElementTree
 5 from test_retractxml import pretty
 6 from threading import Thread
 7 from StringIO import StringIO
 8 
 9 from Queue import Queue
10 
11 
12 class DownloadThread(Thread):
13     
14     def __init__(self,sid,queue):
15         Thread.__init__(self)
16         self.sid = sid
17         self.url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz'
18         self.url %=str(sid).rjust(6,'0')
19         self.queue = queue
20     
21     def download(self,url):
22         response = requests.get(url,timeout=3)
23         if response.ok:
24             return StringIO(response.content)
25         
26     def run(self):
27         print'download',self.sid
28         data = self.download(self.url)
29         self.queue.put((self.sid,data))
30         
31             
32 class ConverThread(Thread):
33     def __init__(self,queue):
34         Thread.__init__(self)
35         self.queue = queue
36         
37     def csvToxml(self,rf,wf):
38         reader = csv.reader(rf)
39         headers = reader.next()
40         headers = map(lambda h:h.replace(' ',''),headers)
41         
42         root = Element('Data')
43         for row in reader:
44             eRow = Element('Row')
45             root.append(eRow)
46             for tag,text in zip(headers,row):
47                 e = Element(tag)
48                 e.text = text
49                 eRow.append(e)
50                 
51         pretty(root)
52         et = ElementTree(root)
53         et.write(wf)
54         
55     def run(self): 
56         while True:
57             sid,data = self.queue.get()
58             print 'Convert', sid
59             if sid  == -1:
60                 break
61             if data:
62                 fname = str(sid).rjust(6,'0')+'.xml'
63                 with open(fname,'wb') as wf:
64                     self.csvToxml(data, wf)        
65 
66 
67 
q = Queue()
dThreads = []
for i in xrange(1, 11):
    dThreads.append(DownloadThread(i, q))
cThread = ConverThread(q)

# Several threads do the downloading ...
for t in dThreads:
    t.start()

# ... and a single thread does the converting.
cThread.start()

for t in dThreads:
    t.join()

# All downloads are finished: queue the (-1, None) sentinel that makes the
# converter thread leave its loop.
q.put((-1, None))

 

由于全局解释器锁GIL的存在,无法用多个线程并行执行cpu密集型操作,所以此例子中:1,用多个线程来进行IO操作(下载);2,将所有下载的内容交给1个线程进行转换。线程之间的数据交换是通过Queue这个线程安全的队列完成的。

 

而线程之间的事件通知,需要用到threading库里的Event。等待事件用Event.wait(),触发事件用Event.set()。需要注意的是,set之后再调用wait就不会再阻塞,需要先用Event.clear()重置,wait才会重新阻塞。要把等待、响应的逻辑弄清楚。

这节还引入了守护线程(setDaemon)的概念:当其值为True时,该线程为守护线程,所有非守护线程结束后程序即退出,守护线程也随之结束。

#coding:utf8
class DownloadThread(Thread):
            ****
class ConverThread(Thread):
    def __init__(self,queue,cEvent,tEvent):
        """Store the work queue and the two events used by ``run``.

        ``run`` sets ``cEvent`` and then waits on ``tEvent``.
        """
        super(ConverThread, self).__init__()
        self.cEvent = cEvent
        self.tEvent = tEvent
        self.queue = queue
        
    def csvToxml(self,rf,wf):
         ****

def run(self): 
        count = 0
        while True:
            sid,data = self.queue.get()
            print 'Convert', sid
            if sid  == -1:
                self.cEvent.set()
                self.tEvent.wait()
                break
            if data:
                fname = str(sid).rjust(6,'0')+'.xml'
                with open(fname,'wb') as wf:
                    self.csvToxml(data, wf)        
                count += 1
                if count == 5:   #注意这里的逻辑
                    self.cEvent.set()#激活cEvent,表示转换完成
                    
                    self.tEvent.wait()#等待tEvent事件完成
                    self.tEvent.clear()#重新激活tEevent
                    count = 0
import tarfile
import os

class TarThread(Thread):
    """Daemon thread that packs ``.xml`` files into numbered ``.tgz`` archives.

    Each time ``cEvent`` is set, the ``.xml`` files in the current directory
    are archived and removed; ``tEvent`` is then set to report completion.
    """

    def __init__(self, cEvent, tEvent):
        Thread.__init__(self)
        # Number of archives attempted so far; used to name the next one.
        self.count = 0
        self.cEvent = cEvent
        self.tEvent = tEvent
        # Daemon thread: it does not keep the program alive once the
        # non-daemon threads have finished.
        self.setDaemon(True)

    def tarXML(self):
        """Archive every ``.xml`` file in the current directory, then delete it.

        The archive is named ``<count>.tgz``. If no file was added, the empty
        archive is removed again.
        """
        self.count += 1
        tfname = '%d.tgz' % self.count
        added = 0
        tf = tarfile.open(tfname, 'w:gz')  # gzip-compressed tar archive
        try:
            for fname in os.listdir('.'):
                if fname.endswith('.xml'):
                    tf.add(fname)
                    # The file is in the archive now; drop the original.
                    os.remove(fname)
                    added += 1
        finally:
            # Always release the archive handle, even if add/remove fails.
            tf.close()

        if not added:
            # Nothing was packed: do not leave an empty archive behind.
            os.remove(tfname)

    def run(self):
        """Thread body: pack files each time ``cEvent`` is set, forever."""
        while True:
            self.cEvent.wait()
            self.tarXML()
            # Clear cEvent so the next wait() blocks again.
            self.cEvent.clear()

            # Report that packing is done.
            self.tEvent.set()
            
            
if __name__ == '__main__':
    q = Queue()
    dThreads =[DownloadThread(i,q) for i in xrange(1,11)]
    
    cEvent = Event()
    tEvent = Event()
    
    cThread = ConverThread(q,cEvent,tEvent)
    tThread = TarThread(cEvent,tEvent)
    tThread.start()#注意这里要start线程
    
    for t in dThreads:
        t.start()
    cThread.start()
    
    for t in dThreads:
        t.join()
        
    q.put((-1,None))
    print 'main thread'

 

本地线程这一章开始之后都是用了python3,我暂时还是想用python2来实现,所以先放一下,以后再回来补充。

线程池:pass

多进程:pass

:pass