#coding=utf-8
import Queue
import sys
import threading
import time
import urllib2

from BeautifulSoup import BeautifulSoup
# Queue of URLs waiting to be fetched
queue = Queue.Queue()
# Queue of fetched HTTP response bodies waiting to be parsed
out_queue = Queue.Queue()
class ThreadUrl(threading.Thread):
    """Worker thread that downloads URLs and queues the response bodies.

    Each instance loops forever: it takes one URL from ``queue``, fetches it
    with ``urllib2.urlopen`` and puts the raw response body on ``out_queue``.
    """

    def __init__(self, queue, out_queue):
        """Store the two work queues.

        Args:
            queue: queue the URLs to fetch are read from.
            out_queue: queue that receives the body of each fetched page.
        """
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue

    def run(self):
        """Fetch URLs from the input queue in an endless loop.

        A failed fetch is reported on stderr and produces no entry on
        ``out_queue``; the worker then carries on with the next URL.
        """
        while True:
            host = self.queue.get()
            try:
                response = urllib2.urlopen(host)
                try:
                    chunk = response.read()
                finally:
                    # Release the connection even if the read fails.
                    response.close()
                self.out_queue.put(chunk)
            except Exception as exc:
                # Worker boundary: a bad URL or network error must not kill
                # the thread, otherwise the remaining URLs lose a worker.
                sys.stderr.write("Failed to fetch %s: %s\n" % (host, exc))
            finally:
                # Always acknowledge the item; a missing task_done() would
                # make queue.join() block forever.
                self.queue.task_done()
class DatamineThread(threading.Thread):
    """Threaded Url Grab"""
    def __init__(self, out_queue):
        threading.Thread.__init__(self)
        #http数据读取队列
        self.out_queue = out_queue
    def run(self):
        while True:
            #grabs host from queue
            #从队列中读取http数据
            chunk = self.out_queue.get()
            #parse the chunk
            soup = BeautifulSoup(chunk)
            print soup.findAll(['title'])
            #signals to queue job is done
            #发送信息给队列
            self.out_queue.task_done()
# Timestamp taken before main() runs; used to print the elapsed time at the end
start = time.time()
def main(urls=None, workers=5):
    """Fetch a list of URLs and print their titles using two thread pools.

    One pool of ``ThreadUrl`` workers downloads the pages, a second pool of
    ``DatamineThread`` workers parses them. The call returns once both
    module-level queues have been fully processed.

    Args:
        urls: iterable of URLs to fetch. When None, the module-level name
            ``hosts`` is used, as in the original zero-argument call.
        workers: number of threads started in each of the two pools.
    """
    # NOTE(review): `hosts` is not defined in the visible part of this file;
    # pass `urls` explicitly or confirm it is defined elsewhere.
    # Resolve the list before starting any thread so a missing `hosts`
    # fails before the pools are spawned.
    if urls is None:
        urls = hosts

    # Start the download pool; daemon threads do not block interpreter exit.
    for _ in range(workers):
        fetcher = ThreadUrl(queue, out_queue)
        fetcher.setDaemon(True)
        fetcher.start()

    # Hand the URLs to the download pool.
    for url in urls:
        queue.put(url)

    # Start the parsing pool that consumes the downloaded pages.
    for _ in range(workers):
        miner = DatamineThread(out_queue)
        miner.setDaemon(True)
        miner.start()

    # Block until every URL has been fetched and every page processed.
    queue.join()
    out_queue.join()
main()
# Print the total elapsed processing time
print "Elapsed Time: %s" % (time.time() - start)