我看好多人写着玩,下载下来没执行成功(我没看代码不知原因),然后自己写个玩玩 -_#
请自行在代码目录下创建 tieba 文件夹(代码中保存图片的目录名是 tieba)。
当然了 口味重的 可以把 贴吧地址 改成 LES 吧 或者 同志吧 之类的


#!/usr/bin/env python
# -*- coding:utf-8 -*-

import hashlib
import os
import random
import re
import sys
import time
import urllib2
from threading import Thread

class tieba(object):
    """Image scraper for one Baidu Tieba forum listing.

    ``getImages`` fetches one listing page and extracts image URLs from it;
    ``saveImg`` downloads those URLs into ``dirPath``.
    """

    url = None
    dirPath = None
    __md5 = None

    # Seconds a blocking socket operation may wait before urlopen gives up.
    # Without it a single stalled connection blocks the calling thread forever.
    timeout = 30

    # Captures the value of the ``bpic`` attribute of <img> tags that carry
    # ``src``, ``original`` and ``bpic`` attributes in that order. Compiled
    # once; the pattern has no ``.``, ``^`` or ``$``, so it needs no flags.
    _imgPattern = re.compile(
        r'<img src="[^"]*" original="[^"]*"  bpic="([^"]*)"[^>]*\/>')

    def __init__(self):
        # Listing URL prefix; the ``pn`` value is appended by getImages.
        self.url = "http://tieba.baidu.com/f?kw=%BD%E3%CD%D1&tp=0&pn="
        # Images are stored in a "tieba" folder under sys.path[0].
        self.dirPath = sys.path[0] + "/tieba/"
        self.__md5 = hashlib.md5()

    def getImages(self, page):
        """Return the list of image URLs found on listing page ``page``.

        The page index is multiplied by 50 to form the ``pn`` query value.
        Errors raised by ``urllib2.urlopen`` or ``read`` propagate to the
        caller.
        """
        url = self.url + str(page * 50)
        res = urllib2.urlopen(url, timeout=self.timeout)
        try:
            html = res.read()
        finally:
            # Always release the connection, even if the read fails.
            res.close()
        return self._imgPattern.findall(html)

    def saveImg(self, images):
        """Download every URL in ``images`` into ``dirPath`` as a .jpg file.

        The output directory is created first if it does not exist. File
        names are MD5 hex digests of a running hash fed with a random number
        plus the URL, so they are effectively unique per download. Download
        and file errors propagate to the caller.
        """
        self._ensureDir()
        for i in images:
            rand = str(random.randint(1, 10000)) + i
            self.__md5.update(rand)
            fname = self.dirPath + self.__md5.hexdigest() + ".jpg"

            res = urllib2.urlopen(i, timeout=self.timeout)
            try:
                pic = res.read()
            finally:
                res.close()

            # The with-block closes the file even if the write fails.
            with open(fname, "wb") as f:
                f.write(pic)

    def _ensureDir(self):
        """Create ``dirPath`` if it is missing."""
        if os.path.isdir(self.dirPath):
            return
        try:
            os.makedirs(self.dirPath)
        except OSError:
            # Another thread may have created it between the check and the
            # call; only re-raise when the directory really is not there.
            if not os.path.isdir(self.dirPath):
                raise

class catch(Thread):
    """Worker thread that scrapes an inclusive range of listing pages.

    For every page index from ``startPage`` to ``endPage`` it asks a
    ``tieba`` object for the page's image URLs and saves them, printing one
    status line per page.
    """

    startPage = None
    endPage = None

    def __init__(self, start, end):
        """Store the first (``start``) and last (``end``) page index."""
        Thread.__init__(self)
        self.startPage = start
        self.endPage = end

    def run(self):
        """Process each page in the range, reporting success or failure."""
        for i in range(self.startPage, self.endPage + 1):
            try:
                t = tieba()
                imgs = t.getImages(i)
                t.saveImg(imgs)
            except Exception as e:
                # Thread top-level boundary: an uncaught error here would
                # end the thread and silently skip every remaining page in
                # its range, so report the failure and move on. Images of
                # this page saved before the error are kept.
                print("get page %d failed: %s" % (i, e))
            else:
                print("get page %d success" % i)
            sys.stdout.flush()

if __name__ == '__main__':
    # Total number of listing pages to fetch (indices 0 .. maxPage - 1) and
    # the number of worker threads to spread them over.
    maxPage = 500
    threadSum = 50
    # Never start more threads than there are pages.
    threadSum = min(threadSum, maxPage)

    # Split the pages into contiguous ranges. Plain integer division would
    # leave the last ``maxPage % threadSum`` pages unassigned, so the first
    # ``extra`` threads each take one additional page.
    urlCount, extra = divmod(maxPage, threadSum)

    first = 0
    for i in range(threadSum):
        size = urlCount + (1 if i < extra else 0)
        c = catch(first, first + size - 1)
        c.start()
        first += size