import urllib.request  
import socket  
import re  
import sys  
import os
import socket  
import random
import threading
# Directory that downloaded pictures are written to; destFile() creates it
# when it does not exist yet.
targetDir = r"C:\pic"  
def destFile(path):
    """Build the local file path under ``targetDir`` for a remote picture.

    The file name is the last ``/``-separated component of ``path`` (the
    whole string when it contains no ``/``), prefixed with a random integer
    between 1 and 10000 to make name clashes between pictures less likely.
    ``targetDir`` is created, including missing parents, if necessary.

    Args:
        path: URL (or any ``/``-separated string) of the picture.

    Returns:
        The destination path as a string.
    """
    # exist_ok avoids the isdir()/mkdir() race when several download threads
    # get here at the same time, and makedirs also copes with missing parents.
    os.makedirs(targetDir, exist_ok=True)
    # rsplit never raises, unlike rindex, when the link has no '/' at all.
    file_name = path.rsplit('/', 1)[-1]
    prefix = random.randint(1, 10000)
    return os.path.join(targetDir, '%d%s' % (prefix, file_name))
def getPic(link):
    """Download the picture at ``link`` to local disk, best-effort.

    The destination path is obtained from ``destFile(link)``.  Ordinary
    failures (network errors, timeouts, malformed URLs, an unwritable
    destination) are reported on stderr and otherwise ignored, so a single
    broken picture cannot stop the caller.  ``KeyboardInterrupt`` and
    ``SystemExit`` are not caught.

    Args:
        link: URL of the picture to fetch.

    Returns:
        None.
    """
    try:
        urllib.request.urlretrieve(link, destFile(link))
    except Exception as exc:
        # Deliberately broad: this runs as a thread target and must stay
        # best-effort, but the failure should at least be visible.
        print("download failed: %s (%s)" % (link, exc), file=sys.stderr)
# Absolute http: URLs ending in one of the picture extensions the crawler
# is interested in.
_IMAGE_LINK_RE = re.compile(r'http:[^\s]*?(?:jpg|png|gif)')


def _fetch_page(url):
    """Return the body of ``url`` as bytes, or None when it cannot be fetched."""
    try:
        # The context manager closes the response even when read() fails.
        with urllib.request.urlopen(url) as response:
            return response.read()
    except Exception:
        # Broad on purpose (URL errors, timeouts, protocol errors), but
        # KeyboardInterrupt still gets through so the crawl can be stopped.
        return None


def _image_links(page_bytes):
    """Return the distinct picture URLs found in ``page_bytes``, in page order."""
    # Search the decoded text rather than the bytes repr, so that real
    # whitespace (for example newlines) terminates a candidate URL.
    text = page_bytes.decode("utf-8", errors="replace")
    return list(dict.fromkeys(_IMAGE_LINK_RE.findall(text)))


def _link_is_reachable(link):
    """Return True when ``link`` can be opened, closing the probe connection."""
    try:
        with urllib.request.urlopen(link):
            return True
    except Exception:
        return False


if __name__ == "__main__":
    # Apply the 2 second socket timeout to every request, including the
    # very first page fetch.
    socket.setdefaulttimeout(2)
    # m == 4 means "fetch the next page".  A failed page fetch resets it to
    # 0, after which the following four page ids are skipped.
    m = 4
    for i in range(86981, 131306):  # original note: 71460,131306
        if m != 4:
            m += 1
            continue
        hostname = "http://www.xxx.com/html/tupian/xxx/%d.html" % (i)
        contentBytes = _fetch_page(hostname)
        if contentBytes is None:
            print(i)
            m = 0
            continue
        print(i)
        print("*************************************")
        threads = []
        for link in _image_links(contentBytes):
            print(link)
            if not _link_is_reachable(link):
                # The first unreachable picture abandons the rest of this
                # page; downloads queued so far are still started below.
                break
            threads.append(threading.Thread(target=getPic, args=(link,)))
        # Start the downloads only after the whole page has been scanned.
        for c, worker in enumerate(threads):
            worker.start()
            print(c)