import re
import urllib
def main():
    """Scrape proxy addresses from ten listing pages into ``E:/ip.txt``.

    For each page number 1..10 the page is downloaded, decoded as GBK and
    scanned with two regular expressions: one capturing the text between
    ``<td>`` and ``<S`` (used as the IP) and one capturing a ``+``-separated
    letter sequence (used as the port).  The two match lists are paired by
    position.  Port letters are translated to digits through ``die``; when a
    port contains a letter that ``die`` cannot translate, only the IP is
    written.  Every resulting line is written to the output file and echoed
    to stdout, followed by the number of IP matches found on that page.

    Raises:
        IOError/OSError: if the output file cannot be opened or a page
            cannot be fetched.
        UnicodeDecodeError: if a page is not valid GBK.
    """
    # Letter -> digit table used to decode the letter-encoded port numbers.
    die = {'d': '0', 'c': '1', 'k': '2', 'z': '3', 'm': '4',
           'b': '5', 'w': '6', 'i': '7', 'r': '8', 'l': '9'}
    # Letters with no entry in ``die``: a port containing any of them cannot
    # be turned into digits, so the entry is recorded without a port.
    undecodable = 'exaf'
    headers = ('User-Agent', 'Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1')

    # Compile once instead of once per page.
    ip_pattern = re.compile(r'<td>(.*)<S')
    port_pattern = re.compile(r'\"(\+.*?\+.)\)')

    try:
        opener = urllib.URLopener()  # Python 2 API
    except AttributeError:
        # Python 3 has no urllib.URLopener; OpenerDirector offers the same
        # ``addheaders`` / ``open(url).read()`` surface used below.
        from urllib.request import build_opener
        opener = build_opener()
    opener.addheaders = [headers]

    # ``with`` guarantees the file is flushed and closed even when a fetch
    # or decode fails part-way through the run.
    with open(r'E:/ip.txt', 'w') as f:
        for page in range(1, 11):
            # NOTE(review): the host part of this URL is empty in the source;
            # requests will fail until the real host is supplied.
            url = "http:///res/ip/page_" + str(page) + ".php"
            response = opener.open(url)
            try:
                data = response.read().decode('GBK')
            finally:
                response.close()

            ip_list = ip_pattern.findall(data)
            port_list = port_pattern.findall(data)

            # zip() pairs entries by position and stops at the shorter list,
            # so a page with fewer port matches than IP matches no longer
            # raises IndexError.
            for ip, port in zip(ip_list, port_list):
                port = port.replace('+', '')
                if any(ch in port for ch in undecodable):
                    s = ip
                else:
                    s = ip + ':' + port
                    for key, digit in die.items():
                        s = s.replace(key, digit)
                f.write(s + '\n')
                print(s)
            print(len(ip_list))
# Run the scraper only when this file is executed as a script, not on import.
if '__main__' == __name__:
    main()