mysql信息:ip: 10.1.0.39 password:saurav48@jp database:ip_lib
第一步:下载ip段归属数据库superadmin.csv
第二步:准备好IP地理信息标准库地域编码表.xlsx
第三步:以superadmin.csv表为中心,关联地域信息表和高校信息表;已经写好脚本(执行脚本需要20分钟)
脚本如下:
# -*- coding: utf-8 -*-
#!/bin/env python
# function: make ip list for mysql
# write : lejie
# version: 1.0
# date: 2015-12-08
#open the file
#iplist = open('superadmin_utf8.csv','r')
#city = open('city_utf8.csv','r')
#school = open('school_utf8.csv','r')
iplist = open('superadmin.csv','r')
#iplist = open('super.csv','r')
city = open('city.csv','r')
school = open('school.csv','r')
end_file = open('iplib.csv','w+')
#readline and match
ip_lines = iplist.readlines()
city_lines = city.readlines()
school_lines = school.readlines()
tmp1 = [' ',' ',' ']
tmp2 = [' ',' ',' ',' ']
#print "start_ip,end_ip,region_code,edu_college_code,country,province,city,edu_city,edu_college"
head = "start_ip,end_ip,region_code,edu_college_code,country,province,city,edu_city,edu_college"
end_file.write(head+'\n')
for line_ip in ip_lines:
#line_ip = line_ip.decode('utf-8')
line_ip=line_ip.strip()
split_ip=line_ip.split(',')
split_ip1 = split_ip[:]
#print split_ip
#
for line_city in city_lines:
line_city = line_city.decode('gbk')
line_city = line_city.strip()
split_city = line_city.split(',')
#if split_city[0].strip() == u'全球' and (split_city[1].strip() != u"中国" or split_city[1].strip() != u"台湾" or split_city[1].strip() != u"澳门"):
if split_city[0].strip() == u'全球' :
#print "****",split_city[1].strip(),u"澳门"
if split_city[1].strip() == u"中国" or split_city[1].strip() == u"台湾" or split_city[1].strip() == u"澳门" or split_city[1].strip() == u"香港":
#if split_city[0].strip() == '\u5168\u7403':
split_city[0] = u"中国,"+split_city[1]
else:
split_city[0] = split_city[1]+u",国外"
split_city[1] = u"国外"
#elif split_city[0].strip() != u'全球' and (split_city[1].strip() != u"中国" or split_city[1].strip() != u"台湾" or split_city[1].strip() != u"澳门"):
else :
#if split_city[1].strip() != u"中国" or split_city[1].strip() != u"台湾" or split_city[1].strip() != u"澳门" or split_city[1].strip() != u"香港":
split_city[0] = u"中国,"+split_city[0]
#else:
#split_city[0] = u"中国,"+split_city[1]
#print "****",split_city[0]
#print split_ip1[2],split_city[2]
#print split_city
if split_ip1[2] == split_city[2]:
#print "*************"
#print split_ip1
#split_ip1 = split_ip
split_ip1.extend(split_city)
#print split_ip1
break
#all_tmp.write("".join(alllist))
if cmp(split_ip1,split_ip) == 0 :
#print "#######"
#print split_ip1
#split_ip1 = split_ip
split_ip1.extend(tmp1)
#print "#######"
#print split_ip1
#all_tmp.write("".join(alllist))
#end = ","
#end = end.join(split_ip1)
#print end.encode('GBK')
#print end
###
split_ip2 = split_ip1[:]
if split_ip1[3] != 0:
for line_school in school_lines:
line_school = line_school.decode('gbk')
line_school = line_school.strip()
split_school = line_school.split(',')
#print split_ip[3],split_school[3]
if split_ip1[3] == split_school[3]:
split_ip1.extend(split_school)
break
if cmp(split_ip1,split_ip2) == 0 :
split_ip1.extend(tmp2)
else:
split_ip1.extend(tmp2)
#print "+++++++++++"
#print split_ip2
all = [split_ip1[0],split_ip1[1],split_ip1[2],split_ip1[3],split_ip1[4],split_ip1[5],split_ip1[8],split_ip1[9]]
end = ","
end = end.join(all)
end = end.encode('utf8')
#print end
end_file.write(end+'\n')
#print split_ip1[0].encode('gbk'),",",split_ip1[1].encode('gbk'),",",split_ip1[2].encode('gbk'),",",split_ip1[3].encode('gbk'),",",split_ip1[4].encode('gbk'),",",split_ip1[5].encode('gbk'),",",split_ip1[8].encode('gbk'),",",split_ip1[9].encode('gbk')
#print split_ip1[0],",",split_ip1[1],",",split_ip1[2],",",split_ip1[3],",",split_ip1[4],",",split_ip1[5],",",split_ip1[8],",",split_ip1[9]
#print split_ip1[0].encode('utf-8'),",",split_ip1[1].encode('uft-8'),",",split_ip1[2].encode('utf-8'),",",split_ip1[3].encode('utf-8'),",",split_ip1[4].encode('utf-8'),",",split_ip1[5].encode('utf-8'),",",split_ip1[8].encode('utf-8'),",",split_ip1[9].encode('utf-8')
iplist.close()
city.close()
school.close()
end_file.close()
数据库结构:
create table ip_lib(
id int(11) NOT NULL auto_increment,
start_ip varchar(15),
end_ip varchar(15),
region_code int(10),
edu_college_code int(10) ,
country varchar(45),
province varchar(20),
city varchar(36),
edu_city varchar(17),
edu_college varchar(45)
primary key(id));
导入csv文件语法:
LOAD DATA LOCAL INFILE '/tmp/iplib.csv'
INTO TABLE ip_lib
FIELDS TERMINATED BY ','
lines terminated by '\n'
ignore 1 lines;