mysql信息:ip: 10.1.0.39  password:saurav48@jp  database:ip_lib

第一步:下载ip段归属数据库superadmin.csv

网站http://www.iac-i.org

第二步:准备好IP地理信息标准库地域编码表.xlsx

第三步:以superadmin.csv表为中心,关联地域信息表和高校信息表;已经写好脚本(执行脚本需要20分钟)

脚本如下:

# -*- coding: utf-8 -*-
#!/bin/env python
# function: make ip list for mysql
# write : lejie
# version: 1.0
# date: 2015-12-08
#open the file

#iplist = open('superadmin_utf8.csv','r')
#city = open('city_utf8.csv','r')
#school = open('school_utf8.csv','r')
iplist = open('superadmin.csv','r')
#iplist = open('super.csv','r')
city = open('city.csv','r')
school = open('school.csv','r')
end_file = open('iplib.csv','w+')
#readline and match

ip_lines = iplist.readlines()
city_lines = city.readlines()
school_lines = school.readlines()
tmp1 = [' ',' ',' ']
tmp2 = [' ',' ',' ',' ']

#print "start_ip,end_ip,region_code,edu_college_code,country,province,city,edu_city,edu_college"
head = "start_ip,end_ip,region_code,edu_college_code,country,province,city,edu_city,edu_college"
end_file.write(head+'\n')

for line_ip in ip_lines:
    #line_ip = line_ip.decode('utf-8')
    line_ip=line_ip.strip()
    split_ip=line_ip.split(',')
    split_ip1 = split_ip[:]
    #print split_ip
#
    for line_city in city_lines:
        line_city = line_city.decode('gbk')
        line_city = line_city.strip()
        split_city = line_city.split(',')
        #if split_city[0].strip() == u'全球' and (split_city[1].strip() != u"中国" or split_city[1].strip() != u"台湾" or split_city[1].strip() != u"澳门"):
        if split_city[0].strip() == u'全球' :
            #print "****",split_city[1].strip(),u"澳门"
            if split_city[1].strip() == u"中国" or split_city[1].strip() == u"台湾" or split_city[1].strip() == u"澳门" or split_city[1].strip() == u"香港":
        #if split_city[0].strip() == '\u5168\u7403':
                split_city[0] = u"中国,"+split_city[1]
            else:
                split_city[0] = split_city[1]+u",国外"
                split_city[1] = u"国外"
        #elif split_city[0].strip() != u'全球' and (split_city[1].strip() != u"中国" or split_city[1].strip() != u"台湾" or split_city[1].strip() != u"澳门"):
        else :
            #if split_city[1].strip() != u"中国" or split_city[1].strip() != u"台湾" or split_city[1].strip() != u"澳门" or split_city[1].strip() != u"香港":
            split_city[0] = u"中国,"+split_city[0]
            #else:
            #split_city[0] = u"中国,"+split_city[1]
        #print "****",split_city[0]
        #print split_ip1[2],split_city[2]
        #print split_city
        if split_ip1[2] == split_city[2]:
            #print "*************"
            #print split_ip1
            #split_ip1 = split_ip
            split_ip1.extend(split_city)
            #print split_ip1
            break
            #all_tmp.write("".join(alllist))
    if cmp(split_ip1,split_ip) == 0 :
        #print "#######"
        #print split_ip1
        #split_ip1 = split_ip
        split_ip1.extend(tmp1)
    #print "#######"
    #print split_ip1
        #all_tmp.write("".join(alllist))
    #end = ","
    #end = end.join(split_ip1)
    #print end.encode('GBK')
    #print end
###
    split_ip2 = split_ip1[:]
    if split_ip1[3] != 0:
        for line_school in school_lines:
        line_school = line_school.decode('gbk')
        line_school = line_school.strip()
        split_school = line_school.split(',')
        #print split_ip[3],split_school[3]
        if split_ip1[3] == split_school[3]:
            split_ip1.extend(split_school)
            break
        if cmp(split_ip1,split_ip2) == 0 :
        split_ip1.extend(tmp2)
    else:
        split_ip1.extend(tmp2)
    
    #print "+++++++++++"
    #print split_ip2
    all = [split_ip1[0],split_ip1[1],split_ip1[2],split_ip1[3],split_ip1[4],split_ip1[5],split_ip1[8],split_ip1[9]]
    end = ","
    end = end.join(all)
    end = end.encode('utf8')
    #print end
    end_file.write(end+'\n')
    #print split_ip1[0].encode('gbk'),",",split_ip1[1].encode('gbk'),",",split_ip1[2].encode('gbk'),",",split_ip1[3].encode('gbk'),",",split_ip1[4].encode('gbk'),",",split_ip1[5].encode('gbk'),",",split_ip1[8].encode('gbk'),",",split_ip1[9].encode('gbk')
    #print split_ip1[0],",",split_ip1[1],",",split_ip1[2],",",split_ip1[3],",",split_ip1[4],",",split_ip1[5],",",split_ip1[8],",",split_ip1[9]
    #print split_ip1[0].encode('utf-8'),",",split_ip1[1].encode('uft-8'),",",split_ip1[2].encode('utf-8'),",",split_ip1[3].encode('utf-8'),",",split_ip1[4].encode('utf-8'),",",split_ip1[5].encode('utf-8'),",",split_ip1[8].encode('utf-8'),",",split_ip1[9].encode('utf-8')

iplist.close()
city.close()
school.close()
end_file.close()

数据库结构:

create table ip_lib(

id int(11) NOT NULL  auto_increment,

start_ip varchar(15),

end_ip varchar(15),

region_code int(10),

edu_college_code  int(10) ,

country varchar(45),

province varchar(20),

city varchar(36),

edu_city varchar(17),

edu_college varchar(45)

primary key(id));



导入csv文件语法:

LOAD DATA LOCAL INFILE '/tmp/iplib.csv'

INTO TABLE ip_lib

FIELDS TERMINATED BY ','

lines terminated by '\n'

ignore 1 lines;