接口:
"https://cx.shouji.360.cn/phonearea.php?number=" + str(电话号码)
接口推荐理由
- 上一篇文章中,我们用 "https://www.so.com/s?ie=utf-8&q=" + str(电话号码) 尝试获取想要的结果。该接口可用,但反爬机制严格,抓取不了几条就会被封停,需要构建代理 IP 池并设置访问间隔。
- 这个接口非常稳定,基本上没有反爬机制,不用代理 IP 就能获取我们想要的结果,更简单、稳定、高效。
数据源:
可以是座机号码或手机号码;如果不是电话号码,会被自动剔除。
结果展示:
代码
# coding: utf-8
# -*- coding: utf-8 -*-
import urllib.request
import urllib.parse
from urllib.error import HTTPError ,URLError
import socket #请求超时异常
from bs4 import BeautifulSoup
import requests
import time
import json
import csv
import re
import random
import datetime
import pandas as pd
def response(url, telephone=None, writer=None):
    """Query the phone-area endpoint at ``url`` and append one result row.

    The reply is read as UTF-8 JSON and the fields ``data.province``,
    ``data.city`` and ``data.sp`` are extracted. Empty fields are filled in:

    * province: "香港" if the number starts with ``00852``, "台湾" if it
      starts with ``00886``, otherwise "未知";
    * city: falls back to the province value;
    * sp: "座机" when the reply carried a province, otherwise the province
      fallback value.

    The row is written with ``writer.writerows`` and then echoed to stdout.

    Args:
        url: Full request URL, already containing the number to look up.
        telephone: The number being looked up (any type; it is converted
            with ``str``). When omitted, the module-level name ``telephone``
            is used, which is how the script's main loop calls this function.
        writer: Object exposing ``writerows`` (e.g. a ``csv.writer``). When
            omitted, the module-level name ``writer`` is used.

    Returns:
        The written row ``[number, province, city, sp]``, or ``None`` when
        the lookup failed and nothing was written.

    Raises:
        KeyError: If ``telephone`` / ``writer`` are omitted and no
            module-level fallback with that name exists.
    """
    module_scope = globals()
    if telephone is None:
        telephone = module_scope["telephone"]
    if writer is None:
        writer = module_scope["writer"]
    # Always work on text: spreadsheet cells may arrive as integers, and the
    # prefix checks below need a string.
    number = str(telephone)

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36",
    }
    try:
        req = urllib.request.Request(url, headers=headers)
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req, timeout=60) as reply:
            payload = json.loads(reply.read().decode("utf-8"))
        data = payload.get("data")
        reported_province = data.get("province") or ""
        reported_city = data.get("city") or ""
        reported_sp = data.get("sp") or ""
    except (HTTPError, URLError, socket.timeout, ValueError,
            AttributeError, TypeError) as exc:
        # ValueError covers undecodable bytes and malformed JSON;
        # AttributeError/TypeError cover replies without a usable "data"
        # object. One bad lookup must not abort the whole batch, but it
        # should be visible.
        print("skipped {}: {}".format(ascii(number), ascii(exc)))
        return None

    if reported_province:
        province = reported_province
    elif number.startswith("00852"):
        province = "香港"
    elif number.startswith("00886"):
        province = "台湾"
    else:
        province = "未知"

    city = reported_city or province

    if reported_sp:
        sp = reported_sp
    elif reported_province:
        # A known region without a carrier is treated as a landline.
        sp = "座机"
    else:
        sp = province

    row = [number, province, city, sp]
    try:
        writer.writerows([row])
    except UnicodeEncodeError as exc:
        print("skipped {}: {}".format(ascii(number), ascii(exc)))
        return None

    # Echo after writing so a console that cannot render the text does not
    # cost us the row.
    try:
        print(number, province, city, sp)
    except UnicodeEncodeError:
        print(ascii(row))
    return row
if __name__ == '__main__':
    # Script entry point: read phone numbers from the input workbook, look
    # each one up through response(), and collect the results in a CSV file.

    # Output table that receives one row per resolved number.
    file_name = r"D:\Case_data/telephone_r" + ".csv"
    # Input workbook; the numbers are taken from its "电话" column.
    read_file_path = r'D:/Case_data/telephone.xlsx'

    # Load the input before creating the output file, so a missing or
    # unreadable workbook does not leave an empty result file behind.
    # The column is read as text so leading zeros (area codes, the
    # 00852 / 00886 prefixes) are kept. read_excel takes no `encoding`
    # argument: .xlsx files carry their own encoding.
    df = pd.read_excel(read_file_path, dtype={"电话": str})
    df["电话"] = df["电话"].replace('-', "", regex=True)  # strip dashes inside numbers
    # print() instead of display(): display only exists under IPython/Jupyter.
    print(df.tail(10))

    # `with` guarantees the CSV is flushed and closed even if a lookup raises.
    with open(file_name, "w+", newline='', encoding='gb18030') as f:
        # NOTE: `writer` and `telephone` are intentionally module-level
        # names; response() reads them when called with only the URL.
        writer = csv.writer(f, dialect='excel')
        # Header row first.
        writer.writerow(['电话', '省份', '市级', '分类'])
        # Skip empty cells rather than querying the literal text "nan".
        for telephone in df["电话"].dropna():
            time.sleep(0)  # no delay between requests; raise this to throttle
            # Quote the number so stray characters cannot corrupt the query string.
            urls = r"https://cx.shouji.360.cn/phonearea.php?number=" + urllib.parse.quote(str(telephone))
            response(urls)
扫码关注
更多数据分析与运营知识
干货在此,随时学习!