java获取行政区划数据(精确到街道)
- 前言
- 代码获取数据
- 引入依赖
- 构建数据类
- 获取全部省份数据
- 获取每个省份的数据
- 文件保存
前言
这段时间需要获取行政区划数据,在网上看了很多网站,大部分都是在网页上直接加载,一个一个界面获取太麻烦,所以自己写了段代码来获取行政区划。
之前试着做了一段java的爬虫,很不幸没做成功,但是这也对我这段代码做了铺垫吧。数据处理都是那时候学的,所以处理起来不算太难。
代码获取数据
引入依赖
引入jsoup用来处理页面数据
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
构建数据类
构建基础类用来存放
/**
 * Flat record of one administrative-division entry. It carries the code and name of a
 * street together with the district/county, city and province it belongs to.
 * Instances are created through the builder ({@code Area.builder()...build()}).
 */
@Data
@Builder
static class Area {
/**
* Street code
*/
private String streetCode;
/**
* Street name
*/
private String streetName;
/**
* District/county code
*/
private String districtCode;
/**
* District/county name
*/
private String districtName;
/**
* City code
*/
private String cityCode;
/**
* City name
*/
private String cityName;
/**
* Province code
*/
private String provinceCode;
/**
* Province name
*/
private String provinceName;
}
获取全部省份数据
从这里获取全部省份的数据
这个界面中主要获取的是每个省的行政代码,因为行政代码是后面获取每个省数据时需要用到的参数。从页面代码中可以看出来,根据id为'example'的节点可以获取到数据表格,然后根据tr标签获取每一行数据,再根据td标签获取每个单元格的数据。
/**
 * Downloads the index page of https://www.tianzhicun.com and builds one {@link Area} per
 * province row, with only the province code populated.
 *
 * The table is located by the element id {@code example}. The first two {@code tr} rows are
 * header rows and are skipped; the province code is read from the second {@code td} of every
 * remaining row. Rows with fewer than two cells are ignored.
 *
 * Security note: certificate and hostname verification are disabled for this request, so the
 * response is not authenticated. Do not reuse this for anything sensitive.
 *
 * @return the provinces found; an empty list (never {@code null}) when the page cannot be
 *         fetched or the expected table is missing
 */
private List<Area> getData() {
    try {
        URL url = new URL("https://www.tianzhicun.com");
        HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
        // WARNING(review): this trust manager accepts every certificate and the hostname
        // verifier below accepts every host, which leaves the connection open to
        // man-in-the-middle attacks. Kept because the scraper relies on it; remove once the
        // site's certificate validates with the default trust store.
        X509TrustManager xtm = new X509TrustManager() {
            @Override
            public X509Certificate[] getAcceptedIssuers() {
                // The interface contract requires a non-null (possibly empty) array.
                return new X509Certificate[0];
            }

            @Override
            public void checkServerTrusted(X509Certificate[] arg0, String arg1)
                    throws CertificateException {
                // Intentionally empty: every server certificate is accepted.
            }

            @Override
            public void checkClientTrusted(X509Certificate[] arg0, String arg1)
                    throws CertificateException {
                // Intentionally empty: every client certificate is accepted.
            }
        };
        TrustManager[] tm = {xtm};
        SSLContext ctx = SSLContext.getInstance("TLS");
        ctx.init(null, tm, null);
        con.setSSLSocketFactory(ctx.getSocketFactory());
        con.setHostnameVerifier(new HostnameVerifier() {
            @Override
            public boolean verify(String arg0, SSLSession arg1) {
                return true;
            }
        });

        // Raw bytes of the page; both streams are closed even if reading fails.
        byte[] b;
        try (InputStream inStream = con.getInputStream();
             ByteArrayOutputStream outStream = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[1024];
            int len;
            while ((len = inStream.read(buffer)) != -1) {
                outStream.write(buffer, 0, len);
            }
            b = outStream.toByteArray();
        }

        // HTML of the page as a string.
        String rtn = new String(b, java.nio.charset.StandardCharsets.UTF_8);
        Document document = Jsoup.parse(rtn);

        // The data table is the element with id "example".
        Element postList = document.getElementById("example");
        if (postList == null) {
            System.err.println("No element with id 'example' found at " + url);
            return new ArrayList<>();
        }

        // Every tr element is one record.
        Elements postItems = postList.getElementsByTag("tr");
        List<Area> list = new ArrayList<>();
        int i = 0;
        for (Element postItem : postItems) {
            i++;
            // The first two rows are table headers.
            if (i < 3) {
                continue;
            }
            Elements cells = postItem.getElementsByTag("td");
            if (cells.size() < 2) {
                // Not a data row; skip it instead of failing the whole page.
                continue;
            }
            list.add(Area.builder()
                    .provinceCode(cells.get(1).text())
                    .build());
        }
        return list;
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Failed to load the province list");
    }
    return new ArrayList<>();
}
获取每个省份的数据
List<Area> list = new ArrayList<>();
for (Area area : areas) {
String code = area.getProvinceCode();
// 处理逻辑和上面相同
URL url = new URL("https://www.tianzhicun.com/province/" + code + "/town");
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
X509TrustManager xtm = new X509TrustManager() {
@Override
public X509Certificate[] getAcceptedIssuers() {
// TODO Auto-generated method stub
return null;
}
@Override
public void checkServerTrusted(X509Certificate[] arg0, String arg1)
throws CertificateException {
// TODO Auto-generated method stub
}
@Override
public void checkClientTrusted(X509Certificate[] arg0, String arg1)
throws CertificateException {
// TODO Auto-generated method stub
}
};
TrustManager[] tm = {xtm};
SSLContext ctx = SSLContext.getInstance("TLS");
ctx.init(null, tm, null);
con.setSSLSocketFactory(ctx.getSocketFactory());
con.setHostnameVerifier(new HostnameVerifier() {
@Override
public boolean verify(String arg0, SSLSession arg1) {
return true;
}
});
InputStream inStream = con.getInputStream();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int len = 0;
while ((len = inStream.read(buffer)) != -1) {
outStream.write(buffer, 0, len);
}
byte[] b = outStream.toByteArray();//网页的二进制数据
outStream.close();
inStream.close();
String rtn = new String(b, "utf-8");
// System.out.println(rtn);
Document document = Jsoup.parse(rtn);
//像js一样,通过标签获取title
// System.out.println(document.getElementsByTag("title").first());
//像js一样,通过id 获取文章列表元素对象
Element postList = document.getElementById("example");
//像js一样,通过class 获取列表下的所有博客
Elements postItems = postList.getElementsByTag("tr");
int i = 0;
for (Element postItem : postItems) {
i++;
if (i < 3) {
continue;
}
//处街道编码,区域编码都在<a>标签中,并且带有链接 需哦一需要对器做特殊处理 先获取a标签在获取线面href的链接在把非编码替换掉
Area area = Area.builder()
.streetName(postItem.getElementsByTag("td").get(0).text())
.streetCode(postItem.getElementsByTag("td").get(1).text())
.districtCode(postItem.getElementsByTag("td").get(5).getElementsByTag("a").attr("href").replace("/province/city/county/", ""))
.districtName(postItem.getElementsByTag("td").get(5).text())
.cityCode(postItem.getElementsByTag("td").get(4).getElementsByTag("a").attr("href").replace("/province/city/", ""))
.cityName(postItem.getElementsByTag("td").get(4).text())
.provinceCode(postItem.getElementsByTag("td").get(3).getElementsByTag("a").attr("href").replace("/province/", ""))
.provinceName(postItem.getElementsByTag("td").get(3).text())
.build();
list.add(area);
}
} catch (Exception e) {
e.printStackTrace();
System.out.println(string);
}
}
此时数据已经获取完毕
此时得到的数据(json)很大,所以把它保存到了文件中
文件保存
使用poi将数据写入Excel文件
// Writes every collected record to one sheet of an .xls workbook, one row per street.
HSSFWorkbook wb = new HSSFWorkbook();
// Sheet holding all regions down to street level.
HSSFSheet streetSheet = wb.createSheet("街道");
// NOTE(review): the .xls format caps a sheet at 65,536 rows; confirm the record count
// stays below that or switch to the .xlsx classes.
for (int i = 0; i < list.size(); i++) {
    Area area = list.get(i);
    HSSFRow streetRow = streetSheet.createRow(i);
    // Column order: province, city, district, street; each as name followed by code.
    String[] values = {
            area.getProvinceName(),
            area.getProvinceCode(),
            area.getCityName(),
            area.getCityCode(),
            area.getDistrictName(),
            area.getDistrictCode(),
            area.getStreetName(),
            area.getStreetCode()
    };
    for (int col = 0; col < values.length; col++) {
        HSSFCell cell = streetRow.createCell(col);
        cell.setCellValue(values[col]);
    }
}
// try-with-resources flushes and closes the file even when write() throws.
try (FileOutputStream fout = new FileOutputStream("D:/区域.xls")) {
    wb.write(fout);
}
在这里插入图片描述