针对较大规模的数据,可使用数据库进行存储。数据库是基于数据结构来组织、存储和管理数据的仓库,每个数据库都有一个或多个不同的API用于创建、访问、管理、搜索和复制所存储的数据。目前,Oracle旗下的Mysql是最流行的关系型数据库管理系统,其具有体积小、性能高、开源免费等特点。
本小节以程序5-3来演示如何将采集到的药监局国产药品数据存储到Mysql数据库中(读者需具备Java、Spring、SpringMVC、Mybatis以及Mysql基础)。请读者自行搭建SSM框架,然后参照下述三层代码将药监局国产药品数据插入到数据表中。

//程序5-3
//Controller层
@RequestMapping("/drug")
public class DrugController {
@Autowired
DrugImpl drugImpl;

@RequestMapping(value = "/drugAdd", method = RequestMethod.POST)
@ResponseBody
public void drugAdd(@RequestBody MedicineParams params) {
if (params.getStart() != null && params.getEnd() != null) {
drugImpl.drugAdd(params);
}
}
}
//Service层
public interface IDrug {
void drugAdd(MedicineParams params);
}

@Service
public class DrugImpl implements IDrug {
@Autowired
private DrugMapper drugMapper;

@Override
public void drugAdd(MedicineParams params) {
WebDriver driver = WebDriverUtils.getWebDriver();
List<Map<String, String>> excelMapList = new ArrayList<>();
for (int id = params.getStart(); id <= params.getEnd(); id++) {
driver.get(PathConstant.drug + id);
String source = driver.getPageSource();
Document document = Jsoup.parse(source);
Elements tbody = document.select("body > div > div > table:nth-child(1) > tbody");
Elements trs = tbody.get(0).getElementsByTag("tr");
//该id有数据
if (trs.size() > 4) {
//药监局数据入mysql数据库,无需对字段进行处理
Map<String, String> mysqlMap = new HashMap<>();
MysqlUtils.saveInMysql(trs, id, 3, mysqlMap);
drugMapper.insertSelective(mysqlMap);
}else{
//保存空id
}
}
driver.quit();
}
}

public class MysqlUtils {
/**
* 将药监局数据保存到mysql的通用方法
*
* @param trs 含有需要保存数据的Elements对象
* @param id 药监局网址中的id
* @param tailCount Elements对象中需要舍弃的子节点个数
*/
public static void saveInMysql(Elements trs, int id, int tailCount,Map<String,String> map) {
map.put("id",Integer.valueOf(id).toString());
for(int i = 1;i < trs.size() - tailCount;i++){
map.put("f" + i, Jsoup.parse(trs.get(i).child(1).text()).text());
}
}
}

public class WebDriverUtils {
public static WebDriver getWebDriver(){
System.setProperty("webdriver.chrome.driver","/Users/steven/Documents/chrome/chromedriver");
//声明使用的是谷歌浏览器
ChromeOptions chromeOptions=new ChromeOptions();
//实例化一个Chrome浏览器的实例
WebDriver driver = new ChromeDriver();
//设置打开的浏览器窗口最大化
driver.manage().window().maximize();
//设置隐性的等待时间
driver.manage().timeouts().pageLoadTimeout(30, TimeUnit.SECONDS);
return driver;
}
}
//Dao层
public interface DrugMapper {
int insertSelective(Map<String,String> record);
}

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.wind.medicineSpider.dao.mappers.DrugMapper">
<insert id="insertSelective" parameterType="HashMap">
insert into drug
<trim prefix="(" suffix=")" suffixOverrides=",">
<if test="id != null">
id,
</if>
<if test="f1 != null">
f1,
</if>
<if test="f2 != null">
f2,
</if>
<if test="f3 != null">
f3,
</if>
<if test="f4 != null">
f4,
</if>
<if test="f5 != null">
f5,
</if>
<if test="f6 != null">
f6,
</if>
<if test="f7 != null">
f7,
</if>
<if test="f8 != null">
f8,
</if>
<if test="f9 != null">
f9,
</if>
<if test="f10 != null">
f10,
</if>
<if test="f11 != null">
f11,
</if>
<if test="f12 != null">
f12,
</if>
<if test="f13 != null">
f13,
</if>
<if test="f14 != null">
f14,
</if>
</trim>
<trim prefix="values (" suffix=")" suffixOverrides=",">
<if test="id != null">
#{id,jdbcType=INTEGER},
</if>
<if test="f1 != null">
#{f1,jdbcType=VARCHAR},
</if>
<if test="f2 != null">
#{f2,jdbcType=VARCHAR},
</if>
<if test="f3 != null">
#{f3,jdbcType=VARCHAR},
</if>
<if test="f4 != null">
#{f4,jdbcType=VARCHAR},
</if>
<if test="f5 != null">
#{f5,jdbcType=VARCHAR},
</if>
<if test="f6 != null">
#{f6,jdbcType=VARCHAR},
</if>
<if test="f7 != null">
#{f7,jdbcType=VARCHAR},
</if>
<if test="f8 != null">
#{f8,jdbcType=VARCHAR},
</if>
<if test="f9 != null">
#{f9,jdbcType=VARCHAR},
</if>
<if test="f10 != null">
#{f10,jdbcType=VARCHAR},
</if>
<if test="f11 != null">
#{f11,jdbcType=VARCHAR},
</if>
<if test="f12 != null">
#{f12,jdbcType=VARCHAR},
</if>
<if test="f13 != null">
#{f13,jdbcType=VARCHAR},
</if>
<if test="f14 != null">
#{f14,jdbcType=VARCHAR},
</if>
</trim>
</insert>
</mapper>
//mysql脚本
DROP TABLE IF EXISTS `drug`;
CREATE TABLE `drug` (
`id` int(6) NOT NULL,
`f1` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f2` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f3` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f4` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f5` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f6` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f7` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f8` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f9` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f10` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f11` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f12` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f13` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`f14` varchar(500) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
SET FOREIGN_KEY_CHECKS = 1;

启动程序后,在postman中发起如下图所示的请求,即可采集数据。

5.3 网络数据Mysql存储_chrome