一、前言

在当今数字化高速发展的时代,数据规模呈现爆炸式增长的态势。无论是电子商务平台的海量交易记录,社交媒体中的大量用户动态,还是企业级应用中的复杂业务数据,其数量都极为庞大。在这样的背景下,如何高效地处理和存储海量数据成为应用开发中至关重要的挑战。

Spring Boot 作为强大且成熟的开发框架,为构建稳定可靠的应用奠定了坚实基础。Spring Data JPA 在数据操作方面提供了便捷高效的途径。而 ThreadPoolTaskExecutor 能够充分发挥多核 CPU 的优势,借助并发处理大幅提升数据插入的速度。

通过将这三者有机结合,我们能够构建一个快速、稳定且能够处理百万级数据批量插入的系统,满足业务对于数据处理的高性能和高可靠性要求。这不仅能够显著提升系统的整体性能,还能为用户带来更为流畅的使用体验,增强系统在激烈市场竞争中的核心竞争力。

二、示例项目

1.创建项目,添加依赖

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the DataProject demo: a Spring Boot 2.7 web application that
  uses Spring Data JPA with a MySQL database. Dependency versions not given
  here are managed by spring-boot-starter-parent.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.7.17</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.example</groupId>
    <artifactId>DataProject</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>DataProject</name>
    <description>DataProject</description>
    <!-- The empty elements below are kept as generated; they stop these values from being inherited from the parent POM. -->
    <url/>
    <licenses>
        <license/>
    </licenses>
    <developers>
        <developer/>
    </developers>
    <scm>
        <connection/>
        <developerConnection/>
        <tag/>
        <url/>
    </scm>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>
        <!-- Spring MVC + embedded servlet container for the REST controller -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- JDBC driver: referenced only by class name in the configuration, so it is not needed at compile time -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.31</version>
            <scope>runtime</scope>
        </dependency>
        <!-- Spring Data JPA (Hibernate) for the repository layer -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-jpa</artifactId>
        </dependency>
        <!-- Lombok is a compile-time annotation processor; optional keeps it out of downstream consumers -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <!-- Lombok is not needed at runtime, so leave it out of the executable jar -->
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

2.添加项目配置

# Data source (MySQL)
spring.datasource.url=jdbc:mysql://localhost:3306/my_db?useUnicode=true&characterEncoding=UTF-8&useSSL=false
spring.datasource.username=root
spring.datasource.password=123456
spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver
# HikariCP connection pool: at most 100 connections, wait up to 6000 ms for a free one
spring.datasource.hikari.maximum-pool-size=100
spring.datasource.hikari.connection-timeout=6000
# Thread pool settings (read through @Value in ExecutorConfig)
spring.task.executor.core-pool-size=100
spring.task.executor.max-pool-size=300
spring.task.executor.queue-capacity=99999
# JPA / Hibernate: update the schema on startup and print the executed SQL
# NOTE(review): show-sql prints every statement; consider turning it off for million-row loads
spring.jpa.hibernate.ddl-auto=update
spring.jpa.show-sql=true

3.创建实体类

package com.example.dataproject.entity;

import lombok.Getter;
import lombok.Setter;

import javax.persistence.*;

/**
 * JPA entity mapped to the {@code t_data} table; the sample record used by the
 * bulk-insert demo.
 *
 * <p>Getters and setters for all fields are generated by Lombok.
 *
 * @author qx
 * @date 2024/8/1
 */
@Getter
@Setter
@Entity
@Table(name = "t_data")
public class MyData {

    /**
     * Primary key, generated with the {@code IDENTITY} strategy.
     *
     * <p>NOTE(review): Hibernate is documented to disable JDBC insert batching
     * for IDENTITY-generated ids; confirm whether that matters for this load.
     */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Name of the record. */
    private String name;

    /** Description text of the record. */
    private String description;

}

4.数据持久层

package com.example.dataproject.dao;

import com.example.dataproject.entity.MyData;
import org.springframework.data.jpa.repository.JpaRepository;

/**
 * Spring Data JPA repository for {@link MyData} entities with {@code Long} ids.
 *
 * <p>Declares no query methods of its own; everything callers use is inherited
 * from {@link JpaRepository}.
 *
 * @author qx
 * @date 2024/8/1
 */
public interface MyDataDao extends JpaRepository<MyData, Long> {
}

5.服务层

package com.example.dataproject.service;

import com.example.dataproject.dao.MyDataDao;
import com.example.dataproject.entity.MyData;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;

/**
 * Service that bulk-inserts {@link MyData} records by cutting the input list
 * into batches and saving the batches concurrently on the injected thread pool.
 *
 * <p>This class opens no transaction around the batches; every batch is passed
 * to its own {@code saveAll} call on a pool thread.
 * NOTE(review): a failing batch therefore presumably does not undo batches that
 * were already saved - confirm if all-or-nothing semantics are required.
 *
 * @author qx
 * @date 2024/8/1
 */
@Service
public class MyDataService {

    /** Number of entities handed to a single {@code saveAll} call by default. */
    private static final int DEFAULT_BATCH_SIZE = 1000;

    @Autowired
    private ThreadPoolTaskExecutor taskExecutor;
    @Autowired
    private MyDataDao myDataDao;

    /**
     * Inserts all records using the default batch size of 1000 and blocks until
     * every batch has finished.
     *
     * @param dataList records to insert; {@code null} or empty is a no-op
     * @throws RuntimeException if a batch fails or the waiting thread is interrupted
     */
    public void batchInsertData(List<MyData> dataList) {
        batchInsertData(dataList, DEFAULT_BATCH_SIZE);
    }

    /**
     * Inserts all records in batches of at most {@code batchSize} elements and
     * blocks until every batch has finished.
     *
     * <p>The batches are views of {@code dataList}, so the caller must not modify
     * the list while this method is running.
     *
     * @param dataList  records to insert; {@code null} or empty is a no-op
     * @param batchSize maximum number of records per {@code saveAll} call; must be positive
     * @throws IllegalArgumentException if {@code batchSize} is not positive
     * @throws RuntimeException         if a batch fails (cause is the
     *                                  {@link ExecutionException}) or the waiting
     *                                  thread is interrupted; task-submission
     *                                  failures are rethrown unchanged
     */
    public void batchInsertData(List<MyData> dataList, int batchSize) {
        if (batchSize <= 0) {
            throw new IllegalArgumentException("batchSize must be positive but was " + batchSize);
        }
        if (dataList == null || dataList.isEmpty()) {
            return;
        }

        final int total = dataList.size();
        List<Future<?>> futures = new ArrayList<>();
        try {
            int start = 0;
            while (start < total) {
                // Computed without "start + batchSize" so a huge batchSize cannot overflow.
                final int end = batchSize < total - start ? start + batchSize : total;
                final List<MyData> batch = dataList.subList(start, end);
                futures.add(taskExecutor.submit((Callable<Void>) () -> {
                    myDataDao.saveAll(batch);
                    return null;
                }));
                start = end;
            }
            for (Future<?> future : futures) {
                future.get();
            }
        } catch (InterruptedException e) {
            cancelPending(futures);
            // Restore the flag so code further up the stack can see the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException("Interrupted while waiting for the batch insert to finish", e);
        } catch (ExecutionException e) {
            cancelPending(futures);
            throw new RuntimeException("Batch insert failed", e);
        } catch (RuntimeException e) {
            // Submission itself failed (e.g. the executor rejected the task).
            cancelPending(futures);
            throw e;
        }
    }

    /**
     * Cancels batches that have not started yet. Batches that are already
     * running are not interrupted, so an in-flight database call is never cut off.
     */
    private static void cancelPending(List<Future<?>> futures) {
        for (Future<?> future : futures) {
            future.cancel(false);
        }
    }
}

6.线程池配置

package com.example.dataproject.config;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

/**
 * Spring configuration that exposes the application's
 * {@link ThreadPoolTaskExecutor} bean, sized from the
 * {@code spring.task.executor.*} properties.
 *
 * @author qx
 * @date 2024/8/1
 */
@Configuration
public class ExecutorConfig {

    /** Core pool size, from {@code spring.task.executor.core-pool-size}. */
    @Value("${spring.task.executor.core-pool-size}")
    private int corePoolSize;

    /** Maximum pool size, from {@code spring.task.executor.max-pool-size}. */
    @Value("${spring.task.executor.max-pool-size}")
    private int maxPoolSize;

    /** Task queue capacity, from {@code spring.task.executor.queue-capacity}. */
    @Value("${spring.task.executor.queue-capacity}")
    private int queueCapacity;

    /**
     * Builds the thread pool from the three injected settings.
     *
     * @return the configured executor bean
     */
    @Bean
    public ThreadPoolTaskExecutor taskExecutor() {
        final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setCorePoolSize(this.corePoolSize);
        pool.setMaxPoolSize(this.maxPoolSize);
        pool.setQueueCapacity(this.queueCapacity);
        return pool;
    }
}

7.创建控制层

package com.example.dataproject.controller;

import com.example.dataproject.entity.MyData;
import com.example.dataproject.service.MyDataService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * REST controller for the bulk-insert demo: one endpoint generates sample
 * records, the other stores a posted list of records.
 *
 * @author qx
 * @date 2024/8/1
 */
@RestController
public class MyDataController {

    /** Number of sample records returned by {@link #generateData()}. */
    private static final int GENERATED_RECORD_COUNT = 1_000_000;

    @Autowired
    private MyDataService myDataService;

    /**
     * Inserts the posted records in bulk.
     *
     * @param dataList records deserialized from the request body
     * @return the fixed confirmation text {@code "batchInsert success"}
     */
    @PostMapping("/batchInsert")
    public String batchInsertData(@RequestBody List<MyData> dataList) {
        myDataService.batchInsertData(dataList);
        return "batchInsert success";
    }

    /**
     * Generates one million sample records without ids; names are
     * {@code "name:" + index} and descriptions are {@code "desc:"} followed by a
     * random int.
     *
     * @return the generated records
     */
    @GetMapping("/generateData")
    public List<MyData> generateData() {
        // Pre-sized so the backing array is not repeatedly grown and copied.
        List<MyData> dataList = new ArrayList<>(GENERATED_RECORD_COUNT);
        Random random = new Random();
        for (int i = 0; i < GENERATED_RECORD_COUNT; i++) {
            MyData myData = new MyData();
            myData.setName("name:" + i);
            myData.setDescription("desc:" + random.nextInt());
            dataList.add(myData);
        }
        return dataList;
    }

}

8.启动项目进行测试

我们先生成需要插入的数据。

SpringBoot+ThreadPoolTaskExecutor实现批量插入百万级数据_Executor

然后在postman中进行数据的批量插入。

SpringBoot+ThreadPoolTaskExecutor实现批量插入百万级数据_SpringBoot_02

点击send进行批量插入测试。

SpringBoot+ThreadPoolTaskExecutor实现批量插入百万级数据_ThreadPool_03

我们查看数据库发现批量数据添加成功。

SpringBoot+ThreadPoolTaskExecutor实现批量插入百万级数据_Executor_04