Spring Batch使用Job Repository来存储Job执行期间的元数据,并提供两种默认实现:一种是存放在内存中,默认实现类为MapJobRepositoryFactoryBean;另一种是存放在数据库中。这里是使用数据库来存储Job执行期间的元数据的项目举例。
一、项目创建步骤
1.项目结构
BatchMain.java:
package com.xj.demo2;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
/**
 * Launcher for the demo2 batch job.
 *
 * <p>Loads the Spring context from {@code demo2/job/demo2-job.xml}, looks up the
 * {@code jobLauncher} and {@code billJob} beans, runs the job with empty
 * parameters and prints the resulting {@link JobExecution}.
 *
 * @author xjfu
 * @since 2021/11/04 20:01
 */
public class BatchMain {

    /**
     * Runs {@code billJob} once with empty job parameters.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the context (and the beans it owns) on every
        // exit path; previously the context was never closed.
        try (ClassPathXmlApplicationContext context =
                new ClassPathXmlApplicationContext("demo2/job/demo2-job.xml")) {
            // Spring Batch job launcher
            JobLauncher launcher = context.getBean("jobLauncher", JobLauncher.class);
            // The job declared in demo2-job.xml
            Job job = context.getBean("billJob", Job.class);
            try {
                // Run the job and capture the outcome (arguments: job to run, its parameters)
                JobExecution result = launcher.run(job, new JobParameters());
                System.out.println(result.toString());
            } catch (Exception e) {
                // Launch failures are reported on stderr, as before
                e.printStackTrace();
            }
        }
    }
}
CreditBill.java:
package com.xj.demo2;
/**
 * Credit-card bill record for demo2: one line of the input CSV.
 *
 * <p>A plain mutable bean; every String property starts as the empty string and
 * the amount starts at zero.
 *
 * @author xjfu
 * @since 2021/11/04 19:27
 */
public class CreditBill {

    /** Bank card account ID. */
    private String accountID = "";

    /** Card holder name. */
    private String name = "";

    /** Amount spent. */
    private double amount = 0.0d;

    /** Date of the purchase. */
    private String date = "";

    /** Place of the purchase. */
    private String address = "";

    /** @return the bank card account ID */
    public String getAccountID() {
        return this.accountID;
    }

    /** @param accountID the bank card account ID */
    public void setAccountID(String accountID) {
        this.accountID = accountID;
    }

    /** @return the card holder name */
    public String getName() {
        return this.name;
    }

    /** @param name the card holder name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the amount spent */
    public double getAmount() {
        return this.amount;
    }

    /** @param amount the amount spent */
    public void setAmount(double amount) {
        this.amount = amount;
    }

    /** @return the date of the purchase */
    public String getDate() {
        return this.date;
    }

    /** @param date the date of the purchase */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the place of the purchase */
    public String getAddress() {
        return this.address;
    }

    /** @param address the place of the purchase */
    public void setAddress(String address) {
        this.address = address;
    }

    /**
     * Renders the bill as a comma-separated line in the order
     * accountID, name, amount, date, address.
     */
    @Override
    public String toString() {
        return String.join(",", accountID, name, String.valueOf(amount), date, address);
    }
}
CreditBillProcessor.java:
package com.xj.demo2;
import org.springframework.batch.item.ItemProcessor;
/**
 * Item processor for demo2.
 *
 * <p>Prints each incoming bill, then tags every property with a fixed marker so
 * the effect of processing is visible in the output file.
 *
 * @author xjfu
 * @since 2021/11/04 19:29
 */
public class CreditBillProcessor implements ItemProcessor<CreditBill, CreditBill> {

    /**
     * Prints the bill and modifies it in place.
     *
     * @param bill the bill read from the input
     * @return the same instance, after modification
     * @throws Exception declared by the {@code ItemProcessor} contract
     */
    @Override
    public CreditBill process(CreditBill bill) throws Exception {
        // Show the item as it was read, before any change
        String beforeProcessing = bill.toString();
        System.out.println(beforeProcessing);

        // Simple demo transformations: suffix 1, 2, 4, 5 on the text fields, +3 on the amount
        String taggedAccount = bill.getAccountID() + "1";
        bill.setAccountID(taggedAccount);

        String taggedName = bill.getName() + "2";
        bill.setName(taggedName);

        double raisedAmount = bill.getAmount() + 3.0;
        bill.setAmount(raisedAmount);

        String taggedDate = bill.getDate() + "4";
        bill.setDate(taggedDate);

        String taggedAddress = bill.getAddress() + "5";
        bill.setAddress(taggedAddress);

        return bill;
    }
}
demo2-inputFile.csv:
4101231234656,tom,100.00,2013-12-31 12:00:08,Lu lit
4101236543210,tom,120.00,2013-12-31 12:00:08,Lu Zui
demo2-job.xml:
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:batch="http://www.springframework.org/schema/batch"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd">
<!-- Import the bean definitions from demo2-jobContext.xml -->
<import resource="classpath:demo2/job/demo2-jobContext.xml"/>
<!-- Define the job named billJob -->
<batch:job id="billJob">
<!-- Define the step named billStep -->
<batch:step id="billStep">
<batch:tasklet transaction-manager="transactionManager">
<!-- Declare the reader, processor and writer; commit-interval="2" means one write per two processed items, which makes writing more efficient -->
<batch:chunk reader="csvItemReader" processor="creditBillProcessor" writer="csvItemWriter" commit-interval="2">
</batch:chunk>
</batch:tasklet>
</batch:step>
</batch:job>
</beans>
demo2-jobContext.xml:
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:batch="http://www.springframework.org/schema/batch"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd">
<!--
Database-backed job repository.
data-source: the data source to use; defaults to dataSource
transaction-manager: the transaction manager to use
isolation-level-for-create: transaction isolation level used when a Job Execution is created, so that several Job Executions cannot run the same Job Instance; defaults to SERIALIZABLE
table-prefix: prefix of the metadata table names, set to BATCH_ here; defaults to BATCH_
max-varchar-length: maximum varchar length, set to 1000 here; defaults to 2500
-->
<batch:job-repository
id="jobRepository"
data-source="dataSource"
transaction-manager="transactionManager2"
isolation-level-for-create="SERIALIZABLE"
table-prefix="BATCH_"
max-varchar-length="1000"/>
<!-- Transaction manager for the database (used by the job repository above) -->
<bean id="transactionManager2" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
<property name="dataSource" ref="dataSource"/>
</bean>
<!-- Data source. NOTE(review): URL, user name and password are hard-coded here; consider externalising them -->
<bean id="dataSource" class="org.springframework.jdbc.datasource.DriverManagerDataSource">
<property name="driverClassName">
<value>com.mysql.jdbc.Driver</value>
</property>
<property name="url">
<value>jdbc:mysql://127.0.0.1:3306/spring_batch_demo2</value>
</property>
<property name="username" value="root"></property>
<property name="password" value="12345"></property>
</bean>
<!-- Job launcher, used to start the job -->
<bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<!-- Inject the jobRepository -->
<property name="jobRepository" ref="jobRepository"/>
</bean>
<!-- Transaction manager that gives Spring Batch transactional behaviour for data operations.
NOTE(review): the job repository is wired to transactionManager2 while this resourceless manager is a separate bean; confirm that the split is intended -->
<bean id="transactionManager" class="org.springframework.batch.support.transaction.ResourcelessTransactionManager"/>
<!-- Reads the credit-card bill file (CSV format) -->
<!-- FlatFileItemReader reads a text file -->
<bean id="csvItemReader" class="org.springframework.batch.item.file.FlatFileItemReader" scope="step">
<!-- The resource file to read -->
<property name="resource" value="classpath:demo2/data/demo2-inputFile.csv"/>
<!-- lineMapper turns one line of text into the domain object creditBill -->
<property name="lineMapper">
<bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
<!-- lineTokenizer defines how each line is split into fields -->
<property name="lineTokenizer" ref="lineTokenizer"/>
<!-- fieldSetMapper defines the mapping target, i.e. which Java object the fields are mapped to -->
<property name="fieldSetMapper">
<bean class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
<property name="prototypeBeanName" value="creditBill"/>
</bean>
</property>
</bean>
</property>
</bean>
<!-- lineTokenizer -->
<bean id="lineTokenizer" class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
<!-- Split each line on "," -->
<property name="delimiter" value=","/>
<!-- Property names, assigned in order to the tokens of the split line -->
<property name="names">
<list>
<value>accountID</value>
<value>name</value>
<value>amount</value>
<value>date</value>
<value>address</value>
</list>
</property>
</bean>
<!-- The entity bean (prototype scope) -->
<bean id="creditBill" class="com.xj.demo2.CreditBill" scope="prototype"></bean>
<!-- The item processor -->
<bean id="creditBillProcessor" class="com.xj.demo2.CreditBillProcessor" scope="step"></bean>
<!-- Writes the credit-card bill file (CSV format) -->
<bean id="csvItemWriter" class="org.springframework.batch.item.file.FlatFileItemWriter" scope="step">
<!-- Output file location: [classpath:] does not name a concrete directory, so [file:] is used here (path starts at the project root, per the author's note) -->
<property name="resource" value="file:src/main/resources/demo2/data/demo2-outputFile.csv"/>
<!-- lineAggregator: builds the text of each output line from an object -->
<property name="lineAggregator">
<!-- Spring Batch's DelimitedLineAggregator joins the individual properties into one line -->
<bean class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
<!-- Join with "," -->
<property name="delimiter" value=","/>
<!-- fieldExtractor pulls the listed properties out of the Java object as the array that is joined into the line -->
<property name="fieldExtractor">
<bean class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor">
<property name="names" value="accountID,name,amount,date,address">
</property>
</bean>
</property>
</bean>
</property>
</bean>
</beans>
二、配套数据库建立
1.寻找对应的数据库脚本语句
使用数据库的仓库时,需要首先根据Spring Batch框架提供的数据脚本完成数据库的初始化,数据库脚本的位置放在:
spring-batch-core\3.0.7.RELEASE\spring-batch-core-3.0.7.RELEASE.jar\org\springframework\batch\core\
Spring Batch框架JobRepository支持如下的数据库:DB2、Derby、H2、HSQLDB、MySQL、Oracle、PostgreSQL、SQLServer、Sybase。
因为我用的是MySQL数据库,所以我选择:schema-mysql.sql
schema-mysql.sql:
-- Autogenerated: do not edit this file
-- Job instances: one row per JOB_INSTANCE_ID; the (JOB_NAME, JOB_KEY) pair must be unique.
-- JOB_KEY is presumably a digest of the job parameters (Job Instance = Job Name + Job Parameters) - confirm.
CREATE TABLE BATCH_JOB_INSTANCE (
JOB_INSTANCE_ID BIGINT NOT NULL PRIMARY KEY ,
VERSION BIGINT ,
JOB_NAME VARCHAR(100) NOT NULL,
JOB_KEY VARCHAR(32) NOT NULL,
constraint JOB_INST_UN unique (JOB_NAME, JOB_KEY)
) ENGINE=InnoDB;
-- Job executions: one row per JOB_EXECUTION_ID with timestamps, status and exit code/message.
-- Each row must reference an existing job instance (JOB_INST_EXEC_FK).
CREATE TABLE BATCH_JOB_EXECUTION (
JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY ,
VERSION BIGINT ,
JOB_INSTANCE_ID BIGINT NOT NULL,
CREATE_TIME DATETIME NOT NULL,
START_TIME DATETIME DEFAULT NULL ,
END_TIME DATETIME DEFAULT NULL ,
STATUS VARCHAR(10) ,
EXIT_CODE VARCHAR(2500) ,
EXIT_MESSAGE VARCHAR(2500) ,
LAST_UPDATED DATETIME,
JOB_CONFIGURATION_LOCATION VARCHAR(2500) NULL,
constraint JOB_INST_EXEC_FK foreign key (JOB_INSTANCE_ID)
references BATCH_JOB_INSTANCE(JOB_INSTANCE_ID)
) ENGINE=InnoDB;
-- Job parameters recorded per job execution (JOB_EXEC_PARAMS_FK); no primary key is declared.
-- A value can be stored as string, date, long or double; TYPE_CD presumably names which column is used - confirm.
CREATE TABLE BATCH_JOB_EXECUTION_PARAMS (
JOB_EXECUTION_ID BIGINT NOT NULL ,
TYPE_CD VARCHAR(6) NOT NULL ,
KEY_NAME VARCHAR(100) NOT NULL ,
STRING_VAL VARCHAR(250) ,
DATE_VAL DATETIME DEFAULT NULL ,
LONG_VAL BIGINT ,
DOUBLE_VAL DOUBLE PRECISION ,
IDENTIFYING CHAR(1) NOT NULL ,
constraint JOB_EXEC_PARAMS_FK foreign key (JOB_EXECUTION_ID)
references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID)
) ENGINE=InnoDB;
-- Step executions: one row per STEP_EXECUTION_ID with status, exit code/message and
-- commit / read / filter / write / skip / rollback counters.
-- Each row must reference an existing job execution (JOB_EXEC_STEP_FK).
CREATE TABLE BATCH_STEP_EXECUTION (
STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY ,
VERSION BIGINT NOT NULL,
STEP_NAME VARCHAR(100) NOT NULL,
JOB_EXECUTION_ID BIGINT NOT NULL,
START_TIME DATETIME NOT NULL ,
END_TIME DATETIME DEFAULT NULL ,
STATUS VARCHAR(10) ,
COMMIT_COUNT BIGINT ,
READ_COUNT BIGINT ,
FILTER_COUNT BIGINT ,
WRITE_COUNT BIGINT ,
READ_SKIP_COUNT BIGINT ,
WRITE_SKIP_COUNT BIGINT ,
PROCESS_SKIP_COUNT BIGINT ,
ROLLBACK_COUNT BIGINT ,
EXIT_CODE VARCHAR(2500) ,
EXIT_MESSAGE VARCHAR(2500) ,
LAST_UPDATED DATETIME,
constraint JOB_EXEC_STEP_FK foreign key (JOB_EXECUTION_ID)
references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID)
) ENGINE=InnoDB;
-- Step execution context: at most one row per step execution, because
-- STEP_EXECUTION_ID is both the primary key and a foreign key to BATCH_STEP_EXECUTION.
CREATE TABLE BATCH_STEP_EXECUTION_CONTEXT (
STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY,
SHORT_CONTEXT VARCHAR(2500) NOT NULL,
SERIALIZED_CONTEXT TEXT ,
constraint STEP_EXEC_CTX_FK foreign key (STEP_EXECUTION_ID)
references BATCH_STEP_EXECUTION(STEP_EXECUTION_ID)
) ENGINE=InnoDB;
-- Job execution context: at most one row per job execution, because
-- JOB_EXECUTION_ID is both the primary key and a foreign key to BATCH_JOB_EXECUTION.
CREATE TABLE BATCH_JOB_EXECUTION_CONTEXT (
JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY,
SHORT_CONTEXT VARCHAR(2500) NOT NULL,
SERIALIZED_CONTEXT TEXT ,
constraint JOB_EXEC_CTX_FK foreign key (JOB_EXECUTION_ID)
references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID)
) ENGINE=InnoDB;
-- Counter table, presumably the id sequence for BATCH_STEP_EXECUTION - confirm.
-- UNIQUE_KEY is unique; the INSERT below seeds the row (0, '0') only when the table is empty.
CREATE TABLE BATCH_STEP_EXECUTION_SEQ (
ID BIGINT NOT NULL,
UNIQUE_KEY CHAR(1) NOT NULL,
constraint UNIQUE_KEY_UN unique (UNIQUE_KEY)
) ENGINE=InnoDB;
INSERT INTO BATCH_STEP_EXECUTION_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_STEP_EXECUTION_SEQ);
-- Counter table, presumably the id sequence for BATCH_JOB_EXECUTION - confirm.
-- UNIQUE_KEY is unique; the INSERT below seeds the row (0, '0') only when the table is empty.
CREATE TABLE BATCH_JOB_EXECUTION_SEQ (
ID BIGINT NOT NULL,
UNIQUE_KEY CHAR(1) NOT NULL,
constraint UNIQUE_KEY_UN unique (UNIQUE_KEY)
) ENGINE=InnoDB;
INSERT INTO BATCH_JOB_EXECUTION_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_JOB_EXECUTION_SEQ);
-- Counter table, presumably the id sequence for BATCH_JOB_INSTANCE - confirm.
-- UNIQUE_KEY is unique; the INSERT below seeds the row (0, '0') only when the table is empty.
CREATE TABLE BATCH_JOB_SEQ (
ID BIGINT NOT NULL,
UNIQUE_KEY CHAR(1) NOT NULL,
constraint UNIQUE_KEY_UN unique (UNIQUE_KEY)
) ENGINE=InnoDB;
INSERT INTO BATCH_JOB_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_JOB_SEQ);
2.创建数据库,运行schema-mysql.sql中的脚本
3.各个表的关系
(1)各个表的结构关系
(2)instance、execution和context之间的关系
1>一个Job可以拥有一到多个Step
2>一个Step可以有一到多个Step Execution(当一个Step执行失败,下次重新执行该任务时,会为该Step重新生成一个Step Execution)
3>一个Job Execution可以有一到多个Step Execution(当一个Job由多个Step组成时,每个Step执行都会生成一个新的Step Execution,则一个Job Execution会拥有多个Step Execution)
4>一个Job Execution对应一个Job Execution Context
5>每个Step Execution对应一个Step Execution Context
6>同一个Job中的Step Execution共用Job Execution Context,因此如果同一个Job的不同Step间需要共享数据时,则可以通过Job Execution的上下文共享数据。
三、运行结果
1.运行结果:
控制台:
demo2-outputFile.csv:
batch_job_instance:
batch_job_execution:
batch_step_execution:
四、一些问题总结
1.当程序第一次运行成功之后,demo2-outputFile文件被成功写入,再重新将demo2-outputFile文件清空,再次运行程序时,发现程序运行成功,没有报错,但是demo2-outputFile文件里却没有被写入,这是为什么呢?
查看batch_job_execution表发现这样的信息:
大概意思是:所有的steps已经执行完毕了,或者该job没有配置step。
查看batch_step_execution表:
分析:
当执行一个Job的时候,框架会根据Job Name和Job Parameters找到(不存在时才创建)对应的Job Instance,并为它生成一个新的Job Execution;在根据step的id(即“billStep”)生成对应的Step Execution时,发现该step上一次的状态是“COMPLETED”,即已经执行完毕了。这时就不再执行该id为“billStep”的step,而是直接在batch_job_execution中记录一条信息,表示没有可执行的step。
解决方法:
在batch_step_execution中删除执行成功的step的信息,因为涉及到外键,所以先要把batch_step_execution_context中对应的信息删除,执行脚本:
-- Remove the record of the completed step execution (id 1) so the step can run again.
-- The context row goes first because it references BATCH_STEP_EXECUTION (STEP_EXEC_CTX_FK).
-- The id is a numeric literal: STEP_EXECUTION_ID is BIGINT, and a double-quoted "1"
-- is parsed as an identifier when sql_mode contains ANSI_QUOTES.
-- Table names are upper case, as created by schema-mysql.sql, so the script also
-- works on servers with case-sensitive table names.
DELETE FROM BATCH_STEP_EXECUTION_CONTEXT WHERE STEP_EXECUTION_ID = 1;
DELETE FROM BATCH_STEP_EXECUTION WHERE STEP_EXECUTION_ID = 1;
再执行程序,就会发现又可以写入了:
控制台:
其他解决方法:
1>更改Step的id名称,即将“billStep”更换成其他名称也可以执行。
2>更改Job的名称,即将“billJob”更换成其他名称也可以执行。
3>若不想更改Job的名称,可以为“billJob”添加参数也行,毕竟Job Instance = Job Name + Job Parameters。
见下面代码:
BatchMain.java:
package com.xj.demo2;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.job.builder.JobBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
/**
 * Launcher for the demo2 batch job, passing a fixed {@code date} job parameter.
 *
 * <p>Loads the Spring context from {@code demo2/job/demo2-job.xml}, looks up the
 * {@code jobLauncher} and {@code billJob} beans, runs the job with the String
 * parameter {@code date=20211108} and prints the resulting {@link JobExecution}.
 *
 * @author xjfu
 * @since 2021/11/04 20:01
 */
public class BatchMain {

    /**
     * Runs {@code billJob} once with the {@code date} parameter.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the context (and the beans it owns) on every
        // exit path; previously the context was never closed.
        try (ClassPathXmlApplicationContext context =
                new ClassPathXmlApplicationContext("demo2/job/demo2-job.xml")) {
            // Spring Batch job launcher
            JobLauncher launcher = context.getBean("jobLauncher", JobLauncher.class);
            // The job declared in demo2-job.xml
            Job job = context.getBean("billJob", Job.class);
            try {
                // Run the job and capture the outcome (arguments: job to run, its parameters)
                JobExecution result = launcher.run(job, new JobParametersBuilder().addString("date","20211108").toJobParameters());
                System.out.println(result.toString());
            } catch (Exception e) {
                // Launch failures are reported on stderr, as before
                e.printStackTrace();
            }
        }
    }
}
可以看出,其中为billJob新增了String类型的参数“date”:“20211108”:
// Run the job and capture the outcome (arguments: the job to run, its job parameters)
JobExecution result = launcher.run(job, new JobParametersBuilder().addString("date","20211108").toJobParameters());
运行程序后,查看数据库:
batch_job_instance表:
batch_job_execution表:
batch_step_execution表:
有一点儿需要说明,采用Job Instance = Job Name + Job Parameters这种方式后,若第一次Job已经运行成功了,在不改变Job Name和Job Parameters时,再次运行程序,会报错:
org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException: A job instance already exists and is complete for parameters={date=20211108}. If you want to run this job again, change the parameters.
如果出现这种情况呢,解决方法可以考虑从“Job Instance = Job Name + Job Parameters”入手,我们一般不会变动“Job Name”,所以我们就从“Job Parameters”入手,即赋予一个每次执行时都会自动变动的参数。对BatchMain.java代码变更为如下:
package com.xj.demo2;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
/**
 * Launcher used to test the database-backed Job Repository.
 *
 * <p>Loads the Spring context from {@code demo2/job/demo2-job.xml}, looks up the
 * {@code jobLauncher} and {@code billJob} beans, and runs the job with a
 * {@code time} parameter set to the current time in milliseconds, so the job
 * parameters differ from one run to the next.
 *
 * @author xjfu
 * @since 2021/11/04 20:01
 */
public class BatchMain {

    /**
     * Runs {@code billJob} once with a fresh {@code time} parameter.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the context (and the beans it owns) on every
        // exit path; previously the context was never closed.
        try (ClassPathXmlApplicationContext context =
                new ClassPathXmlApplicationContext("demo2/job/demo2-job.xml")) {
            // Spring Batch job launcher
            JobLauncher launcher = context.getBean("jobLauncher", JobLauncher.class);
            // The job declared in demo2-job.xml
            Job job = context.getBean("billJob", Job.class);
            try {
                // Run the job and capture the outcome (arguments: job to run, its parameters)
                JobExecution result = launcher.run(job, new JobParametersBuilder()
                        // A changing parameter avoids the "A job instance already exists
                        // and is complete for parameters" exception on repeated runs
                        .addLong("time", System.currentTimeMillis())
                        .toJobParameters());
                System.out.println(result.toString());
            } catch (Exception e) {
                // Launch failures are reported on stderr, as before
                e.printStackTrace();
            }
        }
    }
}
2.删除数据库的脚本
若是数据库表建立之后,想要清空数据表或者删除数据表时,因为有外键相关联,所以操作表需要一定的顺序:
清空表:
-- Empty all Spring Batch metadata tables.
-- Child tables are cleared before the tables they reference, so no foreign key is violated.
-- Table names are upper case, as created by schema-mysql.sql, so the script also
-- works on servers with case-sensitive table names.
DELETE FROM BATCH_STEP_EXECUTION_CONTEXT;
DELETE FROM BATCH_JOB_EXECUTION_CONTEXT;
DELETE FROM BATCH_STEP_EXECUTION;
DELETE FROM BATCH_JOB_EXECUTION_PARAMS;
DELETE FROM BATCH_JOB_EXECUTION;
DELETE FROM BATCH_JOB_INSTANCE;
-- Reset the sequence tables. schema-mysql.sql seeds each of them with the row (0, '0');
-- that row is restored here because id generation needs an existing row to increment.
DELETE FROM BATCH_STEP_EXECUTION_SEQ;
DELETE FROM BATCH_JOB_EXECUTION_SEQ;
DELETE FROM BATCH_JOB_SEQ;
INSERT INTO BATCH_STEP_EXECUTION_SEQ (ID, UNIQUE_KEY) VALUES (0, '0');
INSERT INTO BATCH_JOB_EXECUTION_SEQ (ID, UNIQUE_KEY) VALUES (0, '0');
INSERT INTO BATCH_JOB_SEQ (ID, UNIQUE_KEY) VALUES (0, '0');
删除表:
-- Drop all Spring Batch metadata tables.
-- Tables holding foreign keys are dropped before the tables they reference.
-- IF EXISTS makes the script safe to re-run after a partial drop.
-- Table names are upper case, as created by schema-mysql.sql, so the script also
-- works on servers with case-sensitive table names.
DROP TABLE IF EXISTS BATCH_STEP_EXECUTION_CONTEXT;
DROP TABLE IF EXISTS BATCH_JOB_EXECUTION_CONTEXT;
DROP TABLE IF EXISTS BATCH_STEP_EXECUTION;
DROP TABLE IF EXISTS BATCH_JOB_EXECUTION_PARAMS;
DROP TABLE IF EXISTS BATCH_JOB_EXECUTION;
DROP TABLE IF EXISTS BATCH_JOB_INSTANCE;
DROP TABLE IF EXISTS BATCH_STEP_EXECUTION_SEQ;
DROP TABLE IF EXISTS BATCH_JOB_EXECUTION_SEQ;
DROP TABLE IF EXISTS BATCH_JOB_SEQ;
五、参考
1.Spring Batch : A job instance already exists and is complete for parameters={}