Spring Batch Remote Partitioning Http

  • 参考:,谢谢大神
  • 基本的配置就不多说了,请参考上面文章的说明,或者下载其中的示例,看一下里面基于kafka的实现

Step Build

  1. 根据官方文档说明,远程分区需要引入spring-batch-integration依赖,并在配置类上添加@EnableBatchIntegration注解,之后即可自动注入RemotePartitioningManagerStepBuilderFactory、RemotePartitioningWorkerStepBuilderFactory,分别用于构建管理节点的step、工作节点的step

管理节点step构建示例如下:

/**
 * Declares the manager ("master") step of the remote-partitioning job.
 * <p>
 * The step partitions the work of {@code slaveStep} with a {@code FilePartitioner}, uses a grid
 * size of 1, sends partition requests on {@code requests()} and listens for worker replies on
 * {@code reply()}. No PartitionHandler is configured explicitly here.
 *
 * @param slaveStep the worker step bean qualified as "slaveStep"
 * @return the manager step named "masterStep"
 */
@Bean(name = "masterStep")
    public Step masterStep(@Qualifier("slaveStep") Step slaveStep) {
        // Name the two partitioner arguments up front so the builder chain reads top to bottom.
        String workerStepName = slaveStep.getName();
        FilePartitioner filePartitioner = new FilePartitioner();

        return managerStepBuilderFactory().get("masterStep")
                .partitioner(workerStepName, filePartitioner)
                .step(slaveStep)
                .gridSize(1)
                .aggregator(stepExecutionAggregator())
                .outputChannel(requests())
                .inputChannel(reply())
                .build();
    }

可以看到上面的代码中没有配置PartitionHandler,实际上RemotePartitioningManagerStepBuilder在build()时已经默认配置了PartitionHandler(MessageChannelPartitionHandler)。
RemotePartitioningManagerStepBuilderFactory的get方法如下:

/**
 * Creates a manager step builder for the given step name, pre-populated with this factory's
 * job repository, job explorer, bean factory and transaction manager.
 *
 * @param name the name of the step to build
 * @return a builder carrying the factory's shared infrastructure
 */
public RemotePartitioningManagerStepBuilder get(String name) {
		RemotePartitioningManagerStepBuilder stepBuilder = new RemotePartitioningManagerStepBuilder(name);
		return stepBuilder
				.repository(this.jobRepository)
				.jobExplorer(this.jobExplorer)
				.beanFactory(this.beanFactory)
				.transactionManager(this.transactionManager);
	}

再看RemotePartitioningManagerStepBuilder的build方法:

/**
 * Builds the manager step for remote partitioning.
 * <p>
 * A default {@code MessagingTemplate} targeting {@code outputChannel} is created when none was
 * supplied, a {@code MessageChannelPartitionHandler} is configured with it, and that handler is
 * installed on the parent builder before delegating to {@code super.build()}.
 *
 * @return the step produced by {@code super.build()}
 * @throws BeanCreationException if initialising or installing the partition handler fails
 */
public Step build() {
		// Only the "both set" combination is rejected; the check passes when either one is null.
		Assert.state(this.outputChannel == null || this.messagingTemplate == null,
				"You must specify either an outputChannel or a messagingTemplate but not both.");

		// configure messaging template
		if (this.messagingTemplate == null) {
			this.messagingTemplate = new MessagingTemplate();
			this.messagingTemplate.setDefaultChannel(this.outputChannel);
			if (this.logger.isDebugEnabled()) {
				this.logger.debug("No messagingTemplate was provided, using a default one");
			}
		}

		// Configure the partition handler
		final MessageChannelPartitionHandler partitionHandler = new MessageChannelPartitionHandler();
		partitionHandler.setStepName(getStepName());
		partitionHandler.setGridSize(getGridSize());
		partitionHandler.setMessagingOperations(this.messagingTemplate);
		// Choose between polling mode and reply-channel mode. Per the article, isPolling() is
		// decided by whether an inputChannel was configured (its body is not shown here).
		// In polling mode only the three properties below are set; note that no dataSource is set.
		if (isPolling()) {
			partitionHandler.setJobExplorer(this.jobExplorer);
			partitionHandler.setPollInterval(this.pollInterval);
			partitionHandler.setTimeout(this.timeout);
		}
		else {
			// Reply mode: messages from inputChannel are aggregated using the partition handler
			// as the processor, and the result is sent to a QueueChannel that is registered as
			// the handler's reply channel.
			PollableChannel replies = new QueueChannel();
			partitionHandler.setReplyChannel(replies);
			StandardIntegrationFlow standardIntegrationFlow = IntegrationFlows
					.from(this.inputChannel)
					.aggregate(aggregatorSpec -> aggregatorSpec.processor(partitionHandler))
					.channel(replies)
					.get();
			// The flow is registered with the IntegrationFlowContext with autoStartup disabled.
			IntegrationFlowContext integrationFlowContext = this.beanFactory.getBean(IntegrationFlowContext.class);
			integrationFlowContext.registration(standardIntegrationFlow)
					.autoStartup(false)
					.register();
		}

		try {
			// Finish configuring the partition handler; see the afterPropertiesSet() listing below.
			partitionHandler.afterPropertiesSet();
			// Install it as the step's default partitionHandler.
			super.partitionHandler(partitionHandler);
		}
		catch (Exception e) {
			throw new BeanCreationException("Unable to create a manager step for remote partitioning", e);
		}

		return super.build();
	}

下面看下MessageChannelPartitionHandler.afterPropertiesSet()

/**
 * Validates the handler's configuration and derives its operating mode.
 * <p>
 * Requires a step name and a messaging gateway. Repository polling is enabled when a
 * {@code dataSource} or a {@code jobExplorer} (or both) has been set; otherwise a default
 * {@code QueueChannel} is created as reply channel if none was provided.
 *
 * @throws Exception if creating a {@code JobExplorer} from the {@code dataSource} fails
 */
@Override
	public void afterPropertiesSet() throws Exception {
		Assert.notNull(stepName, "A step name must be provided for the remote workers.");
		Assert.state(messagingGateway != null, "The MessagingOperations must be set");

		// Pitfall worth noting: polling is switched on as soon as EITHER dataSource OR jobExplorer
		// is non-null. The manager step builder sets only the jobExplorer in polling mode, which
		// is already enough to satisfy this condition.
		pollRepositoryForResults = dataSource != null || jobExplorer != null;

		if (pollRepositoryForResults) {
			logger.debug("MessageChannelPartitionHandler is configured to poll the job repository for worker results");
		}
		else if (replyChannel == null) {
			// Not polling: make sure there is a channel to receive replies on.
			replyChannel = new QueueChannel();
		}

		// Only a dataSource was given: derive the JobExplorer from it.
		if (jobExplorer == null && dataSource != null) {
			JobExplorerFactoryBean explorerFactory = new JobExplorerFactoryBean();
			explorerFactory.setDataSource(dataSource);
			explorerFactory.afterPropertiesSet();
			jobExplorer = explorerFactory.getObject();
		}

	}

由于管理节点配置了inputChannel、outputChannel(也就是使用监听工作节点通知的方式),下面看下这两个通道的详细配置:

/**
 * Manager-side HTTP endpoint that receives worker replies.
 * <p>
 * Accepts POST requests on {@code /reply} with content type
 * {@code application/x-java-serialized-object} and forwards them to the {@code reply()} channel.
 * NOTE(review): this is an inbound gateway, and the article later reports HTTP 500 responses
 * because no reply is produced for it — confirm whether a one-way adapter fits better.
 *
 * @return the inbound integration flow
 */
@Bean
    public IntegrationFlow httpIn() {
        return IntegrationFlows
                .from(Http.inboundGateway("/reply")
                        .requestChannel(reply())
                        .requestMapping(mapping -> {
                            mapping.consumes("application/x-java-serialized-object");
                            mapping.methods(HttpMethod.POST);
                        })
                        // The header mapper is optional; without it the DefaultHttpHeaderMapper is used.
                        // If you rely on the default, remember to add correlationId, sequenceNumber and
                        // sequenceSize to its InboundHeaderNames / OutboundHeaderNames.
                        // These headers are required because the spring-integration aggregator's
                        // correlationStrategy groups the worker notifications by correlationId, and
                        // the releaseStrategy (SimpleSequenceSizeReleaseStrategy) reads sequenceSize
                        // from the header to decide that all notifications of a group have arrived.
                        .headerMapper(httpMapper())
                        // Another pitfall: the header keys arrive lower-cased, so each one is
                        // re-published below under its camel-case name.
                        .headerFunction("correlationId",
                                (HttpEntity<String> entity) -> entity.getHeaders().getFirst("correlationid"))
                        .headerFunction("sequenceNumber",
                                (HttpEntity<String> entity) -> Integer.parseInt(entity.getHeaders().getFirst("sequencenumber")))
                        .headerFunction("sequenceSize",
                                (HttpEntity<String> entity) -> Integer.parseInt(entity.getHeaders().getFirst("sequencesize")))
                        .messageConverters(new SerializingHttpMessageConverter())
                )
                .log()
                .get();
    }
    
    
    /**
     * Channel carrying partition requests: {@code masterStep} uses it as the manager step's
     * output channel and {@code httpOut} uses it as the source of the outbound HTTP flow.
     *
     * @return a new {@code DirectChannel}
     */
    @Bean
	public DirectChannel requests() {
		return new DirectChannel();
	}
    
    /**
     * Outbound flow that POSTs every message from {@code requests()} to the worker endpoint,
     * using Java serialization for the body and {@code httpMapper()} for header mapping.
     *
     * @return the outbound integration flow
     */
    @Bean
    public IntegrationFlow httpOut() {
        // Worker endpoint that accepts the step execution requests.
        final String workerEndpoint = "http://localhost:8088/store";

        return IntegrationFlows.from(requests())
                .handle(Http.outboundGateway(workerEndpoint)
                        .charset("UTF-8")
                        .httpMethod(HttpMethod.POST)
                        .messageConverters(new SerializingHttpMessageConverter())
                        .requestFactory(new OkHttp3ClientHttpRequestFactory())
                        .headerMapper(httpMapper()))
                .log()
                .get();
    }
    }

工作节点的Step构建

工作节点配置比较多样,如果是基于RemotePartitioningWorkerStepBuilderFactory配置workStep:

/**
 * Worker step built with the worker step builder factory: it receives requests on
 * {@code incomingRequestsFromManager()}, replies on {@code outgoingRepliesToManager()} and
 * processes items in chunks of 100 through the configured reader, processor and writer.
 *
 * @return the worker step named "workerStep"
 */
@Bean
        public Step workerStep() {
            final int chunkSize = 100;
            return this.workerStepBuilderFactory
                    .get("workerStep")
                    .inputChannel(incomingRequestsFromManager())
                    .outputChannel(outgoingRepliesToManager())
                    .chunk(chunkSize)
                    .reader(itemReader())
                    .processor(itemProcessor())
                    .writer(itemWriter())
                    .build();
        }

或者基于StepBuilderFactory配置workStep:

/**
 * Worker step built with the plain {@code stepBuilderFactory}: a chunk-oriented step over
 * {@code Person} items with a chunk size of 5.
 *
 * @param reader    supplies the {@code Person} items
 * @param writer    receives the processed items
 * @param processor transforms each item
 * @return the worker step named "slaveStep"
 */
@Bean(name = "slaveStep")
    public Step slaveStep(ItemReader<Person> reader, ItemWriter<Person> writer, ItemProcessor<Person, Person> processor) {
        final int chunkSize = 5;
        return stepBuilderFactory.get("slaveStep")
                .<Person, Person>chunk(chunkSize)
                .reader(reader)
                .processor(processor)
                .writer(writer)
                .build();
    }

然后通过@ServiceActivator注解配置进出通道;进出通道的配置与管理节点差不多,就不再列代码了。

/**
 * Service activator that runs a step execution request on the worker and returns the resulting
 * {@code StepExecution} as the reply message.
 */
@Service
public class StepExecutionActivator {

	private final StepExecutionRequestHandler stepExecutionRequestHandler;

	/**
	 * @param stepExecutionRequestHandler handler that actually executes the requested step
	 */
	public StepExecutionActivator(StepExecutionRequestHandler stepExecutionRequestHandler) {
		this.stepExecutionRequestHandler = stepExecutionRequestHandler;
	}

	/**
	 * Handles one request from the "inputRequest" channel and replies on "replyMaster".
	 * <p>
	 * The incoming header is read under its lower-cased key and written back as
	 * {@code correlationId}; the reply carries no other explicitly set header.
	 * NOTE(review): the method returns its result synchronously, so confirm that the
	 * {@code async} attribute has the intended effect here.
	 * NOTE(review): only correlationId is set on the reply — verify that sequenceNumber and
	 * sequenceSize reach the manager's aggregator by some other route.
	 *
	 * @param message       message whose payload is cast to {@code StepExecutionRequest}
	 * @param correlationId value of the "correlationid" header of the request
	 * @return a message wrapping the resulting {@code StepExecution}
	 * @throws MessagingException declared by the original signature
	 */
	@ServiceActivator(inputChannel = "inputRequest", outputChannel = "replyMaster", async = "true")
	public Message<StepExecution> handleMessage(Message<?> message, @Header(value = "correlationid") String correlationId) throws MessagingException {
		StepExecutionRequest request = (StepExecutionRequest) message.getPayload();
		StepExecution result = stepExecutionRequestHandler.handle(request);

		Map<String, Object> replyHeaders = new HashMap<>();
		replyHeaders.put("correlationId", correlationId);
		return new GenericMessage<>(result, new MessageHeaders(replyHeaders));
	}
}

以上两种方式配置workStep都有一个问题:如果workStep耗时太长(例如需要读取、解析较大的文件),管理节点的http请求就会超时,导致任务失败。因此工作节点对管理节点提供的服务需要通过Controller来实现,把StepExecutionRequestHandler的处理做成异步,快速给管理节点返回http响应。
工作节点异步处理StepExecutionRequestHandler的示例如下:

/**
 * Worker-side HTTP entry point for step execution requests sent by the manager.
 * <p>
 * The request is wrapped in an {@code AsynStepEvent} and handed to {@code PublishEventService};
 * the controller then answers immediately with a plain-text "success".
 * NOTE(review): the endpoint consumes Java-serialized request bodies — make sure it is only
 * reachable by trusted manager nodes.
 */
@RestController
public class RemoteStepController {

	@Autowired
	private PublishEventService publish;

	/**
	 * Accepts a step execution request and publishes it as an event.
	 *
	 * @param stepRequest the deserialized step execution request
	 * @param request     the raw HTTP request, used to read the correlation/sequence headers
	 * @return the literal string "success"
	 */
	@RequestMapping(value = "/store", method = RequestMethod.POST, consumes = "application/x-java-serialized-object", produces = MediaType.TEXT_PLAIN_VALUE)
	public String stepExection(@RequestBody StepExecutionRequest stepRequest, HttpServletRequest request) {
		// Headers that must be echoed back to the manager with the reply.
		String correlationId = request.getHeader("correlationId");
		String sequenceNumber = request.getHeader("sequenceNumber");
		String sequenceSize = request.getHeader("sequenceSize");

		AsynStepEvent stepEvent = new AsynStepEvent(RemoteStepController.class);
		stepEvent.setStepRequest(stepRequest);
		stepEvent.setCorrelationId(correlationId);
		stepEvent.setSequenceNumber(sequenceNumber);
		stepEvent.setSequenceSize(sequenceSize);

		// Event-driven hand-off so that StepExecutionRequestHandler runs asynchronously.
		publish.sendStep(stepEvent);
		return "success";
	}
}

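事件驱动的处理代码示例如下(注意:如果PublishEventService底层使用的是Spring的ApplicationEventPublisher,事件默认是同步分发的,需要配合@Async或为事件广播器配置线程池,才能真正做到异步):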

private final StepExecutionRequestHandler stepExecutionRequestHandler;

	/**
	 * Creates the event handler.
	 *
	 * @param stepExecutionRequestHandler handler used to execute the step carried by each event
	 */
	public AsynStepExectHandler(StepExecutionRequestHandler stepExecutionRequestHandler) {
		this.stepExecutionRequestHandler = stepExecutionRequestHandler;
	}
	
	/**
	 * Executes the step carried by the event, then posts the serialized {@code StepExecution}
	 * back to the manager's {@code /reply} endpoint together with the correlation and sequence
	 * headers taken from the event.
	 * <p>
	 * Failures while serializing or sending the reply are logged and not rethrown; an exception
	 * thrown by the step execution itself is outside the try block and propagates to the caller.
	 * NOTE(review): Spring delivers application events synchronously by default — confirm that
	 * the publishing side dispatches this listener on another thread, otherwise the HTTP request
	 * to the worker still blocks until the step has finished.
	 *
	 * @param event event carrying the step request and the headers to echo back
	 */
	@Override
	public void onApplicationEvent(AsynStepEvent event) {
		StepExecution stepExecution = stepExecutionRequestHandler.handle(event.getStepRequest());
		try {
			ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
			// try-with-resources closes (and thereby flushes) the object stream even when
			// writeObject fails, e.g. on a non-serializable field.
			try (ObjectOutputStream objectStream = new ObjectOutputStream(byteStream)) {
				objectStream.writeObject(stepExecution);
			}
			byte[] bytes = byteStream.toByteArray();

			HttpHeaders header = new HttpHeaders();
			header.put("correlationId", Arrays.asList(event.getCorrelationId()));
			header.put("sequenceNumber", Arrays.asList(event.getSequenceNumber()));
			header.put("sequenceSize", Arrays.asList(event.getSequenceSize()));
			header.put("Content-Type", Arrays.asList("application/x-java-serialized-object"));
			HttpUtil.post("http://127.0.0.1:8088/reply", header, bytes);
		} catch(Exception e) {
			// Best effort by design: the article notes the manager may answer this notification
			// with HTTP 500 even though the job still completes, so the error is only logged.
			log.error(e.getMessage(),e);
		}
	}

最后还有个问题:工作节点给管理节点发送通知时,由于管理节点不会返回任何内容,spring-integration会直接返回httpCode=500,但这并不影响任务的完成,工作节点在给管理节点发送http通知时捕获该异常、不做处理即可。也可以考虑把管理节点的Http.inboundGateway换成单向的Http.inboundChannelAdapter,这样就不需要等待回复。

示例代码地址:
https://gitee.com/lajigitsb/spring-batch-partition-http