本文默认大家使用的都是 Maven 工程。
第一步:
在 pom.xml 文件中引入 spring-kafka 依赖:
<!-- Spring for Apache Kafka. No <version> is declared here; presumably it is
     managed by a parent POM / BOM (e.g. Spring Boot) - confirm for your build. -->
<dependencies>
    <dependency>
        <groupId>org.springframework.kafka</groupId>
        <artifactId>spring-kafka</artifactId>
    </dependency>
</dependencies>
在项目中创建多线程消费者类。由于频繁创建和销毁线程会带来性能开销,所以这里先创建一个线程池来复用线程:
package com.adasplus.gps_handler.server;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@Slf4j
@Component
public class KafkaMessageListener{
private final int coreNum = Runtime.getRuntime().availableProcessors();
private ThreadPoolExecutor threadPoolExecutor;
@PostConstruct
private void initThreadPool() {
threadPoolExecutor = new ThreadPoolExecutor(coreNum,
2 * coreNum,
10,
TimeUnit.SECONDS,
new LinkedBlockingQueue<>(10000),
Executors.defaultThreadFactory(),
new ThreadPoolExecutor.DiscardOldestPolicy());
}
@Autowired
private GpsInsertWithMultiThreading gpsInsertWithMultiThreading;
@KafkaListener(topics = {"#{'${spring.kafka.consumer.topics}'.split(',')}"}, containerFactory = "batchFactory")
public void getMessage(List<ConsumerRecord<String, String>> records) {
try {
log.info("consumer kafka data one: {} ", records);
log.info("consumer kafka data count: {} ", records.size());
gpsInsertWithMultiThreading.setThreadPoolExecutor(threadPoolExecutor);
gpsInsertWithMultiThreading.execute(records);
} catch (Exception e) {
log.error("handle kafka data error:{} ,stack:{}", e.getMessage(), e.getStackTrace());
}
}
}
GpsInsertWithMultiThreading 是处理后续业务的中间层:它负责整理从 Kafka 拉取到的数据,再把具体的业务任务提交给线程池执行。
@Slf4j
@Service
@Data
public class GpsInsertWithMultiThreading {
//线程池
private ThreadPoolExecutor threadPoolExecutor;
//从kafka拉取下来的数据
private List<ConsumerRecord<String, String>> data
public void execute(List<ConsumerRecord<String, String>> data) {
try {
handle(data);
} catch (Exception e) {
e.printStackTrace();
log.error("处理异常,error:{},stack:{}", e.getMessage(), e.getStackTrace());
}
}
public void handle(List<ConsumerRecord<String, String>> data) {
//这个过程中可以对数据进行格式化,比如将data转为json
HashMap<String, ArrayList<JSONObject>> gpsMap = Maps.newHashMap();
startThreadPool(gpsMap);
}
private void startThreadPool(HashMap<String, ArrayList<JSONObject>> gpsMap) {
try {
//GpsHandlerCallable类为具体要执行的业务逻辑,比如写库,更新redis等,需是个线程
threadPoolExecutor.submit(new GpsHandlerCallable());
} catch (Exception e) {
log.error("失败,error:{},stackTrace:{}", e.getMessage(), e.getStackTrace());
}
}
}
那么 Kafka 的相关配置(比如连接 Kafka 的服务地址、消费组 id 等)放在哪里呢?从上面的图片中可以看到有一个 config 文件夹,我们在里面创建 BatchConsumerConfig 类,具体内容如下:
package com.adasplus.gps_handler.config;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.config.KafkaListenerContainerFactory;
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.core.DefaultKafkaConsumerFactory;
import java.util.HashMap;
import java.util.Map;
/**
 * Kafka consumer configuration. Exposes the {@code batchFactory} listener container
 * factory, which delivers records to listeners in batches.
 */
@Configuration
@EnableKafka
public class BatchConsumerConfig {

    /** Number of listener containers (consumer threads) created by the factory. */
    private static final int LISTENER_CONCURRENCY = 1;

    /** Poll timeout: 5 seconds. */
    private static final long POLL_TIMEOUT_MS = 5000;

    /** Session timeout: 5 minutes. */
    private static final int SESSION_TIMEOUT_MS = 5 * 60 * 1000;

    /** Heartbeat interval: 30 seconds. */
    private static final int HEARTBEAT_INTERVAL_MS = 30 * 1000;

    /** Maximum interval between polls: 10 minutes. */
    private static final int MAX_POLL_INTERVAL_MS = 10 * 60 * 1000;

    @Value("${spring.kafka.bootstrap-servers}")
    private String bootstrapServers;

    @Value("${spring.kafka.consumer.group-id}")
    private String groupId;

    @Value("${spring.kafka.consumer.max-poll-records}")
    private int maxPollRecords;

    @Value("${spring.kafka.consumer.enable-auto-commit}")
    private String autoCommit;

    @Value("${spring.kafka.consumer.auto-commit-interval}")
    private int autoCommitInterval;

    @Value("${spring.kafka.consumer.auto-offset-reset}")
    private String autoOffsetReset;

    /**
     * Container factory for multithreaded batch consumption.
     *
     * @return a factory with batch listening enabled; the batch size is governed by
     *         the consumer's {@code max.poll.records} setting
     */
    @Bean
    public KafkaListenerContainerFactory<?> batchFactory() {
        ConcurrentKafkaListenerContainerFactory<String, String> containerFactory =
                new ConcurrentKafkaListenerContainerFactory<>();
        // Deliver records to the listener as a list rather than one at a time.
        containerFactory.setBatchListener(true);
        // Controls how many consumer threads are started.
        containerFactory.setConcurrency(LISTENER_CONCURRENCY);
        containerFactory.setConsumerFactory(consumerFactory());
        containerFactory.getContainerProperties().setPollTimeout(POLL_TIMEOUT_MS);
        return containerFactory;
    }

    /**
     * Creates the consumer factory from {@link #consumerConfigs()}.
     *
     * @return a new consumer factory for String keys and values
     */
    public ConsumerFactory<String, String> consumerFactory() {
        Map<String, Object> settings = consumerConfigs();
        return new DefaultKafkaConsumerFactory<>(settings);
    }

    /**
     * Assembles the consumer properties.
     *
     * @return a mutable map of Kafka consumer settings
     */
    public Map<String, Object> consumerConfigs() {
        Map<String, Object> props = new HashMap<>();

        // --- connection and identity ---
        // Not every broker has to be listed (the rest of the cluster is discovered
        // automatically), but listing several protects against a single broker failure.
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);

        // --- deserialization of key and value ---
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);

        // --- batching: maximum number of records returned by one poll ---
        props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);

        // --- optional settings ---
        // Offset auto-commit. When true, offsets are committed automatically at the
        // frequency given by auto.commit.interval.ms. When false, the application
        // decides when a message counts as consumed and commits then, which is useful
        // when a message should not be considered consumed until its processing is done.
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, autoCommit);
        props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, autoCommitInterval);

        // Liveness timing.
        props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, SESSION_TIMEOUT_MS);
        props.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, HEARTBEAT_INTERVAL_MS);
        props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, MAX_POLL_INTERVAL_MS);

        // What to do when a partition has no committed offset or the offset is invalid:
        //   latest (default): start from the newest records (those produced after the
        //                     consumer started)
        //   earliest:         start from the beginning of the partition
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, autoOffsetReset);

        return props;
    }
}
介绍完配置,我们再来看具体业务类 GpsHandlerCallable 的实现。
创建线程任务的方式有很多,比如继承 Thread 类;这里我们选择实现 Runnable 接口,然后在线程池那边直接调用 submit 将任务提交到线程池。
@Slf4j
@Service
public class GpsHandlerCallable implements Runnable {
private Map<String, ArrayList<JSONObject>> gpsMap;
private String redisSetKey;
public GpsHandlerCallable(Map<String, ArrayList<JSONObject>> gpsMap) {
this.gpsMap = gpsMap;
}
@Autowired
private MongoTemplate mongoTemplate;
private final UpdateOptions updateOptions = new UpdateOptions().upsert(true);
@Override
public void run() {
if (this.gpsMap.size() > 0) {
handData();
}
}
public void handData() {
insertGps();
}
/**
* 清除报警数据(施工巡检)
*/
private void insertGps(){
mongoTemplate.insert(this.gpsMap,"goosTable");
}
}
好了,整体流程就结束了