环境准备
pom.xml
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.11.0.0</version>
</dependency>
生产者API
需要用到的类:
KafkaProducer:需要创建一个生产者对象,用来发送数据
ProducerConfig:获取所需的一系列配置参数
ProducerRecord:每条数据都要封装成一个 ProducerRecord 对象
无回调发送API
发送完成后,无法感知是否成功,不推荐使用
/**
 * Demonstrates the fire-and-forget producer API: records are handed to
 * {@code send} and the result of the send is never inspected.
 */
public class ProductorDemo {
    /** Producer configuration, filled in by {@link #beforePrepareProperties()} before each test. */
    private final Properties props = new Properties();

    /**
     * Populates the producer configuration. The {@link ProducerConfig} constants are used
     * instead of the raw string keys; the raw key is named in each comment.
     */
    @Before
    public void beforePrepareProperties() {
        // bootstrap.servers: Kafka cluster broker list
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop101:9092");
        // acks: "-1" is the numeric spelling of "all"
        props.put(ProducerConfig.ACKS_CONFIG, "-1");
        // retries: number of retries
        props.put(ProducerConfig.RETRIES_CONFIG, "1");
        // batch.size: batch size
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
        // linger.ms: wait time before a batch is sent
        props.put(ProducerConfig.LINGER_MS_CONFIG, 1);
        // buffer.memory: size of the RecordAccumulator buffer
        props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432);
        // key.serializer / value.serializer: keys and values are both sent as strings
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
    }

    /**
     * Sends 100 records (key and value are both the loop index) to topic {@code first}
     * without registering a callback.
     */
    @Test
    public void test无回调发送API() {
        // try-with-resources guarantees the producer is closed even if send() throws.
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 100; i++) {
                String payload = Integer.toString(i);
                producer.send(new ProducerRecord<>("first", payload, payload));
            }
        }
    }
}
带回调API
可通过回调方法判断是否发送成功:public void onCompletion(RecordMetadata metadata, Exception exception)
发送成功时 exception 为 null,可从 metadata 获取发送结果;发送失败时 exception 不为空
/**
 * Sends one record to topic {@code first} and registers a completion callback
 * that prints the metadata and the exception it receives.
 */
@Test
public void test回调发送API() {
    // try-with-resources guarantees the producer is closed even if send() throws.
    try (Producer<String, String> producer = new KafkaProducer<>(props)) {
        for (int i = 0; i < 1; i++) {
            String payload = Integer.toString(i);
            producer.send(new ProducerRecord<>("first", payload, payload), (metadata, exception) -> {
                System.out.println("---------------------");
                System.out.println(metadata);
                System.out.println(exception);
                System.out.println("---------------------");
            });
        }
    }
}
同步发送API
以上两种方式调用 send 后都会立即返回,消息由后台线程发送,均为异步处理方式(区别仅在于是否通过回调获知发送结果)。
有些场景需要同步发送,即发送时需要等待发送成功或者失败再继续向下处理
/**
 * Sends one record to topic {@code first} synchronously: the future returned by
 * {@code send} is awaited with {@code get()} before the loop continues.
 *
 * @throws ExecutionException   if the awaited send failed
 * @throws InterruptedException if the thread is interrupted while waiting for the send
 */
@Test
public void test同步发送API() throws ExecutionException, InterruptedException {
    // try-with-resources guarantees the producer is closed even when get() throws.
    try (Producer<String, String> producer = new KafkaProducer<>(props)) {
        for (int i = 0; i < 1; i++) {
            String payload = Integer.toString(i);
            // get() blocks until the send has completed or failed
            producer.send(new ProducerRecord<>("first", payload, payload)).get();
            System.out.println("发送【"+i+"】完成");
        }
    }
}
消费者API
同步提交API
- 为保证消费消息不丢失,需要保证关闭自动提交
- 手动提交 offset 的方法有两种:分别是 commitSync(同步提交)和 commitAsync(异步提交)。
两者的相同点是,都会将本次 poll 的一批数据最高的偏移量提交;不同点是,
commitSync 阻塞当前线程,一直到提交成功,并且失败时会自动重试(但由于不可控因素,
仍有可能提交失败);而 commitAsync 则没有失败重试机制,故有可能提交失败。
重点代码
// Disable auto-commit
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,false);
// After the data has been processed, commit the offset manually and synchronously.
// NOTE: commit once per polled batch; do not call this after every single record.
consumer.commitSync();
/**
 * Demonstrates manual, synchronous offset commits with auto-commit disabled.
 */
public class ConsumerDemo {
    /** Consumer configuration, filled in by {@link #beforePrepareProperties()} before each test. */
    private final Properties props = new Properties();

    /** Populates the consumer configuration used by the tests. */
    @Before
    public void beforePrepareProperties() {
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop101:9092");
        // Disable auto-commit so offsets are committed only by the explicit commit call below
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "xbz-study-01");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    }

    /**
     * Polls topic {@code first} forever, prints every record, and commits the offsets
     * synchronously after each polled batch. The loop only ends when an exception is thrown.
     */
    @Test
    public void test同步提交Offset的API() {
        // try-with-resources closes the consumer when an exception breaks out of the loop.
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Arrays.asList("first"));
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("offset = %d, key = %s, value= %s%n", record.offset(), record.key(), record.value());
                }
                // Synchronous commit: the current thread blocks until the offset commit succeeds
                consumer.commitSync();
            }
        }
    }
}
异步提交API
因为同步提交会造成程序阻塞,影响效率,所以大部分时候,我们使用异步提交
/**
 * Polls topic {@code first} forever, prints every record, and commits the offsets
 * asynchronously after each polled batch. Commit failures are reported through the
 * callback. The loop only ends when an exception is thrown.
 */
@Test
public void test异步提交Offset的API() {
    // try-with-resources closes the consumer when an exception breaks out of the loop.
    try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
        consumer.subscribe(Arrays.asList("first"));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("offset = %d, key = %s, value= %s%n", record.offset(), record.key(), record.value());
            }
            // Asynchronous commit: the outcome is delivered to the callback instead of
            // being awaited here.
            consumer.commitAsync((offsets, exception) -> {
                if (exception != null) {
                    System.out.println("提交 offset 失败");
                    // Report which offsets failed and why, so the failure can be diagnosed
                    System.out.println(offsets + " : " + exception);
                }
            });
        }
    }
}
数据漏消费和重复消费
无论是同步提交还是异步提交 offset,都有可能会造成数据的漏消费或者重复消费。先
提交 offset 后消费,有可能造成数据的漏消费;而先消费后提交 offset,有可能会造成数据
的重复消费。
所以实际开发中,需要保证消费逻辑的幂等性。
生产者添加拦截器
实现org.apache.kafka.clients.producer.ProducerInterceptor
案例:
发送消息时,为每条消息增加一个名为 my_timestamp 的时间戳 header
拦截器类
package com.xbz.study.bigdata.kafka;
import org.apache.kafka.clients.producer.ProducerInterceptor;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.header.Headers;
import org.apache.kafka.common.header.internals.RecordHeader;
import org.apache.kafka.common.header.internals.RecordHeaders;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.Map;
/**
 * Producer interceptor that attaches a {@code my_timestamp} header, holding the current
 * time in epoch milliseconds as a UTF-8 decimal string, to every record being sent.
 *
 * @param <K> record key type
 * @param <V> record value type
 */
public class TimestampInterceptor<K, V> implements ProducerInterceptor<K, V> {
    /** Name of the header added by {@link #onSend(ProducerRecord)}. */
    private static final String TIMESTAMP_HEADER = "my_timestamp";

    /**
     * Returns a copy of {@code record} carrying an extra {@code my_timestamp} header.
     * The incoming record is left unmodified, so re-sending the same record object
     * does not accumulate duplicate headers.
     *
     * @param record the record handed to the producer
     * @return a new record with the same topic, partition, timestamp, key and value,
     *         plus the original headers and the timestamp header
     */
    @Override
    public ProducerRecord<K, V> onSend(ProducerRecord<K, V> record) {
        // Copy the headers instead of mutating the caller's record in place
        Headers headers = new RecordHeaders(record.headers());
        byte[] sendTime = Long.toString(System.currentTimeMillis()).getBytes(StandardCharsets.UTF_8);
        headers.add(new RecordHeader(TIMESTAMP_HEADER, sendTime));
        return new ProducerRecord<>(record.topic(), record.partition(), record.timestamp(),
                record.key(), record.value(), headers);
    }

    /**
     * Called after the message has been successfully sent from the RecordAccumulator
     * to the Kafka broker, or when sending fails. Intentionally a no-op here.
     */
    @Override
    public void onAcknowledgement(RecordMetadata metadata, Exception exception) {
    }

    /** Releases resources; this interceptor holds none. */
    @Override
    public void close() {
    }

    /** Initialization hook for reading configuration; this interceptor needs none. */
    @Override
    public void configure(Map<String, ?> configs) {
    }
}
生产者中,加入拦截器
//设置拦截器
props.put(ProducerConfig.INTERCEPTOR_CLASSES_CONFIG, Arrays.asList(TimestampInterceptor.class.getName()));