1. Environment Preparation
1) Start the ZooKeeper and Kafka clusters, then open a console consumer against the Kafka cluster
[bigdata@hadoop003 kafka]$ bin/kafka-console-consumer.sh --zookeeper hadoop003:2181 --topic second
2) Add the pom dependency
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.11.0.0</version>
</dependency>
</dependencies>
2. Kafka Producer Java API
KafkaProducer data-sending flow
Kafka's producer sends messages asynchronously. Two threads take part in a send: the main thread and the Sender thread, together with one shared buffer, the RecordAccumulator. The main thread appends messages to the RecordAccumulator, and the Sender thread continuously pulls batches from the RecordAccumulator and sends them to the Kafka brokers.
Asynchronous sending does not conflict with acks: the producer keeps sending data without waiting for each response, and if a record goes unacknowledged for too long the producer sends it again.
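A caller that does need the acknowledgement can block on the Future<RecordMetadata> that send() returns. A minimal sketch (the class name SyncSendSketch is made up for illustration; broker address and topic follow the examples in this section):
package com.demo;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.util.Properties;
import java.util.concurrent.Future;
public class SyncSendSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "hadoop003:9092");
        props.put("acks", "all");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        // fire-and-forget: send() returns as soon as the record is handed to the RecordAccumulator
        Future<RecordMetadata> future = producer.send(new ProducerRecord<>("second", "hello"));
        // effectively synchronous: block until the broker acknowledges the record (per the acks setting)
        RecordMetadata metadata = future.get();
        System.out.println("acked: partition=" + metadata.partition() + ", offset=" + metadata.offset());
        producer.close();
    }
}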
Creating a producer
package com.demo;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;
/**
* @author ljh
* @create 2021-02-26 14:15
* Create the producer object
* Pass in the configuration (Properties) object
* Send the data, each record wrapped in a ProducerRecord
* Close the producer
*/
public class MyProducer {
public static void main(String[] args){
Properties props = new Properties();
// Kafka cluster broker list
props.put("bootstrap.servers", "hadoop003:9092");
// ack mode: 0, 1, or all (= -1)
props.put("acks", "all");
props.put(ProducerConfig.ACKS_CONFIG, "all");// equivalent to the line above
// number of retries
props.put("retries", 0);
// RecordAccumulator buffer size (bytes)
props.put("buffer.memory", 33554432);
// key serializer
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// value serializer
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
KafkaProducer<String, String> producer = new KafkaProducer<>(props);
for (int i = 0; i < 10; i++) {
//send the data
//("second", 0, key, value): write to an explicit partition
//("second", key, value): partition chosen from the key
//("second", value): no key, partitions assigned in round-robin fashion
producer.send(new ProducerRecord<>("second", "hello-" + i));
}
// The producer sends data in batches: a batch only goes out once batch.size or linger.ms is reached,
// e.g. props.put("batch.size", 16384); props.put("linger.ms", 1);
// If neither threshold is hit, close() flushes what is left in the buffer, so calling close() is essential.
producer.close();
}
}
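The three ProducerRecord constructor forms referred to in the comments above look like the following. This is only a fragment intended to sit inside MyProducer's main method; the key "k1" and the record values are placeholders:
// (topic, partition, key, value): write to an explicit partition, here partition 0
producer.send(new ProducerRecord<>("second", 0, "k1", "to-partition-0"));
// (topic, key, value): the partition is derived from the key
producer.send(new ProducerRecord<>("second", "k1", "keyed-record"));
// (topic, value): no key, partitions are assigned in round-robin fashion
producer.send(new ProducerRecord<>("second", "unkeyed-record"));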
Creating a producer with a callback
package com.demo;
import org.apache.kafka.clients.producer.*;
import java.util.Properties;
/**
* @author ljh
* @create 2021-02-26 14:15
* Create the producer object
* Pass in the configuration (Properties) object
* Send the data
* Close the producer
*/
public class CallBackProducer {
public static void main(String[] args){
Properties props = new Properties();
// Kafka cluster broker list
props.put("bootstrap.servers", "hadoop003:9092");
// ack mode: 0, 1, or all (= -1)
props.put("acks", "all");
// number of retries
props.put("retries", 0);
// RecordAccumulator buffer size (bytes)
props.put("buffer.memory", 33554432);
// key serializer
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// value serializer
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
KafkaProducer<String, String> producer = new KafkaProducer<>(props);
for (int i = 0; i < 10; i++) {
//send the data
//("second", 0, key, value): write to an explicit partition
//("second", key, value): partition chosen from the key
//("second", value): no key, partitions assigned in round-robin fashion
producer.send(new ProducerRecord<>("second", "hello-" + i), new Callback() {
public void onCompletion(RecordMetadata recordMetadata, Exception e) {
if (e == null) {
System.out.println(recordMetadata.topic() + "-" + recordMetadata.partition() + "-" + recordMetadata.offset());
} else {
// without this branch a failed send would be silently swallowed
e.printStackTrace();
}
}
});
}
producer.close();
}
}
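Since Callback declares a single method, onCompletion, the anonymous class above can also be written as a Java 8 lambda; a sketch of just the send call inside the loop:
producer.send(new ProducerRecord<>("second", "hello-" + i), (recordMetadata, e) -> {
    if (e == null) {
        System.out.println(recordMetadata.topic() + "-" + recordMetadata.partition() + "-" + recordMetadata.offset());
    } else {
        e.printStackTrace();
    }
});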
Producer with a custom partitioner
1) Requirement: store all records in one fixed partition of the topic (partition 2 in the code below)
2) Custom partitioner
package com.demo;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import java.util.Map;
/**
* @author ljh
* @create 2021-02-26 18:16
*/
public class CustomPartitioner implements Partitioner {
public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
//Partition selection; more elaborate schemes (e.g. by value) can be plugged in here as the business requires, see DefaultPartitioner (a key-hash sketch follows after this class)
return 2;// every record goes to partition 2
}
public void close() {
}
public void configure(Map<String, ?> map) {
}
}
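For comparison, a sketch of a partitioner that spreads keyed records across all of the topic's partitions by key hash (the class name KeyHashPartitioner is made up; the real DefaultPartitioner uses murmur2 hashing and also round-robins records that have no key):
package com.demo;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import java.util.Arrays;
import java.util.Map;
public class KeyHashPartitioner implements Partitioner {
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionCountForTopic(topic);
        if (keyBytes == null) {
            return 0; // no key: fall back to a fixed partition in this sketch
        }
        // mask the sign bit so the result is never negative, then take it modulo the partition count
        return (Arrays.hashCode(keyBytes) & Integer.MAX_VALUE) % numPartitions;
    }
    public void close() {
    }
    public void configure(Map<String, ?> map) {
    }
}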
3) Calling code
package com.demo;
import org.apache.kafka.clients.producer.*;
import java.util.Properties;
/**
* @author ljh
* @create 2021-02-26 18:17
*/
public class PartitionerProducer {
public static void main(String[] args) {
Properties props = new Properties();
// Kafka broker host and port
props.put("bootstrap.servers", "hadoop003:9092");
// wait for acknowledgement from all replicas
props.put("acks", "all");
// maximum number of send retries
props.put("retries", 0);
// send buffer size (bytes)
props.put("buffer.memory", 33554432);
// key serializer
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// value serializer
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// custom partitioner
props.put("partitioner.class", "com.demo.CustomPartitioner");
Producer<String, String> producer = new KafkaProducer<>(props);
for (int i=0;i<10;i++){
producer.send(new ProducerRecord<String, String>("second", "1", "bigdata" + i), new Callback() {
public void onCompletion(RecordMetadata recordMetadata, Exception e) {
if(e==null){
//observe which partition each record lands in
System.out.println(recordMetadata.partition()+"-"+recordMetadata.offset());
}
}
});
}
producer.close();
}
}
3. Kafka Consumer Java API
High-level API
1) Start a console producer
[root@hadoop003 kafka]$ bin/kafka-console-producer.sh --broker-list hadoop003:9092 --topic second
>hello world
2) Example based on the official documentation (offsets are maintained automatically)
package com.bigdata.kafka.consumer.high;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.util.Arrays;
import java.util.Properties;
public class CustomNewConsumer {
public static void main(String[] args) {
Properties props = new Properties();
// Kafka broker addresses; the full broker list is not required
props.put("bootstrap.servers", "hadoop003:9092");
// consumer group
props.put("group.id", "test");
// whether to commit offsets automatically
props.put("enable.auto.commit", "true");
// interval between automatic offset commits
props.put("auto.commit.interval.ms", "1000");
// key deserializer class
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
// value deserializer class
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
// create the consumer
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
// topics to subscribe to; several topics can be subscribed at once
consumer.subscribe(Arrays.asList("first", "second","third"));
while (true) {
// poll for data, waiting at most 100 ms
ConsumerRecords<String, String> records = consumer.poll(100);
for (ConsumerRecord<String, String> record : records){
System.out.println("topic:"+record.topic()+",partition:"+record.partition()+",offset:"+record.offset()+",value:"+record.value());
}
}
}
}
A second version of the consumer, written with the ConsumerConfig constants:
package com.demo.customer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.util.Arrays;
import java.util.Properties;
/**
* @author ljh
* @create 2021-02-27 12:47
*/
public class MyConsumer {
public static void main(String[] args){
Properties properties = new Properties();
properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop003:9092");
// enable automatic offset commits
properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"true");
//properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");// disable auto commit (see the manual-commit sketch after this class)
// auto.offset.reset only takes effect in two cases: the group consumes for the first time (changing the group id triggers it again),
// or the committed offset no longer exists because the data was deleted after the 7-day retention period
// latest (default) or earliest
properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");// equivalent to --from-beginning
// interval between automatic offset commits
properties.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,"1000");
// key/value deserializers
properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serialization.StringDeserializer");
properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serialization.StringDeserializer");
// consumer group id
properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"big");
// create the consumer
KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
// subscribe to topics; subscribing to a topic that does not exist will not create it, but it does not throw an error either (a producer writing to a missing topic does auto-create it)
consumer.subscribe(Arrays.asList("second","a"));
while (true){
// poll in a loop; the argument is how long to wait for data before returning and polling again
ConsumerRecords<String, String> consumerRecords = consumer.poll(100);
for(ConsumerRecord<String, String> record:consumerRecords){
System.out.println(record.key()+"--"+record.topic()+"--"+record.partition()+"--"+record.value());
}
}
}
}
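If enable.auto.commit is switched to false (the commented-out line above), offsets have to be committed by hand. A minimal sketch of the poll loop with a synchronous commit; it reuses the properties object from MyConsumer with only the auto-commit flag changed:
properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // turn off auto commit
KafkaConsumer<String, String> manualConsumer = new KafkaConsumer<>(properties);
manualConsumer.subscribe(Arrays.asList("second"));
while (true) {
    ConsumerRecords<String, String> batch = manualConsumer.poll(100);
    for (ConsumerRecord<String, String> record : batch) {
        System.out.println(record.topic() + "--" + record.partition() + "--" + record.offset() + "--" + record.value());
    }
    // commit the offsets returned by the last poll; commitAsync() is the non-blocking alternative
    manualConsumer.commitSync();
}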
Low-level API
Use the low-level (SimpleConsumer) API to read data from a specified topic, partition, and offset.
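Note: the kafka.javaapi.* classes used below (SimpleConsumer, TopicMetadataRequest, FetchRequestBuilder, and so on) are not included in kafka-clients; they ship with the Kafka core artifact. Assuming the Scala 2.11 build of Kafka 0.11.0.0, a dependency along these lines would likely need to be added to the pom as well:
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.11.0.0</version>
</dependency>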
1) Method descriptions:
getLeaderBroker(): the client sends a topic-metadata request to the seed brokers and records the partition's replica set as backup nodes
getLastOffset(): the consumer sends an offset request to get the partition's earliest or latest offset
read(): the main method through which the low-level consumer fetches messages
getNewLeader(): when the partition's leader broker fails, the client finds the new leader among the replicas
2) Code
Simple version:
package com.bigdata.kafka.consumer.low;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.cluster.BrokerEndPoint;
import kafka.javaapi.*;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class EasyLowConsumer {
public static void main(String[] args) {
// brokers
List<String> brokers = new ArrayList<>();
brokers.add("hadoop003");
brokers.add("hadoop004");
brokers.add("hadoop005");
// port
int port = 9092;
// topic
String topic = "second";
// partition
int partition = 0;
// offset
long offset = 4;
EasyLowConsumer consumer = new EasyLowConsumer();
consumer.getData(brokers,port,topic,partition,offset);
}
public void getData(List<String> brokers,int port,String topic,int partition,long offset){
// 1. Find which broker hosts the leader of this partition
BrokerEndPoint leaderBroker = getLeaderBroker(brokers, port, topic, partition);
if(leaderBroker == null){
return;
}
// 2. Talk to that leader and fetch the data
String host = leaderBroker.host();
SimpleConsumer consumer = new SimpleConsumer(host, port, 2000, 4 * 1024, "getData");
//build a fetch request for the given topic, partition, and offset
FetchRequest build = new FetchRequestBuilder().addFetch(topic, partition, offset, 10000).build();
FetchResponse fetch = consumer.fetch(build);
ByteBufferMessageSet messageAndOffsets = fetch.messageSet(topic, partition);
for (MessageAndOffset messageAndOffset : messageAndOffsets) {
long offset1 = messageAndOffset.offset();
ByteBuffer payload = messageAndOffset.message().payload();
byte [] b = new byte[payload.limit()];
payload.get(b);
System.out.println("topic:"+topic+",partition:"+partition+",offset:"+offset1+",value:"+new String(b));
// break;
}
//release resources
consumer.close();
}
public BrokerEndPoint getLeaderBroker(List<String> brokers, int port, String topic, int partition){
for (String broker : brokers) {
SimpleConsumer getLeader = new SimpleConsumer(broker, port, 2000, 4 * 1024, "getLeader");
//create a topic metadata request; it could ask for the metadata of several topics, but here we only request the second topic
TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(Collections.singletonList(topic));
//the topic metadata response can hold metadata for several topics; here it only contains the second topic
TopicMetadataResponse topicMetadataResponse = getLeader.send(topicMetadataRequest);
List<TopicMetadata> topicMetadata = topicMetadataResponse.topicsMetadata();
for (TopicMetadata topicMetadatum : topicMetadata) {
//topicMetadatum holds the metadata of one topic, and that topic may have many partitions
List<PartitionMetadata> partitionMetadata = topicMetadatum.partitionsMetadata();
for (PartitionMetadata partitionMetadatum : partitionMetadata) {
//partitionMetadatum holds the metadata of one partition
if(partitionMetadatum.partitionId() == partition){
return partitionMetadatum.leader();
}
}
}
}
return null;
}
}
Complex version:
package com.bigdata.kafka.consumer.low;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.OffsetRequest;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.cluster.BrokerEndPoint;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.*;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;
import java.nio.ByteBuffer;
import java.util.*;
public class ComplexLowConsumer {
// After the leader is found for the first time, the partition's replicas are kept here so that, if that leader fails later,
// the search for a new leader only needs to look at this set: the new leader can only come from the replicas, which narrows the search.
private List<String> replicasList ;
public ComplexLowConsumer (){
replicasList = new ArrayList<>();
}
public static void main(String[] args) throws Exception {
// brokers
List<String> brokers = new ArrayList<>();
brokers.add("hadoop003");
brokers.add("hadoop004");
brokers.add("hadoop005");
// port
int port = 9092;
// topic
String topic = "second";
// partition
int partition = 0;
// number of records to read
int maxReads = 4;
ComplexLowConsumer consumer = new ComplexLowConsumer();
consumer.read(brokers,port,topic,partition,maxReads);
}
public void read(List<String> brokers,int port,String topic,int partition,int maxReads) throws Exception {
// 1. Get the leader broker for the given topic and partition
PartitionMetadata leaderBroker = getLeaderBroker(brokers, port, topic, partition);
if(leaderBroker == null){
System.out.println("error find leader broker from topic:"+topic+",and for partition:"+partition);
return;
}
if(leaderBroker.leader() == null){
System.out.println("error find leader broker from topic:"+topic+",and for partition:"+partition);
return;
}
String host = leaderBroker.leader().host();
String clientName = "client_name_"+topic+"_"+partition;
// 2. Loop and print the requested number of records
SimpleConsumer consumer = new SimpleConsumer(host, port, 2000, 4 * 1024, clientName);
long readOffset = getLastOffset(consumer, topic, partition, OffsetRequest.EarliestTime(), clientName);
int numErrors = 0;
int numRead = 0;
while(numRead < maxReads){
if(consumer == null){
consumer = new SimpleConsumer(host, port, 2000, 4 * 1024, clientName);
}
FetchRequest fetchRequest = new FetchRequestBuilder().addFetch(topic, partition, readOffset, 10000).build();
FetchResponse fetchResponse = consumer.fetch(fetchRequest);
if(fetchResponse.hasError()){
numErrors++;
short errorCode = fetchResponse.errorCode(topic, partition);
System.out.println("error get data,reason:"+errorCode);
if(numErrors > 5){
break;
}
if(errorCode == ErrorMapping.OffsetOutOfRangeCode()){
readOffset = getLastOffset(consumer, topic, partition, OffsetRequest.LatestTime(), clientName);
continue;
}
// Reaching this point means the leader broker currently in use has failed, so a new leader has to be found among the remaining replicas
consumer.close();
consumer = null;
host = getNewLeader(host,port,topic,partition);
continue;
}
ByteBufferMessageSet messageAndOffsets = fetchResponse.messageSet(topic, partition);
for (MessageAndOffset messageAndOffset : messageAndOffsets) {
long currentOffset = messageAndOffset.offset();
ByteBuffer payload = messageAndOffset.message().payload();
byte [] b = new byte[payload.limit()];
payload.get(b);
System.out.println("offset:"+currentOffset+",value:"+new String(b));
numRead ++;
if(numRead == maxReads){
break;
}
readOffset = messageAndOffset.nextOffset();
}
}
consumer.close();
}
// Find a new leader among the partition's replicas
public String getNewLeader(String oldLeader,int port,String topic,int partition) throws Exception {
for (int i = 0;i < 3;i ++){
PartitionMetadata leaderBroker = getLeaderBroker(replicasList, port, topic, partition);
boolean gotoSleep = false;
if(leaderBroker == null){
gotoSleep = true;
} else if(leaderBroker.leader() == null){
gotoSleep = true;
} else if(leaderBroker.leader().host().equalsIgnoreCase(oldLeader) && i==0){
gotoSleep = true;
}else {
return leaderBroker.leader().host();
}
// if no leader broker could be obtained, sleep before retrying
if(gotoSleep){
Thread.sleep(10000);
}
}
System.out.println("error get new leader for topic:"+topic+",and for partition:"+partition);
throw new Exception("error get new leader for topic:"+topic+",and for partition:"+partition);
}
public long getLastOffset(SimpleConsumer consumer,String topic,int partition,long whichTime,String clientName){
Map<TopicAndPartition,PartitionOffsetRequestInfo> requestInfo = new HashMap<>();
TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
PartitionOffsetRequestInfo partitionMetadata = new PartitionOffsetRequestInfo(whichTime,1);
requestInfo.put(topicAndPartition,partitionMetadata);
kafka.javaapi.OffsetRequest offsetRequest = new kafka.javaapi.OffsetRequest(requestInfo, OffsetRequest.CurrentVersion(), clientName);
OffsetResponse offsetsBefore = consumer.getOffsetsBefore(offsetRequest);
if(offsetsBefore.hasError()){
System.out.println("error get last offset");
return 0;
}
long[] offsets = offsetsBefore.offsets(topic, partition);
return offsets[0];
}
public PartitionMetadata getLeaderBroker(List<String> brokers, int port, String topic, int partition){
PartitionMetadata partMetadata = null;
loop:
for (String broker : brokers) {
SimpleConsumer getLeader = null;
try {
getLeader = new SimpleConsumer(broker, port, 2000, 4 * 1024, "getLeader");
//create a topic metadata request; it could ask for the metadata of several topics, but here we only request the second topic
TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(Collections.singletonList(topic));
//the topic metadata response can hold metadata for several topics; here it only contains the second topic
TopicMetadataResponse topicMetadataResponse = getLeader.send(topicMetadataRequest);
List<TopicMetadata> topicMetadata = topicMetadataResponse.topicsMetadata();
for (TopicMetadata topicMetadatum : topicMetadata) {
//topicMetadatum holds the metadata of one topic, and that topic may have many partitions
List<PartitionMetadata> partitionMetadata = topicMetadatum.partitionsMetadata();
for (PartitionMetadata partitionMetadatum : partitionMetadata) {
//partitionMetadatum holds the metadata of one partition
if(partitionMetadatum.partitionId() == partition){
partMetadata = partitionMetadatum;
break loop;
}
}
}
} catch (Exception e){
System.out.println("error get leader broker,reason:"+e.getMessage());
} finally {
if(getLeader != null){
getLeader.close();
}
}
}
if(partMetadata != null){
List<BrokerEndPoint> replicas = partMetadata.replicas();
for (BrokerEndPoint replica : replicas) {
replicasList.add(replica.host());
}
}
return partMetadata;
}
}
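For reference, the same task of reading a given topic, partition, and offset can also be done with the new consumer API already used in the high-level examples, via assign() and seek(). A minimal sketch (the class name AssignSeekSketch is made up; topic, partition, and starting offset follow the simple version above):
package com.demo.customer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import java.util.Collections;
import java.util.Properties;
public class AssignSeekSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "hadoop003:9092");
        props.put("enable.auto.commit", "false");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        // assign the partition directly instead of subscribing: no consumer group rebalancing involved
        TopicPartition tp = new TopicPartition("second", 0);
        consumer.assign(Collections.singletonList(tp));
        // start reading at offset 4
        consumer.seek(tp, 4);
        ConsumerRecords<String, String> records = consumer.poll(1000);
        for (ConsumerRecord<String, String> record : records) {
            System.out.println("partition:" + record.partition() + ",offset:" + record.offset() + ",value:" + record.value());
        }
        consumer.close();
    }
}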