1.环境准备

1)启动zk和Kafka集群,在Kafka集群中打开一个消费者
[bigdata@hadoop003 kafka]$ bin/kafka-console-consumer.sh \
--zookeeper hadoop003:2181 --topic second
2)导入pom依赖

<dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.11.0.0</version>
    </dependency>
</dependencies>

2.Kafka生产者Java API

KafkaProducer发送数据流程

Kafka 的 Producer 发送消息采用的是异步发送的方式。在消息发送的过程中,涉及到两个线程:main 线程和 Sender 线程,以及一个线程共享变量 RecordAccumulator。main 线程将消息发送给 RecordAccumulator,Sender 线程不断从 RecordAccumulator 中拉取消息发送到 Kafka broker。

异步发送和ack应答机制并不冲突:生产者可以持续发送数据而不必等待应答;如果某条数据迟迟没有收到应答,生产者会进行重发(重发次数由retries参数控制)。
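
下面用一小段代码示意这一点(示意代码,集群地址与主题沿用上文):send()的返回值是Future<RecordMetadata>,如果对返回值调用get(),就会阻塞等待broker按acks规则应答,相当于把异步发送改成同步发送。

package com.demo;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

import java.util.Properties;
import java.util.concurrent.Future;

public class SyncSendDemo {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "hadoop003:9092");
        props.put("acks", "all");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        // send()返回Future,get()会阻塞直到收到应答或抛出异常,即同步发送
        Future<RecordMetadata> future = producer.send(new ProducerRecord<String, String>("second", "hello-sync"));
        RecordMetadata metadata = future.get();
        System.out.println(metadata.topic() + "-" + metadata.partition() + "-" + metadata.offset());
        producer.close();
    }
}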


创建生产者

package com.demo;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;


/**
 * @author ljh
 * @create 2021-02-26 14:15
 * 创建生产者对象
 * 传入配置文件对象
 * 发送数据,数据封装在ProducerRecord对象
 * 关闭
 */
public class MyProducer {
    public static void main(String[] args){
        Properties props = new Properties();
        // 连接Kafka集群broker-list
        props.put("bootstrap.servers", "hadoop003:9092");
        // 应答机制,0,1,all=-1
        props.put("acks", "all");
        props.put(ProducerConfig.ACKS_CONFIG,"all");//等同
        // 重试次数
        props.put("retries", 0);
        //RecordAccumulator 缓冲区大小
        props.put("buffer.memory", 33554432);
        // key序列化
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // value序列化
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        for (int i = 0; i < 10; i++) {
            //发送数据,ProducerRecord有三种常用构造方式(完整示意见本段代码之后)
            //("second",0,key,"");指定分区
            //("second",key,"");指定key,根据key的hash分区
            //("second","");不指定分区和key,按轮询方式分配分区
            producer.send(new ProducerRecord<String, String>("second", "hello-" + i));
        }
        //生产者按批发送数据,达到批次大小(batch.size)或等待时间(linger.ms)才真正发送,
        // 例如 props.put("batch.size", 16384); props.put("linger.ms", 1);
        // 如果二者都没有达到条件,close会把缓冲区里剩余的数据发送出去,所以close至关重要
        producer.close();
    }
}
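
针对上面注释中提到的三种构造方式,这里给出一个简短示意(沿用上文已创建的producer对象,key与value仅为示例):

        // 1) 指定分区:消息固定写入0号分区
        producer.send(new ProducerRecord<String, String>("second", 0, "key1", "value-a"));
        // 2) 只指定key:按key的hash对分区数取模决定分区
        producer.send(new ProducerRecord<String, String>("second", "key1", "value-b"));
        // 3) 既不指定分区也不指定key:在可用分区间轮询
        producer.send(new ProducerRecord<String, String>("second", "value-c"));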

创建生产者带回调函数

package com.demo;

import org.apache.kafka.clients.producer.*;

import java.util.Properties;


/**
 * @author ljh
 * @create 2021-02-26 14:15
 * 创建生产者对象
 * 传入配置文件对象
 * 发送数据
 * 关闭
 */
public class CallBackProducer {
    public static void main(String[] args){
        Properties props = new Properties();
        // 连接Kafka集群broker-list
        props.put("bootstrap.servers", "hadoop003:9092");
        // 应答机制,0,1,all=-1
        props.put("acks", "all");

        // 重试次数
        props.put("retries", 0);
        //RecordAccumulator 缓冲区大小
        props.put("buffer.memory", 33554432);
        // key序列化
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // value序列化
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        for (int i = 0; i < 10; i++) {
            //发送数据
            //("second",0,key,"");指定分区
            //("second",key,"");指定key,根据key的hash分区
            //("second","");不指定分区和key,按轮询方式分配分区
            producer.send(new ProducerRecord<String, String>("second", "hello-" + i), new Callback() {
                public void onCompletion(RecordMetadata recordMetadata, Exception e) {
                    if(e==null){
                        System.out.println(recordMetadata.topic()+"-"+recordMetadata.partition()+"-"+recordMetadata.offset());
                    }
                }
            });
        }
     
        producer.close();
    }
}
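
上面的回调只处理了发送成功(e == null)的分支,实际使用时一般还要处理发送失败的情况,下面是一个补充示意(仍沿用上文的producer,失败时这里只是简单打印异常):

        producer.send(new ProducerRecord<String, String>("second", "hello"), new Callback() {
            public void onCompletion(RecordMetadata recordMetadata, Exception e) {
                if (e == null) {
                    System.out.println(recordMetadata.topic() + "-" + recordMetadata.partition() + "-" + recordMetadata.offset());
                } else {
                    // 发送失败:重试次数由retries参数控制,也可以在这里记录日志或做补偿
                    e.printStackTrace();
                }
            }
        });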

自定义分区生产者

1)需求:将所有数据发送到topic的某个指定分区(下面的示例固定写入2号分区)
2)自定义分区

package com.demo;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;

/**
 * @author ljh
 * @create 2021-02-26 18:16
 */
public class CustomPartitioner implements Partitioner {

    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        //分区,今后可根据业务需要做更复杂的分区,如按照value分区,参考DefaultPartitioner
        return 2;//数据都进分区2
    }

    public void close() {

    }

    public void configure(Map<String, ?> map) {

    }
}
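
上面partition()中的注释提到可以参考DefaultPartitioner按value做更复杂的分区,下面给出一个按value的hash取模分区的示意实现(类名ValuePartitioner为假设,使用时同样通过partitioner.class指定为com.demo.ValuePartitioner):

package com.demo;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;

/**
 * 按value的hash分区的示意实现
 */
public class ValuePartitioner implements Partitioner {

    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // 取出该主题的分区数,对value的hash取模,保证相同value进入同一分区
        int numPartitions = cluster.partitionCountForTopic(topic);
        if (value == null) {
            return 0;
        }
        // 与Integer.MAX_VALUE按位与,避免hashCode为负数时取模得到负的分区号
        return (value.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }

    public void close() {

    }

    public void configure(Map<String, ?> map) {

    }
}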

3)代码调用

package com.demo;

import org.apache.kafka.clients.producer.*;

import java.util.Properties;

/**
 * @author ljh
 * @create 2021-02-26 18:17
 */
public class PartitionerProducer {
    public static void main(String[] args) {

        Properties props = new Properties();
        // Kafka服务端的主机名和端口号
        props.put("bootstrap.servers", "hadoop003:9092");
        // 等待所有副本节点的应答
        props.put("acks", "all");
        // 消息发送最大尝试次数
        props.put("retries", 0);
        // 发送缓存区内存大小
        props.put("buffer.memory", 33554432);
        // key序列化
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // value序列化
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // 自定义分区
        props.put("partitioner.class", "com.demo.CustomPartitioner");

        Producer<String, String> producer = new KafkaProducer<>(props);
        for (int i=0;i<10;i++){
            producer.send(new ProducerRecord<String, String>("second", "1", "bigdata" + i), new Callback() {
                public void onCompletion(RecordMetadata recordMetadata, Exception e) {
                    if(e==null){
                        //观察数据的分区情况
                        System.out.println(recordMetadata.partition()+"-"+recordMetadata.offset());
                    }

                }
            });
        }


        producer.close();
    }
}

3.Kafka消费者Java API

高级API

1)在控制台创建发送者

[root@hadoop003 kafka]$ bin/kafka-console-producer.sh \
--broker-list hadoop003:9092 --topic second
>hello world


2)官方提供案例(自动维护消费情况)

package com.bigdata.kafka.consumer.high;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;

public class CustomNewConsumer {

    public static void main(String[] args) {

        Properties props = new Properties();
        // 定义kafka服务的地址,不需要把所有broker都指定上
        props.put("bootstrap.servers", "hadoop003:9092");
        // 指定consumer group
        props.put("group.id", "test");
        // 是否自动确认offset
        props.put("enable.auto.commit", "true");
        // 自动确认offset的时间间隔
        props.put("auto.commit.interval.ms", "1000");
        // key的反序列化类
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // value的反序列化类
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // 定义consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        // 消费者订阅的topic, 可同时订阅多个
        consumer.subscribe(Arrays.asList("first", "second","third"));

        while (true) {
            // 读取数据,读取超时时间为100ms
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records){
                System.out.println("主题:"+record.topic()+",分区:"+record.partition()+",offset:"+record.offset()+",value:"+record.value());
            }
        }
    }
}

3)使用ConsumerConfig常量配置的写法

package com.demo.customer;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;

/**
 * @author ljh
 * @create 2021-02-27 12:47
 */
public class MyConsumer {
    public static void main(String[] args){
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop003:9092");
        //开启自动提交
        properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"true");
        //properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");//关闭自动提交
        //两种情况下生效:消费者组第一次消费该主题(可以通过更改组名再次触发),或者数据保留期(默认7天)到期被删除,原来的offset已经不存在
        //latest(默认),earliest
        properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");//等同于命令行的--from-beginning
        //自动提交的时间间隔
        properties.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,"1000");
        //kv反序列化
        properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,"org.apache.kafka.common.serialization.StringDeserializer");
        //指定消费者组
        properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"big");
        //创建消费者
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
        //订阅主题,不存在的主题不会创建,但订阅也不会报错。生产者发送的主题不存在会自动创建
        consumer.subscribe(Arrays.asList("second","a"));
        while (true){
            //循环拉取数据;参数是没有数据时的最长等待时间(ms),超时后返回空集合,再进行下一次拉取
            ConsumerRecords<String, String> consumerRecords = consumer.poll(100);
            for(ConsumerRecord<String, String> records:consumerRecords){
                System.out.println(records.key()+"--"+records.topic()+"--"+records.partition()+"--"+records.value());
            }
        }
    }
}
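
上面注释提到可以把enable.auto.commit设为false关闭自动提交,此时需要在处理完数据后手动提交offset,下面是一个同步提交(commitSync)的最小示意(类名ManualCommitConsumer为假设,其余配置沿用上文思路):

package com.demo.customer;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;

public class ManualCommitConsumer {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop003:9092");
        //关闭自动提交,由程序自己决定何时提交offset
        properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "big");
        properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
        consumer.subscribe(Arrays.asList("second"));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.partition() + "--" + record.offset() + "--" + record.value());
            }
            //处理完本批数据后同步提交offset,提交失败会抛出异常
            consumer.commitSync();
        }
    }
}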

低级API

实现使用低级API读取指定topic,指定partition,指定offset的数据。
1)方法描述:
getLeaderBroker() 客户端向种子broker发送主题元数据请求,找出指定分区的leader,并把副本集保存为备用节点
getLastOffset() 消费者客户端发送偏移量请求,获取分区最近的偏移量
read() 消费者低级API拉取消息的主要方法
getNewLeader() 当分区的主副本节点发生故障,客户将要找出新的主副本
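
注意:前面pom中只引入了kafka-clients,而下面低级API用到的kafka.javaapi.consumer.SimpleConsumer、kafka.cluster.BrokerEndPoint等类位于Kafka服务端依赖中(例如org.apache.kafka:kafka_2.11,版本与集群保持一致,这里是0.11.0.0),运行前需要在pom里额外引入对应依赖。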
2)代码

简单版:

package com.bigdata.kafka.consumer.low;

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.cluster.BrokerEndPoint;
import kafka.javaapi.*;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class EasyLowConsumer {

    public static void main(String[] args) {
        // brokers
        List<String> brokers = new ArrayList<>();
        brokers.add("hadoop003");
        brokers.add("hadoop004");
        brokers.add("hadoop005");
        // 端口号
        int port = 9092;
        //主题
        String  topic = "second";
        //分区
        int partition = 0;
        // 偏移量
        long offset = 4;
        EasyLowConsumer consumer = new EasyLowConsumer();
        consumer.getData(brokers,port,topic,partition,offset);

    }

    public void getData(List<String> brokers,int port,String topic,int partition,long offset){

        // 1,找到该分区的leader在哪个broker上
        BrokerEndPoint leaderBroker = getLeaderBroker(brokers, port, topic, partition);
        if(leaderBroker == null){
            return;
        }
        // 2,跟该leader 通信,获取数据
        String host = leaderBroker.host();
        SimpleConsumer consumer = new SimpleConsumer(host, port, 2000, 4 * 1024, "getData");
        //构建fetch请求体,获取指定topic,partition,offset的数据
        FetchRequest build = new FetchRequestBuilder().addFetch(topic, partition, offset, 10000).build();
        FetchResponse fetch = consumer.fetch(build);
        ByteBufferMessageSet messageAndOffsets = fetch.messageSet(topic, partition);
        for (MessageAndOffset messageAndOffset : messageAndOffsets) {
            long offset1 = messageAndOffset.offset();
            ByteBuffer payload = messageAndOffset.message().payload();
            byte [] b = new byte[payload.limit()];
            payload.get(b);
            System.out.println("topic:"+topic+",partition:"+partition+",offset:"+offset1+",value:"+new String(b));
            // break;
        }
        //关闭资源
        consumer.close();


    }
    public BrokerEndPoint getLeaderBroker(List<String> brokers, int port, String topic, int partition){

        for (String broker : brokers) {
            SimpleConsumer getLeader = new SimpleConsumer(broker, port, 2000, 4 * 1024, "getLeader");
            //创建一个主题元数据请求对象,该对象可以设置获取多个主题的元数据信息,只不过在这里,我们只获取second主题的元数据
            TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(Collections.singletonList(topic));
            //主题的元数据响应对象,该对象能够存储多个主题的元数据信息,只不过在这里,只包含second主题的元数据
            TopicMetadataResponse topicMetadataResponse = getLeader.send(topicMetadataRequest);
            List<TopicMetadata> topicMetadata = topicMetadataResponse.topicsMetadata();
            for (TopicMetadata topicMetadatum : topicMetadata) {
                //topicMetadatum 这里面存储的是某个主题的元数据信息,该主题下可能有很多分区
                List<PartitionMetadata> partitionMetadata = topicMetadatum.partitionsMetadata();
                for (PartitionMetadata partitionMetadatum : partitionMetadata) {
                    //partitionMetadata 这里存储了某个分区的元数据的信息
                    if(partitionMetadatum.partitionId() == partition){
                        return partitionMetadatum.leader();
                    }
                }
            }
        }

        return null;
    }


}

复杂版:

package com.bigdata.kafka.consumer.low;

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.OffsetRequest;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.cluster.BrokerEndPoint;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.*;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;

import java.nio.ByteBuffer;
import java.util.*;

public class ComplexLowConsumer {

    //第一次找到leader之后,用于存放相应分区的replicas,目的是为了如果该leader挂掉之后,再次寻找新leader的时候
    // 只需要从这个集合寻找就可以了,因为新的leader只可能会出现在replicas当中,这样能够缩小查找新leader的范围。
    private List<String> replicasList ;

    public ComplexLowConsumer (){
        replicasList = new ArrayList<>();
    }

    public static void main(String[] args) throws Exception {
        // brokers
        List<String> brokers = new ArrayList<>();
        brokers.add("hadoop003");
        brokers.add("hadoop004");
        brokers.add("hadoop005");
        // 端口号
        int port = 9092;
        //主题
        String  topic = "second";
        //分区
        int partition = 0;
        // 读取的条数
        int maxReads = 4;
        ComplexLowConsumer consumer = new ComplexLowConsumer();
        consumer.read(brokers,port,topic,partition,maxReads);
    }

    public void read(List<String> brokers,int port,String topic,int partition,int maxReads) throws Exception {

        // 1 获取指定主题,分区的leader broker
        PartitionMetadata leaderBroker = getLeaderBroker(brokers, port, topic, partition);
        if(leaderBroker == null){
            System.out.println("error find leader broker from topic:"+topic+",and for partition:"+partition);
            return;
        }
        if(leaderBroker.leader() == null){
            System.out.println("error find leader broker from topic:"+topic+",and for partition:"+partition);
            return;
        }
        String host = leaderBroker.leader().host();
        String clientName = "client_name_"+topic+"_"+partition;
        // 2 循环打印指定条数的数据
        SimpleConsumer consumer = new SimpleConsumer(host, port, 2000, 4 * 1024, clientName);
        long readOffset = getLastOffset(consumer, topic, partition, OffsetRequest.EarliestTime(), clientName);
        int numErrors = 0;
        int numRead = 0;
        while(numRead < maxReads){
            if(consumer == null){
                consumer = new SimpleConsumer(host, port, 2000, 4 * 1024, clientName);
            }
            FetchRequest fetchRequest = new FetchRequestBuilder().addFetch(topic, partition, readOffset, 10000).build();
            FetchResponse fetchResponse = consumer.fetch(fetchRequest);
            if(fetchResponse.hasError()){
                numErrors++;
                short errorCode = fetchResponse.errorCode(topic, partition);
                System.out.println("error get data,reason:"+errorCode);
                if(numErrors > 5){
                    break;
                }
                if(errorCode == ErrorMapping.OffsetOutOfRangeCode()){
                    readOffset = getLastOffset(consumer, topic, partition, OffsetRequest.LatestTime(), clientName);
                    continue;
                }
                // 走到此处,证明现在所用的leader broker失效了,所以,要从剩余的副本中重新找出一个leader
                consumer.close();
                consumer = null;
                host = getNewLeader(host,port,topic,partition);
                continue;
            }

            ByteBufferMessageSet messageAndOffsets = fetchResponse.messageSet(topic, partition);
            for (MessageAndOffset messageAndOffset : messageAndOffsets) {
                long currentOffset = messageAndOffset.offset();
                ByteBuffer payload = messageAndOffset.message().payload();
                byte [] b = new byte[payload.limit()];
                payload.get(b);
                System.out.println("offset:"+currentOffset+",value:"+new String(b));
                numRead ++;
                if(numRead == maxReads){
                    break;
                }
                readOffset = messageAndOffset.nextOffset();
            }

        }
        consumer.close();
    }

    // 从分区的副本中找出新的leader
    public String getNewLeader(String oldLeader,int port,String topic,int partition) throws Exception {

        for (int i = 0;i < 3;i ++){
            PartitionMetadata leaderBroker = getLeaderBroker(replicasList, port, topic, partition);
            boolean gotoSleep = false;
            if(leaderBroker == null){
                gotoSleep = true;
            } else if(leaderBroker.leader() == null){
                gotoSleep = true;
            } else if(leaderBroker.leader().host().equalsIgnoreCase(oldLeader) && i==0){
                gotoSleep = true;
            }else {
                return leaderBroker.leader().host();
            }
            // 如果拿不到leader broker 则sleep
            if(gotoSleep){
                Thread.sleep(10000);
            }
        }

        System.out.println("error get new leader for topic:"+topic+",and for partition:"+partition);
        throw new Exception("error get new leader for topic:"+topic+",and for partition:"+partition);
    }

    public long getLastOffset(SimpleConsumer consumer,String topic,int partition,long whichTime,String clientName){
        Map<TopicAndPartition,PartitionOffsetRequestInfo> requestInfo = new HashMap<>();
        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
        PartitionOffsetRequestInfo partitionMetadata = new PartitionOffsetRequestInfo(whichTime,1);
        requestInfo.put(topicAndPartition,partitionMetadata);

        kafka.javaapi.OffsetRequest offsetRequest = new kafka.javaapi.OffsetRequest(requestInfo, OffsetRequest.CurrentVersion(), clientName);
        OffsetResponse offsetsBefore = consumer.getOffsetsBefore(offsetRequest);
        if(offsetsBefore.hasError()){
            System.out.println("error get last offset");
            return 0;
        }
        long[] offsets = offsetsBefore.offsets(topic, partition);
        return offsets[0];
    }

    public PartitionMetadata getLeaderBroker(List<String> brokers, int port, String topic, int partition){
        PartitionMetadata partMetadata = null;
        loop:
        for (String broker : brokers) {
            SimpleConsumer getLeader = null;
            try {
                getLeader = new SimpleConsumer(broker, port, 2000, 4 * 1024, "getLeader");
                //创建一个主题元数据请求对象,该对象可以设置获取多个主题的元数据信息,只不过在这里,我们只获取second主题的元数据
                TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(Collections.singletonList(topic));
                //主题的元数据响应对象,该对象能够存储多个主题的元数据信息,只不过在这里,只包含second主题的元数据
                TopicMetadataResponse topicMetadataResponse = getLeader.send(topicMetadataRequest);
                List<TopicMetadata> topicMetadata = topicMetadataResponse.topicsMetadata();
                for (TopicMetadata topicMetadatum : topicMetadata) {
                    //topicMetadatum 这里面存储的是某个主题的元数据信息,该主题下可能有很多分区
                    List<PartitionMetadata> partitionMetadata = topicMetadatum.partitionsMetadata();
                    for (PartitionMetadata partitionMetadatum : partitionMetadata) {
                        //partitionMetadata 这里存储了某个分区的元数据的信息
                        if(partitionMetadatum.partitionId() == partition){
                            partMetadata = partitionMetadatum;
                            break loop;
                        }
                    }
                }
            } catch (Exception e){
                System.out.println("error get leader broker,reason:"+e.getMessage());
            } finally {
                if(getLeader != null){
                  getLeader.close();
                }
            }
        }

        if(partMetadata != null){
            List<BrokerEndPoint> replicas = partMetadata.replicas();
            for (BrokerEndPoint replica : replicas) {
                replicasList.add(replica.host());
            }
        }
        return partMetadata;
    }

}