1. Kafka Java API Operations

1. Add the Maven dependencies

<dependencies>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.10.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-streams</artifactId>
            <version>0.10.0.0</version>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <!-- Java compiler plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.2</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>

2. Producer code

Kafka producer API documentation

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class MyProducer {
    /**
     * Produce messages into the Kafka topic "test"
     * @param args
     */

    public static void main(String[] args) throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node01:9092");
        props.put("acks", "all"); // acknowledgment level: wait for all in-sync replicas
        props.put("retries", 0); // how many times to retry a failed send
        props.put("batch.size", 16384); // batch size in bytes
        props.put("linger.ms", 1); // how long (ms) to wait before sending a batch that is not yet full
        props.put("buffer.memory", 33554432); // total memory available for buffering records
        // use StringSerializer for both the key and the value
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // create the KafkaProducer
        Producer<String,String> kafkaProducer = new KafkaProducer<>(props);

        // send messages in a loop
        for (int i = 0; i < 100; i++) {
            Thread.sleep(1200);
            kafkaProducer.send(new ProducerRecord<String, String>("test", "mymessage" + i)); // send "mymessage" + i to the test topic
        }
        // close the producer to release resources
        kafkaProducer.close();
    }
}
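
The acks and retries settings above only matter if the producer actually checks the outcome of each send. The sketch below is a minimal, hedged variant (the class name CallbackProducer and the trimmed property list are illustrative, not part of the original) that passes a Callback to send(), so each record's partition and offset, or the failure, is printed.

package it.yuge;

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

import java.util.Properties;

public class CallbackProducer {
    // hypothetical class name; a sketch of send() with a delivery Callback
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node01:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        Producer<String, String> producer = new KafkaProducer<>(props);
        for (int i = 0; i < 100; i++) {
            producer.send(new ProducerRecord<String, String>("test", "mymessage" + i), new Callback() {
                @Override
                public void onCompletion(RecordMetadata metadata, Exception exception) {
                    if (exception != null) {
                        // the send failed even after the configured retries
                        exception.printStackTrace();
                    } else {
                        System.out.println("partition=" + metadata.partition() + ", offset=" + metadata.offset());
                    }
                }
            });
        }
        producer.close();
    }
}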

2.1 Producer partitioning strategies

  • If a partition number is specified, the record is written directly to that partition.
  • If no partition number is given but the record has a key, the key's hashCode modulo the number of partitions decides which partition the record lands in.
  • If there is neither a partition number nor a key, records are spread across the partitions round-robin.

package it.yuge;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class PartitionProducer {
    /**
     * Produce data to Kafka with different partitioning strategies
     * @param args
     */
    public static void main(String[] args) throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node01:9092");
        props.put("acks", "all"); // acknowledgment level: wait for all in-sync replicas
        props.put("retries", 0); // how many times to retry a failed send
        props.put("batch.size", 16384); // batch size in bytes
        props.put("linger.ms", 1); // how long (ms) to wait before sending a batch that is not yet full
        props.put("buffer.memory", 33554432); // total memory available for buffering records
        // use StringSerializer for both the key and the value
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        
        // register the custom partitioner class
        props.put("partitioner.class", "it.yuge.MyPartition");

        // create the KafkaProducer
        Producer<String,String> kafkaProducer = new KafkaProducer<>(props);

        // send messages in a loop
        for (int i = 0; i < 100; i++) {
            
            // Strategy 1: no partition number and no key, so records are distributed round-robin across the partitions
            ProducerRecord<String, String> producerRecord1 = new ProducerRecord<>("mypartition", "message" + i);
            // Strategy 2: no partition number but a key, so key.hashCode() % numPartitions decides the partition
            ProducerRecord<String, String> producerRecord2 = new ProducerRecord<>("mypartition", "mykey", "mymessage" + i);
            // Strategy 3: an explicit partition number, so the record is written directly to that partition
            ProducerRecord<String, String> producerRecord3 = new ProducerRecord<>("mypartition", 0, "mykey", "mymessage" + i);
            
            // Custom partitioning: with partitioner.class set above, a record without an explicit
            // partition number is routed by MyPartition
            ProducerRecord<String, String> producerRecord4 = new ProducerRecord<>("mypartition", "mykey", "mymessage" + i);

            kafkaProducer.send(producerRecord1); // only producerRecord1 is sent here; swap in producerRecord2/3/4 to try the other strategies
        }
        // close the producer to release resources
        kafkaProducer.close();
    }
}

Custom partitioner class

package it.yuge;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;

public class MyPartition implements Partitioner {
    // this method decides which partition each record is written to
    // returning 0 sends every record to partition 0; returning 2, for example, would send every record to partition 2
    @Override
    public int partition(String s, Object o, byte[] bytes, Object o1, byte[] bytes1, Cluster cluster) {
        return 0;
    }

    @Override
    public void close() {

    }

    @Override
    public void configure(Map<String, ?> map) {

    }
}
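
For a partitioner that actually follows the key.hashCode % numPartitions rule described above, a minimal sketch could look like the following (the class name HashKeyPartition is illustrative, not from the original; register it via partitioner.class exactly like MyPartition).

package it.yuge;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;

public class HashKeyPartition implements Partitioner {
    // hypothetical example: route records by key.hashCode() % number of partitions
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionsForTopic(topic).size();
        if (key == null) {
            // records without a key all go to partition 0 in this sketch
            return 0;
        }
        // mask the sign bit so the result is always a valid partition number
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}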

3. Consumer code

Kafka consumer API documentation

  • offset: records how far consumption has progressed, so the next run continues from where the previous one stopped
  • automatic commit
  • manual commit

(1) Automatic offset commit

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;

public class MyConsumer {
    /**
     * Automatic offset commit
     * @param args
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node01:9092");
        props.put("group.id", "test_group"); // consumer group
        props.put("enable.auto.commit", "true"); // enable automatic offset commits
        props.put("auto.commit.interval.ms", "1000"); // interval between automatic commits
        props.put("session.timeout.ms", "30000"); // session timeout
        // use StringDeserializer for both the key and the value
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        // subscribe to the topic to consume
        consumer.subscribe(Arrays.asList("test"));
        // consume the test topic in an infinite loop
        while (true) {
            // records holds everything fetched by this poll
            ConsumerRecords<String, String> records = consumer.poll(1000); // block for up to 1000 ms waiting for records
            for (ConsumerRecord<String, String> record : records) {
                long offset = record.offset();
                String value = record.value();
                System.out.println("offset: " + offset + ", value: " + value);
            }
        }
    }
}
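
One related setting that the listing above leaves at its default is auto.offset.reset, which decides where a consumer group with no committed offset starts reading. A minimal addition to the Properties above, as a sketch:

        // start from the earliest available message when this group has no committed offset yet
        // (the default, "latest", only reads messages produced after the consumer starts)
        props.put("auto.offset.reset", "earliest");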

(2) Manual offset commit

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class ManualConsumer {
    /**
     * Manual offset commit
     * @param args
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node01:9092");
        props.put("group.id", "test_group");
        props.put("enable.auto.commit", "false"); // disable automatic commits; offsets are committed manually below
        props.put("auto.commit.interval.ms", "1000");
        props.put("session.timeout.ms", "30000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        
        // subscribe to the test topic
        consumer.subscribe(Arrays.asList("test"));

        final int minBatchSize = 100; // process a batch once 100 records have accumulated, then commit the offset
        // collect the fetched ConsumerRecord objects in this list
        List<ConsumerRecord<String, String>> consumerRecordList = new ArrayList<>();
        while (true) {
            ConsumerRecords<String, String> consumerRecords1 = consumer.poll(1000);
            for (ConsumerRecord<String, String> consumerRecord : consumerRecords1) {
                consumerRecordList.add(consumerRecord); // buffer each fetched record
                if (consumerRecordList.size() >= minBatchSize) {
                    // once the list holds at least minBatchSize (100) records, process them as one batch,
                    // e.g. save the whole batch to a database
                    //insertTODb(consumerRecordList); // JDBC pseudo-code

                    // committing the offset marks this whole batch as processed
                    //consumer.commitAsync(); // asynchronous commit: higher throughput, does not block the loop

                    // synchronous commit: blocks until the broker acknowledges the commit,
                    // which is safer but slower than the asynchronous variant
                    consumer.commitSync();
                    System.out.println("commit finished");
                    // clear the list for the next batch
                    consumerRecordList.clear();
                }
            }
        }
    }
}
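
If the synchronous commit becomes a bottleneck, the same spot in the loop can commit asynchronously and still observe failures through an OffsetCommitCallback. A minimal sketch of a drop-in replacement for the consumer.commitSync() call above (it additionally needs OffsetCommitCallback, OffsetAndMetadata, TopicPartition and java.util.Map on the import list):

                    // asynchronous commit with a callback: does not block the loop,
                    // but still reports whether the commit succeeded
                    consumer.commitAsync(new OffsetCommitCallback() {
                        @Override
                        public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception exception) {
                            if (exception != null) {
                                // the commit failed; log it (a common fallback is one final commitSync() on shutdown)
                                System.err.println("offset commit failed: " + exception.getMessage());
                            }
                        }
                    });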

(3) Commit once per partition, right after that partition's data has been processed (safer than the two approaches above)

package it.yuge;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

import java.util.*;

public class ConsumerPartition {
    /**
     * Commit each partition's offset as soon as that partition's records have been processed
     * @param args
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node01:9092");
        props.put("group.id", "test_group");
        props.put("enable.auto.commit", "false"); // disable automatic commits; offsets are committed manually below
        props.put("auto.commit.interval.ms", "1000");
        props.put("session.timeout.ms", "30000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(props);

        kafkaConsumer.subscribe(Arrays.asList("mypartition"));
        while (true){
            // consume data in an infinite loop
            ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(1000);
            // the partitions of the mypartition topic that returned records in this poll
            Set<TopicPartition> partitions = consumerRecords.partitions();

            // process each partition's records in turn, then commit that partition's offset
            for (TopicPartition partition : partitions) {
                // the records belonging to this partition
                List<ConsumerRecord<String, String>> records = consumerRecords.records(partition);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value()+"==="+record.offset());
                }
                // the offset of the last record in this partition, i.e. how far we have consumed
                long offset = records.get(records.size() - 1).offset();

                // commit offset + 1 for this partition (a single-entry map built with Collections.singletonMap),
                // so consumption resumes from the first record that has not been consumed yet
                kafkaConsumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(offset + 1)));
            }
        }
    }
}

(4) Consume only specific partitions of a topic

package it.yuge;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.util.Arrays;
import java.util.Properties;

public class ConsumerSomePartition {
    // consume only some partitions of a topic
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node01:9092");
        props.put("group.id", "test_group");
        props.put("enable.auto.commit", "true"); // enable automatic offset commits
        props.put("auto.commit.interval.ms", "1000");
        props.put("session.timeout.ms", "30000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        // create the KafkaConsumer
        KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<String, String>(props);
        
        // subscribe() would consume every partition of a topic:
        //kafkaConsumer.subscribe();

        // assign() instead consumes only partitions 0 and 1 of the mypartition topic
        TopicPartition topicPartition1 = new TopicPartition("mypartition", 0);
        TopicPartition topicPartition2 = new TopicPartition("mypartition", 1);
        kafkaConsumer.assign(Arrays.asList(topicPartition1,topicPartition2));
        
        while (true){
            ConsumerRecords<String, String> records = kafkaConsumer.poll(1000);
            // iterate over the fetched records
            for (ConsumerRecord<String, String> record : records) {
                System.out.println("value: " + record.value() + ", offset: " + record.offset());
            }
        }
    }
}
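
assign() also pairs naturally with seek(), which positions an assigned partition at an explicit offset instead of resuming from the committed one. A minimal sketch, reusing kafkaConsumer and topicPartition1 from above; offset 0 here is purely an illustrative starting point:

        // rewind partition 0 of "mypartition" to offset 0 before polling
        // (seek() may only be called on partitions that have already been assigned)
        kafkaConsumer.seek(topicPartition1, 0L);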

4. Kafka Streams API development

Use case:

Requirement: use the Streams API to read the data in the test topic, convert it all to upper case, and write the result into the test2 topic.


(1) Create a topic

cd /export/servers/kafka_2.11-0.10.0.0/
bin/kafka-topics.sh --create  --partitions 3 --replication-factor 2 --topic test2 --zookeeper node01:2181,node02:2181,node03:2181

--create creates the topic
--partitions 3 gives the topic three partitions
--replication-factor 2 gives each partition two replicas
--topic test2 names the topic test2
--zookeeper sets the ZooKeeper connection string

(2) Develop the Streams API application

package it.yuge;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStreamBuilder;

import java.util.Properties;

public class StreamAPI {
    // read data from the test topic, convert it to upper case, and write it to the test2 topic
    public static void main(String[] args) {
        // configuration
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-application"); // application id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "node01:9092"); // Kafka broker address
        // key and value serialization/deserialization
        props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        // KStreamBuilder is the entry point for building the topology
        KStreamBuilder builder = new KStreamBuilder();
        // stream("test") reads from the test topic,
        // mapValues(...) upper-cases every value,
        // to("test2") writes the result to the test2 topic
        builder.stream("test").mapValues(line -> line.toString().toUpperCase()).to("test2");
        // create KafkaStreams from the builder and the configuration
        KafkaStreams streams = new KafkaStreams(builder, props);
        // start the Kafka Streams application
        streams.start();
    }
}
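
As written, main() returns while the streams threads keep running in the background. A common optional refinement (a sketch, not part of the original listing) is to close the KafkaStreams instance when the JVM shuts down, added at the end of main():

        // stop the streaming job cleanly when the JVM exits
        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
            @Override
            public void run() {
                streams.close();
            }
        }));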

(3) Produce data

# on node01, run the following commands to produce data into the test topic
cd /export/servers/kafka_2.11-0.10.0.0
bin/kafka-console-producer.sh --broker-list node01:9092,node02:9092,node03:9092 --topic test

(4) Consume data

# on node02, run the following commands to consume the data in the test2 topic
cd /export/servers/kafka_2.11-0.10.0.0
bin/kafka-console-consumer.sh --from-beginning  --topic test2 --zookeeper node01:2181,node02:2181,node03:2181