提纲
1、c++访问kafka的函数库librdkafka
2、安装
3、封装KafkaConsumer和KafkaProducer
4、问题解决
1、c++访问kafka的函数库librdkafka
librdkafka的项目地址是https://github.com/edenhill/librdkafka。
librdkafka是一个用C语言实现的Apache Kafka协议客户端库(并附带C++封装),提供了生产者、消费者和管理客户端。它在设计消息发送的时候,重点考虑了可靠性和高性能。
目前,librdkafka可以达到每秒生产一百万个消息,并且每秒可以消费三百万个消息,无论是生产,还是消费,它的性能都非常彪悍。
2、安装
在Debian和Ubuntu上,用以下命令安装:
$ apt install librdkafka-dev
在RedHat、Centos、Fedora上,用下面的命令安装:
$ yum install librdkafka-devel
安装后
头文件在:
/usr/include/librdkafka
root@DF-01:/usr/include/librdkafka# pwd
/usr/include/librdkafka
root@DF-01:/usr/include/librdkafka# ll
total 224
drwxr-xr-x 2 root root 4096 Mar 8 18:17 ./
drwxr-xr-x 49 root root 20480 Mar 7 18:58 ../
-rw-r--r-- 1 root root 70853 Feb 6 2018 rdkafkacpp.h
-rw-r--r-- 1 root root 125600 Feb 6 2018 rdkafka.h
root@DF-01:/usr/include/librdkafka#
库文件在:
/usr/lib/x86_64-linux-gnu
root@DF-01:/usr/lib/x86_64-linux-gnu# ll librdkafka*
-rw-r--r-- 1 root root 1522178 Feb 6 2018 librdkafka.a
-rw-r--r-- 1 root root 347738 Feb 6 2018 librdkafka++.a
lrwxrwxrwx 1 root root 15 Feb 6 2018 librdkafka.so -> librdkafka.so.1
lrwxrwxrwx 1 root root 17 Feb 6 2018 librdkafka++.so -> librdkafka++.so.1
-rw-r--r-- 1 root root 821952 Feb 6 2018 librdkafka.so.1
-rw-r--r-- 1 root root 116744 Feb 6 2018 librdkafka++.so.1
root@DF-01:/usr/lib/x86_64-linux-gnu#
3、封装KafkaConsumer和KafkaProducer
原生的librdkafka的函数直接用在项目中太复杂,所以,做了一层封装,封装出了简单的接口。
点击查看KafkaClient的目录结构
root@DF-01:/home/dfcv_dev/fastdds/soa_v2c/src/Util/KafkaClient# tree .
.
├── CMakeLists.txt
├── KafkaConsumer.cxx
├── KafkaConsumer.h
├── KafkaConsumerMain.cxx
├── KafkaProducer.cxx
├── KafkaProducer.h
└── KafkaProducerMain.cxx
0 directories, 7 files
root@DF-01:/home/dfcv_dev/fastdds/soa_v2c/src/Util/KafkaClient#
封装的代码用CMake去管理,KafkaConsumer.cxx中封装出了一个简单的Consumer类,KafkaProducer.cxx中封装出了一个简单的Producer类,而KafkaConsumerMain和KafkaProducerMain这两个文件则是使用封装的Consumer和Producer的示例代码。
封装后各个文件代码如下所示,下面依次贴出CMakeLists.txt、KafkaConsumer.h、KafkaConsumer.cxx、KafkaConsumerMain.cxx、KafkaProducer.h、KafkaProducer.cxx、KafkaProducerMain.cxx的代码。
点击查看CMakeLists.txt代码
# CMakeLists.txt
# Builds two demo executables, KafkaConsumerMain and KafkaProducerMain,
# each from its wrapper source plus its example main, linked against rdkafka++.
cmake_minimum_required(VERSION 3.16.3)
project("KafkaClient")
# Request C++11 without compiler-specific extensions.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_EXTENSIONS OFF)
# Header search path; the sources include "librdkafka/rdkafkacpp.h" relative to it.
include_directories(
/usr/include
)
# Library search path used to resolve rdkafka++ below.
link_directories(
/usr/lib/x86_64-linux-gnu
)
# Consumer example: wrapper class + demo main.
message(STATUS "Configuring KafkaConsumerMain...")
add_executable(KafkaConsumerMain KafkaConsumer.cxx KafkaConsumerMain.cxx)
target_link_libraries(KafkaConsumerMain rdkafka++)
# Producer example: wrapper class + demo main.
message(STATUS "Configuring KafkaProducerMain...")
add_executable(KafkaProducerMain KafkaProducer.cxx KafkaProducerMain.cxx)
target_link_libraries(KafkaProducerMain rdkafka++)
点击查看KafkaConsumer.h代码
#ifndef __KAFKACONSUMER_H_
#define __KAFKACONSUMER_H_
#include <string>
#include <iostream>
#include <vector>
#include <stdio.h>
#include "librdkafka/rdkafkacpp.h"
/// Simplified wrapper around RdKafka::KafkaConsumer.
///
/// The object owns every librdkafka object it creates (configuration objects,
/// the consumer handle and the callback objects) through raw pointers that are
/// released in the destructor. Copying is therefore disabled: an implicit copy
/// would make two objects delete the same pointers.
class KafkaConsumer
{
public:
    /// @param brokers   Broker list passed to "bootstrap.servers".
    /// @param groupID   Consumer group name passed to "group.id".
    /// @param topics    Topics to subscribe to.
    /// @param partition Stored in m_partition.
    explicit KafkaConsumer(const std::string &brokers, const std::string &groupID,
                           const std::vector<std::string> &topics, int partition);
    ~KafkaConsumer();

    // Non-copyable: the raw pointers below have a single owner.
    KafkaConsumer(const KafkaConsumer &) = delete;
    KafkaConsumer &operator=(const KafkaConsumer &) = delete;

    /// Fetches the next message from the subscribed topics and returns its
    /// payload as a string.
    std::string pullMessage();

protected:
    std::string m_brokers;
    std::string m_groupId;
    std::vector<std::string> m_topicVector; // one consumer may subscribe to several topics, hence a vector
    int m_partition = 0;
    RdKafka::Conf *m_config = nullptr;               // GLOBAL-level configuration (consumer client level)
    RdKafka::Conf *m_topicConfig = nullptr;          // TOPIC-level configuration
    RdKafka::KafkaConsumer *m_consumer = nullptr;    // consumer client instance
    RdKafka::EventCb *m_event_cb = nullptr;          // event callback
    RdKafka::RebalanceCb *m_rebalance_cb = nullptr;  // rebalance callback
};
/// Event callback for the consumer: writes every librdkafka event
/// (errors, statistics, logs, throttling, anything else) to stderr.
class ConsumerEventCb : public RdKafka::EventCb
{
public:
    /// Prints one line per event; the format depends on the event type.
    void event_cb(RdKafka::Event &event) override
    {
        switch (event.type())
        {
        case RdKafka::Event::EVENT_ERROR:
            std::cerr << "ERROR (" << RdKafka::err2str(event.err()) << "): " << event.str() << std::endl;
            break;
        case RdKafka::Event::EVENT_STATS:
            std::cerr << "STATS: " << event.str() << std::endl;
            break;
        case RdKafka::Event::EVENT_LOG:
            // The line must end with a real newline ("\n"); the previous
            // format string ended in a literal 'n', gluing log lines together.
            fprintf(stderr, "LOG-%i-%s: %s\n", static_cast<int>(event.severity()),
                    event.fac().c_str(), event.str().c_str());
            break;
        case RdKafka::Event::EVENT_THROTTLE:
            std::cerr << "THROTTLED: " << event.throttle_time() << "ms by " << event.broker_name()
                      << " id " << static_cast<int>(event.broker_id()) << std::endl;
            break;
        default:
            std::cerr << "EVENT " << event.type() << " (" << RdKafka::err2str(event.err())
                      << "): " << event.str() << std::endl;
            break;
        }
    }
};
/// Rebalance callback for the consumer.
///
/// The callback receives the kind of rebalance event through `err` and the
/// affected partitions through `partitions`; it logs them to stderr, applies
/// the assignment change on the consumer and tracks how many partitions are
/// currently assigned.
class ConsumerRebalanceCb : public RdKafka::RebalanceCb
{
public:
    /// @param consumer   Consumer whose assignment is being changed.
    /// @param err        ERR__ASSIGN_PARTITIONS, ERR__REVOKE_PARTITIONS or an error code.
    /// @param partitions Partitions being assigned or revoked.
    void rebalance_cb(RdKafka::KafkaConsumer *consumer, RdKafka::ErrorCode err,
                      std::vector<RdKafka::TopicPartition *> &partitions) override
    {
        std::cerr << "RebalanceCb: " << RdKafka::err2str(err) << ": ";
        printTopicPartition(partitions);
        if (err == RdKafka::ERR__ASSIGN_PARTITIONS)
        {
            // New assignment received: start consuming from these partitions.
            consumer->assign(partitions);
            partition_count = static_cast<int>(partitions.size());
        }
        else if (err == RdKafka::ERR__REVOKE_PARTITIONS)
        {
            // Partitions are being taken away: drop the current assignment.
            consumer->unassign();
            partition_count = 0;
        }
        else
        {
            std::cerr << "Rebalancing error: " << RdKafka::err2str(err) << std::endl;
            // As in the librdkafka RebalanceCb example, drop the assignment on
            // an unexpected error so the local state stays in sync.
            consumer->unassign();
            partition_count = 0;
        }
    }

private:
    /// Prints every "topic[partition], " pair followed by a newline.
    static void printTopicPartition(const std::vector<RdKafka::TopicPartition *> &partitions)
    {
        for (std::vector<RdKafka::TopicPartition *>::size_type i = 0; i < partitions.size(); ++i)
        {
            std::cerr << partitions[i]->topic() << "[" << partitions[i]->partition() << "], ";
        }
        // Real newline; the previous code printed the letter 'n'.
        std::cerr << "\n";
    }

private:
    int partition_count = 0; // number of partitions currently assigned to this consumer
};
#endif
点击查看KafkaConsumer.cxx代码
#include "KafkaConsumer.h"
/// Builds the global and topic configuration, creates the librdkafka consumer
/// and subscribes it to `topics`.
///
/// Failures to set a single property are logged and construction continues.
/// If a configuration object or the consumer itself cannot be created, the
/// failure is logged and construction stops, leaving m_consumer == nullptr;
/// every owned pointer is null-initialised first so the destructor stays safe.
///
/// @param brokers   Broker list for "bootstrap.servers".
/// @param groupId   Consumer group for "group.id".
/// @param topics    Topics to subscribe to.
/// @param partition Stored in m_partition; not otherwise used here.
KafkaConsumer::KafkaConsumer(const std::string &brokers, const std::string &groupId,
                             const std::vector<std::string> &topics, int partition)
    : m_brokers(brokers),
      m_groupId(groupId),
      m_topicVector(topics),
      m_partition(partition),
      m_config(nullptr),
      m_topicConfig(nullptr),
      m_consumer(nullptr),
      m_event_cb(nullptr),
      m_rebalance_cb(nullptr)
{
    // Create the configuration objects; nothing can be configured without them.
    m_config = RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL);
    if (m_config == nullptr)
    {
        std::cout << "Create Rdkafka Global Conf Failed." << std::endl;
        return;
    }
    m_topicConfig = RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC);
    if (m_topicConfig == nullptr)
    {
        std::cout << "Create Rdkafka Topic Conf Failed." << std::endl;
        return;
    }

    RdKafka::Conf::ConfResult result;
    std::string error_str;

    // Global (client-level) properties.
    result = m_config->set("bootstrap.servers", m_brokers, error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Conf set 'bootstrap.servers' failed: " << error_str << std::endl;
    }
    result = m_config->set("group.id", m_groupId, error_str); // consumer group name
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Conf set 'group.id' failed: " << error_str << std::endl;
    }
    result = m_config->set("max.partition.fetch.bytes", "1024000", error_str); // max bytes fetched per partition
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Conf set 'max.partition.fetch.bytes' failed: " << error_str << std::endl;
    }
    // Do not emit a partition-EOF event when the end of a partition is reached.
    result = m_config->set("enable.partition.eof", "false", error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Conf set 'enable.partition.eof' failed: " << error_str << std::endl;
    }

    // Callbacks; the objects are owned by this class and freed in the destructor.
    m_event_cb = new ConsumerEventCb;
    result = m_config->set("event_cb", m_event_cb, error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Conf set 'event_cb' failed: " << error_str << std::endl;
    }
    m_rebalance_cb = new ConsumerRebalanceCb;
    result = m_config->set("rebalance_cb", m_rebalance_cb, error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Conf set 'rebalance_cb' failed: " << error_str << std::endl;
    }

    // Topic-level properties, attached to the global configuration as defaults.
    result = m_topicConfig->set("auto.offset.reset", "latest", error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Topic Conf set 'auto.offset.reset' failed: " << error_str << std::endl;
    }
    result = m_config->set("default_topic_conf", m_topicConfig, error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Conf set 'default_topic_conf' failed: " << error_str << std::endl;
    }

    // Create the consumer client; stop here on failure instead of
    // dereferencing a null handle.
    m_consumer = RdKafka::KafkaConsumer::create(m_config, error_str);
    if (m_consumer == nullptr)
    {
        std::cout << "Create KafkaConsumer failed: " << error_str << std::endl;
        return;
    }
    std::cout << "Create KafkaConsumer succeed, consumer name : " << m_consumer->name() << std::endl;

    // Subscribe to every topic in m_topicVector.
    RdKafka::ErrorCode error_code = m_consumer->subscribe(m_topicVector);
    if (error_code != RdKafka::ErrorCode::ERR_NO_ERROR)
    {
        std::cerr << "Consumer subscribe topics failed: " << RdKafka::err2str(error_code) << std::endl;
    }
}
/// Closes the consumer (if one was created) and releases every owned object.
/// The consumer handle is destroyed before the callback objects it was
/// configured with.
KafkaConsumer::~KafkaConsumer()
{
    if (m_consumer != nullptr)
    {
        // librdkafka expects close() to be called before the consumer is
        // destroyed so it can leave the group cleanly.
        RdKafka::ErrorCode error_code = m_consumer->close();
        if (error_code != RdKafka::ERR_NO_ERROR)
        {
            std::cerr << "Consumer close failed: " << RdKafka::err2str(error_code) << std::endl;
        }
    }
    delete m_config;
    delete m_topicConfig;
    delete m_consumer;
    delete m_event_cb;
    delete m_rebalance_cb;
}
std::string KafkaConsumer::pullMessage()
{
RdKafka::Message *m_message = m_consumer->consume(5000);
if (m_message->err() == RdKafka::ErrorCode::ERR_NO_ERROR)
{
return (char *)0;
}
else
{
return static_cast<const char *>(m_message->payload());
}
}
点击查看KafkaConsumerMain.cxx代码
#include "KafkaConsumer.h"
/// Consumer demo: subscribes to "topic-demo" on a local broker and prints
/// every message it receives.
int main()
{
    const std::string brokers = "127.0.0.1:9092";
    std::vector<std::string> topics; // topics to consume
    topics.push_back("topic-demo");
    const std::string group = "consumer-group-demo"; // consumer group

    {
        KafkaConsumer consumer(brokers, group, topics, RdKafka::Topic::OFFSET_BEGINNING);
        // The demo polls until the process is terminated. The previous loop
        // condition compared c_str() of a never-assigned string with nullptr,
        // which is always true, so this is the same loop written honestly.
        while (true)
        {
            const std::string msgStr = consumer.pullMessage();
            if (!msgStr.empty())
            {
                std::cout << msgStr << std::endl;
            }
        }
    }

    // Only reached once the loop above is given an exit condition; the scope
    // above ensures the consumer is destroyed before waiting for librdkafka.
    RdKafka::wait_destroyed(5000);
    return 0;
}
点击查看KafkaProducer.h代码
#ifndef __KAFKAPRODUCER_H_
#define __KAFKAPRODUCER_H_
#include <string>
#include <iostream>
#include "librdkafka/rdkafkacpp.h"
/// Simplified wrapper around RdKafka::Producer bound to a single topic.
///
/// The object owns the librdkafka objects it creates (configuration objects,
/// producer, topic handle and callbacks) through raw pointers released in the
/// destructor. Copying is disabled because an implicit copy would make two
/// objects delete the same pointers.
class KafkaProducer
{
public:
    /// @param brokers Broker list in "ip:port" form.
    /// @param topic   Name of the topic messages are sent to.
    explicit KafkaProducer(const std::string &brokers, const std::string &topic);
    ~KafkaProducer();

    // Non-copyable: the raw pointers below have a single owner.
    KafkaProducer(const KafkaProducer &) = delete;
    KafkaProducer &operator=(const KafkaProducer &) = delete;

    /// Sends `msg` to the topic, using `key` as the message key.
    void pushMessage(const std::string &msg, const std::string &key);

protected:
    std::string m_brokers;
    std::string m_topicStr;
    RdKafka::Conf *m_producerConfig = nullptr;          // global (client-level) configuration
    RdKafka::Conf *m_topicConfig = nullptr;             // topic-level configuration
    RdKafka::Producer *m_producer = nullptr;            // producer client instance
    RdKafka::Topic *m_topic = nullptr;                  // topic handle passed to produce()
    RdKafka::DeliveryReportCb *m_dr_cb = nullptr;       // reports the delivery result of each produced message
    RdKafka::EventCb *m_event_cb = nullptr;             // receives errors, statistics and logs from librdkafka
    RdKafka::PartitionerCb *m_partitioner_cb = nullptr; // custom partitioner
};
/// Delivery-report callback: logs the outcome of each produced message to stderr.
class ProducerDeliveryReportCb : public RdKafka::DeliveryReportCb
{
public:
    /// Implements the dr_cb() hook of RdKafka::DeliveryReportCb.
    void dr_cb(RdKafka::Message &message)
    {
        const bool delivered = (message.err() == 0);
        if (delivered)
        {
            // Success: report where the message landed.
            std::cerr << "Message delivered to topic: " << message.topic_name()
                      << " [" << message.partition()
                      << "] at offset " << message.offset() << std::endl;
            return;
        }
        // Failure: report the error text carried by the message.
        std::cerr << "Message delivery failed: " << message.errstr() << std::endl;
    }
};
/// Event callback for the producer: writes every librdkafka event to stdout.
///
/// Error and log events include the event text (event.str()), and event types
/// without a dedicated case are reported through the default branch.
class ProducerEventCb : public RdKafka::EventCb
{
public:
    /// Prints one line per event; the format depends on the event type.
    void event_cb(RdKafka::Event &event) override
    {
        switch (event.type())
        {
        case RdKafka::Event::EVENT_ERROR:
            std::cout << "RdKafka::EVENT::EVENT_ERROR: " << RdKafka::err2str(event.err())
                      << ": " << event.str() << std::endl;
            break;
        case RdKafka::Event::EVENT_STATS:
            std::cout << "RdKafka::EVENT::EVENT_STATS: " << event.str() << std::endl;
            break;
        case RdKafka::Event::EVENT_LOG:
            // fac() is only the facility name; the log text itself is in str().
            std::cout << "RdKafka::EVENT::EVENT_LOG: " << event.fac()
                      << ": " << event.str() << std::endl;
            break;
        case RdKafka::Event::EVENT_THROTTLE:
            std::cout << "RdKafka::EVENT::EVENT_THROTTLE: " << event.broker_name()
                      << " throttled " << event.throttle_time() << "ms" << std::endl;
            break;
        default:
            std::cout << "RdKafka::EVENT: " << event.type() << " ("
                      << RdKafka::err2str(event.err()) << "): " << event.str() << std::endl;
            break;
        }
    }
};
/// Custom producer partitioner: hashes the message key and maps the hash onto
/// one of the topic's partitions.
class HashPartitionerCb : public RdKafka::PartitionerCb
{
public:
    /// @param topic         Topic the message is produced to (used for logging).
    /// @param key           Message key; a null key is hashed as an empty key.
    /// @param partition_cnt Number of partitions of the topic.
    /// @param msg_opaque    Unused.
    /// @return A partition id in [0, partition_cnt), or
    ///         RdKafka::Topic::PARTITION_UA when partition_cnt is not positive.
    int32_t partitioner_cb(const RdKafka::Topic *topic, const std::string *key,
                           int32_t partition_cnt, void *msg_opaque) override
    {
        static const std::string kEmptyKey;
        const std::string &effective_key = (key != nullptr) ? *key : kEmptyKey;

        // Diagnostic line only. It is streamed directly instead of being
        // formatted into a fixed 128-byte buffer, which long topic names or
        // keys could overflow.
        std::cout << "HashPartitionCb:[" << topic->name() << "][" << effective_key << "]["
                  << partition_cnt << "]" << std::endl;

        if (partition_cnt <= 0)
        {
            // No partition to choose from; also avoids a modulo by zero.
            return RdKafka::Topic::PARTITION_UA;
        }

        // The actual partitioning: hash of the key modulo the partition count.
        const unsigned int hash = generate_hash(effective_key.c_str(), effective_key.size());
        return static_cast<int32_t>(hash % static_cast<unsigned int>(partition_cnt));
    }

private:
    /// Multiply-by-33 string hash seeded with 5381 over `len` bytes of `str`.
    static inline unsigned int generate_hash(const char *str, size_t len)
    {
        unsigned int hash = 5381;
        for (size_t i = 0; i < len; i++)
        {
            hash = ((hash << 5) + hash) + str[i];
        }
        // The raw hash is returned; the caller reduces it to a partition id.
        return hash;
    }
};
#endif
点击查看KafkaProducer.cxx代码
#include "KafkaProducer.h"
// Example usage: KafkaProducer producer("192.168.0.105:9092", "topic_demo");
/// Builds the global and topic configuration, creates the librdkafka producer
/// and the topic handle used by pushMessage().
///
/// Failures to set a single property are logged and construction continues.
/// If a configuration object, the producer or the topic cannot be created, the
/// failure is logged and construction stops; every owned pointer is
/// null-initialised first so the destructor stays safe.
///
/// @param brokers Broker list in "ip:port" form.
/// @param topic   Name of the topic messages are sent to.
KafkaProducer::KafkaProducer(const std::string &brokers, const std::string &topic)
    : m_brokers(brokers),
      m_topicStr(topic),
      m_producerConfig(nullptr),
      m_topicConfig(nullptr),
      m_producer(nullptr),
      m_topic(nullptr),
      m_dr_cb(nullptr),
      m_event_cb(nullptr),
      m_partitioner_cb(nullptr)
{
    // Create the configuration objects; nothing can be configured without them.
    m_producerConfig = RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL);
    if (m_producerConfig == nullptr)
    {
        std::cout << "Create Rdkafka Global Conf Failed." << std::endl;
        return;
    }
    m_topicConfig = RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC);
    if (m_topicConfig == nullptr)
    {
        std::cout << "Create Rdkafka Topic Conf Failed." << std::endl;
        return;
    }

    RdKafka::Conf::ConfResult result;
    std::string error_str;

    // Broker address list. The property is "bootstrap.servers"; the previous
    // spelling "booststrap.servers" is not a librdkafka property, so the
    // broker list was never applied.
    result = m_producerConfig->set("bootstrap.servers", m_brokers, error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Global Conf set 'bootstrap.servers' failed: " << error_str << std::endl;
    }
    result = m_producerConfig->set("statistics.interval.ms", "10000", error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Global Conf set ‘statistics.interval.ms’ failed: " << error_str << std::endl;
    }
    // Maximum message size the producer will send; larger messages are rejected.
    result = m_producerConfig->set("message.max.bytes", "10240000", error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Global Conf set 'message.max.bytes' failed: " << error_str << std::endl;
    }

    // Callbacks; the objects are owned by this class and freed in the destructor.
    m_dr_cb = new ProducerDeliveryReportCb;
    result = m_producerConfig->set("dr_cb", m_dr_cb, error_str); // per-message delivery result
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Global Conf set ‘dr_cb’ failed: " << error_str << std::endl;
    }
    m_event_cb = new ProducerEventCb;
    result = m_producerConfig->set("event_cb", m_event_cb, error_str);
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Global Conf set ‘event_cb’ failed: " << error_str << std::endl;
    }
    m_partitioner_cb = new HashPartitionerCb;
    result = m_topicConfig->set("partitioner_cb", m_partitioner_cb, error_str); // custom partitioner
    if (result != RdKafka::Conf::CONF_OK)
    {
        std::cout << "Topic Conf set ‘partitioner_cb’ failed: " << error_str << std::endl;
    }

    // Create the producer client; stop here on failure instead of handing a
    // null handle to Topic::create().
    m_producer = RdKafka::Producer::create(m_producerConfig, error_str);
    if (m_producer == nullptr)
    {
        std::cout << "Create Producer failed: " << error_str << std::endl;
        return;
    }

    // Create the topic handle that produce() needs later.
    m_topic = RdKafka::Topic::create(m_producer, m_topicStr, m_topicConfig, error_str);
    if (m_topic == nullptr)
    {
        std::cout << "Create Topic failed: " << error_str << std::endl;
    }
}
void KafkaProducer::pushMessage(const std::string &msg, const std::string &key)
{
int32_t len = msg.length();
void *payload = const_cast<void *>(static_cast<const void *>(msg.data()));
RdKafka::ErrorCode error_code = m_producer->produce(m_topic,
RdKafka::Topic::PARTITION_UA,
RdKafka::Producer::RK_MSG_COPY,
payload, len, &key, NULL);
m_producer->poll(0); // poll()参数为0意味着不阻塞;poll(0)主要是为了触发应用程序提供的回调函数
if (error_code != RdKafka::ErrorCode::ERR_NO_ERROR)
{
std::cerr << "Produce failed: " << RdKafka::err2str(error_code) << std::endl;
if (error_code == RdKafka::ErrorCode::ERR__QUEUE_FULL)
{
m_producer->poll(1000); // 如果发送失败的原因是队列正满,则阻塞等待一段时间
}
else if (error_code == RdKafka::ErrorCode::ERR_MSG_SIZE_TOO_LARGE)
{
// 如果发送消息过大,超过了max.size,则需要裁减后重新发送
}
else
{
std::cerr << "ERR_UNKNOWN_PARTITION or ERR_UNKNOWN_TOPIC" << std::endl;
}
}
}
/// Flushes any messages still queued in the producer, then releases every
/// owned object. The topic handle is destroyed before the producer, and the
/// producer before the callback objects it was configured with.
KafkaProducer::~KafkaProducer()
{
    // The producer may be null if construction failed part-way.
    if (m_producer != nullptr)
    {
        while (m_producer->outq_len() > 0)
        {
            // Messages are still waiting in the outgoing queue: report and flush.
            std::cerr << "Waiting for: " << m_producer->outq_len() << std::endl;
            m_producer->flush(5000);
        }
    }
    delete m_producerConfig;
    delete m_topicConfig;
    delete m_topic;
    delete m_producer;
    delete m_dr_cb;
    delete m_event_cb;
    delete m_partitioner_cb;
}
点击查看KafkaProducerMain.cxx代码
#include "KafkaProducer.h"
#include <iostream>
#include <unistd.h>
/// Producer demo: sends ten keyed messages to "topic-demo" on a local broker.
int main() {
    {
        KafkaProducer producer("127.0.0.1:9092", "topic-demo");
        sleep(5);
        for (int i = 0; i < 10; i++) {
            char msg[64] = {0};
            // "%4d" right-aligns the number in 4 columns, e.g. "Hello Kafka    1".
            snprintf(msg, sizeof(msg), "%s%4d", "Hello Kafka ", i);
            char key[8] = {0};
            snprintf(key, sizeof(key), "%d", i); // key = "1"
            producer.pushMessage(msg, key);
        }
        // Leaving this scope destroys the producer, which flushes pending messages.
    }
    // Wait up to 50s for librdkafka to finish tearing down its objects. The
    // producer must already be destroyed, otherwise this always runs into the
    // timeout.
    RdKafka::wait_destroyed(50000);
    return 0;
}
以上就是封装的全部代码。
使用时需要把KafkaConsumer.h、KafkaConsumer.cxx、KafkaProducer.h、KafkaProducer.cxx这4个文件集成到自己的项目中。自己的项目如何调用KafkaConsumer和KafkaProducer呢?可以参考KafkaConsumerMain.cxx和KafkaProducerMain.cxx这两个main中的写法。
自己项目中的CMakeLists.txt中应该如何写呢?这要参考上面的CMakeLists.txt的写法。
4、问题解决
以上代码经过cmake、make编译后都能正确生成可执行文件。
执行KafkaProducerMain的时候也能正确生成数据,存入kafka。
但是,在执行KafkaConsumerMain的时候遇到问题:
root@DF-01:/home/dfcv_dev/fastdds/soa_v2c/build/Util/KafkaClient# ./KafkaConsumerMain
Create KafkaConsumer succeed, consumer name : rdkafka#consumer-1
Consumer subscribe topics succeed, topic name : topic-demo
RebalanceCb: Local: Assign partitions: topic-demo[0], n0
terminate called after throwing an instance of 'std::logic_error'
what(): basic_string::_M_construct null not valid
Aborted
root@DF-01:/home/dfcv_dev/fastdds/soa_v2c/build/Util/KafkaClient#
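表面上看是rebalance的时候遇到一个错误,但异常信息“basic_string::_M_construct null not valid”表明,程序是在用空指针构造std::string时抛出了std::logic_error;rebalance回调本身已经正常打印并返回。因此应重点检查KafkaConsumer::pullMessage()对消息错误码和payload的处理(返回值是否可能由空指针构造),有待解决。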