最近刚好要用 Kafka 传输数据,并且需要鉴权,于是研究了一下 Kafka 带鉴权的推送和消费。现在把代码放出来,有兴趣的可以看一下。需要注意的是,不同集群使用的鉴权机制(sasl_mechanism,例如 PLAIN、SCRAM-SHA-256)各不相同,请按实际配置修改!

一、生产者

生产者采用异步推送的方式,并加入了计数模块,用来防止消息推送后尚未收到回调、脚本就已经退出的情况。

import datetime
from loguru import logger
import time
import random
import json
import gc
from kafka import KafkaProducer
from kafka.errors import KafkaTimeoutError
import traceback


class KProducer:
    """Asynchronous Kafka producer wrapper with optional SASL auth and delivery counters.

    Messages are sent with ``KafkaProducer.send`` and tracked through success /
    error callbacks. Per-``key_name`` statistics are kept in ``kafka_items`` and
    overall totals in the ``total_*`` attributes, so a caller can check whether
    every pushed message has been acknowledged before the script exits.
    """

    # Seconds slept inside the error callback before a failed message is re-sent.
    # NOTE(review): kafka-python appears to run errbacks on the producer's
    # background sender thread, so a long sleep here may delay other sends —
    # confirm, and lower this value if that matters for your workload.
    retry_backoff_seconds = 10

    def __init__(self, bootstrap_servers, topic, sasl_plain_username, sasl_plain_password, retry_max_times=5, type=0):
        """
        Create the underlying producer and reset all counters.

        :param bootstrap_servers: broker address list passed to KafkaProducer
        :param topic: topic every message is sent to
        :param sasl_plain_username: SASL account (only used when ``type`` is 1 or '1')
        :param sasl_plain_password: SASL password (only used when ``type`` is 1 or '1')
        :param retry_max_times: a send is retried while its attempt number is <= this value
        :param type: 1 or '1' enables SASL authentication; anything else connects without it
        """
        self.producer = self.get_kafkaProduct(bootstrap_servers, sasl_plain_username, sasl_plain_password, type)
        self.topic = topic
        self.flage = True  # not read anywhere in this class
        self.text = ''  # not read anywhere in this class
        self.retry_max_times = retry_max_times
        self.error_max_cout = 1000  # cap on how many error lines this instance logs
        self.error_cout = 0  # error lines logged so far

        self.item_count = 0  # sends whose callback has not fired yet
        self.total_push_count = 0  # successful calls to producer.send
        self.total_receive_s_count = 0  # success callbacks received
        self.total_receive_e_count = 0  # error callbacks received

        # key_name -> {'push_num': ..., 'callbakc_num': ..., 'error_num': ...}
        self.kafka_items = {}

    def get_kafkaProduct(self, bootstrap_servers, sasl_plain_username, sasl_plain_password, type=0):
        """
        Build a KafkaProducer, retrying the connection up to 5 times.

        :param bootstrap_servers: broker address list
        :param sasl_plain_username: SASL account, used when ``type`` is 1 or '1'
        :param sasl_plain_password: SASL password, used when ``type`` is 1 or '1'
        :param type: 1 or '1' selects the SASL (SCRAM-SHA-256 over SASL_PLAINTEXT) setup
        :return: the producer instance, or None when every attempt failed
        """
        options = {
            'bootstrap_servers': bootstrap_servers,
            # Keys and values are sent as JSON-encoded bytes.
            'key_serializer': lambda k: json.dumps(k).encode(),
            'value_serializer': lambda v: json.dumps(v).encode(),
            # Author's note: 1 returns once the send succeeded, 'all' returns only
            # after Kafka has stored the record through its whole pipeline.
            'acks': 'all',
            # Author's note: automatic re-send on transient network problems;
            # meant to be combined with max_in_flight_requests_per_connection=1.
            'retries': 5,
            'request_timeout_ms': 10000,  # author's note: the library default is 30000 ms
            'max_in_flight_requests_per_connection': 1,
            'batch_size': 1048576,
        }
        if type in [1, '1']:
            # Authenticated connection. Change sasl_mechanism (e.g. to PLAIN) and
            # security_protocol (e.g. to SASL_SSL) to match the cluster.
            options.update(
                sasl_mechanism="SCRAM-SHA-256",
                security_protocol='SASL_PLAINTEXT',
                api_version=(0, 10),
                sasl_plain_username=sasl_plain_username,
                sasl_plain_password=sasl_plain_password,
            )
        else:
            options['linger_ms'] = 300

        for _ in range(5):
            try:
                return KafkaProducer(**options)
            except Exception as e:
                logger.error(f'【kafka_error】{bootstrap_servers}  报错: {e} kafka连接断开,重连中!')
                time.sleep(5)

        return None

    def _log_error(self, message):
        """Log ``message`` at error level unless this instance's error-log cap is reached."""
        if self.error_cout < self.error_max_cout:
            self.error_cout += 1
            logger.error(message)

    def asyn_producer_callback(self, data, key, now_time=1):
        """
        Send one message asynchronously and register the success / error callbacks.

        ``key_name`` is the part of ``key`` between the first '|' found at index >= 1
        and the last '|' (for 'db|table|ts' that is 'table'); it is the bucket used
        in ``kafka_items``.

        :param data: message value (JSON-serialised by the producer)
        :param key: message key string
        :param now_time: attempt number, starting at 1; a failed attempt is retried
                         while it is <= ``retry_max_times``
        :return: '' (or the result of the retried call)
        """
        try:
            if self.producer:
                key_name = key[key.find('|', 1) + 1:key.rfind('|', 1)]
                stats = self.kafka_items.setdefault(
                    key_name, {'push_num': 0, 'callbakc_num': 0, 'error_num': 0})
                stats['push_num'] += 1

                # send() is asynchronous; the result is reported through the callbacks.
                self.producer.send(self.topic, value=data, key=key).add_callback(
                    self.send_success, key_name=key_name).add_errback(
                    self.send_error, data=data, key=key, now_time=now_time, key_name=key_name)
                self.item_count += 1
                self.total_push_count += 1
            else:
                # The producer could not be created; report the drop instead of
                # discarding the message silently.
                self._log_error(f'【kafka_error】key: {key} => producer 未初始化,数据未发送。')

        except KafkaTimeoutError as err:
            self._log_error(f'【Kafka】超时 {err}')
            if now_time <= self.retry_max_times:
                return self.asyn_producer_callback(data=data, key=key, now_time=now_time + 1)

        except Exception as err:
            self._log_error(f'【kafka_error】key: {key} => 异步发送错误: {err} ,重新发送。')
            if now_time <= self.retry_max_times:
                return self.asyn_producer_callback(data=data, key=key, now_time=now_time + 1)

        return ''

    def send_success(self, *args, **kwargs):
        """Success callback: update the in-flight, per-key and total success counters."""
        if self.item_count > 0:
            self.item_count -= 1
        key_name = kwargs['key_name']
        self.kafka_items[key_name]['callbakc_num'] += 1
        self.total_receive_s_count += 1
        return

    def send_error(self, excp=None, *args, **kwargs):
        """
        Error callback: update the counters and re-send the message if attempts remain.

        This must be a plain function. The previous version contained a ``yield``,
        which made it a generator function, so calling it as a callback only
        created a generator object and none of the body ever ran.

        :param excp: the exception reported for the failed send
        :param kwargs: ``data``, ``key``, ``now_time`` and ``key_name`` as registered
                       by ``asyn_producer_callback``
        :return: the result of the retried send, or None when no retry happens
        """
        if self.item_count > 0:
            self.item_count -= 1

        self.total_receive_e_count += 1
        try:
            data = kwargs['data']
            key = kwargs['key']
            now_time = kwargs['now_time']
            key_name = kwargs['key_name']
            self.kafka_items[key_name]['error_num'] += 1
            self._log_error(f'【kafka_error】key: {key} => 异步发送失败: {excp}')

            if now_time <= self.retry_max_times:
                time.sleep(self.retry_backoff_seconds)
                # asyn_producer_callback handles and retries its own send errors.
                return self.asyn_producer_callback(data, key, now_time + 1)

        except Exception as e:
            # Reached when the callback arguments are incomplete or the bookkeeping
            # fails; data/key may be unavailable here, so no retry is attempted.
            self._log_error(f'异步发送错误回调函数错误: {excp} | {e} | {args} |------| {kwargs}')

        return None

    def close_producer(self):
        """Close the underlying producer; a failure is logged, never raised."""
        if self.producer is None:
            # Nothing was ever connected, so there is nothing to close.
            return
        try:
            self.producer.close()
        except Exception as e:
            logger.error(f'【kafka_error】kafka 关闭失败 原因:{e}')
            


# Demo connection settings; fill in real values before running.
kafka_config = {
    "bootstrap_servers": [],
    "topic": "",
    "sasl_plain_username": "",  # SASL account
    "sasl_plain_password": "",  # SASL password
    "type": 1,  # 1 enables SASL authentication in KProducer
}

topic = kafka_config.get('topic', '')
kakfka_producer = KProducer(topic=kafka_config['topic'],
                            bootstrap_servers=kafka_config['bootstrap_servers'],
                            sasl_plain_username=kafka_config['sasl_plain_username'],
                            sasl_plain_password=kafka_config['sasl_plain_password'],
                            type=kafka_config['type'])
# Payload to send.
item_new = {
    'database': '',
    'table': '',
    'data': 'msg'
}
# Message key: database|table|millisecond timestamp.
keys = '|'.join([item_new['database'], item_new['table'], str(int(time.time() * 1000))])
kakfka_producer.asyn_producer_callback(item_new, key=keys)

# send() is asynchronous: without an explicit flush the script can reach its end
# before the buffered message has been delivered and its callback has fired.
if kakfka_producer.producer is not None:
    kakfka_producer.producer.flush()
kakfka_producer.close_producer()

二、消费者

消费者的代码比较简单,只需要加入鉴权的账号、密码以及鉴权机制(sasl_mechanism)即可。

from kafka import KafkaProducer, KafkaConsumer
from kafka.errors import kafka_errors
import traceback
import json
import datetime
import time

def consumer_demo():
    """
    Consume a topic over SASL (SCRAM-SHA-256 / SASL_PLAINTEXT) and print each record.

    Keys and values are expected to be JSON-encoded bytes, as produced by the
    producer in this article. The loop runs until it is interrupted; the
    consumer is closed on the way out.
    """
    config = {
        "bootstrap_servers": [""],
        "topic": "",
        "sasl_plain_username": "",
        "sasl_plain_password": "",
        "type": 1
    }

    def decode_json(raw):
        # A record may carry no key or no value; pass None through instead of
        # failing on None.decode().
        return None if raw is None else json.loads(raw.decode())

    consumer = KafkaConsumer(
        config['topic'],
        bootstrap_servers=config['bootstrap_servers'],
        sasl_mechanism="SCRAM-SHA-256",  # or PLAIN, depending on the cluster
        security_protocol='SASL_PLAINTEXT',  # or SASL_SSL
        api_version=(0, 10),
        sasl_plain_username=config['sasl_plain_username'],
        sasl_plain_password=config['sasl_plain_password'],
    )

    # Mask the password so credentials do not end up in stdout or log captures.
    print('等待接收....', {**config, 'sasl_plain_password': '***'})
    try:
        for message in consumer:
            print("receive, key: {}, value: {}".format(
                decode_json(message.key),
                decode_json(message.value)
            ))
    finally:
        # Release the connection even when the loop is interrupted or a record
        # fails to decode.
        consumer.close()

# Start the consumer demo only when this file is executed as a script.
if __name__ == "__main__":
    consumer_demo()

以上就是全部代码,有兴趣的可以了解一下,我也当做记录。