The Kafka source code has compatibility requirements on the Java and Gradle versions; with mismatched versions the imported source will not compile. The versions I used are listed below.
| Kafka source | |
| --- | --- |
| Java version | 11 |
| Gradle version | 7.4.1 |
Illustrated KafkaProducer flow
Source code analysis
The diagram above outlines the overall flow of Kafka producer initialization; next, we reconstruct that flow at the code level.
After importing the source into IDEA you will find an examples project that contains producer and consumer samples; it makes a good entry point for studying the source.
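For orientation, a minimal producer along the lines of those samples might look like the following sketch; the broker address "localhost:9092" and the topic "demo-topic" are placeholders:

```java
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class ProducerDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        // broker address; "localhost:9092" is a placeholder for your cluster
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // key/value serializers, resolved via reflection in the constructor
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // "demo-topic" is a hypothetical topic name
            producer.send(new ProducerRecord<>("demo-topic", "key", "value"));
        }
    }
}
```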
Some configuration is set before the producer is initialized, including the Kafka address and the key/value serializers. Of course, this configuration is far from enough for real-world use (I will collect some Kafka tuning parameters as the analysis proceeds). From there, layer after layer of constructor calls leads into the core KafkaProducer constructor; the complete code is as follows:
```java
KafkaProducer(ProducerConfig config,
              Serializer<K> keySerializer,
              Serializer<V> valueSerializer,
              ProducerMetadata metadata,
              KafkaClient kafkaClient,
              ProducerInterceptors<K, V> interceptors,
              Time time) {
    try {
        this.producerConfig = config;
        this.time = time;
        String transactionalId = config.getString(ProducerConfig.TRANSACTIONAL_ID_CONFIG); // transactional id
        this.clientId = config.getString(ProducerConfig.CLIENT_ID_CONFIG); // client id; if not set, a default client.id is generated per producer: "producer-" plus an incrementing number, e.g. producer-1, producer-2
        /* logging context */
        LogContext logContext;
        if (transactionalId == null)
            logContext = new LogContext(String.format("[Producer clientId=%s] ", clientId));
        else
            logContext = new LogContext(String.format("[Producer clientId=%s, transactionalId=%s] ", clientId, transactionalId));
        log = logContext.logger(KafkaProducer.class);
        log.trace("Starting the Kafka producer");
        /* metrics tied to this clientId */
        Map<String, String> metricTags = Collections.singletonMap("client-id", clientId); // an immutable single-entry map
        MetricConfig metricConfig = new MetricConfig()
                .samples(config.getInt(ProducerConfig.METRICS_NUM_SAMPLES_CONFIG))
                .timeWindow(config.getLong(ProducerConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS)
                .recordLevel(Sensor.RecordingLevel.forName(config.getString(ProducerConfig.METRICS_RECORDING_LEVEL_CONFIG)))
                .tags(metricTags);
        List<MetricsReporter> reporters = config.getConfiguredInstances(ProducerConfig.METRIC_REPORTER_CLASSES_CONFIG,
                MetricsReporter.class,
                Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
        JmxReporter jmxReporter = new JmxReporter();
        jmxReporter.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)));
        reporters.add(jmxReporter);
        MetricsContext metricsContext = new KafkaMetricsContext(JMX_PREFIX,
                config.originalsWithPrefix(CommonClientConfigs.METRICS_CONTEXT_PREFIX));
        this.metrics = new Metrics(metricConfig, reporters, time, metricsContext);
        /* partitioner (instantiated via reflection) */
        this.partitioner = config.getConfiguredInstance(
                ProducerConfig.PARTITIONER_CLASS_CONFIG,
                Partitioner.class,
                Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
        long retryBackoffMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG);
        /* key and value serializers */
        if (keySerializer == null) {
            this.keySerializer = config.getConfiguredInstance(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, Serializer.class);
            this.keySerializer.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)), true);
        } else {
            config.ignore(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG);
            this.keySerializer = keySerializer;
        }
        if (valueSerializer == null) {
            this.valueSerializer = config.getConfiguredInstance(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, Serializer.class);
            this.valueSerializer.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)), false);
        } else {
            config.ignore(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG);
            this.valueSerializer = valueSerializer;
        }
        /* interceptors (there can be more than one) */
        List<ProducerInterceptor<K, V>> interceptorList = (List) config.getConfiguredInstances(
                ProducerConfig.INTERCEPTOR_CLASSES_CONFIG,
                ProducerInterceptor.class,
                Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
        if (interceptors != null)
            this.interceptors = interceptors;
        else
            this.interceptors = new ProducerInterceptors<>(interceptorList);
        // a collection of listeners notified when cluster metadata is updated
        ClusterResourceListeners clusterResourceListeners = configureClusterResourceListeners(keySerializer,
                valueSerializer, interceptorList, reporters);
        /*
         * max size of a single request: max.request.size, default 1 MB
         * buffer size: buffer.memory, default 32 MB
         * compression: compression.type, default none
         */
        this.maxRequestSize = config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG);
        this.totalMemorySize = config.getLong(ProducerConfig.BUFFER_MEMORY_CONFIG);
        this.compressionType = CompressionType.forName(config.getString(ProducerConfig.COMPRESSION_TYPE_CONFIG));
        this.maxBlockTimeMs = config.getLong(ProducerConfig.MAX_BLOCK_MS_CONFIG);
        int deliveryTimeoutMs = configureDeliveryTimeout(config, log);
        this.apiVersions = new ApiVersions();
        this.transactionManager = configureTransactionState(config, logContext);
        /* create the producer-side buffer (record accumulator) */
        this.accumulator = new RecordAccumulator(logContext,
                config.getInt(ProducerConfig.BATCH_SIZE_CONFIG), // batch size, default 16 KB
                this.compressionType, // compression type, default none
                lingerMs(config),
                retryBackoffMs,
                deliveryTimeoutMs,
                metrics,
                PRODUCER_METRIC_GROUP_NAME,
                time,
                apiVersions,
                transactionManager,
                new BufferPool(this.totalMemorySize, // buffer pool, default 32 MB
                        config.getInt(ProducerConfig.BATCH_SIZE_CONFIG),
                        metrics, time, PRODUCER_METRIC_GROUP_NAME));
        // parse and validate the bootstrap addresses of the Kafka cluster
        List<InetSocketAddress> addresses = ClientUtils.parseAndValidateAddresses(
                config.getList(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG),
                config.getString(ProducerConfig.CLIENT_DNS_LOOKUP_CONFIG)); // client.dns.lookup controls how the client performs DNS lookups
        /* metadata */
        if (metadata != null) {
            this.metadata = metadata;
        } else {
            this.metadata = new ProducerMetadata(retryBackoffMs,
                    config.getLong(ProducerConfig.METADATA_MAX_AGE_CONFIG),
                    config.getLong(ProducerConfig.METADATA_MAX_IDLE_CONFIG),
                    logContext,
                    clusterResourceListeners,
                    Time.SYSTEM);
            this.metadata.bootstrap(addresses);
        }
        this.errors = this.metrics.sensor("errors");
        /* create the Sender thread that drains the accumulator and sends to the partitions */
        this.sender = newSender(logContext, kafkaClient, this.metadata); // the sender is itself a runnable task
        String ioThreadName = NETWORK_THREAD_PREFIX + " | " + clientId; // thread name
        // run the sender as a background (daemon) thread
        this.ioThread = new KafkaThread(ioThreadName, this.sender, true);
        // start the sender thread, which invokes its run() method
        this.ioThread.start();
        config.logUnused();
        AppInfoParser.registerAppInfo(JMX_PREFIX, clientId, metrics, time.milliseconds());
        log.debug("Kafka producer started");
    } catch (Throwable t) {
        // call close methods if internal objects are already constructed; this is to prevent resource leaks. see KAFKA-2121
        close(Duration.ofMillis(0), true);
        // now propagate the exception
        throw new KafkaException("Failed to construct kafka producer", t);
    }
}
```
Partitioner
Every message Kafka sends goes through a routing step, which is simply the decision of which partition the record lands in. We can customize this routing by setting the producer's partitioner.class parameter; the default partitioner is DefaultPartitioner.
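As an illustration of the Partitioner interface, here is a minimal sketch of a custom partitioner. The class name KeyHashPartitioner and the hash-the-key strategy are mine, not from the Kafka code base; the built-in DefaultPartitioner does considerably more (e.g. sticky partitioning for keyless records):

```java
import java.util.Map;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.utils.Utils;

// Hypothetical partitioner: route by a hash of the serialized key,
// falling back to partition 0 when the key is absent.
public class KeyHashPartitioner implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes,
                         Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionsForTopic(topic).size();
        if (keyBytes == null)
            return 0;
        return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
    }

    @Override
    public void close() {}

    @Override
    public void configure(Map<String, ?> configs) {}
}
```

It would be registered through the partitioner.class config, e.g. `props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, KeyHashPartitioner.class.getName())`, and is then instantiated reflectively by the constructor above.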
Serializer
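As the constructor above shows, the serializers are resolved from key.serializer and value.serializer (or taken from instances passed in directly); their job is to turn keys and values into the byte arrays that actually go over the wire. A minimal sketch of a custom serializer, assuming the modern Serializer interface with default configure()/close() methods; the hypothetical Utf8StringSerializer mirrors in spirit what the built-in StringSerializer does:

```java
import java.nio.charset.StandardCharsets;
import org.apache.kafka.common.serialization.Serializer;

// Hypothetical serializer: writes a String as UTF-8 bytes.
public class Utf8StringSerializer implements Serializer<String> {
    @Override
    public byte[] serialize(String topic, String data) {
        return data == null ? null : data.getBytes(StandardCharsets.UTF_8);
    }
}
```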
Interceptor
For the producer, interceptors give users a chance to customize messages before they are sent, and to hook in before the producer callback logic runs, for example to modify a message.
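A minimal sketch of such an interceptor; the class name PrefixInterceptor and the value-prefixing behavior are illustrative only:

```java
import java.util.Map;
import org.apache.kafka.clients.producer.ProducerInterceptor;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

// Hypothetical interceptor: prefixes every value before the record is sent.
public class PrefixInterceptor implements ProducerInterceptor<String, String> {
    @Override
    public ProducerRecord<String, String> onSend(ProducerRecord<String, String> record) {
        // called on the user thread, before serialization and partitioning
        return new ProducerRecord<>(record.topic(), record.partition(),
                record.timestamp(), record.key(), "prefix-" + record.value(), record.headers());
    }

    @Override
    public void onAcknowledgement(RecordMetadata metadata, Exception exception) {
        // called before the user callback, on success or failure
    }

    @Override
    public void close() {}

    @Override
    public void configure(Map<String, ?> configs) {}
}
```

Interceptors are registered through interceptor.classes and, as the constructor shows, several can be chained via ProducerInterceptors. Note that onSend runs on the calling thread, so heavy work there slows every send().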
Record collector: RecordAccumulator
To reduce network requests and improve throughput, Kafka does not push each message from the client to the server directly. Instead, messages are first stored in a client-side record collector (a buffer) and only shipped once a batch fills up (batch.size) or the linger time (linger.ms) expires. That record collector is the RecordAccumulator.
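Both thresholds are ordinary producer configs; a sketch of how they might be tuned together (the values are illustrative, not recommendations):

```java
import java.util.Properties;
import org.apache.kafka.clients.producer.ProducerConfig;

public class BatchTuning {
    static Properties batchingProps() {
        Properties props = new Properties();
        // allow up to 32 KB per batch instead of the 16 KB default (illustrative value)
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 32 * 1024);
        // let records linger up to 10 ms so batches can fill; the default is 0
        props.put(ProducerConfig.LINGER_MS_CONFIG, 10);
        // total accumulator memory, default 32 MB
        props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 32 * 1024 * 1024L);
        return props;
    }
}
```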
Metadata management: Metadata
Before a message can be written to a broker, the producer must first know which partition the data goes to and which broker hosts that partition. Metadata is the component that pulls the cluster metadata from the brokers: the topics and their layout (Topic -> Partitions(Leader+Followers, ISR)).
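The constructor above reads two configs into ProducerMetadata; a sketch of setting them explicitly (the values shown are, to my understanding, the defaults of five minutes each):

```java
import java.util.Properties;
import org.apache.kafka.clients.producer.ProducerConfig;

public class MetadataTuning {
    static Properties metadataProps() {
        Properties props = new Properties();
        // force a metadata refresh at least every 5 minutes, even without changes
        props.put(ProducerConfig.METADATA_MAX_AGE_CONFIG, 5 * 60 * 1000);
        // drop cached metadata for topics that have been idle longer than 5 minutes
        props.put(ProducerConfig.METADATA_MAX_IDLE_CONFIG, 5 * 60 * 1000);
        return props;
    }
}
```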
Network communication: NetworkClient
Kafka sends messages asynchronously by default: the main thread produces records into the record collector (RecordAccumulator) described above, while a separate Sender thread pulls them out and sends them to the brokers.
Before the Sender thread is initialized, the NetworkClient component is built to serve as the network transport bridge.
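To make the asynchrony concrete, here is a minimal sketch of an asynchronous send with a callback (broker address and topic name are placeholders). send() merely appends the record to the RecordAccumulator and returns immediately; the callback fires later, from the Sender/IO thread, once the broker responds:

```java
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class AsyncSendDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // send() is non-blocking; the lambda is the delivery callback
            producer.send(new ProducerRecord<>("demo-topic", "key", "value"), (metadata, exception) -> {
                if (exception != null)
                    exception.printStackTrace();
                else
                    System.out.printf("sent to partition %d at offset %d%n",
                            metadata.partition(), metadata.offset());
            });
        }
    }
}
```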