Kafka overall execution flow diagram
Kafka usage example
package com.pablo_kafka.kafka;
import org.apache.kafka.clients.producer.*;
import org.apache.kafka.common.serialization.IntegerSerializer;
import org.apache.kafka.common.serialization.StringSerializer;
import java.util.Properties;
import java.util.UUID;
public class PABLO_KafkaProducer extends Thread {
//producer instance
KafkaProducer<Integer, String> producer;
//topic
String topic;
public PABLO_KafkaProducer(String topic) {
Properties properties = new Properties();
//bootstrap.servers
properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,
"localhost:9092,localhost:9093,localhost:9094");
//client.id
properties.put(ProducerConfig.CLIENT_ID_CONFIG, "gp-producer");
//partitioner.class: register the custom partitioner (DefaultPartitioner is used when unset)
properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG,
"com.pablo_kafka.kafka.PABLO_MyPartition");
//key and value serialization
//org.apache.kafka.common.serialization.IntegerSerializer
properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
IntegerSerializer.class.getName());
properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
StringSerializer.class.getName());
//---------------- tuning parameters ------------------
//buffer memory (record accumulator) size, default 32 MB
properties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 1024L * 1024 * 32 * 2);
//batch size, default 16 KB
properties.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384 * 2);
//linger.ms: how long the sender thread waits before sending, default 0 ms
properties.put(ProducerConfig.LINGER_MS_CONFIG, 50);
//compression type, default none
properties.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy");
//-----------------------------------------
//ack policy, default -1/all; transactions require acks to be -1/all
properties.put(ProducerConfig.ACKS_CONFIG, "-1");
//retries: how many times the sender thread retries after the cluster reports a failure; default is Integer.MAX_VALUE
properties.put(ProducerConfig.RETRIES_CONFIG, 3);
//transactions require an explicitly assigned transactional.id
properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString());
//create the producer from the configuration
producer = new KafkaProducer<Integer, String>(properties);
//store the topic
this.topic = topic;
}
@Override
public void run() {
//initialize transactions
producer.initTransactions();
//begin the transaction
producer.beginTransaction();
try {
for (int i = 0; i < 20; i++) {
//plain asynchronous send
//producer.send(new ProducerRecord<>(topic,msg));
//asynchronous send with a callback that receives the result
producer.send(new ProducerRecord<>(topic, "hello" + i), (metadata, exception) -> {
//print the callback result
if (exception == null) {
System.out.println("offset: " + metadata.offset() + " partition: "
+ metadata.partition() + " topic: "
+ metadata.topic());
}
});
Thread.sleep(2000);
}
//commit the transaction
producer.commitTransaction();
} catch (Exception e) {
e.printStackTrace();
//abort the transaction
producer.abortTransaction();
} finally {
producer.close();
}
}
public static void main(String[] args) {
new PABLO_KafkaProducer("test_partitions").start();
}
}
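The producer above registers a custom partitioner through partitioner.class, but the PABLO_MyPartition class itself is not shown. The following is only a minimal sketch of what such a Partitioner implementation could look like; the routing rule here is hypothetical, and the real class may partition differently.
package com.pablo_kafka.kafka;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import java.util.Map;
//hypothetical sketch of the custom partitioner referenced by partitioner.class
public class PABLO_MyPartition implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes,
                         Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionCountForTopic(topic);
        //illustrative rule: values containing "hello" go to partition 0,
        //everything else is spread by the value's hash
        if (value != null && value.toString().contains("hello")) {
            return 0;
        }
        return (value == null ? 0 : value.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
    @Override
    public void close() {
        //nothing to release in this sketch
    }
    @Override
    public void configure(Map<String, ?> configs) {
        //no custom configuration in this sketch
    }
}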
Producer initialization source code
KafkaProducer(ProducerConfig config,
Serializer<K> keySerializer,
Serializer<V> valueSerializer,
ProducerMetadata metadata,
KafkaClient kafkaClient,
ProducerInterceptors<K, V> interceptors,
Time time) {
try {
//parse the configuration
this.producerConfig = config;
this.time = time;
//read the transactional id
String transactionalId = config.getString(ProducerConfig.TRANSACTIONAL_ID_CONFIG);
//client id
this.clientId = config.getString(ProducerConfig.CLIENT_ID_CONFIG);
LogContext logContext;
if (transactionalId == null)
logContext = new LogContext(String.format("[Producer clientId=%s] ", clientId));
else
logContext = new LogContext(String.format("[Producer clientId=%s, transactionalId=%s] ", clientId, transactionalId));
log = logContext.logger(KafkaProducer.class);
log.trace("Starting the Kafka producer");
Map<String, String> metricTags = Collections.singletonMap("client-id", clientId);
MetricConfig metricConfig = new MetricConfig().samples(config.getInt(ProducerConfig.METRICS_NUM_SAMPLES_CONFIG))
.timeWindow(config.getLong(ProducerConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS)
.recordLevel(Sensor.RecordingLevel.forName(config.getString(ProducerConfig.METRICS_RECORDING_LEVEL_CONFIG)))
.tags(metricTags);
List<MetricsReporter> reporters = config.getConfiguredInstances(ProducerConfig.METRIC_REPORTER_CLASSES_CONFIG,
MetricsReporter.class,
Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
//JMX monitoring of Kafka; tools such as EFAK require JMX_PORT to be configured in Kafka's run-class script
JmxReporter jmxReporter = new JmxReporter();
jmxReporter.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)));
reporters.add(jmxReporter);
MetricsContext metricsContext = new KafkaMetricsContext(JMX_PREFIX,
config.originalsWithPrefix(CommonClientConfigs.METRICS_CONTEXT_PREFIX));
this.metrics = new Metrics(metricConfig, reporters, time, metricsContext);
//partitioner: instantiate the (possibly custom) partitioner
this.partitioner = config.getConfiguredInstance(
//the fully qualified class name of our custom partitioner
ProducerConfig.PARTITIONER_CLASS_CONFIG,
Partitioner.class,
Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
long retryBackoffMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG);
//key/value serialization
if (keySerializer == null) {
this.keySerializer = config.getConfiguredInstance(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
Serializer.class);
this.keySerializer.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)), true);
} else {
config.ignore(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG);
this.keySerializer = keySerializer;
}
if (valueSerializer == null) {
this.valueSerializer = config.getConfiguredInstance(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
Serializer.class);
this.valueSerializer.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)), false);
} else {
config.ignore(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG);
this.valueSerializer = valueSerializer;
}
//interceptors can intercept and enrich records before they are sent
List<ProducerInterceptor<K, V>> interceptorList = (List) config.getConfiguredInstances(
ProducerConfig.INTERCEPTOR_CLASSES_CONFIG,
ProducerInterceptor.class,
Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
if (interceptors != null)
this.interceptors = interceptors;
else
this.interceptors = new ProducerInterceptors<>(interceptorList);
ClusterResourceListeners clusterResourceListeners = configureClusterResourceListeners(keySerializer,
valueSerializer, interceptorList, reporters);
//maximum size of a single request (max.request.size), default 1 MB
this.maxRequestSize = config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG);
//buffer memory size, default 32 MB
this.totalMemorySize = config.getLong(ProducerConfig.BUFFER_MEMORY_CONFIG);
//data compression type
this.compressionType = CompressionType.forName(config.getString(ProducerConfig.COMPRESSION_TYPE_CONFIG));
this.maxBlockTimeMs = config.getLong(ProducerConfig.MAX_BLOCK_MS_CONFIG);
int deliveryTimeoutMs = configureDeliveryTimeout(config, log);
this.apiVersions = new ApiVersions();
this.transactionManager = configureTransactionState(config, logContext);
//the record accumulator (buffer)
this.accumulator = new RecordAccumulator(logContext,
//batch size, default 16 KB
config.getInt(ProducerConfig.BATCH_SIZE_CONFIG),
//compression type, default none
this.compressionType,
//sender linger time
lingerMs(config),
retryBackoffMs,
deliveryTimeoutMs,
metrics,
PRODUCER_METRIC_GROUP_NAME,
time,
apiVersions,
transactionManager,
//create the buffer pool
new BufferPool(this.totalMemorySize, config.getInt(ProducerConfig.BATCH_SIZE_CONFIG), metrics, time, PRODUCER_METRIC_GROUP_NAME));
//Kafka cluster addresses
List<InetSocketAddress> addresses = ClientUtils.parseAndValidateAddresses(
config.getList(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG),
config.getString(ProducerConfig.CLIENT_DNS_LOOKUP_CONFIG));
if (metadata != null) {
this.metadata = metadata;
} else {
this.metadata = new ProducerMetadata(retryBackoffMs,
config.getLong(ProducerConfig.METADATA_MAX_AGE_CONFIG),
config.getLong(ProducerConfig.METADATA_MAX_IDLE_CONFIG),
logContext,
clusterResourceListeners,
Time.SYSTEM);
this.metadata.bootstrap(addresses);
}
this.errors = this.metrics.sensor("errors");
//create the sender
this.sender = newSender(logContext, kafkaClient, this.metadata);
String ioThreadName = NETWORK_THREAD_PREFIX + " | " + clientId;
//start the sender thread as a daemon thread
this.ioThread = new KafkaThread(ioThreadName, this.sender, true);
this.ioThread.start();
config.logUnused();
AppInfoParser.registerAppInfo(JMX_PREFIX, clientId, metrics, time.milliseconds());
log.debug("Kafka producer started");
} catch (Throwable t) {
// call close methods if internal objects are already constructed this is to prevent resource leak. see KAFKA-2121
close(Duration.ofMillis(0), true);
// now propagate the exception
throw new KafkaException("Failed to construct kafka producer", t);
}
}
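In the serializer section above, the constructor either instantiates the classes named by key.serializer/value.serializer and calls configure(configs, isKey) on them, or takes instances passed in directly and ignores the config entries. As a hedged illustration of the contract such a class has to satisfy (a made-up example, not part of the walkthrough):
import org.apache.kafka.common.serialization.Serializer;
import java.nio.charset.StandardCharsets;
import java.util.Map;
//hypothetical value serializer illustrating the Serializer contract used above
public class UpperCaseStringSerializer implements Serializer<String> {
    @Override
    public void configure(Map<String, ?> configs, boolean isKey) {
        //invoked by the KafkaProducer constructor with the producer configs
        //and the isKey flag, exactly as in the source above
    }
    @Override
    public byte[] serialize(String topic, String data) {
        return data == null ? null : data.toUpperCase().getBytes(StandardCharsets.UTF_8);
    }
    @Override
    public void close() {
        //nothing to release
    }
}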
Sender newSender(LogContext logContext, KafkaClient kafkaClient, ProducerMetadata metadata) {
//maximum number of in-flight requests cached per connection to the cluster
//combined with idempotence this can be used to guarantee ordering
int maxInflightRequests = configureInflightRequests(producerConfig);
//timeout for sender requests to the Kafka cluster, default 30 s
int requestTimeoutMs = producerConfig.getInt(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG);
ChannelBuilder channelBuilder = ClientUtils.createChannelBuilder(producerConfig, time, logContext);
ProducerMetrics metricsRegistry = new ProducerMetrics(this.metrics);
Sensor throttleTimeSensor = Sender.throttleTimeSensor(metricsRegistry.senderMetrics);
//create the network client; from the broker's point of view, producers and consumers are both clients
KafkaClient client = kafkaClient != null ? kafkaClient : new NetworkClient(
new Selector(producerConfig.getLong(ProducerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG),
this.metrics, time, "producer", channelBuilder, logContext),
metadata,
clientId,
maxInflightRequests,
//backoff before the client retries connecting to a broker
producerConfig.getLong(ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG),
//maximum reconnect backoff
producerConfig.getLong(ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG),
//socket send buffer for sender -> broker, default 128 KB
producerConfig.getInt(ProducerConfig.SEND_BUFFER_CONFIG),
//socket receive buffer for broker responses, default 32 KB
producerConfig.getInt(ProducerConfig.RECEIVE_BUFFER_CONFIG),
requestTimeoutMs,
producerConfig.getLong(ProducerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MS_CONFIG),
producerConfig.getLong(ProducerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MAX_MS_CONFIG),
time,
true,
apiVersions,
throttleTimeSensor,
logContext);
//broker acknowledgement (acks) setting:
//0: do not wait for a response
//1: respond once the partition leader has received the record
//-1: respond once the partition leader and its followers have received the record, equivalent to all
short acks = configureAcks(producerConfig, log);
//Sender implements Runnable and runs as the sender thread
return new Sender(logContext,
client,
metadata,
//the record accumulator (buffer queues)
this.accumulator,
maxInflightRequests == 1,
producerConfig.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG),
acks,
producerConfig.getInt(ProducerConfig.RETRIES_CONFIG),
metricsRegistry.senderMetrics,
time,
requestTimeoutMs,
producerConfig.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG),
this.transactionManager,
apiVersions);
}
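The comments in newSender mention that max.in.flight.requests.per.connection can be combined with idempotence to preserve ordering. A small configuration sketch, reusing Properties and ProducerConfig from the example at the top; the values are illustrative, not taken from the walkthrough:
Properties props = new Properties();
//idempotence keeps per-partition ordering even when retries happen,
//as long as max.in.flight.requests.per.connection is at most 5
props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);
props.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, 5);
//idempotence also requires acks=all and retries > 0
props.put(ProducerConfig.ACKS_CONFIG, "all");
props.put(ProducerConfig.RETRIES_CONFIG, Integer.MAX_VALUE);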
Source code: sending data from the producer to the Kafka cluster
send: entry point for sending data
public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
// run the configured interceptors
ProducerRecord<K, V> interceptedRecord = this.interceptors.onSend(record);
//append the record to the buffer queues
return doSend(interceptedRecord, callback);
}
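send() first passes the record through interceptors.onSend(...). As a hedged sketch of what a custom interceptor looks like (a hypothetical class that would be registered via interceptor.classes; it is not part of the example above):
import org.apache.kafka.clients.producer.ProducerInterceptor;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.util.Map;
//hypothetical interceptor: prefixes every value before serialization and partitioning
public class PrefixingInterceptor implements ProducerInterceptor<Integer, String> {
    @Override
    public ProducerRecord<Integer, String> onSend(ProducerRecord<Integer, String> record) {
        //called from KafkaProducer.send() before doSend()
        return new ProducerRecord<>(record.topic(), record.partition(), record.timestamp(),
                record.key(), "intercepted-" + record.value(), record.headers());
    }
    @Override
    public void onAcknowledgement(RecordMetadata metadata, Exception exception) {
        //called when the broker acknowledges the record or the send fails
    }
    @Override
    public void close() {
    }
    @Override
    public void configure(Map<String, ?> configs) {
    }
}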
//the main thread's send() appends data to the buffer queues via doSend
private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback callback) {
TopicPartition tp = null;
try {
throwIfProducerClosed();
//the partitions of the buffer queues correspond to the topic partitions in the cluster metadata
//the producer does not send directly to the broker: data first goes to the buffer, and the buffer needs the broker metadata so that records map to the correct partitions
long nowMs = time.milliseconds();
KafkaProducer.ClusterAndWaitTime clusterAndWaitTime;
try {
clusterAndWaitTime = waitOnMetadata(record.topic(), record.partition(), nowMs, maxBlockTimeMs);
} catch (KafkaException e) {
if (metadata.isClosed())
throw new KafkaException("Producer closed while send in progress", e);
throw e;
}
nowMs += clusterAndWaitTime.waitedOnMetadataMs;
long remainingWaitMs = Math.max(0, maxBlockTimeMs - clusterAndWaitTime.waitedOnMetadataMs);
Cluster cluster = clusterAndWaitTime.cluster;
//serialization
byte[] serializedKey;
try {
serializedKey = keySerializer.serialize(record.topic(), record.headers(), record.key());
} catch (ClassCastException cce) {
throw new SerializationException("Can't convert key of class " + record.key().getClass().getName() +
" to class " + producerConfig.getClass(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).getName() +
" specified in key.serializer", cce);
}
byte[] serializedValue;
try {
serializedValue = valueSerializer.serialize(record.topic(), record.headers(), record.value());
} catch (ClassCastException cce) {
throw new SerializationException("Can't convert value of class " + record.value().getClass().getName() +
" to class " + producerConfig.getClass(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).getName() +
" specified in value.serializer", cce);
}
//partitioning
//if send() specifies a partition, that partition (or the custom partitioner's result) is used
//if no partition is specified:
//with a key: the serialized key is hashed (murmur2 in DefaultPartitioner) modulo the number of partitions
//without a key: sticky partitioning, a randomly chosen partition is used until 16 KB (default) is reached or linger.ms expires; once that batch is sent to the broker, a new partition is chosen
int partition = partition(record, serializedKey, serializedValue, cluster);
tp = new TopicPartition(record.topic(), partition);
setReadOnly(record.headers());
Header[] headers = record.headers().toArray();
//estimate the serialized (and compressed) size
int serializedSize = AbstractRecords.estimateSizeInBytesUpperBound(apiVersions.maxUsableProduceMagic(),
compressionType, serializedKey, serializedValue, headers);
//make sure the serialized and compressed record can be transmitted:
//check that a single record is not larger than 1 MB (max.request.size, configurable)
//check that the record is not larger than the 32 MB buffer (buffer.memory, configurable)
ensureValidRecordSize(serializedSize);
long timestamp = record.timestamp() == null ? nowMs : record.timestamp();
if (log.isTraceEnabled()) {
log.trace("Attempting to append record {} with callback {} to topic {} partition {}", record, callback, record.topic(), partition);
}
//wrap the user callback together with the interceptor callback
Callback interceptCallback = new KafkaProducer.InterceptorCallback<>(callback, this.interceptors, tp);
if (transactionManager != null && transactionManager.isTransactional()) {
transactionManager.failIfNotReadyForSend();
}
//append the record to the buffer queue of its topic partition:
//a queue is kept per topic partition
//the record is appended to the matching queue
//if a single record is larger than 16 KB (the default batch size), the batch is sized to the record; the record is never split
//memory for the batch is requested from the buffer pool and returned after use (e.g. 16 KB per batch, flyweight-style reuse)
//so the final batch size is at least 16 KB but may be larger
//the record is added to the queue of its topic partition
//result reports whether the append succeeded
RecordAccumulator.RecordAppendResult result = accumulator.append(tp, timestamp, serializedKey,
serializedValue, headers, interceptCallback, remainingWaitMs, true, nowMs);
if (result.abortForNewBatch) {
int prevPartition = partition;
partitioner.onNewBatch(record.topic(), cluster, prevPartition);
partition = partition(record, serializedKey, serializedValue, cluster);
tp = new TopicPartition(record.topic(), partition);
if (log.isTraceEnabled()) {
log.trace("Retrying append due to new batch creation for topic {} partition {}. The old partition was {}", record.topic(), partition, prevPartition);
}
// producer callback will make sure to call both 'callback' and interceptor callback
interceptCallback = new KafkaProducer.InterceptorCallback<>(callback, this.interceptors, tp);
result = accumulator.append(tp, timestamp, serializedKey,
serializedValue, headers, interceptCallback, remainingWaitMs, false, nowMs);
}
if (transactionManager != null && transactionManager.isTransactional())
transactionManager.maybeAddPartitionToTransaction(tp);
//the batch is full (16 KB by default)
//or a new batch was created
if (result.batchIsFull || result.newBatchCreated) {
log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), partition);
//wake up the sender thread (NIO Selector): the condition for sending data to the Kafka cluster has been met
this.sender.wakeup();
}
return result.future;
// handling exceptions and record the errors;
// for API exceptions return them in the future,
// for other exceptions throw directly
} catch (ApiException e) {
log.debug("Exception occurred during message send:", e);
if (callback != null)
callback.onCompletion(null, e);
this.errors.record();
this.interceptors.onSendError(record, tp, e);
return new KafkaProducer.FutureFailure(e);
} catch (InterruptedException e) {
this.errors.record();
this.interceptors.onSendError(record, tp, e);
throw new InterruptException(e);
} catch (KafkaException e) {
this.errors.record();
this.interceptors.onSendError(record, tp, e);
throw e;
} catch (Exception e) {
// we notify interceptor about all exceptions, since onSend is called before anything else in this method
this.interceptors.onSendError(record, tp, e);
throw e;
}
}
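For records that carry a key, the partition(...) call above delegates (by default) to DefaultPartitioner, which hashes the serialized key with murmur2. A minimal sketch of that computation, assuming the helper methods in org.apache.kafka.common.utils.Utils:
import org.apache.kafka.common.utils.Utils;
//sketch of the keyed branch of partition(): murmur2-hash the serialized key
//and map it onto the topic's partitions, mirroring DefaultPartitioner
class KeyPartitioningSketch {
    static int partitionForKey(byte[] serializedKey, int numPartitions) {
        return Utils.toPositive(Utils.murmur2(serializedKey)) % numPartitions;
    }
}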
Sender thread sends buffered data to the Kafka cluster (key code)
//Sender.run
public void run() {
log.debug("Starting Kafka producer I/O thread.");
//keep running until close() is called
while (running) {
try {
runOnce();
} catch (Exception e) {
log.error("Uncaught error in kafka producer I/O thread: ", e);
}
}
//........
}
void runOnce() {
//transaction-related handling
if (transactionManager != null) {
try {
transactionManager.maybeResolveSequences();
//check the transaction manager state
if (transactionManager.hasFatalError()) {
RuntimeException lastError = transactionManager.lastError();
if (lastError != null)
maybeAbortBatches(lastError);
client.poll(retryBackoffMs, time.milliseconds());
return;
}
// Check whether we need a new producerId. If so, we will enqueue an InitProducerId
// request which will be sent below
transactionManager.bumpIdempotentEpochAndResetIdIfNeeded();
if (maybeSendAndPollTransactionalRequest()) {
return;
}
} catch (AuthenticationException e) {
// This is already logged as error, but propagated here to perform any clean ups.
log.trace("Authentication exception while processing transactional request", e);
transactionManager.authenticationFailed(e);
}
}
long currentTimeMs = time.milliseconds();
//send data
long pollTimeout = sendProducerData(currentTimeMs);
//poll for the corresponding responses
client.poll(pollTimeout, currentTimeMs);
}
//actually send the data: sender --> Kafka cluster
private long sendProducerData(long now) {
//fetch the cluster metadata; the sender needs to know which partition of the Kafka cluster each buffered queue maps to
Cluster cluster = metadata.fetch();
//get the buffered data that is ready to be sent
//readiness takes linger.ms, the sender's wait time, into account
RecordAccumulator.ReadyCheckResult result = this.accumulator.ready(cluster, now);
//........
//drain the ready batches, grouped by broker node
Map<Integer, List<ProducerBatch>> batches = this.accumulator.drain(cluster, result.readyNodes, this.maxRequestSize, now);
addToInflightBatches(batches);
//.......
sendProduceRequests(batches, now);
return pollTimeout;
}
//sendProduceRequests eventually calls NetworkClient.doSend
private void doSend(ClientRequest clientRequest, boolean isInternalRequest, long now, AbstractRequest request) {
String destination = clientRequest.destination();
RequestHeader header = clientRequest.makeHeader(request.version());
if (log.isDebugEnabled()) {
log.debug("Sending {} request with header {} and timeout {} to node {}: {}",
clientRequest.apiKey(), header, clientRequest.requestTimeoutMs(), destination, request);
}
Send send = request.toSend(header);
NetworkClient.InFlightRequest inFlightRequest = new NetworkClient.InFlightRequest(
clientRequest,
header,
isInternalRequest,
request,
send,
now);
this.inFlightRequests.add(inFlightRequest);
//NIO communication via the Selector
selector.send(new NetworkSend(clientRequest.destination(), send));
}