解压flume

cd /root
tar -xvzf apache-flume-1.9.0-bin.tar.gz
cd apache-flume-1.9.0-bin


配置环境变量(可忽略)

vim /etc/profile

export FLUME_HOME=/root/apache-flume-1.9.0-bin
export PATH=$PATH:$FLUME_HOME/bin

source /etc/profile


修改配置文件

cd conf
cp flume-env.sh.template flume-env.sh
cp flume-conf.properties.template flume-kafka.properties
vim flume-kafka.properties

# Agent a1: one exec source (r1) -> one memory channel (c1) -> one Kafka sink (k1)
a1.sources = r1
a1.channels = c1
a1.sinks = k1


a1.sources.r1.type = exec
# tail -F follows the file by name, so it keeps working across log rotation
a1.sources.r1.command = tail -F /tmp/logs/update.log
# bind the source to the channel
a1.sources.r1.channels = c1


# In-memory channel: fast, but events are lost if the agent dies
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=100


a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
# Kafka topic the events are published to (must exist — created below)
a1.sinks.k1.topic = logtopic
# Kafka broker list (cluster addresses)
a1.sinks.k1.brokerList = 10.99.67.164:9092,10.99.67.166:9092,10.99.67.168:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20
# bind the sink to the channel
a1.sinks.k1.channel = c1

flume集成kafka_java


先去kafka创建对应topic

由于 HDP 3.1.5 集成的 Kafka 版本是 2.0，使用的命令较旧，具体用法可查看命令帮助。

./bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic logtopic

返回下面即为正常。

Created topic "logtopic".

flume集成kafka_apache_02


编写生产日志文件的脚本:

vim createLog.sh
#!/bin/bash
# Traffic generator for the Flume exec source: appends a test line plus a
# timestamp to /tmp/logs/update.log twice per second, forever (stop with Ctrl-C).
logfile=/tmp/logs/update.log
# make sure the target directory exists, otherwise every append fails
mkdir -p -- "$(dirname -- "$logfile")"
while true; do
  # fixed typo: "wirte" -> "write"; printf is safer than echo for data
  printf '%s\n' "This is create log script write Log. " >> "$logfile"
  time3=$(date "+%Y-%m-%d %H:%M:%S")
  printf '%s\n' "$time3" >> "$logfile"
  sleep 0.5
done

运行脚本生成日志。


运行flume,把日志送到kafka

./bin/flume-ng agent -c conf -f conf/flume-kafka.properties -n a1 -Dflume.root.logger=INFO,console

flume集成kafka_kafka_03

出现下面内容即为正常。

2022-10-25 11:18:45,684 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.register(MonitoredCounterGroup.java:119)] Monitored counter group for type: CHANNEL, name: c1: Successfully registered new MBean.
2022-10-25 11:18:45,684 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.start(MonitoredCounterGroup.java:95)] Component type: CHANNEL, name: c1 started
2022-10-25 11:18:45,684 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:196)] Starting Sink k1
2022-10-25 11:18:45,685 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:207)] Starting Source r1
2022-10-25 11:18:45,685 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.source.ExecSource.start(ExecSource.java:170)] Exec source starting with command: tail -F /tmp/logs/update.log
2022-10-25 11:18:45,686 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.register(MonitoredCounterGroup.java:119)] Monitored counter group for type: SOURCE, name: r1: Successfully registered new MBean.
2022-10-25 11:18:45,686 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.start(MonitoredCounterGroup.java:95)] Component type: SOURCE, name: r1 started
2022-10-25 11:18:45,713 (lifecycleSupervisor-1-1) [INFO - org.apache.kafka.common.config.AbstractConfig.logAll(AbstractConfig.java:279)] ProducerConfig values:
acks = 1
batch.size = 16384
bootstrap.servers = [10.99.67.164:9092, 10.99.67.166:9092, 10.99.67.168:9092]
buffer.memory = 33554432
client.id =
compression.type = none
connections.max.idle.ms = 540000
enable.idempotence = false
interceptor.classes = []
key.serializer = class org.apache.kafka.common.serialization.StringSerializer
linger.ms = 0
max.block.ms = 60000
max.in.flight.requests.per.connection = 5
max.request.size = 1048576
metadata.max.age.ms = 300000
metric.reporters = []
metrics.num.samples = 2
metrics.recording.level = INFO
metrics.sample.window.ms = 30000
partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner
receive.buffer.bytes = 32768
reconnect.backoff.max.ms = 1000
reconnect.backoff.ms = 50
request.timeout.ms = 30000
retries = 0
retry.backoff.ms = 100
sasl.client.callback.handler.class = null
sasl.jaas.config = null
sasl.kerberos.kinit.cmd = /usr/bin/kinit
sasl.kerberos.min.time.before.relogin = 60000
sasl.kerberos.service.name = null
sasl.kerberos.ticket.renew.jitter = 0.05
sasl.kerberos.ticket.renew.window.factor = 0.8
sasl.login.callback.handler.class = null
sasl.login.class = null
sasl.login.refresh.buffer.seconds = 300
sasl.login.refresh.min.period.seconds = 60
sasl.login.refresh.window.factor = 0.8
sasl.login.refresh.window.jitter = 0.05
sasl.mechanism = GSSAPI
security.protocol = PLAINTEXT
send.buffer.bytes = 131072
ssl.cipher.suites = null
ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]
ssl.endpoint.identification.algorithm = https
ssl.key.password = null
ssl.keymanager.algorithm = SunX509
ssl.keystore.location = null
ssl.keystore.password = null
ssl.keystore.type = JKS
ssl.protocol = TLS
ssl.provider = null
ssl.secure.random.implementation = null
ssl.trustmanager.algorithm = PKIX
ssl.truststore.location = null
ssl.truststore.password = null
ssl.truststore.type = JKS
transaction.timeout.ms = 60000
transactional.id = null
value.serializer = class org.apache.kafka.common.serialization.ByteArraySerializer

2022-10-25 11:18:45,772 (lifecycleSupervisor-1-1) [INFO - org.apache.kafka.common.utils.AppInfoParser$AppInfo.<init>(AppInfoParser.java:109)] Kafka version : 2.0.1
2022-10-25 11:18:45,772 (lifecycleSupervisor-1-1) [INFO - org.apache.kafka.common.utils.AppInfoParser$AppInfo.<init>(AppInfoParser.java:110)] Kafka commitId : fa14705e51bd2ce5
2022-10-25 11:18:45,774 (lifecycleSupervisor-1-1) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.register(MonitoredCounterGroup.java:119)] Monitored counter group for type: SINK, name: k1: Successfully registered new MBean.
2022-10-25 11:18:45,774 (lifecycleSupervisor-1-1) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.start(MonitoredCounterGroup.java:95)] Component type: SINK, name: k1 started
2022-10-25 11:18:48,226 (kafka-producer-network-thread | producer-1) [INFO - org.apache.kafka.clients.Metadata.update(Metadata.java:285)] Cluster ID: lrVPVSVvTBG2XH_KCa4s0w

flume集成kafka_java_04

flume集成kafka_java_05


kafka控制台消费验证

./bin/kafka-console-consumer.sh --from-beginning --bootstrap-server 10.99.67.164:9092 --topic logtopic
./bin/kafka-console-consumer.sh --from-beginning --bootstrap-server 10.99.67.166:9092 --topic logtopic
./bin/kafka-console-consumer.sh --from-beginning --bootstrap-server 10.99.67.168:9092 --topic logtopic
./bin/kafka-console-consumer.sh --from-beginning --bootstrap-server 10.99.67.171:9092 --topic logtopic

flume集成kafka_kafka_06