A single-node setup for ingesting test data into Kafka.
Aliyun mirror: flume
1. Extract and install
cd /opt/apps/
tar -zxvf apache-flume-1.9.0-bin.tar.gz
mv apache-flume-1.9.0-bin flume-1.9.0
2. Add environment variables
echo 'export FLUME_HOME=/opt/apps/flume-1.9.0' >> /etc/profile \
&& echo 'export PATH=$PATH:$FLUME_HOME/bin' >> /etc/profile
source /etc/profile
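To sanity-check the install and PATH before going further, Flume's launcher has a built-in version command:
flume-ng version
# the output should include "Flume 1.9.0"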
3. Edit the configuration files
flume-env.sh
# locate the java installation
which java
# e.g. /usr/local/java/bin/java
cd flume-1.9.0/conf/
cp flume-env.sh.template flume-env.sh
vim flume-env.sh
# uncomment and set JAVA_HOME
export JAVA_HOME=/usr/local/java
flume-conf.properties
cp flume-conf.properties.template flume-conf.properties
vim flume-conf.properties
The TAILDIR source is used here; it records its read offsets in a position file, so tailing resumes where it left off after a restart.
a1.sources=r1
a1.channels=c1
a1.sinks=k1
# configure source
a1.sources.r1.type = TAILDIR
# position file recording how far each file has been read (enables resume after restart)
a1.sources.r1.positionFile = /data3/flume/log_position.json
# monitored file groups; several can be defined (e.g. f1 f2), each with its own path pattern
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /data2/data/sdk_log/user_login/.*
a1.sources.r1.fileHeader = false
# configure channel (memory channel is fast, but events still buffered in it are lost if the agent dies)
a1.channels.c1.type = memory
a1.channels.c1.capacity = 20000
a1.channels.c1.byteCapacityBufferPercentage = 20
# configure sink
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = topic_sdk_log
a1.sinks.k1.kafka.bootstrap.servers = hadoop-master:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1
# wire the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
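Before starting the agent, the target topic should exist unless the broker auto-creates topics. A minimal sketch using Kafka's own CLI (assumes Kafka 2.2+ tools are on the PATH; the partition and replication values are illustrative):
kafka-topics.sh --create --bootstrap-server hadoop-master:9092 \
  --topic topic_sdk_log --partitions 3 --replication-factor 1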
Create the position file and make it writable:
mkdir -p /data3/flume
touch /data3/flume/log_position.json
chmod a+w /data3/flume/log_position.json
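Once the agent has been tailing files, this position file holds a JSON array mapping each file's inode to the byte offset read so far, roughly like this (values are illustrative):
[{"inode":2496272,"pos":1234,"file":"/data2/data/sdk_log/user_login/app.log"}]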
4. Start the agent
flume-ng agent -n a1 -c /opt/apps/flume-1.9.0/conf -f /opt/apps/flume-1.9.0/conf/flume-conf.properties &
| Option | Purpose | Example |
| --- | --- | --- |
| --conf / -c | Configuration directory, containing flume-env.sh and the log4j configuration | -c ../conf |
| --conf-file / -f | Path to the agent configuration file | -f ...conf/flume-conf.properties |
| --name / -n | Name of the agent to run | -n a1 |
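With the agent running, a quick end-to-end check is to append a line to the monitored directory and watch it arrive in Kafka (the file name and payload below are made up for illustration; kafka-console-consumer.sh is assumed to be on the PATH):
echo 'test login event' >> /data2/data/sdk_log/user_login/test.log
kafka-console-consumer.sh --bootstrap-server hadoop-master:9092 --topic topic_sdk_log --from-beginning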