Single-node test: ingesting data into Kafka with Flume.

Alibaba mirror download: flume


1. Extract and install

cd /opt/apps/
tar -zxvf apache-flume-1.9.0-bin.tar.gz
mv apache-flume-1.9.0-bin flume-1.9.0

2. Add environment variables

echo 'export FLUME_HOME=/opt/apps/flume-1.9.0' >> /etc/profile \
&& echo 'export PATH=$PATH:$FLUME_HOME/bin' >> /etc/profile

source /etc/profile
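
Once the profile is sourced, the installation and PATH setup can be checked with the bundled version command:

flume-ng version
# Should report Flume 1.9.0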


3. Edit the configuration files

flume-env.sh

# Find the Java path
which java
# -> /usr/local/java/bin/java
cd flume-1.9.0/conf/
cp flume-env.sh.template flume-env.sh

vim flume-env.sh
# Uncomment and set JAVA_HOME
export JAVA_HOME=/usr/local/java
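
If the agent needs more memory than the JVM default, heap options can also be uncommented in flume-env.sh; the sizes below are illustrative only, not required for this test:

export JAVA_OPTS="-Xms512m -Xmx1024m"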

flume-conf.properties

cp flume-conf.properties.template flume-conf.properties
vim flume-conf.properties

The TAILDIR source is used here because it records its read offsets in a position file, so tailing resumes where it left off after a restart.

a1.sources=r1
a1.channels=c1
a1.sinks=k1

# configure source
a1.sources.r1.type = TAILDIR
# Position file used to resume tailing after a restart
a1.sources.r1.positionFile = /data3/flume/log_position.json
# Monitored file groups; multiple groups (f1 f2 ...) can be defined
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /data2/data/sdk_log/user_login/.*
a1.sources.r1.fileHeader = false

# configure channel
a1.channels.c1.type = memory
a1.channels.c1.capacity=20000
a1.channels.c1.byteCapacityBufferPercentage=20

# configure sink
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = topic_sdk_log
a1.sinks.k1.kafka.bootstrap.servers = hadoop-master:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1

# Wire the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
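
The sink expects the topic_sdk_log topic to exist (unless the broker auto-creates topics). A minimal sketch for creating it, assuming a single-broker Kafka whose ZooKeeper listens on hadoop-master:2181; on Kafka 2.2+ use --bootstrap-server hadoop-master:9092 instead of --zookeeper:

kafka-topics.sh --create --zookeeper hadoop-master:2181 \
  --replication-factor 1 --partitions 1 --topic topic_sdk_log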


Create the position-file directory and file, and make the file writable

mkdir -p /data3/flume
touch /data3/flume/log_position.json
chmod a+w /data3/flume/log_position.json
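
For reference, once the agent starts tailing, TAILDIR fills this file with JSON records of the following shape (the inode, offset, and file name here are illustrative):

[{"inode":2496275,"pos":88,"file":"/data2/data/sdk_log/user_login/login.log"}]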


4. Start

flume-ng agent -n a1 -c /opt/apps/flume-1.9.0/conf -f /opt/apps/flume-1.9.0/conf/flume-conf.properties &


Parameter          Purpose                                              Example
--conf / -c        Config directory with flume-env.sh and log4j file    -c ../conf
--conf-file / -f   Path to the agent configuration file                 -f ../conf/flume-conf.properties
--name / -n        Agent name                                           -n a1

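To verify the pipeline end to end, append a line to a monitored file (the file name below is hypothetical) and watch it arrive on the topic with the console consumer:

echo 'test-login-event' >> /data2/data/sdk_log/user_login/test.log
kafka-console-consumer.sh --bootstrap-server hadoop-master:9092 \
  --topic topic_sdk_log --from-beginning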

Next: Kafka & Zookeeper single-node test