1.数据生产

使用java代码往一个文件中写入数据

package com.mobile;

import java.io.*;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;

/**
 * @author kaiya
 * @Desc 数据生产
 * @date 2020/5/6 20:33
 */
public class Producer {

    // Record format: caller,callee,buildTime,duration
    // = calling number, called number, call setup time, call duration (seconds)

    // Pool of phone numbers; caller/callee are picked by random index.
    List<String> phoneNumList = new ArrayList<>();
    // Phone number -> subscriber name lookup.
    Map<String, String> phoneNameMap = new HashMap<>();
    // Date range (yyyy-MM-dd) the random call setup time is drawn from.
    private String startTime = "2020-01-01";
    private String endTime = "2020-12-31";

    /**
     * Initializes the 20-number phone pool and the number-to-name table.
     * Must be called once before {@link #product()}.
     */
    public void initPhone() {
        addPhone("17078388295", "施耐庵");
        addPhone("13980337439", "李世民");
        addPhone("14575535933", "程咬金");
        addPhone("19902496992", "猪八戒");
        addPhone("18549641558", "孙悟空");
        addPhone("17005930322", "唐三藏");
        addPhone("18468618874", "沙僧");
        addPhone("18576581848", "沙悟净");
        addPhone("15978226424", "猪悟能");
        addPhone("15542823911", "观世音");
        addPhone("17526304161", "太白金星");
        addPhone("15422018558", "赤脚大仙");
        addPhone("17269452013", "二郎神");
        addPhone("17764278604", "哮天犬");
        addPhone("15711910344", "嫦娥");
        addPhone("15714728273", "玉皇大帝");
        addPhone("16061028454", "王母娘娘");
        addPhone("16264433631", "如来");
        addPhone("17601615878", "白骨精");
        addPhone("15897468949", "牛魔王");
    }

    /** Registers one phone number together with its subscriber name. */
    private void addPhone(String num, String name) {
        phoneNumList.add(num);
        phoneNameMap.put(num, name);
    }

    /**
     * Produces one random call record.
     *
     * @return CSV line "caller,callee,buildTime,duration": calling number,
     *         called number (always different from the caller), call setup
     *         time (yyyy-MM-dd HH:mm:ss) and duration in seconds, zero-padded
     *         to four digits
     */
    public String product() {
        // Caller: random element of the pool (index in [0, size)).
        String caller = phoneNumList.get((int) (Math.random() * phoneNumList.size()));

        // Callee: re-draw until it differs from the caller.
        String callee;
        do {
            callee = phoneNumList.get((int) (Math.random() * phoneNumList.size()));
        } while (callee.equals(caller));

        // Call setup time somewhere between startTime and endTime.
        String buildTime = randomBuildTime(startTime, endTime);

        // Duration: 0-1799 seconds. Truncate to int BEFORE formatting; passing
        // the raw double would let DecimalFormat round 1799.9... up to "1800".
        DecimalFormat df = new DecimalFormat("0000");
        String duration = df.format((int) (30 * 60 * Math.random()));

        // Assemble the final CSV line.
        StringBuilder sb = new StringBuilder();
        sb.append(caller).append(",").append(callee).append(",")
                .append(buildTime).append(",").append(duration);
        return sb.toString();
    }

    /**
     * Returns a random timestamp between the two given dates.
     *
     * @param startTime lower bound, yyyy-MM-dd
     * @param endTime   upper bound (must be after startTime), yyyy-MM-dd
     * @return formatted timestamp yyyy-MM-dd HH:mm:ss, or null when the bounds
     *         are inverted/equal or cannot be parsed
     */
    private String randomBuildTime(String startTime, String endTime) {
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        SimpleDateFormat secondFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            Date startDate = dayFormat.parse(startTime);
            Date endDate = dayFormat.parse(endTime);
            // End must be strictly after start for the interpolation below.
            if (startDate.getTime() >= endDate.getTime()) {
                return null;
            }
            // start + random fraction of (end - start); primitive long avoids boxing.
            long timeMill = (long) ((endDate.getTime() - startDate.getTime())
                    * Math.random() + startDate.getTime());
            return secondFormat.format(new Date(timeMill));
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Appends one freshly produced record to the file every 0.5 s, forever.
     * Stops when the thread is interrupted; the writer is closed on any exit.
     *
     * @param filePath target file; created when absent, appended otherwise
     */
    public void writeToFile(String filePath) {
        // try-with-resources guarantees the writer is closed on every exit path
        // (the original closed it only via an explicit finally block).
        try (OutputStreamWriter osw = new OutputStreamWriter(
                new FileOutputStream(new File(filePath), true), "UTF-8")) {
            while (true) {
                // Emit one record every 0.5 s.
                Thread.sleep(500);
                String data = product();
                System.out.println(data);
                osw.write(data + "\n");
                osw.flush();
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
        } catch (IOException e) {
            // Covers FileNotFoundException and UnsupportedEncodingException too.
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        // Guard against a missing output-path argument instead of crashing
        // with ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: Producer <outputFilePath>");
            System.exit(1);
        }
        Producer producer = new Producer();
        // Initialize the phone data before producing records.
        producer.initPhone();
        producer.writeToFile(args[0]);
    }
}

2.flume+kafka消费数据【flume与kafka的搭建在此省略】

2.1 kafka消费数据代码

maven依赖

<dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.2</version>
        </dependency>

消费代码 【2.4】

package com.bigdata.kafka;

import com.bigdata.hbase.HBaseDAO;
import com.bigdata.utils.PropertiesUtils;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;

/**
 * @author kaiya
 * @Desc kafka消费数据写入Hbase
 * @date 2020/5/10 12:22
 */
public class Kafka2HBase {
    /**
     * Pulls call-log lines from the configured Kafka topic and prints each
     * record value to stdout. Runs until the process is killed.
     */
    public static void main(String[] args) {
        // Consumer is configured entirely from the shared properties file.
        KafkaConsumer<String, String> consumer =
                new KafkaConsumer<>(PropertiesUtils.properties);
        // Subscribe to the topic named by the "kafka.topics" property.
        consumer.subscribe(Arrays.asList(PropertiesUtils.getProperty("kafka.topics")));

        // Endless poll loop; each poll returns a (possibly empty) batch.
        while (true) {
            ConsumerRecords<String, String> batch = consumer.poll(100);
            for (ConsumerRecord<String, String> record : batch) {
                // e.g. 14575535933,15422018558,2020-01-25 22:41:22,1551
                String line = record.value();
                System.out.println(line);
            }
        }
    }
}

配置文件 properties

# 设置kafka的brokerlist
bootstrap.servers=bigdata111:9092,bigdata112:9092,bigdata113:9092
# 设置消费者所属的消费组
group.id=hbase_consumer_group
# 设置是否自动确认offset
enable.auto.commit=true
# 自动确认offset的时间间隔
auto.commit.interval.ms=30000
# 设置key,value的反序列化类的全名
key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
value.deserializer=org.apache.kafka.common.serialization.StringDeserializer

# 以下为自定义属性设置
# 设置本次消费的主题
kafka.topics=calllog

读取配置文件

package com.bigdata.utils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * @author kaiya
 * @Desc 读取配置文件的工具类
 * @date 2020/5/8 20:51
 */

public class PropertiesUtils {
    // Loaded once during class initialization; kept public because callers
    // (e.g. the Kafka consumer) read this field directly.
    public static Properties properties = null;
    private static final Logger LOG = LoggerFactory.getLogger(PropertiesUtils.class);

    static {
        properties = new Properties();
        // Load hbase.properties from the classpath. try-with-resources closes
        // the stream (the original leaked it), and the explicit null check
        // prevents the NPE the original threw from properties.load(null) when
        // the resource was missing — an NPE its IOException handler never caught.
        try (InputStream is = ClassLoader.getSystemResourceAsStream("hbase.properties")) {
            if (is == null) {
                LOG.error("hbase.properties not found on the classpath; using empty properties");
            } else {
                properties.load(is);
            }
        } catch (IOException e) {
            LOG.error("Failed to load hbase.properties", e);
        }
    }

    /**
     * Returns the value configured for the given key.
     *
     * @param key property name to look up
     * @return the configured value, or null when the key is absent
     */
    public static String getProperty(String key) {
        return properties.getProperty(key);
    }
}

2.2 生产数据

将数据生产的代码用maven打包成jar包,上传到linux环境,运行jar包生产数据

java -cp Producer-1.0-SNAPSHOT.jar com.mobile.Producer /opt/datas/calllog.csv

flume kafkaChannel 生产数据 flume消费kafka数据_kafka

2.3 flume拉取数据到kafka

配置文件

# define
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# source
a1.sources.r1.type = exec
#监控的文件,及数据生产的文件
a1.sources.r1.command = tail -F -c +0 /opt/datas/calllog.csv
a1.sources.r1.shell = /bin/bash -c

# sink
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.brokerList = bigdata111:9092,bigdata112:9092,bigdata113:9092
a1.sinks.k1.topic = calllog
a1.sinks.k1.batchSize = 20
a1.sinks.k1.requiredAcks = 1

# channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# bind
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

启动flume

为了方便,直接保存成脚本

#!/bin/bash
/opt/module/flume-1.8.0/bin/flume-ng agent -n a1 -c /opt/module/flume-1.8.0/conf/ -f /opt/module/flume-1.8.0/jobconf/flume-to_kafka.conf

2.4运行kafka消费数据代码,由于我把消费到的数据打印到控制台,可以直接看到数据

flume kafkaChannel 生产数据 flume消费kafka数据_java_02