Project architecture diagram

Code walkthrough of an e-commerce real-time analytics system built on Flink stream processing

Reporting service -> Kafka

Simulating user browsing records

package com.youfan.data;

import com.alibaba.fastjson.JSONObject;
import com.youfan.log.UserscanLog;
import com.youfan.utils.UrlsendUtil;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;

/**
 * Created by Administrator on 2018/10/14 0014.
 */
public class Scanlogproduce {
    private static Long[] pindaoids = new Long[]{1L,2L,3L,4L,5L,6L,7L,8L};//channel id pool
    private static Long[] leibieids = new Long[]{1L,2L,3L,4L,5L,6L,7L,8L};//product category id pool
    private static Long[] chanpinids = new Long[]{1L,2L,3L,4L,5L,6L,7L,8L};//product id pool
    private static Long[] yonghuids = new Long[]{1L,2L,3L,4L,5L,6L,7L,8L};//user id pool

    /**
     * Region
     */
    private static String[] contrys = new String[]{"America","china"};//country pool
    private static String[] provinces = new String[]{"America","china"};//province pool
    private static String[] citys = new String[]{"America","china"};//city pool

    /**
     * Network carrier
     */
    private static String[] networks = new String[]{"电信","移动","联通"};

    /**
     * Traffic source
     */
    private static String[] sources = new String[]{"直接输入","百度跳转","360搜索跳转","必应跳转"};

    /**
     * Browser
     */
    private static String[] liulanqis = new String[]{"火狐","qq浏览器","360浏览器","谷歌浏览器"};

    /**
     * Open-time / leave-time pairs
     */
    private static List<Long[]> usetimelog = new Scanlogproduce().producetimes();

    public List<Long[]> producetimes(){
        List<Long[]> usetimelog = new ArrayList<Long[]>();
        for(int i=0;i<10;i++){
            Long [] timesarray = gettimes("2018-02-09 12:45:45:014");
            usetimelog.add(timesarray);
        }
        return usetimelog;
    }

    private Long [] gettimes(String time){
        DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS");
        try {
            Date date = dateFormat.parse(time);
            long timetemp = date.getTime();
            Random random = new Random();
            int randomint = random.nextInt(10);
            //the visit lasts randomint hours and ends at the parsed base time
            long starttime = timetemp - randomint*3600*1000;
            long endtime = starttime + randomint*3600*1000;
            return new Long[]{starttime,endtime};
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return new Long[]{0l,0l};
    }

    public static void main(String[] args) {
        Random random = new Random();
        for(int i=0;i<20;i++){
            //channel id, category id, product id, user id, open time, leave time, region, network, source, browser
            UserscanLog userscanLog = new UserscanLog();
//            userscanLog.setPingdaoid(pindaoids[random.nextInt(pindaoids.length)]);
            userscanLog.setPingdaoid(1L);//channel fixed to 1 for this demo; use the commented line above for random channels
            userscanLog.setLeibieid(leibieids[random.nextInt(leibieids.length)]);
            userscanLog.setChanpinid(chanpinids[random.nextInt(chanpinids.length)]);
            userscanLog.setUserid(yonghuids[random.nextInt(yonghuids.length)]);
            userscanLog.setContry(contrys[random.nextInt(contrys.length)]);
            userscanLog.setProvince(provinces[random.nextInt(provinces.length)]);
            userscanLog.setCity(citys[random.nextInt(citys.length)]);

            userscanLog.setNetwork(networks[random.nextInt(networks.length)]);
            userscanLog.setSources(sources[random.nextInt(sources.length)]);
            userscanLog.setLiulanqitype(liulanqis[random.nextInt(liulanqis.length)]);

            Long[] times = usetimelog.get(random.nextInt(usetimelog.size()));
            userscanLog.setStarttime(times[0]);
            userscanLog.setEndetime(times[1]);

            String jsonstr = JSONObject.toJSONString(userscanLog);
            System.out.println(jsonstr);
            UrlsendUtil.sendmessage("http://127.0.0.1:6097/DsInfoSJservice/webInfoSJService",jsonstr);
        }

    }


}
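
The UrlsendUtil helper called above is not included in the original post. A minimal sketch, assuming it does nothing more than POST the JSON string to the given URL; only the class name, package and the sendmessage(url, message) signature come from the call site, the rest is an assumption:

package com.youfan.utils;

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Hypothetical sketch: POSTs a JSON payload to the reporting service.
 */
public class UrlsendUtil {

    public static void sendmessage(String url, String message) {
        try {
            HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Content-Type", "application/json;charset=utf-8");
            conn.setDoOutput(true);
            try (OutputStream out = conn.getOutputStream()) {
                out.write(message.getBytes(StandardCharsets.UTF_8));
            }
            System.out.println("report sent, response code=" + conn.getResponseCode());
            conn.disconnect();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}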

Data model for user browsing records

package com.youfan.log;

import lombok.Data;

/**
 * User browsing record
 */
@Data
public class UserscanLog {

    private Long pingdaoid;//channel id
    private Long leibieid;//product category id
    private Long chanpinid;//product id
    private String contry;//country
    private String province;//province
    private String city;//city
    private String network;//network carrier
    private String sources;//traffic source
    private String liulanqitype;//browser type
    private Long starttime;//open time
    private Long endetime;//leave time
    private Long userid;//user id
}

Writing user browsing records to Kafka

package com.youfan.contorl;

import com.alibaba.fastjson.JSON;
import com.youfan.input.KafkaMessage;
import com.youfan.log.UserscanLog;
import javax.servlet.http.HttpServletRequest;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;

import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Date;

/**
 * Created by Administrator on 2018/10/14 0014.
 */
@Controller
@RequestMapping("DsInfoSJservice")
public class DsInfoSJservice {

    @Autowired
    private KafkaTemplate<String, String> kafkaTemplate;

    @RequestMapping(value="webInfoSJService",method = RequestMethod.POST)
    public void webInfoSJService(@RequestBody String jsonstr, HttpServletRequest request, HttpServletResponse response){
        System.out.println("hello Jin来了==未转换kafkamessage之前的=="+jsonstr);
        KafkaMessage kafkaMessage = new KafkaMessage();
        kafkaMessage.setJsonmessage(jsonstr);
        kafkaMessage.setCount(1);
        kafkaMessage.setTimestamp(new Date().getTime());
        jsonstr = JSON.toJSONString(kafkaMessage);
        System.out.println("hello Jin来了==转换kafkamessage之后的=="+jsonstr);
        //业务开始
        kafkaTemplate.send("test1","key",jsonstr);
        //业务结束
        PrintWriter printWriter = getWriter(response);
        response.setStatus(HttpStatus.OK.value());
        printWriter.write("success");
        closeprintwriter(printWriter);

    }

    private PrintWriter getWriter(HttpServletResponse response){
        response.setCharacterEncoding("utf-8");
        response.setContentType("application/json");
        OutputStream out = null;
        PrintWriter printWriter = null;
        try {
            out = response.getOutputStream();
            printWriter = new PrintWriter(out);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return printWriter;
    }

    private void closeprintwriter(PrintWriter printWriter){
        printWriter.flush();
        printWriter.close();
    }
}
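
The KafkaMessage wrapper (com.youfan.input.KafkaMessage) used by this controller and by the Flink jobs below is not listed in the original. A minimal sketch consistent with the setters and getters that appear in this article, written with Lombok @Data in the same style as the other model classes:

package com.youfan.input;

import lombok.Data;

/**
 * Envelope written to Kafka: the raw browsing-log JSON plus a count and an event timestamp.
 */
@Data
public class KafkaMessage {

    private String jsonmessage;//raw UserscanLog JSON
    private int count;//always 1 per reported event
    private long timestamp;//event time in milliseconds, used for watermarks
}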

MySQL -> Hive (Sqoop)

Creating the Hive tables

CREATE TABLE cartinfo(userid string, productid string,num string,productamount string,createtime string,mechartid string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

CREATE TABLE huodonginfo(huodongid string, huodongname string,hdstarttime string,hdendtime string,productid string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


CREATE TABLE mechart(mechartid string, mechatname string,mechartarea string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

CREATE TABLE merchartshop(mechatid string, mechartshopname string,mechartshopid string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


CREATE TABLE `order`(orderid string,userid string,mechartid string,orderamount string,paytype string,paytime string,hbamount string,djjamount string,productid string,huodongnumber string,createtime string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

CREATE TABLE product(productid string,productname string,producttypeid string,price string,huodongprice string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


CREATE TABLE producttype(producttypeid string,producttypename string,producttypeleave string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


CREATE TABLE userinfo(userid string,username string,age string,area string,telphone string,birthday string,mail string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

Syncing the MySQL data into Hive

sqoop import --connect jdbc:mysql://aliyun.lzh:3306/bigdata --username root --password 123456.. --table cartinfo --fields-terminated-by '\t'  --null-string '**'  --target-dir /user/hive/warehouse/cartinfo/1  --hive-table cartinfo --m 1 --hive-import


sqoop import --connect jdbc:mysql://aliyun.lzh:3306/bigdata --username root --password 123456.. --table huodonginfo --fields-terminated-by '\t'  --null-string  '**' --target-dir /user/hive/warehouse/huodonginfo/1  --hive-table huodonginfo --m 1 --hive-import

sqoop import --connect jdbc:mysql://aliyun.lzh:3306/bigdata --username root --password 123456.. --table mechart --fields-terminated-by '\t'  --null-string '**' --target-dir /user/hive/warehouse/mechart/1  --hive-table mechart --m 1 --hive-import

sqoop import --connect jdbc:mysql://aliyun.lzh:3306/bigdata --username root --password 123456.. --table merchartshop --fields-terminated-by '\t'  --null-string '**'  --target-dir /user/hive/warehouse/merchartshop/1  --hive-table merchartshop --m 1 --hive-import

sqoop import --connect jdbc:mysql://aliyun.lzh:3306/bigdata --username root --password 123456.. --table order --fields-terminated-by '\t'  --null-string '**'  --target-dir /user/hive/warehouse/order/1  --hive-table order --m 1 --hive-import

sqoop import --connect jdbc:mysql://aliyun.lzh:3306/bigdata --username root --password 123456.. --table product --fields-terminated-by '\t'  --null-string '**' --target-dir /user/hive/warehouse/product/1  --hive-table product --m 1 --hive-import

sqoop import --connect jdbc:mysql://aliyun.lzh:3306/bigdata --username root --password 123456.. --table producttype --fields-terminated-by '\t'  --null-string '**'  --target-dir /user/hive/warehouse/producttype/1  --hive-table producttype --m 1 --hive-import

sqoop import --connect jdbc:mysql://aliyun.lzh:3306/bigdata --username root --password 123456.. --table userinfo --fields-terminated-by '\t'  --null-string '**' --target-dir /user/hive/warehouse/userinfo/1  --hive-table userinfo --m 1 --hive-import

Flink SQL (Hive -> HBase)

package com.youfan.table.task;

import com.youfan.batch.analy.OrderInfo;
import com.youfan.batch.analy.OrderInfotable;
import com.youfan.table.analy.MetchartOrder;
import com.youfan.table.map.OrderAnalyMap;
import com.youfan.util.HbaseUtil;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.java.BatchTableEnvironment;

import java.util.HashMap;
import java.util.List;
import java.util.Map;


/**
 * Created by Administrator on 2018/11/3 0003.
 */
public class OrderAnalyProcess {

    public static void main(String[] args) {

        args = new String[]{"--input","hdfs://aliyun.lzh:9000//user/hive/warehouse/order/part-m-00000"};
        final ParameterTool params = ParameterTool.fromArgs(args);

        // set up the execution environment
         ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        BatchTableEnvironment tEnv = TableEnvironment.getTableEnvironment(env);

        // make parameters available in the web interface
        env.getConfig().setGlobalJobParameters(params);

        // get input data
        DataSet<String> text = env.readTextFile(params.get("input"));
        DataSet<OrderInfotable> map = text.flatMap(new OrderAnalyMap());

        tEnv.registerDataSet("OrderInfotable", map, "orderid, userid, mechartid, orderamount, " +
                "paytype, paytime, hbamount, djjamount, productid, huodongnumber");

        Table table = tEnv.sqlQuery(
                "SELECT mechartid,sum(orderamount) AS orderamout,count(1) AS countvalue FROM OrderInfotable where paytime is not null GROUP BY mechartid ");

        DataSet<MetchartOrder> result = tEnv.toDataSet(table, MetchartOrder.class);

        try {
            result.print();
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            List<MetchartOrder> list = result.collect();
            for(MetchartOrder metchartOrder:list){
                Map<String,String> datamap = new HashMap<String,String>();
                datamap.put("mechertordercount",metchartOrder.getCount()+"");
                datamap.put("mechertorderamount",metchartOrder.getOrderamout()+"");
                HbaseUtil.put("orderinfo",metchartOrder.getMechartid()+"","info",datamap);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package com.youfan.batch.analy;
import lombok.Data;
import java.util.Date;
 
@Data
public class OrderInfo {
    private long orderid;
    private long userid;
    private long mechartid;
    private double orderamount;
    private long paytype;
    private Date paytime;
    private long hbamount;
    private long djjamount;
    private long productid;
    private long huodongnumber;
    private Date createtime;

    @Override
    public String toString() {
        return "OrderInfo{" +
                "orderid=" + orderid +
                ", userid=" + userid +
                ", mechartid=" + mechartid +
                ", orderamount=" + orderamount +
                ", paytype=" + paytype +
                ", paytime=" + paytime +
                ", hbamount=" + hbamount +
                ", djjamount=" + djjamount +
                ", productid=" + productid +
                ", huodongnumber=" + huodongnumber +
                ", createtime=" + createtime +
                '}';
    }
}
package com.youfan.batch.analy;

import lombok.Data;
@Data
public class OrderInfotable {
    public String orderid;
    public String userid;
    public String mechartid;
    public double orderamount;
    public String paytype;
    public String paytime;
    public String hbamount;
    public String djjamount;
    public String productid;
    public String huodongnumber;
    public String createtime;

    public OrderInfotable(){}

    public OrderInfotable(String orderid, String userid, String mechartid, double orderamount, String paytype, String paytime, String hbamount, String djjamount, String productid, String huodongnumber, String createtime) {
        this.orderid = orderid;
        this.userid = userid;
        this.mechartid = mechartid;
        this.orderamount = orderamount;
        this.paytype = paytype;
        this.paytime = paytime;
        this.hbamount = hbamount;
        this.djjamount = djjamount;
        this.productid = productid;
        this.huodongnumber = huodongnumber;
        this.createtime = createtime;
    }

}
package com.youfan.table.analy;

import lombok.Data;

import java.io.Serializable;

@Data
public class MetchartOrder implements Serializable{
    private String mechartid;
    private double orderamout;
    private long countvalue;

}
package com.youfan.table.map;

import com.youfan.batch.analy.OrderInfotable;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;

public class OrderAnalyMap  implements FlatMapFunction<String, OrderInfotable> {
    @Override
    public void flatMap(String value, Collector<OrderInfotable> out) throws Exception {
        String [] temparray = value.split("\t");
        if(temparray.length < 11){
            return;//skip malformed lines
        }
        String orderid = temparray[0];
        String userid = temparray[1];
        String mechartid = temparray[2];
        double orderamount = Double.valueOf(temparray[3]);
        String paytype = temparray[4];
        String paytime = temparray[5];
        String hbamount = temparray[6];
        String djjamount = temparray[7];
        String productid = temparray[8];
        String huodongnumber = temparray[9];
        String createtime = temparray[10];

        OrderInfotable orderInfo = new OrderInfotable(orderid, userid, mechartid,  orderamount, paytype,  paytime, hbamount,djjamount,productid, huodongnumber, createtime);
        out.collect(orderInfo);
    }
}
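
The HbaseUtil helper used above (and again in the channel-freshness section below) is not shown in the original. A minimal sketch over the standard HBase client API, covering only the three methods called in this article (put, putdata, getdata); the ZooKeeper quorum address is an assumption:

package com.youfan.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.Map;

/**
 * Hypothetical sketch of the HBase helper used by the batch and streaming jobs.
 */
public class HbaseUtil {

    private static Connection connection;

    static {
        try {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "aliyun.lzh");//assumption: same host as the rest of the cluster
            connection = ConnectionFactory.createConnection(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    //write several columns of one row
    public static void put(String table, String rowkey, String family, Map<String, String> datamap) throws Exception {
        try (Table t = connection.getTable(TableName.valueOf(table))) {
            Put put = new Put(Bytes.toBytes(rowkey));
            for (Map.Entry<String, String> entry : datamap.entrySet()) {
                put.addColumn(Bytes.toBytes(family), Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue()));
            }
            t.put(put);
        }
    }

    //write a single column of one row
    public static void putdata(String table, String rowkey, String family, String column, String value) throws Exception {
        try (Table t = connection.getTable(TableName.valueOf(table))) {
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), Bytes.toBytes(value));
            t.put(put);
        }
    }

    //read a single column of one row, null if absent
    public static String getdata(String table, String rowkey, String family, String column) throws Exception {
        try (Table t = connection.getTable(TableName.valueOf(table))) {
            Result result = t.get(new Get(Bytes.toBytes(rowkey)));
            byte[] valueBytes = result.getValue(Bytes.toBytes(family), Bytes.toBytes(column));
            return valueBytes == null ? null : Bytes.toString(valueBytes);
        }
    }
}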

Channel hot-spot analysis (Flink -> Redis)

package com.youfan.stream.task;

import com.youfan.analy.PindaoRD;
import com.youfan.input.KafkaMessage;
import com.youfan.stream.map.PindaoKafkaMap;
import com.youfan.stream.reduce.PindaoReduce;
import com.youfan.transfer.KafkaMessageSchema;
import com.youfan.transfer.KafkaMessageWatermarks;
import com.youfan.util.RedisUtil;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.datastream.IterativeStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;

public class IntervalProcessData {

    public static void main(String[] args) {

        args = new String[]{"--input-topic","test1","--bootstrap.servers","aliyun.lzh:9092",
                "--zookeeper.connect","aliyun.lzh:2181","--group.id","myconsumer1","--winsdows.size","50","--winsdows.slide","5"};

        final ParameterTool parameterTool = ParameterTool.fromArgs(args);

        if (parameterTool.getNumberOfParameters() < 6) {
            System.out.println("Missing parameters!\n" +
                    "Usage: Kafka --input-topic <topic>" +
                    "--bootstrap.servers <kafka brokers> " +
                    "--zookeeper.connect <zk quorum> --group.id <some id>");
            return;
        }

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.getConfig().disableSysoutLogging();
        env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
        //env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
        env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        FlinkKafkaConsumer010<KafkaMessage> flinkKafkaConsumer = new FlinkKafkaConsumer010<KafkaMessage>(parameterTool.getRequired("input-topic"), new KafkaMessageSchema(), parameterTool.getProperties());
        DataStream<KafkaMessage> input = env.addSource(flinkKafkaConsumer.assignTimestampsAndWatermarks(new KafkaMessageWatermarks()));
        DataStream<PindaoRD> map = input.map(new PindaoKafkaMap());
        DataStream<PindaoRD> reduce = map.keyBy("pingdaoid").countWindow(Long.valueOf(parameterTool.getRequired("windows.size")),Long.valueOf(parameterTool.getRequired("windows.slide"))).reduce(new PindaoReduce());
        reduce.addSink(new SinkFunction<PindaoRD>() {
            @Override
            public void invoke(PindaoRD value) {
                long count = value.getCount();
                long pindaoid = value.getPingdaoid();
                System.out.println("输出==pindaoid"+pindaoid+":"+count);
                try {
                    RedisUtil.jedis.lpush("SlidingEventTimeWindowspingdaord:" + pindaoid, count + "");
                } catch (Exception e) {
                        e.printStackTrace();
                        throw e;
                }
            }
        }).name("pdrdreduce");
        try {
            env.execute("pindaoredian");
        }catch (Exception e) {
            e.printStackTrace();
        }
    }


}
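
RedisUtil is referenced only through its static jedis field and is not listed in the original. A minimal sketch assuming a plain Jedis client; host and port are assumptions, and a JedisPool would be the more robust choice in a real job:

package com.youfan.util;

import redis.clients.jedis.Jedis;

/**
 * Hypothetical sketch: exposes a shared Jedis client for the Redis sink.
 * Note: a single static Jedis instance is not thread-safe; in production a JedisPool
 * should be used instead.
 */
public class RedisUtil {

    public static Jedis jedis = new Jedis("127.0.0.1", 6379);//host/port are assumptions
}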

Handling out-of-order events with Flink watermarks

package com.youfan.transfer;

import com.youfan.input.KafkaMessage;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

import javax.annotation.Nullable;

/**
 * Created by Administrator on 2018/10/27 0027.
 */
public class KafkaMessageWatermarks implements AssignerWithPeriodicWatermarks<KafkaMessage> {

    private long currentTimestamp = Long.MIN_VALUE;

    @Override
    public long extractTimestamp(KafkaMessage event, long previousElementTimestamp) {
        // the inputs are assumed to be of format (message,timestamp)
        this.currentTimestamp = event.getTimestamp();
        return event.getTimestamp();
    }

    @Nullable
    @Override
    public Watermark getCurrentWatermark() {
        Watermark watermark = new Watermark(currentTimestamp == Long.MIN_VALUE ? Long.MIN_VALUE : currentTimestamp - 1);
        return watermark;
    }

}

package com.youfan.transfer;

import com.alibaba.fastjson.JSON;
import com.youfan.input.KafkaMessage;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;

import java.io.IOException;

/**
 * Created by Administrator on 2018/10/27 0027.
 */
public class KafkaMessageSchema implements DeserializationSchema<KafkaMessage>, SerializationSchema<KafkaMessage> {

    @Override
    public KafkaMessage deserialize(byte[] message) throws IOException {
        String jsonString = new String(message);
        KafkaMessage kafkaMessage = JSON.parseObject(jsonString,KafkaMessage.class);
        return kafkaMessage;
    }

    @Override
    public byte[] serialize(KafkaMessage element) {
        String jsonstring = JSON.toJSONString(element);
        return jsonstring.getBytes();
    }

    @Override
    public boolean isEndOfStream(KafkaMessage nextElement) {
        return false;
    }



    @Override
    public TypeInformation<KafkaMessage> getProducedType() {
        return TypeInformation.of(KafkaMessage.class);
    }
}
package com.youfan.stream.map;

import com.alibaba.fastjson.JSON;
import com.youfan.analy.PindaoRD;
import com.youfan.input.KafkaMessage;
import com.youfan.log.UserscanLog;
import org.apache.flink.api.common.functions.RichMapFunction;

import java.util.Date;
import java.util.HashMap;
import java.util.Map;

/**
 * Created by Administrator on 2018/10/27 0027.
 */
public class PindaoKafkaMap extends RichMapFunction<KafkaMessage, PindaoRD> {

    @Override
    public PindaoRD map(KafkaMessage value) throws Exception {
        String jsonstring = value.getJsonmessage();
        System.out.println("map进来的数据=="+jsonstring);
        UserscanLog userscanLog = JSON.parseObject(jsonstring, UserscanLog.class);
        long pingdaoid = userscanLog.getPingdaoid();
        Map<Long,Long> pingdaomap = new HashMap<Long,Long>();
        String mapstring = JSON.toJSONString(pingdaomap);
        PindaoRD pindaoRD = new PindaoRD();
        pindaoRD.setPingdaoid(pingdaoid);
        pindaoRD.setCount(Long.valueOf(value.getCount()+""));
        return pindaoRD;
    }
}
package com.youfan.stream.reduce;

import com.youfan.analy.PindaoRD;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;

/**
 * Created by Administrator on 2018/10/28 0028.
 */
public class PindaoReduce implements ReduceFunction<PindaoRD> {

    @Override
    public PindaoRD reduce(PindaoRD value1, PindaoRD value2) throws Exception {
        PindaoRD pindaoRD = new PindaoRD();
        System.out.println("value1=="+value1);
        System.out.println("value2=="+value2);
        pindaoRD.setPingdaoid(value1.getPingdaoid());
        pindaoRD.setCount(value1.getCount()+value2.getCount());
        return  pindaoRD;
    }
}
package com.youfan.analy;

import lombok.Data;

/**
 * Channel hot spot (per-channel view count)
 */
@Data
public class PindaoRD {

    private Long pingdaoid;
    private Long count;


    @Override
    public String toString() {
        return "PindaoRD{" +
                "pingdaoid=" + pingdaoid +
                ", count=" + count +
                '}';
    }
}

Channel freshness (Flink -> HBase)

Freshness: among each day's active users, the ratio of the number of new users to the number of returning (old) users. For example, 20 new users and 80 returning users on a given day give a freshness of 20/80 = 0.25.

package com.youfan.stream.task;

import com.youfan.analy.PidaoXinXianDu;
import com.youfan.input.KafkaMessage;
import com.youfan.stream.map.PindaoXinXianDuMap;
import com.youfan.stream.reduce.PindaoXinXiandusinkreduce;
import com.youfan.stream.reduce.PindaoXinxianduReduce;
import com.youfan.transfer.KafkaMessageSchema;
import com.youfan.transfer.KafkaMessageWatermarks;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;

/**
 * Created by Administrator on 2018/10/27 0027.
 */
public class SSDQFBProcessData {

    public static void main(String[] args) {

        args = new String[]{"--input-topic","test1","--bootstrap.servers","aliyun.lzh:9092",
                "--zookeeper.connect","aliyun.lzh:2181","--group.id","myconsumer1","--winsdows.size","50"};

        final ParameterTool parameterTool = ParameterTool.fromArgs(args);

        if (parameterTool.getNumberOfParameters() < 5) {
            System.out.println("Missing parameters!\n" +
                    "Usage: Kafka --input-topic <topic>" +
                    "--bootstrap.servers <kafka brokers> " +
                    "--zookeeper.connect <zk quorum> --group.id <some id>");
            return;
        }

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.getConfig().disableSysoutLogging();
        env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
        env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
        env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);


        FlinkKafkaConsumer010<KafkaMessage> flinkKafkaConsumer = new FlinkKafkaConsumer010<KafkaMessage>(parameterTool.getRequired("input-topic"), new KafkaMessageSchema(), parameterTool.getProperties());
        DataStream<KafkaMessage> input = env.addSource(flinkKafkaConsumer.assignTimestampsAndWatermarks(new KafkaMessageWatermarks()));
        DataStream<PidaoXinXianDu> map = input.flatMap(new PindaoXinXianDuMap());
        DataStream<PidaoXinXianDu> reduce = map.keyBy("groupbyfield").countWindow(Long.valueOf(parameterTool.getRequired("windows.size"))).reduce(new PindaoXinxianduReduce());
//        reduce.print();
        reduce.addSink(new PindaoXinXiandusinkreduce()).name("pdxinxiandureduce");
        try {
            env.execute("pindaossfx");
        } catch (Exception e) {
            e.printStackTrace();
        }


    }
}
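
The sink class PindaoXinXiandusinkreduce registered above is not listed in the original. Since this section targets HBase, a plausible minimal sketch is a SinkFunction that writes the new/old user counts through HbaseUtil; the table name, row-key layout and column family here are assumptions, not the author's actual schema:

package com.youfan.stream.reduce;

import com.youfan.analy.PidaoXinXianDu;
import com.youfan.util.HbaseUtil;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.util.HashMap;
import java.util.Map;

/**
 * Hypothetical sketch: persists channel freshness (new vs. returning users) to HBase.
 */
public class PindaoXinXiandusinkreduce implements SinkFunction<PidaoXinXianDu> {

    @Override
    public void invoke(PidaoXinXianDu value) throws Exception {
        Map<String, String> datamap = new HashMap<String, String>();
        datamap.put("newcount", value.getNewcount() + "");
        datamap.put("oldcount", value.getOldcount() + "");
        //row key = time bucket + channel id; table and family names are assumptions
        HbaseUtil.put("pindaoxinxiandu", value.getGroupbyfield(), "info", datamap);
    }
}
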
package com.youfan.stream.map;

import com.alibaba.fastjson.JSON;
import com.youfan.analy.PidaoPvUv;
import com.youfan.analy.PidaoXinXianDu;
import com.youfan.analy.UserState;
import com.youfan.dao.PdvisterDao;
import com.youfan.input.KafkaMessage;
import com.youfan.log.UserscanLog;
import com.youfan.util.DateUtil;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;


/**
 * Created by Administrator on 2018/10/27 0027.
 */
public class PindaoXinXianDuMap implements FlatMapFunction<KafkaMessage,PidaoXinXianDu> {

    @Override
    public void flatMap(KafkaMessage value, Collector<PidaoXinXianDu> out) throws Exception {
        String jsonstring = value.getJsonmessage();
        long timestamp = value.getTimestamp();


        String hourtimestamp = DateUtil.getDateby(timestamp,"yyyyMMddHH");//hour bucket
        String daytimestamp = DateUtil.getDateby(timestamp,"yyyyMMdd");//day bucket
        String monthtimestamp = DateUtil.getDateby(timestamp,"yyyyMM");//month bucket

        UserscanLog userscanLog = JSON.parseObject(jsonstring, UserscanLog.class);
        long pingdaoid = userscanLog.getPingdaoid();
        long userid = userscanLog.getUserid();

        UserState userState = PdvisterDao.getUserSatebyvistertime(userid+"",timestamp);
        boolean isFirsthour = userState.isFisrthour();
        boolean isFisrtday = userState.isFisrtday();
        boolean isFisrtmonth = userState.isFisrtmonth();

        PidaoXinXianDu pidaoXinXianDu = new PidaoXinXianDu();
        pidaoXinXianDu.setPingdaoid(pingdaoid);
        pidaoXinXianDu.setTimestamp(timestamp);
        /**
         * New user flag
         */
        long newuser = 0l;
        if(userState.isnew()){
            newuser = 1l;
        }
        pidaoXinXianDu.setNewcount(newuser);

        /**
         * Hour bucket
         */
        long olduser = 0l;
        if(!userState.isnew()&&isFirsthour){
            olduser = 1l;
        }
        pidaoXinXianDu.setOldcount(olduser);
        pidaoXinXianDu.setTimestring(hourtimestamp);
        pidaoXinXianDu.setGroupbyfield(hourtimestamp+pingdaoid);
        out.collect(pidaoXinXianDu);
        System.out.println("hour=="+pidaoXinXianDu);
        /**
         * Day bucket
         */
        olduser = 0l;
        if(!userState.isnew()&&isFisrtday){
            olduser = 1l;
        }
        pidaoXinXianDu.setOldcount(olduser);
        pidaoXinXianDu.setTimestring(daytimestamp);
        pidaoXinXianDu.setGroupbyfield(daytimestamp+pingdaoid);
        out.collect(pidaoXinXianDu);
        System.out.println("day=="+pidaoXinXianDu);
        /**
         * Month bucket
         */
        olduser = 0l;
        if(!userState.isnew()&&isFisrtmonth){
            olduser = 1l;
        }
        pidaoXinXianDu.setOldcount(olduser);
        pidaoXinXianDu.setTimestring(monthtimestamp);
        pidaoXinXianDu.setGroupbyfield(monthtimestamp+pingdaoid);
        out.collect(pidaoXinXianDu);
        System.out.println("month=="+pidaoXinXianDu);
    }
}
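
UserState, returned by PdvisterDao in the next listing, is not shown either. A minimal sketch whose method names exactly match the call sites (isnew(), setIsnew(), isFisrthour(), ...); plain getters and setters are used here because these names do not follow the usual bean convention that Lombok would generate:

package com.youfan.analy;

/**
 * Hypothetical sketch: visit-state flags for one user event.
 */
public class UserState {

    private boolean isnew;//true if this is the user's first visit ever
    private boolean fisrthour;//true if first visit within the current hour
    private boolean fisrtday;//true if first visit within the current day
    private boolean fisrtmonth;//true if first visit within the current month

    public boolean isnew() { return isnew; }
    public void setIsnew(boolean isnew) { this.isnew = isnew; }

    public boolean isFisrthour() { return fisrthour; }
    public void setFisrthour(boolean fisrthour) { this.fisrthour = fisrthour; }

    public boolean isFisrtday() { return fisrtday; }
    public void setFisrtday(boolean fisrtday) { this.fisrtday = fisrtday; }

    public boolean isFisrtmonth() { return fisrtmonth; }
    public void setFisrtmonth(boolean fisrtmonth) { this.fisrtmonth = fisrtmonth; }
}
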
package com.youfan.dao;

import com.youfan.analy.PidaoPvUv;
import com.youfan.analy.UserState;
import com.youfan.util.DateUtil;
import com.youfan.util.HbaseUtil;
import org.apache.commons.lang3.StringUtils;

import java.util.HashMap;
import java.util.Map;

/**
 * Created by Administrator on 2018/10/29 0029.
 */
public class PdvisterDao {


    /**
     * Look up the user's visit state for the current event
     * @param userid
     * @param timestamp
     * @return
     */
    public static UserState getUserSatebyvistertime(String userid,long timestamp){
        UserState userState = new UserState();
        try {
           String result =  HbaseUtil.getdata("baseuserscaninfo",userid,"time","fisrtvisittime");
            if(result == null){//first visit ever
                Map<String,String> datamap = new HashMap<String,String>();
                datamap.put("fisrtvisittime",timestamp+"");
                datamap.put("lastvisittime",timestamp+"");
                HbaseUtil.put("baseuserscaninfo",userid,"time",datamap);
                userState.setIsnew(true);
                userState.setFisrtday(true);
                userState.setFisrthour(true);
                userState.setFisrtmonth(true);
            }else{
                String lastvisittimestring = HbaseUtil.getdata("baseuserscaninfo",  userid, "time","lastvisittime");
                if(StringUtils.isNotBlank(lastvisittimestring)){
                    long lastvisittime = Long.valueOf(lastvisittimestring);
                    //hour: first visit within the current hour?
                    long periodstart = DateUtil.getDatebyConditon(timestamp,"yyyyMMddHH");
                    if(lastvisittime < periodstart){
                        userState.setFisrthour(true);
                    }
                    //day: first visit within the current day?
                    periodstart = DateUtil.getDatebyConditon(timestamp,"yyyyMMdd");
                    if(lastvisittime < periodstart){
                        userState.setFisrtday(true);
                    }
                    //month: first visit within the current month?
                    periodstart = DateUtil.getDatebyConditon(timestamp,"yyyyMM");
                    if(lastvisittime < periodstart){
                        userState.setFisrtmonth(true);
                    }
                }
                HbaseUtil.putdata("baseuserscaninfo", userid, "time","lastvisittime",timestamp+"");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return userState;
    }
}
package com.youfan.util;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

/**
 * Created by Administrator on 2018/10/29 0029.
 */
public class DateUtil {

    public static String getDateby(long timestamp,String dateformat){
        Date date = new Date(timestamp);
        DateFormat dateFormat = new SimpleDateFormat(dateformat);
        String formatdate = dateFormat.format(date);
        return formatdate;
    }


    public static long getDatebyConditon(long timestamp, String dateformat) throws ParseException {
        Date datetemp = new Date(timestamp);
        DateFormat dateFormat = new SimpleDateFormat(dateformat);
        String formatdate = dateFormat.format(datetemp);
        Date date = dateFormat.parse(formatdate);
        return date.getTime();
    }
}
package com.youfan.stream.reduce;

import com.youfan.analy.PidaoPvUv;
import com.youfan.analy.PidaoXinXianDu;
import org.apache.flink.api.common.functions.ReduceFunction;

/**
 * Created by Administrator on 2018/10/28 0028.
 */
public class PindaoXinxianduReduce implements ReduceFunction<PidaoXinXianDu> {

    @Override
    public PidaoXinXianDu reduce(PidaoXinXianDu value1,PidaoXinXianDu value2) throws Exception {
        System.out.println( "value1=="+value1);
        System.out.println( "value2=="+value2);
        long pingdaoid = value1.getPingdaoid();
        long timestampvalue = value1.getTimestamp();
        String timestring = value1.getTimestring();
        long newcountvalue1 = value1.getNewcount();
        long oldcountvalue1 = value1.getOldcount();

        long newcountvalue2 = value2.getNewcount();
        long oldcountvalue2 = value2.getOldcount();

        PidaoXinXianDu pidaoXinXianDu = new PidaoXinXianDu();
        pidaoXinXianDu.setPingdaoid(pingdaoid);
        pidaoXinXianDu.setTimestamp(timestampvalue);
        pidaoXinXianDu.setTimestring(timestring);
        pidaoXinXianDu.setNewcount(newcountvalue1+newcountvalue2);
        pidaoXinXianDu.setOldcount(oldcountvalue1+oldcountvalue2);
        System.out.println( "recuduce --pidaoXinXianDu=="+pidaoXinXianDu);
        return  pidaoXinXianDu;
    }
}
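
Finally, the PidaoXinXianDu record that flows through the freshness pipeline is not listed in the original. A minimal sketch consistent with the setters, getters and the keyBy("groupbyfield") call above, written with Lombok @Data in the same style as PindaoRD:

package com.youfan.analy;

import lombok.Data;

/**
 * Hypothetical sketch: channel freshness record (new vs. returning users per time bucket).
 */
@Data
public class PidaoXinXianDu {

    private long pingdaoid;//channel id
    private long timestamp;//event time in milliseconds
    private String timestring;//hour/day/month bucket, e.g. yyyyMMddHH
    private long newcount;//new users in this bucket
    private long oldcount;//returning users in this bucket
    private String groupbyfield;//timestring + channel id, used as the keyBy field
}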