I: Preparation

1. Statistical dimensions

  guid

  tracktime

  province

2. Choice of key and value

  key: date + province_guid

  value: NullWritable

3. Case analysis

  No matter how many times a given user visits the site from a given province on a given day, it is recorded as a single visit.

  UV: the total number of distinct people who visited the pages ---> deduplicate users by their guid
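
  To see why this key design yields UV rather than PV, here is a minimal local sketch (all sample values are hypothetical) that mimics the job's deduplication, with a HashSet standing in for the shuffle's grouping:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class UvSketch {
    public static void main(String[] args) {
        // Hypothetical (date, provinceId, guid) triples; the second and third
        // entries are two hits by the same user, same day, same province.
        List<String[]> hits = Arrays.asList(
                new String[]{"2015-08-28", "12", "guid-A"},
                new String[]{"2015-08-28", "12", "guid-B"},
                new String[]{"2015-08-28", "12", "guid-B"},
                new String[]{"2015-08-29", "12", "guid-A"});

        // The MapReduce key: date + "\t" + provinceId + "_" + guid.
        // Duplicate keys collapse, exactly as identical keys do in the shuffle.
        Set<String> distinctKeys = new HashSet<String>();
        for (String[] hit : hits) {
            distinctKeys.add(hit[0] + "\t" + hit[1] + "_" + hit[2]);
        }
        // PV = 4 raw hits, but UV = 3 distinct date+province_guid keys.
        System.out.println("PV = " + hits.size() + ", UV = " + distinctKeys.size());
    }
}

  The two hits by guid-B on the same day collapse into one key, which is the whole deduplication trick.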

 

II: Program

1. Map program

  [Screenshot: Mapper code; see the complete program in section 5]

   

2. Reduce program

  [Screenshot: Reducer code; see the complete program in section 5]

 

3. Results

  [Screenshot: job output]
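
  Since the screenshot is not reproduced here, the shape of the result is worth stating: each output line is "date <TAB> provinceId <TAB> UV". The numbers below are purely hypothetical and only illustrate the format:

2015-08-28    12    347
2015-08-28    20    1289
2015-08-29    12    402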

4. Key points to understand

  1) How the deduplication works

    The key takes the form date + province_guid.

    When two records share the same guid (for the same date and province), their keys are identical, so the shuffle's grouping step puts them into a single group; the value is NullWritable and carries no data at all.

    By the time the keys reach reduce, therefore, the data has already been deduplicated: each distinct date/province/guid combination appears exactly once.
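
  The reducer side follows from this: because each distinct key triggers exactly one reduce() call, counting UV per date/province is just "add one per call", with the totals held in a map until cleanup(). A minimal sketch of that accumulation logic outside Hadoop (the keys are hypothetical samples):

import java.util.LinkedHashMap;
import java.util.Map;

public class ReduceSideSketch {
    public static void main(String[] args) {
        // Deduplicated keys as the reducer would see them, one reduce() call each.
        String[] groupedKeys = {
                "2015-08-28\t12_guid-A",
                "2015-08-28\t12_guid-B",
                "2015-08-29\t12_guid-A"};

        // Mirrors dateMap in WebUvCountReducer: strip the guid suffix,
        // then add one per distinct key.
        Map<String, Integer> dateMap = new LinkedHashMap<String, Integer>();
        for (String key : groupedKeys) {
            String date = key.split("_")[0];   // leaves "date\tprovinceId"
            Integer uv = dateMap.get(date);
            dateMap.put(date, uv == null ? 1 : uv + 1);
        }
        // Mirrors cleanup(): emit each date/province with its UV total.
        for (Map.Entry<String, Integer> e : dateMap.entrySet()) {
            System.out.println(e.getKey() + "\t" + e.getValue());
        }
    }
}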

  2) NullWritable.get()

    NullWritable.get() returns the NullWritable singleton instance. (It is a static singleton accessor, not reflection: NullWritable's constructor is private, and the shared instance serializes to zero bytes, which makes it the idiomatic stand-in for a value that is not needed.)
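
  A quick demonstration of the singleton behaviour (standard Hadoop API, nothing project-specific):

import org.apache.hadoop.io.NullWritable;

public class NullWritableDemo {
    public static void main(String[] args) {
        // get() always hands back the same instance; there is no public constructor.
        NullWritable a = NullWritable.get();
        NullWritable b = NullWritable.get();
        System.out.println(a == b);  // true: one shared, zero-byte instance
    }
}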

 

5. Complete program

package com.senior.network;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WebUvCount extends Configured implements Tool {

    // Mapper: emit one <date \t provinceId _ guid, NullWritable> pair per valid log line.
    public static class WebUvCountMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        private Text mapOutputKey = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String lineValue = value.toString();
            String[] strs = lineValue.split("\t");
            // Skip malformed lines and track them with a custom counter.
            if (strs.length < 30) {
                context.getCounter("webUvMapper_counter", "length_LT_30").increment(1L);
                return;
            }
            String guidValue = strs[5];          // unique visitor id
            if (StringUtils.isEmpty(guidValue)) {
                return;
            }
            String trackTimeValue = strs[17];    // e.g. "2015-08-28 18:10:00"
            if (StringUtils.isEmpty(trackTimeValue)) {
                return;
            }
            String dateValue = trackTimeValue.substring(0, 10);  // keep only yyyy-MM-dd
            String provinceIdValue = strs[23];

            // Validate that the province id is numeric; drop the record otherwise.
            try {
                Integer.valueOf(provinceIdValue);
            } catch (Exception e) {
                return;
            }

            // Key: date + provinceId + guid; identical keys collapse in the shuffle.
            mapOutputKey.set(dateValue + "\t" + provinceIdValue + "_" + guidValue);
            context.write(mapOutputKey, NullWritable.get());
        }
    }

    // Reducer: each reduce() call sees one distinct date/province/guid key, so UV per
    // date/province is one increment per call. With the default single reducer the
    // per-date totals accumulated in dateMap are complete.
    public static class WebUvCountReducer extends Reducer<Text, NullWritable, Text, IntWritable> {
        private Text outputKey = new Text();
        private IntWritable outputValue = new IntWritable();
        private Map<String, Integer> dateMap;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            dateMap = new HashMap<String, Integer>();
        }

        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            // Strip the guid; what remains is "date \t provinceId".
            String date = key.toString().split("_")[0];
            Integer previousUv = dateMap.get(date);
            dateMap.put(date, previousUv == null ? 1 : previousUv + 1);
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the accumulated UV counts once all keys have been processed.
            for (Map.Entry<String, Integer> entry : dateMap.entrySet()) {
                outputKey.set(entry.getKey());
                outputValue.set(entry.getValue());
                context.write(outputKey, outputValue);
            }
        }
    }

    // Driver
    public int run(String[] args) throws Exception {
        Configuration conf = this.getConf();
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(WebUvCount.class);

        // input
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // output
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // map
        job.setMapperClass(WebUvCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // reduce
        job.setReducerClass(WebUvCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // submit
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    // main
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Compress map output to cut shuffle traffic (Snappy must be available on the cluster).
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");
        // Hard-coded HDFS paths override any command-line arguments.
        args = new String[]{
                "hdfs://linux-hadoop01.ibeifeng.com:8020/user/beifeng/mapreduce/wordcount/inputWebData",
                "hdfs://linux-hadoop01.ibeifeng.com:8020/user/beifeng/mapreduce/wordcount/outputWebData6"
        };
        // Pass conf to ToolRunner so the compression settings actually reach the job.
        int status = ToolRunner.run(conf, new WebUvCount(), args);
        System.exit(status);
    }
}
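
  To submit the job, package it and use the standard hadoop jar launcher (the jar name below is only an example). Note that main() overwrites args with the hard-coded HDFS paths, so the command-line paths are ignored unless that block is removed, and Snappy map-output compression requires the native Snappy library on the cluster nodes.

hadoop jar webUvCount.jar com.senior.network.WebUvCount <input path> <output path>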