1、修改LogParser.java
src/main/java/project/utils/LogParser.java
package project.utils;

import org.apache.commons.lang.StringUtils;

import java.util.HashMap;
import java.util.Map;

/**
 * Log parsing helpers.
 *
 * <p>{@link #parser(String)} handles the raw log format (fields separated by the
 * {@code \001} character) and {@link #parserv2(String)} handles the tab-separated
 * format. Both return an empty map when the input is blank or does not contain
 * enough fields, so callers must be prepared for missing keys.
 */
public class LogParser {

    /** Field separator of the raw log format. */
    private static final String RAW_DELIMITER = "\001";
    /** Zero-based positions of the fields read from a raw log record. */
    private static final int RAW_URL_INDEX = 1;
    private static final int RAW_IP_INDEX = 13;
    private static final int RAW_TIME_INDEX = 17;

    /** Field separator of the tab-separated format. */
    private static final String V2_DELIMITER = "\t";
    /** Zero-based positions of the fields read from a tab-separated record. */
    private static final int V2_IP_INDEX = 0;
    private static final int V2_COUNTRY_INDEX = 1;
    private static final int V2_PROVINCE_INDEX = 2;
    private static final int V2_TIME_INDEX = 3;
    private static final int V2_PAGE_ID_INDEX = 4;
    private static final int V2_URL_INDEX = 5;

    /** Placeholder used when the IP address cannot be resolved to a region. */
    private static final String UNKNOWN = "-";

    /**
     * Parses one raw log record.
     *
     * @param log a raw log line whose fields are separated by {@code \001}
     * @return a map with the keys {@code ip}, {@code country}, {@code province},
     *         {@code city}, {@code url} and {@code time}; an empty map when
     *         {@code log} is blank or has fewer than 18 fields
     */
    public Map<String, String> parser(String log) {
        Map<String, String> info = new HashMap<>();
        if (StringUtils.isBlank(log)) {
            return info;
        }

        // limit -1 keeps trailing empty fields, so an empty last column does not
        // make the record look shorter than it really is.
        String[] splits = log.split(RAW_DELIMITER, -1);
        if (splits.length <= RAW_TIME_INDEX) {
            // Malformed or truncated record: report "nothing parsed" instead of
            // failing with ArrayIndexOutOfBoundsException.
            return info;
        }

        String ip = splits[RAW_IP_INDEX];
        String country = UNKNOWN;
        String province = UNKNOWN;
        String city = UNKNOWN;

        IPParser.RegionInfo regionInfo = IPParser.getInstance().analyseIp(ip);
        if (regionInfo != null) {
            country = regionInfo.getCountry();
            province = regionInfo.getProvince();
            city = regionInfo.getCity();
        }

        info.put("ip", ip);
        info.put("country", country);
        info.put("province", province);
        info.put("city", city);
        info.put("url", splits[RAW_URL_INDEX]);
        info.put("time", splits[RAW_TIME_INDEX]);
        return info;
    }

    /**
     * Parses one tab-separated record.
     *
     * <p>The first five fields are read as ip, country, province, time and pageId.
     * A sixth field, when present, is exposed as {@code url}; records without it
     * are still accepted.
     *
     * @param log a tab-separated line
     * @return a map with the keys {@code ip}, {@code country}, {@code province},
     *         {@code time}, {@code pageId} and, only when a sixth field exists,
     *         {@code url}; an empty map when {@code log} is blank or has fewer
     *         than 5 fields
     */
    public Map<String, String> parserv2(String log) {
        Map<String, String> info = new HashMap<>();
        if (StringUtils.isBlank(log)) {
            return info;
        }

        // limit -1 keeps trailing empty fields (see parser()).
        String[] splits = log.split(V2_DELIMITER, -1);
        if (splits.length <= V2_PAGE_ID_INDEX) {
            // Not enough fields: return the empty map rather than throwing.
            return info;
        }

        info.put("ip", splits[V2_IP_INDEX]);
        info.put("country", splits[V2_COUNTRY_INDEX]);
        info.put("province", splits[V2_PROVINCE_INDEX]);
        info.put("time", splits[V2_TIME_INDEX]);
        info.put("pageId", splits[V2_PAGE_ID_INDEX]);

        // The url column is optional: reading splits[5] unconditionally fails on
        // records that only carry five fields.
        if (splits.length > V2_URL_INDEX) {
            info.put("url", splits[V2_URL_INDEX]);
        }
        return info;
    }
}
2、ProvinceStartV2App.java
src/main/java/project/mrv2/ProvinceStartV2App.java
package project.mrv2;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import project.utils.IPParser;
import project.utils.LogParser;

import java.io.IOException;
import java.util.Map;

/**
 * Page-view statistics per province.
 *
 * <p>Reads records from {@code input/etl}, emits {@code (province, 1)} for every
 * record and writes the per-province totals to {@code output/v2/provincestart}.
 */
public class ProvinceStartV2App {

    /**
     * Configures and runs the job.
     *
     * <p>An existing output directory is deleted first. The process exits with
     * status 1 when the job does not complete successfully.
     *
     * @param args unused
     * @throws Exception if the file system cannot be accessed or the job cannot be submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Path inputPath = new Path("input/etl");
        Path outputPath = new Path("output/v2/provincestart");

        // Remove the output of a previous run so the job can write to the same path again.
        FileSystem fileSystem = FileSystem.get(configuration);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(ProvinceStartV2App.class);

        job.setMapperClass(ProvinceStartV2App.MyMapper.class);
        job.setReducerClass(ProvinceStartV2App.MyReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Surface a failed job through the exit status instead of ignoring the result.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            System.exit(1);
        }
    }

    /**
     * Emits {@code (province, 1)} for every input record that yields a province.
     *
     * <p>Output key: the province. Output value: the count contribution of the record.
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private static final LongWritable ONE = new LongWritable(1);

        private LogParser logParser;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            logParser = new LogParser();
        }

        /**
         * @param key     input key supplied by the framework (not used)
         * @param value   one log record
         * @param context task context used to emit the pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            Map<String, String> info = logParser.parserv2(value.toString());

            // parserv2 returns an empty map for blank lines, so the key may be absent.
            // Skip such records (and count them) rather than passing null to Text.
            String province = info.get("province");
            if (province == null) {
                context.getCounter("ProvinceStartV2App", "SKIPPED_RECORDS").increment(1);
                return;
            }

            context.write(new Text(province), ONE);
        }
    }

    /**
     * Adds up the values received for each province and writes the total.
     */
    static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        /**
         * @param key     the province
         * @param values  the count contributions emitted for that province
         * @param context task context used to emit the total
         */
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long count = 0;
            for (LongWritable value : values) {
                // Sum the values instead of counting iterations: identical while every
                // value is 1, and still correct if values are ever pre-aggregated.
                count += value.get();
            }
            context.write(key, new LongWritable(count));
        }
    }
}
3、问题
(1)在LogParser.java的parserv2中添加url(读取splits[5])后,程序会报“溢出”错误——推测是数组下标越界(ArrayIndexOutOfBoundsException),即按"\t"切分后的字段不足6个。所以这里暂时将url相关代码注释掉。