1、url选择
(1)必须选择带有 topicId= 参数的url
2、目标
(1)获取符合规则的pageId,然后进行统计
3、修改LogParser.java
src/main/java/project/utils/LogParser.java
package project.utils;

import org.apache.commons.lang.StringUtils;

import java.util.HashMap;
import java.util.Map;

/**
 * Parses one raw log line into a map of named fields.
 *
 * <p>The line is split on the {@code "\001"} separator. The resulting map contains the keys
 * {@code ip}, {@code country}, {@code province}, {@code city} and {@code url}.
 */
public class LogParser {

    /** Zero-based index of the URL field in the split log line. */
    private static final int URL_INDEX = 1;

    /** Zero-based index of the IP field in the split log line. */
    private static final int IP_INDEX = 13;

    /** Placeholder stored for region values when the IP lookup yields no result. */
    private static final String UNKNOWN = "-";

    IPParser ipParser = IPParser.getInstance();

    /**
     * Extracts the IP, its region information and the URL from a log line.
     *
     * @param log the raw log line; may be {@code null} or blank
     * @return a mutable map with the extracted fields, or an empty map when {@code log} is blank
     *         or contains too few fields to hold an IP
     */
    public Map<String, String> parser(String log) {
        Map<String, String> info = new HashMap<>();
        if (StringUtils.isBlank(log)) {
            return info;
        }

        String[] splits = log.split("\001");
        // A truncated or malformed line has no IP field; treat it like blank input
        // instead of failing with ArrayIndexOutOfBoundsException.
        if (splits.length <= IP_INDEX) {
            return info;
        }

        String ip = splits[IP_INDEX];
        String country = UNKNOWN;
        String province = UNKNOWN;
        String city = UNKNOWN;

        // analyseIp may return null; in that case the placeholders are kept.
        IPParser.RegionInfo regionInfo = ipParser.analyseIp(ip);
        if (regionInfo != null) {
            country = regionInfo.getCountry();
            province = regionInfo.getProvince();
            city = regionInfo.getCity();
        }

        info.put("ip", ip);
        info.put("country", country);
        info.put("province", province);
        info.put("city", city);
        info.put("url", splits[URL_INDEX]);
        return info;
    }
}
4、添加工具类ContentUtils.java
src/main/java/project/utils/ContentUtils.java
package project.utils;

import org.apache.commons.lang.StringUtils;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helpers for extracting identifiers from request URLs.
 */
public class ContentUtils {

    /**
     * Matches {@code topicId=} followed by one or more ASCII digits; group 1 holds the digits.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern TOPIC_ID_PATTERN = Pattern.compile("topicId=([0-9]+)");

    /**
     * Returns the numeric value of the first {@code topicId=} occurrence in the given URL.
     *
     * @param url the URL to inspect; may be {@code null} or blank
     * @return the digits following the first {@code topicId=}, or an empty string when the URL
     *         is blank or contains no such occurrence
     */
    public static String getPageId(String url) {
        // Blank input cannot contain a topic id; the empty string is the "not found" value.
        if (StringUtils.isBlank(url)) {
            return "";
        }

        Matcher matcher = TOPIC_ID_PATTERN.matcher(url);
        // The capturing group yields the digits directly, with no second split pass.
        return matcher.find() ? matcher.group(1) : "";
    }
}
5、进行测试,修改LogParserTest.java
src/test/java/mr/project/utils/LogParserTest.java
package mr.project.utils;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import project.utils.ContentUtils;
import project.utils.LogParser;

import java.util.Map;

import static org.junit.Assert.assertEquals;

/**
 * Tests for {@link LogParser} and {@link ContentUtils}.
 */
public class LogParserTest {

    LogParser logParser;

    /** Creates a fresh parser before each test. */
    @Before
    public void setUp() {
        logParser = new LogParser();
    }

    /** Drops the parser reference after each test. */
    @After
    public void tearDown() {
        logParser = null;
    }

    /**
     * Parses a sample log line, prints every extracted field and checks the values that are
     * fixed by the input itself (ip and url). Region values depend on the IP lookup and are
     * only printed.
     */
    @Test
    public void test01() {
        Map<String, String> map = logParser.parser("20960991758\u0001http://www.yihaodian.com/cms/view.do?topicId=19004\u0001http://www.yihaodian.com/cms/view.do?topicId=22331&cache=false&merchant=1\u0001\u00013\u00016ZD1N3J3ECTNX96DSGESX9GN12U1JD2R9YGP\u0001\u0001\u0001\u0001\u0001PPHK3755F3XK3HDYT1AHW7XNS9GZBECK\u000110931041909\u0001\\N\u0001101.85.27.156\u0001\u0001msessionid:D7UNQY44Z6GYKXHWB7QW8NMT6TPVQZKK,unionKey:10931041909\u0001\u00012013-07-21 13:34:10\u0001\\N\u0001http://hao.360.cn/?wd_xp1\u00015\u0001\u0001\\N\u00011\u0001-10\u0001\u0001\u0001\u0001\u0001Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729)\u0001Win32\u0001\u0001\u0001\u0001\u0001\u0001上海市\u00011\u0001\u0001\u0001上海市\u0001\u00014\u0001\u0001\u0001\u0001\u0001\\N\u0001\\N\u0001\\N\u0001\\N\u00012013-07-21\n");

        for (Map.Entry<String, String> entry : map.entrySet()) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }

        // Without assertions the test could never fail; pin the input-determined fields.
        assertEquals("101.85.27.156", map.get("ip"));
        assertEquals("http://www.yihaodian.com/cms/view.do?topicId=19004", map.get("url"));
    }

    /** Extracts the page id from a URL carrying a topicId parameter. */
    @Test
    public void test02() {
        String pageId = ContentUtils.getPageId("http://www.yihaodian.com/cms/view.do?topicId=19004");
        System.out.println(pageId);
        assertEquals("19004", pageId);
    }
}