1、url选择

(1)URL中必须带有以下参数:

topicId=

 

2、目标

(1)获取符合规则的pageId,然后进行统计

 

3、修改LogParser.java

src/main/java/project/utils/LogParser.java

package project.utils;

import org.apache.commons.lang.StringUtils;
import java.util.HashMap;
import java.util.Map;

//日志解析
/**
 * Parses one raw access-log line into a map of named fields.
 *
 * <p>A log line is a sequence of fields separated by the U+0001 control
 * character. Only the URL (index 1) and the client IP (index 13) are read;
 * the IP is additionally resolved to country / province / city through
 * {@link IPParser}.
 */
public class LogParser {

    /** Field delimiter of a log line (octal escape for the U+0001 character). */
    private static final String FIELD_SEPARATOR = "\001";

    /** Zero-based index of the URL field. */
    private static final int URL_INDEX = 1;

    /** Zero-based index of the client IP field (i.e. the 14th field). */
    private static final int IP_INDEX = 13;

    /** Placeholder stored for region values when the IP cannot be resolved. */
    private static final String UNKNOWN = "-";

    IPParser ipParser = IPParser.getInstance();

    /**
     * Extracts the IP, its region information and the URL from a log line.
     *
     * @param log the raw log line; may be {@code null} or blank
     * @return a map with the keys {@code ip}, {@code country}, {@code province},
     *         {@code city} and {@code url}; an empty map when {@code log} is
     *         blank or has too few fields to contain the IP
     */
    public Map<String, String> parser(String log){
        Map<String, String> info = new HashMap<>();

        if (StringUtils.isBlank(log)) {
            return info;
        }

        String[] splits = log.split(FIELD_SEPARATOR);
        // A truncated line would otherwise fail with ArrayIndexOutOfBoundsException
        // on the IP lookup below; treat it like blank input instead.
        if (splits.length <= IP_INDEX) {
            return info;
        }

        String ip = splits[IP_INDEX];
        String country = UNKNOWN;
        String province = UNKNOWN;
        String city = UNKNOWN;

        // The placeholders are kept when the lookup yields no region.
        IPParser.RegionInfo regionInfo = ipParser.analyseIp(ip);
        if (regionInfo != null) {
            country = regionInfo.getCountry();
            province = regionInfo.getProvince();
            city = regionInfo.getCity();
        }

        info.put("ip", ip);
        info.put("country", country);
        info.put("province", province);
        info.put("city", city);
        info.put("url", splits[URL_INDEX]);

        return info;
    }
}

 

4、添加工具类ContentUtils.java

src/main/java/project/utils/ContentUtils.java

package project.utils;

import org.apache.commons.lang.StringUtils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helpers for extracting content identifiers from page URLs.
 */
public class ContentUtils {

    /**
     * Matches {@code topicId=} followed by one or more ASCII digits; group 1
     * holds the digits. Compiled once because {@link Pattern} is immutable and
     * recompiling it on every call is wasted work.
     */
    private static final Pattern TOPIC_ID_PATTERN = Pattern.compile("topicId=([0-9]+)");

    /**
     * Returns the page id carried by the first {@code topicId=<digits>}
     * occurrence in the given URL.
     *
     * @param url the URL to inspect; may be {@code null}
     * @return the digits following the first {@code topicId=}, or an empty
     *         string when {@code url} is {@code null} or contains no such
     *         parameter
     */
    public static String getPageId(String url){
        // Only null needs an explicit guard: an empty or whitespace-only
        // string can never match the pattern and falls through to "".
        if (url == null) {
            return "";
        }

        Matcher matcher = TOPIC_ID_PATTERN.matcher(url);
        return matcher.find() ? matcher.group(1) : "";
    }

}

 

5、进行测试,修改LogParserTest.java

src/test/java/mr/project/utils/LogParserTest.java

package mr.project.utils;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import project.utils.ContentUtils;
import project.utils.LogParser;
import java.util.Map;

public class LogParserTest {

    LogParser logParser;

    @Before
    public void setUp(){
        logParser = new LogParser();
    }

    @After
    public void tearDown(){
        logParser = null;
    }

    @Test
    public void test01(){
        Map<String, String> map = logParser.parser("20960991758\u0001http://www.yihaodian.com/cms/view.do?topicId=19004\u0001http://www.yihaodian.com/cms/view.do?topicId=22331&cache=false&merchant=1\u0001\u00013\u00016ZD1N3J3ECTNX96DSGESX9GN12U1JD2R9YGP\u0001\u0001\u0001\u0001\u0001PPHK3755F3XK3HDYT1AHW7XNS9GZBECK\u000110931041909\u0001\\N\u0001101.85.27.156\u0001\u0001msessionid:D7UNQY44Z6GYKXHWB7QW8NMT6TPVQZKK,unionKey:10931041909\u0001\u00012013-07-21 13:34:10\u0001\\N\u0001http://hao.360.cn/?wd_xp1\u00015\u0001\u0001\\N\u00011\u0001-10\u0001\u0001\u0001\u0001\u0001Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729)\u0001Win32\u0001\u0001\u0001\u0001\u0001\u0001上海市\u00011\u0001\u0001\u0001上海市\u0001\u00014\u0001\u0001\u0001\u0001\u0001\\N\u0001\\N\u0001\\N\u0001\\N\u00012013-07-21\n");

        for (Map.Entry<String, String> entry : map.entrySet()){
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }
    }

    @Test
    public void test02(){
        String pageId = ContentUtils.getPageId("http://www.yihaodian.com/cms/view.do?topicId=19004");
        System.out.println(pageId);
    }
}

电商项目实战-页面浏览量统计之页面编号获取测试7_java