1、Java UDF

package com.zqk.useragentutils;
 
import org.apache.pig.EvalFunc;
import org.apache.pig.data.BinSedesTupleFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DefaultBagFactory;
import org.apache.pig.data.Tuple;
 
import java.io.IOException;
 
 
/**
 * Created with IntelliJ IDEA.
 * User: qeekey
 * Date: 13-4-15
 * Time: 下午2:30
 * To change this template use File | Settings | File Templates.
 */
public class UserAgentParse extends EvalFunc<Tuple> {
 
@Override
public Tuple exec(Tuple input) throws IOException {
if (input == null || input.size() == 0) {
return null;
}
 
try {
DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
UserAgent userAgent = UserAgent.valueOf((Long)input.get(0));
if (userAgent != null ){
Tuple tuple = BinSedesTupleFactory.getInstance().newTuple();
tuple.append(userAgent.getOperatingSystem() != null && userAgent.getOperatingSystem().getName() !=null ? userAgent.getOperatingSystem().getName() : "" );
tuple.append(userAgent.getBrowser()!= null && userAgent.getBrowser().getName()!=null ? userAgent.getBrowser().getName() : "" );
tuple.append(userAgent.getDevice()!= null && userAgent.getDevice().getName()!=null ? userAgent.getDevice().getName() : "" );
return tuple;
}else {
//can not parse uaid
}
return null;
} catch (Exception e) {
throw new IOException(e);
}
 
}

 

public static void main(String[] args) throws Exception {

Tuple input = BinSedesTupleFactory.getInstance().newTuple();

input.append(72621750893412442l);

 

UserAgentParse t = new UserAgentParse();
Tuple result = t.exec(input);
for (int i = 0; i < result.size(); i++) {
System.out.println(result.get(i));
}
}
 
}

 

2、Pig调用

-- Make the jar that contains the UDF class available to this script.
REGISTER useragent.jar;

-- Bind the short name UserAgentParse to the fully qualified UDF class.
DEFINE UserAgentParse  com.zqk.useragentutils.UserAgentParse();

-- (intermediate statements, such as loading `data`, are elided here)
......

-- The UDF returns one tuple per row; FLATTEN expands it into the three named columns.
-- user_agent is the numeric (long) user-agent id that the UDF decodes.
data = FOREACH data GENERATE FLATTEN(UserAgentParse(user_agent)) AS (os,browser,device);