Main references:
HBase source code:
IdentityTableMap.java, TestTableMapReduce.java
If you use a MapReduce job to insert data into an HBase table, the example on the HBase wiki basically runs as-is (provided the environment is deployed correctly).
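For reference, the write direction only needs a reducer that emits BatchUpdate objects. Below is a minimal sketch using the same 0.20.1 mapred API; the class name, the "content:value" column, and the value layout are assumptions for illustration, not taken from the wiki:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WriteToHbaseReducer extends MapReduceBase
        implements TableReduce<Text, Text> {
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
            Reporter reporter) throws IOException {
        // Build one BatchUpdate per row key; each incoming value is
        // written as a cell under the hypothetical "content:" family.
        BatchUpdate update = new BatchUpdate(key.toString());
        while (values.hasNext()) {
            update.put("content:value", Bytes.toBytes(values.next().toString()));
        }
        output.collect(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), update);
    }
}

Such a reducer would be wired into the job with TableMapReduceUtil.initTableReduceJob(tableName, WriteToHbaseReducer.class, job).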
If instead you use MapReduce to read data from an HBase table, the wiki example has problems. Here is an example that has been verified to run (Hadoop and HBase both at version 0.20.1):
package com.koubei.fullbuild.offer;
 
import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
 
 
/**
 * Reads table data from an HBase table with a MapReduce job.
 *
 * @author fengliang
 * @version 1.0
 */
public class GetDataFromHbaseTest extends MapReduceBase implements TableMap<Text, Text>, Tool {
    static final String NAME = "GetDataFromHbaseTest";
    private Configuration conf;
 
    /**
     * For each row, concatenate the column name and value of every
     * non-empty cell and emit the result keyed by the row key.
     */
    public void map(ImmutableBytesWritable row, RowResult value,
           OutputCollector<Text, Text> output, Reporter reporter)
           throws IOException {
       StringBuilder sb = new StringBuilder();
       for (Map.Entry<byte[], Cell> e : value.entrySet()) {
           Cell cell = e.getValue();
           if (cell != null && cell.getValue().length > 0) {
              sb.append(new String(e.getKey())).append(new String(cell.getValue()));
           }
       }
       output.collect(new Text(row.get()), new Text(sb.toString()));
    }
 
    public static void initJob(String table, String columns,
           Class<? extends TableMap> mapper, JobConf job) {
       TableMapReduceUtil.initTableMapJob(table, columns, mapper, Text.class, Text.class, job);
    }
 
    public JobConf createSubmittableJob(String[] args) throws IOException {
       JobConf c = new JobConf(getConf(), GetDataFromHbaseTest.class);
       // Columns are space delimited
       StringBuilder sb = new StringBuilder();
       final int columnoffset = 2;
       for (int i = columnoffset; i < args.length; i++) {
           if (i > columnoffset) {
               sb.append(" ");
           }
           sb.append(args[i]);
       }
       c.setJobName(NAME);
       c.setNumReduceTasks(0);
       // args[1] is the table name; the columns were gathered above.
       initJob(args[1], sb.toString(), GetDataFromHbaseTest.class, c);
       // args[0] is the output directory for the map output.
       FileOutputFormat.setOutputPath(c, new Path(args[0]));
       return c;
    }
 
    static int printUsage() {
       /**
        * outputdir: HDFS path where the map output is written
        * input tablename: name of the HBase table to process
        * column: column name(s) of the table, e.g. "family:qualifier"
        */
       System.out.println(NAME + " <outputdir> <input tablename> <column1> [<column2>...]");
       return -1;
    }
 
    public int run(final String[] args) throws Exception {
       // Need at least 3 arguments: output dir, table name, and one column.
       if (args.length < 3) {
           System.err.println("ERROR: Wrong number of parameters: " + args.length);
           return printUsage();
       }
       JobClient.runJob(createSubmittableJob(args));
       return 0;
    }
 
    public Configuration getConf() {
       return this.conf;
    }
 
    public void setConf(final Configuration c) {
       this.conf = c;
    }
 
    public static void main(String[] args) throws Exception {
       int errCode = ToolRunner.run(new HBaseConfiguration(), new GetDataFromHbaseTest(), args);
       System.exit(errCode);
    }
 
}
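To run the job, package the class into a jar and submit it with the hadoop command; the jar name, output path, table name, and columns below are placeholders:

hadoop jar fullbuild.jar com.koubei.fullbuild.offer.GetDataFromHbaseTest /user/test/output mytable family:col1 family:col2

Because the job sets setNumReduceTasks(0), there is no reduce phase: the map output is written directly to the given output directory on HDFS.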