过滤器查询

引言:过滤器的类型很多,但是可以分为两大类——比较过滤器,专用过滤器

过滤器的作用是在服务端判断数据是否满足条件,然后只将满足条件的数据返回给客户端;

hbase过滤器的比较运算符:

LESS  <
LESS_OR_EQUAL <=
EQUAL =
NOT_EQUAL <>
GREATER_OR_EQUAL >=
GREATER >
NO_OP 排除所有

Hbase过滤器的比较器(指定比较机制):

BinaryComparator  按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[])
BinaryPrefixComparator 与BinaryComparator类似,但只比较左端(前缀部分)的字节是否相同
NullComparator 判断给定的是否为空
BitComparator 按位比较
RegexStringComparator 提供一个正则的比较器,仅支持 EQUAL 和 NOT_EQUAL
SubstringComparator 判断提供的子串是否出现在value中。

Hbase的过滤器分类

  • 比较过滤器

1.1  行键过滤器RowFilter

// Keep rows whose row key is <= "row-22" under byte-wise comparison.
BinaryComparator rowKeyBound = new BinaryComparator(Bytes.toBytes("row-22"));
Filter filter1 = new RowFilter(CompareOp.LESS_OR_EQUAL, rowKeyBound);
scan.setFilter(filter1);

1.2  列族过滤器FamilyFilter

// Keep column families whose name is < "colfam3" under byte-wise comparison.
BinaryComparator familyBound = new BinaryComparator(Bytes.toBytes("colfam3"));
Filter filter1 = new FamilyFilter(CompareFilter.CompareOp.LESS, familyBound);
scan.setFilter(filter1);

1.3 列过滤器QualifierFilter

filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("col-2")));
scan.setFilter(filter1);

1.4 值过滤器 ValueFilter 

// Keep cells whose value contains the substring ".4".
Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator(".4"));
scan.setFilter(filter);
  • 专用过滤器

2.1 单列值过滤器 SingleColumnValueFilter  ----会返回满足条件的整行

// Keep whole rows whose colfam1:col-5 value does NOT contain "val-5".
SingleColumnValueFilter filter = new SingleColumnValueFilter(
    Bytes.toBytes("colfam1"),
    Bytes.toBytes("col-5"),
    CompareFilter.CompareOp.NOT_EQUAL,
    new SubstringComparator("val-5"));
// Without this, rows that do not contain the tested column are returned as well.
filter.setFilterIfMissing(true);
scan.setFilter(filter);

2.2  SingleColumnValueExcludeFilter

与SingleColumnValueFilter的判断逻辑相同,区别在于返回结果中不包含用于判断的那一列

2.3 前缀过滤器 PrefixFilter----针对行键

// Keep rows whose row key starts with "row1".
Filter filter = new PrefixFilter(Bytes.toBytes("row1"));
scan.setFilter(filter);

2.4 列前缀过滤器 ColumnPrefixFilter

// Keep columns whose qualifier starts with "qual2".
Filter filter = new ColumnPrefixFilter(Bytes.toBytes("qual2"));
scan.setFilter(filter);

2.5 分页过滤器 PageFilter

/**
 * Scans the "testfilter" table page by page using a PageFilter with a page
 * size of 3, printing every returned row and finally the total row count.
 *
 * <p>Each page after the first starts from the last row key seen so far with a
 * zero byte appended. The loop ends when a scan returns no rows at all.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the table cannot be opened or a scan fails
 */
public static void main(String[] args) throws Exception {
              Configuration conf = HBaseConfiguration.create();
              conf.set("hbase.zookeeper.quorum", "spark01:2181,spark02:2181,spark03:2181");
              String tableName = "testfilter";
              // Appending a single zero byte to the last seen row key gives a key that
              // sorts immediately after it, so the next scan does not repeat that row.
              final byte[] POSTFIX = new byte[] { 0x00 };
              HTable table = new HTable(conf, tableName);
              try {
                  Filter filter = new PageFilter(3);
                  byte[] lastRow = null;
                  int totalRows = 0;
                  while (true) {
                      Scan scan = new Scan();
                      scan.setFilter(filter);
                      if (lastRow != null) {
                          // Reset the scan boundary to just past the previous page.
                          scan.setStartRow(Bytes.add(lastRow, POSTFIX));
                      }
                      int localRows = 0;
                      ResultScanner scanner = table.getScanner(scan);
                      try {
                          Result result;
                          while ((result = scanner.next()) != null) {
                              System.out.println(localRows++ + ":" + result);
                              totalRows++;
                              lastRow = result.getRow();
                          }
                      } finally {
                          // Release the scanner even if iteration throws.
                          scanner.close();
                      }
                      // An empty page means the table has been exhausted.
                      if (localRows == 0) {
                          break;
                      }
                  }
                  System.out.println("total rows:" + totalRows);
              } finally {
                  // The original never closed the table; always release it.
                  table.close();
              }
       }
/**
        * Demonstrates how several kinds of HBase filters are constructed, then
        * scans the "person_info" table and prints the base_info:name value of
        * every returned row.
        *
        * <p>Only the last filter built (the MultipleColumnPrefixFilter) is attached
        * to the scan; all earlier filters are constructed purely as usage examples.
        *
        * @throws Exception if the table cannot be opened or the scan fails
        */
       @Test
       public void testScan() throws Exception {
              HTable table = new HTable(conf, Bytes.toBytes("person_info"));
              try {
                     Scan scan = new Scan(Bytes.toBytes("person_rk_bj_zhang_000001"), Bytes.toBytes("person_rk_bj_zhang_000002"));

                     // Prefix filter -- applies to the row key.
                     Filter filter = new PrefixFilter(Bytes.toBytes("rk"));

                     // Row filter -- applies to the row key.
                     ByteArrayComparable rowComparator = new BinaryComparator(Bytes.toBytes("person_rk_bj_zhang_000001"));
                     RowFilter rf = new RowFilter(CompareOp.LESS_OR_EQUAL, rowComparator);

                     /*
                      * Suppose the row key format is: createDate_publishDate_ID_TITLE
                      * Goal: find the rows whose publish date is 2014-12-21.
                      *
                      * NOTE(review): the original comment also carried this unrelated Spark snippet:
                      * sc.textFile("path").flatMap(line=>line.split("\t")).map(x=>(x,1)).reduceByKey(_+_).map((_(2),_(1))).sortByKey().map((_(2),_(1))).saveAsTextFile("")
                      */
                     rf = new RowFilter(CompareOp.EQUAL, new SubstringComparator("_2014-12-21_"));

                     // Single-column value filter 1: exact match against a byte array.
                     Filter valueFilter = new SingleColumnValueFilter(Bytes.toBytes("base_info"), Bytes.toBytes("name"), CompareOp.EQUAL, Bytes.toBytes("zhangsan"));

                     // Single-column value filter 2: match against a regular expression.
                     ByteArrayComparable comparator = new RegexStringComparator("zhang.");
                     valueFilter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("NAME"), CompareOp.EQUAL, comparator);

                     // Single-column value filter 3: match if the value contains the substring (case-insensitive).
                     comparator = new SubstringComparator("wu");
                     valueFilter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("NAME"), CompareOp.EQUAL, comparator);

                     // Key-value metadata filtering -- family -- exact byte-array match.
                     // The whole family name must match; a partial name such as "inf" would yield an empty result.
                     FamilyFilter ff = new FamilyFilter(
                             CompareOp.EQUAL,
                             new BinaryComparator(Bytes.toBytes("base_info")));

                     // Key-value metadata filtering -- family -- byte-array prefix match.
                     // Per the original note: the table has a family "info" starting with "inf",
                     // so this returns all rows of that family.
                     ff = new FamilyFilter(
                             CompareOp.EQUAL,
                             new BinaryPrefixComparator(Bytes.toBytes("inf")));

                     // Key-value metadata filtering -- qualifier -- exact byte-array match.
                     // Per the original note: the table has no column named "na", so the result is empty.
                     filter = new QualifierFilter(
                             CompareOp.EQUAL,
                             new BinaryComparator(Bytes.toBytes("na")));

                     // Qualifier prefix match. Per the original note: the table has a column "name"
                     // starting with "na", so this returns that column for all rows.
                     filter = new QualifierFilter(
                             CompareOp.EQUAL,
                             new BinaryPrefixComparator(Bytes.toBytes("na")));

                     // ColumnPrefixFilter: filter by a single column-name (qualifier) prefix.
                     filter = new ColumnPrefixFilter(Bytes.toBytes("na"));

                     // MultipleColumnPrefixFilter: filter by several column-name (qualifier) prefixes.
                     byte[][] prefixes = new byte[][] {Bytes.toBytes("na"), Bytes.toBytes("me")};
                     filter = new MultipleColumnPrefixFilter(prefixes);

                     // Attach the filter to the query; only this last filter takes effect.
                     scan.setFilter(filter);
                     scan.addFamily(Bytes.toBytes("base_info"));

                     // Single row:
//                   Result result = table.get(get);
                     // Multiple rows:
                     ResultScanner scanner = table.getScanner(scan);
                     try {
                            for (Result r : scanner) {
                                   /*
                                   for(KeyValue kv : r.list()){
                                          String family = new String(kv.getFamily());
                                          System.out.println(family);
                                          String qualifier = new String(kv.getQualifier());
                                          System.out.println(qualifier);
                                          System.out.println(new String(kv.getValue()));
                                   }
                                   */
                                   // Read one specific value straight from the result.
                                   byte[] value = r.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("name"));
                                   // A row matched via another column may lack base_info:name;
                                   // skip it instead of hitting a NullPointerException.
                                   if (value == null) {
                                          continue;
                                   }
                                   // Bytes.toString decodes as UTF-8 rather than the platform charset.
                                   System.out.println(Bytes.toString(value));
                            }
                     } finally {
                            // The original never closed the scanner.
                            scanner.close();
                     }
              } finally {
                     // Release the table even when the scan throws.
                     table.close();
              }
       }