简介
HBase提供的专用过滤器直接继承自FilterBase。其中一些过滤器只能做行筛选,因此只适合scan操作;对get操作而言,这些过滤器的限制更为苛刻:要么包含整行,要么什么都不包含。
过滤器
1. 单列值过滤器(SingleColumnValueFilter):用某一列的值决定一行数据是否被过滤。
/**
 * Demonstrates {@code SingleColumnValueFilter} with both a scan and a get.
 *
 * <p>The filter compares column {@code family:qualifier} against the value
 * {@code "loe"} using {@code LESS_OR_EQUAL}. It is first applied to a scan,
 * printing every returned cell, and then to a get on row {@code "10086"}.
 *
 * <p>The connection, table and scanner are released in {@code finally} blocks
 * so that they are closed even when a read fails.
 *
 * @throws IOException if connecting to, reading from or closing the table fails
 */
public void singleColumnValueFilter() throws IOException {
    SingleColumnValueFilter filter = new SingleColumnValueFilter(
            Bytes.toBytes("family"), Bytes.toBytes("qualifier"), CompareOp.LESS_OR_EQUAL,
            new BinaryComparator(Bytes.toBytes("loe")));
    // Filter out every row that lacks the reference column; by default such
    // rows are included in the result.
    filter.setFilterIfMissing(true);

    HConnection hConnection = HConnectionManager.createConnection(conf);
    try {
        HTableInterface htable = hConnection.getTable(tableName);
        try {
            // Part 1: apply the filter to a scan.
            Scan scan = new Scan();
            scan.setFilter(filter);
            ResultScanner scanner = htable.getScanner(scan);
            try {
                for (Result res : scanner) {
                    // NOTE: the label mentions the family, but only the qualifier is printed.
                    for (Cell cell : res.rawCells()) {
                        System.out.println("Rowkey : " + Bytes.toString(res.getRow())
                                + " Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
                                + " Value : " + Bytes.toString(CellUtil.cloneValue(cell))
                        );
                    }
                }
            } finally {
                scanner.close();
            }

            // Part 2: apply the same filter to a single-row get.
            Get get = new Get(Bytes.toBytes("10086"));
            get.setFilter(filter);
            Result result = htable.get(get);
            System.out.println("Result of get(): " + result);
            for (Cell cell : result.rawCells()) {
                System.out.println("Rowkey : " + Bytes.toString(result.getRow())
                        + " Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
                        + " Value : " + Bytes.toString(CellUtil.cloneValue(cell))
                );
            }
        } finally {
            htable.close();
        }
    } finally {
        // The connection created above is owned by this method and must be closed here.
        hConnection.close();
    }
}
2. 单列排除过滤器(SingleColumnValueExcludeFilter):
该过滤器继承SingleColumnValueFilter,参考列不会包含在结果中
3. 前缀过滤器(PrefixFilter):
所有与前缀匹配的行都会被返回。扫描操作以字典序查找,当遇到比前缀大的行时,扫描结束。此过滤器对get方法作用不大。
/**
 * Demonstrates {@code PrefixFilter}: scans the table for rows whose key starts
 * with {@code "10086"} and prints every returned cell.
 *
 * <p>The scan's start row is set to the prefix itself so the scan does not
 * have to read the rows that sort before it; the returned rows are unchanged
 * because every row matching the prefix sorts at or after the prefix.
 *
 * <p>This filter is of little use with {@code get()}.
 *
 * @throws IOException if connecting to, reading from or closing the table fails
 */
public void prefixFilter() throws IOException {
    byte[] prefix = Bytes.toBytes("10086");
    Filter filter = new PrefixFilter(prefix);

    HConnection hConnection = HConnectionManager.createConnection(conf);
    try {
        HTableInterface htable = hConnection.getTable(tableName);
        try {
            Scan scan = new Scan();
            // Begin at the prefix instead of at the first row of the table.
            scan.setStartRow(prefix);
            scan.setFilter(filter);
            ResultScanner scanner = htable.getScanner(scan);
            try {
                for (Result res : scanner) {
                    // NOTE: the label mentions the family, but only the qualifier is printed.
                    for (Cell cell : res.rawCells()) {
                        System.out.println("Rowkey : " + Bytes.toString(res.getRow())
                                + " Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
                                + " Value : " + Bytes.toString(CellUtil.cloneValue(cell))
                        );
                    }
                }
            } finally {
                scanner.close();
            }
        } finally {
            htable.close();
        }
    } finally {
        // The connection created above is owned by this method and must be closed here.
        hConnection.close();
    }
}
4. 分页过滤器(PageFilter): 对结果按行分页。
/**
 * Demonstrates {@code PageFilter}: reads the whole table in pages of four rows
 * and prints each row, the start row of every follow-up page and the total
 * number of rows seen.
 *
 * <p>Each follow-up page starts at the last row key of the previous page with
 * a single zero byte appended. Because a scan's start row is inclusive, using
 * the last row key unchanged would return that row again on every page, count
 * it twice and, on a non-empty table, never produce the empty page that ends
 * the loop.
 *
 * @throws IOException if connecting to, reading from or closing the table fails
 */
public void pageFilter() throws IOException {
    Filter filter = new PageFilter(4);
    int totalRows = 0;
    byte[] lastRow = null;
    // One zero byte: lastRow + 0x00 is the smallest row key strictly greater than lastRow.
    byte[] postfix = new byte[] { 0x00 };

    HConnection hConnection = HConnectionManager.createConnection(conf);
    try {
        HTableInterface htable = hConnection.getTable(tableName);
        try {
            while (true) {
                Scan scan = new Scan();
                scan.setFilter(filter);
                if (lastRow != null) {
                    byte[] startRow = Bytes.add(lastRow, postfix);
                    // toStringBinary keeps the trailing 0x00 byte readable in the output.
                    System.out.println("start row: " + Bytes.toStringBinary(startRow));
                    scan.setStartRow(startRow);
                }

                int localRows = 0;
                ResultScanner scanner = htable.getScanner(scan);
                try {
                    Result result;
                    while ((result = scanner.next()) != null) {
                        System.out.println(localRows++ + ": " + result);
                        totalRows++;
                        lastRow = result.getRow();
                    }
                } finally {
                    scanner.close();
                }

                // An empty page means the previous page was the last one.
                if (localRows == 0) {
                    break;
                }
            }
        } finally {
            htable.close();
        }
    } finally {
        // The connection created above is owned by this method and must be closed here.
        hConnection.close();
    }
    System.out.println("total rows: " + totalRows);
}
5. 行键过滤器(KeyOnlyFilter):
只需要将结果中KeyValue实例的键返回,不需要返回实际的数据。
6. 首次行键过滤器(FirstKeyOnlyFilter):
只需要访问一行中的第一列。该过滤器常用在行数统计。
7. 包含结束的过滤器(InclusiveStopFilter):
扫描操作默认包含开始行,但不包含终止行;使用这个过滤器,可以将结束行也包含在结果中。
/**
 * Demonstrates {@code InclusiveStopFilter}: scans from row {@code "10001"}
 * with a filter built on stop row {@code "10004"} and prints every result.
 *
 * <p>The scanner, table and connection are released in {@code finally} blocks
 * so that they are closed even when the scan fails.
 *
 * @throws IOException if connecting to, reading from or closing the table fails
 */
public void inclusiveStopFilter() throws IOException {
    HConnection hConnection = HConnectionManager.createConnection(conf);
    try {
        HTableInterface htable = hConnection.getTable(tableName);
        try {
            Filter filter = new InclusiveStopFilter(Bytes.toBytes("10004"));
            Scan scan = new Scan();
            scan.setStartRow(Bytes.toBytes("10001"));
            scan.setFilter(filter);
            ResultScanner scanner = htable.getScanner(scan);
            try {
                for (Result res : scanner) {
                    System.out.println(res);
                }
            } finally {
                scanner.close();
            }
        } finally {
            htable.close();
        }
    } finally {
        // The connection created above is owned by this method and must be closed here.
        hConnection.close();
    }
}
8.时间戳过滤器(TimestampsFilter):
需要在扫描结果中对版本进行细粒度控制。一个版本是指一个列在一个特定时间的值。
/**
 * Demonstrates {@code TimestampsFilter} with the timestamps 5, 10 and 15.
 *
 * <p>Two scans are run with the same filter and every result is printed: the
 * first uses the filter alone, the second additionally calls
 * {@code setTimeRange(8, 12)}. Of the three listed timestamps only 10 lies
 * between 8 and 12.
 *
 * @throws IOException if connecting to, reading from or closing the table fails
 */
public void timestampsFilter() throws IOException {
    List<Long> ts = new ArrayList<Long>();
    ts.add(Long.valueOf(5L));
    ts.add(Long.valueOf(10L));
    ts.add(Long.valueOf(15L));

    HConnection hConnection = HConnectionManager.createConnection(conf);
    try {
        HTableInterface htable = hConnection.getTable(tableName);
        try {
            Filter filter = new TimestampsFilter(ts);

            // Scan 1: the timestamps filter only.
            Scan scan = new Scan();
            scan.setFilter(filter);
            ResultScanner scanner = htable.getScanner(scan);
            try {
                for (Result res : scanner) {
                    System.out.println(res);
                }
            } finally {
                scanner.close();
            }

            // Scan 2: the same filter combined with a time range on the scan.
            Scan scan2 = new Scan();
            scan2.setFilter(filter);
            scan2.setTimeRange(8, 12);
            ResultScanner scanner2 = htable.getScanner(scan2);
            try {
                for (Result res : scanner2) {
                    System.out.println(res);
                }
            } finally {
                scanner2.close();
            }
        } finally {
            htable.close();
        }
    } finally {
        // The connection created above is owned by this method and must be closed here.
        hConnection.close();
    }
}
9.列计数过滤器(ColumnCountGetFilter):
限制每行最多取回多少列。设置ColumnCountGetFilter(int n),它不适合扫描操作,更适合get方法。
10.列分页过滤器(ColumnPaginationFilter):
可以对一行中所有列进行分页。ColumnPaginationFilter(int limit, int offset):跳过所有偏移量小于offset的列,并从offset开始最多返回limit列。
/**
 * Demonstrates {@code ColumnPaginationFilter}: scans the table with the filter
 * constructed as {@code ColumnPaginationFilter(2, 3)} (limit 2, offset 3) and
 * prints every result.
 *
 * <p>The scanner, table and connection are released in {@code finally} blocks
 * so that they are closed even when the scan fails.
 *
 * @throws IOException if connecting to, reading from or closing the table fails
 */
public void columnPaginationFilter() throws IOException {
    HConnection hConnection = HConnectionManager.createConnection(conf);
    try {
        HTableInterface htable = hConnection.getTable(tableName);
        try {
            Filter filter = new ColumnPaginationFilter(2, 3);
            Scan scan = new Scan();
            scan.setFilter(filter);
            ResultScanner scanner = htable.getScanner(scan);
            try {
                for (Result res : scanner) {
                    System.out.println(res);
                }
            } finally {
                scanner.close();
            }
        } finally {
            htable.close();
        }
    } finally {
        // The connection created above is owned by this method and must be closed here.
        hConnection.close();
    }
}
11.列前缀过滤器(ColumnPrefixFilter):
对列名称前缀进行匹配。
12. 随机行过滤器(RandomRowFilter):
可以让结果中包含随机行。RandomRowFilter(float chance),其中chance的取值在0~1之间。