Hbase 根据某一列进行过滤 hbase列值过滤器

转载

mob6454cc6ba5a5 2024-06-14 22:59:05

文章标签 Hbase 根据某一列进行过滤 大数据 System 分页偏移量 文章分类 Hbase 数据库

简介

HBase 提供的专用过滤器直接继承自 FilterBase。其中一些过滤器只能做行筛选，因此只适合 scan 操作；用于 get 操作时，这些过滤器的限制更苛刻：要么返回整行，要么什么都不返回。

过滤器

1. 单列值过滤器（SingleColumnValueFilter）：用一列的值决定是否一行数据被过滤。

/**
 * Demonstrates {@code SingleColumnValueFilter} with a full-table scan and a single-row get.
 *
 * <p>The filter compares the value of column {@code family:qualifier} against {@code "loe"}
 * with {@code LESS_OR_EQUAL}. Every cell of each returned row is printed to standard output.
 *
 * @throws IOException if any HBase client call fails
 */
public void singleColumnValueFilter() throws IOException {
	SingleColumnValueFilter filter = new SingleColumnValueFilter(
			Bytes.toBytes("family"), Bytes.toBytes("qualifier"), CompareOp.LESS_OR_EQUAL,
			new BinaryComparator(Bytes.toBytes("loe")));
	// Drop rows that do not contain the reference column; by default such rows are kept.
	filter.setFilterIfMissing(true);

	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Scan scan = new Scan();
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					for (Cell cell : res.rawCells()) {
						System.out.println("Rowkey : " + Bytes.toString(res.getRow())
								+ "   Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
								+ "   Value : " + Bytes.toString(CellUtil.cloneValue(cell)));
					}
				}
			} finally {
				scanner.close();
			}

			Get get = new Get(Bytes.toBytes("10086"));
			get.setFilter(filter);
			Result result = htable.get(get);
			System.out.println("Result of get(): " + result);
			// rawCells() may be null when the filter rejected the row, so guard the loop.
			if (!result.isEmpty()) {
				for (Cell cell : result.rawCells()) {
					System.out.println("Rowkey : " + Bytes.toString(result.getRow())
							+ "   Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
							+ "   Value : " + Bytes.toString(CellUtil.cloneValue(cell)));
				}
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}
        /**
         * Demonstrates {@code SingleColumnValueFilter} with a full-table scan and a single-row get.
         *
         * <p>The filter compares the value of column {@code family:qualifier} against {@code "loe"}
         * with {@code LESS_OR_EQUAL}. Every cell of each returned row is printed to standard output.
         *
         * @throws IOException if any HBase client call fails
         */
        public void singleColumnValueFilter() throws IOException {
            SingleColumnValueFilter filter = new SingleColumnValueFilter(
                    Bytes.toBytes("family"), Bytes.toBytes("qualifier"), CompareOp.LESS_OR_EQUAL,
                    new BinaryComparator(Bytes.toBytes("loe")));
            // Drop rows that do not contain the reference column; by default such rows are kept.
            filter.setFilterIfMissing(true);

            // createConnection() hands back a connection the caller owns, so it must be closed here.
            HConnection hConnection = HConnectionManager.createConnection(conf);
            try {
                HTableInterface htable = hConnection.getTable(tableName);
                try {
                    Scan scan = new Scan();
                    scan.setFilter(filter);
                    ResultScanner scanner = htable.getScanner(scan);
                    try {
                        for (Result res : scanner) {
                            for (Cell cell : res.rawCells()) {
                                System.out.println("Rowkey : " + Bytes.toString(res.getRow())
                                        + "   Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
                                        + "   Value : " + Bytes.toString(CellUtil.cloneValue(cell)));
                            }
                        }
                    } finally {
                        scanner.close();
                    }

                    Get get = new Get(Bytes.toBytes("10086"));
                    get.setFilter(filter);
                    Result result = htable.get(get);
                    System.out.println("Result of get(): " + result);
                    // rawCells() may be null when the filter rejected the row, so guard the loop.
                    if (!result.isEmpty()) {
                        for (Cell cell : result.rawCells()) {
                            System.out.println("Rowkey : " + Bytes.toString(result.getRow())
                                    + "   Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
                                    + "   Value : " + Bytes.toString(CellUtil.cloneValue(cell)));
                        }
                    }
                } finally {
                    htable.close();
                }
            } finally {
                hConnection.close();
            }
        }

2. 单列排除过滤器（SingleColumnValueExcludeFilter）:

该过滤器继承SingleColumnValueFilter，参考列不会包含在结果中

3. 前缀过滤器（PrefixFilter）:

所有与前缀匹配的行都会被返回。扫描操作按字典序进行，当遇到比前缀大的行时，扫描结束；但排在前缀之前的行仍会被逐行扫描，因此通常应同时把扫描的起始行设置为该前缀。此过滤器对get方法作用不大。

/**
 * Scans for all rows whose key starts with {@code "10086"} using {@code PrefixFilter}
 * and prints every cell of each matching row to standard output.
 *
 * <p>This filter is of little use with {@code get()}.
 *
 * @throws IOException if any HBase client call fails
 */
public void prefixFilter() throws IOException {
	byte[] prefix = Bytes.toBytes("10086");
	Filter filter = new PrefixFilter(prefix);

	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Scan scan = new Scan();
			// Start at the prefix so rows sorting before it are not read at all; the filter
			// on its own only ends the scan once the prefix has been passed.
			scan.setStartRow(prefix);
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					for (Cell cell : res.rawCells()) {
						System.out.println("Rowkey : " + Bytes.toString(res.getRow())
								+ "   Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
								+ "   Value : " + Bytes.toString(CellUtil.cloneValue(cell)));
					}
				}
			} finally {
				scanner.close();
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}
/**
 * Scans for all rows whose key starts with {@code "10086"} using {@code PrefixFilter}
 * and prints every cell of each matching row to standard output.
 *
 * <p>This filter is of little use with {@code get()}.
 *
 * @throws IOException if any HBase client call fails
 */
public void prefixFilter() throws IOException {
	byte[] prefix = Bytes.toBytes("10086");
	Filter filter = new PrefixFilter(prefix);

	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Scan scan = new Scan();
			// Start at the prefix so rows sorting before it are not read at all; the filter
			// on its own only ends the scan once the prefix has been passed.
			scan.setStartRow(prefix);
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					for (Cell cell : res.rawCells()) {
						System.out.println("Rowkey : " + Bytes.toString(res.getRow())
								+ "   Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell))
								+ "   Value : " + Bytes.toString(CellUtil.cloneValue(cell)));
					}
				}
			} finally {
				scanner.close();
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}

4. 分页过滤器（PageFilter）: 对结果按行分页。

/**
 * Pages through the whole table with {@code PageFilter(4)}, printing each row and,
 * at the end, the total number of rows seen.
 *
 * <p>Every page after the first starts at the previous page's last row key with one
 * zero byte appended. That is the smallest key sorting strictly after the last row,
 * so no row is returned twice and the loop ends when a page comes back empty.
 *
 * @throws IOException if any HBase client call fails
 */
public void pageFilter() throws IOException {
	Filter filter = new PageFilter(4);
	int totalRows = 0;
	byte[] lastRow = null;
	// Must be a zero byte, not an empty array: with an empty postfix the next page would
	// start ON the last row again, double-counting it and never producing an empty page.
	byte[] POSTFIX = new byte[] { 0x00 };

	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			while (true) {
				Scan scan = new Scan();
				scan.setFilter(filter);
				if (lastRow != null) {
					byte[] startRow = Bytes.add(lastRow, POSTFIX);
					System.out.println("start row: " + Bytes.toString(startRow));
					scan.setStartRow(startRow);
				}
				int localRows = 0;
				ResultScanner scanner = htable.getScanner(scan);
				try {
					Result result;
					while ((result = scanner.next()) != null) {
						System.out.println(localRows++ + ": " + result);
						totalRows++;
						lastRow = result.getRow();
					}
				} finally {
					scanner.close();
				}
				// An empty page means the previous page reached the end of the table.
				if (localRows == 0) {
					break;
				}
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
	System.out.println("total rows: " + totalRows);
}
/**
 * Pages through the whole table with {@code PageFilter(4)}, printing each row and,
 * at the end, the total number of rows seen.
 *
 * <p>Every page after the first starts at the previous page's last row key with one
 * zero byte appended. That is the smallest key sorting strictly after the last row,
 * so no row is returned twice and the loop ends when a page comes back empty.
 *
 * @throws IOException if any HBase client call fails
 */
public void pageFilter() throws IOException {
	Filter filter = new PageFilter(4);
	int totalRows = 0;
	byte[] lastRow = null;
	// Must be a zero byte, not an empty array: with an empty postfix the next page would
	// start ON the last row again, double-counting it and never producing an empty page.
	byte[] POSTFIX = new byte[] { 0x00 };

	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			while (true) {
				Scan scan = new Scan();
				scan.setFilter(filter);
				if (lastRow != null) {
					byte[] startRow = Bytes.add(lastRow, POSTFIX);
					System.out.println("start row: " + Bytes.toString(startRow));
					scan.setStartRow(startRow);
				}
				int localRows = 0;
				ResultScanner scanner = htable.getScanner(scan);
				try {
					Result result;
					while ((result = scanner.next()) != null) {
						System.out.println(localRows++ + ": " + result);
						totalRows++;
						lastRow = result.getRow();
					}
				} finally {
					scanner.close();
				}
				// An empty page means the previous page reached the end of the table.
				if (localRows == 0) {
					break;
				}
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
	System.out.println("total rows: " + totalRows);
}

5. 行键过滤器（KeyOnlyFilter）:

只需要将结果中KeyValue实例的键返回，不需要返回实际的数据。

6. 首次行键过滤器（FirstKeyOnlyFilter）:

只需要访问一行中的第一列。该过滤器常用在行数统计。

7. 包含结束的过滤器（InclusiveStopFilter）:

在普通的扫描操作中，开始行会被包含在结果中，而终止行被排除在外；使用这个过滤器，可以把终止行也包含在结果中。

/**
 * Scans from row {@code "10001"} with an {@code InclusiveStopFilter} on row
 * {@code "10004"} and prints each returned {@code Result} to standard output.
 *
 * @throws IOException if any HBase client call fails
 */
public void inclusiveStopFilter() throws IOException {
	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Filter filter = new InclusiveStopFilter(Bytes.toBytes("10004"));
			Scan scan = new Scan();
			scan.setStartRow(Bytes.toBytes("10001"));
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					System.out.println(res);
				}
			} finally {
				// The scanner was previously never closed.
				scanner.close();
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}
/**
 * Scans from row {@code "10001"} with an {@code InclusiveStopFilter} on row
 * {@code "10004"} and prints each returned {@code Result} to standard output.
 *
 * @throws IOException if any HBase client call fails
 */
public void inclusiveStopFilter() throws IOException {
	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Filter filter = new InclusiveStopFilter(Bytes.toBytes("10004"));
			Scan scan = new Scan();
			scan.setStartRow(Bytes.toBytes("10001"));
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					System.out.println(res);
				}
			} finally {
				// The scanner was previously never closed.
				scanner.close();
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}

8.时间戳过滤器（TimestampsFilter）:

需要在扫描结果中对版本进行细粒度控制。一个版本是指一个列在一个特定时间的值。

/**
 * Runs two scans with a {@code TimestampsFilter} for timestamps 5, 10 and 15 and
 * prints each returned {@code Result} to standard output.
 *
 * <p>The first scan uses the filter alone; the second additionally sets the scan
 * time range to {@code (8, 12)} via {@code setTimeRange}.
 *
 * @throws IOException if any HBase client call fails
 */
public void timestampsFilter() throws IOException {
	List<Long> ts = new ArrayList<Long>();
	// Long.valueOf replaces the deprecated Long constructor.
	ts.add(Long.valueOf(5L));
	ts.add(Long.valueOf(10L));
	ts.add(Long.valueOf(15L));

	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Filter filter = new TimestampsFilter(ts);

			Scan scan = new Scan();
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					System.out.println(res);
				}
			} finally {
				scanner.close();
			}

			Scan scan2 = new Scan();
			scan2.setFilter(filter);
			scan2.setTimeRange(8, 12);
			ResultScanner scanner2 = htable.getScanner(scan2);
			try {
				for (Result res : scanner2) {
					System.out.println(res);
				}
			} finally {
				scanner2.close();
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}
/**
 * Runs two scans with a {@code TimestampsFilter} for timestamps 5, 10 and 15 and
 * prints each returned {@code Result} to standard output.
 *
 * <p>The first scan uses the filter alone; the second additionally sets the scan
 * time range to {@code (8, 12)} via {@code setTimeRange}.
 *
 * @throws IOException if any HBase client call fails
 */
public void timestampsFilter() throws IOException {
	List<Long> ts = new ArrayList<Long>();
	// Long.valueOf replaces the deprecated Long constructor.
	ts.add(Long.valueOf(5L));
	ts.add(Long.valueOf(10L));
	ts.add(Long.valueOf(15L));

	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Filter filter = new TimestampsFilter(ts);

			Scan scan = new Scan();
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					System.out.println(res);
				}
			} finally {
				scanner.close();
			}

			Scan scan2 = new Scan();
			scan2.setFilter(filter);
			scan2.setTimeRange(8, 12);
			ResultScanner scanner2 = htable.getScanner(scan2);
			try {
				for (Result res : scanner2) {
					System.out.println(res);
				}
			} finally {
				scanner2.close();
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}

9.列计数过滤器（ColumnCountGetFilter）:

限制每行最多取回多少列。设置ColumnCountGetFilter(int n)，它不适合扫描操作，更适合get方法。

10.列分页过滤器（ColumnPaginationFilter）:

可以对一行中所有列进行分页。ColumnPaginationFilter（int limit, int offset）会跳过偏移量小于offset的列，并从offset开始最多返回limit列。

/**
 * Scans the table with {@code ColumnPaginationFilter(2, 3)} (limit 2, offset 3)
 * and prints each returned {@code Result} to standard output.
 *
 * @throws IOException if any HBase client call fails
 */
public void columnPaginationFilter() throws IOException {
	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Filter filter = new ColumnPaginationFilter(2, 3);
			Scan scan = new Scan();
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					System.out.println(res);
				}
			} finally {
				scanner.close();
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}
/**
 * Scans the table with {@code ColumnPaginationFilter(2, 3)} (limit 2, offset 3)
 * and prints each returned {@code Result} to standard output.
 *
 * @throws IOException if any HBase client call fails
 */
public void columnPaginationFilter() throws IOException {
	// createConnection() hands back a connection the caller owns, so it must be closed here.
	HConnection hConnection = HConnectionManager.createConnection(conf);
	try {
		HTableInterface htable = hConnection.getTable(tableName);
		try {
			Filter filter = new ColumnPaginationFilter(2, 3);
			Scan scan = new Scan();
			scan.setFilter(filter);
			ResultScanner scanner = htable.getScanner(scan);
			try {
				for (Result res : scanner) {
					System.out.println(res);
				}
			} finally {
				scanner.close();
			}
		} finally {
			htable.close();
		}
	} finally {
		hConnection.close();
	}
}