标签(空格分隔): hadoop
hbase的操作
hbase的shell交互操作(常用操作)
入口: $HBASE_HOME/bin/hbase shell
退出: > quit
定义: >create '表名', '列族名1','列族名2','列族名N'
例: create 'people','student','businessman' 创建一张"人"的表,列族有学生(student)和商人(businessman)。
list # 查询所有表
desc 'people' 查看表结构
exists 'people' 查看表是否存在
is_enabled 'people' 查看表是否处于启用状态(启用表请使用 enable 'people')
is_disabled 'people' 查看表是否处于禁用状态(禁用表请使用 disable 'people')
#添加修改记录(如果rowkey、列族和列名都一致,则后者覆盖前者;但前者的版本记录仍会保留,版本号默认为时间戳)
put '表名', 'rowKey', '列族:列', '值'
put 'people','stu_123','student:name','zhangsan'
put 'people','stu_123','student:age','10'
put 'people','stu_123','businessman:name','lisi'
put 'people','stu_1234','student:name','wangwu'
查询记录
get '表名' , 'rowKey' # 获取rowkey下的所有数据
get 'people','stu_123'
count '表名' # 注意:count以rowkey作为条数,虽然是列式存储,同一个rowkey下的多个列也只算1条
get '表名','rowkey','列族' # 获取某个列族
get 'people','stu_123','student'
get '表名','rowkey','列族:列' # 获取某个列。由此可以看出,要获取得越详细,只需在后面用逗号分隔、继续追加更细的子结构即可
get 'people','stu_123','student:name'
查询整张表记录
scan '表名'
查看某个表某个列中所有数据
scan '表名' , {COLUMNS=>'列族名:列名'}
删除记录
delete '表名' ,'行名' , '列族:列'
delete 'people','stu_123','student:name'
deleteall '表名','rowkey' 删除整行
deleteall 'people','stu_123'
删除整张表
先要禁用(disable)该表,才能对该表进行删除
第一步 disable '表名' ,第二步 drop '表名'
清空表:
truncate '表名'
hbase的api操作
首先我们来介绍一下相关的api,让我们大体上有个了解,之后再根据这些api来做一下hbase的相关操作。
套路一:配置项
与HDFS的操作一样,HBase也需要一个配置类(HBaseConfiguration)来提供操作所需的相关配置。
之后是hbase的核心操作类
HBaseAdmin 该类提供管理hbase的相关操作入口(例如建表、删表等)
HTableDescriptor 表描述类,包含表和列族的信息,用于表设计
既然有表描述类了就必须要有列族信息
HColumnDescriptor 这个是用于做列族信息的定义类
以下是一些hbase相关操作的java api示例。总结一下:在方法和类的命名上,它们与命令行基本是一一对应的关系。另外,如果涉及到表的定义(DDL),使用HBaseAdmin(即Admin接口);如果涉及到表的数据操作(DML),则使用Table类。
package com.dinfo.hbase;
import org.apache.curator.framework.state.ConnectionStateManager;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * JUnit examples of basic HBase client operations: table definition through {@link Admin}
 * and row-level reads/writes through {@link Table}.
 *
 * <p>Each test opens a connection in {@link #before()} and works on the {@code user} table;
 * the connection and table handle are released in {@link #close()}.
 *
 * Created by: luo
 * date: 2019/7/15.
 */
public class HBaseApiOps {
    Configuration conf = null;
    Table table = null; // handle used for all data (DML) operations on the "user" table
    Connection conn = null;

    /**
     * Builds the client configuration, opens the connection and obtains the table handle.
     *
     * @throws IOException if the connection cannot be created
     */
    @Before
    public void before() throws IOException {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop1,hadoop2,hadoop3");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conn = ConnectionFactory.createConnection(conf);
        table = conn.getTable(TableName.valueOf("user"));
    }

    /**
     * Creates the {@code user} table with the column families {@code info} and {@code info2}.
     *
     * @throws IOException if the table cannot be created
     */
    @Test
    public void testCreateTable() throws IOException {
        // Admin is Closeable; release it once the DDL call has finished.
        try (Admin admin = conn.getAdmin()) {
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("user"));
            // One descriptor per column family, each attached to the table descriptor.
            desc.addFamily(new HColumnDescriptor("info"));
            desc.addFamily(new HColumnDescriptor("info2"));
            admin.createTable(desc);
        }
    }

    /**
     * Drops the {@code user} table. A table has to be disabled before it can be deleted.
     *
     * @throws IOException if disabling or deleting the table fails
     */
    @Test
    public void testDropTable() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            TableName tName = TableName.valueOf("user");
            admin.disableTable(tName);
            admin.deleteTable(tName);
        }
    }

    /**
     * Inserts (or overwrites) a single cell. Table definition above went through Admin;
     * data manipulation goes directly through the Table handle.
     *
     * @throws IOException if the write fails
     */
    @Test
    public void testInsertOrUpdate() throws IOException {
        Put put = new Put(Bytes.toBytes("u_1234"));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("zhangsan"));
        table.put(put);
    }

    /**
     * Writes ten rows ({@code u_1230} .. {@code u_1239}) in one batch. {@code info:name} is
     * stored as a string and {@code info:age} as a 4-byte int.
     *
     * @throws IOException if the write fails
     */
    @Test
    public void batchInsert() throws IOException {
        List<Put> puts = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            Put put = new Put(Bytes.toBytes("u_123" + i));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("zhangsan" + i));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(i + 10));
            puts.add(put);
        }
        table.put(puts);
    }

    /**
     * Deletes a whole row by its row key.
     *
     * @throws IOException if the delete fails
     */
    @Test
    public void testDel() throws IOException {
        // The Table API is uniform: put(Put), delete(Delete), getScanner(Scan), get(Get).
        Delete delete = new Delete(Bytes.toBytes("u_1230"));
        // No explicit flush afterwards: the Table interface has no flushCommits().
        table.delete(delete);
    }

    /**
     * Deletes the single column {@code info:name} of row {@code u_1232}.
     *
     * @throws IOException if the delete fails
     */
    @Test
    public void testDelColumnFamily() throws IOException {
        Delete del = new Delete(Bytes.toBytes("u_1232"));
        del.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
        table.delete(del);
    }

    /**
     * Single-row lookup, the counterpart of the shell {@code get} command.
     *
     * @throws IOException if the read fails
     */
    @Test
    public void testGet() throws IOException {
        Get get = new Get(Bytes.toBytes("u_1233"));
        Result result = table.get(get);
        for (Cell cell : result.rawCells()) {
            printCell(cell);
        }
    }

    /**
     * Range scan from row {@code u_1232} up to the stop row {@code u_1235}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testBatchQuery() throws IOException {
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes("u_1232"));
        scan.setStopRow(Bytes.toBytes("u_1235"));
        try (ResultScanner result = table.getScanner(scan)) {
            printScanResult(result);
        }
    }

    /**
     * Prints every cell of every row produced by the scanner. The caller owns the scanner
     * and is responsible for closing it.
     *
     * @param result scanner to drain
     */
    private void printScanResult(ResultScanner result) {
        for (Result row : result) {
            for (Cell cell : row.rawCells()) {
                printCell(cell);
            }
        }
    }

    /** Prints row key, family, qualifier, decoded value and timestamp of one cell. */
    private static void printCell(Cell ce) {
        // The *Array() getters return the shared backing array, so offset/length are mandatory.
        System.out.println("row:" + Bytes.toString(ce.getRowArray(), ce.getRowOffset(), ce.getRowLength()));
        System.out.println("family:" + Bytes.toString(ce.getFamilyArray(), ce.getFamilyOffset(), ce.getFamilyLength()));
        String qualifier = Bytes.toString(ce.getQualifierArray(), ce.getQualifierOffset(), ce.getQualifierLength());
        System.out.println("Qualifier:" + qualifier);
        System.out.println("value:" + decodeValue(ce, qualifier));
        System.out.println("time:" + ce.getTimestamp());
    }

    /**
     * Decodes a cell value for printing: {@code name} is a string, any other column is read
     * as an int when the value is exactly 4 bytes long, and as a string otherwise.
     */
    private static String decodeValue(Cell ce, String qualifier) {
        if (!"name".equals(qualifier) && ce.getValueLength() == Bytes.SIZEOF_INT) {
            return String.valueOf(Bytes.toInt(ce.getValueArray(), ce.getValueOffset()));
        }
        return Bytes.toString(ce.getValueArray(), ce.getValueOffset(), ce.getValueLength());
    }

    /**
     * The previous examples were a single-row get and a plain scan; this one is a filtered
     * scan that keeps rows whose {@code info:name} equals {@code zhangsan4} OR {@code zhangsan3}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testSingleFilterQuery() throws IOException {
        Scan scan = new Scan();
        // A FilterList is itself a Filter; MUST_PASS_ONE combines its members with OR.
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
        filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("name"),
                CompareFilter.CompareOp.EQUAL, Bytes.toBytes("zhangsan4")));
        filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("name"),
                CompareFilter.CompareOp.EQUAL, Bytes.toBytes("zhangsan3")));
        scan.setFilter(filterList);
        try (ResultScanner result = table.getScanner(scan)) {
            printScanResult(result);
        }
        // Note: a row that does not contain the filtered column is not excluded by this filter
        // (see SingleColumnValueFilter#setFilterIfMissing to change that).
    }

    /**
     * Scan returning only columns whose qualifier starts with {@code age}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testColumnPreffix() throws IOException {
        Scan scan = new Scan();
        scan.setFilter(new ColumnPrefixFilter(Bytes.toBytes("age")));
        try (ResultScanner result = table.getScanner(scan)) {
            printScanResult(result);
        }
    }

    /**
     * Scan returning columns whose qualifier starts with any of several prefixes
     * ({@code n} or {@code a}).
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testMColumnPreffix() throws IOException {
        Scan scan = new Scan();
        byte[][] prefixes = new byte[][]{Bytes.toBytes("n"), Bytes.toBytes("a")};
        scan.setFilter(new MultipleColumnPrefixFilter(prefixes));
        try (ResultScanner result = table.getScanner(scan)) {
            printScanResult(result);
        }
    }

    /**
     * Filter working on the row key: keeps rows whose key matches the regex {@code 1239$}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testRowFilter() throws IOException {
        Scan scan = new Scan();
        scan.setFilter(new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("1239$")));
        try (ResultScanner result = table.getScanner(scan)) {
            printScanResult(result);
        }
    }

    /**
     * Releases the table handle and the connection; tolerates a setup that failed before
     * either of them was assigned.
     *
     * @throws IOException if closing either resource fails
     */
    @After
    public void close() throws IOException {
        try {
            if (table != null) {
                table.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}
删除相关的操作可以自行选择:无论是Delete还是Get,对列族、列的设置方式都差不多,这个api还是很容易上手的。如下图是Delete类的相关设置方法截图。