标签(空格分隔): hadoop


hbase的操作

hbase的shell交互操作(常用操作)

入口: $HBASE_HOME/bin/hbase shell
退出: > quit
定义: > create '表名', '列族名1', '列族名2', '列族名N'
例: create 'people','student','businessman' # 创建一张 people 表,列族有 student(学生)和 businessman(商人)。

list # 查询所有表
 desc 'people' # 查看表结构
 exists 'people' # 查看表是否存在
 is_enabled 'people' # 查看表是否处于启用状态(启用表用 enable 'people')
 is_disabled 'people' # 查看表是否处于禁用状态(禁用表用 disable 'people')
 # 添加/修改记录(如果rowkey、列族和列名都一致,则后写入的值覆盖先写入的值;但旧值的版本记录仍会保留,版本默认为写入时的时间戳)
 put '表名', 'rowKey', '列族:列', '值'
 put 'people','stu_123','student:name','zhangsan'
 put 'people','stu_123','student:age','10'
 put 'people','stu_123','businessman:name','lisi'
 put 'people','stu_1234','student:name','wangwu'
 查询记录
 get '表名', 'rowKey' # 获取rowkey下的所有数据
 get 'people','stu_123'
 count '表名' # 注意:每个rowkey只算1条,即以rowkey的个数作为条数,虽然HBase是列式存储
 get '表名','rowkey','列族' # 获取某个列族
 get 'people','stu_123','student'
 get '表名','rowkey','列族:列' # 获取某个列。由此可以看出,要获取得越精确,只需在后面用逗号分隔、继续追加更细一级的限定(列族、列族:列)即可
 get 'people','stu_123','student:name'
 查询整张表记录
 scan '表名'
 查看某个表某个列中所有数据
 scan '表名', {COLUMNS=>'列族名:列名'}
 删除记录
 delete '表名', '行名', '列族:列'
 delete 'people','stu_123','student:name'
 deleteall '表名','rowkey' # 删除整行
 deleteall 'people','stu_123'
 删除整张表
 先要屏蔽该表,才能对该表进行删除
 第一步 disable '表名',第二步 drop '表名'
 清空表:
 truncate '表名'

hbase的api操作

首先我们来介绍一下相关的api,让我们大体上有个了解,之后再根据这些api来做一下hbase的相关操作。
套路一:配置项
HBaseConfiguration与Hdfs的操作一样需要配置类作为操作的相关配置。
之后是hbase的核心操作类
HBaseAdmin 该类是hbase管理类操作(如建表、删表)的入口
HTableDescriptor 表描述类,包含表和列族的信息,用于表设计
既然有表描述类了就必须要有列族信息
HColumnDescriptor 这个是用于做列族信息的定义类
以下是一些hbase相关操作的Java API示例。总结一下:方法和类的命名与命令行基本是一一对应的关系;涉及表定义(建表、删表)时用HBaseAdmin,涉及表的数据操作时用Table类。

package com.dinfo.hbase;

import org.apache.curator.framework.state.ConnectionStateManager;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * JUnit examples of basic HBase client operations on the {@code user} table:
 * table definition through {@link Admin}, and data access (put, delete, get,
 * scan with filters) through {@link Table}.
 *
 * <p>A connection and a table handle are opened before each test in
 * {@link #before()} and released after each test in {@link #close()}.
 *
 * Created by: luo
 * date: 2019/7/15.
 */
public class HBaseApiOps {
    private static final TableName USER_TABLE = TableName.valueOf("user");
    private static final byte[] INFO_FAMILY = Bytes.toBytes("info");
    private static final byte[] NAME_QUALIFIER = Bytes.toBytes("name");
    private static final byte[] AGE_QUALIFIER = Bytes.toBytes("age");

    Configuration conf = null;
    Table table = null; // handle used for all data operations on the "user" table
    Connection conn = null;

    /**
     * Builds the client configuration, opens the connection and obtains the
     * {@code user} table handle.
     *
     * @throws IOException if the connection cannot be created
     */
    @Before
    public void before() throws IOException {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop1,hadoop2,hadoop3");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conn = ConnectionFactory.createConnection(conf);
        table = conn.getTable(USER_TABLE);
    }

    /**
     * Creates the {@code user} table with the column families {@code info} and {@code info2}.
     *
     * @throws IOException if the table cannot be created
     */
    @Test
    public void testCreateTable() throws IOException {
        // Admin is Closeable; release it as soon as the DDL call is done.
        try (Admin admin = conn.getAdmin()) {
            HTableDescriptor desc = new HTableDescriptor(USER_TABLE);
            desc.addFamily(new HColumnDescriptor("info"));
            desc.addFamily(new HColumnDescriptor("info2"));
            admin.createTable(desc);
        }
    }

    /**
     * Drops the {@code user} table. A table must be disabled before it can be deleted.
     *
     * @throws IOException if disabling or deleting the table fails
     */
    @Test
    public void testDropTable() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            admin.disableTable(USER_TABLE);
            admin.deleteTable(USER_TABLE);
        }
    }

    /**
     * Inserts (or overwrites) {@code info:name} for row {@code u_1234}.
     * Table definition goes through {@link Admin}; data manipulation goes through {@link Table}.
     *
     * @throws IOException if the put fails
     */
    @Test
    public void testInsertOrUpdate() throws IOException {
        Put put = new Put(Bytes.toBytes("u_1234"));
        put.addColumn(INFO_FAMILY, NAME_QUALIFIER, Bytes.toBytes("zhangsan"));
        table.put(put);
    }

    /**
     * Inserts rows {@code u_1230} .. {@code u_1239} in a single batch. {@code info:name} is
     * stored as a string and {@code info:age} as a 4-byte int.
     *
     * @throws IOException if the batch put fails
     */
    @Test
    public void batchInsert() throws IOException {
        List<Put> puts = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            Put put = new Put(Bytes.toBytes("u_123" + i));
            put.addColumn(INFO_FAMILY, NAME_QUALIFIER, Bytes.toBytes("zhangsan" + i));
            put.addColumn(INFO_FAMILY, AGE_QUALIFIER, Bytes.toBytes(i + 10));
            puts.add(put);
        }
        table.put(puts);
    }

    /**
     * Deletes the whole row {@code u_1230}.
     *
     * @throws IOException if the delete fails
     */
    @Test
    public void testDel() throws IOException {
        // Table operations follow one pattern: put(Put), delete(Delete), getScanner(Scan).
        // No explicit flush call here: flushCommits() is not declared on the Table interface.
        Delete delete = new Delete(Bytes.toBytes("u_1230"));
        table.delete(delete);
    }

    /**
     * Deletes the single column {@code info:name} of row {@code u_1232}.
     *
     * @throws IOException if the delete fails
     */
    @Test
    public void testDelColumnFamily() throws IOException {
        Delete del = new Delete(Bytes.toBytes("u_1232"));
        del.addColumn(INFO_FAMILY, NAME_QUALIFIER);
        table.delete(del);
    }

    /**
     * Single-row lookup, the counterpart of the shell {@code get} command.
     * Prints every cell of row {@code u_1233}; prints nothing when the row has no cells.
     *
     * @throws IOException if the get fails
     */
    @Test
    public void testGet() throws IOException {
        Get get = new Get(Bytes.toBytes("u_1233"));
        printResult(table.get(get));
    }

    /**
     * Range scan over the row keys from {@code u_1232} up to {@code u_1235}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testBatchQuery() throws IOException {
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes("u_1232"));
        scan.setStopRow(Bytes.toBytes("u_1235"));
        scanAndPrint(scan);
    }

    /**
     * Runs the scan, prints every returned cell and always closes the scanner.
     */
    private void scanAndPrint(Scan scan) throws IOException {
        try (ResultScanner scanner = table.getScanner(scan)) {
            printScanResult(scanner);
        }
    }

    /**
     * Prints every cell of every row produced by the scanner. The caller owns the scanner
     * and is responsible for closing it.
     */
    private void printScanResult(ResultScanner result) {
        for (Result row : result) {
            printResult(row);
        }
    }

    /**
     * Prints all cells of one result; a null or empty result prints nothing.
     */
    private static void printResult(Result result) {
        if (result == null || result.isEmpty()) {
            return;
        }
        for (Cell cell : result.rawCells()) {
            printCell(cell);
        }
    }

    /**
     * Prints row key, family, qualifier, value and timestamp of one cell.
     *
     * <p>The {@code get*Array()} accessors return a backing array that is shared with the
     * other parts of the cell, so every field is decoded with its own offset and length.
     * The {@code name} qualifier is decoded as a string; any other 4-byte value is decoded
     * as an int (this matches how {@code batchInsert} writes {@code age}); everything else
     * falls back to a printable binary string.
     */
    private static void printCell(Cell cell) {
        System.out.println("row:"
                + Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
        System.out.println("family:"
                + Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
        String qualifier = Bytes.toString(
                cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
        System.out.println("Qualifier:" + qualifier);

        byte[] valueArray = cell.getValueArray();
        int valueOffset = cell.getValueOffset();
        int valueLength = cell.getValueLength();
        if ("name".equals(qualifier)) {
            System.out.println("value:" + Bytes.toString(valueArray, valueOffset, valueLength));
        } else if (valueLength == Bytes.SIZEOF_INT) {
            System.out.println("value:" + Bytes.toInt(valueArray, valueOffset, valueLength));
        } else {
            System.out.println("value:" + Bytes.toStringBinary(valueArray, valueOffset, valueLength));
        }
        System.out.println("time:" + cell.getTimestamp());
    }

    /**
     * The previous tests read a single row and scanned a key range; this one scans with a
     * filter: rows whose {@code info:name} equals {@code zhangsan4} OR {@code zhangsan3}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testSingleFilterQuery() throws IOException {
        Scan scan = new Scan();
        // A FilterList is itself a Filter; MUST_PASS_ONE combines its members with OR.
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
        filterList.addFilter(new SingleColumnValueFilter(
                INFO_FAMILY, NAME_QUALIFIER, CompareFilter.CompareOp.EQUAL, Bytes.toBytes("zhangsan4")));
        filterList.addFilter(new SingleColumnValueFilter(
                INFO_FAMILY, NAME_QUALIFIER, CompareFilter.CompareOp.EQUAL, Bytes.toBytes("zhangsan3")));
        scan.setFilter(filterList);
        scanAndPrint(scan);
        // Note: if a row does not contain the filtered column at all, this filter cannot
        // filter that row out (see SingleColumnValueFilter#setFilterIfMissing).
    }

    /**
     * Scans only the columns whose qualifier starts with {@code age}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testColumnPreffix() throws IOException {
        Scan scan = new Scan();
        scan.setFilter(new ColumnPrefixFilter(Bytes.toBytes("age")));
        scanAndPrint(scan);
    }

    /**
     * Scans the columns whose qualifier starts with any of several prefixes ({@code n} or {@code a}).
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testMColumnPreffix() throws IOException {
        Scan scan = new Scan();
        byte[][] prefixes = new byte[][]{Bytes.toBytes("n"), Bytes.toBytes("a")};
        scan.setFilter(new MultipleColumnPrefixFilter(prefixes));
        scanAndPrint(scan);
    }

    /**
     * Filters on the row key itself: rows whose key matches the regex {@code 1239$}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void testRowFilter() throws IOException {
        Scan scan = new Scan();
        scan.setFilter(new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("1239$")));
        scanAndPrint(scan);
    }

    /**
     * Releases the table handle and the connection. Null checks are needed because this
     * runs even when {@link #before()} failed part-way.
     *
     * @throws IOException if closing either resource fails
     */
    @After
    public void close() throws IOException {
        try {
            if (table != null) {
                table.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}

至于删除相关操作,无论是Delete还是Get,对列族、列的设置方式都差不多,这个api还是很容易上手的。下图是Delete类相关设置方法的截图。

hadoop中的元数据全部删除了 hadoop删除表字段_表名