1.show databases;
2.show tables;
3.show tables in 数据库名;(不在对应数据库下可查到想要的表名)
4.show tables in hive 'tom*'; 查询在hive数据库下,tom开头的表名。
5.desc extended tablename;
可以查看表详细信息,其中tableType=MANAGED_TABLE或EXTERNAL_TABLE 可以看出是内部表还是外部表
6.数据库的增删改
(1)创建
create database if not exists hive;
使用if not exists 可以避免抛出错误信息。
(2)删除数据库
drop database if exists hive;
if exists 是可选的,如果加了这个子句,避免hive不存在抛出的异常。
默认情况,hive是不允许删除一个带有表的数据库的。用户可以先清空数据库表,再删除库,或者使用关键字cascade:
drop database if exists hive cascade;
(3)修改数据库
可以修改数据库的dbproperties,设置键值对属性值,但是其他元数据信息是不能修改的。如:
-- Only dbproperties key/value pairs can be modified; other database
-- metadata (name, location) cannot be changed with ALTER DATABASE.
-- Fix: the original used curly quotes (‘...’), which Hive rejects —
-- string literals must use ASCII single quotes.
alter database hive set dbproperties('edited-by'='wang');
alter database mytest set dbproperties('creator'='wangdd');
7.创建表
(1)基本语法
CREATE [TEMPORARY(临时表)] [EXTERNAL(外部表,如果不加该关键字就是创建内部表)] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [COMMENT col_comment](列的注释), ... [constraint_specification])]
[COMMENT table_comment](表的注释)
[PARTITIONED BY (分区)(col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY(分布) (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS(分桶)]
[SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)]
ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
[STORED AS DIRECTORIES]
[
[ROW FORMAT row_format] 分隔符的指定
[STORED AS file_format] 数据存储格式
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later)
]
[LOCATION hdfs_path]真实数据存放的位置
[TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later)
[AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
-- Managed table demonstrating Hive complex types (array / map / struct).
-- Fixes vs. the original notes:
--   * missing comma after the map column;
--   * `desc` was used as a column name twice, and it is a reserved
--     keyword — renamed to `info` and `addr`;
--   * added MAP KEYS TERMINATED BY so the map column's key/value
--     separator is explicit instead of the '\003' default.
create table hive.person(
    id int,
    name string,
    likes array<string>,
    info map<string,string>,
    addr struct<city:string,area:string,streetID:int>
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY '-'
MAP KEYS TERMINATED BY ':'
LINES TERMINATED BY '\n'
NULL DEFINED AS '@' -- '@' in the data file is read back as NULL
STORED AS TEXTFILE;
(2)复制一个表结构(不包含数据)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
CREATE TABLE like_student LIKE student;
(3)通过查询来创建另外一张表
CREATE TABLE new_key_value_store
ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
STORED AS RCFile
AS
SELECT (key % 1024) new_key, concat(key, value) key_value_pair
FROM key_value_store
SORT BY new_key, key_value_pair;
-- CTAS: build s_person1 from person1, with custom delimiters for
-- fields (#), collection items (^) and map keys (_); '@' marks NULL.
CREATE TABLE s_person1
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '#'
        COLLECTION ITEMS TERMINATED BY '^'
        MAP KEYS TERMINATED BY '_'
        NULL DEFINED AS '@'
AS
SELECT * FROM person1;
(4)分区表
create table hive.tomcat_log(
id string,
page string,
status int,
traffic int
)
partitioned by (year string,month string,day string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
(5)分桶表
-- Bucketed table: rows are hashed on cc into 3 buckets, and kept
-- sorted by cc inside each bucket.
CREATE TABLE clus2 (
    cc INT
)
CLUSTERED BY (cc)
SORTED BY (cc)
INTO 3 BUCKETS;
8.数据的导入
(1)LOAD DATA
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE](覆盖,不写就是追加) INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
在本地导入(Linux系统下)复制的过程
load data local inpath '/home/wangfutai/a/1.txt' INTO TABLE hive.union_test;
在hdfs导入(hdfs系统下)剪切的过程,原来目录数据没了
追加:
load data inpath '/user/wangfutai/hive/warehouse/hive.db/st/st.txt' INTO TABLE hive.union_test;
覆盖:
load data inpath '/user/wangfutai/hive/warehouse/hive.db/st/1.txt' OVERWRITE INTO TABLE hive.union_test;
(2)分区表的导入
静态模式:
load data local inpath '/home/wangfutai/a/2.txt' OVERWRITE INTO TABLE hive.tomcat_log PARTITION(year='2017',month='11',day='5');
load data inpath '/user/wangfutai/mr/ETLOutPut16/part-r-00001' OVERWRITE INTO TABLE hive.tomcatelog PARTITION(days='20170531');
动态模式:
覆盖模式:只会覆盖相同分区数据,其他分区不受影响。
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
追加模式:
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
例子:
INSERT OVERWRITE TABLE dynamic_human2 PARTITION (sexs) select * from human1;
以下插入,需要human3的列比dynamic_human3 少一列
-- Static-partition insert: the partition value is fixed to 'nan', so the
-- SELECT must NOT include the sexs column (human3 has one column fewer).
-- Fix: the original wrote ‘nan’ with curly quotes, which Hive rejects —
-- string literals must use ASCII single quotes.
INSERT OVERWRITE TABLE dynamic_human3 PARTITION (sexs='nan') select * from human3;
9.数据的导出
(1)通过建表的方式
CREATE TABLE new_key_value_store
ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
STORED AS RCFile
AS
SELECT (key % 1024) new_key, concat(key, value) key_value_pair
FROM key_value_store
SORT BY new_key, key_value_pair;
CREATE TABLE s_person1
ROW FORMAT DELIMITED FIELDS TERMINATED BY '#'
COLLECTION ITEMS TERMINATED BY '^'
MAP KEYS TERMINATED BY '_'
NULL DEFINED AS '@'
AS SELECT * FROM person1;
(2)通过insert
INSERT OVERWRITE [LOCAL] DIRECTORY directory1
[ROW FORMAT row_format] [STORED AS file_format]
SELECT ... FROM ...
保存到本地
INSERT OVERWRITE LOCAL DIRECTORY
'/home/wangfutai/a/partition_data'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '#'
NULL DEFINED AS '@'
SELECT * FROM st;
保存到hdfs中 会覆盖掉原来目录
INSERT OVERWRITE DIRECTORY
'/user/candle/hive_data/person1_data'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '#'
COLLECTION ITEMS TERMINATED BY '^'
MAP KEYS TERMINATED BY '_'
NULL DEFINED AS '@'
SELECT * FROM person1;
(3)通过查询插入表中,指定列作为分区
覆盖模式:只会覆盖相同分区数据,其他分区不受影响。
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
追加模式:
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
覆盖:
INSERT OVERWRITE TABLE dynamic_human1 PARTITION(sex) SELECT * FROM human1;
追加:
INSERT INTO TABLE dynamic_human1 PARTITION(sex) SELECT * FROM human2;
也可以在插入的时候 指定分区列名,但是指定了分区列名,那么human3就必须没有sex这列,因为sex分区已经指定了
INSERT OVERWRITE TABLE dynamic_human1 PARTITION(sex='aaa') SELECT * FROM human1;