hive

  • 一、内部表和外部表
  • 1、内部表(管理表)
  • 2、外部表(External Tables)
  • 二、创建临时表
  • 三、hive建表语句-CTAS and WITH
  • 四、删除表/修改表
  • 1、删除表
  • 2、修改表
  • 五、装载数据


一、内部表和外部表

1、内部表(管理表)

HDFS中为所属数据库目录下的子文件夹
数据完全由Hive管理,删除表(元数据)会删除数据

2、外部表(External Tables)

数据保存在指定位置的HDFS路径中
Hive不完全管理数据,删除表(元数据)不会删除数据

--创建一个外部表
create external table if not exists employee_external(
name string,
work_place array<string>,
sex_age struct<sex:string,age:int>,
skills_score map<string,int>,
depart_title map<string,array<string>>
)
row format delimited
fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
stored as textfile
location '/data/hive/employee_external'; 

--查看完整的建表语句
show create table employee_external;
//查看元数据信息
desc formatted employee_external;

创建外部表是加上关键字external,创建内部表时候注意不用添加位置信息

//创建一个内部表
create  table if not exists employee_2(
name string,
work_place array<string>,
sex_age struct<sex:string,age:int>,
skills_score map<string,int>,
depart_title map<string,array<string>>
);

//插入数据(通过select表的方式插入数据)
insert into table imployee_2
select * from employee_external;

二、创建临时表

临时表是应用程序自动管理在复杂查询期间生成的中间数据的方法
表只对当前session有效,session退出后自动删除
表空间位于/tmp/hive-<user_name>(安全考虑)
如果创建的临时表表名已存在,实际用的是临时表

//创建一个临时表,记住关键字temporary
create temporary table if not exists employee_temp(
name string,
work_place array<string>,
sex_age struct<sex:string,age:int>,
skills_score map<string,int>,
depart_title map<string,array<string>>
);

三、hive建表语句-CTAS and WITH

--CTAS
create table ctas_employee
as select * from employee_external;


--WITH
create table cte_employee as
with
r1 as(select * from employee_external limit 1),
r2 as(select * from employee_external where name = 'Will')
select * from r1 union all select * from r2;

select * from cte_employee;
show create table cte_employee;
desc formatted cte_employee;

--LIKE
create table like_employee like employee_external;

四、删除表/修改表

1、删除表

--删除一个表
drop table if exists like_employee;
--修改表名
alter table cte_employee rename to cte_employee2;

2、修改表

ALTER TABLE employee RENAME TO new_employee;
ALTER TABLE c_employee SET TBLPROPERTIES ('comment'='New name, comments');
ALTER TABLE employee_internal SET SERDEPROPERTIES ('field.delim' = '$');
ALTER TABLE c_employee SET FILEFORMAT RCFILE; -- 修正表文件格式
-- 修改表的列操作
ALTER TABLE employee_internal CHANGE old_name new_name STRING; -- 修改列名
ALTER TABLE c_employee ADD COLUMNS (work string); -- 添加列
ALTER TABLE c_employee REPLACE COLUMNS (name string); -- 替换列

五、装载数据

LOAD用于在Hive中移动数据

LOAD DATA LOCAL INPATH '/home/dayongd/Downloads/employee.txt' 
OVERWRITE INTO TABLE employee;
-- 加LOCAL关键字,表示原始文件位于Linux本地,执行后为拷贝数据
LOAD DATA LOCAL INPATH '/home/dayongd/Downloads/employee.txt' 
OVERWRITE INTO TABLE employee_partitioned  PARTITION (year=2014, month=12);
-- 没有LOCAL关键字,表示文件位于HDFS文件系统中,执行后为直接移动数据
LOAD DATA INPATH '/tmp/employee.txt'  
OVERWRITE INTO TABLE employee_partitioned PARTITION (year=2017, month=12);

LOCAL:指定文件位于本地文件系统,执行后为拷贝数据
OVERWRITE:表示覆盖表中现有数据
ROW FORMAT DELIMITED 分隔符设置开始语句
FIELDS TERMINATED BY:设置字段与字段之间的分隔符
COLLECTION ITEMS TERMINATED BY:设置一个复杂类型(array,struct)字段的各个item之间的分隔符
MAP KEYS TERMINATED BY:设置一个复杂类型(Map)字段的key value之间的分隔符
LINES TERMINATED BY:设置行与行之间的分隔符

--load local overwrite
create table if not exists employee(
name string,
work_place array<string>,
sex_age struct<sex:string,age:int>,
skills_score map<string,int>,
depart_title map<string,array<string>>
)
row format delimited
fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
stored as textfile;
--不会清空之前的数据
load data local inpath '/root/employee.txt'
into table employee;
--overwrite会把之前的数据清空重新添加
load data local inpath '/root/employee.txt'
overwrite into table employee;

ALTER TABLE employee SET TBLPROPERTIES('EXTERNAL'='False'); 
--测试从hdfs加载数据
hdfs dfs -mkdir -p /data/hive/employee
hdfs dfs -put employee.txt /data/hive/employee