hive alter add hive alter add partition

转载

mob64ca140a59b0 2023-12-12 19:47:03

文章标签 hive alter add hadoop hive 字段分区表 文章分类 Hive 大数据

文章目录

Hive静态分区和动态分区

1. 静态分区

1.1 增加3个分区

向每个分区中添加数据

1.2 查询数据

1.2.1 直接查询
1.2.2 添加分区信息查询

2. 动态分区

开启Hive的动态分区支持

2.2 建原始表
2.3 建立分区表
2.4 加载数据

3. 多级分区

Hive静态分区和动态分区

1. 静态分区

建立分区表

-- External table used for the static-partition walkthrough.
-- Data columns: id, name, age, gender, clazz. Partition column: pt (STRING).
-- Rows are stored as comma-delimited text.
CREATE EXTERNAL TABLE students_pt1 (
    id     BIGINT,
    name   STRING,
    age    INT,
    gender STRING,
    clazz  STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

1.1 增加3个分区

-- Register three partitions on students_pt1, one per pt value (20210101 through 20210103).
ALTER TABLE students_pt1 ADD PARTITION (pt = '20210101');
ALTER TABLE students_pt1 ADD PARTITION (pt = '20210102');
ALTER TABLE students_pt1 ADD PARTITION (pt = '20210103');

查看某个表的所有分区信息

-- List every partition of students_pt1 (the partitioned table created earlier in this walkthrough).
SHOW PARTITIONS students_pt1;

hive alter add hive alter add partition_hive alter add

也可以从web界面直接查看

hive alter add hive alter add partition_hive alter add_02

向每个分区中添加数据

-- Fill one statically named partition of students_pt1 from the students table.
-- NOTE(review): the schema of students is not shown here; SELECT * assumes its
-- columns line up positionally with students_pt1's data columns - confirm.
INSERT INTO TABLE students_pt1 PARTITION (pt = '20210902')
SELECT * FROM students;
或
-- Load a local text file directly into a statically named partition of students_pt1.
LOAD DATA LOCAL INPATH '/usr/local/soft/data/students.txt'
INTO TABLE students_pt1 PARTITION (pt = '20210902');

hive alter add hive alter add partition_hadoop_03

1.2 查询数据

1.2.1 直接查询

-- Querying without a partition filter scans the whole table: inefficient, not recommended.
SELECT COUNT(*) FROM students_pt1;

hive alter add hive alter add partition_分区表_04

1.2.2 添加分区信息查询

-- Filtering on the partition column in WHERE prunes partitions and avoids a full-table scan.
SELECT COUNT(*) FROM students_pt1 WHERE pt = '20210101';

hive alter add hive alter add partition_hadoop_05

2. 动态分区

动态分区：根据数据中某几列的不同的取值划分不同的分区
有的时候原始表中的数据里面包含了“日期字段 dt”，需要根据 dt 中不同的日期划分为不同的分区，将原始表改造成分区表。
Hive 0.9.0 之前默认不开启动态分区（0.9.0 及之后的版本默认已开启），可通过下面的参数显式设置。

开启Hive的动态分区支持

-- Session settings for dynamic partitioning; run these in the Hive session before the inserts.
-- Enable dynamic partitioning.
SET hive.exec.dynamic.partition=true;
-- Dynamic partition mode: strict (at least one static partition must be supplied) or nonstrict.
-- strict example: INSERT INTO TABLE students_pt PARTITION (dt='anhui', pt) SELECT ......, pt FROM students
SET hive.exec.dynamic.partition.mode=nonstrict;
-- Allow up to 1000 dynamic partitions per mapper/reducer node; tune this to the workload.
SET hive.exec.max.dynamic.partitions.pernode=1000;

2.2 建原始表

-- Unpartitioned source table: the five student columns plus dt as an ordinary data column.
-- Rows are stored as comma-delimited text.
CREATE TABLE students_dt (
    id     BIGINT,
    name   STRING,
    age    INT,
    gender STRING,
    clazz  STRING,
    dt     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

加载数据

-- The students_dt data carries an added dt field, with values from 20210101 through 20210110.
LOAD DATA LOCAL INPATH '/usr/local/soft/data/students_dt.txt'
INTO TABLE students_dt;

2.3 建立分区表

-- Partitioned target table: same five student columns as students_dt,
-- with dt declared as the partition column instead of a data column.
-- Rows are stored as comma-delimited text.
CREATE TABLE students_dt_p (
    id     BIGINT,
    name   STRING,
    age    INT,
    gender STRING,
    clazz  STRING
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

2.4 加载数据

从原始表中向分区表加载数据

使用动态分区插入数据

-- Partition columns must come last in the SELECT list (in order, when there are several):
-- Hive matches them by position, not by name.
INSERT INTO TABLE students_dt_p PARTITION (dt)
SELECT id, name, age, gender, clazz, dt
FROM students_dt;
-- Counter-example: matching is positional, so this statement partitions by age
-- (the last selected column) instead of students_dt.dt, and dt is written into the clazz column.
INSERT INTO TABLE students_dt_p PARTITION (dt)
SELECT id, name, age, gender, dt, age
FROM students_dt;

web界面查看

hive alter add hive alter add partition_hadoop_06

3. 多级分区

-- Two-level dynamic partitioning; partition directories nest as /students_year_month_pt/year/month.
-- Both partition columns come last in the SELECT list, in the same order as in PARTITION (...).
INSERT INTO TABLE students_year_month_pt PARTITION (year, month)
SELECT id, name, age, gender, clazz, year, month
FROM students_year_month;

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。