cd $SPARK_3_0_1_HOME/bin
./beeline
!connect jdbc:hive2://cdh-datanode1:10005/default
# 出现弹框提示输入账号密码信息,由于没有添加鉴权,直接按enter即可
-- 创建测试表 student_info
create table default.student_info(
id bigint comment '学生id'
, name string comment '学生姓名'
, birthday string comment '出生日期 yyyy-MM-dd'
, class string comment '班级名称 xxyy xx年级 yy班'
)
comment '学生信息表'
stored as parquet;
-- 初始化测算表数据
insert into default.student_info values(1,'黄小明','2002-01-09','0302')
,(2,'王小刚','2001-06-03','0305')
,(3,'董小梅','2002-07-03','0302')
,(4,'杨小宝','2002-05-03','0301')
,(5,'周小龙','2002-04-23','0302')
,(6,'周小龙','2002-06-28','0304')
,(7,'方小玉','2000-09-28','0204');
-- 查询插入进去的数据
select id, name, birthday, class from default.student_info;
0: jdbc:hive2://cdh-datanode1:10005/default> select id, name, birthday, class from default.student_info;
+-----+-------+-------------+--------+
| id | name | birthday | class |
+-----+-------+-------------+--------+
| 4 | 杨小宝 | 2002-05-03 | 0301 |
| 5 | 周小龙 | 2002-04-23 | 0302 |
| 6 | 周小龙 | 2002-06-28 | 0304 |
| 7 | 方小玉 | 2000-09-28 | 0204 |
| 1 | 黄小明 | 2002-01-09 | 0302 |
| 2 | 王小刚 | 2001-06-03 | 0305 |
| 3 | 董小梅 | 2002-07-03 | 0302 |
+-----+-------+-------------+--------+
7 rows selected (0.275 seconds)
-- 对测试表 student_info的一些操作
-- a.查看各个班级的人数
select class, count(1) from default.student_info group by class;
0: jdbc:hive2://cdh-datanode1:10005/default> select class, count(1) from default.student_info group by class;
+--------+-----------+
| class | count(1) |
+--------+-----------+
| 0204 | 1 |
| 0305 | 1 |
| 0304 | 1 |
| 0301 | 1 |
| 0302 | 3 |
+--------+-----------+
5 rows selected (0.816 seconds)
-- b.使用explain查看 a 的执行计划
explain select class, count(1) from default.student_info group by class;
0: jdbc:hive2://cdh-datanode1:10005/default> explain select class, count(1) from default.student_info group by class;
+----------------------------------------------------+
| plan |
+----------------------------------------------------+
| == Physical Plan ==
*(2) HashAggregate(keys=[class#9977], functions=[count(1)])
+- Exchange hashpartitioning(class#9977, 200), true, [id=#15603]
+- *(1) HashAggregate(keys=[class#9977], functions=[partial_count(1)])
+- *(1) ColumnarToRow
+- FileScan parquet default.student_info[class#9977] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://cdh-namenode1:8020/user/hive/warehouse/student_info], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<class:string>
|
+----------------------------------------------------+
1 row selected (0.087 seconds)
-- c.查询班级人数最多的班级名称
select
class
from default.student_info
group by class
order by count(1) desc
limit 1;
0: jdbc:hive2://cdh-datanode1:10005/default> select
. . . . . . . . . . . . . . . . . . . . . .> class
. . . . . . . . . . . . . . . . . . . . . .> from default.student_info
. . . . . . . . . . . . . . . . . . . . . .> group by class
. . . . . . . . . . . . . . . . . . . . . .> order by count(1) desc
. . . . . . . . . . . . . . . . . . . . . .> limit 1;
+--------+
| class |
+--------+
| 0302 |
+--------+
1 row selected (0.757 seconds)
-- d.查看年龄最小的学生所在的班级
select
class
from default.student_info
order by birthday
limit 1;
0: jdbc:hive2://cdh-datanode1:10005/default> select
. . . . . . . . . . . . . . . . . . . . . .> class
. . . . . . . . . . . . . . . . . . . . . .> from default.student_info
. . . . . . . . . . . . . . . . . . . . . .> order by birthday
. . . . . . . . . . . . . . . . . . . . . .> limit 1;
+--------+
| class |
+--------+
| 0204 |
+--------+
1 row selected (0.202 seconds)
-- e.查看各个年级的总人数
select
count(1) as cnt
, substr(class,1,2) as grade
from default.student_info
group by substr(class,1,2);
0: jdbc:hive2://cdh-datanode1:10005/default> select
. . . . . . . . . . . . . . . . . . . . . .> count(1) as cnt
. . . . . . . . . . . . . . . . . . . . . .> , substr(class,1,2) as grade
. . . . . . . . . . . . . . . . . . . . . .> from default.student_info
. . . . . . . . . . . . . . . . . . . . . .> group by substr(class,1,2);
+------+--------+
| cnt | grade |
+------+--------+
| 6 | 03 |
| 1 | 02 |
+------+--------+
2 rows selected (0.752 seconds)
-- f.在同一年级中,给每个学生根据出生年月从年纪小到大编号
select
substr(class,1,2) as grade
, name
, birthday
, row_number() over(partition by substr(class,1,2) order by birthday desc) as rn
from default.student_info;
0: jdbc:hive2://cdh-datanode1:10005/default> select
. . . . . . . . . . . . . . . . . . . . . .> substr(class,1,2) as grade
. . . . . . . . . . . . . . . . . . . . . .> , name
. . . . . . . . . . . . . . . . . . . . . .> , birthday
. . . . . . . . . . . . . . . . . . . . . .> , row_number() over(partition by substr(class,1,2) order by birthday desc) as rn
. . . . . . . . . . . . . . . . . . . . . .> from default.student_info;
+--------+-------+-------------+-----+
| grade | name | birthday | rn |
+--------+-------+-------------+-----+
| 03 | 董小梅 | 2002-07-03 | 1 |
| 03 | 周小龙 | 2002-06-28 | 2 |
| 03 | 杨小宝 | 2002-05-03 | 3 |
| 03 | 周小龙 | 2002-04-23 | 4 |
| 03 | 黄小明 | 2002-01-09 | 5 |
| 03 | 王小刚 | 2001-06-03 | 6 |
| 02 | 方小玉 | 2000-09-28 | 1 |
+--------+-------+-------------+-----+
7 rows selected (1.166 seconds)
-- g.查询同一姓名学生的信息
select
id
, name
, birthday
, class
from (
select
id
, name
, birthday
, class
, count(1) over(partition by name) as cnt
from default.student_info
) t
where t.cnt > 1
;
0: jdbc:hive2://cdh-datanode1:10005/default> select
. . . . . . . . . . . . . . . . . . . . . .> id
. . . . . . . . . . . . . . . . . . . . . .> , name
. . . . . . . . . . . . . . . . . . . . . .> , birthday
. . . . . . . . . . . . . . . . . . . . . .> , class
. . . . . . . . . . . . . . . . . . . . . .> from (
. . . . . . . . . . . . . . . . . . . . . .> select
. . . . . . . . . . . . . . . . . . . . . .> id
. . . . . . . . . . . . . . . . . . . . . .> , name
. . . . . . . . . . . . . . . . . . . . . .> , birthday
. . . . . . . . . . . . . . . . . . . . . .> , class
. . . . . . . . . . . . . . . . . . . . . .> , count(1) over(partition by name) as cnt
. . . . . . . . . . . . . . . . . . . . . .> from default.student_info
. . . . . . . . . . . . . . . . . . . . . .> ) t
. . . . . . . . . . . . . . . . . . . . . .> where t.cnt > 1
. . . . . . . . . . . . . . . . . . . . . .> ;
+-----+-------+-------------+--------+
| id | name | birthday | class |
+-----+-------+-------------+--------+
| 5 | 周小龙 | 2002-04-23 | 0302 |
| 6 | 周小龙 | 2002-06-28 | 0304 |
+-----+-------+-------------+--------+
2 rows selected (3.426 seconds)
-- 退出beeline
!quit