1、模仿建表语句,创建subject表,并使用hdfs dfs -put 命令加载数据 CREATE TABLE `subject`( `subject_id` bigint COMMENT '科目id', `subject_name` string COMMENT '科目名称') COMMENT '科目表' ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; 2、查询学生分数(输出:学号,姓名,班级,科目id,科目名称,成绩) input tables: 3张表 不需要分组、聚合、排序,直接三张表关联即可 select t1.id ,t1.name ,t1.clazz ,t2.score_id ,t2.score ,t3.subject_name from students t1 left join score t2 on t1.id = t2.id left join subject t3 on t2.score_id = t3.subject_id; 3、查询学生总分(输出:学号,姓名,班级,总分) input tables: students score 要求总分,得按照学生id分组求sum 不需要排序,需要关联一次 关联次数:6000次 先关联再分组求和 select t1.id ,t1.name ,t1.clazz ,sum(t2.score) as sum_score from students t1 left join score t2 on t1.id = t2.id group by t1.id,t1.name,t1.clazz limit 10; 关联1000次 先分组求和,再关联 select t1.id ,t1.name ,t1.clazz ,t2.sum_score from students t1 left join( select id ,sum(score) sum_score from score group by id ) t2 on t1.id = t2.id limit 10; 4、查询全年级总分排名前三(不分文理科)的学生(输出:学号,姓名,班级,总分) select t1.id ,t1.name ,t1.clazz ,t2.sum_score from students t1 left join( select id ,sum(score) sum_score from score group by id ) t2 on t1.id = t2.id order by t2.sum_score desc limit 3; 5、查询文科一班学生总分排名前10的学生(输出:学号,姓名,班级,总分) select t1.id ,t1.name ,t1.clazz ,t2.sum_score from students t1 left join( select id ,sum(score) sum_score from score group by id ) t2 on t1.id = t2.id where t1.clazz = "文科一班" order by t2.sum_score desc limit 10; 6、查询每个班级学生总分的平均成绩(输出:班级,平均分) select t1.clazz ,avg(t2.sum_score) avg_sum_score from students t1 left join( select id ,sum(score) sum_score from score group by id ) t2 on t1.id = t2.id group by t1.clazz; 7、查询每个班级的最高总分(输出:班级,总分) select t1.clazz ,max(t2.sum_score) max_sum_score from students t1 left join( select id ,sum(score) sum_score from score group by id ) t2 on t1.id = t2.id group by t1.clazz; 8、(思考)查询每个班级总分排名前三的学生(输出:学号,姓名,班级,总分) 窗口函数:row_number() select ttt1.name ,ttt1.score_sum ,ttt1.clazz ,ttt1.rn from( select name ,score_sum ,clazz ,row_number() over(partition by clazz order by score_sum desc) as rn from ( select t1.id ,t1.name ,t1.clazz ,t2.score_sum from students t1 left join ( select id ,sum(score) as score_sum from score group by id ) t2 on t1.id = t2.id ) tt1 ) ttt1 where ttt1.rn <=3;
create table students( > id bigint comment '学生 id', > name string comment '学生姓名', > age int comment '学生年龄', > gender string comment '学生性别', > clazz string comment '学生班级' > ) comment '学生信息表' > ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; create table score( > id bigint comment '学生 id', > score_id bigint comment '科目 id', > score int comment '学生成绩' > ) comment '学生成绩表' > ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; create table subject( > subject_id bigint comment '科目id', > subject_name string comment '科目名称' > )comment '科目目表' > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';