离线数仓分为ods、dwb、dwd、dws、dm层
遇到问题及解决方案
1、创建udf函数
创建udf函数的时候需要对数据进行过滤,否则会报异常
在进行创建udf函数的时候一定要用打包插件,否则也会报错
2、在运行mr的时候进行数据插入的时候报reduce异常,主要原因是前数据表中没有数据
注意是:在进行mr的时候一定不要空的数据或者数据的字段为null,否则都会报错,网上说的是内存溢出(并不一定)
3、在进行hive表中的数据插入到mysql中的时候会出现编码异常
1、在结果集数据库中设置字符串为latin1
2、并且在该数据库中执行set character_set_database=utf8;set character_set_server=utf8;
3、在hive中的进行导出的时候需要写characterEncoding=utf-8
eg:JDBC_URL=jdbc:mysql://172.16.53.10:3306/qianfeng_result?characterEncoding=utf-8
该项目中美中不足的是
azkaban进行调度没有成功
第三题写得不太好
在hive表导到mysql中忘使用脚本
收获
1、遇到的问题
2、加强了hql的学习(感觉自己的hql还有一段很长的路要走)
3、加强了多表联动的思考能力
4、更加熟悉了数仓搭建的基本流程
ods
init_qianfeng_ods.hql
-- 创建ods层:qianfeng_ods
CREATE DATABASE IF NOT EXISTS `qianfeng_ods`;
use `qianfeng_ods`;
-- 建表
-- 答卷表
create table if not exists `qianfeng_ods`.`answer_paper`(
`id` int,
`exam_id` int,
`paper_id` int,
`examinee_id` int,
`examinee_name` string,
`examinee_num` string,
`class_id` int,
`class_name` string,
`start_date` string,
`exam_time` string,
`submit_time` string,
`objective_mark` int,
`subject_mark` int,
`subject_smart_mark` int,
`check_state` int,
`teacher_id` int,
`objective_answer_json` string,
`subject_answer_json` string,
`subject_check_json` string,
`objective_check_json` string,
`evaluation_opinions` string
)
row format delimited
fields terminated by '\001';
-- 分类表(小题库)
create table if not exists `qianfeng_ods`.`category`(
`id` int,
`name` string,
`outline_json` string,
`subject_id` int,
`subject_name` string,
`remark` string,
`del` int,
`modifier_id` int,
`modifier_name` string,
`modify_time` string,
`creator_id` int,
`creator_name` string,
`create_time` string
)
row format delimited
fields terminated by '\001';
-- 考试表
create table if not exists `qianfeng_ods`.`exam`(
`id` int,
`name` string,
`paper_template_id` int,
`subject_id` int,
`subject_name` string,
`limit_minute` int,
`pass_score` int,
`full_mark` int,
`volume_num` int,
`remark` string,
`state` int,
`del` int,
`start_time` string,
`modifier_id` int,
`modifier_name` string,
`modify_time` string,
`creator_id` int,
`creator_name` string,
`create_time` string,
`publish` int
)
row format delimited
fields terminated by '\001';
-- 考试班级关联表
create table if not exists `qianfeng_ods`.`exam_class_ref`(
`id` int,
`exam_id` int,
`class_id` int,
`class_name` string
)
row format delimited
fields terminated by '\001';
-- 试卷表
create table if not exists `qianfeng_ods`.`paper`(
`id` int,
`exam_id` int,
`is_objective` int,
`is_subjective` int,
`creator_id` int,
`creator_name` string,
`create_time` string
)
row format delimited
fields terminated by '\001';
-- 试卷试题关联表
create table if not exists `qianfeng_ods`.`paper_question`(
`id` int,
`paper_id` int,
`question_id` int
)
row format delimited
fields terminated by '\001';
-- 试题模板(规则)表
create table if not exists `qianfeng_ods`.`paper_template`(
`id` int,
`name` string,
`subject_id` int,
`subject_name` string,
`state` int,
`total_mark` int,
`del` int,
`modifier_id` int,
`modifier_name` string,
`modify_time` string,
`creator_id` int,
`creator_name` string,
`create_time` string
)
row format delimited
fields terminated by '\001';
-- 试卷模板分类关联表
create table if not exists `qianfeng_ods`.`paper_template_category_ref`(
`id` int,
`paper_template_id` int,
`category_id` int
)
row format delimited
fields terminated by '\001';
-- 组卷表
create table if not exists `qianfeng_ods`.`paper_template_part`(
`id` int,
`paper_template_id` int,
`question_type_id` int,
`per_question_mark` int,
`sort` int,
`selected` int
)
row format delimited
fields terminated by '\001';
-- 试题数量-难易度设置表
create table if not exists `qianfeng_ods`.`paper_template_part_question_number`(
`id` int,
`paper_template_part_id` int,
`question_difficulty_id` int,
`question_number` int
)
row format delimited
fields terminated by '\001';
-- 试题表
create table if not exists `qianfeng_ods`.`question`(
`id` int,
`category_id` int,
`question_type_id` int,
`question_difficulty_id` int,
`state` int,
`content` string,
`right_answer` string,
`analyse` string,
`del` int,
`modifier_id` int,
`modifier_name` string,
`modify_time` string,
`creator_id` int,
`creator_name` string,
`create_time` string
)
row format delimited
fields terminated by '\001';
-- 难易度表
create table if not exists `qianfeng_ods`.`question_difficulty`(
`id` int,
`difficulty` int,
`name` string
)
row format delimited
fields terminated by '\001';
-- 试题选项表
create table if not exists `qianfeng_ods`.`question_option`(
`id` int,
`content` string,
`sort` int,
`question_id` int,
`is_right` int
)
row format delimited
fields terminated by '\001';
-- 试题类型表
create table if not exists `qianfeng_ods`.`question_type`(
`id` int,
`type` int,
`name` string,
`is_objective` int
)
row format delimited
fields terminated by '\001';
init_qianfeng_ods.sh
#!/bin/bash
echo "init_qianfeng_ods.hql is running ........................................"
hive -f /opt/apps/qianfeng_students_apps/sql/init_qianfeng_ods.hql
echo "init_qianfeng_ods.hql is finshed"
echo "init_qianfeng_ods.sh is running ........................................"
SQOOP_HOME=/opt/apps/sqoop-1.4.7
JDBC_URL=jdbc:mysql://hadoop:3306/qianfeng
USERNAME=root
PASSWORD=123456
## 将数据导入答卷表1
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table answer_paper \
--hive-import \
--hive-table qianfeng_ods.answer_paper \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入类表2
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table category \
--hive-import \
--hive-table qianfeng_ods.category \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入考试表3
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table exam \
--hive-import \
--hive-table qianfeng_ods.exam \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入考试与班级关联表4
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table exam_class_ref \
--hive-import \
--hive-table qianfeng_ods.exam_class_ref \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试卷表5
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table paper \
--hive-import \
--hive-table qianfeng_ods.paper \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试卷试题表6
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table paper_question \
--hive-import \
--hive-table qianfeng_ods.paper_question \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试卷模板题型组成表7
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table paper_template \
--hive-import \
--hive-table qianfeng_ods.paper_template \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试卷模板8
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table paper_template_category_ref \
--hive-import \
--hive-table qianfeng_ods.paper_template_category_ref \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试卷模板与分类关联表9
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table paper_template_part \
--hive-import \
--hive-table qianfeng_ods.paper_template_part \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试卷模板题型组成各难度题数表10
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table paper_template_part_question_number \
--hive-import \
--hive-table qianfeng_ods.paper_template_part_question_number \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试题表11
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table question \
--hive-import \
--hive-table qianfeng_ods.question \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试题难度表12
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table question_ difficulty \
--hive-import \
--hive-table qianfeng_ods.question_ difficulty \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试题选项表13
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table question_option \
--hive-import \
--hive-table qianfeng_ods.question_option \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
## 将数据导入试题类型表14
$SQOOP_HOME/bin/sqoop import \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table question_type \
--hive-import \
--hive-table qianfeng_ods.question_type \
--delete-target-dir \
--fields-terminated-by '\001' \
--num-mappers 1 \
--as-textfile
dwb
init_qianfeng_dwb.hql
这个层主要是对ods层的数据进行封装,把后面需要的数据放在这一层
-- 创建dwb层:
CREATE DATABASE IF NOT EXISTS `qianfeng_dwb`;
USE `qianfeng_dwb`;
-- 试题类型
create table if not exists `qianfeng_dwb`.`dwb_question_type` (
`question_type_id` int,
`type` int,
`question_type_name` string,
`is_objective` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
insert overwrite table `qianfeng_dwb`.`dwb_question_type`
select * from `qianfeng_ods`.`question_type`;
-- 试题难度
create table if not exists `qianfeng_dwb`.`dwb_question_difficulty` (
`question_difficulty_id` int,
`diffculty` int,
`question_difficulty_name` string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
insert overwrite table `qianfeng_dwb`.`dwb_question_difficulty`
select * from `qianfeng_ods`.`question_difficulty`;
-- 试卷
create table if not exists `qianfeng_dwb`.`dwb_paper` (
`paper_id` int,
`exam_id` int,
`is_objective` int,
`is_subjective` int,
`creator_id` int,
`creator_name` string,
`creator_time` string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
insert overwrite table `qianfeng_dwb`.`dwb_paper`
select * from `qianfeng_ods`.`paper`;
-- 试卷模板题型组成各难度题数
create table if not exists `qianfeng_dwb`.`dwb_paper_template_part_question_number` (
`paper_template_part_question_number_id` int,
`paper_template_part_id` int,
`question_difficulty_id` int,
`question_number` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
insert overwrite table `qianfeng_dwb`.`dwb_paper_template_part_question_number`
select * from `qianfeng_ods`.`paper_template_part_question_number`;
-- 合并试题和试卷
insert overwrite table `qianfeng_dwb`.`dwb_question`
select
qoq.`id` `question_id` ,
qopq.`paper_id` ,
qoq.`category_id` ,
qoq.`question_type_id` ,
qoq.`question_difficulty_id` ,
qoq.`state` ,
qoq.`content` ,
qoq.`right_answer` ,
qoq.`analyse` ,
qoq.`del` ,
qoq.`modifier_id` ,
qoq.`modifier_name` ,
qoq.`modify_time` ,
qoq.`creator_id` ,
qoq.`creator_name` ,
qoq.`create_time`
from `qianfeng_ods`.`question` qoq
join `qianfeng_ods`.`paper_question` qopq
on qoq.`id` = qopq.`question_id`;
-- 考试
create table if not exists `qianfeng_dwb`.`dwb_exam` (
`exam_id` int,
`exam_name` string,
`paper_template_id` int,
`paper_template_name` string,
`category_id` int,
`category_name` string,
`stage` int,
`subject_id` int,
`subject_name` string,
`class_id` int,
`class_name` string,
`limit_minute` int,
`pass_score` int,
`total_mark` int,
`full_mark` int,
`volume_num` int,
`remark` string,
`state` int,
`del` int,
`start_time` string,
`modifier_id` int,
`modifier_name` string,
`modify_time` string,
`creator_id` int,
`creator_name` string,
`create_time` string,
`publish` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
-- 合并考试-
insert overwrite table `qianfeng_dwb`.`dwb_exam`
select
ooe.`id` exam_id,
ooe.name `exam_name` ,
ooe.`paper_template_id` ,
oopt.name `paper_template_name` ,
ooptcr.`category_id` ,
ooc.name `category_name` ,
case when oopt.id>=117 and oopt.id<=122 then 1
when oopt.id>=123 and oopt.id<=128 then 2
when oopt.id>=129 and oopt.id<=134 then 3
else 4 end stage,
ooe.`subject_id` ,
ooe.`subject_name` ,
ooecr.`class_id` ,
ooecr.`class_name` ,
ooe.`limit_minute` ,
ooe.`pass_score` ,
oopt.`total_mark` ,
ooe.`full_mark` ,
ooe.`volume_num` ,
ooe.`remark` ,
ooe.`state` ,
ooe.`del` ,
ooe.`start_time` ,
ooe.`modifier_id` ,
ooe.`modifier_name` ,
ooe.`modify_time` ,
ooe.`creator_id` ,
ooe.`creator_name` ,
ooe.`create_time` ,
ooe.`publish`
from `qianfeng_ods`.`exam` ooe
join `qianfeng_ods`.`exam_class_ref` ooecr
on ooe.`id` = ooecr.`exam_id`
join `qianfeng_ods`.`paper_template` oopt
on ooe.`paper_template_id` = oopt.`id`
join `qianfeng_ods`.`paper_template_category_ref` ooptcr on oopt.`id` = ooptcr. `paper_template_id`
join `qianfeng_ods`.`category` ooc
on ooptcr.`category_id` = ooc.`id`;
init_qianfeng_dwb.sh
#!/bin/bash
echo "init_qianfeng_dwd.hql is running ..........................................."
hive -f /opt/apps/qianfeng_students_apps/sql/init_qianfeng_dwb.hql
echo "init_qianfeng_dwd.hql is finshed ..........................................."
~
dwd层
init_qianfeng_dwd.hql
这是一张主题表(就是把后期需要的共同数据放到这张表里,减少后期的表的join,代码主要参考网上内容的理解)
-- 创建dwd层:qianfeng_dwd
CREATE DATABASE IF NOT EXISTS `qianfeng_dwd`;
USE `qianfeng_dwd`;
-- 导jars
add jar /data/jars/parse1.jar;
create temporary function `parse1` as 'parseJSON';
-- 创建答题详情表
CREATE TABLE IF NOT EXISTS `qianfeng_dwd`.`answer_details` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`number` int,
`score` int,
`leixing` int,
`is_objective` int,
`nandu` string,
`per_question_mark` int,
`name` string,
`is_right` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
-- 插入数据
insert into `qianfeng_dwd`.`answer_details`
select
s.exam_id,
s.start_date,
s.class_id,
s.examinee_id,
s.examinee_name,
s.number,
s.score,
cast(odsqt.type as int) as leixing,
odsqt.is_objective,
odsqd.question_difficulty_name as nandu,
odsptp.per_question_mark,
odse.exam_name as name,
(case when s.score=odsptp.per_question_mark then 1 ELSE 0 end) is_right
from
(
select
exam_id,
start_date,
class_id,
examinee_id,
examinee_name,
paper_id,
cast(split(numberscore,',')[0] as int) as number,
cast(split(numberscore,',')[1] as int) as score
from qianfeng_ods.answer_paper
lateral view explode(split(parse1(subject_answer_json),'\073')) t as numberscore
union
select
exam_id,
start_date,
class_id,
examinee_id,
examinee_name,
paper_id,
cast(split(numberscore,',')[0] as int) as number,
cast(split(numberscore,',')[1] as int) as score
from qianfeng_ods.answer_paper
lateral view explode(split(parse1(objective_answer_json),'\073')) t as numberscore
)as s
join
qianfeng_dwb.dwb_question as odsq
on s.number=odsq.question_id and s.paper_id = odsq.paper_id
join
qianfeng_dwb.dwb_question_type as odsqt
on odsq.question_type_id=odsqt.`type`
join
qianfeng_dwb.dwb_question_difficulty as odsqd
on odsq.question_difficulty_id=odsqd.question_difficulty_id
join
qianfeng_dwb.dwb_exam as odse
on odse.exam_id=s.exam_id
join
qianfeng_dwb.dwb_paper_template_part as odsptp
on odse.paper_template_id=odsptp.paper_template_id
and odsqt.type =odsptp.question_type_id;
init_qianfeng_dwd.sh
#!/bin/bash
echo "init_qianfeng_dwd.hql is running...................................."
hive -f /opt/apps/qianfeng_students_apps/sql/init_qianfeng_dwd.hql
echo "init_qianfeng_dwd.hql is finished........................................."
~
dws层
init_qianfeng_dws.hql(这层数据的表都是参考老师图上的表,然后求各个题型、各个阶段的正确率基础表和总难易度正确率基础表)
-- 创建dws层:qianfeng_dws
CREATE DATABASE IF NOT EXISTS `qianfeng_dws`;
USE `qianfeng_dws`;
-- 个人单次考试单选题各阶段总正确率基础表
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`choice_questions_correct` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`name` string,
`stage` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.choice_questions_correct
select exam_id,start_date,class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="单选题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name;
-- 个人单次考试单选题各阶段总难易度正确率基础表
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`choice_questions_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`name` string,
`nandu` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.choice_questions_difficulty
select exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,dwdad.nandu,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="单选题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu;
-- 多选题
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`more_choice_questions_correct` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`name` string,
`stage` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.more_choice_questions_correct
select exam_id,start_date,class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="多选题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name;
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`more_choice_questions_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`name` string,
`nandu` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.more_choice_questions_difficulty
select exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,dwdad.nandu,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="多选题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu;
-- 判断
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`judge_questions_correct` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`name` string,
`stage` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.judge_questions_correct
select exam_id,start_date,class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="判断"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name;
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`judge_questions_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`name` string,
`nandu` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.judge_questions_difficulty
select exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,dwdad.nandu,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="判断"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu;
-- 填空
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`blanks_questions_correct` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`name` string,
`stage` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.blanks_questions_correct
select exam_id,start_date,class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="填空题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name;
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`blanks_questions_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`name` string,
`nandu` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.blanks_questions_correct
select exam_id,start_date,class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="填空题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name;
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`blanks_questions_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`name` string,
`nandu` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.blanks_questions_difficulty
select exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,dwdad.nandu,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="填空题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu;
-- 简答题
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`Brief_answer_questions_correct` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`name` string,
`stage` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.Brief_answer_questions_correct
select exam_id,start_date,class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="简答题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name;
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`Brief_answer_questions_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`name` string,
`nandu` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.Brief_answer_questions_difficulty
select exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,dwdad.nandu,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="简答题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu;
-- 编程
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`programming_questions_correct` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`name` string,
`stage` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.programming_questions_correct
select exam_id,start_date,class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="编程"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name;
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`programming_questions_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`name` string,
`nandu` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.programming_questions_difficulty
select exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.name,dwdad.nandu,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="编程"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name,name,nandu;
-- 为第二个指标做准备
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`all_summary_information` (
`class_id` int,
`examinee_id` int,
`examinee_name` string,
leixing string,
`wrongnum`int,
`allnum` int,
Accuracy decimal(18,2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT INTO `qianfeng_dws`.`all_summary_information`
select *,(allnum-wrongnum)*1.0/allnum as Accuracy
from(
select class_id,examinee_id,examinee_name,leixing,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,dwdad.leixing,odsqt.name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.type
)a
group by class_id,examinee_id,examinee_name,leixing
)b;
-- 单选题
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`choice_summary_information` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.choice_summary_information
select exam_id,start_date,class_id,examinee_id,examinee_name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="单选题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name;
-- 多选题
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`more_choice_summary_information` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.more_choice_summary_information
select exam_id,start_date,class_id,examinee_id,examinee_name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="多选题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name
;
-- 判断
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`judge_summary_information` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.judge_summary_information
select exam_id,start_date,class_id,examinee_id,examinee_name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="判断"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name
;
-- 填空题
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`blanks_summary_information` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.blanks_summary_information
select exam_id,start_date,class_id,examinee_id,examinee_name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="填空题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name
;
-- 简答题
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`Brief_answer_summary_information` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.Brief_answer_summary_information
select exam_id,start_date,class_id,examinee_id,examinee_name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="简答题"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name
;
-- 编程
CREATE TABLE IF NOT EXISTS `qianfeng_dws`.`programming_summary_information` (
`exam_id` int,
`start_date` string,
`class_id` int,
`examinee_id` int,
`examinee_name` string,
`wrongnum`int,
`allnum` int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into qianfeng_dws.programming_summary_information
select exam_id,start_date,class_id,examinee_id,examinee_name,
sum(case when is_right=0 then 1 else 0 end)as wrongnum,
count (1) as allnum
from(
select dwdad.exam_id,dwdad.start_date,dwdad.class_id,dwdad.examinee_id,dwdad.examinee_name,
dwdad.is_right
from qianfeng_dwd.answer_details as dwdad
join qianfeng_ods.question_type as odsqt
on dwdad.leixing=odsqt.`type`
where odsqt.name="编程"
)a
group by exam_id,start_date,class_id,examinee_id,examinee_name
;
init_qianfeng_dws.sh
#!/bin/bash
echo "init_qianfeng_dws.hql is running .................."
hive -f /opt/apps/qianfeng_students_apps/sql/init_qianfeng_dws.hql
echo "init_qianfeng_dws.hql is running .........................."
~
dm(这个存储的是查询的结果层)
init_qianfeng_dm.hql
-- 创建dm层:qianfeng_dm
CREATE DATABASE IF NOT EXISTS `qianfeng_dm`;
USE `qianfeng_dm`;
-- 个人单次考试各阶段总正确率表
CREATE database if not EXISTS qianfeng_dm;
CREATE TABLE IF NOT EXISTS `qianfeng_dm`.`test_stage_right` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`student_name` string,
`stage` string,
`is_right` decimal(18,2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
insert into qianfeng_dm.test_stage_right
select exam_id,START_date as start_date,
class_id,examinee_id as student_id,examinee_name as student_name ,name as stage,(allsum-falsenum)/allsum as is_right
from (
select exam_id,START_date,
class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end) as falsenum,COUNT(1) as allsum
from
qianfeng_dwd.answer_details
group by exam_id,START_date,
class_id,examinee_id,examinee_name,name
)a;
-- 个人单次考试各阶段难易度正确率表
CREATE TABLE IF NOT EXISTS `qianfeng_dm`.`test_stage_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`student_name` string,
`stage` string,
`difficulty` String,
`is_right` decimal(18,2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
insert into qianfeng_dm.test_stage_difficulty
select exam_id, start_date,
class_id,student_id,student_name, stage, difficulty,(allsum-falsenum)/allsum as is_right
from(
select exam_id,START_date as start_date,
class_id,examinee_id as student_id,examinee_name as student_name,name as stage,nandu as difficulty,
sum(case when is_right=0 then 1 else 0 end) as falsenum,COUNT(1) as allsum
from
qianfeng_dwd.answer_details
group by exam_id,START_date,
class_id,examinee_id,examinee_name,name,nandu
)b ;
-- 用户模型表
CREATE TABLE IF NOT EXISTS `qianfeng_dm`.`student_ability_information` (
`class_id` int,
`examinee_id` int,
`examinee_name` string,
Expressive_ability decimal(18,2),
Understanding_ability decimal(18,2),
Programming_ability decimal(18,2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into `qianfeng_dm`.`student_ability_information`
select
class_id,
examinee_id,examinee_name,
sum(case when leixing =5 then accuracy else 0 end) as Expressive_ability,
sum(case when leixing =1 then accuracy*0.3
when leixing =2 then accuracy*0.5
when leixing =3 then accuracy*0.2 else 0 end)as Understanding_ability,
sum(case when leixing =6 then accuracy else 0 end)as Programming_ability
from
qianfeng_dws.all_summary_information
group by class_id,examinee_id,examinee_name;
CREATE database if not EXISTS qianfeng_dm;
CREATE TABLE IF NOT EXISTS `qianfeng_dm`.`test_stage_right` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`student_name` string,
`stage` string,
`is_right` decimal(18,2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
insert into qianfeng_dm.test_stage_right
select exam_id,START_date as start_date,
class_id,examinee_id as student_id,examinee_name as student_name ,name as stage,(allsum-falsenum)/allsum as is_right
from (
select exam_id,START_date,
class_id,examinee_id,examinee_name,name,
sum(case when is_right=0 then 1 else 0 end) as falsenum,COUNT(1) as allsum
from
qianfeng_dwd.answer_details
group by exam_id,START_date,
class_id,examinee_id,examinee_name,name
)a;
CREATE TABLE IF NOT EXISTS `qianfeng_dm`.`test_stage_difficulty` (
`exam_id` int,
`start_date` string,
`class_id` int,
`student_id` int,
`student_name` string,
`stage` string,
`difficulty` String,
`is_right` decimal(18,2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
insert into qianfeng_dm.test_stage_difficulty
select exam_id, start_date,
class_id,student_id,student_name, stage, difficulty,(allsum-falsenum)/allsum as is_right
from(
select exam_id,START_date as start_date,
class_id,examinee_id as student_id,examinee_name as student_name,name as stage,nandu as difficulty,
sum(case when is_right=0 then 1 else 0 end) as falsenum,COUNT(1) as allsum
from
qianfeng_dwd.answer_details
group by exam_id,START_date,
class_id,examinee_id,examinee_name,name,nandu
)b ;
CREATE TABLE IF NOT EXISTS `qianfeng_dm`.`student_ability_information` (
`class_id` int,
`examinee_id` int,
`examinee_name` string,
Expressive_ability decimal(18,2),
Understanding_ability decimal(18,2),
Programming_ability decimal(18,2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001';
INSERT into `qianfeng_dm`.`student_ability_information`
select
class_id,
examinee_id,examinee_name,
sum(case when leixing =5 then accuracy else 0 end) as Expressive_ability,
sum(case when leixing =1 then accuracy*0.3
when leixing =2 then accuracy*0.5
when leixing =3 then accuracy*0.2 else 0 end)as Understanding_ability,
sum(case when leixing =6 then accuracy else 0 end)as Programming_ability
from
qianfeng_dws.all_summary_information
group by class_id,examinee_id,examinee_name;
init_qianfeng_dm.sh
#!/bin/bash
echo "init_qianfeng_dm.hql is running ..........................................."
hive -f /opt/apps/qianfeng_students_apps/sql/init_qianfeng_dm.hql
echo "init_qianfeng_dm.hql is finshed ..........................................."
~
数据导出层
#!/bin/bash
SQOOP_HOME=/opt/apps/sqoop-1.4.7
JDBC_URL=jdbc:mysql://172.16.53.10:3306/qianfeng_result?characterEncoding=utf-8
USERNAME=root
PASSWORD=123456
echo "数据导出正在开始.........................."
echo "第一张表正在导出.........................."
$SQOOP_HOME/bin/sqoop export \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table single_test_sum_right \
--export-dir /user/hive/warehouse/qianfeng_dm.db/test_stage_right \
--columns exam_id,start_date,class_id,student_id,student_name,stage,is_right \
--input-fields-terminated-by '\001' \
--num-mappers 1
echo "第一张表导出成功.........................."
echo "第二张表正在导出.........................."
$SQOOP_HOME/bin/sqoop export \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table single_test_difficulty_right \
--export-dir /user/hive/warehouse/qianfeng_dm.db/test_stage_difficulty \
--columns exam_id,start_date,class_id,student_id,student_name,stage,difficulty,is_right \
--input-fields-terminated-by '\001' \
--num-mappers 1
echo "第二张表导出成功.........................."
echo "第三张表正在导出.........................."
$SQOOP_HOME/bin/sqoop export \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table user_model \
--export-dir /user/hive/warehouse/qianfeng_dm.db/student_ability_information \
--columns class_id,examinee_id,examinee_name,expressive_ability,understanding_ability,programming_ability \
--input-fields-terminated-by '\001' \
--num-mappers 1
echo "第三张数据表导出成功............................"
#!/bin/bash
SQOOP_HOME=/opt/apps/sqoop-1.4.7
JDBC_URL=jdbc:mysql://172.16.53.10:3306/qianfeng_result?characterEncoding=utf-8
USERNAME=root
PASSWORD=123456
echo "数据导出正在开始.........................."
echo "第四张表正在导出.........................."
$SQOOP_HOME/bin/sqoop export \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table single_test_sum_right \
--export-dir /user/hive/warehouse/qianfeng_dm.db/test_stage_right \
--columns exam_id,start_date,class_id,student_id,student_name,stage,is_right \
--input-fields-terminated-by '\001' \
--num-mappers 1
echo "第四张表导出成功.........................."
echo "第五张表正在导出.........................."
$SQOOP_HOME/bin/sqoop export \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table single_test_difficulty_right \
--export-dir /user/hive/warehouse/qianfeng_dm.db/test_stage_difficulty \
--columns exam_id,start_date,class_id,student_id,student_name,stage,difficulty,is_right \
--input-fields-terminated-by '\001' \
--num-mappers 1
echo "第五张表导出成功.........................."
echo "第六张表正在导出.........................."
$SQOOP_HOME/bin/sqoop export \
--connect $JDBC_URL \
--username $USERNAME \
--password $PASSWORD \
--table user_model \
--export-dir /user/hive/warehouse/qianfeng_dm.db/student_ability_information \
--columns class_id,examinee_id,examinee_name,expressive_ability,understanding_ability,programming_ability \
--input-fields-terminated-by '\001' \
--num-mappers 1
echo "第六张数据表导出成功............................"
自定义udf函数
import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.Iterator;
import java.util.Map;
public class parseJSON extends UDF {
public String evaluate(String json){
if (json.equals("{}")){
return "";
}
//将json字符串转换为json对象
String result="";
JSONObject jsonObject = JSONObject.parseObject(json);
Map map = (Map) jsonObject;
Iterator iterator = map.entrySet().iterator();
while (iterator.hasNext()){
Map.Entry entry = (Map.Entry) iterator.next();
JSONObject map1 = JSONObject.parseObject(entry.getValue().toString());
String score="";
if (map1.get("score").toString().equals("")){
score="0";
}else {
score = map1.get("score").toString();
}
result=result+entry.getKey()+","+score+";";
}
return result.substring(0,result.length()-1);
}
/* public Map<String, String> evaluate(String str) throws JSONException {
Map<String, Object> map = (Map<String, Object>) JSONObject.parseObject(str);
Map<String, String> reMap = new HashMap<String, String>();
for (Object obj : map.keySet())
reMap.put(obj.toString(), ((JSONObject) map.get(obj)).get("score").toString());
return reMap;
}
*/
}
hive导入到mysql语句
create database if not exists qianfeng_result;
-- 建立第一张表
CREATE TABLE `qianfeng_result`.`single_test_sum_right` (
`exam_id` int(50),
`start_date` varchar(100),
`class_id` int(50),
`student_id` int(50),
`student_name` varchar(100),
`stage` varchar(100),
`is_right` decimal(18,2)
) ENGINE=InnoDB AUTO_INCREMENT=689 DEFAULT CHARSET=utf8;
-- 建立第二张表
CREATE TABLE `qianfeng_result`.`single_test_difficulty_right` (
`exam_id` int(50),
`start_date` varchar(100),
`class_id` int(50),
`student_id` int(50),
`student_name` varchar(100),
`stage` varchar(100),
`difficulty` varchar(100),
`is_right` decimal(18,2)
) ENGINE=InnoDB AUTO_INCREMENT=689 DEFAULT CHARSET=utf8;
-- 第三张表
CREATE TABLE `qianfeng_result`.`user_model` (
`class_id` int(50),
`examinee_id` int(50),
`examinee_name` varchar(100),
expressive_ability decimal(18,2),
understanding_ability decimal(18,2),
programming_ability decimal(18,2)
)
ENGINE=InnoDB AUTO_INCREMENT=689 DEFAULT CHARSET=utf8;