大数据平时工作使用总结整理
set mapred.max.split.size=100000000;
set mapred.min.split.size.per.node=100000000;
set mapred.min.split.size.per.rack=100000000;
set hive.exec.reducers.max=200;
set hive.exec.compress.output=false;
set hive.exec.compress.intermediate=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nostrick;
set map red.queue.name=hive;
set map red.job.queue.name=hive;use dm_hrmis;
--处理换行数据信息
drop table if exists dm_hrmis.tmp_tm_hrmis_search_info;
CREATE TABLE dm_hrmis.tmp_tm_hrmis_search_info as
select info_id ,
emp_code ,
type_code ,
type_name ,
styp_code ,
styp_name ,
label_id ,
label_code ,
label_name ,
label_alias ,
invalid_start ,
invalid_end ,
invalid_status,
create_date ,
update_date ,
regexp_replace(regexp_replace(attr1,'\\\\n',''),'\\\\r','') attr1 ,
regexp_replace(regexp_replace(attr2,'\\\\n',''),'\\\\r','') attr2 ,
regexp_replace(regexp_replace(attr3,'\\\\n',''),'\\\\r','') attr3 ,
regexp_replace(regexp_replace(attr4,'\\\\n',''),'\\\\r','') attr4 ,
regexp_replace(regexp_replace(attr5,'\\\\n',''),'\\\\r','') attr5 ,
regexp_replace(regexp_replace(attr6,'\\\\n',''),'\\\\r','') attr6 ,
regexp_replace(regexp_replace(attr7,'\\\\n',''),'\\\\r','') attr7 ,
regexp_replace(regexp_replace(attr8,'\\\\n',''),'\\\\r','') attr8 ,
regexp_replace(regexp_replace(attr9,'\\\\n',''),'\\\\r','') attr9 ,
regexp_replace(regexp_replace(attr10,'\\\\n',''),'\\\\r','') attr10 ,
regexp_replace(regexp_replace(attr11,'\\\\n',''),'\\\\r','') attr11 ,
regexp_replace(regexp_replace(attr12,'\\\\n',''),'\\\\r','') attr12 ,
regexp_replace(regexp_replace(attr13,'\\\\n',''),'\\\\r','') attr13 ,
regexp_replace(regexp_replace(attr14,'\\\\n',''),'\\\\r','') attr14 ,
regexp_replace(regexp_replace(attr15,'\\\\n',''),'\\\\r','') attr15 ,
regexp_replace(regexp_replace(attr16,'\\\\n',''),'\\\\r','') attr16 ,
regexp_replace(regexp_replace(attr17,'\\\\n',''),'\\\\r','') attr17 ,
regexp_replace(regexp_replace(attr18,'\\\\n',''),'\\\\r','') attr18 ,
regexp_replace(regexp_replace(attr19,'\\\\n',''),'\\\\r','') attr19 ,
regexp_replace(regexp_replace(attr20,'\\\\n',''),'\\\\r','') attr20 ,
description ,
data_source ,
is_label from ods_hrss.tm_hrmis_search_info where inc_day='${yyyymmdd}';drop table if exists dm_hrmis.tm_hrmis_user_full_info;
CREATE TABLE dm_hrmis.tm_hrmis_user_full_info(
--员工基本信息
USER_ID STRING comment '用户ID',
EMP_CODE STRING comment '员工工号',
EMP_NAME STRING comment '员工姓名',
MAIN_EMP_CODE INT comment '员工工号排序去掉00',
STATUS INT comment '员工状态:1:有效 0:无效',
ORG_ID STRING comment '组织id',
ORG_NAME STRING comment '组织名称',
ORG_PATH STRING comment '组织全路径',
POS_RANK STRING comment '职级',
MAIN_SEQ_ID STRING comment '主序列id',
MAIN_SEQ_NAME STRING comment '主序列名称',
SUB_SEQ_ID STRING comment '子序列id',
SUB_SEQ_NAME STRING comment '子序列名称',
POSITION_ID STRING comment '职位id',
POSITION_NAME STRING comment '职位名称',
POST_ID STRING comment '岗id',
POST_NAME STRING comment '岗名称',
JOB_ID STRING comment '岗位id',
JOB_NAME STRING comment '岗位名称',
SEX STRING comment '性别:M:男 F:女',
POS_ATTR STRING comment '职位属性:一,二,三线',
EDUCATION STRING comment '最高学历编码',
POST_AGE DOUBLE comment '岗龄',
ENTRY_AGE DOUBLE comment '司龄',
POS_FUNCTION STRING comment '职能(人事子范围)',
PICTURE STRING comment '照片',
WORK_AREA STRING comment '工作地点',
EMAIL STRING comment '邮箱',
IN_TIME STRING comment '入职日期',
OUT_TIME STRING comment '离职日期',
BIRTH_DATE STRING comment '出生日期',
CUR_RANK_START STRING comment '当前职级开始时间',
CUR_POST_START STRING comment '当前岗位开始时间',
CUR_SEQ_START STRING comment '当前子序列开始时间',
CREATE_DT STRING comment '创建时间',
UPDATE_DT STRING comment '更新时间',
ORG_SHORT_NAME STRING comment '组织短名称',
POS_FUNCTION_NAME STRING comment '职能名称',
EDUCATION_TXT STRING comment '最高学历文本',
AGE INT comment '年龄',
RCNT_1YEAR_DMARKS DOUBLE comment '近一年行政扣分:直接责任扣分/管理责任扣分/诚信类扣分',
SUPER_EMP_CODE STRING comment '上级工号',
SUPER_EMP_NAME STRING comment '上级姓名',
MAIN_POS_RANK STRING comment '主职级(如:M4.1,存储4.1)',
SUPERIOR_PATH STRING comment '按上下级关系整理的一个工号路径',
--标签信息,汇总时只同步当前有效的数据,过滤已过期等无效的数据
QLFCTN_CODE ARRAY<STRING> comment '任职资格编码(等级编码+"-"+类型编码)',
QLFCTN_NAME ARRAY<STRING> comment '任职资格名称(等级文本+"-"+类型文本+"-任职资格")',
UNDGR_TYPE_CODE STRING comment '大学生类型编码(年度+"-"+类型编码,如:2015-1)',
UNDGR_TYPE_NAME STRING comment '大学生类型名称(年度+"-"+类型,如:2015-优才)',
HIGH_POT_FALG STRING comment '高潜标识(HIGH_POT)',
HIGH_POT ARRAY<STRING> comment '高潜(yyyy高潜)',
RESERVE_FLAG STRING comment '储备标识(RESERVE)',
RESERVE_CODE ARRAY<STRING> comment '后备信息类型',
RESERVE_NAME ARRAY<STRING> comment '后备信息名称(后备经理)',
ABIL_ADV_FLAG STRING comment '能力优势标识(ABIL_ADV)',
ABIL_ADV_CODE ARRAY<STRING> comment '能力优势编码',
ABIL_ADV_LABEL ARRAY<STRING> comment '能力优势名称',
ABIL_OTHER ARRAY<STRING> comment '其他能力优势',
PROF_SKILL_INFO ARRAY<STRUCT<
BEGDA:BIGINT, --开始时间
ENDDA:BIGINT, --结束日期
ZHRZXL:STRING, --子序列ID
ZHRJNLB:STRING, --专业技能类别
ZHRJNMC:STRING, --专业技能名称
ZHRJTCD:STRING, --精通程度
ZHRZMR:STRING, --在司证明人
ZHRDH:STRING --证明人电话
>>
comment '专业技能信息',
TRANS_ORG_CODE STRING comment '跨组织经验编码',
TRANS_ORG_LABEL STRING comment '跨组织经验名称',
TRANS_POS_FLAG STRING comment '跨职能经验标识(TRANS_POS)',
TRANS_POS_CODE ARRAY<STRING> comment '跨职能经验编码',
TRANS_POS_LABEL ARRAY<STRING> comment '跨职能经验名称',
TEAM_MGMT_CODE STRING comment '团队管理经验标识',
TEAM_MGMT_LABEL STRING comment '团队管理经验名称',
INDS_BENM_CODE STRING comment '行业标杆经验标识',
INDS_BENM_LABEL STRING comment '行业标杆经验名称',
PROJ_EXP_FLAG STRING comment '项目经验标识(PROJ_EXP)',
PROJ_EXP_LABEL ARRAY<STRUCT<
PROJ_SDT:BIGINT,--项目开始时间
PROJ_EDT:BIGINT,--项目结束日期
PROJ_TYPE_CODE:STRING,--项目类别编码
PROJ_TYPE:STRING,--项目类别
PROJ_NAME:STRING,--项目名称
PROJ_PCODE:STRING,--参与状态编码
PROJ_PTEXT:STRING,--参与状态文本
PROJ_ROLE:STRING--担任角色
>>
comment '项目经验名称',
--格式 类别编码:“类别编码”+“#”类别名称:“类别名称”+“#”项目名称:“项目名称”+“#”开始日期:+“开始日期”+“#”+结束日期:+“结束日期”+...+担任角色:+“担任角色”
--PROJ_EXP_DESC STRING comment '项目经验描述,多项之间用“||”分隔(如:类别编码:26#类别名称:人力资源#项目名称:ORACLE人事系统#开始日期:20070501#结束日期:20080131#参与状态编码:2#参与状态文本:全职#担任角色:项目经理)', --
DEV_DIR_CODE ARRAY<STRING> comment '发展方向编码,年度加编码(2016-ZYYJX)只汇总当前有效的数据',
DEV_DIR_LABEL ARRAY<STRING> comment '发展方向名称,年度加名称(2016-专业研究型)',
--RCNT_1YEAR_DMARKS DOUBLE, --近一年
FOR_LANG_CODE ARRAY<STRING> comment '外语能力编码(语种编码+级别编码)',
FOR_LANG_LABEL ARRAY<STRING> comment '外语能力文本(语种文本+级别文本)',
PERFOR_CODE ARRAY<STRING> comment '年度绩效编码(2015A1)',
PERFOR_LABEL ARRAY<STRING> comment '年度绩效编码(2015A1)',
PERFOR_ADV ARRAY<STRING> comment '年度绩效评价优势(2015+评价信息)',
PERFOR_DISADV ARRAY<STRING> comment '年度绩效评价劣势(2015+评价信息)',
TLNTS_CHECK_Q ARRAY<STRING> comment '人才盘点Q值(年度加Q值)',
TLNTS_CHECK_PLAN ARRAY<STRING> comment '人才盘点发展计划(年度+发展计划,如:2016随时可调任到新的岗位)',
TLNTS_CHECK_ADV ARRAY<STRING> comment '人才盘点评价优势',
TLNTS_CHECK_DISADV ARRAY<STRING> comment '人才盘点评价劣势',
--司外经历
EXTERNAL_EXP ARRAY<STRUCT<
BEGDA:BIGINT,--开始时间
ENDDA:BIGINT,--结束时间
SUBTY:STRING,--子信息类型
SPRPS:STRING,--人力资源主数据记录的锁定标志
SEQNR:STRING,--相同关键值信息类型记录的编号
ZHRGZJLLX:STRING,--工作经历类型
ZHRDW:STRING,--工作单位
LAND1:STRING,--国家
BRANC:STRING,--行业
ZHRBM:STRING,--部门
ZHRZW:STRING,--职位
ZHRLZYY:STRING,--离职原因
ZHRZMR:STRING,--证明人
ZHRZMRLXDH:STRING,--证明人联系电话
ZHRZWXLT:STRING,--岗位序列
ZHRZWSX:STRING,--岗位属性
ZHRSFLG:STRING,--是否轮岗
ZHRLGDQSJ:BIGINT,--轮岗到期日期
ZHRSFNP:STRING,--是否内聘
ZHRBDLX:STRING,--变动类型
ZHRFPLB:STRING,--分配类别
ZHRYGZT:STRING,--员工状态
ZHRJB:STRING,--级别
ZHRGZDT:STRING,--工作地
ZHRSFZZCF:STRING,--是否因组织机构拆分引起
DESCRIPTION:STRING--描述
>> comment '司外经历',
--教育信息
EDUCATIONAL_INFO ARRAY<STRUCT<
BEGDA:BIGINT,--开始时间
ENDDA:BIGINT,--结束时间
SUBTY:STRING,--子信息类型
OBJPS:STRING,--对象标识
SPRPS:STRING,--人力资源主数据记录的锁定标志
SEQNR:STRING,--相同关键值信息类型记录的编号
SLART:STRING,--学历
INSTI:STRING,--学校名称
SLAND:STRING,--国家
ZHRXKML:STRING,--学科门类
ZHRZYL:STRING,--专业类
ZHRZY:STRING,--专业
ZHRZYT:STRING,--其他专业
ANZKL:STRING,--学制
ZDEHRXXFS:STRING,--学习方式
ZDEHRSFZGXL:STRING,--最高学历标识
ZDEHRSFZGXW:STRING,--最高学位标识
SLABS:STRING,--学位
ZHRBYZT:STRING,--毕业状态
ZHRYXLB:STRING,--院校类别
ZHRZSBH:STRING,--证书编号
ZHRBZ:STRING--备注
>> comment '教育经历信息',
CERTI_INFO ARRAY<STRUCT<
BEGDA:BIGINT,--开始时间
ENDDA:BIGINT,--结束日期
SUBTY:STRING,--子信息类型
OBJPS:STRING,--对象标识
SPRPS:STRING,--人力资源主数据记录的锁定标志
SEQNR:STRING,--相同关键值信息类型记录的编号
ZHRZJLX:STRING,--证书类型
ZHRZGZSMC:STRING,--资格证书名称
ZHRZSJB:STRING,--证书类别
ZHRZSPDDW:STRING,--证书评定单位
ZHRZSJB1:STRING,--证书级别
ZHRZZHM:STRING--执照号码
>>
comment '技能证书信息',
POS_EXP_INFO ARRAY<STRING> comment '岗位经验',
PERSON_LABEL STRING comment '个人标签,用于展示在搜索结果页面',
INC_DAY STRING comment '数据同步日期'
) stored as parquet;
-- 汇总人才池信息
drop table if exists dm_hrmis.tmp_emp_talent_pool_info;
create table dm_hrmis.tmp_emp_talent_pool_info stored as parquet as
select u.emp_code emp_code,
o.path org_path,
t_undgr.undgr_code undgr_type_code,
t_undgr.undgr_label undgr_type_name,
t_potent.styp_code high_pot_falg,
t_potent.label high_pot,
t_reserve.styp_code reserve_flag,
t_reserve.label_code reserve_code,
t_reserve.label_name reserve_name
from (select emp_code, org_id
from ods_hrss.tm_hrmis_user
where status = 1
and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
and inc_day = '${yyyymmdd}') u
left join (select distinct orgid, path
from ods_hrss.tm_org
where state = 1
and inc_day = '${yyyymmdd}') o on u.org_id = o.orgid
left join (select emp_code,
concat(attr2, '-UNIVERSITY0', attr3) undgr_code,
concat(attr2, '-', label_name) undgr_label
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'UNDGR'
and unix_timestamp() >= invalid_start / 1000
and unix_timestamp() < invalid_end / 1000) t_undgr on u.emp_code =
t_undgr.emp_code
left join (select emp_code, styp_code, collect_set(label_alias) label
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'HIGH_POT'
group by emp_code, styp_code) t_potent on u.emp_code =
t_potent.emp_code
left join (select emp_code,
styp_code,
collect_set(concat('RESERVE0', '', label_code)) label_code,
collect_set(label_name) label_name
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'RESERVE'
and unix_timestamp() >= invalid_start / 1000
and unix_timestamp() < invalid_end / 1000
group by emp_code, styp_code) t_reserve on u.emp_code =
t_reserve.emp_code;
-- 汇总能力信息
drop table if exists dm_hrmis.tmp_emp_abil_info;
create table dm_hrmis.tmp_emp_abil_info stored as parquet as
select u.emp_code,
t_qlfctn.qlfctn_code,
t_qlfctn.qlfctn_name,
t_skill.skill prof_skill_info,
certi_info.certis certi_info,
t_abil.styp_code abil_adv_flag,
t_abil.abil_code abil_adv_code,
t_abil.abil_label abil_adv_label,
t_abil.other_abil abil_other
from (select emp_code
from ods_hrss.tm_hrmis_user
where status = 1
and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
and inc_day = '${yyyymmdd}') u
left join (select emp_code,
collect_set(label_code) qlfctn_code,
collect_set(label_name) qlfctn_name
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'QLFCTN'
and unix_timestamp() >= invalid_start / 1000
and unix_timestamp() < invalid_end / 1000
group by emp_code) t_qlfctn on u.emp_code = t_qlfctn.emp_code
left join (select k.pernr pernr,
collect_set(named_struct('begda',
k.begda,
'endda',
k.endda,
'zhrzxl',
k.zhrzxl,
'zhrjnlb',
k.zhrjnlb,
'zhrjnmc',
k.zhrjnmc,
'zhrjtcd',
k.zhrjtcd,
'zhrzmr',
k.zhrzmr,
'zhrdh',
k.zhrdh)) skill
from ods_hrss.tm_hrmis_prof_skill k
where k.inc_day = '${yyyymmdd}'
group by k.pernr) t_skill on u.emp_code = t_skill.pernr
left join (select ssc.pernr, collect_set(ssc.clms) certis
from (select sc.pernr,
named_struct('begda',
sc.begda,
'endda',
sc.endda,
'subty',
sc.subty,
'objps',
sc.objps,
'sprps',
sc.sprps,
'seqnr',
sc.seqnr,
'zhrzjlx',
sc.zhrzjlx,
'zhrzgzsmc',
sc.zhrzgzsmc,
'zhrzsjb',
sc.zhrzsjb,
'zhrzspddw',
sc.zhrzspddw,
'zhrzsjb1',
sc.zhrzsjb1,
'zhrzzhm',
sc.zhrzzhm) as clms
from ods_hrss.sys_sap_certi sc
where sc.inc_day = '${yyyymmdd}') ssc
group by ssc.pernr) certi_info on u.emp_code =
certi_info.pernr
left join (select emp_code,
styp_code,
collect_set(label_code) abil_code,
collect_set(label_name) abil_label,
collect_set(attr1) other_abil
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'ABIL_ADV'
and unix_timestamp() >= invalid_start / 1000
and unix_timestamp() < invalid_end / 1000
group by emp_code, styp_code) t_abil on u.emp_code =
t_abil.emp_code;
-- 汇总经验信息
drop table if exists dm_hrmis.tmp_emp_exp_info;
create table dm_hrmis.tmp_emp_exp_info stored as parquet as
select u.emp_code,
t_trans_org.label_code trans_org_code,
t_trans_org.label_name trans_org_label,
t_trans_pos.styp_code trans_pos_flag,
t_trans_pos.pos_code trans_pos_code,
t_trans_pos.pos_label trans_pos_label,
t_team_mgmt.label_code team_mgmt_code,
t_team_mgmt.label_name team_mgmt_label,
t_inds_benm.label_code inds_benm_code,
t_inds_benm.label_name inds_benm_label,
t_pos_exp.pos_exp_info pos_exp_info
from (select emp_code
from ods_hrss.tm_hrmis_user
where status = 1
and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
and inc_day = '${yyyymmdd}') u
left join (select distinct emp_code, label_code, label_name
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'TRANS_ORG') t_trans_org on u.emp_code =
t_trans_org.emp_code
left join (select emp_code,
styp_code,
collect_set(attr1) pos_code,
collect_set(attr3) pos_label
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'TRANS_POS'
group by emp_code, styp_code) t_trans_pos on u.emp_code =
t_trans_pos.emp_code
left join (select distinct emp_code, label_code, label_name
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'TEAM_MGMT') t_team_mgmt on u.emp_code =
t_team_mgmt.emp_code
left join (select distinct emp_code, label_code, label_name
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'INDS_BENM') t_inds_benm on u.emp_code =
t_inds_benm.emp_code
left join (select g.empcode, collect_set(g.constr1) pos_exp_info
from ods_hrss.tm_hrmis_es_gwqsj g
where g.inc_day = '${yyyymmdd}'
group by empcode) t_pos_exp on u.emp_code =
t_pos_exp.empcode;-- 汇总“司外经历”、“项目经验”、“绩效”、“盘点”等信息
drop table if exists dm_hrmis.tmp_emp_proj_check_info;
create table dm_hrmis.tmp_emp_proj_check_info stored as parquet as
select u.emp_code,
b.infos external_exp,
t_proj_exp.styp_code proj_exp_flag,
t_proj_exp.exps proj_exp_label,
t_perfor.code perfor_code,
t_perfor.name perfor_label,
t_perfor.advs perfor_adv,
t_perfor.disadvs perfor_disadv,
t_check.name tlnts_check_q,
t_check.dirs tlnts_check_plan,
t_check.advs tlnts_check_adv,
t_check.disadvs tlnts_check_disadv
from (select emp_code
from ods_hrss.tm_hrmis_user
where status = 1
and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
and inc_day = '${yyyymmdd}') u
left join (select t.pernr, collect_set(clms) infos
from (select p.pernr,
named_struct('begda',
p.begda,
'endda',
p.endda,
'subty',
p.subty,
'sprps',
p.sprps,
'seqnr',
p.seqnr,
'zhrgzjllx',
p.zhrgzjllx,
'zhrdw',
p.zhrdw,
'land1',
p.land1,
'branc',
p.branc,
'zhrbm',
p.zhrbm,
'zhrzw',
p.zhrzw,
'zhrlzyy',
p.zhrlzyy,
'zhrzmr',
p.zhrzmr,
'zhrzmrlxdh',
p.zhrzmrlxdh,
'zhrzwxlt',
p.zhrzwxlt,
'zhrzwsx',
p.zhrzwsx,
'zhrsflg',
p.zhrsflg,
'zhrlgdqsj',
p.zhrlgdqsj,
'zhrsfnp',
p.zhrsfnp,
'zhrbdlx',
p.zhrbdlx,
'zhrfplb',
p.zhrfplb,
'zhrygzt',
p.zhrygzt,
'zhrjb',
p.zhrjb,
'zhrgzdt',
p.zhrgzdt,
'zhrsfzzcf',
p.zhrsfzzcf,
'description',
p.description) as clms
from ods_hrss.tm_hrmis_external_exp p
where p.inc_day = '${yyyymmdd}') t
group by t.pernr) b on u.emp_code = b.pernr
left join (select s.emp_code,
s.styp_code,
collect_set(named_struct('proj_sdt',
s.invalid_start,
'proj_edt',
s.invalid_end,
'proj_type_code',
s.attr2,
'proj_type',
s.attr3,
'proj_name',
attr1,
'proj_pcode',
attr4,
'proj_ptext',
attr5,
'proj_role',
attr6)) exps
from dm_hrmis.tmp_tm_hrmis_search_info s
where s.styp_code = 'PROJ_EXP'
group by s.emp_code, s.styp_code) t_proj_exp on u.emp_code =
t_proj_exp.emp_code
left join (select t.emp_code,
collect_set(label_code) code,
collect_set(label_name) name,
collect_set(adv) advs,
collect_set(disadv) disadvs
from (select emp_code,
concat(attr2, '-', label_code) label_code,
concat(attr2, '-', label_name) label_name,
concat(attr2, '-', attr11) adv,
concat(attr2, '-', attr12) disadv
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'PERFOR'
) t
group by t.emp_code) t_perfor on u.emp_code =
t_perfor.emp_code
left join (select t.emp_code,
collect_set(label_name) name,
collect_set(dir) dirs,
collect_set(adv) advs,
collect_set(disadv) disadvs
from (select emp_code,
concat(attr2, '-', label_name) label_name,
concat(attr2, '-', attr3) dir,
concat(attr2, '-', attr11) adv,
concat(attr2, '-', attr12) disadv
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'TLNTS_CHECK'
) t
group by t.emp_code) t_check on u.emp_code = t_check.emp_code;-- 汇总“发展方向”、“人才标签” 、“教育信息”、“外语能力”等信息
drop table if exists dm_hrmis.tmp_emp_dir_label_info;
create table dm_hrmis.tmp_emp_dir_label_info stored as parquet as
select u.emp_code,
t_dev_dir.code dev_dir_code,
t_dev_dir.name dev_dir_label,
pl.emp_label person_label,
edu_info.infos educational_info,
t_for_lang.code for_lang_code,
t_for_lang.name for_lang_label
from (select emp_code
from ods_hrss.tm_hrmis_user
where status = 1
and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
and inc_day = '${yyyymmdd}') u
left join (select t.emp_code,
collect_set(label_code) code,
collect_set(label_name) name
from (select emp_code,
concat(attr2, '-', label_code) label_code,
concat(attr2, '-', label_name) label_name
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'DEV_DIR') t
group by t.emp_code) t_dev_dir on u.emp_code =
t_dev_dir.emp_code
left join (select el.emp_code emp_code,
concat_ws(',', collect_set(el.label_alias)) emp_label
from (select distinct m.serial_num, n.emp_code, n.label_alias
from ods_hrss.tm_hrmis_user_label_config m,
(select a.emp_code, a.type_code, a.label_alias
from dm_hrmis.tmp_tm_hrmis_search_info a
where a.invalid_start / 1000 <= unix_timestamp()
and a.invalid_end / 1000 >= unix_timestamp()
and a.is_label = 1
and a.label_code <> 'OTHER'
union all
select b.emp_code, b.type_code, b.label_alias
from dm_hrmis.tmp_tm_hrmis_search_info b
where b.is_label = 1
and b.label_code <> 'OTHER'
and b.styp_code in
('TEAM_MGMT', 'INDS_BENM', 'PROJ_EXP',
'TRANS_POS', 'TRANS_ORG')) n
where m.type_code = 'SEARCH_LABEL_TYPE'
and n.type_code = m.code
order by m.serial_num) el
group by el.emp_code) pl on u.emp_code = pl.emp_code
left join (select thei.pernr, collect_set(thei.clms) infos
from (select ei.pernr,
named_struct('begda',
ei.begda,
'endda',
ei.endda,
'subty',
ei.subty,
'objps',
ei.objps,
'sprps',
ei.sprps,
'seqnr',
ei.seqnr,
'slart',
ei.slart,
'insti',
ei.insti,
'sland',
ei.sland,
'zhrxkml',
ei.zhrxkml,
'zhrzyl',
ei.zhrzyl,
'zhrzy',
ei.zhrzy,
'zhrzyt',
ei.zhrzyt,
'anzkl',
ei.anzkl,
'zdehrxxfs',
ei.zdehrxxfs,
'zdehrsfzgxl',
ei.zdehrsfzgxl,
'zdehrsfzgxw',
ei.zdehrsfzgxw,
'slabs',
ei.slabs,
'zhrbyzt',
ei.zhrbyzt,
'zhryxlb',
ei.zhryxlb,
'zhrzsbh',
ei.zhrzsbh,
'zhrbz',
ei.zhrbz) as clms
from ods_hrss.tm_hrmis_educational_info ei
where ei.inc_day = '${yyyymmdd}') thei
group by thei.pernr) edu_info on u.emp_code = edu_info.pernr
left join (select emp_code,
collect_set(label_code) code,
collect_set(label_name) name
from dm_hrmis.tmp_tm_hrmis_search_info
where styp_code = 'FOR_LANG'
group by emp_code) t_for_lang on u.emp_code =
t_for_lang.emp_code;
-- 将上述5个临时表的信息汇总到一个临时表:员工附属信息表
drop table if exists dm_hrmis.tmp_emp_satellite_info;
create table dm_hrmis.tmp_emp_satellite_info stored as parquet as
select a.emp_code, --工号
a.org_path, --员工所在组织的组织全路径
a.high_pot_falg, --高潜标识
a.high_pot, --高潜
a.reserve_flag, --后备信息标识
a.reserve_code, --后备信息编码
a.reserve_name, --后备信息名称
a.undgr_type_code,--大学生类型编码
a.undgr_type_name,--大学生类型
b.qlfctn_code, --任职资格编码
b.qlfctn_name, --任职资格名称
b.abil_adv_flag, --能力优势标识
b.abil_adv_code, --能力优势编码
b.abil_adv_label, --能力优势文本
b.abil_other, --能力优势其他信息
b.certi_info, --证书信息
b.prof_skill_info,--技能信息
c.inds_benm_code, --行业标杆经验编码
c.inds_benm_label,--行业标杆经验
c.team_mgmt_code, --团队管理经验编码
c.team_mgmt_label,--团队管理经验
c.trans_org_code, --跨组织经验编码
c.trans_org_label,--跨组织经验
c.trans_pos_flag, --跨职能经验标识
c.trans_pos_code, --跨职能经验编码
c.trans_pos_label,--跨组织经验文本
c.pos_exp_info, --岗位经验信息
d.dev_dir_code, --发展方向编码
d.dev_dir_label, --发展方向名称
d.educational_info,--教育信息
d.for_lang_code, --外语能力编码
d.for_lang_label, --外语能力文本
d.person_label, --个人标签信息
e.perfor_code, --个人绩效编码
e.perfor_label, --个人绩效文本
e.perfor_adv, --绩效评价信息优势
e.perfor_disadv, --绩效评价信息不足
e.external_exp, --外部工作经历信息
e.proj_exp_flag, --项目经验标识
e.proj_exp_label, --项目经验信息
e.tlnts_check_q, --盘点结果
e.tlnts_check_plan,--盘点发展计划
e.tlnts_check_adv, --盘点评价信息优势
e.tlnts_check_disadv --盘点评价信息不足
from dm_hrmis.tmp_emp_talent_pool_info a,
dm_hrmis.tmp_emp_abil_info b,
dm_hrmis.tmp_emp_exp_info c,
dm_hrmis.tmp_emp_dir_label_info d,
dm_hrmis.tmp_emp_proj_check_info e
where a.emp_code = b.emp_code
and a.emp_code = c.emp_code
and a.emp_code = d.emp_code
and a.emp_code = e.emp_code;
--员工基础信息表与员工附属信息临时表合成大宽表
insert overwrite table dm_hrmis.tm_hrmis_user_full_info
select u.user_id,
u.emp_code,
u.emp_name,
u.emp_code main_emp_code,
u.status,
u.org_id,
u.org_name,
s.org_path,
u.pos_rank,
u.main_seq_id,
u.main_seq_name,
u.sub_seq_id,
u.sub_seq_name,
u.position_id,
u.position_name,
u.post_id,
u.post_name,
u.job_id,
u.job_name,
u.sex,
u.pos_attr,
u.education,
u.post_age,
u.entry_age,
u.pos_function,
u.picture,
u.work_area,
u.email,
u.in_time,
u.out_time,
u.birth_date,
u.cur_rank_start,
u.cur_post_start,
u.cur_seq_start,
u.create_dt,
u.update_dt,
u.org_short_name,
u.pos_function_name,
u.education_txt,
u.age,
u.rcnt_1year_dmarks,
u.super_emp_code,
u.super_emp_name,
u.main_pos_rank,
u.superior_path,
s.qlfctn_code,
s.qlfctn_name,
s.undgr_type_code,
s.undgr_type_name,
s.high_pot_falg,
s.high_pot,
s.reserve_flag,
s.reserve_code,
s.reserve_name,
s.abil_adv_flag,
s.abil_adv_code,
s.abil_adv_label,
s.abil_other,
s.prof_skill_info,
s.trans_org_code,
s.trans_org_label,
s.trans_pos_flag,
s.trans_pos_code,
s.trans_pos_label,
s.team_mgmt_code,
s.team_mgmt_label,
s.inds_benm_code,
s.inds_benm_label,
s.proj_exp_flag,
s.proj_exp_label,
s.dev_dir_code,
s.dev_dir_label,
s.for_lang_code,
s.for_lang_label,
s.perfor_code,
s.perfor_label,
s.perfor_adv,
s.perfor_disadv,
s.tlnts_check_q,
s.tlnts_check_plan,
s.tlnts_check_adv,
s.tlnts_check_disadv,
s.external_exp,
s.educational_info,
s.certi_info,
s.pos_exp_info,
s.person_label,
'${yyyymmdd}'
from ods_hrss.tm_hrmis_user u,
dm_hrmis.tmp_emp_satellite_info s
where u.status = 1
and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
and u.inc_day = '${yyyymmdd}'
and u.emp_code = s.emp_code;
set mapred.max.split.size=100000000;
set mapred.min.split.size.per.node=100000000;
set mapred.min.split.size.per.rack=100000000;
set hive.exec.reducers.max=200;
set hive.exec.compress.output=false;
set hive.exec.compress.intermediate=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nostrick;
set map red.queue.name=hive;
set map red.job.queue.name=hive;use dm_hrmis;
---清理实战值-----------------岗位经历数据
drop table if exists dm_hrmis.tm_hrmis_Temp_gwqsj; --1步创建20160701岗位套改前临时表--yes就不出数据创建表
create table dm_hrmis.tm_hrmis_Temp_gwqsj stored as parquet as
select emp.employee empcode,tzcg.txtsh ,emp.dateto,emp.datefrom ,''createTm from ods_sap.bi0_memployee emp
left join ods_sap.bic_awom1d00200 gwq on gwq.job=emp.job
left join ods_sap.bi0_TJOB tzcg on tzcg.job=gwq.job
where emp.emplstatus='yes' and emp.dateto<= '20160701' and tzcg.txtsh is not null and trim(tzcg.txtsh) != '' limit 1;--2步同步20160701岗位套改前的数据
insert into table dm_hrmis.tm_hrmis_Temp_gwqsj
select cn.empcode,
cn.txtsh,
cn.dateto,
cn.datefrom,
from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') createTm
from (select emp.employee empcode,
tzcg.txtsh,
emp.dateto,
emp.datefrom,
row_number() over(partition by emp.employee,tzcg.txtsh,emp.dateto,emp.datefrom) rn
from (select * from ods_sap.bi0_memployee where dateto <= '20160701' and datefrom <='20160701' and datefrom >= '19930101' and inc_day = '${yyyymmdd}') emp
left join ods_sap.bic_awom1d00200 gwq on gwq.job = emp.job and gwq.inc_day = '${yyyymmdd}'
left join (select * from ods_sap.bi0_TJOB where txtsh is not null and inc_day = '${yyyymmdd}' and langu = 1 ) tzcg on tzcg.job = gwq.job
where emp.emplstatus = '3'
and emp.dateto >= gwq.datefrom
and emp.dateto <= gwq.dateto
and trim(tzcg.txtsh) != '') cn where cn.rn=1;--3步同步20160701岗位套改后的数据取职位
insert into table dm_hrmis.tm_hrmis_Temp_gwqsj
select cn.empcode,
cn.txtsh,
cn.dateto,
cn.datefrom,
from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') createTm
from (select emp.employee empcode,
tzcg.txtsh,
emp.dateto,
emp.datefrom,
row_number() over(partition by emp.employee,tzcg.txtsh,emp.dateto,emp.datefrom) rn
from (select * from ods_sap.bi0_memployee where dateto >= '20160701' and datefrom >='20160701' and from_unixtime(unix_timestamp(), 'yyyyMMdd')>=datefrom and inc_day = '${yyyymmdd}') emp
left join ods_sap.bic_awom1d03600 gwq on gwq.job = emp.job and gwq.inc_day = '${yyyymmdd}'
left join (select * from ods_sap.bi0_TJOB where txtsh is not null and inc_day = '${yyyymmdd}' and langu = 1 ) tzcg on tzcg.job = gwq.job
where emp.emplstatus = '3'
and emp.dateto >= gwq.datefrom
and emp.dateto <= gwq.dateto
and trim(tzcg.txtsh) != '') cn where cn.rn=1;
---清理岗位处理后的数据
--6-创建处理岗位经历合并结果数据表
insert overwrite table dm_hrmis.tm_hrmis_sync_gwqsj partition(inc_day='${yyyymmdd}')
select cn.empcode,
cn.txtsh,
cn.dateto,
cn.datefrom,
from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') createTm
from (select emp.empcode,
emp.txtsh,
emp.dateto,
emp.datefrom,
row_number() over(partition by emp.empcode,emp.txtsh,emp.dateto,emp.datefrom) rn
from dm_hrmis.tm_hrmis_Temp_gwqsj emp
where emp.empcode is not null
and trim(emp.empcode) != '') cn where cn.rn=1; ---清理岗位处理后的数据
--8步同步20160701岗位套改后的管理线人数据取职位主要是主序列字段
insert overwrite table dm_hrmis.tm_hrmis_sync_glzxl partition(inc_day='${yyyymmdd}')
select cn.empcode,
cn.txtmd txtsh,
cn.dateto dateto,
cn.datefrom datefrom,
cn.bic_zczxl biczczxl,
cn.bic_zczixl biczczixl,
from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') createTm
from (select emp.employee empcode,
thr.txtmd,
emp.dateto,
emp.datefrom,gwq.bic_zczxl, gwq.bic_zczixl,
row_number() over(partition by emp.employee,thr.txtmd,emp.dateto,emp.datefrom,gwq.bic_zczxl, gwq.bic_zczixl) rn
from (select * from ods_sap.bi0_memployee where dateto >= '20160701' and datefrom>='20160701' and from_unixtime(unix_timestamp(), 'yyyyMMdd')>=datefrom and inc_day = '${yyyymmdd}') emp
left join ods_sap.bi0_thrposition thr on thr.hrposition= emp.hrposition and thr.datefrom>='20160701' and thr.inc_day = '${yyyymmdd}'
left join ods_sap.bic_awom1d03600 gwq on gwq.job = emp.job and gwq.inc_day = '${yyyymmdd}'
where emp.emplstatus = '3'
and emp.dateto >= gwq.datefrom
and emp.dateto <= gwq.dateto
and trim(thr.txtmd) != '') cn where cn.rn=1;
CREATE TABLE if not exists test_json
(
type string,
name string,
doc string,
tableName string
)
partitioned by (inc_day string)
STORED AS parquet
;
create table dm_hrmis.tm_hrmis_Temp_gwqsj stored as parquet as
select uid,from_unixtime(cast(time/1000 as bigint)) as date_time ,event_id,properties_app_version,properties_res_result,device_id ,tag ,
version ,
remote_host ,
remote_port ,
msg_id ,
received_time ,
app_id ,
app_key ,
platform ,
uid ,
sec_uid ,
login_type ,
device_id ,
cookie_id ,
ip ,
time ,
event_id ,
event_type ,
page_url ,
page_name,inc_day from inc_ubas.ods_product_inc_ubas_hrss_clock limit 0;
hive 去掉字符串中最后一个逗号之后的数据 hive如何去除换行
转载本文章为转载内容,我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题,欢迎原作者联系我们进行内容更正或删除文章。
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
嵌入式软件开发持续集成方案
嵌入式系统开发概述: 1、嵌入式的理解?嵌入式是在已有的硬件平台上移植操作系统 2、嵌入式开发和单片机的开发的区别? 传统开发又叫单片机开发,传统开发的缺点有两个:第一,软件的可移植性差。第 二,软件开发人员必须懂硬件的工作原理。 &nb
嵌入式软件开发持续集成方案 嵌入式是什么 嵌入式开发的优点 嵌入式开发和传统开发区别 嵌入式学习路线