hive 去掉字符串中最后一个逗号之后的数据 hive如何去除换行

转载
lanhy 2023-07-30 18:07:26
大数据平时工作使用总结整理
set mapred.max.split.size=100000000;
 set mapred.min.split.size.per.node=100000000;
 set mapred.min.split.size.per.rack=100000000;
 set hive.exec.reducers.max=200;
 set hive.exec.compress.output=false;
 set hive.exec.compress.intermediate=true;
 set hive.exec.dynamic.partition=true;
 set hive.exec.dynamic.partition.mode=nostrick;
 set map red.queue.name=hive;
 set map red.job.queue.name=hive;use dm_hrmis;
 --处理换行数据信息 
 drop table if exists dm_hrmis.tmp_tm_hrmis_search_info;
 CREATE TABLE dm_hrmis.tmp_tm_hrmis_search_info as 
 select info_id       ,    
 emp_code      ,    
 type_code     ,    
 type_name     ,    
 styp_code     ,    
 styp_name     ,    
 label_id      ,    
 label_code    ,    
 label_name    ,    
 label_alias   ,    
 invalid_start ,    
 invalid_end   ,    
 invalid_status,    
 create_date   ,    
 update_date   ,    
 regexp_replace(regexp_replace(attr1,'\\\\n',''),'\\\\r','')      attr1         ,    
 regexp_replace(regexp_replace(attr2,'\\\\n',''),'\\\\r','')      attr2         ,    
 regexp_replace(regexp_replace(attr3,'\\\\n',''),'\\\\r','')      attr3         ,    
 regexp_replace(regexp_replace(attr4,'\\\\n',''),'\\\\r','')      attr4         ,    
 regexp_replace(regexp_replace(attr5,'\\\\n',''),'\\\\r','')      attr5         ,    
 regexp_replace(regexp_replace(attr6,'\\\\n',''),'\\\\r','')      attr6         ,    
 regexp_replace(regexp_replace(attr7,'\\\\n',''),'\\\\r','')      attr7         ,    
 regexp_replace(regexp_replace(attr8,'\\\\n',''),'\\\\r','')      attr8         ,    
 regexp_replace(regexp_replace(attr9,'\\\\n',''),'\\\\r','')      attr9         ,    
 regexp_replace(regexp_replace(attr10,'\\\\n',''),'\\\\r','')     attr10        ,    
 regexp_replace(regexp_replace(attr11,'\\\\n',''),'\\\\r','')     attr11        ,    
 regexp_replace(regexp_replace(attr12,'\\\\n',''),'\\\\r','')     attr12        ,    
 regexp_replace(regexp_replace(attr13,'\\\\n',''),'\\\\r','')     attr13        ,    
 regexp_replace(regexp_replace(attr14,'\\\\n',''),'\\\\r','')      attr14        ,    
 regexp_replace(regexp_replace(attr15,'\\\\n',''),'\\\\r','')      attr15        ,    
 regexp_replace(regexp_replace(attr16,'\\\\n',''),'\\\\r','')      attr16        ,    
 regexp_replace(regexp_replace(attr17,'\\\\n',''),'\\\\r','')      attr17        ,    
 regexp_replace(regexp_replace(attr18,'\\\\n',''),'\\\\r','')      attr18        ,    
 regexp_replace(regexp_replace(attr19,'\\\\n',''),'\\\\r','')      attr19        ,    
 regexp_replace(regexp_replace(attr20,'\\\\n',''),'\\\\r','')      attr20        ,    
 description   ,    
 data_source   ,    
 is_label      from ods_hrss.tm_hrmis_search_info where inc_day='${yyyymmdd}';drop table if exists dm_hrmis.tm_hrmis_user_full_info;
 CREATE TABLE dm_hrmis.tm_hrmis_user_full_info(
     --员工基本信息 
     USER_ID                 STRING     comment '用户ID',
     EMP_CODE                STRING     comment '员工工号', 
     EMP_NAME                STRING     comment '员工姓名',
     MAIN_EMP_CODE           INT        comment '员工工号排序去掉00', 
     STATUS                  INT        comment '员工状态：1：有效 0：无效',
     ORG_ID                  STRING     comment '组织id',
     ORG_NAME                STRING     comment '组织名称',
     ORG_PATH                STRING     comment '组织全路径',
     POS_RANK                STRING     comment '职级',
     MAIN_SEQ_ID             STRING     comment '主序列id',
     MAIN_SEQ_NAME           STRING     comment '主序列名称',
     SUB_SEQ_ID              STRING     comment '子序列id',
     SUB_SEQ_NAME            STRING     comment '子序列名称',
     POSITION_ID             STRING     comment '职位id',
     POSITION_NAME           STRING     comment '职位名称',
     POST_ID                 STRING     comment '岗id',
     POST_NAME               STRING     comment '岗名称',
     JOB_ID                  STRING     comment '岗位id',
     JOB_NAME                STRING     comment '岗位名称',
     SEX                     STRING     comment '性别：M：男 F：女',
     POS_ATTR                STRING     comment '职位属性：一，二，三线',
     EDUCATION               STRING     comment '最高学历编码',
     POST_AGE                DOUBLE     comment '岗龄',
     ENTRY_AGE               DOUBLE     comment '司龄',
     POS_FUNCTION            STRING     comment '职能（人事子范围）',
     PICTURE                 STRING     comment '照片',
     WORK_AREA               STRING     comment '工作地点',
     EMAIL                   STRING     comment '邮箱',
     IN_TIME                 STRING     comment '入职日期',
     OUT_TIME                STRING     comment '离职日期',
     BIRTH_DATE              STRING     comment '出生日期',
     CUR_RANK_START          STRING     comment '当前职级开始时间',
     CUR_POST_START          STRING     comment '当前岗位开始时间',
     CUR_SEQ_START           STRING     comment '当前子序列开始时间',
     CREATE_DT               STRING     comment '创建时间',
     UPDATE_DT               STRING     comment '更新时间',
     ORG_SHORT_NAME          STRING     comment '组织短名称',
     POS_FUNCTION_NAME       STRING     comment '职能名称',
     EDUCATION_TXT           STRING     comment '最高学历文本',
     AGE                     INT        comment '年龄',
     RCNT_1YEAR_DMARKS       DOUBLE     comment '近一年行政扣分：直接责任扣分/管理责任扣分/诚信类扣分',
     SUPER_EMP_CODE          STRING     comment '上级工号',
     SUPER_EMP_NAME          STRING     comment '上级姓名',
     MAIN_POS_RANK           STRING     comment '主职级（如：M4.1，存储4.1）',
     SUPERIOR_PATH           STRING     comment '按上下级关系整理的一个工号路径',
     
     --标签信息，汇总时只同步当前有效的数据，过滤已过期等无效的数据
     QLFCTN_CODE             ARRAY<STRING>     comment '任职资格编码（等级编码+"-"+类型编码）',
     QLFCTN_NAME             ARRAY<STRING>     comment '任职资格名称（等级文本+"-"+类型文本+"-任职资格"）',
     UNDGR_TYPE_CODE         STRING            comment '大学生类型编码（年度+"-"+类型编码，如：2015-1）',
     UNDGR_TYPE_NAME         STRING            comment '大学生类型名称（年度+"-"+类型，如：2015-优才）',
     HIGH_POT_FALG           STRING            comment '高潜标识（HIGH_POT）',
     HIGH_POT                ARRAY<STRING>     comment '高潜（yyyy高潜）',
     RESERVE_FLAG            STRING            comment '储备标识（RESERVE）',
     RESERVE_CODE            ARRAY<STRING>     comment '后备信息类型',
     RESERVE_NAME            ARRAY<STRING>     comment '后备信息名称（后备经理）',
     ABIL_ADV_FLAG           STRING            comment '能力优势标识（ABIL_ADV）',
     ABIL_ADV_CODE           ARRAY<STRING>     comment '能力优势编码',
     ABIL_ADV_LABEL          ARRAY<STRING>     comment '能力优势名称',
     ABIL_OTHER              ARRAY<STRING>     comment '其他能力优势',
     PROF_SKILL_INFO        ARRAY<STRUCT<
         BEGDA:BIGINT,   --开始时间
         ENDDA:BIGINT,   --结束日期
         ZHRZXL:STRING,  --子序列ID
         ZHRJNLB:STRING, --专业技能类别
         ZHRJNMC:STRING, --专业技能名称
         ZHRJTCD:STRING, --精通程度
         ZHRZMR:STRING,  --在司证明人
         ZHRDH:STRING    --证明人电话
     >>
     comment '专业技能信息',
     TRANS_ORG_CODE          STRING            comment '跨组织经验编码',
     TRANS_ORG_LABEL         STRING            comment '跨组织经验名称',
     TRANS_POS_FLAG          STRING            comment '跨职能经验标识（TRANS_POS）',
     TRANS_POS_CODE          ARRAY<STRING>     comment '跨职能经验编码',
     TRANS_POS_LABEL         ARRAY<STRING>     comment '跨职能经验名称',
     TEAM_MGMT_CODE          STRING            comment '团队管理经验标识',
     TEAM_MGMT_LABEL         STRING            comment '团队管理经验名称',
     INDS_BENM_CODE          STRING            comment '行业标杆经验标识',
     INDS_BENM_LABEL         STRING            comment '行业标杆经验名称',
     PROJ_EXP_FLAG           STRING            comment '项目经验标识（PROJ_EXP）',
     PROJ_EXP_LABEL          ARRAY<STRUCT<
         PROJ_SDT:BIGINT,--项目开始时间
         PROJ_EDT:BIGINT,--项目结束日期
         PROJ_TYPE_CODE:STRING,--项目类别编码
         PROJ_TYPE:STRING,--项目类别
         PROJ_NAME:STRING,--项目名称
         PROJ_PCODE:STRING,--参与状态编码
         PROJ_PTEXT:STRING,--参与状态文本
         PROJ_ROLE:STRING--担任角色
     >>
     comment '项目经验名称',
     --格式 类别编码：“类别编码”+“#”类别名称：“类别名称”+“#”项目名称：“项目名称”+“#”开始日期：+“开始日期”+“#”+结束日期：+“结束日期”+...+担任角色：+“担任角色”
     --PROJ_EXP_DESC           STRING     comment '项目经验描述，多项之间用“||”分隔（如：类别编码：26#类别名称：人力资源#项目名称：ORACLE人事系统#开始日期：20070501#结束日期：20080131#参与状态编码：2#参与状态文本：全职#担任角色：项目经理）',        --
     DEV_DIR_CODE            ARRAY<STRING>     comment '发展方向编码，年度加编码（2016-ZYYJX）只汇总当前有效的数据',
     DEV_DIR_LABEL           ARRAY<STRING>     comment '发展方向名称，年度加名称（2016-专业研究型）',
     --RCNT_1YEAR_DMARKS       DOUBLE,        --近一年
     FOR_LANG_CODE           ARRAY<STRING>     comment '外语能力编码（语种编码+级别编码）',
     FOR_LANG_LABEL          ARRAY<STRING>     comment '外语能力文本（语种文本+级别文本）',
     PERFOR_CODE             ARRAY<STRING>     comment '年度绩效编码（2015A1）',
     PERFOR_LABEL            ARRAY<STRING>     comment '年度绩效编码（2015A1）',
     PERFOR_ADV              ARRAY<STRING>     comment '年度绩效评价优势（2015+评价信息）',
     PERFOR_DISADV           ARRAY<STRING>     comment '年度绩效评价劣势（2015+评价信息）',
     TLNTS_CHECK_Q           ARRAY<STRING>     comment '人才盘点Q值（年度加Q值）',
     TLNTS_CHECK_PLAN        ARRAY<STRING>     comment '人才盘点发展计划（年度+发展计划，如：2016随时可调任到新的岗位）',
     TLNTS_CHECK_ADV         ARRAY<STRING>     comment '人才盘点评价优势',
     TLNTS_CHECK_DISADV      ARRAY<STRING>     comment '人才盘点评价劣势',
     --司外经历
     EXTERNAL_EXP            ARRAY<STRUCT<
         BEGDA:BIGINT,--开始时间                   
         ENDDA:BIGINT,--结束时间
         SUBTY:STRING,--子信息类型
         SPRPS:STRING,--人力资源主数据记录的锁定标志
         SEQNR:STRING,--相同关键值信息类型记录的编号
         ZHRGZJLLX:STRING,--工作经历类型
         ZHRDW:STRING,--工作单位
         LAND1:STRING,--国家
         BRANC:STRING,--行业
         ZHRBM:STRING,--部门
         ZHRZW:STRING,--职位
         ZHRLZYY:STRING,--离职原因
         ZHRZMR:STRING,--证明人
         ZHRZMRLXDH:STRING,--证明人联系电话
         ZHRZWXLT:STRING,--岗位序列
         ZHRZWSX:STRING,--岗位属性
         ZHRSFLG:STRING,--是否轮岗
         ZHRLGDQSJ:BIGINT,--轮岗到期日期
         ZHRSFNP:STRING,--是否内聘
         ZHRBDLX:STRING,--变动类型
         ZHRFPLB:STRING,--分配类别
         ZHRYGZT:STRING,--员工状态
         ZHRJB:STRING,--级别
         ZHRGZDT:STRING,--工作地
         ZHRSFZZCF:STRING,--是否因组织机构拆分引起
         DESCRIPTION:STRING--描述
     >>      comment '司外经历',
     --教育信息
     EDUCATIONAL_INFO          ARRAY<STRUCT<
         BEGDA:BIGINT,--开始时间                   
         ENDDA:BIGINT,--结束时间
         SUBTY:STRING,--子信息类型
         OBJPS:STRING,--对象标识
         SPRPS:STRING,--人力资源主数据记录的锁定标志
         SEQNR:STRING,--相同关键值信息类型记录的编号
         SLART:STRING,--学历
         INSTI:STRING,--学校名称
         SLAND:STRING,--国家
         ZHRXKML:STRING,--学科门类
         ZHRZYL:STRING,--专业类
         ZHRZY:STRING,--专业
         ZHRZYT:STRING,--其他专业
         ANZKL:STRING,--学制
         ZDEHRXXFS:STRING,--学习方式
         ZDEHRSFZGXL:STRING,--最高学历标识
         ZDEHRSFZGXW:STRING,--最高学位标识
         SLABS:STRING,--学位
         ZHRBYZT:STRING,--毕业状态
         ZHRYXLB:STRING,--院校类别
         ZHRZSBH:STRING,--证书编号
         ZHRBZ:STRING--备注
     >>      comment '教育经历信息',
     CERTI_INFO          ARRAY<STRUCT<
         BEGDA:BIGINT,--开始时间
         ENDDA:BIGINT,--结束日期
         SUBTY:STRING,--子信息类型
         OBJPS:STRING,--对象标识
         SPRPS:STRING,--人力资源主数据记录的锁定标志
         SEQNR:STRING,--相同关键值信息类型记录的编号
         ZHRZJLX:STRING,--证书类型
         ZHRZGZSMC:STRING,--资格证书名称
         ZHRZSJB:STRING,--证书类别
         ZHRZSPDDW:STRING,--证书评定单位
         ZHRZSJB1:STRING,--证书级别
         ZHRZZHM:STRING--执照号码
     >>
     comment '技能证书信息',
     POS_EXP_INFO           ARRAY<STRING>   comment '岗位经验',
     PERSON_LABEL           STRING          comment '个人标签，用于展示在搜索结果页面',
     INC_DAY                STRING          comment '数据同步日期'
 )  stored as parquet;
 -- 汇总人才池信息
 drop table if exists dm_hrmis.tmp_emp_talent_pool_info;
 create table dm_hrmis.tmp_emp_talent_pool_info stored as parquet as
 select u.emp_code           emp_code,
        o.path               org_path,
        t_undgr.undgr_code   undgr_type_code,
        t_undgr.undgr_label  undgr_type_name,
        t_potent.styp_code   high_pot_falg,
        t_potent.label       high_pot,
        t_reserve.styp_code  reserve_flag,
        t_reserve.label_code reserve_code,
        t_reserve.label_name reserve_name
   from (select emp_code, org_id
           from ods_hrss.tm_hrmis_user
          where status = 1
            and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
            and inc_day = '${yyyymmdd}') u
   left join (select distinct orgid, path
                from ods_hrss.tm_org
               where state = 1
                 and inc_day = '${yyyymmdd}') o on u.org_id = o.orgid
   left join (select emp_code,
                     concat(attr2, '-UNIVERSITY0', attr3) undgr_code,
                     concat(attr2, '-', label_name) undgr_label
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'UNDGR'
                 and unix_timestamp() >= invalid_start / 1000
                 and unix_timestamp() < invalid_end / 1000) t_undgr on u.emp_code =
                                                      t_undgr.emp_code
   left join (select emp_code, styp_code, collect_set(label_alias) label
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'HIGH_POT'
               group by emp_code, styp_code) t_potent on u.emp_code =
                                                         t_potent.emp_code
   left join (select emp_code,
                     styp_code,
                     collect_set(concat('RESERVE0', '', label_code)) label_code,
                     collect_set(label_name) label_name
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'RESERVE'
                 and unix_timestamp() >= invalid_start / 1000
                 and unix_timestamp() < invalid_end / 1000
               group by emp_code, styp_code) t_reserve on u.emp_code =
                                                          t_reserve.emp_code;
                                                          
 -- 汇总能力信息
 drop table if exists dm_hrmis.tmp_emp_abil_info;
 create table dm_hrmis.tmp_emp_abil_info stored as parquet as
 select u.emp_code,
        t_qlfctn.qlfctn_code,
        t_qlfctn.qlfctn_name,
        t_skill.skill prof_skill_info,
        certi_info.certis certi_info,
        t_abil.styp_code abil_adv_flag,
        t_abil.abil_code abil_adv_code,
        t_abil.abil_label abil_adv_label,
        t_abil.other_abil abil_other
   from (select emp_code
           from ods_hrss.tm_hrmis_user
          where status = 1
            and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
            and inc_day = '${yyyymmdd}') u
   left join (select emp_code,
                     collect_set(label_code) qlfctn_code,
                     collect_set(label_name) qlfctn_name
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'QLFCTN'
                 and unix_timestamp() >= invalid_start / 1000
                 and unix_timestamp() < invalid_end / 1000
               group by emp_code) t_qlfctn on u.emp_code = t_qlfctn.emp_code
   left join (select k.pernr pernr,
                     collect_set(named_struct('begda',
                                              k.begda,
                                              'endda',
                                              k.endda,
                                              'zhrzxl',
                                              k.zhrzxl,
                                              'zhrjnlb',
                                              k.zhrjnlb,
                                              'zhrjnmc',
                                              k.zhrjnmc,
                                              'zhrjtcd',
                                              k.zhrjtcd,
                                              'zhrzmr',
                                              k.zhrzmr,
                                              'zhrdh',
                                              k.zhrdh)) skill
                from ods_hrss.tm_hrmis_prof_skill k
               where k.inc_day = '${yyyymmdd}'
               group by k.pernr) t_skill on u.emp_code = t_skill.pernr
   left join (select ssc.pernr, collect_set(ssc.clms) certis
                from (select sc.pernr,
                             named_struct('begda',
                                          sc.begda,
                                          'endda',
                                          sc.endda,
                                          'subty',
                                          sc.subty,
                                          'objps',
                                          sc.objps,
                                          'sprps',
                                          sc.sprps,
                                          'seqnr',
                                          sc.seqnr,
                                          'zhrzjlx',
                                          sc.zhrzjlx,
                                          'zhrzgzsmc',
                                          sc.zhrzgzsmc,
                                          'zhrzsjb',
                                          sc.zhrzsjb,
                                          'zhrzspddw',
                                          sc.zhrzspddw,
                                          'zhrzsjb1',
                                          sc.zhrzsjb1,
                                          'zhrzzhm',
                                          sc.zhrzzhm) as clms
                        from ods_hrss.sys_sap_certi sc
                       where sc.inc_day = '${yyyymmdd}') ssc
               group by ssc.pernr) certi_info on u.emp_code =
                                                 certi_info.pernr
   left join (select emp_code,
                     styp_code,
                     collect_set(label_code) abil_code,
                     collect_set(label_name) abil_label,
                     collect_set(attr1) other_abil
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'ABIL_ADV'
                 and unix_timestamp() >= invalid_start / 1000
                 and unix_timestamp() < invalid_end / 1000
               group by emp_code, styp_code) t_abil on u.emp_code =
                                                       t_abil.emp_code;
                                                       
 -- 汇总经验信息
 drop table if exists dm_hrmis.tmp_emp_exp_info;
 create table dm_hrmis.tmp_emp_exp_info stored as parquet as
 select u.emp_code,
        t_trans_org.label_code trans_org_code,
        t_trans_org.label_name trans_org_label,
        t_trans_pos.styp_code trans_pos_flag,
        t_trans_pos.pos_code trans_pos_code,
        t_trans_pos.pos_label trans_pos_label,
        t_team_mgmt.label_code team_mgmt_code,
        t_team_mgmt.label_name team_mgmt_label,
        t_inds_benm.label_code inds_benm_code,
        t_inds_benm.label_name inds_benm_label,
        t_pos_exp.pos_exp_info pos_exp_info
   from (select emp_code
           from ods_hrss.tm_hrmis_user
          where status = 1
            and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
            and inc_day = '${yyyymmdd}') u
   left join (select distinct emp_code, label_code, label_name
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'TRANS_ORG') t_trans_org on u.emp_code =
                                                          t_trans_org.emp_code
   left join (select emp_code,
                     styp_code,
                     collect_set(attr1) pos_code,
                     collect_set(attr3) pos_label
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'TRANS_POS'
               group by emp_code, styp_code) t_trans_pos on u.emp_code =
                                                            t_trans_pos.emp_code
   left join (select distinct emp_code, label_code, label_name
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'TEAM_MGMT') t_team_mgmt on u.emp_code =
                                                          t_team_mgmt.emp_code
   left join (select distinct emp_code, label_code, label_name
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'INDS_BENM') t_inds_benm on u.emp_code =
                                                          t_inds_benm.emp_code
   left join (select g.empcode, collect_set(g.constr1) pos_exp_info
                from ods_hrss.tm_hrmis_es_gwqsj g
               where g.inc_day = '${yyyymmdd}'
               group by empcode) t_pos_exp on u.emp_code = 
                                              t_pos_exp.empcode;-- 汇总“司外经历”、“项目经验”、“绩效”、“盘点”等信息
 drop table if exists dm_hrmis.tmp_emp_proj_check_info;
 create table dm_hrmis.tmp_emp_proj_check_info stored as parquet as
 select u.emp_code,
        b.infos external_exp,
        t_proj_exp.styp_code proj_exp_flag,
        t_proj_exp.exps proj_exp_label,
        t_perfor.code perfor_code,
        t_perfor.name perfor_label,
        t_perfor.advs perfor_adv,
        t_perfor.disadvs perfor_disadv,
        t_check.name tlnts_check_q,
        t_check.dirs tlnts_check_plan,
        t_check.advs tlnts_check_adv,
        t_check.disadvs tlnts_check_disadv
   from (select emp_code
           from ods_hrss.tm_hrmis_user
          where status = 1
            and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
            and inc_day = '${yyyymmdd}') u
   left join (select t.pernr, collect_set(clms) infos
                from (select p.pernr,
                             named_struct('begda',
                                          p.begda,
                                          'endda',
                                          p.endda,
                                          'subty',
                                          p.subty,
                                          'sprps',
                                          p.sprps,
                                          'seqnr',
                                          p.seqnr,
                                          'zhrgzjllx',
                                          p.zhrgzjllx,
                                          'zhrdw',
                                          p.zhrdw,
                                          'land1',
                                          p.land1,
                                          'branc',
                                          p.branc,
                                          'zhrbm',
                                          p.zhrbm,
                                          'zhrzw',
                                          p.zhrzw,
                                          'zhrlzyy',
                                          p.zhrlzyy,
                                          'zhrzmr',
                                          p.zhrzmr,
                                          'zhrzmrlxdh',
                                          p.zhrzmrlxdh,
                                          'zhrzwxlt',
                                          p.zhrzwxlt,
                                          'zhrzwsx',
                                          p.zhrzwsx,
                                          'zhrsflg',
                                          p.zhrsflg,
                                          'zhrlgdqsj',
                                          p.zhrlgdqsj,
                                          'zhrsfnp',
                                          p.zhrsfnp,
                                          'zhrbdlx',
                                          p.zhrbdlx,
                                          'zhrfplb',
                                          p.zhrfplb,
                                          'zhrygzt',
                                          p.zhrygzt,
                                          'zhrjb',
                                          p.zhrjb,
                                          'zhrgzdt',
                                          p.zhrgzdt,
                                          'zhrsfzzcf',
                                          p.zhrsfzzcf,
                                          'description',
                                          p.description) as clms
                        from ods_hrss.tm_hrmis_external_exp p
                       where p.inc_day = '${yyyymmdd}') t
               group by t.pernr) b on u.emp_code = b.pernr
   left join (select s.emp_code,
                     s.styp_code,
                     collect_set(named_struct('proj_sdt',
                                              s.invalid_start,
                                              'proj_edt',
                                              s.invalid_end,
                                              'proj_type_code',
                                              s.attr2,
                                              'proj_type',
                                              s.attr3,
                                              'proj_name',
                                              attr1,
                                              'proj_pcode',
                                              attr4,
                                              'proj_ptext',
                                              attr5,
                                              'proj_role',
                                              attr6)) exps
                from dm_hrmis.tmp_tm_hrmis_search_info s
               where s.styp_code = 'PROJ_EXP'
               group by s.emp_code, s.styp_code) t_proj_exp on u.emp_code =
                                                               t_proj_exp.emp_code
   left join (select t.emp_code,
                     collect_set(label_code) code,
                     collect_set(label_name) name,
                     collect_set(adv) advs,
                     collect_set(disadv) disadvs
                from (select emp_code,
                             concat(attr2, '-', label_code) label_code,
                             concat(attr2, '-', label_name) label_name,
                             concat(attr2, '-', attr11) adv,
                             concat(attr2, '-', attr12) disadv
                        from dm_hrmis.tmp_tm_hrmis_search_info
                       where styp_code = 'PERFOR'
                       ) t
               group by t.emp_code) t_perfor on u.emp_code =
                                                t_perfor.emp_code
   left join (select t.emp_code,
                     collect_set(label_name) name,
                     collect_set(dir) dirs,
                     collect_set(adv) advs,
                     collect_set(disadv) disadvs
                from (select emp_code,
                             concat(attr2, '-', label_name) label_name,
                             concat(attr2, '-', attr3) dir,
                             concat(attr2, '-', attr11) adv,
                             concat(attr2, '-', attr12) disadv
                        from dm_hrmis.tmp_tm_hrmis_search_info
                       where styp_code = 'TLNTS_CHECK'
                       ) t
               group by t.emp_code) t_check on u.emp_code = t_check.emp_code;-- 汇总“发展方向”、“人才标签” 、“教育信息”、“外语能力”等信息
 drop table if exists dm_hrmis.tmp_emp_dir_label_info;
 create table dm_hrmis.tmp_emp_dir_label_info stored as parquet as
 select u.emp_code,
        t_dev_dir.code dev_dir_code,
        t_dev_dir.name dev_dir_label,
        pl.emp_label person_label,
        edu_info.infos educational_info,
        t_for_lang.code for_lang_code,
        t_for_lang.name for_lang_label
   from (select emp_code
           from ods_hrss.tm_hrmis_user
          where status = 1
            and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
            and inc_day = '${yyyymmdd}') u
   left join (select t.emp_code,
                     collect_set(label_code) code,
                     collect_set(label_name) name
                from (select emp_code,
                             concat(attr2, '-', label_code) label_code,
                             concat(attr2, '-', label_name) label_name
                        from dm_hrmis.tmp_tm_hrmis_search_info
                       where styp_code = 'DEV_DIR') t
               group by t.emp_code) t_dev_dir on u.emp_code =
                                                 t_dev_dir.emp_code
   left join (select el.emp_code emp_code,
                     concat_ws(',', collect_set(el.label_alias)) emp_label
                from (select distinct m.serial_num, n.emp_code, n.label_alias
                        from ods_hrss.tm_hrmis_user_label_config m,
                             (select a.emp_code, a.type_code, a.label_alias
                                from dm_hrmis.tmp_tm_hrmis_search_info a
                               where a.invalid_start / 1000 <= unix_timestamp()
                                 and a.invalid_end / 1000 >= unix_timestamp()
                                 and a.is_label = 1
                                 and a.label_code <> 'OTHER'
                              union all
                              select b.emp_code, b.type_code, b.label_alias
                                from dm_hrmis.tmp_tm_hrmis_search_info b
                               where b.is_label = 1
                                 and b.label_code <> 'OTHER'
                                 and b.styp_code in
                                     ('TEAM_MGMT', 'INDS_BENM', 'PROJ_EXP',
                                      'TRANS_POS', 'TRANS_ORG')) n
                       where m.type_code = 'SEARCH_LABEL_TYPE'
                         and n.type_code = m.code
                       order by m.serial_num) el
               group by el.emp_code) pl on u.emp_code = pl.emp_code
   left join (select thei.pernr, collect_set(thei.clms) infos
                from (select ei.pernr,
                             named_struct('begda',
                                          ei.begda,
                                          'endda',
                                          ei.endda,
                                          'subty',
                                          ei.subty,
                                          'objps',
                                          ei.objps,
                                          'sprps',
                                          ei.sprps,
                                          'seqnr',
                                          ei.seqnr,
                                          'slart',
                                          ei.slart,
                                          'insti',
                                          ei.insti,
                                          'sland',
                                          ei.sland,
                                          'zhrxkml',
                                          ei.zhrxkml,
                                          'zhrzyl',
                                          ei.zhrzyl,
                                          'zhrzy',
                                          ei.zhrzy,
                                          'zhrzyt',
                                          ei.zhrzyt,
                                          'anzkl',
                                          ei.anzkl,
                                          'zdehrxxfs',
                                          ei.zdehrxxfs,
                                          'zdehrsfzgxl',
                                          ei.zdehrsfzgxl,
                                          'zdehrsfzgxw',
                                          ei.zdehrsfzgxw,
                                          'slabs',
                                          ei.slabs,
                                          'zhrbyzt',
                                          ei.zhrbyzt,
                                          'zhryxlb',
                                          ei.zhryxlb,
                                          'zhrzsbh',
                                          ei.zhrzsbh,
                                          'zhrbz',
                                          ei.zhrbz) as clms
                        from ods_hrss.tm_hrmis_educational_info ei
                       where ei.inc_day = '${yyyymmdd}') thei
               group by thei.pernr) edu_info on u.emp_code = edu_info.pernr
   left join (select emp_code,
                     collect_set(label_code) code,
                     collect_set(label_name) name
                from dm_hrmis.tmp_tm_hrmis_search_info
               where styp_code = 'FOR_LANG'
               group by emp_code) t_for_lang on u.emp_code =
                                                t_for_lang.emp_code;
                                                
 -- 将上述5个临时表的信息汇总到一个临时表：员工附属信息表
 drop table if exists dm_hrmis.tmp_emp_satellite_info;
 create table dm_hrmis.tmp_emp_satellite_info stored as parquet as 
 select a.emp_code,       --工号
        a.org_path,       --员工所在组织的组织全路径
        a.high_pot_falg,  --高潜标识
        a.high_pot,       --高潜
        a.reserve_flag,   --后备信息标识
        a.reserve_code,   --后备信息编码
        a.reserve_name,   --后备信息名称
        a.undgr_type_code,--大学生类型编码
        a.undgr_type_name,--大学生类型
        b.qlfctn_code,    --任职资格编码
        b.qlfctn_name,    --任职资格名称
        b.abil_adv_flag,  --能力优势标识
        b.abil_adv_code,  --能力优势编码
        b.abil_adv_label, --能力优势文本
        b.abil_other,     --能力优势其他信息
        b.certi_info,     --证书信息
        b.prof_skill_info,--技能信息
        c.inds_benm_code, --行业标杆经验编码
        c.inds_benm_label,--行业标杆经验
        c.team_mgmt_code, --团队管理经验编码
        c.team_mgmt_label,--团队管理经验
        c.trans_org_code, --跨组织经验编码
        c.trans_org_label,--跨组织经验
        c.trans_pos_flag, --跨职能经验标识
        c.trans_pos_code, --跨职能经验编码
        c.trans_pos_label,--跨组织经验文本
        c.pos_exp_info,   --岗位经验信息
        d.dev_dir_code,   --发展方向编码
        d.dev_dir_label,  --发展方向名称
        d.educational_info,--教育信息
        d.for_lang_code,  --外语能力编码
        d.for_lang_label, --外语能力文本
        d.person_label,   --个人标签信息
        e.perfor_code,    --个人绩效编码
        e.perfor_label,   --个人绩效文本
        e.perfor_adv,     --绩效评价信息优势
        e.perfor_disadv,  --绩效评价信息不足
        e.external_exp,   --外部工作经历信息
        e.proj_exp_flag,  --项目经验标识
        e.proj_exp_label, --项目经验信息
        e.tlnts_check_q,  --盘点结果
        e.tlnts_check_plan,--盘点发展计划
        e.tlnts_check_adv, --盘点评价信息优势
        e.tlnts_check_disadv --盘点评价信息不足
   from dm_hrmis.tmp_emp_talent_pool_info a,
        dm_hrmis.tmp_emp_abil_info        b,
        dm_hrmis.tmp_emp_exp_info         c,
        dm_hrmis.tmp_emp_dir_label_info   d,
        dm_hrmis.tmp_emp_proj_check_info  e
  where a.emp_code = b.emp_code
    and a.emp_code = c.emp_code
    and a.emp_code = d.emp_code
    and a.emp_code = e.emp_code;
    
 --员工基础信息表与员工附属信息临时表合成大宽表
 insert overwrite table dm_hrmis.tm_hrmis_user_full_info
   select u.user_id,
          u.emp_code,
          u.emp_name,
          u.emp_code main_emp_code, 
          u.status,
          u.org_id,
          u.org_name,
          s.org_path,
          u.pos_rank,
          u.main_seq_id,
          u.main_seq_name,
          u.sub_seq_id,
          u.sub_seq_name,
          u.position_id,
          u.position_name,
          u.post_id,
          u.post_name,
          u.job_id,
          u.job_name,
          u.sex,
          u.pos_attr,
          u.education,
          u.post_age,
          u.entry_age,
          u.pos_function,
          u.picture,
          u.work_area,
          u.email,
          u.in_time,
          u.out_time,
          u.birth_date,
          u.cur_rank_start,
          u.cur_post_start,
          u.cur_seq_start,
          u.create_dt,
          u.update_dt,
          u.org_short_name,
          u.pos_function_name,
          u.education_txt,
          u.age,
          u.rcnt_1year_dmarks,
          u.super_emp_code,
          u.super_emp_name,
          u.main_pos_rank,
          u.superior_path,
          s.qlfctn_code,
          s.qlfctn_name,
          s.undgr_type_code,
          s.undgr_type_name,
          s.high_pot_falg,
          s.high_pot,
          s.reserve_flag,
          s.reserve_code,
          s.reserve_name,
          s.abil_adv_flag,
          s.abil_adv_code,
          s.abil_adv_label,
          s.abil_other,
          s.prof_skill_info,
          s.trans_org_code,
          s.trans_org_label,
          s.trans_pos_flag,
          s.trans_pos_code,
          s.trans_pos_label,
          s.team_mgmt_code,
          s.team_mgmt_label,
          s.inds_benm_code,
          s.inds_benm_label,
          s.proj_exp_flag,
          s.proj_exp_label,
          s.dev_dir_code,
          s.dev_dir_label,
          s.for_lang_code,
          s.for_lang_label,
          s.perfor_code,
          s.perfor_label,
          s.perfor_adv,
          s.perfor_disadv,
          s.tlnts_check_q,
          s.tlnts_check_plan,
          s.tlnts_check_adv,
          s.tlnts_check_disadv,
          s.external_exp,
          s.educational_info,
          s.certi_info,
          s.pos_exp_info,
          s.person_label,
          '${yyyymmdd}'
     from ods_hrss.tm_hrmis_user u,
          dm_hrmis.tmp_emp_satellite_info s
 where u.status = 1
   and nvl(out_time/1000, unix_timestamp()) >= unix_timestamp()
   and u.inc_day = '${yyyymmdd}'
   and u.emp_code = s.emp_code;
   
                 set mapred.max.split.size=100000000;
 set mapred.min.split.size.per.node=100000000;
 set mapred.min.split.size.per.rack=100000000;
 set hive.exec.reducers.max=200;
 set hive.exec.compress.output=false;
 set hive.exec.compress.intermediate=true;
 set hive.exec.dynamic.partition=true;
 set hive.exec.dynamic.partition.mode=nostrick;
 set map red.queue.name=hive;
 set map red.job.queue.name=hive;use dm_hrmis;
---清理实战值-----------------岗位经历数据
 drop table if exists dm_hrmis.tm_hrmis_Temp_gwqsj; --1步创建20160701岗位套改前临时表--yes就不出数据创建表
 create table dm_hrmis.tm_hrmis_Temp_gwqsj stored as parquet as
 select  emp.employee empcode,tzcg.txtsh ,emp.dateto,emp.datefrom ,''createTm from ods_sap.bi0_memployee emp
 left join ods_sap.bic_awom1d00200 gwq on gwq.job=emp.job
 left join ods_sap.bi0_TJOB  tzcg on tzcg.job=gwq.job
 where emp.emplstatus='yes' and emp.dateto<= '20160701' and tzcg.txtsh is not null and trim(tzcg.txtsh) != '' limit 1;--2步同步20160701岗位套改前的数据
 insert into table dm_hrmis.tm_hrmis_Temp_gwqsj 
 select cn.empcode,
        cn.txtsh,
        cn.dateto,
        cn.datefrom,
        from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') createTm
   from (select emp.employee empcode,
                tzcg.txtsh,
                emp.dateto,
                emp.datefrom,
                row_number() over(partition by emp.employee,tzcg.txtsh,emp.dateto,emp.datefrom) rn
           from (select * from ods_sap.bi0_memployee where dateto <= '20160701' and datefrom <='20160701' and datefrom >= '19930101' and inc_day = '${yyyymmdd}') emp
           left join ods_sap.bic_awom1d00200 gwq on gwq.job = emp.job  and gwq.inc_day = '${yyyymmdd}'
           left join (select * from ods_sap.bi0_TJOB where txtsh is not null and  inc_day = '${yyyymmdd}' and langu = 1 ) tzcg on tzcg.job = gwq.job
          where emp.emplstatus = '3'
            and emp.dateto >= gwq.datefrom
            and emp.dateto <= gwq.dateto
            and trim(tzcg.txtsh) != '') cn where cn.rn=1;--3步同步20160701岗位套改后的数据取职位
 insert into table dm_hrmis.tm_hrmis_Temp_gwqsj 
 select cn.empcode,
        cn.txtsh,
        cn.dateto,
        cn.datefrom,
        from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') createTm
   from (select emp.employee empcode,
                tzcg.txtsh,
                emp.dateto,
                emp.datefrom,
                row_number() over(partition by emp.employee,tzcg.txtsh,emp.dateto,emp.datefrom) rn
           from (select * from ods_sap.bi0_memployee where dateto >= '20160701' and datefrom >='20160701' and from_unixtime(unix_timestamp(), 'yyyyMMdd')>=datefrom and inc_day = '${yyyymmdd}') emp
           left join ods_sap.bic_awom1d03600 gwq on gwq.job = emp.job and gwq.inc_day = '${yyyymmdd}'
           left join (select * from ods_sap.bi0_TJOB where txtsh is not null and  inc_day = '${yyyymmdd}' and langu = 1 ) tzcg on tzcg.job = gwq.job
          where emp.emplstatus = '3'
            and emp.dateto >= gwq.datefrom
            and emp.dateto <= gwq.dateto
            and trim(tzcg.txtsh) != '') cn where cn.rn=1;
            
            ---清理岗位处理后的数据
 --6-创建处理岗位经历合并结果数据表
 insert overwrite table dm_hrmis.tm_hrmis_sync_gwqsj partition(inc_day='${yyyymmdd}')
 select cn.empcode,
        cn.txtsh,
        cn.dateto,
        cn.datefrom,
        from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') createTm
 from   (select emp.empcode,
        emp.txtsh,
        emp.dateto,
        emp.datefrom,
        row_number() over(partition by emp.empcode,emp.txtsh,emp.dateto,emp.datefrom) rn
   from dm_hrmis.tm_hrmis_Temp_gwqsj emp
  where emp.empcode is not null
    and trim(emp.empcode) != '') cn where cn.rn=1;   ---清理岗位处理后的数据
--8步同步20160701岗位套改后的管理线人数据取职位主要是主序列字段
 insert overwrite table dm_hrmis.tm_hrmis_sync_glzxl partition(inc_day='${yyyymmdd}')
          select cn.empcode,
        cn.txtmd txtsh,
        cn.dateto dateto,
        cn.datefrom datefrom,
        cn.bic_zczxl biczczxl,
        cn.bic_zczixl biczczixl,
        from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') createTm
   from (select emp.employee empcode,
                thr.txtmd,
                emp.dateto,
                emp.datefrom,gwq.bic_zczxl, gwq.bic_zczixl,
                row_number() over(partition by emp.employee,thr.txtmd,emp.dateto,emp.datefrom,gwq.bic_zczxl, gwq.bic_zczixl) rn
           from (select * from ods_sap.bi0_memployee where dateto >= '20160701' and datefrom>='20160701' and from_unixtime(unix_timestamp(), 'yyyyMMdd')>=datefrom and inc_day = '${yyyymmdd}') emp
           left join ods_sap.bi0_thrposition thr on thr.hrposition= emp.hrposition and thr.datefrom>='20160701'  and thr.inc_day = '${yyyymmdd}'
            left join ods_sap.bic_awom1d03600 gwq on gwq.job = emp.job and gwq.inc_day = '${yyyymmdd}'
          where emp.emplstatus = '3'
            and emp.dateto >= gwq.datefrom
            and emp.dateto <= gwq.dateto
            and trim(thr.txtmd) != '') cn where cn.rn=1;
            CREATE TABLE if not exists test_json
 (
     type string,
     name string,
     doc string,
     tableName string
 )
 partitioned by (inc_day string)
 STORED AS parquet
 ;        
         
     create table dm_hrmis.tm_hrmis_Temp_gwqsj stored as parquet as       
            
            select uid,from_unixtime(cast(time/1000 as bigint)) as date_time ,event_id,properties_app_version,properties_res_result,device_id ,tag           ,
 version       ,
 remote_host   ,
 remote_port   ,
 msg_id        ,
 received_time ,
 app_id        ,
 app_key       ,
 platform      ,
 uid           ,
 sec_uid       ,
 login_type    ,
 device_id     ,
 cookie_id     ,
 ip            ,
 time          ,
 event_id      ,
 event_type    ,
 page_url      ,
 page_name,inc_day     from inc_ubas.ods_product_inc_ubas_hrss_clock  limit 0;
本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。