(1)Hive 数仓中一些常用的dt与日期的转换操作

下面总结了自己工作中经常用到的一些日期转换,这类日期转换经常用于报表的时间粒度和统计周期的控制中

 



日期变换:
(1)dt转日期 
to_date(from_unixtime(unix_timestamp('${dt}','yyyyMMdd')))
(2)日期转dt
regexp_replace('${date}','-','')
(3)dt转当月1号日期
to_date(from_unixtime(unix_timestamp(concat(substr('${dt}',1,6),'01'),'yyyyMMdd')))
trunc(to_date(from_unixtime(unix_timestamp('${dt}','yyyyMMdd'))),'MM')
-- 下月1号日期
trunc(add_months(to_date(from_unixtime(unix_timestamp('${dt}','yyyyMMdd'))),1),'MM')
(4)dt转当周星期一日期
next_day(date_add(to_date(from_unixtime(unix_timestamp('${dt}','yyyyMMdd'))), -7), 'Mo')
date_sub(next_day(to_date(from_unixtime(unix_timestamp('${dt}','yyyyMMdd'))),'MO'),7)
-- 下周星期一日期
next_day(to_date(from_unixtime(unix_timestamp('${dt}','yyyyMMdd'))),'MO')
(5)dt前六天日期(dt为星期天时得到的是本周周一的日期)
date_add(to_date(from_unixtime(unix_timestamp('${dt}','yyyyMMdd'))), -6)
(5)dt转当季第一天日期
if(length(floor(substr('${dt}',5,2)/3.1)*3+1)=1,concat(substr('${dt}',1,4),'-0',floor(substr('${dt}',5,2)/3.1)*3+1,'-01'),concat(substr('${dt}',1,4),'-',floor(substr('${dt}',5,2)/3.1)*3+1,'-01'))
(6)dt转半年第一天日期
if(length(floor(substr('${dt}',5,2)/6.1)*6+1)=1,concat(substr('${dt}',1,4),'-0',floor(substr('${dt}',5,2)/6.1)*6+1,'-01'),concat(substr('${dt}',1,4),'-',floor(substr('${dt}',5,2)/6.1)*6+1,'-01'))
(7)dt转当年1号日期
concat(substr('${dt}',1,4),'-01-01')
(8)在同时有日周月粒度时要注意数据的时间范围,有时每月的第一个自然周会跨月,比如2019年3月的第一周的日期是20190225-20190303
where agent_business_date between date_add_day('${dt}',-31) and to_date(from_unixtime(unix_timestamp('${dt}','yyyyMMdd')))
where dt between regexp_replace(date_add_day('${dt}',-31),'-','') and '${dt}'


------------------------------------------------------------------------------------------
-- 日期维度表表结构edw_public.dim_esf_edw_pub_date
------------------------------------------------------------------------------------------
col_name	                data_type	comment
------------------------------------------------------------------------	
calendar_date	              string	    日期,格式为"YYYY-MM-DD"	
week_english_name	        string	    星期英文名	
week_chinese_name	        string	    星期中文名	
day_of_week_number	        int	    所属一周当中的第几天	
calendar_month_code	        string	    日期所属月份,格式为"YYYY-MM"	
calendar_month_number	      int	    所属月份数字	
month_english_name	        string	    月份英文名	
month_chinese_name	        string	    月份中文名	
day_of_month_number	        int	    所属月份当中的第几天	
calendar_quater_code	      string	    日期所属季度,格式为"YYYY-QT"	
calendar_quater_number	      int	    所属季度数字	
day_of_quater_number	      int	    所属季度当中的第几天	
calendar_half_year_code	      string	    日期所属半年,格式为"YYYY-HY"	
calendar_half_year_number	int	    所属半年数字,1为上半年,2为下半年	
calendar_year_code	        string	    日期所属年份,格式为"YYYY"	
day_of_year_number	        int	    所属年份当中的第几天	
work_day_flag	              string	    工作日标志: Y - 是/ N - 否	
holiday_flag	              string	    节假日标志: Y - 是/ N - 否


-- 日期维度表的使用
-- 当天日期
SELECT
	calendar_date
FROM
	edw_public.dim_esf_edw_pub_date
WHERE
	calendar_date = regexp_replace('${dt}','(\\d{4})(\\d{2})(\\d{2})','$1-$2-$3')



-- Finereport中日周月季半年年 各周期末日期的算法
select 
    ${if(粒度 == 1," case when date(max(calendar_date))>=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) else date(max(calendar_date)) end as period_end_date","")}
    ${if(粒度 == 2," distinct case when day_of_week_number = 1 and date_add('day',6,date(calendar_date)) >=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date))  when day_of_week_number = 7 and date(calendar_date) >=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) when day_of_week_number = 1 then date_add('day',6,date(calendar_date))  when day_of_week_number = 7 then date(calendar_date) else date(calendar_date) end as period_end_date ","")}
    ${if(粒度 == 3," case when date(max(calendar_date))>=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) else date(max(calendar_date)) end as period_end_date ","")}
    ${if(粒度 == 4," case when date(max(calendar_date))>=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) else date(max(calendar_date)) end as period_end_date ","")}
    ${if(粒度 == 5," case when date(max(calendar_date))>=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) else date(max(calendar_date)) end as period_end_date ","")}
    ${if(粒度 == 6," case when date(max(calendar_date))>=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) else date(max(calendar_date)) end as period_end_date ","")}
from
    edw_public.dim_esf_edw_pub_date
where calendar_date >= '${开始时间}' and calendar_date <= '${结束时间}'
${if(粒度 == 1," group by calendar_date ","")}
${if(粒度 == 2," and day_of_week_number in (1,7) ","")}
${if(粒度 == 3," group by calendar_month_code  ","")}
${if(粒度 == 4," group by calendar_quater_code  ","")}
${if(粒度 == 5," group by calendar_year_code  ","")}
${if(粒度 == 6," group by calendar_half_year_code  ","")}

-- Finereport中日周月季半年年 各周期期初期末日期的算法(这种计算方法当前日期是20190330,输入的日期范围是2019-03-01至2091-03-28则输出的月日期范围是2019-03-29)
select 
    ${if(粒度 == 1,"date(calendar_date) as period_start_date, date(calendar_date) as period_end_date ","")}
    ${if(粒度 == 2,"case when day_of_week_number = 1 then  date(calendar_date) when day_of_week_number = 7 then date_add('day',-6, date(calendar_date)) end as period_start_date, case  when day_of_week_number = 1 and date_add('day',6, date(calendar_date)) >=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) when day_of_week_number = 7 and date(calendar_date)>=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) when day_of_week_number = 1 then date_add('day',6, date(calendar_date))  when day_of_week_number = 7 then date(calendar_date)  end as period_end_date ","")}
    ${if(粒度 == 3,"date(calendar_date) as period_start_date, case when date_add('day',-day(date(calendar_date)),date_add('month',1,(date(calendar_date))))>=date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) else date_add('day',-day(date(calendar_date)),date_add('month',1,(date(calendar_date)))) end as period_end_date ","")}
    ${if(粒度 == 4,"calendar_date as period_start_date,date_add('day',-1,date_add('month',1,date(substr(calendar_date,1,4)||'-'||cast(cast(floor(cast(substr(calendar_date,6,2) as int)/3.1)*3+3 as int) as varchar)||'-01')))  as  period_end_date ","")}
    ${if(粒度 == 5,"date(concat(substr(calendar_date,1,4),'-01','-01')) as period_start_date,case when date(concat(substr(calendar_date,1,4),'-12','-31'))>= date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) else date(concat(substr(calendar_date,1,4),'-12','-31')) end as  period_end_date","")}
    ${if(粒度 == 6,"date(min(calendar_date)) as period_start_date,case when date(max(calendar_date))>= date(date_add('day',-1,current_date)) then date(date_add('day',-1,current_date)) else date(max(calendar_date)) end as period_end_date","")}
from
    edw_public.dim_esf_edw_pub_date
where  calendar_date >= '${开始时间}' and calendar_date <= '${结束时间}'
${if(粒度 == 1," and 1 = 1 ","")}
${if(粒度 == 2," and day_of_week_number in (1,7) ","")}
${if(粒度 == 3," and day_of_month_number = 1","")}
${if(粒度 == 4," and day_of_quater_number = 1","")}
${if(粒度 == 5," and day_of_year_number = 1","")}
${if(粒度 == 6," group by calendar_half_year_code ","")}


------------------------------------------------------------------------------------------------
-- 根据输入的时间范围计算期末日期
------------------------------------------------------------------------------------------------
select t1.*
from
-- 日周月季年半年不同粒度的统计数据各存为了一张表 
    edw_reports.adm_xf_edw_house_sub_project_report_00${dtype}ly_di t1--日报
join
(
-- 日
SELECT
    calendar_date
FROM
    edw_public.dim_esf_edw_pub_date
WHERE
    calendar_date BETWEEN '${bdt}' AND '${edt}'
AND '${dtype}' = '1_dai'
UNION
-- 月
SELECT
    MAX(calendar_date) AS calendar_date
FROM
    edw_public.dim_esf_edw_pub_date
WHERE
    calendar_date BETWEEN '${bdt}' AND '${edt}'
AND '${dtype}' = '2_dai'
GROUP BY
    calendar_month_number

UNION
-- 周
SELECT
    calendar_date
FROM
    edw_public.dim_esf_edw_pub_date
WHERE
    calendar_date BETWEEN '${bdt}' AND '${edt}'
AND day_of_week_number = 7
AND '${dtype}' = '3_dai'

UNION
-- 季
SELECT
    MAX(calendar_date) AS calendar_date
FROM
    edw_public.dim_esf_edw_pub_date
WHERE
    calendar_date BETWEEN '${bdt}' AND '${edt}'
AND '${dtype}' = '4_dai'
GROUP BY
    calendar_quater_code

UNION
-- 年
SELECT
    MAX(calendar_date) AS calendar_date
FROM
    edw_public.dim_esf_edw_pub_date
WHERE
    calendar_date BETWEEN '${bdt}' AND '${edt}'
AND '${dtype}' = '5_dai'
GROUP BY
    calendar_year_code

UNION
-- 半年
SELECT
    MAX(calendar_date) AS calendar_date
FROM
    edw_public.dim_esf_edw_pub_date
WHERE
    calendar_date BETWEEN '${bdt}' AND '${edt}'
AND '${dtype}' = '6_dai'
GROUP BY
    calendar_half_year_code

UNION
SELECT
    MAX(calendar_date) AS calendar_date
FROM
    edw_public.dim_esf_edw_pub_date
WHERE
    calendar_date BETWEEN '${bdt}' AND '${edt}'
ORDER BY
    calendar_date
) t2
on t1.statistic_date = t2.calendar_date
where 
statistic_date between '${bdt}' and '${edt}'
${if(len(tenant_name) == 0,"","and house_sub_project_organization_short_name = '" + tenant_name + "'")}
${if(len(status) == 0,"","and house_sub_project_cooperation_status_code = " + status)}
${if(len(tenant_type) == 0,"","and house_sub_project_organization_business_type_code= " + tenant_type)}
${if(len(project_type) == 0,"","and house_sub_project_cooperation_type_code= " + project_type)}
order by statistic_date



 

(2)Hive 计算指定日期在本周的第几天和指定日期的本周指定天数的日期

注意这里需要先明确本周的第一天到底是星期一还是星期天?dayofweek函数定义星期天是一周中的第一天,另外dayofweek在hive2.2.0才开始支持,低版本的hive不支持dayofweek函数,需要使用其他方法实现,请见我的博客Hive和sparksql中的dayofweek



-- 计算指定日期本周的第一天和最后一天
select
     day
    , dayofweek(day)                                                                    as dw1
    , date_add(day,1 - dayofweek(day))                                                  as Su_s -- 周日_start
    , date_add(day,7 - dayofweek(day))                                                  as Sa_e -- 周六_end
    , case when dayofweek(day) = 1 then 7 else dayofweek(day) - 1 end                   as dw2
    , date_add(day,1 - case when dayofweek(day) = 1 then 7 else dayofweek(day) - 1 end) as Mo_s -- 周一_start
    , date_add(day,7 - case when dayofweek(day) = 1 then 7 else dayofweek(day) - 1 end) as Su_e -- 周日_end
    , trunc(day,'YY')                                                                   as yearly_first_day
    , trunc(day,'MM')                                                                   as monthly_first_day    -- 本月1号日期
    , last_day(day)                                                                     as monthly_last_day     -- 本月最后一天日期
    , date_add(next_day(day,'MO'), -7)                                                  as weekly_first_day     -- 本周一日期
    , next_day(date_add(day, -7),'MO')                                                  as weekly_first_day     -- 本周一日期    
    , case when (7 - datediff(next_day(day,'SU'),day)) <> 0 then next_day(day,'SU') else day end as weekly_end_day      -- 本周日日期

from (
    select '2018-11-01' as day union all
    select '2018-11-02' as day union all
    select '2018-11-03' as day union all
    select '2018-11-04' as day union all
    select '2018-11-05' as day union all
    select '2018-11-06' as day union all
    select '2018-11-07' as day union all
    select '2018-11-08' as day union all
    select '2018-11-09' as day union all
    select '2018-11-10' as day union all
    select '2018-11-11' as day union all
    select '2018-11-12' as day union all
    select '2018-11-13' as day union all
    select '2018-11-14' as day union all
    select '2018-11-15' as day union all
    select '2018-11-16' as day union all
    select '2018-11-17' as day union all
    select '2018-11-18' as day union all
    select '2018-11-19' as day union all
    select '2018-11-20' as day union all
    select '2018-11-21' as day union all
    select '2018-11-22' as day union all
    select '2018-11-23' as day union all
    select '2018-11-24' as day union all
    select '2018-11-25' as day union all
    select '2018-11-26' as day union all
    select '2018-11-27' as day union all
    select '2018-11-28' as day union all
    select '2018-11-29' as day union all
    select '2018-11-30' as day union all
) t1
;



 

其他一些参考资料:

Hive 时间日期处理总结