1:计算两个时间相差多少天:hive内置函数datediff
 例子Demo:datediff('2009-07-28','2009-07-30')=2
2:也可以写自定义函数

package cdel.edu.hive.udf.demo04; 



 import java.text.ParseException; 

 import java.text.SimpleDateFormat; 

 import java.util.Calendar; 

 import java.util.Date; 



 import org.apache.hadoop.hive.ql.exec.UDF; 



 /** 

  * 计算一个时间到今天的相差多少天 

  * @author dell 

  * 

  */ 

 public class MyDateDiff extends UDF { 



public String evaluate(String str) { 

try { 

String  today = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date().getTime()); 

int days = daysBetween(str,today); 

return String.valueOf(days); 

} catch (Exception e) { 

return null; 

} 

} 



/** 

* 计算两个日期之间相差的天数 

* 

* @param smdate 

*            较小的时间 

* @param bdate 

*            较大的时间 

* @return 相差天数 

* @throws ParseException 

*/ 

public static int daysBetween(Date smdate, Date bdate) 

throws ParseException { 

SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 

smdate = sdf.parse(sdf.format(smdate)); 

bdate = sdf.parse(sdf.format(bdate)); 

Calendar cal = Calendar.getInstance(); 

cal.setTime(smdate); 

long time1 = cal.getTimeInMillis(); 

cal.setTime(bdate); 

long time2 = cal.getTimeInMillis(); 

long between_days = (time2 - time1) / (1000 * 3600 * 24); 



return Integer.parseInt(String.valueOf(between_days)); 

} 



/** 

* 字符串的日期格式的计算 

*/ 

public static int daysBetween(String smdate, String bdate) 

throws ParseException { 

SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); 

Calendar cal = Calendar.getInstance(); 

cal.setTime(sdf.parse(smdate)); 

long time1 = cal.getTimeInMillis(); 

cal.setTime(sdf.parse(bdate)); 

long time2 = cal.getTimeInMillis(); 

long between_days = (time2 - time1) / (1000 * 3600 * 24); 



return Integer.parseInt(String.valueOf(between_days)); 

} 



public static void main(String[] args) throws Exception { 

   String str="2015-04-27"; 

       System.out.println(new MyDateDiff().evaluate(str)); 

} 

 } 





 -创建自定义函数 

 add jar /home/admin/xxg/MyDateDiff.jar; 

 create temporary function myDateDiff as 'cdel.edu.hive.udf.demo04.MyDateDiff'; 



 --hive 统计  

   

 -- 最近30 天内学员的做题量统计  

 --分网统计 

 create table  qz_count as 

 SELECT   mps.userID,mem.memberid,COUNT(distinct qpq.questionID) as  doQuesCount,mem.domain, 

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))   as daytime 

 from   tmp_qz_member_paper_score  mps    

 left join  tmp_qz_member_paper_question qpq  on  qpq.paperScoreID = mps.paperScoreID  and mps.domain = qpq.domain  

 left join order.tmp_member_info_new  mem on mem.uid = mps.userID and mem.domain = mps.domain 

 where   cast(myDateDiff(concat_ws(' ',substr(mps.createTime,1,10),'')) as int) <=30 

 group  by  mps.userID,mem.memberid ,mem.domain, 

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) 

 having  COUNT(distinct qpq.questionID)  >0 

 order by   doQuesCount  desc  ; 



 SELECT   mps.userID,mem.memberid,COUNT(distinct qpq.questionID) as  doQuesCount,mem.domain, 

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))   as daytime 

 from   tmp_qz_member_paper_score  mps    

 left join  tmp_qz_member_paper_question qpq  on  qpq.paperScoreID = mps.paperScoreID  and mps.domain = qpq.domain  

 left join order.tmp_member_info_new  mem on mem.uid = mps.userID and mem.domain = mps.domain 

 where  mps.userID='48888922'   and  cast(myDateDiff(concat_ws(' ',substr(mps.createTime,1,10),'')) as int) <=31 

 group  by  mps.userID,mem.memberid ,mem.domain, 

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) 

 having  COUNT(distinct qpq.questionID)  >0 

 order by   doQuesCount  desc  ; 



 SELECT   qpq.* 

 from   tmp_qz_member_paper_score  mps    

 left join  tmp_qz_member_paper_question qpq  on  qpq.paperScoreID = mps.paperScoreID  and mps.domain = qpq.domain  

 where  mps.userID='48888922'   limit 100; 



 经验验证:没有医学网做题题目信息 

 select  distinct logdata from tmp_qz_member_paper_question where domain='med' limit 10 

 验证数据 

 select * from qz_count  where userid=48888922 order by   doQuesCount  desc limit 100; 



 select * from qz_count  where doQuesCount>=500 order by   doQuesCount  desc ; 



 -- 不分网统计 

 create table  qz_count as 

 SELECT   mps.userID,mem.memberid,COUNT(distinct qpq.questionID) as  doQuesCount, 

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))   as daytime 

 from   tmp_qz_member_paper_score  mps    

 left join  tmp_qz_member_paper_question qpq  on  qpq.paperScoreID = mps.paperScoreID  and mps.domain = qpq.domain  

 left join order.tmp_member_info_new  mem on mem.uid = mps.userID and mem.domain = mps.domain 

 where   cast(myDateDiff(concat_ws(' ',substr(mps.createTime,1,10),'')) as int) <=30 

 group  by  mps.userID,mem.memberid ,  

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) 

 having  COUNT(distinct qpq.questionID)  >0 

 order by   doQuesCount  desc  ; 





 -- 统计学员每天做多少套试卷 

 -- 分网统计 

 create table  paper_count as 

 select   mps.userID,mem.Memberid,mem.domain, COUNT(distinct mps.paperViewID) AS doPaperCount , 
   

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))   as daytime 

 from tmp_qz_member_paper_score mps 

 left join  order.tmp_member_info_new  mem  on mem.uID = mps.userID  and mem.domain = mps.domain 

 where  cast(myDateDiff(concat_ws(' ',substr(mps.createTime,1,10),'')) as int) <=31 

 group  by  mps.userID,mem.Memberid, mem.domain, 

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) 

 having  COUNT(distinct mps.paperViewID) >0 

 order by   doPaperCount   desc; 



  -- 不分网统计 

 select   mps.userID,mem.Memberid,COUNT(distinct mps.paperViewID) AS doPaperCount , 
   

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))   as daytime 

 from tmp_qz_member_paper_score mps 

 left join  order.tmp_member_info_new  mem  on mem.uID = mps.userID  and mem.domain = mps.domain 

 where   mps.userID=48888922 

 group  by mps.userID,mem.Memberid, 

 to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) 

 having  COUNT(distinct mps.paperViewID) >0 

 order by   doPaperCount   desc;



测试:

select * from  paper_count  order by   doPaperCount   desc  limit 100; 



 select   mps.userID,  mps.paperViewID  from tmp_qz_member_paper_score mps  where   mps.userID=48888922; 







 -- 导出本地文件中 

 insert overwrite local directory '/home/admin/xxg/qz/med/' 

 row format delimited 

 fields terminated by '\t' 

 select  *  from qz_count; 







 -- 最近30天内学员的提问数量 

 -- 针对主题的提问统计 

   

 SELECT ft.userID,ft.userName ,  COUNT(ft.userID)  as num FROM  FAQ_TOPIC  ft 

 group by FT.userid ,ft.userName 





 SELECT ft.userID,ft.userName ,  COUNT(af.USERID)  as num  

 FROM  FAQ_TOPIC  ft 

 inner JOIN   faq   af on ft.topicID = af.topicID and af.istopic = 1 

 group by ft.userID,ft.userName 



 create table  faq_count as 

 SELECT   ft.userID, mem.memberid, COUNT(distinct ft.topicID) as num, mem.domain, 

 to_date(concat_ws(' ',substr(ft.createTime,1,10),substr(ft.createTime,11,length(ft.createTime))))   as daytime 

 FROM  tmp_faq_topic   ft   

 left join order.tmp_member_info_new  mem on mem.uid = ft.userID   and mem.domain = ft.domain    

 where     cast(myDateDiff(concat_ws(' ',substr(ft.createTime,1,10),'')) as int) <=30 

 group by ft.userID, mem.memberid,mem.domain, 

 to_date(concat_ws(' ',substr(ft.createTime,1,10),substr(ft.createTime,11,length(ft.createTime)))) 

 having  COUNT(distinct ft.topicID)  >0 

 order by num desc  ; 





 --针对主题和继续提问的 --- 就是如果一个主题,两个继续的话 你统计的sum =3 



 create table  faq_count2 as 

 SELECT   ft.userID, mem.memberid, COUNT(distinct af.faqid) as num, mem.domain, 

 to_date(concat_ws(' ',substr(af.createTime,1,10),substr(af.createTime,11,length(af.createTime))))   as daytime 

 FROM   tmp_faq af   

 left join  tmp_faq_topic   ft   on ft.topicID = af.topicID  and af.domain = ft.domain   

 left join order.tmp_member_info_new  mem on mem.uid = ft.userID   and mem.domain = af.domain    

 where     cast(myDateDiff(concat_ws(' ',substr(af.createTime,1,10),'')) as int) <=30 

 group by ft.userID, mem.memberid, mem.domain, 

 to_date(concat_ws(' ',substr(af.createTime,1,10),substr(af.createTime,11,length(af.createTime)))) 

 having  COUNT(distinct af.faqid)  >0 

 order by num desc   limit 10; 





 -- 导出本地文件中 

 insert overwrite local directory '/home/admin/xxg/faq/' 

 row format delimited 

 fields terminated by '\t' 

 select  *  from faq_count; 



 -- 删除表信息 

 drop table qz_count; 

 drop table faq_count; 

 drop table paper_count; 



 -- 统计  

 每天做题 >=3000道以上学员的个数 

 每天做题 >=2500  and <3000 道以上的学员个数 

 每天做题 >=2000  and <2500 道以上的学员个数 

 每天做题 >=1500 and <2000 道以上的学员个数 

 每天做题 >=1000 and <1500 道以上的学员个数 

 每天做题 >=500 and <1000 道以上的学员个数 

 每天做题 >=100 and <500 道以上的学员个数 

 每天做题 >=50 and <100 道以上的学员个数 

 每天做题 >0   and <50  道以上的学员个数 



 -- 根据学员分组统计,学员做题数量统计 

 select userid,memberid,domain, 

 SUM(case when doquescount=0    then 1 else 0 end) as n0,   

 SUM(case when doquescount>0  and doquescount<50  then 1 else 0 end) as n1,   

 SUM(case when doquescount>=50  and doquescount<100  then 1 else 0 end) as n2,  

 SUM(case when doquescount>=100  and doquescount<500  then 1 else 0 end) as n3,  

 SUM(case when doquescount>=500  and doquescount<1000  then 1 else 0 end) as n4, 

 SUM(case when doquescount>=1000  and doquescount<1500  then 1 else 0 end) as n5, 

 SUM(case when doquescount>=1500  and doquescount<2000  then 1 else 0 end) as n6, 

 SUM(case when doquescount>=2000  and doquescount<2500  then 1 else 0 end) as n7, 

 SUM(case when doquescount>=2500  and doquescount<3000  then 1 else 0 end) as n8, 

 SUM(case when doquescount>=3000    then 1 else 0 end) as n9    

 from  qz_count  

 group by  userid,memberid, domain limit 100; 



 -- 根据日期分组统计,学员做题量统计 

 select  daytime, 

 SUM(case when doquescount=0    then 1 else 0 end) as n0,   

 SUM(case when doquescount>=1  and doquescount<50  then 1 else 0 end) as n1,   

 SUM(case when doquescount>=50  and doquescount<100  then 1 else 0 end) as n2,  

 SUM(case when doquescount>=100  and doquescount<500  then 1 else 0 end) as n3,  

 SUM(case when doquescount>=500  and doquescount<1000  then 1 else 0 end) as n4, 

 SUM(case when doquescount>=1000  and doquescount<1500  then 1 else 0 end) as n5, 

 SUM(case when doquescount>=1500  and doquescount<2000  then 1 else 0 end) as n6, 

 SUM(case when doquescount>=2000  and doquescount<2500  then 1 else 0 end) as n7, 

 SUM(case when doquescount>=2500  and doquescount<3000  then 1 else 0 end) as n8, 

 SUM(case when doquescount>=3000    then 1 else 0 end) as n9  

 from  qz_count  

 group by   daytime; 



 测试 

 select  count(distinct userid) 

 from  qz_count   

 where  doquescount>=2500  and doquescount<3000; 



 select  * 

 from  qz_count   

 where  doquescount>=2500  and doquescount<3000; 



 select  count(distinct userid) 

 from  qz_count   

 where  doquescount>=3000 ; 

 --  

 -- 根据日期统计每天学员的所做试卷量 

 select  daytime, 

 SUM(case when dopapercount=0  then 1 else 0 end) as n0,   

 SUM(case when dopapercount>=1  and dopapercount<5  then 1 else 0 end) as n1,   

 SUM(case when dopapercount>=5  and dopapercount<10  then 1 else 0 end) as n2,  

 SUM(case when dopapercount>=10  and dopapercount<50  then 1 else 0 end) as n3,  

 SUM(case when dopapercount>=50  and dopapercount<100  then 1 else 0 end) as n4, 

 SUM(case when dopapercount>=100  and dopapercount<150  then 1 else 0 end) as n5, 

 SUM(case when dopapercount>=150  and dopapercount<200  then 1 else 0 end) as n6, 

 SUM(case when dopapercount>=200  and dopapercount<250  then 1 else 0 end) as n7, 

 SUM(case when dopapercount>=250  and dopapercount<300  then 1 else 0 end) as n8, 

 SUM(case when dopapercount>=300    then 1 else 0 end) as n9  

 from  paper_count  

 group by   daytime  limit 100; 



 测试检验: 

 select *  from  paper_count  order by dopapercount desc limit 100; 

 select  count(*) from paper_count  where   dopapercount>=10  and dopapercount<50   and daytime='2015-04-26'   limit 100; 

 select  count(distinct userid) from paper_count  where   dopapercount>=10  and dopapercount<50   and daytime='2015-04-26'   limit 100; 





   



 -- 根据日期统计每天学员在不同区间内答疑数量 

 每天答疑提问主题 >=60  道以上学员的个数 

 每天答疑提问主题 >=55 and < 60道以上学员的个数 

 每天答疑提问主题 >=50 and < 55道以上学员的个数 

 每天答疑提问主题 >=45  and <50 道以上的学员个数 

 每天答疑提问主题 >=40  and <45 道以上的学员个数 

 每天答疑提问主题 >=35 and <40 道以上的学员个数 

 每天答疑提问主题 >=30 and <35 道以上的学员个数 

 每天答疑提问主题 >=25 and <30 道以上的学员个数 

 每天答疑提问主题 >=20 and <25 道以上的学员个数 

 每天答疑提问主题 >=15 and <20 道以上的学员个数 

 每天答疑提问主题 >=10 and <15 道以上的学员个数 

 每天答疑提问主题 >=5   and <10  道以上的学员个数 

 每天答疑提问主题 >=1   and <5  道以上的学员个数 

 每天答疑提问主题 =0      道以上的学员个数 



 select  daytime, 

 SUM(case when num=0  then 1 else 0 end) as n0,   

 SUM(case when num>=1  and num<5  then 1 else 0 end) as n1,   

 SUM(case when num>=5  and num<10  then 1 else 0 end) as n2,  

 SUM(case when num>=10  and num<15  then 1 else 0 end) as n3,  

 SUM(case when num>=15  and num<20  then 1 else 0 end) as n4, 

 SUM(case when num>=20  and num<25  then 1 else 0 end) as n5, 

 SUM(case when num>=25  and num<30  then 1 else 0 end) as n6, 

 SUM(case when num>=30  and num<35  then 1 else 0 end) as n7, 

 SUM(case when num>=35  and num<40  then 1 else 0 end) as n8, 

 SUM(case when num>=40  and num<45  then 1 else 0 end) as n9, 

 SUM(case when num>=45  and num<50  then 1 else 0 end) as n10, 

 SUM(case when num>=50  and num<55  then 1 else 0 end) as n11, 

 SUM(case when num>=55  and num<60  then 1 else 0 end) as n12, 

 SUM(case when num>=60    then 1 else 0 end) as n13  

 from  faq_count  

 group by   daytime  limit 100;