原数据如下(不知道怎么在CSDN插入excel文件)
小知识点①:MySQL运算顺序为
from–> where–> group by --> having --> select --> order by
小知识点②:窗口函数模板为
函数名(字段名) over(partition by <要分列的组> order by <要排序的列> rows between <数据范围>)
1、导入数据、处理数据
desc user_trade;
alter table user_trade modify column user_name varchar(20) comment '用户名';
alter table user_trade modify column piece int comment '购买数量';
alter table user_trade modify column price double comment '价格';
alter table user_trade modify column pay_amount double comment '支付金额';
alter table user_trade modify column goods_category varchar(20) comment '商品品类';
alter table user_trade modify column pay_time date comment '日期';
2、集合类窗口函数
# 普通场景下的聚合函数是将多条记录聚合为一条(多到一);
# 窗口函数是每条记录都会执行,有几条记录执行完还是几条(多到多)。
①sum()over() 累计求和
#需求1:查询出2019年每月的支付总额和当年累积支付总额
#tips:此处over()中如果不加order by,则会生成相同的12个月的sum值
select a.mon
,a.sum_month_pay
,sum(a.sum_month_pay) over(order by a.mon) as sum_pay from
(
select month(pay_time) as mon
,round(sum(pay_amount),0) as sum_month_pay
from user_trade
where year(pay_time) = '2019'
group by month(pay_time)
) as a;
#需求2:查询出2018-2019年每月的支付总额和当年累积支付总额
select a.year
,a.month
,a.sum_month_pay
,sum(a.sum_month_pay) over(partition by a.year order by a.month) as sum_pay from
(
select year(pay_time) as year
,month(pay_time) as month
,round(sum(pay_amount),0) as sum_month_pay
from user_trade
where year(pay_time) = '2019'
or year(pay_time) = '2018'
group by year,month
) a ;
②avg()over() 移动平均
#需求3: 查询出2019年每个月的近三月移动平均支付金额
select a.month
,a.sum_month_pay
,round(avg(a.sum_month_pay) over(order by a.month rows between 2 preceding and current row),0) as avg_pay from
(
select month(pay_time) as month
,round(sum(pay_amount),0) as sum_month_pay
from user_trade
where year(pay_time) = '2019'
group by month(pay_time)
) as a;
③max()/min() over() 最大/最小值
#需求4: 查询出每四个月的最大月总支付金额
select a.month
,a.sum_month_pay
,max(a.sum_month_pay) over(order by month rows between 3 preceding and current row) as max_pay_4 from
(
select substring(pay_time,1,7) as month
,round(sum(pay_amount),0) as sum_month_pay
from user_trade
group by month
) a;
3、排序类窗口函数
①row_number()、rank()、dense_rank()
row_number:每一行记录生成一个序号,依次排序且不会重复。 1234...
rank:跳跃排序,生成的序号有可能不连续。1134..
dense_rank:在生成序号时是连续的。1123...
#需求5: 2020年1月,购买商品品类数的用户排名
select user_name
,count(distinct(goods_category)) as goods_count
,row_number() over(order by count(distinct(goods_category))) as rank1
,rank() over(order by count(distinct(goods_category))) as rank2
,dense_rank() over(order by count(distinct(goods_category))) as rank3
from user_trade
where substring(pay_time,1,7) = '2020-01'
group by user_name
②ntile(n) over()
ntile(n)用于将分组数据按照顺序切分成n片,返回当前切片值,不支持rows between
#需求6: 查询出将2020年2月的支付用户,按照支付金额分成5组后的结果
select user_name
,sum(pay_amount) as sum_pay_2
,ntile(5) over(order by sum(pay_amount) desc) as level
from user_trade
where substring(pay_time,1,7) = '2020-02'
group by user_name
#需求7: 查询出2020年支付金额排名前30%的所有用户
#tips:having不能放进窗口函数运算里,得从筛选结果表中进一步筛选
select a.user_name
,a.sum_pay_20
,a.level from
(
select user_name
,round(sum(pay_amount),0) as sum_pay_20
,ntile(10) over(order by sum(pay_amount) desc) as level
from user_trade
where year(pay_time) = '2020'
group by user_name
) a
having level in (1,2,3)
4、偏移分析函数
①lag() over() 向上偏移 & lead() over() 向下偏移
lag(exp_str,offset,defval)
exp_str:字段名
offset:偏移量
defval:默认值。当向上偏移了offset行已经超出了表的范围时,lag()函数将defval这个参数值作为函数的返回值,若没有指定默认值,则返回NULL。
#需求8: 查询出支付时间间隔超过100天的用户数
select distinct(a.user_name)
from
(
select user_name
,pay_time
,lag(pay_time) over(partition by user_name order by pay_time) as lg
from user_trade
) a
where datediff(a.pay_time,a.lg) > 100;
#需求9: 查询出每年支付时间间隔最长的用户
select b.years
,b.user_name
,b.pay_days
from
(
select a.years
,a.user_name
,datediff(a.pay_time,a.lg) as pay_days
,rank() over(partition by a.years order by datediff(a.pay_time,a.lg) desc) as rank1
from
(
select year(pay_time) as years
,user_name
,pay_time
,lag(pay_time) over(partition by user_name,year(pay_time) order by pay_time) as lg
from user_trade
) as a
where a.lg is not null
) as b
where b.rank1 =1