2、蚂蚁森林低碳用户排名分析
问题:查询user_low_carbon表中每日流水记录,条件为:
用户在2017年,连续三天(或以上)的天数里,
每天减少碳排放(low_carbon)都超过100g的用户低碳流水。
需要查询返回满足以上条件的user_low_carbon表中的记录流水。
例如用户u_002符合条件的记录如下,因为2017/1/2~2017/1/5连续四天,每天减少的碳排放量之和都大于等于100g:
seq(key) user_id data_dt low_carbon
xxxxx10 u_002 2017/1/2 150
xxxxx11 u_002 2017/1/2 70
xxxxx12 u_002 2017/1/3 30
xxxxx13 u_002 2017/1/3 80
xxxxx14 u_002 2017/1/4 150
xxxxx15 u_002 2017/1/5 101
解答:
前提
1,每天总减排大于等于100g(与上面的示例及下面的SQL保持一致)
2,是每个用户每天
3,连续三天或以上
- 第一步:统计每个用户每天的总减排
-- Step 1: total carbon reduction per user per day (no filtering yet).
select
    user_id,
    data_dt,
    sum(low_carbon)
from user_low_carbon
group by
    user_id,
    data_dt
- 第二步:筛选出每日总减排大于等于100g的日期
-- Step 2: keep only the (user, day) pairs in 2017 whose daily total reaches 100g.
with flow_2017 as (
    -- Assumes data_dt is a 'yyyy/M/d' string as in the sample rows (e.g. 2017/1/2).
    -- It is re-formatted as zero-padded 'yyyy-MM-dd' so that it sorts
    -- chronologically and is accepted by Hive date functions.
    select
        user_id,
        from_unixtime(unix_timestamp(data_dt, 'yyyy/M/d'), 'yyyy-MM-dd') as data_dt,
        low_carbon
    from user_low_carbon
    where substr(data_dt, 1, 4) = '2017'  -- the task only covers the year 2017
)
select
    user_id,
    data_dt,
    sum(low_carbon) as sum_low_carbon
from flow_2017
group by
    user_id,
    data_dt
-- ">= 100" follows the worked example in the problem statement.
having sum(low_carbon) >= 100
- 第三步:按用户分区、按达标日期排序,为每个达标日期生成行号rk
-- Step 3: number each user's qualifying days in chronological order.
with flow_2017 as (
    -- Assumes data_dt is a 'yyyy/M/d' string as in the sample rows (e.g. 2017/1/2).
    -- Zero-padded 'yyyy-MM-dd' is required here: unpadded strings sort
    -- lexicographically ('2017-1-10' < '2017-1-2') and would corrupt rk.
    select
        user_id,
        from_unixtime(unix_timestamp(data_dt, 'yyyy/M/d'), 'yyyy-MM-dd') as data_dt,
        low_carbon
    from user_low_carbon
    where substr(data_dt, 1, 4) = '2017'  -- the task only covers the year 2017
),
qualified_day as (
    -- One row per (user, day) whose daily total reaches 100g.
    select
        user_id,
        data_dt,
        sum(low_carbon) as sum_low_carbon
    from flow_2017
    group by
        user_id,
        data_dt
    having sum(low_carbon) >= 100
)
select
    user_id,
    data_dt,
    sum_low_carbon,
    row_number() over (partition by user_id order by data_dt) as rk
from qualified_day
- 第四步:根据等差数列规律(或者说,一列连续的日期,每行的日期减去它所对应的序号,结果都是相等的),用日期减去行号rk,得到同一段连续日期共用的标记sub_date
-- Step 4: derive a streak marker. Within a run of consecutive days,
-- (date - row number) is constant, so sub_date identifies the run.
with flow_2017 as (
    -- Assumes data_dt is a 'yyyy/M/d' string as in the sample rows (e.g. 2017/1/2).
    -- Zero-padded 'yyyy-MM-dd' sorts chronologically and is accepted by date_sub().
    select
        user_id,
        from_unixtime(unix_timestamp(data_dt, 'yyyy/M/d'), 'yyyy-MM-dd') as data_dt,
        low_carbon
    from user_low_carbon
    where substr(data_dt, 1, 4) = '2017'  -- the task only covers the year 2017
),
qualified_day as (
    -- One row per (user, day) whose daily total reaches 100g.
    select
        user_id,
        data_dt,
        sum(low_carbon) as sum_low_carbon
    from flow_2017
    group by
        user_id,
        data_dt
    having sum(low_carbon) >= 100
),
ranked_day as (
    -- Chronological row number of each qualifying day, per user.
    select
        user_id,
        data_dt,
        sum_low_carbon,
        row_number() over (partition by user_id order by data_dt) as rk
    from qualified_day
)
select
    user_id,
    data_dt,
    date_sub(data_dt, rk) as sub_date
from ranked_day
- 第五步:统计每个用户中sub_date相同的行数(即每段连续达标的天数)
-- Step 5: attach to every qualifying day the length of the streak it belongs to.
with flow_2017 as (
    -- Assumes data_dt is a 'yyyy/M/d' string as in the sample rows (e.g. 2017/1/2).
    -- Zero-padded 'yyyy-MM-dd' sorts chronologically and is accepted by date_sub().
    select
        user_id,
        from_unixtime(unix_timestamp(data_dt, 'yyyy/M/d'), 'yyyy-MM-dd') as data_dt,
        low_carbon
    from user_low_carbon
    where substr(data_dt, 1, 4) = '2017'  -- the task only covers the year 2017
),
qualified_day as (
    -- One row per (user, day) whose daily total reaches 100g.
    select
        user_id,
        data_dt,
        sum(low_carbon) as sum_low_carbon
    from flow_2017
    group by
        user_id,
        data_dt
    having sum(low_carbon) >= 100
),
ranked_day as (
    -- Chronological row number of each qualifying day, per user.
    select
        user_id,
        data_dt,
        row_number() over (partition by user_id order by data_dt) as rk
    from qualified_day
),
streak_anchor as (
    -- Consecutive days share the same (date - row number) value.
    select
        user_id,
        data_dt,
        date_sub(data_dt, rk) as sub_date
    from ranked_day
)
select
    user_id,
    data_dt,
    count(1) over (partition by user_id, sub_date) as count_date
from streak_anchor
- 第六步:提取连续达标天数满足要求(三天或以上)的用户及日期
-- Step 6: keep the days that belong to a streak of three or more consecutive days.
with flow_2017 as (
    -- Assumes data_dt is a 'yyyy/M/d' string as in the sample rows (e.g. 2017/1/2).
    -- Zero-padded 'yyyy-MM-dd' sorts chronologically and is accepted by date_sub().
    select
        user_id,
        from_unixtime(unix_timestamp(data_dt, 'yyyy/M/d'), 'yyyy-MM-dd') as data_dt,
        low_carbon
    from user_low_carbon
    where substr(data_dt, 1, 4) = '2017'  -- the task only covers the year 2017
),
qualified_day as (
    -- One row per (user, day) whose daily total reaches 100g.
    select
        user_id,
        data_dt,
        sum(low_carbon) as sum_low_carbon
    from flow_2017
    group by
        user_id,
        data_dt
    having sum(low_carbon) >= 100
),
ranked_day as (
    -- Chronological row number of each qualifying day, per user.
    select
        user_id,
        data_dt,
        row_number() over (partition by user_id order by data_dt) as rk
    from qualified_day
),
streak_anchor as (
    -- Consecutive days share the same (date - row number) value.
    select
        user_id,
        data_dt,
        date_sub(data_dt, rk) as sub_date
    from ranked_day
),
streak_length as (
    -- Number of days in the streak each qualifying day belongs to.
    select
        user_id,
        data_dt,
        count(1) over (partition by user_id, sub_date) as count_date
    from streak_anchor
)
select
    user_id,
    data_dt
from streak_length
-- "three days or more": a streak of exactly three days must be kept, hence >=.
where count_date >= 3
- 第七步:关联查询达标用户达标当天的流水
-- Step 7: return the individual flow rows of every day that belongs to a
-- streak of three or more consecutive qualifying days in 2017.
with flow_2017 as (
    -- Assumes data_dt is a 'yyyy/M/d' string as in the sample rows (e.g. 2017/1/2).
    -- Zero-padded 'yyyy-MM-dd' sorts chronologically and is accepted by date_sub().
    select
        user_id,
        from_unixtime(unix_timestamp(data_dt, 'yyyy/M/d'), 'yyyy-MM-dd') as data_dt,
        low_carbon
    from user_low_carbon
    where substr(data_dt, 1, 4) = '2017'  -- the task only covers the year 2017
),
qualified_day as (
    -- One row per (user, day) whose daily total reaches 100g.
    select
        user_id,
        data_dt,
        sum(low_carbon) as sum_low_carbon
    from flow_2017
    group by
        user_id,
        data_dt
    having sum(low_carbon) >= 100
),
ranked_day as (
    -- Chronological row number of each qualifying day, per user.
    select
        user_id,
        data_dt,
        row_number() over (partition by user_id order by data_dt) as rk
    from qualified_day
),
streak_anchor as (
    -- Consecutive days share the same (date - row number) value.
    select
        user_id,
        data_dt,
        date_sub(data_dt, rk) as sub_date
    from ranked_day
),
streak_length as (
    -- Number of days in the streak each qualifying day belongs to.
    select
        user_id,
        data_dt,
        count(1) over (partition by user_id, sub_date) as count_date
    from streak_anchor
),
streak_day as (
    -- "three days or more": a streak of exactly three days must be kept, hence >=.
    select
        user_id,
        data_dt
    from streak_length
    where count_date >= 3
)
-- streak_day holds at most one row per (user, day), so this join does not
-- multiply the flow rows.
select
    f.user_id,
    f.data_dt,
    f.low_carbon
from streak_day as s
inner join flow_2017 as f
    on f.user_id = s.user_id
    and f.data_dt = s.data_dt
order by
    f.user_id,
    f.data_dt
本文地址:https://blog.csdn.net/sinat_36349318/article/details/110672558