insert overwrite table loan.app_user_personas_label_a select user_id , , , from (select user_id , from loan.dws_user_base_info_d where dt='${dt}' )user_base leftjoin (select user_id , from loan.dws_loan_user_base_info_d where dt='${dt}' )user_loan on user_base.user_id=user_loan.user_id leftjoin (select user_id , from loan.dws_decision_base_info_d where dt='${dt}' )decision on user_base.user_id=decision.user_id leftjoin (select user_id , from event.dws_user_etk_event_info_d where dt='${dt}' )etk on user_base.user_id=etk.user_id
with tag_1_6 as ( select user.user_id ,user_create_time_date ,bind_mobile_succ_time_date ,first_credit_apply_time_date ,first_credit_apply_succ_time_date ,first_loan_apply_time_date ,first_loan_succ_time_date from (select user_id ,to_date(user_create_time) as user_create_time_date from loan.dws_user_base_info_d where dt='${dt}' )user leftjoin (select user_id ,to_date(bind_mobile_succ_time) as bind_mobile_succ_time_date ,to_date(first_credit_apply_time) as first_credit_apply_time_date ,to_date(first_credit_apply_succ_time) as first_credit_apply_succ_time_date ,to_date(first_loan_apply_time) as first_loan_apply_time_date ,to_date(first_loan_succ_time) as first_loan_succ_time_date from loan.dws_loan_user_base_info_d where dt='${dt}' )loan on user.user_id= loan.user_id ) with tag_7 as( select user_id ,apply_time ,7as tag from (select user_id ,min(settle_time) as min_settle_time from loan.dwd_repay_plan_d where dt='${dt}' groupby user_id )settle_user leftjoin (select user_id ,apply_time from loan.dwd_loan_info_core_d where dt='${dt}' )loan_info on settle_user.user_id=loan_info.user_id wherecasewhen min_settle_time<apply_time then7 end=7 ) insert overwrite table loan.dws_user_market_level_d partition(dt='${dt}') select user_id ,start_time_date ,end_time_date ,tag ,casewhen tag =1then'新增uv' when tag =2then'未注册' when tag =3then'注册未申请授信' when tag =4then'申请授信未通过' when tag =5then'授信通过未申请借款' when tag =6then'申请借款未通过' when tag =7then'活跃老客' when tag =8then'沉默老客' end tag_detail from (--tag=1 select user_id ,user_create_time_date as start_time_date ,user_create_time_date as end_time_date ,1as tag from tag_1_6 unionall --tag=2-6 select time_queue.user_id ,time_queue.start_time ,time_queue.end_time ,casewhen first_loan_apply_time_date>=time_queue.start_time and
--这里需要倒排,因为满足6的话一定满足5,因为是有顺序的,但是我们显示的是6,所以需要这样倒排
first_loan_apply_time_date <end_time --借款未通过 then6 when first_credit_apply_succ_time_date>=time_queue.start_time and first_credit_apply_succ_time_date <end_time --授信未借款 then5 when first_credit_apply_time_date>=time_queue.start_time and first_credit_apply_time_date <end_time --授信未通过 then4 when bind_mobile_succ_time_date>=time_queue.start_time and bind_mobile_succ_time_date <end_time --注册未授信 then3 when date_add(user_create_time_date,1)>=time_queue.start_time and date_add(user_create_time_date,1)< end_time --未注册 then2 end tag from (select user_id ,change_time start_time ,lead(change_time,1,'2999-12-31') over(partitionby user_id orderby change_time asc) end_time from (select user_id ,change_time1 change_time from (select user_id ,concat_ws(',',user_create_time_date,bind_mobile_succ_time_date,first_credit_apply_time_date,first_credit_apply_succ_time_date,first_loan_apply_time_date,first_loan_succ_time_date) as change_time from (select user_id ,user_create_time_date ,bind_mobile_succ_time_date ,first_credit_apply_time_date ,first_credit_apply_succ_time_date ,first_loan_apply_time_date ,first_loan_succ_time_date from tag_1_6 ) a )aa lateralviewouter explode(split(change_time,',')) --这个其实是把一行数据拆分成多行,先把一行数据拼接,然后再根据逗号拆分成多行,就变成了一行数据变成多行 change_time as change_time1 ) time_queue1 groupby user_id ,change_time ) time_queue unionall select user_id ,min(apply_time) as start_time_date ,lead(apply_time,1,'2999-12-31')over(partitionby user_id orderby apply_time) as end_time_date ,tag from ( select user_id ,apply_time ,row_number()over(partitionby user_id orderby apply_time) as rn_1 ,row_number()over(partitionby user_id,tag orderby apply_time) as rn_2 from (select user_id ,apply_time ,7as tag from tag_7 unionall select user_id ,casewhen datediff(lag(apply_time,1)over(partitionby user_id orderby apply_time),apply_time)>30 then date_add(lag(apply_time,1)over(partitionby user_id orderby apply_time),30) endas apply_time --如果两次申请时间间隔超过30天则增加一个时间节点 作为tag=8的开始节点 ,8as tag from tag_7 wherecasewhen datediff(lag(apply_time,1)over(partitionby user_id orderby apply_time),apply_time)>30 then date_add(lag(apply_time,1)over(partitionby user_id orderby apply_time),30) endisnotnull ) a )tag7_8 groupby user_id ,rn_1-rn_2 ,tag )a
with tag_1_6 as ( select user.user_id ,user_create_time_date --用户创建时间 ,bind_mobile_succ_time_date --用户手机号创建时间 ,first_credit_apply_time_date --首次授信申请时间 ,first_credit_apply_succ_time_date --首次授信成功时间 ,first_loan_apply_time_date --首次借款申请时间 ,first_loan_succ_time_date --首次借款成功时间 from (select user_id ,to_date(user_create_time) as user_create_time_date from loan.dws_user_base_info_d where dt='${dt}' )user leftjoin (select user_id ,to_date(bind_mobile_succ_time) as bind_mobile_succ_time_date ,to_date(first_credit_apply_time) as first_credit_apply_time_date ,to_date(first_credit_apply_succ_time) as first_credit_apply_succ_time_date ,to_date(first_loan_apply_time) as first_loan_apply_time_date ,to_date(first_loan_succ_time) as first_loan_succ_time_date from loan.dws_loan_user_base_info_d where dt='${dt}' )loan on user.user_id= loan.user_id )
with tag_7 as( select user_id ,apply_time ,7as tag from (select user_id ,min(settle_time) as min_settle_time --最小结清时间 from loan.dwd_repay_plan_d where dt='${dt}' groupby user_iddsa )settle_user leftjoin (select user_id ,apply_time --申请时间 from loan.dwd_loan_info_core_d where dt='${dt}' )loan_info on settle_user.user_id=loan_info.user_idd wherecasewhen min_settle_time<apply_time --申请时间大于最小结清时间,说明是老客,之前申请过 then7 end=7 )
,casewhen first_loan_apply_time_date>=time_queue.start_time and first_loan_apply_time_date <end_time --借款未通过 then6 when first_credit_apply_succ_time_date>=time_queue.start_time and first_credit_apply_succ_time_date <end_time --授信未借款 then5 when first_credit_apply_time_date>=time_queue.start_time and first_credit_apply_time_date <end_time --授信未通过 then4 when bind_mobile_succ_time_date>=time_queue.start_time and bind_mobile_succ_time_date <end_time --注册未授信 then3 when date_add(user_create_time_date,1)>=time_queue.start_time and date_add(user_create_time_date,1)< end_time --未注册 这里是用户在一段时间内没有完成注册就是未注册的状态,这里选择了第二天,如果一个账号在第二天还没有注册,这个账号就会标记为未注册 then2 end tag from (select user_id ,change_time start_time ,lead(change_time,1,'2999-12-31') over(partitionby user_id orderby change_time asc) end_time from (select user_id ,change_time1 change_time from (select user_id ,concat_ws(',',user_create_time_date,bind_mobile_succ_time_date,first_credit_apply_time_date,first_credit_apply_succ_time_date,first_loan_apply_time_date,first_loan_succ_time_date) as change_time from (select user_id ,user_create_time_date ,bind_mobile_succ_time_date ,first_credit_apply_time_date ,first_credit_apply_succ_time_date ,first_loan_apply_time_date ,first_loan_succ_time_date from tag_1_6 ) a )aa lateralviewouter explode(split(change_time,',')) --这个其实是把一行数据拆分成多行,先把一行数据拼接,然后再根据逗号拆分成多行,就变成了一行数据变成多行 change_time as change_time1 ) time_queue1 groupby user_id ,change_time ) time_queue
这里有点多看着有些困难,我们拆开看
首先
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
select user_id ,change_time1 change_time from (select user_id ,concat_ws(',',user_create_time_date,bind_mobile_succ_time_date,first_credit_apply_time_date,first_credit_apply_succ_time_date,first_loan_apply_time_date,first_loan_succ_time_date) as change_time from (select user_id ,user_create_time_date ,bind_mobile_succ_time_date ,first_credit_apply_time_date ,first_credit_apply_succ_time_date ,first_loan_apply_time_date ,first_loan_succ_time_date from tag_1_6 ) a )aa lateralviewouter explode(split(change_time,',')) --这个其实是把一行数据拆分成多行,先把一行数据拼接,然后再根据逗号拆分成多行,就变成了一行数据变成多行 change_time as change_time1
这里的内容是首先我们获取到前面我们准备好的各个时间,然后
先通过concat_ws(',',user_create_time_date,bind_mobile_succ_time_date,first_credit_apply_time_date,first_credit_apply_succ_time_date,first_loan_apply_time_date,first_loan_succ_time_date) as change_time先将所有的这些行先拼接到一起
,casewhen first_loan_apply_time_date>=time_queue.start_time and first_loan_apply_time_date <end_time --借款未通过 then6 when first_credit_apply_succ_time_date>=time_queue.start_time and first_credit_apply_succ_time_date <end_time --授信未借款 then5 when first_credit_apply_time_date>=time_queue.start_time and first_credit_apply_time_date <end_time --授信未通过 then4 when bind_mobile_succ_time_date>=time_queue.start_time and bind_mobile_succ_time_date <end_time --注册未授信 then3 when date_add(user_create_time_date,1)>=time_queue.start_time and date_add(user_create_time_date,1)< end_time --未注册 这里是用户在一段时间内没有完成注册就是未注册的状态,这里选择了第二天,如果一个账号在第二天还没有注册,这个账号就会标记为未注册 then2 end tag -- from ( -- select user_id -- ,change_time start_time -- ,lead(change_time,1,'2999-12-31') over(partition by user_id order by change_time asc) end_time -- from (select user_id -- ,change_time1 change_time -- from (select user_id -- ,concat_ws(',',user_create_time_date,bind_mobile_succ_time_date,first_credit_apply_time_date,first_credit_apply_succ_time_date,first_loan_apply_time_date,first_loan_succ_time_date) as change_time -- from (select user_id -- ,user_create_time_date -- ,bind_mobile_succ_time_date -- ,first_credit_apply_time_date -- ,first_credit_apply_succ_time_date -- ,first_loan_apply_time_date -- ,first_loan_succ_time_date -- from tag_1_6 -- ) a -- )aa lateral view outer explode(split(change_time,',')) --这个其实是把一行数据拆分成多行,先把一行数据拼接,然后再根据逗号拆分成多行,就变成了一行数据变成多行 -- change_time as change_time1 -- ) time_queue1 -- group by user_id -- ,change_time -- ) time_queue
select user_id, casewhen datediff(lag(apply_time, 1) over (partitionby user_id orderby apply_time), apply_time) >30 then date_add(lag(apply_time, 1) over (partitionby user_id orderby apply_time), 30) endas apply_time, 8as tag from tag_7
select user_id, apply_time, row_number() over (partitionby user_id orderby apply_time) as rn_1, row_number() over (partitionby user_id, tag orderby apply_time) as rn_2 from ...
select user_id, min(apply_time) as start_time_date, lead(apply_time, 1, '2999-12-31') over (partitionby user_id orderby apply_time) as end_time_date, tag from ... groupby user_id, rn_1 - rn_2, tag
对每组记录(rn_1 - rn_2 和 tag):
min(apply_time):获取组内最小的申请时间作为时间段的开始时间。
lead(apply_time, 1):获取下一条记录的申请时间作为时间段的结束时间。
如果没有下一条记录,默认结束时间为 ‘2999-12-31’。
最后的结果是
user_id
start_time_date
end_time_date
tag
1
2024-01-01
2024-01-20
7
1
2024-01-20
2024-03-01
7
1
2024-03-01
2024-03-31
7
1
2024-03-31
2024-05-01
8
1
2024-05-01
2999-12-31
7
2
2024-02-15
2024-04-01
7
2
2024-04-01
2999-12-31
7
在所有的最后,我们再把这些内容放到外面的拉链表中
1 2 3 4 5 6 7 8 9 10 11 12 13 14
insert overwrite table loan.dws_user_market_level_d partition(dt='${dt}') select user_id ,start_time_date ,end_time_date ,tag ,casewhen tag =1then'新增uv' when tag =2then'未注册' when tag =3then'注册未申请授信' when tag =4then'申请授信未通过' when tag =5then'授信通过未申请借款' when tag =6then'申请借款未通过' when tag =7then'活跃老客' when tag =8then'沉默老客' end tag_detail