date_trunc
함수 활용해 각 event_date가 속한 주의 첫 번째 일자 생성하기(변수명 : week_date
)first_week
)date_diff
함수 활용(변수명 : week_diff
)count(distinct user_pseudo_id) as user_cnt
계산with base as(
select
distinct
event_date,
user_pseudo_id,
date_trunc(event_date,week(monday)) as date_week
from
`advanced.app_log`
),
first_week_add as(
select
distinct
*,
first_value(date_week) over(partition by user_pseudo_id order by date_week)
as first_week
from
base
order by user_pseudo_id, event_date
),
date_diff_add as(
select
*,
date_diff(date_week, first_week, week) as week_diff
from
first_week_add
)
select
*,
round(safe_divide(user_cnt,first_cnt),3) as retention_ratio
from
(select
*,
first_value(user_cnt) over(order by week_diff) as first_cnt
from
(select
week_diff,
count(distinct user_pseudo_id) as user_cnt
from
date_diff_add
group by all))
order by week_diff;
user group 분리 전, 데이터 일부 확인
event_date
의 최소값, 최대값 확인
select
min(event_date) as min_date,--2022-08-01
max(event_date) as max_date -- 2023-01-20
from
`advanced.app_log`;
user_id
가 null인 case 확인
user_id
가 null인 case는 모두 user_id
가 null인 case는 모두 login 하기 전 상태임을 확인, (event_name
=click_login)select
*
from
`advanced.app_log`
where
user_id is null;
스스로 생각한 user group 분리 기준
CREATE OR REPLACE table `advanced.user_seperate` as
with app_date as(
select
distinct
user_pseudo_id,
min(date_trunc(event_date,week(monday))) as first_view,
max(date_trunc(event_date,week(monday))) as last_view,
date_trunc(cast('2023-01-20'as date), week(monday)) as last_record_date
from
`advanced.app_log`
group by user_pseudo_id),
user_table1 as (
select
distinct user_pseudo_id, user_group
from(
select
*,
date_diff(last_record_date,last_view,month) as last_record,
case when date_diff(last_record_date,last_view,month)>=3 then 'dormant_user'
when date_diff(last_record_date,first_view,week)=0 then 'new_user'
else NULL end as user_group
from
app_date)
where user_group is not null),
app_view as (
select
distinct
user_pseudo_id,
date_trunc(event_date,week(monday)) as event_date
from
`advanced.app_log`
where
user_pseudo_id not in (select distinct user_pseudo_id from user_table1)
),
app_date_diff as(
select
*,
date_diff(event_date,lag_event_date,week) as diff_date,
case when date_diff(event_date,lag_event_date,week)>=4 then 'resurrected_user'
else NULL end as user_group
from
(select
*,
lag(event_date) over(partition by user_pseudo_id order by event_date)
as lag_event_date
from
app_view
order by user_pseudo_id, event_date)),
user_table2 as(
select
distinct user_pseudo_id
from
app_date_diff
where user_group is not null
),
user_seperate as(
select
distinct
user_pseudo_id,
event_date,
case when user_pseudo_id in (select distinct user_pseudo_id from user_table1 where user_group='new_user') then 'new_user'
when user_pseudo_id in (select distinct user_pseudo_id from user_table1 where user_group='dormant_user') then 'dormant_user'
when user_pseudo_id in (select distinct user_pseudo_id from user_table2) then 'resurrected_user'
else 'current_user' end as user_group
from
`advanced.app_log`)
select
distinct
user_pseudo_id,
user_group
from
user_seperate;
user group 구분 후, group 별 user 수 확인
User group | N수 |
---|---|
new user | 512 |
current user | 13122 |
resurrected user | 27242 |
dormant user | 11947 |
where
구문 활용해 user group 별, retention ratio 구함
with base as(
select
distinct
event_date,
user_pseudo_id,
date_trunc(event_date,week(monday)) as date_week
from
`advanced.app_log`
where
user_pseudo_id in (select user_pseudo_id from `advanced.user_seperate`
where user_group='resurrected_user')
## filtering 하는 부분(나머지는 모든 user group에 대해 동일)
),
first_week_add as(
select
distinct
*,
first_value(date_week) over(partition by user_pseudo_id order by date_week)
as first_week
from
base
order by user_pseudo_id, event_date
),
date_diff_add as(
select
*,
date_diff(date_week, first_week, week) as week_diff
from
first_week_add
)
select
*,
round(safe_divide(user_cnt,first_cnt),3) as retention_ratio
from
(select
*,
first_value(user_cnt) over(order by week_diff) as first_cnt
from
(select
week_diff,
count(distinct user_pseudo_id) as user_cnt
from
date_diff_add
group by all))
order by week_diff;
New user
new user group 정의 : 첫 앱 접속 주와 2023-01-20가 속한 주가 같은 user
→ 따라서, new user의 weekly retention은 따로 구하지 않았다.
Current user