앱 로그 해석
1번. DAU / WAU / MAU 뽑아보기
답:
# 문제 : 1번. DAU / WAU / MAU 뽑아보기
# 쿼리를 작성하는 목표, 확인할 지표 : DAU? = Daily / WAU? = week / MAU? = month
# 쿼리 계산 방법 : date trunc? extract? 둘다 써보자!! / user id 혹은 pusedo id를 세야겠지? => pusedo id만 세도 되겠다! 공통되니까! / 중복을 제거해야하니까 DISTINCT 써야지!
# 데이터의 기간 : 22.8.1 ~ 23.1.20
# 사용할 테이블 : app_logs
# Join KEY :
# 데이터 특징 :
# 작성 흐름도 : FROM - JOIN - WHERE - GROUP BY - HAVING - SELECT- ORDER BY - LIMIT
#DAU
SELECT
event_date AS day,
COUNT(DISTINCT user_pseudo_id) AS user_cnt #DISTINCT를 안 쓰면 : 유저들이 이벤트를 발생시킨 횟수가 된다.
FROM advanced.app_logs
-- WHERE
-- event_date BETWEEN '2022-08-27' AND '2022-08-31' # 시험삼아 해보기
GROUP BY
event_date
ORDER BY
event_date
#WAU
SELECT
DATE_TRUNC(event_date, WEEK) AS start_week,
EXTRACT(WEEK FROM event_date) AS week_number,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM advanced.app_logs
-- WHERE
-- -- event_date BETWEEN '2022-08-01' AND '2022-09-30'
GROUP BY
start_week, week_number
ORDER BY
start_week
#MAU
SELECT
DATE_TRUNC(event_date, MONTH) AS start_month,
-- EXTRACT(MONTH FROM event_date) AS month_number,#trunc 했으니까 필요없다!
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM `advanced.app_logs`
-- WHERE
-- event_date BETWEEN '2022-08-01' AND '2022-09-30'
GROUP BY
start_month
ORDER BY
start_month
답:
# 문제 : 2. 어떤 페이지(스크린)에 많이 접근하는가?
# 쿼리를 작성하는 목표, 확인할 지표 : screen view 이벤트가 발생하고 -> firebase_screen의 string.value가 뭔지를 보면 => 어떤 페이지를 봤는 지 알 수 있다!! / string.value의 수를 세면 -> 몇 번 봤는지 알 수 있다! / 많이 접근? 기준이 뭐지? 일단 나열해보자! 내림차순으로!
# 쿼리 계산 방법 : UNNEST/ 조건 : screen view, firebase_screen / GROUP BY string.value / COUNT
# 데이터의 기간 : 22.08.01~23.01.20
# 사용할 테이블 : app_logs
# Join KEY :
# 데이터 특징 :
# 작성 흐름도 : FROM - JOIN - WHERE - GROUP BY - HAVING - SELECT- ORDER BY - LIMIT
SELECT
value.string_value AS screen_event,
COUNT(value.string_value) AS screen_view_number
FROM `advanced.app_logs`
CROSS JOIN UNNEST(event_params) AS event_para
WHERE
-- event_date BETWEEN '2022-08-27' AND '2022-08-31' AND
event_name = 'screen_view' AND
key = 'firebase_screen'
GROUP BY
value.string_value
ORDER BY screen_view_number DESC
답:
# 문제 : 3. 어떤 이벤트가 있나요? 이벤트들의 일자별 실행 횟수를 뽑아보세요
# 쿼리를 작성하는 목표, 확인할 지표 : 일자별 / 어떤 이벤트가 / 몇 번 실행됐는지
# 쿼리 계산 방법 : GROUP BY 일자 / 이벤트 이름 / COUNT
# 데이터의 기간 : 22.08.01~23.01.20
# 사용할 테이블 : app_logs
# Join KEY :
# 데이터 특징 :
# 작성 흐름도 : FROM - JOIN - WHERE - GROUP BY - HAVING - SELECT- ORDER BY - LIMIT
SELECT
event_date,
event_name,
COUNT(event_name) AS event_cnt
FROM advanced.app_logs
GROUP BY
event_date, event_name
ORDER BY
event_date, event_name
# app_logs를 PIVOT
WITH base AS (
SELECT
-- DISTINCT param.key
event_date,
-- event_timestamp,
DATETIME(TIMESTAMP_MICROS(event_timestamp), "Asia/Seoul") AS event_datetime, # event_timestamp를 쓰는 것이 아니라 미리 가공해서 가공한 datetime을 쓰겠다!
event_name,
user_pseudo_id,
platform,
MAX(IF(param.key = "firebase_screen", param.value.string_value, NULL)) AS firebase_screen,
MAX(IF(param.key = "food_id", param.value.int_value, NULL)) AS food_id,
MAX(IF(param.key = "session_id", param.value.string_value, NULL)) AS session_id,
MAX(IF(param.key = "is_meet_min_order_price", param.value.int_value, NULL)) AS is_meet_min_order_price,
MAX(IF(param.key = "banner_id", param.value.int_value, NULL)) AS banner_id,
MAX(IF(param.key = "restaurant_id", param.value.int_value, NULL)) AS restaurant_id,
MAX(IF(param.key = "food_category", param.value.string_value, NULL)) AS food_category,
MAX(IF(param.key = "search_keyword", param.value.string_value, NULL)) AS search_keyword,
MAX(IF(param.key = "payment_type", param.value.string_value, NULL)) AS payment_type,
MAX(IF(param.key = "use_recommend_food", param.value.string_value, NULL)) AS use_recommend_food
FROM advanced.app_logs
CROSS JOIN UNNEST(event_params) AS param # param을 SELECT에서 사용할 것이고, event_params는 SELECT에서 사용하지 않는다
WHERE
event_date BETWEEN "2022-08-01" AND "2022-10-08"
GROUP BY ALL
-- event_date,
-- event_timestamp,
-- event_name,
-- user_pseudo_id,
-- platform
)
# DAU 뽑기 => Active의 정의는 앱에 들어와서 어떤 이벤트라도 남겼으면 Active라고 하겠다!
-- SELECT
-- event_date,
-- COUNT(DISTINCT user_pseudo_id) AS dau
-- -- COUNT(user_pseudo_id) AS total,
-- -- COUNT(*) AS total2, # user_pseudo_id는 모두 값이 존재하기 때문에(NULL이 없음) *과 동일하다!
-- FROM base
-- GROUP BY event_date
-- ORDER BY event_date
# WAU, MAU
-- SELECT
-- -- EXTRACT(YEAR FROM event_datetime) AS year,
-- -- EXTRACT(WEEK FROM event_datetime) AS week_number,
-- DATETIME_TRUNC(event_datetime, WEEK) AS event_week, # DATETIME_TRUNC을 쓰면 하나의 컬럼에 모든 시간 데이터가 보존 => 시각화를 할 때 시간의 순서대로 데이터를 볼 때 유용
-- COUNT(DISTINCT user_pseudo_id) AS WAU
-- FROM base
-- GROUP BY event_week
-- ORDER BY event_week
-- 2. 어떤 페이지(스크린)에 많이 접근하는가?
-- SELECT
-- firebase_screen,
-- COUNT(user_pseudo_id) AS cnt
-- FROM base
-- WHERE event_name = "screen_view"
-- GROUP BY firebase_screen
-- 3. 어떤 이벤트가 있나요? 이벤트들의 일자별 실행 횟수를 뽑아보세요
SELECT
event_name,
COUNT(user_pseudo_id) AS cnt
FROM base
GROUP BY event_name
ORDER BY cnt DESC
퍼널 작성해보기!!
답:
WITH base AS (
SELECT
event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), "Asia/Seoul") AS event_datetime, # event_timestamp를 쓰는 것이 아니라 미리 가공해서 가공한 datetime을 쓰겠다!
event_name,
user_pseudo_id,
platform,
MAX(IF(param.key = "firebase_screen", param.value.string_value, NULL)) AS firebase_screen,
MAX(IF(param.key = "food_id", param.value.int_value, NULL)) AS food_id,
MAX(IF(param.key = "session_id", param.value.string_value, NULL)) AS session_id,
MAX(IF(param.key = "is_meet_min_order_price", param.value.int_value, NULL)) AS is_meet_min_order_price,
MAX(IF(param.key = "banner_id", param.value.int_value, NULL)) AS banner_id,
MAX(IF(param.key = "restaurant_id", param.value.int_value, NULL)) AS restaurant_id,
MAX(IF(param.key = "food_category", param.value.string_value, NULL)) AS food_category,
MAX(IF(param.key = "search_keyword", param.value.string_value, NULL)) AS search_keyword,
MAX(IF(param.key = "payment_type", param.value.string_value, NULL)) AS payment_type,
MAX(IF(param.key = "use_recommend_food", param.value.string_value, NULL)) AS use_recommend_food
FROM advanced.app_logs
CROSS JOIN UNNEST(event_params) AS param # param을 SELECT에서 사용할 것이고, event_params는 SELECT에서 사용하지 않는다
WHERE
event_date BETWEEN "2022-08-01" AND "2022-08-18" # 이거만 수정하면 된다
GROUP BY ALL
)
SELECT
CONCAT(event_name, '-', firebase_screen),
CASE
WHEN firebase_screen = 'welcome' THEN 1
WHEN firebase_screen = 'home' THEN 2
WHEN firebase_screen = 'food_category' THEN 3
WHEN firebase_screen = 'restaurant' THEN 4
WHEN event_name = 'screen_view' AND firebase_screen = 'cart' THEN 5
ELSE 6
END AS step_number,
COUNT(DISTINCT user_pseudo_id) AS cnt
FROM base
WHERE
event_name IN ('screen_view', 'click_payment') AND
firebase_screen IN ('welcome', 'home', 'food_category', 'restaurant', 'cart')
GROUP BY
event_name, firebase_screen