SELECT id_bioguide, MIN(term_start) AS first_term
FROM legislators_terms
GROUP BY id_bioguide;
SELECT
A.id_bioguide,
A.first_term,
B.term_start,
B.term_end,
C.date,
DATE_PART('year', AGE(C.date, A.first_term)) AS period
FROM(
SELECT id_bioguide, MIN(term_start) AS first_term
FROM legislators_terms
GROUP BY id_bioguide
) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
LEFT JOIN date_dim AS C ON C.date between B.term_start AND B.term_end
AND C.month_name='December' AND C.day_of_month=31;
대략적인 과정은 위에 작성한 것과 같지만, 보다 자세히 이해하기 위해 예시로 ID가 A000004
인 경우를 살펴보자.
date_dim 테이블의 date 컬럼에서 12월 31일인 경우의 데이터만을 가지고 date 날짜가 임기 시작일과 끝나는 날짜 사이를 기준으로 JOIN하였다. date - first_term을 해주면 period가 계산된다.
SELECT
COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
COUNT(DISTINCT A.id_bioguide) AS cohort_retained
FROM(
SELECT id_bioguide, MIN(term_start) AS first_term
FROM legislators_terms
GROUP BY id_bioguide
) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
LEFT JOIN date_dim AS C ON C.date between B.term_start AND B.term_end
AND C.month_name='December' AND C.day_of_month=31
GROUP BY COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0);
SELECT
period,
cohort_retained,
FIRST_VALUE(cohort_retained) OVER(ORDER BY period) AS cohort_size,
ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(ORDER BY period), 2) AS pct_retained
FROM(
SELECT coalesce(date_part('year',age(c.date,a.first_term)),0) as period
,count(distinct a.id_bioguide) as cohort_retained
FROM(
SELECT id_bioguide, min(term_start) as first_term
FROM legislators_terms
GROUP BY id_bioguide
) a
JOIN legislators_terms b on a.id_bioguide = b.id_bioguide
LEFT JOIN date_dim c on c.date between b.term_start and b.term_end
and c.month_name = 'December' and c.day_of_month = 31
GROUP BY COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0)
) AS sub;
rep
일 경우 임기 시작 날짜 + 2년sen
일 경우 임기 시작 날짜 + 6년SELECT
A.id_bioguide,
A.first_term,
B.term_start,
(CASE
WHEN B.term_type='rep' THEN B.term_start + INTERVAL '2 years'
WHEN B.term_type='sen' THEN B.term_start + INTERVAL '6 years'
END) AS term_end
FROM(
SELECT id_bioguide, min(term_start) as first_term
FROM legislators_terms
GROUP BY id_bioguide
) AS A JOIN legislators_terms AS B On A.id_bioguide = B.id_bioguide;
SELECT
A.id_bioguide,
A.first_term,
B.term_start,
LEAD(B.term_start) OVER(PARTITION BY A.id_bioguide ORDER BY term_start) - INTERVAL '1 day' AS term_end
FROM(
SELECT id_bioguide, min(term_start) as first_term
FROM legislators_terms
GROUP BY id_bioguide
) AS A JOIN legislators_terms AS B On A.id_bioguide = B.id_bioguide
ORDER BY A.id_bioguide ASC, A.first_term ASC;