CREATE TABLE t_oil (
region text,
country text,
year int,
production int,
consumption int
);
COPY t_oil FROM PROGRAM 'curl https://www.cybertec-postgresql.com/secret/oil_ext.txt ';
postgres=# select region, avg(production)
postgres-# from t_oil group by region;
region | avg
---------------+-----------------------
Middle East | 1992.6036866359447005
North America | 4541.3623188405797101
postgres=# select region, avg(production)
postgres-# from t_oil
postgres-# group by rollup (region);
region | avg
---------------+-----------------------
| 2607.5139860139860140
North America | 4541.3623188405797101
Middle East | 1992.6036866359447005
SELECT
region,
country,
avg(production)
FROM t_oil
WHERE country IN ('USA', 'Canada', 'Iran', 'Oman')
GROUP BY ROLLUP (region, country);
region | country | avg
---------------+---------+-----------------------
| | 3906.7692307692307692
Middle East | Iran | 3631.6956521739130435
North America | Canada | 2123.2173913043478261
North America | USA | 9141.3478260869565217
Middle East | Oman | 586.4545454545454545
North America | | 5632.2826086956521739
Middle East | | 2142.9111111111111111
ROLLUP: 데이터를 즉시 보여줄 때 적합
- GROUP BY region + the overall average
CUBE: 더 많은 flexibility 제공. grouping 의 모든 combination 제공
- GROUP BY region, country
- GROUP BY region
- GROUP BY country
- the overall average
SELECT region,
country,
Avg(production)
FROM t_oil
WHERE country IN ( 'USA', 'Canada', 'Iran', 'Oman' )
GROUP BY cube ( region, country );
region | country | avg
---------------+---------+-----------------------
| | 3906.7692307692307692
Middle East | Iran | 3631.6956521739130435
North America | Canada | 2123.2173913043478261
North America | USA | 9141.3478260869565217
Middle East | Oman | 586.4545454545454545
North America | | 5632.2826086956521739
Middle East | | 2142.9111111111111111
| Oman | 586.4545454545454545
| Canada | 2123.2173913043478261
| Iran | 3631.6956521739130435
| USA | 9141.3478260869565217
SELECT region,
country,
Avg(production)
FROM t_oil
WHERE country IN ( 'USA', 'Canada', 'Iran', 'Oman' )
GROUP BY grouping sets ( ( ), region, country );
region | country | avg
---------------+---------+-----------------------
| | 3906.7692307692307692
North America | | 5632.2826086956521739
Middle East | | 2142.9111111111111111
| Oman | 586.4545454545454545
| Canada | 2123.2173913043478261
| Iran | 3631.6956521739130435
| USA | 9141.3478260869565217
MixedAggregate
을 사용하여 여러 연산을 효율적으러 한 번에 수행GroupAggregate
을 사용MixedAggregate
은 근본적으로 HashAggregate
을 사용한다SELECT region,
Avg(production) AS all,
Avg(production) filter (WHERE year < 1990) AS old,
avg(production) filter (WHERE year >= 1990) AS new
FROM t_oil
GROUP BY rollup (region);
region | all | old | new
---------------+-----------------------+-----------------------+-----------------------
| 2607.5139860139860140 | 2430.6856187290969900 | 2801.1831501831501832
North America | 4541.3623188405797101 | 4471.6533333333333333 | 4624.3492063492063492
Middle East | 1992.6036866359447005 | 1747.3258928571428571 | 2254.2333333333333333
조건을 filter 절에서 where 절에 옮기는 것이 가능할 경우, table 에서 데이터를 적게 load 하기 때문에 더욱 바람직하다.
Filter 은 where 절에서 남은 데이터가 연산에 필요하지 않을 때 유용
FILTER
CASE WHEN ... THEN NULL ... ELSE END
보다 더 빠름percentile_desc(0.5) within group (order by ..)
SELECT region,
Percentile_disc(0.5)
within GROUP (ORDER BY production)
FROM t_oil
GROUP BY 1;
region | percentile_disc
---------------+-----------------
Middle East | 1082
North America | 3054
SELECT region,
Percentile_disc(0.5)
within GROUP (ORDER BY production)
FROM t_oil
GROUP BY rollup (1);
region | percentile_disc
---------------+-----------------
Middle East | 1082
North America | 3054
| 1696
백분율 나타내는 함수 - percentile_cont
SELECT percentile_disc(0.62) WITHIN GROUP (ORDER BY id),
percentile_cont(0.62) WITHIN GROUP (ORDER BY id)
FROM generate_series(1, 5) AS id;
percentile_disc | percentile_cont
-----------------+-----------------
4 | 3.48
SELECT x % 2 AS grp,
array_agg(x)
FROM generate_series(1, 10) AS x
GROUP BY x % 2;
grp | array_agg
-----+--------------
1 | {1,3,5,7,9}
0 | {2,4,6,8,10}
위와 같은 그룹들 (홀수 or 짝수) 이 있을 때, hypothetical (가상) 값을 넣을 경우
각 그룹에서의 결과가 어떤지 확인하는 것
SELECT x % 2 AS grp, array_agg(x),
rank(3.5) WITHIN GROUP (ORDER BY x)
FROM generate_series(1, 10) AS x
GROUP BY x % 2;
grp | array_agg | rank
-----+--------------+------
0 | {10,2,4,6,8} | 2
1 | {9,7,3,1,5} | 3
위에서 3.5 라는 값은 짝수 그룹인 {10,2,4,6,8}
에 들어갔을 경우 2번째,
홀수 그룹 {9,7,3,1,5}
에 들어갔을 경우 3번째 rank 라는 의미
SELECT region,
Rank(9000)
within GROUP (ORDER BY production DESC NULLS LAST)
FROM t_oil
GROUP BY rollup ( 1 );
region | rank
---------------+------
Middle East | 21
North America | 27
| 47
위의 경우 9000 라는 가상의, 혹은 임의의 값을 group by 된 각 group 에 넣었을 때
그 순위가 결과와 같다는 것을 의미한다. (rollup 은 전체)
데이터가 정렬될 경우 보통 null 은 끝에 위치하지만, 데이터가 역(reversed / DESC) 되었을 경우, null 이 최상단으로 오기 때문에
NULLS LAST
를 적용해줘야함
SELECT country,
year,
production,
consumption,
avg(production) over ()
FROM t_oil limit 4;
country | year | production | consumption | avg
---------+------+------------+-------------+-----------------------
USA | 1965 | 9014 | 11522 | 2607.5139860139860140
USA | 1966 | 9579 | 12100 | 2607.5139860139860140
USA | 1967 | 10219 | 12567 | 2607.5139860139860140
USA | 1968 | 10600 | 13405 | 2607.5139860139860140
PARTITION BY
적용SELECT country,
year,
production,
consumption,
avg(production) over (PARTITION BY country)
FROM t_oil;
country | year | production | consumption | avg
----------+-------+------------+-------------+-----------
Canada | 1965 | 920 | 1108 | 2123.2173
Canada | 2010 | 3332 | 2316 | 2123.2173
Canada | 2009 | 3202 | 2190 | 2123.2173
...
Iran | 1966 | 2132 | 148 | 3631.6956
Iran | 2010 | 4352 | 1874 | 3631.6956
Iran | 2009 | 4249 | 2012 | 3631.6956
...
year 별로 정렬되어있지 않는 것을 볼 수 있다. 정렬을 원할 경우 SQL 은 명시적으로 정렬문을 입력해야 함
SELECT year,
production,
avg(production) over (PARTITION BY year < 1990)
FROM t_oil
WHERE country = 'Canada'
ORDER BY year;
-------------------------------------------
year | production | avg
------+------------+-----------------------
1965 | 920| 1631.6000000000000000
1966 | 1012| 1631.6000000000000000
...
1990 | 1967| 2708.4761904761904762
1991 | 1983| 2708.4761904761904762
1992 | 2065| 2708.4761904761904762
SELECT country,
year,
production,
min(production) over (PARTITION BY country ORDER BY year)
FROM t_oil
WHERE year BETWEEN 1978 AND 1983
AND country IN ( 'Iran', 'Oman' );
country | year | production | min
---------+------+------------+------
Iran | 1978 | 5302 | 5302
Iran | 1979 | 3218 | 3218
Iran | 1980 | 1479 | 1479
Iran | 1981 | 1321 | 1321
Iran | 1982 | 2397 | 1321
Iran | 1983 | 2454 | 1321
Oman | 1978 | 314 | 314
Oman | 1979 | 295 | 295
Oman | 1980 | 285 | 285
Oman | 1981 | 330 | 285
Oman | 1982 | 338 | 285
Oman | 1983 | 391 | 285
SELECT country,
year,
production,
min(production) over (),
min(production) over (ORDER BY year)
FROM t_oil
WHERE year BETWEEN 1978 AND 1983
AND country = 'Iran';
country | year | production | min | min
---------+------+------------+------+------
Iran | 1978 | 5302 | 1321 | 5302
Iran | 1979 | 3218 | 1321 | 3218
Iran | 1980 | 1479 | 1321 | 1479
Iran | 1981 | 1321 | 1321 | 1321
Iran | 1982 | 2397 | 1321 | 1321
Iran | 1983 | 2454 | 1321 | 1321
위의 결과에서 over () 로만 끝낼 경우 정상적으로 결과가 반영되지 않았음을 확인할 수 있다
SELECT country,
year,
production,
min(production)
over (
PARTITION BY country
ORDER BY year ROWS BETWEEN 1 preceding AND 1 following)
FROM t_oil
WHERE year BETWEEN 1978 AND 1983
AND country IN ( 'Iran', 'Oman' );
country | year | production | min
---------+------+------------+------
Iran | 1978 | 5302 | 3218
Iran | 1979 | 3218 | 1479
Iran | 1980 | 1479 | 1321
Iran | 1981 | 1321 | 1321
Iran | 1982 | 2397 | 1321
Iran | 1983 | 2454 | 2397
Oman | 1978 | 314 | 295
Oman | 1979 | 295 | 285
Oman | 1980 | 285 | 285
Oman | 1981 | 330 | 285
Oman | 1982 | 338 | 330
Oman | 1983 | 391 | 338
SELECT *,
Array_agg(id)
over (
ORDER BY id
ROWS BETWEEN unbounded preceding
AND 0 following)
FROM generate_series(1, 5) AS id;
id | array_agg
----+-------------
1 | {1}
2 | {1,2}
3 | {1,2,3}
4 | {1,2,3,4}
5 | {1,2,3,4,5}
EXCLUDE CURRENT ROW
사용SELECT *,
x / 3 AS y,
Array_agg(x) over (ORDER BY x ROWS BETWEEN 1 preceding AND 1 following) AS rows_1,
Array_agg(x) over (ORDER BY x RANGE BETWEEN 1 preceding AND 1 following) AS range_1,
Array_agg(x/3) over (ORDER BY (x/3) ROWS BETWEEN 1 preceding AND 1 following) AS rows_2,
Array_agg(x/3) over (ORDER BY (x/3) RANGE BETWEEN 1 preceding AND 1 following) AS range_2
FROM generate_series(1, 15) AS x;
x | y | rows_1 | range_1 | rows_2 | range_2
----+---+------------+------------+---------+---------------------
1 | 0 | {1,2} | {1,2} | {0,0} | {0,0,1,1,1}
2 | 0 | {1,2,3} | {1,2,3} | {0,0,1} | {0,0,1,1,1}
3 | 1 | {2,3,4} | {2,3,4} | {0,1,1} | {0,0,1,1,1,2,2,2}
4 | 1 | {3,4,5} | {3,4,5} | {1,1,1} | {0,0,1,1,1,2,2,2}
5 | 1 | {4,5,6} | {4,5,6} | {1,1,2} | {0,0,1,1,1,2,2,2}
6 | 2 | {5,6,7} | {5,6,7} | {1,2,2} | {1,1,1,2,2,2,3,3,3}
7 | 2 | {6,7,8} | {6,7,8} | {2,2,2} | {1,1,1,2,2,2,3,3,3}
8 | 2 | {7,8,9} | {7,8,9} | {2,2,3} | {1,1,1,2,2,2,3,3,3}
9 | 3 | {8,9,10} | {8,9,10} | {2,3,3} | {2,2,2,3,3,3,4,4,4}
10 | 3 | {9,10,11} | {9,10,11} | {3,3,3} | {2,2,2,3,3,3,4,4,4}
11 | 3 | {10,11,12} | {10,11,12} | {3,3,4} | {2,2,2,3,3,3,4,4,4}
12 | 4 | {11,12,13} | {11,12,13} | {3,4,4} | {3,3,3,4,4,4,5}
13 | 4 | {12,13,14} | {12,13,14} | {4,4,4} | {3,3,3,4,4,4,5}
14 | 4 | {13,14,15} | {13,14,15} | {4,4,5} | {3,3,3,4,4,4,5}
15 | 5 | {14,15} | {14,15} | {4,5} | {4,4,4,5}
EXCLUDE TIES
키워드를 이용해 중복을 제거할 수 있다SELECT *,
x / 3 AS y,
Array_agg(x/3) over (ORDER BY x/3 ROWS BETWEEN 1 preceding AND 1 following) AS rows_1,
array_agg(x/3) over (ORDER BY x/3 ROWS BETWEEN 1 preceding AND 1 following exclude ties) AS rows_2
FROM generate_series(1, 10) AS x;
x | y | rows_1 | rows_2
----+---+---------+--------
1 | 0 | {0,0} | {0}
2 | 0 | {0,0,1} | {0,1}
3 | 1 | {0,1,1} | {0,1}
4 | 1 | {1,1,1} | {1}
5 | 1 | {1,1,2} | {1,2}
6 | 2 | {1,2,2} | {1,2}
7 | 2 | {2,2,2} | {2}
8 | 2 | {2,2,3} | {2,3}
9 | 3 | {2,3,3} | {2,3}
10 | 3 | {3,3} | {3}
EXCLUDE GROUP
키워드를 이용해 그룹 자체 데이터를 제외하는 것도 가능SELECT *,
x / 3 AS y,
array_agg(x/3) over (ORDER BY x/3 ROWS BETWEEN 1 preceding AND 1 following) AS rows_1,
avg(x /3) over (ORDER BY x/3 ROWS BETWEEN 1 preceding AND 1 following) AS avg_1,
array_agg(x/3) over (ORDER BY x/3 ROWS BETWEEN 1 preceding AND 1 following exclude GROUP) AS rows_2,
avg(x /3) over (ORDER BY x/3 ROWS BETWEEN 1 preceding AND 1 following exclude GROUP) AS avg_2
FROM generate_series(1, 10)
x | y | rows_1 | avg_1 | rows_2 | avg_2
----+---+---------+------------------------+--------+------------------------
1 | 0 | {0,0} | 0.00000000000000000000 | |
2 | 0 | {0,0,1} | 0.33333333333333333333 | {1} | 1.00000000000000000000
3 | 1 | {0,1,1} | 0.66666666666666666667 | {0} | 0.00000000000000000000
4 | 1 | {1,1,1} | 1.00000000000000000000 | |
5 | 1 | {1,1,2} | 1.3333333333333333 | {2} | 2.0000000000000000
6 | 2 | {1,2,2} | 1.6666666666666667 | {1} | 1.00000000000000000000
7 | 2 | {2,2,2} | 2.0000000000000000 | |
8 | 2 | {2,2,3} | 2.3333333333333333 | {3} | 3.0000000000000000
9 | 3 | {2,3,3} | 2.6666666666666667 | {2} | 2.0000000000000000
10 | 3 | {3,3} | 3.0000000000000000 | |
window 함수의 over 절을 변수처럼 사용할 수 있다
SELECT country,
year,
production,
min(production) over (w),
max(production) over (w)
FROM t_oil
WHERE country = 'Canada'
AND year BETWEEN 1980 AND 1985
WINDOW w AS (ORDER BY year);
country | year | production | min | max
---------+------+------------+------+------
Canada | 1980 | 1764 | 1764 | 1764
Canada | 1981 | 1610 | 1610 | 1764
Canada | 1982 | 1590 | 1590 | 1764
Canada | 1983 | 1661 | 1590 | 1764
Canada | 1984 | 1775 | 1590 | 1775
Canada | 1985 | 1812 | 1590 | 1812
rank()
- tie 값이 있을 경우 다음 rank 에 tie 수만큼 건너뜀dense_rank()
- tie 값이 있을 경우 다음 rank 에 건너뛰지 않음rank()
- tie 값이 있을 경우 다음 rank 에 tie 수만큼 건너뜀dense_rank()
- tie 값이 있을 경우 다음 rank 에 건너뛰지 않음SELECT year,
production,
ntile(4) over (ORDER BY production)
FROM t_oil
WHERE country = 'Iraq'
AND year BETWEEN 2000 AND 2006;
year | production | ntile
------+------------+-------
2003 | 1344 | 1
2005 | 1833 | 1
2006 | 1999 | 2
2004 | 2030 | 2
2002 | 2116 | 3
2001 | 2522 | 3
2000 | 2613 | 4
ntile 자체만 사용되는 경우는 없기 때문에 sub-query 로 이용
(lag).*
으로 모든 칼럼을 반환받을 수 있다SELECT year,
production,
(lag(t_oil, 1) over (ORDER BY year)).*
FROM t_oil
WHERE country = 'USA' limit 3;
year | production | region | country | year | production | consumption
------+------------+---------------+---------+------+------------+-------------
1965 | 9014 | | | | |
1966 | 9579 | North America | USA | 1965 | 9014 | 11522
1967 | 10219 | North America | USA | 1966 | 9579 | 12100
PK 가 없을 경우 아래와 같이 중복된 데이터를 걸러낼 수 있다.
SELECT *
FROM (SELECT t_oil, lag(t_oil) over (ORDER BY year)
FROM t_oil
WHERE country = 'USA') AS x
WHERE t_oil = lag;