위의 결과는 각각 캔들차트를 monthly, weekly 옵션을 넣어서 돌렸을 때 걸린 시간을 나타낸 것이다.
기존에는 다음과 같이 시간복잡도가 O(n^2) 인 코드를 작성했다.
class StockCandleChart(View):
def get(self, request):
chart_type = request.GET.get('chart_type', 'daily')
code = request.GET.get('ticker', None)
error_handler_res = handle_candle_chart_input_error(chart_type, code)
if isinstance(error_handler_res, JsonResponse):
return error_handler_res
two_years_from_now = datetime.datetime.now() - relativedelta(years=2)
ticker = Ticker.objects.get(code=code)
stock_prices_qs = StockPrice.objects.filter(ticker=ticker, date__gte=two_years_from_now).order_by('date')
if chart_type == 'daily':
stock_prices = [
{
'date' : str(stock_price_qs.date),
'bprc_adj': stock_price_qs.bprc_adj,
'prc_adj' : stock_price_qs.prc_adj,
'hi_adj' : stock_price_qs.hi_adj,
'lo_adj' : stock_price_qs.lo_adj,
'volume' : stock_price_qs.volume
} for stock_price_qs in stock_prices_qs]
else:
groups_dict = self.get_stock_price_groups_by_chart_type(chart_type, stock_prices_qs)
stock_prices = self.get_stock_prices_list(groups_dict)
data = {
'name' : ticker.stock_name,
'ticker': ticker.code,
'values': stock_prices
}
return JsonResponse({'results': data}, status=200)
def get_stock_price_groups_by_chart_type(self, chart_type, stock_prices_qs):
pre_group_num = int()
groups_dict = dict()
for stock_price_qs in stock_prices_qs:
if chart_type == 'weekly':
today = datetime.datetime.today()
this_friday = today + datetime.timedelta((calendar.FRIDAY - today.weekday()) % 7)
base_date = this_friday.date()
time_diff = (base_date - stock_price_qs.date).days
current_group_num = int(time_diff / 7)
elif chart_type == 'monthly':
current_group_num = stock_price_qs.date.strftime('%Y-%m')
groups_dict.setdefault(current_group_num, [stock_price_qs])
if current_group_num == pre_group_num:
groups_dict[current_group_num].append(stock_price_qs)
pre_group_num = current_group_num
return groups_dict
def get_stock_prices_list(self, groups_dict):
stock_prices = list()
for group in groups_dict:
stock_price_list = groups_dict[group]
first_stock_price = stock_price_list[0]
last_stock_price = stock_price_list[-1]
bprc_adj = first_stock_price.bprc_adj
prc_adj = last_stock_price.prc_adj
lowest = first_stock_price.lo_adj
highest = first_stock_price.hi_adj
volume = 0
for stock_price in stock_price_list:
if stock_price.lo_adj < lowest:
lowest = stock_price.lo_adj
if stock_price.hi_adj > highest:
highest = stock_price.hi_adj
volume += stock_price.volume
weekly_stock_price = {
'date': last_stock_price.date,
'bprc_adj': bprc_adj,
'prc_adj': prc_adj,
'hi_adj': highest,
'lo_adj': lowest,
'volume': volume
}
stock_prices.append(weekly_stock_price)
return stock_prices
하지만 아무리 생각해도 너무 for 문이 많고 복잡도가 높은 것 같아서,
App Level 이 아니라, DB Level 에서 처리해보기로 했다.
SELECT time_diff, SUM(volume) volume, MAX(hi_adj) hi_adj, MIN(lo_adj) lo_adj
FROM (SELECT *, (date '2021-04-29' - date) / 7 as time_diff
FROM stock_prices WHERE ticker_id=3 ORDER BY date) td
GROUP BY time_diff ORDER BY time_diff;
weekly candle chart 는
일주일으로 묶인 주식 가격 그룹의 가장 마지막 날의 종가를 구해야한다.
SELECT *
FROM
(SELECT *, row_number() OVER(PARTITION BY time_diff ORDER BY date DESC) "row_number"
FROM (SELECT *, (date '2021-04-29' - date) / 7 as time_diff
FROM stock_prices
WHERE ticker_id=3
ORDER BY date) td) td2 WHERE row_number=1;
PARTITION BY 구문에서 DESC 를 ASC 로 바꿔주기만 하면 된다.
위 쿼리들은 각각 (거래량, 고가, 저가), 시가, 종가 를 세번에 걸쳐서 구한다.
이 모든걸 한 번의 쿼리로 수행되게끔 바꿀 수 있는 쿼리는 무엇인지 생각해보았다.
SELECT
MAX(date), SUM(volume) volume, MAX(hi_adj) hi_adj, MIN(lo_adj) lo_adj, MAX(first_bprc_adj) bprc_adj, MAX(last_prc_adj) prc_adj
FROM
(
SELECT
FIRST_VALUE(bprc_adj) OVER(PARTITION BY group_id ORDER BY date) first_bprc_adj,
FIRST_VALUE(prc_adj) OVER(PARTITION BY group_id ORDER BY date DESC) last_prc_adj,
*
FROM
(SELECT *, (date '2021-04-30' - date) / 7 group_id FROM stock_prices) t1
WHERE ticker_id = 3
) t2
GROUP BY group_id
group by 에서 지정되지 않은 컬럼은 집계함수를 못쓰지만,
어차피 모두 동일한 값을 가지고 있는 필드 값들에서 max 든 min 등 하나의 값을 반환하기 때문에 이렇게 했다.
weekly 기준 db level 에서의 속도가 약 16배 빠르다.
def get_candle_chart_by_type(self, chart_type, stock_prices_qs):
if chart_type == 'daily':
results_qs = stock_prices_qs.values('date', 'bprc_adj', 'prc_adj', 'hi_adj', 'lo_adj', 'volume').order_by('date')
return list(results_qs)
if chart_type == 'weekly':
today = datetime.datetime.today()
this_friday = today + datetime.timedelta((calendar.FRIDAY - today.weekday()) % 7)
base_date = this_friday.date()
first_qs = stock_prices_qs.annotate(group_id=Cast(
ExtractDay(base_date - F('date')), IntegerField()) / 7).order_by('date')
elif chart_type == 'monthly':
first_qs = stock_prices_qs.annotate(group_id=TruncMonth(F('date'))).order_by('date')
second_qs = first_qs.values('group_id')\
.annotate(
bprc_adj=Window(
expression = FirstValue('bprc_adj'),
partition_by = F('group_id'),
order_by = F('date').asc()
),
prc_adj=Window(
expression = FirstValue('prc_adj'),
partition_by = F('group_id'),
order_by = F('date').desc()
),
date=Window(
expression = Max('date'),
partition_by = F('group_id')
),
hi_adj=Window(
expression = Max('hi_adj'),
partition_by = F('group_id')
),
lo_adj=Window(
expression = Min('lo_adj'),
partition_by = F('group_id')
),
volume=Window(
expression = Sum('volume'),
partition_by = F('group_id')
)
)\
.distinct('date')\
.values('date', 'bprc_adj', 'prc_adj', 'hi_adj', 'lo_adj', 'volume')
results = list(second_qs)
return results