6๋ฒ ๋ฌธ์ . SQL ๋ฌธ๋ฒ โ ํ์ด์ฌ ๋ฌธ๋ฒ
# Skeleton ์ฝ๋
import pandas as pd
orders = pd.read_csv(""orders.csv")
# ์ปฌ๋ผ๋ช
์งง๊ฒ ์์ (order_purchase_timestamp โ order_time)
# df.rename() ํ์ฉ
orders.rename(columns={'order_purchase_timestamp':'order_time'}, inplace=True)
# order_time ์ปฌ๋ผ dtype ๋ณ๊ฒฝ (object โ datetime)
# pandas.to_datetime() ํ์ฉ
orders['order_time'] = pd.to_datetime(orders['order_time'])
# Date_Format ์์ (์: '2026-10') + month ์ปฌ๋ผ์ ์ถ๊ฐ
# Series.dt.strftime() ํ์ฉ
orders['months'] = orders['order_time'].dt.strftime('%Y-%m')
# ๋ชจ๋ ์ฐ์์ด ๋ด๊ธด ๋ฐ์ดํฐํ๋ ์ ์์ฑ
# pd.date_range() ํ์ฉ
all_months = pd.date_range(start = orders['order_time'].min(), end = orders['order_time'].max(),freq="MS")
all_months = pd.DataFrame(all_months, columns=['months'])
# Date_Format ์์
all_months['months'] = all_months['months'].dt.strftime('%Y-%m')
# orders ๋ฐ์ดํฐํ๋ ์ ๊ธฐ๋ฐ ์๋ณ ์ฃผ๋ฌธ๋ count
# df.groupby().count() ํ์ฉ
orders_month = orders.groupby(['months'])['order_id'].count().reset_index()
# column๋ช
์์ (order_id โ order_count)
orders_month = orders_month.rename(columns={'order_id':'order_count'})
# all_months + orders ๋ฐ์ดํฐํ๋ ์ ํฉ์น๊ธฐ
# pd.merge() ํ์ฉ
all_months_table = pd.merge(all_months, orders_month, how='left', on = 'months')
# ๊ฒฐ์ธก๊ฐ 0์ฒ๋ฆฌ
# df.fillna() ํ์ฉ
all_months_table['order_count'] = all_months_table['order_count'].fillna(value=0)
# ์ ์ ์ฃผ๋ฌธ๊ฑด์ ์ปฌ๋ผ ์ถ๊ฐ
# df.shift() ํ์ฉ
all_months_table['before_order_count'] = all_months_table['order_count'].shift(1)
# ์ฆ๊ฐ์จ ๊ณ์ฐ - ์กฐ๊ฑด์ ๋ฐ๋ผ ๋ค๋ฅธ ๊ณ์ฐ์ ์ ์ฉ
# df.loc[์กฐ๊ฑด์, column] = value ํ์ฉ
all_months_table['growth_rate'] = 0
all_months_table.loc[all_months_table['before_order_count'] == 0, "growth_rate"] = all_months_table['order_count'] * 100
all_months_table.loc[all_months_table['before_order_count'] != 0, "growth_rate"] = (all_months_table['order_count']-all_months_table['before_order_count']) / all_months_table['before_order_count'] * 100
# ์์์ ๋์งธ ์๋ฆฌ๊น์ง ๋ฐ์ฌ๋ฆผ
# round() ํ์ฉ
all_months_table['growth_rate'] = round(all_months_table['growth_rate'],2)
# ์ปฌ๋ผ๋ช
์์ (order_count โ cnt_orders)
all_months_table.rename(columns={'order_count':'cnt_orders'}, inplace=True)
# ํ์ํ ์ปฌ๋ผ๋ง ์ถ์ถ
all_months_table[['months', 'cnt_orders','growth_rate']]