“별모양 스키마(Star Schema)”는 데이터 웨어하우스(DW) 설계에서 가장 기본적이면서도 많이 쓰이는 모델이다.
예시 (온라인 쇼핑몰 매출 DW):
FactSales (사실 테이블)
├── CustomerDimension (차원: 고객)
├── ProductDimension (차원: 상품)
├── DateDimension (차원: 날짜)
└── StoreDimension (차원: 매장)
예를 들어, 매출 데이터 웨어하우스를 만든다고 가정
from django.db import models
# 차원 테이블들
class CustomerDimension(models.Model):
customer_key = models.AutoField(primary_key=True)
name = models.CharField(max_length=255)
email = models.EmailField()
region = models.CharField(max_length=100)
def __str__(self):
return f"{self.name} ({self.region})"
class ProductDimension(models.Model):
product_key = models.AutoField(primary_key=True)
name = models.CharField(max_length=255)
category = models.CharField(max_length=100)
price = models.DecimalField(max_digits=10, decimal_places=2)
def __str__(self):
return f"{self.name} - {self.category}"
class DateDimension(models.Model):
date_key = models.DateField(primary_key=True)
year = models.IntegerField()
quarter = models.IntegerField()
month = models.IntegerField()
day = models.IntegerField()
weekday = models.CharField(max_length=10)
def __str__(self):
return str(self.date_key)
class StoreDimension(models.Model):
store_key = models.AutoField(primary_key=True)
name = models.CharField(max_length=255)
city = models.CharField(max_length=100)
country = models.CharField(max_length=100)
def __str__(self):
return f"{self.name} ({self.city})"
# 사실 테이블
class SalesFact(models.Model):
sale_id = models.AutoField(primary_key=True)
customer = models.ForeignKey(CustomerDimension, on_delete=models.CASCADE)
product = models.ForeignKey(ProductDimension, on_delete=models.CASCADE)
date = models.ForeignKey(DateDimension, on_delete=models.CASCADE)
store = models.ForeignKey(StoreDimension, on_delete=models.CASCADE)
quantity = models.IntegerField()
total_amount = models.DecimalField(max_digits=12, decimal_places=2)
def __str__(self):
return f"Sale {self.sale_id} - {self.total_amount}"
from django.db.models import Sum
# 2025년 매출 총합
total_sales_2025 = SalesFact.objects.filter(
date__year=2025
).aggregate(Sum("total_amount"))top_customers = (
SalesFact.objects
.values("customer__name")
.annotate(total=Sum("total_amount"))
.order_by("-total")[:5]
)monthly_sales = (
SalesFact.objects
.values("date__year", "date__month", "product__category")
.annotate(total=Sum("total_amount"))
.order_by("date__year", "date__month")
)예: 고객 지역 변동 이력 관리
from django.db import models
from datetime import date
class CustomerDimension(models.Model):
customer_key = models.AutoField(primary_key=True)
customer_id = models.IntegerField() # 실제 고객 ID
name = models.CharField(max_length=255)
email = models.EmailField()
region = models.CharField(max_length=100)
# SCD 관리 필드
start_date = models.DateField(default=date.today)
end_date = models.DateField(null=True, blank=True)
is_current = models.BooleanField(default=True)
def __str__(self):
return f"{self.name} ({self.region})"
로직 예시 (고객 지역 업데이트 시):
def update_customer_region(customer_id, new_region):
current_record = CustomerDimension.objects.get(customer_id=customer_id, is_current=True)
if current_record.region != new_region:
# 기존 레코드 종료
current_record.end_date = date.today()
current_record.is_current = False
current_record.save()
# 새 레코드 추가
CustomerDimension.objects.create(
customer_id=customer_id,
name=current_record.name,
email=current_record.email,
region=new_region,
start_date=date.today(),
is_current=True
)
Fack1, FackN, Fact 1 + application, Fact N + application
확장성 → 파티션을 어떤기준으로 쪼개느냐
계정과목 3depth 기준, 2deplth에 대한 집계는 appliaction 으로 집계
postgresql partitioning