import random
import numpy as np
import torch
def set_seed(seed=2021):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Args:
        seed (int): Value passed to every seeding call. Defaults to 2021.
    """
    # Order: stdlib random, NumPy, torch CPU, then every CUDA device.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
# Call once before training starts
set_seed(2021)
# When to call: at the very top of the script, at the start of main(), or right before the training loop
import os
import random
import numpy as np
import torch
def set_seed_full(seed=2021, deterministic=True):
    """Seed every random source and configure cuDNN for reproducibility.

    Args:
        seed (int): Seed value to use.
        deterministic (bool): When truthy, cuDNN deterministic mode is turned
            on and benchmark mode off; otherwise the two flags are inverted.
    """
    # Python, NumPy, torch CPU, current CUDA device, then all CUDA devices.
    for seeder in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers multi-GPU runs
    ):
        seeder(seed)

    # The two cuDNN flags are always set to opposite values.
    strict = bool(deterministic)
    torch.backends.cudnn.deterministic = strict
    torch.backends.cudnn.benchmark = not strict

    # Exported as an environment variable for additional reproducibility.
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so
    # this presumably only influences child processes - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
# Usage examples
set_seed_full(2021, deterministic=True) # for papers / research
set_seed_full(2021, deterministic=False) # for everyday experiments
# 1. Python built-in random module
random.seed(seed)
# Affects: random.shuffle(), random.choice(), random.randint(), etc.
# 2. NumPy random
np.random.seed(seed)
# Affects: np.random.shuffle(), np.random.choice(), sklearn's random_state, etc.
# 3. PyTorch CPU operations
torch.manual_seed(seed)
# Affects: all random torch operations on the CPU
# 4. PyTorch GPU operations (single GPU)
torch.cuda.manual_seed(seed)
# Affects: all random operations on the current GPU
# 5. PyTorch GPU operations (multi-GPU)
torch.cuda.manual_seed_all(seed)
# Affects: random operations on every GPU
# 6. cuDNN deterministic mode
torch.backends.cudnn.deterministic = True
# Affects: convolutions and similar ops always use the same algorithm
# 7. Disable cuDNN benchmark
torch.backends.cudnn.benchmark = False
# Affects: turns off automatic selection of the fastest algorithm per input size
# 8. Python hash seed
os.environ['PYTHONHASHSEED'] = str(seed)
# Affects: ordering of hash-based containers such as sets
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here presumably only affects processes launched afterwards - confirm.
# train.py
import torch
import random
import numpy as np
def set_seed(seed):
    """Apply ``seed`` to Python, NumPy and PyTorch (CPU + all CUDA devices).

    Args:
        seed (int): Seed value handed to each generator.
    """
    for seed_fn in [random.seed, np.random.seed, torch.manual_seed,
                    torch.cuda.manual_seed_all]:
        seed_fn(seed)
# Placed at the very top of the script
set_seed(2021)
# Model definition, data loading, etc. come afterwards
model = MyModel()
train_loader = DataLoader(dataset, shuffle=True)
# utils/seed.py
import os
import random
import numpy as np
import torch
def set_seed(seed):
    """Seed all generators and force deterministic cuDNN behaviour.

    Args:
        seed (int): Seed value; also exported as ``PYTHONHASHSEED``.
    """
    for seeder in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seeder(seed)

    # Deterministic algorithms on, autotuner off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)
# train.py
from utils.seed import set_seed
def main():
    """Entry point: fix the random seeds, then run training."""
    set_seed(2021)
    # training code goes here


if __name__ == '__main__':
    main()
# config.py
class Config:
    """Static experiment settings read by the seeding helper."""

    # Seed applied to the random number generators.
    seed = 2021
    # Whether the cuDNN deterministic settings should be applied.
    deterministic = True
# utils/seed.py
def set_seed_from_config(config):
    """Seed all generators from a config object.

    Args:
        config: Object exposing ``seed`` and ``deterministic`` attributes.
            cuDNN flags are only touched when ``deterministic`` is truthy.
    """
    seed_value = config.seed
    for seeder in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seeder(seed_value)

    if not config.deterministic:
        # Leave the cuDNN flags exactly as they were.
        return
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
# train.py
from config import Config
from utils.seed import set_seed_from_config
def main():
    """Entry point: build the config, seed from it, then run training."""
    set_seed_from_config(Config())
    # training code goes here
def set_seed(seed):
    """Seed the stdlib, NumPy and PyTorch random generators with ``seed``."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
set_seed(2021)
# Even with shuffle=True, a fixed seed yields the same shuffling order
train_loader = DataLoader(
dataset,
batch_size=32,
shuffle=True, # shuffled the same way every run thanks to the seed
num_workers=0 # single process
)
def set_seed(seed):
    """Seed Python, NumPy and PyTorch (CPU and every CUDA device).

    Args:
        seed (int): Seed value handed to each generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Fixed: the original called the non-existent ``torch.manual_manual_seed``,
    # which raised AttributeError before any torch generator was seeded.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
def seed_worker(worker_id):
    """Seed NumPy and stdlib ``random`` inside a DataLoader worker.

    The seed is ``torch.initial_seed()`` reduced modulo 2**32.

    Args:
        worker_id (int): Worker index supplied by the DataLoader; unused.
    """
    derived_seed = torch.initial_seed() % (2 ** 32)
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)
# Create the Generator object
g = torch.Generator()
g.manual_seed(2021)
# Seed the main process
set_seed(2021)
# DataLoader configuration
train_loader = DataLoader(
dataset,
batch_size=32,
shuffle=True,
num_workers=4, # use multiple worker processes
worker_init_fn=seed_worker, # seeds each worker
generator=g # generator used for shuffling
)
import os
import random
import numpy as np
import torch
from torch.utils.data import DataLoader
def set_seed(seed):
    """Seed every generator, pin cuDNN to deterministic mode, export the seed.

    Args:
        seed (int): Seed value; its string form is written to
            ``PYTHONHASHSEED``.
    """
    seeders = [
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ]
    for run_seeder in seeders:
        run_seeder(seed)

    backend = torch.backends.cudnn
    backend.deterministic = True
    backend.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)
def seed_worker(worker_id):
    """Re-seed NumPy and ``random`` from ``torch.initial_seed()`` mod 2**32.

    Args:
        worker_id (int): Worker index supplied by the DataLoader; unused.
    """
    seed_32bit = torch.initial_seed() % (2 ** 32)
    np.random.seed(seed_32bit)
    random.seed(seed_32bit)
def get_dataloader(dataset, batch_size, num_workers, seed):
    """Build a shuffling DataLoader whose order is driven by ``seed``.

    Args:
        dataset: Dataset to iterate over.
        batch_size (int): Samples per batch.
        num_workers (int): Number of worker processes.
        seed (int): Seed for the generator handed to the loader.

    Returns:
        DataLoader: Loader with ``shuffle=True``, ``pin_memory=True``,
        ``seed_worker`` as worker init function and a seeded generator.
    """
    shuffle_rng = torch.Generator()
    shuffle_rng.manual_seed(seed)

    loader_options = {
        'batch_size': batch_size,
        'shuffle': True,
        'num_workers': num_workers,
        'worker_init_fn': seed_worker,
        'generator': shuffle_rng,
        'pin_memory': True,
    }
    return DataLoader(dataset, **loader_options)
# Usage
def main():
    """Seed everything, then build the train and validation loaders."""
    SEED = 2021
    set_seed(SEED)

    # batch_size=32, num_workers=4 and the shared seed for both loaders.
    loader_args = (32, 4, SEED)
    train_loader = get_dataloader(train_dataset, *loader_args)
    valid_loader = get_dataloader(valid_dataset, *loader_args)
    # training code...
from sklearn.model_selection import train_test_split
def set_seed(seed):
    """Seed stdlib ``random``, NumPy and PyTorch with the same value."""
    seeders = (random.seed, np.random.seed, torch.manual_seed)
    for seeder in seeders:
        seeder(seed)
# Set the seed
set_seed(2021)
# Option 1: pass the random_state parameter
X_train, X_temp = train_test_split(X, test_size=0.3, random_state=2021)
X_valid, X_test = train_test_split(X_temp, test_size=0.5, random_state=2021)
# Option 2: use a different seed for each successive split
X_train, X_temp = train_test_split(X, test_size=0.3, random_state=2021)
X_valid, X_test = train_test_split(X_temp, test_size=0.5, random_state=2022)
from sklearn.model_selection import KFold
def set_seed(seed):
    """Give Python, NumPy and PyTorch the same seed.

    Args:
        seed (int): Seed value.
    """
    for fn in [random.seed, np.random.seed, torch.manual_seed]:
        fn(seed)
set_seed(2021)
# Pass the seed through the random_state parameter
kfold = KFold(n_splits=5, shuffle=True, random_state=2021)
for fold, (train_idx, valid_idx) in enumerate(kfold.split(dataset)):
print(f"Fold {fold}")
# Re-seed for each fold (optional)
set_seed(2021 + fold)
train_data = dataset[train_idx]
valid_data = dataset[valid_idx]
# training code...
from sklearn.model_selection import StratifiedKFold
def set_seed(seed):
    """Seed the three random sources used by the training code."""
    for source_seed in (random.seed, np.random.seed, torch.manual_seed):
        source_seed(seed)
set_seed(2021)
# Split while preserving the class proportions
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2021)
for fold, (train_idx, valid_idx) in enumerate(skfold.split(X, y)):
set_seed(2021 + fold)
# training code...
def set_seed(seed):
    """Seed Python, NumPy, torch CPU and all CUDA devices.

    Args:
        seed (int): Seed value.
    """
    seeding_calls = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for call in seeding_calls:
        call(seed)
# Seed only once, up front
set_seed(2021)
for epoch in range(num_epochs):
# The seed is not reset here
for batch in train_loader:
# training code
pass
def set_seed(seed):
    """Seed stdlib, NumPy and PyTorch (CPU plus every CUDA device)."""
    for seeder in [
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ]:
        seeder(seed)
BASE_SEED = 2021
for epoch in range(num_epochs):
# Use a different seed for each epoch
set_seed(BASE_SEED + epoch)
for batch in train_loader:
# training code
pass
def set_seed(seed):
    """Apply one seed to ``random``, ``np.random`` and ``torch``.

    Args:
        seed (int): Seed value.
    """
    rng_seeders = (random.seed, np.random.seed, torch.manual_seed)
    for rng_seed in rng_seeders:
        rng_seed(seed)
# Set the global seed
set_seed(2021)
for epoch in range(num_epochs):
for batch in train_loader:
# Regular training is governed by the global seed
output = model(batch)
loss = criterion(output, target)
# Use a different seed for one specific augmentation only
if epoch > 10:
set_seed(2021 + epoch)
augmented = augmentation(batch)
set_seed(2021) # back to the original seed
import albumentations as A
def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch in that order."""
    for do_seed in (random.seed, np.random.seed, torch.manual_seed):
        do_seed(seed)
set_seed(2021)
# Albumentations uses NumPy's random, so results are reproduced automatically
# NOTE(review): verify this holds for the installed Albumentations version.
transform = A.Compose([
A.HorizontalFlip(p=0.5),
A.RandomBrightnessContrast(p=0.2),
A.Rotate(limit=30, p=0.5)
])
# Usage
image = transform(image=img)['image'] # same augmentation applied on every run
import torchvision.transforms as transforms
def set_seed(seed):
    """Seed the stdlib, NumPy and PyTorch generators.

    Args:
        seed (int): Seed value.
    """
    for seed_call in [random.seed, np.random.seed, torch.manual_seed]:
        seed_call(seed)
set_seed(2021)
# torchvision uses torch's random generator
transform = transforms.Compose([
transforms.RandomHorizontalFlip(p=0.5),
transforms.RandomRotation(30),
transforms.ColorJitter(brightness=0.2)
])
# Usage
img_transformed = transform(img) # same result on every run
def custom_augmentation(image, seed=None):
    """Randomly flip ``image`` left-right, optionally with a fixed seed.

    Args:
        image: Array accepted by ``np.fliplr``.
        seed (int | None): When given, the random decisions come from private
            generators seeded with this value. They produce the same draws
            that re-seeding the global generators would, but leave the global
            ``random`` / ``np.random`` state untouched. When ``None``, the
            global generators are used.

    Returns:
        The image, flipped horizontally when the random draw exceeds 0.5.
    """
    if seed is not None:
        # Private generators: repeatable output without resetting the
        # process-wide RNG state that other code (shuffling, other
        # augmentations) depends on.
        py_rng = random.Random(seed)
        np_rng = np.random.RandomState(seed)
    else:
        py_rng = random
        np_rng = np.random

    # Augmentation logic
    if py_rng.random() > 0.5:
        image = np.fliplr(image)

    # Rotation is not implemented yet; the angle is still drawn so the NumPy
    # stream advances exactly as before.
    angle = np_rng.uniform(-30, 30)  # noqa: F841
    return image
# Usage
set_seed(2021)
for img in images:
augmented = custom_augmentation(img, seed=2021)
# The simplest approach
SEED = 2021


def main():
    """Seed once with ``SEED``, build the model and loader, then train."""
    set_seed(SEED)
    net = Model()
    loader = get_dataloader(train_dataset, SEED)
    # Train
    train(net, loader)
# Train one model per seed and combine them into an ensemble
SEEDS = [2021, 2022, 2023, 2024, 2025]


def train_single_model(seed):
    """Train and return one model under the given seed.

    Args:
        seed (int): Seed used for the global RNGs and for the data loader.

    Returns:
        The trained model instance.
    """
    set_seed(seed)
    trained = Model()
    loader = get_dataloader(train_dataset, seed)
    # Train
    train(trained, loader)
    return trained
# Ensemble: train one model per seed
models = []
for seed in SEEDS:
model = train_single_model(seed)
models.append(model)
# Prediction
def ensemble_predict(models, x):
    """Average the outputs of several models for the same input.

    Args:
        models: Iterable of callables; each is called as ``model(x)`` and
            must return a tensor.
        x: Input passed unchanged to every model.

    Returns:
        torch.Tensor: Mean of the stacked outputs along the model axis.
    """
    outputs = []
    for member in models:
        outputs.append(member(x))
    stacked = torch.stack(outputs)
    return stacked.mean(dim=0)
# Run the experiment with several seeds and measure the mean and standard deviation
SEEDS = [2021, 2022, 2023, 2024, 2025]
results = []
for seed in SEEDS:
print(f"\n=== Experiment with seed {seed} ===")
set_seed(seed)
model = Model()
train_loader = get_dataloader(train_dataset, seed)
valid_loader = get_dataloader(valid_dataset, seed)
# Train
train(model, train_loader)
# Evaluate
accuracy = evaluate(model, valid_loader)
results.append(accuracy)
print(f"Accuracy: {accuracy:.4f}")
# Print summary statistics
mean_acc = np.mean(results)
std_acc = np.std(results)
print(f"\nMean Accuracy: {mean_acc:.4f} ± {std_acc:.4f}")
print(f"Min: {np.min(results):.4f}, Max: {np.max(results):.4f}")
# Use one fixed seed for every hyperparameter combination
BASE_SEED = 2021
hyperparameters = {
'lr': [0.001, 0.01, 0.1],
'batch_size': [16, 32, 64],
'dropout': [0.1, 0.3, 0.5]
}
for lr in hyperparameters['lr']:
for batch_size in hyperparameters['batch_size']:
for dropout in hyperparameters['dropout']:
# Same seed so the comparison is fair
set_seed(BASE_SEED)
model = Model(dropout=dropout)
train_loader = get_dataloader(
train_dataset,
batch_size=batch_size,
seed=BASE_SEED
)
# Train
train(model, train_loader, lr=lr)
# Record the seed under which the bug occurred
BUG_SEED = 2021


def debug_session():
    """Re-run training under ``BUG_SEED`` to reproduce a failure.

    Any exception raised by the forward pass or the loss is reported with
    its epoch, batch index and the seed, then re-raised.
    """
    # Same seed as the failing run, so the bug is reproduced
    set_seed(BUG_SEED)
    model = Model()
    train_loader = get_dataloader(train_dataset, BUG_SEED)

    for epoch in range(num_epochs):
        for step, batch in enumerate(train_loader):
            try:
                output = model(batch)
                loss = criterion(output, target)
            except Exception as err:
                print(f"Error at epoch {epoch}, batch {step}")
                print(f"Seed: {BUG_SEED}")
                raise err
def debug_specific_batch(target_batch_idx):
    """Replay the seeded data order and print statistics for one batch.

    Args:
        target_batch_idx (int): Zero-based index of the batch to inspect.
    """
    set_seed(2021)
    train_loader = get_dataloader(train_dataset, 2021)

    for position, batch in enumerate(train_loader):
        if position != target_batch_idx:
            continue
        # Focus the analysis on this batch only
        features = batch[0]
        print(f"Debugging batch {position}")
        print(f"Batch shape: {features.shape}")
        print(f"Batch statistics: {features.mean()}, {features.std()}")
        # fix the problem here...
        break
import logging
def setup_logging(seed):
    """Configure root logging to a per-seed file and log the seed.

    Args:
        seed (int): Seed embedded in the log file name and first message.
    """
    log_file = f'experiment_seed_{seed}.log'
    logging.basicConfig(
        filename=log_file,
        level=logging.INFO,
        format='%(asctime)s - %(message)s',
    )
    logging.info(f"Starting experiment with seed: {seed}")
def main():
    """Set up logging and seeding, then log the key training events."""
    SEED = 2021
    setup_logging(SEED)
    set_seed(SEED)

    # Log the key events during training
    logging.info("Model initialized")
    logging.info(f"Training started with seed {SEED}")
    # If a problem shows up later, the seed can be looked up in the log
def save_checkpoint(model, optimizer, epoch, seed, path):
    """Write model, optimizer, seed and all RNG states to ``path``.

    Args:
        model: Object exposing ``state_dict()``.
        optimizer: Object exposing ``state_dict()``.
        epoch (int): Epoch number recorded in the checkpoint.
        seed (int): Seed recorded alongside the RNG states.
        path: Destination passed to ``torch.save``.
    """
    training_state = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'seed': seed,  # seed information
    }
    rng_snapshot = {
        'random_state': random.getstate(),  # Python random state
        'numpy_random_state': np.random.get_state(),  # NumPy random state
        'torch_random_state': torch.get_rng_state(),  # PyTorch random state
        'cuda_random_state': torch.cuda.get_rng_state_all(),  # CUDA state
    }
    torch.save({**training_state, **rng_snapshot}, path)
# Usage
save_checkpoint(model, optimizer, epoch, SEED, 'checkpoint.pth')
def load_checkpoint(model, optimizer, path):
    """Restore model, optimizer and RNG states from a checkpoint file.

    Args:
        model: Object whose ``load_state_dict`` receives
            ``checkpoint['model_state_dict']``.
        optimizer: Object whose ``load_state_dict`` receives
            ``checkpoint['optimizer_state_dict']``.
        path: Checkpoint file to read.

    Returns:
        tuple: ``(epoch, seed)`` as stored in the checkpoint.
    """
    # The checkpoint stores Python and NumPy RNG states, which are not plain
    # tensors. The weights-only unpickler (the default since PyTorch 2.6)
    # rejects them, so full unpickling is requested explicitly.
    # SECURITY: full unpickling can execute arbitrary code - only load
    # checkpoint files from a trusted source.
    checkpoint = torch.load(path, weights_only=False)

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    seed = checkpoint['seed']

    # Restore the random states
    random.setstate(checkpoint['random_state'])
    np.random.set_state(checkpoint['numpy_random_state'])
    torch.set_rng_state(checkpoint['torch_random_state'])
    torch.cuda.set_rng_state_all(checkpoint['cuda_random_state'])

    print(f"Checkpoint loaded. Resuming from epoch {epoch} with seed {seed}")
    return epoch, seed
# Usage
start_epoch, seed = load_checkpoint(model, optimizer, 'checkpoint.pth')
def train_with_resume(resume_path=None):
    """Train from scratch, or resume from a checkpoint when one exists.

    Relies on the module-level ``model``, ``optimizer``, ``train_loader`` and
    ``num_epochs``.

    Args:
        resume_path (str | None): Checkpoint to resume from. When missing or
            not an existing path, training starts from epoch 0.
    """
    SEED = 2021
    if resume_path and os.path.exists(resume_path):
        # Resume from the checkpoint
        print("Resuming training from checkpoint...")
        # Apply the seed-related global settings BEFORE loading. The original
        # re-seeded after load_checkpoint, which overwrote the RNG states the
        # checkpoint had just restored and defeated the purpose of saving them.
        set_seed(SEED)
        start_epoch, seed = load_checkpoint(model, optimizer, resume_path)
    else:
        # Start from scratch
        print("Starting new training...")
        start_epoch = 0
        seed = SEED
        set_seed(seed)
        save_checkpoint(model, optimizer, 0, seed, 'initial.pth')

    # Training
    # NOTE(review): periodic checkpoints are written after an epoch finishes
    # but store that epoch's own index, so resuming appears to repeat the
    # checkpointed epoch - confirm the intended convention.
    for epoch in range(start_epoch, num_epochs):
        train_one_epoch(model, train_loader)
        # Save a checkpoint periodically, recording the seed actually in use
        if epoch % 10 == 0:
            save_checkpoint(
                model, optimizer, epoch, seed,
                f'checkpoint_epoch_{epoch}.pth'
            )
import os
import random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import logging
class SeedManager:
    """Holds a seed and applies it to every random source."""

    def __init__(self, seed=2021, deterministic=True):
        """Store the settings and seed everything immediately.

        Args:
            seed (int): Seed value.
            deterministic (bool): Whether cuDNN runs in deterministic mode.
        """
        self.seed = seed
        self.deterministic = deterministic
        self.set_seed()

    def set_seed(self):
        """Seed all generators and set the cuDNN flags from ``self``."""
        seed_value = self.seed
        for seeder in (
            random.seed,
            np.random.seed,
            torch.manual_seed,
            torch.cuda.manual_seed,
            torch.cuda.manual_seed_all,
        ):
            seeder(seed_value)

        # deterministic and benchmark are always set to opposite values.
        strict = bool(self.deterministic)
        torch.backends.cudnn.deterministic = strict
        torch.backends.cudnn.benchmark = not strict

        os.environ['PYTHONHASHSEED'] = str(seed_value)

    def seed_worker(self, worker_id):
        """Seed NumPy and ``random`` in a DataLoader worker.

        Uses ``torch.initial_seed()`` modulo 2**32; ``worker_id`` is unused.
        """
        derived_seed = torch.initial_seed() % (2 ** 32)
        for reseed in (np.random.seed, random.seed):
            reseed(derived_seed)

    def get_generator(self):
        """Return a new ``torch.Generator`` seeded with ``self.seed``."""
        generator = torch.Generator()
        generator.manual_seed(self.seed)
        return generator
def get_dataloader(dataset, batch_size, num_workers, seed_manager):
    """Create a reproducible, shuffling DataLoader.

    Args:
        dataset: Dataset to iterate over.
        batch_size (int): Samples per batch.
        num_workers (int): Number of worker processes.
        seed_manager: Object providing ``seed_worker`` (worker init function)
            and ``get_generator()`` (generator handed to the loader).

    Returns:
        DataLoader: Loader with ``shuffle=True`` and ``pin_memory=True``.
    """
    loader_options = {
        'batch_size': batch_size,
        'shuffle': True,
        'num_workers': num_workers,
        'worker_init_fn': seed_manager.seed_worker,
        'generator': seed_manager.get_generator(),
        'pin_memory': True,
    }
    return DataLoader(dataset, **loader_options)
def setup_logging(seed, log_dir='./logs'):
    """Configure root logging to a per-seed file inside ``log_dir``.

    Args:
        seed (int): Seed embedded in the log file name.
        log_dir (str): Directory for the log file; created if missing.
    """
    os.makedirs(log_dir, exist_ok=True)
    log_file = f'{log_dir}/experiment_seed_{seed}.log'
    logging.basicConfig(
        filename=log_file,
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
def save_checkpoint(model, optimizer, epoch, seed_manager, path):
    """Save model, optimizer, seed settings and RNG states to ``path``.

    Args:
        model: Object exposing ``state_dict()``.
        optimizer: Object exposing ``state_dict()``.
        epoch (int): Epoch number recorded in the checkpoint and the log.
        seed_manager: Object whose ``seed`` and ``deterministic`` attributes
            are stored in the checkpoint.
        path: Destination passed to ``torch.save``.
    """
    training_state = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'seed': seed_manager.seed,
        'deterministic': seed_manager.deterministic,
    }
    rng_snapshot = {
        'random_state': random.getstate(),
        'numpy_random_state': np.random.get_state(),
        'torch_random_state': torch.get_rng_state(),
        'cuda_random_state': torch.cuda.get_rng_state_all(),
    }
    torch.save({**training_state, **rng_snapshot}, path)
    logging.info(f"Checkpoint saved at epoch {epoch}")
def load_checkpoint(model, optimizer, path):
    """Load a checkpoint and restore model, optimizer and RNG states.

    Args:
        model: Object whose ``load_state_dict`` receives
            ``checkpoint['model_state_dict']``.
        optimizer: Object whose ``load_state_dict`` receives
            ``checkpoint['optimizer_state_dict']``.
        path: Checkpoint file to read.

    Returns:
        tuple: ``(epoch, seed, deterministic)`` as stored in the checkpoint.
    """
    # The checkpoint stores Python and NumPy RNG states, which are not plain
    # tensors. The weights-only unpickler (the default since PyTorch 2.6)
    # rejects them, so full unpickling is requested explicitly.
    # SECURITY: full unpickling can execute arbitrary code - only load
    # checkpoint files from a trusted source.
    checkpoint = torch.load(path, weights_only=False)

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Restore the random states
    random.setstate(checkpoint['random_state'])
    np.random.set_state(checkpoint['numpy_random_state'])
    torch.set_rng_state(checkpoint['torch_random_state'])
    torch.cuda.set_rng_state_all(checkpoint['cuda_random_state'])

    return checkpoint['epoch'], checkpoint['seed'], checkpoint['deterministic']
def main():
# 설정
SEED = 2021
DETERMINISTIC = True
BATCH_SIZE = 32
NUM_WORKERS = 4
NUM_EPOCHS = 100
# Seed 관리자 초기화
seed_manager = SeedManager(SEED, DETERMINISTIC)
# 로깅 설정
setup_logging(SEED)
logging.info(f"Training started with seed {SEED}")
logging.info(f"Deterministic mode: {DETERMINISTIC}")
# 데이터 로더 생성
train_loader = get_dataloader(
train_dataset, BATCH_SIZE, NUM_WORKERS, seed_manager
)
valid_loader = get_dataloader(
valid_dataset, BATCH_SIZE, NUM_WORKERS, seed_manager
)
# 모델 및 옵티마이저
model = Model().cuda()
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()
# 학습 루프
best_accuracy = 0.0
for epoch in range(NUM_EPOCHS):
# 학습
model.train()
train_loss = 0.0
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.cuda(), target.cuda()
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()
# 검증
model.eval()
correct = 0
total = 0
with torch.no_grad():
for data, target in valid_loader:
data, target