
In this post, I want to go over an example of using a pretrained model with the diffusers library and then fine-tuning it.
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
from datasets import load_dataset
from diffusers import DDIMScheduler, DDPMPipeline
from matplotlib import pyplot as plt
from PIL import Image
from torchvision import transforms
from tqdm.auto import tqdm
device = "cuda" if torch.cuda.is_available() else "cpu"

image_pipe = DDPMPipeline.from_pretrained("google/ddpm-celebahq-256")
image_pipe.to(device)
images = image_pipe().images
images[0]
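
By default the pipeline returns a single PIL image. A quick way to look at a few samples at once is sketched below; batch_size is a standard argument of DDPMPipeline's call, and matplotlib is already imported above, but the figure layout here is just my own choice, not from the original post.

# Generate a small batch (full 1000-step DDPM sampling, so this is slow)
images = image_pipe(batch_size=4).images

fig, axs = plt.subplots(1, 4, figsize=(16, 4))
for img, ax in zip(images, axs):
    ax.imshow(img)
    ax.axis("off")
plt.show()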

scheduler = DDIMScheduler.from_pretrained("google/ddpm-celebahq-256")
scheduler.set_timesteps(num_inference_steps=40)
image_pipe.scheduler = scheduler
images = image_pipe(num_inference_steps=40).images
images[0]

# Random noise from a standard normal distribution: N(0, 1)
x = torch.randn(4, 3, 256, 256).to(device)

for i, t in tqdm(enumerate(scheduler.timesteps)):
    # Scale the input as required by the scheduler
    model_input = scheduler.scale_model_input(x, t)
    with torch.no_grad():
        noise_pred = image_pipe.unet(model_input, t)["sample"]
    scheduler_output = scheduler.step(noise_pred, t, x)
    # Update x with the previous (less noisy) sample
    x = scheduler_output.prev_sample
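
At this point x is a tensor with values roughly in [-1, 1] rather than a PIL image. One way to view the batch, using the torchvision and matplotlib imports from above, is the minimal sketch below (the grid layout is my own assumption):

# Map from [-1, 1] back to [0, 1] and tile the batch into a grid for display
grid = torchvision.utils.make_grid(x.clamp(-1, 1) * 0.5 + 0.5, nrow=4)
plt.figure(figsize=(12, 12))
plt.imshow(grid.permute(1, 2, 0).cpu())
plt.axis("off")
plt.show()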
The 40 DDIM timesteps the loop runs over can be inspected directly:

scheduler.timesteps
tensor([975, 950, 925, 900, 875, 850, 825, 800, 775, 750, 725, 700, 675, 650,
        625, 600, 575, 550, 525, 500, 475, 450, 425, 400, 375, 350, 325, 300,
        275, 250, 225, 200, 175, 150, 125, 100,  75,  50,  25,   0])


Next, the pretrained pipeline is fine-tuned on a new dataset with the following settings (a minimal setup sketch follows the list):

lr : 1e-5
optimizer : AdamW
grad_accumulation_steps : 2
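
The training loop below assumes a DataLoader named train_dataloader and an AdamW optimizer already exist. Here is a minimal setup sketch using the load_dataset and transforms imports from the top of the post; the dataset name "huggan/smithsonian_butterflies_subset", its "image" column, the batch size, and the epoch count are placeholder assumptions for illustration, not taken from the original post.

# Assumed setup for illustration — dataset name, column name, batch size and
# epoch count are placeholders; swap in your own data.
dataset = load_dataset("huggan/smithsonian_butterflies_subset", split="train")

image_size = 256
batch_size = 4
num_epochs = 2
grad_accumulation_steps = 2

preprocess = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),  # map pixel values to [-1, 1]
])

def transform(examples):
    images = [preprocess(image.convert("RGB")) for image in examples["image"]]
    return {"images": images}

dataset.set_transform(transform)
train_dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

optimizer = torch.optim.AdamW(image_pipe.unet.parameters(), lr=1e-5)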
for epoch in range(num_epochs):
    for step, batch in enumerate(train_dataloader):
        clean_images = batch["images"].to(device)
        # Sample random noise and a random timestep for each image in the batch
        noise = torch.randn(clean_images.shape).to(device)
        batch_size = clean_images.shape[0]
        max_timestep = image_pipe.scheduler.config.num_train_timesteps
        timesteps = torch.randint(0, max_timestep, (batch_size,), device=device).long()

        # Forward diffusion: add noise to the clean images at the sampled timesteps
        noisy_images = image_pipe.scheduler.add_noise(clean_images, noise, timesteps)

        # Predict the noise and compare it with the noise that was actually added
        noise_pred = image_pipe.unet(noisy_images, timesteps, return_dict=False)[0]
        loss = F.mse_loss(noise_pred, noise)
        loss.backward()

        # Gradient accumulation: step the optimizer every grad_accumulation_steps batches
        if (step + 1) % grad_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
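
Once fine-tuning is done, sampling works exactly as before, and the whole pipeline can be written to disk with save_pretrained from diffusers; the output directory name below is just a placeholder.

# Sample from the fine-tuned model with the same DDIM scheduler as before
images = image_pipe(num_inference_steps=40).images
images[0]

# Save the fine-tuned pipeline — the directory name is a placeholder
image_pipe.save_pretrained("ddpm-celebahq-256-finetuned")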
