KServe Custom Predictor

임정민 · October 30, 2024

# model.py
import io
import base64
from PIL import Image
from torchvision import models, transforms
from typing import Dict
import torch
from kserve import Model, ModelServer
import uuid

def generate_uuid():
    return str(uuid.uuid4())

class AlexNetModel(Model):
    def __init__(self, name: str):
        super().__init__(name)
        self.name = name
        self.load()

    def load(self):
        # Load ImageNet-pretrained AlexNet. On torchvision >= 0.13 the
        # `pretrained` flag is deprecated in favor of
        # models.alexnet(weights=models.AlexNet_Weights.DEFAULT).
        self.model = models.alexnet(pretrained=True)
        self.model.eval()
        self.ready = True

    def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
        # Decode the base64-encoded image from the V1 protocol request body.
        img_data = payload["instances"][0]["image"]["b64"]
        raw_img_data = base64.b64decode(img_data)
        # convert("RGB") guards against grayscale or RGBA inputs.
        input_image = Image.open(io.BytesIO(raw_img_data)).convert("RGB")
        # Standard ImageNet preprocessing for AlexNet.
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(input_image).unsqueeze(0)
        with torch.no_grad():
            output = self.model(input_tensor)
        # Convert logits to probabilities, then keep the top-5 scores.
        probs = torch.nn.functional.softmax(output, dim=1)
        values, indices = torch.topk(probs, 5)
        result = values.flatten().tolist()
        response_id = generate_uuid()
        return {"predictions": result, "id": response_id}

if __name__ == "__main__":
    model = AlexNetModel("custom-model")
    ModelServer().start([model])
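
The predict path can be sanity-checked in plain Python before containerizing, by calling the model object directly. A minimal sketch, assuming a local test image at test.jpg (hypothetical path):

# local_test.py -- call predict() directly, no server needed (sketch)
import base64
from model import AlexNetModel

with open("test.jpg", "rb") as f:  # hypothetical test image
    b64_image = base64.b64encode(f.read()).decode("utf-8")

model = AlexNetModel("custom-model")  # load() downloads the AlexNet weights
payload = {"instances": [{"image": {"b64": b64_image}}]}
print(model.predict(payload))  # -> {"predictions": [...], "id": "..."}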

# requirements.txt
kserve>=0.13.0
torch>=1.9.0
torchvision>=0.10.0
Pillow>=8.0.0
numpy>=1.19.5

# Build the Docker image
docker build -t alexnet-predictor:v1 .

# Run the container (map container port 8080 to host port 80)
docker run -p 80:8080 alexnet-predictor:v1
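
Once the container is up, the KServe V1 protocol also exposes a per-model readiness endpoint at GET /v1/models/<name>; a quick check against the port mapping above (requests is an extra dependency here):

# check_ready.py -- poll the V1 readiness endpoint (sketch)
import requests

resp = requests.get("http://localhost:80/v1/models/custom-model")
print(resp.status_code, resp.json())  # expected: {"name": "custom-model", "ready": true}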

# Example test request
curl -X POST http://localhost:80/v1/models/custom-model:predict \
  -H "Content-Type: application/json" \
  -d '{"instances": [{"image": {"b64": "BASE64_ENCODED_IMAGE_HERE"}}]}'
  
curl -v -X POST http://localhost/v1/models/custom-model:predict \
  -H "Content-Type: application/json" \
  -d @input.json
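
The input.json used by the second request must follow the same "instances" schema as the inline example; a small helper to generate it, assuming a local test.jpg (hypothetical path):

# make_input.py -- write the request body for curl -d @input.json (sketch)
import base64
import json

with open("test.jpg", "rb") as f:  # hypothetical test image
    b64_image = base64.b64encode(f.read()).decode("utf-8")

with open("input.json", "w") as f:
    json.dump({"instances": [{"image": {"b64": b64_image}}]}, f)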
# Dockerfile
FROM python:3.8-slim

WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the code after installing dependencies so code changes
# don't invalidate the cached pip layer.
COPY model.py .

EXPOSE 8080

CMD ["python", "model.py"]

---
# kubernetes/inference-service.yaml
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: alexnet-predictor
spec:
  predictor:
    containers:
      - name: kserve-container
        image: YOUR-REGISTRY/alexnet-predictor:v1
        ports:
          - containerPort: 8080
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "alexnet-predictor"
spec:
  predictor:
    containers:
      - name: kserve-container
        image: ${REGISTRY}/alexnet-predictor:v3
        ports:
          - containerPort: 8080
        readinessProbe:
          httpGet:
            path: /v1/models/custom-model/ready
            port: 8080
          initialDelaySeconds: 30
          periodSeconds: 30
        resources:
          requests:
            cpu: "1"
            memory: "2Gi"
          limits:
            cpu: "2"
            memory: "4Gi"
GitHub: https://github.com/min731