# model.py
import io
import base64
import uuid
from typing import Dict

import torch
from PIL import Image
from torchvision import models, transforms
from kserve import Model, ModelServer


def generate_uuid() -> str:
    return str(uuid.uuid4())


class AlexNetModel(Model):
    def __init__(self, name: str):
        super().__init__(name)
        self.name = name
        self.load()

    def load(self):
        # Download the pretrained ImageNet weights on startup.
        self.model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)
        self.model.eval()
        self.ready = True

    def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
        # The v1 protocol wraps inputs in an "instances" list.
        img_data = payload["instances"][0]["image"]["b64"]
        raw_img_data = base64.b64decode(img_data)
        # Convert to RGB so grayscale or RGBA inputs don't break preprocessing.
        input_image = Image.open(io.BytesIO(raw_img_data)).convert("RGB")
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(input_image).unsqueeze(0)
        with torch.no_grad():
            output = self.model(input_tensor)
        # Take top-5 over the softmax probabilities, not the raw logits.
        probs = torch.nn.functional.softmax(output, dim=1)
        values, top_5 = torch.topk(probs, 5)
        result = values.flatten().tolist()
        response_id = generate_uuid()
        return {"predictions": result, "response_id": response_id}


if __name__ == "__main__":
    model = AlexNetModel("custom-model")
    ModelServer().start([model])
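
A quick in-process check of the handler can run before any container build. The sketch below is a minimal local test, assuming the file above is saved as model.py and that an image cat.jpg sits next to it (both names are illustrative):

import base64

from model import AlexNetModel

with open("cat.jpg", "rb") as f:
    b64_image = base64.b64encode(f.read()).decode("utf-8")

payload = {"instances": [{"image": {"b64": b64_image}}]}

model = AlexNetModel("custom-model")  # downloads the AlexNet weights on init
print(model.predict(payload))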
# requirements.txt
kserve>=0.13.0
torch>=1.12.0
torchvision>=0.13.0
Pillow>=8.0.0
docker build -t alexnet-predictor:v1 .
docker run -p 80:8080 alexnet-predictor:v1
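
Once the container is running, a readiness check confirms the model loaded. A small sketch using requests, assuming the port mapping above:

import requests

# KServe's v1 protocol reports per-model readiness at /v1/models/<name>.
resp = requests.get("http://localhost:80/v1/models/custom-model")
print(resp.json())  # expected to include "ready": true once load() finished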
curl -X POST http://localhost:80/v1/models/custom-model:predict \
-H "Content-Type: application/json" \
-d '{"instances": [{"image": {"b64": "BASE64_ENCODED_IMAGE_HERE"}}]}'
curl -v -H "Content-Type: application/json" \
  http://localhost:80/v1/models/custom-model:predict \
  -d @input.json
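
The second curl reads its payload from input.json. One way to produce that file, assuming a local cat.jpg (the image name is illustrative):

import base64
import json

with open("cat.jpg", "rb") as f:
    b64_image = base64.b64encode(f.read()).decode("utf-8")

with open("input.json", "w") as f:
    json.dump({"instances": [{"image": {"b64": b64_image}}]}, f)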
# Dockerfile
FROM python:3.10-slim
WORKDIR /app
# Install dependencies first so code changes don't invalidate the pip layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY model.py .
EXPOSE 8080
CMD ["python", "model.py"]
---
# kubernetes/inference-service.yaml
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: alexnet-predictor
spec:
  predictor:
    containers:
      - name: kserve-container
        image: YOUR-REGISTRY/alexnet-predictor:v1
        ports:
          - containerPort: 8080
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
name: "alexnet-predictor"
spec:
predictor:
containers:
- name: kserve-container
image: ${REGISTRY}/alexnet-predictor:v3
ports:
- containerPort: 8080
readinessProbe:
httpGet:
path: /v1/models/custom-model/ready
port: 8080
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests:
cpu: "1"
memory: "2Gi"
limits:
cpu: "2"
memory: "4Gi"