Including the model when the Docker image is built

The model weights are downloaded during docker build (by download_models.py) and baked into the image, so the container never has to download anything from the Hugging Face Hub at runtime.

# Dockerfile
FROM python:3.10.15-slim-bullseye

# Install system dependencies
RUN apt-get update && apt-get install -y \
        curl \
        wget \
        git \
        vim \
        nano \
        build-essential \
        libssl-dev \
        libffi-dev \
        python3-dev \
    && apt-get clean

WORKDIR /app/

# Install Python dependencies first so this layer is cached when only the app code changes
COPY ./requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Download the model weights at build time so they are baked into the image (saved to /app/checkpoints)
COPY ./download_models.py /app/download_models.py
RUN python download_models.py

# Copy the rest of the application code
COPY . /app/

# Launch the Gradio app defined in main.py
CMD ["gradio", "main.py"]
# download_models.py
import os

from transformers import AutoTokenizer, AutoModelForTokenClassification

# Model to bake into the image and the directory to save it to
model_id = "FacebookAI/xlm-roberta-large-finetuned-conll03-english"
model_dir = "/app/checkpoints"

# Ensure the directory exists
os.makedirs(model_dir, exist_ok=True)

print(f"Downloading and saving model and tokenizer to {model_dir}...")

# Download tokenizer and save it to the specified directory
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.save_pretrained(model_dir)

# Download model and save it to the specified directory
model = AutoModelForTokenClassification.from_pretrained(model_id)
model.save_pretrained(model_dir)

print("Model and tokenizer download and save completed.")
At runtime the app loads the tokenizer and model from the checkpoint directory baked into the image, rather than from the Hub:

from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

def ner_model(model_dir):
    # Load tokenizer and model from the local path only (no network access)
    tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
    model = AutoModelForTokenClassification.from_pretrained(model_dir, local_files_only=True)
    ner = pipeline("token-classification", model=model, tokenizer=tokenizer)
    return ner

# Initialize the NER pipeline from the directory populated by download_models.py
model_dir = "/app/checkpoints"
ner = ner_model(model_dir)
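The Dockerfile's CMD (gradio main.py) expects main.py to expose a Gradio app; by default the gradio CLI looks for a top-level variable named demo. A minimal sketch of that wiring, assuming the ner pipeline above lives in main.py; the function name extract_entities and the interface layout are illustrative, not from the original code:

import gradio as gr

def extract_entities(text: str):
    # Run token classification on the input text
    results = ner(text)
    # Cast numpy float32 scores to plain floats so the output is JSON-serializable
    for r in results:
        r["score"] = float(r["score"])
    return results

# The gradio CLI looks for a top-level Interface/Blocks named `demo`
demo = gr.Interface(
    fn=extract_entities,
    inputs=gr.Textbox(lines=4, label="Input text"),
    outputs=gr.JSON(label="Entities"),
    title="NER with xlm-roberta-large finetuned on CoNLL-03",
)

if __name__ == "__main__":
    # Bind to 0.0.0.0 so the app is reachable from outside the container
    demo.launch(server_name="0.0.0.0", server_port=7860)

When the app is started through the gradio CLI (as in the CMD above) rather than python main.py, the CLI manages the server itself, so it may also be worth setting the GRADIO_SERVER_NAME=0.0.0.0 environment variable (for example via an ENV line in the Dockerfile) so the app listens on an address reachable from outside the container.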