ollama

Gin Song·2025년 5월 3일

https://sysdocu.tistory.com/2036

1. 모델 다운로드

mkdir -p /data/Llama4/Q4_K_M
cd /data/Llama4/Q4_K_M

wget https://huggingface.co/unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF/resolve/main/Q4_K_M/Llama-4-Scout-17B-16E-Instruct-Q4_K_M-0000{1..2}-of-00002.gguf

2. Llama.cpp

apt -y install cmake libopenblas-dev g++ libcurl4-openssl-dev
git clone https://github.com/ggerganov/llama.cpp.git

cd llama.cpp
mkdir build
cd build
cmake ..

cmake --build . --config Release


3. 분할된 파일 합치기

cd bin

./llama-gguf-split --merge \
/data/Llama4/Q4_K_M/Llama-4-Scout-17B-16E-Instruct-Q4_K_M-00001-of-00002.gguf \
/data/Llama4/Q4_K_M/Llama-4-Scout-17B-16E-Instruct-Q4_K_M.gguf


4. LLM 실행

./llama-cli -m /data/Llama4/Q4_K_M/Llama-4-Scout-17B-16E-Instruct-Q4_K_M.gguf -p "오늘은 며칠이야?"

./llama-server -m /data/Llama4/Q4_K_M/Llama-4-Scout-17B-16E-Instruct-Q4_K_M.gguf --host 0.0.0.0 --port 8080


apt -y install jq

curl http://{서버IP}:8080/completion -H "Content-Type: application/json" -d '{
"prompt": "오늘은 며칠이야?",
"n_predict": 100
}' | jq

도커 gpu

distribution=$(. /etc/os-release;echo $ID$VERSION_ID) # ubuntu20.04 → ubuntu20.04

curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/libnvidia-container/ubuntu22.04/libnvidia-container.list | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

sudo apt update
sudo apt install -y nvidia-container-toolkit

sudo nvidia-ctk runtime configure --runtime=docker

sudo systemctl restart docker

docker run --rm --gpus all nvidia/cuda:12.3.2-base-ubuntu22.04 nvidia-smi

gpu

sudo lshw -C system
sudo lshw -C display

cat /proc/cpuinfo

lspci | grep -i VGA
lspci | grep -E "VGA|3D|Display"
lspci -v

apt search nvidia-driver

nvidia-smi --query | fgrep 'Product Name'
watch nvidia-smi

sudo ubuntu-drivers autoinstall

sudo apt-get update
sudo apt-get install -y curl ca-certificates gnupg

curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg

curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit

sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker

docker run --rm --gpus all nvidia/cuda:12.2.0-base-ubuntu20.04 nvidia-smi

0개의 댓글