feat: deploy model api server to chicago-web01
This commit is contained in:
parent
515a0e6d81
commit
0cb89ddc80
6 changed files with 394 additions and 18 deletions
127
api/salience-editor-api.nomad.hcl
Normal file
127
api/salience-editor-api.nomad.hcl
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
# Nomad service job that runs the salience-editor API under gunicorn
# (launched through uv) on the client node named "chicago-web01" in
# datacenter "ord10".
#
# $RELEASE_PATH is not a Nomad variable: per the comment on the release-path
# template below, it is filled in at deploy time via envsubst.
# $RELEASE_PLACEHOLDER is presumably substituted the same way -- TODO confirm.
#
# NOTE(review): if envsubst is invoked without an explicit variable list it
# also rewrites ${NOMAD_PORT_http}, ${NOMAD_TASK_DIR} and the $host references
# inside the smoke-test script. Confirm the deploy script restricts the
# substitution to the release placeholders.
job "salience-editor-api" {
|
||||
datacenters = ["ord10"]
|
||||
type = "service"
|
||||
|
||||
# Pin the job to a single, named client node.
constraint {
|
||||
attribute = "${node.unique.name}"
|
||||
value = "chicago-web01"
|
||||
}
|
||||
|
||||
group "app" {
|
||||
count = 1
|
||||
|
||||
network {
|
||||
mode = "host"
|
||||
|
||||
port "http" {
|
||||
# No static port is set, so Nomad assigns an available one; the task reads it as NOMAD_PORT_http.
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Rollout policy: one canary at a time, judged healthy by its service checks,
# promoted automatically when healthy and reverted automatically on failure.
# NOTE(review): with count = 1 and canary = 1 on a single pinned node, the old
# and new allocations presumably run side by side during a deploy, each
# reserving the full memory amount below -- confirm the node has room for both.
update {
|
||||
max_parallel = 1
|
||||
health_check = "checks"
|
||||
min_healthy_time = "10s"
|
||||
healthy_deadline = "5m"
|
||||
auto_promote = true
|
||||
auto_revert = true
|
||||
canary = 1
|
||||
}
|
||||
|
||||
task "gunicorn-server" {
|
||||
driver = "raw_exec"
|
||||
|
||||
user = "peoplesgrocers"
|
||||
|
||||
# Runs `uv run gunicorn ... salience:app` from the release directory.
# gunicorn binds to loopback only, on the Nomad-assigned "http" port.
# NOTE(review): the HTTP health check further down must be able to reach this
# bind address -- confirm the address Nomad registers for the service on this
# node resolves to 127.0.0.1.
config {
|
||||
work_dir = "$RELEASE_PATH"
|
||||
command = "/home/peoplesgrocers/.local/bin/uv"
|
||||
# Tip: add "--log-level", "debug" to the gunicorn args below for verbose logging.
|
||||
args = ["run", "gunicorn", "--preload", "--workers", "3", "--bind", "127.0.0.1:${NOMAD_PORT_http}", "--timeout", "300", "salience:app"]
|
||||
}
|
||||
|
||||
# Environment for the task. HOME and the cache directories all live under the
# service user's home directory.
env {
|
||||
PORT = "${NOMAD_PORT_http}"
|
||||
ORIGIN = "https://peoplesgrocers.com"
|
||||
#PATH = "/home/peoplesgrocers/.local/bin:/usr/local/bin:/usr/bin:/bin"
|
||||
HOME = "/home/peoplesgrocers"
|
||||
UV_CACHE_DIR = "/home/peoplesgrocers/.cache/uv"
|
||||
HF_HOME = "/home/peoplesgrocers/cache-huggingface"
|
||||
NLTK_DATA = "/home/peoplesgrocers/cache-nltk"
|
||||
}
|
||||
|
||||
# Exposes RELEASE_PATH to the task as an environment variable (env = true).
# The $RELEASE_PATH placeholder is filled in during deployment via envsubst.
|
||||
template {
|
||||
data = <<EOH
|
||||
RELEASE_PATH="$RELEASE_PATH"
|
||||
EOH
|
||||
destination = "local/env"
|
||||
env = true
|
||||
}
|
||||
|
||||
service {
|
||||
name = "salience-editor-api"
|
||||
port = "http"
|
||||
|
||||
tags = [
|
||||
"flask",
|
||||
"gunicorn",
|
||||
"api",
|
||||
"ml"
|
||||
]
|
||||
|
||||
# Health check against the /stats endpoint, chosen because it is lightweight.
|
||||
check {
|
||||
type = "http"
|
||||
path = "/stats"
|
||||
interval = "10s"
|
||||
timeout = "5s"
|
||||
|
||||
# Restart the task after 3 failed checks, ignoring failures during the grace period.
check_restart {
|
||||
limit = 3
|
||||
grace = "180s" # 3 minutes for model loading
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
meta {
|
||||
version = "$RELEASE_PLACEHOLDER"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Renders a smoke-test script that queries /models and POSTs a sample text to
# /salience on the task's local port, printing both responses.
# NOTE(review): change_mode appears to fire only when the rendered output
# changes after the first render, and this template reads only a static env
# value, so the change_script copy into $RELEASE_PATH may never run -- confirm
# the script actually shows up in the release directory.
template {
|
||||
data = <<EOH
|
||||
#!/bin/sh
|
||||
host=http://127.0.0.1:{{ env "NOMAD_PORT_http" }}
|
||||
|
||||
echo "=== /models ==="
|
||||
curl -s "$host/models"
|
||||
echo
|
||||
|
||||
echo "=== /salience ==="
|
||||
curl -s -X POST -d "The cat sat on the mat. The dog chased the cat." "$host/salience?model=all-mpnet-base-v2"
|
||||
echo
|
||||
EOH
|
||||
destination = "local/smoke-test.sh"
|
||||
perms = "0755"
|
||||
change_mode = "script"
|
||||
change_script {
|
||||
command = "/bin/sh"
|
||||
args = ["-c", "cp ${NOMAD_TASK_DIR}/smoke-test.sh $RELEASE_PATH/smoke-test.sh"]
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Resource reservation for the task (Nomad expresses cpu in MHz and memory in MB).
resources {
|
||||
cpu = 2000
|
||||
# If the task keeps dying with exit code 137, check the kernel log for OOM kills:
|
||||
#   sudo dmesg -T | grep -i "killed process"
|
||||
# This is what I saw when the memory limit was too low:
|
||||
# [Thu Nov 27 18:19:09 2025] Memory cgroup out of memory: Killed process 2750984 (gunicorn) total-vm:4556920kB, anon-rss:295900kB, file-rss:244188kB, shmem-rss:0kB, UID:1010 pgtables:1920kB oom_score_adj:0
|
||||
memory = 8000
|
||||
}
|
||||
|
||||
# Timed manually once: shutdown took a good 18 seconds, so allow 30s after SIGTERM.
|
||||
kill_timeout = "30s"
|
||||
kill_signal = "SIGTERM"
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue