salience-editor/api/salience-editor-api.nomad.hcl

128 lines
3.3 KiB
HCL
Raw Permalink Normal View History

# Nomad job for the salience-editor API: a single waitress server started
# through `uv` with the raw_exec driver, pinned to the node named "mac-mini".
#
# $RELEASE_PATH is not Nomad interpolation; it is substituted at deploy time
# via envsubst (see the env template below). $RELEASE_PLACEHOLDER is presumably
# filled in the same way -- TODO confirm against the deploy script.
job "salience-editor-api" {
  datacenters = ["ord10"]
  type        = "service"

  # Only place this job on the node whose unique name is "mac-mini".
  constraint {
    attribute = "${node.unique.name}"
    value     = "mac-mini"
  }

  group "app" {
    count = 1

    network {
      mode = "host"
      port "http" {
        # Nomad will assign an available port
      }
    }

    # Canary rollout: one canary at a time, judged by the service checks,
    # promoted automatically when healthy and reverted automatically otherwise.
    update {
      max_parallel     = 1
      health_check     = "checks"
      min_healthy_time = "10s"
      healthy_deadline = "5m"
      auto_promote     = true
      auto_revert      = true
      canary           = 1
    }

    task "waitress-server" {
      driver = "raw_exec"
      #user = "nomad"

      config {
        work_dir = "$RELEASE_PATH"
        command  = "/Users/nomad/.local/bin/uv"
        # Waitress is single-process (no fork), avoiding Metal/MPS issues on macOS
        # Listens on both 10.77.0.2 and loopback, on the Nomad-assigned port.
        args = ["run", "waitress-serve", "--listen=10.77.0.2:${NOMAD_PORT_http}", "--listen=127.0.0.1:${NOMAD_PORT_http}", "salience:app"]
      }

      env {
        PORT   = "${NOMAD_PORT_http}"
        ORIGIN = "https://peoplesgrocers.com"
        #PATH = "/Users/nomad/.local/bin:/usr/local/bin:/usr/bin:/bin"
        HOME         = "/Users/nomad"
        UV_CACHE_DIR = "/Users/nomad/.cache/uv"
        HF_HOME      = "/Users/nomad/cache-huggingface"
        NLTK_DATA    = "/Users/nomad/cache-nltk"
        # NOTE(review): the remark below names chicago-web01, but this job is
        # pinned to mac-mini; it looks like a leftover from another node.
        #TORCH_DEVICE = "cpu" # Force CPU since chicago-web01 has no GPU
      }

      # Release path set during deployment via envsubst
      # Rendered to local/env and loaded into the task environment (env = true).
      template {
        data        = <<EOH
RELEASE_PATH="$RELEASE_PATH"
EOH
        destination = "local/env"
        env         = true
      }

      service {
        name = "salience-editor-api"
        port = "http"
        tags = [
          "flask",
          "waitress",
          "api",
          "ml"
        ]

        # Health check on stats endpoint (lightweight)
        check {
          type     = "http"
          path     = "/stats"
          interval = "10s"
          timeout  = "5s"

          check_restart {
            limit = 3
            grace = "180s" # 3 minutes for model loading
          }
        }

        meta {
          version = "$RELEASE_PLACEHOLDER"
        }
      }

      # Smoke-test script: queries /models, then POSTs a sample sentence to
      # /salience on the port Nomad assigned to this allocation.
      # The curl header option comes before --data-binary so that the sentence,
      # not "-H", is taken as the request body.
      # NOTE(review): change_script is documented as running when the rendered
      # template changes; confirm the copy into $RELEASE_PATH also happens on
      # the first render.
      template {
        data          = <<EOH
#!/bin/sh
host=http://10.77.0.2:{{ env "NOMAD_PORT_http" }}
echo "=== /models ==="
curl -s "$host/models"
echo
echo "=== /salience ==="
curl -s -H "Content-Type: text/plain" --data-binary "The cat sat on the mat. The dog chased the cat." "$host/salience?model=all-mpnet-base-v2"
echo
EOH
        destination   = "local/smoke-test.sh"
        perms         = "0755"
        change_mode   = "script"
        change_script {
          command = "/bin/sh"
          args    = ["-c", "cp ${NOMAD_TASK_DIR}/smoke-test.sh $RELEASE_PATH/smoke-test.sh"]
        }
      }

      resources {
        cpu = 2000
        # If the task keeps dying with Error code 137, check
        # sudo dmesg -T | grep -i "killed process"
        # What I saw when the memory limit was too low was
        # [Thu Nov 27 18:19:09 2025] Memory cgroup out of memory: Killed process 2750984 (gunicorn) total-vm:4556920kB, anon-rss:295900kB, file-rss:244188kB, shmem-rss:0kB, UID:1010 pgtables:1920kB oom_score_adj:0
        # NOTE(review): that log line names gunicorn and a memory cgroup, while
        # this task runs waitress; presumably it was captured from an earlier
        # deployment -- confirm it still applies to this node.
        memory = 8000
      }

      # I manually timed it once. Took a good 18 seconds to shutdown
      kill_timeout = "30s"
      kill_signal  = "SIGTERM"
    }
  }
}