# Nomad job specification (HCL).
# Nomad service job for the salience editor API: one gunicorn instance,
# started through `uv` with the raw_exec driver, pinned to a single node.
#
# This file is a deployment template. The RELEASE_PATH and RELEASE_PLACEHOLDER
# shell-style placeholders are filled in before the job is submitted (see the
# envsubst comment on the first template block below).
#
# NOTE(review): the file also contains Nomad interpolations (NOMAD_PORT_http,
# NOMAD_TASK_DIR) and a shell variable (host) that use the same dollar syntax.
# An unrestricted envsubst run would replace those with empty strings, so the
# deploy step presumably limits envsubst to the two placeholders via its
# SHELL-FORMAT argument -- confirm against the deploy script.
job "salience-editor-api" {
  datacenters = ["ord10"]
  type        = "service"

  # Run only on the node with this exact name.
  constraint {
    attribute = "${node.unique.name}"
    value     = "chicago-web01"
  }

  group "app" {
    count = 1

    network {
      mode = "host"

      port "http" {
        # Nomad will assign an available port
      }
    }

    # Roll out one canary at a time. Health is judged by the service checks;
    # a healthy canary is promoted automatically and a failed deployment is
    # reverted automatically.
    update {
      max_parallel     = 1
      health_check     = "checks"
      min_healthy_time = "10s"
      healthy_deadline = "5m"
      auto_promote     = true
      auto_revert      = true
      canary           = 1
    }

    task "gunicorn-server" {
      driver = "raw_exec"

      user = "peoplesgrocers"

      config {
        # Placeholder: replaced with the release directory at deploy time.
        work_dir = "$RELEASE_PATH"
        command  = "/home/peoplesgrocers/.local/bin/uv"
        # You can add --log-level debug to gunicorn
        # NOTE(review): gunicorn listens on loopback only, while the service
        # check below is registered against the "http" port. Confirm that the
        # address the check resolves to can actually reach 127.0.0.1.
        args = ["run", "gunicorn", "--preload", "--workers", "3", "--bind", "127.0.0.1:${NOMAD_PORT_http}", "--timeout", "300", "salience:app"]
      }

      env {
        PORT   = "${NOMAD_PORT_http}"
        ORIGIN = "https://peoplesgrocers.com"
        #PATH = "/home/peoplesgrocers/.local/bin:/usr/local/bin:/usr/bin:/bin"
        HOME         = "/home/peoplesgrocers"
        UV_CACHE_DIR = "/home/peoplesgrocers/.cache/uv"
        HF_HOME      = "/home/peoplesgrocers/cache-huggingface"
        NLTK_DATA    = "/home/peoplesgrocers/cache-nltk"
      }

      # Release path set during deployment via envsubst
      # Rendered to local/env and loaded into the task environment (env = true).
      # The heredoc body is kept at column 0 so the rendered file has no
      # leading whitespace.
      template {
        data        = <<EOH
RELEASE_PATH="$RELEASE_PATH"
EOH
        destination = "local/env"
        env         = true
      }

      service {
        name = "salience-editor-api"
        port = "http"

        tags = [
          "flask",
          "gunicorn",
          "api",
          "ml"
        ]

        # Health check on stats endpoint (lightweight)
        check {
          type     = "http"
          path     = "/stats"
          interval = "10s"
          timeout  = "5s"

          # Restart the task after 3 consecutive failed checks, but only once
          # the grace period has elapsed.
          check_restart {
            limit = 3
            grace = "180s" # 3 minutes for model loading
          }
        }

        meta {
          # Placeholder: replaced at deploy time.
          version = "$RELEASE_PLACEHOLDER"
        }
      }

      # Renders an executable smoke-test script that queries the running API on
      # its assigned port. The heredoc body is kept at column 0 so that the
      # "#!/bin/sh" shebang is the very first bytes of the rendered file.
      #
      # NOTE(review): change_mode = "script" runs change_script only when the
      # template is re-rendered with different content, not on the initial
      # render, and this template's only input is a task environment variable.
      # Confirm the copy into the release directory ever actually happens.
      template {
        data        = <<EOH
#!/bin/sh
host=http://127.0.0.1:{{ env "NOMAD_PORT_http" }}

echo "=== /models ==="
curl -s "$host/models"
echo

echo "=== /salience ==="
curl -s -X POST -d "The cat sat on the mat. The dog chased the cat." "$host/salience?model=all-mpnet-base-v2"
echo
EOH
        destination = "local/smoke-test.sh"
        perms       = "0755"
        change_mode = "script"
        change_script {
          command = "/bin/sh"
          args    = ["-c", "cp ${NOMAD_TASK_DIR}/smoke-test.sh $RELEASE_PATH/smoke-test.sh"]
        }
      }

      resources {
        cpu = 2000
        # If the task keeps dying with error code 137, check
        #   sudo dmesg -T | grep -i "killed process"
        # What I saw when the memory limit was too low was
        # [Thu Nov 27 18:19:09 2025] Memory cgroup out of memory: Killed process 2750984 (gunicorn) total-vm:4556920kB, anon-rss:295900kB, file-rss:244188kB, shmem-rss:0kB, UID:1010 pgtables:1920kB oom_score_adj:0
        memory = 8000
      }

      # I manually timed it once. Took a good 18 seconds to shut down,
      # so allow 30s after SIGTERM before the task is force-killed.
      kill_timeout = "30s"
      kill_signal  = "SIGTERM"
    }
  }
}