# Nomad job for the salience editor API.
#
# Runs a single waitress-served app ("salience:app") through `uv run` using the
# raw_exec driver, pinned to the node named "mac-mini" in datacenter "ord10".
#
# This file is a template: `$RELEASE_PATH` is substituted at deploy time via
# envsubst before the job is submitted. `$RELEASE_PLACEHOLDER` is presumably
# substituted the same way -- TODO confirm against the deploy script.
# Nomad's own `${...}` interpolations (e.g. `${NOMAD_PORT_http}`) and the
# smoke-test script's `$host` must be left untouched by that substitution step.
job "salience-editor-api" {
  datacenters = ["ord10"]
  type        = "service"

  # Only schedule onto the node whose unique name is "mac-mini".
  constraint {
    attribute = "${node.unique.name}"
    value     = "mac-mini"
  }

  group "app" {
    count = 1

    network {
      mode = "host"

      port "http" {
        # Nomad will assign an available port
      }
    }

    # Canary-style rollout: start one canary, judge health from the service
    # checks, promote automatically once healthy, and roll back automatically
    # if the deployment fails.
    update {
      max_parallel      = 1
      health_check      = "checks"
      min_healthy_time  = "10s"
      healthy_deadline  = "5m"
      auto_promote      = true
      auto_revert       = true
      canary            = 1
    }

    task "waitress-server" {
      driver = "raw_exec"

      #user = "nomad"

      config {
        # Run from the release directory (filled in by envsubst at deploy time).
        work_dir = "$RELEASE_PATH"
        command  = "/Users/nomad/.local/bin/uv"
        # Waitress is single-process (no fork), avoiding Metal/MPS issues on macOS
        # Listens on both 10.77.0.2 and loopback, on the Nomad-assigned port.
        args     = ["run", "waitress-serve", "--listen=10.77.0.2:${NOMAD_PORT_http}", "--listen=127.0.0.1:${NOMAD_PORT_http}", "salience:app"]
      }

      env {
        PORT   = "${NOMAD_PORT_http}"
        ORIGIN = "https://peoplesgrocers.com"
        #PATH   = "/Users/nomad/.local/bin:/usr/local/bin:/usr/bin:/bin"
        HOME   = "/Users/nomad"
        # Cache locations for uv, Hugging Face and NLTK under the nomad user's home.
        UV_CACHE_DIR = "/Users/nomad/.cache/uv"
        HF_HOME   = "/Users/nomad/cache-huggingface"
        NLTK_DATA = "/Users/nomad/cache-nltk"
        # NOTE(review): the disabled line below names host chicago-web01, but
        # this job is constrained to mac-mini -- it looks like a leftover from
        # an earlier deployment target; confirm before re-enabling.
        #TORCH_DEVICE = "cpu"  # Force CPU since chicago-web01 has no GPU
      }

      # Release path set during deployment via envsubst
      # Rendered to local/env and loaded as environment variables (env = true),
      # so the task sees RELEASE_PATH in its environment.
      template {
        data = <<EOH
RELEASE_PATH="$RELEASE_PATH"
EOH
        destination = "local/env"
        env         = true
      }

      service {
        name = "salience-editor-api"
        port = "http"

        tags = [
          "flask",
          "waitress",
          "api",
          "ml"
        ]

        # Health check on stats endpoint (lightweight)
        check {
          type     = "http"
          path     = "/stats"
          interval = "10s"
          timeout  = "5s"

          # Restart the task after 3 failed checks, but ignore failures during
          # the startup grace period.
          check_restart {
            limit = 3
            grace = "180s"  # 3 minutes for model loading
          }
        }

        meta {
          # NOTE(review): this uses $RELEASE_PLACEHOLDER rather than
          # $RELEASE_PATH -- confirm the deploy step substitutes both.
          version = "$RELEASE_PLACEHOLDER"
        }
      }

      # Smoke-test script rendered into the task's local/ directory.
      # It queries /models and posts a short sample text to /salience on the
      # allocation's assigned port.
      #
      # curl argument order matters: `--data-binary` consumes the NEXT argument
      # as the request body, so the Content-Type header must come before it and
      # the sample text must follow it directly.
      #
      # NOTE(review): change_mode/change_script only fire when the rendered
      # content changes after the task has started; the only templated value
      # here is NOMAD_PORT_http, so confirm the copy into the release directory
      # actually runs in practice.
      template {
        data = <<EOH
#!/bin/sh
host=http://10.77.0.2:{{ env "NOMAD_PORT_http" }}

echo "=== /models ==="
curl -s "$host/models"
echo

echo "=== /salience ==="
curl -s -H "Content-Type: text/plain" --data-binary "The cat sat on the mat. The dog chased the cat." "$host/salience?model=all-mpnet-base-v2"
echo
EOH
        destination = "local/smoke-test.sh"
        perms       = "0755"
        change_mode = "script"
        change_script {
          command = "/bin/sh"
          args    = ["-c", "cp ${NOMAD_TASK_DIR}/smoke-test.sh $RELEASE_PATH/smoke-test.sh"]
        }
      }

      resources {
        cpu    = 2000
        # If the task keeps dying with Error code 137, check
        # sudo dmesg -T | grep -i "killed process"
        # What I saw when the memory limit was too low was
        # [Thu Nov 27 18:19:09 2025] Memory cgroup out of memory: Killed process 2750984 (gunicorn) total-vm:4556920kB, anon-rss:295900kB, file-rss:244188kB, shmem-rss:0kB, UID:1010 pgtables:1920kB oom_score_adj:0
        # NOTE(review): that sample (cgroup OOM, gunicorn) appears to come from
        # an earlier Linux deployment; confirm how this limit is enforced for
        # the raw_exec task on the current host.
        memory = 8000
      }

      # I manually timed it once. Took a good 18 seconds to shutdown
      kill_timeout = "30s"
      kill_signal = "SIGTERM"
    }
  }
}