#!/usr/bin/env bash
set -euo pipefail

# HF Compete + Vast.ai Provider Bootstrap Script
# 
# Automated host registration and setup for GPU providers.
# Full documentation: https://docs.platform.dev/hosting/quick-start
# Detailed guide: https://docs.platform.dev/hosting/host-setup-guide
#
# WHAT THIS SCRIPT DOES:
# - Installs host prerequisites via apt-get (curl, jq, Docker, NVIDIA user-space utilities) [optional]
# - (XFS partition setup is not performed by this script; configure it beforehand per the host setup guide)
# - Creates/verifies provider profile + accepts hosting agreement
# - Auto-detects machine hardware (GPU, CPU, RAM, storage) via nvidia-smi/lscpu
# - Creates machine listing in platform marketplace
# - Marks machine online and sends initial health check
# - Automatically installs 5-minute health-check systemd timer [optional]
#
# PREREQUISITES:
# - Ubuntu 18.04+ (22.04 LTS recommended)
# - Nvidia GPU 10-series or newer
# - Nvidia drivers installed and working (test: nvidia-smi)
# - Docker and Nvidia Docker runtime installed
# - Bearer token from your platform account (HF_TOKEN env var)
#
# QUICK START:
# curl -fsSL https://YOUR_DOMAIN/scripts/vast-provider-bootstrap.sh -o vast-provider-bootstrap.sh
# chmod +x vast-provider-bootstrap.sh
# export HF_TOKEN=<your_access_token>
# export HF_API_URL=https://api.platform.dev
# ./vast-provider-bootstrap.sh
#
# ENVIRONMENT VARIABLES (see README or host setup guide for full list):
# HF_API_URL         - API endpoint (default: http://localhost:3001)
# HF_TOKEN           - Bearer token (REQUIRED)
# HF_REGION          - Region code (default: US-East)
# HF_COUNTRY         - Country code (default: US)
# HF_PRICE_PER_GPU_HOUR - Pricing in USD (default: 0.65)
# HF_SKIP_INSTALL    - Skip prereq install if 1 (default: 0)
# HF_INSTALL_HEALTH_TIMER - Install systemd timer if 1 (default: 1)

# ---------------------------------------------------------------------------
# Configuration. Every setting is read from an HF_* environment variable and
# falls back to the default shown when that variable is unset or empty.
# ---------------------------------------------------------------------------

# API endpoint and credentials.
TOKEN="${HF_TOKEN:-}"
API_URL="${HF_API_URL:-http://localhost:3001}"

# Payout details sent when a provider profile has to be created.
PAYOUT_ADDRESS="${HF_PAYOUT_ADDRESS:-}"
PAYOUT_METHOD="${HF_PAYOUT_METHOD:-STRIPE}"

# Listing location.
COUNTRY="${HF_COUNTRY:-US}"
REGION="${HF_REGION:-US-East}"

# Pricing fields; passed to jq as JSON numbers, so they must be numeric.
BANDWIDTH_PER_GB="${HF_BANDWIDTH_PER_GB:-0.0000}"
STORAGE_PER_HOUR_GB="${HF_STORAGE_PER_HOUR_GB:-0.0000}"
PRICE_PER_GPU_HOUR="${HF_PRICE_PER_GPU_HOUR:-0.6500}"

# GPU allocation bounds. An empty MAX_GPU_ALLOC is filled in later with the
# detected GPU count.
MAX_GPU_ALLOC="${HF_MAX_GPU_ALLOC:-}"
MIN_GPU_ALLOC="${HF_MIN_GPU_ALLOC:-1}"

# Free-form descriptors attached to the machine listing.
NETWORK_SPEED="${HF_NETWORK_SPEED:-1Gbps}"
STORAGE_TYPE="${HF_STORAGE_TYPE:-NVME}"
POWER="${HF_POWER:-redundant-psu}"
COOLING="${HF_COOLING:-air}"
DESCRIPTION="${HF_DESCRIPTION:-Vast.ai host auto-registered via bootstrap script}"
TAGS="${HF_TAGS:-vast,provider,gpu}"

# Behaviour toggles ("1" enables).
INSTALL_HEALTH_TIMER="${HF_INSTALL_HEALTH_TIMER:-1}"
SKIP_INSTALL="${HF_SKIP_INSTALL:-0}"

# Abort before doing any work when no bearer token was supplied: every API
# request made by this script sends it in the Authorization header.
# The message goes to stderr so it is not mixed into captured stdout.
if [[ -z "${TOKEN}" ]]; then
  echo "ERROR: HF_TOKEN is required (JWT access token)." >&2
  exit 1
fi

# Privilege prefix for commands that need root. Left empty when already
# running as root or when sudo is not available; call sites expand it
# unquoted so that the empty value disappears from the command line.
if [[ "${EUID}" -eq 0 ]] || ! command -v sudo >/dev/null 2>&1; then
  SUDO=""
else
  SUDO="sudo"
fi

# Logging helpers. All arguments are joined with spaces into one message.
#   log  - progress information, written to stdout
#   warn - non-fatal problems, written to stderr
#   err  - fatal problems, written to stderr (the caller decides whether to exit)
# warn/err use stderr so diagnostics stay visible when stdout is redirected.
log() { printf '[hf-vast-bootstrap] %s\n' "$*"; }
warn() { printf '[hf-vast-bootstrap][warn] %s\n' "$*" >&2; }
err() { printf '[hf-vast-bootstrap][error] %s\n' "$*" >&2; }

# Report whether a command is available.
# Arguments: $1 - command name to look up
# Returns:   0 when the command resolves, 1 otherwise (prints nothing)
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || return 1
}

# Install host packages through apt-get unless the operator opted out.
# Globals: SKIP_INSTALL (read), SUDO (read; expanded unquoted on purpose so an
#          empty value vanishes from the command line)
# Behaviour:
#   - HF_SKIP_INSTALL=1: log and do nothing.
#   - no apt-get: warn and do nothing.
#   - otherwise: refresh the package index, install the base packages, try to
#     install nvidia-utils-535 when nvidia-smi is missing (best effort), and
#     enable/start the docker service when systemctl exists (best effort).
install_prereqs() {
  if [[ "${SKIP_INSTALL}" == "1" ]]; then
    log "Skipping package installation (HF_SKIP_INSTALL=1)."
    return
  fi

  if ! require_cmd apt-get; then
    warn "apt-get not found; skipping automatic package installation."
    return
  fi

  log "Installing prerequisites (curl, jq, docker, nvidia utils)..."
  ${SUDO} apt-get update -y
  ${SUDO} apt-get install -y curl jq ca-certificates gnupg lsb-release docker.io pciutils

  # Driver tooling is optional here: a failed install must not abort the run.
  require_cmd nvidia-smi || {
    warn "nvidia-smi not found. Installing NVIDIA user-space tools..."
    ${SUDO} apt-get install -y nvidia-utils-535 || true
  }

  # Docker service activation is likewise best effort.
  if require_cmd systemctl; then
    local action
    for action in enable start; do
      ${SUDO} systemctl "${action}" docker || true
    done
  fi
}

# Perform an authenticated JSON request against the platform API.
# Globals:   TOKEN (read), API_URL (read)
# Arguments: $1 - HTTP method
#            $2 - request path, appended to API_URL
#            $3 - optional request body; sent with -d only when non-empty
# Outputs:   the response body, then a newline, then the HTTP status code on
#            the final line (callers split on that last line)
# Returns:   curl's exit status
# NOTE(review): the bearer token is passed on curl's command line.
api_call() {
  local method="$1"
  local path="$2"
  local body="${3:-}"

  local -a curl_args=(
    -sS -X "${method}"
    -H "Authorization: Bearer ${TOKEN}"
    -H "Content-Type: application/json"
  )
  if [[ -n "${body}" ]]; then
    curl_args+=(-d "${body}")
  fi
  curl_args+=("${API_URL}${path}" -w "\n%{http_code}")

  curl "${curl_args[@]}"
}

# Hard requirement check for jq.
# Returns 0 when jq is available; otherwise reports the problem through err
# and terminates the whole script with exit status 1 (it does not return).
ensure_jq() {
  require_cmd jq && return 0

  err "jq is required but not installed."
  exit 1
}

#######################################
# Detect this host's hardware and derive the listing identifiers.
# Each detected value can be overridden through its HF_* environment variable.
# Globals read:    HF_GPU_TYPE, HF_GPU_MEMORY_GB, HF_CPU_MODEL, HF_CPU_CORES,
#                  HF_RAM_GB, HF_STORAGE_GB, HF_MACHINE_ID, HF_TITLE,
#                  MAX_GPU_ALLOC, REGION
# Globals written: GPU_COUNT, GPU_TYPE, GPU_MEMORY, CPU_MODEL, CPU_CORES,
#                  RAM_GB, STORAGE_GB, MAX_GPU_ALLOC (only when empty),
#                  HOSTNAME_SAFE, MACHINE_ID, TITLE
# Exits with status 1 when nvidia-smi is missing or reports no GPUs.
#######################################
detect_hardware() {
  if ! require_cmd nvidia-smi; then
    err "nvidia-smi not found. Install NVIDIA drivers before running this script."
    exit 1
  fi

  # Count only the top-level "GPU N: ..." lines so that indented sub-device
  # lines (e.g. MIG instances) are not counted as GPUs. "|| true" keeps a
  # failing nvidia-smi (or a zero count, for which grep -c exits 1) from
  # aborting under set -e/pipefail before the explicit error below is printed.
  GPU_COUNT="$(nvidia-smi -L | grep -c '^GPU ' || true)"
  if [[ -z "${GPU_COUNT}" || "${GPU_COUNT}" == "0" ]]; then
    err "No NVIDIA GPUs detected."
    exit 1
  fi

  # Model name of the first GPU, with surrounding blanks trimmed.
  GPU_TYPE="${HF_GPU_TYPE:-$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n1 | sed 's/^ *//;s/ *$//')}"

  # nvidia-smi reports memory.total in MiB; the value is used and logged as
  # GB, so convert (rounded to the nearest GB) unless an override is given.
  if [[ -n "${HF_GPU_MEMORY_GB:-}" ]]; then
    GPU_MEMORY="${HF_GPU_MEMORY_GB}"
  else
    local vram_mib
    vram_mib="$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n1 | tr -dc '0-9')"
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    GPU_MEMORY="$(( (10#${vram_mib:-0} + 512) / 1024 ))"
  fi

  # No padding inside the expansions: a stray space would end up in the values.
  CPU_MODEL="${HF_CPU_MODEL:-$(lscpu 2>/dev/null | awk -F: '/Model name/ {gsub(/^ +/,"",$2); print $2; exit}')}"
  CPU_CORES="${HF_CPU_CORES:-$(nproc)}"
  RAM_GB="${HF_RAM_GB:-$(free -g | awk '/Mem:/ {print $2}')}"
  # Size of the filesystem holding "/", in whole GB (digits only).
  STORAGE_GB="${HF_STORAGE_GB:-$(df --output=size -BG / | tail -n1 | tr -dc '0-9')}"

  # Default the allocation ceiling to every detected GPU.
  if [[ -z "${MAX_GPU_ALLOC}" ]]; then
    MAX_GPU_ALLOC="${GPU_COUNT}"
  fi

  # Hostname reduced to alphanumerics and dashes; the epoch suffix makes the
  # generated machine id differ between runs.
  HOSTNAME_SAFE="$(hostname | tr -cd '[:alnum:]-')"
  MACHINE_ID="${HF_MACHINE_ID:-vast-${HOSTNAME_SAFE}-$(date +%s)}"
  TITLE="${HF_TITLE:-${GPU_COUNT}x ${GPU_TYPE} in ${REGION}}"

  log "Detected: ${GPU_COUNT}x ${GPU_TYPE}, ${GPU_MEMORY}GB VRAM, ${CPU_CORES} cores, ${RAM_GB}GB RAM"
}

#######################################
# Make sure the authenticated account has a provider profile.
# Flow:
#   GET  /providers/me                   200 -> profile exists, nothing to do
#                                        404 -> create it (steps below)
#                                        anything else -> fatal
#   POST /providers                      expects 200 or 201
#   POST /providers/me/accept-agreement  expects 200 or 201
# Globals: PAYOUT_METHOD (read), PAYOUT_ADDRESS (read; empty is sent as null)
# Exits with status 1 on any unexpected HTTP status.
# NOTE(review): when the profile already exists the agreement step is skipped;
# confirm that the API does not need it to be repeated.
#######################################
ensure_provider() {
  log "Checking provider profile..."
  local res body code

  # api_call prints the body followed by the status code on the last line.
  res="$(api_call GET /providers/me)"
  body="$(sed '$d' <<<"${res}")"
  code="$(tail -n1 <<<"${res}")"

  if [[ "${code}" == "200" ]]; then
    log "Provider profile already exists."
    return
  fi

  if [[ "${code}" != "404" ]]; then
    err "Unexpected response from /providers/me (HTTP ${code}): ${body}"
    exit 1
  fi

  log "Creating provider profile..."
  local payload
  payload="$(jq -n \
    --arg payoutMethod "${PAYOUT_METHOD}" \
    --arg payoutAddress "${PAYOUT_ADDRESS}" \
    '{payoutMethod: $payoutMethod, payoutAddress: (if $payoutAddress == "" then null else $payoutAddress end)}')"

  res="$(api_call POST /providers "${payload}")"
  body="$(sed '$d' <<<"${res}")"
  code="$(tail -n1 <<<"${res}")"
  if [[ "${code}" != "201" && "${code}" != "200" ]]; then
    err "Failed to create provider profile (HTTP ${code}): ${body}"
    exit 1
  fi

  log "Accepting hosting agreement..."
  res="$(api_call POST /providers/me/accept-agreement)"
  body="$(sed '$d' <<<"${res}")"
  code="$(tail -n1 <<<"${res}")"
  if [[ "${code}" != "200" && "${code}" != "201" ]]; then
    err "Failed to accept agreement (HTTP ${code}): ${body}"
    exit 1
  fi
}

#######################################
# Register this host as a machine listing via POST /machines.
# Globals read:    MACHINE_ID, GPU_TYPE, GPU_COUNT, GPU_MEMORY, CPU_MODEL,
#                  CPU_CORES, RAM_GB, STORAGE_GB, STORAGE_TYPE, NETWORK_SPEED,
#                  REGION, COUNTRY, PRICE_PER_GPU_HOUR, STORAGE_PER_HOUR_GB,
#                  BANDWIDTH_PER_GB, MIN_GPU_ALLOC, MAX_GPU_ALLOC, TITLE,
#                  DESCRIPTION, TAGS, COOLING, POWER
# Globals written: MACHINE_DB_ID (the "id" field of the API response)
# Exits with status 1 when the API answers with anything other than 200/201
# or when the response carries no id.
#######################################
create_machine() {
  local payload res body code filter
  local -a jq_args

  log "Creating machine listing..."

  # --arg binds a JSON string; --argjson binds the value parsed as JSON, so
  # the numeric settings arrive as numbers rather than strings.
  jq_args=(
    --arg machineId "${MACHINE_ID}"
    --arg gpuType "${GPU_TYPE}"
    --arg cpuModel "${CPU_MODEL}"
    --arg storageType "${STORAGE_TYPE}"
    --arg networkSpeed "${NETWORK_SPEED}"
    --arg region "${REGION}"
    --arg country "${COUNTRY}"
    --arg title "${TITLE}"
    --arg description "${DESCRIPTION}"
    --arg cooling "${COOLING}"
    --arg power "${POWER}"
    --arg tags "${TAGS}"
    --argjson gpuCount "${GPU_COUNT}"
    --argjson gpuMemory "${GPU_MEMORY}"
    --argjson cpuCores "${CPU_CORES}"
    --argjson ramGb "${RAM_GB}"
    --argjson storageGb "${STORAGE_GB}"
    --argjson pricePerGpuPerHour "${PRICE_PER_GPU_HOUR}"
    --argjson storagePerHourGb "${STORAGE_PER_HOUR_GB}"
    --argjson bandwidthPerGb "${BANDWIDTH_PER_GB}"
    --argjson minGpuAlloc "${MIN_GPU_ALLOC}"
    --argjson maxGpuAlloc "${MAX_GPU_ALLOC}"
  )

  # The tags setting is a comma-separated string; it is split, trimmed and
  # stripped of empty entries to form a JSON array.
  filter='{
      machineId: $machineId,
      gpuType: $gpuType,
      gpuCount: $gpuCount,
      gpuMemory: $gpuMemory,
      cpuModel: $cpuModel,
      cpuCores: $cpuCores,
      ramGb: $ramGb,
      storageGb: $storageGb,
      storageType: $storageType,
      networkSpeed: $networkSpeed,
      region: $region,
      country: $country,
      pricePerGpuPerHour: $pricePerGpuPerHour,
      storagePerHourGb: $storagePerHourGb,
      bandwidthPerGb: $bandwidthPerGb,
      minGpuAlloc: $minGpuAlloc,
      maxGpuAlloc: $maxGpuAlloc,
      title: $title,
      description: $description,
      tags: ($tags | split(",") | map(gsub("^\\s+|\\s+$"; "")) | map(select(length > 0))),
      cooling: $cooling,
      power: $power
    }'

  payload="$(jq -n "${jq_args[@]}" "${filter}")"

  # Response layout: body lines, then the HTTP status code on the last line.
  res="$(api_call POST /machines "${payload}")"
  body="$(sed '$d' <<<"${res}")"
  code="$(tail -n1 <<<"${res}")"

  case "${code}" in
    200 | 201) ;;
    *)
      err "Machine creation failed (HTTP ${code}): ${body}"
      exit 1
      ;;
  esac

  MACHINE_DB_ID="$(jq -r '.id' <<<"${body}")"
  if [[ -z "${MACHINE_DB_ID}" || "${MACHINE_DB_ID}" == "null" ]]; then
    err "Machine created but response did not include id: ${body}"
    exit 1
  fi

  log "Machine created with id: ${MACHINE_DB_ID}"
}

#######################################
# Flag the freshly created machine as online and submit a first health check.
# Both requests are best effort: a non-success status only produces a warning.
# Globals: MACHINE_DB_ID (read)
# Requests:
#   PATCH /machines/<id>/online-status  body {"isOnline":true}, expects 200
#   POST  /machines/<id>/health-check   fixed bootstrap report, expects 200/201
# NOTE(review): the health report values are constants, not measurements.
#######################################
mark_online_and_healthcheck() {
  local res body code health
  local machine_path="/machines/${MACHINE_DB_ID}"

  log "Marking machine online..."
  res="$(api_call PATCH "${machine_path}/online-status" '{"isOnline":true}')"
  body="$(sed '$d' <<<"${res}")"
  code="$(tail -n1 <<<"${res}")"
  [[ "${code}" == "200" ]] \
    || warn "Failed to set online status (HTTP ${code}): ${body}"

  log "Sending initial health check..."
  health="$(jq -n '{
      status: "BOOTSTRAP_OK",
      success: true,
      gpuUtilization: 0,
      temperature: 45,
      memoryUsage: 0,
      cpuUsage: 0,
      networkLatency: 5
    }')"

  res="$(api_call POST "${machine_path}/health-check" "${health}")"
  code="$(tail -n1 <<<"${res}")"
  case "${code}" in
    200 | 201) ;;
    *) warn "Initial health check failed (HTTP ${code})." ;;
  esac
}

#######################################
# Install a systemd timer that posts a health check for this machine every
# five minutes.
# Globals: INSTALL_HEALTH_TIMER (read; anything other than "1" skips setup),
#          SUDO (read; expanded unquoted so an empty value disappears),
#          TOKEN, API_URL, MACHINE_DB_ID (read; baked into the generated script)
# Files written:
#   /usr/local/bin/hf-provider-healthcheck.sh           (mode 0700)
#   /etc/systemd/system/hf-provider-healthcheck.service
#   /etc/systemd/system/hf-provider-healthcheck.timer
# Skips with a message when disabled or when systemctl is unavailable.
# NOTE(review): the generated script reports fixed values, not measurements.
#######################################
install_health_timer() {
  if [[ "${INSTALL_HEALTH_TIMER}" != "1" ]]; then
    log "Skipping systemd timer setup (HF_INSTALL_HEALTH_TIMER=${INSTALL_HEALTH_TIMER})."
    return
  fi

  if ! require_cmd systemctl; then
    warn "systemctl not found; skipping periodic health-check timer setup."
    return
  fi

  local target_dir="/usr/local/bin"
  local script_path="${target_dir}/hf-provider-healthcheck.sh"
  local service_path="/etc/systemd/system/hf-provider-healthcheck.service"
  local timer_path="/etc/systemd/system/hf-provider-healthcheck.timer"

  log "Installing periodic health-check script..."
  ${SUDO} mkdir -p "${target_dir}"

  # The script embeds the bearer token, so it must not be readable by other
  # local users. Create it empty with owner-only permissions first; tee then
  # fills the existing file and leaves that mode in place.
  ${SUDO} install -m 0700 /dev/null "${script_path}"

  # Unquoted heredoc: TOKEN, API_URL and MACHINE_DB_ID are expanded now.
  # "\\" yields a literal backslash so the generated file keeps its line
  # continuations.
  ${SUDO} tee "${script_path}" >/dev/null <<EOF
#!/usr/bin/env bash
set -euo pipefail
curl -sS -X POST \\
  -H "Authorization: Bearer ${TOKEN}" \\
  -H "Content-Type: application/json" \\
  "${API_URL}/machines/${MACHINE_DB_ID}/health-check" \\
  -d '{"status":"TIMER_OK","success":true,"gpuUtilization":0,"temperature":45,"memoryUsage":0,"cpuUsage":0,"networkLatency":5}' >/dev/null
EOF

  ${SUDO} tee "${service_path}" >/dev/null <<EOF
[Unit]
Description=HF Provider Machine Health Check
After=network-online.target

[Service]
Type=oneshot
ExecStart=${script_path}
EOF

  ${SUDO} tee "${timer_path}" >/dev/null <<EOF
[Unit]
Description=Run HF Provider Machine Health Check every 5 minutes

[Timer]
OnBootSec=2min
OnUnitActiveSec=5min
Unit=hf-provider-healthcheck.service

[Install]
WantedBy=timers.target
EOF

  ${SUDO} systemctl daemon-reload
  ${SUDO} systemctl enable --now hf-provider-healthcheck.timer
  log "Health-check timer enabled: hf-provider-healthcheck.timer"
}

#######################################
# Run the full bootstrap flow in order:
#   install prerequisites -> verify jq and curl -> detect hardware ->
#   ensure provider profile -> create machine -> mark online + health check ->
#   install the periodic health-check timer.
# Globals: MACHINE_DB_ID (read for the final summary)
# Arguments: none used.
#######################################
main() {
  # jq is verified only after install_prereqs has had the chance to install
  # it. ensure_jq terminates the script when jq is missing, so calling it any
  # earlier would abort on a fresh host before anything could be installed.
  install_prereqs
  ensure_jq

  if ! require_cmd curl; then
    err "curl is required but not found."
    exit 1
  fi

  detect_hardware
  ensure_provider
  create_machine
  mark_online_and_healthcheck
  install_health_timer

  log "Done. Your machine is registered and ready for hosting/rental."
  log "Machine ID: ${MACHINE_DB_ID}"
}

# Script entry point: run the bootstrap flow, forwarding all command-line arguments.
main "$@"
