Initial fleet ansible config - 2026-05-21T14:00:49-04:00

2026-05-21 14:00:49 -04:00
commit cea2a0ff15
8 changed files with 287 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,35 @@
 # Ansible Pull — Iron Legion Fleet
 Auto-applied Ansible playbooks for the Iron Legion AI agent fleet.
 ## How It Works
 Each node runs `ansible-pull` every 5 minutes via cron. It clones this repo and applies `local.yml` to itself.
 ## Repo Structure
 ```
 .
 ├── local.yml           # Main playbook — always runs
 ├── group_vars/
 │   └── all.yml         # Fleet-wide variables
 ├── host_vars/
 │   ├── artemis.yml     # Artemis (AI Foreman) specific
 │   ├── bones.yml       # Bones (Mark XLI) specific
 │   ├── hulkbuster.yml  # Mark44 (Hulkbuster) specific
 │   ├── mark5.yml       # Mark5 (Suitcase) specific
 │   └── nebuchadnezzar.yml  # Neo (Nebuchadnezzar) specific
 └── roles/
    └── common/
        └── tasks/
            └── main.yml
 ```
 ## Adding Node-Specific Tasks
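 Edit the corresponding `host_vars/` file with node-specific vars (packages, configs). The file name must match the node's lowercase hostname, because `local.yml` loads `host_vars/<hostname>.yml` based on the hostname Ansible detects. Edit `local.yml` for shared tasks that apply to all nodes.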
 ## Security
 - HTTPS auth via deploy token stored in `/etc/ansible/ansible.env`
 - Token is root-readable only (chmod 600)
 - Gitea provides TLS via NetBird mesh
--- a/group_vars/all.yml
+++ b/group_vars/all.yml
@@ -0,0 +1,10 @@
 ---
 # Defaults shared by every node in the fleet.
 # Location of the ansible-pull repository on Gitea.
 gitea_base_url: 'gitea.nb.bobbysh.me'
 gitea_org: 'Iron-Legion'
 gitea_repo: 'ansible-pull-deploy'
 # Cron expression for the ansible-pull job (every five minutes).
 ansible_pull_cron_schedule: '*/5 * * * *'
--- a/host_vars/artemis.yml
+++ b/host_vars/artemis.yml
@@ -0,0 +1,31 @@
 ---
 # Artemis (AI Foreman): the fleet's control node. It has no NVIDIA GPU.
 has_gpu: false
 node_type: 'foreman'
 # Extra apt packages for monitoring and control.
 extra_packages:
  - nvtop       # GPU monitor (reports AMD iGPU info if available)
  - nethogs     # network usage per process
  - iotop       # disk I/O per process
  - lm-sensors  # temperature and fan readings
  - stress-ng   # load testing
  - cockpit     # web-based system management
 # Services passed to the managed-services task in local.yml. That task only
 # sets boot-time enablement; it neither installs nor starts them.
 managed_services:
  - name: 'hermes-gateway'
    enabled: true
  - name: 'hermes-dashboard'
    enabled: true
 # Ollama models for CPU inference, so small models only.
 ollama_models:
  - 'gemma3:4b'         # small enough for CPU
  - 'phi4-mini:latest'  # tiny and fast
 # Hermes settings for this node.
 hermes_config:
  provider: 'openrouter'
  model: 'openai/gpt-4o-mini'
  context_length: 128000
--- a/host_vars/bones.yml
+++ b/host_vars/bones.yml
@@ -0,0 +1,27 @@
 ---
 # Bones (Mark XLI): headless node, CPU only.
 has_gpu: false
 node_type: 'headless'
 # Node facts.
 bones_storage: '256GB SSD'
 jvm_heap: '512m'
 # Extra apt packages a headless box needs.
 extra_packages:
  - cpufrequtils   # CPU frequency management
  - lm-sensors     # temperature readings
  - smartmontools  # disk health
  - hdparm         # disk performance tuning
  - netdata        # lightweight monitoring (optional)
 # Services on Bones.
 managed_services:
  - name: 'jarvis'  # Paperclip + Ollama + PostgreSQL stack
    enabled: true
  - name: 'ollama'  # CPU inference only
    enabled: true
 # Ollama models: CPU mode, so very small models only.
 ollama_models:
  - 'gemma3:1b'  # ultra-tiny, suited to CPU
--- a/host_vars/hulkbuster.yml
+++ b/host_vars/hulkbuster.yml
@@ -0,0 +1,32 @@
 ---
 # Mark44 (Hulkbuster): the heavy GPU compute node.
 node_type: 'gpu_heavy'
 # GPU description.
 has_gpu: true
 gpu_type: 'nvidia'
 gpu_model: 'RTX 4070'
 vram_mb: 12282
 # Ports (the Ollama value is the standard one).
 ollama_port: 11434
 open_webui_port: 8080
 # Monitoring tools only. Drivers are not listed here because Mark44 uses the
 # proprietary NVIDIA package.
 extra_packages:
  - nvtop    # GPU monitor
  - nethogs  # network usage per process
  - iotop    # disk I/O per process
 # Services on this node.
 managed_services:
  - name: 'ollama'
    enabled: true
  - name: 'open-webui'
    enabled: true
  - name: 'hermes-gateway'
    enabled: true
 # Ollama models: most VRAM headroom in the fleet, so large models fit.
 ollama_models:
  - 'gemma4:e4b'         # already pulled, keep it
  - 'qwen2.5-coder:14b'  # primary coding model
  - 'qwen2.5:14b'        # large general-purpose model
--- a/host_vars/mark5.yml
+++ b/host_vars/mark5.yml
@@ -0,0 +1,30 @@
 ---
 # Mark5 (Suitcase): mobile node with a light GPU.
 node_type: 'gpu_light'
 # GPU description.
 has_gpu: true
 gpu_type: 'nvidia'
 gpu_model: 'RTX 4060 Laptop'
 vram_mb: 8188
 # Ollama port.
 ollama_port: 11434
 # Extra apt packages for a laptop.
 extra_packages:
  - nvtop     # GPU monitor
  - powertop  # power usage analysis
  - tlp       # laptop power management
  - htop      # also in the baseline list; kept so it is surely present
 # Services on this node.
 managed_services:
  - name: 'ollama'
    enabled: true
  - name: 'hermes-gateway'
    enabled: true
 # Ollama models: VRAM is limited, so smaller models only.
 ollama_models:
  - 'qwen2.5-coder:7b'  # small coding model
  - 'gemma3:4b'         # tiny and fast
  - 'llama3.1:8b'       # balanced
--- a/host_vars/nebuchadnezzar.yml
+++ b/host_vars/nebuchadnezzar.yml
@@ -0,0 +1,28 @@
 ---
 # Neo (Nebuchadnezzar): services node running Debian.
 # It hosts Nextcloud AIO and Vaultwarden and nothing else.
 has_gpu: false
 node_type: 'services'
 # Not an inference node, so no Ollama models.
 ollama_models: []
 # Neo-specific facts.
 neo_services:
  - 'nextcloud_aio'
  - 'vaultwarden'
 # Extra apt packages for the hosted services.
 extra_packages:
  - docker.io       # Nextcloud AIO runs in containers
  - docker-compose  # Compose for multi-service stacks
  - apache2-utils   # htpasswd for Vaultwarden basic auth
  - certbot         # Let's Encrypt automation
  - cron            # make sure cron is present
 # Services on Neo.
 managed_services:
  - name: 'nextcloud-aio'
    enabled: true
  - name: 'vaultwarden'
    enabled: true
--- a/local.yml
+++ b/local.yml
@@ -0,0 +1,94 @@
 ---
 # Fleet playbook: each node applies it to itself over a local connection.
 - name: Apply Iron Legion fleet baseline
  hosts: localhost
  connection: local
  become: true
  # Tasks below read ansible_hostname and ansible_os_family, so facts are
  # requested explicitly instead of relying on the configured default.
  gather_facts: true
  tasks:
    # Load per-node variables from host_vars/<lowercase hostname>.yml. The play
    # targets localhost, so the file is chosen from the gathered hostname
    # rather than from the inventory name.
    - name: Load host-specific variables
      include_vars:
        file: "{{ item }}"
      # skip=true skips the task when the node has no vars file. A file that
      # exists but cannot be loaded still fails the run instead of being
      # silently ignored.
      with_first_found:
        - files:
            - "{{ playbook_dir }}/host_vars/{{ ansible_hostname | lower }}.yml"
          skip: true
      tags: [vars, always]
    # Announce which node is running and the role loaded for it.
    - name: Print start message
      debug:
        msg: >-
          Ansible Pull running on {{ ansible_hostname }} ({{ inventory_hostname }}) — role: {{ node_type | default('unspecified') }}
    # --- Baseline, applied on every node ---
    # Refresh the apt package index unless it was refreshed within the last
    # hour (cache_valid_time is in seconds).
    - name: Ensure apt packages are updated
      when: ansible_os_family == "Debian"
      tags:
        - baseline
      apt:
        cache_valid_time: 3600
        update_cache: true
    # Tools every node gets, whatever its role.
    - name: Ensure common packages installed
      when: ansible_os_family == "Debian"
      tags:
        - baseline
      apt:
        state: present
        name:
          - curl
          - git
          - htop
          - tmux
          - jq
          - vim
          - python3-pip
    # --- Per node: packages listed in the host's extra_packages ---
    # Skipped when the list is undefined or empty.
    - name: Ensure node-specific extra packages installed
      when:
        - ansible_os_family == "Debian"
        - extra_packages | default([]) | length > 0
      tags:
        - node_specific
      apt:
        state: present
        name: "{{ extra_packages }}"
    # --- Per node: Ollama model management ---
    # Probe only: records in ollama_check whether `which ollama` succeeds.
    # Nothing is installed here.
    - name: Check whether Ollama is installed
      command: which ollama
      register: ollama_check
      # A missing binary is an expected outcome, so it is not reported as a
      # failure; the rc is still stored for the pull task to inspect.
      failed_when: false
      changed_when: false
      # Read-only, so it also runs under --check and ollama_check.rc is set.
      check_mode: false
      tags: [ollama]
    # Pull each model listed in the host's ollama_models, one command per item.
    # NOTE(review): there is no changed_when, so every pull reports "changed".
    - name: Pull node-specific Ollama models
      command: "ollama pull {{ item }}"
      # default([]) makes the loop empty, and the task a no-op, when the host
      # defines no models.
      loop: "{{ ollama_models | default([]) }}"
      when:
        # rc is missing when the probe was skipped (for example in check
        # mode); treat that as "Ollama not installed" instead of erroring.
        - ollama_check.rc | default(1) == 0
      register: ollama_pull_result
      tags: [ollama]
    # --- Per node: service management (placeholder) ---
    # Sets boot-time enablement for each entry in managed_services; it does not
    # start or stop anything. Failures are ignored on purpose.
    - name: Ensure managed services are enabled
      when:
        - managed_services | default([]) | length > 0
      loop: "{{ managed_services }}"
      ignore_errors: true
      tags:
        - services
      systemd:
        enabled: "{{ item.enabled | default(true) }}"
        name: "{{ item.name }}"
    # --- Artemis only: monitoring dashboard ---
    # Installs cockpit plus cockpit-pcp. cockpit is also listed in Artemis's
    # extra_packages.
    - name: Ensure Artemis cockpit available
      apt:
        name:
          - cockpit
          - cockpit-pcp
        state: present
      when:
        # The hostname is lowercased here to match how the host vars file is
        # selected, so a node named "Artemis" is recognised too.
        - inventory_hostname == "artemis.ai.home" or ansible_hostname | lower == "artemis"
        - ansible_os_family == "Debian"
      tags: [artemis]
    # Final summary: node name, role and GPU flag as loaded for this run.
    - name: Print completion message
      debug:
        msg: >-
          Baseline complete on {{ ansible_hostname }} — node_type={{ node_type | default('unspecified') }}, gpu={{ has_gpu | default(false) }}