Initial fleet ansible config - 2026-05-21T14:00:49-04:00
This commit is contained in:
35
README.md
Normal file
35
README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Ansible Pull — Iron Legion Fleet
|
||||
|
||||
Auto-applied Ansible playbooks for the Iron Legion AI agent fleet.
|
||||
|
||||
## How It Works
|
||||
|
||||
Each node runs `ansible-pull` every 5 minutes via cron. It clones this repo and applies `local.yml` to itself.
|
||||
|
||||
## Repo Structure
|
||||
|
||||
```
.
├── local.yml                   # Main playbook — always runs
├── group_vars/
│   └── all.yml                 # Fleet-wide variables
├── host_vars/
│   ├── artemis.yml             # Artemis (AI Foreman) specific
│   ├── hulkbuster.yml          # Mark44 (Hulkbuster) specific
│   ├── mark5.yml               # Mark5 (Suitcase) specific
│   ├── bones.yml               # Bones (Mark XLI) specific
│   └── nebuchadnezzar.yml      # Neo (Nebuchadnezzar) specific
└── roles/
    └── common/
        └── tasks/
            └── main.yml
```
|
||||
|
||||
## Adding Node-Specific Tasks
|
||||
|
||||
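Edit the corresponding `host_vars/` file with node-specific vars (packages, configs). The file must be named after the node's lowercase hostname (for example `host_vars/artemis.yml`), because `local.yml` loads `host_vars/<hostname>.yml` at the start of every run. Edit `local.yml` for shared tasks that apply to all nodes.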
|
||||
|
||||
## Security
|
||||
|
||||
- HTTPS auth via deploy token stored in `/etc/ansible/ansible.env`
|
||||
- Token is root-readable only (chmod 600)
|
||||
- Gitea provides TLS via NetBird mesh
|
||||
10
group_vars/all.yml
Normal file
10
group_vars/all.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
---
# Defaults shared by every node in the fleet.

# Cron expression for the ansible-pull job (every 5 minutes).
ansible_pull_cron_schedule: '*/5 * * * *'

# Location of the ansible-pull repository on Gitea.
# Note: gitea_base_url holds a bare host name, without a scheme.
gitea_base_url: 'gitea.nb.bobbysh.me'
gitea_org: 'Iron-Legion'
gitea_repo: 'ansible-pull-deploy'
|
||||
31
host_vars/artemis.yml
Normal file
31
host_vars/artemis.yml
Normal file
@@ -0,0 +1,31 @@
|
||||
---
# Artemis (AI Foreman): control node, no NVIDIA GPU.
node_type: "foreman"
has_gpu: false

# Monitoring and control packages installed only on Artemis.
extra_packages:
  # GPU monitoring (uses AMD iGPU info if available)
  - "nvtop"
  # Per-process network monitoring
  - "nethogs"
  # Per-process I/O monitoring
  - "iotop"
  # Temperature/fan monitoring
  - "lm-sensors"
  # Load testing
  - "stress-ng"
  # Web-based system management
  - "cockpit"

# systemd units handled by the service task in local.yml. That task only sets
# the `enabled` flag; it does not start the units.
managed_services:
  - name: "hermes-gateway"
    enabled: true
  - name: "hermes-dashboard"
    enabled: true

# Ollama models for Artemis (CPU inference, so small models only).
ollama_models:
  # Small enough for CPU
  - "gemma3:4b"
  # Tiny, fast
  - "phi4-mini:latest"

# Hermes configuration.
hermes_config:
  provider: "openrouter"
  model: "openai/gpt-4o-mini"
  context_length: 128000
|
||||
27
host_vars/bones.yml
Normal file
27
host_vars/bones.yml
Normal file
@@ -0,0 +1,27 @@
|
||||
---
# Bones (Mark XLI): headless, CPU-only node.
node_type: "headless"
has_gpu: false

# Essentials for a headless machine.
extra_packages:
  # CPU frequency management
  - "cpufrequtils"
  # Temperature monitoring
  - "lm-sensors"
  # Disk health monitoring
  - "smartmontools"
  # Disk performance tuning
  - "hdparm"
  # Lightweight monitoring (optional)
  - "netdata"

# systemd units managed on Bones.
managed_services:
  # Paperclip + Ollama + PostgreSQL stack
  - name: "jarvis"
    enabled: true
  # CPU inference only
  - name: "ollama"
    enabled: true

# Ollama models (CPU mode, so very small models only).
ollama_models:
  # Ultra-tiny for CPU
  - "gemma3:1b"

# Variables specific to this node.
bones_storage: "256GB SSD"
jvm_heap: "512m"
|
||||
32
host_vars/hulkbuster.yml
Normal file
32
host_vars/hulkbuster.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
---
# Mark44 (Hulkbuster): heavy GPU compute node.
node_type: "gpu_heavy"
has_gpu: true
gpu_type: "nvidia"
gpu_model: "RTX 4070"
vram_mb: 12282

# GPU-related tooling only. Drivers are not listed here: Mark44 uses the
# proprietary NVIDIA package.
extra_packages:
  # GPU monitoring
  - "nvtop"
  # Network per-process monitoring
  - "nethogs"
  # I/O per-process monitoring
  - "iotop"

# Ollama models. This node has the largest VRAM headroom, so it can run the
# big models.
ollama_models:
  # Already pulled — keep it
  - "gemma4:e4b"
  # Primary coding model
  - "qwen2.5-coder:14b"
  # General purpose large model
  - "qwen2.5:14b"

# systemd units managed on this node.
managed_services:
  - name: "ollama"
    enabled: true
  - name: "open-webui"
    enabled: true
  - name: "hermes-gateway"
    enabled: true

# Service ports (the Ollama value is the standard one).
ollama_port: 11434
open_webui_port: 8080
|
||||
30
host_vars/mark5.yml
Normal file
30
host_vars/mark5.yml
Normal file
@@ -0,0 +1,30 @@
|
||||
---
# Mark5 (Suitcase): mobile / light GPU node.
node_type: "gpu_light"
has_gpu: true
gpu_type: "nvidia"
gpu_model: "RTX 4060 Laptop"
vram_mb: 8188

# Packages that only make sense on the laptop.
extra_packages:
  # GPU monitoring
  - "nvtop"
  # Power management analysis
  - "powertop"
  # Laptop power management
  - "tlp"
  # Also part of the baseline package list in local.yml; listed again so it is
  # guaranteed present
  - "htop"

# Ollama models. VRAM is limited, so smaller models only.
ollama_models:
  # Small coding model
  - "qwen2.5-coder:7b"
  # Tiny, fast
  - "gemma3:4b"
  # Balanced
  - "llama3.1:8b"

# systemd units managed on this node.
managed_services:
  - name: "ollama"
    enabled: true
  - name: "hermes-gateway"
    enabled: true

# Ollama port.
ollama_port: 11434
|
||||
28
host_vars/nebuchadnezzar.yml
Normal file
28
host_vars/nebuchadnezzar.yml
Normal file
@@ -0,0 +1,28 @@
|
||||
---
# Neo (Nebuchadnezzar): services node running Debian.
# Hosts Nextcloud AIO and Vaultwarden ONLY.
node_type: "services"
has_gpu: false

# Packages needed by the hosted services.
extra_packages:
  # Nextcloud AIO is container-based
  - "docker.io"
  # Compose for multi-service stacks
  - "docker-compose"
  # htpasswd for Vaultwarden basic auth
  - "apache2-utils"
  # Let's Encrypt automation
  - "certbot"
  # Ensure cron is present
  - "cron"

# systemd units managed on Neo.
managed_services:
  - name: "nextcloud-aio"
    enabled: true
  - name: "vaultwarden"
    enabled: true

# Not an inference node, so no Ollama models are pulled.
ollama_models: []

# Neo-specific facts.
neo_services:
  - "nextcloud_aio"
  - "vaultwarden"
|
||||
94
local.yml
Normal file
94
local.yml
Normal file
@@ -0,0 +1,94 @@
|
||||
---
# Playbook applied to the local machine on every ansible-pull run.
# Per-node behaviour is driven by variables from host_vars/<hostname>.yml,
# which the first task loads explicitly. All node-specific tasks tolerate
# those variables being absent, so a node without a vars file still receives
# the baseline.
- hosts: localhost
  connection: local
  become: true
  tasks:
    # Load host-specific vars by actual system hostname (lower-cased).
    # Errors are ignored so that a node without a vars file keeps going.
    # NOTE(review): ignore_errors also hides syntax errors in an existing vars
    # file; the node then silently falls back to baseline-only behaviour.
    - name: Load host-specific variables
      include_vars:
        file: "host_vars/{{ ansible_hostname | lower }}.yml"
      ignore_errors: true
      tags: [vars, always]

    - name: Print start message
      debug:
        msg: "Ansible Pull running on {{ ansible_hostname }} ({{ inventory_hostname }}) — role: {{ node_type | default('unspecified') }}"

    # --- ALL NODES: baseline ---
    # Refresh the apt cache at most once per hour (cache_valid_time is seconds).
    - name: Ensure apt packages are updated
      apt:
        update_cache: true
        cache_valid_time: 3600
      when: ansible_os_family == "Debian"
      tags: [baseline]

    - name: Ensure common packages installed
      apt:
        name:
          - curl
          - git
          - htop
          - tmux
          - jq
          - vim
          - python3-pip
        state: present
      when: ansible_os_family == "Debian"
      tags: [baseline]

    # --- NODE-SPECIFIC: extra packages ---
    # Skipped when the node defines no extra_packages or an empty list.
    - name: Ensure node-specific extra packages installed
      apt:
        name: "{{ extra_packages }}"
        state: present
      when:
        - ansible_os_family == "Debian"
        - extra_packages is defined
        - extra_packages | length > 0
      tags: [node_specific]

    # --- NODE-SPECIFIC: Ollama model management ---
    # Probe only: this task does not install anything. A missing binary is an
    # expected outcome, so it is recorded in ollama_check.rc instead of being
    # reported as a failure. check_mode is disabled so that the read-only probe
    # still runs (and registers rc) during --check runs.
    - name: Ensure Ollama is installed
      command: which ollama
      register: ollama_check
      failed_when: false
      changed_when: false
      check_mode: false
      tags: [ollama]

    # The loop expression is templated before `when` is evaluated for each
    # item, so an `is defined` test in `when` cannot guard it. default([])
    # turns an undefined or empty model list into a clean skip.
    - name: Pull node-specific Ollama models
      command: "ollama pull {{ item }}"
      loop: "{{ ollama_models | default([]) }}"
      when: (ollama_check.rc | default(1)) == 0
      register: ollama_pull_result
      tags: [ollama]

    # --- NODE-SPECIFIC: Service management (placeholder) ---
    # Only the `enabled` flag is set; units are not started here. Errors are
    # ignored on purpose, so a failure for one unit does not abort the run.
    # default([]) makes the task a no-op on nodes without managed_services.
    - name: Ensure managed services are enabled
      systemd:
        name: "{{ item.name }}"
        enabled: "{{ item.enabled | default(true) }}"
      loop: "{{ managed_services | default([]) }}"
      ignore_errors: true
      tags: [services]

    # --- Artemis-specific: monitoring dashboard ---
    - name: Ensure Artemis cockpit available
      apt:
        name:
          - cockpit
          - cockpit-pcp
        state: present
      when:
        - inventory_hostname == "artemis.ai.home" or ansible_hostname == "artemis"
        - ansible_os_family == "Debian"
      tags: [artemis]

    - name: Print completion message
      debug:
        msg: "Baseline complete on {{ ansible_hostname }} — node_type={{ node_type | default('unspecified') }}, gpu={{ has_gpu | default(false) }}"
|
||||
Reference in New Issue
Block a user