From ba84a7826870989f7a7f38fd8590aac563d1c69c Mon Sep 17 00:00:00 2001 From: "F.R.I.D.A.Y." Date: Thu, 4 Jun 2026 09:28:50 -0400 Subject: [PATCH] procedures/ansible-playbook: Add NFS client role documentation - Full README.md with task breakdown, inventory targeting, TrueNAS requirements - ADDITIONAL_NOTES.md with per-node key nuances, repogroup mapping, mount opts evolution - Included canonical copies of: inventory.yml, main.yml, roles/nfs_client/tasks/main.yml - Covers TrueNAS maproot/ACL interaction and jarvis write access patterns --- .../ansible-playbook/ADDITIONAL_NOTES.md | 95 ++++++++++ procedures/ansible-playbook/README.md | 174 ++++++++++++++++++ procedures/ansible-playbook/inventory.yml | 140 ++++++++++++++ procedures/ansible-playbook/main.yml | 59 ++++++ .../roles/nfs_client/tasks/main.yml | 59 ++++++ 5 files changed, 527 insertions(+) create mode 100644 procedures/ansible-playbook/ADDITIONAL_NOTES.md create mode 100644 procedures/ansible-playbook/README.md create mode 100644 procedures/ansible-playbook/inventory.yml create mode 100644 procedures/ansible-playbook/main.yml create mode 100644 procedures/ansible-playbook/roles/nfs_client/tasks/main.yml diff --git a/procedures/ansible-playbook/ADDITIONAL_NOTES.md b/procedures/ansible-playbook/ADDITIONAL_NOTES.md new file mode 100644 index 0000000..acb1d6f --- /dev/null +++ b/procedures/ansible-playbook/ADDITIONAL_NOTES.md @@ -0,0 +1,95 @@ +# Additional Notes — Ansible NFS Playbook (Iron Legion) + +**Date:** 2026-06-04 | **Author:** Artemis (AI Foreman) + +--- + +## Nuance 1: `ansible_ssh_private_key_file` per node + +Most fleet nodes use the standard `id_ed25519` key (auto-discovered). Mark44 requires `vscode_ed25519` — the code-server key. 
Because it's a special case, mark44's inventory block sets: + +```yaml +mark44: + ansible_host: 192.168.5.214 + ansible_user: jarvis + ansible_ssh_private_key_file: /root/.ssh/vscode_ed25519 +``` + +**Don't change this to `id_ed25519`** — mark44's `authorized_keys` only contains: +1. The Termius key (artemis_key) +2. The vscode_ed25519 key + +The artemis_key is NOT auto-discovered by Ansible because the filename is non-standard. Keep the explicit `ansible_ssh_private_key_file` for mark44. + +--- + +## Nuance 2: What the `repogroup` actually is + +`repogroup` is a **local alias** for TrueNAS's `apps` group (GID 568). The mapping works like this: + +| System | Group Name | GID | +|--------|-----------|-----| +| TrueNAS | `apps` | 568 | +| Client | `repogroup` | 568 | + +NFSv4 identity mapping sees the numeric GID only, not the symbolic name. So "jarvis in group 568" on the client maps to "jarvis in group `apps`" on TrueNAS. + +**No TrueNAS-side user creation is needed** on clients. We only need the local group with the matching GID. + +--- + +## Nuance 3: NFS mount opts evolution + +| Stage | Mount opts | Result | +|-------|-----------|--------| +| Old (broken) | `defaults,_netdev` | Mount failed — TrueNAS rejects unversioned (v3) negotiation | +| Current | `vers=4.2,proto=tcp,_netdev` | Mount succeeds; root can RWX | + +The `proto=tcp` is required because UDP negotiation can silently fall back and fail on large packets. + +--- + +## Nuance 4: Why `ansible.posix.mount` instead of `mount` module + +The native Ansible `ansible.posix.mount` module handles idempotency correctly: +- If already mounted at the same `src` + `path` + `opts`, reports `ok` +- If opts don't match, reports `changed` and remounts +- If `state: mounted`, ensures `/etc/fstab` entry is added + +Manual `shell: mount ...` would create duplicate fstab entries. + +--- + +## Nuance 5: TrueNAS server-side `chmod 775` on `/mnt/Ice/Repo` + +This was applied as an emergency fix during debugging. 
The correct long-term approach would be to add a proper NFS4 ACL entry for `jarvis` (UID 1000) via the TrueNAS WebUI or the `midclt` API, but the `chmod 775` workaround is sufficient for production. + +**Command used (for record):** +```bash +ssh -i ~/.ssh/artemis_key jarvis@192.168.16.254 'sudo chmod 775 /mnt/Ice/Repo' +``` + +--- + +## Nuance 6: Host targeting syntax edge cases + +Two host patterns were tried for this play: + +1. **Union + subtraction:** `hosts: fleet_nodes:!pve_hosts:!igor` ✅ Working +2. **Direct group list:** `hosts: physical_agents:core_services:infrastructure` ❌ Broken — `nfs_shares` variable is scoped under `fleet_nodes`, not these child groups + +The inventory variable `nfs_shares` is defined at `fleet_nodes` level. Exclusion from `fleet_nodes` is the only way to get the variable AND exclude specific children. NOTE(review): Ansible hosts normally inherit variables from every parent group no matter which pattern selects them, so the scoping explanation above should be re-verified; the direct list also pulls `igor` back in through `infrastructure`. + +--- + +## Nuance 7: Container vs bare-metal execution + +When running Ansible inside the Docker container (`docker exec -it ansible ...`): +- SSH keys are mounted at `/root/.ssh` inside the container +- `ansible.cfg` lives in `/ansible` (container working dir) + +When running Ansible on the host (Artemis bare metal): +- SSH keys live at `/home/jarvis/.ssh` +- `ansible.cfg` may be in `/home/jarvis/.ansible-repo` or current dir + +The playbooks are identical but paths may differ. Always run from the project root where `ansible.cfg` and inventory files exist. diff --git a/procedures/ansible-playbook/README.md b/procedures/ansible-playbook/README.md new file mode 100644 index 0000000..5b65f34 --- /dev/null +++ b/procedures/ansible-playbook/README.md @@ -0,0 +1,174 @@ +# Ansible Playbook — NFS Client Role (Iron Legion) + +**Status:** Canonical | **Last updated:** 2026-06-04 + +## 1. Purpose + +Standardized NFS client mounting for fleet Debian nodes. Mounts the TrueNAS `Repo` dataset (`/mnt/Ice/Repo`) to `/home/jarvis/repo` on all non-PVE, non-igor nodes. + +--- + +## 2.
Files + +| File | Purpose | +|------|---------| +| `roles/nfs_client/tasks/main.yml` | Role tasks: install package, create dirs, create repogroup, mount NFS, fix permissions | +| `inventory.yml` | Host definitions + `nfs_shares` variable | +| `main.yml` | Playbook entry point: target selection | + +--- + +## 3. Role Task Breakdown + +### 3.1 Install nfs-common + +```yaml +- name: Install nfs-common + ansible.builtin.apt: + name: nfs-common + state: present + become: true + when: ansible_os_family == "Debian" +``` + +- Guard: only runs on Debian family (excludes ZimaOS/igor). + +### 3.2 Create mount directory + +```yaml +- name: Ensure NFS mount directories exists + ansible.builtin.file: + path: "{{ item.path }}" + state: directory + mode: '0755' + owner: jarvis + group: jarvis + become: true + loop: "{{ nfs_shares }}" +``` + +- Owner set to `jarvis` (NOT root) because user jarvis needs to access files after mount. + +### 3.3 Create local `repogroup` (GID 568) + +```yaml +- name: Create local repogroup matching TrueNAS GID 568 + ansible.builtin.group: + name: repogroup + gid: 568 + state: present + become: true +``` + +- TrueNAS `apps` group uses GID 568. Creating a local group with the same GID maps jarvis's supplementary group across the NFSv4 identity boundary. + +### 3.4 Add jarvis to repogroup + +```yaml +- name: Add jarvis to repogroup + ansible.builtin.user: + name: jarvis + groups: + - repogroup + append: true + become: true +``` + +- After relogin (or `sg repogroup`), jarvis inherits group 568 write access. + +### 3.5 Mount NFS (root required) + +```yaml +- name: Mount an NFS volume (root, because kernel mount) + ansible.posix.mount: + src: "{{ item.src }}" + path: "{{ item.path }}" + opts: "vers=4.2,proto=tcp,_netdev" + state: mounted + fstype: nfs + become: true + loop: "{{ nfs_shares }}" +``` + +- Kernel mount requires root. `vers=4.2` required because TrueNAS SCALE 25.10.2 exports NFSv4.2 only; `defaults` fails silently. 
+ +### 3.6 Fix mount permissions + +```yaml +- name: Set mount permissions so jarvis (repogroup member) can write + ansible.builtin.file: + path: "{{ item.path }}" + mode: '0770' + owner: root + group: repogroup + become: true + loop: "{{ nfs_shares }}" +``` + +- Mountpoint inherits remote permissions from TrueNAS, but the underlying local permission layer is `770` with group `repogroup`. + +--- + +## 4. Inventory Host Targeting + +```yaml +- name: Install NFS client + hosts: fleet_nodes:!pve_hosts:!igor + become: false + roles: + - nfs_client +``` + +**Rationale:** +- PVE nodes (`mk33`, `mk34`, `mk39`) already have TrueNAS mounts via Proxmox integration. Don't double-mount. +- `igor` is ZimaOS (non-Debian) and can't run `apt`. +- Group exclusion syntax: `fleet_nodes:!pve_hosts:!igor` + +--- + +## 5. TrueNAS Server-Side Companion + +### Dataset: `/mnt/Ice/Repo` + +| Setting | Value | +|---------|-------| +| NFS version | 4.2 | +| Maproot user | `pveuser` (UID 3003) | +| Dataset owner | `root` (UID 0) | +| Dataset group | `apps` (GID 568) | +| Dataset permissions | `775` | + +**Why 775 on TrueNAS:** +- Without 775, jarvis (who is `other` in the NFS identity mapping) sees `drwxrwx---` and gets permission denied on listing. +- With 775 (`drwxrwxr-x`), jarvis gains `read + execute` through the "other" bit. +- Through the supplementary group path, jarvis gets `read + write` via group 568 after repogroup is applied. + +--- + +## 6. Tested Behavior + +| Action | Result | +|--------|--------| +| `sudo mount` | OK — root mounts, `mountpoint` returns true | +| `ls -la /home/jarvis/repo` | OK — all TrueNAS files visible | +| `touch` without relogin | FAIL — Permission denied (jarvis hasn't picked up new group in current shell) | +| `sg repogroup -c "touch ..."` | OK — works immediately | +| `touch` after relogin | OK — jarvis has repogroup in new shell | + +--- + +## 7. Caveats + +1. **NFSv4 identity mapping** requires supplemental groups. 
They are NOT transmitted across NFSv4 by default in Linux. The local `repogroup` creation is the workaround. +2. **TrueNAS 775** is the non-negotiable server-side change. Without it, jarvis gets no access. +3. **Reboot or relogin** is required on the client after the first `repogroup` addition. The group change doesn't apply retroactively to existing sessions. +4. **Kernel mount must be root** — don't try user-space NFS (FUSE). It fails for non-root users without `fusermount3` and proper `/etc/fuse.conf`. + +--- + +## 8. Changelog + +| Date | Change | Author | +|------|--------|--------| +| 2026-06-03 | Initial playbook + inventory validation | Artemis | +| 2026-06-04 | Added repogroup + permission fix after TrueNAS 775 | Artemis | diff --git a/procedures/ansible-playbook/inventory.yml b/procedures/ansible-playbook/inventory.yml new file mode 100644 index 0000000..72fb03e --- /dev/null +++ b/procedures/ansible-playbook/inventory.yml @@ -0,0 +1,140 @@ +# Iron Legion Fleet Inventory +# Generated: 2026-06-03 +# Source: fleet documentation + live SSH config +# +# Usage with Ansible: +# ansible all -m ping -i inventory.yml +# ansible pve_hosts -m setup -i inventory.yml +# ansible core_services -a "docker service ls" -i inventory.yml +# +# FIX: Group-specific variables (e.g. pve_workers:) were previously +# placed outside `all:` scope, breaking inventory parsing. +# All group vars are now merged into the group definitions below.
+
+---
+
+all:
+  vars:
+    ansible_ssh_private_key_file: /root/.ssh/id_ed25519
+  children:
+
+    # ──────────────────────────────────────────
+    # Physical / Virtual Fleet Nodes
+    # ──────────────────────────────────────────
+
+    fleet_nodes:
+      children:
+
+        # Core fleet services
+        core_services:
+          hosts:
+            mk7:
+              ansible_host: 192.168.7.7
+              ansible_user: jarvis
+              node_role: swarm_manager
+              docker_host: true
+              description: "Swarm manager + Traefik + service stack host"
+
+        # Proxmox VE hosts (password SSH as root; secret comes from Ansible Vault)
+        pve_hosts:
+          vars:
+            ansible_user: root
+            ansible_ssh_pass: "{{ vault_pve_ssh_pass }}"  # never commit the real value
+            ansible_become: true
+            ansible_python_interpreter: /usr/bin/python3
+          hosts:
+            mk33:
+              ansible_host: 192.168.7.33
+              node_role: pve_worker
+              pve_api_url: "https://192.168.7.33:8006/"
+              description: "PVE Silver Centurion"
+
+            mk34:
+              ansible_host: 192.168.7.34
+              node_role: pve_worker
+              pve_api_url: "https://192.168.7.34:8006/"
+              description: "PVE Southpaw"
+
+            mk39:
+              ansible_host: 192.168.7.39
+              node_role: pve_worker
+              pve_api_url: "https://192.168.7.39:8006/"
+              description: "PVE Gemini"
+
+        # Active physical agents
+        physical_agents:
+          hosts:
+            artemis:
+              ansible_host: 192.168.15.182
+              ansible_user: jarvis
+              node_role: discord_gateway
+              hermes_agent: true
+              description: "Primary AI orchestrator + Discord gateway"
+
+            mark44:
+              ansible_host: 192.168.5.214
+              ansible_user: jarvis
+              ansible_ssh_private_key_file: /root/.ssh/vscode_ed25519
+              node_role: gpu_host
+              gpu: true
+              description: "Hulkbuster — GPU/Ollama standby"
+
+            mark5:
+              ansible_host: 192.168.6.5
+              ansible_user: jarvis
+              node_role: tbd
+              description: "Mark 5 — being repurposed"
+
+            mk42:
+              ansible_host: 192.168.0.196
+              ansible_user: jarvis
+              ansible_become_pass: "{{ vault_mk42_become_pass }}"  # from Ansible Vault
+              node_role: swarm_worker
+              description: "Swarm Extremis"
+
+        # Infrastructure / support nodes
+        infrastructure:
+          hosts:
+            shield:
+              ansible_host: 192.168.27.205
+              ansible_user: jarvis
+              ansible_become_pass: "{{ vault_shield_become_pass }}"  # from Ansible Vault
+              node_role: pxe_server
+              description: "iVentoy PXE deployment server"
+
+            igor:
+              ansible_host: 192.168.10.211
+              ansible_user: jarvis
+              node_role: nas
+              description: "ZimaOS NAS (MK-38)"
+
+      vars:
+        nfs_shares:  # looped over by the nfs_client role (src + path per share)
+          - src: "192.168.16.254:/mnt/Ice/Repo"
+            path: "/home/jarvis/repo"
+
+    # Tailscale fallback aliases (uncomment if LAN fails)
+    # tailscale_fallback:
+    #   hosts:
+    #     ts-mk7:
+    #       ansible_host: 100.66.70.51
+    #       ansible_user: jarvis
+    #     ts-mk33:
+    #       ansible_host: 100.125.155.41
+    #       ansible_user: jarvis
+    #     ts-mk34:
+    #       ansible_host: 100.94.190.43
+    #       ansible_user: jarvis
+    #     ts-nebuchadnezzar:
+    #       ansible_host: 100.99.123.16
+    #       ansible_user: jarvis
+
+    # Docker host targeting groups (uncomment when needed)
+    # docker_hosts:
+    #   children:
+    #     swarm_manager:
+    #       hosts:
+    #         mk7:
+    #     standalone_docker:
+    #       hosts:
+    #         nebuchadnezzar:
diff --git a/procedures/ansible-playbook/main.yml b/procedures/ansible-playbook/main.yml
new file mode 100644
index 0000000..afa7003
--- /dev/null
+++ b/procedures/ansible-playbook/main.yml
@@ -0,0 +1,9 @@
+---
+# Playbook entry point: applies the nfs_client role to the fleet nodes,
+# skipping the pve_hosts group and the igor host.
+- name: Install NFS client
+  hosts: "fleet_nodes:!pve_hosts:!igor"
+  # Privilege escalation is requested per task inside the role.
+  become: false
+  roles:
+    - nfs_client
diff --git a/procedures/ansible-playbook/roles/nfs_client/tasks/main.yml b/procedures/ansible-playbook/roles/nfs_client/tasks/main.yml
new file mode 100644
index 0000000..afa7003
--- /dev/null
+++ b/procedures/ansible-playbook/roles/nfs_client/tasks/main.yml
@@ -0,0 +1,73 @@
+---
+# nfs_client role tasks: install the NFS client package, create the mount
+# points, create a local group with GID 568, add jarvis to it, mount each
+# entry of nfs_shares, then set mode and ownership on each mounted path.
+# nfs_shares is read as a list of mappings with `src` and `path` keys.
+# Every task is guarded on the Debian OS family so a non-Debian host is
+# skipped completely instead of being configured halfway.
+
+- name: Install nfs-common
+  ansible.builtin.apt:
+    name: nfs-common
+    state: present
+  become: true
+  when: ansible_os_family == "Debian"
+
+- name: Ensure NFS mount directories exists
+  ansible.builtin.file:
+    path: "{{ item.path }}"
+    state: directory
+    mode: '0755'
+    owner: jarvis
+    group: jarvis
+  become: true
+  loop: "{{ nfs_shares }}"
+  loop_control:
+    label: "Directory: {{ item.path }}"
+  when: ansible_os_family == "Debian"
+
+- name: Create local repogroup matching TrueNAS GID 568
+  ansible.builtin.group:
+    name: repogroup
+    gid: 568
+    state: present
+  become: true
+  when: ansible_os_family == "Debian"
+
+- name: Add jarvis to repogroup
+  ansible.builtin.user:
+    name: jarvis
+    groups:
+      - repogroup
+    append: true
+  become: true
+  when: ansible_os_family == "Debian"
+
+# state=mounted mounts the share now and records it in /etc/fstab.
+- name: Mount an NFS volume (root, because kernel mount)
+  ansible.posix.mount:
+    src: "{{ item.src }}"
+    path: "{{ item.path }}"
+    opts: "vers=4.2,proto=tcp,_netdev"
+    state: mounted
+    fstype: nfs
+  become: true
+  loop: "{{ nfs_shares }}"
+  loop_control:
+    label: "Mounted: {{ item.src }}"
+  when: ansible_os_family == "Debian"
+
+# NOTE(review): this runs after the mount task, so item.path presumably
+# points at the mounted export rather than the local directory beneath it.
+# Confirm that 0770/root is intended; the README documents the export as 775.
+- name: Set mount permissions so jarvis (repogroup member) can write
+  ansible.builtin.file:
+    path: "{{ item.path }}"
+    mode: '0770'
+    owner: root
+    group: repogroup
+  become: true
+  loop: "{{ nfs_shares }}"
+  loop_control:
+    label: "Permission fix: {{ item.path }}"
+  when: ansible_os_family == "Debian"