fix: emergency mode due to unused disk
infrastructure/ansible/README.md (new file, 163 lines)
@@ -0,0 +1,163 @@
# Ansible Infrastructure

Ansible playbooks for managing the infrastructure.

## Prerequisites

### On the management host (localhost)

1. **Ansible installed:**
```bash
pip3 install ansible
# OR
sudo apt install ansible
```

2. **Install the Ansible collections:**
```bash
ansible-galaxy collection install -r requirements.yml
```
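
For reference, `requirements.yml` lists the collections these playbooks depend on. A minimal sketch of its shape (the exact entries are an assumption; `community.hashi_vault` is implied by the Vault lookups described below):

```yaml
# Hypothetical requirements.yml contents; consult the actual file in this
# directory for the authoritative list.
collections:
  - name: community.general
  - name: community.hashi_vault
```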

3. **Python dependencies (for Vault access):**

**Option A: pipx (recommended for an isolated installation):**
```bash
# Install pipx if not already present
sudo apt install pipx
pipx ensurepath

# Install hvac in an isolated environment
pipx install hvac
```

**Option B: Virtual Environment:**
```bash
# Create a virtual environment
python3 -m venv ~/.venv/ansible
source ~/.venv/ansible/bin/activate

# Install dependencies
pip install -r requirements.txt

# Use Ansible from the virtual environment
# (needs to be activated before every Ansible run)
```

**Option C: system package (if available):**
```bash
sudo apt install python3-hvac
```

**Option D: --break-system-packages (not recommended):**
```bash
pip3 install --break-system-packages -r requirements.txt
```

**Important:** The `hvac` module is required for the Vault lookups that run on `localhost` (e.g. in `k3s_deploy.yml`).
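
As an illustration of such a lookup (the task name, secret path, and variables are hypothetical, not copied from `k3s_deploy.yml`):

```yaml
# Hypothetical sketch: read a secret from Vault on the controller.
# Requires hvac in the Python environment that runs Ansible on localhost.
- name: Read K3s token from Vault
  ansible.builtin.set_fact:
    k3s_token: "{{ lookup('community.hashi_vault.hashi_vault', 'secret/data/k3s:token', url=vault_addr, token=vault_token) }}"
  delegate_to: localhost
```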

### On the managed hosts (VMs)

The VMs must be reachable via SSH. The required packages are installed automatically via Ansible.

## Installation

### 1. Install the Ansible collections

```bash
cd infrastructure/ansible
ansible-galaxy collection install -r requirements.yml
```

### 2. Install the Python dependencies

**Option A: pipx (recommended):**
```bash
sudo apt install pipx
pipx ensurepath
pipx install hvac
```

**Option B: system package:**
```bash
sudo apt install python3-hvac
```

**Option C: Virtual Environment:**
```bash
python3 -m venv ~/.venv/ansible
source ~/.venv/ansible/bin/activate
pip install -r requirements.txt
# Note: the virtual environment has to be activated before every Ansible run
```

**Important:** These dependencies are required for the Vault lookups on `localhost`.

## Usage

### K3s Cluster Deployment

```bash
export VAULT_TOKEN="<your-vault-token>"
export VAULT_ADDR="https://10.100.30.11:8200" # optional; default set in the playbook
ansible-playbook -i inventory.ini k3s_deploy.yml
```
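
The playbook can pick these variables up via environment lookups; a minimal sketch of that pattern (the variable names are assumptions, the default address is the one above):

```yaml
# Sketch: read Vault settings from the environment, falling back to the
# default address. Variable names are illustrative.
vars:
  vault_addr: "{{ lookup('ansible.builtin.env', 'VAULT_ADDR') | default('https://10.100.30.11:8200', true) }}"
  vault_token: "{{ lookup('ansible.builtin.env', 'VAULT_TOKEN') }}"
```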

### Docker Apps Deployment

```bash
ansible-playbook -i inventory.ini deploy.yml
```

### Longhorn Disk Setup

```bash
ansible-playbook -i inventory.ini setup_longhorn_disks.yml
```

## Troubleshooting

### "ModuleNotFoundError: No module named 'hvac'"

**Problem:** The Vault lookup fails because `hvac` is not installed.

**Solution:**
```bash
pip3 install hvac
# OR
pip3 install -r requirements.txt
```

### "sudo: a password is required" during a Vault lookup

**Problem:** A task with `delegate_to: localhost` tries to use sudo.

**Solution:** Vault tasks should set `ansible_become: false` (already implemented).
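
As a sketch of that pattern (task and variable names hypothetical):

```yaml
# A delegated Vault task must not try to escalate privileges on localhost.
- name: Fetch a secret from Vault
  ansible.builtin.set_fact:
    app_secret: "{{ lookup('community.hashi_vault.hashi_vault', 'secret/data/app:password') }}"
  delegate_to: localhost
  vars:
    ansible_become: false
```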
### "Guest Agent not running" in Proxmox
|
||||
|
||||
**Problem:** qemu-guest-agent läuft nicht auf den VMs.
|
||||
|
||||
**Lösung:** Wird automatisch via `common` Role installiert. Siehe [Troubleshooting](../docs/internal/operations/troubleshooting-proxmox-guest-agent.md).
|
||||
|
||||
## Structure

```
ansible/
├── roles/
│   ├── common/               # base setup (Docker, users, CA certs, guest agent)
│   ├── k3s/                  # K3s cluster setup
│   └── users/                # user management
├── deploy.yml                # Docker apps deployment (push mode)
├── k3s_deploy.yml            # K3s cluster deployment
├── setup_longhorn_disks.yml  # Longhorn disk setup
├── inventory.ini             # production inventory
├── inventory_local.ini       # local inventory (for tests)
├── requirements.yml          # Ansible collections
└── requirements.txt          # Python dependencies (hvac)
```

## References

- [Troubleshooting Docs](../../docs/internal/operations/)
- [K3s Planning](../../K3S_PLANNING.md)
- [Vault Setup](../../setup_k3s_secrets.sh)
@@ -1,73 +0,0 @@
---
- name: Cleanup Rook Ceph Resources (K8s)
  hosts: k3s_masters[0]
  become: yes
  tasks:
    - name: Delete ArgoCD Applications if they exist
      shell: kubectl delete application -n argocd rook-ceph-cluster rook-ceph-operator --ignore-not-found
      ignore_errors: yes

    - name: Delete Rook Ceph Cluster CR
      shell: kubectl -n rook-ceph delete cephcluster rook-ceph --wait=false --ignore-not-found

    - name: Patch CephCluster finalizer (to force deletion if stuck)
      shell: |
        kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{"metadata":{"finalizers": []}}'
      ignore_errors: yes

    - name: Patch CephBlockPool finalizers
      shell: |
        kubectl -n rook-ceph get cephblockpool -o name | xargs -I {} kubectl -n rook-ceph patch {} --type merge -p '{"metadata":{"finalizers": []}}'
      ignore_errors: yes

    - name: Patch CephObjectStore finalizers
      shell: |
        kubectl -n rook-ceph get cephobjectstore -o name | xargs -I {} kubectl -n rook-ceph patch {} --type merge -p '{"metadata":{"finalizers": []}}'
      ignore_errors: yes

    - name: Patch CephFilesystem finalizers
      shell: |
        kubectl -n rook-ceph get cephfilesystem -o name | xargs -I {} kubectl -n rook-ceph patch {} --type merge -p '{"metadata":{"finalizers": []}}'
      ignore_errors: yes

    - name: Patch all remaining Rook resources finalizers
      shell: |
        kubectl api-resources --verbs=list --namespaced -o name | grep ceph.rook.io | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph -o name | xargs -r -n 1 kubectl -n rook-ceph patch --type merge -p '{"metadata":{"finalizers": []}}'
      ignore_errors: yes

    - name: Force delete Namespace rook-ceph (remove finalizers from NS)
      shell: |
        kubectl get namespace rook-ceph -o json | jq '.spec.finalizers=[]' | kubectl replace --raw "/api/v1/namespaces/rook-ceph/finalize" -f -
      ignore_errors: yes

    - name: Delete Rook Ceph Namespace
      shell: kubectl delete namespace rook-ceph --wait=false --ignore-not-found
      ignore_errors: yes

    - name: Delete Rook Ceph CRDs (Global cleanup)
      shell: kubectl delete crd $(kubectl get crd | grep ceph.rook.io | awk '{print $1}')
      ignore_errors: yes

- name: Cleanup Rook Ceph Data on Nodes
  hosts: k3s_masters
  become: yes
  tasks:
    - name: Remove /var/lib/rook directory
      file:
        path: /var/lib/rook
        state: absent
        force: yes

    # WARNING: These commands will WIPE DATA on /dev/sdb
    - name: Zap Disk sdb
      shell: sgdisk --zap-all /dev/sdb || true
      ignore_errors: yes

    - name: WipeFS sdb
      shell: wipefs -a /dev/sdb || true
      ignore_errors: yes

    - name: Mapper clean
      shell: ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove %
      ignore_errors: yes
      failed_when: false
@@ -1,6 +1,6 @@
 [docker_hosts]
 vm-docker-apps-301.stabify.de ansible_host=10.100.30.11
-vm-docker-traefik-302.stabify.de ansible_host=10.100.30.12
+# vm-docker-traefik-302 removed (Traefik Edge now runs in the k3s cluster)
 # vm-docker-mailcow-300.stabify.de ansible_host=10.100.30.10
 
 [k3s_masters]
@@ -12,9 +12,16 @@
       - ca-certificates
       - gnupg
       - lsb-release
+      - qemu-guest-agent # Proxmox Guest Agent
     state: present
     update_cache: true
+
+- name: "Start qemu-guest-agent service"
+  systemd:
+    name: qemu-guest-agent
+    state: started
+    enabled: yes
 
 - name: "Distribute Stabify Root CA"
   copy:
     src: "{{ playbook_dir }}/../../vault-ca.crt" # relative to the playbook root (push mode)
@@ -19,6 +19,11 @@
         enabled: yes
         state: started
 
+    - name: Wait for /dev/sdb to be available
+      wait_for:
+        path: /dev/sdb
+        timeout: 30
+
     - name: Check if /dev/sdb exists
       stat:
         path: /dev/sdb
@@ -26,15 +31,29 @@
 
     - name: Fail if /dev/sdb is missing
       fail:
-        msg: "/dev/sdb was not found on this host!"
+        msg: "/dev/sdb was not found on this host! Ensure the disk is attached in Proxmox/Terraform."
       when: not disk_sdb.stat.exists
 
-    - name: Create ext4 filesystem on /dev/sdb
+    - name: Check if /dev/sdb has a filesystem
+      command: blkid /dev/sdb
+      register: disk_blkid
+      changed_when: false
+      failed_when: false
+
+    - name: Create ext4 filesystem on /dev/sdb (only if no filesystem exists)
       filesystem:
         fstype: ext4
         dev: /dev/sdb
         # force: yes # Be careful with force, but since we wiped it, it should be fine.
+        # If filesystem already exists (e.g. from a previous partial run), this is idempotent.
+      when: disk_blkid.rc != 0
+
+    - name: Get UUID of /dev/sdb
+      command: blkid -s UUID -o value /dev/sdb
+      register: disk_uuid
+      changed_when: false
+
+    - name: Display UUID for verification
+      debug:
+        msg: "Disk UUID: {{ disk_uuid.stdout }}"
 
     - name: Create mount point /var/lib/longhorn
       file:
@@ -42,13 +61,13 @@
         state: directory
         mode: '0755'
 
-    - name: Mount /dev/sdb to /var/lib/longhorn
+    - name: Mount disk using UUID to /var/lib/longhorn
       mount:
         path: /var/lib/longhorn
-        src: /dev/sdb
+        src: "UUID={{ disk_uuid.stdout }}"
         fstype: ext4
         state: mounted
-        opts: defaults,noatime
+        opts: defaults,noatime,nofail
 
     - name: Display disk usage for /var/lib/longhorn
       shell: df -h /var/lib/longhorn
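
The UUID and `nofail` changes above are what resolve the boot issue from the commit message: a hard `/dev/sdb` entry without `nofail` drops the VM into emergency mode when the disk is missing or unformatted at boot. The mount task writes an `/etc/fstab` entry roughly like the following (UUID illustrative):

```
UUID=0a1b2c3d-... /var/lib/longhorn ext4 defaults,noatime,nofail 0 0
```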