phase45 doctrine183 disaster-recovery v2 deep + intent opus_disaster_deep_clean | GOLD preserved | 5 triggers: deep_clean disaster_deep aggressive_disk disk_deep cleanup_aggressive | cmd: bash opus-disaster-recovery.sh deep
Some checks failed
WEVAL NonReg / nonreg (push) Has been cancelled

This commit is contained in:
Opus
2026-04-24 14:41:33 +02:00
parent ec607b7e51
commit f877e90e38
2 changed files with 238 additions and 0 deletions

View File

@@ -0,0 +1,143 @@
#!/bin/bash
# WEVIA Meta-Disaster-Recovery v2 — WEVIA répare elle-même son infra
# Usage: opus-disaster-recovery.sh [status|chrome|disk|deep|intents|all|deep-all]
# v2 24avr2026 : ajout subcommand deep (cleanup agressif) + kill orphan greps
# Subcommand to run, taken from the first CLI argument; falls back to
# "status" when the script is called without arguments.
ACTION="${1:-status}"
# Print the first figure that `uptime` reports after "load average:",
# with spaces removed.
load_now() {
  uptime | awk '{
    split($0, around_label, "load average:")
    split(around_label[2], figures, ",")
    gsub(/ /, "", figures[1])
    print figures[1]
  }'
}
# Print the 5th column (Use%) of the last line of `df -h /`, without the
# percent sign.
disk_pct() {
  df -h / | awk '
    { use_col = $5 }
    END {
      if (NR) {
        gsub(/%/, "", use_col)
        print use_col
      }
    }'
}
# Print the 4th column (Avail, human-readable) of the last line of `df -h /`.
disk_avail() {
  df -h / | awk '
    { avail_col = $4 }
    END { if (NR) print avail_col }'
}
# Print how many processes have "chrome" or "chromium" in their `ps aux` line.
# Two kinds of false positives are filtered out:
#   - the grep used for the filtering itself;
#   - this script's own processes: when invoked as "<script> chrome" the
#     script (and every subshell it forks) carries the word "chrome" on its
#     command line and would otherwise inflate the count.
chrome_count() {
  # grep -c prints the number of remaining lines (0 included), so the output
  # is a bare integer.
  ps aux | grep -E "chrome|chromium" | grep -v grep | grep -vcF -- "${0##*/}"
}
# Print len() of the JSON document stored in the priority-intents file, or the
# literal word CORRUPTED when python3 exits non-zero (its stderr is discarded).
intents_count() {
  local py_probe='import json; d=json.load(open("/opt/wevia-brain/priority-intents-nl.json")); print(len(d))'
  if ! python3 -c "$py_probe" 2>/dev/null; then
    echo "CORRUPTED"
  fi
}
# ---------------------------------------------------------------------------
# Subcommand implementations and dispatcher.
# Relies on the probes defined earlier in this file (load_now, disk_pct,
# disk_avail, chrome_count, intents_count) and on $ACTION holding the
# requested subcommand.
# ---------------------------------------------------------------------------

# Succeed when $1 is a non-negative integer strictly greater than $2.
# An empty or non-numeric $1 (a probe that produced nothing) counts as
# "not over" instead of making `[ -gt ]` fail with "integer expression
# expected".
dr_over() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  [ "$1" -gt "$2" ]
}

# status: print the current probes and the list of raised alarms.
dr_status() {
  local load_int disk_used chrome_procs alarms=""
  echo "=== DISASTER RECOVERY STATUS ==="
  echo "load: $(load_now)"
  echo "disk: $(disk_pct)% ($(disk_avail) avail)"
  echo "chrome: $(chrome_count)"
  echo "intents: $(intents_count)"
  load_int=$(load_now | cut -d. -f1) # integer part of the load figure
  disk_used=$(disk_pct)
  chrome_procs=$(chrome_count)
  if dr_over "$load_int" 30; then alarms="$alarms LOAD_HIGH"; fi
  if dr_over "$disk_used" 85; then alarms="$alarms DISK_HIGH"; fi
  if dr_over "$chrome_procs" 50; then alarms="$alarms CHROME_LEAK"; fi
  if [ -z "$alarms" ]; then alarms="NONE"; fi
  echo "alarms:$alarms"
}

# chrome: SIGKILL every process whose command line mentions chrome/chromium.
dr_chrome() {
  local before after pid
  echo "=== CHROME CLEANUP ==="
  before=$(chrome_count)
  # `pkill -f chrome` matches full command lines, and this script is itself
  # started as "... chrome", so pkill would SIGKILL the script before it can
  # report. Enumerate the PIDs first and skip this shell and its parent.
  for pid in $(pgrep -f 'chrome|chromium'); do
    if [ "$pid" = "$$" ] || [ "$pid" = "$PPID" ]; then
      continue
    fi
    sudo kill -9 "$pid" 2>/dev/null
  done
  sleep 3
  after=$(chrome_count)
  echo "chrome: ${before} -> ${after}"
  echo "load: $(load_now)"
}

# Truncate every regular file under /var/log larger than $1 down to $2
# (both in find/truncate size syntax, e.g. 100M / 10M). When $3 is non-empty
# each truncation is reported with the file's previous size in bytes.
# Paths are passed NUL-delimited so names with whitespace are handled.
dr_truncate_logs() {
  local threshold=$1 keep=$2 report=${3:-} logfile bytes
  while IFS= read -r -d '' logfile; do
    bytes=$(sudo stat -c%s "$logfile" 2>/dev/null)
    sudo truncate -s "$keep" "$logfile"
    if [ -n "$report" ]; then
      echo "trunc $logfile ($bytes -> $keep)"
    fi
  done < <(sudo find /var/log -type f -size "+$threshold" -print0 2>/dev/null)
}

# disk: standard cleanup (docker prune, stale /tmp files, playwright profile
# dirs, logs >100M, all DB backups except the newest).
dr_disk() {
  local before after
  echo "=== DISK CLEANUP (standard) ==="
  before=$(disk_pct)
  sudo docker system prune -f 2>&1 | tail -3
  sudo find /tmp -type f -atime +3 ! -path "*/nuclei*" -delete 2>/dev/null
  sudo find /tmp -maxdepth 2 -type d -name "playwright_*" -mmin +30 -exec rm -rf {} \; 2>/dev/null
  dr_truncate_logs 100M 10M
  # Subshell keeps the cd local; `tail -n +2` drops the newest backup from
  # the deletion list.
  (
    cd /opt/wevads/vault 2>/dev/null \
      && ls -t db-backup-*.sql.gz 2>/dev/null | tail -n +2 | xargs -r -d '\n' sudo rm -f
  )
  after=$(disk_pct)
  echo "disk: ${before}% -> ${after}%"
}

# deep: aggressive cleanup in 8 steps (see the echo banners below).
dr_deep() {
  local before after pid running_kernel
  echo "=== DISK CLEANUP DEEP (agressif, safe) ==="
  before=$(disk_pct)
  echo "avail_before: $(disk_avail)"
  echo "--- 1/8 kill orphan greps >120s ---"
  # Targets processes older than 120 s that are either /usr/bin/grep, or a
  # grep binary started with "-rn". The previous first pass required a line
  # to both match and not match /grep -rn/, so it never selected anything.
  ps -eo pid,etimes,cmd --no-headers \
    | awk '$2 > 120 && ($3 == "/usr/bin/grep" || ($3 ~ /(^|\/)grep$/ && /grep -rn/)) { print $1 }' \
    | while read -r pid; do
      sudo kill -9 "$pid" 2>/dev/null && echo "killed grep $pid"
    done
  echo "--- 2/8 docker prune -a --volumes (unused images+volumes) ---"
  sudo docker system prune -a -f --volumes 2>&1 | tail -5
  echo "--- 3/8 journalctl vacuum 3 jours ---"
  sudo journalctl --vacuum-time=3d 2>&1 | tail -3
  echo "--- 4/8 /tmp atime+1 (hors nuclei) ---"
  sudo find /tmp -type f -atime +1 ! -path "*/nuclei*" -delete 2>/dev/null
  sudo find /tmp -maxdepth 3 -type d -empty -delete 2>/dev/null
  echo "--- 5/8 /var/log truncate >50M ---"
  dr_truncate_logs 50M 5M report
  echo "--- 6/8 proofs old webm >7j delete ---"
  sudo find /var/www/html/proofs -type f -name "*.webm" -mtime +7 -delete 2>/dev/null
  sudo find /var/www/html/proofs -type f -name "*.png" -mtime +14 -delete 2>/dev/null
  sudo find /var/www/html/proofs -type d -empty -delete 2>/dev/null
  echo "--- 7/8 apt cache + old kernels ---"
  sudo apt-get clean 2>&1 | tail -1
  running_kernel=$(uname -r)
  # Only versioned images (linux-image-<digit>...) are candidates, so
  # meta-packages such as linux-image-generic are never purged. The running
  # release is compared as a plain substring, not as a regex. At most three
  # packages are purged per run.
  sudo dpkg -l 'linux-image-[0-9]*' 2>/dev/null \
    | awk -v running="$running_kernel" '/^ii/ && index($0, running) == 0 { print $2 }' \
    | head -3 | xargs -r sudo apt-get -y purge 2>&1 | tail -2
  echo "--- 8/8 pip/npm caches ---"
  sudo rm -rf /root/.cache/pip /root/.npm/_cacache 2>/dev/null
  sudo rm -rf /var/www/.cache/pip 2>/dev/null
  after=$(disk_pct)
  echo "=== RESULT ==="
  echo "disk: ${before}% -> ${after}%"
  echo "avail_after: $(disk_avail)"
}

# intents: restore the intents JSON from its most recent .bak* copy when the
# file is missing or smaller than 1000 bytes.
dr_intents() {
  local target=/opt/wevia-brain/priority-intents-nl.json
  local size_bytes latest_bak
  echo "=== INTENT JSON HEAL ==="
  size_bytes=$(stat -c%s "$target" 2>/dev/null)
  # stat prints nothing when the file is missing; treat that as 0 bytes so
  # the restore path runs instead of the numeric test erroring out and the
  # "OK" branch being taken.
  size_bytes=${size_bytes:-0}
  if [ "$size_bytes" -lt 1000 ]; then
    echo "CORRUPTED (size=${size_bytes}B) - restoring from latest bak"
    latest_bak=$(ls -t "$target".bak* 2>/dev/null | head -1)
    if [ -z "$latest_bak" ]; then
      echo "FAIL: no bak found"
    elif sudo cp "$latest_bak" "$target"; then
      sudo chown www-data:www-data "$target"
      echo "RESTORED from $latest_bak -> $(intents_count) intents"
    else
      echo "FAIL: could not copy $latest_bak"
    fi
  else
    echo "OK ($size_bytes bytes, $(intents_count) intents)"
  fi
}

# Run each given subcommand in its own `bash "$0"` child process, separated
# by a blank line.
dr_chain() {
  local step first=1
  for step in "$@"; do
    if [ "$first" -eq 0 ]; then echo; fi
    first=0
    bash "$0" "$step"
  done
}

# Map a subcommand name ($1) to its implementation; anything else prints the
# usage line.
dr_dispatch() {
  case "$1" in
    status) dr_status ;;
    chrome) dr_chrome ;;
    disk) dr_disk ;;
    deep) dr_deep ;;
    intents) dr_intents ;;
    all) dr_chain status chrome disk intents status ;;
    deep-all) dr_chain status chrome deep intents status ;;
    *) echo "usage: $0 {status|chrome|disk|deep|intents|all|deep-all}" ;;
  esac
}

dr_dispatch "$ACTION"

View File

@@ -0,0 +1,95 @@
#!/bin/bash
# WEVIA Meta-Disaster-Recovery — WEVIA répare elle-même son infra
# Usage: opus-disaster-recovery.sh [status|chrome|disk|intents|all]
# Subcommand to run, taken from the first CLI argument; falls back to
# "status" when the script is called without arguments.
ACTION="${1:-status}"
# Print the first figure that `uptime` reports after "load average:",
# with spaces removed.
load_now() {
  uptime | awk '{
    split($0, around_label, "load average:")
    split(around_label[2], figures, ",")
    gsub(/ /, "", figures[1])
    print figures[1]
  }'
}
# Print the 5th column (Use%) of the last line of `df -h /`, without the
# percent sign.
disk_pct() {
  df -h / | awk '
    { use_col = $5 }
    END {
      if (NR) {
        gsub(/%/, "", use_col)
        print use_col
      }
    }'
}
# Print how many processes have "chrome" or "chromium" in their `ps aux` line.
# Two kinds of false positives are filtered out:
#   - the grep used for the filtering itself;
#   - this script's own processes: when invoked as "<script> chrome" the
#     script (and every subshell it forks) carries the word "chrome" on its
#     command line and would otherwise inflate the count.
chrome_count() {
  # grep -c prints the number of remaining lines (0 included), so the output
  # is a bare integer.
  ps aux | grep -E "chrome|chromium" | grep -v grep | grep -vcF -- "${0##*/}"
}
# Print len() of the JSON document stored in the priority-intents file, or the
# literal word CORRUPTED when python3 exits non-zero (its stderr is discarded).
intents_count() {
  local py_probe='import json; d=json.load(open("/opt/wevia-brain/priority-intents-nl.json")); print(len(d))'
  if ! python3 -c "$py_probe" 2>/dev/null; then
    echo "CORRUPTED"
  fi
}
# ---------------------------------------------------------------------------
# Subcommand implementations and dispatcher.
# Relies on the probes defined earlier in this file (load_now, disk_pct,
# chrome_count, intents_count) and on $ACTION holding the requested
# subcommand.
# ---------------------------------------------------------------------------

# Succeed when $1 is a non-negative integer strictly greater than $2.
# An empty or non-numeric $1 (a probe that produced nothing) counts as
# "not over" instead of making `[ -gt ]` fail with "integer expression
# expected".
dr_over() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  [ "$1" -gt "$2" ]
}

# status: print the current probes and the list of raised alarms.
dr_status() {
  local load_int disk_used chrome_procs alarms=""
  echo "=== DISASTER RECOVERY STATUS ==="
  echo "load: $(load_now)"
  echo "disk: $(disk_pct)%"
  echo "chrome: $(chrome_count)"
  echo "intents: $(intents_count)"
  # Alarm levels
  load_int=$(load_now | cut -d. -f1) # integer part of the load figure
  disk_used=$(disk_pct)
  chrome_procs=$(chrome_count)
  if dr_over "$load_int" 30; then alarms="$alarms LOAD_HIGH"; fi
  if dr_over "$disk_used" 85; then alarms="$alarms DISK_HIGH"; fi
  if dr_over "$chrome_procs" 50; then alarms="$alarms CHROME_LEAK"; fi
  if [ -z "$alarms" ]; then alarms="NONE"; fi
  echo "alarms:$alarms"
}

# chrome: SIGKILL every process whose command line mentions chrome/chromium.
dr_chrome() {
  local before after pid
  echo "=== CHROME CLEANUP ==="
  before=$(chrome_count)
  # `pkill -f chrome` matches full command lines, and this script is itself
  # started as "... chrome", so pkill would SIGKILL the script before it can
  # report. Enumerate the PIDs first and skip this shell and its parent.
  for pid in $(pgrep -f 'chrome|chromium'); do
    if [ "$pid" = "$$" ] || [ "$pid" = "$PPID" ]; then
      continue
    fi
    sudo kill -9 "$pid" 2>/dev/null
  done
  sleep 3
  after=$(chrome_count)
  echo "chrome: ${before} → ${after}"
  echo "load: $(load_now)"
}

# disk: docker prune, stale /tmp files, orphan playwright profile dirs,
# logs >100M truncated to 10M, all DB backups except the newest removed.
dr_disk() {
  local before after logfile
  echo "=== DISK CLEANUP ==="
  before=$(disk_pct)
  # 1. docker prune
  sudo docker system prune -f 2>&1 | tail -3
  # 2. tmp cleanup (paths containing /nuclei are skipped)
  sudo find /tmp -type f -atime +3 ! -path "*/nuclei*" -delete 2>/dev/null
  # 3. Playwright profile dirs older than 30 minutes
  sudo find /tmp -maxdepth 2 -type d -name "playwright_*" -mmin +30 -exec rm -rf {} \; 2>/dev/null
  # 4. Log truncate > 100M; NUL-delimited so paths with whitespace survive
  while IFS= read -r -d '' logfile; do
    sudo truncate -s 10M "$logfile"
  done < <(sudo find /var/log -type f -size +100M -print0 2>/dev/null)
  # 5. Old DB backups: keep only the newest. The subshell keeps the cd local.
  (
    cd /opt/wevads/vault 2>/dev/null \
      && ls -t db-backup-*.sql.gz 2>/dev/null | tail -n +2 | xargs -r -d '\n' sudo rm -f
  )
  after=$(disk_pct)
  echo "disk: ${before}% → ${after}%"
}

# intents: restore the intents JSON from its most recent .bak* copy when the
# file is missing or smaller than 1000 bytes.
dr_intents() {
  local target=/opt/wevia-brain/priority-intents-nl.json
  local size_bytes latest_bak
  echo "=== INTENT JSON HEAL ==="
  size_bytes=$(stat -c%s "$target" 2>/dev/null)
  # stat prints nothing when the file is missing; treat that as 0 bytes so
  # the restore path runs instead of the numeric test erroring out and the
  # "OK" branch being taken.
  size_bytes=${size_bytes:-0}
  if [ "$size_bytes" -lt 1000 ]; then
    echo "CORRUPTED (size=${size_bytes}B) - restoring from latest bak"
    latest_bak=$(ls -t "$target".bak* 2>/dev/null | head -1)
    if [ -z "$latest_bak" ]; then
      echo "FAIL: no bak found"
    elif sudo cp "$latest_bak" "$target"; then
      sudo chown www-data:www-data "$target"
      echo "RESTORED from $latest_bak → $(intents_count) intents"
    else
      echo "FAIL: could not copy $latest_bak"
    fi
  else
    echo "OK ($size_bytes bytes, $(intents_count) intents)"
  fi
}

# Run each given subcommand in its own `bash "$0"` child process, separated
# by a blank line.
dr_chain() {
  local step first=1
  for step in "$@"; do
    if [ "$first" -eq 0 ]; then echo; fi
    first=0
    bash "$0" "$step"
  done
}

# Map a subcommand name ($1) to its implementation; anything else prints the
# usage line.
dr_dispatch() {
  case "$1" in
    status) dr_status ;;
    chrome) dr_chrome ;;
    disk) dr_disk ;;
    intents) dr_intents ;;
    all) dr_chain status chrome disk intents status ;;
    *) echo "usage: $0 {status|chrome|disk|intents|all}" ;;
  esac
}

dr_dispatch "$ACTION"