From ae7469762df926d2c81ef4c9435c3eea4427c305 Mon Sep 17 00:00:00 2001 From: Opus Date: Fri, 24 Apr 2026 15:21:59 +0200 Subject: [PATCH] doctrine 190 apply: opus-disaster-recovery.sh case chrome) self-safe fix (exclude self-PID + parent-PID from pkill) - sync M1 + M2 mirrors --- .../opus-intents/opus-disaster-recovery.sh | 14 ++- ops/opus-intents/opus-disaster-recovery.sh | 90 +++++++++++++++---- 2 files changed, 84 insertions(+), 20 deletions(-) diff --git a/ops-scripts/opus-intents/opus-disaster-recovery.sh b/ops-scripts/opus-intents/opus-disaster-recovery.sh index 85c6d472f..31b9f6397 100755 --- a/ops-scripts/opus-intents/opus-disaster-recovery.sh +++ b/ops-scripts/opus-intents/opus-disaster-recovery.sh @@ -30,10 +30,18 @@ case "$ACTION" in ;; chrome) - echo "=== CHROME CLEANUP ===" + # === doctrine190-self-safe v1 (24avr) — fix self-kill bug === + echo "=== CHROME CLEANUP (self-safe v190) ===" + MYPID=$$ + MYPPID=$(ps -o ppid= -p $MYPID 2>/dev/null | tr -d ' ') BEFORE=$(chrome_count) - sudo pkill -9 -f chrome 2>&1 | head -1 - sudo pkill -9 -f chromium 2>&1 | head -1 + CHROMES=$(pgrep -f 'chrome-headless-shell' 2>/dev/null | awk -v s=$MYPID -v p=$MYPPID '$1!=s && $1!=p') + CHROMES2=$(pgrep -f 'chromium-browser' 2>/dev/null | awk -v s=$MYPID -v p=$MYPPID '$1!=s && $1!=p') + CHROMES3=$(pgrep -f '/chrome --' 2>/dev/null | awk -v s=$MYPID -v p=$MYPPID '$1!=s && $1!=p') + ALL_PIDS=$(echo "$CHROMES $CHROMES2 $CHROMES3" | tr ' ' '\n' | sort -u | grep -v '^$') + if [ -n "$ALL_PIDS" ]; then + echo "$ALL_PIDS" | xargs -r sudo kill -9 2>/dev/null + fi sleep 3 AFTER=$(chrome_count) echo "chrome: ${BEFORE} -> ${AFTER}" diff --git a/ops/opus-intents/opus-disaster-recovery.sh b/ops/opus-intents/opus-disaster-recovery.sh index eeb5b602c..31b9f6397 100755 --- a/ops/opus-intents/opus-disaster-recovery.sh +++ b/ops/opus-intents/opus-disaster-recovery.sh @@ -1,11 +1,13 @@ #!/bin/bash -# WEVIA Meta-Disaster-Recovery — WEVIA répare elle-même son infra -# Usage: opus-disaster-recovery.sh [full|chrome|disk|intents|all] +# WEVIA Meta-Disaster-Recovery v2 — WEVIA répare elle-même son infra +# Usage: opus-disaster-recovery.sh [status|chrome|disk|deep|intents|all|deep-all] +# v2 24avr2026 : ajout subcommand deep (cleanup agressif) + kill orphan greps ACTION="${1:-status}" load_now() { uptime | awk -F'load average:' '{print $2}' | awk -F, '{print $1}' | tr -d ' '; } disk_pct() { df -h / | tail -1 | awk '{print $5}' | tr -d '%'; } +disk_avail() { df -h / | tail -1 | awk '{print $4}'; } chrome_count() { ps aux | grep -E "chrome|chromium" | grep -v grep | wc -l; } intents_count() { python3 -c 'import json; d=json.load(open("/opt/wevia-brain/priority-intents-nl.json")); print(len(d))' 2>/dev/null || echo "CORRUPTED"; } @@ -13,10 +15,9 @@ case "$ACTION" in status) echo "=== DISASTER RECOVERY STATUS ===" echo "load: $(load_now)" - echo "disk: $(disk_pct)%" + echo "disk: $(disk_pct)% ($(disk_avail) avail)" echo "chrome: $(chrome_count)" echo "intents: $(intents_count)" - # Alarm levels L=$(load_now | cut -d. -f1) D=$(disk_pct) C=$(chrome_count) @@ -29,33 +30,76 @@ case "$ACTION" in ;; chrome) - echo "=== CHROME CLEANUP ===" + # === doctrine190-self-safe v1 (24avr) — fix self-kill bug === + echo "=== CHROME CLEANUP (self-safe v190) ===" + MYPID=$$ + MYPPID=$(ps -o ppid= -p $MYPID 2>/dev/null | tr -d ' ') BEFORE=$(chrome_count) - sudo pkill -9 -f chrome 2>&1 | head -1 - sudo pkill -9 -f chromium 2>&1 | head -1 + CHROMES=$(pgrep -f 'chrome-headless-shell' 2>/dev/null | awk -v s=$MYPID -v p=$MYPPID '$1!=s && $1!=p') + CHROMES2=$(pgrep -f 'chromium-browser' 2>/dev/null | awk -v s=$MYPID -v p=$MYPPID '$1!=s && $1!=p') + CHROMES3=$(pgrep -f '/chrome --' 2>/dev/null | awk -v s=$MYPID -v p=$MYPPID '$1!=s && $1!=p') + ALL_PIDS=$(echo "$CHROMES $CHROMES2 $CHROMES3" | tr ' ' '\n' | sort -u | grep -v '^$') + if [ -n "$ALL_PIDS" ]; then + echo "$ALL_PIDS" | xargs -r sudo kill -9 2>/dev/null + fi sleep 3 AFTER=$(chrome_count) - echo "chrome: ${BEFORE} → ${AFTER}" + echo "chrome: ${BEFORE} -> ${AFTER}" echo "load: $(load_now)" ;; disk) - echo "=== DISK CLEANUP ===" + echo "=== DISK CLEANUP (standard) ===" BEFORE=$(disk_pct) - # 1. docker prune sudo docker system prune -f 2>&1 | tail -3 - # 2. tmp cleanup (skip nuclei running) sudo find /tmp -type f -atime +3 ! -path "*/nuclei*" -delete 2>/dev/null - # 3. Playwright profile dirs (orphan) sudo find /tmp -maxdepth 2 -type d -name "playwright_*" -mmin +30 -exec rm -rf {} \; 2>/dev/null - # 4. Log truncate > 100M for F in $(sudo find /var/log -type f -size +100M 2>/dev/null); do sudo truncate -s 10M "$F" done - # 5. Old DB backups keep latest cd /opt/wevads/vault 2>/dev/null && ls -t db-backup-*.sql.gz 2>/dev/null | tail -n +2 | xargs -r sudo rm -f AFTER=$(disk_pct) - echo "disk: ${BEFORE}% → ${AFTER}%" + echo "disk: ${BEFORE}% -> ${AFTER}%" + ;; + + deep) + echo "=== DISK CLEANUP DEEP (agressif, safe) ===" + BEFORE=$(disk_pct) + echo "avail_before: $(disk_avail)" + echo "--- 1/8 kill orphan greps >120s ---" + for PID in $(ps -eo pid,etimes,cmd --no-headers | awk '$2>120 && /grep -rn/ && !/grep -rn/{print $1}' 2>/dev/null); do + sudo kill -9 "$PID" 2>/dev/null && echo "killed grep $PID" + done + ps -eo pid,etimes,cmd --no-headers | awk '$2>120 && $3=="/usr/bin/grep"{print $1}' | while read PID; do + sudo kill -9 "$PID" 2>/dev/null && echo "killed grep $PID" + done + echo "--- 2/8 docker prune -a --volumes (unused images+volumes) ---" + sudo docker system prune -a -f --volumes 2>&1 | tail -5 + echo "--- 3/8 journalctl vacuum 3 jours ---" + sudo journalctl --vacuum-time=3d 2>&1 | tail -3 + echo "--- 4/8 /tmp atime+1 (hors nuclei) ---" + sudo find /tmp -type f -atime +1 ! -path "*/nuclei*" -delete 2>/dev/null + sudo find /tmp -maxdepth 3 -type d -empty -delete 2>/dev/null + echo "--- 5/8 /var/log truncate >50M ---" + for F in $(sudo find /var/log -type f -size +50M 2>/dev/null); do + SZ=$(stat -c%s "$F") + sudo truncate -s 5M "$F" + echo "trunc $F ($SZ -> 5M)" + done + echo "--- 6/8 proofs old webm >7j delete ---" + sudo find /var/www/html/proofs -type f -name "*.webm" -mtime +7 -delete 2>/dev/null + sudo find /var/www/html/proofs -type f -name "*.png" -mtime +14 -delete 2>/dev/null + sudo find /var/www/html/proofs -type d -empty -delete 2>/dev/null + echo "--- 7/8 apt cache + old kernels ---" + sudo apt-get clean 2>&1 | tail -1 + sudo dpkg -l 'linux-image-*' 2>/dev/null | awk '/^ii/ && !/'"$(uname -r)"'/{print $2}' | head -3 | xargs -r sudo apt-get -y purge 2>&1 | tail -2 + echo "--- 8/8 pip/npm caches ---" + sudo rm -rf /root/.cache/pip /root/.npm/_cacache 2>/dev/null + sudo rm -rf /var/www/.cache/pip 2>/dev/null + AFTER=$(disk_pct) + echo "=== RESULT ===" + echo "disk: ${BEFORE}% -> ${AFTER}%" + echo "avail_after: $(disk_avail)" ;; intents) @@ -68,7 +112,7 @@ case "$ACTION" in sudo cp "$BAK" /opt/wevia-brain/priority-intents-nl.json sudo chown www-data:www-data /opt/wevia-brain/priority-intents-nl.json NEW_COUNT=$(intents_count) - echo "RESTORED from $BAK → $NEW_COUNT intents" + echo "RESTORED from $BAK -> $NEW_COUNT intents" else echo "FAIL: no bak found" fi @@ -89,7 +133,19 @@ case "$ACTION" in bash "$0" status ;; + deep-all) + bash "$0" status + echo + bash "$0" chrome + echo + bash "$0" deep + echo + bash "$0" intents + echo + bash "$0" status + ;; + *) - echo "usage: $0 {status|chrome|disk|intents|all}" + echo "usage: $0 {status|chrome|disk|deep|intents|all|deep-all}" ;; esac