169 lines
9.9 KiB
Bash
Executable File
169 lines
9.9 KiB
Bash
Executable File
#!/bin/bash
# WEVIA Self-Pilot — runs */10, handles EVERYTHING automatically
#
# Globals shared by the sections below:
#   LOG    file that sections append their one-line reports to
#   TS     HH:MM stamp taken once, when the script starts
#   ALERT  " Key:value" fragments appended by sections that had to act;
#          stays empty when nothing needed attention
ALERT=""
TS="$(date +%H:%M)"
LOG="/var/log/wevia-pilot.log"

# === 1. AUTO-GIT BACKUP (every run) ===
# Commits every uncommitted change under /var/www/html and pushes it to the
# "github" remote, branch "main".
# The directory change is checked: if it failed, the git commands would act on
# whatever directory the script was started from.
if cd /var/www/html 2>/dev/null; then
  DIRTY=$(git status --porcelain 2>/dev/null | wc -l)
  if [ "$DIRTY" -gt 0 ]; then
    git add -A && git commit -m "AUTO-BACKUP $(date +%Y%m%d-%H%M)" 2>/dev/null
    # Only a successful push is logged.
    git push github main 2>/dev/null && echo "$TS: git backup $DIRTY files" >> "$LOG"
    # Hand the repository metadata back to the web user after git touched it.
    chown -R www-data:www-data /var/www/html/.git 2>/dev/null
  fi
else
  ALERT="$ALERT Git:NO_WEBROOT"
  echo "$TS: git backup skipped (cannot cd to /var/www/html)" >> "$LOG"
fi

# === 2. AUTO-DOCKER HEALTH ===
# Restarts at most five containers whose status is "exited" or "dead".
# Names containing "tmp" or "test" are left alone.
# DEAD keeps one container name per line, as before.
DEAD=$(docker ps -a --filter status=exited --filter status=dead --format '{{.Names}}' 2>/dev/null | grep -v 'tmp\|test' | head -5)
if [ -n "$DEAD" ]; then
  dead_list=""
  # Read line by line so names are never word-split or glob-expanded.
  while IFS= read -r C; do
    [ -n "$C" ] || continue
    docker restart "$C" </dev/null 2>/dev/null
    echo "$TS: docker restart $C" >> "$LOG"
    dead_list="${dead_list:+$dead_list,}$C"
  done <<< "$DEAD"
  # Comma-joined so the alert stays on one line (it is logged and embedded in
  # the status JSON later).
  ALERT="$ALERT Docker:$dead_list"
fi

# === 3. AUTO-NGINX/PHP HEALTH ===
# Restart nginx when systemd reports it as not active.
if ! systemctl is-active nginx >/dev/null 2>&1; then
  systemctl restart nginx 2>/dev/null
  ALERT="$ALERT Nginx:RESTARTED"
  echo "$TS: nginx restarted" >> "$LOG"
fi

# Test HTTP
# HTTP receives the status code curl reports for the public site.
# --max-time bounds the probe like the other probes in this script; without it
# a stalled connection could block the whole run.
HTTP=$(curl -sf -o /dev/null -w '%{http_code}' --max-time 10 https://weval-consulting.com 2>/dev/null)
if [ "$HTTP" != "200" ]; then
  systemctl reload nginx 2>/dev/null
  ALERT="$ALERT HTTP:$HTTP"
fi

# === 4. AUTO-SSL RENEW (if <15 days) ===
# DAYS is exactly "ok" when the certificate served on :443 is still valid
# 1296000 seconds (15 days) from now, and exactly "expiring" otherwise
# (including when the certificate could not be fetched).
#
# `openssl x509 -checkend` prints a "Certificate will (not) expire" line on
# stdout in addition to setting its exit status. That line is discarded here so
# DAYS holds a single word; capturing it made the comparison below never match.
# `timeout` keeps an unresponsive TLS endpoint from stalling the run.
if echo | timeout 15 openssl s_client -connect weval-consulting.com:443 -servername weval-consulting.com 2>/dev/null \
    | openssl x509 -noout -checkend 1296000 >/dev/null 2>&1; then
  DAYS="ok"
else
  DAYS="expiring"
fi

if [ "$DAYS" = "expiring" ]; then
  # Report success only when certbot itself exits successfully.
  if certbot renew --quiet 2>/dev/null; then
    echo "$TS: SSL renewed" >> "$LOG"
    ALERT="$ALERT SSL:RENEWED"
  else
    echo "$TS: SSL renew FAILED" >> "$LOG"
    ALERT="$ALERT SSL:RENEW_FAILED"
  fi
fi

# === 5. AUTO-DB BACKUP (once per day at 03:xx) ===
# HOUR is also read by the later daily sections (dataset, Cloudflare).
# /tmp/db-backup-today marks the day's backup as done; it is removed on every
# run outside the 03:xx window so the next night starts fresh.
HOUR=$(date +%H)
if [ "$HOUR" = "03" ] && [ ! -f /tmp/db-backup-today ]; then
  backup_file="/opt/wevads/vault/db-backup-$(date +%Y%m%d).sql.gz"
  # NOTE(review): the database password is hard-coded here; consider moving it
  # to ~/.pgpass or a root-only environment file.
  #
  # pipefail (scoped to the subshell) makes a pg_dump failure visible even
  # though gzip succeeds. The dump is written to a .part file and renamed only
  # on success, so a failed run never leaves a truncated db-backup-*.sql.gz.
  if (
       set -o pipefail
       PGPASSWORD=admin123 pg_dump -h 10.1.0.3 -U admin adx_system 2>/dev/null | gzip > "${backup_file}.part"
     ) && mv -f -- "${backup_file}.part" "$backup_file"; then
    touch /tmp/db-backup-today
    # Keep only 7 days
    find /opt/wevads/vault -name 'db-backup-*.sql.gz' -mtime +7 -delete 2>/dev/null
    echo "$TS: DB backup done" >> "$LOG"
  else
    # No marker is set, so the next run inside the 03:xx window retries.
    rm -f -- "${backup_file}.part"
    echo "$TS: DB backup FAILED" >> "$LOG"
    ALERT="$ALERT DB:BACKUP_FAILED"
  fi
fi
[ "$HOUR" != "03" ] && rm -f /tmp/db-backup-today

# === 6. AUTO-DISK CLEANUP (if >85%) ===
# DISK is the use percentage of / without the "%" sign.
# `df -P` forces one line per filesystem; plain `df` may wrap a long device
# name onto its own line, which would leave field 5 of line 2 empty.
DISK=$(df -P / | awk 'NR==2{print $5}' | tr -d '%')
# The numeric guard avoids a "[: integer expression expected" error if df
# produced nothing usable.
if [[ "$DISK" =~ ^[0-9]+$ ]] && [ "$DISK" -gt 85 ]; then
  journalctl --vacuum-size=50M 2>/dev/null
  find /tmp -type f -mtime +1 -delete 2>/dev/null
  find /var/log -name '*.gz' -delete 2>/dev/null
  docker system prune -f 2>/dev/null
  # Measure again so the log and the alert show the effect of the cleanup.
  DISK2=$(df -P / | awk 'NR==2{print $5}' | tr -d '%')
  echo "$TS: disk cleanup $DISK%→$DISK2%" >> "$LOG"
  ALERT="$ALERT Disk:${DISK}%→${DISK2}%"
fi

# === 7. AUTO-SWAP CLEANUP (if >80%) ===
# SWAP is used/total from the "Swap" row of `free`, as a whole percentage;
# it is 0 when the total is 0.
SWAP=$(
  free | awk '/Swap/ { if ($2 > 0) printf("%.0f", $3 / $2 * 100); else print 0 }'
)
if test "$SWAP" -gt 80; then
  # Flush pending writes first, then write 3 to the kernel's drop_caches knob.
  # NOTE(review): this releases caches, not swap itself — confirm it is the
  # intended remedy.
  if sync; then
    echo 3 > /proc/sys/vm/drop_caches 2>/dev/null
  fi
  echo "$TS: swap cleanup $SWAP%" >> $LOG
fi

# === 8. AUTO-LOG ROTATION ===
# Shrinks oversized wevia-*.log files while keeping their most recent entries.
# (`truncate -s` keeps the beginning of a file, i.e. the oldest lines, and
# throws the newest ones away — the opposite of what a log trim should do.)

#######################################
# Cut a file down to its last KEEP_BYTES bytes, in place.
# The file is rewritten through `cat >` rather than replaced, so it keeps its
# inode, owner and mode.
# Arguments: $1 - file to trim
#            $2 - number of trailing bytes to keep
# Returns:   0 on success, non-zero if the temp file or the copy failed
#######################################
trim_log_tail() {
  local file=$1 keep=$2 tmp rc=0
  tmp=$(mktemp) || return 1
  if tail -c "$keep" -- "$file" > "$tmp"; then
    cat -- "$tmp" > "$file" || rc=1
  else
    rc=1
  fi
  rm -f -- "$tmp"
  return "$rc"
}

# /var/log: files above 10 MiB are cut to their last 1 MiB.
while IFS= read -r -d '' oversized; do
  trim_log_tail "$oversized" 1048576
done < <(find /var/log -name 'wevia-*.log' -size +10M -print0 2>/dev/null)

# /tmp: files above 5 MiB are cut to their last 500 KiB.
while IFS= read -r -d '' oversized; do
  trim_log_tail "$oversized" 512000
done < <(find /tmp -name 'wevia-*.log' -size +5M -print0 2>/dev/null)

# === 9. AUTO-LEARN (save errors to KB) ===
# ERRORS counts how many of the last 20 nginx error-log lines carry today's
# date. Above 10, the first 200 bytes of the newest line are stored in
# kb_learnings.
ERRORS=$(tail -20 /var/log/nginx/error.log 2>/dev/null | grep -c "$(date +%Y/%m/%d)")
if [ "$ERRORS" -gt 10 ]; then
  LAST_ERR=$(tail -1 /var/log/nginx/error.log 2>/dev/null | head -c 200)
  # The log line contains request data chosen by remote clients, so it must
  # never be spliced into the SQL text. It is handed to psql as a variable and
  # referenced as :'last_err', which psql expands to a correctly quoted
  # literal. The statement is fed on stdin because psql does not interpolate
  # variables inside -c strings.
  # NOTE(review): the database password is hard-coded; consider ~/.pgpass.
  PGPASSWORD=admin123 psql -h 10.1.0.3 -U admin -d adx_system -v last_err="$LAST_ERR" 2>/dev/null \
    <<< "INSERT INTO kb_learnings (category,fact,source,confidence,created_at) VALUES ('AUTO-ERROR',:'last_err','pilot',0.8,NOW());"
  echo "$TS: $ERRORS nginx errors logged to KB" >> "$LOG"
fi

# === 10. AUTO-DATASET GROW (daily) ===
# In the 04:xx window, once per day, appends up to 10 kb_learnings rows from
# the last 24 hours to the fine-tuning file, one ChatML JSON object per line.
# /tmp/dataset-today marks the day's run; it is removed outside 04:xx.
# HOUR comes from the DB-backup section.
if [ "$HOUR" = "04" ] && [ ! -f /tmp/dataset-today ]; then
  DS="/var/www/html/api/notebooks/weval-finetune-chatml.jsonl"
  # Grouping keeps the "no such file" message of a missing dataset quiet.
  COUNT=$( { wc -l < "$DS"; } 2>/dev/null || echo 0)
  # Extract recent KB learnings as training pairs
  # -A (unaligned) together with -t (tuples only) makes psql print each row
  # bare. With -t alone every row is indented and an empty line follows the
  # result, which put blank/indented lines into the JSONL file. The grep is a
  # second guard against empty lines.
  # NOTE(review): the database password is hard-coded; consider ~/.pgpass.
  PGPASSWORD=admin123 psql -h 10.1.0.3 -U admin -d adx_system -At -c "SELECT json_build_object('messages', json_build_array(json_build_object('role','system','content','WEVIA Master AI'),json_build_object('role','user','content',category),json_build_object('role','assistant','content',fact))) FROM kb_learnings WHERE created_at > NOW() - INTERVAL '24 hours' LIMIT 10" 2>/dev/null \
    | grep -v '^[[:space:]]*$' >> "$DS"
  COUNT2=$( { wc -l < "$DS"; } 2>/dev/null || echo 0)
  touch /tmp/dataset-today
  echo "$TS: dataset $COUNT→$COUNT2 pairs" >> "$LOG"
fi
[ "$HOUR" != "04" ] && rm -f /tmp/dataset-today

# === 11. AUTO-CLOUDFLARE (check WAF status) ===
# In the 06:xx window, once per day, logs the "status" field of the Cloudflare
# zone record. /tmp/cf-check-today marks the day's run; it is removed outside
# 06:xx. HOUR comes from the DB-backup section.
if [ "$HOUR" = "06" ] && [ ! -f /tmp/cf-check-today ]; then
  # -m1: use the first matching line only, so several CF_API* entries cannot
  #      produce a multi-line key.
  # -f2-: keep everything after the first "=", in case the value contains "=".
  CF_KEY=$(grep -m1 CF_API /etc/weval/secrets.env 2>/dev/null | cut -d= -f2- | tr -d '"')
  if [ -n "$CF_KEY" ]; then
    # --max-time bounds the call like the other probes in this script.
    # Prints "?" when the reply has no result.status; stays empty when curl or
    # the JSON parse fails.
    CF_STATUS=$(curl -sf --max-time 10 -H "X-Auth-Email: ymahboub@weval-consulting.com" -H "X-Auth-Key: $CF_KEY" "https://api.cloudflare.com/client/v4/zones/1488bbba251c6fa282999fcc09aac9fe" 2>/dev/null | python3 -c "import json,sys;d=json.load(sys.stdin);print(d.get('result',{}).get('status','?'))" 2>/dev/null)
    echo "$TS: CF status: $CF_STATUS" >> "$LOG"
  fi
  touch /tmp/cf-check-today
fi
[ "$HOUR" != "06" ] && rm -f /tmp/cf-check-today

# === 12. STATUS JSON ===
# Publishes the values gathered above as a one-line JSON document.

#######################################
# Escape a string for use inside a JSON double-quoted string.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Emit the status document on stdout. Every string value is escaped because
# some of them (container names, alert text) can span several lines or contain
# quotes, which previously produced invalid JSON.
print_pilot_status() {
  printf '{"ts":"%s","disk":"%s%%","swap":"%s%%","docker_dead":"%s","http":"%s","ssl":"%s","alerts":"%s","crons":%s}\n' \
    "$(json_escape "$TS")" \
    "$(json_escape "${DISK:-?}")" \
    "$(json_escape "${SWAP:-?}")" \
    "$(json_escape "${DEAD:-none}")" \
    "$(json_escape "${HTTP:-?}")" \
    "$(json_escape "$DAYS")" \
    "$(json_escape "${ALERT:-none}")" \
    "$(crontab -l 2>/dev/null | grep -c .)"
}

STATUS_FILE=/var/www/html/api/wevia-pilot-status.json
# Write to a sibling temp file and rename it into place so a reader never sees
# a half-written document. mktemp creates the file private, hence the chmod.
if status_tmp=$(mktemp "${STATUS_FILE}.XXXXXX" 2>/dev/null); then
  if print_pilot_status > "$status_tmp" && chmod 644 "$status_tmp"; then
    mv -f -- "$status_tmp" "$STATUS_FILE"
  else
    rm -f -- "$status_tmp"
  fi
else
  # Fall back to a direct write when no temp file can be created.
  { print_pilot_status > "$STATUS_FILE"; } 2>/dev/null
fi

# === 13. ALERT YACINE (Mattermost/email if critical) ===
# As written, alerts are only appended to the pilot log; nothing in this
# section sends a Mattermost message or an email.
[ -z "$ALERT" ] || echo "$TS: ALERT: $ALERT" >> $LOG

# Heartbeat line: disk and swap percentages from the earlier sections plus the
# number of running containers.
running_containers=$(docker ps -q 2>/dev/null | wc -l)
echo "$TS: pilot OK disk:${DISK}% swap:${SWAP}% docker:${running_containers}" >> $LOG

# === WEVIA BRAIN HEALTH CHECK ===
# BRAIN holds the "status" field of the health endpoint's JSON reply ("down"
# when the field is absent). A curl or parse failure leaves it empty.
# Anything other than "ok" restarts ollama and reloads nginx.
BRAIN=$(
  curl -sf --max-time 5 "https://weval-consulting.com/api/wevia-health.php" 2>/dev/null \
    | python3 -c 'import json,sys;print(json.load(sys.stdin).get("status","down"))' 2>/dev/null
)
case "$BRAIN" in
  ok)
    ;;
  *)
    echo "$(date +%H:%M): BRAIN DOWN → restarting ollama" >> /var/log/wevia-pilot.log
    systemctl restart ollama 2>/dev/null
    systemctl reload nginx 2>/dev/null
    ;;
esac

# === MONITOR HEALTH CHECK (all endpoints) ===
# MONITOR_HEALTH
# Probes /api/<name>.php for every name in ENDPOINTS, logs each non-200 answer
# and restarts the matching service where one is mapped below.
ENDPOINTS="wevia-health health-ollama health-qdrant health-searxng"
# ENDPOINTS is a space-separated list; word-splitting is intended here.
for EP in $ENDPOINTS; do
  CODE=$(curl -sf -o /dev/null -w "%{http_code}" "https://weval-consulting.com/api/$EP.php" --max-time 5 2>/dev/null)
  if [ "$CODE" != "200" ]; then
    echo "$(date +%H:%M): $EP HTTP:$CODE" >> "$LOG"
    # Auto-fix: restart services
    case "$EP" in
      wevia-health)
        # The pattern is quoted so the shell cannot expand it against file
        # names in the current directory, and it carries the .service suffix
        # because systemctl matches glob patterns against full unit names.
        systemctl restart 'php*-fpm.service' 2>/dev/null
        systemctl reload nginx 2>/dev/null
        ;;
      health-ollama)
        systemctl restart ollama 2>/dev/null
        ;;
      *)
        # health-qdrant / health-searxng: logged only, no automatic action.
        ;;
    esac
  fi
done

# === ETHICA API CHECK ===
# ETHICA_API_CHECK
# Logs a line whenever the Ethica dashboard call answers with anything other
# than HTTP 200 (curl reports 000 when no response was received).
ethica_url="https://weval-consulting.com/api/ethica-api.php?action=dashboard&token=ETHICA_API_2026_SECURE"
ETHICA_HTTP=$(curl -sf -o /dev/null -w "%{http_code}" --max-time 5 "$ethica_url" 2>/dev/null)
if [ "$ETHICA_HTTP" != "200" ]; then
  echo "$(date +%H:%M): ETHICA API DOWN HTTP:$ETHICA_HTTP" >> $LOG
fi

# === WEVIA CHAT HEALTH ===
# CHAT_HEALTH
# Sends a "ping" message to the chat endpoint and logs when the reply contains
# the word "maintenance" anywhere in its body.
CHAT_RESP=$(
  curl -sf -X POST --max-time 10 \
    -H "Content-Type: application/json" \
    -d '{"message":"ping","stream":false}' \
    "https://weval-consulting.com/api/weval-ia-fast.php" 2>/dev/null
)
case "$CHAT_RESP" in
  *maintenance*)
    echo "$(date +%H:%M): WEVIA CHAT DOWN (maintenance mode)" >> $LOG
    ;;
esac

# === L99 SCREENSHOT CHECK ===
# L99_SCREENSHOT_CHECK
# This section was stored as one physical line containing literal "\n" text,
# so bash parsed it as a single garbled command. It is restored here as
# separate statements.
# L99_PASS is the "pass" value of the results file; it stays empty when the
# file is missing or unreadable.
L99_PASS=$(python3 -c 'import json;d=json.load(open("/var/www/html/api/l99-results.json")); print(d.get("pass",0))' 2>/dev/null)
if [ -z "$L99_PASS" ] || [ "$L99_PASS" = "None" ]; then
  echo "$(date +%H:%M): L99 results empty, triggering scan" >> "$LOG"
  # Run in the background, capped at 120 s, so the pilot itself is not held up.
  timeout 120 python3 /opt/weval-l99/l99-master.py --quick > /dev/null 2>&1 &
fi

# === L99 SCREENSHOT PURGE (keep 7 days) ===
# L99_PURGE
# This section was stored as one physical line containing literal "\n" text
# and never ran as written; it is restored as separate statements.
# Deletes PNG files older than 7 days from both screenshot directories.
find /opt/weval-l99/screenshots -name "*.png" -mtime +7 -delete 2>/dev/null
find /opt/weval-l99/ss -name "*.png" -mtime +7 -delete 2>/dev/null

# === L99 AUTO-FIX FAILURES ===
# L99_AUTOFIX
# This section was stored as one physical line containing literal "\n" text,
# which turned the comments and the following commands into arguments of a
# single broken pipeline. It is restored as separate statements.

# Restart exited Docker containers
# NOTE(review): unlike the docker health section earlier in this script, this
# restarts every exited container, including ones named *tmp*/*test* —
# confirm that is intended.
docker ps -a --filter status=exited -q 2>/dev/null | xargs -r docker restart 2>/dev/null

# Fix common issues
systemctl is-active nginx >/dev/null || systemctl restart nginx
systemctl is-active ollama >/dev/null || systemctl restart ollama

# === PAGE REGRESSION SCAN (hourly) ===
# PAGE_REGRESSION
# Both sections below were stored as single physical lines containing literal
# "\n" text and never ran as written; they are restored as separate
# statements. Each of them invoked the page scanner at minute 00; it is now
# invoked once.
#
# The minute is taken from the start-of-run stamp TS (HH:MM) when available:
# the sections above can take longer than a minute, which would make a fresh
# `date +%M` read 01 and skip the hourly scan.
MIN=${TS:+${TS#*:}}
MIN=${MIN:-$(date +%M)}
[ "$MIN" = "00" ] && bash /opt/weval-l99/wevia-page-scan.sh 2>/dev/null

# === L99 FULL WIRE ===
# L99_WIRE
# Feed L99 results into architecture scanner
cp /var/www/html/api/wevia-page-scan.json /var/www/html/api/l99-page-health.json 2>/dev/null

# Trigger L99 if results stale (>6h)
# A missing results file counts as modified at epoch 0, i.e. always stale.
L99_AGE=$(( $(date +%s) - $(stat -c %Y /var/www/html/api/l99-results.json 2>/dev/null || echo 0) ))
# NOTE(review): this hands a .sh file to python3 — confirm the target really
# is a Python program.
[ "$L99_AGE" -gt 21600 ] && nohup timeout 180 python3 /opt/weval-l99/l99-purge-scan.sh > /dev/null 2>&1 &

# === GPT RUNNER CHECK ===
# GPT_RUNNER
# This section was stored as one physical line containing literal "\n" text
# and never ran as written; it is restored as separate statements.
# Starts the GPT Runner web server when nothing listens on TCP port 3900.
# The match is anchored on ":3900" followed by whitespace so that a PID or a
# longer port number containing "3900" cannot pass for a listener.
if ! ss -tln 2>/dev/null | grep -qE ':3900[[:space:]]'; then
  # Subshell: the directory change stays local to the launched server.
  (cd /opt/claw-code && nohup node packages/gpt-runner-web/dist/start-server.cjs --port 3900 > /var/log/gpt-runner.log 2>&1 &)
fi

# === CLAWCODE WATCHDOG ===
# CLAWCODE_WATCH
# This section was stored as one physical line containing literal "\n" text
# and never ran as written; it is restored as separate statements.
# CCSZ is the byte count of the ClawCode front page (0 when curl fails).
# Under 1000 bytes the site is treated as down and the GPT Runner is launched.
CCSZ=$(curl -sf https://code.weval-consulting.com --max-time 5 2>/dev/null | wc -c)
if [ "$CCSZ" -lt 1000 ]; then
  echo "$(date +%H:%M): ClawCode DOWN (${CCSZ}B) restarting GPT Runner" >> "$LOG"
  # NOTE(review): an already running instance is not stopped first; if port
  # 3900 is still bound the new process will presumably fail to start.
  # Subshell: the directory change stays local to the launched server.
  (cd /opt/claw-code && nohup node packages/gpt-runner-web/dist/start-server.cjs --port 3900 > /var/log/gpt-runner.log 2>&1 &)
fi

# === BLADE MONITOR ===
# This section was stored as one physical line containing literal "\n" text,
# so the monitor script was passed as an argument to a non-existent command
# instead of being executed. It is restored as a real command line.
bash /opt/weval-l99/wevia-blade-monitor.sh

# === PUBLIC CHATBOT MONITOR ===
# CHATBOT_PUBLIC
# This section was stored as one physical line containing literal "\n" text
# and never ran as written; it is restored as separate statements.
# CBR is the "provider" field of the local chatbot's JSON reply, or "FAIL"
# when the field is absent.
CBR=$(curl -sf -X POST http://127.0.0.1/api/weval-ia-fast.php -H "Content-Type: application/json" -d '{"message":"test"}' --max-time 10 2>/dev/null | python3 -c 'import json,sys;print(json.load(sys.stdin).get("provider","FAIL"))' 2>/dev/null)
# No reply at all (connection refused, timeout, non-JSON body) leaves the
# substitution empty; that is a failure too and must not go unreported.
CBR=${CBR:-FAIL}
if [ "$CBR" = "FAIL" ]; then
  echo "$(date +%H:%M): PUBLIC CHATBOT DOWN" >> "$LOG"
fi

# LITELLM PROXY
# LITELLM_WATCH
# This section was stored as one physical line containing literal "\n" text
# and never ran as written; it is restored as separate statements.
# Starts the proxy when nothing listens on TCP port 4001. The match is
# anchored on ":4001" followed by whitespace so that a PID or a longer port
# number containing "4001" cannot pass for a listener.
if ! ss -tln 2>/dev/null | grep -qE ':4001[[:space:]]'; then
  nohup python3 /opt/weval-litellm/wevia-proxy.py 4001 > /var/log/litellm-proxy.log 2>&1 &
fi

# 6SIGMA QUALITY AGENT
# QUALITY_AGENT
# This section was stored as one physical line containing literal "\n" text,
# so the agent was never launched as written. It is restored as a real
# command line. The agent runs in the foreground with all output discarded.
python3 /opt/weval-l99/wevia-quality-agent.py > /dev/null 2>&1