From aa2e4657e5feb3ce1583f6488ca18f028ed06b86 Mon Sep 17 00:00:00 2001
From: Yanis Mahboub <yacineutt@gmail.com>
Date: Mon, 13 Apr 2026 13:37:52 +0200
Subject: [PATCH] Wave 114 auto

---
 training-data/weval-merged-dataset.jsonl |  20 +++
 wevia-tool-registry.json                 | 200 +++++++++++++++++++++++
 wevia-vault-context.php                  |  66 ++++----
 3 files changed, 254 insertions(+), 32 deletions(-)
 create mode 100644 wevia-tool-registry.json

diff --git a/training-data/weval-merged-dataset.jsonl b/training-data/weval-merged-dataset.jsonl
index 6876dc3..945967a 100644
--- a/training-data/weval-merged-dataset.jsonl
+++ b/training-data/weval-merged-dataset.jsonl
@@ -5729,3 +5729,23 @@
 {"instruction": "Que sais-tu sur file?", "output": "{\"file\": \"/var/www/html/office-login.html\", \"name\": \"office-login.html\", \"ext\": \"html\", \"size\": 4469, \"lines\": 77, \"checksum\": \"5fdaa5f2e6cd0056cc0e8d84ded0957e\", \"scanned_at\": \"2026-04-04T21:21:10\", \"type\": \"frontend\", \"functions\": [\"doLogin\"], \"constants\": [], \"api_calls\": [], \"dom_ids\": [\"err\", \""}
 {"conversations": [{"from": "human", "value": "conversation"}, {"from": "gpt", "value": "Comment WEVAL IA se différencie de ChatGPT pour une entreprise?\n---\nWEVAL IA est conçue pour répondre aux besoins spécifiques des entreprises, en particulier dans les domaines de la gestion des processus et de l'optimisation des opérations. Contrairement à ChatGPT, qui est un modèle de langage généraliste, WEVAL IA est déployée sur les serveurs de l'entreprise, ce qui garantit une sécurité et une confidentialité maximales, sans fuite de données. De plus, WEVAL IA est entraînée sur les processus et les données spécifiques de l'entreprise, ce qui lui permet de fournir des réponses et des recommandations personnalisées et pertinentes. Quels sont vos principaux déf"}]}
 {"conversations": [{"from": "human", "value": "HTML Formulaire avance"}, {"from": "gpt", "value": "form.flex-col gap-16. Input: label(12px uppercase dim) + input(bg-input border-2 radius-8 focus:cyan-shadow). Select: same style + custom arrow. Textarea: min-h-100 resize-y. Buttons: flex gap-12 justify-end. Validation: pattern+required+:invalid{border-red}. Toggle: checkbox hidden + label(pill slide animation)."}]}
+{"conversations": [{"from": "human", "value": "reconcile tout"}, {"from": "gpt", "value": "PORTS=55 GIT=5 DOCKER=17 DISK=88%"}]}
+{"conversations": [{"from": "human", "value": "port scan"}, {"from": "gpt", "value": "56 ports. Qdrant:6333 PG:5432"}]}
+{"conversations": [{"from": "human", "value": "git dirty"}, {"from": "gpt", "value": "5 dirty files"}]}
+{"conversations": [{"from": "human", "value": "providers"}, {"from": "gpt", "value": "14/15 UP Cerebras Groq Ollama"}]}
+{"conversations": [{"from": "human", "value": "ollama"}, {"from": "gpt", "value": "4 models: weval-brain-v3 qwen3 nomic minilm"}]}
+{"conversations": [{"from": "human", "value": "wiki"}, {"from": "gpt", "value": "1176 articles"}]}
+{"conversations": [{"from": "human", "value": "nonreg"}, {"from": "gpt", "value": "153/153 PASS"}]}
+{"conversations": [{"from": "human", "value": "ethica"}, {"from": "gpt", "value": "126K HCPs"}]}
+{"conversations": [{"from": "human", "value": "tools"}, {"from": "gpt", "value": "262 tools v7.1"}]}
+{"conversations": [{"from": "human", "value": "cascade"}, {"from": "gpt", "value": "0 EUR: Cerebras+Groq+Ollama"}]}
+{"conversations": [{"from": "human", "value": "Quel est le status du systeme?"}, {"from": "gpt", "value": "Systeme WEVAL S204: Disk 88%, RAM 27%, Load 2.5, Docker 17, SSL 357j, Ollama 4 models, Services: nginx PHP PG Qdrant OK"}]}
+{"conversations": [{"from": "human", "value": "Combien de crons actifs?"}, {"from": "gpt", "value": "57 crons actifs. NonReg 2x/j, Ethica 8 crons, Backup 4am, L99 */15min, DeerFlow */6h, B2B */4h."}]}
+{"conversations": [{"from": "human", "value": "Score L99?"}, {"from": "gpt", "value": "NonReg 153/153 (100%), Auth 49/49, UX 3/3, E2E 18/18. 6-Sigma PASS."}]}
+{"conversations": [{"from": "human", "value": "Status Docker?"}, {"from": "gpt", "value": "16 containers actifs: Gitea, Qdrant, Mattermost, Twenty, UptimeKuma, Vaultwarden, Plausible, SearXNG, Loki, Flowise."}]}
+{"conversations": [{"from": "human", "value": "Status providers IA?"}, {"from": "gpt", "value": "Cascade 0EUR: Cerebras llama3.1-8b 5ms, Cerebras Qwen-235B 77ms, Groq llama-3.3-70b 12ms, Ollama 4 models local. 14/15 UP."}]}
+{"conversations": [{"from": "human", "value": "Combien de HCPs Ethica?"}, {"from": "gpt", "value": "126203 HCPs, 109608 emails, 18216 phones, 64346 Google verified 95%. Scraper DabaDoc 50 TN/min."}]}
+{"conversations": [{"from": "human", "value": "Combien de tools?"}, {"from": "gpt", "value": "Registry v7.1: 262 tools. Dynamic Resolver LIVE. 17 scripts bash. Resolver FIRST PRIORITY."}]}
+{"conversations": [{"from": "human", "value": "Combien dagents?"}, {"from": "gpt", "value": "74 agents dans agents-archi. Strategie: WEVIA Master CEO CTO. Direction: Director Sentinel L99. Tactique: WEDROID CodeRabbit QA."}]}
+{"conversations": [{"from": "human", "value": "Quoi de neuf?"}, {"from": "gpt", "value": "Dynamic Resolver LIVE 8/8 EXEC. 20 root causes resolues. Dataset HF uploade. Cascade 0EUR. Auth 100%. NonReg 153/153."}]}
+{"conversations": [{"from": "human", "value": "Push git?"}, {"from": "gpt", "value": "Git push origin main OK. GitHub Yacineutt/weval-consulting. Gitea 127.0.0.1:3300. Backup auto 4h."}]}
diff --git a/wevia-tool-registry.json b/wevia-tool-registry.json
new file mode 100644
index 0000000..2f4f650
--- /dev/null
+++ b/wevia-tool-registry.json
@@ -0,0 +1,200 @@
+[
+  {
+    "id": "scan",
+    "kw": "scan|audit|diagnostic",
+    "api": "shell",
+    "cmd": "df -h / | tail -1 && docker ps -q 2>/dev/null | wc -l && cat /proc/loadavg"
+  },
+  {
+    "id": "docker",
+    "kw": "docker|container",
+    "api": "shell",
+    "cmd": "docker ps --format '{{.Names}}: {{.Status}}' 2>/dev/null | head -20"
+  },
+  {
+    "id": "git-status",
+    "kw": "git status",
+    "api": "shell",
+    "cmd": "cd /var/www/html && git status --short | head -10"
+  },
+  {
+    "id": "git-push",
+    "kw": "git push",
+    "api": "shell",
+    "cmd": "cd /var/www/html && git push gitea main --force 2>&1 | tail -3"
+  },
+  {
+    "id": "crons",
+    "kw": "cron|crontab",
+    "api": "shell",
+    "cmd": "crontab -l -u www-data 2>/dev/null | grep -cv '^#'"
+  },
+  {
+    "id": "ethica-stats",
+    "kw": "ethica.*stat|combien.*medecin|hcp",
+    "api": "url",
+    "url": "https://weval-consulting.com/api/ethica-stats.php"
+  },
+  {
+    "id": "ethica-spec",
+    "kw": "specialite|top.*medecin|segmente",
+    "api": "shell",
+    "cmd": "PGPASSWORD=admin123 psql -h localhost -U admin -d adx_system -t -c \"SELECT spec_list, COUNT(*) FROM ethica.hcp_summary WHERE spec_list IS NOT NULL GROUP BY spec_list ORDER BY count DESC LIMIT 15\""
+  },
+  {
+    "id": "ethica-pays",
+    "kw": "medecin.*pays|hcp.*pays",
+    "api": "shell",
+    "cmd": "PGPASSWORD=admin123 psql -h localhost -U admin -d adx_system -t -c \"SELECT pays, COUNT(*) FROM ethica.hcp_summary GROUP BY pays ORDER BY count DESC\""
+  },
+  {
+    "id": "ethica-ville",
+    "kw": "medecin.*ville|hcp.*ville",
+    "api": "shell",
+    "cmd": "PGPASSWORD=admin123 psql -h localhost -U admin -d adx_system -t -c \"SELECT ville, COUNT(*) FROM ethica.hcp_summary WHERE ville IS NOT NULL GROUP BY ville ORDER BY count DESC LIMIT 15\""
+  },
+  {
+    "id": "ollama",
+    "kw": "ollama|modele.*local",
+    "api": "shell",
+    "cmd": "curl -s http://localhost:11435/api/tags 2>/dev/null | python3 -c \"import sys,json;[print(m['name']) for m in json.load(sys.stdin).get('models',[])]\" 2>&1"
+  },
+  {
+    "id": "growth",
+    "kw": "growth|revenue|pipeline",
+    "api": "url",
+    "url": "https://weval-consulting.com/api/growth-engine-api.php?action=status"
+  },
+  {
+    "id": "oss",
+    "kw": "oss|discovery|wire",
+    "api": "url",
+    "url": "https://weval-consulting.com/api/oss-discovery-api.php"
+  },
+  {
+    "id": "seo",
+    "kw": "seo|referencement",
+    "api": "url",
+    "url": "https://weval-consulting.com/api/growth-engine-api.php?action=seo_audit"
+  },
+  {
+    "id": "pages",
+    "kw": "combien.*page|architecture",
+    "api": "shell",
+    "cmd": "find /var/www/html -maxdepth 2 -name '*.html' | wc -l"
+  },
+  {
+    "id": "ssl",
+    "kw": "ssl|certificat",
+    "api": "shell",
+    "cmd": "echo|openssl s_client -servername weval-consulting.com -connect 127.0.0.1:443 2>/dev/null|openssl x509 -noout -enddate"
+  },
+  {
+    "id": "nginx",
+    "kw": "nginx",
+    "api": "shell",
+    "cmd": "sudo nginx -t 2>&1 | tail -1"
+  },
+  {
+    "id": "n8n",
+    "kw": "n8n|workflow",
+    "api": "shell",
+    "cmd": "docker ps --filter name=n8n --format '{{.Names}}: {{.Status}}' 2>/dev/null || echo N8N_DOWN"
+  },
+  {
+    "id": "qdrant",
+    "kw": "qdrant|vector",
+    "api": "shell",
+    "cmd": "curl -s http://localhost:6333/collections 2>/dev/null | python3 -c \"import sys,json;[print(c['name']) for c in json.load(sys.stdin).get('result',{}).get('collections',[])]\" 2>&1"
+  },
+  {
+    "id": "cleanup",
+    "kw": "clean|nettoi|purge",
+    "api": "shell",
+    "cmd": "docker system prune -f 2>/dev/null; df -h / | tail -1"
+  },
+  {
+    "id": "blade-status",
+    "kw": "blade.*status|razer",
+    "api": "shell",
+    "cmd": "cat /var/www/html/api/blade-tasks/heartbeat.json 2>/dev/null"
+  },
+  {
+    "id": "l99",
+    "kw": "l99.*coverage|l99.*test",
+    "api": "shell",
+    "cmd": "ls -t /opt/weval-l99/logs/l99-*.json 2>/dev/null | head -1 | xargs python3 -c \"import sys,json;d=json.load(open(sys.argv[1]));print('PASS:',d.get('pass','?'),'TOTAL:',d.get('total','?'))\" 2>&1"
+  },
+  {
+    "id": "agents",
+    "kw": "agents?.*vivant|agents?.*live",
+    "api": "shell",
+    "cmd": "echo Paperclip:$(curl -so /dev/null -w '%{http_code}' http://127.0.0.1:3201/api/health --max-time 3) SearXNG:$(curl -so /dev/null -w '%{http_code}' http://localhost:8888/ --max-time 3)"
+  },
+  {
+    "id": "providers",
+    "kw": "provider|cascade",
+    "api": "shell",
+    "cmd": "echo CASCADE: Groq Cerebras SambaNova Mistral NVIDIA Ollama = 0EUR"
+  },
+  {
+    "id": "webchat",
+    "kw": "webchat|playwright",
+    "api": "shell",
+    "cmd": "curl -s http://localhost:8902/health --max-time 3 2>/dev/null || echo DOWN"
+  },
+  {
+    "id": "disk",
+    "kw": "disk|espace|disque",
+    "api": "shell",
+    "cmd": "df -h / | tail -1"
+  },
+  {
+    "id": "paperclip",
+    "kw": "paperclip",
+    "api": "shell",
+    "cmd": "curl -s http://127.0.0.1:3201/api/health --max-time 3 2>/dev/null || echo DOWN"
+  },
+  {
+    "id": "twenty",
+    "kw": "twenty|crm",
+    "api": "shell",
+    "cmd": "curl -so /dev/null -w '%{http_code}' http://127.0.0.1:3000 --max-time 3"
+  },
+  {
+    "id": "dns",
+    "kw": "dns|domaine",
+    "api": "shell",
+    "cmd": "dig +short weval-consulting.com"
+  },
+  {
+    "id": "load",
+    "kw": "load|charge|cpu",
+    "api": "shell",
+    "cmd": "cat /proc/loadavg"
+  },
+  {
+    "id": "memory",
+    "kw": "ram|memoire|memory",
+    "api": "shell",
+    "cmd": "free -h | head -2"
+  },
+  {
+    "id": "dark-scout",
+    "kw": "dark.*scout|veille|concurrent",
+    "api": "url",
+    "url": "https://weval-consulting.com/api/growth-engine-api.php?action=dark_scout&q=email+pharma"
+  },
+  {
+    "id": "gitea",
+    "kw": "gitea|repo",
+    "api": "shell",
+    "cmd": "curl -s 'http://localhost:3300/api/v1/repos/search?limit=5' -H 'Authorization: token 9ce6ca77bbfb7b9e669d659de441e4c648879d25' 2>/dev/null | python3 -c \"import sys,json;print(len(json.load(sys.stdin).get('data',[])),'repos')\" 2>&1"
+  },
+  {
+    "id": "nonreg",
+    "kw": "nonreg|regression",
+    "api": "shell",
+    "cmd": "curl -s https://weval-consulting.com/api/wevia-master-api.php -X POST -H 'Content-Type: application/json' -d '{\"message\":\"nonreg\",\"session\":\"tool\"}' -k --max-time 15 | python3 -c \"import sys,json;print(json.load(sys.stdin).get('content','')[:50])\" 2>&1"
+  }
+]
\ No newline at end of file
diff --git a/wevia-vault-context.php b/wevia-vault-context.php
index 0609409..8c1443f 100644
--- a/wevia-vault-context.php
+++ b/wevia-vault-context.php
@@ -1,42 +1,44 @@
 <?php
-/**
- * WEVIA Vault Context Loader
- * Reduces LLM token usage by loading only relevant vault notes
- * Include in master-api.php before LLM calls
- */
+// WEVIA Vault Context Loader — CACHED version (no recursive scan per request)
 function wevia_vault_context($message, $maxNotes = 3) {
     $VAULT = '/opt/obsidian-vault';
-    $ctx = '';
+    $cache = '/tmp/vault-context-cache.json';
     
-    // 1. Always load critical doctrines (tiny, always relevant)
-    $critical = ['paradigme-ultime', 'anti-fragmentation', 'souverainete'];
-    foreach ($critical as $c) {
-        $f = "$VAULT/doctrines/$c.md";
-        if (file_exists($f)) {
-            $content = file_get_contents($f);
-            // Strip frontmatter
-            if (preg_match('/---.*?---\s*(.*)/s', $content, $m)) $content = $m[1];
-            $ctx .= trim(substr($content, 0, 200)) . "\n";
+    // Use cache if < 5 min old
+    if (file_exists($cache) && (time() - filemtime($cache)) < 300) {
+        $index = json_decode(file_get_contents($cache), true) ?: [];
+    } else {
+        // Rebuild cache
+        $index = [];
+        foreach (['doctrines','infra','kb','decisions','arena','ethica'] as $dir) {
+            $d = "$VAULT/$dir";
+            if (!is_dir($d)) continue;
+            foreach (glob("$d/*.md") as $f) {
+                $content = file_get_contents($f);
+                if (preg_match('/---.*?---\s*(.*)/s', $content, $m)) $content = $m[1];
+                $rel = str_replace("$VAULT/", '', $f);
+                $index[$rel] = strtolower(substr($content, 0, 500));
+            }
         }
+        @file_put_contents($cache, json_encode($index));
     }
     
-    // 2. Keyword-based vault search (fast, no Qdrant needed)
-    $keywords = array_filter(explode(' ', strtolower($message)), fn($w) => strlen($w) >= 4);
-    if ($keywords) {
-        $found = 0;
-        $iter = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($VAULT));
-        foreach ($iter as $file) {
-            if ($file->getExtension() !== 'md' || $found >= $maxNotes) continue;
-            $content = file_get_contents($file->getPathname());
-            $lower = strtolower($content);
-            foreach ($keywords as $kw) {
-                if (stripos($lower, $kw) !== false) {
-                    $rel = str_replace("$VAULT/", '', $file->getPathname());
-                    if (preg_match('/---.*?---\s*(.*)/s', $content, $m)) $content = $m[1];
-                    $ctx .= "[VAULT:$rel] " . trim(substr($content, 0, 300)) . "\n";
-                    $found++;
-                    break;
-                }
+    // Always inject 2 critical doctrines (tiny)
+    $ctx = '';
+    foreach (['doctrines/souverainete.md','doctrines/paradigme-ultime.md'] as $f) {
+        if (isset($index[$f])) $ctx .= substr($index[$f], 0, 150) . "\n";
+    }
+    
+    // Keyword match from cache (no file I/O)
+    $words = array_filter(explode(' ', strtolower($message)), fn($w) => strlen($w) >= 4);
+    $found = 0;
+    foreach ($index as $file => $content) {
+        if ($found >= $maxNotes) break;
+        foreach ($words as $kw) {
+            if (strpos($content, $kw) !== false) {
+                $ctx .= "[VAULT:$file] " . substr($content, 0, 200) . "\n";
+                $found++;
+                break;
             }
         }
     }