#!/usr/bin/env python3
"""
WEVIA Auto-Learning Engine v1.0
================================
Continuous self-improvement loop for WEVIA AI.
Runs automatically (cron), tests all capabilities, grades responses,
stores learnings in the Qdrant RAG store, and proposes system-prompt patches.
Flow:
1. GENERATE: Send test prompts to the WEVIA API
2. GRADE: Use an Opus-level LLM (Groq llama-3.3-70b) to evaluate responses
3. LEARN: Extract improvement patterns
4. STORE: Save learnings in the Qdrant vector DB + a JSON knowledge base
5. ADAPT: Generate system-prompt patch suggestions
6. REPORT: Write a learning report
Cron (hourly): 0 * * * * python3 /var/www/html/tests/wevia-autolearn.py >> /var/log/wevia-autolearn.log 2>&1
"""
import json, time, os, re, sys, hashlib, subprocess, tempfile, requests
from datetime import datetime

# Config
API = "https://weval-consulting.com/api/weval-ia"  # WEVIA endpoint (use http://127.0.0.1/api/weval-ia for local dev)
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
GROQ_KEY = os.environ.get("GROQ_KEY", "")  # required; export GROQ_KEY=... (never hardcode secrets)
QDRANT_URL = "http://127.0.0.1:6333"
LEARN_DIR = "/var/www/html/test-report/autolearn"
LEARN_DB = f"{LEARN_DIR}/learnings.json"
PROMPT_PATCHES = f"{LEARN_DIR}/prompt-patches.json"
REPORT_FILE = f"{LEARN_DIR}/latest-report.json"
COLLECTION = "wevia_learnings"
os.makedirs(LEARN_DIR, exist_ok=True)
# Test scenarios — covers all capabilities
SCENARIOS = [
    {"id": "greeting-fr", "cat": "greeting", "msg": "Bonjour, qui es-tu ?", "expect": ["WEVIA", "WEVAL", "assistant"], "min_len": 50},
    {"id": "greeting-en", "cat": "greeting", "msg": "Hello, what can you do?", "expect": ["WEVIA", "help", "assist"], "min_len": 50},
    {"id": "pdf-audit", "cat": "gen", "msg": "Genere un PDF audit cybersecurite pour une PME", "expect": ["PDF", "pdf", "telecharger"], "min_len": 50},
    {"id": "swot", "cat": "analysis", "msg": "Analyse SWOT de la transformation digitale au Maroc", "expect": ["force", "faiblesse", "opportunit", "menace"], "min_len": 200},
    {"id": "ishikawa", "cat": "analysis", "msg": "Diagramme Ishikawa causes retard projet ERP", "expect": ["cause", "ishikawa", "effet"], "min_len": 100},
    {"id": "porter", "cat": "analysis", "msg": "5 forces de Porter cloud computing Maghreb", "expect": ["porter", "force", "concurrent", "rivalite"], "min_len": 200},
    {"id": "mermaid", "cat": "schema", "msg": "Schema mermaid processus achats 5 etapes", "expect": ["mermaid", "graph", "flowchart", "```"], "min_len": 50},
    {"id": "python", "cat": "code", "msg": "Pipeline ETL Python pandas CSV", "expect": ["import", "pandas", "def"], "min_len": 100},
    {"id": "react", "cat": "code", "msg": "Composant React dashboard KPI 4 cartes", "expect": ["useState", "React", "component", "return"], "min_len": 100},
    {"id": "image", "cat": "visual", "msg": "Genere image artistique futuriste Maroc", "expect": ["image", "![", "pollinations"], "min_len": 20},
    {"id": "logo", "cat": "visual", "msg": "Logo SVG pour TechVision startup", "expect": ["svg", "logo", "viewBox", "IMAGE"], "min_len": 50},
    {"id": "websearch", "cat": "search", "msg": "Actualites Maroc cette semaine mars 2026", "expect": ["2026", "maroc", "roi", "economie"], "min_len": 100},
    {"id": "consulting", "cat": "consulting", "msg": "Compare SAP vs Oracle Fusion en 5 criteres", "expect": ["SAP", "Oracle", "critere"], "min_len": 200},
    {"id": "long", "cat": "long", "msg": "Ecris un article de 800 mots sur la transformation digitale des banques", "expect": ["banque", "digital", "transformation"], "min_len": 800},
    {"id": "skills", "cat": "meta", "msg": "Tes competences completes ?", "expect": ["frontend", "pdf", "skill"], "min_len": 50},
]
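# Scenario schema: "id" (unique name), "cat" (capability category), "msg" (prompt
# sent to WEVIA), "expect" (keywords the grader is told to look for), "min_len"
# (minimum response length in characters before grading). A hypothetical new entry
# would look like:
#   {"id": "gantt", "cat": "schema", "msg": "Diagramme de Gantt projet CRM", "expect": ["gantt", "mermaid"], "min_len": 50},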
def wevia_chat(msg, timeout=60):
    """POST a chat message to the WEVIA API via curl; return parsed JSON ({} on failure)."""
    tf_path = None
    try:
        # Write the payload to a temp file so curl can read it with -d @file
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tf:
            json.dump({'message': msg, 'language': 'fr', 'widget': True}, tf)
            tf_path = tf.name
        r = subprocess.run(['curl', '-sk', '-m', str(timeout), '-X', 'POST',
                            API,
                            '-H', 'Content-Type: application/json',
                            '-d', '@' + tf_path], capture_output=True, text=True, timeout=timeout + 10)
        return json.loads(r.stdout) if r.stdout.strip() else {}
    except Exception:
        return {}
    finally:
        # Always remove the temp file, even when curl or JSON parsing fails
        if tf_path and os.path.exists(tf_path):
            os.unlink(tf_path)
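# A minimal requests-based equivalent (a sketch, not wired into the run loop):
# it assumes the same endpoint and payload; verify=False mirrors curl's -k flag
# for self-signed certificates.
def wevia_chat_requests(msg, timeout=60):
    """Optional alternative to wevia_chat using requests instead of curl."""
    try:
        r = requests.post(API, json={'message': msg, 'language': 'fr', 'widget': True},
                          timeout=timeout, verify=False)
        return r.json()
    except Exception:
        return {}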
def groq_grade(question, response, expected_keywords):
    """Use Groq (llama-3.3-70b) to grade the response at Opus level."""
    try:
        r = requests.post(GROQ_URL, json={
            "model": "llama-3.3-70b-versatile",
            "messages": [
                {"role": "system", "content": """Tu es un évaluateur expert IA de niveau Opus. Évalue cette réponse de chatbot.
Retourne UNIQUEMENT un JSON valide:
{"score": 0-100, "quality": "excellent|good|acceptable|poor|fail",
"issues": ["issue1", "issue2"],
"improvements": ["suggestion1", "suggestion2"],
"missing_keywords": ["mot1"],
"tone_ok": true/false,
"factual_ok": true/false,
"complete": true/false}"""},
                {"role": "user", "content": f"Question: {question}\n\nRéponse du chatbot:\n{response[:2000]}\n\nMots-clés attendus: {', '.join(expected_keywords)}"}
            ],
            "max_tokens": 500,
            "temperature": 0.1
        }, headers={"Authorization": f"Bearer {GROQ_KEY}", "Content-Type": "application/json"}, timeout=15)
        text = r.json()["choices"][0]["message"]["content"]
        # Extract the first JSON object from the model's reply
        json_match = re.search(r'\{[\s\S]*\}', text)
        if json_match:
            return json.loads(json_match.group())
        return {"score": 50, "quality": "unknown", "issues": ["Parse error"], "improvements": []}
    except Exception as e:
        return {"score": 0, "quality": "error", "issues": [str(e)[:100]], "improvements": []}
def store_in_qdrant(learning_id, text, metadata):
    """Store a learning in the Qdrant vector DB."""
    try:
        # Simple hash-based pseudo-vector (replace with proper embeddings later)
        vec = [float(hashlib.md5(text[i:i + 4].encode()).hexdigest()[:8], 16) / 2**32
               for i in range(0, min(len(text), 1536 * 4), max(1, len(text) // 1536))][:1536]
        vec.extend([0.0] * (1536 - len(vec)))
        # Ensure collection exists
        requests.put(f"{QDRANT_URL}/collections/{COLLECTION}", json={
            "vectors": {"size": 1536, "distance": "Cosine"}
        }, timeout=5)
        # Upsert point (md5-derived ID: Python's hash() is salted per process, so it
        # would not be stable across runs)
        requests.put(f"{QDRANT_URL}/collections/{COLLECTION}/points", json={
            "points": [{
                "id": int(hashlib.md5(learning_id.encode()).hexdigest()[:15], 16),
                "vector": vec,
                "payload": metadata
            }]
        }, timeout=5)
        return True
    except Exception:
        return False
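# Hypothetical upgrade path (a sketch, assuming the optional sentence-transformers
# package is installed): real semantic embeddings instead of the hash trick above.
# Note the model below emits 384-dim vectors, so the collection "size" would have
# to change from 1536 to 384.
#   from sentence_transformers import SentenceTransformer
#   _embedder = SentenceTransformer("all-MiniLM-L6-v2")
#   def embed(text):
#       return _embedder.encode(text).tolist()  # list of 384 floats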
def load_learnings():
    """Load the existing learnings database."""
    if os.path.exists(LEARN_DB):
        with open(LEARN_DB) as f:
            return json.load(f)
    return {"learnings": [], "total_runs": 0, "avg_score": 0, "patterns": {}}
def save_learnings(db):
    """Save the learnings database."""
    with open(LEARN_DB, "w") as f:
        json.dump(db, f, ensure_ascii=False, indent=2)
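# If corruption on a crash mid-write is a concern, an atomic variant (sketch) writes
# to a sibling temp file and swaps it in with os.replace, which is atomic on POSIX:
#   def save_learnings_atomic(db):
#       tmp = LEARN_DB + ".tmp"
#       with open(tmp, "w") as f:
#           json.dump(db, f, ensure_ascii=False, indent=2)
#       os.replace(tmp, LEARN_DB)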
def generate_prompt_patches(db):
    """Analyze learnings and generate system-prompt improvement suggestions."""
    patches = []
    # Aggregate issues by category across the last 50 learnings
    issue_counts = {}
    for l in db["learnings"][-50:]:
        for issue in l.get("grade", {}).get("issues", []):
            issue_counts[issue] = issue_counts.get(issue, 0) + 1
    # Generate patches for recurring issues
    for issue, count in sorted(issue_counts.items(), key=lambda x: -x[1]):
        if count >= 2:
            patches.append({
                "issue": issue,
                "frequency": count,
                "suggested_fix": f"Ajouter instruction: '{issue}' détecté {count} fois. Corriger en enrichissant le system prompt.",
                "priority": "high" if count >= 5 else "medium" if count >= 3 else "low"
            })
    with open(PROMPT_PATCHES, "w") as f:
        json.dump(patches, f, ensure_ascii=False, indent=2)
    return patches
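# Shape of one patch entry as written to prompt-patches.json (illustrative values):
#   {"issue": "Réponse trop courte", "frequency": 4,
#    "suggested_fix": "Ajouter instruction: 'Réponse trop courte' détecté 4 fois. ...",
#    "priority": "medium"}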
def run_autolearn(scenario_ids=None):
    """Main autolearn loop."""
    db = load_learnings()
    db["total_runs"] += 1
    run_id = f"run-{datetime.utcnow().strftime('%Y%m%dT%H%M%S')}"
    print(f"\n{'=' * 60}")
    print(f"WEVIA Auto-Learning — {run_id}")
    print(f"{'=' * 60}\n")
    scenarios = SCENARIOS
    if scenario_ids:
        scenarios = [s for s in SCENARIOS if s["id"] in scenario_ids]
    run_results = []
    total_score = 0
    for sc in scenarios:
        print(f"🧪 {sc['id']}...", end=" ", flush=True)
        # 1. GENERATE
        t0 = time.time()
        resp = wevia_chat(sc["msg"], 60)
        elapsed = round(time.time() - t0, 1)
        text = resp.get("response", "")
        provider = resp.get("provider", "?")
        if not text or len(text) < sc.get("min_len", 10):
            grade = {"score": 0, "quality": "fail", "issues": ["Empty/short response"], "improvements": ["Improve provider chain"]}
        else:
            # 2. GRADE with the Opus-level LLM
            grade = groq_grade(sc["msg"], text, sc["expect"])
        score = grade.get("score", 0)
        total_score += score
        # 3. LEARN
        learning = {
            "id": f"{run_id}-{sc['id']}",
            "scenario": sc["id"],
            "category": sc["cat"],
            "timestamp": datetime.utcnow().isoformat(),
            "score": score,
            "quality": grade.get("quality", "?"),
            "provider": provider,
            "response_len": len(text),
            "elapsed": elapsed,
            "grade": grade
        }
        # 4. STORE in Qdrant
        stored = store_in_qdrant(learning["id"], text[:500], {
            "scenario": sc["id"],
            "score": score,
            "quality": grade.get("quality"),
            "issues": grade.get("issues", []),
            "improvements": grade.get("improvements", [])
        })
        db["learnings"].append(learning)
        run_results.append(learning)
        emoji = "✅" if score >= 70 else "⚠️" if score >= 40 else "❌"
        print(f"{emoji} {score}/100 ({grade.get('quality', '?')}) {elapsed}s {len(text)}ch → Qdrant:{'✅' if stored else '❌'}")
    # 5. ADAPT — generate prompt patches
    patches = generate_prompt_patches(db)
    # Calculate averages
    if run_results:
        avg = round(total_score / len(run_results), 1)
        db["avg_score"] = avg
    # Keep the last 200 learnings
    db["learnings"] = db["learnings"][-200:]
    # 6. REPORT
    report = {
        "run_id": run_id,
        "timestamp": datetime.utcnow().isoformat(),
        "total_tests": len(run_results),
        "avg_score": db.get("avg_score", 0),
        "pass": sum(1 for r in run_results if r["score"] >= 70),
        "warn": sum(1 for r in run_results if 40 <= r["score"] < 70),
        "fail": sum(1 for r in run_results if r["score"] < 40),
        "patches": patches[:10],
        "results": run_results,
        "total_runs": db["total_runs"]
    }
    with open(REPORT_FILE, "w") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    save_learnings(db)
    print(f"\n{'=' * 60}")
    print(f"Average score: {db.get('avg_score', 0)}/100 | {report['pass']}✅ {report['warn']}⚠️ {report['fail']}❌")
    print(f"Patches: {len(patches)} suggestions")
    print(f"Total runs: {db['total_runs']}")
    print(f"{'=' * 60}")
    return report
if __name__ == "__main__":
    ids = sys.argv[1:] if len(sys.argv) > 1 else None
    run_autolearn(ids)
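# Usage:
#   python3 wevia-autolearn.py               # run all scenarios
#   python3 wevia-autolearn.py swot mermaid  # run only the named scenario IDs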