#!/usr/bin/env python3
"""
WEVIA Auto-Learning Engine v1.0
================================
Continuous self-improvement system for WEVIA AI.
Runs automatically (cron), tests all capabilities, grades responses,
stores learnings in Qdrant RAG, and adjusts system prompts.

Flow:
1. GENERATE: Send test prompts to WEVIA API
2. GRADE: Use Opus-level LLM (Groq llama-3.3-70b) to evaluate responses
3. LEARN: Extract improvement patterns
4. STORE: Save learnings in Qdrant vector DB + JSON knowledge base
5. ADAPT: Update system prompt with learned patterns
6. REPORT: Generate learning report

Cron: */60 * * * * python3 /var/www/html/tests/wevia-autolearn.py >> /var/log/wevia-autolearn.log 2>&1
"""
|
|
|
|
import json, time, os, sys, hashlib, requests
|
|
from datetime import datetime
|
|
|
|
# Config
API = "http://127.0.0.1/api/weval-ia"  # Local API (NOTE(review): unused by wevia_chat, which calls the public URL — confirm intent)
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
# SECURITY: the Groq API key must come from the environment only. A real key
# was previously hard-coded here as the fallback; treat it as leaked and
# rotate it. With an empty key, groq_grade() fails gracefully (score-0 grade).
GROQ_KEY = os.environ.get("GROQ_KEY", "")
QDRANT_URL = "http://127.0.0.1:6333"
LEARN_DIR = "/var/www/html/test-report/autolearn"
LEARN_DB = f"{LEARN_DIR}/learnings.json"          # persistent learnings database
PROMPT_PATCHES = f"{LEARN_DIR}/prompt-patches.json"  # suggested system-prompt fixes
REPORT_FILE = f"{LEARN_DIR}/latest-report.json"   # report of the most recent run
COLLECTION = "wevia_learnings"                    # Qdrant collection name

os.makedirs(LEARN_DIR, exist_ok=True)
|
|
|
|
# Test scenarios — covers all capabilities.
# Each entry: id (unique label), cat (capability category), msg (prompt sent
# to WEVIA), expect (keywords the grader looks for), min_len (minimum
# acceptable response length in characters).
SCENARIOS = [
    # -- Greetings / identity --
    {"id": "greeting-fr", "cat": "greeting",
     "msg": "Bonjour, qui es-tu ?",
     "expect": ["WEVIA", "WEVAL", "assistant"], "min_len": 50},
    {"id": "greeting-en", "cat": "greeting",
     "msg": "Hello, what can you do?",
     "expect": ["WEVIA", "help", "assist"], "min_len": 50},
    # -- Document generation --
    {"id": "pdf-audit", "cat": "gen",
     "msg": "Genere un PDF audit cybersecurite pour une PME",
     "expect": ["PDF", "pdf", "telecharger"], "min_len": 50},
    # -- Structured business analysis --
    {"id": "swot", "cat": "analysis",
     "msg": "Analyse SWOT de la transformation digitale au Maroc",
     "expect": ["force", "faiblesse", "opportunit", "menace"], "min_len": 200},
    {"id": "ishikawa", "cat": "analysis",
     "msg": "Diagramme Ishikawa causes retard projet ERP",
     "expect": ["cause", "ishikawa", "effet"], "min_len": 100},
    {"id": "porter", "cat": "analysis",
     "msg": "5 forces de Porter cloud computing Maghreb",
     "expect": ["porter", "force", "concurrent", "rivalite"], "min_len": 200},
    # -- Diagrams --
    {"id": "mermaid", "cat": "schema",
     "msg": "Schema mermaid processus achats 5 etapes",
     "expect": ["mermaid", "graph", "flowchart", "```"], "min_len": 50},
    # -- Code generation --
    {"id": "python", "cat": "code",
     "msg": "Pipeline ETL Python pandas CSV",
     "expect": ["import", "pandas", "def"], "min_len": 100},
    {"id": "react", "cat": "code",
     "msg": "Composant React dashboard KPI 4 cartes",
     "expect": ["useState", "React", "component", "return"], "min_len": 100},
    # -- Visual assets --
    {"id": "image", "cat": "visual",
     "msg": "Genere image artistique futuriste Maroc",
     "expect": ["image", "![", "pollinations"], "min_len": 20},
    {"id": "logo", "cat": "visual",
     "msg": "Logo SVG pour TechVision startup",
     "expect": ["svg", "logo", "viewBox", "IMAGE"], "min_len": 50},
    # -- Web search --
    {"id": "websearch", "cat": "search",
     "msg": "Actualites Maroc cette semaine mars 2026",
     "expect": ["2026", "maroc", "roi", "economie"], "min_len": 100},
    # -- Consulting / long-form / meta --
    {"id": "consulting", "cat": "consulting",
     "msg": "Compare SAP vs Oracle Fusion en 5 criteres",
     "expect": ["SAP", "Oracle", "critere"], "min_len": 200},
    {"id": "long", "cat": "long",
     "msg": "Ecris un article de 800 mots sur la transformation digitale des banques",
     "expect": ["banque", "digital", "transformation"], "min_len": 800},
    {"id": "skills", "cat": "meta",
     "msg": "Tes competences completes ?",
     "expect": ["frontend", "pdf", "skill"], "min_len": 50},
]
|
|
|
|
def wevia_chat(msg, timeout=60):
    """POST *msg* to the public WEVIA chat endpoint and return the parsed JSON reply.

    Shells out to curl (``-sk``: silent, TLS verification disabled) with the
    request body staged in a temporary file. Best-effort by design: returns
    ``{}`` on any failure (timeout, curl error, empty or unparseable output).

    NOTE(review): the module-level ``API`` constant (local endpoint) is unused
    here; this function always hits the public https URL — confirm intent.
    """
    import subprocess as sp, tempfile, json as j2
    tf_path = None
    try:
        # Stage the JSON payload in a temp file and send it via `-d @file`,
        # which sidesteps shell-quoting issues in the message text.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tf:
            j2.dump({'message': msg, 'language': 'fr', 'widget': True}, tf)
            tf_path = tf.name
        r = sp.run(['curl', '-sk', '-m', str(timeout), '-X', 'POST',
                    'https://weval-consulting.com/api/weval-ia',
                    '-H', 'Content-Type: application/json',
                    '-d', '@' + tf_path],
                   capture_output=True, text=True, timeout=timeout + 10)
        return j2.loads(r.stdout) if r.stdout.strip() else {}
    except Exception:
        # Callers treat {} as "no response"; never propagate.
        return {}
    finally:
        # Always remove the temp file — the original only unlinked on the
        # success path, leaking a file whenever sp.run raised (e.g. timeout).
        if tf_path:
            try:
                os.unlink(tf_path)
            except OSError:
                pass
|
|
|
|
def groq_grade(question, response, expected_keywords):
    """Grade a chatbot *response* using Groq's llama-3.3-70b as the evaluator.

    Sends the question/response pair (response truncated to 2000 chars) plus
    the expected keywords to the Groq chat-completions API, then extracts the
    JSON verdict embedded in the model's reply.

    Returns a dict with at least: ``score`` (0-100), ``quality``, ``issues``,
    ``improvements``. Never raises: any HTTP or parse failure yields a
    score-0 "error" grade; a model reply containing no JSON object yields a
    neutral score-50 "unknown" grade.
    """
    import re
    try:
        r = requests.post(GROQ_URL, json={
            "model": "llama-3.3-70b-versatile",
            "messages": [
                {"role": "system", "content": """Tu es un évaluateur expert IA de niveau Opus. Évalue cette réponse de chatbot.
Retourne UNIQUEMENT un JSON valide:
{"score": 0-100, "quality": "excellent|good|acceptable|poor|fail",
"issues": ["issue1", "issue2"],
"improvements": ["suggestion1", "suggestion2"],
"missing_keywords": ["mot1"],
"tone_ok": true/false,
"factual_ok": true/false,
"complete": true/false}"""},
                {"role": "user", "content": f"Question: {question}\n\nRéponse du chatbot:\n{response[:2000]}\n\nMots-clés attendus: {', '.join(expected_keywords)}"}
            ],
            "max_tokens": 500,
            "temperature": 0.1  # near-deterministic grading
        }, headers={"Authorization": f"Bearer {GROQ_KEY}", "Content-Type": "application/json"}, timeout=15)
        # Surface HTTP errors (401, 429, 5xx) as a clean "error" grade instead
        # of the original confusing KeyError("choices") message.
        r.raise_for_status()

        text = r.json()["choices"][0]["message"]["content"]
        # The model is asked to return bare JSON, but may wrap it in prose or
        # a code fence — grab the outermost {...} span.
        json_match = re.search(r'\{[\s\S]*\}', text)
        if json_match:
            return json.loads(json_match.group())
        return {"score": 50, "quality": "unknown", "issues": ["Parse error"], "improvements": []}
    except Exception as e:
        # Boundary catch by design: grading must never crash the run loop.
        return {"score": 0, "quality": "error", "issues": [str(e)[:100]], "improvements": []}
|
|
|
|
def store_in_qdrant(learning_id, text, metadata):
    """Store a learning as one point in the Qdrant vector collection.

    Builds a cheap deterministic pseudo-embedding from md5 digests of 4-char
    slices of *text* (placeholder until real embeddings are wired in), makes
    sure the collection exists, then upserts a point carrying *metadata* as
    its payload.

    Returns True on success, False on any failure (Qdrant down, HTTP error).
    """
    try:
        # Hash slices of the text into up to 1536 floats in [0, 1), then
        # zero-pad. Deterministic but NOT semantically meaningful — replace
        # with proper embeddings later.
        vec = [float(hashlib.md5(text[i:i+4].encode()).hexdigest()[:8], 16) / 2**32
               for i in range(0, min(len(text), 1536*4), max(1, len(text)//1536))][:1536]
        vec.extend([0.0] * (1536 - len(vec)))

        # Ensure collection exists (PUT create; an "already exists" error is
        # harmless — the upsert below fails anyway if it is truly missing).
        requests.put(f"{QDRANT_URL}/collections/{COLLECTION}", json={
            "vectors": {"size": 1536, "distance": "Cosine"}
        }, timeout=5)

        # Derive a stable 60-bit point id from the learning id via md5. The
        # builtin hash() used before is salted per process (PYTHONHASHSEED),
        # so ids changed every run and re-upserts could never deduplicate.
        point_id = int(hashlib.md5(learning_id.encode()).hexdigest()[:15], 16)

        # Upsert point
        requests.put(f"{QDRANT_URL}/collections/{COLLECTION}/points", json={
            "points": [{
                "id": point_id,
                "vector": vec,
                "payload": metadata
            }]
        }, timeout=5)
        return True
    except Exception:
        # Narrowed from a bare "except:", which also swallowed
        # KeyboardInterrupt/SystemExit.
        return False
|
|
|
|
def load_learnings():
    """Load the persistent learnings database from LEARN_DB.

    Returns the stored dict, or a fresh empty structure when the file is
    missing, unreadable, or corrupt — a truncated write must not permanently
    kill every subsequent cron run.
    """
    if os.path.exists(LEARN_DB):
        try:
            with open(LEARN_DB) as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            # Corrupt or unreadable DB: start from scratch rather than crash.
            pass
    return {"learnings": [], "total_runs": 0, "avg_score": 0, "patterns": {}}
|
|
|
|
def save_learnings(db):
    """Atomically persist the learnings database *db* to LEARN_DB.

    Writes to a sibling temp file and renames it into place so that a crash
    or cron kill mid-write can never leave a truncated/corrupt JSON file
    (the original wrote LEARN_DB directly, risking exactly that).
    """
    tmp_path = LEARN_DB + ".tmp"
    with open(tmp_path, "w") as f:
        json.dump(db, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, LEARN_DB)  # atomic rename on POSIX
|
|
|
|
def generate_prompt_patches(db):
    """Analyze recent learnings and emit system-prompt improvement suggestions.

    Counts grader-reported issue strings over the 50 most recent learnings,
    turns every issue seen at least twice into a patch entry (ordered by
    descending frequency, priority scaled by count), writes the list to
    PROMPT_PATCHES, and returns it.
    """
    from collections import Counter

    # Aggregate issues across the last 50 learnings (Counter replaces the
    # manual dict.get(issue, 0) + 1 bookkeeping).
    issue_counts = Counter(
        issue
        for l in db["learnings"][-50:]
        for issue in l.get("grade", {}).get("issues", [])
    )

    patches = []
    # most_common() sorts by descending count (stable on ties), matching the
    # original sorted(..., key=lambda x: -x[1]) ordering.
    for issue, count in issue_counts.most_common():
        if count >= 2:  # only recurring issues are worth a patch
            patches.append({
                "issue": issue,
                "frequency": count,
                "suggested_fix": f"Ajouter instruction: '{issue}' détecté {count} fois. Corriger en enrichissant le system prompt.",
                "priority": "high" if count >= 5 else "medium" if count >= 3 else "low"
            })

    with open(PROMPT_PATCHES, "w") as f:
        json.dump(patches, f, ensure_ascii=False, indent=2)

    return patches
|
|
|
|
def run_autolearn(scenario_ids=None):
    """Main autolearn loop.

    Runs the GENERATE → GRADE → LEARN → STORE → ADAPT → REPORT pipeline over
    the test scenarios, printing one progress line per scenario.

    scenario_ids: optional iterable of scenario id strings; when given, only
    matching SCENARIOS entries are run (otherwise all of them).

    Returns the run report dict (also written to REPORT_FILE); side effects:
    appends to the learnings DB, upserts into Qdrant, rewrites the prompt
    patches file.
    """
    db = load_learnings()
    db["total_runs"] += 1
    # Run id doubles as a timestamp label for this cron invocation.
    run_id = f"run-{datetime.utcnow().strftime('%Y%m%dT%H%M%S')}"

    print(f"\n{'='*60}")
    print(f"WEVIA Auto-Learning — {run_id}")
    print(f"{'='*60}\n")

    # Optionally restrict to an explicit subset of scenarios.
    scenarios = SCENARIOS
    if scenario_ids:
        scenarios = [s for s in SCENARIOS if s["id"] in scenario_ids]

    run_results = []
    total_score = 0

    for sc in scenarios:
        print(f"🧪 {sc['id']}...", end=" ", flush=True)

        # 1. GENERATE — query the live WEVIA endpoint and time the round trip.
        t0 = time.time()
        resp = wevia_chat(sc["msg"], 60)
        elapsed = round(time.time() - t0, 1)
        text = resp.get("response", "")
        provider = resp.get("provider", "?")

        if not text or len(text) < sc.get("min_len", 10):
            # Too short to be worth grading: hard fail without an LLM call.
            grade = {"score": 0, "quality": "fail", "issues": ["Empty/short response"], "improvements": ["Improve provider chain"]}
        else:
            # 2. GRADE with Opus-level LLM
            grade = groq_grade(sc["msg"], text, sc["expect"])

        score = grade.get("score", 0)
        total_score += score

        # 3. LEARN — record everything needed to analyze this test later.
        learning = {
            "id": f"{run_id}-{sc['id']}",
            "scenario": sc["id"],
            "category": sc["cat"],
            "timestamp": datetime.utcnow().isoformat(),
            "score": score,
            "quality": grade.get("quality", "?"),
            "provider": provider,
            "response_len": len(text),
            "elapsed": elapsed,
            "grade": grade
        }

        # 4. STORE in Qdrant (first 500 chars of the response as the vector
        # source; grading metadata as the payload). Best-effort: stored is a
        # bool, a failure only changes the ✓/✗ marker in the progress line.
        stored = store_in_qdrant(learning["id"], text[:500], {
            "scenario": sc["id"],
            "score": score,
            "quality": grade.get("quality"),
            "issues": grade.get("issues", []),
            "improvements": grade.get("improvements", [])
        })

        db["learnings"].append(learning)
        run_results.append(learning)

        # Per-scenario progress line: ✅ >= 70, ⚠️ >= 40, ❌ otherwise.
        emoji = "✅" if score >= 70 else "⚠️" if score >= 40 else "❌"
        print(f"{emoji} {score}/100 ({grade.get('quality','?')}) {elapsed}s {len(text)}ch → Qdrant:{'✓' if stored else '✗'}")

    # 5. ADAPT — generate prompt patches
    patches = generate_prompt_patches(db)

    # Calculate averages (this run only; guard against an empty scenario set).
    if run_results:
        avg = round(total_score / len(run_results), 1)
        db["avg_score"] = avg

    # Keep last 200 learnings — bounds the DB file size across cron runs.
    db["learnings"] = db["learnings"][-200:]

    # 6. REPORT — summary of this run, persisted for the dashboard/log reader.
    report = {
        "run_id": run_id,
        "timestamp": datetime.utcnow().isoformat(),
        "total_tests": len(run_results),
        "avg_score": db.get("avg_score", 0),
        "pass": sum(1 for r in run_results if r["score"] >= 70),
        "warn": sum(1 for r in run_results if 40 <= r["score"] < 70),
        "fail": sum(1 for r in run_results if r["score"] < 40),
        "patches": patches[:10],
        "results": run_results,
        "total_runs": db["total_runs"]
    }

    with open(REPORT_FILE, "w") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    save_learnings(db)

    print(f"\n{'='*60}")
    print(f"Score moyen: {db.get('avg_score',0)}/100 | {report['pass']}✅ {report['warn']}⚠️ {report['fail']}❌")
    print(f"Patches: {len(patches)} suggestions")
    print(f"Total runs: {db['total_runs']}")
    print(f"{'='*60}")

    return report
|
|
|
|
if __name__ == "__main__":
    # CLI args, if any, name the scenario ids to run; no args means run all.
    selected = sys.argv[1:] or None
    run_autolearn(selected)
|