Files
wevia-brain/wevia-rag-engine.php
2026-04-12 23:01:36 +02:00

206 lines
7.0 KiB
PHP

<?php
/**
* ╔══════════════════════════════════════════════════════════════════╗
* ║ WEVIA RAG ENGINE v1.0 — Qdrant Semantic Search for Master Router ║
* ║ Standalone — ZERO impact on existing codebase ║
* ║ Created: 2026-04-04 ║
* ╚══════════════════════════════════════════════════════════════════╝
*
* FLOW: Query → Embed (Ollama) → Search (Qdrant) → Context Injection
*/
// ── RAG configuration: service endpoints and retrieval tuning ──
define('RAG_QDRANT_URL', 'http://127.0.0.1:6333'); // Qdrant vector DB REST endpoint (local)
define('RAG_OLLAMA_URL', 'http://127.0.0.1:11435'); // Ollama HTTP API endpoint for embeddings
define('RAG_EMBED_MODEL', 'all-minilm:latest'); // 384 dim, matches weval_skills
define('RAG_TOP_K', 5); // default max hits returned by rag_search()
define('RAG_MIN_SCORE', 0.35); // default similarity threshold passed to Qdrant
/**
 * Generate an embedding vector for a text via the local Ollama HTTP API.
 *
 * Input is truncated to 1000 characters before embedding. Handles both the
 * newer Ollama response shape ('embeddings': list of vectors) and the older
 * one ('embedding': flat vector).
 *
 * @param string $text    Text to embed (truncated to 1000 chars).
 * @param int    $timeout Total request timeout in seconds.
 * @return array|null Flat embedding vector, or null on any transport,
 *                    HTTP-status, or decode failure.
 */
function rag_embed($text, $timeout = 10) {
    $body = json_encode([
        'model' => RAG_EMBED_MODEL,
        'input' => mb_substr($text, 0, 1000),
    ], JSON_UNESCAPED_UNICODE);

    $handle = curl_init(RAG_OLLAMA_URL . '/api/embed');
    curl_setopt_array($handle, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $body,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => $timeout,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);
    $response = curl_exec($handle);
    $status   = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    if (!$response || $status !== 200) {
        return null;
    }

    $decoded = json_decode($response, true);
    // Newer Ollama returns 'embeddings' (list of vectors); older returns 'embedding'.
    $vectors = $decoded['embeddings'] ?? $decoded['embedding'] ?? null;
    if (!is_array($vectors) || empty($vectors)) {
        return null;
    }
    // Nested list → take the first vector; flat list → use it directly.
    return is_array($vectors[0]) ? $vectors[0] : $vectors;
}
/**
 * Search a Qdrant collection with a dense query vector.
 *
 * Tries the modern POST /collections/{name}/points/query endpoint first and
 * falls back to the legacy /points/search endpoint for older Qdrant servers.
 *
 * Fixes vs. previous version:
 *  - the fallback request now sets CURLOPT_CONNECTTIMEOUT (it previously had
 *    none, risking long hangs on an unreachable host);
 *  - removed the unreachable `?? $data['result']['points']` branch (if
 *    $data['result'] is unset, the nested access is necessarily unset too —
 *    the nested shape is already handled below).
 *
 * @param string  $collection Qdrant collection name (interpolated into the URL).
 * @param float[] $vector     Query embedding.
 * @param int     $topK       Maximum number of hits to request.
 * @param float   $minScore   Minimum similarity score (Qdrant score_threshold).
 * @return array[] Hits with non-empty text, each as
 *                 ['text' (≤800 chars), 'title', 'source', 'score' (rounded, 3 dp)];
 *                 empty array on any failure.
 */
function rag_searchQdrant($collection, $vector, $topK = 5, $minScore = 0.35) {
    $payload = json_encode([
        'vector'          => $vector,
        'limit'           => $topK,
        'score_threshold' => $minScore,
        'with_payload'    => true,
    ], JSON_UNESCAPED_UNICODE);

    // Shared POST helper so primary and fallback endpoints use identical
    // transport settings (timeouts included). Returns the body on HTTP 200,
    // null otherwise.
    $post = static function ($url) use ($payload) {
        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_POST           => true,
            CURLOPT_POSTFIELDS     => $payload,
            CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => 5,
            CURLOPT_CONNECTTIMEOUT => 2,
        ]);
        $body = curl_exec($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
        return ($code === 200 && $body) ? $body : null;
    };

    $result = $post(RAG_QDRANT_URL . "/collections/$collection/points/query");
    if ($result === null) {
        // Fallback: older Qdrant API exposes /points/search instead.
        $result = $post(RAG_QDRANT_URL . "/collections/$collection/points/search");
    }
    if ($result === null) return [];

    $data = json_decode($result, true);
    $points = $data['result'] ?? [];
    if (!is_array($points)) return [];
    // /points/query nests hits under result.points; /points/search returns them directly.
    if (isset($points['points'])) $points = $points['points'];

    $results = [];
    foreach ($points as $point) {
        $score = $point['score'] ?? 0;
        $payload_data = $point['payload'] ?? [];
        // Payload field names vary by collection — probe the common aliases.
        $text = $payload_data['text'] ?? $payload_data['content'] ?? $payload_data['description'] ?? '';
        $title = $payload_data['title'] ?? $payload_data['name'] ?? $payload_data['key'] ?? '';
        $source = $payload_data['source'] ?? $payload_data['category'] ?? '';
        if (!empty($text)) {
            $results[] = [
                'text'   => mb_substr($text, 0, 800),
                'title'  => $title,
                'source' => $source,
                'score'  => round($score, 3),
            ];
        }
    }
    return $results;
}
/**
 * Multi-collection RAG search.
 *
 * Flow: embed the query via Ollama, search each configured Qdrant collection,
 * merge and rank hits by score, keep the global top K, and render a markdown
 * context block ready for prompt injection.
 *
 * @param string $query   User query to embed and search for.
 * @param array  $options Optional overrides: 'top_k' (int), 'min_score' (float),
 *                        'collections' (string[]; default ['weval_skills', 'wevia_kb']).
 * @return array On success: 'context' (string), 'results' (array),
 *               'embed_latency_ms', 'total_latency_ms', 'vector_dim',
 *               'collections_searched'. On embedding failure:
 *               ['context' => '', 'results' => [], 'error' => 'embedding_failed'].
 */
function rag_search($query, $options = []) {
    $start = microtime(true);
    $topK = $options['top_k'] ?? RAG_TOP_K;
    $minScore = $options['min_score'] ?? RAG_MIN_SCORE;
    $collections = $options['collections'] ?? ['weval_skills', 'wevia_kb'];

    // Step 1: Generate embedding (hard requirement — abort if Ollama is unreachable).
    $vector = rag_embed($query);
    if (!$vector) {
        error_log("RAG: embedding failed for: " . mb_substr($query, 0, 50));
        return ['context' => '', 'results' => [], 'error' => 'embedding_failed'];
    }
    $embedLatency = round((microtime(true) - $start) * 1000);

    // Step 2: Search each collection, tagging every hit with its origin.
    // FIX: the previous version used `foreach ($results as &$r)` without
    // unset($r) afterwards, leaving a dangling reference alive after the loop;
    // array_map avoids references entirely.
    $allResults = [];
    foreach ($collections as $collection) {
        $tagged = array_map(
            static function ($hit) use ($collection) {
                $hit['collection'] = $collection;
                return $hit;
            },
            rag_searchQdrant($collection, $vector, $topK, $minScore)
        );
        $allResults = array_merge($allResults, $tagged);
    }

    // Step 3: Sort by score descending.
    usort($allResults, fn($a, $b) => $b['score'] <=> $a['score']);
    // Step 4: Keep only the global top K across all collections.
    $allResults = array_slice($allResults, 0, $topK);

    // Step 5: Build the injectable markdown context string (empty when no hits).
    $context = '';
    if (!empty($allResults)) {
        $context = "\n\n## CONTEXTE WEVAL (RAG — " . count($allResults) . " résultats pertinents)\n";
        foreach ($allResults as $i => $r) {
            $title = $r['title'] ? "**{$r['title']}**" : '';
            $src = $r['source'] ? " [{$r['source']}]" : '';
            $score = $r['score'];
            $context .= "\n### Résultat " . ($i + 1) . " (score: $score)$src\n$title\n{$r['text']}\n";
        }
    }

    $totalLatency = round((microtime(true) - $start) * 1000);
    error_log(sprintf(
        "RAG: query=%s results=%d embed=%dms total=%dms",
        mb_substr($query, 0, 40),
        count($allResults),
        $embedLatency,
        $totalLatency
    ));
    return [
        'context' => $context,
        'results' => $allResults,
        'embed_latency_ms' => $embedLatency,
        'total_latency_ms' => $totalLatency,
        'vector_dim' => count($vector),
        'collections_searched' => $collections,
    ];
}
/**
 * Run a RAG search for the user message and, when context was retrieved,
 * append it to the system prompt together with a usage instruction.
 *
 * @param string $systemPrompt Base system prompt to enrich.
 * @param string $userMessage  User message used as the search query.
 * @param array  $options      Passed through to rag_search().
 * @return array ['system_prompt' => enriched prompt, 'rag' => full rag_search() result]
 */
function rag_enrichPrompt($systemPrompt, $userMessage, $options = []) {
    $rag = rag_search($userMessage, $options);
    if (!empty($rag['context'])) {
        $systemPrompt .= $rag['context']
            . "\n\n**INSTRUCTION:** Utilise le contexte ci-dessus pour enrichir ta réponse. Si le contexte est pertinent, intègre-le naturellement. Sinon, réponds normalement.";
    }
    return [
        'system_prompt' => $systemPrompt,
        'rag' => $rag,
    ];
}