RAG_EMBED_MODEL,
        'input' => mb_substr($text, 0, 1000),
    ], JSON_UNESCAPED_UNICODE);

    $ch = curl_init(RAG_OLLAMA_URL . '/api/embed');
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => $timeout,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);
    $result = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($httpCode !== 200 || !$result) return null;

    $data = json_decode($result, true);

    // Ollama returns embeddings array
    $embeddings = $data['embeddings'] ?? $data['embedding'] ?? null;
    if (is_array($embeddings) && !empty($embeddings)) {
        // If nested array, take first
        return is_array($embeddings[0]) ? $embeddings[0] : $embeddings;
    }
    return null;
}

/**
 * Search a Qdrant collection for the points closest to a vector.
 *
 * The Query API endpoint (/points/query) is tried first; when it does not
 * answer with HTTP 200 and a non-empty body, the older /points/search
 * endpoint is tried as a fallback.
 *
 * @param string $collection Qdrant collection name (URL-encoded before use).
 * @param array  $vector     Query embedding.
 * @param int    $topK       Maximum number of hits requested from Qdrant.
 * @param float  $minScore   Score threshold forwarded to Qdrant.
 * @return array List of hits, each ['text', 'title', 'source', 'score'];
 *               empty when both requests fail or nothing usable is returned.
 */
function rag_searchQdrant($collection, $vector, $topK = 5, $minScore = 0.35) {
    $shared = [
        'limit' => $topK,
        'score_threshold' => $minScore,
        'with_payload' => true,
    ];

    // The two endpoints name the vector field differently: the Query API
    // expects "query", the legacy search endpoint expects "vector".
    $queryBody = json_encode(['query' => $vector] + $shared, JSON_UNESCAPED_UNICODE);
    $searchBody = json_encode(['vector' => $vector] + $shared, JSON_UNESCAPED_UNICODE);
    if ($queryBody === false || $searchBody === false) {
        return [];
    }

    // POST a JSON body; yields the raw response on HTTP 200, null otherwise.
    $post = static function ($url, $body) {
        $ch = curl_init($url);
        if ($ch === false) {
            return null;
        }
        curl_setopt_array($ch, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_CONNECTTIMEOUT => 2,
        ]);
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return ($httpCode === 200 && is_string($response) && $response !== '') ? $response : null;
    };

    $base = RAG_QDRANT_URL . '/collections/' . rawurlencode((string) $collection) . '/points/';
    $raw = $post($base . 'query', $queryBody) ?? $post($base . 'search', $searchBody);
    if ($raw === null) {
        return [];
    }

    $data = json_decode($raw, true);
    $points = is_array($data) ? ($data['result'] ?? []) : [];
    if (!is_array($points)) {
        return [];
    }

    // The Query API nests the hits under result.points; /points/search
    // returns them directly under result.
    if (isset($points['points'])) {
        $points = $points['points'];
        if (!is_array($points)) {
            return [];
        }
    }

    $results = [];
    foreach ($points as $point) {
        if (!is_array($point)) {
            continue;
        }
        $payload_data = $point['payload'] ?? [];
        if (!is_array($payload_data)) {
            $payload_data = [];
        }

        $text = $payload_data['text'] ?? $payload_data['content'] ?? $payload_data['description'] ?? '';
        if (empty($text) || !is_scalar($text)) {
            continue; // nothing usable to inject into the prompt
        }

        $score = $point['score'] ?? 0;
        $results[] = [
            'text' => mb_substr((string) $text, 0, 800),
            'title' => $payload_data['title'] ?? $payload_data['name'] ?? $payload_data['key'] ?? '',
            'source' => $payload_data['source'] ?? $payload_data['category'] ?? '',
            'score' => round(is_numeric($score) ? (float) $score : 0.0, 3),
        ];
    }

    return $results;
}

/**
 * Multi-collection RAG search.
 *
 * Embeds the query with rag_embed(), searches every requested collection via
 * rag_searchQdrant(), keeps the best-scoring hits and renders them into a
 * context string. Unless $options['collections'] says otherwise, the
 * collections searched are weval_skills and wevia_kb.
 *
 * @param string $query   Text to search for.
 * @param array  $options Optional keys: 'top_k', 'min_score', 'collections'.
 * @return array On success: 'context', 'results', 'embed_latency_ms',
 *               'total_latency_ms', 'vector_dim', 'collections_searched'.
 *               When embedding fails: 'context' => '', 'results' => [],
 *               'error' => 'embedding_failed'.
 */
function rag_search($query, $options = []) {
    $start = microtime(true);
    $topK = $options['top_k'] ?? RAG_TOP_K;
    $minScore = $options['min_score'] ?? RAG_MIN_SCORE;
    // Accept a single collection name as well as a list of names.
    $collections = (array) ($options['collections'] ?? ['weval_skills', 'wevia_kb']);

    // Step 1: Generate embedding
    $vector = rag_embed($query);
    if (!$vector) {
        error_log("RAG: embedding failed for: " . mb_substr($query, 0, 50));
        return ['context' => '', 'results' => [], 'error' => 'embedding_failed'];
    }
    $embedLatency = round((microtime(true) - $start) * 1000);

    // Step 2: Search each collection. Hits are tagged by value: a by-reference
    // foreach would leave a live reference inside the merged list, and the
    // rendering loop below would then overwrite one of the hits.
    $allResults = [];
    foreach ($collections as $collection) {
        foreach (rag_searchQdrant($collection, $vector, $topK, $minScore) as $hit) {
            $hit['collection'] = $collection;
            $allResults[] = $hit;
        }
    }

    // Step 3: Sort by score descending
    usort($allResults, fn($a, $b) => $b['score'] <=> $a['score']);

    // Step 4: Take top K
    $allResults = array_slice($allResults, 0, $topK);

    // Step 5: Build context string
    $context = '';
    if (!empty($allResults)) {
        $context = "\n\n## CONTEXTE WEVAL (RAG — " . count($allResults) . " résultats pertinents)\n";
        foreach ($allResults as $i => $hit) {
            $title = $hit['title'] ? "**{$hit['title']}**" : '';
            $src = $hit['source'] ? " [{$hit['source']}]" : '';
            $score = $hit['score'];
            $context .= "\n### Résultat " . ($i + 1) . " (score: $score)$src\n$title\n{$hit['text']}\n";
        }
    }

    $totalLatency = round((microtime(true) - $start) * 1000);
    error_log(sprintf(
        "RAG: query=%s results=%d embed=%dms total=%dms",
        mb_substr($query, 0, 40),
        count($allResults),
        $embedLatency,
        $totalLatency
    ));

    return [
        'context' => $context,
        'results' => $allResults,
        'embed_latency_ms' => $embedLatency,
        'total_latency_ms' => $totalLatency,
        'vector_dim' => count($vector),
        'collections_searched' => $collections,
    ];
}

/**
 * Enrich a system prompt with RAG context.
 *
 * Runs rag_search() on the user message; when it yields a non-empty context,
 * that context and a short usage instruction are appended to the prompt.
 *
 * @param string $systemPrompt Base system prompt.
 * @param string $userMessage  User message used as the search query.
 * @param array  $options      Options forwarded unchanged to rag_search().
 * @return array ['system_prompt' => possibly extended prompt, 'rag' => rag_search() result].
 */
function rag_enrichPrompt($systemPrompt, $userMessage, $options = []) {
    $rag = rag_search($userMessage, $options);

    if (!empty($rag['context'])) {
        $systemPrompt .= $rag['context']
            . "\n\n**INSTRUCTION:** Utilise le contexte ci-dessus pour enrichir ta réponse. Si le contexte est pertinent, intègre-le naturellement. Sinon, réponds normalement.";
    }

    return [
        'system_prompt' => $systemPrompt,
        'rag' => $rag,
    ];
}