'no message']) . "\n\n"; exit; }

// ---------------------------------------------------------------------------
// Streaming chat pipeline (Server-Sent Events).
//
// Reads $message, $history and $system (set earlier in this script), enriches
// the prompt with RAG context, recalled memories and capability context, then
// streams the completion from the first provider that answers with HTTP 200.
//
// Events sent to the client, each as `data: <json>\n\n`:
//   rag, memory, start, token, done, error  -- followed by a final `data: [DONE]`.
// ---------------------------------------------------------------------------

// Emits one SSE event and pushes it to the client immediately.
// ob_flush() raises a notice when no output buffer is active, and that notice
// would end up inside the event stream, so it is only called when one exists.
$sseEmit = static function (array $event) {
    echo "data: " . json_encode($event) . "\n\n";
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
};

// Step 1: Complexity scoring
$complexity = mr_scoreComplexity($message, $history);

// Keep the untouched user question: $message is progressively wrapped with
// context below, and the memory lookup must be driven by the question itself.
$userQuestion = $message;

// Step 2: RAG enrichment (skipped for very short messages)
$ragCount = 0;
if (mb_strlen($message) > 15 && function_exists('rag_search')) {
    $ragData = rag_search($message);
    $ragCount = count($ragData['results'] ?? []);
    if (!empty($ragData['context'])) {
        $message = "CONTEXTE INTERNE WEVAL (obligatoire):\n" . $ragData['context']
            . "\n\n---\nQUESTION:\n" . $message;
    }
    // Tell the client how many RAG hits were found and how long the search took
    $sseEmit([
        'type' => 'rag',
        'count' => $ragCount,
        'latency' => $ragData['total_latency_ms'] ?? 0,
    ]);
}

// Step 2a: Memory recall (persistent cross-session)
// Direct calls to the local embedding endpoint and to Qdrant (bypass nginx 301).
$memData = ['memories' => []];

$memCh = curl_init('http://127.0.0.1:4000/v1/embeddings');
curl_setopt_array($memCh, [
    CURLOPT_POST => true,
    // Embed the raw question, not the RAG-augmented prompt, so the similarity
    // search reflects what the user asked rather than the injected context.
    CURLOPT_POSTFIELDS => json_encode(['model' => 'all-minilm', 'prompt' => $userQuestion]),
    CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_TIMEOUT => 5,
]);
$memEmb = curl_exec($memCh);
curl_close($memCh);

// curl_exec() returns false on failure; treat that as "no embedding available"
$memVec = is_string($memEmb) ? (json_decode($memEmb, true)['embedding'] ?? null) : null;

if ($memVec) {
    $memCh2 = curl_init('http://127.0.0.1:6333/collections/wevia_memory/points/search');
    curl_setopt_array($memCh2, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => json_encode(['vector' => $memVec, 'limit' => 3, 'with_payload' => true]),
        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => 3,
    ]);
    $memR = curl_exec($memCh2);
    curl_close($memCh2);

    $memPts = is_string($memR) ? (json_decode($memR, true)['result'] ?? []) : [];
    if (!is_array($memPts)) {
        $memPts = [];
    }

    $memData = ['memories' => array_map(function ($p) {
        return [
            'key' => $p['payload']['key'] ?? '',
            'value' => $p['payload']['value'] ?? '',
            // A hit without a score is treated as irrelevant instead of warning
            'score' => round($p['score'] ?? 0, 3),
        ];
    }, $memPts)];
}
$memResult = json_encode($memData);

// Only memories above the similarity threshold are injected. Filtering first
// avoids prepending an empty "MÉMOIRES WEVIA" header (and announcing a memory
// event) when nothing relevant was recalled.
$relevantMemories = array_filter(
    array_slice($memData['memories'], 0, 3),
    function ($m) {
        return $m['score'] > 0.3;
    }
);
if (!empty($relevantMemories)) {
    $memCtx = "\n\nMÉMOIRES WEVIA (cross-session):\n";
    foreach ($relevantMemories as $m) {
        $memCtx .= '- [' . $m['key'] . '] ' . $m['value'] . "\n";
    }
    $message = $memCtx . "\n---\n" . $message;
    // count = number of memories actually injected into the prompt
    $sseEmit(['type' => 'memory', 'count' => count($relevantMemories)]);
}

// Step 2b: Capability context (optional helper)
if (function_exists('wevia_capabilityContext')) {
    $capCtx = wevia_capabilityContext($message);
    if ($capCtx) {
        $message .= $capCtx;
    }
}

// Step 3: Build messages (system prompt, last 10 history turns, current prompt)
$messages = [];
if ($system) {
    $messages[] = ['role' => 'system', 'content' => $system];
}
foreach (array_slice($history, -10) as $h) {
    if (isset($h['role'], $h['content'])) {
        // Each history turn is capped at 1500 characters
        $messages[] = ['role' => $h['role'], 'content' => mb_substr($h['content'], 0, 1500)];
    }
}
$messages[] = ['role' => 'user', 'content' => $message];

// Step 4: Get provider (skip Ollama, go Tier 1). Providers are tried in order;
// entries without a configured key are skipped.
$secrets = mr_loadSecrets();
$groqKey = $secrets['GROQ_KEY'] ?? $secrets['GROQ_API_KEY'] ?? '';
$providers = [
    ['name' => 'cerebras', 'url' => 'https://api.cerebras.ai/v1/chat/completions', 'key' => $secrets['CEREBRAS_API_KEY'] ?? '', 'model' => 'qwen-3-235b-a22b-instruct-2507'],
    ['name' => 'groq', 'url' => 'https://api.groq.com/openai/v1/chat/completions', 'key' => $groqKey, 'model' => 'llama-3.3-70b-versatile'],
    ['name' => 'groq-kimi', 'url' => 'https://api.groq.com/openai/v1/chat/completions', 'key' => $groqKey, 'model' => 'moonshotai/kimi-k2-instruct'],
    ['name' => 'nvidia', 'url' => 'https://integrate.api.nvidia.com/v1/chat/completions', 'key' => $secrets['NVIDIA_KEY'] ?? '', 'model' => 'meta/llama-3.3-70b-instruct'],
    ['name' => 'mistral', 'url' => 'https://api.mistral.ai/v1/chat/completions', 'key' => $secrets['MISTRAL_KEY'] ?? '', 'model' => 'mistral-large-latest'],
    ['name' => 'sambanova', 'url' => 'https://api.sambanova.ai/v1/chat/completions', 'key' => $secrets['SAMBANOVA_KEY'] ?? '', 'model' => 'DeepSeek-V3.2'],
];

$success = false;
foreach ($providers as $prov) {
    if (empty($prov['key'])) {
        continue;
    }

    $payload = json_encode([
        'model' => $prov['model'],
        'messages' => $messages,
        'max_tokens' => 4096,
        'temperature' => 0.4,
        'stream' => true,
    ], JSON_UNESCAPED_UNICODE);

    // Parses one complete upstream SSE line and relays its delta as a token event.
    // The space after "data:" is optional in the SSE format, so both are accepted.
    $handleLine = static function ($line) use ($prov, $sseEmit) {
        $line = trim($line);
        if (strncmp($line, 'data:', 5) !== 0) {
            return;
        }
        $body = ltrim((string) substr($line, 5));
        if ($body === '' || $body === '[DONE]') {
            return;
        }
        $json = json_decode($body, true);
        $delta = $json['choices'][0]['delta']['content'] ?? '';
        if ($delta !== '') {
            $sseEmit([
                'type' => 'token',
                'content' => $delta,
                'provider' => $prov['name'],
                'model' => $prov['model'],
            ]);
        }
    };

    // cURL hands the body over in arbitrary chunks, so a `data: {...}` line may
    // be split across two callbacks. The unfinished tail is kept here until the
    // rest arrives; parsing each chunk in isolation would silently drop tokens.
    $pending = '';

    $ch = curl_init($prov['url']);
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $prov['key'],
        ],
        CURLOPT_RETURNTRANSFER => false,
        CURLOPT_TIMEOUT => 60,
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_WRITEFUNCTION => static function ($ch, $data) use (&$pending, $handleLine) {
            $pending .= $data;
            $lines = explode("\n", $pending);
            // The last element is either '' or an incomplete line: keep it buffered
            $pending = array_pop($lines);
            foreach ($lines as $line) {
                $handleLine($line);
            }
            // cURL aborts the transfer unless the full chunk length is returned
            return strlen($data);
        },
    ]);

    // Send provider info
    $sseEmit(['type' => 'start', 'provider' => $prov['name'], 'model' => $prov['model']]);

    $startTime = microtime(true);
    curl_exec($ch);
    // Drain a final line that arrived without a trailing newline
    if ($pending !== '') {
        $handleLine($pending);
        $pending = '';
    }
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $latency = round((microtime(true) - $startTime) * 1000);
    curl_close($ch);

    if ($httpCode === 200) {
        $sseEmit([
            'type' => 'done',
            'provider' => $prov['name'],
            'model' => $prov['model'],
            'latency_ms' => $latency,
            'rag' => $ragCount,
        ]);
        $success = true;
        break;
    }
    // Any other status: fall through and try the next provider
}

if (!$success) {
    $sseEmit(['type' => 'error', 'message' => 'All providers failed']);
}

// Stream terminator expected by the client (not JSON, so it is written directly)
echo "data: [DONE]\n\n";
if (ob_get_level() > 0) {
    ob_flush();
}
flush();