166 lines
7.3 KiB
PHP
166 lines
7.3 KiB
PHP
<?php
|
|
/**
|
|
* WEVIA MASTER STREAM API — Server-Sent Events for real-time token streaming
|
|
* Works with Cerebras/Groq/NVIDIA/Mistral/SambaNova, which all support streaming
|
|
*/
|
|
// Response headers for a Server-Sent Events stream.
header("Content-Type: text/event-stream");
header("Cache-Control: no-cache");
header("Connection: keep-alive");
header("Access-Control-Allow-Origin: *");
// A cross-origin POST with a JSON body triggers a CORS preflight. The browser
// only proceeds when the preflight response explicitly allows the
// Content-Type request header, so advertise it (and the methods) here.
header("Access-Control-Allow-Methods: POST, OPTIONS");
header("Access-Control-Allow-Headers: Content-Type");
header("X-Accel-Buffering: no"); // Disable nginx buffering

// Preflight requests carry no body: answer with the headers above and stop
// before any of the streaming pipeline runs.
if (($_SERVER["REQUEST_METHOD"] ?? "") === "OPTIONS") {
    http_response_code(200);
    exit;
}
|
|
|
|
require_once "/opt/wevia-brain/wevia-master-router.php";
|
|
require_once "/opt/wevia-brain/wevia-capabilities.php";
|
|
|
|
// Decode the JSON request body. Anything that is not a JSON object/array
// (invalid JSON, a bare scalar, an empty body) is treated as an empty request.
$input = json_decode((string) file_get_contents("php://input"), true);
if (!is_array($input)) {
    $input = [];
}

// Default system prompt, used when the client does not supply a usable one.
$defaultSystem = "Tu es WEVIA CORTEX, IA cognitive souveraine de WEVAL Consulting. QUALITE: expert senior, precision technique, zero bullshit. Utilise le contexte RAG. Structure: probleme-analyse-solution. Ne hallucine JAMAIS. Code complet.";

// The fields are client-controlled, so normalise their types up front:
// later steps call trim()/mb_strlen() on $message and array_slice() on
// $history, which throw a TypeError on PHP 8 when handed the wrong type.
$message = $input['message'] ?? '';
if (!is_string($message)) {
    $message = '';
}

$history = $input['history'] ?? [];
if (!is_array($history)) {
    $history = [];
}

$system = $input['system'] ?? $defaultSystem;
if (!is_string($system)) {
    $system = $defaultSystem;
}

// Compare against '' instead of using empty(): empty("0") is true, which
// would wrongly reject a message consisting of the single character "0".
if (trim($message) === '') {
    echo "data: " . json_encode(['error' => 'no message']) . "\n\n";
    exit;
}
|
|
|
|
// Step 1: Complexity scoring
// NOTE(review): $complexity is not read again in this script; the call is
// kept in case mr_scoreComplexity() has side effects - confirm and drop if not.
$complexity = mr_scoreComplexity($message, $history);

// Step 2: RAG enrichment
// Runs only for messages longer than 15 characters and only when a
// rag_search() function has been loaded. When the search returns a context
// string, the user message is rewritten to carry that context ahead of the
// question, and a "rag" event is sent to the client.
$ragCount = 0;
if (mb_strlen($message) > 15 && function_exists('rag_search')) {
    $ragData = rag_search($message);
    if (!is_array($ragData)) {
        // Treat an unexpected return value as "no results" instead of
        // failing on the array accesses below.
        $ragData = [];
    }

    // count() throws a TypeError on PHP 8 for non-countable values.
    $ragResults = $ragData['results'] ?? [];
    $ragCount = is_array($ragResults) ? count($ragResults) : 0;

    if (!empty($ragData['context']) && is_string($ragData['context'])) {
        $message = "CONTEXTE INTERNE WEVAL (obligatoire):\n" . $ragData['context'] . "\n\n---\nQUESTION:\n" . $message;
    }

    // Send RAG info
    echo "data: " . json_encode(['type' => 'rag', 'count' => $ragCount, 'latency' => $ragData['total_latency_ms'] ?? 0]) . "\n\n";
    // ob_flush() raises a notice when no output buffer is active, and that
    // notice would be written into the event stream; only call it when needed.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
|
|
|
|
// Step 2a: Memory recall (persistent cross-session)
// Embeds the current prompt through the local embeddings endpoint, then
// searches the "wevia_memory" Qdrant collection directly (bypass nginx 301).
// Both calls are best-effort: any failure simply yields no memories.
// NOTE(review): $message may already contain the RAG context block at this
// point, so the embedding covers that text too - confirm this is intended.
$memVec = null;
$memCh = curl_init("http://127.0.0.1:4000/v1/embeddings");
if ($memCh !== false) {
    curl_setopt_array($memCh, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => json_encode(["model" => "all-minilm", "prompt" => $message]),
        CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => 5,
    ]);
    $memEmb = curl_exec($memCh);
    curl_close($memCh);

    // curl_exec() returns false on transport failure; only decode real bodies.
    $memEmbDecoded = is_string($memEmb) ? json_decode($memEmb, true) : null;
    if (is_array($memEmbDecoded)) {
        $memVec = $memEmbDecoded["embedding"] ?? null;
    }
}

$memData = ["memories" => []];
if ($memVec) {
    $memCh2 = curl_init("http://127.0.0.1:6333/collections/wevia_memory/points/search");
    if ($memCh2 !== false) {
        curl_setopt_array($memCh2, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => json_encode(["vector" => $memVec, "limit" => 3, "with_payload" => true]),
            CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 3,
        ]);
        $memR = curl_exec($memCh2);
        curl_close($memCh2);

        $memDecoded = is_string($memR) ? json_decode($memR, true) : null;
        $memPts = is_array($memDecoded) ? ($memDecoded["result"] ?? []) : [];
        if (!is_array($memPts)) {
            $memPts = [];
        }

        foreach ($memPts as $p) {
            if (!is_array($p)) {
                continue;
            }
            $memData["memories"][] = [
                "key" => $p["payload"]["key"] ?? "",
                "value" => $p["payload"]["value"] ?? "",
                // A hit without a score is treated as score 0 (never relevant).
                "score" => round((float) ($p["score"] ?? 0), 3),
            ];
        }
    }
}

// Keep only memories above the relevance threshold. Filtering first means
// the memory header is never prepended with an empty list under it, and the
// "count" sent to the client is the number of memories actually injected.
$memRelevant = [];
foreach (array_slice($memData["memories"], 0, 3) as $m) {
    if ($m["score"] > 0.3) {
        $memRelevant[] = $m;
    }
}

if (!empty($memRelevant)) {
    $memCtx = "\n\nMÉMOIRES WEVIA (cross-session):\n";
    foreach ($memRelevant as $m) {
        $memCtx .= "- [{$m['key']}] {$m['value']}\n";
    }
    $message = $memCtx . "\n---\n" . $message;

    echo "data: " . json_encode(["type" => "memory", "count" => count($memRelevant)]) . "\n\n";
    // ob_flush() raises a notice when no output buffer is active.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
|
|
|
|
// Step 2b: Capability context
// When wevia_capabilityContext() has been loaded, ask it for extra context
// for the current prompt and append whatever non-empty value it returns.
$capabilityAddon = function_exists('wevia_capabilityContext')
    ? wevia_capabilityContext($message)
    : null;

if ($capabilityAddon) {
    $message = $message . $capabilityAddon;
}
|
|
|
|
// Step 3: Build messages
// Layout sent upstream: optional system prompt, then up to the last 10
// history turns (each truncated to 1500 characters), then the enriched
// user message.
$messages = [];
if ($system) {
    $messages[] = ['role' => 'system', 'content' => $system];
}

// History comes straight from the client, so validate every entry:
//  - non-array history or entries are ignored (array_slice() and the offset
//    reads below would otherwise fail on PHP 8);
//  - content must be a string, because mb_substr() throws on anything else;
//  - only chat roles the upstream APIs accept are forwarded, so one malformed
//    turn cannot make every provider reject the whole request.
$allowedRoles = ['system', 'user', 'assistant'];
$recentHistory = is_array($history) ? array_slice($history, -10) : [];
foreach ($recentHistory as $h) {
    if (!is_array($h) || !isset($h['role'], $h['content'])) {
        continue;
    }
    if (!is_string($h['content']) || !in_array($h['role'], $allowedRoles, true)) {
        continue;
    }
    $messages[] = ['role' => $h['role'], 'content' => mb_substr($h['content'], 0, 1500)];
}

$messages[] = ['role' => 'user', 'content' => $message];
|
|
|
|
// Step 4: Get provider (skip Ollama, go Tier 1)
$secrets = mr_loadSecrets();

// Both Groq entries use the same credential; GROQ_KEY takes precedence
// over GROQ_API_KEY.
$groqKey = $secrets['GROQ_KEY'] ?? $secrets['GROQ_API_KEY'] ?? '';

// One row per upstream, in the order they are tried:
// [name, chat-completions endpoint, API key, model id]
$providerRows = [
    ['cerebras', 'https://api.cerebras.ai/v1/chat/completions', $secrets['CEREBRAS_API_KEY'] ?? '', 'qwen-3-235b-a22b-instruct-2507'],
    ['groq', 'https://api.groq.com/openai/v1/chat/completions', $groqKey, 'llama-3.3-70b-versatile'],
    ['groq-kimi', 'https://api.groq.com/openai/v1/chat/completions', $groqKey, 'moonshotai/kimi-k2-instruct'],
    ['nvidia', 'https://integrate.api.nvidia.com/v1/chat/completions', $secrets['NVIDIA_KEY'] ?? '', 'meta/llama-3.3-70b-instruct'],
    ['mistral', 'https://api.mistral.ai/v1/chat/completions', $secrets['MISTRAL_KEY'] ?? '', 'mistral-large-latest'],
    ['sambanova', 'https://api.sambanova.ai/v1/chat/completions', $secrets['SAMBANOVA_KEY'] ?? '', 'DeepSeek-V3.2'],
];

// Expand each row into the associative shape the streaming loop reads.
$providers = [];
foreach ($providerRows as $row) {
    $providers[] = [
        'name' => $row[0],
        'url' => $row[1],
        'key' => $row[2],
        'model' => $row[3],
    ];
}
|
|
|
|
// Try each configured provider in order and relay its token stream to the
// client. The first provider that answers with HTTP 200 wins; providers
// without an API key are skipped.
$success = false;

// Handles ONE complete line of an upstream SSE stream: extracts the delta
// text from a "data:" line and re-emits it to the client as a "token" event.
// Blank lines, non-data lines and the "[DONE]" sentinel are ignored.
$forwardSseLine = function ($line, $prov) {
    $line = trim($line);
    // The SSE format allows "data:" with or without a following space.
    if ($line === '' || strncmp($line, 'data:', 5) !== 0) {
        return;
    }
    $body = ltrim((string) substr($line, 5));
    if ($body === '' || $body === '[DONE]') {
        return;
    }

    $json = json_decode($body, true);
    if (!is_array($json)) {
        return;
    }
    $delta = $json['choices'][0]['delta']['content'] ?? '';
    if ($delta !== '') {
        echo "data: " . json_encode(['type' => 'token', 'content' => $delta, 'provider' => $prov['name'], 'model' => $prov['model']]) . "\n\n";
        // ob_flush() raises a notice when no output buffer is active.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }
};

foreach ($providers as $prov) {
    if (empty($prov['key'])) {
        continue;
    }

    $payload = json_encode([
        'model' => $prov['model'],
        'messages' => $messages,
        'max_tokens' => 4096,
        'temperature' => 0.4,
        'stream' => true,
    ], JSON_UNESCAPED_UNICODE);
    if ($payload === false) {
        // The request body could not be encoded (e.g. invalid UTF-8 in the
        // prompt); there is nothing meaningful to send to this provider.
        continue;
    }

    $ch = curl_init($prov['url']);
    if ($ch === false) {
        continue;
    }

    // curl hands the response over in arbitrary chunks that do not respect
    // line boundaries. Bytes are accumulated here and only complete lines
    // are parsed, so a "data:" line split across two chunks is not lost.
    $sseBuffer = '';

    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $prov['key'],
        ],
        CURLOPT_RETURNTRANSFER => false,
        CURLOPT_TIMEOUT => 60,
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_WRITEFUNCTION => function ($ch, $data) use ($prov, &$sseBuffer, $forwardSseLine) {
            $sseBuffer .= $data;
            while (($eol = strpos($sseBuffer, "\n")) !== false) {
                $forwardSseLine(substr($sseBuffer, 0, $eol), $prov);
                $sseBuffer = (string) substr($sseBuffer, $eol + 1);
            }
            // curl aborts the transfer unless the full chunk length is returned.
            return strlen($data);
        },
    ]);

    // Send provider info
    echo "data: " . json_encode(['type' => 'start', 'provider' => $prov['name'], 'model' => $prov['model']]) . "\n\n";
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();

    $startTime = microtime(true);
    curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $latency = round((microtime(true) - $startTime) * 1000);
    curl_close($ch);

    // A final line that arrived without a trailing newline is still buffered.
    if ($sseBuffer !== '') {
        $forwardSseLine($sseBuffer, $prov);
        $sseBuffer = '';
    }

    if ($httpCode === 200) {
        echo "data: " . json_encode(['type' => 'done', 'provider' => $prov['name'], 'model' => $prov['model'], 'latency_ms' => $latency, 'rag' => $ragCount]) . "\n\n";
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
        $success = true;
        break;
    }
}
|
|
|
|
// Terminal events: report total failure when no provider answered with
// HTTP 200, then always close the stream with the "[DONE]" sentinel.
if (!$success) {
    echo "data: " . json_encode(['type' => 'error', 'message' => 'All providers failed']) . "\n\n";
}

echo "data: [DONE]\n\n";
// ob_flush() raises a notice when no output buffer is active; that notice
// would be appended to the event stream, so only call it when a buffer exists.
if (ob_get_level() > 0) {
    ob_flush();
}
flush();
|