Files
html/api/wevia-stream-api.php
2026-04-12 22:57:03 +02:00

166 lines
7.3 KiB
PHP

<?php
/**
* WEVIA MASTER STREAM API — Server-Sent Events for real-time token streaming
* Works with Cerebras/Groq/SambaNova/NVIDIA which all support streaming
*/
// --- Response headers ------------------------------------------------------
// The response is a Server-Sent Events stream; caching and proxy buffering are
// disabled so every event reaches the client as soon as it is written.
header("Content-Type: text/event-stream");
header("Cache-Control: no-cache");
header("Connection: keep-alive");
header("Access-Control-Allow-Origin: *");
// A cross-origin POST carrying a JSON content type triggers a CORS preflight,
// which only succeeds if the method and the Content-Type header are allowed.
header("Access-Control-Allow-Methods: POST, OPTIONS");
header("Access-Control-Allow-Headers: Content-Type");
header("X-Accel-Buffering: no"); // Disable nginx buffering

// Preflight requests stop here: headers only, no body.
if (($_SERVER["REQUEST_METHOD"] ?? '') === "OPTIONS") {
    http_response_code(200);
    exit;
}

require_once "/opt/wevia-brain/wevia-master-router.php";
require_once "/opt/wevia-brain/wevia-capabilities.php";

// --- Request body ----------------------------------------------------------
// Expected JSON object: { "message": string, "history"?: array, "system"?: string }.
// Anything that does not decode to an array is treated as an empty request.
$input = json_decode((string) file_get_contents("php://input"), true);
if (!is_array($input)) {
    $input = [];
}

// The body is untrusted: normalise types up front so the string functions
// used further down never receive an array/object (TypeError on PHP 8).
$message = $input['message'] ?? '';
$message = is_scalar($message) ? (string) $message : '';

$history = $input['history'] ?? [];
if (!is_array($history)) {
    $history = [];
}

$defaultSystem = "Tu es WEVIA CORTEX, IA cognitive souveraine de WEVAL Consulting. QUALITE: expert senior, precision technique, zero bullshit. Utilise le contexte RAG. Structure: probleme-analyse-solution. Ne hallucine JAMAIS. Code complet.";
$system = $input['system'] ?? $defaultSystem;
if (!is_string($system)) {
    $system = $defaultSystem;
}

// Nothing to answer: report the error as a single SSE event and stop.
if (empty(trim($message))) {
    echo "data: " . json_encode(['error' => 'no message']) . "\n\n";
    exit;
}
// Step 1: Complexity scoring
// The score is not read again in this script; the call is kept because any
// side effects of mr_scoreComplexity() are not visible from here.
$complexity = mr_scoreComplexity($message, $history);

// Step 2: RAG enrichment
// Only attempted for messages longer than 15 characters and when the RAG
// helper is available. When context is returned it is prepended to the
// user message, and a "rag" event is sent to the client either way.
$ragCount = 0;
if (mb_strlen($message) > 15 && function_exists('rag_search')) {
    $ragData = rag_search($message);

    // count() throws on non-countable values in PHP 8, so guard the shape.
    $ragResults = $ragData['results'] ?? [];
    $ragCount = is_array($ragResults) ? count($ragResults) : 0;

    if (!empty($ragData['context'])) {
        $message = "CONTEXTE INTERNE WEVAL (obligatoire):\n" . $ragData['context'] . "\n\n---\nQUESTION:\n" . $message;
    }

    // Send RAG info
    echo "data: " . json_encode(['type' => 'rag', 'count' => $ragCount, 'latency' => $ragData['total_latency_ms'] ?? 0]) . "\n\n";

    // ob_flush() raises E_NOTICE when no output buffer is active, which could
    // end up inside the event stream; only call it when there is a buffer.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
// Step 2a: Memory recall (persistent cross-session)
// Calls the local embedding service and Qdrant directly (bypass nginx 301).
// Both calls are best-effort: any failure simply yields no memories.
// NOTE(review): at this point $message may already contain the RAG context
// prepended in Step 2, so the embedding covers context + question rather than
// the bare question — confirm this is intended.
$memData = ["memories" => []];
$memVec = null;

// 1) Embed the message.
$memCh = curl_init("http://127.0.0.1:11435/api/embeddings");
curl_setopt_array($memCh, [
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => json_encode(["model" => "all-minilm", "prompt" => $message]),
    CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_TIMEOUT => 5,
]);
$memEmb = curl_exec($memCh);
curl_close($memCh);
// curl_exec() returns false on failure; only decode an actual response body.
if (is_string($memEmb)) {
    $memVec = json_decode($memEmb, true)["embedding"] ?? null;
}

// 2) Nearest-neighbour search in the wevia_memory collection (top 3).
if ($memVec) {
    $memCh2 = curl_init("http://127.0.0.1:6333/collections/wevia_memory/points/search");
    curl_setopt_array($memCh2, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => json_encode(["vector" => $memVec, "limit" => 3, "with_payload" => true]),
        CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => 3,
    ]);
    $memR = curl_exec($memCh2);
    curl_close($memCh2);

    $memPts = is_string($memR) ? (json_decode($memR, true)["result"] ?? []) : [];
    if (!is_array($memPts)) {
        $memPts = [];
    }

    $memData["memories"] = array_map(function ($p) {
        return [
            "key" => $p["payload"]["key"] ?? "",
            "value" => $p["payload"]["value"] ?? "",
            // A point without a score is treated as irrelevant (0).
            "score" => round((float) ($p["score"] ?? 0), 3),
        ];
    }, $memPts);
}

// 3) Inject only the memories that clear the relevance threshold. The header
// is added only when at least one memory qualifies, so the prompt never
// receives an empty "MÉMOIRES" section, and the reported count matches what
// was actually injected.
$memRelevant = array_filter(
    array_slice($memData["memories"], 0, 3),
    function ($m) {
        return $m["score"] > 0.3;
    }
);
if (!empty($memRelevant)) {
    $memCtx = "\n\nMÉMOIRES WEVIA (cross-session):\n";
    foreach ($memRelevant as $m) {
        $memCtx .= "- [{$m["key"]}] {$m["value"]}\n";
    }
    $message = $memCtx . "\n---\n" . $message;

    echo "data: " . json_encode(["type" => "memory", "count" => count($memRelevant)]) . "\n\n";
    // ob_flush() raises E_NOTICE without an active buffer; guard it.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
// Step 2b: Capability context
// Guarded with function_exists() so the script still runs when the helper is
// not defined. Whatever non-empty value the helper returns is appended to the
// end of the user message.
if (function_exists('wevia_capabilityContext')) {
    $capCtx = wevia_capabilityContext($message);
    if (!empty($capCtx)) {
        $message = $message . $capCtx;
    }
}
// Step 3: Build messages
// Order: optional system prompt, up to the last 10 history turns (each
// truncated to 1500 characters), then the enriched user message.
$messages = [];
if ($system) {
    $messages[] = ['role' => 'system', 'content' => $system];
}

// History comes straight from the request body, so validate its shape:
// array_slice() and mb_substr() throw a TypeError on PHP 8 when handed the
// wrong type, which would abort the stream mid-response.
$recentHistory = is_array($history) ? array_slice($history, -10) : [];
foreach ($recentHistory as $h) {
    if (!is_array($h) || !isset($h['role'], $h['content'])) {
        continue;
    }
    if (!is_string($h['role']) || !is_scalar($h['content'])) {
        continue;
    }
    $messages[] = [
        'role' => $h['role'],
        'content' => mb_substr((string) $h['content'], 0, 1500),
    ];
}

$messages[] = ['role' => 'user', 'content' => $message];
// Step 4: Get provider (skip Ollama, go Tier 1)
// Providers are listed in the order they will be tried. An entry whose key
// resolves to an empty string is still listed here; the streaming loop is
// what skips it.
$secrets = mr_loadSecrets();

// Both Groq entries share the same endpoint and the same key lookup.
$groqUrl = 'https://api.groq.com/openai/v1/chat/completions';
$groqKey = $secrets['GROQ_KEY'] ?? $secrets['GROQ_API_KEY'] ?? '';

$providers = [
    [
        'name' => 'cerebras',
        'url' => 'https://api.cerebras.ai/v1/chat/completions',
        'key' => $secrets['CEREBRAS_API_KEY'] ?? '',
        'model' => 'qwen-3-235b-a22b-instruct-2507',
    ],
    [
        'name' => 'groq',
        'url' => $groqUrl,
        'key' => $groqKey,
        'model' => 'llama-3.3-70b-versatile',
    ],
    [
        'name' => 'groq-kimi',
        'url' => $groqUrl,
        'key' => $groqKey,
        'model' => 'moonshotai/kimi-k2-instruct',
    ],
    [
        'name' => 'nvidia',
        'url' => 'https://integrate.api.nvidia.com/v1/chat/completions',
        'key' => $secrets['NVIDIA_KEY'] ?? '',
        'model' => 'meta/llama-3.3-70b-instruct',
    ],
    [
        'name' => 'mistral',
        'url' => 'https://api.mistral.ai/v1/chat/completions',
        'key' => $secrets['MISTRAL_KEY'] ?? '',
        'model' => 'mistral-large-latest',
    ],
    [
        'name' => 'sambanova',
        'url' => 'https://api.sambanova.ai/v1/chat/completions',
        'key' => $secrets['SAMBANOVA_KEY'] ?? '',
        'model' => 'DeepSeek-V3.2',
    ],
];
// Step 5: Stream the completion
// Providers are tried in order; the first one that finishes with HTTP 200
// wins. For each attempt the client receives a "start" event, then one
// "token" event per content delta, then "done" on success. If every provider
// fails an "error" event is sent. The stream always ends with "data: [DONE]".
$success = false;
foreach ($providers as $prov) {
    // Providers without a configured key are skipped.
    if (empty($prov['key'])) {
        continue;
    }

    $payload = json_encode([
        'model' => $prov['model'],
        'messages' => $messages,
        'max_tokens' => 4096,
        'temperature' => 0.4,
        'stream' => true,
    ], JSON_UNESCAPED_UNICODE);

    // Converts one complete upstream SSE line into a downstream "token" event.
    // Lines that are blank, not "data:" lines, the "[DONE]" sentinel, or that
    // carry no content delta are ignored.
    $forwardLine = function ($line) use ($prov) {
        $line = trim($line);
        if ($line === '' || strncmp($line, 'data:', 5) !== 0) {
            return;
        }
        // The space after "data:" is optional in the SSE format.
        $body = ltrim((string) substr($line, 5));
        if ($body === '' || $body === '[DONE]') {
            return;
        }
        $json = json_decode($body, true);
        $delta = $json['choices'][0]['delta']['content'] ?? '';
        if ($delta !== '') {
            echo "data: " . json_encode(['type' => 'token', 'content' => $delta, 'provider' => $prov['name'], 'model' => $prov['model']]) . "\n\n";
            // ob_flush() raises E_NOTICE without an active buffer; guard it.
            if (ob_get_level() > 0) {
                ob_flush();
            }
            flush();
        }
    };

    // cURL hands the body to the write callback in arbitrary chunks, so a
    // single SSE line can be split across two calls. Keep the unterminated
    // tail here and only parse lines once their newline has arrived;
    // otherwise both halves fail to decode and the token is lost.
    $pending = '';

    $ch = curl_init($prov['url']);
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $prov['key'],
        ],
        CURLOPT_RETURNTRANSFER => false,
        CURLOPT_TIMEOUT => 60,
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_WRITEFUNCTION => function ($ch, $data) use (&$pending, $forwardLine) {
            $pending .= $data;
            while (($eol = strpos($pending, "\n")) !== false) {
                $forwardLine(substr($pending, 0, $eol));
                $pending = (string) substr($pending, $eol + 1);
            }
            // cURL requires the number of bytes consumed; anything else aborts the transfer.
            return strlen($data);
        },
    ]);

    // Send provider info
    echo "data: " . json_encode(['type' => 'start', 'provider' => $prov['name'], 'model' => $prov['model']]) . "\n\n";
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();

    $startTime = microtime(true);
    curl_exec($ch);
    // A final line that was not newline-terminated is still in the buffer.
    if ($pending !== '') {
        $forwardLine($pending);
        $pending = '';
    }
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $latency = round((microtime(true) - $startTime) * 1000);
    curl_close($ch);

    if ($httpCode === 200) {
        echo "data: " . json_encode(['type' => 'done', 'provider' => $prov['name'], 'model' => $prov['model'], 'latency_ms' => $latency, 'rag' => $ragCount]) . "\n\n";
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
        $success = true;
        break;
    }
    // Any other status: fall through and try the next provider.
}

if (!$success) {
    echo "data: " . json_encode(['type' => 'error', 'message' => 'All providers failed']) . "\n\n";
}

// End-of-stream sentinel for the client.
echo "data: [DONE]\n\n";
if (ob_get_level() > 0) {
    ob_flush();
}
flush();