html/api/wevia-stream-api.php

<?php
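/**
 * WEVIA MASTER STREAM API — Server-Sent Events endpoint for real-time token streaming.
 *
 * Reads a JSON request body (`message`, optional `history` and `system`), enriches the
 * message with RAG, memory and capability context when those are available, then tries
 * the configured providers in order (Cerebras, Groq, NVIDIA, Mistral, SambaNova) until
 * one responds with HTTP 200. Tokens are relayed as SSE `data:` events and the stream is
 * always terminated with `data: [DONE]`.
 */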
// SSE response headers: event-stream content type, no caching, persistent connection.
header("Content-Type: text/event-stream");
header("Cache-Control: no-cache");
header("Connection: keep-alive");
// CORS: any origin may call this endpoint. A JSON POST from a browser is preflighted,
// and the preflight only succeeds when the allowed method and the Content-Type request
// header are announced explicitly.
header("Access-Control-Allow-Origin: *");
header("Access-Control-Allow-Methods: POST, OPTIONS");
header("Access-Control-Allow-Headers: Content-Type");
header("X-Accel-Buffering: no"); // Disable nginx buffering

// Answer CORS preflight requests immediately, before any heavy work is done.
if ($_SERVER["REQUEST_METHOD"] === "OPTIONS") { http_response_code(200); exit; }

require_once "/opt/wevia-brain/wevia-master-router.php";
require_once "/opt/wevia-brain/wevia-capabilities.php";

// Decode the JSON request body. Anything that is not a JSON object is treated as an
// empty request so the lookups below never operate on a scalar.
$input = json_decode((string) file_get_contents("php://input"), true);
if (!is_array($input)) {
    $input = [];
}

// Only accept the expected types: a non-string message would make trim() throw and a
// non-array history would break the later array_slice().
$message = is_string($input['message'] ?? null) ? $input['message'] : '';
$history = is_array($input['history'] ?? null) ? $input['history'] : [];
// An explicit empty string disables the system prompt; a missing or non-string value
// falls back to the default persona prompt.
$system = is_string($input['system'] ?? null) ? $input['system'] : "Tu es WEVIA CORTEX, IA cognitive souveraine de WEVAL Consulting. QUALITE: expert senior, precision technique, zero bullshit. Utilise le contexte RAG. Structure: probleme-analyse-solution. Ne hallucine JAMAIS. Code complet.";

// Reject blank messages. Compared against '' rather than empty() so that the literal
// message "0" is still accepted. The legacy `error` key is kept; `type` and `message`
// are added so this event has the same shape as the other error event of the stream.
if (trim($message) === '') {
    echo "data: " . json_encode(['type' => 'error', 'error' => 'no message', 'message' => 'no message']) . "\n\n";
    exit;
}

// Step 1: Complexity scoring
// NOTE(review): $complexity is not read again in this script; the call is kept in case
// the router helper has side effects — confirm before removing.
$complexity = mr_scoreComplexity($message, $history);

// Step 2: RAG enrichment
// Only messages longer than 15 characters are looked up, and only when the RAG helper
// has been loaded by one of the required files.
$ragCount = 0;
if (mb_strlen($message) > 15 && function_exists('rag_search')) {
    $ragData = rag_search($message);
    if (!is_array($ragData)) {
        // Treat an unexpected return value as "no results" instead of failing on offsets.
        $ragData = [];
    }
    $ragResults = $ragData['results'] ?? [];
    $ragCount = is_array($ragResults) ? count($ragResults) : 0;
    if (!empty($ragData['context'])) {
        // Prepend the retrieved context so the model sees it before the question.
        $message = "CONTEXTE INTERNE WEVAL (obligatoire):\n" . $ragData['context'] . "\n\n---\nQUESTION:\n" . $message;
    }
    // Send RAG info
    echo "data: " . json_encode(['type' => 'rag', 'count' => $ragCount, 'latency' => $ragData['total_latency_ms'] ?? 0]) . "\n\n";
    // ob_flush() raises a notice when no output buffer is active, which would end up
    // inside the event stream; only call it when there is a buffer to flush.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}

// Step 2a: Memory recall (persistent cross-session)
// Direct calls to the local embedding service and to Qdrant (bypass nginx 301).
// NOTE(review): at this point $message may already contain the RAG context prepended in
// Step 2, so the embedding is computed over the enriched prompt rather than the raw
// question — confirm this is intended.
$memVec = null;
$memCh = curl_init("http://127.0.0.1:4000/v1/embeddings");
curl_setopt_array($memCh, [CURLOPT_POST=>true, CURLOPT_POSTFIELDS=>json_encode(["model"=>"all-minilm","prompt"=>$message]), CURLOPT_HTTPHEADER=>["Content-Type: application/json"], CURLOPT_RETURNTRANSFER=>true, CURLOPT_TIMEOUT=>5]);
$memEmb = curl_exec($memCh);
curl_close($memCh);
// curl_exec() returns false on failure; only decode an actual response body.
if (is_string($memEmb)) {
    $memVec = json_decode($memEmb, true)["embedding"] ?? null;
}

// Search the memory collection for the three nearest points. Memory recall is
// best-effort: any failure simply leaves the memory list empty.
$memData = ["memories" => []];
if ($memVec) {
    $memCh2 = curl_init("http://127.0.0.1:6333/collections/wevia_memory/points/search");
    curl_setopt_array($memCh2, [CURLOPT_POST=>true, CURLOPT_POSTFIELDS=>json_encode(["vector"=>$memVec,"limit"=>3,"with_payload"=>true]), CURLOPT_HTTPHEADER=>["Content-Type: application/json"], CURLOPT_RETURNTRANSFER=>true, CURLOPT_TIMEOUT=>3]);
    $memR = curl_exec($memCh2);
    curl_close($memCh2);
    $memPts = is_string($memR) ? (json_decode($memR, true)["result"] ?? []) : [];
    if (!is_array($memPts)) {
        $memPts = [];
    }
    foreach ($memPts as $p) {
        // A point without a numeric score is kept with score 0 and filtered out below.
        $memScore = $p["score"] ?? 0;
        $memData["memories"][] = [
            "key" => $p["payload"]["key"] ?? "",
            "value" => $p["payload"]["value"] ?? "",
            "score" => is_numeric($memScore) ? round((float) $memScore, 3) : 0.0,
        ];
    }
}

// Keep only memories that are similar enough to the message. Filtering happens before
// anything is emitted so that an all-irrelevant result set neither prepends an empty
// memory header to the prompt nor reports a misleading count to the client.
$memRelevant = array_filter(array_slice($memData["memories"], 0, 3), function ($m) {
    return $m["score"] > 0.3;
});
if (!empty($memRelevant)) {
    $memCtx = "\n\nMÉMOIRES WEVIA (cross-session):\n";
    foreach ($memRelevant as $m) {
        $memCtx .= "- [{$m["key"]}] {$m["value"]}\n";
    }
    $message = $memCtx . "\n---\n" . $message;
    echo "data: " . json_encode(["type" => "memory", "count" => count($memRelevant)]) . "\n\n";
    // ob_flush() raises a notice when no output buffer is active; guard it so the
    // notice cannot leak into the event stream.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}

// Step 2b: Capability context
// When the capabilities helper is loaded, ask it for extra context for the current
// message and append whatever non-empty text it returns.
if (function_exists('wevia_capabilityContext')) {
    $capCtx = wevia_capabilityContext($message);
    $message .= $capCtx ?: '';
}

// Step 3: Build messages
// Conversation layout: optional system prompt, up to the last 10 history entries
// (each truncated to 1500 characters), then the enriched user message.
$messages = [];
if ($system) {
    $messages[] = ['role' => 'system', 'content' => $system];
}
// History comes straight from the client: tolerate a non-array value and skip entries
// that are malformed instead of letting mb_substr() throw on a non-scalar content.
$recentHistory = is_array($history) ? array_slice($history, -10) : [];
foreach ($recentHistory as $h) {
    if (!is_array($h) || !isset($h['role'], $h['content'])) {
        continue;
    }
    if (!is_string($h['role']) || !is_scalar($h['content'])) {
        continue;
    }
    $messages[] = ['role' => $h['role'], 'content' => mb_substr((string) $h['content'], 0, 1500)];
}
$messages[] = ['role' => 'user', 'content' => $message];

// Step 4: Get provider (skip Ollama, go Tier 1)
// Ordered fallback list: each entry names the chat-completions endpoint, the API key
// read from the loaded secrets (empty string when absent) and the model to request.
$secrets = mr_loadSecrets();

// Both Groq entries share one endpoint and one key lookup (GROQ_KEY, then GROQ_API_KEY).
$groqUrl = 'https://api.groq.com/openai/v1/chat/completions';
$groqKey = $secrets['GROQ_KEY'] ?? $secrets['GROQ_API_KEY'] ?? '';

$providers = [
    [
        'name' => 'cerebras',
        'url' => 'https://api.cerebras.ai/v1/chat/completions',
        'key' => $secrets['CEREBRAS_API_KEY'] ?? '',
        'model' => 'qwen-3-235b-a22b-instruct-2507',
    ],
    [
        'name' => 'groq',
        'url' => $groqUrl,
        'key' => $groqKey,
        'model' => 'llama-3.3-70b-versatile',
    ],
    [
        'name' => 'groq-kimi',
        'url' => $groqUrl,
        'key' => $groqKey,
        'model' => 'moonshotai/kimi-k2-instruct',
    ],
    [
        'name' => 'nvidia',
        'url' => 'https://integrate.api.nvidia.com/v1/chat/completions',
        'key' => $secrets['NVIDIA_KEY'] ?? '',
        'model' => 'meta/llama-3.3-70b-instruct',
    ],
    [
        'name' => 'mistral',
        'url' => 'https://api.mistral.ai/v1/chat/completions',
        'key' => $secrets['MISTRAL_KEY'] ?? '',
        'model' => 'mistral-large-latest',
    ],
    [
        'name' => 'sambanova',
        'url' => 'https://api.sambanova.ai/v1/chat/completions',
        'key' => $secrets['SAMBANOVA_KEY'] ?? '',
        'model' => 'DeepSeek-V3.2',
    ],
];

// Try each provider in order and relay its streamed completion to the client.
// The loop stops at the first provider that finishes with HTTP 200.
$success = false;
foreach ($providers as $prov) {
    // Providers without a configured API key are skipped.
    if (empty($prov['key'])) {
        continue;
    }

    $payload = json_encode([
        'model' => $prov['model'],
        'messages' => $messages,
        'max_tokens' => 4096,
        'temperature' => 0.4,
        'stream' => true,
    ], JSON_UNESCAPED_UNICODE);

    // Turns one complete upstream SSE line into a `token` event for the client.
    // Blank lines, the upstream [DONE] marker and non-data lines are ignored.
    $forwardLine = function ($line) use ($prov) {
        $line = trim($line);
        if ($line === '' || $line === 'data: [DONE]') {
            return;
        }
        if (strpos($line, 'data: ') !== 0) {
            return;
        }
        $json = json_decode(substr($line, 6), true);
        $delta = $json['choices'][0]['delta']['content'] ?? '';
        if ($delta !== '') {
            echo "data: " . json_encode(['type' => 'token', 'content' => $delta, 'provider' => $prov['name'], 'model' => $prov['model']]) . "\n\n";
            // ob_flush() raises a notice when no output buffer is active; guard it so
            // the notice cannot leak into the event stream.
            if (ob_get_level() > 0) {
                ob_flush();
            }
            flush();
        }
    };

    // cURL hands the body over in arbitrary chunks, so a single SSE line can be split
    // across two callbacks. The unfinished tail is carried over in $pending instead of
    // being parsed (and lost) as broken JSON.
    $pending = '';

    $ch = curl_init($prov['url']);
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $prov['key'],
        ],
        CURLOPT_RETURNTRANSFER => false,
        CURLOPT_TIMEOUT => 60,
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_WRITEFUNCTION => function ($ch, $data) use (&$pending, $forwardLine) {
            $pending .= $data;
            $lines = explode("\n", $pending);
            // The last element is either '' (chunk ended on a newline) or a partial line.
            $pending = array_pop($lines);
            foreach ($lines as $line) {
                $forwardLine($line);
            }
            // Returning the chunk length tells cURL that everything was consumed.
            return strlen($data);
        },
    ]);

    // Send provider info
    echo "data: " . json_encode(['type' => 'start', 'provider' => $prov['name'], 'model' => $prov['model']]) . "\n\n";
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();

    $startTime = microtime(true);
    curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $latency = round((microtime(true) - $startTime) * 1000);
    curl_close($ch);

    // Forward a final line that the upstream did not terminate with a newline.
    if ($pending !== '') {
        $forwardLine($pending);
        $pending = '';
    }

    if ($httpCode === 200) {
        echo "data: " . json_encode(['type' => 'done', 'provider' => $prov['name'], 'model' => $prov['model'], 'latency_ms' => $latency, 'rag' => $ragCount]) . "\n\n";
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
        $success = true;
        break;
    }
}

// Tell the client when no provider produced a successful (HTTP 200) response.
if (!$success) {
    echo "data: " . json_encode(['type' => 'error', 'message' => 'All providers failed']) . "\n\n";
}

// Always terminate the stream with the [DONE] sentinel.
echo "data: [DONE]\n\n";
// ob_flush() raises a notice when no output buffer is active; guard it so the notice
// is not appended after the terminating event.
if (ob_get_level() > 0) {
    ob_flush();
}
flush();