html/api/ambre-claude-pattern-sse.php

<?php
/**
 * ambre-claude-pattern-sse.php · Full Claude pattern via SSE
 *
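 * Stream events (in emission order):
 *   event: start          · echo of the query, session id, timestamp and pattern
 *   event: thinking       · internal reasoning status ("starting", then "done" with full text)
 *   event: thinking_chunk · one word of the reasoning text at a time
 *   event: plan           · numbered plan steps
 *   event: rag            · Qdrant collections selected as context ("querying", then "done")
 *   event: execute        · each plan step, "running" then "done"
 *   event: test           · validation/self-test results
 *   event: result_chunk   · final answer, streamed as accumulated text
 *   event: critique       · self-critique + confidence score
 *   event: done           · summary metrics + full final response
 *   event: error          · sent instead of the above when no query is supplied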
 */

// Turn off response buffering/compression and announce an SSE stream so that
// every frame written by the script is delivered to the client right away.
ini_set("output_buffering", "off");
ini_set("zlib.output_compression", false);
header("Content-Type: text/event-stream; charset=utf-8");
header("Cache-Control: no-cache");
header("Connection: keep-alive");
// Asks an nginx-style reverse proxy not to buffer the response.
header("X-Accel-Buffering: no");

// Drain every output buffer that is already active. ob_end_flush() returns
// false when a buffer cannot be flushed/removed; in that case the nesting
// level never drops, so bail out instead of looping forever. The notice it
// raises is suppressed so it cannot leak into the event stream.
while (ob_get_level() > 0) {
    if (!@ob_end_flush()) {
        break;
    }
}
ob_implicit_flush(true);

/**
 * Write one Server-Sent Events frame to the output and flush it.
 *
 * The frame has the form "event: <name>\ndata: <json>\n\n". The payload is
 * JSON-encoded on a single line with unicode and slashes left unescaped.
 *
 * @param string $event Event name placed on the "event:" line.
 * @param mixed  $data  Payload to JSON-encode onto the "data:" line.
 * @return void
 */
function send($event, $data) {
    // JSON_PARTIAL_OUTPUT_ON_ERROR keeps the frame valid when a value cannot
    // be encoded (e.g. malformed UTF-8 from the user or the model): the
    // offending value becomes null instead of json_encode() returning false
    // and the client receiving an empty "data:" line.
    $flags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PARTIAL_OUTPUT_ON_ERROR;
    $json = json_encode($data, $flags);
    if ($json === false) {
        // Last-resort fallback so the frame is still parseable JSON.
        $json = "null";
    }
    echo "event: $event\n";
    echo "data: $json\n\n";
    @flush();
}

// === Input ===
// The query may arrive via GET or POST. Only string values are accepted:
// an array such as ?q[]=x would otherwise reach trim() and fail.
$raw_q = $_GET["q"] ?? $_POST["q"] ?? "";
$q = is_string($raw_q) ? trim($raw_q) : "";
// Compare against "" explicitly; a loose falsy check would reject the query "0".
if ($q === "") { send("error", ["msg"=>"query required"]); exit; }

// Use the caller-supplied session id when it is a non-empty string,
// otherwise generate a random one.
$raw_sid = $_GET["sid"] ?? null;
$sid = (is_string($raw_sid) && $raw_sid !== "") ? $raw_sid : ("sse-" . bin2hex(random_bytes(4)));
$start_total = microtime(true);

send("start", ["query"=>$q, "session"=>$sid, "ts"=>date("c"), "pattern"=>"thinking→plan→rag→execute→test→critique→result"]);

// === 1. THINKING phase ===
// Ask the local chat-completions endpoint for a short reasoning text, then
// replay it to the client one word at a time.
$t0 = microtime(true);
send("thinking", ["status"=>"starting", "message"=>"Analyse de la demande en cours..."]);

$sys_think = "Tu es le moteur de raisonnement interne d'une IA autonome WEVIA. Décris en 4-6 phrases ce que tu vas faire pour répondre à cette question, en français, style Claude: 'Je vais d'abord... puis... enfin...'. Pas de préambule, juste le raisonnement.";

$think_payload = json_encode([
    "model" => "fast",
    "messages" => [
        ["role"=>"system", "content"=>$sys_think],
        ["role"=>"user", "content"=>"Question: $q"],
    ],
    "max_tokens" => 250,
    "temperature" => 0.4,
]);
$think_ctx = stream_context_create([
    "http" => [
        "method" => "POST",
        "header" => "Content-Type: application/json\r\n",
        "content" => $think_payload,
        "timeout" => 15,
    ],
]);
$think_raw = @file_get_contents("http://127.0.0.1:4000/v1/chat/completions", false, $think_ctx);

// A failed request or an unexpected response shape falls back to a fixed placeholder.
$think_decoded = @json_decode($think_raw, true);
$think = trim($think_decoded["choices"][0]["message"]["content"] ?? "Analyse contextuelle en cours...");

// Emit the reasoning word by word, pausing 40ms between words.
$think_pos = 0;
foreach (preg_split('/\s+/', $think) as $think_word) {
    send("thinking_chunk", ["text"=>$think_word, "index"=>$think_pos]);
    usleep(40000);
    $think_pos++;
}

$think_ms = round((microtime(true) - $t0) * 1000);
send("thinking", ["status"=>"done", "full_text"=>$think, "elapsed_ms"=>$think_ms]);

// === 2. PLAN phase ===
// Ask the model for a JSON plan, then validate it. The result, $plan["steps"],
// is always a non-empty list of at most 5 steps, each an array with the keys
// "n", "title" and "action", so later phases can read those keys safely.
$t1 = microtime(true);
$sys_plan = "Tu es un planificateur. Sortie JSON strict uniquement: {\"steps\":[{\"n\":1,\"title\":\"...\",\"action\":\"...\"}, ...]}. Max 5 étapes. Pas de markdown, pas de backticks, juste du JSON.";
$plan_raw = @file_get_contents("http://127.0.0.1:4000/v1/chat/completions", false, stream_context_create([
    "http" => ["method"=>"POST","header"=>"Content-Type: application/json\r\n",
        "content"=>json_encode(["model"=>"fast","messages"=>[
            ["role"=>"system","content"=>$sys_plan],
            ["role"=>"user","content"=>"Planifie pour répondre à: $q"],
        ],"max_tokens"=>400,"temperature"=>0.2]),"timeout"=>15]
]));
$plan_text = @json_decode($plan_raw,true)["choices"][0]["message"]["content"] ?? "";
if (!is_string($plan_text)) {
    // Anything other than plain text cannot be parsed as a plan.
    $plan_text = "";
}
// Strip markdown code fences the model may add despite the instructions.
// preg_replace() returns null on failure, hence the cast.
$plan_text = (string) preg_replace('/```(?:json)?\s*|```/', '', $plan_text);
$plan_decoded = @json_decode(trim($plan_text), true);

// Keep only well-formed steps: model output is untrusted and may contain
// scalars, missing keys or more entries than requested.
$plan_steps = [];
if (is_array($plan_decoded) && isset($plan_decoded["steps"]) && is_array($plan_decoded["steps"])) {
    foreach ($plan_decoded["steps"] as $raw_step) {
        if (!is_array($raw_step)) {
            continue;
        }
        $raw_title = $raw_step["title"] ?? null;
        if (!is_string($raw_title) || trim($raw_title) === "") {
            continue;
        }
        $raw_action = $raw_step["action"] ?? "";
        $raw_n = $raw_step["n"] ?? null;
        $plan_steps[] = [
            // Keep the model's numbering when it is an integer, else number sequentially.
            "n" => is_int($raw_n) ? $raw_n : count($plan_steps) + 1,
            "title" => trim($raw_title),
            "action" => is_string($raw_action) ? $raw_action : "",
        ];
        if (count($plan_steps) >= 5) {
            break; // enforce the "max 5 steps" limit stated in the prompt
        }
    }
}
if (!$plan_steps) {
    // Generic three-step plan used when the model returned nothing usable.
    $plan_steps = [
        ["n"=>1,"title"=>"Analyse","action"=>"Comprendre la question"],
        ["n"=>2,"title"=>"RAG","action"=>"Chercher contexte pertinent"],
        ["n"=>3,"title"=>"Synthèse","action"=>"Formuler la réponse"],
    ];
}
$plan = ["steps"=>$plan_steps];
send("plan", ["steps"=>$plan["steps"], "elapsed_ms"=>round((microtime(true)-$t1)*1000)]);

// === 3. RAG phase ===
// Lists the Qdrant collections and selects some by keyword. No embedding or
// vector search is performed here; only collection names are reported.
$t2 = microtime(true);
send("rag", ["status"=>"querying", "message"=>"Consultation de la base Qdrant (17 collections)..."]);

// Simple Qdrant collection list (real RAG would embed + search).
// An explicit timeout keeps an unresponsive Qdrant from stalling the stream,
// in line with the other HTTP calls in this script.
$qdrant_ctx = stream_context_create(["http" => ["timeout" => 5]]);
$qdrant_info = @file_get_contents("http://127.0.0.1:6333/collections", false, $qdrant_ctx);
$collections = [];
if ($qdrant_info) {
    $qd = @json_decode($qdrant_info, true);
    $qd_listed = is_array($qd) ? ($qd["result"]["collections"] ?? []) : [];
    if (is_array($qd_listed)) {
        foreach ($qd_listed as $c) {
            // Skip malformed entries rather than reading a missing "name" key.
            if (is_array($c) && isset($c["name"]) && is_string($c["name"])) {
                $collections[] = $c["name"];
            }
        }
    }
}

// Pick collections whose trigger keyword occurs in the query and which exist in Qdrant.
// stripos() is a byte-wise, ASCII case-insensitive search, so accented
// spellings (e.g. "stratégie") do not match the unaccented keyword.
$rag_hits = [];
$keywords = ["strategie"=>"kb_consulting_strategy","pharma"=>"kb_ethica_pharma","bpmn"=>"kb_bpmn_flows","dmaic"=>"kb_dmaic_playbooks","vsm"=>"kb_vsm_best_practices","skill"=>"weval_skills","agent"=>"weval_agents_registry","learning"=>"wevia_learnings"];
foreach ($keywords as $kw => $col) {
    if (stripos($q, $kw) !== false && in_array($col, $collections, true)) {
        $rag_hits[] = ["collection"=>$col, "keyword"=>$kw, "match"=>"keyword"];
    }
}
if (empty($rag_hits) && count($collections) > 0) {
    // No keyword matched: fall back to two fixed default collections.
    // NOTE(review): these names are not checked against $collections — confirm they always exist.
    $rag_hits[] = ["collection"=>"wevia_brain_knowledge", "match"=>"default"];
    $rag_hits[] = ["collection"=>"wevia_kb", "match"=>"default"];
}

send("rag", ["status"=>"done", "collections_queried"=>count($rag_hits), "hits"=>$rag_hits, "total_collections"=>count($collections), "elapsed_ms"=>round((microtime(true)-$t2)*1000)]);

// === 4. EXECUTE phase - stream each step ===
// Announces each plan step as "running", waits 300ms, then reports it "done".
// The plan may come from the model, so steps are read defensively: a missing
// number falls back to the step's position, a missing title to a generic label.
$exec_steps = (isset($plan["steps"]) && is_array($plan["steps"])) ? $plan["steps"] : [];
foreach ($exec_steps as $i => $step) {
    $t_step = microtime(true);

    $step_n = (is_array($step) && isset($step["n"])) ? $step["n"] : (is_int($i) ? $i + 1 : $i);
    if (is_array($step) && isset($step["title"])) {
        $step_title = $step["title"];
    } elseif (is_string($step)) {
        // The model returned the step as a bare string: use it as the title.
        $step_title = $step;
    } else {
        $step_title = "Étape $step_n";
    }

    send("execute", ["step_n"=>$step_n, "title"=>$step_title, "status"=>"running"]);
    usleep(300000); // 300ms simulating work
    send("execute", ["step_n"=>$step_n, "title"=>$step_title, "status"=>"done", "elapsed_ms"=>round((microtime(true)-$t_step)*1000)]);
}

// === 5. TEST phase ===
// Reports three self-checks: first as pending (null), then, after a 400ms
// pause, with their outcome.
$t3 = microtime(true);
$pending_checks = [
    "input_valid" => null,
    "plan_coherent" => null,
    "rag_present" => null,
];
send("test", ["status"=>"running", "checks"=>$pending_checks]);

usleep(400000);

// The plan is considered coherent with at least two steps; RAG is present
// with at least one hit. Input validity is always reported as true here.
$plan_is_coherent = count($plan["steps"]) >= 2;
$rag_is_present = count($rag_hits) > 0;
$final_checks = [
    "input_valid" => true,
    "plan_coherent" => $plan_is_coherent,
    "rag_present" => $rag_is_present,
];
$test_ms = round((microtime(true) - $t3) * 1000);
send("test", ["status"=>"done", "checks"=>$final_checks, "elapsed_ms"=>$test_ms]);

// === 6. FINAL SYNTHESIS with RAG context in system ===
// Builds a system prompt naming the selected collections, requests the final
// answer from the chat-completions endpoint and streams it progressively.
$t4 = microtime(true);

$hit_names = [];
foreach ($rag_hits as $hit) {
    $hit_names[] = $hit["collection"];
}
$rag_context = "RAG Context: " . implode(", ", $hit_names);
$sys_final = "Tu es WEVIA. Contexte RAG disponible: $rag_context. Réponds de façon professionnelle, concise, structurée, en français.";

$final_payload = json_encode([
    "model" => "fast",
    "messages" => [
        ["role"=>"system", "content"=>$sys_final],
        ["role"=>"user", "content"=>$q],
    ],
    "max_tokens" => 1000,
    "temperature" => 0.5,
]);
$final_ctx = stream_context_create([
    "http" => [
        "method" => "POST",
        "header" => "Content-Type: application/json\r\n",
        "content" => $final_payload,
        "timeout" => 25,
    ],
]);
$final_raw = @file_get_contents("http://127.0.0.1:4000/v1/chat/completions", false, $final_ctx);

// A failed request or an unexpected response shape yields a fixed placeholder answer.
$final_decoded = @json_decode($final_raw, true);
$final = $final_decoded["choices"][0]["message"]["content"] ?? "Réponse non disponible.";

// Stream the answer: each frame carries the text accumulated so far and is
// sent on every third word and on the last word, followed by a 30ms pause.
$answer_words = preg_split('/\s+/', $final);
$answer_so_far = "";
foreach ($answer_words as $word_idx => $word) {
    if ($word_idx > 0) {
        $answer_so_far .= " ";
    }
    $answer_so_far .= $word;

    $is_last_word = ($word_idx == count($answer_words) - 1);
    if ($word_idx % 3 != 0 && !$is_last_word) {
        continue;
    }
    send("result_chunk", ["text"=>$answer_so_far, "words"=>$word_idx + 1]);
    usleep(30000);
}

// === 7. CRITIQUE ===
// Heuristic confidence score: base 0.5, plus 0.1 per RAG hit, plus 0.15 when
// the answer is longer than 200 bytes, capped at 0.95.
$t5 = microtime(true);
$crit_len = strlen($final);
$length_bonus = $crit_len > 200 ? 0.15 : 0;
$confidence = 0.5 + (count($rag_hits) * 0.1) + $length_bonus;
if ($confidence > 0.95) {
    $confidence = 0.95;
}

$critique_payload = [
    "status" => "done",
    "confidence" => round($confidence, 2),
    "rag_hits" => count($rag_hits),
    "response_length" => $crit_len,
    "plan_coverage" => count($plan["steps"]) . "/steps",
    "elapsed_ms" => round((microtime(true) - $t5) * 1000),
];
send("critique", $critique_payload);

// === 8. DONE ===
// Closing frame: total wall-clock time, the phases in the order they ran,
// the complete answer, the confidence score and the session id.
send("done", [
    "total_ms"=>round((microtime(true)-$start_total)*1000),
    "phases"=>["thinking","plan","rag","execute","test","result","critique"],
    "final_response"=>$final,
    // Rounded to two decimals so it matches the value sent in the "critique" event.
    "confidence"=>round($confidence, 2),
    "session"=>$sid,
    "ts"=>date("c"),
]);