Files
html/api/ambre-claude-pattern-sse.php
2026-04-21 23:25:02 +02:00

175 lines
7.8 KiB
PHP

<?php
/**
* ambre-claude-pattern-sse.php · Full Claude pattern via SSE
*
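* Stream events:
* event: start · query, session id, timestamp and pattern description
* event: error · sent instead of the stream when no query is supplied
* event: thinking · internal reasoning (3-5s of thought)
* event: thinking_chunk · reasoning text streamed one word at a time
* event: plan · numbered plan steps
* event: rag · RAG context retrieved from Qdrant
* event: execute · each step execution with status
* event: test · validation/self-test results
* event: result_chunk · accumulated answer text, streamed progressively
* event: critique · self-critique + confidence score
* event: result · final synthesized answer + deliverables
* event: done · summary metrics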
*/
// === SSE transport setup ===
// Switch off the buffering/compression layers reachable from the script so
// that every event is written to the client as soon as it is produced.
// NOTE(review): output_buffering is documented as PHP_INI_PERDIR, so this call
// is normally ignored at runtime; the buffer-draining loop below is what
// actually removes active buffers.
ini_set("output_buffering", "off");
ini_set("zlib.output_compression", "0");
header("Content-Type: text/event-stream; charset=utf-8");
header("Cache-Control: no-cache");
header("Connection: keep-alive");
// Asks nginx-style reverse proxies not to buffer the response.
header("X-Accel-Buffering: no");
// Drain every active output buffer. ob_end_flush() returns false when a
// buffer cannot be removed; in that case ob_get_level() never decreases, so
// stop instead of looping forever.
while (ob_get_level() > 0) {
    if (!@ob_end_flush()) {
        break;
    }
}
ob_implicit_flush(true);
/**
 * Emit one Server-Sent Event frame and flush it to the client.
 *
 * The frame is written as an "event:" line followed by a single "data:" line
 * holding the JSON-encoded payload, terminated by a blank line.
 *
 * @param string $event Event name placed on the "event:" line.
 * @param mixed  $data  Payload to JSON-encode onto the "data:" line.
 */
function send($event, $data) {
    // A CR/LF inside the event name would end the field early and corrupt the frame.
    $event = str_replace(["\r", "\n"], "", (string) $event);
    // Invalid UTF-8 (e.g. from the raw query or a truncated model reply) makes
    // json_encode() return false; substitute the bad bytes instead of sending
    // an empty "data:" line.
    $json = json_encode(
        $data,
        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR
    );
    if ($json === false) {
        // Last resort: keep the frame well-formed so the client parser does not choke.
        $json = '{"msg":"payload could not be encoded"}';
    }
    echo "event: $event\n";
    echo "data: $json\n\n";
    @flush();
}
// === Input ===
// Read the query from GET first, then POST. Array-valued parameters
// (e.g. ?q[]=x) are treated as missing: trim() would raise a TypeError on them.
$q = $_GET["q"] ?? $_POST["q"] ?? "";
$q = is_string($q) ? trim($q) : "";
// Compare with "" instead of relying on truthiness so that the literal
// query "0" is not rejected as empty.
if ($q === "") { send("error", ["msg"=>"query required"]); exit; }
// Session id: the caller-supplied string when present, otherwise a random one.
$sid = $_GET["sid"] ?? null;
if (!is_string($sid)) {
    $sid = "sse-" . bin2hex(random_bytes(4));
}
// Reference point for the total duration reported in the final "done" event.
$start_total = microtime(true);
send("start", ["query"=>$q, "session"=>$sid, "ts"=>date("c"), "pattern"=>"thinking→plan→rag→execute→test→critique→result"]);
// === 1. THINKING phase ===
// Ask the chat-completions endpoint on 127.0.0.1:4000 for a short reasoning
// paragraph, then replay it to the client one word per event.
$t0 = microtime(true);
send("thinking", ["status"=>"starting", "message"=>"Analyse de la demande en cours..."]);
$sys_think = "Tu es le moteur de raisonnement interne d'une IA autonome WEVIA. Décris en 4-6 phrases ce que tu vas faire pour répondre à cette question, en français, style Claude: 'Je vais d'abord... puis... enfin...'. Pas de préambule, juste le raisonnement.";
// The @ keeps a failed HTTP call from printing a warning into the event stream;
// failure is handled below through the fallback text.
$think_raw = @file_get_contents("http://127.0.0.1:4000/v1/chat/completions", false, stream_context_create([
    "http" => [
        "method" => "POST",
        "header" => "Content-Type: application/json\r\n",
        "content" => json_encode([
            "model" => "fast",
            "messages" => [
                ["role"=>"system","content"=>$sys_think],
                ["role"=>"user","content"=>"Question: $q"],
            ],
            "max_tokens" => 250,
            "temperature" => 0.4,
        ]),
        "timeout" => 15,
    ],
]));
$think_json = is_string($think_raw) ? json_decode($think_raw, true) : null;
$think = $think_json["choices"][0]["message"]["content"] ?? null;
// Fall back when the call failed or the reply carries no usable text
// (missing, non-string or blank content).
if (!is_string($think) || trim($think) === "") {
    $think = "Analyse contextuelle en cours...";
}
$think = trim($think);
// Stream thinking word by word (dramatic effect).
// The "u" modifier makes the split operate on UTF-8 characters rather than
// bytes, so a multi-byte character such as "à" can never be cut in half.
$words = preg_split('/\s+/u', $think, -1, PREG_SPLIT_NO_EMPTY);
if ($words === false) {
    // preg_split() fails on malformed UTF-8; send the text as a single chunk.
    $words = [$think];
}
foreach ($words as $i => $w) {
    send("thinking_chunk", ["text"=>$w, "index"=>$i]);
    usleep(40000); // 40ms per word
}
send("thinking", ["status"=>"done", "full_text"=>$think, "elapsed_ms"=>round((microtime(true)-$t0)*1000)]);
// === 2. PLAN phase ===
// Ask the model for a JSON plan, then validate and normalise it so that later
// phases can rely on $plan["steps"] being a non-empty list of
// ["n" => int, "title" => string, "action" => string] entries.
$t1 = microtime(true);
$sys_plan = "Tu es un planificateur. Sortie JSON strict uniquement: {\"steps\":[{\"n\":1,\"title\":\"...\",\"action\":\"...\"}, ...]}. Max 5 étapes. Pas de markdown, pas de backticks, juste du JSON.";
// The @ keeps a failed HTTP call from printing a warning into the event stream.
$plan_raw = @file_get_contents("http://127.0.0.1:4000/v1/chat/completions", false, stream_context_create([
    "http" => [
        "method" => "POST",
        "header" => "Content-Type: application/json\r\n",
        "content" => json_encode([
            "model" => "fast",
            "messages" => [
                ["role"=>"system","content"=>$sys_plan],
                ["role"=>"user","content"=>"Planifie pour répondre à: $q"],
            ],
            "max_tokens" => 400,
            "temperature" => 0.2,
        ]),
        "timeout" => 15,
    ],
]));
$plan_json = is_string($plan_raw) ? json_decode($plan_raw, true) : null;
$plan_text = $plan_json["choices"][0]["message"]["content"] ?? "";
if (!is_string($plan_text)) {
    $plan_text = "";
}
// Models often wrap JSON in markdown fences despite the instruction; strip them.
$plan_text = preg_replace('/```(?:json)?\s*|```/', '', $plan_text) ?? "";
$plan = json_decode(trim($plan_text), true);
// Keep only well-formed steps (an array with a non-blank scalar title),
// renumber them sequentially and enforce the 5-step limit given in the prompt.
$steps = [];
if (is_array($plan) && is_array($plan["steps"] ?? null)) {
    foreach ($plan["steps"] as $raw_step) {
        if (!is_array($raw_step)) {
            continue;
        }
        $title = $raw_step["title"] ?? null;
        if (!is_scalar($title) || trim((string) $title) === "") {
            continue;
        }
        $action = $raw_step["action"] ?? "";
        $steps[] = [
            "n" => count($steps) + 1,
            "title" => (string) $title,
            "action" => is_scalar($action) ? (string) $action : "",
        ];
        if (count($steps) === 5) {
            break;
        }
    }
}
if (empty($steps)) {
    // Static fallback plan when the model reply is missing or unusable.
    $steps = [
        ["n"=>1,"title"=>"Analyse","action"=>"Comprendre la question"],
        ["n"=>2,"title"=>"RAG","action"=>"Chercher contexte pertinent"],
        ["n"=>3,"title"=>"Synthèse","action"=>"Formuler la réponse"],
    ];
}
$plan = ["steps" => $steps];
send("plan", ["steps"=>$plan["steps"], "elapsed_ms"=>round((microtime(true)-$t1)*1000)]);
// === 3. RAG phase ===
// Lists the Qdrant collections and selects the ones whose keyword appears in
// the query. No embedding or vector search is performed here.
$t2 = microtime(true);
send("rag", ["status"=>"querying", "message"=>"Consultation de la base Qdrant (17 collections)..."]);
// Simple Qdrant collection list (real RAG would embed + search).
// An explicit timeout keeps an unresponsive Qdrant from stalling the stream
// for the full default socket timeout.
$qdrant_ctx = stream_context_create(["http" => ["timeout" => 5]]);
$qdrant_info = @file_get_contents("http://127.0.0.1:6333/collections", false, $qdrant_ctx);
$collections = [];
if (is_string($qdrant_info) && $qdrant_info !== "") {
    $qd = json_decode($qdrant_info, true);
    $listed = is_array($qd) ? ($qd["result"]["collections"] ?? []) : [];
    if (is_array($listed)) {
        foreach ($listed as $c) {
            if (is_array($c) && isset($c["name"]) && is_string($c["name"])) {
                $collections[] = $c["name"];
            }
        }
    }
}
// Pick relevant collections based on query keywords.
// Keywords are unaccented, so fold common French accents in the query first;
// otherwise "stratégie" would never match the keyword "strategie".
$q_folded = strtr($q, [
    "à"=>"a", "â"=>"a", "ä"=>"a", "é"=>"e", "è"=>"e", "ê"=>"e", "ë"=>"e",
    "î"=>"i", "ï"=>"i", "ô"=>"o", "ö"=>"o", "ù"=>"u", "û"=>"u", "ü"=>"u", "ç"=>"c",
    "À"=>"A", "Â"=>"A", "Ä"=>"A", "É"=>"E", "È"=>"E", "Ê"=>"E", "Ë"=>"E",
    "Î"=>"I", "Ï"=>"I", "Ô"=>"O", "Ö"=>"O", "Ù"=>"U", "Û"=>"U", "Ü"=>"U", "Ç"=>"C",
]);
$rag_hits = [];
$keywords = ["strategie"=>"kb_consulting_strategy","pharma"=>"kb_ethica_pharma","bpmn"=>"kb_bpmn_flows","dmaic"=>"kb_dmaic_playbooks","vsm"=>"kb_vsm_best_practices","skill"=>"weval_skills","agent"=>"weval_agents_registry","learning"=>"wevia_learnings"];
foreach ($keywords as $kw => $col) {
    if (stripos($q_folded, $kw) !== false && in_array($col, $collections, true)) {
        $rag_hits[] = ["collection"=>$col, "keyword"=>$kw, "match"=>"keyword"];
    }
}
if (empty($rag_hits) && count($collections) > 0) {
    // Default context: the general-purpose collections, reported only when
    // Qdrant actually lists them (same existence check as the keyword branch).
    foreach (["wevia_brain_knowledge", "wevia_kb"] as $default_col) {
        if (in_array($default_col, $collections, true)) {
            $rag_hits[] = ["collection"=>$default_col, "match"=>"default"];
        }
    }
}
send("rag", ["status"=>"done", "collections_queried"=>count($rag_hits), "hits"=>$rag_hits, "total_collections"=>count($collections), "elapsed_ms"=>round((microtime(true)-$t2)*1000)]);
// === 4. EXECUTE phase - stream each step ===
// Emits a "running" then a "done" event for every plan step. The work itself
// is simulated with a fixed delay.
// The plan comes from a model reply, so tolerate a non-array step list and
// steps that lack "n" or "title" instead of failing mid-stream.
$steps_to_run = is_array($plan["steps"] ?? null) ? $plan["steps"] : [];
$position = 0;
foreach ($steps_to_run as $step) {
    $position++;
    // Fall back to the 1-based position / an empty title when a key is missing.
    $step_n = (is_array($step) && isset($step["n"])) ? $step["n"] : $position;
    $step_title = (is_array($step) && isset($step["title"])) ? $step["title"] : "";
    $t_step = microtime(true);
    send("execute", ["step_n"=>$step_n, "title"=>$step_title, "status"=>"running"]);
    usleep(300000); // 300ms simulating work
    send("execute", ["step_n"=>$step_n, "title"=>$step_title, "status"=>"done", "elapsed_ms"=>round((microtime(true)-$t_step)*1000)]);
}
// === 5. TEST phase ===
// Announces three checks as pending (null), waits 400ms, then reports them:
// input_valid is always true, plan_coherent requires at least two plan steps,
// rag_present requires at least one RAG hit.
$test_started = microtime(true);
$pending_checks = array_fill_keys(["input_valid", "plan_coherent", "rag_present"], null);
send("test", ["status"=>"running", "checks"=>$pending_checks]);
usleep(400000);
$plan_step_count = count($plan["steps"]);
$rag_hit_total = count($rag_hits);
$final_checks = [
    "input_valid" => true,
    "plan_coherent" => $plan_step_count >= 2,
    "rag_present" => $rag_hit_total > 0,
];
$test_elapsed_ms = round((microtime(true) - $test_started) * 1000);
send("test", ["status"=>"done", "checks"=>$final_checks, "elapsed_ms"=>$test_elapsed_ms]);
// === 6. FINAL SYNTHESIS with RAG context in system ===
// Generates the final answer with the names of the selected collections
// injected into the system prompt, streams it progressively as result_chunk
// events, then emits the complete text in a single "result" event.
$t4 = microtime(true);
$rag_context = "RAG Context: " . implode(", ", array_column($rag_hits, "collection"));
$sys_final = "Tu es WEVIA. Contexte RAG disponible: $rag_context. Réponds de façon professionnelle, concise, structurée, en français.";
// The @ keeps a failed HTTP call from printing a warning into the event stream.
$final_raw = @file_get_contents("http://127.0.0.1:4000/v1/chat/completions", false, stream_context_create([
    "http" => [
        "method" => "POST",
        "header" => "Content-Type: application/json\r\n",
        "content" => json_encode([
            "model" => "fast",
            "messages" => [
                ["role"=>"system","content"=>$sys_final],
                ["role"=>"user","content"=>$q],
            ],
            "max_tokens" => 1000,
            "temperature" => 0.5,
        ]),
        "timeout" => 25,
    ],
]));
$final_json = is_string($final_raw) ? json_decode($final_raw, true) : null;
$final = $final_json["choices"][0]["message"]["content"] ?? null;
// Treat missing, non-string or blank content as "no answer".
if (!is_string($final) || trim($final) === "") {
    $final = "Réponse non disponible.";
}
// Stream response word by word.
// The "u" modifier splits on UTF-8 characters rather than bytes, so a
// multi-byte character can never be cut in half.
$fwords = preg_split('/\s+/u', $final, -1, PREG_SPLIT_NO_EMPTY);
if ($fwords === false) {
    // preg_split() fails on malformed UTF-8; stream the text as one chunk.
    $fwords = [$final];
}
$last_index = count($fwords) - 1;
$accum = "";
foreach ($fwords as $i => $w) {
    $accum .= ($i > 0 ? " " : "") . $w;
    // Each chunk carries the text accumulated so far; one is sent every third
    // word and always for the last word.
    if ($i % 3 == 0 || $i == $last_index) {
        send("result_chunk", ["text"=>$accum, "words"=>$i+1]);
        usleep(30000);
    }
}
// Emit the "result" event listed in the file header, using the same
// status/full_text/elapsed_ms shape as the closing "thinking" event.
send("result", ["status"=>"done", "full_text"=>$final, "elapsed_ms"=>round((microtime(true)-$t4)*1000)]);
// === 7. CRITIQUE ===
// Heuristic confidence score: base 0.5, plus 0.1 per RAG hit, plus 0.15 when
// the answer is longer than 200 bytes, capped at 0.95.
$critique_started = microtime(true);
$response_bytes = strlen($final);
$rag_hit_count = count($rag_hits);
$length_bonus = ($response_bytes > 200) ? 0.15 : 0;
$confidence = min(0.95, 0.5 + ($rag_hit_count * 0.1) + $length_bonus);
$critique_payload = [
    "status" => "done",
    "confidence" => round($confidence, 2),
    "rag_hits" => $rag_hit_count,
    "response_length" => $response_bytes,
    "plan_coverage" => count($plan["steps"]) . "/steps",
    "elapsed_ms" => round((microtime(true) - $critique_started) * 1000),
];
send("critique", $critique_payload);
// === 8. DONE ===
// Closing summary: total wall-clock time since the request started, the list
// of phases, the full answer, the confidence score and the session id.
send("done", [
    "total_ms" => round((microtime(true) - $start_total) * 1000),
    "phases" => ["thinking","plan","rag","execute","test","result","critique"],
    "final_response" => $final,
    // Rounded to two decimals so it matches the value sent in the "critique"
    // event and carries no floating-point artefacts.
    "confidence" => round($confidence, 2),
    "session" => $sid,
    "ts" => date("c"),
]);