<?php
/**
 * ambre-internal-chat-api.php · wave-259 · Unified chat API for INTERNAL chatbots
 *
 * Flat-script endpoint (no declared symbols): parses the request, optionally
 * delegates to the multi-agent dispatcher, otherwise answers via the local
 * LLM gateway, and persists every exchange.
 *
 * Features:
 * - Persistent memory (AmbreInternalMemory · /opt/wevads/internal-memory/)
 * - Cross-chat learning (shared KB: all chats contribute to common learning pool)
 * - Zero CF cache (Cache-Control headers + CF-Cache-Status: BYPASS)
 * - LLM semaphore-protected
 * - Auto-identity extraction
 * - Multi-agent dispatcher if complex query
 *
 * POST { chat_id, message, enable_multiagent: true/false }
 */
|
|
|
|
// FORCE NO CF CACHE — belt-and-braces set of headers so neither Cloudflare
// nor any intermediate proxy ever caches a chat response.
header("Cache-Control: no-store, no-cache, must-revalidate, max-age=0");
header("CDN-Cache-Control: no-store");
header("Cloudflare-CDN-Cache-Control: no-store");
header("Pragma: no-cache");
header("Expires: 0");

// CORS: the wildcard origin implies cross-origin browser callers, so the
// preflight must be answered too — without Allow-Methods/Allow-Headers and an
// OPTIONS response, browsers never issue the actual POST.
header("Access-Control-Allow-Origin: *");
header("Access-Control-Allow-Methods: POST, OPTIONS");
header("Access-Control-Allow-Headers: Content-Type");
header("Content-Type: application/json; charset=utf-8");

// Short-circuit the CORS preflight: no body, just the headers above.
if (($_SERVER["REQUEST_METHOD"] ?? "") === "OPTIONS") {
    http_response_code(204);
    exit;
}
|
|
|
|
// Project-local dependencies:
// - ambre-internal-memory.php: AmbreInternalMemory (persistent per-chat transcript store) — hard requirement.
// - ambre-llm-semaphore.php: AmbreLLMSemaphore — optional; the @ tolerates a
//   missing file and every use below is guarded with class_exists().
require_once __DIR__ . "/ambre-internal-memory.php";
@require_once __DIR__ . "/ambre-llm-semaphore.php";
|
|
|
|
// Request parsing: prefer a JSON body, fall back to form-encoded POST.
$t0  = microtime(true);
$raw = file_get_contents("php://input");
$in  = json_decode($raw, true) ?: $_POST;

$chat_id   = trim((string)($in["chat_id"] ?? ""));
$msg       = trim((string)($in["message"] ?? ""));
$enable_ma = !empty($in["enable_multiagent"]);

// Strict emptiness check — the original truthiness test (!$msg) also rejected
// the legitimate literal message "0". Signal the client error with a 400.
if ($msg === "") {
    http_response_code(400);
    echo json_encode(["error" => "message required"]);
    exit;
}

// No chat_id supplied: derive a stable per-visitor, per-day identifier so the
// same caller keeps their conversation memory for the rest of the day.
if ($chat_id === "") {
    $chat_id = "internal-" . substr(md5(($_SERVER["REMOTE_ADDR"] ?? "x") . date("Y-m-d")), 0, 10);
}
|
|
|
|
// Load persistent memory (last 50 turns for context).
$history = AmbreInternalMemory::context_messages($chat_id, 50);

// Cross-chat learning: shared insights pool appended to by every chat on this
// host. Best-effort read — the @ suppression keeps a missing or corrupt file
// from breaking the request; falls back to an empty pool.
$shared_kb_file = "/opt/wevads/internal-memory/_shared-learning.json";
$shared_kb = @json_decode(@file_get_contents($shared_kb_file), true) ?: [];
|
|
|
|
// Multi-agent path: explicit opt-in flag, or heuristic French trigger phrases
// ("analyse complète", "rapport complet", "compare X avec/vs Y", "multi-agent",
// "en parallèle", "analyse 360").
if ($enable_ma || preg_match('/analyse\s+compl[eè]te|rapport\s+complet|compare[rz]?\s+.{3,}\s+(?:avec|vs|contre|et)|multi[- ]?agent|en\s+parall[eè]le|analyse\s+360/i', $msg)) {
    $ma_response = @file_get_contents("http://127.0.0.1/api/ambre-multiagent-parallel.php", false, stream_context_create([
        "http" => [
            "method"        => "POST",
            "header"        => "Content-Type: application/json\r\n",
            "content"       => json_encode(["goal" => $msg, "max_agents" => 6]),
            "timeout"       => 60,
            // Keep the body on non-2xx responses so a JSON error payload can be
            // decoded instead of file_get_contents() returning false.
            "ignore_errors" => true,
        ],
    ]));
    $ma_data = @json_decode($ma_response, true);

    if ($ma_data && !empty($ma_data["ok"])) {
        // Guard: a payload can be ok=true yet miss "reconciled"; the original
        // dereferenced it unchecked and would emit a notice + null response.
        $reconciled = (string)($ma_data["reconciled"] ?? "");

        // Persist both sides of the exchange to this chat's memory.
        AmbreInternalMemory::append($chat_id, "user", $msg);
        AmbreInternalMemory::append($chat_id, "assistant", $reconciled, [
            "mode"   => "multiagent",
            "agents" => $ma_data["agents_count"] ?? 0,
        ]);

        // Extract learning for the cross-chat KB (topic + synthesis preview),
        // capped to the 500 most recent entries.
        if (isset($ma_data["plan"]["objective"])) {
            $shared_kb[] = [
                "ts"                => time(),
                "chat_id"           => $chat_id,
                "topic"             => $ma_data["plan"]["objective"],
                "synthesis_preview" => substr($reconciled, 0, 300),
            ];
            if (count($shared_kb) > 500) $shared_kb = array_slice($shared_kb, -500);
            // LOCK_EX: this file is shared across concurrent requests — avoid
            // interleaved/truncated writes. Still best-effort (@) by design.
            @file_put_contents($shared_kb_file, json_encode($shared_kb, JSON_UNESCAPED_UNICODE), LOCK_EX);
        }

        echo json_encode([
            "ok"             => true,
            "mode"           => "multiagent",
            "response"       => $reconciled,
            "plan"           => $ma_data["plan"] ?? null,
            "agents"         => $ma_data["results"] ?? [],
            "total_ms"       => round((microtime(true) - $t0) * 1000),
            "memory_turns"   => count(AmbreInternalMemory::load($chat_id)),
            "shared_kb_size" => count($shared_kb),
            "cache_bypass"   => true,
        ]);
        exit;
    }
    // Dispatcher unreachable or returned !ok → fall through to the standard path.
}
|
|
|
|
// Standard path: single-LLM answer, grounded in the persistent memory plus
// cross-chat hints drawn from the shared learning pool.
$sys_parts = [
    "Tu es un agent WEVAL Consulting, spécialisé et informé.",
    "Tu mémorises toute la conversation (mémoire persistante illimitée).",
    "Tu adaptes ton ton au contexte.",
    "Si la question est complexe, propose un multi-agent pour détailler.",
    "Réponds en français clair et actionnable.",
];

// Surface the 3 most recent cross-chat topics (newest first) as prompt hints,
// each trimmed to 100 chars.
if ($shared_kb) {
    $sys_parts[] = "Contexte global récent sur le serveur:";
    foreach (array_reverse(array_slice($shared_kb, -3)) as $recent) {
        $sys_parts[] = "• " . substr($recent["topic"] ?? "", 0, 100);
    }
}
|
|
|
|
// Assemble the transcript sent to the LLM: one fresh system message, then the
// stored history with any persisted system turns stripped out, then the new
// user message.
$messages = array_merge(
    [["role" => "system", "content" => implode("\n", $sys_parts)]],
    array_values(array_filter($history, fn ($turn) => $turn["role"] !== "system")),
    [["role" => "user", "content" => $msg]]
);
|
|
|
|
// LLM call — serialized through the optional semaphore so parallel chat
// requests don't overload the local gateway on port 4000.
// NOTE(review): if AmbreLLMSemaphore::acquire() can return 0/false/"" on a
// successful acquire, the release below is skipped — confirm against the class.
$sem_id = class_exists("AmbreLLMSemaphore") ? @AmbreLLMSemaphore::acquire() : null;

$llm_t0  = microtime(true);
$llm_raw = @file_get_contents("http://127.0.0.1:4000/v1/chat/completions", false, stream_context_create([
    "http" => [
        "method"        => "POST",
        "header"        => "Content-Type: application/json\r\n",
        "content"       => json_encode(["model" => "fast", "messages" => $messages, "max_tokens" => 800]),
        "timeout"       => 30,
        // Keep the body on non-2xx responses so gateway error payloads can be
        // decoded instead of file_get_contents() returning false.
        "ignore_errors" => true,
    ],
]));

// Release immediately after the HTTP call. A non-null $sem_id already implies
// the class exists, so the original's second class_exists() check was redundant.
if ($sem_id) @AmbreLLMSemaphore::release($sem_id);

$llm_data = @json_decode($llm_raw, true);
// Fallback string doubles as the user-visible error message (French UI).
$reply  = $llm_data["choices"][0]["message"]["content"] ?? "Erreur LLM";
$llm_ms = round((microtime(true) - $llm_t0) * 1000);
|
|
|
|
// Persist both turns of the exchange to this chat's memory file; llm_ms is
// stored as per-turn metadata.
AmbreInternalMemory::append($chat_id, "user", $msg);
AmbreInternalMemory::append($chat_id, "assistant", $reply, ["llm_ms"=>$llm_ms]);

// Standard-path response envelope (mirrors the multi-agent envelope above).
// chat_id is echoed back so callers that omitted it can reuse the derived id.
echo json_encode([
    "ok" => true,
    "mode" => "standard",
    "response" => $reply,
    "total_ms" => round((microtime(true)-$t0)*1000),
    "llm_ms" => $llm_ms,
    "memory_turns" => count(AmbreInternalMemory::load($chat_id)),
    "shared_kb_size" => count($shared_kb),
    "cache_bypass" => true,
    "chat_id" => $chat_id,
]);
|