html/api/wevia-claude-code-patterns.php

<?php
/**
 * WEVIA Claude Code Patterns v1.0 — 31 mars 2026
 *
 * Architectural patterns extracted from Claude Code source leak analysis:
 * 1. Token Budget Continuation — auto-resume on truncated outputs
 * 2. Three-Layer Context Compression — snip/compact/collapse
 * 3. Provider Effort Levels — intent complexity → provider tier mapping
 * 4. Deferred Tool Discovery — core vs specialized tool split (-85% tokens),
 *    with alphabetical tool sorting for prompt cache stability
 * 5. Enhanced Circuit Breaker — per-provider health tracking with decay
 * 6. Concurrency Safety Tiers — read-only vs write tool classification
 * 7. Memory Consolidation Framework — Dream-like session memory
 * 8. Provider Context Windows — per-provider context limit map
 * 9. Crash-Safe Persistence — JSONL log of conversation turns
 *
 * Usage: require_once __DIR__ . '/wevia-claude-code-patterns.php';
 * All functions are prefixed with wcp_ (wevia-claude-patterns)
 */

// Module version string (also written to the load-time log line).
const WCP_VERSION = '1.0.0';
// Upper bound on the number of memories kept by the dream consolidation.
const WCP_MAX_MEMORY_LINES = 200;

// ═══════════════════════════════════════════════════════════════
// 1. TOKEN BUDGET CONTINUATION
// When a provider truncates output mid-task, inject invisible
// meta-message and retry. Up to 3 recovery attempts.
// ═══════════════════════════════════════════════════════════════

/**
 * Decide whether a provider response looks like it was cut off.
 *
 * An explicit finish reason of 'length' is always treated as truncation.
 * Otherwise a heuristic is applied, but only to responses longer than
 * 200 characters (after trimming trailing whitespace):
 *   - an odd number of ``` fences means a code block was left open;
 *   - a response ending on a closing ``` fence is complete;
 *   - any other response is truncated when its last character is not
 *     terminal punctuation or a closing bracket/quote.
 *
 * @param string      $response      Text returned by the provider.
 * @param string|null $finish_reason Finish reason reported by the API, if any.
 * @return bool True when the response should be continued.
 */
function wcp_detect_truncation($response, $finish_reason = null) {
    // Explicit truncation signal from the API always wins.
    if ($finish_reason === 'length') return true;

    $trimmed = rtrim((string) $response);

    // Short (or empty) answers are never judged by the heuristic.
    if (mb_strlen($trimmed) <= 200) return false;

    // Odd fence count: a code block was opened and never closed.
    if (substr_count($trimmed, '```') % 2 !== 0) return true;

    // Fence count is even here, so a trailing fence is a closing one.
    // The last character alone ('`') would otherwise look non-terminal.
    if (mb_substr($trimmed, -3) === '```') return false;

    $terminals = ['.', '!', '?', ':', ';', '>', ')', ']', '}', '"', "'"];

    return !in_array(mb_substr($trimmed, -1), $terminals, true);
}

/**
 * Build the user-role meta-message that asks the model to resume a
 * truncated answer without apologising, recapping or restarting.
 *
 * @param string $partial_response The truncated answer received so far.
 * @return array Message with 'role' and 'content' keys; the content quotes
 *               the last 200 characters of the partial answer.
 */
function wcp_continuation_message($partial_response) {
    // Only the tail of the partial answer is echoed back as an anchor.
    $tail = mb_substr($partial_response, -200);

    $instruction = "[SYSTEM: Your previous response was truncated at the token limit. Continue EXACTLY where you left off. Do NOT apologize, do NOT recap what you already said, do NOT restart. Resume mid-sentence if needed. Here is your last 200 chars for context: \"" . $tail . "\"]";

    return ['role' => 'user', 'content' => $instruction];
}

/**
 * Call a provider and keep asking it to continue while its answer looks
 * truncated, concatenating the partial answers.
 *
 * @param callable $callProvider Invoked as ($messages, $sys, $provider); expected to
 *                               return an array with 'response' and optionally 'finish_reason'.
 * @param array    $messages     Conversation sent to the provider; extended locally
 *                               with each partial answer and a continuation request.
 * @param string   $sys          System prompt, passed through unchanged.
 * @param string   $provider     Provider identifier, passed through and logged.
 * @param int      $maxAttempts  Maximum number of provider calls that yield text.
 * @return array ['response' => concatenated text, 'attempts' => int, 'continued' => bool]
 */
function wcp_auto_continue($callProvider, $messages, $sys, $provider, $maxAttempts = 3) {
    $pieces = [];
    $attempts = 0;

    while ($attempts < $maxAttempts) {
        $reply = $callProvider($messages, $sys, $provider);

        // A failed or empty call ends the loop without counting as an attempt.
        if (!$reply || empty($reply['response'])) {
            break;
        }

        $chunk = $reply['response'];
        $pieces[] = $chunk;
        ++$attempts;

        $stillTruncated = wcp_detect_truncation($chunk, $reply['finish_reason'] ?? null);
        if (!$stillTruncated) {
            break;
        }

        // Replay the partial answer, then ask the model to pick up where it stopped.
        array_push(
            $messages,
            ["role" => "assistant", "content" => $chunk],
            wcp_continuation_message($chunk)
        );

        error_log(sprintf('WCP_CONTINUE: attempt=%d provider=%s partial_len=%d', $attempts, $provider, mb_strlen($chunk)));
    }

    return [
        'response' => implode('', $pieces),
        'attempts' => $attempts,
        'continued' => $attempts > 1,
    ];
}


// ═══════════════════════════════════════════════════════════════
// 2. THREE-LAYER CONTEXT COMPRESSION
// Layer 1: snipCompact — trim low-value segments
// Layer 2: autoCompact — summarize conversation history
// Layer 3: contextCollapse — restructure for provider limits
// ═══════════════════════════════════════════════════════════════

/**
 * Layer 1 of context compression: drop low-value messages and cap the
 * history at an estimated token budget, keeping the most recent messages.
 *
 * Messages are walked from newest to oldest. Low-value messages (as judged
 * by wcp_is_low_value) are skipped. As soon as the running estimate exceeds
 * the budget, the message that overflowed and everything older are replaced
 * by a single system summary (when wcp_summarize_block yields one).
 *
 * @param array $history          Messages with 'role' and 'content' keys, oldest first.
 * @param int   $maxTokenEstimate Budget in estimated tokens (~3.5 characters per token).
 * @return array Compacted history, oldest first.
 */
function wcp_snip_compact($history, $maxTokenEstimate = 8000) {
    if (empty($history)) return $history;

    $snipped = [];
    $totalEstTokens = 0;

    // Newest first, re-indexed so the loop key is the position in the list.
    $reversed = array_values(array_reverse($history));

    foreach ($reversed as $position => $msg) {
        $content = $msg['content'] ?? '';
        $tokenEst = (int)(mb_strlen($content) / 3.5); // rough token estimate

        // Skip greetings and acknowledgments.
        if (wcp_is_low_value($content, $msg['role'] ?? 'user')) {
            continue;
        }

        $totalEstTokens += $tokenEst;
        if ($totalEstTokens > $maxTokenEstimate) {
            // Slice from the current position, not from count($snipped):
            // skipped messages make the two differ, and using the count
            // would summarise messages that were already kept.
            $remaining = array_slice($reversed, $position);
            $summary = wcp_summarize_block($remaining);
            if ($summary) {
                $snipped[] = ["role" => "system", "content" => "[Résumé conversation précédente: $summary]"];
            }
            break;
        }

        $snipped[] = $msg;
    }

    return array_reverse($snipped);
}

/**
 * Tell whether a message is a bare acknowledgment or greeting that carries
 * no information worth keeping in the compressed history.
 *
 * @param string $content Message text.
 * @param string $role    Message role; accepted for signature compatibility, not consulted.
 * @return bool True for short acknowledgments (< 15 chars) or pure greetings (< 30 chars).
 */
function wcp_is_low_value($content, $role) {
    $normalized = mb_strtolower(trim($content));
    $length = mb_strlen($normalized);

    $ackPattern = '/^(ok|oui|non|merci|d.accord|parfait|super|cool|top|nice|yes|no|thanks|got it|compris|entendu|noté)$/i';
    $greetingPattern = '/^(bonjour|bonsoir|salut|hello|hi|hey|salam|coucou)\s*(claude|wevia|!|\.|\?)?$/i';

    // Each pattern has its own length ceiling so longer sentences are never dropped.
    $isAcknowledgment = $length < 15 && preg_match($ackPattern, $normalized);
    $isGreeting = $length < 30 && preg_match($greetingPattern, $normalized);

    return $isAcknowledgment || $isGreeting;
}

/**
 * Build a one-line extractive summary of a block of messages.
 *
 * No LLM is involved: a fixed keyword list is matched case-insensitively
 * against every message and the five most frequent keywords are reported.
 *
 * @param array $messages Messages with an optional 'content' key.
 * @return string|null Summary sentence, or null when no keyword was found.
 */
function wcp_summarize_block($messages) {
    $keywordPattern = '/(?:SAP|ERP|WEVIA|Arsenal|Ethica|API|server|docker|nginx|postgresql|kubernetes|python|php|react|audit|compliance|RGPD|cybersécurité|pharma|marketing|CRM|pipeline)/i';

    // Tally keywords in order of first appearance.
    $frequency = [];
    foreach ($messages as $message) {
        $text = $message['content'] ?? '';
        if (!preg_match_all($keywordPattern, $text, $found)) {
            continue;
        }
        foreach (array_map('strtolower', $found[0]) as $keyword) {
            $frequency[$keyword] = ($frequency[$keyword] ?? 0) + 1;
        }
    }

    arsort($frequency);
    $leading = array_slice(array_keys($frequency), 0, 5);

    if (empty($leading)) {
        return null;
    }

    return sprintf('Discussion de %d échanges couvrant: %s', count($messages), implode(', ', $leading));
}

/**
 * Layer 2 of context compression: keep the last 8 messages (4 exchanges)
 * verbatim and replace everything older with one system summary line.
 *
 * @param array $history      Messages, oldest first.
 * @param int   $targetTokens Accepted for signature compatibility; not consulted.
 * @return array The history unchanged when it has 8 messages or fewer, otherwise
 *               an optional summary message followed by the last 8 messages.
 */
function wcp_auto_compact($history, $targetTokens = 4000) {
    $keep = 8; // 4 exchanges = 8 messages

    if (count($history) <= $keep) {
        return $history;
    }

    $older = array_slice($history, 0, -$keep);
    $latest = array_slice($history, -$keep);

    // The summary is omitted entirely when no known topic appears in the older part.
    $digest = wcp_summarize_block($older);
    $prefix = $digest
        ? [["role" => "system", "content" => "[Contexte précédent: $digest]"]]
        : [];

    return array_merge($prefix, $latest);
}

/**
 * Layer 3 of context compression: make system prompt + KB context + history
 * fit the context window of the target provider.
 *
 * Sizes are estimated at ~3.5 characters per token. When the total is within
 * 85% of the limit nothing is changed. Otherwise, in order:
 *   1. the KB context is cut to 20% of the limit;
 *   2. the history goes through wcp_snip_compact (40% of the limit);
 *   3. if still above 85%, the history goes through wcp_auto_compact;
 *   4. a system prompt longer than 6000 characters is reduced to its first
 *      4000 and last 2000 characters;
 *   5. if the re-measured total is still above 90%, EXPERTISE blocks are
 *      stripped from the system prompt.
 *
 * @param string $sys               System prompt.
 * @param string $kbContext         Knowledge-base context.
 * @param array  $history           Messages with a 'content' key, oldest first.
 * @param int    $providerMaxTokens Context window of the provider, in tokens.
 * @return array ['sys' => string, 'kb' => string, 'history' => array]
 */
function wcp_context_collapse($sys, $kbContext, $history, $providerMaxTokens = 32000) {
    // Rough token estimate for one string.
    $estimate = static function ($text) {
        return (int)(mb_strlen($text) / 3.5);
    };
    // Rough token estimate for the whole context.
    $estimateAll = static function ($sysText, $kbText, $messages) use ($estimate) {
        $total = $estimate($sysText) + $estimate($kbText);
        foreach ($messages as $h) {
            $total += $estimate($h['content'] ?? '');
        }
        return $total;
    };

    $kbTokens = $estimate($kbContext);
    $totalTokens = $estimateAll($sys, $kbContext, $history);

    if ($totalTokens <= $providerMaxTokens * 0.85) {
        return ['sys' => $sys, 'kb' => $kbContext, 'history' => $history]; // Fits fine
    }

    error_log("WCP_COLLAPSE: total={$totalTokens} limit={$providerMaxTokens} — compressing");

    // Step 1: Trim KB context (least important)
    if ($kbTokens > $providerMaxTokens * 0.2) {
        $maxKbChars = (int)($providerMaxTokens * 0.2 * 3.5);
        $kbContext = mb_substr($kbContext, 0, $maxKbChars) . "\n[...KB tronquée pour limites contexte...]";
    }

    // Step 2: Compact history
    $history = wcp_snip_compact($history, (int)($providerMaxTokens * 0.4));

    // Step 3: If still too large, aggressive compact
    $newTotal = $estimateAll($sys, $kbContext, $history);
    if ($newTotal > $providerMaxTokens * 0.85) {
        $history = wcp_auto_compact($history, (int)($providerMaxTokens * 0.3));
    }

    // Step 4: Universal cap on the system prompt, because the fallback chain
    // may end up on a provider with a much smaller window than the selected one.
    if (mb_strlen($sys) > 6000) {
        $sys = mb_substr($sys, 0, 4000) . "\n[...expertises condensées pour optimisation contexte...]\n" . mb_substr($sys, -2000);
        error_log("WCP_COLLAPSE: UNIVERSAL cap sys=" . mb_strlen($sys) . " (was >" . 6000 . ")");
    }

    // Step 5: Re-measure after steps 3 and 4 so the decision below reflects the
    // context that will actually be sent, not the size before those reductions.
    $newTotal = $estimateAll($sys, $kbContext, $history);
    if ($newTotal > $providerMaxTokens * 0.9) {
        // Keep core identity + security, trim expertise details
        if (preg_match('/EXPERTISE [A-Z]+:/s', $sys)) {
            // Remove all EXPERTISE blocks but keep CONTACT and SECURITE INTERNE
            $stripped = preg_replace('/EXPERTISE (?:SECURITE|PHARMA|ERP|PDF|SCHEMA|API|FRONTEND|DEVOPS|RGPD|IA ETHICS)[^E]+((?=EXPERTISE)|(?=CONTACT)|(?=SECURITE INTERNE)|(?=QUALITE))/s', "[Expertise condensée]\n", $sys);
            if (is_string($stripped)) {
                $sys = $stripped;
                error_log("WCP_COLLAPSE: Trimmed EXPERTISE blocks");
            } else {
                // preg_replace returns null on a PCRE error; keep the prompt rather than lose it.
                error_log("WCP_COLLAPSE: EXPERTISE trim failed, system prompt left unchanged");
            }
        }
    }

    return ['sys' => $sys, 'kb' => $kbContext, 'history' => $history];
}


// ═══════════════════════════════════════════════════════════════
// 3. PROVIDER EFFORT LEVELS
// Map intent complexity to provider tiers.
// Pattern from Claude Code: low/medium/high effort → model selection
// ═══════════════════════════════════════════════════════════════

/**
 * Provider capability tiers used by the multi-provider fallback.
 */
// Simple messages and greetings: cheapest, fastest provider.
const WCP_TIER_FAST = 'fast';
// General requests: balance of speed and quality.
const WCP_TIER_BALANCED = 'balanced';
// Complex reasoning: best available model.
const WCP_TIER_DEEP = 'deep';
// Domain-specific work: vision, code, etc.
const WCP_TIER_SPECIALIST = 'specialist';

/**
 * Ordered provider lists for each capability tier.
 *
 * The fast, balanced and deep tiers map to a flat, priority-ordered list of
 * provider names. The specialist tier maps domain names (vision, code, rag,
 * multilingual) to their own priority-ordered lists.
 *
 * @return array Tier constant => provider list (or domain => provider list).
 */
function wcp_provider_tiers() {
    $specialistsByDomain = [
        'vision' => ['gemini', 'groq_vision'],
        'code' => ['cerebras', 'deepseek'],
        'rag' => ['cohere', 'groq_deep'],
        'multilingual' => ['mistral', 'alibaba', 'gemini'],
    ];

    $tiers = [];
    $tiers[WCP_TIER_FAST] = ['cerebras', 'groq', 'sambanova', 'ollama_s95'];
    // cerebras is left out of the next two tiers: its 8K window is too small for the full system prompt.
    $tiers[WCP_TIER_BALANCED] = ['groq', 'alibaba', 'sambanova', 'mistral', 'deepseek'];
    $tiers[WCP_TIER_DEEP] = ['groq_deep', 'deepseek', 'gemini', 'cohere', 'alibaba'];
    $tiers[WCP_TIER_SPECIALIST] = $specialistsByDomain;

    return $tiers;
}

/**
 * Pick an effort tier from the intent, the routing mode and the message length.
 *
 * Rules are evaluated in order and the first match wins:
 *   1. greeting intent, 'fast' mode, or message under 20 chars → fast tier;
 *   2. 'image' intent → specialist/vision;
 *   3. 'code' or 'schema' intent → specialist/code;
 *   4. analysis/compliance/medical/consulting intent, or 'verified' mode → deep tier;
 *   5. message over 300 chars → deep tier with a smaller context budget;
 *   6. anything else → balanced tier.
 *
 * @param string $intent Detected intent label.
 * @param string $msg    User message.
 * @param string $mode   Routing mode ('balanced' by default).
 * @return array ['tier', 'providers', 'context_budget', 'max_tokens']
 */
function wcp_classify_effort($intent, $msg, $mode = 'balanced') {
    $length = mb_strlen($msg);
    $tiers = wcp_provider_tiers();

    // Every rule returns the same four-key shape.
    $plan = static function ($tier, $providers, $contextBudget, $maxTokens) {
        return [
            'tier' => $tier,
            'providers' => $providers,
            'context_budget' => $contextBudget,
            'max_tokens' => $maxTokens,
        ];
    };

    $isTrivial = $intent === 'greeting' || $mode === 'fast' || $length < 20;
    if ($isTrivial) {
        return $plan(WCP_TIER_FAST, $tiers[WCP_TIER_FAST], 4000, 512);
    }

    if ($intent === 'image') {
        return $plan(WCP_TIER_SPECIALIST, $tiers[WCP_TIER_SPECIALIST]['vision'], 8000, 2048);
    }

    if ($intent === 'code' || $intent === 'schema') {
        return $plan(WCP_TIER_SPECIALIST, $tiers[WCP_TIER_SPECIALIST]['code'], 16000, 4096);
    }

    $deepIntents = ['analysis', 'compliance', 'medical', 'consulting'];
    if (in_array($intent, $deepIntents) || $mode === 'verified') {
        return $plan(WCP_TIER_DEEP, $tiers[WCP_TIER_DEEP], 24000, 4096);
    }

    // A long message is taken as a sign of a complex query.
    if ($length > 300) {
        return $plan(WCP_TIER_DEEP, $tiers[WCP_TIER_DEEP], 16000, 4096);
    }

    return $plan(WCP_TIER_BALANCED, $tiers[WCP_TIER_BALANCED], 12000, 2048);
}

/**
 * Choose a provider for a request from its effort tier and the circuit
 * breaker state.
 *
 * Providers of the tier are tried in priority order; the first one that is
 * not currently blocked is selected. A provider counts as blocked only while
 * its 'blocked' flag is set AND its 'blocked_until' timestamp has not passed,
 * which is the same recovery rule wcp_cb_is_available applies. Entries with
 * no 'blocked_until' key are judged on the flag alone.
 *
 * @param string $msg     User message.
 * @param string $mode    Routing mode.
 * @param string $intent  Detected intent label.
 * @param array  $cbState Circuit breaker state keyed by provider name.
 * @return array ['provider' => string, 'effort' => array]; falls back to
 *               'ollama_s95' when every provider of the tier is blocked.
 */
function wcp_smart_route_v3($msg, $mode, $intent, $cbState = []) {
    $effort = wcp_classify_effort($intent, $msg, $mode);
    $now = time();

    $providers = [];
    foreach ($effort['providers'] as $p) {
        $entry = $cbState[$p] ?? null;

        // Honour the cooldown: raw state loaded from disk keeps 'blocked' set
        // after the cooldown expired, so the flag alone would block forever.
        $blocked = is_array($entry)
            && !empty($entry['blocked'])
            && (!isset($entry['blocked_until']) || $now <= $entry['blocked_until']);

        if (!$blocked) {
            $providers[] = $p;
        }
    }

    if (empty($providers)) {
        // All providers in tier are broken — fallback to ollama
        error_log("WCP_ROUTE: all tier={$effort['tier']} providers broken, fallback ollama");
        return ['provider' => 'ollama_s95', 'effort' => $effort];
    }

    error_log("WCP_ROUTE: tier={$effort['tier']} provider={$providers[0]} budget={$effort['context_budget']}");
    return ['provider' => $providers[0], 'effort' => $effort];
}


// ═══════════════════════════════════════════════════════════════
// 4. DEFERRED TOOL DISCOVERY
// Only surface core tools in the base prompt.
// Load specialized tools on-demand when the model requests them.
// This reduces prompt token usage by ~85%.
// ═══════════════════════════════════════════════════════════════

/**
 * Complete tool catalog.
 *
 * Each entry carries:
 *   - 'core'     true when the tool is always listed in the prompt, false
 *                when it is only surfaced through deferred discovery;
 *   - 'safety'   'read' or 'write';
 *   - 'category' discovery category (specialized tools only);
 *   - 'desc'     user-facing description.
 *
 * @return array Tool name => metadata.
 */
function wcp_tool_registry() {
    // Entry builder for tools that are always present in the prompt.
    $core = static function ($safety, $desc) {
        return ['core' => true, 'safety' => $safety, 'desc' => $desc];
    };
    // Entry builder for tools loaded on demand.
    $onDemand = static function ($safety, $category, $desc) {
        return ['core' => false, 'safety' => $safety, 'category' => $category, 'desc' => $desc];
    };

    return [
        // Core tools
        'web_search' => $core('read', 'Recherche web en temps réel'),
        'kb_search' => $core('read', 'Recherche base de connaissances WEVIA'),
        'mermaid' => $core('read', 'Génération de schémas Mermaid'),
        'pdf_generate' => $core('write', 'Génération de documents PDF'),
        'code_execute' => $core('write', 'Exécution de code Python/PHP'),
        'image_generate' => $core('write', 'Génération d\'images IA'),
        'svg_logo' => $core('write', 'Génération de logos SVG'),
        'tool_search' => $core('read', 'Découvrir des outils spécialisés disponibles'),

        // Specialized tools
        'nuclei_scan' => $onDemand('write', 'security', 'Scan de vulnérabilités Nuclei'),
        'kilo_cli' => $onDemand('write', 'devops', 'Kilo CLI pour gestion serveurs'),
        'toolFK' => $onDemand('read', 'data', '25 outils data: JSON, CSV, XML, regex, hash, etc.'),
        'ethica_hcp' => $onDemand('read', 'pharma', 'Recherche HCP base Ethica'),
        'arsenal_query' => $onDemand('write', 'marketing', 'Requêtes Arsenal WEVADS'),
        'domain_check' => $onDemand('read', 'marketing', 'Vérification domaine email (DKIM/SPF/DMARC)'),
        'cron_manager' => $onDemand('write', 'devops', 'Gestion des crons serveur'),
        'db_query' => $onDemand('read', 'data', 'Requêtes PostgreSQL en lecture'),
        'git_status' => $onDemand('read', 'devops', 'Statut des repos Git'),
        'docker_status' => $onDemand('read', 'devops', 'Statut des conteneurs Docker'),
        'uptime_check' => $onDemand('read', 'monitoring', 'Statut Uptime Kuma'),
        'plausible_stats' => $onDemand('read', 'analytics', 'Statistiques Plausible Analytics'),
        'searxng' => $onDemand('read', 'search', 'Recherche SearXNG souveraine'),
        'n8n_workflow' => $onDemand('write', 'automation', 'Déclenchement workflows n8n'),
        'warmup_control' => $onDemand('write', 'marketing', 'Contrôle warmup emails'),
        'contact_import' => $onDemand('write', 'marketing', 'Import contacts CSV'),
        'ai_template' => $onDemand('write', 'marketing', 'Génération templates email IA'),
        'rlhf_feedback' => $onDemand('write', 'system', 'Feedback RLHF utilisateur'),
    ];
}

/**
 * Core tools only, i.e. those always listed in the base prompt.
 *
 * @return array Tool name => metadata, sorted by name so the generated
 *               prompt text is stable from one request to the next.
 */
function wcp_get_core_tools() {
    $coreTools = [];

    foreach (wcp_tool_registry() as $toolName => $meta) {
        if ($meta['core'] === true) {
            $coreTools[$toolName] = $meta;
        }
    }

    ksort($coreTools);

    return $coreTools;
}

/**
 * Deferred tool discovery: find specialized tools whose name, description
 * or category contains the query (case-insensitive substring match).
 *
 * The query is trimmed first, so a stray space around a keyword does not
 * prevent a match. An empty query lists every specialized tool.
 *
 * @param string $query Keyword or category name.
 * @return array Matching tool name => metadata, sorted by name.
 */
function wcp_discover_tools($query) {
    $needle = mb_strtolower(trim((string) $query));
    $results = [];

    foreach (wcp_tool_registry() as $name => $tool) {
        if ($tool['core'] !== false) {
            continue; // core tools are already in the prompt
        }

        if ($needle !== '') {
            // The category is part of the searchable text, so one pass
            // covers both keyword and category matches.
            $searchable = mb_strtolower($name . ' ' . $tool['desc'] . ' ' . ($tool['category'] ?? ''));
            if (mb_strpos($searchable, $needle) === false) {
                continue;
            }
        }

        $results[$name] = $tool;
    }

    ksort($results); // Alphabetical for cache stability
    return $results;
}

/**
 * Build the tools section of the system prompt: every core tool with its
 * description, then only the category names of the specialized tools plus
 * instructions for reaching them through tool_search.
 *
 * @return string Prompt fragment ending with a newline.
 */
function wcp_tool_discovery_prompt() {
    $coreTools = wcp_get_core_tools();

    // Distinct, non-empty categories of the specialized tools.
    $categorySet = [];
    foreach (wcp_tool_registry() as $tool) {
        if ($tool['core'] === false && !empty($tool['category'])) {
            $categorySet[$tool['category']] = true;
        }
    }
    $categories = array_keys($categorySet);
    sort($categories);

    $lines = ["OUTILS DISPONIBLES:"];
    foreach ($coreTools as $name => $tool) {
        $lines[] = "- $name: {$tool['desc']}";
    }
    $lines[] = "";
    $lines[] = "OUTILS SPÉCIALISÉS (utilise tool_search pour les découvrir):";
    $lines[] = "Catégories: " . implode(', ', $categories);
    $lines[] = "Pour accéder aux outils spécialisés, utilise: tool_search(\"catégorie ou mot-clé\")";
    $lines[] = "Exemple: tool_search(\"security\") → nuclei_scan, etc.";

    return implode("\n", $lines) . "\n";
}


// ═══════════════════════════════════════════════════════════════
// 5. ENHANCED CIRCUIT BREAKER
// Per-provider health tracking with exponential decay.
// Pattern: track failures, auto-recover after cooldown.
// ═══════════════════════════════════════════════════════════════

/**
 * Location of the JSON file holding the circuit breaker state.
 *
 * @return string Absolute file path.
 */
function wcp_cb_state_file() {
    $stateFile = '/tmp/wcp_circuit_breaker.json';

    return $stateFile;
}

/**
 * Read the circuit breaker state from disk.
 *
 * @return array Decoded state keyed by provider name; an empty array when
 *               the file is missing or does not decode to an array.
 */
function wcp_cb_load() {
    $path = wcp_cb_state_file();

    if (file_exists($path)) {
        $decoded = @json_decode(file_get_contents($path), true);
        if (is_array($decoded)) {
            return $decoded;
        }
    }

    return [];
}

/**
 * Write the circuit breaker state to disk as JSON.
 *
 * The write takes an exclusive lock, as wcp_persist_turn does for session
 * logs, so concurrent writers do not interleave. Failures are logged rather
 * than ignored; a state that cannot be encoded is not written at all, which
 * leaves the previous file in place.
 *
 * @param array $state State keyed by provider name.
 * @return void
 */
function wcp_cb_save($state) {
    $json = json_encode($state);
    if ($json === false) {
        error_log("WCP_CB: state not saved, JSON encoding failed: " . json_last_error_msg());
        return;
    }

    $file = wcp_cb_state_file();
    if (file_put_contents($file, $json, LOCK_EX) === false) {
        error_log("WCP_CB: state not saved, cannot write $file");
    }
}

/**
 * Register one failure for a provider and block it once its failure count
 * reaches 3.
 *
 * The cooldown doubles with every further failure (30s, 60s, 120s, 240s...)
 * and is capped at 600 seconds. The updated state is saved to disk.
 *
 * @param string $provider Provider name.
 * @return array The full state after the update.
 */
function wcp_cb_record_failure($provider) {
    $state = wcp_cb_load();

    $entry = $state[$provider] ?? ['failures' => 0, 'last_failure' => 0, 'blocked' => false, 'blocked_until' => 0];
    $entry['failures']++;
    $entry['last_failure'] = time();

    $threshold = 3;
    if ($entry['failures'] >= $threshold) {
        // 30s at the threshold, doubling for each failure beyond it, max 600s.
        $cooldown = min(600, 30 * pow(2, $entry['failures'] - $threshold));
        $entry['blocked'] = true;
        $entry['blocked_until'] = time() + $cooldown;
        error_log("WCP_CB: BLOCKED provider=" . $provider . " failures=" . $entry['failures'] . " cooldown=" . $cooldown . "s");
    }

    $state[$provider] = $entry;
    wcp_cb_save($state);

    return $state;
}

/**
 * Register one success for a provider: unblock it and lower its failure
 * count by one (never below zero) instead of resetting it outright.
 *
 * Providers with no recorded state are left untouched, but the state is
 * written back to disk either way.
 *
 * @param string $provider Provider name.
 * @return array The full state after the update.
 */
function wcp_cb_record_success($provider) {
    $state = wcp_cb_load();

    if (isset($state[$provider])) {
        $decayed = max(0, $state[$provider]['failures'] - 1);
        $state[$provider] = array_replace($state[$provider], [
            'failures' => $decayed,
            'blocked' => false,
            'blocked_until' => 0,
        ]);
    }

    wcp_cb_save($state);

    return $state;
}

/**
 * Tell whether a provider may be called right now.
 *
 * A provider with no recorded state, or one that is not blocked, is
 * available. A blocked provider becomes available again once its cooldown
 * has passed; that recovery is saved to disk and logged.
 *
 * @param string $provider Provider name.
 * @return bool True when the provider may be used.
 */
function wcp_cb_is_available($provider) {
    $state = wcp_cb_load();
    $entry = $state[$provider] ?? null;

    if ($entry === null || !$entry['blocked']) {
        return true;
    }

    // Still inside the cooldown window.
    if (time() <= $entry['blocked_until']) {
        return false;
    }

    // Cooldown over: clear the flag and persist the recovery.
    $state[$provider]['blocked'] = false;
    wcp_cb_save($state);
    error_log("WCP_CB: AUTO_RECOVER provider=$provider");

    return true;
}

/**
 * Keep only the providers that the circuit breaker currently allows.
 *
 * @param array $providerList Provider names.
 * @return array Available providers under their original keys (not re-indexed).
 */
function wcp_cb_available_providers($providerList) {
    $available = [];

    foreach ($providerList as $key => $providerName) {
        if (wcp_cb_is_available($providerName)) {
            $available[$key] = $providerName;
        }
    }

    return $available;
}


// ═══════════════════════════════════════════════════════════════
// 6. CONCURRENCY SAFETY TIERS
// Classification of tools as read-only (safe for parallel)
// vs write (must be serial). From Claude Code pattern.
// ═══════════════════════════════════════════════════════════════

/**
 * Safety class of a tool as declared in the registry.
 *
 * @param string $toolName Tool name.
 * @return string 'read' or 'write'; unknown tools are reported as 'write',
 *                the more restrictive class.
 */
function wcp_tool_safety($toolName) {
    $tools = wcp_tool_registry();

    if (isset($tools[$toolName]['safety'])) {
        return $tools[$toolName]['safety'];
    }

    return 'write';
}

/**
 * Tell whether a set of tools may run in parallel, which is the case only
 * when every one of them is classified 'read'.
 *
 * @param array $tools Tool names.
 * @return bool True when all tools are read-only (also true for an empty set).
 */
function wcp_can_parallel($tools) {
    $allReadOnly = true;

    foreach ($tools as $toolName) {
        if (wcp_tool_safety($toolName) !== 'read') {
            $allReadOnly = false;
            break;
        }
    }

    return $allReadOnly;
}


// ═══════════════════════════════════════════════════════════════
// 7. MEMORY CONSOLIDATION (DREAM SYSTEM)
// Background process that consolidates session memories.
// Phases: Orient → Gather → Consolidate → Prune
// Designed to run as a cron job.
// ═══════════════════════════════════════════════════════════════

/**
 * Dream consolidation for WEVIA session memory.
 *
 * Runs four phases against the admin.wevia_memory table:
 *   1. Orient      — load up to 200 active memories, best confidence first;
 *   2. Gather      — extract facts from the last 7 days of admin.rlhf_feedback
 *                    (at most 100 rows) with wcp_extract_facts;
 *   3. Consolidate — merge the new facts into the table via wcp_merge_memories;
 *   4. Prune       — cap the in-memory list at WCP_MAX_MEMORY_LINES.
 *
 * The table and its indexes are created on each run if they do not exist.
 * Errors in the orient and gather phases are logged and the run continues
 * with whatever was collected.
 *
 * NOTE(review): $sessionId is never read in this function.
 * NOTE(review): the prune phase only trims the returned array; nothing in
 * this function or in wcp_prune_memories deactivates rows in the database.
 *
 * @param \PDO        $pdo       Database connection (presumably PostgreSQL, given
 *                               SERIAL / NOW() / INTERVAL — confirm).
 * @param string|null $sessionId Unused.
 * @return array|false ['new_facts' => int, 'total' => int, 'pruned' => int],
 *                     or false when the table cannot be created.
 */
function wcp_dream_consolidate($pdo, $sessionId = null) {
    // Ensure the memory table and its indexes exist; abort the run otherwise.
    try {
        $pdo->exec("CREATE TABLE IF NOT EXISTS admin.wevia_memory (
            id SERIAL PRIMARY KEY,
            session_id VARCHAR(100),
            category VARCHAR(50) DEFAULT 'general',
            fact TEXT NOT NULL,
            confidence FLOAT DEFAULT 1.0,
            source VARCHAR(100),
            created_at TIMESTAMP DEFAULT NOW(),
            updated_at TIMESTAMP DEFAULT NOW(),
            contradicted_by INT REFERENCES admin.wevia_memory(id),
            active BOOLEAN DEFAULT true
        )");
        $pdo->exec("CREATE INDEX IF NOT EXISTS idx_wevia_memory_active ON admin.wevia_memory(active) WHERE active = true");
        $pdo->exec("CREATE INDEX IF NOT EXISTS idx_wevia_memory_session ON admin.wevia_memory(session_id)");
    } catch(\Exception $e) {
        error_log("WCP_DREAM: table creation error: " . $e->getMessage());
        return false;
    }

    // Phase 1: Orient — load the current active memories.
    // On error the list stays empty and the run continues.
    $currentMemories = [];
    try {
        $st = $pdo->query("SELECT id, category, fact, confidence, created_at FROM admin.wevia_memory WHERE active = true ORDER BY confidence DESC, updated_at DESC LIMIT 200");
        $currentMemories = $st->fetchAll(\PDO::FETCH_ASSOC);
    } catch(\Exception $e) {
        error_log("WCP_DREAM: orient error: " . $e->getMessage());
    }

    // Phase 2: Gather — extract facts from recent RLHF feedback rows.
    $recentFacts = [];
    try {
        // Last 7 days, newest first. The query has no "already consolidated"
        // filter, so the same rows are read again on the next run.
        $st = $pdo->query("SELECT question, answer, provider, created_at FROM admin.rlhf_feedback
                          WHERE created_at > NOW() - INTERVAL '7 days'
                          ORDER BY created_at DESC LIMIT 100");
        $conversations = $st->fetchAll(\PDO::FETCH_ASSOC);

        foreach ($conversations as $conv) {
            $extracted = wcp_extract_facts($conv['question'], $conv['answer']);
            foreach ($extracted as $fact) {
                // Tag each fact with a short hash of the row's timestamp as its source.
                $recentFacts[] = array_merge($fact, ['source' => 'rlhf_' . substr(md5($conv['created_at']), 0, 8)]);
            }
        }
    } catch(\Exception $e) {
        error_log("WCP_DREAM: gather error: " . $e->getMessage());
    }

    // Phase 3: Consolidate — merge with existing memories (this writes to the database).
    $consolidated = wcp_merge_memories($currentMemories, $recentFacts, $pdo);

    // Phase 4: Prune — keep at most WCP_MAX_MEMORY_LINES entries in the result.
    $pruned = wcp_prune_memories($consolidated, WCP_MAX_MEMORY_LINES);

    error_log("WCP_DREAM: consolidated " . count($recentFacts) . " new facts, total active=" . count($pruned));

    // 'new_facts' counts every extracted fact, including duplicates that were not inserted.
    return ['new_facts' => count($recentFacts), 'total' => count($pruned), 'pruned' => count($consolidated) - count($pruned)];
}

/**
 * Extract candidate facts from a question/answer pair with regular
 * expressions only (no LLM call).
 *
 * Three kinds of fact are recognised in the French text:
 *   - technology: a usage verb followed by one or two words (confidence 0.7);
 *   - decision: a decision verb followed by text up to the next '.', ',' or ';'
 *     (confidence 0.9);
 *   - troubleshoot: a problem/solution keyword followed by 20 to 100 characters
 *     (confidence 0.8).
 *
 * @param string $question User question.
 * @param string $answer   Assistant answer.
 * @return array List of ['category' => string, 'fact' => string, 'confidence' => float].
 */
function wcp_extract_facts($question, $answer) {
    $facts = [];
    $combined = $question . ' ' . $answer;

    // Technology mentions. "impl[eé]ment[eé]" accepts the correct spelling
    // "implémenté" as well as the unaccented variants.
    if (preg_match_all('/(utilise|recommande|impl[eé]ment[eé]|déployé|configuré)\s+(\w+(?:\s+\w+)?)/iu', $combined, $m)) {
        foreach ($m[0] as $match) {
            $facts[] = ['category' => 'technology', 'fact' => trim($match), 'confidence' => 0.7];
        }
    }

    // Decisions
    if (preg_match_all('/(choisi|décidé|opté pour|validé|confirmé)\s+(.+?)[\.\,\;]/iu', $combined, $m)) {
        foreach ($m[0] as $match) {
            $facts[] = ['category' => 'decision', 'fact' => trim($match), 'confidence' => 0.9];
        }
    }

    // Problems and solutions
    if (preg_match_all('/(problème|erreur|bug|fix|solution|résolu)\s*:?\s*(.{20,100})/iu', $combined, $m)) {
        foreach ($m[0] as $match) {
            $facts[] = ['category' => 'troubleshoot', 'fact' => trim($match), 'confidence' => 0.8];
        }
    }

    return $facts;
}

/**
 * Merge newly extracted facts into the stored memories.
 *
 * A new fact is a duplicate when its word similarity with a known memory is
 * above 0.7. Known memories include the ones added earlier in the same call,
 * so a fact repeated within one batch is inserted only once. For a duplicate
 * of a stored row, the row's confidence is raised when the new fact is more
 * confident. Any other fact is inserted into admin.wevia_memory.
 *
 * Contradiction handling is not implemented here: the contradicted_by
 * column is never written by this function.
 *
 * @param array $existing Stored memories with 'id', 'fact' and 'confidence' keys.
 * @param array $newFacts Facts with 'category', 'fact', 'confidence' and optional 'source'.
 * @param \PDO  $pdo      Database connection.
 * @return array $existing followed by the facts that were inserted.
 */
function wcp_merge_memories($existing, $newFacts, $pdo) {
    $merged = $existing;

    foreach ($newFacts as $fact) {
        // Look for a near-identical memory among stored rows AND facts already
        // merged during this call.
        $duplicateOf = null;
        foreach ($merged as $mem) {
            // Simple fuzzy match: >70% shared words
            if (wcp_text_similarity($fact['fact'], $mem['fact']) > 0.7) {
                $duplicateOf = $mem;
                break;
            }
        }

        if ($duplicateOf !== null) {
            // Only stored rows have an id that can be updated.
            if (isset($duplicateOf['id']) && $fact['confidence'] > ($duplicateOf['confidence'] ?? 0)) {
                try {
                    $pdo->prepare("UPDATE admin.wevia_memory SET confidence = ?, updated_at = NOW() WHERE id = ?")
                        ->execute([$fact['confidence'], $duplicateOf['id']]);
                } catch(\Exception $e) {
                    error_log("WCP_DREAM: update error: " . $e->getMessage());
                }
            }
            continue;
        }

        // Insert new fact
        try {
            $st = $pdo->prepare("INSERT INTO admin.wevia_memory (category, fact, confidence, source) VALUES (?, ?, ?, ?)");
            $st->execute([$fact['category'], $fact['fact'], $fact['confidence'], $fact['source'] ?? 'dream']);
            $merged[] = $fact;
        } catch(\Exception $e) {
            error_log("WCP_DREAM: insert error: " . $e->getMessage());
        }
    }

    return $merged;
}

/**
 * Cap a list of memories at a maximum size, keeping the best ones.
 *
 * A list already within the limit is returned untouched. Otherwise it is
 * ordered by confidence (highest first, missing values count as 0.5;
 * differences of 0.01 or less count as a tie) and then by 'created_at'
 * (latest first), and cut to the limit. Only the array is affected.
 *
 * @param array $memories Memories with optional 'confidence' and 'created_at' keys.
 * @param int   $maxLines Maximum number of entries to keep.
 * @return array The retained memories.
 */
function wcp_prune_memories($memories, $maxLines) {
    if (count($memories) > $maxLines) {
        usort($memories, function ($left, $right) {
            $gap = ($right['confidence'] ?? 0.5) - ($left['confidence'] ?? 0.5);

            // Near-equal confidence: the more recent memory comes first.
            if (abs($gap) <= 0.01) {
                return strcmp($right['created_at'] ?? '', $left['created_at'] ?? '');
            }

            return $gap > 0 ? 1 : -1;
        });

        $memories = array_slice($memories, 0, $maxLines);
    }

    return $memories;
}

/**
 * Word-level Jaccard similarity between two texts.
 *
 * Both texts are lower-cased and split on whitespace; the score is the
 * number of distinct shared words divided by the number of distinct words
 * overall. Empty tokens are discarded, so leading or trailing whitespace
 * does not affect the score, and a text with no words scores 0 against
 * anything (including another empty text).
 *
 * @param string $a First text.
 * @param string $b Second text.
 * @return int|float Score between 0 and 1.
 */
function wcp_text_similarity($a, $b) {
    // Distinct lower-cased words of a text.
    $tokenize = static function ($text) {
        $words = preg_split('/\s+/', mb_strtolower((string) $text), -1, PREG_SPLIT_NO_EMPTY);
        return array_unique($words === false ? [] : $words);
    };

    $wordsA = $tokenize($a);
    $wordsB = $tokenize($b);

    if (empty($wordsA) || empty($wordsB)) return 0;

    $intersection = count(array_intersect($wordsA, $wordsB));
    $union = count(array_unique(array_merge($wordsA, $wordsB)));

    return $union > 0 ? $intersection / $union : 0;
}

/**
 * Load the top active memories and format them for injection into the
 * system prompt.
 *
 * Memories are grouped by category; each category contributes one line
 * with at most five facts.
 *
 * @param \PDO $pdo   Database connection.
 * @param int  $limit Maximum number of rows to load. It is cast to a
 *                    non-negative integer before being placed in the query.
 * @return string Prompt fragment, or '' when there is nothing to inject
 *                or the query fails.
 */
function wcp_get_session_memory($pdo, $limit = 20) {
    // The value is interpolated into the SQL text, so force it to an integer.
    $limit = max(0, (int) $limit);

    try {
        $st = $pdo->query("SELECT category, fact FROM admin.wevia_memory
                          WHERE active = true
                          ORDER BY confidence DESC, updated_at DESC
                          LIMIT $limit");
        // query() returns false instead of throwing when PDO is not in exception mode.
        if (!$st) return '';

        $memories = $st->fetchAll(\PDO::FETCH_ASSOC);

        if (empty($memories)) return '';

        $grouped = [];
        foreach ($memories as $m) {
            $grouped[$m['category']][] = $m['fact'];
        }

        $prompt = "\nMÉMOIRE CONSOLIDÉE:\n";
        foreach ($grouped as $cat => $facts) {
            $prompt .= strtoupper($cat) . ": " . implode('; ', array_slice($facts, 0, 5)) . "\n";
        }

        return $prompt;
    } catch(\Exception $e) {
        return '';
    }
}


// ═══════════════════════════════════════════════════════════════
// 8. PROVIDER CONTEXT WINDOWS
// Map each provider to its actual context window limit.
// Used by context_collapse to optimize per-provider.
// ═══════════════════════════════════════════════════════════════

/**
 * Context window size, in tokens, for each known provider.
 *
 * @return array Provider name => token limit.
 */
function wcp_provider_context_limits() {
    $k = 1024; // limits are written as multiples of 1024 tokens

    return [
        'cerebras' => 8 * $k,
        'groq' => 128 * $k,
        'groq_deep' => 128 * $k,
        'groq_vision' => 128 * $k,
        'ollama_s95' => 8 * $k,
        'ollama_local' => 8 * $k,
        'mistral' => 32 * $k,
        'deepseek' => 64 * $k,
        'alibaba' => 32 * $k,
        'sambanova' => 64 * $k,
        'gemini' => 1024 * $k,
        'cohere' => 128 * $k,
    ];
}

/**
 * Context window of one provider.
 *
 * @param string $provider Provider name.
 * @return int Token limit; 8192 for providers missing from the limits map.
 */
function wcp_get_context_limit($provider) {
    $limits = wcp_provider_context_limits();

    // Unknown providers get the smallest window in the map as a conservative default.
    return isset($limits[$provider]) ? $limits[$provider] : 8192;
}


// ═══════════════════════════════════════════════════════════════
// 9. CRASH-SAFE PERSISTENCE
// Write conversation state before API calls.
// Pattern from Claude Code: JSONL format, session resumable.
// ═══════════════════════════════════════════════════════════════

/**
 * Append one conversation turn to the session's JSONL log file.
 *
 * The write is best-effort: directory creation and file write warnings are
 * suppressed. The content is capped at 5000 characters. A session id that
 * contains a path separator or a NUL byte is refused, because the id is
 * used as the file name. Invalid UTF-8 in the payload is substituted rather
 * than making the whole entry fail to encode.
 *
 * @param string $sessionId Session identifier, used as the log file name.
 * @param string $role      Turn role (e.g. 'user').
 * @param string $content   Turn text.
 * @param string $provider  Provider handling the turn.
 * @param array  $metadata  Extra data stored under 'meta'.
 * @return void
 */
function wcp_persist_turn($sessionId, $role, $content, $provider = '', $metadata = []) {
    $sessionId = (string) $sessionId;

    // Keep the log file inside the log directory.
    if (strpbrk($sessionId, "/\\\0") !== false) {
        error_log("WCP_PERSIST: refused session id containing a path separator");
        return;
    }

    $logDir = '/var/www/weval/wevia-ia/session-logs';
    if (!is_dir($logDir)) @mkdir($logDir, 0755, true);

    $entry = json_encode([
        'ts' => date('c'),
        'session' => $sessionId,
        'role' => $role,
        'content' => mb_substr($content, 0, 5000), // Cap for storage
        'provider' => $provider,
        'meta' => $metadata,
    ], JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE);

    // Without this check a failed encode would append a blank line.
    if ($entry === false) {
        error_log("WCP_PERSIST: turn not saved, JSON encoding failed: " . json_last_error_msg());
        return;
    }

    @file_put_contents("$logDir/$sessionId.jsonl", $entry . "\n", FILE_APPEND | LOCK_EX);
}

/**
 * Load the turns recorded for a session from its JSONL log file.
 *
 * Lines that do not decode to a non-empty value are skipped. A session id
 * containing a path separator or a NUL byte is treated as unknown, since
 * the id is used as the file name.
 *
 * @param string $sessionId Session identifier.
 * @return array Decoded turns in file order; empty when the session has no
 *               log file or the file cannot be read.
 */
function wcp_load_session($sessionId) {
    $sessionId = (string) $sessionId;

    // Never read outside the log directory.
    if (strpbrk($sessionId, "/\\\0") !== false) return [];

    $file = "/var/www/weval/wevia-ia/session-logs/$sessionId.jsonl";
    if (!file_exists($file)) return [];

    $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) return []; // unreadable file

    $turns = [];
    foreach ($lines as $line) {
        $turn = json_decode($line, true);
        if ($turn) $turns[] = $turn;
    }
    return $turns;
}


// ═══════════════════════════════════════════════════════════════
// INTEGRATION HELPER
// One-call function to wire all patterns into the existing brain.
// ═══════════════════════════════════════════════════════════════

/**
 * Main integration point: prepare everything needed for a provider call.
 *
 * Steps, in order: route the request to a provider using the effort tier
 * and the circuit breaker state, look up that provider's context limit,
 * compress the context to fit, append consolidated memory (when a database
 * connection is given) and the tool discovery prompt to the system prompt,
 * then log the user turn for crash recovery.
 *
 * @param string    $msg       User message.
 * @param string    $mode      Routing mode.
 * @param string    $intent    Detected intent label.
 * @param array     $history   Conversation history, oldest first.
 * @param string    $sys       System prompt.
 * @param string    $kbContext Knowledge-base context.
 * @param string    $provider  Not consulted; the provider is chosen by the router.
 * @param \PDO|null $pdo       Optional database connection for session memory.
 * @return array ['provider', 'effort', 'sys', 'kb', 'history', 'context_limit',
 *               'max_tokens', 'session_id']
 */
function wcp_enhance_request($msg, $mode, $intent, $history, $sys, $kbContext, $provider, $pdo = null) {
    // Routing: effort tier + circuit breaker state → provider.
    $routing = wcp_smart_route_v3($msg, $mode, $intent, wcp_cb_load());
    $chosenProvider = $routing['provider'];
    $effortPlan = $routing['effort'];

    // Fit the context into the chosen provider's window.
    $windowSize = wcp_get_context_limit($chosenProvider);
    $fitted = wcp_context_collapse($sys, $kbContext, $history, $windowSize);
    $systemPrompt = $fitted['sys'];

    // Consolidated memory is appended after compression.
    if ($pdo) {
        $memoryBlock = wcp_get_session_memory($pdo, 10);
        if ($memoryBlock) {
            $systemPrompt .= $memoryBlock;
        }
    }

    $systemPrompt .= "\n" . wcp_tool_discovery_prompt();

    // Log the user turn before the provider is called; when no PHP session
    // is active a one-off identifier is generated.
    $sid = session_id();
    if (!$sid) {
        $sid = md5(microtime());
    }
    wcp_persist_turn($sid, 'user', $msg, $chosenProvider);

    return [
        'provider' => $chosenProvider,
        'effort' => $effortPlan,
        'sys' => $systemPrompt,
        'kb' => $fitted['kb'],
        'history' => $fitted['history'],
        'context_limit' => $windowSize,
        'max_tokens' => $effortPlan['max_tokens'],
        'session_id' => $sid,
    ];
}

// Record in the error log that the module was loaded, with its version and tool count.
error_log(sprintf('WCP: wevia-claude-code-patterns v%s loaded (%d tools registered)', WCP_VERSION, count(wcp_tool_registry())));