Files
wevia-brain/modules/core/context-manager.php
2026-04-12 23:01:36 +02:00

217 lines
7.7 KiB
PHP
Executable File

<?php
/**
 * WEVIA OPUS — Context Window Manager
 *
 * Manages the limited context window of an LLM:
 * - splits the token budget between named content slots
 * - compresses or truncates content that exceeds its slot
 * - condenses older conversation turns into a short summary
 * - selects the RAG chunks that fit a given budget
 *
 * Context sizes assumed for Ollama models:
 * - 8B models: ~8K tokens
 * - 14B-32B: ~16K-32K tokens
 * - 70B: ~8K-16K tokens
 * - deepseek-r1:32b: ~64K tokens
 */
class ContextManager
{
    /** Heuristic average number of characters per token (tuned for French text). */
    private const CHARS_PER_TOKEN = 3.5;

    /** Marker appended to text that had to be cut (runtime string, kept in French). */
    private const TRUNCATION_NOTICE = "\n\n[... tronqué pour respecter le budget de contexte]";

    /** Context size used when the model is not listed in MODEL_CONTEXT_SIZES. */
    private const DEFAULT_CONTEXT_SIZE = 8192;

    /** Known context window sizes (in tokens), keyed by lowercase model name. */
    private const MODEL_CONTEXT_SIZES = [
        'llama3.1:8b' => 8192,
        'llama3.1:70b' => 8192,
        'llama3.3:70b' => 16384,
        'deepseek-r1:32b' => 65536,
        'deepseek-r1:14b' => 32768,
        'deepseek-r1:8b' => 16384,
        'qwen2.5-coder:32b' => 32768,
        'qwen2.5-coder:14b' => 32768,
        'nemotron:70b' => 8192,
        'mixtral:8x22b' => 65536,
        'codestral:22b' => 32768,
        'gemma2:27b' => 8192,
    ];

    private int $maxTokens;
    private array $prioritySlots;

    /**
     * @param int $maxTokens Total context window size, in tokens.
     */
    public function __construct(int $maxTokens = 16000)
    {
        $this->maxTokens = $maxTokens;
        $this->initPrioritySlots();
    }

    /**
     * Defines the slots and the share of the total budget each one receives.
     *
     * The percentages add up to 100. The 'priority' value is stored and exposed
     * through getSlots() but is not consulted by buildContext().
     */
    private function initPrioritySlots(): void
    {
        $this->prioritySlots = [
            'system_prompt' => ['budget_pct' => 15, 'priority' => 1, 'compressible' => false],
            'user_query' => ['budget_pct' => 10, 'priority' => 1, 'compressible' => false],
            'memory_facts' => ['budget_pct' => 10, 'priority' => 2, 'compressible' => true],
            'rag_context' => ['budget_pct' => 20, 'priority' => 2, 'compressible' => true],
            'conversation_history' => ['budget_pct' => 25, 'priority' => 3, 'compressible' => true],
            'tool_results' => ['budget_pct' => 10, 'priority' => 2, 'compressible' => true],
            'response_budget' => ['budget_pct' => 10, 'priority' => 1, 'compressible' => false],
        ];
    }

    /**
     * Builds the context for an LLM call, fitting every component into its slot budget.
     *
     * Components whose key is not a known slot are ignored, as are null values.
     * Slots are processed in declaration order; unused budget is not redistributed.
     *
     * @param array<string, string> $components Slot name => text content.
     * @return array<string, array{content: string, tokens: int, compressed: bool}>
     *         'tokens' is always the estimate of the returned content and
     *         'compressed' is true only when compress() was applied.
     */
    public function buildContext(array $components): array
    {
        $context = [];

        foreach ($this->prioritySlots as $slot => $config) {
            if (!isset($components[$slot])) {
                continue;
            }

            $budget = max(0, (int)($this->maxTokens * $config['budget_pct'] / 100));
            $content = $components[$slot];
            $tokenCount = $this->estimateTokens($content);

            if ($tokenCount <= $budget) {
                $context[$slot] = ['content' => $content, 'tokens' => $tokenCount, 'compressed' => false];
                continue;
            }

            // Over budget: compressible slots are condensed, the others are cut.
            $compressible = (bool)$config['compressible'];
            $fitted = $compressible
                ? $this->compress($content, $budget)
                : $this->truncate($content, $budget);

            $context[$slot] = [
                'content' => $fitted,
                // Report the real size of what is returned, not the nominal budget.
                'tokens' => $this->estimateTokens($fitted),
                'compressed' => $compressible,
            ];
        }

        return $context;
    }

    /**
     * Condenses a conversation history that exceeds the token budget.
     *
     * Strategy: keep the first message, replace the middle of the conversation
     * with a single 'system' summary message, and keep the last three messages.
     * Histories of four messages or fewer are returned unchanged. This is a
     * best-effort reduction: the result is not re-checked against $maxTokens.
     *
     * @param array<int, array<string, mixed>> $messages Messages with 'role' and 'content' keys.
     * @param int $maxTokens Budget above which the history is condensed.
     * @return array<int, array<string, mixed>>
     */
    public function compressHistory(array $messages, int $maxTokens): array
    {
        $totalTokens = 0;
        foreach ($messages as $msg) {
            $totalTokens += $this->estimateTokens($this->messageText($msg));
        }
        if ($totalTokens <= $maxTokens) {
            return $messages;
        }

        $count = count($messages);
        $keepFirst = 1;
        $keepLast = min(3, $count - 1);
        if ($count <= $keepFirst + $keepLast) {
            return $messages;
        }

        $first = array_slice($messages, 0, $keepFirst);
        $middle = array_slice($messages, $keepFirst, $count - $keepFirst - $keepLast);
        $last = array_slice($messages, -$keepLast);

        $middleSummary = $this->summarizeMessages($middle);

        return array_merge(
            $first,
            [['role' => 'system', 'content' => "[Résumé des échanges précédents: $middleSummary]"]],
            $last
        );
    }

    /**
     * Selects RAG chunks, in the given order, until the budget is exhausted.
     *
     * Chunks are expected to be sorted by relevance already. Selection stops at
     * the first chunk that does not fit, so a less relevant chunk never takes
     * the place of a more relevant one. Chunks without a string 'content' are skipped.
     *
     * @param array<int, array<string, mixed>> $chunks
     * @param int $maxTokens Token budget for all selected chunks together.
     * @return array<int, array<string, mixed>>
     */
    public function selectRAGChunks(array $chunks, int $maxTokens): array
    {
        $selected = [];
        $usedTokens = 0;

        foreach ($chunks as $chunk) {
            // A chunk with no usable text would otherwise raise a TypeError below.
            if (!is_array($chunk) || !isset($chunk['content']) || !is_string($chunk['content'])) {
                continue;
            }

            $chunkTokens = $this->estimateTokens($chunk['content']);
            if ($usedTokens + $chunkTokens > $maxTokens) {
                break;
            }

            $selected[] = $chunk;
            $usedTokens += $chunkTokens;
        }

        return $selected;
    }

    /**
     * Estimates the number of tokens in a text.
     *
     * Heuristic: 1 token ≈ 3.5 characters (French is slightly denser than
     * English, for which ~4 characters per token is the usual rule of thumb).
     * The result is rounded down.
     */
    public function estimateTokens(string $text): int
    {
        return (int)(mb_strlen($text) / self::CHARS_PER_TOKEN);
    }

    /**
     * Condenses a text so that it fits a token budget.
     *
     * When only a small reduction is needed (more than 70% of the text fits),
     * the text is simply truncated. Otherwise each paragraph that exceeds its
     * share of the budget is reduced to its first sentence, and the result is
     * capped to the target length.
     */
    private function compress(string $text, int $targetTokens): string
    {
        $currentTokens = $this->estimateTokens($text);
        if ($currentTokens <= $targetTokens) {
            return $text;
        }
        // Nothing can fit; also prevents a division by zero below.
        if ($targetTokens <= 0) {
            return '';
        }

        $ratio = $targetTokens / $currentTokens;
        if ($ratio > 0.7) {
            return $this->truncate($text, $targetTokens);
        }

        $targetChars = (int)(mb_strlen($text) * $ratio);
        $paragraphs = preg_split('/\n{2,}/', $text) ?: [$text];
        $charsPerParagraph = (int)($targetChars / max(1, count($paragraphs)));

        $compressed = '';
        foreach ($paragraphs as $para) {
            $kept = mb_strlen($para) <= $charsPerParagraph
                ? $para
                : $this->firstSentence($para);
            $compressed .= $kept . "\n\n";
        }

        return mb_substr($compressed, 0, $targetChars);
    }

    /**
     * Returns the first sentence of a paragraph, including its own terminator.
     *
     * A sentence ends at '.', '!' or '?' followed by whitespace or the end of
     * the paragraph, so decimals such as "3.14" are not split. If no terminator
     * is found, the trimmed paragraph is returned as is.
     */
    private function firstSentence(string $paragraph): string
    {
        $paragraph = trim($paragraph);
        if (preg_match('/^.*?[.!?]+(?=\s|$)/su', $paragraph, $match) === 1) {
            return $match[0];
        }

        return $paragraph;
    }

    /**
     * Cuts a text so that the result, notice included, fits the token budget.
     *
     * The cut is moved back to the last space when that space lies in the final
     * 20% of the kept text, to avoid splitting a word. When the budget is too
     * small to hold the notice, the text is hard-cut without it.
     */
    private function truncate(string $text, int $maxTokens): string
    {
        $maxChars = (int)($maxTokens * self::CHARS_PER_TOKEN);
        if ($maxChars <= 0) {
            return '';
        }
        if (mb_strlen($text) <= $maxChars) {
            return $text;
        }

        $noticeLength = mb_strlen(self::TRUNCATION_NOTICE);
        if ($maxChars <= $noticeLength) {
            return mb_substr($text, 0, $maxChars);
        }

        // Reserve room for the notice so the final string stays within budget.
        $keepChars = $maxChars - $noticeLength;
        $truncated = mb_substr($text, 0, $keepChars);

        $lastSpace = mb_strrpos($truncated, ' ');
        if ($lastSpace !== false && $lastSpace > $keepChars * 0.8) {
            $truncated = mb_substr($truncated, 0, $lastSpace);
        }

        return $truncated . self::TRUNCATION_NOTICE;
    }

    /**
     * Produces a one-line digest of a list of messages (at most 500 characters).
     *
     * Each message contributes "U: ..." (role 'user') or "A: ..." (any other
     * role) followed by the first 80 characters of its content; "..." is added
     * only when the content was actually shortened.
     */
    private function summarizeMessages(array $messages): string
    {
        $entries = [];
        foreach ($messages as $msg) {
            $label = (is_array($msg) && ($msg['role'] ?? '') === 'user') ? 'U' : 'A';
            $content = $this->messageText($msg);
            $snippet = mb_substr($content, 0, 80);
            if (mb_strlen($content) > 80) {
                $snippet .= '...';
            }
            $entries[] = "$label: $snippet";
        }

        return mb_substr(implode(' | ', $entries), 0, 500);
    }

    /**
     * Extracts the textual content of a message; non-textual content yields ''.
     *
     * @param mixed $msg
     */
    private function messageText($msg): string
    {
        $content = is_array($msg) ? ($msg['content'] ?? '') : '';

        return is_scalar($content) ? (string)$content : '';
    }

    /**
     * Sets the token budget to the context size of the given model.
     *
     * The lookup ignores case and surrounding whitespace. Unknown models fall
     * back to DEFAULT_CONTEXT_SIZE.
     */
    public function adaptToModel(string $model): void
    {
        $key = strtolower(trim($model));
        $this->maxTokens = self::MODEL_CONTEXT_SIZES[$key] ?? self::DEFAULT_CONTEXT_SIZE;
    }

    /** Returns the current total token budget. */
    public function getMaxTokens(): int
    {
        return $this->maxTokens;
    }

    /** Returns the slot configuration (budget share, priority, compressibility). */
    public function getSlots(): array
    {
        return $this->prioritySlots;
    }
}