Files
wevia-brain/modules/core/error-taxonomy.php
2026-04-12 23:01:36 +02:00

242 lines
13 KiB
PHP
Executable File

<?php
/**
* WEVIA OPUS — Error Taxonomy & Recovery
*
* Classifies errors and proposes recovery strategies:
* - Network errors (timeout, DNS, SSL, connection refused)
* - LLM errors (overload, context overflow, model not found, generation error, empty response)
* - Database errors (connection, query, constraint, timeout)
* - File system errors (permission, not found, disk full)
* - Application errors (validation, auth, rate limit)
*/
class ErrorTaxonomy {
    /**
     * HTTP status codes that can classify an error on their own when the
     * message text matches no known symptom. Each value is a
     * [category, type] path into the taxonomy.
     */
    private const HTTP_STATUS_FALLBACK = [
        401 => ['application', 'auth_failed'],
        403 => ['application', 'auth_failed'],
        429 => ['application', 'rate_limited'],
    ];

    /** @var array<string, array<string, array<string, mixed>>> category => type => definition */
    private array $taxonomy;

    /** @var array<string, array<string, mixed>> strategy name => configuration */
    private array $recoveryStrategies;

    /** @var array<int, array<string, mixed>> every classification made by this instance, oldest first */
    private array $errorLog = [];

    /**
     * Builds the static taxonomy and the recovery-strategy table.
     */
    public function __construct() {
        $this->initTaxonomy();
        $this->initRecoveryStrategies();
    }

    /**
     * Populates the category => type => definition table.
     *
     * classify() scans this table in declaration order and stops at the
     * first matching symptom, so the order of categories, types and
     * symptoms is significant.
     */
    private function initTaxonomy(): void {
        $this->taxonomy = [
            'network' => [
                'timeout' => ['code' => 'NET_TIMEOUT', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['ETIMEDOUT', 'Connection timed out', 'curl_errno 28'],
                    'common_causes' => ['Server overloaded', 'Network congestion', 'DNS slow']],
                'dns_failure' => ['code' => 'NET_DNS', 'severity' => 'high', 'retryable' => true,
                    'symptoms' => ['Could not resolve host', 'Name resolution failed', 'getaddrinfo failed'],
                    'common_causes' => ['DNS server down', 'Invalid hostname', 'Network disconnected']],
                'ssl_error' => ['code' => 'NET_SSL', 'severity' => 'high', 'retryable' => false,
                    'symptoms' => ['SSL certificate problem', 'certificate verify failed', 'SSL_ERROR'],
                    'common_causes' => ['Expired certificate', 'Self-signed cert', 'Wrong hostname']],
                'connection_refused' => ['code' => 'NET_REFUSED', 'severity' => 'high', 'retryable' => true,
                    'symptoms' => ['Connection refused', 'ECONNREFUSED', 'port 5821'],
                    'common_causes' => ['Service not running', 'Firewall blocking', 'Wrong port']],
            ],
            'llm' => [
                'overload' => ['code' => 'LLM_OVERLOAD', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['429 Too Many Requests', 'Rate limit', 'Server busy'],
                    'common_causes' => ['Too many concurrent requests', 'API quota exceeded']],
                'context_overflow' => ['code' => 'LLM_CTX_OVERFLOW', 'severity' => 'medium', 'retryable' => false,
                    'symptoms' => ['context length', 'maximum context', 'token limit'],
                    'common_causes' => ['Prompt too long', 'Conversation history too big']],
                'model_not_found' => ['code' => 'LLM_NO_MODEL', 'severity' => 'high', 'retryable' => false,
                    'symptoms' => ['model not found', 'not a valid model', 'unknown model'],
                    'common_causes' => ['Model not pulled', 'Wrong model name', 'Ollama not running']],
                'generation_error' => ['code' => 'LLM_GEN_ERROR', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['internal server error', '500', 'generation failed'],
                    'common_causes' => ['GPU out of memory', 'Model corrupted', 'Concurrent load']],
                'empty_response' => ['code' => 'LLM_EMPTY', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['empty response', 'no content', 'null message'],
                    'common_causes' => ['Model confused', 'Temperature too low', 'Prompt issue']],
            ],
            'database' => [
                'connection_failed' => ['code' => 'DB_CONN', 'severity' => 'critical', 'retryable' => true,
                    'symptoms' => ['could not connect', 'Connection refused', 'FATAL:', 'pg_connect'],
                    'common_causes' => ['PostgreSQL not running', 'Wrong credentials', 'Max connections reached']],
                'query_error' => ['code' => 'DB_QUERY', 'severity' => 'medium', 'retryable' => false,
                    'symptoms' => ['syntax error', 'ERROR:', 'relation does not exist', 'column does not exist'],
                    'common_causes' => ['SQL syntax error', 'Table/column renamed', 'Missing migration']],
                'constraint_violation' => ['code' => 'DB_CONSTRAINT', 'severity' => 'low', 'retryable' => false,
                    'symptoms' => ['unique constraint', 'foreign key', 'not-null constraint', 'check constraint'],
                    'common_causes' => ['Duplicate entry', 'Missing reference', 'Invalid data']],
                'timeout' => ['code' => 'DB_TIMEOUT', 'severity' => 'high', 'retryable' => true,
                    'symptoms' => ['statement timeout', 'canceling statement', 'lock timeout'],
                    'common_causes' => ['Slow query', 'Missing index', 'Table lock', 'Dead lock']],
            ],
            'filesystem' => [
                'permission_denied' => ['code' => 'FS_PERM', 'severity' => 'high', 'retryable' => false,
                    'symptoms' => ['Permission denied', 'EACCES', 'Operation not permitted'],
                    'common_causes' => ['Wrong file ownership', 'SELinux', 'Read-only filesystem']],
                'not_found' => ['code' => 'FS_NOTFOUND', 'severity' => 'medium', 'retryable' => false,
                    'symptoms' => ['No such file', 'ENOENT', 'file not found'],
                    'common_causes' => ['Wrong path', 'File deleted', 'Typo in filename']],
                'disk_full' => ['code' => 'FS_FULL', 'severity' => 'critical', 'retryable' => false,
                    'symptoms' => ['No space left', 'ENOSPC', 'disk full'],
                    'common_causes' => ['Logs too big', 'Backups not cleaned', 'Models filling disk']],
            ],
            'application' => [
                // auth_failed is listed before validation on purpose: the
                // generic 'invalid' symptom of validation would otherwise
                // swallow every 'invalid token' message.
                'auth_failed' => ['code' => 'APP_AUTH', 'severity' => 'high', 'retryable' => false,
                    'symptoms' => ['401 Unauthorized', '403 Forbidden', 'authentication failed', 'invalid token'],
                    'common_causes' => ['Expired token', 'Wrong credentials', 'Insufficient permissions']],
                'validation' => ['code' => 'APP_VALID', 'severity' => 'low', 'retryable' => false,
                    'symptoms' => ['validation failed', 'invalid', 'required field'],
                    'common_causes' => ['Missing input', 'Wrong format', 'Out of range']],
                'rate_limited' => ['code' => 'APP_RATE', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['429', 'rate limit', 'too many requests', 'throttled'],
                    'common_causes' => ['Too many requests', 'Burst limit', 'Daily quota']],
            ],
        ];
    }

    /**
     * Populates the strategy table. Each strategy lists the error codes it
     * applies to in 'applicable_to'; '*' means every code.
     */
    private function initRecoveryStrategies(): void {
        $this->recoveryStrategies = [
            'retry_with_backoff' => [
                'description' => 'Retry with exponential backoff',
                'max_attempts' => 3,
                'delays' => [1, 2, 4],
                'applicable_to' => ['NET_TIMEOUT', 'NET_DNS', 'NET_REFUSED', 'LLM_OVERLOAD', 'LLM_GEN_ERROR', 'LLM_EMPTY', 'DB_CONN', 'DB_TIMEOUT', 'APP_RATE'],
            ],
            'fallback_model' => [
                'description' => 'Switch to a smaller/different model',
                'fallback_chain' => ['deepseek-r1:32b' => 'deepseek-r1:14b', 'llama3.3:70b' => 'llama3.1:8b', 'qwen2.5-coder:32b' => 'qwen2.5-coder:14b'],
                'applicable_to' => ['LLM_OVERLOAD', 'LLM_GEN_ERROR', 'LLM_CTX_OVERFLOW'],
            ],
            'compress_context' => [
                'description' => 'Reduce context window by summarizing history',
                'applicable_to' => ['LLM_CTX_OVERFLOW'],
            ],
            'reconnect_db' => [
                'description' => 'Close and reopen database connection',
                'applicable_to' => ['DB_CONN', 'DB_TIMEOUT'],
            ],
            'cleanup_disk' => [
                'description' => 'Clean temp files, old logs, and caches',
                'commands' => [
                    'find /tmp -name "wevia_*" -mtime +1 -delete',
                    'find /opt/wevads/logs/ -name "*.log" -mtime +7 -delete',
                    'journalctl --vacuum-time=3d',
                ],
                'applicable_to' => ['FS_FULL'],
            ],
            'alert_admin' => [
                'description' => 'Send alert to admin when recovery fails',
                'applicable_to' => ['*'], // All errors after recovery fails
            ],
        ];
    }

    /**
     * Classifies an error.
     *
     * The message is searched case-insensitively for each known symptom, in
     * taxonomy order; the first hit wins. If no symptom matches and
     * $httpCode is one of the statuses in HTTP_STATUS_FALLBACK, the status
     * alone decides the classification. Otherwise the error is reported as
     * 'unknown'. Every outcome, including 'unknown', is appended to the
     * error log so that errorSummary() counts it.
     *
     * @param string $errorMessage Raw error text.
     * @param int    $httpCode     HTTP status of the failed call, 0 when not applicable.
     * @return array Classification with keys category, type, code, severity,
     *               retryable and recovery; matched classifications also
     *               carry common_causes and matched_symptom, unknown ones
     *               carry raw (message truncated to 500 characters).
     */
    public function classify(string $errorMessage, int $httpCode = 0): array {
        foreach ($this->taxonomy as $category => $types) {
            foreach ($types as $typeName => $typeConfig) {
                foreach ($typeConfig['symptoms'] as $symptom) {
                    // mb_stripos is already case-insensitive, no need to lowercase first.
                    if (mb_stripos($errorMessage, $symptom) !== false) {
                        $classification = $this->buildClassification($category, $typeName, $symptom);
                        $this->logError($classification, $errorMessage);
                        return $classification;
                    }
                }
            }
        }

        if (isset(self::HTTP_STATUS_FALLBACK[$httpCode])) {
            [$category, $typeName] = self::HTTP_STATUS_FALLBACK[$httpCode];
            $classification = $this->buildClassification($category, $typeName, 'HTTP ' . $httpCode);
            $this->logError($classification, $errorMessage);
            return $classification;
        }

        // Unknown error
        $classification = [
            'category' => 'unknown',
            'type' => 'unclassified',
            'code' => 'UNKNOWN',
            'severity' => 'medium',
            'retryable' => false,
            'raw' => mb_substr($errorMessage, 0, 500),
            'recovery' => [['strategy' => 'alert_admin', 'description' => 'Unknown error — alert admin']],
        ];
        $this->logError($classification, $errorMessage);
        return $classification;
    }

    /**
     * Assembles the classification array for one taxonomy entry.
     */
    private function buildClassification(string $category, string $typeName, string $matchedSymptom): array {
        $typeConfig = $this->taxonomy[$category][$typeName];
        return [
            'category' => $category,
            'type' => $typeName,
            'code' => $typeConfig['code'],
            'severity' => $typeConfig['severity'],
            'retryable' => $typeConfig['retryable'],
            'common_causes' => $typeConfig['common_causes'],
            'matched_symptom' => $matchedSymptom,
            'recovery' => $this->getRecoveryStrategies($typeConfig['code']),
        ];
    }

    /**
     * Appends a classification to the error log together with a timestamp
     * and the raw message truncated to 500 characters.
     */
    private function logError(array $classification, string $errorMessage): void {
        $this->errorLog[] = array_merge($classification, [
            'timestamp' => date('Y-m-d H:i:s'),
            'raw' => mb_substr($errorMessage, 0, 500),
        ]);
    }

    /**
     * Returns the recovery strategies that apply to an error code.
     *
     * @param string $errorCode Taxonomy code such as 'NET_TIMEOUT'.
     * @return array List of ['strategy' => name, 'description' => text], in
     *               strategy-table order. Strategies applicable to '*' are
     *               always included.
     */
    public function getRecoveryStrategies(string $errorCode): array {
        $strategies = [];
        foreach ($this->recoveryStrategies as $name => $config) {
            $targets = $config['applicable_to'];
            if (in_array($errorCode, $targets, true) || in_array('*', $targets, true)) {
                $strategies[] = ['strategy' => $name, 'description' => $config['description']];
            }
        }
        return $strategies;
    }

    /**
     * Runs automatic recovery for an error.
     *
     * Strategies are tried in the order returned by classify(). Only two
     * have an automatic implementation here:
     *  - retry_with_backoff: needs a retryable error and a $retryCallback;
     *    sleeps before each attempt.
     *  - cleanup_disk: runs the configured shell commands. It is attempted
     *    even though FS_FULL is not retryable, since cleaning up is the
     *    recovery itself. Success of the commands is not verified.
     *
     * @param string        $errorMessage  Raw error text.
     * @param int           $httpCode      HTTP status of the failed call, 0 when not applicable.
     * @param callable|null $retryCallback Operation to re-run; a return value
     *                                     other than false counts as success,
     *                                     a thrown \Exception as a failed attempt.
     * @return array 'recovered' => bool plus either strategy details on
     *               success or 'classification' and 'action' on failure.
     */
    public function autoRecover(string $errorMessage, int $httpCode = 0, ?callable $retryCallback = null): array {
        $classification = $this->classify($errorMessage, $httpCode);

        // Try the strategies in order.
        foreach ($classification['recovery'] as $strategy) {
            switch ($strategy['strategy']) {
                case 'retry_with_backoff':
                    if ($classification['retryable'] && $retryCallback !== null) {
                        $outcome = $this->retryWithBackoff($retryCallback);
                        if ($outcome !== null) {
                            return $outcome;
                        }
                    }
                    break;

                case 'cleanup_disk':
                    // shell_exec may be disabled via disable_functions; skip instead of crashing.
                    if (!function_exists('shell_exec')) {
                        break;
                    }
                    foreach ($this->recoveryStrategies['cleanup_disk']['commands'] as $cmd) {
                        shell_exec($cmd);
                    }
                    return ['recovered' => true, 'strategy' => 'cleanup_disk', 'action' => 'Disk cleanup executed'];
            }
        }

        $action = $classification['retryable'] ? 'All recovery strategies failed' : 'Manual intervention required';
        return ['recovered' => false, 'classification' => $classification, 'action' => $action];
    }

    /**
     * Re-runs the callback up to max_attempts times, sleeping for the
     * configured delay before each attempt.
     *
     * @return array|null Success result, or null when every attempt failed.
     */
    private function retryWithBackoff(callable $retryCallback): ?array {
        $config = $this->recoveryStrategies['retry_with_backoff'];
        for ($attempt = 0; $attempt < $config['max_attempts']; $attempt++) {
            sleep($config['delays'][$attempt] ?? 4);
            try {
                $result = $retryCallback();
            } catch (\Exception $e) {
                // A throwing attempt counts as a failure; move on to the next one.
                continue;
            }
            if ($result !== false) {
                return ['recovered' => true, 'strategy' => 'retry_with_backoff', 'attempt' => $attempt + 1, 'result' => $result];
            }
        }
        return null;
    }

    /**
     * Returns every classification recorded by this instance, oldest first.
     */
    public function getErrorLog(): array {
        return $this->errorLog;
    }

    /**
     * Summarises the logged errors for reporting.
     *
     * @return array ['total' => int, 'by_category' => [category => count],
     *               'by_severity' => [severity => count]]
     */
    public function errorSummary(): array {
        $summary = ['total' => count($this->errorLog), 'by_category' => [], 'by_severity' => []];
        foreach ($this->errorLog as $err) {
            $cat = $err['category'] ?? 'unknown';
            $sev = $err['severity'] ?? 'unknown';
            $summary['by_category'][$cat] = ($summary['by_category'][$cat] ?? 0) + 1;
            $summary['by_severity'][$sev] = ($summary['by_severity'][$sev] ?? 0) + 1;
        }
        return $summary;
    }
}