initTaxonomy();
        $this->initRecoveryStrategies();
    }

    /**
     * Builds the error taxonomy as category => type => descriptor.
     *
     * Each descriptor carries a `code`, a `severity`, a `retryable` flag, the
     * `symptoms` (substrings that classify() searches for, case-insensitively)
     * and a list of human-readable `common_causes`.
     *
     * Declaration order is significant: classify() returns on the first symptom
     * that matches, so an earlier entry shadows any later entry whose symptom
     * also occurs in the message. Examples with the current data:
     *  - 'invalid' (APP_VALID) is tested before 'invalid token' (APP_AUTH);
     *  - 'Connection refused' (NET_REFUSED) is tested before the same symptom
     *    under DB_CONN;
     *  - 'ERROR:' (DB_QUERY) is tested before the DB_CONSTRAINT and DB_TIMEOUT
     *    symptoms, so a message containing both is classified as DB_QUERY.
     */
    private function initTaxonomy(): void
    {
        $this->taxonomy = [
            'network' => [
                'timeout' => [
                    'code' => 'NET_TIMEOUT', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['ETIMEDOUT', 'Connection timed out', 'curl_errno 28'],
                    'common_causes' => ['Server overloaded', 'Network congestion', 'DNS slow'],
                ],
                'dns_failure' => [
                    'code' => 'NET_DNS', 'severity' => 'high', 'retryable' => true,
                    'symptoms' => ['Could not resolve host', 'Name resolution failed', 'getaddrinfo failed'],
                    'common_causes' => ['DNS server down', 'Invalid hostname', 'Network disconnected'],
                ],
                'ssl_error' => [
                    'code' => 'NET_SSL', 'severity' => 'high', 'retryable' => false,
                    'symptoms' => ['SSL certificate problem', 'certificate verify failed', 'SSL_ERROR'],
                    'common_causes' => ['Expired certificate', 'Self-signed cert', 'Wrong hostname'],
                ],
                'connection_refused' => [
                    'code' => 'NET_REFUSED', 'severity' => 'high', 'retryable' => true,
                    'symptoms' => ['Connection refused', 'ECONNREFUSED', 'port 5821'],
                    'common_causes' => ['Service not running', 'Firewall blocking', 'Wrong port'],
                ],
            ],
            'llm' => [
                'overload' => [
                    'code' => 'LLM_OVERLOAD', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['429 Too Many Requests', 'Rate limit', 'Server busy'],
                    'common_causes' => ['Too many concurrent requests', 'API quota exceeded'],
                ],
                'context_overflow' => [
                    'code' => 'LLM_CTX_OVERFLOW', 'severity' => 'medium', 'retryable' => false,
                    'symptoms' => ['context length', 'maximum context', 'token limit'],
                    'common_causes' => ['Prompt too long', 'Conversation history too big'],
                ],
                'model_not_found' => [
                    'code' => 'LLM_NO_MODEL', 'severity' => 'high', 'retryable' => false,
                    'symptoms' => ['model not found', 'not a valid model', 'unknown model'],
                    'common_causes' => ['Model not pulled', 'Wrong model name', 'Ollama not running'],
                ],
                'generation_error' => [
                    'code' => 'LLM_GEN_ERROR', 'severity' => 'medium', 'retryable' => true,
                    // 'generation failed' must be a single-line literal: an embedded
                    // line break inside the symptom would prevent it from ever matching.
                    'symptoms' => ['internal server error', '500', 'generation failed'],
                    'common_causes' => ['GPU out of memory', 'Model corrupted', 'Concurrent load'],
                ],
                'empty_response' => [
                    'code' => 'LLM_EMPTY', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['empty response', 'no content', 'null message'],
                    'common_causes' => ['Model confused', 'Temperature too low', 'Prompt issue'],
                ],
            ],
            'database' => [
                'connection_failed' => [
                    'code' => 'DB_CONN', 'severity' => 'critical', 'retryable' => true,
                    'symptoms' => ['could not connect', 'Connection refused', 'FATAL:', 'pg_connect'],
                    'common_causes' => ['PostgreSQL not running', 'Wrong credentials', 'Max connections reached'],
                ],
                'query_error' => [
                    'code' => 'DB_QUERY', 'severity' => 'medium', 'retryable' => false,
                    'symptoms' => ['syntax error', 'ERROR:', 'relation does not exist', 'column does not exist'],
                    'common_causes' => ['SQL syntax error', 'Table/column renamed', 'Missing migration'],
                ],
                'constraint_violation' => [
                    'code' => 'DB_CONSTRAINT', 'severity' => 'low', 'retryable' => false,
                    'symptoms' => ['unique constraint', 'foreign key', 'not-null constraint', 'check constraint'],
                    'common_causes' => ['Duplicate entry', 'Missing reference', 'Invalid data'],
                ],
                'timeout' => [
                    'code' => 'DB_TIMEOUT', 'severity' => 'high', 'retryable' => true,
                    'symptoms' => ['statement timeout', 'canceling statement', 'lock timeout'],
                    'common_causes' => ['Slow query', 'Missing index', 'Table lock', 'Dead lock'],
                ],
            ],
            'filesystem' => [
                'permission_denied' => [
                    'code' => 'FS_PERM', 'severity' => 'high', 'retryable' => false,
                    'symptoms' => ['Permission denied', 'EACCES', 'Operation not permitted'],
                    'common_causes' => ['Wrong file ownership', 'SELinux', 'Read-only filesystem'],
                ],
                'not_found' => [
                    'code' => 'FS_NOTFOUND', 'severity' => 'medium', 'retryable' => false,
                    'symptoms' => ['No such file', 'ENOENT', 'file not found'],
                    'common_causes' => ['Wrong path', 'File deleted', 'Typo in filename'],
                ],
                'disk_full' => [
                    'code' => 'FS_FULL', 'severity' => 'critical', 'retryable' => false,
                    'symptoms' => ['No space left', 'ENOSPC', 'disk full'],
                    'common_causes' => ['Logs too big', 'Backups not cleaned', 'Models filling disk'],
                ],
            ],
            'application' => [
                'validation' => [
                    'code' => 'APP_VALID', 'severity' => 'low', 'retryable' => false,
                    'symptoms' => ['validation failed', 'invalid', 'required field'],
                    'common_causes' => ['Missing input', 'Wrong format', 'Out of range'],
                ],
                'auth_failed' => [
                    'code' => 'APP_AUTH', 'severity' => 'high', 'retryable' => false,
                    'symptoms' => ['401 Unauthorized', '403 Forbidden', 'authentication failed', 'invalid token'],
                    'common_causes' => ['Expired token', 'Wrong credentials', 'Insufficient permissions'],
                ],
                'rate_limited' => [
                    'code' => 'APP_RATE', 'severity' => 'medium', 'retryable' => true,
                    'symptoms' => ['429', 'rate limit', 'too many requests', 'throttled'],
                    'common_causes' => ['Too many requests', 'Burst limit', 'Daily quota'],
                ],
            ],
        ];
    }

    /**
     * Builds the recovery strategy table: strategy name => configuration.
     *
     * Every strategy has a `description` and an `applicable_to` list of error
     * codes; the single entry '*' means "applies to every code". Some strategies
     * carry extra settings (`max_attempts`/`delays`, `fallback_chain`, `commands`).
     *
     * Only 'retry_with_backoff' and 'cleanup_disk' have an execution branch in
     * autoRecover(); the other strategies are only reported by
     * getRecoveryStrategies().
     */
    private function initRecoveryStrategies(): void
    {
        $this->recoveryStrategies = [
            'retry_with_backoff' => [
                'description' => 'Retry with exponential backoff',
                'max_attempts' => 3,
                'delays' => [1, 2, 4], // seconds slept before attempt 1, 2, 3
                'applicable_to' => [
                    'NET_TIMEOUT', 'NET_DNS', 'NET_REFUSED',
                    'LLM_OVERLOAD', 'LLM_GEN_ERROR', 'LLM_EMPTY',
                    'DB_CONN', 'DB_TIMEOUT', 'APP_RATE',
                ],
            ],
            'fallback_model' => [
                'description' => 'Switch to a smaller/different model',
                'fallback_chain' => [
                    'deepseek-r1:32b' => 'deepseek-r1:14b',
                    'llama3.3:70b' => 'llama3.1:8b',
                    'qwen2.5-coder:32b' => 'qwen2.5-coder:14b',
                ],
                'applicable_to' => ['LLM_OVERLOAD', 'LLM_GEN_ERROR', 'LLM_CTX_OVERFLOW'],
            ],
            'compress_context' => [
                'description' => 'Reduce context window by summarizing history',
                'applicable_to' => ['LLM_CTX_OVERFLOW'],
            ],
            'reconnect_db' => [
                'description' => 'Close and reopen database connection',
                'applicable_to' => ['DB_CONN', 'DB_TIMEOUT'],
            ],
            'cleanup_disk' => [
                'description' => 'Clean temp files, old logs, and caches',
                'commands' => [
                    'find /tmp -name "wevia_*" -mtime +1 -delete',
                    'find /opt/wevads/logs/ -name "*.log" -mtime +7 -delete',
                    'journalctl --vacuum-time=3d',
                ],
                'applicable_to' => ['FS_FULL'],
            ],
            'alert_admin' => [
                'description' => 'Send alert to admin when recovery fails',
                // Wildcard: listed for every error code.
                'applicable_to' => ['*'],
            ],
        ];
    }

    /**
     * Classifies an error message against the taxonomy.
     *
     * The message is lower-cased and searched for each symptom in declaration
     * order; the first symptom found decides the classification. A successful
     * classification is appended to the internal error log together with a
     * timestamp and the first 500 characters of the raw message.
     *
     * Messages that match no symptom are returned as code 'UNKNOWN' and are NOT
     * appended to the error log.
     *
     * @param string $errorMessage Raw error text to classify.
     * @param int    $httpCode     Accepted for API compatibility; the matching
     *                             logic currently does not read it.
     *
     * @return array Classification with keys `category`, `type`, `code`,
     *               `severity`, `retryable` and `recovery`; matched errors also
     *               carry `common_causes` and `matched_symptom`, unknown errors
     *               carry `raw` instead.
     */
    public function classify(string $errorMessage, int $httpCode = 0): array
    {
        $errorLower = mb_strtolower($errorMessage);

        foreach ($this->taxonomy as $category => $types) {
            foreach ($types as $typeName => $typeConfig) {
                foreach ($typeConfig['symptoms'] as $symptom) {
                    if (mb_stripos($errorLower, mb_strtolower($symptom)) === false) {
                        continue;
                    }

                    $classification = [
                        'category' => $category,
                        'type' => $typeName,
                        'code' => $typeConfig['code'],
                        'severity' => $typeConfig['severity'],
                        'retryable' => $typeConfig['retryable'],
                        'common_causes' => $typeConfig['common_causes'],
                        'matched_symptom' => $symptom,
                    ];
                    $classification['recovery'] = $this->getRecoveryStrategies($typeConfig['code']);

                    // Keep a bounded copy of the raw message so the log cannot grow
                    // unboundedly on very large error payloads.
                    $this->errorLog[] = array_merge($classification, [
                        'timestamp' => date('Y-m-d H:i:s'),
                        'raw' => mb_substr($errorMessage, 0, 500),
                    ]);

                    return $classification;
                }
            }
        }

        // Unknown error: nothing in the taxonomy matched.
        return [
            'category' => 'unknown',
            'type' => 'unclassified',
            'code' => 'UNKNOWN',
            'severity' => 'medium',
            'retryable' => false,
            'raw' => mb_substr($errorMessage, 0, 500),
            'recovery' => [['strategy' => 'alert_admin', 'description' => 'Unknown error — alert admin']],
        ];
    }

    /**
     * Returns the recovery strategies applicable to an error code.
     *
     * A strategy applies when its `applicable_to` list contains the code itself
     * or the wildcard '*'. Strategies are returned in declaration order.
     *
     * @param string $errorCode Error code such as 'NET_TIMEOUT'.
     *
     * @return array List of ['strategy' => name, 'description' => text] entries.
     */
    public function getRecoveryStrategies(string $errorCode): array
    {
        $strategies = [];

        foreach ($this->recoveryStrategies as $name => $config) {
            // Strict comparison: codes are strings, so no type juggling is wanted.
            $applies = in_array($errorCode, $config['applicable_to'], true)
                || in_array('*', $config['applicable_to'], true);

            if ($applies) {
                $strategies[] = ['strategy' => $name, 'description' => $config['description']];
            }
        }

        return $strategies;
    }

    /**
     * Classifies an error and attempts automatic recovery.
     *
     * Non-retryable classifications return immediately without trying any
     * strategy. Otherwise the applicable strategies are walked in order:
     *  - 'retry_with_backoff' calls $retryCallback up to `max_attempts` times,
     *    sleeping the configured delay BEFORE each attempt (blocking). A return
     *    value other than `false` counts as success; a thrown \Exception counts
     *    as a failed attempt. Skipped when no callback is given.
     *  - 'cleanup_disk' runs the configured shell commands and reports success
     *    without inspecting their outcome.
     *
     * NOTE(review): with the current taxonomy 'cleanup_disk' only applies to
     * FS_FULL, which is marked non-retryable, so that branch is never reached
     * through this method.
     *
     * @param string        $errorMessage  Raw error text to classify.
     * @param int           $httpCode      Forwarded to classify().
     * @param callable|null $retryCallback Operation to re-run; no arguments.
     *
     * @return array On success: `recovered` => true plus `strategy` and either
     *               `attempt`/`result` or `action`. On failure: `recovered` =>
     *               false, the `classification` and an `action` message.
     */
    public function autoRecover(string $errorMessage, int $httpCode = 0, ?callable $retryCallback = null): array
    {
        $classification = $this->classify($errorMessage, $httpCode);

        if (!$classification['retryable']) {
            return [
                'recovered' => false,
                'classification' => $classification,
                'action' => 'Manual intervention required',
            ];
        }

        // Try the strategies in the order they were declared.
        foreach ($classification['recovery'] as $strategy) {
            switch ($strategy['strategy']) {
                case 'retry_with_backoff':
                    if ($retryCallback !== null) {
                        $config = $this->recoveryStrategies['retry_with_backoff'];
                        for ($i = 0; $i < $config['max_attempts']; $i++) {
                            // Fall back to 4 seconds if fewer delays than attempts are configured.
                            sleep($config['delays'][$i] ?? 4);
                            try {
                                $result = $retryCallback();
                                if ($result !== false) {
                                    return [
                                        'recovered' => true,
                                        'strategy' => 'retry_with_backoff',
                                        'attempt' => $i + 1,
                                        'result' => $result,
                                    ];
                                }
                            } catch (\Exception $e) {
                                // Best effort: a throwing attempt is treated like a failed one.
                                continue;
                            }
                        }
                    }
                    break;

                case 'cleanup_disk':
                    foreach ($this->recoveryStrategies['cleanup_disk']['commands'] as $cmd) {
                        shell_exec($cmd);
                    }

                    return [
                        'recovered' => true,
                        'strategy' => 'cleanup_disk',
                        'action' => 'Disk cleanup executed',
                    ];
            }
        }

        return [
            'recovered' => false,
            'classification' => $classification,
            'action' => 'All recovery strategies failed',
        ];
    }

    /**
     * Returns every classification logged so far by classify().
     *
     * @return array List of logged classifications (each with `timestamp` and `raw`).
     */
    public function getErrorLog(): array
    {
        return $this->errorLog;
    }

    /**
     * Summarizes the logged errors for reporting.
     *
     * @return array ['total' => int, 'by_category' => [category => count],
     *               'by_severity' => [severity => count]]. Entries missing a
     *               category or severity are counted under 'unknown'.
     */
    public function errorSummary(): array
    {
        $summary = ['total' => count($this->errorLog), 'by_category' => [], 'by_severity' => []];

        foreach ($this->errorLog as $err) {
            $cat = $err['category'] ?? 'unknown';
            $sev = $err['severity'] ?? 'unknown';
            $summary['by_category'][$cat] = ($summary['by_category'][$cat] ?? 0) + 1;
            $summary['by_severity'][$sev] = ($summary['by_severity'][$sev] ?? 0) + 1;
        }

        return $summary;
    }
}