ollamaUrl = $ollamaUrl;
        if ($pdo) {
            $this->pdo = $pdo;
        } else {
            $this->pdo = new PDO("pgsql:host=127.0.0.1;dbname=wevia_db", "postgres", "");
            $this->pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
        }
        $this->ensureSchema();
    }

    /**
     * Creates the pgvector extension, the kb_embeddings table and its
     * ivfflat cosine index when they do not exist yet.
     *
     * Every statement uses IF NOT EXISTS, so calling this repeatedly is harmless.
     */
    private function ensureSchema(): void
    {
        $this->pdo->exec("CREATE EXTENSION IF NOT EXISTS vector");

        $this->pdo->exec(
            " CREATE TABLE IF NOT EXISTS kb_embeddings ("
            . " id SERIAL PRIMARY KEY,"
            . " source VARCHAR(255),"
            . " category VARCHAR(100),"
            . " chunk_text TEXT NOT NULL,"
            . " embedding vector(768),"
            . " metadata JSONB DEFAULT '{}',"
            . " created_at TIMESTAMP DEFAULT NOW(),"
            . " access_count INT DEFAULT 0"
            . " ) "
        );

        $this->pdo->exec(
            " CREATE INDEX IF NOT EXISTS idx_kb_embedding"
            . " ON kb_embeddings USING ivfflat (embedding vector_cosine_ops)"
            . " WITH (lists = 100) "
        );
    }

    /**
     * Ingests a document: splits it into overlapping chunks, embeds each chunk
     * and stores one kb_embeddings row per chunk.
     *
     * Chunks whose embedding cannot be obtained are skipped (best effort).
     *
     * @param string               $text     Raw document text.
     * @param string               $source   Value stored in the `source` column.
     * @param string               $category Value stored in the `category` column.
     * @param array<string, mixed> $meta     Extra data stored as JSONB in `metadata`.
     *
     * @return int Number of chunks actually inserted.
     */
    public function ingest(string $text, string $source, string $category = 'general', array $meta = []): int
    {
        $chunks = $this->chunkText($text, 512, 64);
        if ($chunks === []) {
            return 0;
        }

        // Prepared once and reused for every chunk instead of re-preparing per iteration.
        $insert = $this->pdo->prepare(
            " INSERT INTO kb_embeddings (source, category, chunk_text, embedding, metadata)"
            . " VALUES (?, ?, ?, ?::vector, ?::jsonb) "
        );

        // An empty PHP array encodes as "[]"; store "{}" instead so the value
        // has the same JSON type as the column default.
        $metaJson = json_encode($meta === [] ? new \stdClass() : $meta);

        $stored = 0;
        foreach ($chunks as $chunk) {
            $embedding = $this->getEmbedding($chunk);
            if (!$embedding) {
                continue;
            }

            $insert->execute([
                $source,
                $category,
                $chunk,
                '[' . implode(',', $embedding) . ']',
                $metaJson,
            ]);
            $stored++;
        }

        return $stored;
    }

    /**
     * Semantic search: returns the stored chunks closest to the query.
     *
     * Rows are ordered by cosine distance to the query embedding, limited to
     * $topK (or $this->topK when null), then filtered by $this->minSimilarity.
     * access_count is incremented for the rows that are returned.
     *
     * @param string      $query    Free-text query to embed.
     * @param int|null    $topK     Maximum number of rows fetched; null uses $this->topK.
     * @param string|null $category Restrict to this category; null or '' means no filter.
     *
     * @return array<int, array<string, mixed>> Rows with keys id, source, category,
     *                                          chunk_text, metadata and similarity.
     */
    public function search(string $query, ?int $topK = null, ?string $category = null): array
    {
        $limit = (int) ($topK ?? $this->topK);
        if ($limit < 1) {
            // Nothing can be returned; skip the embedding call and the query.
            return [];
        }

        $embedding = $this->getEmbedding($query);
        if (!$embedding) {
            return [];
        }
        $vector = '[' . implode(',', $embedding) . ']';

        $sql = "SELECT id, source, category, chunk_text, metadata,"
            . " 1 - (embedding <=> ?::vector) as similarity"
            . " FROM kb_embeddings WHERE 1=1";
        $params = [$vector];

        // Explicit comparison: a plain truthiness test would ignore the category '0'.
        if ($category !== null && $category !== '') {
            $sql .= " AND category = ?";
            $params[] = $category;
        }

        $sql .= " ORDER BY embedding <=> ?::vector LIMIT ?";
        $params[] = $vector;

        $stmt = $this->pdo->prepare($sql);
        foreach ($params as $index => $value) {
            $stmt->bindValue($index + 1, $value, PDO::PARAM_STR);
        }
        // LIMIT is bound as an integer rather than as a string.
        $stmt->bindValue(count($params) + 1, $limit, PDO::PARAM_INT);
        $stmt->execute();

        $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);

        $hits = array_values(array_filter(
            $rows,
            fn ($row) => $row['similarity'] >= $this->minSimilarity
        ));

        if ($hits !== []) {
            // One UPDATE for all returned rows; rows rejected by the similarity
            // threshold are never handed to the caller and are not counted.
            $ids = array_map(static fn ($row) => (int) $row['id'], $hits);
            $marks = implode(',', array_fill(0, count($ids), '?'));
            $update = $this->pdo->prepare(
                "UPDATE kb_embeddings SET access_count = access_count + 1 WHERE id IN ({$marks})"
            );
            $update->execute($ids);
        }

        return $hits;
    }

    /**
     * Builds the RAG context block for a query.
     *
     * @param string      $query    Free-text query.
     * @param string|null $category Optional category filter forwarded to search().
     *
     * @return string Formatted context, or '' when search() finds nothing.
     */
    public function getContext(string $query, ?string $category = null): string
    {
        $results = $this->search($query, $this->topK, $category);
        if (empty($results)) {
            return '';
        }

        $context = "--- CONTEXTE KNOWLEDGE BASE ---\n";
        foreach ($results as $r) {
            // Similarity expressed as a whole percentage.
            $sim = round($r['similarity'] * 100);
            $context .= "[{$r['source']}] (pertinence: {$sim}%)\n{$r['chunk_text']}\n\n";
        }
        $context .= "--- FIN CONTEXTE ---\n";

        return $context;
    }

    /**
     * Requests the embedding of a text from the Ollama /api/embed endpoint.
     *
     * @param string $text Text to embed.
     *
     * @return array|null The first vector of the response's `embeddings` list, or
     *                    null when the request fails or the response has no usable vector.
     */
    private function getEmbedding(string $text): ?array
    {
        // Substitute invalid UTF-8 instead of letting json_encode() fail and
        // send an empty request body.
        $payload = json_encode(
            [
                'model' => $this->embeddingModel,
                'input' => $text,
            ],
            JSON_INVALID_UTF8_SUBSTITUTE
        );
        if ($payload === false) {
            return null;
        }

        $ch = curl_init("{$this->ollamaUrl}/api/embed");
        if ($ch === false) {
            return null;
        }

        curl_setopt_array($ch, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $payload,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        ]);
        $raw = curl_exec($ch);
        curl_close($ch);

        // curl_exec() returns false on transport errors (timeout, refused connection, ...).
        if (!is_string($raw) || $raw === '') {
            return null;
        }

        $data = json_decode($raw, true);
        if (!is_array($data)) {
            return null;
        }

        // Guard the declared ?array return type against unexpected payload shapes.
        $embedding = $data['embeddings'][0] ?? null;

        return is_array($embedding) && $embedding !== [] ? $embedding : null;
    }

    /**
     * Splits a text into chunks of whitespace-separated words, consecutive
     * chunks sharing $overlap words.
     *
     * Chunks of 20 characters or fewer are dropped.
     *
     * @param string $text      Text to split.
     * @param int    $chunkSize Maximum number of words per chunk (>= 1).
     * @param int    $overlap   Words shared between consecutive chunks (0 <= overlap < chunkSize).
     *
     * @return array<int, string> Chunks in document order.
     *
     * @throws \InvalidArgumentException When the size/overlap pair cannot make progress.
     */
    private function chunkText(string $text, int $chunkSize = 512, int $overlap = 64): array
    {
        if ($chunkSize < 1 || $overlap < 0 || $overlap >= $chunkSize) {
            // A non-positive step would never advance and loop forever.
            throw new \InvalidArgumentException('chunkSize must be >= 1 and overlap must be in [0, chunkSize).');
        }

        // The `u` modifier keeps the split UTF-8 aware; without it, depending on
        // the locale, a byte inside a multibyte character may be taken for whitespace.
        $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            // Input is not valid UTF-8: fall back to a byte-wise split.
            $words = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        }

        $total = count($words);
        $step = $chunkSize - $overlap;
        $chunks = [];

        for ($start = 0; $start < $total; $start += $step) {
            $chunk = implode(' ', array_slice($words, $start, $chunkSize));
            if (mb_strlen($chunk) > 20) {
                $chunks[] = $chunk;
            }

            // This window already reaches the end of the text; a further one
            // would only repeat words contained in this chunk.
            if ($start + $chunkSize >= $total) {
                break;
            }
        }

        return $chunks;
    }

    /**
     * Knowledge-base statistics.
     *
     * @return array{total_chunks: mixed, categories: array<int, array<string, mixed>>}
     *         `categories` holds one row per category (chunks, total_access, sources),
     *         ordered by chunk count descending; `total_chunks` is the overall row count.
     */
    public function getStats(): array
    {
        $stats = $this->pdo->query(
            " SELECT category, COUNT(*) as chunks, SUM(access_count) as total_access,"
            . " COUNT(DISTINCT source) as sources"
            . " FROM kb_embeddings GROUP BY category ORDER BY chunks DESC "
        )->fetchAll(PDO::FETCH_ASSOC);

        $total = $this->pdo->query("SELECT COUNT(*) FROM kb_embeddings")->fetchColumn();

        return ['total_chunks' => $total, 'categories' => $stats];
    }
}