147 lines
7.7 KiB
PHP
147 lines
7.7 KiB
PHP
<?php
/**
 * WEVAL Brain — KB Auto-Enrichment Cron
 *
 * Runs every 2 hours to generate new KB entries via AI providers.
 * Injects the generated entries into knowledge_base + brain_knowledge.
 * Uses the WEVAL Brain multi-provider failover.
 */

// Destination file for every line written by logMsg().
$LOG_FILE = '/opt/wevads/logs/kb-auto-enrich.log';

/**
 * Append one timestamped line to the enrichment log.
 *
 * The line has the form "Y-m-d H:i:s <message>" followed by a newline and is
 * appended to the path held in the global $LOG_FILE.
 *
 * @param mixed $msg Text to record; it is converted to a string when the line is built.
 */
function logMsg($msg)
{
    global $LOG_FILE;

    $stamp = date('Y-m-d H:i:s');
    $line = $stamp . ' ' . $msg . "\n";

    file_put_contents($LOG_FILE, $line, FILE_APPEND);
}

logMsg("=== KB AUTO-ENRICHMENT START ===");

// DB connection
// The credentials file is expected to define get_pdo(). It is mandatory for
// this job, so a missing/unreadable file is recorded in the enrichment log and
// the run stops, instead of `include` emitting a warning and the script dying
// later on an undefined function.
$credentialsFile = '/opt/wevads/config/credentials.php';
if (!is_readable($credentialsFile)) {
    logMsg("ERROR: credentials file not readable: $credentialsFile");
    exit(1);
}
require_once $credentialsFile;

// A failed connection is logged before exiting so the cron failure is visible
// in the same log file as the rest of the run.
try {
    $pdo = get_pdo();
} catch (Exception $e) {
    logMsg("ERROR: DB connection failed: " . $e->getMessage());
    exit(1);
}
if (!is_object($pdo)) {
    // The rest of the script calls $pdo->query() / $pdo->prepare() directly.
    logMsg("ERROR: get_pdo() did not return a connection object");
    exit(1);
}

// AI provider configs, listed in failover order: the first provider that
// returns usable entries wins. 'key_env' names the environment variable used
// when no API key for that provider was loaded from the database; it is empty
// for the ollama endpoint, which is called without an Authorization header.
$providers = [
    [
        'name'    => 'groq',
        'url'     => 'https://api.groq.com/openai/v1/chat/completions',
        'model'   => 'llama-3.3-70b-versatile',
        'key_env' => 'GROQ_API_KEY',
    ],
    [
        'name'    => 'cerebras',
        'url'     => 'https://api.cerebras.ai/v1/chat/completions',
        'model'   => 'llama3.1-8b',
        'key_env' => 'CEREBRAS_API_KEY',
    ],
    [
        'name'    => 'ollama',
        'url'     => 'http://88.198.4.195:11434/v1/chat/completions',
        'model'   => 'deepseek-r1:32b',
        'key_env' => '',
    ],
];

// Get API keys from DB
// Builds $keys as a map of lowercase provider name => API key, taken from the
// active rows of hamid_providers that have a non-empty key.
$keys = [];
$keyQuery = $pdo->query("SELECT LOWER(provider_name) as name, api_key FROM hamid_providers WHERE is_active=true AND api_key != ''");
foreach ($keyQuery->fetchAll(PDO::FETCH_ASSOC) as $keyRow) {
    $providerName = strtolower($keyRow['name']);
    $keys[$providerName] = $keyRow['api_key'];
}

// Topics to cycle through (one per run).
// Map of category => list of candidate topics; each run picks one category and
// one topic from it.
$topics = [
    'coding' => [
        'PHP design patterns avancés',
        'JavaScript async patterns',
        'Python data structures',
        'SQL optimization techniques',
        'API security best practices',
        'Microservices communication patterns',
        'Database indexing strategies',
        'CI/CD pipeline optimization',
        'Container orchestration patterns',
        'Serverless architecture patterns',
    ],
    'consulting' => [
        'Digital transformation KPIs',
        'Stakeholder management strategies',
        'Business process reengineering',
        'Strategic planning frameworks',
        'Organizational design patterns',
        'Post-merger integration',
        'IT governance frameworks',
        'Vendor management best practices',
    ],
    'ia_ml' => [
        'LLM prompt engineering techniques',
        'Vector database optimization',
        'ML model deployment strategies',
        'AI ethics and governance',
        'Computer vision applications industrielles',
        'NLP for business intelligence',
        'Reinforcement learning applications',
    ],
    'pharma' => [
        'Drug development lifecycle',
        'Pharmacovigilance automation',
        'Clinical trial design',
        'GMP compliance digital',
        'Pharmaceutical supply chain',
        'Biosimilar development',
        'Real-world evidence',
    ],
    'cybersecurite' => [
        'Incident response playbooks',
        'Cloud security posture',
        'API security testing',
        'Identity governance',
        'Threat modeling methodologies',
        'Security automation SOAR',
    ],
    'sap' => [
        'SAP BTP extension development',
        'S/4HANA data migration',
        'SAP Fiori development patterns',
        'SAP Integration Suite patterns',
        'ABAP Cloud programming model',
    ],
    'sciences' => [
        'Quantum computing algorithms',
        'Climate modeling techniques',
        'CRISPR applications 2026',
        'Neuroscience discoveries',
        'Materials science innovations',
    ],
    'medecine' => [
        'Precision medicine advances',
        'Digital health technologies',
        'AI in medical imaging',
        'Antimicrobial stewardship',
        'Telemedicine best practices',
    ],
    'email_marketing' => [
        'Deliverability monitoring automation',
        'Email authentication advanced',
        'ISP relationship management',
        'Inbox placement optimization',
        'Email personalization at scale',
    ],
    'cloud' => [
        'Multi-cloud networking',
        'Cloud cost optimization FinOps',
        'Serverless patterns avancés',
        'Cloud-native security',
        'Platform engineering',
    ],
    'philosophie' => [
        'Ethics of artificial intelligence',
        'Philosophy of consciousness',
        'Epistemology and data science',
        'Bioethics contemporary debates',
    ],
    'art_culture' => [
        'Digital art and NFTs',
        'Architecture durable',
        'Patrimoine culturel numerique',
        'Design systems principles',
    ],
    'finance' => [
        'Algorithmic trading strategies',
        'ESG investment frameworks',
        'Decentralized finance risks',
        'Islamic finance principles',
    ],
    'energie' => [
        'Smart grid technologies',
        'Carbon capture innovations',
        'Nuclear fusion progress 2026',
        'Green hydrogen economics',
    ],
];

// Pick random category and topic
// First a category is drawn from the keys of $topics, then one topic is drawn
// from that category's list; the selection is written to the log.
$categories = array_keys($topics);
$categoryIndex = array_rand($categories);
$cat = $categories[$categoryIndex];

$categoryTopics = $topics[$cat];
$topicIndex = array_rand($categoryTopics);
$topic = $categoryTopics[$topicIndex];

logMsg(sprintf('Selected: [%s] %s', $cat, $topic));

// Build prompt
// The selected topic and category are interpolated into the instructions. The
// model is asked for exactly three {title, content} objects as a bare JSON
// array, which is the shape the response parser expects.
$prompt = <<<PROMPT
Generate 3 knowledge base entries about: $topic
Category: $cat

For each entry, provide:
1. A concise title (max 80 chars)
2. A detailed content paragraph (200-400 chars) with specific facts, numbers, frameworks, or techniques

Respond ONLY in this JSON format, no markdown:
[{"title":"...","content":"..."},{"title":"...","content":"..."},{"title":"...","content":"..."}]
PROMPT;

// Call AI provider with failover

/**
 * Extract the usable KB entries from a chat-completion message.
 *
 * The message is cleaned (reasoning block and markdown fences removed) and
 * decoded as JSON. If the whole message is not valid JSON, the outermost
 * "[" ... "]" span is tried, so an array wrapped in prose is still accepted.
 * Only elements that are arrays with a non-empty string 'title' and 'content'
 * are kept, so an error object or a list of bare strings counts as a failure.
 *
 * @param mixed $text Message content returned by the provider.
 * @return array List of entries (each an array with at least 'title' and
 *               'content'); empty when nothing usable was found.
 */
function kbExtractEntries($text)
{
    if (!is_string($text) || $text === '') {
        return [];
    }

    // Reasoning models (such as the deepseek-r1 fallback) may put a
    // <think>...</think> block in front of the answer; drop it first.
    $clean = preg_replace('/<think>.*?<\/think>/s', '', $text);
    if ($clean === null) {
        $clean = $text; // PCRE failure: continue with the raw text.
    }

    // Clean potential markdown fences
    $unfenced = preg_replace('/```(?:json)?\s*/', '', $clean);
    if ($unfenced !== null) {
        $clean = $unfenced;
    }
    $clean = trim($clean);

    $decoded = json_decode($clean, true);
    if (!is_array($decoded)) {
        $start = strpos($clean, '[');
        $end = strrpos($clean, ']');
        if ($start === false || $end === false || $end < $start) {
            return [];
        }
        $decoded = json_decode(substr($clean, $start, $end - $start + 1), true);
        if (!is_array($decoded)) {
            return [];
        }
    }

    $usable = [];
    foreach ($decoded as $candidate) {
        if (
            is_array($candidate)
            && isset($candidate['title'], $candidate['content'])
            && is_string($candidate['title']) && $candidate['title'] !== ''
            && is_string($candidate['content']) && $candidate['content'] !== ''
        ) {
            $usable[] = $candidate;
        }
    }

    return $usable;
}

// Providers are tried in the order of $providers; the first one that yields at
// least one usable entry fills $result and ends the loop.
$result = null;
foreach ($providers as $prov) {
    $isLocal = ($prov['name'] === 'ollama');

    // DB key first, then the provider's environment variable; the keyless
    // local endpoint gets a placeholder.
    $key = $keys[$prov['name']] ?? ($prov['key_env'] ? getenv($prov['key_env']) : 'dummy');
    if (!$key && !$isLocal) {
        logMsg("Provider {$prov['name']} skipped (no API key)");
        continue;
    }

    $headers = ['Content-Type: application/json'];
    if (!$isLocal) {
        $headers[] = "Authorization: Bearer $key";
    }

    $payload = json_encode([
        'model' => $prov['model'],
        'messages' => [
            ['role' => 'system', 'content' => 'You are a knowledge base generator. Respond ONLY with valid JSON arrays. No markdown, no explanations.'],
            ['role' => 'user', 'content' => $prompt],
        ],
        'max_tokens' => 1500,
        'temperature' => 0.7,
    ]);
    if ($payload === false) {
        logMsg("Provider {$prov['name']} skipped (payload encoding failed: " . json_last_error_msg() . ")");
        continue;
    }

    $ch = curl_init($prov['url']);
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => $headers,
        CURLOPT_RETURNTRANSFER => true,
        // Fail over quickly when a host is unreachable instead of spending the
        // whole 120 s budget on the TCP connect.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 120,
    ]);
    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Capture the transport error before the handle is closed; without it a
    // DNS/timeout/TLS failure is only visible as "HTTP 0".
    $transportError = ($resp === false) ? curl_error($ch) : '';
    curl_close($ch);

    if ($code === 200 && $resp) {
        $data = json_decode($resp, true);
        $entries = kbExtractEntries($data['choices'][0]['message']['content'] ?? '');
        if (count($entries) > 0) {
            $result = $entries;
            logMsg("Provider {$prov['name']} OK: " . count($entries) . " entries");
            break;
        }
        logMsg("Provider {$prov['name']} failed (HTTP $code): no usable JSON entries in response");
        continue;
    }

    logMsg("Provider {$prov['name']} failed (HTTP $code)" . ($transportError !== '' ? ": $transportError" : ''));
}

if (!$result) {
    logMsg("ERROR: All providers failed");
    exit(1);
}

// Insert into knowledge_base + brain_knowledge

/**
 * Truncate text to a maximum number of characters without splitting a
 * multibyte UTF-8 character.
 *
 * A byte-wise substr() can cut an accented character in half and produce
 * invalid UTF-8, which a UTF-8 database rejects on INSERT. mb_substr() is used
 * when the mbstring extension is loaded; otherwise a Unicode-aware regex is
 * used, with substr() as the last resort when the input is not valid UTF-8.
 *
 * @param string $text     Text to shorten.
 * @param int    $maxChars Maximum number of characters to keep.
 * @return string The text, at most $maxChars characters long.
 */
function kbTruncate($text, $maxChars)
{
    if (function_exists('mb_substr')) {
        return mb_substr($text, 0, $maxChars, 'UTF-8');
    }
    if (preg_match('/^.{0,' . (int) $maxChars . '}/us', $text, $match) === 1) {
        return $match[0];
    }
    return substr($text, 0, $maxChars);
}

$kb_inserted = 0;
$bk_inserted = 0;

// Both statements are prepared once and executed per entry.
$kbInsert = $pdo->prepare("INSERT INTO knowledge_base (title, category, content, author, source, created_at) VALUES (?, ?, ?, 'WEVAL Brain Auto', 'auto-enrichment', NOW()) ON CONFLICT DO NOTHING");
$bkUpsert = $pdo->prepare("INSERT INTO brain_knowledge (category, key, value, confidence, last_updated) VALUES (?, ?, ?, 0.85, NOW()) ON CONFLICT (category, key) DO UPDATE SET value=EXCLUDED.value, last_updated=NOW()");

foreach ($result as $entry) {
    if (!is_array($entry)) {
        continue;
    }

    $rawTitle = $entry['title'] ?? '';
    $rawContent = $entry['content'] ?? '';
    // Model output is untrusted: a nested array/object here would otherwise
    // reach the string functions below.
    if (!is_scalar($rawTitle) || !is_scalar($rawContent)) {
        continue;
    }

    $title = kbTruncate((string) $rawTitle, 200);
    $content = (string) $rawContent;
    if (!$title || !$content) {
        continue;
    }

    // knowledge_base: rowCount() is 0 when ON CONFLICT DO NOTHING skipped the row.
    $kbInsert->execute([$title, $cat, $content]);
    if ($kbInsert->rowCount() > 0) {
        $kb_inserted++;
    }

    // brain_knowledge (key = slugified title). Every run of non-alphanumeric
    // bytes becomes "_", so the key is pure ASCII and a byte-wise cut is safe.
    $key = strtolower(preg_replace('/[^a-z0-9]+/i', '_', $title));
    $key = substr($key, 0, 100);
    // Count the upsert only when the statement actually succeeded.
    if ($bkUpsert->execute([$cat, $key, kbTruncate($content, 500)])) {
        $bk_inserted++;
    }
}

// Report what this run added alongside the resulting table sizes.
$total_kb = $pdo->query("SELECT count(*) FROM knowledge_base")->fetchColumn();
$total_bk = $pdo->query("SELECT count(*) FROM brain_knowledge")->fetchColumn();
logMsg("INJECTED: KB+$kb_inserted BK+$bk_inserted | TOTALS: KB=$total_kb BK=$total_bk");
logMsg("=== KB AUTO-ENRICHMENT END ===");
echo "OK: KB+$kb_inserted BK+$bk_inserted | TOTALS: KB=$total_kb BK=$total_bk\n";