Files
html/api/oss-discovery.php
2026-04-12 22:57:03 +02:00

349 lines
20 KiB
PHP

<?php
error_reporting(0);
/**
 * WEVAL OSS Discovery & Auto-Integration Module v2.0
 * Discovers, scores, auto-integrates skills/tools into WEVAL architecture
 *
 * ?action=discover  — Scan the GitHub sources and record scored tools (no skill files written)
 * ?action=trending  — GitHub trending scored
 * ?action=evaluate  — Manual evaluate a tool
 * ?action=integrate — Force integrate a tool
 * ?action=skills    — List the SKILL.md files present in the skills directory
 * ?action=report    — Ecosystem report (default)
 * ?action=auto_run  — Full cycle: discover + auto-integrate tools scoring >= 15 + notify
 *
 * Write actions require the shared key in the `k` query/body parameter.
 */
header('Content-Type: application/json');

// NOTE(review): the shared secret is hard-coded in the source; consider loading
// it from configuration kept outside the web root.
$KEY = 'WEVADS2026';

// Public read (report/stats) allowed; auth required for write operations
$_action = $_GET['action'] ?? 'report';
$_writeActions = ['discover','evaluate','integrate','auto_run','force_update'];
$_suppliedKey = $_GET['k'] ?? $_POST['k'] ?? '';
// hash_equals() compares in constant time. The is_string() guard is needed
// because `k[]=...` arrives as an array, which hash_equals() would reject.
if (in_array($_action, $_writeActions) && !(is_string($_suppliedKey) && hash_equals($KEY, $_suppliedKey))) {
    http_response_code(403);
    die(json_encode(['error'=>'auth']));
}
$action = $_action;

$DB_FILE = '/opt/wevads/vault/oss-discovery.json';
$SKILLS_DIR = '/opt/deer-flow/skills/weval';
$LOG = '/var/log/oss-discovery.log';
$TG_BOT = '8544624912';
$TG_CHAT = '7605775322';
$OBSIDIAN_API = 'https://weval-consulting.com/api/obsidian-sync-receiver.php';

if (!is_dir($SKILLS_DIR)) @mkdir($SKILLS_DIR, 0755, true);
if (!is_dir('/opt/wevads/vault')) @mkdir('/opt/wevads/vault', 0755, true);

// Shape of an empty database; also used to fill keys missing from an older file.
$DB_DEFAULTS = [
    'tools' => [], 'skills_created' => [], 'last_scan' => null,
    'total_discovered' => 0, 'total_integrated' => 0, 'total_skills_injected' => 0
];
$db = $DB_DEFAULTS;
if (file_exists($DB_FILE)) {
    // An unreadable or corrupt file decodes to a non-array; keep the defaults
    // in that case instead of carrying null into the handlers below.
    $_loaded = json_decode((string) @file_get_contents($DB_FILE), true);
    if (is_array($_loaded)) {
        $db = $_loaded + $DB_DEFAULTS;
    }
}
// The handlers iterate and count() these two entries, so they must be arrays.
foreach (['tools', 'skills_created'] as $_listKey) {
    if (!is_array($db[$_listKey])) $db[$_listKey] = [];
}
// ─── NEEDS MATRIX ───
// Relevance matrix used by score_tool(): a need contributes its weight once
// when any of its keywords occurs as a substring of the repository text.
// Entry order matters: the first matched need is treated as the primary one
// when an integration target is chosen.
$NEEDS = [
    'rag'           => ['weight' => 10, 'kw' => ['rag', 'retrieval', 'vector', 'embedding', 'knowledge-base', 'semantic-search']],
    'skill_agent'   => ['weight' => 10, 'kw' => ['skill', 'agent', 'plugin', 'hook', 'mcp', 'claude-code', 'subagent', 'orchestrat']],
    'scraping'      => ['weight' => 9,  'kw' => ['scraper', 'crawl', 'extraction', 'playwright', 'selenium', 'browser-use']],
    'llm_local'     => ['weight' => 9,  'kw' => ['ollama', 'llm', 'inference', 'quantize', 'gguf', 'local', 'sovereign', 'vllm']],
    'security'      => ['weight' => 8,  'kw' => ['security', 'audit', 'vulnerability', 'scan', 'pentest', 'nuclei', 'shield']],
    'pharma_health' => ['weight' => 8,  'kw' => ['pharma', 'health', 'medical', 'hcp', 'drug', 'clinical', 'healthcare']],
    'email'         => ['weight' => 8,  'kw' => ['email', 'smtp', 'deliverability', 'warmup', 'bounce', 'mta', 'newsletter']],
    'crm'           => ['weight' => 7,  'kw' => ['crm', 'lead', 'pipeline', 'sales', 'contact', 'prospect']],
    'automation'    => ['weight' => 7,  'kw' => ['automation', 'workflow', 'n8n', 'cron', 'pipeline', 'orchestrat', 'activepieces']],
    'analytics'     => ['weight' => 6,  'kw' => ['analytics', 'dashboard', 'report', 'metric', 'tracking', 'plausible']],
    'monitoring'    => ['weight' => 6,  'kw' => ['monitor', 'alert', 'health', 'uptime', 'prometheus', 'grafana']],
    'code_quality'  => ['weight' => 5,  'kw' => ['lint', 'test', 'quality', 'ci', 'format', 'coverage', 'tdd']],
    'verification'  => ['weight' => 9,  'kw' => ['verification', 'verifier', 'adversarial', 'nonreg', 'regression', 'audit', 'assertion']],
    'prompt_eng'    => ['weight' => 9,  'kw' => ['system-prompt', 'prompt-engineering', 'prompt-leak', 'claude-code', 'instruction', 'guardrail']],
    'context_mgmt'  => ['weight' => 10, 'kw' => ['context-window', 'context-compression', 'token-budget', 'context-collapse', 'summarization', 'compaction']],
    'moa_ensemble'  => ['weight' => 9,  'kw' => ['mixture-of-agents', 'moa', 'ensemble', 'self-moa', 'multi-agent', 'coordinator', 'subagent', 'swarm']],
    'spec_decode'   => ['weight' => 8,  'kw' => ['speculative-decoding', 'eagle', 'medusa', 'draft-model', 'spec-decode', 'lookahead', 'specforge']],
    'mem_consol'    => ['weight' => 8,  'kw' => ['memory-consolidation', 'auto-dream', 'persistent-memory', 'session-memory', 'memory-layer', 'knowledge-graph']],
    'mcp_protocol'  => ['weight' => 9,  'kw' => ['mcp', 'model-context-protocol', 'tool-server', 'mcp-server', 'function-calling', 'tool-use']],
    'vllm_serve'    => ['weight' => 10, 'kw' => ['vllm', 'paged-attention', 'tensor-parallel', 'serving', 'inference-engine', 'sglang', 'tgi']],
    'sovereign_eu'  => ['weight' => 9,  'kw' => ['sovereign', 'gdpr', 'eu-ai-act', 'on-premise', 'self-hosted', 'data-sovereignty', 'mistral']],
];
// ─── AUTO-INTEGRATION TARGETS ───
// Where a tool is routed, keyed by need. Callers fall back to their own
// defaults for needs that have no entry here (e.g. code_quality).
$INTEGRATION_MAP = [
    'skill_agent'   => ['target' => 'skill_factory',    'path' => $SKILLS_DIR,                                                    'server' => 'S204'],
    'rag'           => ['target' => 'qdrant_pipeline',  'path' => '/opt/weval-rag',                                               'server' => 'S204'],
    'security'      => ['target' => 'aegis_nuclei',     'path' => '/var/www/html/api/nuclei-templates',                           'server' => 'S204'],
    'scraping'      => ['target' => 'scraper_arsenal',  'path' => '/opt/wevads-arsenal',                                          'server' => 'S95'],
    'llm_local'     => ['target' => 'ollama_models',    'path' => 'ollama',                                                       'server' => 'S204'],
    'pharma_health' => ['target' => 'ethica_tools',     'path' => '/opt/ethica-tools',                                            'server' => 'S95'],
    'email'         => ['target' => 'mta_tools',        'path' => '/opt/wevads/email-tools',                                      'server' => 'S95'],
    'automation'    => ['target' => 'n8n_workflows',    'path' => '/opt/n8n-workflows',                                           'server' => 'S95'],
    'monitoring'    => ['target' => 'monitoring',       'path' => '/opt/wevads/monitoring',                                       'server' => 'S204'],
    'crm'           => ['target' => 'crm_extensions',   'path' => '/opt/crm-tools',                                               'server' => 'S204'],
    'analytics'     => ['target' => 'analytics',        'path' => '/opt/analytics-tools',                                         'server' => 'S204'],
    'verification'  => ['target' => 'verifier_agent',   'path' => '/opt/deer-flow/backend/packages/harness/deerflow/subagents',   'server' => 'S204'],
    'prompt_eng'    => ['target' => 'prompt_library',   'path' => '/opt/wevads/vault/prompt-patterns',                            'server' => 'S204'],
    'context_mgmt'  => ['target' => 'wcp_patterns',     'path' => '/var/www/weval/wevia-ia',                                      'server' => 'S204'],
    'moa_ensemble'  => ['target' => 'wsi_sovereign',    'path' => '/var/www/weval/wevia-ia',                                      'server' => 'S204'],
    'spec_decode'   => ['target' => 'vllm_config',      'path' => '/opt/vllm',                                                    'server' => 'S204'],
    'mem_consol'    => ['target' => 'dream_engine',     'path' => '/var/www/weval/wevia-ia',                                      'server' => 'S204'],
    'mcp_protocol'  => ['target' => 'mcp_servers',      'path' => '/opt/mcp-tools',                                               'server' => 'S204'],
    'vllm_serve'    => ['target' => 'inference_engine', 'path' => '/opt/vllm',                                                    'server' => 'S204'],
    'sovereign_eu'  => ['target' => 'sovereign_stack',  'path' => '/opt/sovereign',                                               'server' => 'S204'],
];
// ─── HELPERS ───
/**
 * Score a GitHub repository record against the needs matrix.
 *
 * The repository name, description and topics are lower-cased and searched
 * for each need's keywords by plain substring match; a need adds its weight
 * at most once. Bonuses are then added for star count (+1/+3/+5 above
 * 100/1000/10000), a permissive license (+2) and a supported language (+2).
 *
 * @param array $repo  Repository record; all fields are optional.
 * @param array $needs Map of need => ['weight' => int, 'kw' => string[]].
 * @return array ['score' => int, 'matched_needs' => string[]]
 */
function score_tool($repo, $needs) {
    $haystack = strtolower(implode(' ', [
        $repo['name'] ?? '',
        $repo['description'] ?? '',
        implode(' ', $repo['topics'] ?? []),
    ]));

    $total = 0;
    $hits = [];
    foreach ($needs as $needName => $needCfg) {
        foreach ($needCfg['kw'] as $keyword) {
            if (strpos($haystack, $keyword) === false) {
                continue;
            }
            // First keyword hit is enough; a need is never counted twice.
            $total += $needCfg['weight'];
            $hits[] = $needName;
            break;
        }
    }

    $starCount = $repo['stargazers_count'] ?? 0;
    if ($starCount > 10000) {
        $total += 5;
    } elseif ($starCount > 1000) {
        $total += 3;
    } elseif ($starCount > 100) {
        $total += 1;
    }

    $license = strtolower($repo['license']['spdx_id'] ?? '');
    if (in_array($license, ['mit','apache-2.0','bsd-2-clause','bsd-3-clause'])) {
        $total += 2;
    }

    $language = strtolower($repo['language'] ?? '');
    if (in_array($language, ['python','php','javascript','typescript','shell','go'])) {
        $total += 2;
    }

    return ['score' => $total, 'matched_needs' => array_unique($hits)];
}
/**
 * Send a best-effort Telegram notification via the Bot API sendMessage method.
 *
 * Failures are deliberately ignored: a notification problem must not break
 * the API response. The request is bounded by a timeout so that a slow or
 * unreachable Telegram endpoint cannot stall the caller.
 *
 * @param string $msg Message text; sent with parse_mode=HTML.
 * @return void
 */
function tg_notify($msg) {
    global $TG_BOT, $TG_CHAT;
    // NOTE(review): the bot token secret is embedded in the source; it should
    // be moved to configuration and rotated.
    $endpoint = "https://api.telegram.org/bot{$TG_BOT}:AAGdBn1f3m0UtnxK7LHhA33fJ1I2VZJPnug/sendMessage?" . http_build_query([
        'chat_id' => $TG_CHAT, 'text' => $msg, 'parse_mode' => 'HTML'
    ]);
    // Without an explicit timeout the call waits for default_socket_timeout.
    $ctx = stream_context_create(['http' => ['timeout' => 10]]);
    @file_get_contents($endpoint, false, $ctx);
}
/**
 * Push a note to the Obsidian sync receiver (best effort, response ignored).
 *
 * The note is POSTed as JSON with action "obsidian_sync"; the content is
 * base64-encoded. Connect and total timeouts bound the call so a stalled
 * receiver cannot hang the request.
 *
 * @param string $path    Note path sent in the "path" field.
 * @param string $content Raw note content.
 * @return void
 */
function obsidian_push($path, $content) {
    global $OBSIDIAN_API;
    $ch = curl_init($OBSIDIAN_API);
    if ($ch === false) {
        return; // cURL handle could not be created; nothing to send
    }
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_RETURNTRANSFER => true,
        // NOTE(review): TLS peer verification is disabled, which allows
        // man-in-the-middle interception; enable it once the receiver's
        // certificate chain is confirmed valid from this host.
        CURLOPT_SSL_VERIFYPEER => false,
        // cURL's default is no overall timeout at all.
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_TIMEOUT => 15,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        CURLOPT_POSTFIELDS => json_encode(['action'=>'obsidian_sync','path'=>$path,'content'=>base64_encode($content)])
    ]);
    curl_exec($ch);
    curl_close($ch);
}
/**
 * Write "<skills_dir>/<slug>/SKILL.md" describing a discovered tool.
 *
 * The slug is the lower-cased tool name with every character outside
 * [a-z0-9-] replaced by "-". Target and server lines are looked up in the
 * global $INTEGRATION_MAP using the first matched need (falling back to
 * "skill_agent", then to "manual" / "S204").
 *
 * @param array  $tool          Tool record; reads name, url, stars, language,
 *                              license, score and, optionally, description and topics.
 * @param array  $needs_matched Need keys matched for the tool.
 * @param string $skills_dir    Directory under which the skill folder is created.
 * @return string|null The slug when SKILL.md was written; null when the skill
 *                     directory already exists or could not be created/written.
 */
function create_skill($tool, $needs_matched, $skills_dir) {
    $slug = preg_replace('/[^a-z0-9-]/', '-', strtolower($tool['name']));
    $dir = "$skills_dir/$slug";
    if (is_dir($dir)) return null;
    // Do not report success when the directory cannot be created.
    if (!@mkdir($dir, 0755, true) && !is_dir($dir)) return null;

    $needs_str = implode(', ', $needs_matched);
    $topics_str = implode(', ', array_slice($tool['topics'] ?? [], 0, 8));
    $desc = $tool['description'] ?? 'No description';
    $primary = $needs_matched[0] ?? 'skill_agent';

    $skill_md = "# {$tool['name']}\n\n";
    $skill_md .= "## Source\n- GitHub: {$tool['url']}\n- Stars: {$tool['stars']}\n- Language: {$tool['language']}\n- License: {$tool['license']}\n\n";
    $skill_md .= "## Description\n{$desc}\n\n";
    $skill_md .= "## WEVAL Relevance\n- Score: {$tool['score']}\n- Matched needs: {$needs_str}\n- Topics: {$topics_str}\n\n";
    $skill_md .= "## Integration\n- Status: auto-discovered\n- Target: " . ($GLOBALS['INTEGRATION_MAP'][$primary]['target'] ?? 'manual') . "\n";
    $skill_md .= "- Server: " . ($GLOBALS['INTEGRATION_MAP'][$primary]['server'] ?? 'S204') . "\n\n";
    $skill_md .= "## Usage\n```\n# Clone and evaluate\ngit clone {$tool['url']}\n# Check README for install instructions\n```\n\n";
    $skill_md .= "## Triggers\n";
    foreach ($needs_matched as $n) { $skill_md .= "- $n\n"; }
    $skill_md .= "\n---\nAuto-discovered: " . date('Y-m-d H:i') . "\n";

    if (@file_put_contents("$dir/SKILL.md", $skill_md) === false) {
        // Remove the empty directory: leaving it behind would make every
        // later attempt bail out at the is_dir() check above.
        @rmdir($dir);
        return null;
    }
    return $slug;
}
/**
 * Fetch a URL and decode its JSON body.
 *
 * The request carries the WEVAL-Discovery user agent and the GitHub JSON
 * Accept header, with a 15 second timeout. Read errors are suppressed.
 *
 * @param string $url Address to fetch.
 * @return mixed Decoded JSON (objects as associative arrays), or null when
 *               the read fails or the body is empty.
 */
function github_fetch($url) {
    $httpOptions = [
        'header' => "User-Agent: WEVAL-Discovery/2.0\r\nAccept: application/vnd.github+json\r\n",
        'timeout' => 15,
    ];
    $body = @file_get_contents($url, false, stream_context_create(['http' => $httpOptions]));
    if (!$body) {
        return null;
    }
    return json_decode($body, true);
}
// ─── SOURCES ───
// GitHub repository-search queries scanned by the discover/auto_run actions,
// keyed by the source label stored on each discovered tool.
$SOURCES = [
    'claude_skills'  => 'https://api.github.com/search/repositories?q=topic:claude-code+OR+topic:claude-skills+OR+topic:agent-skills&sort=stars&order=desc&per_page=20',
    'ai_agents'      => 'https://api.github.com/search/repositories?q=topic:ai-agents+topic:open-source+language:python&sort=updated&order=desc&per_page=15',
    'mcp_tools'      => 'https://api.github.com/search/repositories?q=topic:mcp+OR+topic:model-context-protocol&sort=stars&order=desc&per_page=15',
    'rag_tools'      => 'https://api.github.com/search/repositories?q=rag+retrieval+augmented+generation+language:python&sort=stars&order=desc&per_page=10',
    'security_tools' => 'https://api.github.com/search/repositories?q=security+audit+agent+2026&sort=stars&order=desc&per_page=10',
    'pharma_ai'      => 'https://api.github.com/search/repositories?q=pharma+healthcare+ai+open-source&sort=stars&order=desc&per_page=10',
    'ollama_tools'   => 'https://api.github.com/search/repositories?q=topic:ollama+language:python&sort=updated&order=desc&per_page=10',
    'system_prompts' => 'https://api.github.com/search/repositories?q=system+prompt+leak+OR+claude-code+prompt+engineering&sort=stars&order=desc&per_page=15',
    'verification'   => 'https://api.github.com/search/repositories?q=adversarial+testing+OR+verification+agent+OR+regression+testing+ai&sort=stars&order=desc&per_page=10',
];
// ═══════════════════════════════════════════
switch ($action) {
    case 'discover':
    case 'auto_run':
        // Scan every configured GitHub search, score repositories not yet in
        // the DB and record those scoring at least 5. Only `auto_run` also
        // writes a SKILL.md for repositories scoring 15 or more.
        $new_tools = []; $new_skills = []; $integrated = [];
        foreach ($SOURCES as $name => $url) {
            $data = github_fetch($url);
            if (!$data) continue;
            foreach (($data['items'] ?? []) as $repo) {
                $id = $repo['full_name'] ?? '';
                // A result without full_name cannot be keyed in the DB; skip
                // it rather than storing it under the empty key.
                if ($id === '' || isset($db['tools'][$id])) continue;
                $eval = score_tool($repo, $NEEDS);
                if ($eval['score'] < 5) continue;
                $tool = [
                    'name' => $repo['name'], 'full_name' => $id,
                    'description' => mb_substr($repo['description'] ?? '', 0, 200),
                    'url' => $repo['html_url'], 'stars' => $repo['stargazers_count'] ?? 0,
                    'language' => $repo['language'] ?? '?', 'license' => $repo['license']['spdx_id'] ?? '?',
                    'topics' => $repo['topics'] ?? [], 'score' => $eval['score'],
                    'matched_needs' => $eval['matched_needs'], 'discovered_at' => date('c'),
                    'status' => 'discovered', 'source' => $name,
                ];
                // ─── AUTO-INTEGRATE if score >= 15 ───
                if ($eval['score'] >= 15 && $action === 'auto_run') {
                    $slug = create_skill($tool, $eval['matched_needs'], $SKILLS_DIR);
                    if ($slug) {
                        $tool['status'] = 'integrated';
                        $tool['integrated_at'] = date('c');
                        $tool['skill_slug'] = $slug;
                        $db['total_skills_injected']++;
                        $new_skills[] = $slug;
                        $integrated[] = $tool;
                    }
                }
                $db['tools'][$id] = $tool;
                $db['total_discovered']++;
                $new_tools[] = $tool;
            }
            usleep(500000); // Rate limit
        }
        $db['last_scan'] = date('c');
        // Persist right away so results survive a failure in the notification steps.
        file_put_contents($DB_FILE, json_encode($db, JSON_PRETTY_PRINT));

        // Log. The counts are concatenated: "{count(...)}" inside a
        // double-quoted string is not evaluated and would log "Array".
        $log_msg = date('Y-m-d H:i:s') . ' SCAN: +' . count($new_tools) . ' discovered, +' . count($new_skills) . " skills created\n";
        file_put_contents($LOG, $log_msg, FILE_APPEND);

        // Rank once, before notifying, so the Telegram digest and the JSON
        // response both list the highest-scoring new tools first.
        usort($new_tools, fn($a,$b) => $b['score'] - $a['score']);

        // ─── TELEGRAM NOTIFICATION ───
        if (count($new_tools) > 0) {
            $msg = "🔍 <b>OSS Discovery</b>\n";
            $msg .= "+" . count($new_tools) . " new tools found\n";
            $msg .= "+" . count($new_skills) . " skills auto-injected\n\n";
            $top3 = array_slice($new_tools, 0, 3);
            foreach ($top3 as $t) {
                $msg .= "⭐ <b>{$t['name']}</b> ({$t['score']}pts)\n";
                $msg .= " " . implode(', ', $t['matched_needs']) . "\n";
            }
            $msg .= "\nTotal: " . count($db['tools']) . " tools | " . $db['total_skills_injected'] . " skills";
            tg_notify($msg);
        }

        // ─── OBSIDIAN AUTO-UPDATE ───
        $obsidian_note = "# OSS Discovery Report\n\n";
        $obsidian_note .= "**Last scan:** " . date('Y-m-d H:i') . "\n";
        $obsidian_note .= "**Total tools:** " . count($db['tools']) . "\n";
        $obsidian_note .= "**Skills injected:** " . $db['total_skills_injected'] . "\n\n";
        $obsidian_note .= "## Latest Discoveries\n";
        $sorted = $db['tools']; usort($sorted, fn($a,$b) => ($b['score']??0) - ($a['score']??0));
        foreach (array_slice($sorted, 0, 15) as $t) {
            $status_icon = ($t['status'] ?? '') === 'integrated' ? '✅' : '🔍';
            $obsidian_note .= "- {$status_icon} **{$t['name']}** ({$t['score']}pts) — " . implode(', ', $t['matched_needs'] ?? []) . "\n";
        }
        $obsidian_note .= "\n## Skills Auto-Injected\n";
        foreach ($db['skills_created'] ?? [] as $s) { $obsidian_note .= "- `{$s}`\n"; }
        // New slugs are appended to the note and recorded in the DB in one pass.
        foreach ($new_skills as $s) { $obsidian_note .= "- `{$s}` ⚡ NEW\n"; $db['skills_created'][] = $s; }
        file_put_contents($DB_FILE, json_encode($db, JSON_PRETTY_PRINT));
        obsidian_push('/03-Resources/Techniques/OSS-Discovery-Report.md', $obsidian_note);

        echo json_encode([
            'ok' => true, 'new_tools' => count($new_tools), 'new_skills' => count($new_skills),
            'auto_integrated' => count($integrated),
            'top' => array_slice(array_map(fn($t) => ['name'=>$t['full_name'],'score'=>$t['score'],'needs'=>$t['matched_needs'],'status'=>$t['status']], $new_tools), 0, 10),
            'total_known' => count($db['tools']), 'total_skills' => $db['total_skills_injected']
        ]);
        break;
case 'trending':
    // Score the results of one fixed GitHub search and return the 15
    // best-scoring repositories. A failed fetch yields an empty list.
    $response = github_fetch('https://api.github.com/search/repositories?q=ai+agent+tool+created:>2026-03-01&sort=stars&order=desc&per_page=20');
    $trending = [];
    foreach (($response['items'] ?? []) as $candidate) {
        $rating = score_tool($candidate, $NEEDS);
        $trending[] = [
            'name'        => $candidate['full_name'],
            'stars'       => $candidate['stargazers_count'],
            'description' => mb_substr($candidate['description'] ?? '', 0, 150),
            'language'    => $candidate['language'],
            'score'       => $rating['score'],
            'needs'       => $rating['matched_needs'],
            'url'         => $candidate['html_url'],
        ];
    }
    // Highest score first.
    usort($trending, fn($left, $right) => $right['score'] <=> $left['score']);
    $payload = ['ok' => true, 'trending' => array_slice($trending, 0, 15)];
    echo json_encode($payload);
    break;
case 'evaluate':
    // Run the feasibility checklist for a known tool, store the result on
    // its DB record and report the integration target of its first matched need.
    $tid = $_GET['tool'] ?? '';
    // is_string(): `tool[]=x` arrives as an array, which is not a valid
    // array offset for the isset() lookup below.
    if (!is_string($tid) || !$tid || !isset($db['tools'][$tid])) { echo json_encode(['error'=>'not found']); break; }
    $t = $db['tools'][$tid];
    $f = [
        'can_run_cpu' => !in_array('gpu', $t['topics'] ?? []),
        'language_ok' => in_array(strtolower($t['language']), ['python','php','javascript','typescript','shell','go']),
        'license_ok' => in_array(strtolower($t['license']), ['mit','apache-2.0','bsd-2-clause','bsd-3-clause','gpl-3.0']),
        'has_docker' => in_array('docker', $t['topics'] ?? []),
    ];
    // Docker support is informational only; it does not affect the go/no-go flag.
    $f['go'] = $f['can_run_cpu'] && $f['language_ok'] && $f['license_ok'];
    $primary_need = $t['matched_needs'][0] ?? 'skill_agent';
    $target = $INTEGRATION_MAP[$primary_need] ?? ['target'=>'manual','server'=>'S204'];
    $db['tools'][$tid]['status'] = 'evaluated';
    $db['tools'][$tid]['feasibility'] = $f;
    $db['tools'][$tid]['integration_target'] = $target;
    file_put_contents($DB_FILE, json_encode($db, JSON_PRETTY_PRINT));
    // The response carries the record as it was before this evaluation.
    echo json_encode(['ok'=>true,'tool'=>$t,'feasibility'=>$f,'target'=>$target]);
    break;
case 'integrate':
    // Force-create the SKILL.md for a known tool and mark it integrated.
    $tid = $_GET['tool'] ?? '';
    // is_string(): `tool[]=x` arrives as an array, which is not a valid
    // array offset for the isset() lookup below.
    if (!is_string($tid) || !$tid || !isset($db['tools'][$tid])) { echo json_encode(['error'=>'not found']); break; }
    $t = $db['tools'][$tid];
    // Records without matched_needs must not pass null to create_skill(),
    // which implode()s and iterates that argument.
    $slug = create_skill($t, $t['matched_needs'] ?? [], $SKILLS_DIR);
    if ($slug) {
        $db['tools'][$tid]['status'] = 'integrated';
        $db['tools'][$tid]['integrated_at'] = date('c');
        $db['tools'][$tid]['skill_slug'] = $slug;
        $db['total_skills_injected']++;
        $db['skills_created'][] = $slug;
        file_put_contents($DB_FILE, json_encode($db, JSON_PRETTY_PRINT));
        tg_notify("🔧 Skill integrated: <b>{$t['name']}</b> → {$SKILLS_DIR}/{$slug}/");
        echo json_encode(['ok'=>true,'skill'=>$slug,'path'=>"$SKILLS_DIR/$slug/SKILL.md"]);
    } else {
        echo json_encode(['ok'=>false,'message'=>'already exists']);
    }
    break;
case 'skills':
    // List every SKILL.md found one level below the skills directory. The
    // display name is the first "# " heading, falling back to the folder name.
    $skills = [];
    foreach (glob("$SKILLS_DIR/*/SKILL.md") as $skillFile) {
        $folder = basename(dirname($skillFile));
        $markdown = file_get_contents($skillFile);
        preg_match('/^# (.+)$/m', $markdown, $heading);
        $skills[] = [
            'slug' => $folder,
            'name' => $heading[1] ?? $folder,
            'size' => strlen($markdown),
        ];
    }
    echo json_encode([
        'ok'     => true,
        'skills' => $skills,
        'total'  => count($skills),
        'path'   => $SKILLS_DIR,
    ]);
    break;
case 'report':
default:
    // Aggregate the DB in a single pass: tool counts per status and per
    // need, wire success/failure totals, and the tools scoring 10 or more.
    $by_status = ['discovered'=>0,'evaluated'=>0,'integrated'=>0,'rejected'=>0];
    $by_need = [];
    $top = [];
    $wire_ok = 0;
    $wire_fail = 0;
    foreach ($db['tools'] as $entry) {
        $by_status[$entry['status'] ?? 'discovered']++;

        foreach ($entry['matched_needs'] ?? [] as $need) {
            $by_need[$need] = ($by_need[$need] ?? 0) + 1;
        }

        if (($entry['score'] ?? 0) >= 10) {
            $top[] = [
                'name'        => $entry['full_name'],
                'score'       => $entry['score'],
                'status'      => $entry['status'],
                'needs'       => $entry['matched_needs'] ?? [],
                'wire_date'   => $entry['wire_date'] ?? '',
                'wire_status' => $entry['wire_status'] ?? '',
                'test_status' => $entry['test_status'] ?? '',
                'stars'       => $entry['stars'] ?? 0,
                'slug'        => $entry['skill_slug'] ?? '',
            ];
        }

        $wireState = $entry["wire_status"] ?? "";
        if ($wireState == "success") {
            $wire_ok++;
        } elseif ($wireState == "failed") {
            $wire_fail++;
        }
    }
    // Best score first; needs ordered by how many tools matched them.
    usort($top, fn($a, $b) => $b['score'] - $a['score']);
    arsort($by_need);
    echo json_encode([
        'ok' => true,
        'total' => count($db['tools']),
        'by_status' => $by_status,
        'by_need' => $by_need,
        'top' => array_slice($top, 0, 15),
        'skills_injected' => $db['total_skills_injected'],
        'skills_list' => $db['skills_created'] ?? [],
        'last_scan' => $db['last_scan'],
        'integration_targets' => array_keys($INTEGRATION_MAP),
        'test_summary' => $db['test_summary'] ?? [],
        'wire_stats' => ['success' => $wire_ok, 'failed' => $wire_fail],
        'already_wired' => ['Browser Use','OpenClaw','Strix/Nuclei','Prometheus','Mastra','Dify','Supermemory','EvoMaster','Activepieces','Goose','AEGIS','SkillSmith','AIOS','vaultwarden','gitea','pmta-versions']
    ]);
    break;
}