# Source: html/api/ai-gap-discovery.py (retrieved 2026-04-12 22:57:03 +02:00)
# 134 lines, 5.3 KiB, Python, executable file

#!/usr/bin/env python3
"""
WEVAL AI Gap-Driven Discovery Engine
Connects AI Benchmark gaps → OSS Discovery search → Auto-wire solutions
Cron: runs after benchmark, feeds gaps to discovery
"""
import json
import ssl
import time
import urllib.parse
import urllib.request
# Vault JSON paths: benchmark scores and wired-tool registry are read,
# the gap report is written.
BENCHMARK_DB = '/opt/wevads/vault/ai-benchmark.json'
OSS_DB = '/opt/wevads/vault/oss-discovery.json'
GAP_DB = '/opt/wevads/vault/ai-gap-discovery.json'
# Map benchmark categories → GitHub search queries.
# Only the first two queries per category are actually issued
# (generate_gap_report slices searches[:2] to limit API calls).
CATEGORY_TO_SEARCH = {
'pdf_report': ['pdf generator python','report generator ai','fpdf2 python','weasyprint','markdown to pdf'],
'proposal': ['proposal generator ai','business proposal template','document generator llm'],
'code': ['code generation llm','ai coding assistant','code completion open source'],
'data_analysis': ['data analysis ai','pandas ai','automated eda python','dataprep'],
'pharma': ['pharmacovigilance ai','drug safety ai','medical nlp','clinical nlp'],
'strategy': ['business strategy ai','consulting ai','digital transformation ai'],
'legal_gdpr': ['gdpr compliance ai','privacy ai','data protection tool'],
'cdc_spec': ['requirements specification ai','user story generator','spec writer ai'],
'schema_db': ['database schema generator','sql generator ai','prisma schema ai'],
'devops': ['docker compose generator','infrastructure as code ai','devops ai'],
'frontend': ['frontend generator ai','html css generator','landing page builder ai'],
'api_design': ['api design tool','openapi generator','rest api builder'],
'ai_ethics': ['ai ethics audit','eu ai act compliance','ai governance tool'],
'security': ['security scanner','vulnerability assessment','owasp tools'],
'erp': ['erp consulting ai','sap tools open source','erp comparison tool'],
}
def load_gaps(db_path=None):
    """Return benchmark categories currently scoring below the gap threshold.

    Reads the ``composite`` mapping from the benchmark DB and keeps only
    the categories whose score is under 70 (out of 90).

    Args:
        db_path: optional override of BENCHMARK_DB (mainly for testing).

    Returns:
        dict: category -> current score; empty on any read/parse error.
    """
    path = db_path if db_path is not None else BENCHMARK_DB
    try:
        # Context manager so the handle is closed even if parsing fails;
        # the original bare `except:` also swallowed SystemExit/KeyboardInterrupt.
        with open(path) as fh:
            db = json.load(fh)
    except (OSError, json.JSONDecodeError):
        # Missing or corrupt benchmark DB: report no gaps rather than crash.
        return {}
    composite = db.get('composite', {})
    return {cat: score for cat, score in composite.items() if score < 70}
def load_existing_tools(db_path=None):
    """Return the set of tool names already wired into the OSS registry.

    Args:
        db_path: optional override of OSS_DB (mainly for testing).

    Returns:
        set[str]: keys of the ``tools`` mapping; empty on any read/parse error.
    """
    path = db_path if db_path is not None else OSS_DB
    try:
        # `with` closes the handle; the original left it open and used a
        # bare `except:` that masked even interpreter-exit exceptions.
        with open(path) as fh:
            db = json.load(fh)
    except (OSError, json.JSONDecodeError):
        return set()
    return set(db.get('tools', {}).keys())
def search_github(query, limit=3):
    """Search GitHub for Python repos (>100 stars) matching a gap query.

    Args:
        query: free-text search terms for the repository search API.
        limit: maximum number of repos to return (``per_page``).

    Returns:
        list[dict]: repository items from the API, empty on any failure.
    """
    url = (
        "https://api.github.com/search/repositories"
        f"?q={urllib.parse.quote(query)}+language:python+stars:>100"
        f"&sort=stars&per_page={limit}"
    )
    req = urllib.request.Request(
        url, headers={'Accept': 'application/vnd.github.v3+json'})
    try:
        # Default TLS context keeps certificate verification ON; the
        # previous CERT_NONE / check_hostname=False bypass allowed
        # man-in-the-middle responses from a public API for no benefit.
        # `with` ensures the response is closed.
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
    except (OSError, json.JSONDecodeError):
        # OSError covers URLError/HTTPError/timeouts; fail soft to [].
        return []
    return data.get('items', [])
def generate_gap_report():
    """Analyze benchmark gaps, search candidate OSS tools, persist a report.

    For each under-threshold category (see load_gaps) this issues up to two
    GitHub searches, collects repos not already wired, deduplicates them,
    and records the top candidates plus priority wiring suggestions.

    Returns:
        dict: the report that was also written to GAP_DB.
    """
    gaps = load_gaps()
    existing = load_existing_tools()
    report = {
        'timestamp': time.strftime('%Y-%m-%dT%H:%M:%S'),
        'total_gaps': len(gaps),
        'gaps': {},
        'recommendations': [],
        'priority_wires': [],
    }
    # Worst-scoring categories first so the API budget goes to biggest gaps.
    for cat, score in sorted(gaps.items(), key=lambda x: x[1]):
        gap_size = 90 - score
        priority = 'critical' if score < 50 else ('high' if score < 60 else 'medium')
        searches = CATEGORY_TO_SEARCH.get(cat, [f'{cat} ai tool'])
        candidates = []
        for query in searches[:2]:  # Limit API calls
            for repo in search_github(query):
                full_name = repo.get('full_name', '')
                if full_name not in existing:
                    candidates.append({
                        'name': repo.get('name', ''),
                        'full_name': full_name,
                        'stars': repo.get('stargazers_count', 0),
                        # description may be None in the API payload.
                        'description': (repo.get('description', '') or '')[:100],
                        'url': repo.get('html_url', ''),
                        'language': repo.get('language', ''),
                    })
        # Deduplicate by full_name, preserving first-seen order.
        seen = set()
        unique = []
        for c in candidates:
            if c['full_name'] not in seen:
                seen.add(c['full_name'])
                unique.append(c)
        report['gaps'][cat] = {
            'current_score': score,
            'gap': gap_size,
            'priority': priority,
            'candidates': unique[:5],
        }
        if unique:
            report['priority_wires'].extend([{
                'category': cat,
                'tool': c['full_name'],
                'stars': c['stars'],
                'reason': f"Fill {cat} gap ({score}/90 → target 70+)"
            } for c in unique[:2]])
    # `with` guarantees the handle is flushed and closed (the original
    # `json.dump(report, open(...))` leaked it); explicit utf-8 because
    # ensure_ascii=False emits non-ASCII ('→') that a locale-default
    # encoding could reject.
    with open(GAP_DB, 'w', encoding='utf-8') as fh:
        json.dump(report, fh, indent=2, ensure_ascii=False)
    return report
if __name__ == '__main__':
    # NOTE: the old guard-local `import urllib.parse` meant search_github
    # only worked when run as a script; the import now lives at module top.
    report = generate_gap_report()
    print(f"Gaps: {report['total_gaps']}")
    for cat, data in report['gaps'].items():
        print(f" {data['priority'].upper():>8s} | {cat:<16s} {data['current_score']}/90 | {len(data['candidates'])} candidates")
    print(f"\nPriority wires: {len(report['priority_wires'])}")
    for pw in report['priority_wires'][:10]:
        print(f"{pw['tool']} ({pw['stars']}★) for {pw['category']}")