#!/usr/bin/env python3
"""
WEVAL AI Gap-Driven Discovery Engine

Connects AI Benchmark gaps → OSS Discovery search → Auto-wire solutions

Cron: runs after benchmark, feeds gaps to discovery
"""
|
|
import json, time, urllib.request, ssl
|
|
|
|
BENCHMARK_DB = '/opt/wevads/vault/ai-benchmark.json'
|
|
OSS_DB = '/opt/wevads/vault/oss-discovery.json'
|
|
GAP_DB = '/opt/wevads/vault/ai-gap-discovery.json'
|
|
|
|
# Map benchmark categories → GitHub search queries
|
|
CATEGORY_TO_SEARCH = {
|
|
'pdf_report': ['pdf generator python','report generator ai','fpdf2 python','weasyprint','markdown to pdf'],
|
|
'proposal': ['proposal generator ai','business proposal template','document generator llm'],
|
|
'code': ['code generation llm','ai coding assistant','code completion open source'],
|
|
'data_analysis': ['data analysis ai','pandas ai','automated eda python','dataprep'],
|
|
'pharma': ['pharmacovigilance ai','drug safety ai','medical nlp','clinical nlp'],
|
|
'strategy': ['business strategy ai','consulting ai','digital transformation ai'],
|
|
'legal_gdpr': ['gdpr compliance ai','privacy ai','data protection tool'],
|
|
'cdc_spec': ['requirements specification ai','user story generator','spec writer ai'],
|
|
'schema_db': ['database schema generator','sql generator ai','prisma schema ai'],
|
|
'devops': ['docker compose generator','infrastructure as code ai','devops ai'],
|
|
'frontend': ['frontend generator ai','html css generator','landing page builder ai'],
|
|
'api_design': ['api design tool','openapi generator','rest api builder'],
|
|
'ai_ethics': ['ai ethics audit','eu ai act compliance','ai governance tool'],
|
|
'security': ['security scanner','vulnerability assessment','owasp tools'],
|
|
'erp': ['erp consulting ai','sap tools open source','erp comparison tool'],
|
|
}
|
|
|
|
def load_gaps():
|
|
"""Load benchmark gaps (categories < 70/90)"""
|
|
try:
|
|
db = json.load(open(BENCHMARK_DB))
|
|
composite = db.get('composite', {})
|
|
gaps = {cat: score for cat, score in composite.items() if score < 70}
|
|
return gaps
|
|
except:
|
|
return {}
|
|
|
|
def load_existing_tools():
|
|
"""Load already wired tools"""
|
|
try:
|
|
db = json.load(open(OSS_DB))
|
|
return set(db.get('tools', {}).keys())
|
|
except:
|
|
return set()
|
|
|
|
def search_github(query, limit=3):
|
|
"""Search GitHub for tools matching gap"""
|
|
try:
|
|
url = f"https://api.github.com/search/repositories?q={urllib.parse.quote(query)}+language:python+stars:>100&sort=stars&per_page={limit}"
|
|
req = urllib.request.Request(url, headers={'Accept': 'application/vnd.github.v3+json'})
|
|
ctx = ssl.create_default_context()
|
|
ctx.check_hostname = False
|
|
ctx.verify_mode = ssl.CERT_NONE
|
|
resp = urllib.request.urlopen(req, timeout=10, context=ctx)
|
|
data = json.loads(resp.read())
|
|
return data.get('items', [])
|
|
except:
|
|
return []
|
|
|
|
def generate_gap_report():
|
|
"""Main: analyze gaps, search solutions, create report"""
|
|
gaps = load_gaps()
|
|
existing = load_existing_tools()
|
|
|
|
report = {
|
|
'timestamp': time.strftime('%Y-%m-%dT%H:%M:%S'),
|
|
'total_gaps': len(gaps),
|
|
'gaps': {},
|
|
'recommendations': [],
|
|
'priority_wires': [],
|
|
}
|
|
|
|
for cat, score in sorted(gaps.items(), key=lambda x: x[1]):
|
|
gap_size = 90 - score
|
|
priority = 'critical' if score < 50 else ('high' if score < 60 else 'medium')
|
|
|
|
searches = CATEGORY_TO_SEARCH.get(cat, [f'{cat} ai tool'])
|
|
candidates = []
|
|
|
|
for query in searches[:2]: # Limit API calls
|
|
repos = search_github(query)
|
|
for repo in repos:
|
|
full_name = repo.get('full_name', '')
|
|
if full_name not in existing:
|
|
candidates.append({
|
|
'name': repo.get('name', ''),
|
|
'full_name': full_name,
|
|
'stars': repo.get('stargazers_count', 0),
|
|
'description': (repo.get('description', '') or '')[:100],
|
|
'url': repo.get('html_url', ''),
|
|
'language': repo.get('language', ''),
|
|
})
|
|
|
|
# Deduplicate
|
|
seen = set()
|
|
unique = []
|
|
for c in candidates:
|
|
if c['full_name'] not in seen:
|
|
seen.add(c['full_name'])
|
|
unique.append(c)
|
|
|
|
report['gaps'][cat] = {
|
|
'current_score': score,
|
|
'gap': gap_size,
|
|
'priority': priority,
|
|
'candidates': unique[:5],
|
|
}
|
|
|
|
if unique:
|
|
report['priority_wires'].extend([{
|
|
'category': cat,
|
|
'tool': c['full_name'],
|
|
'stars': c['stars'],
|
|
'reason': f"Fill {cat} gap ({score}/90 → target 70+)"
|
|
} for c in unique[:2]])
|
|
|
|
# Save
|
|
json.dump(report, open(GAP_DB, 'w'), indent=2, ensure_ascii=False)
|
|
return report
|
|
|
|
if __name__ == '__main__':
|
|
import urllib.parse
|
|
report = generate_gap_report()
|
|
print(f"Gaps: {report['total_gaps']}")
|
|
for cat, data in report['gaps'].items():
|
|
print(f" {data['priority'].upper():>8s} | {cat:<16s} {data['current_score']}/90 | {len(data['candidates'])} candidates")
|
|
print(f"\nPriority wires: {len(report['priority_wires'])}")
|
|
for pw in report['priority_wires'][:10]:
|
|
print(f" → {pw['tool']} ({pw['stars']}★) for {pw['category']}")
|