#!/usr/bin/env python3
"""
OSS Discovery Pipeline — AUTO-DISCOVER tools in /opt/ → trending → wiki

Scans the immediate sub-directories of ``SCAN_DIR``, records a few facts about
each one (README / SKILL.md / Python / Node / Docker markers, entry count,
whether it appears to be referenced from the chatbot PHP file), and then:

* writes the list, sorted by entry count, to ``TRENDING_FILE``;
* writes a summary report plus a per-tool map to ``CACHE_FILE``;
* logs the tools that are not wired in but contain more than 5 entries;
* writes a short summary JSON into ``WIKI_DIR``.

NOTE: ``QDRANT_URL`` and ``OLLAMA_URL`` are defined below but no function in
this file calls either service.

Schedule: cron, every 2 hours (e.g. ``0 */2 * * *``).
"""
import hashlib
import json
import os
import time
import urllib.request
from datetime import datetime

SCAN_DIR = "/opt"
TRENDING_FILE = "/var/www/html/api/oss-trending.json"
CACHE_FILE = "/var/www/html/api/oss-cache.json"
# Timestamp captured once at import time. Kept for backward compatibility;
# log() stamps every line with the current time instead of reusing this.
LOG_PREFIX = f"[{datetime.now().strftime('%H:%M:%S')}]"
SKIP_DIRS = {
    "vault", "backups", "containerd", "guard", "isolated-pmta5",
    "loki-data", "loki-config", "n8n-data", "nocodb-data", "flowise-data",
    "node-v18.20.4-linux-x64", "node18-mmdc", "google", "mattermost",
    "authentik", "hf-spaces",
}
QDRANT_URL = "http://127.0.0.1:6333"
OLLAMA_URL = "http://127.0.0.1:11435"
# File whose text is searched to decide whether a tool is "wired".
WIRED_SOURCE_FILE = "/var/www/html/api/weval-ia-fast.php"
# Directory that receives the discovery summary written by update_wiki().
WIKI_DIR = "/opt/weval-l99/wiki/"


def log(msg):
    """Print ``msg`` prefixed with the current wall-clock time as ``[HH:MM:SS]``."""
    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")


def _write_json(path, payload):
    """Serialize ``payload`` to ``path`` as indented, UTF-8 encoded JSON."""
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be explicit rather than inherited from the (cron) locale.
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(payload, fh, ensure_ascii=False, indent=2)


def _load_wired_source():
    """Return the lower-cased text of ``WIRED_SOURCE_FILE``, or "" if unreadable."""
    try:
        with open(WIRED_SOURCE_FILE, encoding="utf-8", errors="replace") as fh:
            return fh.read().lower()
    except OSError:
        # Best effort: without the file every tool is reported as not wired.
        return ""


def _read_description(readme_path):
    """Return the first 200 characters of a README with newlines turned into spaces.

    Returns "" when the file cannot be read.
    """
    try:
        with open(readme_path, encoding="utf-8", errors="replace") as fh:
            return fh.read(200).replace("\n", " ")
    except OSError:
        return ""


def scan_opt():
    """Scan ``SCAN_DIR`` for tool directories and describe each one.

    Hidden directories, non-directories and names listed in ``SKIP_DIRS`` are
    ignored. A sub-directory that cannot be listed is logged and skipped.

    Returns:
        A list of dicts, ordered by directory name, with the keys ``name``,
        ``path``, ``files``, ``has_readme``, ``has_skill``, ``has_python``,
        ``has_node``, ``has_docker``, ``wired``, ``description`` and
        ``discovered``.
    """
    # Read once per scan instead of once per directory.
    wired_text = _load_wired_source()
    tools = []
    for name in sorted(os.listdir(SCAN_DIR)):
        path = f"{SCAN_DIR}/{name}"
        if name.startswith(".") or name in SKIP_DIRS or not os.path.isdir(path):
            continue
        try:
            entries = os.listdir(path)
        except OSError as exc:
            # Unreadable or vanished directory must not abort the whole scan.
            log(f"Skipping {path}: {exc}")
            continue

        # Only the first 50 directory entries are inspected for language markers.
        sample = entries[:50]
        has_readme = os.path.exists(f"{path}/README.md")
        has_docker = (
            os.path.exists(f"{path}/docker-compose.yml")
            or os.path.exists(f"{path}/Dockerfile")
        )
        description = _read_description(f"{path}/README.md") if has_readme else ""

        tools.append({
            "name": name,
            "path": path,
            "files": len(entries),
            "has_readme": has_readme,
            "has_skill": os.path.exists(f"{path}/SKILL.md"),
            "has_python": any(e.endswith(".py") for e in sample),
            "has_node": any(e.endswith(".js") or e == "package.json" for e in sample),
            "has_docker": has_docker,
            # Heuristic: the first 6 characters of the directory name occur
            # anywhere in the PHP source (case-insensitive).
            "wired": name.lower()[:6] in wired_text,
            "description": description[:150],
            "discovered": datetime.now().isoformat(),
        })
    return tools


def save_trending(tools):
    """Sort ``tools`` by entry count (descending) and write them to ``TRENDING_FILE``.

    The sort is done in place, so the caller's list is reordered as well.
    """
    tools.sort(key=lambda t: -t["files"])
    _write_json(TRENDING_FILE, tools)
    log(f"Trending: {len(tools)} tools saved")


def save_cache(tools):
    """Write a summary report and a name → tool map to ``CACHE_FILE``."""
    wired_count = sum(1 for t in tools if t["wired"])
    cache = {
        "report": {
            "total": len(tools),
            "wired": wired_count,
            "not_wired": sum(1 for t in tools if not t["wired"]),
            "with_readme": sum(1 for t in tools if t["has_readme"]),
            "with_docker": sum(1 for t in tools if t["has_docker"]),
            "scan_time": datetime.now().isoformat(),
        },
        "tools": {t["name"]: t for t in tools},
    }
    _write_json(CACHE_FILE, cache)
    log(f"Cache: {len(tools)} tools, {cache['report']['wired']} wired, {cache['report']['not_wired']} NOT wired")


def report_gaps(tools):
    """Log and return the tools that are not wired and contain more than 5 entries.

    At most the first 15 are logged; the full list is returned.
    """
    unwired = [t for t in tools if not t["wired"] and t["files"] > 5]
    if unwired:
        log(f"⚠️ {len(unwired)} tools NOT WIRED in chatbot:")
        markers = (
            ("has_readme", "README"),
            ("has_docker", "DOCKER"),
            ("has_python", "PY"),
            ("has_skill", "SKILL"),
        )
        for t in unwired[:15]:
            flags = [label for key, label in markers if t[key]]
            log(f" → {t['name']:35s} {t['files']:>4d} files [{','.join(flags)}]")
    return unwired


def update_wiki(tools, unwired):
    """Write the discovery summary to ``OSS-DISCOVERY-AUTO.json`` inside ``WIKI_DIR``.

    The directory is created if it does not exist.
    """
    os.makedirs(WIKI_DIR, exist_ok=True)
    entry = {
        "OSS-DISCOVERY-AUTO": {
            "date": datetime.now().isoformat(),
            "total_tools": len(tools),
            "wired": sum(1 for t in tools if t["wired"]),
            "not_wired": len(unwired),
            "top_unwired": [t["name"] for t in unwired[:10]],
        }
    }
    _write_json(os.path.join(WIKI_DIR, "OSS-DISCOVERY-AUTO.json"), entry)


def main():
    """Run the pipeline: scan, save trending and cache, report gaps, update wiki."""
    log("═══ OSS DISCOVERY PIPELINE ═══")
    tools = scan_opt()
    log(f"Scanned: {len(tools)} tools in /opt/")
    # save_trending() reorders ``tools`` in place; the later steps see that order.
    save_trending(tools)
    save_cache(tools)
    unwired = report_gaps(tools)
    update_wiki(tools, unwired)
    log("DONE ✅")


if __name__ == "__main__":
    main()