# Files
# weval-l99/oss-discovery.py.bak-skip5-143738
# 2026-04-16 16:39:53 +02:00
#
# 128 lines
# 4.8 KiB
# Python

#!/usr/bin/env python3
"""
OSS Discovery Pipeline — AUTO-DISCOVER tools in /opt/ → trending → Qdrant → wiki
This is THE missing link: scans /opt/, detects new tools, indexes them, reports gaps.
Cron: */120 (every 2h)
"""
import os, json, hashlib, time, urllib.request
from datetime import datetime
# Root directory whose immediate sub-directories are inspected by scan_opt().
SCAN_DIR = "/opt"
# JSON file written by save_trending().
TRENDING_FILE = "/var/www/html/api/oss-trending.json"
# JSON file written by save_cache().
CACHE_FILE = "/var/www/html/api/oss-cache.json"
# "[HH:MM:SS]" prefix for log lines. Evaluated once, when the module is
# loaded, so the value does not advance while the script is running.
LOG_PREFIX = f"[{datetime.now().strftime('%H:%M:%S')}]"
# Sub-directory names under SCAN_DIR that scan_opt() ignores.
SKIP_DIRS = {"vault", "backups", "containerd", "guard", "isolated-pmta5", "loki-data", "loki-config", "n8n-data", "nocodb-data", "flowise-data", "node-v18.20.4-linux-x64", "node18-mmdc", "google", "mattermost", "authentik", "hf-spaces"}
# Local service endpoints. NOTE(review): no function in the visible part of
# this file references either of them — confirm whether they are still needed.
QDRANT_URL = "http://127.0.0.1:6333"
OLLAMA_URL = "http://127.0.0.1:11435"
def log(msg):
    """Print *msg* to stdout, prefixed with the current time as ``[HH:MM:SS]``.

    The timestamp is taken on every call, so successive lines of one run
    show when each step actually happened instead of all repeating the
    moment the module was imported.
    """
    stamp = datetime.now().strftime("%H:%M:%S")
    print(f"[{stamp}] {msg}")
def scan_opt(scan_dir=None, skip_dirs=None,
             wiring_file="/var/www/html/api/weval-ia-fast.php"):
    """Scan a directory of installed tools and describe each sub-directory.

    Args:
        scan_dir: Directory whose immediate children are inspected.
            Defaults to the module-level SCAN_DIR.
        skip_dirs: Collection of child names to ignore. Defaults to the
            module-level SKIP_DIRS.
        wiring_file: Text file searched to decide each tool's "wired" flag.

    Returns:
        A list of dicts, one per kept sub-directory, ordered by name. Each
        dict has the keys name, path, files, has_readme, has_skill,
        has_python, has_node, has_docker, wired, description, discovered.

    Raises:
        OSError: if *scan_dir* itself cannot be listed.
    """
    if scan_dir is None:
        scan_dir = SCAN_DIR
    if skip_dirs is None:
        skip_dirs = SKIP_DIRS

    # The wiring file does not change between directories: read and
    # lower-case it once instead of once per tool. A missing or unreadable
    # file simply means nothing is reported as wired.
    try:
        with open(wiring_file, encoding="utf-8", errors="replace") as fh:
            wiring_text = fh.read().lower()
    except OSError:
        wiring_text = None

    tools = []
    for name in sorted(os.listdir(scan_dir)):
        path = f"{scan_dir}/{name}"
        if not os.path.isdir(path) or name.startswith(".") or name in skip_dirs:
            continue

        # One listing serves the extension checks and the file count. An
        # unreadable directory is still reported (as empty) rather than
        # aborting the whole scan.
        try:
            entries = os.listdir(path)
        except OSError:
            entries = []
        sample = entries[:50]  # extension checks only look at the first 50 entries

        has_readme = os.path.exists(f"{path}/README.md")
        has_skill = os.path.exists(f"{path}/SKILL.md")
        has_py = any(entry.endswith(".py") for entry in sample)
        has_js = any(entry.endswith(".js") or entry == "package.json" for entry in sample)
        has_docker = (os.path.exists(f"{path}/docker-compose.yml")
                      or os.path.exists(f"{path}/Dockerfile"))

        # Description: first 200 characters of the README, newlines flattened.
        desc = ""
        if has_readme:
            try:
                with open(f"{path}/README.md", encoding="utf-8", errors="replace") as fh:
                    desc = fh.read(200).replace("\n", " ")
            except OSError:
                pass  # best effort: keep the empty description

        # Heuristic: the tool counts as wired when the first six characters
        # of its lower-cased name occur anywhere in the wiring file.
        wired = wiring_text is not None and name.lower()[:6] in wiring_text

        tools.append({
            "name": name,
            "path": path,
            "files": len(entries),
            "has_readme": has_readme,
            "has_skill": has_skill,
            "has_python": has_py,
            "has_node": has_js,
            "has_docker": has_docker,
            "wired": wired,
            "description": desc[:150],
            "discovered": datetime.now().isoformat(),
        })
    return tools
def save_trending(tools):
    """Sort *tools* by file count, largest first, and write them to TRENDING_FILE.

    The list is sorted in place, so a caller that keeps using the same list
    afterwards sees it in the sorted order.

    Args:
        tools: List of tool dicts, each with an integer "files" entry.
    """
    tools.sort(key=lambda tool: tool["files"], reverse=True)
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file is
    # opened as UTF-8 explicitly instead of relying on the locale default.
    with open(TRENDING_FILE, "w", encoding="utf-8") as f:
        json.dump(tools, f, ensure_ascii=False, indent=2)
    log(f"Trending: {len(tools)} tools saved")
def save_cache(tools):
    """Write all tools, keyed by name, plus a summary report to CACHE_FILE.

    The JSON document has two top-level keys: "report" (counts and the scan
    time) and "tools" (a mapping from tool name to its dict).

    Args:
        tools: List of tool dicts with "name", "wired", "has_readme" and
            "has_docker" entries.
    """
    total = len(tools)
    wired_count = sum(1 for t in tools if t["wired"])
    report = {
        "total": total,
        "wired": wired_count,
        "not_wired": total - wired_count,
        "with_readme": sum(1 for t in tools if t["has_readme"]),
        "with_docker": sum(1 for t in tools if t["has_docker"]),
        "scan_time": datetime.now().isoformat(),
    }
    cache = {"report": report, "tools": {t["name"]: t for t in tools}}
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file is
    # opened as UTF-8 explicitly instead of relying on the locale default.
    with open(CACHE_FILE, "w", encoding="utf-8") as f:
        json.dump(cache, f, ensure_ascii=False, indent=2)
    log(f"Cache: {len(tools)} tools, {cache['report']['wired']} wired, {cache['report']['not_wired']} NOT wired")
def report_gaps(tools):
    """Log and return the tools that are not wired and hold more than 5 files.

    At most the first 15 matches are logged, each with its name, file count
    and the markers found for it; the full list of matches is returned.
    """
    # (dict key, label shown in the log line), in display order.
    marker_labels = (
        ("has_readme", "README"),
        ("has_docker", "DOCKER"),
        ("has_python", "PY"),
        ("has_skill", "SKILL"),
    )

    candidates = []
    for tool in tools:
        if tool["wired"] or tool["files"] <= 5:
            continue
        candidates.append(tool)

    if not candidates:
        return candidates

    log(f"⚠️ {len(candidates)} tools NOT WIRED in chatbot:")
    for tool in candidates[:15]:
        present = [label for key, label in marker_labels if tool[key]]
        log(f"{tool['name']:35s} {tool['files']:>4d} files [{','.join(present)}]")
    return candidates
def update_wiki(tools, unwired):
    """Write a summary of the discovery run to the wiki directory.

    Creates /opt/weval-l99/wiki/ if needed and overwrites
    OSS-DISCOVERY-AUTO.json in it with the date, the tool counts and the
    names of the first ten unwired tools.

    Args:
        tools: List of all tool dicts (each with a "wired" entry).
        unwired: List of unwired tool dicts (each with a "name" entry).
    """
    wiki_dir = "/opt/weval-l99/wiki/"
    os.makedirs(wiki_dir, exist_ok=True)
    entry = {
        "OSS-DISCOVERY-AUTO": {
            "date": datetime.now().isoformat(),
            "total_tools": len(tools),
            "wired": sum(1 for t in tools if t["wired"]),
            "not_wired": len(unwired),
            "top_unwired": [t["name"] for t in unwired[:10]],
        }
    }
    # os.path.join avoids the doubled slash that plain concatenation with the
    # trailing-slash directory would produce.
    target = os.path.join(wiki_dir, "OSS-DISCOVERY-AUTO.json")
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file is
    # opened as UTF-8 explicitly instead of relying on the locale default.
    with open(target, "w", encoding="utf-8") as f:
        json.dump(entry, f, ensure_ascii=False, indent=2)
def main():
    """Run the pipeline: scan, write both JSON outputs, report gaps, update the wiki."""
    log("═══ OSS DISCOVERY PIPELINE ═══")
    discovered = scan_opt()
    log(f"Scanned: {len(discovered)} tools in /opt/")
    # Order matters: the trending step runs first, then the cache step, and
    # both receive the very same list object that the later steps use.
    for persist in (save_trending, save_cache):
        persist(discovered)
    update_wiki(discovered, report_gaps(discovered))
    log("DONE ✅")


if __name__ == "__main__":
    main()