128 lines
4.9 KiB
Python
128 lines
4.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
OSS Discovery Pipeline — AUTO-DISCOVER tools in /opt/ → trending → Qdrant → wiki
|
|
This is THE missing link: scans /opt/, detects new tools, indexes them, reports gaps.
|
|
Cron: */120 (every 2h)
|
|
"""
|
|
import os, json, hashlib, time, urllib.request
|
|
from datetime import datetime
|
|
|
|
# Root directory scanned for tool installations.
SCAN_DIR = "/opt"

# Public JSON artifacts served from the web API directory.
TRENDING_FILE = "/var/www/html/api/oss-trending.json"

CACHE_FILE = "/var/www/html/api/oss-cache.json"

# NOTE(review): evaluated once at import time, so any consumer of this
# constant gets the process start time, not the current time.
LOG_PREFIX = f"[{datetime.now().strftime('%H:%M:%S')}]"

# Directories under SCAN_DIR that are data/infrastructure, not tools to index.
SKIP_DIRS = {"vault", "backups", "containerd", "guard", "isolated-pmta5", "loki-data", "loki-config", "n8n-data", "nocodb-data", "flowise-data", "node-v18.20.4-linux-x64", "node18-mmdc", "google", "mattermost", "authentik", "hf-spaces", "gitea", "vaultwarden", "n8n-docker", "pmta-versions", "rnd-swarm"}

# Local service endpoints — neither is referenced in this chunk; presumably
# used by code outside this view. Verify before removing.
QDRANT_URL = "http://127.0.0.1:6333"

OLLAMA_URL = "http://127.0.0.1:11434"
|
|
|
|
def log(msg):
    """Print *msg* prefixed with the current wall-clock time as ``[HH:MM:SS]``.

    The timestamp is computed per call. The module-level ``LOG_PREFIX`` was
    evaluated once at import, so under cron / long runs every line carried
    the process start time instead of the actual log time — fixed here.
    """
    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
|
|
|
|
def scan_opt(scan_dir=None, skip_dirs=None):
    """Scan a directory for tool installations and describe each one.

    Parameters (both optional, for testability; defaults preserve the
    original behavior):
        scan_dir:  directory to scan; defaults to the module SCAN_DIR.
        skip_dirs: directory names to ignore; defaults to SKIP_DIRS.

    Returns a list of dicts, one per tool directory, with keys:
        name, path, files, has_readme, has_skill, has_python, has_node,
        has_docker, wired, description, discovered.

    Best-effort: unreadable directories/files are skipped, never fatal.
    """
    scan_dir = SCAN_DIR if scan_dir is None else scan_dir
    skip_dirs = SKIP_DIRS if skip_dirs is None else skip_dirs

    # Read the chatbot router source ONCE, outside the loop — the original
    # re-opened and re-read this file for every tool directory.
    fast_lower = ""
    try:
        with open("/var/www/html/api/weval-ia-fast.php",
                  encoding="utf-8", errors="replace") as fh:
            fast_lower = fh.read().lower()
    except OSError:
        pass  # router missing/unreadable → every tool reports wired=False

    tools = []
    for d in sorted(os.listdir(scan_dir)):
        path = f"{scan_dir}/{d}"
        if not os.path.isdir(path) or d.startswith(".") or d in skip_dirs:
            continue

        # Single directory listing per tool (the original listed twice and
        # an unreadable directory aborted the whole scan).
        try:
            entries = os.listdir(path)
        except OSError:
            continue
        sample = entries[:50]  # cheap heuristic: inspect only the first 50 names

        has_readme = os.path.exists(f"{path}/README.md")
        has_skill = os.path.exists(f"{path}/SKILL.md")
        has_py = any(f.endswith(".py") for f in sample)
        has_js = any(f.endswith(".js") or f == "package.json" for f in sample)
        has_docker = os.path.exists(f"{path}/docker-compose.yml") or os.path.exists(f"{path}/Dockerfile")

        file_count = len(entries)

        # First ~200 chars of the README, newlines flattened, as description.
        desc = ""
        if has_readme:
            try:
                with open(f"{path}/README.md",
                          encoding="utf-8", errors="replace") as fh:
                    desc = fh.read(200).replace("\n", " ")
            except OSError:
                pass

        # Heuristic: a tool is "wired" when the first 6 chars of its
        # lowercased name appear anywhere in the router source.
        wired = bool(fast_lower) and d.lower()[:6] in fast_lower

        tools.append({
            "name": d,
            "path": path,
            "files": file_count,
            "has_readme": has_readme,
            "has_skill": has_skill,
            "has_python": has_py,
            "has_node": has_js,
            "has_docker": has_docker,
            "wired": wired,
            "description": desc[:150],
            "discovered": datetime.now().isoformat(),
        })

    return tools
|
|
|
|
def save_trending(tools):
    """Persist the tool list to TRENDING_FILE, biggest directories first.

    NOTE: sorts *tools* in place; later pipeline stages rely on this order.
    """
    tools.sort(key=lambda tool: tool["files"], reverse=True)
    with open(TRENDING_FILE, "w") as out:
        json.dump(tools, out, ensure_ascii=False, indent=2)
    log(f"Trending: {len(tools)} tools saved")
|
|
|
|
def save_cache(tools):
    """Write the full cache (summary report + per-tool records) to CACHE_FILE."""
    wired_count = sum(1 for tool in tools if tool["wired"])
    report = {
        "total": len(tools),
        "wired": wired_count,
        "not_wired": len(tools) - wired_count,
        "with_readme": sum(1 for tool in tools if tool["has_readme"]),
        "with_docker": sum(1 for tool in tools if tool["has_docker"]),
        "scan_time": datetime.now().isoformat(),
    }
    cache = {
        "report": report,
        "tools": {tool["name"]: tool for tool in tools},
    }
    with open(CACHE_FILE, "w") as out:
        json.dump(cache, out, ensure_ascii=False, indent=2)
    log(f"Cache: {len(tools)} tools, {report['wired']} wired, {report['not_wired']} NOT wired")
|
|
|
|
def report_gaps(tools):
    """Log up to 15 unwired tools holding >5 files; return ALL unwired tools."""
    gaps = [tool for tool in tools if not tool["wired"] and tool["files"] > 5]
    if gaps:
        log(f"⚠️ {len(gaps)} tools NOT WIRED in chatbot:")
        # (display label, record key) pairs, in output order.
        flag_table = (
            ("README", "has_readme"),
            ("DOCKER", "has_docker"),
            ("PY", "has_python"),
            ("SKILL", "has_skill"),
        )
        for tool in gaps[:15]:
            flags = [label for label, key in flag_table if tool[key]]
            log(f" → {tool['name']:35s} {tool['files']:>4d} files [{','.join(flags)}]")
    return gaps
|
|
|
|
def update_wiki(tools, unwired):
    """Drop a JSON discovery summary into the wiki directory (created if absent)."""
    wiki_dir = "/opt/weval-l99/wiki/"
    os.makedirs(wiki_dir, exist_ok=True)
    summary = {
        "date": datetime.now().isoformat(),
        "total_tools": len(tools),
        "wired": sum(1 for tool in tools if tool["wired"]),
        "not_wired": len(unwired),
        "top_unwired": [tool["name"] for tool in unwired[:10]],
    }
    with open(f"{wiki_dir}/OSS-DISCOVERY-AUTO.json", "w") as out:
        json.dump({"OSS-DISCOVERY-AUTO": summary}, out, ensure_ascii=False, indent=2)
|
|
|
|
def main():
    """Run one discovery pass: scan /opt, persist results, report gaps, update wiki."""
    log("═══ OSS DISCOVERY PIPELINE ═══")
    discovered = scan_opt()
    log(f"Scanned: {len(discovered)} tools in /opt/")
    save_trending(discovered)
    save_cache(discovered)
    gaps = report_gaps(discovered)
    update_wiki(discovered, gaps)
    log("DONE ✅")
|
|
|
|
if __name__ == "__main__":
|
|
main()
|