Files
weval-l99/oss-master-pipeline.py.bak
2026-04-13 12:43:21 +02:00

258 lines
10 KiB
Python
Executable File

#!/usr/bin/env python3
"""
WEVAL OSS Master Pipeline v1.0
Full automation: OSS scan → Paperclip agents → Qdrant skills → DeerFlow → Benchmark → Enterprise-model
Source: /opt/* OSS directories
Targets: Paperclip (agents+skills), Qdrant, DeerFlow, enterprise-model.html, AI benchmark
Cron: daily 4:30 AM (before benchmark at 5:00)
Pipeline stages:
1. SCAN: Detect all OSS tools in /opt/
2. AGENTS: Create Paperclip agents for new tools
3. SKILLS: Sync skills to Qdrant via Ollama embedding
4. DEERFLOW: Update skill symlinks
5. BENCHMARK: Trigger AI benchmark comparison
6. ENTERPRISE: Sync Paperclip → enterprise-model.html
7. VALIDATE: Playwright test 0 JS errors
8. COMMIT: Git push
"""
import json, os, re, subprocess, sys, hashlib, time
import urllib.request
from datetime import datetime
from pathlib import Path
# === CONFIG ===
# Base URL of the local Paperclip API; stage_agents lists the company's agents from it.
PAPERCLIP_URL = "http://127.0.0.1:3100"
# Paperclip company whose agents are listed (the same id appears in the INSERT built by stage_agents).
COMPANY_ID = "dd12987b-c774-45e7-95fd-d34003f91650"
# Qdrant endpoint. Not referenced by any function in this file.
QDRANT_URL = "http://127.0.0.1:6333"
# Ollama endpoint. Not referenced by any function in this file.
OLLAMA_URL = "http://127.0.0.1:11435"
# URL requested by stage_benchmark to trigger the AI benchmark.
# NOTE(review): the `k=` query parameter looks like an API key committed in source — confirm and move to a secret store.
BENCHMARK_URL = "https://weval-consulting.com/api/ai-benchmark.php?k=WEVADS2026&action=benchmark"
# Path of the enterprise model page. Not referenced by any function in this file
# (stage_git spells out the directory and file name itself).
EM_FILE = "/var/www/html/enterprise-model.html"
# Directory in which stage_deerflow creates one symlink per detected tool.
DEERFLOW_SKILLS = "/opt/deer-flow/skills"
# JSON file read by load_state and rewritten by save_state.
STATE_FILE = "/opt/weval-l99/oss-pipeline-state.json"
# File that log() appends every message to.
LOG_FILE = "/tmp/oss-pipeline.log"
# OSS tool directories to scan.
# Each row is (tool name, install path, role, short description); the mapping
# below keeps the rows' order and exposes them as
# name -> {"path": ..., "role": ..., "desc": ...}.
OSS_DIRS = {
    tool: {"path": location, "role": kind, "desc": summary}
    for tool, location, kind, summary in (
        ("aios", "/opt/aios", "orchestrator", "OS for AI agents"),
        ("skillsmith", "/opt/skillsmith", "dev", "Skill generator"),
        ("goose", "/opt/goose", "dev", "AI coding agent"),
        ("activepieces", "/opt/activepieces", "integration", "Workflow automation"),
        ("supermemory", "/opt/supermemory", "data", "Memory layer"),
        ("dify", "/opt/dify", "orchestrator", "LLM app platform"),
        ("prometheus", "/opt/prometheus", "monitoring", "Metrics monitoring"),
        ("deer-flow", "/opt/deer-flow", "research", "LangGraph research"),
        ("langflow", "/opt/langflow", "orchestrator", "Visual LLM flows"),
        ("oh-my-claudecode", "/opt/oh-my-claudecode", "dev", "19 Claude agents"),
        ("openclaw", "/opt/openclaw", "dev", "Ollama skills"),
        ("browser-use", "/opt/browser-use", "scraping", "Browser automation"),
        ("mastra", "/opt/mastra", "orchestrator", "AI framework"),
        ("evomaster", "/opt/evomaster", "testing", "API test generator"),
        ("strix", "/opt/strix", "security", "Nuclei scanner"),
        ("aegis", "/opt/aegis", "security", "Security agent"),
        ("crowdsec", "/opt/crowdsec", "security", "Crowd-sourced security"),
        ("paperclip-weval", "/opt/paperclip-weval", "orchestrator", "CEO agent"),
        ("flowise", "/opt/flowise", "orchestrator", "Chatflow builder"),
    )
}
# Maps an agent role (as used in OSS_DIRS) to a short room code.
ROLE_TO_ROOM = {
    # operations
    "orchestrator": "ops",
    "general": "sal",
    "dev": "dev",
    "qa": "qa",
    "security": "sec",
    "devops": "srv",
    "data": "ai",
    "research": "con",
    "marketing": "sal",
    "scraping": "cron",
    "monitoring": "ops",
    "testing": "qa",
    "integration": "intg",
    "wevia": "wevia",
}
def log(msg):
    """Echo a timestamped message to stdout and append it to ``LOG_FILE``.

    Args:
        msg: Text to record; it is prefixed with the current time as
            ``[HH:MM:SS]``.
    """
    stamp = datetime.now().strftime('%H:%M:%S')
    entry = f"[{stamp}] {msg}"
    print(entry)
    with open(LOG_FILE, "a") as handle:
        # print() supplies the trailing newline
        print(entry, file=handle)
def api_get(url, timeout=10):
    """Fetch *url* and return its decoded JSON body, or ``None`` on failure.

    Args:
        url: Address to request; sent with an ``Accept: application/json``
            header.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The parsed JSON value, or ``None`` when the request or the JSON
        decoding failed for any reason (best-effort: errors are not raised).
    """
    try:
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as r:
            return json.loads(r.read())
    # `Exception` rather than a bare `except:` keeps the best-effort contract
    # while letting KeyboardInterrupt / SystemExit stop the pipeline.
    except Exception:
        return None
def api_post(url, data, timeout=10):
    """POST *data* as a JSON document to *url* and return the JSON reply.

    Args:
        url: Address to send the request to.
        data: JSON-serialisable payload; encoded and sent with a
            ``Content-Type: application/json`` header.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The parsed JSON response, or ``None`` after logging the error when
        anything in the request/decode chain raises.
    """
    try:
        payload = json.dumps(data).encode()
        request = urllib.request.Request(
            url,
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=timeout) as response:
            raw = response.read()
        return json.loads(raw)
    except Exception as e:
        log(f" API POST error: {e}")
        return None
def load_state():
    """Load the persisted pipeline state from ``STATE_FILE``.

    Returns:
        dict: The decoded content of the state file, or a fresh default state
        (no known tools, no previous run, zero runs) when the file is missing,
        unreadable, not valid JSON, or does not hold a JSON object.
    """
    default = {"known_tools": [], "last_run": None, "runs": 0}
    try:
        # The context manager closes the handle; a bare open() left it dangling.
        with open(STATE_FILE) as f:
            state = json.load(f)
    # OSError covers a missing/unreadable file; ValueError covers malformed
    # JSON and undecodable bytes (JSONDecodeError and UnicodeDecodeError are
    # both ValueError subclasses).
    except (OSError, ValueError):
        return default
    # Callers use dict methods on the result, so anything else is treated
    # like a corrupt file.
    return state if isinstance(state, dict) else default
def save_state(state):
    """Overwrite ``STATE_FILE`` with *state* rendered as indented JSON.

    Args:
        state: Pipeline state to persist. Values that JSON cannot represent
            natively are written as their ``str()`` form.
    """
    with open(STATE_FILE, "w") as out:
        out.write(json.dumps(state, indent=2, default=str))
# === STAGE 1: SCAN OSS ===
def stage_scan():
    """Stage 1: detect which of the configured OSS tools are installed.

    Every ``OSS_DIRS`` entry whose path is an existing directory is reported
    together with the number of ``.py/.js/.md/.php/.ts`` files below it and
    its disk usage as printed by ``du -sh``.

    Returns:
        dict: tool name -> ``{"path", "files", "size", "role", "desc"}``.
        ``size`` is ``"?"`` when ``du`` could not be run or printed nothing.
    """
    log("=== STAGE 1: SCAN OSS TOOLS ===")
    counted_suffixes = ('.py', '.js', '.md', '.php', '.ts')
    found = {}
    for name, cfg in OSS_DIRS.items():
        path = cfg["path"]
        if not os.path.isdir(path):
            continue
        files = 0
        try:
            for entry in Path(path).rglob("*"):
                # Cheap suffix test first so is_file() only stats candidates.
                if entry.suffix in counted_suffixes and entry.is_file():
                    files += 1
        except OSError as e:
            # Keep the partial count instead of aborting the whole pipeline.
            log(f" Scan error in {path}: {e}")
        found[name] = {
            "path": path,
            "files": files,
            "size": _du_size(path),
            "role": cfg["role"],
            "desc": cfg["desc"],
        }
    log(f"Found {len(found)} OSS tools installed")
    return found


def _du_size(path):
    """Return the first field of ``du -sh <path>`` output, or "?" if unavailable."""
    try:
        # No check=True: du exits non-zero when a single sub-path is
        # unreadable but still prints the total, which is all that is needed.
        proc = subprocess.run(["du", "-sh", path], stdout=subprocess.PIPE)
    except OSError:
        return "?"
    fields = proc.stdout.decode(errors="replace").split()
    return fields[0] if fields else "?"
# === STAGE 2: CREATE PAPERCLIP AGENTS ===
def stage_agents(found=None, state=None):
    """Stage 2: insert a Paperclip agent row for every OSS tool lacking one.

    Existing agents are listed through the Paperclip API; each tool whose
    normalised name is not among them gets a row inserted into the
    ``agents`` table via ``psql``.

    Args:
        found: Mapping of tool name -> info dict as returned by
            ``stage_scan`` (its "role" and "desc" keys are used). When
            omitted, a fresh scan is run.
        state: Pipeline state dict. Accepted because ``main`` passes it; it
            is neither read nor modified here.

    Returns:
        int: Number of agents created. 0 when the Paperclip API could not be
        queried, which keeps the failure result falsy as before.
    """
    log("=== STAGE 2: CREATE PAPERCLIP AGENTS (SQL) ===")
    existing = api_get(f"{PAPERCLIP_URL}/api/companies/{COMPANY_ID}/agents")
    # Only None signals a failed request. An empty list is a valid answer for
    # a company without agents and must not abort the stage.
    if existing is None:
        return 0
    if isinstance(existing, list):
        agents = existing
    elif isinstance(existing, dict):
        agents = existing.get("data", [])
    else:
        agents = []
    names = {(a.get("name") or "").lower() for a in agents if isinstance(a, dict)}
    log(f" Existing Paperclip agents: {len(names)}")
    if found is None:
        found = stage_scan()
    created = 0
    for tool, info in found.items():
        agent_name = tool.replace("-", "").replace("_", "").title()[:30]
        if agent_name.lower() in names:
            continue
        role = info.get("role", "dev")
        capabilities = f"[OSS] {info.get('desc', '?')}"
        sql = (
            "INSERT INTO agents (id,company_id,name,role,status,capabilities,"
            "adapter_type,adapter_config,budget_monthly_cents,"
            "spent_monthly_cents,created_at,updated_at) "
            "VALUES (gen_random_uuid(),"
            f"{_sql_literal(COMPANY_ID)}::uuid,"
            f"{_sql_literal(agent_name)},"
            f"{_sql_literal(role)},"
            "'active',"
            f"{_sql_literal(capabilities)},"
            "'process','{}'::jsonb,0,0,NOW(),NOW())"
        )
        try:
            r2 = subprocess.run(
                ["sudo", "-u", "postgres", "psql", "-d", "paperclip", "-c", sql],
                capture_output=True, text=True, timeout=5,
            )
        except (OSError, subprocess.TimeoutExpired) as e:
            # A hung or missing psql must not take the later stages down.
            log(f" SQL error: {e}")
            continue
        if r2.returncode == 0:
            created += 1
            # Remember the name so two tools that normalise to the same
            # agent name are not inserted twice in one run.
            names.add(agent_name.lower())
            log(f" Created: {agent_name}")
        else:
            log(f" SQL error: {r2.stderr[:80]}")
    log(f" Created {created} new Paperclip agents")
    return created


def _sql_literal(value):
    """Render *value* as a single-quoted SQL string literal (quotes doubled)."""
    # Assumes PostgreSQL's default standard_conforming_strings=on, where the
    # single quote is the only character needing escape — TODO confirm.
    return "'" + str(value).replace("'", "''") + "'"
def stage_skills():
    """Stage 3: run the Qdrant skill-sync script and report whether it succeeded.

    The last five lines of the script's combined stdout/stderr are copied to
    the pipeline log.

    Returns:
        bool: True when the script exited with status 0 and its stderr does
        not contain "error" (case-insensitive). False otherwise, including
        when the script could not be started or exceeded the 120 s timeout.
    """
    log("=== STAGE 3: SKILL SYNC (Qdrant) ===")
    try:
        result = subprocess.run(
            ["python3", "/opt/weval-l99/qdrant-skill-sync.py"],
            capture_output=True, text=True, timeout=120,
        )
    except (OSError, subprocess.TimeoutExpired) as e:
        # Report the failure instead of aborting the remaining stages.
        log(f" Skill sync failed to run: {e}")
        return False
    tail = (result.stdout + result.stderr).strip().split("\n")[-5:]
    for line in tail:
        log(f" {line}")
    # The exit status is checked as well: e.g. a missing script makes python3
    # exit non-zero with a message that does not contain the word "error".
    return result.returncode == 0 and "error" not in result.stderr.lower()
# === STAGE 4: DEERFLOW SYMLINKS ===
def stage_deerflow(found):
    """Stage 4: symlink each detected tool into the DeerFlow skills directory.

    For every tool the link source is ``<path>/skills`` when that directory
    exists, otherwise the tool's own directory. The link is created as
    ``DEERFLOW_SKILLS/<name>`` unless something already exists under that
    name.

    Args:
        found: Mapping of tool name -> info dict with a "path" key, as
            returned by ``stage_scan``.

    Returns:
        int: Number of symlinks created during this call.
    """
    log("=== STAGE 4: DEERFLOW SKILL LINKS ===")
    deerflow_real = os.path.realpath(DEERFLOW_SKILLS)
    linked = 0
    for name, info in found.items():
        skill_dir = os.path.join(info["path"], "skills")
        if not os.path.isdir(skill_dir):
            skill_dir = info["path"]
        if not os.path.isdir(skill_dir):
            continue
        # Skip sources that are, or contain, the DeerFlow skills directory
        # (the "deer-flow" tool itself): linking them would create a link
        # that points back at one of its own ancestors.
        skill_real = os.path.realpath(skill_dir)
        if deerflow_real == skill_real or deerflow_real.startswith(skill_real + os.sep):
            continue
        target = os.path.join(DEERFLOW_SKILLS, name)
        # lexists() also sees dangling symlinks, which exists() reports as
        # absent and which would make os.symlink fail on every run.
        if os.path.lexists(target):
            continue
        try:
            os.symlink(skill_dir, target)
        except OSError as e:
            # Best-effort: record the failure and carry on with the next tool.
            log(f" Link failed: {name} -> {target}: {e}")
            continue
        log(f" Linked: {name} -> {target}")
        linked += 1
    log(f"DeerFlow: {linked} new links")
    return linked
# === STAGE 5: AI BENCHMARK ===
def stage_benchmark():
    """Stage 5: trigger the AI benchmark endpoint and log its reported status.

    Returns:
        bool: True when ``BENCHMARK_URL`` answered with a JSON object within
        60 seconds; False (after logging the error) on any failure.
    """
    log("=== STAGE 5: AI BENCHMARK ===")
    try:
        req = urllib.request.Request(BENCHMARK_URL)
        # The timeout belongs to urlopen(); Request() has no such parameter,
        # so passing it there raised TypeError and the benchmark never ran.
        with urllib.request.urlopen(req, timeout=60) as r:
            d = json.loads(r.read())
        log(f" Benchmark: {d.get('status', 'unknown')}")
        return True
    except Exception as e:
        log(f" Benchmark trigger: {e}")
        return False
# === STAGE 6: ENTERPRISE-MODEL SYNC ===
def stage_enterprise():
    """Stage 6: run the enterprise-model sync script and relay its output.

    Every line of the script's stdout is copied to the pipeline log.

    Returns:
        bool: True when stdout contains "DONE" or "Already synced"; False
        otherwise, including when the script could not be started or
        exceeded the 60 s timeout.
    """
    log("=== STAGE 6: ENTERPRISE-MODEL SYNC ===")
    try:
        result = subprocess.run(
            ["python3", "/opt/weval-l99/enterprise-model-sync.py"],
            capture_output=True, text=True, timeout=60,
        )
    except (OSError, subprocess.TimeoutExpired) as e:
        # Report the failure instead of aborting the remaining stages.
        log(f" Enterprise sync failed to run: {e}")
        return False
    for line in result.stdout.strip().split("\n"):
        log(f" {line}")
    return "DONE" in result.stdout or "Already synced" in result.stdout
# === STAGE 7: PLAYWRIGHT VALIDATE ===
def stage_validate():
    """Stage 7: run the Playwright test script and check for zero JS errors.

    Returns:
        bool: True when the test script is absent (validation is skipped) or
        when its stdout contains "JS errors: 0"; False otherwise, including
        when the script could not be started or exceeded the 30 s timeout.
    """
    log("=== STAGE 7: PLAYWRIGHT VALIDATION ===")
    # NOTE(review): the script is executed from /tmp, which is normally
    # world-writable — confirm this location is intended.
    test_script = "/tmp/test-em.py"
    if not os.path.exists(test_script):
        log(" Playwright test not found, skipping")
        return True
    try:
        result = subprocess.run(
            ["python3", test_script],
            capture_output=True, text=True, timeout=30,
        )
    except (OSError, subprocess.TimeoutExpired) as e:
        # Report the failure instead of aborting the remaining stages.
        log(f" Playwright validation failed to run: {e}")
        return False
    output = result.stdout.strip()
    for line in output.split("\n"):
        log(f" {line}")
    return "JS errors: 0" in output
# === STAGE 8: GIT COMMIT ===
def stage_git():
    """Stage 8: commit ``enterprise-model.html`` if it changed, then push.

    Runs in ``/var/www/html``: stages the file, commits only when the index
    differs from HEAD, and pushes to ``github main``. Each step runs only if
    the previous one succeeded; failures are logged and end the stage.
    """
    log("=== STAGE 8: GIT COMMIT ===")
    repo = "/var/www/html"

    def git(*args, **kwargs):
        """Run one git command inside the repository and return its exit code."""
        return subprocess.run(["git", *args], cwd=repo, **kwargs).returncode

    # Explicit steps replace the shell chain `A && B && C || D && E`, in which
    # a failed `cd` or `git add` still fell through to `git commit` — run in
    # whatever directory the process happened to be in.
    try:
        if git("add", "enterprise-model.html") != 0:
            log(" Git add failed, skipping commit/push")
            return
        # `git diff --cached --quiet` exits 0 when nothing is staged.
        if git("diff", "--cached", "--quiet") != 0:
            if git("commit", "-m", "PIPELINE: OSS→Paperclip→Enterprise auto-sync") != 0:
                log(" Git commit failed, skipping push")
                return
        # Push even when nothing new was committed, as the shell chain did;
        # push diagnostics stay suppressed.
        if git("push", "github", "main", stderr=subprocess.DEVNULL) != 0:
            log(" Git push failed")
            return
    except OSError as e:
        # Raised when git is not installed or the repository directory is missing.
        log(f" Git error: {e}")
        return
    log(" Git done")
# === MAIN ===
def main():
    """Run the eight pipeline stages in order, persist state, send an alert.

    The per-stage results are stored under ``last_results`` in the state
    file, logged as JSON, and summarised in a Telegram message. A failure to
    deliver that message is logged and otherwise ignored.
    """
    # Imported here too so the alert works when main() is called from
    # another module rather than through the __main__ guard.
    import urllib.parse

    log("=" * 60)
    log("WEVAL OSS Master Pipeline v1.0")
    log("=" * 60)
    state = load_state()
    state["runs"] = state.get("runs", 0) + 1
    state["last_run"] = datetime.now().isoformat()
    results = {}
    # Stage 1: Scan
    found = stage_scan()
    results["scan"] = len(found)
    # Stage 2: Paperclip agents
    created = stage_agents(found, state)
    results["agents_created"] = created
    # Stage 3: Qdrant skills
    results["skills"] = stage_skills()
    # Stage 4: DeerFlow
    results["deerflow_links"] = stage_deerflow(found)
    # Stage 5: Benchmark
    results["benchmark"] = stage_benchmark()
    # Stage 6: Enterprise sync
    results["enterprise"] = stage_enterprise()
    # Stage 7: Validate
    results["validated"] = stage_validate()
    # Stage 8: Git
    stage_git()
    state["last_results"] = results
    save_state(state)
    log("=" * 60)
    log(f"PIPELINE COMPLETE: {json.dumps(results)}")
    log("=" * 60)
    # Telegram alert
    # Both branches of the verdict used to be empty strings, so the message
    # never showed whether validation passed.
    verdict = "✅" if results.get("validated") else "❌"
    msg = f"🔄 OSS Pipeline: {len(found)} tools, +{created} agents, validate={verdict}"
    # NOTE(review): the bot token and chat id are hard-coded in this URL;
    # they should come from the environment or a secret store, and the token
    # should be rotated since it has been committed.
    alert_url = f"https://api.telegram.org/bot8544624912:AAHtUBJwv-UccFYMBG_rMj9l3K3mG6x3x7c/sendMessage?chat_id=7605775322&text={urllib.parse.quote(msg)}"
    try:
        # The context manager closes the HTTP response once the call returns.
        with urllib.request.urlopen(alert_url, timeout=5):
            pass
    except Exception as e:
        # Best-effort alert: log only the error type so the token-bearing URL
        # can never end up in the log file.
        log(f" Telegram alert failed: {type(e).__name__}")
# Script entry point: runs the full pipeline when the file is executed directly.
if __name__ == "__main__":
    # main() calls urllib.parse.quote when building the Telegram alert URL,
    # so the submodule is imported before main() runs.
    import urllib.parse
    main()