# weval-l99/oss-master-pipeline.py.bak

#!/usr/bin/env python3
"""
WEVAL OSS Master Pipeline v1.0
Full automation: OSS scan → Paperclip agents → Qdrant skills → DeerFlow → Benchmark → Enterprise-model

Source: /opt/* OSS directories
Targets: Paperclip (agents+skills), Qdrant, DeerFlow, enterprise-model.html, AI benchmark
Cron: daily 4:30 AM (before benchmark at 5:00)

Pipeline stages:
  1. SCAN: Detect all OSS tools in /opt/
  2. AGENTS: Create Paperclip agents for new tools
  3. SKILLS: Sync skills to Qdrant via Ollama embedding
  4. DEERFLOW: Update skill symlinks
  5. BENCHMARK: Trigger AI benchmark comparison
  6. ENTERPRISE: Sync Paperclip → enterprise-model.html
  7. VALIDATE: Playwright test (expects 0 JS errors)
  8. COMMIT: Git commit and push
"""
import json, os, re, subprocess, sys, hashlib, time
import urllib.request
from datetime import datetime
from pathlib import Path

# === CONFIG ===
# Base URL of the Paperclip API; stage_agents lists existing agents through it.
PAPERCLIP_URL = "http://127.0.0.1:3100"
# Company identifier used in the Paperclip agents endpoint path.
COMPANY_ID = "dd12987b-c774-45e7-95fd-d34003f91650"
# Qdrant endpoint. Not referenced directly by the code visible in this file.
QDRANT_URL = "http://127.0.0.1:6333"
# Ollama endpoint. Not referenced directly by the code visible in this file.
OLLAMA_URL = "http://127.0.0.1:11435"
# URL requested by stage_benchmark.
# NOTE(review): the query string carries what looks like an access key (k=...);
# consider loading it from the environment instead of committing it.
BENCHMARK_URL = "https://weval-consulting.com/api/ai-benchmark.php?k=WEVADS2026&action=benchmark"
# Path of the enterprise-model page that the pipeline commits in stage 8.
EM_FILE = "/var/www/html/enterprise-model.html"
# Directory in which stage_deerflow creates one symlink per detected tool.
DEERFLOW_SKILLS = "/opt/deer-flow/skills"
# JSON file read by load_state() and written by save_state().
STATE_FILE = "/opt/weval-l99/oss-pipeline-state.json"
# File that log() appends every message to.
LOG_FILE = "/tmp/oss-pipeline.log"

# OSS tool directories to scan.
# Maps a tool name to its install "path", its "role" and a short "desc".
# stage_scan() reports only the entries whose path exists as a directory and
# copies role/desc into its result.
OSS_DIRS = {
    "aios": {"path": "/opt/aios", "role": "orchestrator", "desc": "OS for AI agents"},
    "skillsmith": {"path": "/opt/skillsmith", "role": "dev", "desc": "Skill generator"},
    "goose": {"path": "/opt/goose", "role": "dev", "desc": "AI coding agent"},
    "activepieces": {"path": "/opt/activepieces", "role": "integration", "desc": "Workflow automation"},
    "supermemory": {"path": "/opt/supermemory", "role": "data", "desc": "Memory layer"},
    "dify": {"path": "/opt/dify", "role": "orchestrator", "desc": "LLM app platform"},
    "prometheus": {"path": "/opt/prometheus", "role": "monitoring", "desc": "Metrics monitoring"},
    "deer-flow": {"path": "/opt/deer-flow", "role": "research", "desc": "LangGraph research"},
    "langflow": {"path": "/opt/langflow", "role": "orchestrator", "desc": "Visual LLM flows"},
    "oh-my-claudecode": {"path": "/opt/oh-my-claudecode", "role": "dev", "desc": "19 Claude agents"},
    "openclaw": {"path": "/opt/openclaw", "role": "dev", "desc": "Ollama skills"},
    "browser-use": {"path": "/opt/browser-use", "role": "scraping", "desc": "Browser automation"},
    "mastra": {"path": "/opt/mastra", "role": "orchestrator", "desc": "AI framework"},
    "evomaster": {"path": "/opt/evomaster", "role": "testing", "desc": "API test generator"},
    "strix": {"path": "/opt/strix", "role": "security", "desc": "Nuclei scanner"},
    "aegis": {"path": "/opt/aegis", "role": "security", "desc": "Security agent"},
    "crowdsec": {"path": "/opt/crowdsec", "role": "security", "desc": "Crowd-sourced security"},
    "paperclip-weval": {"path": "/opt/paperclip-weval", "role": "orchestrator", "desc": "CEO agent"},
    "flowise": {"path": "/opt/flowise", "role": "orchestrator", "desc": "Chatflow builder"},
}

# Maps a role name (the same vocabulary as the "role" values in OSS_DIRS, plus
# a few extra roles) to a short room code.
# NOTE(review): no function visible in this file reads this table; presumably
# it mirrors the room ids used by the enterprise-model page — TODO confirm.
ROLE_TO_ROOM = {
    'orchestrator': 'ops', 'general': 'sal', 'dev': 'dev', 'qa': 'qa',
    'security': 'sec', 'devops': 'srv', 'data': 'ai', 'research': 'con',
    'marketing': 'sal', 'scraping': 'cron', 'monitoring': 'ops',
    'testing': 'qa', 'integration': 'intg', 'wevia': 'wevia',
}

def log(msg):
    """Print a timestamped message and append it to LOG_FILE.

    Args:
        msg: Text to record. It is prefixed with the local wall-clock time
            formatted as ``[HH:MM:SS]``.
    """
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(line)
    # Explicit UTF-8 keeps the log encoding independent of the process locale;
    # messages relayed from child scripts may contain non-ASCII characters.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(line + "\n")

def api_get(url, timeout=10):
    """GET *url* and return the decoded JSON body, or None on failure.

    Args:
        url: Address to fetch; the request carries an
            ``Accept: application/json`` header.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        The parsed JSON value, or None when building the request, the
        transfer, or the JSON decoding fails. Failures are deliberately
        silent: callers treat None as "service unavailable".
    """
    try:
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as r:
            return json.loads(r.read())
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and SystemExit
        # still propagate, while URL/network/HTTP/JSON errors keep yielding None.
        return None

def api_post(url, data, timeout=10):
    """POST *data* as a JSON document to *url* and return the decoded reply.

    Args:
        url: Address to post to.
        data: JSON-serialisable payload.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        The parsed JSON response, or None when anything fails (serialisation,
        transfer or decoding); the error is reported through ``log``.
    """
    try:
        payload = json.dumps(data).encode()
        request = urllib.request.Request(
            url,
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=timeout) as response:
            raw = response.read()
        return json.loads(raw)
    except Exception as e:
        log(f"  API POST error: {e}")
        return None

def load_state():
    """Load the persisted pipeline state from STATE_FILE.

    Returns:
        The decoded JSON object, or a fresh default state (``known_tools``
        empty, ``last_run`` None, ``runs`` 0) when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    default = {"known_tools": [], "last_run": None, "runs": 0}
    try:
        # The context manager closes the handle that a bare
        # ``json.load(open(...))`` would leave to the garbage collector.
        with open(STATE_FILE, encoding="utf-8") as f:
            state = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed content
        # (json.JSONDecodeError and UnicodeDecodeError both derive from it).
        return default
    # main() indexes the state by key, so anything but a dict is unusable.
    return state if isinstance(state, dict) else default

def save_state(state):
    """Persist *state* to STATE_FILE as indented JSON.

    Args:
        state: Mapping to store. Values the json module cannot encode
            natively are converted with ``str`` (``default=str``).

    Raises:
        ValueError: If *state* cannot be serialised (e.g. circular reference).
        OSError: If STATE_FILE cannot be written.
    """
    # Serialise before opening the file: mode "w" truncates immediately, so a
    # failure inside json.dump() would leave an empty or partial state file.
    payload = json.dumps(state, indent=2, default=str)
    with open(STATE_FILE, "w", encoding="utf-8") as f:
        f.write(payload)

# === STAGE 1: SCAN OSS ===
def _count_source_files(root):
    """Count regular files under *root* with a .py/.js/.md/.php/.ts suffix."""
    tracked = {'.py', '.js', '.md', '.php', '.ts'}
    count = 0
    try:
        for entry in Path(root).rglob("*"):
            # Cheap suffix test first; is_file() needs a stat() call.
            if entry.suffix in tracked and entry.is_file():
                count += 1
    except OSError as e:
        # An unreadable sub-directory should not abort the whole scan.
        log(f"  Scan warning: {root}: {e}")
    return count


def _disk_usage(path):
    """Return the size column printed by ``du -sh path``, or "?" if unavailable."""
    try:
        proc = subprocess.run(["du", "-sh", path], capture_output=True, text=True)
    except (OSError, subprocess.SubprocessError) as e:
        log(f"  Scan warning: du failed for {path}: {e}")
        return "?"
    # du exits non-zero when part of the tree is unreadable but still prints a
    # total, so the output is used regardless of the return code.
    fields = proc.stdout.split()
    return fields[0] if fields else "?"


def stage_scan():
    """Stage 1: detect which tools from OSS_DIRS are installed.

    Returns:
        dict mapping each tool whose configured path is an existing directory
        to ``{"path", "files", "size", "role", "desc"}``. ``files`` is the
        number of source-like files found, ``size`` the ``du -sh`` figure
        ("?" when du could not be run).
    """
    log("=== STAGE 1: SCAN OSS TOOLS ===")
    found = {}
    for name, cfg in OSS_DIRS.items():
        path = cfg["path"]
        if not os.path.isdir(path):
            continue
        found[name] = {
            "path": path,
            "files": _count_source_files(path),
            "size": _disk_usage(path),
            "role": cfg["role"],
            "desc": cfg["desc"],
        }
    log(f"Found {len(found)} OSS tools installed")
    return found

# === STAGE 2: CREATE PAPERCLIP AGENTS ===
def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal (quotes doubled)."""
    return "'" + str(value).replace("'", "''") + "'"


def stage_agents(found=None, state=None):
    """Stage 2: insert a Paperclip agent row for every tool that has none yet.

    Args:
        found: Mapping of tool name to info dict as returned by stage_scan()
            (``role`` and ``desc`` are read). When omitted, stage_scan() is
            called so the function also works without arguments.
        state: Pipeline state passed by main(); accepted for call
            compatibility and currently not used.

    Returns:
        int: number of agents created in this run (0 when the Paperclip API
        could not be queried).
    """
    log("=== STAGE 2: CREATE PAPERCLIP AGENTS (SQL) ===")
    existing = api_get(f"{PAPERCLIP_URL}/api/companies/{COMPANY_ID}/agents")
    # Only None means the lookup failed; an empty list is a valid answer
    # ("no agents yet") and must not stop agent creation.
    if existing is None:
        log("  Paperclip API unavailable, skipping agent creation")
        return 0
    if isinstance(existing, list):
        agents = existing
    elif isinstance(existing, dict):
        agents = existing.get("data") or []
    else:
        log("  Unexpected Paperclip API response, skipping agent creation")
        return 0
    names = {(a.get("name") or "").lower() for a in agents if isinstance(a, dict)}
    log(f"  Existing Paperclip agents: {len(names)}")

    if found is None:
        found = stage_scan()

    created = 0
    for tool, info in found.items():
        agent_name = tool.replace("-", "").replace("_", "").title()[:30]
        key = agent_name.lower()
        if key in names:
            continue
        role = info.get("role", "dev")
        desc = info.get("desc", "?")
        # Values are embedded as escaped literals: psql's -c option does not
        # perform variable interpolation, so quoting is done here.
        sql = (
            "INSERT INTO agents (id,company_id,name,role,status,capabilities,"
            "adapter_type,adapter_config,budget_monthly_cents,spent_monthly_cents,"
            "created_at,updated_at) VALUES (gen_random_uuid(),"
            f"{_sql_literal(COMPANY_ID)}::uuid,{_sql_literal(agent_name)},"
            f"{_sql_literal(role)},'active',{_sql_literal('[OSS] ' + str(desc))},"
            "'process','{}'::jsonb,0,0,NOW(),NOW())"
        )
        try:
            proc = subprocess.run(
                ["sudo", "-u", "postgres", "psql", "-d", "paperclip", "-c", sql],
                capture_output=True, text=True, timeout=5,
            )
        except (OSError, subprocess.SubprocessError) as e:
            # Missing sudo/psql or a timeout affects this tool only.
            log(f"  SQL error: {e}")
            continue
        if proc.returncode == 0:
            created += 1
            # Remember the name so two tools that normalise to the same
            # agent name are not inserted twice in one run.
            names.add(key)
            log(f"  Created: {agent_name}")
        else:
            log(f"  SQL error: {proc.stderr[:80]}")
    log(f"  Created {created} new Paperclip agents")
    return created

def stage_skills():
    """Stage 3: run the Qdrant skill-sync script and relay its last output lines.

    Returns:
        bool: True when the script exited with status 0 and its stderr does
        not contain the word "error" (case-insensitive); False otherwise,
        including when the script could not be started or timed out.
    """
    log("=== STAGE 3: SKILL SYNC (Qdrant) ===")
    try:
        result = subprocess.run(["python3", "/opt/weval-l99/qdrant-skill-sync.py"],
                                capture_output=True, text=True, timeout=120)
    except (OSError, subprocess.SubprocessError) as e:
        # Timeout or missing interpreter: report failure, keep the pipeline alive.
        log(f"  Skill sync failed to run: {e}")
        return False
    lines = (result.stdout + result.stderr).strip().split("\n")
    for line in lines[-5:]:
        log(f"  {line}")
    # The exit status is checked as well: e.g. a missing script makes python3
    # exit non-zero with a message that does not contain "error".
    return result.returncode == 0 and "error" not in result.stderr.lower()

# === STAGE 4: DEERFLOW SYMLINKS ===
def stage_deerflow(found):
    """Stage 4: symlink each tool's skills directory into DEERFLOW_SKILLS.

    For every tool the link source is ``<path>/skills`` when that directory
    exists, otherwise the tool directory itself. A link named after the tool
    is created inside DEERFLOW_SKILLS unless something already exists there.

    Args:
        found: Mapping of tool name to info dict; only ``path`` is read.

    Returns:
        int: number of symlinks created in this call.
    """
    log("=== STAGE 4: DEERFLOW SKILL LINKS ===")
    linked = 0
    dest_root = os.path.realpath(DEERFLOW_SKILLS)
    for name, info in found.items():
        skill_dir = os.path.join(info["path"], "skills")
        if not os.path.isdir(skill_dir):
            skill_dir = info["path"]
        if not os.path.isdir(skill_dir):
            continue
        target = os.path.join(DEERFLOW_SKILLS, name)
        # lexists() also sees dangling symlinks, which exists() reports as
        # absent and which would make os.symlink() fail with FileExistsError.
        if os.path.lexists(target):
            continue
        src_real = os.path.realpath(skill_dir)
        # A source that is DEERFLOW_SKILLS itself (or one of its parents)
        # would produce a link pointing back up its own tree, i.e. a cycle.
        if os.path.commonpath([src_real, dest_root]) == src_real:
            log(f"  Skipped: {name} (source contains the DeerFlow skills dir)")
            continue
        try:
            os.symlink(skill_dir, target)
        except OSError as e:
            # Best effort per tool, but no longer silent.
            log(f"  Link failed: {name}: {e}")
            continue
        log(f"  Linked: {name} -> {target}")
        linked += 1
    log(f"DeerFlow: {linked} new links")
    return linked

# === STAGE 5: AI BENCHMARK ===
def stage_benchmark():
    """Stage 5: request BENCHMARK_URL and log the ``status`` field of the reply.

    Returns:
        bool: True when the request succeeded and the body decoded as a JSON
        object; False on any error (which is logged).
    """
    log("=== STAGE 5: AI BENCHMARK ===")
    try:
        req = urllib.request.Request(BENCHMARK_URL)
        # The timeout belongs to urlopen(); Request() has no such parameter,
        # so passing it there raised TypeError on every run.
        with urllib.request.urlopen(req, timeout=60) as r:
            d = json.loads(r.read())
        log(f"  Benchmark: {d.get('status', 'unknown')}")
        return True
    except Exception as e:
        log(f"  Benchmark trigger: {e}")
        return False

# === STAGE 6: ENTERPRISE-MODEL SYNC ===
def stage_enterprise():
    """Stage 6: run the enterprise-model sync script and relay its stdout.

    Returns:
        bool: True when the script's stdout contains "DONE" or
        "Already synced"; False otherwise, including when the script could
        not be started or timed out.
    """
    log("=== STAGE 6: ENTERPRISE-MODEL SYNC ===")
    try:
        result = subprocess.run(["python3", "/opt/weval-l99/enterprise-model-sync.py"],
                                capture_output=True, text=True, timeout=60)
    except (OSError, subprocess.SubprocessError) as e:
        # A hung or unlaunchable script must not abort the remaining stages.
        log(f"  Enterprise sync failed to run: {e}")
        return False
    for line in result.stdout.strip().split("\n"):
        log(f"  {line}")
    return "DONE" in result.stdout or "Already synced" in result.stdout

# === STAGE 7: PLAYWRIGHT VALIDATE ===
def stage_validate():
    """Stage 7: run the Playwright check script and look for "JS errors: 0".

    Returns:
        bool: True when the script is absent (validation skipped) or its
        stdout contains "JS errors: 0"; False otherwise, including when the
        script could not be started or timed out.
    """
    log("=== STAGE 7: PLAYWRIGHT VALIDATION ===")
    # NOTE(review): the script is executed from /tmp, which is typically
    # world-writable — consider moving it next to the other pipeline scripts.
    test_script = "/tmp/test-em.py"
    if not os.path.exists(test_script):
        log("  Playwright test not found, skipping")
        return True
    try:
        result = subprocess.run(["python3", test_script],
                                capture_output=True, text=True, timeout=30)
    except (OSError, subprocess.SubprocessError) as e:
        # A browser test that hangs past the timeout counts as a failed check.
        log(f"  Playwright test failed to run: {e}")
        return False
    output = result.stdout.strip()
    for line in output.split("\n"):
        log(f"  {line}")
    return "JS errors: 0" in output

# === STAGE 8: GIT COMMIT ===
def stage_git():
    """Stage 8: stage, commit (if changed) and push the enterprise-model page.

    The repository directory and file name are taken from EM_FILE. Steps:
    ``git add``, then ``git commit`` only when the index differs from HEAD,
    then ``git push github main`` (push stderr is discarded). The first
    failing step is logged and ends the stage; nothing is raised.
    """
    log("=== STAGE 8: GIT COMMIT ===")
    repo_dir, tracked = os.path.split(EM_FILE)

    def git(*args, quiet=False):
        """Run one git command inside repo_dir and return its exit status."""
        return subprocess.run(
            ["git", *args],
            cwd=repo_dir,
            stderr=subprocess.DEVNULL if quiet else None,
        ).returncode

    try:
        # Explicit sequencing replaces the shell chain `a && b && c || d && e`,
        # in which a failed `cd` or `git add` still fell through to
        # `git commit` (possibly in the wrong working directory).
        if git("add", tracked) != 0:
            log("  Git add failed")
            return
        # `git diff --cached --quiet` exits non-zero when something is staged.
        if git("diff", "--cached", "--quiet") != 0:
            if git("commit", "-m", "PIPELINE: OSS→Paperclip→Enterprise auto-sync") != 0:
                log("  Git commit failed")
                return
        if git("push", "github", "main", quiet=True) != 0:
            log("  Git push failed")
            return
    except OSError as e:
        # git not installed, or repo_dir does not exist.
        log(f"  Git unavailable: {e}")
        return
    log("  Git done")

# === MAIN ===
def main():
    """Run the eight pipeline stages in order, persist the results and notify.

    Each stage is isolated: an unexpected exception is logged and replaced by
    a neutral fallback value, so the remaining stages, the state-file update
    and the Telegram notification still happen. The run counter, timestamp
    and per-stage results are stored in the state file.
    """
    # Imported here so main() does not depend on the import performed under
    # the ``__main__`` guard when the module is used as a library.
    from urllib.parse import quote

    def run_stage(label, func, *args, fallback=None):
        """Call func(*args); on any exception log it and return *fallback*."""
        try:
            return func(*args)
        except Exception as e:
            log(f"  Stage {label} failed: {e!r}")
            return fallback

    log("=" * 60)
    log("WEVAL OSS Master Pipeline v1.0")
    log("=" * 60)
    state = load_state()
    state["runs"] = state.get("runs", 0) + 1
    state["last_run"] = datetime.now().isoformat()
    results = {}

    # Stage 1: Scan
    found = run_stage("scan", stage_scan, fallback={})
    results["scan"] = len(found)

    # Stage 2: Paperclip agents
    created = run_stage("agents", stage_agents, found, state, fallback=0)
    results["agents_created"] = created

    # Stage 3: Qdrant skills
    results["skills"] = run_stage("skills", stage_skills, fallback=False)

    # Stage 4: DeerFlow
    results["deerflow_links"] = run_stage("deerflow", stage_deerflow, found, fallback=0)

    # Stage 5: Benchmark
    results["benchmark"] = run_stage("benchmark", stage_benchmark, fallback=False)

    # Stage 6: Enterprise sync
    results["enterprise"] = run_stage("enterprise", stage_enterprise, fallback=False)

    # Stage 7: Validate
    results["validated"] = run_stage("validate", stage_validate, fallback=False)

    # Stage 8: Git
    run_stage("git", stage_git)

    state["last_results"] = results
    run_stage("save_state", save_state, state)

    log("=" * 60)
    log(f"PIPELINE COMPLETE: {json.dumps(results)}")
    log("=" * 60)

    # Telegram alert (best effort: a failed notification is only logged).
    # NOTE(security): the bot token and chat id are hard-coded below; they
    # should be rotated and read from the environment or a secrets store.
    try:
        msg = f"🔄 OSS Pipeline: {len(found)} tools, +{created} agents, validate={'✅' if results.get('validated') else '❌'}"
        url = f"https://api.telegram.org/bot8544624912:AAHtUBJwv-UccFYMBG_rMj9l3K3mG6x3x7c/sendMessage?chat_id=7605775322&text={quote(msg)}"
        with urllib.request.urlopen(url, timeout=5):
            pass
    except Exception as e:
        log(f"  Telegram alert failed: {e}")

# Script entry point: run the pipeline only when executed directly.
if __name__ == "__main__":
    # main() uses urllib.parse.quote to URL-encode the Telegram message text.
    import urllib.parse
    main()