Files
weval-l99/wevia-gap-analyzer.py
Claude-P0 e45cdb2d7d P0 BUSINESS DOSSIERS: 5 dossiers opérationnels prêts pour Yacine
- Kaouther (Ethica Group) contre-offre paliers DH
- Azure AD re-register MDEnt777, AdoraReborn, pwceducation
- OVH SMS credentials procédure
- OVH S151 cancel contrat (bleeding money)
- Gmail deliverability PMTA→O365 (reco: OPTION A)

- MD file: /opt/weval-l99/wiki/P0-BUSINESS-DOSSIERS.md (5776 bytes)
- HTML preview: /p0-dossiers.php (HTTP 200, banner 4.8/10)
- WEVIA Master intent: p0_status wired (live HCPs 146668)
- Playwright: 6 sections verified
- L99: 304/304 preserved · chattr +i restored
2026-04-17 04:26:16 +02:00

262 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
"""WEVIA GAP ANALYZER v1.0 — Find EVERY untested element across 4 machines
Scans reality → compares to test coverage → reports gaps
"""
import subprocess as sp,json,os,glob,time,re
from datetime import datetime
from collections import defaultdict
# Append-only run log written by lg() (also echoed to stdout).
LOG="/var/log/wevia-gap-analyzer.log"
# Machine-readable output consumed by the dashboard API.
RESULT="/var/www/html/api/wevia-gap-analysis.json"
# Timestamp of this analyzer run (naive local time).
ts=datetime.now()
def lg(m):
    """Log a message: echo it to stdout and append it to the run log file."""
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {m}"
    print(line, flush=True)
    with open(LOG, "a") as fh:
        fh.write(line + "\n")
def cmd(c, t=10):
    """Run a shell command locally and return its stripped stdout.

    Args:
        c: shell command string (executed with shell=True).
        t: timeout in seconds (default 10).
    Returns:
        Stripped stdout on success (regardless of exit code), or "" if the
        command times out or cannot be spawned.
    """
    try:
        return sp.run(c, shell=True, capture_output=True, text=True,
                      timeout=t, errors='replace').stdout.strip()
    except (sp.SubprocessError, OSError, ValueError):
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. TimeoutExpired is a SubprocessError subclass.
        return ""
def sentinel(c):
    """Run a shell command on S95 via the sentinel-brain HTTP bridge.

    Args:
        c: shell command string; URL-quoted into the bridge's `cmd` parameter.
    Returns:
        The bridge's "output" field, or "" on any failure: curl missing,
        timeout, HTTP error (-f), malformed JSON, or unexpected payload shape.
    """
    try:
        import urllib.parse as up
        r=sp.run(["curl","-sf","--max-time","8",f"http://10.1.0.3:5890/api/sentinel-brain.php?action=exec&cmd={up.quote(c)}"],
                 capture_output=True,text=True,timeout=12)
        d=json.loads(r.stdout)
        return d.get("output","")
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt /
        # SystemExit. Exception still covers JSONDecodeError, TimeoutExpired,
        # FileNotFoundError (no curl) and AttributeError (non-dict payload).
        return ""
lg("="*60)
lg(f"GAP ANALYZER — {ts}")
# Run-wide accumulators, keyed by category ("pages", "apis", "docker", ...):
#   TOTAL   — how many items of each category exist in reality
#   COVERED — how many of those are exercised by existing tests
#   GAPS    — lists (mostly) of untested items / gap descriptions
GAPS=defaultdict(list)
COVERED=defaultdict(int)
TOTAL=defaultdict(int)
# ═══════════════════════════════════════
# 1. S204 — FULL INVENTORY
# ═══════════════════════════════════════
# Inventory everything that exists on this machine (S204); each count feeds
# TOTAL[...] and the raw lists are compared against test coverage in step 5.
lg("═══ S204 FULL SCAN ═══")
# All listening ports
s204_ports=cmd("ss -tlnp|grep LISTEN|awk '{print $4}'|grep -oP ':\\K\\d+'|sort -un").split("\n")
s204_ports=[p for p in s204_ports if p]
TOTAL["s204_ports"]=len(s204_ports)
lg(f" Ports: {len(s204_ports)}")
# All HTML pages
all_pages=sorted([os.path.basename(f) for f in glob.glob("/var/www/html/*.html")])
TOTAL["pages"]=len(all_pages)
# All APIs
all_apis=sorted([os.path.basename(f) for f in glob.glob("/var/www/html/api/*.php")])
TOTAL["apis"]=len(all_apis)
# All Docker
docker_raw=cmd("docker ps --format '{{.Names}}'")
all_docker=docker_raw.split("\n") if docker_raw else []
TOTAL["docker"]=len(all_docker)
# All crons
root_crons=cmd("crontab -l 2>/dev/null|grep -v '^#'|grep -v '^$'").split("\n")
www_crons=cmd("sudo -u www-data crontab -l 2>/dev/null|grep -v '^#'|grep -v '^$'").split("\n")
crond_files=cmd("ls /etc/cron.d/ 2>/dev/null").split("\n")
# NOTE(review): crond_files is collected but NOT folded into all_crons below,
# so /etc/cron.d entries are excluded from TOTAL["crons"] — confirm intended
# (the file is longer than this view; it may be used further down).
all_crons=[c for c in root_crons+www_crons if c.strip()]
TOTAL["crons"]=len(all_crons)
# All nginx domains
nginx_domains=cmd("grep -rh 'server_name' /etc/nginx/sites-enabled/|grep -oP '[a-z][a-z0-9.-]+\\.(com|app)'|sort -u").split("\n")
nginx_domains=[d for d in nginx_domains if d]
TOTAL["domains"]=len(nginx_domains)
# All DB schemas/tables
# SECURITY NOTE: the PG password is inlined in the command (visible in the
# process list and in this file) — consider ~/.pgpass or an env var instead.
db_tables=cmd("PGPASSWORD=admin123 psql -U admin -d adx_system -t -c \"SELECT schemaname||'.'||tablename FROM pg_tables WHERE schemaname NOT IN ('pg_catalog','information_schema')\" 2>/dev/null").split("\n")
db_tables=[t.strip() for t in db_tables if t.strip()]
TOTAL["db_tables"]=len(db_tables)
# All systemd services
systemd=cmd("systemctl list-units --type=service --state=running --no-pager|grep -oP '^\\s*\\K[a-z][-a-z0-9]*\\.service'").split("\n")
systemd=[s for s in systemd if s]
TOTAL["systemd"]=len(systemd)
# Ollama models
# NOTE(review): the `or` fallback only covers EMPTY curl output; non-empty
# malformed JSON would still make json.loads raise and abort the run.
ollama=json.loads(cmd("curl -sf http://127.0.0.1:11434/api/tags") or '{"models":[]}').get("models",[])
TOTAL["ollama_models"]=len(ollama)
# Qdrant collections
qdrant=json.loads(cmd("curl -sf http://127.0.0.1:6333/collections") or '{"result":{"collections":[]}}').get("result",{}).get("collections",[])
TOTAL["qdrant_collections"]=len(qdrant)
# Protected pages (SSO)
# NOTE(review): `grep -B0 ''` matches every line and appears to be a no-op
# pass-through filter.
sso_pages=cmd("grep -oP '/[a-z_-]+\\.html' /etc/nginx/sites-enabled/weval-consulting|grep -B0 ''").split("\n")
sso_pages=[p.strip("/") for p in sso_pages if p.strip()]
TOTAL["sso_pages"]=len(set(sso_pages))
# ═══════════════════════════════════════
# 2. S95 SCAN
# ═══════════════════════════════════════
# S95 is inventoried remotely via sentinel(); each reply is a plain string.
# The isdigit() guards turn empty/error replies into 0.
# NOTE(review): if the bridge returns counts with surrounding whitespace,
# isdigit() is False and the count silently falls back to 0 — confirm the
# bridge trims its output.
lg("═══ S95 SCAN ═══")
s95_ports_raw=sentinel("ss -tlnp|grep LISTEN|awk '{print $4}'|grep -oP ':\\K\\d+'|sort -un")
s95_ports=[p for p in s95_ports_raw.split("\n") if p] if s95_ports_raw else []
TOTAL["s95_ports"]=len(s95_ports)
s95_crons=sentinel("crontab -l 2>/dev/null|grep -v '^#'|grep -v '^$'|wc -l")
TOTAL["s95_crons"]=int(s95_crons) if s95_crons.isdigit() else 0
s95_crond=sentinel("ls /etc/cron.d/ 2>/dev/null|wc -l")
TOTAL["s95_crond"]=int(s95_crond) if s95_crond.isdigit() else 0
# Arsenal screens
s95_arsenal=sentinel("ls /opt/wevads-arsenal/public/*.html 2>/dev/null|wc -l")
TOTAL["s95_arsenal"]=int(s95_arsenal) if s95_arsenal.isdigit() else 0
s95_apis=sentinel("ls /opt/wevads-arsenal/public/api/*.php 2>/dev/null|wc -l")
TOTAL["s95_apis"]=int(s95_apis) if s95_apis.isdigit() else 0
lg(f" S95: {len(s95_ports)} ports, crons={TOTAL['s95_crons']}, arsenal={TOTAL['s95_arsenal']} screens, apis={TOTAL['s95_apis']}")
# ═══════════════════════════════════════
# 3. S151 SCAN
# ═══════════════════════════════════════
lg("═══ S151 SCAN ═══")
# S151 is limited access
s151_http=cmd("curl -sf -o /dev/null -w '%{http_code}' http://151.80.235.110/ --max-time 5")
# NOTE(review): with -w '%{http_code}' curl prints "000" even on connection
# failure, so s151_http is truthy (and this counts 1) unless curl itself
# failed to run — confirm this is the intended "reachable" signal.
TOTAL["s151_http"]=1 if s151_http else 0
lg(f" S151: HTTP={s151_http}")
# ═══════════════════════════════════════
# 4. WHAT L99 CURRENTLY COVERS
# ═══════════════════════════════════════
lg("═══ L99 COVERAGE ANALYSIS ═══")
# Read all L99 result files
# Build sets of item names that existing L99 test-result files already cover;
# these are diffed against the inventories from steps 1-3 in step 5.
covered_pages=set()
covered_apis=set()
covered_docker=set()
covered_domains=set()
for rf in ["/var/www/html/api/l99-functional-result.json","/var/www/html/api/l99-ux-results.json",
           "/var/www/html/api/l99-results.json","/var/www/html/api/wevia-agents-pack-status.json",
           "/var/www/html/api/wevia-systematic-status.json"]:
    try:
        d=json.load(open(rf))
        # Result files use either a "tests" or a "results" top-level list.
        tests=d.get("tests",d.get("results",[]))
        if isinstance(tests,list):
            for t in tests:
                # Each entry names the tested item under "name" or short key "n".
                name=t.get("name",t.get("n",""))
                layer=t.get("layer","")
                # Strip SSO/public prefixes so names line up with raw filenames.
                if ".html" in name: covered_pages.add(name.replace("sso-","").replace("pub-",""))
                if ".php" in name: covered_apis.add(name)
                if layer=="DOCKER" or "DOCKER" in name: covered_docker.add(name.replace("DOCKER:",""))
                # NOTE(review): `or` binds looser than `and`, so this parses as
                # layer=="DOMAIN" or ("." in name and (...)) — confirm intended.
                if layer=="DOMAIN" or "." in name and ("weval" in name or "wevup" in name):
                    covered_domains.add(name)
    # Missing or corrupt result files are skipped — coverage is best-effort.
    except:pass
# ═══════════════════════════════════════
# 5. FIND ALL GAPS
# ═══════════════════════════════════════
# Diff the reality inventories (steps 1-3) against L99 coverage (step 4);
# long gap lists are capped ([:30]/[:20]) to keep the JSON report small.
lg("═══ GAP ANALYSIS ═══")
# Pages not tested
# NOTE(review): the second clause does a substring test against the *repr*
# of the covered_pages set — a deliberately fuzzy match; confirm intended.
untested_pages=[p for p in all_pages if p not in covered_pages and p.replace(".html","") not in str(covered_pages)]
GAPS["pages_not_tested"]=untested_pages[:30]
COVERED["pages"]=len(all_pages)-len(untested_pages)
lg(f" Pages: {COVERED['pages']}/{len(all_pages)} tested | {len(untested_pages)} GAPS")
# APIs not tested
# Underscore-prefixed PHP files are treated as internal helpers and skipped.
untested_apis=[a for a in all_apis if a not in covered_apis and not a.startswith("_")]
GAPS["apis_not_tested"]=untested_apis[:30]
COVERED["apis"]=len(all_apis)-len(untested_apis)
lg(f" APIs: {COVERED['apis']}/{len(all_apis)} tested | {len(untested_apis)} GAPS")
# Docker not tested
untested_docker=[d for d in all_docker if d not in covered_docker]
GAPS["docker_not_tested"]=untested_docker
COVERED["docker"]=len(all_docker)-len(untested_docker)
lg(f" Docker: {COVERED['docker']}/{len(all_docker)} tested | {len(untested_docker)} GAPS")
# Domains not tested
untested_domains=[d for d in nginx_domains if d not in covered_domains and d not in str(covered_domains)]
GAPS["domains_not_tested"]=untested_domains
COVERED["domains"]=len(nginx_domains)-len(untested_domains)
lg(f" Domains: {COVERED['domains']}/{len(nginx_domains)} tested | {len(untested_domains)} GAPS")
# SSO pages without auth test
GAPS["sso_not_verified"]=list(set(sso_pages)-covered_pages)[:20]
lg(f" SSO pages: {len(set(sso_pages))-len(GAPS['sso_not_verified'])}/{len(set(sso_pages))} verified")
# DB tables not queried in tests
# All discovered tables are reported as gaps — per-table coverage is TBD.
GAPS["db_not_tested"]=db_tables[:20] if len(db_tables)>0 else []
lg(f" DB tables: {len(db_tables)} exist (coverage TBD)")
# S95 not covered
GAPS["s95_gaps"]=[]
if TOTAL["s95_arsenal"]>0: GAPS["s95_gaps"].append(f"{TOTAL['s95_arsenal']} Arsenal screens not E2E tested")
if TOTAL["s95_apis"]>0: GAPS["s95_gaps"].append(f"{TOTAL['s95_apis']} Arsenal APIs not tested")
if TOTAL["s95_crons"]>0: GAPS["s95_gaps"].append(f"{TOTAL['s95_crons']} S95 crons not verified")
# Crons not monitored
# NOTE(review): unlike most GAPS entries (lists), this value is a plain
# string — JSON consumers must expect mixed value types.
GAPS["crons_not_monitored"]=f"{len(all_crons)} crons exist, monitoring checks count only"
# Ports not tested
# Hard-coded allowlist of ports the existing suites already probe.
# NOTE(review): the log's numerator is the allowlist size, which may include
# ports not actually open on S204 — it can overstate tested ports.
tested_ports={"443","80","5432","6333","8888","9090","11434","5678","8065","3088","3200","8000","9443","3000"}
untested_ports=[p for p in s204_ports if p not in tested_ports]
GAPS["ports_not_tested"]=untested_ports[:20]
lg(f" Ports: {len(tested_ports)}/{len(s204_ports)} tested | {len(untested_ports)} GAPS")
# Integration tests missing
# Static checklist of cross-system pipelines that have no E2E test yet.
GAPS["integration_missing"]=[
    "Chatbot → Qdrant RAG pipeline (query + retrieve + respond)",
    "WEVIA Life → IMAP → classify → PostgreSQL pipeline",
    "Ethica scraper → DB → campaign pipeline",
    "n8n → webhook → action engine integration",
    "Authentik → Forward Auth → app access E2E flow",
    "WEVADS → Arsenal → S95 MTA pipeline",
    "Provider cascade T0→T1→T2 failover test",
    "Git push → GitHub → webhook → deploy pipeline",
    "Fine-tuning → HuggingFace → Ollama update pipeline",
    "WEVIA KB → Qdrant ingest → semantic search pipeline",
]
# Business scenarios missing
# Static checklist of end-to-end business flows with no scenario test yet.
GAPS["business_missing"]=[
    "Client onboarding: homepage→contact→calendly→proposal",
    "HCP campaign: scrape→validate→consent→send→track",
    "Incident: alert→detect→diagnose→fix→verify→close",
    "Sprint: plan→develop→test→review→deploy→retro",
    "AI query: user→chatbot→provider cascade→RAG→response",
]
# ═══════════════════════════════════════
# 6. SUMMARY
# ═══════════════════════════════════════
# Roll every category up into a single coverage figure, persist the full
# report as JSON for the dashboard API, then log a human-readable recap.
total_items=sum(TOTAL.values())
total_covered=sum(COVERED.values())
# Guard the division: an empty scan (total_items == 0) must not crash.
gap_pct=round((1-total_covered/total_items)*100,1) if total_items else 0
result={
    "timestamp":ts.isoformat(),
    "totals":dict(TOTAL),
    "covered":dict(COVERED),
    "gaps":dict(GAPS),
    "summary":{
        "total_items":total_items,
        "total_covered":total_covered,
        "gap_pct":gap_pct,
        "coverage_pct":round(total_covered/total_items*100,1) if total_items else 0
    }
}
# FIX: the original passed an anonymous open(RESULT,"w") handle to json.dump
# and never closed it; a context manager guarantees flush + close even if
# serialization raises.
with open(RESULT,"w") as f:
    json.dump(result,f,indent=2)
lg(f"\n{'='*60}")
lg(f"TOTAL ARCHITECTURE: {total_items} items")
lg(f"TESTED: {total_covered} ({result['summary']['coverage_pct']}%)")
lg(f"GAPS: {total_items-total_covered} ({gap_pct}%)")
lg(f"\nTOP GAPS:")
lg(f" Pages not tested: {len(GAPS.get('pages_not_tested',[]))}")
lg(f" APIs not tested: {len(GAPS.get('apis_not_tested',[]))}")
lg(f" Docker not tested: {len(GAPS.get('docker_not_tested',[]))}")
lg(f" Ports not tested: {len(GAPS.get('ports_not_tested',[]))}")
lg(f" Integration missing: {len(GAPS.get('integration_missing',[]))}")
lg(f" Business scenarios: {len(GAPS.get('business_missing',[]))}")
lg(f"{'='*60}")