# html/api/l99-fullscan-script.py.ref

#!/usr/bin/env python3
"""WAVE 164 — FULL USER-LIKE SCAN
Tests EVERY page via Playwright Chrome authed, like a real user would.
Captures: DOM count, JS errors, broken images, HTTP status, interactive elements.
"""
import asyncio,json,datetime,os,glob
from playwright.async_api import async_playwright

# Discover all HTML pages on disk
# Wave 133 Opus #1: exclude list for legitimate stubs/legacy/google-verif
EXCLUDE_PAGES = {
    'arsenal-offline', 'avatar-picker', 'claw-code', 'blade-ai',
    'agents-enterprise', 'agents-sim', 'infra-monitor',
    'agents-ia', 'crm', 'cron-control', 'blade-center',
}
# File-name prefixes that are never scanned; kept as a tuple so it can be
# passed straight to str.startswith().
EXCLUDE_PREFIXES = ('_', '.', '404', '500', 'google')


def page_name_for(path):
    """Return the page name for an HTML file path.

    The page name is the file's base name with only its final extension
    removed, e.g. ``/var/www/html/index.html`` -> ``index``.  Using
    ``splitext`` (rather than replacing every ``.html`` substring) keeps names
    such as ``report.html.html`` intact, so the URL built from the page name
    still points at the file that exists on disk.
    """
    return os.path.splitext(os.path.basename(path))[0]


# Sorted list of page names (no extension) for every top-level HTML file that
# is neither prefix-excluded nor explicitly listed in EXCLUDE_PAGES.
ALL_PAGES = sorted(
    page_name_for(p)
    for p in glob.glob('/var/www/html/*.html')
    if not os.path.basename(p).startswith(EXCLUDE_PREFIXES)
    and page_name_for(p) not in EXCLUDE_PAGES
)

print(f"DISCOVERED {len(ALL_PAGES)} pages")

# Priority pages (tested deep)
# NOTE(review): scan_all() does not reference this list; confirm whether
# another consumer reads it or whether it was meant to drive scan ordering.
PRIORITY = [
    'agents-archi',
    'wevia-meeting-rooms',
    'enterprise-model',
    'wevia-master',
    'director-center',
    'l99-brain',
    'paperclip',
    'wevia',
    'index',
    'admin-saas',
    'admin-v2',
    'security-dashboard',
    'cyber-monitor',
    'ethica-monitor',
    'sovereign-monitor',
    'monitoring',
    'medreach-dashboard',
    'realtime-monitor',
    'realtime-monitor-v3',
    'claude-monitor',
    'crons-monitor',
    'wevia-director-dashboard',
    'nonreg',
    'wiki',
]

async def _scan_page(ctx, page_name):
    """Load one page in a fresh tab and return its result dict.

    Args:
        ctx: Playwright browser context used to open the tab.
        page_name: Page name without extension; the URL requested is
            ``https://weval-consulting.com/<page_name>.html``.

    Returns:
        On success, a dict with the page name, HTTP status, DOM metrics,
        error counts and a boolean ``pass`` verdict (plus up to three error
        messages under ``errors`` when uncaught JS errors occurred).  If
        navigation or metric collection raises, a short dict with ``error``,
        ``status`` 0 and ``pass`` False is returned instead.
    """
    url = f"https://weval-consulting.com/{page_name}.html"
    page = await ctx.new_page()
    # These lists are local to this call, so the handlers registered below can
    # only ever append to this page's own lists.
    errors = []
    console_errors = []
    page.on("pageerror", lambda e: errors.append(str(e)[:200]))
    page.on("console", lambda m: console_errors.append(m.text[:150]) if m.type == "error" else None)

    try:
        resp = await page.goto(url, timeout=30000, wait_until="domcontentloaded")
        # If redirected to login, that means page exists but requires auth beyond our SSO state
        final_url = page.url
        is_login_redirect = "/login" in final_url or "/outpost" in final_url
        status = resp.status if resp else 0
        # Fixed 2 s pause after DOMContentLoaded before measuring the page.
        await page.wait_for_timeout(2000)

        metrics = await page.evaluate('''() => ({
                    title: document.title,
                    bodyLen: document.body.innerText.length,
                    elements: document.querySelectorAll("*").length,
                    links: document.querySelectorAll("a").length,
                    images: document.querySelectorAll("img").length,
                    forms: document.querySelectorAll("form, input, textarea, button").length,
                    scripts: document.querySelectorAll("script").length,
                    hasUnifiedOverlay: !!document.getElementById("unifiedLiveOverlay"),
                    hasLiveBar: !!document.getElementById("liveStatusBar"),
                    brokenImages: Array.from(document.querySelectorAll("img")).filter(i => i.complete && i.naturalWidth===0).length,
                    h1_count: document.querySelectorAll("h1").length,
                    viewport_h: document.body.scrollHeight,
                })''')

        # A page passes when it redirected to login, or when it returned
        # HTTP 200 with a non-trivial DOM, no uncaught JS errors and no
        # broken images.  Console errors are counted but do not fail a page.
        content_ok = (
            metrics["elements"] > 10
            and len(errors) == 0
            and metrics["brokenImages"] == 0
        )
        result = {
            "page": page_name,
            "status": status,
            "title": metrics["title"][:80],
            "elements": metrics["elements"],
            "body_len": metrics["bodyLen"],
            "links": metrics["links"],
            "images": metrics["images"],
            "broken_images": metrics["brokenImages"],
            "interactive": metrics["forms"],
            "has_overlay": metrics["hasUnifiedOverlay"],
            "js_errors": len(errors),
            "console_errors": len(console_errors),
            "body_h": metrics["viewport_h"],
            "pass": is_login_redirect or (status == 200 and content_ok),
        }
        if errors:
            result["errors"] = errors[:3]
        return result
    except Exception as e:
        # Best-effort scan: one broken page must not abort the whole run.
        return {"page": page_name, "error": str(e)[:200], "pass": False, "status": 0}
    finally:
        await page.close()


def _failure_reasons(r):
    """Return the list of human-readable reasons why result dict *r* failed."""
    reasons = []
    if r.get("status") != 200:
        reasons.append(f"HTTP {r.get('status')}")
    if r.get("error"):
        reasons.append(r["error"][:80])
    if r.get("js_errors", 0) > 0:
        reasons.append(f"{r['js_errors']} JS err")
    if r.get("broken_images", 0) > 0:
        reasons.append(f"{r['broken_images']} broken img")
    # Only report an empty DOM when the DOM was actually measured; results
    # that failed with an exception carry no "elements" key.
    if "elements" in r and r["elements"] <= 10:
        reasons.append("empty DOM")
    return reasons


async def scan_all():
    """Scan the discovered pages in headless Chromium and write a JSON report.

    Opens each page from ``ALL_PAGES`` (capped at the first 60) using the
    stored session in ``/opt/weval-l99/sso-state.json``, collects one result
    dict per page, writes the aggregate report to
    ``/opt/weval-l99/l99-fullscan-state.json`` and prints a summary followed
    by up to 20 failing pages with their reasons.

    Returns:
        None.  Results are delivered through the report file and stdout.
    """
    max_pages = 60  # cap at 60 to avoid timeout
    pages = ALL_PAGES[:max_pages]
    if len(ALL_PAGES) > len(pages):
        # Make the truncation visible instead of silently skipping pages.
        print(f"NOTE: scanning first {len(pages)} of {len(ALL_PAGES)} discovered pages (cap {max_pages})")

    results = []
    start_ts = datetime.datetime.now()
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            ctx = await browser.new_context(
                storage_state="/opt/weval-l99/sso-state.json",
                viewport={"width": 1920, "height": 1080},
                ignore_https_errors=True,
            )
            ctx.set_default_timeout(30000)

            for page_name in pages:
                results.append(await _scan_page(ctx, page_name))
        finally:
            # Close the browser even if context creation or a tab open fails.
            await browser.close()

    total = len(results)
    passed = sum(1 for r in results if r.get("pass"))
    failed = total - passed
    elapsed = (datetime.datetime.now() - start_ts).total_seconds()

    output = {
        "ts": datetime.datetime.now().isoformat(),
        "total": total,
        "passed": passed,
        "failed": failed,
        "elapsed_s": round(elapsed, 1),
        "pages": results,
    }

    with open("/opt/weval-l99/l99-fullscan-state.json", "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)

    print(f"\nFULL SCAN: {passed}/{total} PASS in {elapsed:.1f}s")
    fail_list = [r for r in results if not r.get("pass")]
    if fail_list:
        print(f"\nFAILURES ({len(fail_list)}):")
        for r in fail_list[:20]:
            print(f"  FAIL {r['page']}: {' | '.join(_failure_reasons(r)) or 'unknown'}")

# Script entry point: run the whole scan to completion via asyncio.run().
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also launches the scan.
asyncio.run(scan_all())