# weval-l99/pw-final-b10.py


import json
import os
import re

from playwright.sync_api import Error as PlaywrightError
from playwright.sync_api import sync_playwright

# Directory that receives the PNG screenshot taken for each audited stage.
# It is created when the script starts (missing parent directories included);
# an already existing directory is left as-is.
SCREENSHOTS_DIR = "/opt/weval-l99/pw-screenshots-b10-final"
os.makedirs(SCREENSHOTS_DIR, exist_ok=True)

# Client names are intentionally NOT part of any list below: the user wants
# them kept on the site, so they must never be reported as issues.

# Words reported when they appear as whole words, in any letter case.
TOXIC = [
    "scrape",
    "scraping",
    "scraper",
    "harvest",
    "crawler",
    "sniff",
    "spider",
    "botnet",
]

# Internal wording reported when it appears verbatim (case-sensitive).
INTERNAL_JARGON = [
    "rebrandés",
    "rebrandées",
    "documentés, déployés",
    "CATALOGUE REBRAND",
]

# French elisions are expected to use a typographic apostrophe. Each entry is
# a regex fragment: a word boundary, the elided stem, then an ASCII "'".
ASCII_APOS_PATTERNS = [
    rf"\b{stem}'"
    for stem in ("l", "L", "d", "D", "qu", "Qu", "n", "jusqu")
]

def check_page(content):
    """Scan page markup for wording problems and return them as labels.

    Three checks run, in this order, over ``content`` (a string):

    * each ``TOXIC`` word, matched as a whole word and case-insensitively;
    * each ``INTERNAL_JARGON`` phrase, matched as an exact, case-sensitive
      substring;
    * each ``ASCII_APOS_PATTERNS`` regex followed by one word character,
      i.e. a French elision written with an ASCII apostrophe.

    Returns:
        A list with one human-readable label per term that occurred at
        least once, each ending in ``x<count>``. The list is empty when
        nothing was found.
    """
    # Toxic vocabulary: whole-word, case-insensitive occurrences.
    word_hits = (
        (word, len(re.findall(r"\b" + re.escape(word) + r"\b", content, re.IGNORECASE)))
        for word in TOXIC
    )
    found = [f"TOXIC '{word}' x{n}" for word, n in word_hits if n]

    # Internal jargon: plain substring occurrences.
    found += [
        f"JARGON '{phrase}' x{content.count(phrase)}"
        for phrase in INTERNAL_JARGON
        if phrase in content
    ]

    # ASCII French elisions: the pattern must be followed by a word character.
    for pattern in ASCII_APOS_PATTERNS:
        matches = re.findall(pattern + r"\w", content)
        if matches:
            found.append(f"ASCII-apos {pattern} x{len(matches)}")

    return found

# ---------------------------------------------------------------------------
# Main run: visit each stage in a headless Chromium, run the wording checks
# and a few structural checks, take a screenshot, then print a summary and
# save everything as a JSON report.
# ---------------------------------------------------------------------------

# Root of the audited site; the homepage stage is this root plus a single "/".
SITE_ROOT = "https://weval-consulting.com"

# Substrings looked for in <img src> values to recognise client logos
# (the user wants these logos to stay visible on the homepage).
CLIENT_LOGO_MARKERS = ("Abbott", "Michelin", "LOreal", "Carrefour")

REPORT_PATH = "/opt/weval-l99/pw-report-b10-final.json"

# One entry per stage: name, url, title, issues (list of labels), structure.
results = {"stages": []}

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
    try:
        context = browser.new_context(viewport={"width": 1440, "height": 900})
        page = context.new_page()

        stages = [
            ("Homepage", "https://weval-consulting.com/"),
            ("Products catalogue", "https://weval-consulting.com/products/"),
            ("WEVIA EM flagship", "https://weval-consulting.com/products/wevia-em.html"),
            ("Suite Enterprise", "https://weval-consulting.com/solutions/wevia-enterprise.html"),
            ("Consulting Pro", "https://weval-consulting.com/products/consulting-pro.html"),
            ("Marketplace", "https://weval-consulting.com/marketplace"),
        ]

        for i, (name, url) in enumerate(stages, 1):
            print(f"\n[Stage {i}] {name}")

            # A single unreachable or slow page must not abort the whole
            # audit: record the failure as an issue for this stage and keep
            # going, so the remaining stages and the report are still produced.
            try:
                # The query string is a cache-buster.
                page.goto(url + "?cb=final-pw", wait_until="networkidle", timeout=25000)
            except PlaywrightError as exc:
                detail = str(exc).strip()
                reason = detail.splitlines()[0] if detail else type(exc).__name__
                issues = [f"NAVIGATION FAILED: {reason}"]
                print(f"  Issues: {issues}")
                results["stages"].append({
                    "name": name, "url": url,
                    "title": "",
                    "issues": issues,
                    "structure": {},
                })
                continue

            # Extra settle time after the network went idle.
            page.wait_for_timeout(1500)

            content = page.content()
            issues = check_page(content)
            title = page.title()

            # Structural checks (only the homepage and the marketplace have any).
            struct = {}
            if url == SITE_ROOT + "/":
                struct["ribbon"] = page.locator("#wv-em-ribbon").count()
                struct["section"] = page.locator("#wv-em-flagship-section").count()
                struct["huawei"] = page.locator("#huawei-banner-v2").count()
                # True as soon as one of the first 50 images is a visible
                # client logo; the src test runs before the visibility test.
                struct["client_logos_visible"] = any(
                    any(marker in (img.get_attribute("src") or "")
                        for marker in CLIENT_LOGO_MARKERS)
                    and img.is_visible()
                    for img in page.locator("img").all()[:50]
                )
            elif "marketplace" in url:
                struct["huawei"] = page.locator("#huawei-banner-v2").count()
                struct["ribbon"] = page.locator("#wv-em-ribbon").count()

            print(f"  Title: {title[:80]}")
            if struct:
                print(f"  Structure: {struct}")
            print(f"  Issues: {issues if issues else 'CLEAN'}")

            # Screenshot: viewport only for the first stage, full page otherwise.
            fname = f"{SCREENSHOTS_DIR}/{i:02d}-{name.replace(' ', '-').lower()}.png"
            page.screenshot(path=fname, full_page=(i != 1))

            results["stages"].append({
                "name": name, "url": url,
                "title": title,
                "issues": issues,
                "structure": struct,
            })
    finally:
        # Close the browser even when a check or screenshot raises.
        browser.close()

# Summary
clean = sum(1 for s in results["stages"] if not s["issues"])
total = len(results["stages"])
print(f"\n{'='*60}")
print(f"RESULT: {clean}/{total} stages CLEAN")
print(f"Screenshots: {SCREENSHOTS_DIR}")
print(f"{'='*60}")

# Save report. ensure_ascii=False writes non-ASCII characters verbatim, so the
# file encoding is pinned to UTF-8 instead of depending on the locale default.
with open(REPORT_PATH, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)