# Files
# weval-l99/pw-final-b10.py
# opus-wire 7f67eb6bdf auto-push
# 2026-04-18 14:33:55 +02:00
#
# 104 lines
# 4.0 KiB
# Python

import os, re, json
from playwright.sync_api import sync_playwright
# Folder that receives the per-stage screenshots taken further down in this
# script. It is created as soon as the module runs so the screenshots have
# somewhere to go; an already existing directory is left untouched.
SCREENSHOTS_DIR = "/opt/weval-l99/pw-screenshots-b10-final"
os.makedirs(name=SCREENSHOTS_DIR, exist_ok=True)
# Client names are deliberately NOT treated as issues: the user wants them kept.

# Words reported as TOXIC. Matched as whole words, case-insensitively.
TOXIC = ["scrape", "scraping", "scraper", "harvest", "crawler", "sniff", "spider", "botnet"]

# Phrases reported as JARGON. Matched as exact, case-sensitive substrings.
INTERNAL_JARGON = ["rebrandés", "rebrandées", "documentés, déployés", "CATALOGUE REBRAND"]

# French elisions written with an ASCII apostrophe (') instead of a
# typographic one. Each entry is a regex prefix; check_page() appends ``\w``
# so only an apostrophe that is directly followed by a word character counts.
# Matching is case-sensitive, which is why every prefix is listed in both its
# lowercase and its sentence-initial capitalised form.
ASCII_APOS_PATTERNS = [
    "\\bl'", "\\bL'",
    "\\bd'", "\\bD'",
    "\\bqu'", "\\bQu'",
    "\\bn'", "\\bN'",
    "\\bjusqu'", "\\bJusqu'",
]


def check_page(content):
    """Scan page text for unwanted wording and return the findings.

    Three checks are run, in this order:

    1. every word of ``TOXIC`` (whole word, case-insensitive);
    2. every phrase of ``INTERNAL_JARGON`` (exact substring);
    3. every prefix of ``ASCII_APOS_PATTERNS`` followed by a word character.

    Args:
        content: The text to scan, as a ``str``.

    Returns:
        A list of issue strings of the form ``"<KIND> <what> x<count>"``,
        one per word/phrase/pattern that occurred at least once. The list is
        empty when nothing was found.
    """
    issues = []

    for word in TOXIC:
        # re.escape keeps the word literal; \b on both sides prevents
        # matching inside a longer word.
        hits = re.findall(r"\b" + re.escape(word) + r"\b", content, re.IGNORECASE)
        if hits:
            issues.append(f"TOXIC '{word}' x{len(hits)}")

    for phrase in INTERNAL_JARGON:
        occurrences = content.count(phrase)
        if occurrences > 0:
            issues.append(f"JARGON '{phrase}' x{occurrences}")

    # ASCII apostrophes in French elisions (the typographic ’ is not matched).
    for prefix in ASCII_APOS_PATTERNS:
        hits = re.findall(prefix + r"\w", content)
        if hits:
            issues.append(f"ASCII-apos {prefix} x{len(hits)}")

    return issues
# Pages to audit, in order, as (label, URL) pairs. The 1-based position of a
# stage is shown on the console and used as the screenshot file-name prefix.
stages = [
    ("Homepage", "https://weval-consulting.com/"),
    ("Products catalogue", "https://weval-consulting.com/products/"),
    ("WEVIA EM flagship", "https://weval-consulting.com/products/wevia-em.html"),
    ("Suite Enterprise", "https://weval-consulting.com/solutions/wevia-enterprise.html"),
    ("Consulting Pro", "https://weval-consulting.com/products/consulting-pro.html"),
    ("Marketplace", "https://weval-consulting.com/marketplace"),
]


def _client_logo_visible(page):
    """Return True if a visible client logo is among the first 50 images.

    An image counts as a client logo when its ``src`` attribute contains one
    of the known client names. Client logos are expected to be shown (the
    user wants them visible), so this is a presence check, not an issue check.
    """
    client_names = ("Abbott", "Michelin", "LOreal", "Carrefour")
    for img in page.locator("img").all()[:50]:
        src = img.get_attribute("src") or ""
        if any(client in src for client in client_names) and img.is_visible():
            return True
    return False


def _structure_checks(page, url):
    """Collect the structural markers checked for the page at ``url``.

    Only the homepage and the marketplace page have structural checks; for
    every other URL an empty dict is returned. Element entries hold the
    number of elements matching the corresponding id selector.
    """
    struct = {}
    if url.replace("https://weval-consulting.com", "") == "/":
        # Homepage: the site root, i.e. nothing but "/" after the host.
        struct["ribbon"] = page.locator("#wv-em-ribbon").count()
        struct["section"] = page.locator("#wv-em-flagship-section").count()
        struct["huawei"] = page.locator("#huawei-banner-v2").count()
        struct["client_logos_visible"] = _client_logo_visible(page)
    elif "marketplace" in url:
        struct["huawei"] = page.locator("#huawei-banner-v2").count()
        struct["ribbon"] = page.locator("#wv-em-ribbon").count()
    return struct


# Accumulates one entry per audited stage; dumped as JSON at the end.
results = {"stages": []}

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
    try:
        context = browser.new_context(viewport={"width": 1440, "height": 900})
        page = context.new_page()

        for i, (name, url) in enumerate(stages, 1):
            print(f"\n[Stage {i}] {name}")

            # The query string is appended as a cache-buster.
            page.goto(url + "?cb=final-pw", wait_until="networkidle", timeout=25000)
            # Extra settle time after the network went idle.
            page.wait_for_timeout(1500)

            issues = check_page(page.content())
            struct = _structure_checks(page, url)
            # Read the title once so console output and report always agree.
            title = page.title()

            print(f" Title: {title[:80]}")
            if struct:
                print(f" Structure: {struct}")
            print(f" Issues: {issues if issues else 'CLEAN'}")

            # Screenshot: stage 1 captures the viewport only, every other
            # stage captures the full page.
            fname = f"{SCREENSHOTS_DIR}/{i:02d}-{name.replace(' ', '-').lower()}.png"
            page.screenshot(path=fname, full_page=(i != 1))

            results["stages"].append({
                "name": name, "url": url,
                "title": title,
                "issues": issues,
                "structure": struct
            })
    finally:
        # Close the browser even when a stage raised (e.g. a navigation
        # timeout); the exception itself still propagates.
        browser.close()

# Summary
clean = sum(1 for s in results["stages"] if not s["issues"])
total = len(results["stages"])
print(f"\n{'='*60}")
print(f"RESULT: {clean}/{total} stages CLEAN")
print(f"Screenshots: {SCREENSHOTS_DIR}")
print(f"{'='*60}")

# Save report. ensure_ascii=False writes accented characters and typographic
# apostrophes as-is, so the file encoding is pinned to UTF-8 instead of
# depending on the locale default.
with open("/opt/weval-l99/pw-report-b10-final.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)