104 lines
4.0 KiB
Python
104 lines
4.0 KiB
Python
|
|
import json
import os
import re

from playwright.sync_api import Error as PlaywrightError
from playwright.sync_api import sync_playwright
|
|
|
|
# Output folder for the per-stage PNG captures taken by the audit loop.
# It is created at import time; an already existing folder is not an error.
SCREENSHOTS_DIR = "/opt/weval-l99/pw-screenshots-b10-final"
os.makedirs(SCREENSHOTS_DIR, exist_ok=True)
|
|
|
|
# DO NOT check client names as issues — user wants them kept
# Vocabulary that must not appear on a page. check_page() matches each entry
# as a whole word, ignoring case.
TOXIC = ["scrape", "scraping", "scraper", "harvest", "crawler", "sniff", "spider", "botnet"]
# Internal wording that must not leak into published copy. check_page()
# matches each entry as an exact, case-sensitive substring.
INTERNAL_JARGON = ["rebrandés", "rebrandées", "documentés, déployés", "CATALOGUE REBRAND"]

# Verify apostrophes are typographic (no ASCII ' in French elisions).
# Each entry is a case-sensitive regex prefix; check_page() appends \w so that
# only an elision directly followed by a word character is counted. Every
# elision is listed in lowercase and capitalised form so that sentence-initial
# text ("N'hésitez", "Jusqu'à") is covered as well.
ASCII_APOS_PATTERNS = [
    r"\bl'", r"\bL'", r"\bd'", r"\bD'", r"\bqu'", r"\bQu'",
    r"\bn'", r"\bN'", r"\bjusqu'", r"\bJusqu'",
]


def check_page(content):
    """Return the wording issues found in *content*.

    Args:
        content: Text to scan (the caller passes the page's HTML as a string).

    Returns:
        A list of human-readable issue strings, empty when nothing is found.
        Entries appear in a fixed order: TOXIC hits, then INTERNAL_JARGON
        hits, then ASCII-apostrophe hits, each in the order of its list.
        Every entry ends with ``x<count>``.
    """
    issues = []

    # Whole-word, case-insensitive match; the term is escaped so list entries
    # are always treated as literal text.
    for term in TOXIC:
        hits = len(re.findall(r"\b" + re.escape(term) + r"\b", content, re.IGNORECASE))
        if hits:
            issues.append(f"TOXIC '{term}' x{hits}")

    # Plain substring count: case and accents must match exactly.
    for phrase in INTERNAL_JARGON:
        hits = content.count(phrase)
        if hits:
            issues.append(f"JARGON '{phrase}' x{hits}")

    # ASCII French elisions: the typographic apostrophe (’) never matches,
    # only the ASCII one followed by a word character does.
    for pattern in ASCII_APOS_PATTERNS:
        hits = len(re.findall(pattern + r"\w", content))
        if hits:
            issues.append(f"ASCII-apos {pattern} x{hits}")

    return issues
|
|
|
|
# Audit run: open each stage in one headless Chromium page, collect wording
# issues (check_page) and a few structural markers, save a screenshot, then
# print a summary and write everything to a JSON report.
BASE_URL = "https://weval-consulting.com"
REPORT_PATH = "/opt/weval-l99/pw-report-b10-final.json"

results = {"stages": []}

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
    try:
        context = browser.new_context(viewport={"width": 1440, "height": 900})
        page = context.new_page()

        stages = [
            ("Homepage", "https://weval-consulting.com/"),
            ("Products catalogue", "https://weval-consulting.com/products/"),
            ("WEVIA EM flagship", "https://weval-consulting.com/products/wevia-em.html"),
            ("Suite Enterprise", "https://weval-consulting.com/solutions/wevia-enterprise.html"),
            ("Consulting Pro", "https://weval-consulting.com/products/consulting-pro.html"),
            ("Marketplace", "https://weval-consulting.com/marketplace"),
        ]

        for i, (name, url) in enumerate(stages, 1):
            print(f"\n[Stage {i}] {name}")
            try:
                # The query string is a cache-buster appended to every URL.
                page.goto(url + "?cb=final-pw", wait_until="networkidle", timeout=25000)
                page.wait_for_timeout(1500)

                content = page.content()
                issues = check_page(content)

                # Structural checks: element counts keyed by a short label.
                struct = {}
                if url.rstrip("/") == BASE_URL:
                    # Homepage only.
                    struct["ribbon"] = page.locator("#wv-em-ribbon").count()
                    struct["section"] = page.locator("#wv-em-flagship-section").count()
                    struct["huawei"] = page.locator("#huawei-banner-v2").count()
                    # Client logos (user wants them visible): look at the
                    # first 50 <img> elements and stop at the first visible
                    # one whose src names a known client.
                    client_logo_visible = False
                    for img in page.locator("img").all()[:50]:
                        src = img.get_attribute("src") or ""
                        if any(n in src for n in ["Abbott", "Michelin", "LOreal", "Carrefour"]):
                            if img.is_visible():
                                client_logo_visible = True
                                break
                    struct["client_logos_visible"] = client_logo_visible
                elif "marketplace" in url:
                    struct["huawei"] = page.locator("#huawei-banner-v2").count()
                    struct["ribbon"] = page.locator("#wv-em-ribbon").count()

                title = page.title()
                print(f" Title: {title[:80]}")
                if struct:
                    print(f" Structure: {struct}")
                print(f" Issues: {issues if issues else 'CLEAN'}")

                # Screenshot: stage 1 is captured at viewport size, every
                # other stage as a full-page image.
                fname = f"{SCREENSHOTS_DIR}/{i:02d}-{name.replace(' ', '-').lower()}.png"
                page.screenshot(path=fname, full_page=(i != 1))

                results["stages"].append({
                    "name": name, "url": url,
                    "title": title,
                    "issues": issues,
                    "structure": struct,
                })
            except PlaywrightError as exc:
                # A timeout or navigation failure on one stage must not
                # discard what was gathered for the others. It is recorded as
                # an issue so the stage is not counted as CLEAN.
                issues = [f"STAGE ERROR {type(exc).__name__}: {exc}"]
                print(f" Issues: {issues}")
                results["stages"].append({
                    "name": name, "url": url,
                    "title": "",
                    "issues": issues,
                    "structure": {},
                })
    finally:
        # Close the browser even when something above raised.
        browser.close()

# Summary
clean = sum(1 for s in results["stages"] if not s["issues"])
total = len(results["stages"])
print(f"\n{'='*60}")
print(f"RESULT: {clean}/{total} stages CLEAN")
print(f"Screenshots: {SCREENSHOTS_DIR}")
print(f"{'='*60}")

# Save report. The JSON keeps non-ASCII characters as-is (ensure_ascii=False),
# so the file encoding is pinned to UTF-8 instead of the locale default.
with open(REPORT_PATH, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)
|