27 lines
1.0 KiB
Python
27 lines
1.0 KiB
Python
|
|
import re
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
# Diagnostic script: render the page in headless Chromium, then print every
# spot in the resulting HTML where an ASCII apostrophe follows one of the
# listed short prefixes (l', d', qu', n', jusqu' -- presumably French
# elisions that should use a typographic apostrophe; confirm with the owner).
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
    try:
        context = browser.new_context(viewport={"width": 1440, "height": 900})
        page = context.new_page()
        page.goto("https://weval-consulting.com/?cb=trace2", wait_until="networkidle", timeout=25000)
        # Fixed 2.5 s pause after network idle -- presumably to let late
        # client-side rendering settle before the DOM is serialized.
        page.wait_for_timeout(2500)

        # Serialized HTML of the rendered page (markup included, not just text).
        content = page.content()

        # Find ASCII apos contexts
        # Each pattern: word boundary, prefix, ASCII apostrophe, and a lookahead
        # requiring a word character right after the apostrophe.
        patterns = [r"\bl'(?=\w)", r"\bL'(?=\w)", r"\bd'(?=\w)", r"\bD'(?=\w)",
                    r"\bqu'(?=\w)", r"\bQu'(?=\w)", r"\bn'(?=\w)", r"\bjusqu'(?=\w)"]

        for p_re in patterns:
            for m in re.finditer(p_re, content):
                # Raw HTML window: up to 40 characters on each side of the
                # match, clamped at the start of the document.
                window = content[max(0, m.start() - 40):m.end() + 40]
                print(f" {p_re}: ...{window}...")
    finally:
        # Close the browser even when navigation times out or scanning fails.
        browser.close()
|