#!/usr/bin/env python3
"""
wem-thumb-worker.py — Generate thumbnail + metadata for a page

Called by wem-screen-thumb.php in background or by cron refresh.

Usage:   python3 wem-thumb-worker.py <path>
Example: python3 wem-thumb-worker.py ethica-chatbot.html

<path> is either a bare path (served by the local S204 web server) or a full
URL on weval-consulting.com, wevads.weval-consulting.com or ethica.wevup.app.

Output:
  /var/www/html/api/screenshots/wem/<md5>.png (320x200)
  Updates /var/www/html/api/wem-page-meta.json with
  {title, http, mtime, thumb, url, ts}

Exit codes: 0 success, 1 usage error, 2 screenshot failed, 3 any other failure.

Doctrine 84.
"""

import fcntl
import hashlib
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path

WEB_ROOT = "/var/www/html"
SCREENSHOT_DIR = Path("/var/www/html/api/screenshots/wem")
META_FILE = "/var/www/html/api/wem-page-meta.json"
LOCAL_UPSTREAM = "http://127.0.0.1:5890"
WEB_OWNER = "www-data:www-data"

_WEVADS_URL = re.compile(r'^https?://wevads\.weval-consulting\.com/(.+)$')
_MAIN_URL = re.compile(r'^https?://(?:www\.)?weval-consulting\.com/(.+)$')
_ETHICA_URL = re.compile(r'^https?://ethica\.wevup\.app/(.+)$')


class ScreenshotError(Exception):
    """Raised when the browser could not write the raw screenshot."""


def resolve_target(raw):
    """Map the command-line argument to the values the worker needs.

    Args:
        raw: a bare path (``ethica-chatbot.html``) or a full URL on one of the
            known hosts.

    Returns:
        A tuple ``(path, key, url_for_meta, nav_url)`` where ``path`` is the
        path below the host, ``key`` is the meta/thumbnail lookup key,
        ``url_for_meta`` is the public URL stored in the meta entry and
        ``nav_url`` is the URL the browser actually loads.

    The navigation URL is chosen by the host that matched, not by searching
    the key for a substring, so a bare path such as ``wevads-dashboard.html``
    is still loaded from the local upstream.
    """
    m = _WEVADS_URL.match(raw)
    if m:
        path = m.group(1)
        # S95 is reachable directly over WireGuard (10.1.0.3:5890), but its
        # vhost routing needs a Host header and setting one breaks
        # Playwright's connection reuse, so go through the public URL (CF).
        # The key prefix disambiguates these pages from S204 ones.
        return path, f"wevads/{path}", raw, raw

    m = _MAIN_URL.match(raw)
    if m:
        path = m.group(1)
        return path, path, raw, f"{LOCAL_UPSTREAM}/{path}"

    m = _ETHICA_URL.match(raw)
    if m:
        path = m.group(1)
        # ethica.wevup.app proxies locally, but is vhost-routed as well:
        # load the public URL for the same reason as wevads.
        return path, f"ethica/{path}", raw, raw

    # Bare path → S204 local.
    path = raw.lstrip('/')
    return path, path, f"https://weval-consulting.com/{path}", f"{LOCAL_UPSTREAM}/{path}"


def capture_page(nav_url, fallback_title, png_path):
    """Load ``nav_url`` in headless Chromium and screenshot the viewport.

    Args:
        nav_url: URL to navigate to.
        fallback_title: title to use when the page has none or it can't be read.
        png_path: where the raw 1024x640 PNG is written.

    Returns:
        ``(http_status, title)``; ``http_status`` is 0 when navigation gave no
        response object and -1 when navigation raised. ``title`` is stripped
        and capped at 120 characters.

    Raises:
        ScreenshotError: the screenshot itself failed.
        ImportError: Playwright is not installed.
    """
    # Imported lazily so a missing Playwright is reported as a normal failure.
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
        try:
            context = browser.new_context(
                viewport={'width': 1024, 'height': 640}, ignore_https_errors=True)
            page = context.new_page()

            # A failed navigation is recorded, not fatal: whatever the page
            # shows is still screenshotted.
            try:
                resp = page.goto(nav_url, wait_until='domcontentloaded', timeout=12000)
                http_status = resp.status if resp else 0
            except Exception as e:
                http_status = -1
                print(f"nav error: {e}")

            # Give a beat for any fast JS render (best effort).
            try:
                page.wait_for_timeout(1500)
            except Exception:
                pass

            try:
                title = page.title() or fallback_title
            except Exception:
                title = fallback_title
            title = title.strip()[:120]

            try:
                page.screenshot(path=png_path, full_page=False, type='png')
            except Exception as e:
                raise ScreenshotError(str(e)) from e
        finally:
            browser.close()

    return http_status, title


def make_thumbnail(png_src, thumb_out):
    """Scale the raw screenshot to 320x200 using Pillow, else ImageMagick."""
    try:
        from PIL import Image

        with Image.open(png_src) as img:
            # 1024x640 is already 16:10, so this is a plain two-pass downscale.
            img.thumbnail((640, 400), Image.LANCZOS)
            img.resize((320, 200), Image.LANCZOS).save(thumb_out, 'PNG', optimize=True)
    except Exception:
        # Pillow missing or failed: deliberate silent fallback to `convert`.
        subprocess.run(['convert', png_src, '-resize', '320x200!', thumb_out], check=False)


def local_mtime_label(path):
    """Return the mtime of ``/var/www/html/<path>`` as e.g. ``07Mar``, or ''.

    NOTE(review): this is also evaluated for wevads/ethica pages, whose files
    may not live under the local web root — confirm that is intended.
    """
    try:
        stamp = os.path.getmtime(f"{WEB_ROOT}/{path}")
        return time.strftime('%d%b', time.localtime(stamp))
    except (OSError, ValueError, OverflowError):
        return ''


def load_meta(meta_file=META_FILE):
    """Read the meta JSON; a missing, unreadable or non-dict file yields {}."""
    try:
        with open(meta_file, encoding='utf-8') as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        return {}
    return data if isinstance(data, dict) else {}


def update_meta(key, entry, meta_file=META_FILE):
    """Merge ``entry`` under ``key`` into the meta file without losing updates.

    Workers run concurrently, so the file is re-read and rewritten while
    holding an exclusive lock on a sidecar ``<meta_file>.lock``. The new
    content goes to a per-process temp file and is renamed over the original,
    so readers never see a partial file.

    Raises:
        OSError: the temp file could not be written or renamed.
    """
    lock_fd = None
    try:
        # Opened read-only so a lock file created by another user still works.
        lock_fd = os.open(meta_file + '.lock', os.O_RDONLY | os.O_CREAT, 0o644)
        fcntl.flock(lock_fd, fcntl.LOCK_EX)
    except OSError:
        # Locking is best effort: better an unlocked write than no update.
        pass

    tmp_meta = f"{meta_file}.{os.getpid()}.tmp"
    try:
        meta = load_meta(meta_file)
        meta[key] = entry
        # Explicit UTF-8: ensure_ascii=False must not depend on the locale.
        with open(tmp_meta, 'w', encoding='utf-8') as fh:
            json.dump(meta, fh, indent=2, ensure_ascii=False)
        os.replace(tmp_meta, meta_file)
    except BaseException:
        _remove_quietly(tmp_meta)
        raise
    finally:
        if lock_fd is not None:
            os.close(lock_fd)  # closing the descriptor releases the lock


def _remove_quietly(path):
    """Delete ``path``, ignoring a missing file or any other OS error."""
    try:
        os.unlink(path)
    except OSError:
        pass


def _chown_web(path):
    """Hand ``path`` to the web user (best effort; failures are ignored)."""
    try:
        subprocess.run(['chown', WEB_OWNER, path], check=False)
    except OSError:
        pass


def main(argv):
    """Run the worker for ``argv[1]`` and return the process exit code."""
    if len(argv) < 2:
        print("Usage: wem-thumb-worker.py <path>")
        return 1

    path, key, url_for_meta, nav_url = resolve_target(argv[1])

    h = hashlib.md5(key.encode()).hexdigest()
    thumb_out = str(SCREENSHOT_DIR / f"{h}.png")
    png_tmp = f"/tmp/wem-raw-{h}.png"
    # Not created here — presumably by the caller (wem-screen-thumb.php);
    # TODO confirm. This script only removes it when done.
    lockfile = f"/tmp/wem-thumb-{h}.lock"

    try:
        http_status, title = capture_page(nav_url, path, png_tmp)

        make_thumbnail(png_tmp, thumb_out)
        _remove_quietly(png_tmp)
        _chown_web(thumb_out)

        # Key is the URL-derived key used by the HTML for lookup.
        update_meta(key, {
            'title': title,
            'http': http_status,
            'mtime': local_mtime_label(path),
            'thumb': f"/api/screenshots/wem/{h}.png",
            'url': url_for_meta,
            'ts': int(time.time()),
        })
        _chown_web(META_FILE)

        print(f"OK {path} http={http_status} title={title[:40]}")
        return 0
    except ScreenshotError as e:
        print(f"screenshot error: {e}")
        return 2
    except Exception as e:
        print(f"FATAL {path}: {e}")
        return 3
    finally:
        _remove_quietly(lockfile)


if __name__ == "__main__":
    sys.exit(main(sys.argv))