Files
html/api/wem-thumb-refresh-all.py
2026-04-17 22:35:01 +02:00

100 lines
3.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
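wem-thumb-refresh-all.py — Batch refresh of page thumbnails, run every 6h via cron
Reads /var/www/html/api/wem-inventory.json and iterates over its categorized_pages
Calls wem-thumb-worker.py for each page whose thumbnail is missing or older than 6h (parallel, max 4 workers)
Prints progress to stdout; the cron entry is expected to redirect it to /tmp/wem-refresh.log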
Doctrine 84.
"""
import json, os, subprocess, time, hashlib
from concurrent.futures import ThreadPoolExecutor, as_completed
# Filesystem locations used by this script.
INV = '/var/www/html/api/wem-inventory.json'        # page inventory (JSON)
WORKER = '/var/www/html/api/wem-thumb-worker.py'    # per-page thumbnail script
THUMB_DIR = '/var/www/html/api/screenshots/wem'     # where thumbnails are looked up
TTL = 21600  # 6h — thumbnails younger than this are considered fresh

start = time.time()
print(f"=== WEM thumb refresh start {time.strftime('%Y-%m-%d %H:%M:%S')} ===")

# Load the inventory. Without it there is nothing to refresh, so abort with
# a non-zero status after reporting the reason.
try:
    # Explicit encoding: JSON is UTF-8, while the locale default under cron
    # may be something else.
    with open(INV, encoding="utf-8") as f:
        inv = json.load(f)
except Exception as e:
    print(f"inventory read fail: {e}")
    # SystemExit instead of exit(): the exit() helper is injected by the
    # `site` module and is not available when Python runs without it.
    raise SystemExit(1)
# Collect all scannable pages (internal S204 + S95 wevads + ethica)
import re as _re

# Absolute URLs are accepted only when they point at one of these hosts.
WHITELIST = _re.compile(r'^https?://(?:(?:www\.)?weval-consulting\.com|wevads\.weval-consulting\.com|ethica\.wevup\.app)/')

pages = set()

# Tolerate a malformed inventory: a non-dict root, a JSON null, or a
# non-dict "categorized_pages" value all yield "no pages" instead of a crash.
cp = inv.get('categorized_pages') if isinstance(inv, dict) else None
if not isinstance(cp, dict):
    cp = {}

for lst in cp.values():
    if not isinstance(lst, list):
        continue
    for p in lst:
        if not isinstance(p, str):
            continue
        # Treat an entry as a URL only when it really carries a scheme.
        # A bare 'http' prefix test would misroute relative pages such as
        # 'http-errors.html', and would let 'ftp://…' through as a path.
        if '://' in p or p.startswith(('http:', 'https:')):
            if WHITELIST.match(p) and p.endswith('.html'):
                pages.add(p)  # keep full URL — worker resolves to right upstream
        else:
            path = p.lstrip('/')
            # Reject anything that could climb out of the web root.
            if path.endswith('.html') and '..' not in path:
                pages.add(path)

print(f"Found {len(pages)} internal HTML pages to refresh")
# Filter: skip if thumb exists and < TTL (use same hash key logic as worker)
import re as _re2
def _key_for(p):
m = _re2.match(r'^https?://wevads\.weval-consulting\.com/(.+)$', p)
if m: return f"wevads/{m.group(1)}"
m = _re2.match(r'^https?://ethica\.wevup\.app/(.+)$', p)
if m: return f"ethica/{m.group(1)}"
m = _re2.match(r'^https?://(?:www\.)?weval-consulting\.com/(.+)$', p)
if m: return m.group(1)
return p.lstrip('/')
# Split pages into "fresh" (thumbnail younger than TTL, skipped) and "todo".
fresh = 0
todo = []
for path in pages:
    # Thumbnail file name is the MD5 hex digest of the page's cache key.
    key = _key_for(path)
    h = hashlib.md5(key.encode()).hexdigest()
    thumb = f"{THUMB_DIR}/{h}.png"
    try:
        # Single stat call: no window between an existence check and the
        # mtime read in which the file could disappear and abort the batch.
        age = time.time() - os.path.getmtime(thumb)
    except OSError:
        # Missing or unreadable thumbnail: it has to be (re)generated.
        todo.append(path)
        continue
    if age < TTL:
        fresh += 1
    else:
        todo.append(path)
print(f"Fresh (skipped): {fresh} / Todo: {len(todo)}")
def run_one(path):
    """Run the thumbnail worker for one page and summarise the outcome.

    Args:
        path: page identifier passed to the worker as its only argument.

    Returns:
        A ``(path, code, message)`` tuple. ``code`` is the worker's exit
        status, ``-1`` if it exceeded the 25 s timeout, or ``-2`` if it
        could not be run at all. ``message`` is at most 80 characters of
        the worker's stdout; when a failed run printed nothing on stdout,
        the last line of its stderr is used so the failure is not logged
        as an empty string.
    """
    try:
        r = subprocess.run(
            ['python3', WORKER, path],
            capture_output=True, text=True, timeout=25
        )
    except subprocess.TimeoutExpired:
        return path, -1, "TIMEOUT"
    except Exception as e:
        return path, -2, f"ERR: {e}"

    msg = r.stdout.strip()
    if r.returncode != 0 and not msg:
        # An uncaught exception in the worker leaves stdout empty; the last
        # stderr line carries the exception type and message.
        err_lines = r.stderr.strip().splitlines()
        msg = err_lines[-1].strip() if err_lines else ""
    return path, r.returncode, msg[:80]
# Fan the stale pages out to at most 4 concurrent worker runs and tally
# the results as they complete.
ok = fail = 0
with ThreadPoolExecutor(max_workers=4) as pool:
    pending = [pool.submit(run_one, target) for target in todo]
    for done in as_completed(pending):
        page, rc, summary = done.result()
        print(f"{page}: {summary}")
        if rc == 0:
            ok += 1
        else:
            fail += 1

# Final summary line with the wall-clock duration of the whole run.
elapsed = round(time.time() - start, 1)
print(f"\n=== DONE in {elapsed}s — ok={ok} fail={fail} skipped={fresh} total={len(pages)} ===")