Files
weval-l99/v132_orphans.py
2026-04-24 04:38:58 +02:00

58 lines
1.8 KiB
Python

#!/usr/bin/env python3
"""V132 - Scan WTP v80-quick links, detect HTTP status, report orphans
Write report to /var/www/html/api/wtp-links-health.json (NO touch to WTP itself)
"""
import json, re, urllib.request, ssl, time, os
# --- 1. Load the page and collect the links to probe -------------------------
# The WTP page is only read here; nothing below writes back to it.
with open('/var/www/html/weval-technology-platform.html', 'r', encoding='utf-8') as f:
    wtp = f.read()

# Extract all v80-quick hrefs. The pattern only matches site-relative links
# (href value starting with "/") where the href attribute appears after the
# v80-quick token inside the same tag.
links = re.findall(r'v80-quick["\s][^>]*href="(/[^"]+)"', wtp)
# Dedupe while keeping first-seen order (dict keys preserve insertion order).
links = list(dict.fromkeys(links))

# --- 2. Probe every link with a HEAD request ---------------------------------
# NOTE(review): certificate verification is switched off, so an invalid or
# expired certificate is never reported as a failure. Presumably deliberate
# for a self-check of our own host - confirm before reusing this context.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

results = []
for href in links:
    url = f"https://weval-consulting.com{href}"
    error = None
    # Monotonic clock: unlike time.time() it cannot jump backwards/forwards
    # when the system clock is adjusted, so the elapsed ms is always sane.
    t0 = time.monotonic()
    try:
        req = urllib.request.Request(url, method='HEAD')
        with urllib.request.urlopen(req, context=ctx, timeout=5) as r:
            status = r.status
        ms = int((time.monotonic() - t0) * 1000)
    except urllib.error.HTTPError as e:
        # The server answered, but with an error status (4xx/5xx).
        status = e.code
        ms = int((time.monotonic() - t0) * 1000)
    except Exception as e:
        # Best-effort boundary: DNS failure, timeout, refused connection,
        # malformed URL, ... One bad link must not abort the whole scan, but
        # the reason is kept so a "status 0" entry can be diagnosed.
        status = 0
        ms = -1
        error = f"{type(e).__name__}: {e}"
    entry = {'href': href, 'status': status, 'ms': ms}
    if error is not None:
        # Additive field, present only on transport-level failures.
        entry['error'] = error
    results.append(entry)

# --- 3. Classify --------------------------------------------------------------
# `redir` is a subset of `healthy` (any 2xx/3xx counts as healthy), so
# healthy_count + redirect_count + broken_count can exceed total_links.
# urlopen follows redirects itself, so a 3xx status normally only shows up
# here when the redirect could not be followed.
healthy = [r for r in results if 200 <= r['status'] < 400]
redir = [r for r in results if r['status'] in (301, 302, 307, 308)]
# status 0 is the marker for "no HTTP response at all" set in the loop above.
broken = [r for r in results if r['status'] >= 400 or r['status'] == 0]

report = {
    'scanned_at': time.strftime('%Y-%m-%dT%H:%M:%S%z'),
    'total_links': len(results),
    'healthy_count': len(healthy),
    'redirect_count': len(redir),
    'broken_count': len(broken),
    'broken_links': broken,
    'all_results': results
}

# --- 4. Publish the report ----------------------------------------------------
out = '/var/www/html/api/wtp-links-health.json'
# Write to a sibling temp file and swap it in with os.replace so that a client
# fetching the JSON never sees an empty or half-written file, and a crash in
# json.dump leaves the previous report intact. Requires write permission on
# the directory, not just on the existing file.
tmp = out + '.tmp'
with open(tmp, 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=2)
os.replace(tmp, out)

# --- 5. Console summary -------------------------------------------------------
print(f"TOTAL {len(results)} · HEALTHY {len(healthy)} · REDIR {len(redir)} · BROKEN {len(broken)}")
if broken:
    print("Broken:")
    for b in broken:
        print(f" {b['status']} {b['href']}")