53 lines
1.7 KiB
Python
Executable File
53 lines
1.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Purge phantom URLs from screens-health.json:
|
|
URLs that 5xx but the underlying file doesn't exist on the filesystem
|
|
= reclassify as PHANTOM (not BROKEN)
|
|
"""
|
|
import json
import os
import shutil
import sys
from datetime import datetime
from urllib.parse import unquote, urlparse
|
|
|
|
# JSON health report that this script rewrites in place.
H = "/var/www/html/api/screens-health.json"

# Timestamped copy (local time) taken before anything is modified.
# shutil.copy raises on failure, so the report is never rewritten
# without a backup having been made first.
backup = H + ".pre-phantom-" + datetime.now().strftime("%Y%m%d-%H%M%S")
shutil.copy(H, backup)

with open(H, encoding="utf-8") as health_file:
    d = json.load(health_file)

# Per-URL entries; a report without "by_url" is treated as empty.
by_url = d.get("by_url", {})

# Per-status tally, rebuilt from scratch while the entries are scanned.
counts = {
    "UP": 0,
    "SLOW": 0,
    "BROKEN": 0,
    "DOWN": 0,
    "NOT_FOUND": 0,
    "PROTECTED": 0,
    "PHANTOM": 0,
}
|
|
|
|
def url_to_path(url):
    """Map a URL to the local filesystem path it would be served from.

    Args:
        url: Absolute URL taken from the health report.

    Returns:
        "/var/www/html" followed by the percent-decoded URL path when the
        host is one of the known weval-consulting.com hosts; None when the
        URL has no path or belongs to any other host.
    """
    p = urlparse(url)
    if not p.path:
        return None
    # Host names are case-insensitive, so compare in lower case.
    host = p.netloc.lower()
    # wevads is on S95, so its files cannot be checked from S204; the same
    # prefix is tried anyway, which marks as phantom any wevads path that
    # does not exist on S204 either.
    if host in (
        "weval-consulting.com",
        "www.weval-consulting.com",
        "wevads.weval-consulting.com",
    ):
        # Decode percent-escapes (e.g. %20) so the result matches the real
        # file name on disk instead of its URL-encoded spelling.
        return "/var/www/html" + unquote(p.path)
    return None
|
|
|
|
# Statuses eligible for reclassification when the file is missing locally.
failing_statuses = ("BROKEN", "DOWN", "NOT_FOUND")

phantom_count = 0
for url, info in by_url.items():
    status = info.get("status")
    # Only failing entries are checked against the filesystem. url_to_path()
    # returns None for URLs it cannot map, which leaves the entry untouched.
    fpath = url_to_path(url) if status in failing_statuses else None
    if fpath and not os.path.exists(fpath):
        info["status"] = "PHANTOM"
        info["original_status"] = status  # keep the status seen before the change
        phantom_count += 1
        counts["PHANTOM"] += 1
    elif status in counts:
        # Statuses without a bucket in counts are left out of the tally.
        counts[status] += 1

d["counts"] = counts
# Local time with UTC offset, e.g. 2024-05-01T12:34:56+02:00 (the format
# `date -Iseconds` prints), produced without spawning a shell.
d["phantom_purge_at"] = datetime.now().astimezone().isoformat(timespec="seconds")
d["phantom_count"] = phantom_count

with open(H, "w") as f:
    json.dump(d, f, indent=2)

# Machine-readable summary of the run on stdout.
print(json.dumps({"phantom_reclassified":phantom_count,"new_counts":counts,"backup":backup}, indent=2))
|