FIX scanner URL encoding: urllib.parse.quote pour les URLs non-ASCII (méthodologie/adhérence retournaient DOWN alors que curl renvoyait 200) · root cause : urllib n'encode pas automatiquement les caractères non-ASCII

This commit is contained in:
Opus-Yacine
2026-04-17 04:20:09 +02:00
parent 469267bd80
commit 9ac56e875b

View File

@@ -5,6 +5,7 @@ import json, os, sys, time, re, tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
from urllib.parse import quote, urlparse, urlunparse
# Absolute path to the "cartographie-screens" HTML file.
# NOTE(review): presumably the input the scanner reads its screen entries
# from -- the usage is not visible in this excerpt; confirm.
CARTO_FILE = "/var/www/html/cartographie-screens.html"
# Absolute path to the "screens-health" JSON file.
# NOTE(review): presumably where the health-check results are written --
# confirm against the code that opens this path.
OUT_FILE = "/var/www/html/api/screens-health.json"
@@ -49,6 +50,14 @@ def classify(code, elapsed_ms):
def check_one(entry):
url = entry.get("url")
name = entry.get("name")
# Percent-encode non-ASCII chars in path (fixes DOWN on URLs like méthodologie, adhérence)
try:
p = urlparse(url)
if any(ord(c) > 127 for c in p.path):
encoded_path = quote(p.path, safe="/")
url = urlunparse((p.scheme, p.netloc, encoded_path, p.params, p.query, p.fragment))
except Exception:
pass
t0 = time.time()
code = 0
try: