FIX scanner URL encoding: urllib.parse.quote pour les URLs non-ASCII (méthodologie/adhérence retournaient DOWN alors que curl renvoyait 200) · root cause : urllib n'encode pas automatiquement les caractères non-ASCII
This commit is contained in:
@@ -5,6 +5,7 @@ import json, os, sys, time, re, tempfile
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from urllib.request import Request, urlopen
|
||||
from urllib.error import URLError, HTTPError
|
||||
from urllib.parse import quote, urlparse, urlunparse
|
||||
|
||||
CARTO_FILE = "/var/www/html/cartographie-screens.html"
|
||||
OUT_FILE = "/var/www/html/api/screens-health.json"
|
||||
@@ -49,6 +50,14 @@ def classify(code, elapsed_ms):
|
||||
def check_one(entry):
|
||||
url = entry.get("url")
|
||||
name = entry.get("name")
|
||||
# Percent-encode non-ASCII chars in path (fixes DOWN on URLs like méthodologie, adhérence)
|
||||
try:
|
||||
p = urlparse(url)
|
||||
if any(ord(c) > 127 for c in p.path):
|
||||
encoded_path = quote(p.path, safe="/")
|
||||
url = urlunparse((p.scheme, p.netloc, encoded_path, p.params, p.query, p.fragment))
|
||||
except Exception:
|
||||
pass
|
||||
t0 = time.time()
|
||||
code = 0
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user