112 lines
5.0 KiB
Python
Executable File
112 lines
5.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# /opt/weval-l99/screens-deep-scan.py
|
|
# Exhaustive scan of the configured paths on S204 and S95 to inventory page and script files.
# Output: structured JSON with, per source, the base path, public URL prefix and file entries (relative path, size, extension, mtime).
|
|
import os, sys, json, subprocess, re, time, hashlib
|
|
|
|
# Destination file for the generated JSON inventory.
OUT = "/var/www/html/api/screens-exhaustive.json"

# Paths to scan on S204 (local filesystem) and S95 (over SSH).
# Each entry is: (label, base_path, url_prefix, exclude_patterns, ssh)
PATHS = [
    # --- S204 ---
    (
        "S204-web",
        "/var/www/html",
        "https://weval-consulting.com/",
        ["cache/", "tmp/", ".git/", "node_modules/", "vendor/"],
        False,
    ),
    (
        "S204-l99",
        "/opt/weval-l99",
        None,
        ["__pycache__", ".git/", "node_modules/", "archive/"],
        False,
    ),
    (
        "S204-wevads",
        "/opt/wevads",
        None,
        ["vault/", ".git/", "__pycache__"],
        False,
    ),
    (
        "S204-weval",
        "/opt/weval",
        None,
        ["__pycache__", ".git/"],
        False,
    ),
    (
        "S204-ethica",
        "/opt/ethica",
        None,
        ["__pycache__", ".git/"],
        False,
    ),
    (
        "S204-deer",
        "/opt/deer-flow",
        None,
        ["__pycache__", ".git/", "node_modules/"],
        False,
    ),
    # --- S95 ---
    (
        "S95-arsenal",
        "/opt/wevads-arsenal/public",
        "https://wevads.weval-consulting.com/",
        ["vendor/", ".git/"],
        True,
    ),
    (
        "S95-wv",
        "/var/www/html",
        "https://wevads.weval-consulting.com/wv/",
        ["cache/", ".git/"],
        True,
    ),
    (
        "S95-opt-wv",
        "/opt/wevads/public",
        "https://wevads.weval-consulting.com/w/",
        ["_archive", ".git/"],
        True,
    ),
    (
        "S95-opt",
        "/opt",
        None,
        ["wevads-arsenal/", "wevads/public/", "__pycache__", "node_modules/", ".git/"],
        True,
    ),
]

# File extensions collected by the scanners, split into two groups.
EXTS_PUBLIC = (".html", ".php")
EXTS_SCRIPT = (".py", ".sh", ".js")
|
|
|
|
def scan_local(path, excludes):
    """Walk a local directory tree and list its page and script files.

    Args:
        path: Base directory to scan. A missing or non-directory path yields
            an empty list.
        excludes: Directory patterns to skip (e.g. ``"cache/"`` or
            ``"__pycache__"``). Leading/trailing slashes are ignored and a
            pattern must match whole path components below ``path``, the same
            way ``scan_ssh`` excludes ``*/name/*``.

    Returns:
        A list of dicts with keys ``rel`` (path relative to ``path`` using
        forward slashes), ``size`` (bytes), ``ext`` (lower-cased extension)
        and ``mtime`` (integer epoch seconds), one per file whose extension is
        in ``EXTS_PUBLIC`` or ``EXTS_SCRIPT``. Directories nested more than
        four levels below ``path`` are not visited. Entries are produced in
        sorted walk order so repeated runs give the same list.
    """
    found = []
    if not os.path.isdir(path):
        return found

    wanted_exts = frozenset(EXTS_PUBLIC) | frozenset(EXTS_SCRIPT)
    # "/name/" needles are matched against "/<relative dir>/", so "vendor"
    # no longer prunes "vendors-old" and the base path itself can never
    # trigger an exclusion. Empty patterns are ignored instead of pruning
    # everything.
    needles = ["/" + ex.strip("/") + "/" for ex in excludes if ex.strip("/")]
    max_dir_depth = 4  # files may sit in directories at depth 0..4

    for root, dirs, files in os.walk(path, followlinks=False):
        rel_root = os.path.relpath(root, path)
        if rel_root == ".":
            rel_dir, depth = "", 0
        else:
            rel_dir = rel_root.replace(os.sep, "/")
            depth = rel_dir.count("/") + 1

        if depth >= max_dir_depth:
            # Anything deeper would be discarded anyway: stop os.walk from
            # descending instead of walking the whole tree and skipping it.
            dirs[:] = []
        else:
            here = f"/{rel_dir}/" if rel_dir else "/"
            dirs[:] = sorted(
                d for d in dirs
                if not any(needle in f"{here}{d}/" for needle in needles)
            )

        for name in sorted(files):
            ext = os.path.splitext(name)[1].lower()
            if ext not in wanted_exts:
                continue
            try:
                st = os.stat(os.path.join(root, name))
            except OSError:
                # Broken symlink, permission problem or file removed while
                # scanning: skip this entry, keep scanning.
                continue
            # Built from the normalised directory so only the OS separator is
            # rewritten; a literal backslash in a POSIX file name is kept.
            rel = f"{rel_dir}/{name}" if rel_dir else name
            found.append({
                "rel": rel,
                "size": st.st_size,
                "ext": ext,
                "mtime": int(st.st_mtime),
            })
    return found
|
|
|
|
def scan_ssh(path, excludes):
    """List page and script files under ``path`` on the remote host.

    Runs GNU ``find`` over ``sudo ssh`` (port 49222, ``root@10.1.0.3``) and
    parses its ``%P|%s|%T@`` output.

    Args:
        path: Remote base directory handed to ``find``.
        excludes: Directory patterns to skip; each becomes
            ``-not -path "*/<pattern>/*"`` with surrounding slashes stripped.

    Returns:
        A list of dicts with keys ``rel``, ``size``, ``ext`` and ``mtime``
        (same shape as ``scan_local``). The remote side limits the search to
        ``-maxdepth 5`` and the first 5000 lines. On any SSH/subprocess
        failure a warning is printed to stderr and an empty list is returned.
    """
    import shlex  # only needed here; kept local so the file's imports are untouched

    # Derive the name tests from the shared extension tuples so the remote
    # filter cannot drift from the local one. -iname matches the
    # case-insensitive extension check applied below and in scan_local.
    find_argv = ["find", path, "-maxdepth", "5", "-type", "f", "("]
    for index, ext in enumerate(EXTS_PUBLIC + EXTS_SCRIPT):
        if index:
            find_argv.append("-o")
        find_argv += ["-iname", "*" + ext]
    find_argv.append(")")
    for ex in excludes:
        pattern = ex.strip("/")
        if pattern:
            find_argv += ["-not", "-path", f"*/{pattern}/*"]
    find_argv += ["-printf", "%P|%s|%T@\\n"]

    # Every find argument is quoted for the remote shell. Previously the
    # double quotes around each exclude glob closed the outer double-quoted
    # command, leaving the globs unquoted on the remote side.
    remote_cmd = " ".join(shlex.quote(arg) for arg in find_argv)
    remote_cmd += " 2>/dev/null | head -5000"

    # NOTE(review): StrictHostKeyChecking=no disables host key verification;
    # kept as in the original, confirm this is acceptable for this network.
    argv = [
        "sudo", "ssh",
        "-p", "49222",
        "-o", "StrictHostKeyChecking=no",
        "-i", "/var/www/.ssh/wevads_key",
        "root@10.1.0.3",
        remote_cmd,
    ]
    try:
        # errors="replace" keeps undecodable file names from aborting the scan.
        proc = subprocess.run(
            argv, capture_output=True, text=True, errors="replace", timeout=60
        )
    except (OSError, subprocess.SubprocessError) as exc:
        print(f"  scan_ssh({path}): {exc}", file=sys.stderr)
        return []
    if proc.returncode != 0:
        print(
            f"  scan_ssh({path}): ssh exited with status {proc.returncode}",
            file=sys.stderr,
        )

    wanted_exts = frozenset(EXTS_PUBLIC) | frozenset(EXTS_SCRIPT)
    entries = []
    for line in proc.stdout.split("\n"):
        # Split from the right: size and mtime never contain "|", but a file
        # name may.
        fields = line.rsplit("|", 2)
        if len(fields) != 3:
            continue
        rel, raw_size, raw_mtime = fields
        try:
            size = int(raw_size)
            mtime = int(float(raw_mtime))
        except ValueError:
            continue
        ext = os.path.splitext(rel)[1].lower()
        if ext in wanted_exts:
            entries.append({"rel": rel, "size": size, "ext": ext, "mtime": mtime})
    return entries
|
|
|
|
def _write_report(path, payload):
    """Write ``payload`` to ``path`` as compact JSON with mode 0644.

    The data goes to a temporary sibling file first and is then renamed over
    ``path`` so readers never observe a half-written file. If that fails
    (for example the directory is not writable) the file is rewritten in
    place, as the script did originally.
    """
    tmp_path = f"{path}.tmp.{os.getpid()}"
    try:
        with open(tmp_path, "w", encoding="utf-8") as fh:
            json.dump(payload, fh, separators=(",", ":"))
        os.chmod(tmp_path, 0o644)
        os.replace(tmp_path, path)
        return
    except OSError as exc:
        print(
            f"Atomic write to {path} failed ({exc}); writing in place",
            file=sys.stderr,
        )
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(payload, fh, separators=(",", ":"))
    os.chmod(path, 0o644)


def main():
    """Scan every entry of ``PATHS`` and write the combined report to ``OUT``.

    For each source the report stores the base path, URL prefix, total file
    count, a per-extension count, the scan duration in seconds and at most
    2000 entries. Progress goes to stderr; the final summary goes to stdout.
    """
    from collections import Counter  # local import: only used for by_ext

    max_entries = 2000  # cap per source to keep the JSON size bounded

    report = {"generated_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"), "sources": {}}
    grand_total = 0
    for label, base, url_prefix, excludes, ssh in PATHS:
        print(f"Scanning {label} {base} (ssh={ssh})...", file=sys.stderr)
        # monotonic clock: the duration is unaffected by wall-clock changes
        started = time.monotonic()
        entries = scan_ssh(base, excludes) if ssh else scan_local(base, excludes)
        elapsed = round(time.monotonic() - started, 1)

        # plain dict so the JSON and the log line look as before
        by_ext = dict(Counter(entry["ext"] for entry in entries))
        report["sources"][label] = {
            "base_path": base,
            "url_prefix": url_prefix,
            "total": len(entries),
            "by_ext": by_ext,
            "elapsed_sec": elapsed,
            "entries": entries[:max_entries],
        }
        grand_total += len(entries)
        print(f" {label}: {len(entries)} files ({by_ext}) in {elapsed}s", file=sys.stderr)

    report["grand_total"] = grand_total
    _write_report(OUT, report)
    print(f"TOTAL: {grand_total} files across {len(PATHS)} paths")
    print(f"Written: {OUT}")
|
|
|
|
# Script entry point: run the scan only when the file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|