Files
weval-l99/social-scanner.py
opus-wire 7f67eb6bdf auto-push
2026-04-18 14:33:55 +02:00

72 lines
2.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""Social Media Scanner — Selenium/Chrome headless
Scans LinkedIn, YouTube, Instagram, TikTok, Meta
Stores the results in /tmp/social-scan-cache.json
"""
import sys, json, time
from datetime import datetime
# URLs to visit, keyed by the platform name accepted as the first CLI argument.
TARGETS = {
    "linkedin": [
        "https://www.linkedin.com/company/weval-consulting/",
        "https://www.linkedin.com/company/weval-life-sciences/",
    ],
    "youtube": [
        "https://www.youtube.com/@weval-consulting",
    ],
    "instagram": [
        "https://www.instagram.com/weval_consulting/",
    ],
    "tiktok": [
        "https://www.tiktok.com/@weval",
    ],
}

# File the full JSON result is written to on every run.
CACHE_PATH = "/tmp/social-scan-cache.json"
# Value handed to driver.set_page_load_timeout().
PAGE_LOAD_TIMEOUT = 12
# Pause after each navigation before the page is inspected.
SETTLE_SECONDS = 4
# Command-line switches passed to Chrome.
CHROME_FLAGS = (
    "--headless=new",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--window-size=1920,1080",
)


def select_targets(platform):
    """Return the ``{platform: [urls]}`` mapping to scan for *platform*.

    ``"all"`` selects every entry of ``TARGETS``. Any other name selects only
    that platform; a name that is not in ``TARGETS`` maps to an empty URL list.
    """
    if platform == "all":
        return TARGETS
    return {platform: TARGETS.get(platform, [])}


def scan_page(driver, plat, url, index):
    """Load *url* in *driver* and return its result entry.

    The entry holds the URL, the page title, the length of the page source and
    the path of a screenshot saved as ``/tmp/social-<plat>-<index>.png``.
    Exceptions raised by the driver propagate to the caller.
    """
    driver.get(url)
    time.sleep(SETTLE_SECONDS)
    screenshot = f"/tmp/social-{plat}-{index}.png"
    entry = {
        "url": url,
        "title": driver.title,
        "page_length": len(driver.page_source),
    }
    driver.save_screenshot(screenshot)
    entry["screenshot"] = screenshot
    return entry


def scan_into(data, platform):
    """Scan the targets selected by *platform*, filling *data* in place.

    *data* is mutated (rather than a new dict returned) so that entries
    collected before an unexpected failure are still present for the caller.
    A failure on a single URL is recorded as ``{"url", "error"}`` and the scan
    continues. The browser is always shut down, even when an error escapes.
    """
    # Imported lazily so that a missing selenium install surfaces as an
    # exception the caller can record instead of aborting the script at import.
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    opts = Options()
    for flag in CHROME_FLAGS:
        opts.add_argument(flag)

    driver = webdriver.Chrome(options=opts)
    try:
        driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT)
        for plat, urls in select_targets(platform).items():
            entries = data[plat] = []
            for url in urls:
                try:
                    # len(entries) counts failed entries too, so the screenshot
                    # index matches the entry's position in the list.
                    entries.append(scan_page(driver, plat, url, len(entries)))
                except Exception as exc:
                    entries.append({"url": url, "error": str(exc)[:80]})
    finally:
        # Without this, any error after the driver is created would leave the
        # headless Chrome process running.
        driver.quit()


def main(argv=None):
    """Run the scan, write the JSON cache file and print a short preview.

    *argv* defaults to ``sys.argv``; its second element, when present, is the
    platform to scan, otherwise ``"all"`` is used.
    """
    args = sys.argv if argv is None else argv
    platform = args[1] if len(args) > 1 else "all"
    result = {"ts": datetime.now().isoformat(), "platform": platform, "data": {}}

    try:
        scan_into(result["data"], platform)
        result["status"] = "OK"
    except Exception as exc:
        # Top-level boundary: report the failure in the result instead of
        # crashing, so the cache file is written either way.
        result["status"] = "ERROR"
        result["error"] = str(exc)[:100]

    # Save the full result.
    with open(CACHE_PATH, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)
    # Only the first 300 characters are echoed to stdout.
    print(json.dumps(result, indent=2)[:300])


if __name__ == "__main__":
    main()