#!/usr/bin/env python3
"""WEVAL Playwright Kompass Scraper — headless directory scraping.

Usage:
    script.py [COUNTRY] [MAX_PAGES]

COUNTRY defaults to ``MA`` and MAX_PAGES to ``20``. For every sector in
``SECTORS`` the script walks the Kompass search result pages with a headless
Chromium browser and inserts each company that is not yet present (same
name and country) into ``admin.weval_leads``.
"""
import re
import sys
import time

import psycopg2
from playwright.sync_api import sync_playwright

# NOTE(review): credentials are hard-coded in source; consider moving them to
# the environment or a secrets store.
DB = dict(host='10.1.0.3', dbname='adx_system', user='admin', password='admin123')

# Maps the country code given on the command line to the Kompass URL segment.
COUNTRY_MAP = {'MA': 'ma', 'DZ': 'dz', 'TN': 'tn'}

# Search terms queried one after another for the selected country.
SECTORS = [
    'informatique',
    'logiciel',
    'erp',
    'cloud',
    'pharmaceutique',
    'banque',
    'telecom',
    'energie',
    'logistique',
    'chimie',
]

PAGE_TIMEOUT_MS = 15000  # navigation timeout passed to page.goto()
PAUSE_SECONDS = 2        # pause after each page load and between pages
MAX_NAME_LENGTH = 200    # company names are truncated to this many characters
MIN_NAME_LENGTH = 3      # shorter card texts are skipped


def _find_cards(page):
    """Return the company elements on the current page (possibly empty)."""
    cards = page.query_selector_all('.company-card, .companyName, [class*=company]')
    if not cards:
        # Fall back to plain company links when no card-like element matched.
        cards = page.query_selector_all('a[href*="/company/"]')
    return cards


def _store_card(conn, cur, card, sector, country):
    """Insert one card as a lead.

    Returns the stored company name, or ``None`` when the card was skipped
    (text too short, or a lead with the same name and country exists).
    The insert is committed before returning.
    """
    name = card.inner_text().strip()[:MAX_NAME_LENGTH]
    href = card.get_attribute('href') or ''
    if len(name) < MIN_NAME_LENGTH:
        return None

    cur.execute(
        "SELECT 1 FROM admin.weval_leads WHERE company_name=%s AND country=%s",
        (name, country),
    )
    if cur.fetchone():
        return None

    # Site-relative links are made absolute; anything else is stored as-is.
    website = f"https://www.kompass.com{href}" if href.startswith('/') else href
    cur.execute(
        """INSERT INTO admin.weval_leads (company_name,industry,country,website,source,created_at)
                            VALUES (%s,%s,%s,%s,'kompass_pw',NOW())""",
        (name, sector, country, website),
    )
    conn.commit()
    return name


def _scrape_sector(page, conn, cur, cc, sector, country, max_pages):
    """Scrape up to ``max_pages`` result pages for one sector.

    Stops early at the first page without cards or the first page-level
    error. Returns the number of leads inserted.
    """
    added = 0
    for pg in range(1, max_pages + 1):
        url = f"https://www.kompass.com/{cc}/searchCompanies?text={sector}&page={pg}"
        try:
            page.goto(url, timeout=PAGE_TIMEOUT_MS)
            time.sleep(PAUSE_SECONDS)
            cards = _find_cards(page)
            for card in cards:
                try:
                    name = _store_card(conn, cur, card, sector, country)
                except Exception as exc:
                    # Best effort per card: skip it, but roll back so a failed
                    # statement does not leave the transaction aborted (which
                    # would make every later query on this connection fail).
                    conn.rollback()
                    print(f"SKIP {sector} p{pg}: {exc}", file=sys.stderr)
                    continue
                if name is not None:
                    added += 1
                    print(f"+KOMPASS {name} [{sector}] {country}")
            if not cards:
                break
            print(f"[{sector}] page {pg}: {len(cards)} cards")
        except Exception as e:
            print(f"ERR {sector} p{pg}: {e}")
            break
        time.sleep(PAUSE_SECONDS)
    return added


def main():
    """Parse the command line, run the scrape, and print the total."""
    country = sys.argv[1] if len(sys.argv) > 1 else 'MA'
    max_pages = int(sys.argv[2]) if len(sys.argv) > 2 else 20
    # NOTE(review): an unmapped country is scraped from the 'ma' directory but
    # stored under the code that was passed in — confirm this is intended.
    cc = COUNTRY_MAP.get(country, 'ma')

    added = 0
    conn = psycopg2.connect(**DB)
    try:
        cur = conn.cursor()
        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    page = browser.new_page()
                    for sector in SECTORS:
                        added += _scrape_sector(
                            page, conn, cur, cc, sector, country, max_pages
                        )
                finally:
                    # Always shut the browser down, even on Ctrl-C or a crash.
                    browser.close()
        finally:
            cur.close()
    finally:
        conn.close()
    print(f"KOMPASS_{country}: +{added} companies")


if __name__ == "__main__":
    main()