#!/usr/bin/env python3
"""WEVAL LinkedIn Scraper v1.1 - SearXNG LinkedIn discovery

Usage: script [COUNTRY]

COUNTRY is a key of ``CITIES`` (default ``'MA'``).  For every
(city, role) pair the script queries a SearXNG instance for LinkedIn
profile pages, derives name / job title / company from each result and
inserts profiles whose URL is not yet present into
``admin.linkedin_leads``.
"""
import re
import sys
import time

import psycopg2
import requests

# NOTE(review): database credentials are hard-coded in source; consider
# loading them from the environment or a secrets store.
DB = dict(host='10.1.0.3', dbname='adx_system', user='admin', password='admin123')
SEARX = "http://localhost:8888/search"

# Country code taken from the first CLI argument.
country = sys.argv[1] if len(sys.argv) > 1 else 'MA'

CITIES = {
    'MA': ['casablanca', 'rabat', 'tanger', 'marrakech'],
    'DZ': ['alger', 'oran', 'constantine', 'annaba'],
    'TN': ['tunis', 'sfax', 'sousse', 'bizerte'],
}
ROLES = [
    'DSI', 'directeur general', 'CTO', 'directeur informatique', 'responsable IT',
    'directeur achats', 'supply chain', 'DAF', 'DRH', 'SAP consultant',
    'ERP manager', 'cloud architect', 'CISO', 'data officer', 'CDO',
]

# Words in a result snippet that introduce the employer ("... chez ACME.").
COMPANY_MARKERS = (' chez ', ' at ')

# Checked in order; the first keyword found becomes the lead's industry.
# NOTE(review): matching is a case-insensitive *substring* test, so short
# keywords hit ordinary words ('it' in "digital", 'erp' in "enterprise").
# Kept as-is to preserve existing classification; consider word boundaries.
INDUSTRY_KEYWORDS = (
    'pharma', 'SAP', 'ERP', 'cloud', 'banque',
    'assurance', 'telecom', 'energie', 'IT', 'cyber',
)


def sx(q):
    """Run query *q* against SearXNG and return at most 10 result dicts.

    This is best-effort: any network failure or undecodable / unexpectedly
    shaped response yields an empty list so the scrape can carry on with the
    next query.  Failures are reported on stderr instead of being silently
    discarded, and KeyboardInterrupt / SystemExit are no longer swallowed.
    """
    try:
        r = requests.get(SEARX, params={'q': q, 'format': 'json'}, timeout=15)
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"sx: search failed for {q!r}: {exc}", file=sys.stderr)
        return []
    # Guard against a non-object payload or a null/non-list "results" field.
    results = data.get('results') if isinstance(data, dict) else None
    return results[:10] if isinstance(results, list) else []


conn = psycopg2.connect(**DB)
cur = conn.cursor()
added = 0

# NOTE(review): an unknown country code falls back to the Moroccan city list
# while the unknown code itself is still stored in lead_country.
cities = CITIES.get(country, CITIES['MA'])

try:
    for city in cities:
        for role in ROLES:
            q = f'site:linkedin.com/in "{role}" "{city}"'
            for r in sx(q):
                # `or ''` also covers keys that are present but null.
                u = r.get('url') or ''
                if 'linkedin.com/in/' not in u:
                    continue

                t = (r.get('title') or '').replace(' | LinkedIn', '').replace(' - LinkedIn', '')
                c = r.get('content') or ''

                # Titles are expected to look like "Name - Job - Company".
                parts = t.split(' - ')
                name = parts[0].strip()
                # Require at least a first and last name.
                if len(name.split()) < 2:
                    continue
                job = parts[1].strip() if len(parts) > 1 else role
                comp = parts[2].strip() if len(parts) > 2 else ''

                if not comp:
                    # Fall back to the snippet: text after the last marker,
                    # up to the first period, capped at 100 characters.
                    for kw in COMPANY_MARKERS:
                        if kw in c:
                            comp = c.split(kw)[-1].split('.')[0].strip()[:100]
                            break

                # Skip profiles that are already stored.
                cur.execute(
                    "SELECT 1 FROM admin.linkedin_leads WHERE lead_linkedin_url=%s",
                    (u,),
                )
                if cur.fetchone():
                    continue

                haystack = (c + t + comp).lower()
                ind = next(
                    (kw for kw in INDUSTRY_KEYWORDS if kw.lower() in haystack),
                    '',
                )

                cur.execute("""INSERT INTO admin.linkedin_leads (lead_name,lead_company,lead_title,lead_industry,lead_seniority,lead_linkedin_url,lead_country,lead_city,captured_at) VALUES (%s,%s,%s,%s,'senior',%s,%s,%s,NOW())""",
                            (name, comp, job, ind, u, country, city))
                # Commit per row so the duplicate check above sees it and a
                # later failure does not lose leads already captured.
                conn.commit()
                added += 1
                print(f"+LI {name} @{comp} [{job}] {u}")

            # Throttle between search queries.
            time.sleep(1.5)
finally:
    # Release the cursor and connection even if the scrape aborts midway.
    cur.close()
    conn.close()

print(f"LINKEDIN_{country}: +{added} profiles")