#!/usr/bin/env python3
"""Collect LinkedIn profile URLs from a search endpoint and store them in Postgres.

For every entry in ``QUERIES`` the script asks the search service at ``SX``
(called with ``format=json``; presumably a SearXNG instance) for
``site:linkedin.com/in <query>``, derives a display name from the profile
URL slug, extracts optional company/location hints from the result snippet,
and inserts previously unseen URLs into ``admin.linkedin_profiles``.

Usage:
    script.py [BATCH]

``BATCH`` is the maximum number of profiles to insert in this run
(default ``DEFAULT_BATCH``).  Progress lines go to stdout, errors to stderr,
and the last stdout line is ``LINKEDIN_SEARXNG:+<inserted>``.
"""
import json  # unused here, kept because the original script imported it
import re
import sys
import time
from urllib.parse import unquote

import psycopg2
import requests

# NOTE(review): database credentials are hard-coded in source control.
# Move them to environment variables or a secrets store.
DB = dict(host='10.1.0.3', dbname='adx_system', user='admin', password='admin123')
SX = 'http://127.0.0.1:8888/search'

DEFAULT_BATCH = 99999    # insert cap used when no BATCH argument is given
REQUEST_TIMEOUT_S = 15   # per-request timeout for the search endpoint
QUERY_DELAY_S = 2        # pause between search requests

QUERIES = [
    "directeur general pharma maroc", "DG laboratoire tunisie", "CEO pharma algerie",
    "directeur marketing pharma maroc", "directeur medical pharma tunisie", "medical director algerie",
    "directeur qualite pharma maroc", "regulatory affairs maghreb", "pharmacovigilance maroc",
    "clinical research tunisie", "market access algerie", "key account manager pharma maroc",
    "business development pharma tunisie", "country manager pharma maroc", "consultant SAP maroc",
    "SAP project manager tunisie", "SAP functional algerie", "SAP S4HANA maroc", "ERP manager maghreb",
    "CIO DSI maroc", "directeur informatique tunisie", "IT director algerie",
    "RSSI CISO maroc", "cybersecurity tunisie", "security architect algerie",
    "cloud architect maroc", "data engineer tunisie", "devops algerie",
    "supply chain director maroc", "logistics manager tunisie", "directeur achats algerie",
    "CFO directeur financier maroc", "finance director tunisie",
    "clinical operations maghreb", "MSL pharma maroc", "chef de produit pharma tunisie",
    "directeur commercial pharma tunisie", "sales director pharma maroc",
    "directeur usine maroc", "plant manager tunisie", "quality manager manufacturing maroc",
    "AI machine learning maroc", "data scientist algerie", "BI analyst tunisie",
]

# Compiled once; applied to the combined title + snippet text of each result.
_COMPANY_RE = re.compile(r'Experience:\s*([^·\n]+)')
_LOCATION_RE = re.compile(r'Location:\s*([^·\n]+)')
_DIGITS_RE = re.compile(r'\d+')


def _warn(message):
    """Write a diagnostic line to stderr so stdout stays machine-readable."""
    print(message, file=sys.stderr)


def _first_group(pattern, text):
    """Return the stripped first capture group (max 100 chars), or ""."""
    match = pattern.search(text)
    return match.group(1).strip()[:100] if match else ""


def search(query):
    """Run one search and return its list of result dicts.

    Any transport error, non-200 status, or malformed JSON is reported on
    stderr and yields an empty list, so one bad query never aborts the run.
    """
    params = {"q": f"site:linkedin.com/in {query}", "format": "json"}
    try:
        response = requests.get(SX, params=params, timeout=REQUEST_TIMEOUT_S)
    except requests.RequestException as exc:
        _warn(f"search failed for {query!r}: {exc}")
        return []
    if response.status_code != 200:
        _warn(f"search for {query!r} returned HTTP {response.status_code}")
        return []
    try:
        data = response.json()
    except ValueError as exc:  # covers json and requests JSON decode errors
        _warn(f"search for {query!r} returned invalid JSON: {exc}")
        return []
    results = data.get("results") if isinstance(data, dict) else None
    return results if isinstance(results, list) else []


def parse_result(res):
    """Turn one search result into a profile tuple.

    Returns ``(url, full_name, headline, company, location)`` or ``None``
    when the result is not a usable LinkedIn profile link.
    """
    if not isinstance(res, dict):
        return None
    url = res.get("url") or ""
    if not isinstance(url, str) or "linkedin.com/in/" not in url:
        return None
    url = url.split("?")[0]
    if "/in/" not in url:
        # The profile path was only present inside the query string.
        return None

    slug = url.split("/in/")[1].split("/")[0]
    # Decode percent-escapes first so accented names survive digit removal.
    full_name = unquote(slug).replace("-", " ").title()[:100]
    full_name = _DIGITS_RE.sub('', full_name).strip()
    if len(full_name) < 3:
        return None

    # `or ""` guards against explicit nulls in the JSON payload.
    title = res.get("title") or ""
    content = res.get("content") or ""
    headline = (title + " " + content)[:300]
    company = _first_group(_COMPANY_RE, headline)
    location = _first_group(_LOCATION_RE, headline)
    return url, full_name, headline, company, location


def store_profile(conn, cur, profile, query):
    """Insert *profile* unless its URL is already stored.

    Returns ``True`` when a row was inserted and committed.  On a database
    error the transaction is rolled back so later statements on the same
    connection keep working, the error is reported, and ``False`` is returned.
    """
    url = profile[0]
    try:
        cur.execute("SELECT 1 FROM admin.linkedin_profiles WHERE linkedin_url=%s LIMIT 1", (url,))
        if cur.fetchone():
            return False
        cur.execute("INSERT INTO admin.linkedin_profiles (linkedin_url,full_name,headline,company,location,source_search,scraped_at) VALUES(%s,%s,%s,%s,%s,%s,NOW())",
                    (*profile, query))
        conn.commit()
    except psycopg2.Error as exc:
        conn.rollback()
        _warn(f"database error for {url}: {exc}")
        return False
    return True


def main(argv=None):
    """Run every query until ``BATCH`` profiles are inserted; return exit code 0."""
    argv = sys.argv if argv is None else argv
    batch = int(argv[1]) if len(argv) > 1 else DEFAULT_BATCH

    total = 0
    conn = psycopg2.connect(**DB)
    try:
        with conn.cursor() as cur:
            for query in QUERIES:
                if total >= batch:
                    break
                for res in search(query):
                    # Enforce the cap per result, not only between queries.
                    if total >= batch:
                        break
                    profile = parse_result(res)
                    if profile is None:
                        continue
                    if store_profile(conn, cur, profile, query):
                        total += 1
                        if total % 10 == 0:
                            print(f"+{total} profiles")
                if total >= batch:
                    break
                # Rate-limit requests to the search endpoint.
                time.sleep(QUERY_DELAY_S)
    finally:
        conn.close()

    print(f"LINKEDIN_SEARXNG:+{total}")
    return 0


if __name__ == "__main__":
    sys.exit(main())