55 lines
2.3 KiB
Python
55 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
|
import re, sys, time, psycopg2
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
# Connection parameters for the PostgreSQL database holding ethica.medecins_validated.
# NOTE(review): credentials are hard-coded in source -- consider moving them to
# the environment or a secrets store.
DB = dict(host='10.1.0.3', dbname='adx_system', user='admin', password='admin123')

# Substrings that disqualify a scraped address (search engines, directories,
# placeholder domains).
BLACKLIST = (
    'google', 'bing', 'example', 'facebook', '1sante', 'dzdoc', 'lespages',
    'pagesjaunes', 'wikipedia', 'speedtest', 'annuaire',
)

# "Top-level domains" that are really file extensions, e.g. "logo@2x.png"
# found in image URLs of the result page.
ASSET_EXTENSIONS = frozenset(
    ('png', 'jpg', 'jpeg', 'gif', 'svg', 'webp', 'ico', 'css', 'js')
)

# Case-insensitive so that "X@Y.COM" is found too; matches are lower-cased later.
EMAIL_RE = re.compile(r'[\w.+-]+@[\w.-]+\.[a-z]{2,}', re.IGNORECASE)

# Country keyword appended to the search query; unknown codes use 'algerie'.
COUNTRY_TERMS = {'MA': 'maroc', 'TN': 'tunisie'}
DEFAULT_COUNTRY_TERM = 'algerie'

USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0.0.0'


def build_query(nom, prenom, spec, ville, pays):
    """Return the search-engine query string for one doctor.

    Empty or ``None`` fields are skipped so the query contains no doubled
    blanks. The country keyword is chosen from ``pays``.
    """
    country = COUNTRY_TERMS.get(pays, DEFAULT_COUNTRY_TERM)
    parts = ('Dr', nom, prenom, spec, ville, 'email', country)
    return ' '.join(part.strip() for part in parts if part and part.strip())


def search_url(query):
    """Return the DuckDuckGo URL for ``query`` with proper URL encoding."""
    # Local import: keeps this block self-contained without touching the
    # file-level import lines.
    from urllib.parse import quote_plus

    return f"https://duckduckgo.com/?q={quote_plus(query)}"


def extract_emails(html):
    """Return the set of lower-cased candidate addresses found in ``html``.

    Addresses containing a BLACKLIST substring, or whose final dot-suffix is
    a file extension from ASSET_EXTENSIONS, are dropped.
    """
    candidates = {match.lower() for match in EMAIL_RE.findall(html)}
    return {
        addr for addr in candidates
        if addr.rsplit('.', 1)[-1] not in ASSET_EXTENSIONS
        and not any(term in addr for term in BLACKLIST)
    }


def pick_email(emails, nom):
    """Choose one address from ``emails`` for the doctor named ``nom``.

    An address whose local part contains the lower-cased surname wins;
    otherwise the first address is used. Candidates are visited in sorted
    order so the choice is reproducible. Returns ``None`` for an empty set.
    """
    ordered = sorted(emails)
    if not ordered:
        return None
    needle = nom.lower()
    for addr in ordered:
        if needle in addr.split('@')[0]:
            return addr
    return ordered[0]


def fetch_results_html(ctx, query):
    """Load the search results for ``query`` in a new page and return its HTML.

    The page is closed even when navigation fails.
    """
    page = ctx.new_page()
    try:
        page.goto(search_url(query), timeout=10000)
        # Fixed pause after navigation before the page content is read.
        page.wait_for_timeout(2000)
        return page.content()
    finally:
        page.close()


def store_email(conn, cur, rid, email):
    """Write ``email`` to row ``rid`` unless another row already has it.

    Returns ``True`` when the row was updated and committed, else ``False``.
    """
    cur.execute("SELECT 1 FROM ethica.medecins_validated WHERE email=%s LIMIT 1", (email,))
    if cur.fetchone():
        return False
    cur.execute("UPDATE ethica.medecins_validated SET email=%s,enriched_at=NOW() WHERE id=%s", (email, rid))
    conn.commit()
    return True


def main(argv=None):
    """Enrich a random batch of doctors without email via a web search.

    Command line: ``[batch] [pays]`` -- batch size (default 20) and country
    code (default 'MA'). Prints one ``+EMAIL`` line per stored address and a
    final ``PW_EMAIL_<pays>:+<stored>/<rows>`` summary.
    """
    args = sys.argv if argv is None else argv
    batch = int(args[1]) if len(args) > 1 else 20
    pays = args[2] if len(args) > 2 else 'MA'

    enriched = 0
    rows = []
    conn = psycopg2.connect(**DB)
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT id,nom,prenom,specialite,ville FROM ethica.medecins_validated WHERE pays=%s AND (email IS NULL OR email='') AND nom!='' AND LENGTH(nom)>2 ORDER BY RANDOM() LIMIT %s", (pays, batch))
            rows = cur.fetchall()

            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
                try:
                    ctx = browser.new_context(user_agent=USER_AGENT)
                    for rid, nom, prenom, spec, ville in rows:
                        try:
                            html = fetch_results_html(ctx, build_query(nom, prenom, spec, ville, pays))
                            best = pick_email(extract_emails(html), nom)
                            if best and store_email(conn, cur, rid, best):
                                enriched += 1
                                print(f"+EMAIL {nom} {prenom}: {best}")
                        except Exception as exc:
                            # Best effort per row: report and move on. The
                            # rollback clears a transaction aborted by a failed
                            # statement so later rows can still be written.
                            conn.rollback()
                            print(f"!ERR {nom} {prenom}: {exc!r}", file=sys.stderr)
                        # NOTE(review): pause after every row, including
                        # failures, to throttle requests to the search engine.
                        time.sleep(3)
                finally:
                    browser.close()
        finally:
            cur.close()
    finally:
        conn.close()

    print(f"PW_EMAIL_{pays}:+{enriched}/{len(rows)}")


if __name__ == "__main__":
    main()