#!/usr/bin/env python3
"""Playwright DabaDoc deep scraper - bypasses JS, gets all doctors.

Walks every specialty listing page for one country on dabadoc.com, extracts
doctor cards inside the browser via a JS snippet, and inserts previously
unseen (nom, prenom, specialite, pays) rows into ethica.medecins_validated.

CLI:
    scraper.py [country] [batch]
        country  DabaDoc country path segment (default "ma")
        batch    stop after this many new rows (default 99999, i.e. all)
"""
import os
import re
import sys
import time

import psycopg2
from playwright.sync_api import sync_playwright

# Connection settings are overridable via environment variables so credentials
# need not live in source control; the historical hard-coded values remain the
# defaults for backward compatibility.
DB = dict(
    host=os.environ.get("ADX_DB_HOST", "10.1.0.3"),
    dbname=os.environ.get("ADX_DB_NAME", "adx_system"),
    user=os.environ.get("ADX_DB_USER", "admin"),
    password=os.environ.get("ADX_DB_PASSWORD", "admin123"),
)

country = sys.argv[1] if len(sys.argv) > 1 else "ma"
batch = int(sys.argv[2]) if len(sys.argv) > 2 else 99999
pays_code = country.upper()

# URL slugs for every specialty listing to crawl.
SPECS = [
    "medecin-generaliste", "dentiste", "cardiologue", "pediatre", "gynecologue",
    "dermatologue", "ophtalmologue", "orl", "gastro-enterologue", "pneumologue",
    "rhumatologue", "endocrinologue", "neurologue", "urologue", "nephrologue",
    "psychiatre", "allergologue", "orthopediste", "radiologue", "pharmacien",
    "chirurgien", "anesthesiste", "hematologue", "oncologue", "nutritionniste",
    "medecin-du-travail", "kinesitherapeute", "sage-femme",
]

# Maps a URL slug to the canonical specialty name stored in the DB.
SPEC_MAP = {"medecin-generaliste": "generaliste"}

# Runs in the page context: collects {name, href, specialty, city} for every
# doctor card (h2/h3 link pointing back at dabadoc.com) on a listing page.
EXTRACT_JS = """() => {
    const results = [];
    // Get all h2/h3 links (doctor names)
    document.querySelectorAll('h2 a, h3 a').forEach(el => {
        const name = el.textContent.trim();
        const href = el.href || '';
        if (href.includes('dabadoc.com') && name.length > 3) {
            // Try to get specialty and location from parent card
            const card = el.closest('article, .card, div[class*=card], div[class*=result]')
                || el.parentElement.parentElement;
            let specialty = '', city = '', phone = '';
            if (card) {
                const texts = card.innerText.split('\\n').map(t => t.trim()).filter(t => t.length > 0);
                // Usually: Name, Specialty, City
                for (const t of texts) {
                    if (t.match(/^(Dr|Pr|Prof)/i)) continue;
                    if (!specialty && t.match(/(médecin|dentiste|cardio|pédiatre|gynéco|dermato|ophtalmo|orl|gastro|pneumo|rhumato|endocrino|neuro|uro|néphro|psychiatre|allergo|orthopéd|radio|pharma|chirurg|anesthés|hémato|onco|nutri|kiné|sage)/i)) {
                        specialty = t;
                    }
                }
            }
            // Extract city from URL
            const urlParts = href.split('/');
            const cityFromUrl = urlParts.length > 5 ? urlParts[4] : '';
            results.push({name, href, specialty, city: cityFromUrl});
        }
    });
    return results;
}"""

# Compiled once: strips a leading "Dr"/"Pr"/"Prof" title (optional dot) from a name.
_TITLE_RE = re.compile(r'^(Dr|Pr|Prof)\.?\s*', re.I)


def _split_name(raw):
    """Split a scraped display name into (NOM, Prenom).

    Strips any Dr/Pr/Prof prefix; the first token becomes the upper-cased
    family name and the remainder the title-cased given name(s).  Returns
    None when the name is unusable (empty, or family name shorter than 2).
    """
    name = _TITLE_RE.sub('', raw).strip()
    parts = name.split(None, 1)
    if not parts or len(parts[0]) < 2:
        return None
    return parts[0].upper(), (parts[1].title() if len(parts) > 1 else "")


def _insert_doctor(cur, conn, nom, prenom, canon_spec, ville, profile_url):
    """Insert one doctor row unless an equivalent row already exists.

    Commits on success; rolls back and skips on any DB error (e.g. a
    constraint violation).  Returns True iff a new row was inserted.
    """
    cur.execute(
        "SELECT 1 FROM ethica.medecins_validated "
        "WHERE LOWER(TRIM(nom))=LOWER(TRIM(%s)) AND LOWER(TRIM(prenom))=LOWER(TRIM(%s)) "
        "AND specialite=%s AND pays=%s LIMIT 1",
        (nom, prenom, canon_spec, pays_code),
    )
    if cur.fetchone():
        return False
    try:
        cur.execute(
            "INSERT INTO ethica.medecins_validated "
            "(nom,prenom,specialite,ville,pays,source,profile_url,created_at) "
            "VALUES(%s,%s,%s,%s,%s,'playwright_deep',%s,NOW())",
            (nom, prenom, canon_spec, ville, pays_code, profile_url),
        )
        conn.commit()
        return True
    except psycopg2.Error:
        # Row-level failure (duplicate key, bad value) must not poison the
        # transaction for subsequent inserts.
        conn.rollback()
        return False


def main():
    """Crawl all specialties for the configured country and load new doctors."""
    conn = psycopg2.connect(**DB)
    cur = conn.cursor()
    total_new = 0
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )
            ctx = browser.new_context(
                user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"
            )
            for spec in SPECS:
                if total_new >= batch:
                    break
                page_num = 1
                consecutive_empty = 0
                # Hard cap of 100 pages per specialty guards against loops.
                while page_num <= 100 and total_new < batch:
                    url = f"https://www.dabadoc.com/{country}/{spec}"
                    if page_num > 1:
                        url += f"/page/{page_num}"
                    try:
                        page = ctx.new_page()
                        try:
                            page.goto(url, timeout=15000, wait_until="domcontentloaded")
                            page.wait_for_timeout(2000)
                            doctors = page.evaluate(EXTRACT_JS)
                        finally:
                            # Always release the page, even on navigation errors.
                            page.close()
                        if not doctors:
                            consecutive_empty += 1
                            if consecutive_empty >= 2:
                                # Two empty pages in a row: specialty exhausted.
                                break
                            page_num += 1
                            continue
                        consecutive_empty = 0
                        for doc in doctors:
                            split = _split_name(doc['name'])
                            if split is None:
                                continue
                            nom, prenom = split
                            canon_spec = SPEC_MAP.get(spec, spec)
                            city = doc.get('city', '')
                            ville = city.replace('-', ' ').title() if city else ''
                            if _insert_doctor(cur, conn, nom, prenom, canon_spec,
                                              ville, doc.get('href', '')):
                                total_new += 1
                        page_num += 1
                        time.sleep(1)  # be polite between listing pages
                    except Exception:
                        # Timeouts/navigation failures are expected in bulk
                        # scraping; skip to the next page rather than abort.
                        page_num += 1
                        continue
                print(f"[{spec}] +{total_new} total ({pays_code})")
            browser.close()
    finally:
        # Release DB resources even if the crawl aborts mid-way.
        cur.close()
        conn.close()
    print(f"PLAYWRIGHT_{pays_code}: +{total_new} new doctors")


if __name__ == "__main__":
    main()