setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// ---------------------------------------------------------------------------
// Scrapes public doctor directories (dabadoc, 1sante.dz, doctoralia.ma) and
// inserts the names found into ethica.medecins_real.
//
// Usage: the first CLI argument selects the mode: 'alg', 'ma' or 'all'
// (default). Any other value runs no scraper and only prints the counts.
// ---------------------------------------------------------------------------

$mode = $argv[1] ?? 'all';
$log = '/tmp/ethica-boost.log';

/**
 * Write a timestamped message to the log file and to stdout.
 *
 * @param string $m Message text (without timestamp or trailing newline).
 */
function l($m)
{
    global $log;

    $line = '[' . date('H:i:s') . "] $m\n";
    file_put_contents($log, $line, FILE_APPEND);
    echo $line;
}

/**
 * Fetch a URL over HTTP with a small number of retries.
 *
 * @param string $u URL to fetch.
 * @param int    $r Maximum number of attempts.
 *
 * @return string|false Response body on HTTP 200 with a non-empty body,
 *                      false once every attempt has failed.
 */
function g($u, $r = 3)
{
    for ($i = 0; $i < $r; $i++) {
        $ch = curl_init($u);
        if ($ch !== false) {
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => 1,
                CURLOPT_FOLLOWLOCATION => 1,
                CURLOPT_TIMEOUT        => 30,
                // NOTE(review): certificate verification is disabled, so responses
                // can be tampered with in transit. Kept as-is because enabling it
                // may break targets with misconfigured TLS; confirm and re-enable.
                CURLOPT_SSL_VERIFYPEER => 0,
                CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            ]);
            $h = curl_exec($ch);
            $c = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($c == 200 && !empty($h)) {
                return $h;
            }
        }

        // Back off before the next attempt only; sleeping after the last
        // failure just delays the caller.
        if ($i < $r - 1) {
            sleep(rand(2, 4));
        }
    }

    return false;
}

/**
 * Split a scraped full name into [prenom, nom].
 *
 * A leading "Dr" / "Dr." title is removed, then the name is split at the first
 * run of whitespace (the scraping regexes accept any whitespace between words,
 * not only a single space). When there is only one word, it is used for both
 * parts, mirroring the previous fallback.
 *
 * @param string $full Raw name as captured from the page.
 *
 * @return array{0:string,1:string} [prenom, nom]
 */
function boost_split_name($full)
{
    $clean = trim(preg_replace('/^\s*Dr\.?\s+/iu', '', $full) ?? $full);

    $parts = preg_split('/\s+/u', $clean, 2);
    if ($parts === false) {
        // preg_split fails on invalid UTF-8 under /u; fall back to the whole string.
        $parts = [$clean];
    }

    return [$parts[0], $parts[1] ?? $clean];
}

/**
 * Insert one doctor row unless an equivalent row already exists.
 *
 * Duplicate detection, in order:
 *   1. same email (when an email is provided);
 *   2. same profile_url (when a URL is provided);
 *   3. same nom + prenom + ville + pays (when nom is provided). A missing ville
 *      is compared as '', which is also what gets stored, so rows scraped
 *      without a city are no longer re-inserted on every run.
 *
 * @param PDO   $pdo Open connection.
 * @param array $d   Row data; recognised keys: nom, prenom, specialite, ville,
 *                   pays, telephone, source, profile_url, email.
 *
 * @return int 1 when a row was inserted, 0 when it was considered a duplicate.
 */
function ins($pdo, $d)
{
    $nom    = $d['nom'] ?? '';
    $prenom = $d['prenom'] ?? '';
    $ville  = $d['ville'] ?? '';
    $pays   = $d['pays'] ?? '';
    $url    = $d['profile_url'] ?? '';

    if (!empty($d['email'])) {
        $s = $pdo->prepare("SELECT 1 FROM ethica.medecins_real WHERE email=:e LIMIT 1");
        $s->execute(['e' => $d['email']]);
        if ($s->fetch()) {
            return 0;
        }
    }

    if ($url !== '') {
        $s = $pdo->prepare("SELECT 1 FROM ethica.medecins_real WHERE profile_url=:u LIMIT 1");
        $s->execute(['u' => $url]);
        if ($s->fetch()) {
            return 0;
        }
    }

    if ($nom !== '') {
        $s = $pdo->prepare("SELECT 1 FROM ethica.medecins_real WHERE nom=:n AND prenom=:p AND ville=:v AND pays=:c LIMIT 1");
        $s->execute(['n' => $nom, 'p' => $prenom, 'v' => $ville, 'c' => $pays]);
        if ($s->fetch()) {
            return 0;
        }
    }

    $pdo->prepare("INSERT INTO ethica.medecins_real(nom,prenom,specialite,ville,pays,telephone,source,profile_url,scraped_at,created_at) VALUES(:n,:p,:s,:v,:c,:t,:src,:u,NOW(),NOW())")
        ->execute([
            'n'   => $nom,
            'p'   => $prenom,
            's'   => $d['specialite'] ?? '',
            'v'   => $ville,
            'c'   => $pays,
            't'   => $d['telephone'] ?? '',
            'src' => $d['source'] ?? '',
            'u'   => $url,
        ]);

    return 1;
}

/**
 * Log the number of rows per country currently in ethica.medecins_real.
 *
 * @param PDO $pdo Open connection.
 */
function boost_log_counts($pdo)
{
    $r = $pdo->query("SELECT pays,COUNT(*) c FROM ethica.medecins_real GROUP BY pays ORDER BY c DESC");
    while ($row = $r->fetch(PDO::FETCH_ASSOC)) {
        l(" {$row['pays']}: {$row['c']}");
    }
}

// Specialty slugs used as URL path segments by the scrapers below.
$specs = [
    'medecin-generaliste', 'dentiste', 'gynecologue', 'pediatre', 'ophtalmologue', 'cardiologue',
    'dermatologue', 'orl', 'psychiatre', 'radiologue',
    'chirurgien', 'urologue', 'neurologue', 'gastro-enterologue', 'rhumatologue', 'endocrinologue',
    'pneumologue', 'nephrologue', 'oncologue',
    'orthopediste', 'kinesitherapeute', 'sage-femme', 'pharmacien', 'allergologue', 'hematologue',
    'anesthesiste',
];

// DABADOC SCRAPER
/**
 * Scrape dabadoc.com listing pages for one country and insert the doctors found.
 *
 * For each specialty slug, up to 50 listing pages are fetched. Names are taken
 * from profile links and, additionally, from JSON-LD "Physician" entries.
 *
 * @param PDO      $pdo   Open connection.
 * @param string   $cc    Country path segment; 'dz' maps to pays 'ALG',
 *                        anything else to 'MA'.
 * @param string[] $specs Specialty slugs.
 *
 * @return int Number of rows inserted.
 */
function dabadoc($pdo, $cc, $specs)
{
    $pays = $cc == 'dz' ? 'ALG' : 'MA';
    $src  = "dabadoc_boost_$cc";
    $ins  = 0;
    $scan = 0;

    l("[$pays] dabadoc.$cc START - " . count($specs) . " specs");

    // Profile link whose text looks like "Firstname Lastname" (optional "Dr." prefix).
    $linkRe = '/href="(\/' . preg_quote($cc, '/') . '\/[^"]+)"[^>]*>\s*(?:Dr\.?\s*)?([A-Z\x{00C0}-\x{024F}][a-z\x{00C0}-\x{024F}]+(?:\s+[A-Z\x{00C0}-\x{024F}][a-z\x{00C0}-\x{024F}]+)+)/u';

    foreach ($specs as $sp) {
        $specialite = ucfirst(str_replace('-', ' ', $sp));

        for ($pg = 1; $pg <= 50; $pg++) {
            $h = g("https://www.dabadoc.com/$cc/$sp?page=$pg");
            if (!$h) {
                break;
            }

            // Extract doctor names + profile links
            preg_match_all($linkRe, $h, $m, PREG_SET_ORDER);
            foreach ($m as $doc) {
                list($prenom, $nom) = boost_split_name($doc[2]);
                $ins += ins($pdo, [
                    'pays'        => $pays,
                    'source'      => $src,
                    'specialite'  => $specialite,
                    'prenom'      => $prenom,
                    'nom'         => $nom,
                    'profile_url' => 'https://www.dabadoc.com' . $doc[1],
                ]);
                $scan++;
            }

            // Also try JSON-LD
            preg_match_all('/"@type"\s*:\s*"Physician"[^}]*?"name"\s*:\s*"([^"]+)"/s', $h, $jm);
            foreach ($jm[1] ?? [] as $n) {
                list($prenom, $nom) = boost_split_name($n);
                $ins += ins($pdo, [
                    'pays'       => $pays,
                    'source'     => $src,
                    'specialite' => $specialite,
                    'prenom'     => $prenom,
                    'nom'        => $nom,
                ]);
                $scan++;
            }

            // Stop paging when the page contains 'aucun' and yielded no profile links.
            if (strpos($h, 'aucun') !== false && count($m) == 0) {
                break;
            }

            usleep(rand(500000, 1500000));
        }

        // NOTE(review): this only logs when the running scan count happens to be
        // an exact multiple of 100 at the end of a specialty — confirm whether a
        // per-specialty log was intended.
        if ($scan > 0 && $scan % 100 == 0) {
            l("[$pays] dabadoc $sp: scan=$scan ins=$ins");
        }
    }

    l("[$pays] dabadoc DONE: scanned=$scan inserted=$ins");

    return $ins;
}

// 1SANTE.DZ DEEP (48 wilayas)
/**
 * Scrape the 1sante.dz doctor directory for every wilaya in the list.
 *
 * Up to 20 pages are fetched per wilaya; paging stops early when a page after
 * the first yields no new rows.
 *
 * @param PDO      $pdo   Open connection.
 * @param string[] $specs Unused; kept for signature parity with the other scrapers.
 *
 * @return int Number of rows inserted.
 */
function sante1dz($pdo, $specs)
{
    $src = '1sante_boost';
    $ins = 0;
    $wilayas = [
        'adrar', 'chlef', 'laghouat', 'oum-el-bouaghi', 'batna', 'bejaia', 'biskra', 'bechar', 'blida', 'bouira',
        'tamanrasset', 'tebessa', 'tlemcen', 'tiaret', 'tizi-ouzou', 'alger', 'djelfa', 'jijel', 'setif', 'saida', 'skikda',
        'sidi-bel-abbes', 'annaba', 'guelma', 'constantine', 'medea', 'mostaganem', 'msila', 'mascara', 'ouargla', 'oran',
        'el-bayadh', 'illizi', 'bordj-bou-arreridj', 'boumerdes', 'el-tarf', 'tindouf', 'tissemsilt', 'el-oued', 'khenchela',
        'souk-ahras', 'tipaza', 'mila', 'ain-defla', 'naama', 'ain-temouchent', 'ghardaia', 'relizane',
    ];

    l("[ALG] 1sante.dz START - " . count($wilayas) . " wilayas");

    foreach ($wilayas as $w) {
        $ville = ucfirst(str_replace('-', ' ', $w));

        for ($pg = 1; $pg <= 20; $pg++) {
            $h = g("https://www.1sante.dz/annuaire/medecin/$w?page=$pg");
            if (!$h || strlen($h) < 500) {
                break;
            }

            preg_match_all('/(?:Dr\.?\s+)?([A-Z\x{00C0}-\x{024F}][a-z\x{00C0}-\x{024F}]+(?:\s+[A-Z\x{00C0}-\x{024F}][a-z\x{00C0}-\x{024F}]+)+)/u', $h, $names);
            preg_match_all('/(?:0[5-7]\d{2}[\s.\-]?\d{2}[\s.\-]?\d{2}[\s.\-]?\d{2})/', $h, $phones);

            $pi = 0;
            // Iterate the captured name (group 1), not the full match, so the
            // optional "Dr." prefix does not end up stored as the first name.
            // NOTE(review): phones are paired with names purely by match index;
            // verify that the page layout makes this pairing reliable.
            foreach ($names[1] ?? [] as $idx => $fn) {
                list($prenom, $nom) = boost_split_name($fn);
                $pi += ins($pdo, [
                    'pays'       => 'ALG',
                    'source'     => $src,
                    'specialite' => 'Medecin',
                    'ville'      => $ville,
                    'prenom'     => $prenom,
                    'nom'        => $nom,
                    'telephone'  => $phones[0][$idx] ?? '',
                ]);
            }
            $ins += $pi;

            if ($pi == 0 && $pg > 1) {
                break;
            }

            usleep(rand(300000, 800000));
        }

        l("[ALG] 1sante $w: total=$ins");
    }

    l("[ALG] 1sante DONE: inserted=$ins");

    return $ins;
}

// DOCTORALIA.MA
/**
 * Scrape doctoralia.ma listing pages for each city/specialty pair.
 *
 * Up to 20 pages are fetched per pair; paging stops when a page yields neither
 * JSON-LD "Physician" names nor name-like links.
 *
 * @param PDO      $pdo   Open connection.
 * @param string[] $specs Specialty slugs.
 *
 * @return int Number of rows inserted.
 */
function doctoralia($pdo, $specs)
{
    $src = 'doctoralia_ma';
    $ins = 0;
    $villes = [
        'casablanca', 'rabat', 'marrakech', 'fes', 'tanger', 'agadir', 'meknes', 'oujda', 'kenitra', 'tetouan',
        'safi', 'el-jadida', 'nador', 'beni-mellal', 'khouribga', 'settat', 'mohammedia', 'taza', 'sale', 'temara',
    ];

    l("[MA] doctoralia.ma START - " . count($villes) . " cities");

    foreach ($villes as $v) {
        $ville = ucfirst(str_replace('-', ' ', $v));

        foreach ($specs as $sp) {
            $specialite = ucfirst(str_replace('-', ' ', $sp));

            for ($pg = 1; $pg <= 20; $pg++) {
                $h = g("https://www.doctoralia.ma/$sp/$v?page=$pg");
                if (!$h || strlen($h) < 1000) {
                    break;
                }

                preg_match_all('/"@type"\s*:\s*"Physician"[^}]*?"name"\s*:\s*"([^"]+)"/s', $h, $jm);
                $jsonNames = $jm[1] ?? [];
                foreach ($jsonNames as $n) {
                    list($prenom, $nom) = boost_split_name($n);
                    $ins += ins($pdo, [
                        'pays'       => 'MA',
                        'source'     => $src,
                        'specialite' => $specialite,
                        'prenom'     => $prenom,
                        'nom'        => $nom,
                        'ville'      => $ville,
                    ]);
                }

                preg_match_all('/href="\/([^"]+)"[^>]*>\s*(?:Dr\.?\s*)?([A-Z\x{00C0}-\x{024F}][a-z\x{00C0}-\x{024F}]+(?:\s+[A-Z\x{00C0}-\x{024F}][a-z\x{00C0}-\x{024F}]+)+)/u', $h, $lnk, PREG_SET_ORDER);
                foreach ($lnk as $l2) {
                    list($prenom, $nom) = boost_split_name($l2[2]);
                    $ins += ins($pdo, [
                        'pays'        => 'MA',
                        'source'      => $src,
                        'specialite'  => $specialite,
                        'prenom'      => $prenom,
                        'nom'         => $nom,
                        'ville'       => $ville,
                        'profile_url' => 'https://www.doctoralia.ma/' . $l2[1],
                    ]);
                }

                if (count($jsonNames) == 0 && count($lnk) == 0) {
                    break;
                }

                usleep(rand(500000, 1500000));
            }
        }

        l("[MA] doctoralia $v: total=$ins");
    }

    l("[MA] doctoralia DONE: inserted=$ins");

    return $ins;
}

// MAIN
l("=== BOOST START mode=$mode ===");
boost_log_counts($pdo);

$tot = 0;
if ($mode == 'alg' || $mode == 'all') {
    l("--- ALG ---");
    $tot += dabadoc($pdo, 'dz', $specs);
    $tot += sante1dz($pdo, $specs);
}
if ($mode == 'ma' || $mode == 'all') {
    l("--- MA ---");
    $tot += dabadoc($pdo, 'ma', $specs);
    $tot += doctoralia($pdo, $specs);
}

l("=== TOTAL INSERTED: $tot ===");
boost_log_counts($pdo);
l("=== DONE ===");