28 lines
1.4 KiB
PHP
Executable File
28 lines
1.4 KiB
PHP
Executable File
<?php

/**
 * PubMed contact harvester.
 *
 * For each configured country, searches PubMed for articles whose affiliation
 * mentions that country (esearch, up to 500 ids), downloads every article's
 * XML record (efetch) and stores each author that has a matching e-mail
 * address (.ma / .tn / .dz / .edu) into ethica.medecins_real.
 *
 * Prints the number of rows actually inserted when finished. Network, JSON
 * and database failures are reported through error_log() and do not abort
 * the run.
 */

// The crawl sleeps between requests and can run for a long time.
set_time_limit(0);

// Raise exceptions on SQL errors so a failed insert is never silently ignored
// (this is only the default from PHP 8 onwards).
$db = new PDO('pgsql:host=localhost;dbname=adx_system', 'postgres', '', [
    PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
]);

$upd = $db->prepare(
    'INSERT INTO ethica.medecins_real (nom,prenom,email,pays,source,scraped_at,status) VALUES (?,?,?,?,?,NOW(),?) ON CONFLICT DO NOTHING'
);

// PubMed affiliation search term => value written to the "pays" column.
// NOTE(review): 'ALG' is inconsistent with the two-letter 'MA'/'TN' codes
// (ISO 3166-1 alpha-2 for Algeria is 'DZ'). Left unchanged because existing
// rows or consumers may rely on it; confirm and align.
$countries = ['Morocco' => 'MA', 'Tunisia' => 'TN', 'Algeria' => 'ALG'];

/**
 * Downloads a URL and returns its body, or null when the request failed or
 * the response was empty. Failures are logged instead of being swallowed.
 *
 * @param string $url
 * @return string|null
 */
$fetch = static function (string $url) {
    error_clear_last();
    $body = @file_get_contents($url);
    if ($body === false || $body === '') {
        $reason = error_get_last()['message'] ?? 'empty response';
        error_log("PubMed request failed: {$url} ({$reason})");
        return null;
    }

    return $body;
};

/**
 * Extracts the authors of an efetch XML record that carry a matching e-mail.
 *
 * Name and e-mail are taken from the SAME <Author> element, so a name is never
 * paired with an address belonging to a different author.
 * NOTE(review): assumes PubMed places an author's e-mail inside that author's
 * own <Author> element (in the affiliation text); confirm against the DTD.
 *
 * @param string $xml Raw efetch response.
 * @return array<int, array{nom: string, prenom: string, email: string}>
 */
$extractContacts = static function (string $xml): array {
    $contacts = [];

    // \b keeps <AuthorList ...> from being mistaken for an <Author> element.
    if (!preg_match_all('/<Author\b[^>]*>(.*?)<\/Author>/s', $xml, $blocks)) {
        return $contacts;
    }

    foreach ($blocks[1] as $block) {
        if (!preg_match('/<LastName>([^<]+)<\/LastName>/', $block, $last)
            || !preg_match('/<ForeName>([^<]+)<\/ForeName>/', $block, $fore)
        ) {
            continue;
        }

        // The negative look-ahead rejects addresses whose domain continues
        // after the accepted TLD (e.g. "x@foo.edu.au", "x@mail.matrix.com"),
        // which would otherwise be stored truncated. A sentence-ending "."
        // after the address is still accepted.
        if (!preg_match(
            '/([a-z0-9._-]+@[a-z0-9.-]+\.(?:ma|tn|dz|edu))(?![.-]?[a-z0-9])/i',
            $block,
            $email
        )) {
            continue;
        }

        $contacts[] = [
            'nom'    => trim($last[1]),
            'prenom' => trim($fore[1]),
            'email'  => strtolower($email[1]),
        ];
    }

    return $contacts;
};

// Number of rows actually inserted (conflicting rows are not counted).
$n = 0;

foreach ($countries as $country => $code) {
    // Concatenate explicitly: inside double quotes "$country[Affiliation]" is
    // parsed as an offset lookup on $country, not as the literal field tag.
    $query = urlencode($country . '[Affiliation]');
    $url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term={$query}&retmax=500&retmode=json";

    $data = $fetch($url);
    if ($data === null) {
        continue;
    }

    $json = json_decode($data, true);
    if (!is_array($json)) {
        error_log("PubMed esearch returned invalid JSON for {$country}: " . json_last_error_msg());
        continue;
    }

    $pmids = $json['esearchresult']['idlist'] ?? [];
    if (!is_array($pmids)) {
        continue;
    }

    foreach ($pmids as $pmid) {
        $id = rawurlencode((string) $pmid);
        $xml = $fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={$id}&retmode=xml");

        if ($xml !== null) {
            foreach ($extractContacts($xml) as $contact) {
                try {
                    $upd->execute([
                        $contact['nom'],
                        $contact['prenom'],
                        $contact['email'],
                        $code,
                        'pubmed',
                        'active',
                    ]);
                    // rowCount() is 0 when ON CONFLICT DO NOTHING skipped the row.
                    $n += $upd->rowCount();
                } catch (PDOException $e) {
                    error_log("Insert failed for {$contact['email']}: " . $e->getMessage());
                }
            }
        }

        // Pause 0.5 s after every efetch call, successful or not; presumably
        // to stay under NCBI's request-rate limit.
        usleep(500000);
    }
}

echo "PubMed: $n emails\n";