setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// Batch size: first CLI argument (default 5000), clamped to the range [10, 10000].
$batch = min(10000, max(10, (int)($argv[1] ?? 5000)));

/**
 * Print a timestamped message to stdout and append it to the validator log file.
 *
 * Warnings from the file write are suppressed, so an unwritable log file does
 * not interrupt the run.
 *
 * @param string $m Message text.
 */
function lg($m)
{
    echo date('H:i:s')." $m\n";
    @file_put_contents("/opt/wevads/logs/rich-validator.log", date('Y-m-d H:i:s')." $m\n", FILE_APPEND);
}

// Fetch the next batch of rows whose e-mail has not been checked yet.
// $batch is an integer clamped above, so interpolating it into LIMIT is safe.
$contacts = $db->query("SELECT id, email FROM richscraper.professionals WHERE email_valid='unknown' ORDER BY id LIMIT $batch")->fetchAll(PDO::FETCH_ASSOC);
lg("Batch: ".count($contacts)." to validate");
if (empty($contacts)) {
    lg("Nothing to check");
    exit(0);
}

// Per-verdict counters, reported once the batch is finished.
$stats = ['valid_mx' => 0, 'invalid' => 0, 'catch_all' => 0, 'risky' => 0];

// Bound parameters, in order: email_valid, mx_host, smtp_msg, id.
$update = $db->prepare("UPDATE richscraper.professionals SET email_valid=?, email_checked_at=NOW(), mx_host=?, smtp_msg=? WHERE id=?");

// DNS verdict cache for this run: domain => [status, host].
$mx_cache = [];

// Domains in this list are recorded as 'risky' without any DNS lookup.
$big = [
    'gmail.com', 'yahoo.fr', 'yahoo.com', 'hotmail.com', 'hotmail.fr', 'outlook.com', 'outlook.fr',
    'live.fr', 'live.com', 'msn.com', 'wanadoo.fr', 'orange.fr', 'free.fr', 'sfr.fr', 'laposte.net',
    'aol.com', 'icloud.com', 'me.com', 'protonmail.com', 'gmx.com', 'gmx.de', 'web.de', 't-online.de',
    'yahoo.de', 'hotmail.de', 'freenet.de', 'yahoo.co.uk', 'hotmail.co.uk', 'btinternet.com', 'sky.com',
    'yahoo.it', 'hotmail.it', 'libero.it', 'virgilio.it', 'tiscali.it', 'alice.it',
    'bluewin.ch', 'sunrise.ch', 'gmx.ch', 'hotmail.nl', 'ziggo.nl', 'kpnmail.nl',
    'comcast.net', 'att.net', 'sbcglobal.net', 'bellsouth.net', 'videotron.ca', 'bell.net', 'rogers.com',
    'bigpond.com', 'optusnet.com.au', 'wp.pl', 'onet.pl', 'o2.pl', 'interia.pl', 'gazeta.pl',
    'yahoo.com.au', 'hotmail.com.au', 'shaw.ca', 'yahoo.ca',
];

// Domains in this list are recorded as 'catch_all' without any DNS lookup.
$catchall = [
    'menara.ma', 'iam.ma', 'topnet.tn', 'planet.tn', 'djaweb.dz', 'caramail.com',
    'mailinator.com', 'yopmail.com', 'guerrillamail.com', 'tempmail.com',
];

foreach ($contacts as $c) {
    // Cast first: a NULL e-mail column becomes '' and is rejected as bad syntax
    // below instead of being passed to strtolower() as null.
    $email = trim(strtolower((string)$c['email']));

    if (!filter_var($email, FILTER_VALIDATE_EMAIL)) {
        $update->execute(['invalid', '', 'bad_syntax', $c['id']]);
        $stats['invalid']++;
        continue;
    }

    // The domain starts after the LAST '@': a quoted local part may itself
    // contain '@', so searching from the left could cut the address too early.
    $domain = substr($email, strrpos($email, '@') + 1);

    if (in_array($domain, $catchall, true)) {
        $update->execute(['catch_all', $domain, 'catch_all_domain', $c['id']]);
        $stats['catch_all']++;
        continue;
    }

    if (in_array($domain, $big, true)) {
        $update->execute(['risky', $domain, 'big_provider_mx_ok', $c['id']]);
        $stats['risky']++;
        continue;
    }

    // Resolve each domain at most once per run.
    if (!isset($mx_cache[$domain])) {
        $mx = [];
        $mx_weights = [];
        $ok = @getmxrr($domain, $mx, $mx_weights);

        if ($ok && !empty($mx)) {
            // getmxrr() does not order hosts by preference; pick the entry
            // with the lowest weight so mx_host is the primary exchanger.
            $best = 0;
            foreach ($mx as $i => $unused) {
                if (isset($mx_weights[$i], $mx_weights[$best]) && $mx_weights[$i] < $mx_weights[$best]) {
                    $best = $i;
                }
            }
            $mx_cache[$domain] = ['valid', $mx[$best]];
        } else {
            // No MX records: fall back to an address lookup. gethostbyname()
            // returns its argument unchanged when resolution fails.
            $a = @gethostbyname($domain);
            $mx_cache[$domain] = ($a !== $domain) ? ['valid', $a] : ['invalid', ''];
        }
    }

    if ($mx_cache[$domain][0] === 'invalid') {
        $update->execute(['invalid', '', 'no_mx', $c['id']]);
        $stats['invalid']++;
    } else {
        $update->execute(['valid_mx', $mx_cache[$domain][1], 'mx_ok', $c['id']]);
        $stats['valid_mx']++;
    }
}

lg("DONE valid_mx:{$stats['valid_mx']} invalid:{$stats['invalid']} catch_all:{$stats['catch_all']} risky:{$stats['risky']}");

// Report how many rows are still waiting for a check.
$rem = $db->query("SELECT COUNT(*) FROM richscraper.professionals WHERE email_valid='unknown'")->fetchColumn();
lg("Remaining unknown: $rem");