50 lines
3.1 KiB
PHP
Executable File
50 lines
3.1 KiB
PHP
Executable File
<?php
/**
 * RICH SCRAPER VALIDATOR — DNS/MX only, NO SMTP (Hetzner safe)
 *
 * Command-line batch job. The optional first CLI argument is the batch
 * size; it defaults to 5000 and is clamped to the range 10..10000.
 */

error_reporting(E_ALL);
set_time_limit(0); // no execution time limit for this run

// NOTE(review): the connection credentials are hard-coded in this script —
// consider loading them from configuration instead.
$db = new PDO(
    'pgsql:host=localhost;dbname=adx_system',
    'admin',
    'admin123',
    [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
);

// Batch size: CLI argument (or 5000), then clamp into [10, 10000].
$batch = (int) ($argv[1] ?? 5000);
if ($batch < 10) {
    $batch = 10;
}
if ($batch > 10000) {
    $batch = 10000;
}

/**
 * Write a log message to stdout and append it to the validator log file.
 *
 * The stdout line is prefixed with the time (H:i:s); the file line is
 * prefixed with the full date and time. A failure to write the file is
 * suppressed so that logging never interrupts the run.
 *
 * @param string $m Message text (a newline is appended).
 */
function lg($m)
{
    echo sprintf("%s %s\n", date('H:i:s'), $m);

    $logFile = '/opt/wevads/logs/rich-validator.log';
    @file_put_contents($logFile, sprintf("%s %s\n", date('Y-m-d H:i:s'), $m), FILE_APPEND);
}

// Load the next batch of rows whose e-mail has not been checked yet
// (email_valid = 'unknown'), lowest ids first. $batch is an integer
// already clamped by the script, so it is formatted with %d.
$pendingSql = sprintf(
    "SELECT id, email FROM richscraper.professionals WHERE email_valid='unknown' ORDER BY id LIMIT %d",
    $batch
);
$pendingStmt = $db->query($pendingSql);
$contacts = $pendingStmt->fetchAll(PDO::FETCH_ASSOC);

lg('Batch: ' . count($contacts) . ' to validate');

// Nothing pending: log it and finish with a success exit code.
if (count($contacts) === 0) {
    lg('Nothing to check');
    exit(0);
}

// Per-outcome counters, reported once the batch has been processed.
$stats = array_fill_keys(['valid_mx', 'invalid', 'catch_all', 'risky'], 0);

// Statement that records the verdict for one row.
// Parameter order: email_valid, mx_host, smtp_msg, id.
$update = $db->prepare(
    'UPDATE richscraper.professionals SET email_valid=?, email_checked_at=NOW(), mx_host=?, smtp_msg=? WHERE id=?'
);

// Per-run cache of DNS verdicts, keyed by domain.
$mx_cache = [];

// Domains that are marked 'risky' directly, without any DNS lookup.
$big = [
    'gmail.com', 'yahoo.fr', 'yahoo.com', 'hotmail.com', 'hotmail.fr',
    'outlook.com', 'outlook.fr', 'live.fr', 'live.com', 'msn.com',
    'wanadoo.fr', 'orange.fr', 'free.fr', 'sfr.fr', 'laposte.net',
    'aol.com', 'icloud.com', 'me.com', 'protonmail.com', 'gmx.com',
    'gmx.de', 'web.de', 't-online.de', 'yahoo.de', 'hotmail.de',
    'freenet.de', 'yahoo.co.uk', 'hotmail.co.uk', 'btinternet.com', 'sky.com',
    'yahoo.it', 'hotmail.it', 'libero.it', 'virgilio.it', 'tiscali.it',
    'alice.it', 'bluewin.ch', 'sunrise.ch', 'gmx.ch', 'hotmail.nl',
    'ziggo.nl', 'kpnmail.nl', 'comcast.net', 'att.net', 'sbcglobal.net',
    'bellsouth.net', 'videotron.ca', 'bell.net', 'rogers.com', 'bigpond.com',
    'optusnet.com.au', 'wp.pl', 'onet.pl', 'o2.pl', 'interia.pl',
    'gazeta.pl', 'yahoo.com.au', 'hotmail.com.au', 'shaw.ca', 'yahoo.ca',
];

// Domains that are marked 'catch_all' directly, without any DNS lookup.
$catchall = [
    'menara.ma', 'iam.ma', 'topnet.tn', 'planet.tn', 'djaweb.dz',
    'caramail.com', 'mailinator.com', 'yopmail.com', 'guerrillamail.com', 'tempmail.com',
];

// Classify every contact of the batch and persist the verdict.
// Order of checks: syntax -> catch-all list -> big-provider list -> DNS.
foreach ($contacts as $c) {
    $id = $c['id'];

    // The email column may be NULL; cast first so strtolower() never
    // receives null (deprecated since PHP 8.1).
    $email = trim(strtolower((string) $c['email']));

    if (!filter_var($email, FILTER_VALIDATE_EMAIL)) {
        $update->execute(['invalid', '', 'bad_syntax', $id]);
        $stats['invalid']++;
        continue;
    }

    // The domain starts after the LAST '@': a quoted local part may itself
    // contain an '@', so the first occurrence is not a reliable separator.
    $domain = substr($email, strrpos($email, '@') + 1);

    if (in_array($domain, $catchall, true)) {
        $update->execute(['catch_all', $domain, 'catch_all_domain', $id]);
        $stats['catch_all']++;
        continue;
    }

    if (in_array($domain, $big, true)) {
        $update->execute(['risky', $domain, 'big_provider_mx_ok', $id]);
        $stats['risky']++;
        continue;
    }

    // Resolve each domain at most once per run.
    if (!isset($mx_cache[$domain])) {
        $hosts = [];
        $weights = [];
        $bestHost = null;
        $bestWeight = PHP_INT_MAX;
        $nullMx = false;

        if (@getmxrr($domain, $hosts, $weights)) {
            foreach ($hosts as $i => $host) {
                // An empty or "." exchange is a "null MX" (RFC 7505): the
                // domain declares that it accepts no mail.
                if ($host === '' || $host === '.') {
                    $nullMx = true;
                    continue;
                }
                // getmxrr() does not order hosts by preference, so pick the
                // lowest weight explicitly (first one wins on ties).
                $weight = $weights[$i] ?? PHP_INT_MAX;
                if ($bestHost === null || $weight < $bestWeight) {
                    $bestHost = $host;
                    $bestWeight = $weight;
                }
            }
        }

        if ($bestHost !== null) {
            $mx_cache[$domain] = ['valid', $bestHost];
        } elseif ($nullMx) {
            // Explicit null MX: do not fall back to the A record.
            $mx_cache[$domain] = ['invalid', ''];
        } else {
            // No MX at all: fall back to the address record.
            // gethostbyname() returns its argument unchanged on failure.
            $addr = @gethostbyname($domain);
            $mx_cache[$domain] = ($addr !== $domain) ? ['valid', $addr] : ['invalid', ''];
        }
    }

    if ($mx_cache[$domain][0] === 'invalid') {
        $update->execute(['invalid', '', 'no_mx', $id]);
        $stats['invalid']++;
    } else {
        $update->execute(['valid_mx', $mx_cache[$domain][1], 'mx_ok', $id]);
        $stats['valid_mx']++;
    }
}

// Report the per-outcome counters for this batch.
lg(sprintf(
    'DONE valid_mx:%s invalid:%s catch_all:%s risky:%s',
    $stats['valid_mx'],
    $stats['invalid'],
    $stats['catch_all'],
    $stats['risky']
));

// Report how many rows are still waiting for validation.
$remainingStmt = $db->query("SELECT COUNT(*) FROM richscraper.professionals WHERE email_valid='unknown'");
$rem = $remainingStmt->fetchColumn();
lg('Remaining unknown: ' . $rem);