Files
wevads-platform/scripts/rich-validator-safe.php

50 lines
3.1 KiB
PHP
Executable File

<?php
/**
* RICH SCRAPER VALIDATOR — DNS/MX only, NO SMTP (Hetzner safe)
*/
// Surface every PHP diagnostic and remove the execution time limit for this run.
error_reporting(E_ALL);
set_time_limit(0);

// Connect to the local PostgreSQL database and make PDO raise exceptions on errors.
$db = new PDO("pgsql:host=localhost;dbname=adx_system", "admin", "admin123");
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// Batch size: first CLI argument (default 5000), clamped to the range 10..10000.
$batch = (int)($argv[1] ?? 5000);
if ($batch < 10) {
    $batch = 10;
} elseif ($batch > 10000) {
    $batch = 10000;
}
/**
 * Print a time-stamped message to standard output and append the same message,
 * prefixed with a full date-time stamp, to the validator log file.
 *
 * The file write is best-effort: its warnings are suppressed and its result is ignored.
 *
 * @param mixed $m Message to log (interpolated into a string).
 */
function lg($m) {
    $suffix = " $m\n";
    echo date('H:i:s') . $suffix;
    @file_put_contents(
        "/opt/wevads/logs/rich-validator.log",
        date('Y-m-d H:i:s') . $suffix,
        FILE_APPEND
    );
}
// ---------------------------------------------------------------------------
// Main pass: classify one batch of not-yet-checked addresses using syntax and
// DNS lookups only. Uses $db (PDO), $batch (int) and lg() from earlier in this
// script. Each row is updated with a status, the resolved host and a reason.
// ---------------------------------------------------------------------------

// (int) cast keeps the interpolated LIMIT safe even if $batch is ever changed upstream.
$contacts = $db->query("SELECT id, email FROM richscraper.professionals WHERE email_valid='unknown' ORDER BY id LIMIT ".(int)$batch)->fetchAll(PDO::FETCH_ASSOC);
lg("Batch: ".count($contacts)." to validate");
if (empty($contacts)) { lg("Nothing to check"); exit(0); }

$stats = ['valid_mx'=>0,'invalid'=>0,'catch_all'=>0,'risky'=>0];
$update = $db->prepare("UPDATE richscraper.professionals SET email_valid=?, email_checked_at=NOW(), mx_host=?, smtp_msg=? WHERE id=?");

// Per-run cache: domain => [status, host], so each domain is resolved at most once.
$mx_cache = [];

// Domains that are marked 'risky' without any DNS lookup.
$big = ['gmail.com','yahoo.fr','yahoo.com','hotmail.com','hotmail.fr','outlook.com','outlook.fr',
    'live.fr','live.com','msn.com','wanadoo.fr','orange.fr','free.fr','sfr.fr','laposte.net',
    'aol.com','icloud.com','me.com','protonmail.com','gmx.com','gmx.de','web.de','t-online.de',
    'yahoo.de','hotmail.de','freenet.de','yahoo.co.uk','hotmail.co.uk','btinternet.com','sky.com',
    'yahoo.it','hotmail.it','libero.it','virgilio.it','tiscali.it','alice.it',
    'bluewin.ch','sunrise.ch','gmx.ch','hotmail.nl','ziggo.nl','kpnmail.nl',
    'comcast.net','att.net','sbcglobal.net','bellsouth.net','videotron.ca','bell.net','rogers.com',
    'bigpond.com','optusnet.com.au','wp.pl','onet.pl','o2.pl','interia.pl','gazeta.pl',
    'yahoo.com.au','hotmail.com.au','shaw.ca','yahoo.ca'];

// Domains that are marked 'catch_all' without any DNS lookup.
$catchall = ['menara.ma','iam.ma','topnet.tn','planet.tn','djaweb.dz','caramail.com',
    'mailinator.com','yopmail.com','guerrillamail.com','tempmail.com'];

// Hash sets give exact-string, constant-time membership tests inside the loop.
$big_set = array_flip($big);
$catchall_set = array_flip($catchall);

foreach ($contacts as $c) {
    // Cast first so a NULL email column is treated as an empty (invalid) address
    // instead of being passed to strtolower() as null.
    $email = trim(strtolower((string)$c['email']));
    if (!filter_var($email, FILTER_VALIDATE_EMAIL)) {
        $update->execute(['invalid', '', 'bad_syntax', $c['id']]);
        $stats['invalid']++;
        continue;
    }

    // Split on the LAST '@': a quoted local part may itself contain '@',
    // whereas the domain part never does.
    $domain = substr($email, strrpos($email, '@') + 1);

    if (isset($catchall_set[$domain])) {
        $update->execute(['catch_all', $domain, 'catch_all_domain', $c['id']]);
        $stats['catch_all']++;
        continue;
    }
    if (isset($big_set[$domain])) {
        $update->execute(['risky', $domain, 'big_provider_mx_ok', $c['id']]);
        $stats['risky']++;
        continue;
    }

    if (!isset($mx_cache[$domain])) {
        $hosts = [];
        $weights = [];
        // NOTE(review): getmxrr() returns false both for "no MX record" and for a
        // resolver error; either way the domain ends up 'invalid' below and the row
        // is no longer selected as 'unknown' on later runs.
        if (@getmxrr($domain, $hosts, $weights) && !empty($hosts)) {
            // getmxrr() does not order hosts by preference, so pick the exchanger
            // with the lowest preference value explicitly (first one wins on ties).
            $best_host = null;
            $best_weight = null;
            foreach ($hosts as $i => $host) {
                // An empty or "." exchange is a "null MX" (RFC 7505): the domain
                // declares that it accepts no mail, so it is never a usable host.
                if (rtrim((string)$host, '.') === '') {
                    continue;
                }
                $weight = isset($weights[$i]) ? (int)$weights[$i] : PHP_INT_MAX;
                if ($best_weight === null || $weight < $best_weight) {
                    $best_host = $host;
                    $best_weight = $weight;
                }
            }
            // MX records exist, so no A-record fallback here: if only a null MX
            // was published the domain is invalid.
            $mx_cache[$domain] = ($best_host !== null) ? ['valid', $best_host] : ['invalid', ''];
        } else {
            // No MX records: fall back to the domain's own address record.
            // gethostbyname() returns its input unchanged when resolution fails.
            $a = @gethostbyname($domain);
            $mx_cache[$domain] = ($a !== $domain) ? ['valid', $a] : ['invalid', ''];
        }
    }

    if ($mx_cache[$domain][0] === 'invalid') {
        $update->execute(['invalid', '', 'no_mx', $c['id']]);
        $stats['invalid']++;
    } else {
        $update->execute(['valid_mx', $mx_cache[$domain][1], 'mx_ok', $c['id']]);
        $stats['valid_mx']++;
    }
}

lg("DONE valid_mx:{$stats['valid_mx']} invalid:{$stats['invalid']} catch_all:{$stats['catch_all']} risky:{$stats['risky']}");
$rem = $db->query("SELECT COUNT(*) FROM richscraper.professionals WHERE email_valid='unknown'")->fetchColumn();
lg("Remaining unknown: $rem");