Files
wevads-platform/scripts/extract_newsletters.php
2026-02-26 04:53:11 +01:00

109 lines
3.5 KiB
PHP
Executable File

<?php
/**
 * Extracts newsletter sender patterns from IMAP seed mailboxes.
 *
 * For every seed ID passed on the command line the script:
 *   1. loads the seed row from admin.brain_seeds,
 *   2. resolves the IMAP host (explicit imap_host column, else a per-ISP default),
 *   3. opens the seed's INBOX over IMAP/SSL on port 993,
 *   4. inspects the most recent messages (highest message numbers first),
 *   5. upserts one row per (sender_email, isp_received) into
 *      admin.newsletter_patterns, bumping occurrence_count on conflict.
 *
 * Usage: php extract_newsletters.php "1,2,3" (seed IDs)
 */

$seedIds = $argv[1] ?? '';
if (empty($seedIds)) {
    // Same message on stdout as before, but a usage error now exits non-zero
    // so that cron jobs / wrappers can detect the failure.
    echo "Usage: php extract_newsletters.php 'seed_id1,seed_id2,...'\n";
    exit(1);
}

// NOTE(review): database credentials are hard-coded here; consider moving
// them to environment variables or a config file outside version control.
$pdo = new PDO("pgsql:host=localhost;dbname=adx_system", "admin", "admin123");
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// Normalise the ID list: cast to int, drop empty/invalid entries (which cast
// to 0) and remove duplicates so a seed is never processed twice in one run
// (which would inflate occurrence_count).
$ids = array_values(array_unique(array_filter(array_map(
    static function ($rawId) {
        return (int) trim($rawId);
    },
    explode(',', $seedIds)
))));

echo "Extraction pour " . count($ids) . " seeds\n";

// Maximum number of most-recent messages scanned per mailbox.
$maxEmails = 50;

// Default IMAP hosts, used when the seed row has no explicit imap_host.
$imapHosts = [
    'gmail' => 'imap.gmail.com',
    'hotmail' => 'outlook.office365.com',
    'yahoo' => 'imap.mail.yahoo.com',
    'aol' => 'imap.aol.com'
];

// Prepare both statements once instead of once per seed / per message.
$seedStmt = $pdo->prepare("SELECT * FROM admin.brain_seeds WHERE id = ?");
$upsertStmt = $pdo->prepare("
    INSERT INTO admin.newsletter_patterns
    (sender_email, sender_name, sender_domain, isp_received, folder_received,
    headers_spf, headers_dkim, return_path, x_mailer, last_seen_at)
    VALUES (?, ?, ?, ?, 'INBOX', ?, ?, ?, ?, NOW())
    ON CONFLICT (sender_email, isp_received)
    DO UPDATE SET occurrence_count = newsletter_patterns.occurrence_count + 1,
    last_seen_at = NOW()
");

foreach ($ids as $seedId) {
    $seedStmt->bindValue(1, $seedId, PDO::PARAM_INT);
    $seedStmt->execute();
    $seed = $seedStmt->fetch(PDO::FETCH_ASSOC);
    $seedStmt->closeCursor();

    if (!$seed) {
        echo "Seed $seedId non trouvé\n";
        continue;
    }

    echo "Processing: {$seed['email']} ({$seed['isp']})\n";

    // Determine the IMAP host: explicit column first, then the ISP default.
    $imapHost = $seed['imap_host'];
    if (!$imapHost) {
        $imapHost = $imapHosts[$seed['isp'] ?? ''] ?? null;
    }
    if (!$imapHost) {
        echo " IMAP host inconnu pour ISP {$seed['isp']}\n";
        continue;
    }

    // IMAP connection. imap_open() raises a PHP warning on failure, which is
    // suppressed here because the failure is reported explicitly below.
    $mailbox = "{" . $imapHost . ":993/imap/ssl}INBOX";
    $imap = @imap_open($mailbox, $seed['email'], $seed['password']);
    if (!$imap) {
        echo " Connexion IMAP échouée: " . imap_last_error() . "\n";
        // Flush the IMAP error stack; otherwise PHP re-emits the queued
        // errors as notices when the script shuts down.
        imap_errors();
        continue;
    }

    // Scan the most recent messages (highest message numbers first).
    $emails = imap_search($imap, 'ALL');
    if (!$emails) {
        echo " Aucun email trouvé\n";
        imap_close($imap);
        continue;
    }
    rsort($emails);
    $emails = array_slice($emails, 0, $maxEmails);

    $extracted = 0;
    foreach ($emails as $emailNum) {
        $header = imap_headerinfo($imap, $emailNum);

        // Skip messages whose headers could not be read or that carry no
        // usable From address; otherwise a bogus '@' sender would be stored.
        $sender = ($header && !empty($header->from)) ? $header->from[0] : null;
        if (!$sender || !isset($sender->mailbox, $sender->host)) {
            continue;
        }

        // imap_fetchheader() may return false; normalise to a string so the
        // regexes below always operate on text.
        $rawHeaders = (string) imap_fetchheader($imap, $emailNum);

        $fromEmail = $sender->mailbox . '@' . $sender->host;
        $fromName = isset($sender->personal) ? imap_utf8($sender->personal) : '';
        $fromDomain = $sender->host;

        // SPF/DKIM verdicts: first "spf=<word>" / "dkim=<word>" token found
        // anywhere in the raw headers.
        $spf = 'unknown';
        $dkim = 'unknown';
        if (preg_match('/spf=(\w+)/i', $rawHeaders, $m)) {
            $spf = $m[1];
        }
        if (preg_match('/dkim=(\w+)/i', $rawHeaders, $m)) {
            $dkim = $m[1];
        }

        // Return-Path: anchored to the start of a header line so that
        // look-alike headers (e.g. "X-Original-Return-Path:") do not match.
        $returnPath = '';
        if (preg_match('/^Return-Path:\s*<([^>]+)>/im', $rawHeaders, $m)) {
            $returnPath = $m[1];
        }

        // X-Mailer: anchored likewise; trim() strips the trailing CR.
        $xMailer = '';
        if (preg_match('/^X-Mailer:\s*(.+)/im', $rawHeaders, $m)) {
            $xMailer = trim($m[1]);
        }

        // Insert, or bump the counter of the existing pattern row.
        // NOTE(review): re-running the script re-counts the same messages;
        // confirm whether occurrence_count is meant to be per-run or per-message.
        $upsertStmt->execute([
            $fromEmail,
            $fromName,
            $fromDomain,
            $seed['isp'],
            $spf,
            $dkim,
            $returnPath,
            $xMailer
        ]);
        $extracted++;
    }

    imap_close($imap);
    echo " Extrait $extracted patterns\n";
}

echo "\nExtraction terminée\n";