diff --git a/scripts/auto-scraper.php b/scripts/auto-scraper.php index 93ce46c4..e1e41d67 100644 --- a/scripts/auto-scraper.php +++ b/scripts/auto-scraper.php @@ -3,7 +3,7 @@ * AUTO-SCRAPER: Scrape les ISPs manquants et injecte dans send_contacts * Utilise admin.scraping_targets pour les sources * - * Cron: 0 */6 * * * php /opt/wevads/scripts/auto-scraper.php + * Cron (every 6 hours): 0 0,6,12,18 * * * php /opt/wevads/scripts/auto-scraper.php */ $db = pg_connect('host=localhost dbname=adx_system user=admin password=admin123'); diff --git a/scripts/data-factory-runner.php b/scripts/data-factory-runner.php new file mode 100755 index 00000000..5b2b54e1 --- /dev/null +++ b/scripts/data-factory-runner.php @@ -0,0 +1,76 @@ +setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); + +$PROVIDER_ID = 3; +$PROVIDER_NAME = 'ADX-Import'; +$TODAY = date('Y-m-d'); +$BATCH = date('Ymd_His'); + +// ISP mapping matching existing data_lists +$isp_ids = ['gmail'=>1,'hotmail'=>2,'gmx'=>4,'tonline'=>5,'spectrum'=>6,'yahoo'=>7,'webde'=>8,'videotron'=>9]; + +$tables = [ + 'gmail.gmail' => ['isp'=>'gmail', 'limit'=>50000], + 'gmail.gmail_1' => ['isp'=>'gmail', 'limit'=>50000], + 'gmail.gmail_3' => ['isp'=>'gmail', 'limit'=>50000], + 'gmail.gmail_6' => ['isp'=>'gmail', 'limit'=>50000], + 'gmail.gmail_7' => ['isp'=>'gmail', 'limit'=>50000], + 'gmail.gmail_8' => ['isp'=>'gmail', 'limit'=>20000], + 'hotmail.hotmail_us_clean_winx' => ['isp'=>'hotmail', 'limit'=>100000], + 'hotmail._hotmail_us_clickers_winx' => ['isp'=>'hotmail', 'limit'=>50000], + 'hotmail.hotmail_open_de' => ['isp'=>'hotmail', 'limit'=>50000], + 'gmx.gmx_' => ['isp'=>'gmx', 'limit'=>20000], + 'gmx.gmx__1' => ['isp'=>'gmx', 'limit'=>20000], + 'gmx.gmx__2' => ['isp'=>'gmx', 'limit'=>20000], + 'gmx.gmx__4' => ['isp'=>'gmx', 'limit'=>20000], + 'gmx.gmx__5' => ['isp'=>'gmx', 'limit'=>10000], + 'gmail.toline' => ['isp'=>'tonline', 'limit'=>10000], + 'gmail.spectrum__4' => ['isp'=>'spectrum','limit'=>50000], + 'gmail.spectrum__7' => 
['isp'=>'spectrum','limit'=>50000], + 'gmail.spectrum__8' => ['isp'=>'spectrum','limit'=>50000], + 'gmail.spectrum__12' => ['isp'=>'spectrum','limit'=>50000], +]; + +$nextId = (int)$dst->query("SELECT COALESCE(MAX(id),0)+1 FROM lists.data_lists")->fetchColumn(); +$totalImported = 0; +$listsCreated = 0; + +foreach ($tables as $table => $cfg) { + $isp = $cfg['isp']; + $limit = $cfg['limit']; + $parts = explode('.', $table); + + $check = $src->query("SELECT 1 FROM information_schema.tables WHERE table_schema='{$parts[0]}' AND table_name='{$parts[1]}'")->fetch(); + if (!$check) { echo "SKIP $table (not found)\n"; continue; } + + $available = (int)$src->query("SELECT COUNT(*) FROM $table")->fetchColumn(); + if ($available == 0) { echo "SKIP $table (empty)\n"; continue; } + + $actual = min($limit, $available); + $shortName = str_replace(['gmail.','hotmail.','gmx.','_'], ['','','',''], $parts[1]); + $listName = "ADX_{$isp}_{$shortName}"; + $ispId = $isp_ids[$isp] ?? 1; + + $dst->exec("INSERT INTO lists.data_lists (id, status, data_provider_id, data_provider_name, name, table_name, table_schema, isp_id, isp_name, total_count, encrypt_emails, created_by, last_updated_by, created_date, last_updated_date) + VALUES ($nextId, 'Activated', $PROVIDER_ID, '$PROVIDER_NAME', '$listName', '{$parts[1]}', '{$parts[0]}', $ispId, '$isp', $actual, 'on', 'admin@local.com', 'admin@local.com', '$TODAY', '$TODAY')"); + + $listsCreated++; + $totalImported += $actual; + echo "✅ #$nextId $listName ($isp) = $actual contacts\n"; + $nextId++; +} + +// Scraping results +$scrapCount = (int)$dst->query("SELECT COUNT(*) FROM admin.scrapping_results WHERE is_verified = true")->fetchColumn(); +if ($scrapCount > 0) { + $dst->exec("INSERT INTO lists.data_lists (id, status, data_provider_id, data_provider_name, name, table_name, table_schema, isp_id, isp_name, total_count, encrypt_emails, created_by, last_updated_by, created_date, last_updated_date) + VALUES ($nextId, 'Activated', 4, 'Scraping-Factory', 
'Scraped_Verified', 'scrapping_results', 'admin', 1, 'mixed', $scrapCount, 'off', 'admin@local.com', 'admin@local.com', '$TODAY', '$TODAY')"); + $listsCreated++; + $totalImported += $scrapCount; + echo "✅ #$nextId Scraped_Verified = $scrapCount\n"; +} + +echo "\n=== FACTORY DONE ===\n"; +echo "Lists: $listsCreated | Contacts: $totalImported | Provider: $PROVIDER_NAME | Date: $TODAY\n";