AUTO-BACKUP-NIGHTLY
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
* AUTO-SCRAPER: Scrape les ISPs manquants et injecte dans send_contacts
|
||||
* Utilise admin.scraping_targets pour les sources
|
||||
*
|
||||
* Cron: 0 */6 * * * php /opt/wevads/scripts/auto-scraper.php
|
||||
* Cron: every 6 hours * * * php /opt/wevads/scripts/auto-scraper.php
|
||||
*/
|
||||
|
||||
$db = pg_connect('host=localhost dbname=adx_system user=admin password=admin123');
|
||||
|
||||
76
scripts/data-factory-runner.php
Executable file
76
scripts/data-factory-runner.php
Executable file
@@ -0,0 +1,76 @@
|
||||
<?php

declare(strict_types=1);

/**
 * Data-factory runner.
 *
 * For every configured source table that exists and is non-empty in the
 * adx_clients database, registers one row in lists.data_lists of the
 * adx_system database. When admin.scrapping_results holds verified rows,
 * one extra "Scraped_Verified" row is registered as well.
 *
 * Only list metadata is written: the script counts source rows but does not
 * copy them. All inserts run in a single transaction, so a failure part-way
 * through leaves lists.data_lists unchanged.
 */

// NOTE(review): database credentials are hard-coded here; consider moving
// them to configuration that is not committed to the repository.
$pdoOptions = [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION];
$src = new PDO('pgsql:host=localhost;dbname=adx_clients', 'admin', 'admin123', $pdoOptions);
$dst = new PDO('pgsql:host=localhost;dbname=adx_system', 'admin', 'admin123', $pdoOptions);

$PROVIDER_ID = 3;
$PROVIDER_NAME = 'ADX-Import';
$TODAY = date('Y-m-d');

// ISP mapping matching existing data_lists
$isp_ids = ['gmail'=>1,'hotmail'=>2,'gmx'=>4,'tonline'=>5,'spectrum'=>6,'yahoo'=>7,'webde'=>8,'videotron'=>9];

// Source tables as "schema.table" => ISP label and the cap applied to the
// recorded contact count.
// NOTE(review): 'gmail.toline' is mapped to ISP 'tonline'; confirm the table
// name is not a typo before relying on it.
$tables = [
    'gmail.gmail'                       => ['isp'=>'gmail',   'limit'=>50000],
    'gmail.gmail_1'                     => ['isp'=>'gmail',   'limit'=>50000],
    'gmail.gmail_3'                     => ['isp'=>'gmail',   'limit'=>50000],
    'gmail.gmail_6'                     => ['isp'=>'gmail',   'limit'=>50000],
    'gmail.gmail_7'                     => ['isp'=>'gmail',   'limit'=>50000],
    'gmail.gmail_8'                     => ['isp'=>'gmail',   'limit'=>20000],
    'hotmail.hotmail_us_clean_winx'     => ['isp'=>'hotmail', 'limit'=>100000],
    'hotmail._hotmail_us_clickers_winx' => ['isp'=>'hotmail', 'limit'=>50000],
    'hotmail.hotmail_open_de'           => ['isp'=>'hotmail', 'limit'=>50000],
    'gmx.gmx_'                          => ['isp'=>'gmx',     'limit'=>20000],
    'gmx.gmx__1'                        => ['isp'=>'gmx',     'limit'=>20000],
    'gmx.gmx__2'                        => ['isp'=>'gmx',     'limit'=>20000],
    'gmx.gmx__4'                        => ['isp'=>'gmx',     'limit'=>20000],
    'gmx.gmx__5'                        => ['isp'=>'gmx',     'limit'=>10000],
    'gmail.toline'                      => ['isp'=>'tonline', 'limit'=>10000],
    'gmail.spectrum__4'                 => ['isp'=>'spectrum','limit'=>50000],
    'gmail.spectrum__7'                 => ['isp'=>'spectrum','limit'=>50000],
    'gmail.spectrum__8'                 => ['isp'=>'spectrum','limit'=>50000],
    'gmail.spectrum__12'                => ['isp'=>'spectrum','limit'=>50000],
];

// Quotes a PostgreSQL identifier. Identifiers cannot be bound as statement
// parameters, so they are double-quoted with embedded quotes doubled.
$quoteIdent = static function (string $name): string {
    return '"' . str_replace('"', '""', $name) . '"';
};

$tableExists = $src->prepare(
    'SELECT 1 FROM information_schema.tables WHERE table_schema = ? AND table_name = ?'
);

// Each placeholder name is used once: natively prepared statements do not
// allow the same named marker to appear twice.
$insertList = $dst->prepare(
    'INSERT INTO lists.data_lists (id, status, data_provider_id, data_provider_name, name, table_name, table_schema, isp_id, isp_name, total_count, encrypt_emails, created_by, last_updated_by, created_date, last_updated_date)
    VALUES (:id, :status, :provider_id, :provider_name, :name, :table_name, :table_schema, :isp_id, :isp_name, :total_count, :encrypt_emails, :created_by, :last_updated_by, :created_date, :last_updated_date)'
);

$totalImported = 0;
$listsCreated = 0;

$dst->beginTransaction();

try {
    // NOTE(review): MAX(id)+1 can collide if another writer inserts into
    // lists.data_lists concurrently; a sequence would avoid that.
    $nextId = (int)$dst->query("SELECT COALESCE(MAX(id),0)+1 FROM lists.data_lists")->fetchColumn();

    foreach ($tables as $table => $cfg) {
        $isp = $cfg['isp'];
        $limit = $cfg['limit'];
        [$schema, $tableName] = explode('.', $table, 2);

        $tableExists->execute([$schema, $tableName]);
        $found = $tableExists->fetchColumn();
        $tableExists->closeCursor();
        if ($found === false) { echo "SKIP $table (not found)\n"; continue; }

        $countSql = 'SELECT COUNT(*) FROM ' . $quoteIdent($schema) . '.' . $quoteIdent($tableName);
        $available = (int)$src->query($countSql)->fetchColumn();
        if ($available === 0) { echo "SKIP $table (empty)\n"; continue; }

        // The recorded count is capped at the configured limit.
        $actual = min($limit, $available);

        // The table part never contains a dot, so only underscores need stripping.
        $shortName = str_replace('_', '', $tableName);
        $listName = "ADX_{$isp}_{$shortName}";

        // NOTE(review): an ISP missing from $isp_ids silently falls back to id 1 (gmail).
        $ispId = $isp_ids[$isp] ?? 1;

        $insertList->execute([
            'id'               => $nextId,
            'status'           => 'Activated',
            'provider_id'      => $PROVIDER_ID,
            'provider_name'    => $PROVIDER_NAME,
            'name'             => $listName,
            'table_name'       => $tableName,
            'table_schema'     => $schema,
            'isp_id'           => $ispId,
            'isp_name'         => $isp,
            'total_count'      => $actual,
            'encrypt_emails'   => 'on',
            'created_by'       => 'admin@local.com',
            'last_updated_by'  => 'admin@local.com',
            'created_date'     => $TODAY,
            'last_updated_date' => $TODAY,
        ]);

        $listsCreated++;
        $totalImported += $actual;
        echo "✅ #$nextId $listName ($isp) = $actual contacts\n";
        $nextId++;
    }

    // Scraping results
    $scrapCount = (int)$dst->query("SELECT COUNT(*) FROM admin.scrapping_results WHERE is_verified = true")->fetchColumn();
    if ($scrapCount > 0) {
        $insertList->execute([
            'id'               => $nextId,
            'status'           => 'Activated',
            'provider_id'      => 4,
            'provider_name'    => 'Scraping-Factory',
            'name'             => 'Scraped_Verified',
            'table_name'       => 'scrapping_results',
            'table_schema'     => 'admin',
            'isp_id'           => 1,
            'isp_name'         => 'mixed',
            'total_count'      => $scrapCount,
            'encrypt_emails'   => 'off',
            'created_by'       => 'admin@local.com',
            'last_updated_by'  => 'admin@local.com',
            'created_date'     => $TODAY,
            'last_updated_date' => $TODAY,
        ]);

        $listsCreated++;
        $totalImported += $scrapCount;
        echo "✅ #$nextId Scraped_Verified = $scrapCount\n";
    }

    $dst->commit();
} catch (Throwable $e) {
    // Undo every insert from this run so the "✅" lines printed above do not
    // correspond to rows that only partially exist.
    if ($dst->inTransaction()) {
        $dst->rollBack();
    }
    fwrite(STDERR, "ROLLBACK: no lists were created ({$e->getMessage()})\n");
    throw $e;
}

echo "\n=== FACTORY DONE ===\n";
echo "Lists: $listsCreated | Contacts: $totalImported | Provider: $PROVIDER_NAME | Date: $TODAY\n";
|
||||
Reference in New Issue
Block a user