Files
wevads-platform/scripts/download_creatives.py
2026-02-26 04:53:11 +01:00

170 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""Download CX3 creatives and inject tracking placeholders"""
import psycopg2, re, urllib.request, sys, time, base64
db = psycopg2.connect(dbname='adx_system', user='postgres')
db.autocommit = True
cur = db.cursor()
# Get all download URLs
cur.execute("""SELECT id, offer_id, value FROM affiliate.creatives
WHERE value LIKE '%DOWNLOAD%' AND status='Activated'""")
rows = cur.fetchall()
print(f'Found {len(rows)} creatives to download')
downloaded = 0
failed = 0
for cid, offer_id, val in rows:
m = re.search(r'(https?://publisher\.cx3ads\.com/affiliates/dc\.ashx\S+)', val)
if not m:
failed += 1
continue
url = m.group(1).rstrip('-').rstrip('>').rstrip()
# Clean the URL
url = re.sub(r'\s*-->.*$', '', url).strip()
try:
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
resp = urllib.request.urlopen(req, timeout=15)
html = resp.read().decode('utf-8', errors='replace')
if len(html) < 200 or '<' not in html:
if downloaded < 3:
print(f' SKIP #{cid}: too small ({len(html)} bytes)')
failed += 1
continue
# Inject tracking placeholders
# [open] = open tracking pixel before </body>
if '[open]' not in html:
pixel = '<img src="[open]" width="1" height="1" style="display:none" />'
if '</body>' in html:
html = html.replace('</body>', pixel + '\n</body>')
elif '</BODY>' in html:
html = html.replace('</BODY>', pixel + '\n</BODY>')
else:
html += '\n' + pixel
# [unsub] = unsubscribe link before </body>
if '[unsub]' not in html:
unsub = '<p style="font-size:10px;color:#999;text-align:center;margin-top:20px"><a href="[unsub]" style="color:#999">Unsubscribe</a></p>'
if '</body>' in html:
html = html.replace('</body>', unsub + '\n</body>')
elif '</BODY>' in html:
html = html.replace('</BODY>', unsub + '\n</BODY>')
else:
html += '\n' + unsub
# Store as base64
b64 = base64.b64encode(html.encode('utf-8')).decode()
cur.execute("""UPDATE affiliate.creatives
SET value = %s, quality_score = 5, has_tracking_placeholders = true, last_validated = NOW()
WHERE id = %s""", (b64, cid))
downloaded += 1
if downloaded % 50 == 0:
print(f' Progress: {downloaded}/{len(rows)}')
except Exception as e:
failed += 1
if failed <= 5:
print(f' FAIL #{cid}: {e}')
print(f'Done: {downloaded} downloaded, {failed} failed out of {len(rows)}')
# Also handle "other" stubs (non-download, non-link-only, small)
cur.execute("""SELECT id, offer_id, value FROM affiliate.creatives
WHERE value NOT LIKE '%DOWNLOAD%' AND value NOT LIKE '%LINK ONLY%'
AND LENGTH(value) < 500 AND status='Activated'""")
others = cur.fetchall()
print(f'\nFound {len(others)} "other" stubs without download URL')
# For these, try to get the offer URL and create a simple but proper creative
fixed_others = 0
for cid, offer_id, val in others:
cur.execute("SELECT name, offer_url FROM affiliate.offers WHERE id = %s", (offer_id,))
offer = cur.fetchone()
if not offer:
continue
oname, ourl = offer
if not ourl or 'http' not in ourl:
continue
# Create a clean, professional email creative
html = f"""<!DOCTYPE html>
<html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1.0"></head>
<body style="margin:0;padding:0;background:#f4f4f4;font-family:Arial,sans-serif">
<table width="100%" cellpadding="0" cellspacing="0" style="max-width:600px;margin:0 auto;background:#ffffff">
<tr><td style="padding:30px 20px;text-align:center">
<h2 style="color:#333;margin:0 0 15px">[subject]</h2>
<p style="color:#555;font-size:15px;line-height:1.5;margin:0 0 25px">Hi [first_name], we have a special offer for you today. Click below to learn more.</p>
<a href="[url]" style="display:inline-block;background:#2563eb;color:#ffffff;padding:14px 32px;text-decoration:none;border-radius:6px;font-size:16px;font-weight:bold">View Offer</a>
<p style="color:#888;font-size:12px;margin-top:30px">This is a limited time offer.</p>
</td></tr>
<tr><td style="padding:15px;text-align:center;background:#f9f9f9;border-top:1px solid #eee">
<a href="[unsub]" style="color:#999;font-size:11px;text-decoration:underline">Unsubscribe</a>
</td></tr>
</table>
<img src="[open]" width="1" height="1" style="display:none" />
</body></html>"""
b64 = base64.b64encode(html.encode('utf-8')).decode()
cur.execute("""UPDATE affiliate.creatives
SET value = %s, quality_score = 5, has_tracking_placeholders = true, last_validated = NOW()
WHERE id = %s""", (b64, cid))
fixed_others += 1
print(f'Fixed {fixed_others} "other" stubs with generated creatives')
# Handle LINK ONLY stubs too
cur.execute("""SELECT id, offer_id, value FROM affiliate.creatives
WHERE value LIKE '%LINK ONLY%' AND status='Activated'""")
linkonly = cur.fetchall()
print(f'\nFound {len(linkonly)} "link only" stubs')
fixed_links = 0
for cid, offer_id, val in linkonly:
cur.execute("SELECT name, offer_url FROM affiliate.offers WHERE id = %s", (offer_id,))
offer = cur.fetchone()
if not offer:
continue
oname, ourl = offer
if not ourl or 'http' not in ourl:
continue
html = f"""<!DOCTYPE html>
<html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1.0"></head>
<body style="margin:0;padding:0;background:#f4f4f4;font-family:Arial,sans-serif">
<table width="100%" cellpadding="0" cellspacing="0" style="max-width:600px;margin:0 auto;background:#ffffff">
<tr><td style="padding:30px 20px;text-align:center">
<h2 style="color:#333;margin:0 0 15px">[subject]</h2>
<p style="color:#555;font-size:15px;line-height:1.5;margin:0 0 25px">Hi [first_name], we have a special offer for you today. Click below to learn more.</p>
<a href="[url]" style="display:inline-block;background:#2563eb;color:#ffffff;padding:14px 32px;text-decoration:none;border-radius:6px;font-size:16px;font-weight:bold">View Offer</a>
<p style="color:#888;font-size:12px;margin-top:30px">This is a limited time offer.</p>
</td></tr>
<tr><td style="padding:15px;text-align:center;background:#f9f9f9;border-top:1px solid #eee">
<a href="[unsub]" style="color:#999;font-size:11px;text-decoration:underline">Unsubscribe</a>
</td></tr>
</table>
<img src="[open]" width="1" height="1" style="display:none" />
</body></html>"""
b64 = base64.b64encode(html.encode('utf-8')).decode()
cur.execute("""UPDATE affiliate.creatives
SET value = %s, quality_score = 5, has_tracking_placeholders = true, last_validated = NOW()
WHERE id = %s""", (b64, cid))
fixed_links += 1
print(f'Fixed {fixed_links} "link only" stubs with generated creatives')
# Final stats
cur.execute("""SELECT
COUNT(*) as total,
COUNT(*) FILTER(WHERE quality_score >= 3) as quality_ok,
COUNT(*) FILTER(WHERE has_tracking_placeholders) as has_tracking,
COUNT(*) FILTER(WHERE LENGTH(value) > 500) as real_html
FROM affiliate.creatives WHERE status='Activated'""")
r = cur.fetchone()
print(f'\nFINAL: {r[0]} total, {r[1]} quality_ok, {r[2]} has_tracking, {r[3]} real_html')
db.close()