Files
html/api/scan-erp-gaps-rss.py

192 lines
7.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""
WEVAL — ERP gap scanner via RSS/Atom feeds (Option C).
Subscribes to consulting/tech/vendor feeds and extracts mentions of the 25 ERPs
that appear together with pain keywords.
Stores matches in erp_gap_scans with source_url = URL of the article.
Public sources (no auth required):
- Reddit r/ERP and vendor subreddits (RSS feeds)
- CIO.com ERP tag
- TechRepublic ERP
- SAP/Oracle/Microsoft blog RSS (release notes mentioning issues)
- Gartner public blog
- G2 recently added (limited)
Doctrine #5: INSERT ... ON CONFLICT DO NOTHING.
Doctrine #4: honesty — tag the source clearly.
"""
import html
import json
import os
import re
import socket
import sys
import time
import urllib.error
import urllib.request
from datetime import datetime

import psycopg2

try:
    import feedparser
except ImportError:
    print("ERR: pip install feedparser", file=sys.stderr)
    sys.exit(1)
# Connection settings passed to psycopg2.connect() for the database holding
# erp_gap_scans. Each value can be overridden with an ERP_SCAN_DB_* environment
# variable so credentials do not have to live in source control; the literals
# are kept only as backward-compatible defaults.
# NOTE(review): the default password is a hard-coded credential — rotate it and
# provide ERP_SCAN_DB_PASSWORD from the environment in deployed setups.
DB_CONFIG = dict(
    host=os.environ.get("ERP_SCAN_DB_HOST", "10.1.0.3"),
    port=int(os.environ.get("ERP_SCAN_DB_PORT", "5432")),
    dbname=os.environ.get("ERP_SCAN_DB_NAME", "adx_system"),
    user=os.environ.get("ERP_SCAN_DB_USER", "admin"),
    password=os.environ.get("ERP_SCAN_DB_PASSWORD", "admin123"),
    connect_timeout=5,
)
# Publicly accessible RSS feeds (no auth)
# Each item is a (feed_name, feed_url) pair. main() records feed_name in the
# `query` column as "rss_<feed_name>" and appends it to the row's keywords.
RSS_FEEDS = [
    # Reddit subreddits, read through their .rss endpoints.
    ("Reddit_r_ERP", "https://www.reddit.com/r/ERP/.rss"),
    ("Reddit_r_sap", "https://www.reddit.com/r/SAP/.rss"),
    ("Reddit_r_netsuite", "https://www.reddit.com/r/netsuite/.rss"),
    ("Reddit_r_Dynamics365","https://www.reddit.com/r/Dynamics365/.rss"),
    ("Reddit_r_salesforce","https://www.reddit.com/r/salesforce/.rss"),
    ("Reddit_r_workday", "https://www.reddit.com/r/Workday/.rss"),
    ("Reddit_r_Odoo", "https://www.reddit.com/r/Odoo/.rss"),
    # Trade press and analyst sites.
    ("CIO_ERP", "https://www.cio.com/feed/"),
    ("TechRepublic_Enterprise", "https://www.techrepublic.com/rssfeeds/topic/enterprise-software/"),
    # NOTE(review): the label says ERP but the URL path names the
    # transport-and-travel industry feed — confirm this is the intended feed.
    ("ComputerWeekly_ERP", "https://www.computerweekly.com/rss/IT-for-transport-and-travel-industry.xml"),
    ("ITWorldCanada_ERP", "https://www.itworldcanada.com/feed"),
    ("ERPToday", "https://erp.today/feed/"),
    ("DiginomicaERP", "https://diginomica.com/topic/erp/rss.xml"),
    ("CXToday", "https://www.cxtoday.com/crm/feed/"),
]
# ERP name -> id mapping (for extraction)
# Several aliases may map to the same erp_id. Insertion order matters: main()
# uses the first alias longer than 3 characters as the erp_name stored for an
# erp_id, and falls back to the erp_id itself when there is none (e.g. "QAD").
# NOTE(review): a few aliases are ordinary words or generic phrases
# ("Priority", "Kinetic", "Business One", "Business Central") and can match
# text that is not about the ERP — confirm whether they should stay as-is.
ERP_MAP = {
    "SAP S/4HANA": "sap_s4hana", "S/4HANA": "sap_s4hana", "S/4 HANA": "sap_s4hana",
    "SAP Business One": "sap_b1", "SAP B1": "sap_b1", "Business One": "sap_b1",
    "Oracle E-Business": "oracle_ebs", "Oracle EBS": "oracle_ebs", "E-Business Suite": "oracle_ebs",
    "Oracle Fusion": "oracle_fusion", "Fusion Cloud": "oracle_fusion",
    "NetSuite": "oracle_netsuite",
    "Sage X3": "sage_x3",
    "Sage 100": "sage_100",
    "Sage Intacct": "sage_intacct", "Intacct": "sage_intacct",
    "Odoo": "odoo",
    "Dynamics 365 F&O": "ms_d365_fo", "D365 F&O": "ms_d365_fo", "Dynamics 365 Finance": "ms_d365_fo", "D365FO": "ms_d365_fo",
    "Dynamics 365 Business Central": "ms_d365_bc", "D365 BC": "ms_d365_bc", "Business Central": "ms_d365_bc",
    "Dynamics 365 Customer Engagement": "ms_d365_ce", "D365 CE": "ms_d365_ce", "Dynamics CRM": "ms_d365_ce",
    "Workday": "workday",
    "Salesforce": "salesforce",
    "Infor M3": "infor_m3",
    "Infor CloudSuite": "infor_cs", "Infor CS": "infor_cs",
    "IFS Cloud": "ifs", "IFS Applications": "ifs",
    "Epicor": "epicor", "Kinetic": "epicor",
    "QAD": "qad",
    "Acumatica": "acumatica",
    "Priority": "priority",
    "Deltek": "deltek", "Costpoint": "deltek",
    "ServiceNow": "servicenow",
    "Veeva": "veeva",
    "Temenos": "temenos",
}
# Keywords (English + French) for pain detection; all lowercase because
# score_pain() matches them against lowercased text.
# Each keyword is listed exactly once so that a single keyword cannot be
# reported twice for the same text.
PAIN_KW = [
    "pain", "limitation", "limits", "issue", "problem", "bug", "slow", "crash",
    "complaint", "drawback", "weakness", "shortcoming", "bottleneck", "broken",
    "frustrating", "workaround", "manual", "difficult", "lacks", "missing",
    "challenge", "struggle", "outdated", "legacy", "expensive", "complex",
    # FR ("limitation" and "bug" are spelled the same in French and are
    # already covered by the English entries above)
    "lent", "manque", "problème", "difficulté", "bogue",
    "archaïque", "obsolète", "manuel", "complexe", "difficile", "frustrant",
]
def normalize_entry(entry):
    """Extract a plain-text title, summary and link from one feed entry.

    Args:
        entry: Mapping-like feed entry exposing ``.get`` (a feedparser entry
            or a plain dict). The keys read are ``title``, ``summary``,
            ``description`` and ``link``.

    Returns:
        A ``(title, summary, link)`` tuple of strings. HTML entities are
        unescaped in the title and summary; the summary additionally has HTML
        tags removed and whitespace runs collapsed. Fields that are missing,
        ``None`` or empty come back as ``""``.
    """
    # `or ""` guards against keys that are present but None, which would make
    # html.unescape() raise TypeError.
    title = html.unescape(entry.get("title") or "")
    # Fall back to the description when the summary is absent *or* empty.
    raw_summary = entry.get("summary") or entry.get("description") or ""
    summary = html.unescape(raw_summary)
    # Strip HTML tags, then collapse the whitespace they leave behind.
    summary = re.sub(r"<[^>]+>", " ", summary)
    summary = re.sub(r"\s+", " ", summary).strip()
    link = entry.get("link") or ""
    return title, summary, link
def detect_erps_mentioned(text, erp_map=None):
    """Return the erp_ids whose aliases are mentioned in ``text``.

    Matching is case-insensitive and requires the alias to stand on its own:
    it must not be directly preceded or followed by a word character, so
    "QAD" no longer matches inside "squad" nor "Workday" inside "workdays".

    Args:
        text: Text to scan; ``None`` or empty yields an empty list.
        erp_map: Optional alias -> erp_id mapping. Defaults to the
            module-level ``ERP_MAP``.

    Returns:
        Sorted list of distinct erp_ids (sorted so the result is
        deterministic from run to run).
    """
    aliases = ERP_MAP if erp_map is None else erp_map
    text_lc = (text or "").lower()
    found = set()
    for alias, erp_id in aliases.items():
        if erp_id in found:
            # Another alias of the same ERP already matched.
            continue
        # Lookarounds instead of \b because aliases may start or end with
        # non-word characters (e.g. "S/4HANA", "D365 F&O").
        pattern = r"(?<!\w)" + re.escape(alias.lower()) + r"(?!\w)"
        if re.search(pattern, text_lc):
            found.add(erp_id)
    return sorted(found)
def score_pain(text, keywords=None):
    """Score how strongly ``text`` signals a pain point.

    A keyword matches when it occurs at the start of a word
    (case-insensitive), so inflections such as "issues" or "bugs" still count
    while hits buried inside unrelated words ("excellent" -> "lent",
    "debug" -> "bug") do not. Each keyword is counted at most once, even if it
    is listed more than once.

    Args:
        text: Text to scan; ``None`` or empty scores 0.
        keywords: Optional iterable of keywords. Defaults to the module-level
            ``PAIN_KW``.

    Returns:
        ``(score, matches)`` where ``matches`` is the list of distinct matched
        keywords in listing order and ``score`` is ``len(matches) / 5`` capped
        at 1.0 and rounded to 3 decimals.
    """
    candidates = PAIN_KW if keywords is None else keywords
    text_lc = (text or "").lower()
    matches = []
    for kw in candidates:
        if kw in matches:
            # Duplicate listing — must not inflate the score.
            continue
        if re.search(r"(?<!\w)" + re.escape(kw.lower()), text_lc):
            matches.append(kw)
    # Normalize: 5+ kw = 1.0
    score = min(1.0, len(matches) / 5.0)
    return round(score, 3), matches
def _erp_display_name(erp_id):
    """Return the first ERP_MAP alias longer than 3 characters for ``erp_id``, else ``erp_id``."""
    return next(
        (alias for alias, mapped in ERP_MAP.items() if mapped == erp_id and len(alias) > 3),
        erp_id,
    )


def _insert_scan_row(cur, row):
    """Insert one erp_gap_scans row inside a savepoint; return True if a row was added.

    A failing INSERT would otherwise leave the surrounding transaction in an
    aborted state and make every later statement before the next commit fail.
    The savepoint confines the failure to this row; the error is reported and
    the scan goes on.
    """
    cur.execute("SAVEPOINT erp_gap_row")
    try:
        cur.execute("""
            INSERT INTO erp_gap_scans (erp_id, erp_name, query, source_url, title, snippet, confidence_score, keywords)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (erp_id, source_url) DO NOTHING
        """, row)
        # rowcount is 0 when ON CONFLICT skipped the row; read it before the
        # RELEASE statement overwrites it.
        inserted = cur.rowcount > 0
    except psycopg2.Error as exc:
        cur.execute("ROLLBACK TO SAVEPOINT erp_gap_row")
        print(f" [DB ERR] erp_id={row[0]} url={row[3]}: {exc}")
        return False
    cur.execute("RELEASE SAVEPOINT erp_gap_row")
    return inserted


def main():
    """Scan every feed in RSS_FEEDS and store ERP pain mentions in erp_gap_scans.

    For each feed the 30 first entries are normalized; an entry is kept when
    it has a link, mentions at least one known ERP and contains at least one
    pain keyword. One row per (entry, ERP) pair is inserted, with conflicts on
    (erp_id, source_url) ignored. Inserts are committed once per feed, and the
    database connection is closed even if the scan aborts with an exception.

    The printed "matches" figures count matched entries, both per feed and in
    the final total; "inserted" counts rows actually added.

    Returns:
        0, used as the process exit status.
    """
    print(f"═══ SCAN-ERP-GAPS-RSS · {datetime.now().isoformat()} ═══")
    conn = psycopg2.connect(**DB_CONFIG)
    # feedparser doesn't always respect timeout; set socket default
    # (process-wide, so it only needs to be done once).
    socket.setdefaulttimeout(10)
    total_feeds = 0
    total_entries = 0
    total_matches = 0
    total_inserted = 0
    try:
        with conn.cursor() as cur:
            for feed_name, feed_url in RSS_FEEDS:
                print(f"\n━━━ {feed_name} ━━━")
                try:
                    feed = feedparser.parse(feed_url)
                    entries = feed.entries[:30]  # top 30 latest
                except Exception as e:
                    # Boundary with third-party parsing/network code: report
                    # the failure and move on to the next feed.
                    print(f" [ERR] {e}")
                    continue
                total_feeds += 1
                total_entries += len(entries)
                print(f"{len(entries)} entries")
                if not entries and feed.get("bozo"):
                    # feedparser reports fetch/parse failures through the
                    # bozo flag instead of raising.
                    print(f" [WARN] {feed.get('bozo_exception')}")

                feed_matches = 0
                feed_inserted = 0
                for entry in entries:
                    title, summary, link = normalize_entry(entry)
                    combined = f"{title} {summary}"
                    if not combined.strip() or not link:
                        continue
                    erps = detect_erps_mentioned(combined)
                    if not erps:
                        continue
                    score, kws = score_pain(combined)
                    if score < 0.1:  # at least 1 pain keyword
                        continue
                    feed_matches += 1
                    # Insert one row per ERP mentioned
                    for erp_id in erps:
                        row = (
                            erp_id,
                            _erp_display_name(erp_id),
                            f"rss_{feed_name}",
                            link[:500],
                            title[:500],
                            summary[:1500],
                            score,
                            kws + ["rss", feed_name],
                        )
                        if _insert_scan_row(cur, row):
                            feed_inserted += 1

                conn.commit()
                total_matches += feed_matches
                total_inserted += feed_inserted
                print(f" matches={feed_matches}, inserted={feed_inserted}")
    finally:
        conn.close()
    print(f"\n═══ DONE · feeds={total_feeds} · entries={total_entries} · matches={total_matches} · inserted={total_inserted} ═══")
    return 0
# Script entry point: run the scan and exit with main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())