192 lines
7.4 KiB
Python
Executable File
192 lines
7.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
WEVAL — ERP Gap Scanner via RSS/Atom feeds (Option C)

Subscribes to consulting/tech/vendor feeds and extracts mentions of the 25
tracked ERPs that appear together with pain keywords.
Stores the results in erp_gap_scans with source_url = URL of the article.

Public sources (no auth required):
- Reddit r/ERP and related subreddits (RSS)
- CIO.com ERP tag
- TechRepublic ERP
- SAP/Oracle/Microsoft blog RSS (release notes mentioning issues)
- Gartner public blog
- G2 recently added (limited)

The feeds that are actually polled are the ones listed in RSS_FEEDS.

Doctrine #5: INSERT ON CONFLICT DO NOTHING.
Doctrine #4: honesty — tag the source clearly.
|
|
"""
|
|
import sys, json, time, re, urllib.request, urllib.error, html
|
|
from datetime import datetime
|
|
import psycopg2
|
|
|
|
try:
|
|
import feedparser
|
|
except ImportError:
|
|
print("ERR: pip install feedparser", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Keyword arguments for psycopg2.connect().
# NOTE(review): credentials are hard-coded in source — consider loading them
# from the environment or a secrets store instead.
DB_CONFIG = {
    "host": "10.1.0.3",
    "port": 5432,
    "dbname": "adx_system",
    "user": "admin",
    "password": "admin123",
    "connect_timeout": 5,
}
|
|
|
|
# Publicly accessible RSS/Atom feeds (no auth), as (feed_name, feed_url) pairs.
# feed_name is also used to tag stored rows, so keep the names stable.
RSS_FEEDS = [
    # Subreddit feeds, generated from (name suffix, subreddit path) pairs.
    # The two differ in letter case for some entries, hence the explicit pairs.
    *(
        (f"Reddit_r_{label}", f"https://www.reddit.com/r/{subreddit}/.rss")
        for label, subreddit in (
            ("ERP", "ERP"),
            ("sap", "SAP"),
            ("netsuite", "netsuite"),
            ("Dynamics365", "Dynamics365"),
            ("salesforce", "salesforce"),
            ("workday", "Workday"),
            ("Odoo", "Odoo"),
        )
    ),
    # Trade press / analyst publications.
    ("CIO_ERP", "https://www.cio.com/feed/"),
    ("TechRepublic_Enterprise", "https://www.techrepublic.com/rssfeeds/topic/enterprise-software/"),
    # NOTE(review): this URL names a transport/travel industry feed, not an
    # ERP feed — confirm it is the intended source.
    ("ComputerWeekly_ERP", "https://www.computerweekly.com/rss/IT-for-transport-and-travel-industry.xml"),
    ("ITWorldCanada_ERP", "https://www.itworldcanada.com/feed"),
    ("ERPToday", "https://erp.today/feed/"),
    ("DiginomicaERP", "https://diginomica.com/topic/erp/rss.xml"),
    ("CXToday", "https://www.cxtoday.com/crm/feed/"),
]
|
|
|
|
# ERP alias -> erp_id mapping (for extraction).
# Aliases are grouped per ERP id; within a group the first alias longer than
# three characters is the one main() stores as the display name, so the order
# inside each group matters.
ERP_MAP = {
    alias: erp_id
    for erp_id, aliases in (
        ("sap_s4hana", ("SAP S/4HANA", "S/4HANA", "S/4 HANA")),
        ("sap_b1", ("SAP Business One", "SAP B1", "Business One")),
        ("oracle_ebs", ("Oracle E-Business", "Oracle EBS", "E-Business Suite")),
        ("oracle_fusion", ("Oracle Fusion", "Fusion Cloud")),
        ("oracle_netsuite", ("NetSuite",)),
        ("sage_x3", ("Sage X3",)),
        ("sage_100", ("Sage 100",)),
        ("sage_intacct", ("Sage Intacct", "Intacct")),
        ("odoo", ("Odoo",)),
        ("ms_d365_fo", ("Dynamics 365 F&O", "D365 F&O", "Dynamics 365 Finance", "D365FO")),
        ("ms_d365_bc", ("Dynamics 365 Business Central", "D365 BC", "Business Central")),
        ("ms_d365_ce", ("Dynamics 365 Customer Engagement", "D365 CE", "Dynamics CRM")),
        ("workday", ("Workday",)),
        ("salesforce", ("Salesforce",)),
        ("infor_m3", ("Infor M3",)),
        ("infor_cs", ("Infor CloudSuite", "Infor CS")),
        ("ifs", ("IFS Cloud", "IFS Applications")),
        ("epicor", ("Epicor", "Kinetic")),
        ("qad", ("QAD",)),
        ("acumatica", ("Acumatica",)),
        ("priority", ("Priority",)),
        ("deltek", ("Deltek", "Costpoint")),
        ("servicenow", ("ServiceNow",)),
        ("veeva", ("Veeva",)),
        ("temenos", ("Temenos",)),
    )
    for alias in aliases
}
|
|
|
|
# Keywords (English + French) for pain-detection.
# Every keyword is listed exactly once: the scorer counts matched entries, so
# a duplicate would double-count a single hit. "limitation" and "bug" are
# spelled the same in both languages and therefore appear only in the English
# section.
PAIN_KW = [
    # EN
    "pain", "limitation", "limits", "issue", "problem", "bug", "slow", "crash",
    "complaint", "drawback", "weakness", "shortcoming", "bottleneck", "broken",
    "frustrating", "workaround", "manual", "difficult", "lacks", "missing",
    "challenge", "struggle", "outdated", "legacy", "expensive", "complex",
    # FR
    "lent", "manque", "problème", "difficulté", "bogue",
    "archaïque", "obsolète", "manuel", "complexe", "difficile", "frustrant",
]
|
|
|
|
def normalize_entry(entry):
    """Extract plain-text ``(title, summary, link)`` from a feed entry.

    Args:
        entry: Mapping-like feed entry exposing ``.get`` (a plain ``dict``
            works as well as a parsed feed entry).

    Returns:
        A ``(title, summary, link)`` tuple of strings. HTML entities are
        decoded in the title and summary; the summary additionally has its
        HTML tags removed and whitespace runs collapsed to single spaces.
        Missing, empty or ``None`` fields come back as ``""``.
    """
    # `or ""` guards against keys that are present but hold None, which would
    # otherwise make html.unescape raise TypeError.
    title = html.unescape(entry.get("title") or "")

    # Fall back to "description" when "summary" is absent *or* empty.
    raw_summary = entry.get("summary") or entry.get("description") or ""
    summary = html.unescape(raw_summary)
    # Entities are decoded first so that escaped markup (&lt;p&gt;) is
    # stripped together with literal tags.
    summary = re.sub(r"<[^>]+>", " ", summary)
    summary = re.sub(r"\s+", " ", summary).strip()

    link = entry.get("link") or ""
    return title, summary, link
|
|
|
|
def detect_erps_mentioned(text, erp_map=None):
    """Return the sorted list of erp_ids whose aliases occur in ``text``.

    Matching is case-insensitive and requires the alias to stand on its own:
    it must not be directly preceded or followed by a word character. This
    avoids substring false positives such as "voodoo" matching "Odoo" or
    "pharmacokinetics" matching "Kinetic".

    Args:
        text: Text to scan. Empty or ``None`` yields ``[]``.
        erp_map: Optional alias -> erp_id mapping. Defaults to the
            module-level ``ERP_MAP``.

    Returns:
        A list of distinct erp_ids, sorted so the result is deterministic.
    """
    aliases = ERP_MAP if erp_map is None else erp_map
    if not text:
        return []

    found = set()
    for alias, erp_id in aliases.items():
        if erp_id in found:
            # Another alias of the same ERP already matched.
            continue
        # Lookarounds instead of \b: aliases may start or end with non-word
        # characters ("S/4HANA", "D365 F&O"), where \b would not behave as a
        # boundary.
        pattern = r"(?<!\w)" + re.escape(alias) + r"(?!\w)"
        if re.search(pattern, text, re.IGNORECASE):
            found.add(erp_id)
    # NOTE(review): aliases that are also common words (e.g. "Priority") still
    # match ordinary prose; word boundaries alone cannot disambiguate those.
    return sorted(found)
|
|
|
|
def score_pain(text, keywords=None):
    """Score how strongly ``text`` signals a pain point.

    A keyword matches when it occurs, case-insensitively, at the start of a
    word. Inflected forms still count ("bugs", "crashed", "manually"), while
    mid-word coincidences do not ("excellent" no longer matches "lent", nor
    "Spain" "pain"). Each distinct keyword is counted at most once, even if
    the keyword list contains duplicates.

    Args:
        text: Text to scan. Empty or ``None`` yields ``(0.0, [])``.
        keywords: Optional iterable of keywords. Defaults to the module-level
            ``PAIN_KW``.

    Returns:
        ``(score, matches)`` where ``score`` is in 0..1 (five or more distinct
        keywords give 1.0) rounded to 3 decimals, and ``matches`` lists the
        matched keywords in keyword-list order.
    """
    candidates = PAIN_KW if keywords is None else keywords
    if not text:
        return 0.0, []

    matches = []
    # dict.fromkeys de-duplicates while preserving the list order.
    for kw in dict.fromkeys(candidates):
        if re.search(r"(?<!\w)" + re.escape(kw), text, re.IGNORECASE):
            matches.append(kw)

    # Normalize: 5+ kw = 1.0
    score = min(1.0, len(matches) / 5.0)
    return round(score, 3), matches
|
|
|
|
def _insert_gap_row(cur, row):
    """Insert one erp_gap_scans row inside a savepoint; return True if a row was added.

    A failed statement aborts the enclosing PostgreSQL transaction, so every
    insert is wrapped in a savepoint: on error only that row is rolled back
    and the rows already inserted for the current feed survive until commit.

    Args:
        cur: Open psycopg2 cursor.
        row: Tuple ``(erp_id, erp_name, query, source_url, title, snippet,
            confidence_score, keywords)`` matching the INSERT column list.

    Returns:
        True when a new row was inserted; False when the row already existed
        (ON CONFLICT DO NOTHING) or the insert failed.
    """
    cur.execute("SAVEPOINT erp_gap_row")
    try:
        cur.execute(
            """
            INSERT INTO erp_gap_scans (erp_id, erp_name, query, source_url, title, snippet, confidence_score, keywords)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (erp_id, source_url) DO NOTHING
            """,
            row,
        )
        # Read rowcount before any further statement overwrites it.
        inserted = cur.rowcount > 0
    except psycopg2.Error as exc:
        cur.execute("ROLLBACK TO SAVEPOINT erp_gap_row")
        print(f" [ERR] insert failed for {row[0]} / {row[3]}: {exc}", file=sys.stderr)
        inserted = False
    cur.execute("RELEASE SAVEPOINT erp_gap_row")
    return inserted


def main():
    """Scan every feed in RSS_FEEDS and store ERP pain mentions in erp_gap_scans.

    For each feed, the 30 first entries are normalized; an entry is kept when
    it mentions at least one known ERP and at least one pain keyword. One row
    per (ERP, article URL) is inserted, and the transaction is committed once
    per feed. Feeds that fail to load are reported and skipped. Progress and
    totals are printed to stdout, insert errors to stderr.

    Returns:
        0, used as the process exit status.

    Raises:
        psycopg2.Error: If the database connection or a commit fails.
    """
    import socket

    print(f"═══ SCAN-ERP-GAPS-RSS · {datetime.now().isoformat()} ═══")
    conn = psycopg2.connect(**DB_CONFIG)

    # feedparser doesn't always respect timeout; set socket default
    # (process-wide, so once is enough).
    socket.setdefaulttimeout(10)

    total_feeds = 0
    total_entries = 0
    total_matches = 0
    total_inserted = 0

    try:
        cur = conn.cursor()
        try:
            for feed_name, feed_url in RSS_FEEDS:
                print(f"\n━━━ {feed_name} ━━━")
                try:
                    feed = feedparser.parse(feed_url)
                    entries = feed.entries[:30]  # top 30 latest
                except Exception as e:
                    # Best effort: one broken feed must not stop the scan.
                    print(f" [ERR] {e}")
                    continue
                total_feeds += 1
                total_entries += len(entries)
                print(f" → {len(entries)} entries")

                feed_matches = 0
                feed_inserted = 0

                for entry in entries:
                    title, summary, link = normalize_entry(entry)
                    combined = f"{title} {summary}"
                    if not combined.strip() or not link:
                        continue

                    erps = detect_erps_mentioned(combined)
                    if not erps:
                        continue

                    score, kws = score_pain(combined)
                    if score < 0.1:  # at least 1 pain keyword
                        continue

                    feed_matches += 1

                    # Insert one row per ERP mentioned
                    for erp_id in erps:
                        # Display name: first alias longer than 3 characters,
                        # falling back to the id itself.
                        erp_name = next(
                            (k for k, v in ERP_MAP.items() if v == erp_id and len(k) > 3),
                            erp_id,
                        )
                        row = (
                            erp_id,
                            erp_name,
                            f"rss_{feed_name}",
                            link[:500],
                            title[:500],
                            summary[:1500],
                            score,
                            kws + ["rss", feed_name],
                        )
                        if _insert_gap_row(cur, row):
                            feed_inserted += 1

                conn.commit()
                total_matches += feed_matches
                total_inserted += feed_inserted
                print(f" matches={feed_matches}, inserted={feed_inserted}")
        finally:
            cur.close()
    finally:
        # Always release the connection, even if a feed or commit blew up.
        conn.close()

    print(f"\n═══ DONE · feeds={total_feeds} · entries={total_entries} · matches={total_matches} · inserted={total_inserted} ═══")
    return 0
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|