// WEVAL — ERP Gap Scanner via Playwright (Option B)
// Scrapes public review pages (G2/TrustRadius/Capterra) for their "Cons" / "Dislikes" sections.
// Usage: node scan-erp-gaps-playwright.js [erp_id|all]
// Doctrine #5: ON CONFLICT DO NOTHING via unique constraint

const { createHash } = require('crypto');

const { chromium } = require('playwright');
const { Pool } = require('pg');

// PostgreSQL connection settings handed to `new Pool(DB)`.
// Each connection field can be overridden through the conventional PG*
// environment variables (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD);
// when a variable is unset or empty the previous hard-coded value is used,
// so existing deployments keep working unchanged.
// NOTE(review): the fallback credentials are still committed in source —
// supply PGUSER/PGPASSWORD from the environment and rotate the default password.
const DB = {
  host: process.env.PGHOST || '10.1.0.3',
  // parseInt yields NaN for a missing/invalid PGPORT, which falls back to 5432.
  port: Number.parseInt(process.env.PGPORT, 10) || 5432,
  database: process.env.PGDATABASE || 'adx_system',
  user: process.env.PGUSER || 'admin',
  password: process.env.PGPASSWORD || 'admin123',
  // Give up on a connection attempt after 5 s instead of hanging the scan.
  connectionTimeoutMillis: 5000,
};

// Known public review URLs per ERP (currently TrustRadius only — publicly accessible)
// Maps each ERP id (the value accepted on the command line) to the list of
// review pages to scrape for it. Values are arrays so that additional pages
// can be added per ERP; the main loop visits every URL in each list.
const ERP_URLS = {
  sap_s4hana: [
    'https://www.trustradius.com/products/sap-s-4hana/reviews',
  ],
  sap_b1: [
    'https://www.trustradius.com/products/sap-business-one/reviews',
  ],
  oracle_netsuite: [
    'https://www.trustradius.com/products/netsuite-erp/reviews',
  ],
  oracle_fusion: [
    'https://www.trustradius.com/products/oracle-fusion-cloud-erp/reviews',
  ],
  sage_x3: [
    'https://www.trustradius.com/products/sage-x3/reviews',
  ],
  sage_intacct: [
    'https://www.trustradius.com/products/sage-intacct/reviews',
  ],
  odoo: [
    'https://www.trustradius.com/products/odoo/reviews',
  ],
  ms_d365_fo: [
    'https://www.trustradius.com/products/microsoft-dynamics-365-finance/reviews',
  ],
  ms_d365_bc: [
    'https://www.trustradius.com/products/microsoft-dynamics-365-business-central/reviews',
  ],
  workday: [
    'https://www.trustradius.com/products/workday-human-capital-management/reviews',
  ],
  salesforce: [
    'https://www.trustradius.com/products/salesforce-sales-cloud/reviews',
  ],
  infor_m3: [
    'https://www.trustradius.com/products/infor-cloudsuite-industrial/reviews',
  ],
  ifs: [
    'https://www.trustradius.com/products/ifs-cloud/reviews',
  ],
  epicor: [
    'https://www.trustradius.com/products/epicor-kinetic/reviews',
  ],
  acumatica: [
    'https://www.trustradius.com/products/acumatica-cloud-erp/reviews',
  ],
  deltek: [
    'https://www.trustradius.com/products/deltek-costpoint/reviews',
  ],
  servicenow: [
    'https://www.trustradius.com/products/servicenow/reviews',
  ],
  veeva: [
    'https://www.trustradius.com/products/veeva-vault/reviews',
  ],
};

// Human-readable product name for each ERP id; stored in the `erp_name`
// column. The scraper falls back to the raw id when an id has no entry here.
const ERP_NAMES = {
  sap_s4hana: 'SAP S/4HANA',
  sap_b1: 'SAP Business One',
  oracle_netsuite: 'Oracle NetSuite',
  oracle_fusion: 'Oracle Fusion Cloud',
  sage_x3: 'Sage X3',
  sage_intacct: 'Sage Intacct',
  odoo: 'Odoo',
  ms_d365_fo: 'Microsoft Dynamics 365 F&O',
  ms_d365_bc: 'Microsoft Dynamics 365 Business Central',
  workday: 'Workday',
  salesforce: 'Salesforce',
  infor_m3: 'Infor M3',
  ifs: 'IFS Cloud',
  epicor: 'Epicor Kinetic',
  acumatica: 'Acumatica Cloud',
  deltek: 'Deltek Costpoint',
  servicenow: 'ServiceNow',
  veeva: 'Veeva Vault',
};

/**
 * Load one public review page and collect the text snippets that read like
 * complaints ("cons" / "dislikes").
 *
 * NOTE(review): despite the function name and the `g2_review` keyword, the
 * URLs configured in this file all point at TrustRadius. Name and keyword are
 * left untouched so callers and already-stored rows stay compatible.
 *
 * @param {object} browser - Playwright browser; a fresh context is opened for
 *   this call and closed again before returning.
 * @param {string} erp_id - ERP identifier; looked up in ERP_NAMES for the
 *   display name (falls back to the id itself).
 * @param {string} url - Review page to visit.
 * @returns {Promise<Array<{erp_id: string, erp_name: string, source_url: string,
 *   title: string, snippet: string, confidence: number, keywords: string[]}>>}
 *   At most 15 rows. Navigation/extraction errors are logged and yield the
 *   rows gathered so far (normally an empty array) instead of throwing.
 */
async function scrapeG2(browser, erp_id, url) {
  const context = await browser.newContext({
    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    viewport: { width: 1280, height: 800 },
  });
  const results = [];

  try {
    // Created inside the try so the context is released even if this fails.
    const page = await context.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 25000 });
    // Fixed grace period so client-side rendered reviews have time to appear.
    await page.waitForTimeout(3000);

    // Everything inside this callback runs in the page, not in Node: it
    // cannot see variables from the enclosing scope, so the pattern and the
    // limits are declared locally.
    const extracted = await page.evaluate(() => {
      // Complaint vocabulary. Plural forms and the "frustrat…" stem
      // (frustrating, frustrated, frustration) are matched explicitly; the
      // previous pattern demanded a word boundary right after the stem and
      // therefore never matched those words.
      const painPattern = /\b(?:dislikes?|drawbacks?|limitations?|pain ?points?|missing|lacks?|difficult|slow|complicated|bugs?|issues?|problems?|frustrat\w*|workarounds?|hard to)\b/i;
      const maxSnippets = 15;
      const seenPrefixes = new Set();
      const out = [];

      for (const el of document.querySelectorAll('p, div[class*="review"], li')) {
        const txt = (el.innerText || '').trim();
        // Skip fragments too short to be a review and whole-page containers.
        if (txt.length < 40 || txt.length > 1500) continue;
        if (!painPattern.test(txt)) continue;

        // Dedupe on the first 60 characters: nested elements often repeat
        // the same review text.
        const prefix = txt.substring(0, 60);
        if (seenPrefixes.has(prefix)) continue;
        seenPrefixes.add(prefix);

        out.push({
          snippet: txt.substring(0, 1000),
          title: txt.substring(0, 100),
        });
        if (out.length === maxSnippets) break;
      }
      return out;
    });

    for (const { title, snippet } of extracted) {
      // Content-derived fragment: the same snippet gets the same source_url
      // on every run, so a unique constraint that includes source_url can
      // deduplicate re-scans through ON CONFLICT DO NOTHING. (A timestamp
      // here made every run's rows look new.)
      const fingerprint = createHash('sha256').update(snippet).digest('hex').slice(0, 16);
      results.push({
        erp_id,
        erp_name: ERP_NAMES[erp_id] || erp_id,
        source_url: `${url}#pain-${fingerprint}`,
        title,
        snippet,
        confidence: 0.65, // keyword-matched public review text — medium-high
        keywords: ['g2_review', 'playwright_scrape'],
      });
    }
  } catch (e) {
    console.error(` [ERR] ${url}: ${e.message}`);
  } finally {
    // Closing the context also closes every page that belongs to it.
    await context.close();
  }

  return results;
}

/**
 * Insert scraped snippets into `erp_gap_scans`, one statement per row.
 *
 * Rows that hit a unique constraint are skipped by `ON CONFLICT DO NOTHING`
 * and are not counted. A failing insert is logged and skipped so one bad row
 * does not abort the batch (previously such errors were swallowed silently,
 * which made a broken table or connection look like "0 inserted").
 *
 * @param {object} pool - Object exposing `query(sql, params)` that resolves to
 *   a result with `rowCount` (a `pg` Pool in this script).
 * @param {Array<object>} results - Rows as produced by scrapeG2.
 * @returns {Promise<number>} Number of rows actually inserted.
 */
async function storeResults(pool, results) {
  const insertSql = `INSERT INTO erp_gap_scans (erp_id, erp_name, query, source_url, title, snippet, confidence_score, keywords)
     VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
     ON CONFLICT DO NOTHING`;

  let inserted = 0;
  for (const r of results) {
    try {
      const res = await pool.query(insertSql, [
        r.erp_id,
        r.erp_name,
        'playwright_g2',
        r.source_url,
        r.title,
        r.snippet,
        r.confidence,
        r.keywords,
      ]);
      // rowCount is 0 when the row was skipped by ON CONFLICT.
      if (res.rowCount > 0) inserted++;
    } catch (e) {
      // Best effort: report the failure and carry on with the next row.
      console.error(` [DB ERR] ${r.source_url}: ${e.message}`);
    }
  }
  return inserted;
}

// CLI entry point.
//   node scan-erp-gaps-playwright.js [erp_id|all]
// A known erp_id scans only that ERP; `all`, no argument, or an unrecognised
// id scans every ERP in ERP_URLS (an unrecognised id now prints a warning).
// The browser and the database pool are always released, and the process
// exits with code 1 when the run aborts on an unexpected error.
(async () => {
  const target = process.argv[2];
  console.log(`═══ SCAN-ERP-GAPS-PLAYWRIGHT · ${new Date().toISOString()} ═══`);

  // Own-property check: a plain `ERP_URLS[target]` lookup would also accept
  // inherited names such as "constructor" and then fail while iterating.
  const isKnownErp = Boolean(target) && Object.prototype.hasOwnProperty.call(ERP_URLS, target);
  if (target && target !== 'all' && !isKnownErp) {
    console.warn(` [WARN] unknown erp_id "${target}" — scanning all ERPs`);
  }
  const erpIds = isKnownErp ? [target] : Object.keys(ERP_URLS);

  let pool;
  let browser;
  let totalInserted = 0;
  let exitCode = 0;

  try {
    pool = new Pool(DB);
    browser = await chromium.launch({ headless: true, args: ['--no-sandbox'] });

    for (const erp_id of erpIds) {
      for (const url of ERP_URLS[erp_id]) {
        console.log(`\n━━━ ${erp_id} · ${url}`);
        const results = await scrapeG2(browser, erp_id, url);
        console.log(` → ${results.length} snippets extracted`);
        if (results.length > 0) {
          const ins = await storeResults(pool, results);
          console.log(` → ${ins} inserted`);
          totalInserted += ins;
        }
        // Pace between scrapes
        await new Promise(r => setTimeout(r, 2000));
      }
    }
  } catch (e) {
    console.error(` [FATAL] ${e.stack || e.message}`);
    exitCode = 1;
  } finally {
    // Release resources on every path; a failing close is reported but must
    // not hide the original error or stop the other cleanup step.
    if (browser) {
      await browser.close().catch((e) => console.error(` [ERR] browser.close: ${e.message}`));
    }
    if (pool) {
      await pool.end().catch((e) => console.error(` [ERR] pool.end: ${e.message}`));
    }
  }

  if (exitCode === 0) {
    console.log(`\n═══ DONE · inserted=${totalInserted} ═══`);
  }
  // Explicit exit so a lingering handle cannot keep the process alive.
  process.exit(exitCode);
})();