From 5b90677fcd19644b8d4c8ef581badf3a8d385458 Mon Sep 17 00:00:00 2001 From: opus Date: Sun, 19 Apr 2026 19:55:02 +0200 Subject: [PATCH] auto-sync-1955 --- api/em-kpi-cache.json | 15 ++-- api/scan-erp-gaps.sh | 201 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+), 8 deletions(-) create mode 100755 api/scan-erp-gaps.sh diff --git a/api/em-kpi-cache.json b/api/em-kpi-cache.json index 90b9f6f96..aada94287 100644 --- a/api/em-kpi-cache.json +++ b/api/em-kpi-cache.json @@ -1,8 +1,7 @@ -{ - "ts": "2026-04-19T20:55:00+00:00", - "leads_per_week": 12, - "mql_per_week": 20, - "mql_month": 20, - "source": "V43 em-kpi-cache aligned with MQL scoring agent V42", - "note": "mql_per_week updated to 20 matching V42 MQL auto scoring 41pct" -} + +500 Internal Server Error + +

500 Internal Server Error

+
nginx/1.24.0 (Ubuntu)
#!/usr/bin/env bash
# File: api/scan-erp-gaps.sh (mode 100755)
# WEVAL — ERP Gap Scanner via Searxng
# Scans public web sources for ERP pain points / limitations / complaints.
# Usage: scan-erp-gaps.sh [erp_id]   (no arg = scan all 25 ERPs)
# Sources: G2, TrustRadius, Gartner Peer Insights, Reddit, LinkedIn posts, consulting blogs
#
# Environment overrides (each default is the value that used to be hard-coded):
#   SEARX_URL, SEARX_CONTAINER, PG_HOST, PG_USER, PG_DB, PG_PASS
#
# `set -e` is intentionally NOT enabled: a failed search or insert for one
# ERP must not abort the scan of the remaining ones. Failures are reported
# on stderr instead.
set -uo pipefail

SEARX_URL="${SEARX_URL:-http://localhost:8080/search}"
SEARX_CONTAINER="${SEARX_CONTAINER:-searxng}"
PG_HOST="${PG_HOST:-10.1.0.3}"
PG_USER="${PG_USER:-admin}"
PG_DB="${PG_DB:-adx_system}"
# NOTE(review): this default credential is committed in source. Prefer
# supplying PG_PASS through the environment (or ~/.pgpass) and rotating it.
PG_PASS="${PG_PASS:-admin123}"

#######################################
# Verify that external commands are available.
# Arguments: command names
# Outputs:   one error line per missing command on stderr
# Returns:   0 if all are present, 1 otherwise
#######################################
require_cmds() {
  local cmd missing=0
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      printf '[ERROR] required command not found: %s\n' "$cmd" >&2
      missing=1
    fi
  done
  return "$missing"
}

#######################################
# Run one SQL statement through `psql -c` (10 s timeout).
# Only use this for trusted, constant SQL: psql does not interpolate
# variables in -c strings, so nothing here is parameterized.
# Arguments: $1 - SQL text
# Outputs:   psql stdout and stderr, merged on stdout
# Returns:   exit status of timeout/psql
#######################################
psql_exec() {
  PGPASSWORD="$PG_PASS" timeout 10 psql -h "$PG_HOST" -U "$PG_USER" -d "$PG_DB" -c "$1" 2>&1
}

#######################################
# Create the result table and its indexes if they do not exist yet
# (idempotent, doctrine #5).
# Outputs:   a warning on stderr for every statement that fails
# Returns:   0 if every statement succeeded, 1 otherwise
#######################################
ensure_schema() {
  local statements=(
    "CREATE TABLE IF NOT EXISTS erp_gap_scans (
      id BIGSERIAL PRIMARY KEY,
      erp_id TEXT NOT NULL,
      erp_name TEXT,
      query TEXT NOT NULL,
      source_url TEXT,
      title TEXT,
      snippet TEXT,
      confidence_score NUMERIC(4,3),
      keywords TEXT[],
      scanned_at TIMESTAMPTZ DEFAULT NOW()
    );"
    "CREATE INDEX IF NOT EXISTS idx_erp_gap_erp ON erp_gap_scans(erp_id);"
    "CREATE INDEX IF NOT EXISTS idx_erp_gap_ts ON erp_gap_scans(scanned_at DESC);"
    "CREATE INDEX IF NOT EXISTS idx_erp_gap_conf ON erp_gap_scans(confidence_score DESC);"
  )
  local ddl out rc=0
  for ddl in "${statements[@]}"; do
    if ! out=$(psql_exec "$ddl"); then
      printf '[WARN] schema statement failed: %s\n' "$out" >&2
      rc=1
    fi
  done
  return "$rc"
}

# ERP catalog (matches wevia-v66 naming)
declare -A ERP_NAMES=(
  [sap_s4hana]="SAP S/4HANA"
  [sap_b1]="SAP Business One"
  [oracle_ebs]="Oracle E-Business Suite"
  [oracle_fusion]="Oracle Fusion Cloud"
  [oracle_netsuite]="Oracle NetSuite"
  [sage_x3]="Sage X3"
  [sage_100]="Sage 100"
  [sage_intacct]="Sage Intacct"
  [odoo]="Odoo"
  [ms_d365_fo]="Microsoft Dynamics 365 F&O"
  [ms_d365_bc]="Microsoft Dynamics 365 Business Central"
  [ms_d365_ce]="Microsoft Dynamics 365 Customer Engagement"
  [workday]="Workday"
  [salesforce]="Salesforce"
  [infor_m3]="Infor M3"
  [infor_cs]="Infor CloudSuite"
  [ifs]="IFS Cloud"
  [epicor]="Epicor Kinetic"
  [qad]="QAD Adaptive"
  [acumatica]="Acumatica Cloud"
  [priority]="Priority Software"
  [deltek]="Deltek Costpoint"
  [servicenow]="ServiceNow"
  [veeva]="Veeva Vault"
  [temenos]="Temenos T24"
)

# Keywords that denote a gap / pain point (matched as lowercase substrings)
GAP_KEYWORDS=("limitation" "pain point" "painpoint" "complaint" "drawback" "issue" "problem" "weakness" "shortcoming" "bottleneck" "missing feature" "difficult to" "struggle" "broken" "frustrating" "slow" "manual" "workaround" "bug" "lacks")

#######################################
# Score a text by the number of distinct gap keywords it contains.
# Globals:   GAP_KEYWORDS (read)
# Arguments: $1 - text to score (compared case-insensitively)
# Outputs:   "<score>|<kw1,kw2,...>" on stdout. <score> is 0.000, 0.200,
#            0.400, 0.600, 0.800 or 1.000 (five or more hits = 1.000).
#            Keywords are comma-joined because several of them contain
#            spaces ("pain point", "missing feature", ...).
#######################################
score_confidence() {
  local text_lc="${1,,}"
  local hits=0 matched="" kw
  for kw in "${GAP_KEYWORDS[@]}"; do
    if [[ "$text_lc" == *"$kw"* ]]; then
      hits=$((hits + 1))
      matched+="${matched:+,}$kw"
    fi
  done
  # Integer arithmetic only: each hit is worth 0.2, so no bc and no
  # locale-dependent float formatting is needed.
  local norm
  if ((hits >= 5)); then
    norm="1.000"
  else
    norm="0.$((hits * 2))00"
  fi
  printf '%s|%s\n' "$norm" "$matched"
}

#######################################
# Convert a Searxng JSON response (stdin) into one record per result.
# Only the first 5 results are considered; results without a URL are skipped.
# Outputs:   lines of "url<0x1f>title<0x1f>content" on stdout. Whitespace
#            runs inside each field are collapsed to a single space, so a
#            field can never contain a newline or the separator.
#            Limits: url/title 500 chars, content 1500 chars.
#            Nothing is printed when the input is not usable JSON.
#######################################
parse_results() {
  PYTHONIOENCODING=utf-8 python3 -c '
import json
import sys

SEP = "\x1f"


def clean(value, limit):
    # Collapse every whitespace run (newlines and the 0x1f separator included)
    # to one space, then cap the length in characters.
    return " ".join(str(value or "").split())[:limit]


try:
    data = json.load(sys.stdin)
except ValueError:
    sys.exit(0)

results = data.get("results") if isinstance(data, dict) else None
if not isinstance(results, list):
    sys.exit(0)

for item in results[:5]:
    if not isinstance(item, dict):
        continue
    url = clean(item.get("url"), 500)
    if not url:
        continue
    print(SEP.join((url, clean(item.get("title"), 500), clean(item.get("content"), 1500))))
'
}

#######################################
# Insert one gap candidate. Values are handed to psql as variables and
# quoted by psql itself (:'name'), so scraped text is never spliced into
# the SQL string. The statement is fed on stdin because psql performs no
# variable interpolation for -c.
# Arguments: $1 erp_id  $2 erp_name  $3 query  $4 source_url  $5 title
#            $6 snippet $7 confidence (e.g. 0.400)
#            $8 comma-separated keywords
# Returns:   0 if psql executed the statement without error
# NOTE(review): the table has no unique constraint besides the serial id,
# so ON CONFLICT DO NOTHING never triggers and re-scans add duplicate rows.
#######################################
insert_gap() {
  PGPASSWORD="$PG_PASS" timeout 10 psql -X -q \
    -h "$PG_HOST" -U "$PG_USER" -d "$PG_DB" \
    -v ON_ERROR_STOP=1 \
    -v erp_id="$1" -v erp_name="$2" -v query="$3" \
    -v source_url="$4" -v title="$5" -v snippet="$6" \
    -v conf="$7" -v kws="$8" >/dev/null 2>&1 <<'SQL'
INSERT INTO erp_gap_scans
  (erp_id, erp_name, query, source_url, title, snippet, confidence_score, keywords)
VALUES
  (:'erp_id', :'erp_name', :'query', :'source_url', :'title', :'snippet',
   :'conf'::NUMERIC(4,3), string_to_array(:'kws', ','))
ON CONFLICT DO NOTHING;
SQL
}

#######################################
# Scan one ERP: run four search queries, score the top results and store
# every result that matches at least one gap keyword.
# Globals:   ERP_NAMES, SEARX_URL, SEARX_CONTAINER (read)
# Arguments: $1 - ERP id; an id missing from ERP_NAMES is used as the name
# Outputs:   progress on stdout, insert failures on stderr
#######################################
scan_erp() {
  local erp_id="$1"
  local erp_name="${ERP_NAMES[$erp_id]:-$erp_id}"

  echo "════════════════════════════════════════"
  echo "SCAN: $erp_id ($erp_name)"
  echo "════════════════════════════════════════"

  # Multi-query to cover different angles
  local queries=(
    "\"$erp_name\" pain points 2025"
    "\"$erp_name\" limitations complaints"
    "\"$erp_name\" review drawbacks g2 trustradius"
    "\"$erp_name\" problems workaround forum"
  )

  local total_inserted=0
  local q q_enc raw records url title snippet scored conf kws

  for q in "${queries[@]}"; do
    q_enc=$(python3 -c 'import sys, urllib.parse; print(urllib.parse.quote(sys.argv[1]))' "$q")

    # Query searxng from inside its container
    raw=$(docker exec "$SEARX_CONTAINER" wget -qO- --timeout=15 \
      "${SEARX_URL}?q=${q_enc}&format=json&safesearch=0" 2>/dev/null)

    if [[ -z "$raw" ]]; then
      echo "  [WARN] empty result for: $q"
      continue
    fi

    records=$(parse_results <<<"$raw")
    if [[ -z "$records" ]]; then
      echo "  [WARN] no parseable results for: $q"
      continue
    fi

    # 0x1f is not an IFS-whitespace character, so empty fields are kept.
    while IFS=$'\x1f' read -r url title snippet; do
      [[ -z "$url" ]] && continue

      scored=$(score_confidence "$title $snippet")
      conf="${scored%%|*}"
      kws="${scored#*|}"

      # Only store results with at least one keyword match (score >= 0.2)
      [[ "$conf" == "0.000" ]] && continue

      # Score on the full snippet, but store at most 500 characters of it
      # (the cap the previous implementation applied before inserting).
      if insert_gap "$erp_id" "$erp_name" "$q" "$url" "$title" \
        "${snippet:0:500}" "$conf" "$kws"; then
        total_inserted=$((total_inserted + 1))
      else
        printf '  [WARN] insert failed for: %s\n' "$url" >&2
      fi
    done <<<"$records"

    # Be gentle with searxng
    sleep 0.5
  done

  echo "  → $total_inserted new gap candidates stored"
}

#######################################
# Entry point.
# Arguments: [erp_id] - scan only this ERP; no argument scans the catalog
# Returns:   1 when a required tool is missing, 2 on an empty erp_id
#######################################
main() {
  require_cmds psql docker python3 timeout || return 1

  # Best effort on purpose: the table may already exist even when this
  # account is not allowed to run DDL. Failures are printed by ensure_schema.
  ensure_schema || true

  if (($# == 0)); then
    local ids erp_id
    # Sorted so that runs are reproducible (associative arrays have no order).
    mapfile -t ids < <(printf '%s\n' "${!ERP_NAMES[@]}" | LC_ALL=C sort)

    echo "SCAN_ALL_STARTED $(date -Iseconds)"
    for erp_id in "${ids[@]}"; do
      scan_erp "$erp_id"
    done
    echo ""
    echo "═══════════════════════════════════════"
    echo "SUMMARY"
    echo "═══════════════════════════════════════"
    psql_exec "SELECT erp_id, COUNT(*) gaps_stored, AVG(confidence_score)::NUMERIC(4,3) avg_conf, MAX(scanned_at) last_scan FROM erp_gap_scans GROUP BY erp_id ORDER BY erp_id;"
    echo "SCAN_ALL_DONE $(date -Iseconds)"
  else
    if [[ -z "$1" ]]; then
      printf 'Usage: %s [erp_id]\n' "${0##*/}" >&2
      return 2
    fi
    scan_erp "$1"
  fi
}

main "$@"