# wevads-gpu/nonreg-framework.sh

#!/usr/bin/env bash
set -euo pipefail

# -------------------------------------------------------------------
# WEVADS / WEVIA anti-regression framework
# Safe by design: read-only HTTP checks, no infra mutation.
#
# Configuration comes from the environment. Each ":=" expansion below
# keeps a non-empty value supplied by the caller and otherwise assigns
# the default shown.
# -------------------------------------------------------------------

# Sites under test.
: "${BASE_URL:=https://weval-consulting.com}"
: "${TRACKING_BASE_URL:=http://151.80.235.110}"
: "${TRACKING_DOMAIN_URL:=https://culturellemejean.charity}"

# API key and model name used by the API checks (API_KEY defaults to empty).
: "${API_KEY:=}"
: "${GPU_MODEL:=qwen2.5:3b}"

# Latency budgets, in seconds, for the two WEVIA endpoints.
: "${MAX_GREETING_SECONDS:=3}"
: "${MAX_DEEP_SECONDS:=60}"

# "1" turns confidentiality findings into failures instead of warnings.
: "${STRICT_CONFIDENTIALITY:=0}"

# One Markdown report per run, named after the start timestamp.
: "${REPORT_DIR:=./reports}"
RUN_ID="$(date '+%Y%m%d_%H%M%S')"
REPORT_FILE="${REPORT_DIR}/nonreg_${RUN_ID}.md"
mkdir -p "${REPORT_DIR}"

# Running tallies, plus the messages behind each failure and warning.
PASS_COUNT=0 FAIL_COUNT=0 WARN_COUNT=0
declare -a FAILURES WARNINGS

# Print all arguments as a single line on stdout.
# Arguments: any number of words; "$*" joins them with the first character
#            of IFS (a space unless IFS has been changed).
log() {
  printf '%s\n' "$*"
}

# Count one passing check and print it.
# Globals:   PASS_COUNT (incremented)
# Arguments: $1 - description of the check that passed
# Outputs:   "PASS | <description>" through log
record_pass() {
  # Arithmetic expansion runs in the current shell, so the += persists;
  # the ":" builtin keeps the exit status at 0 whatever the new value is.
  : "$(( PASS_COUNT += 1 ))"
  log "PASS | ${1}"
}

# Count one failed check, remember its message, and print it.
# Globals:   FAIL_COUNT (incremented), FAILURES (message appended)
# Arguments: $1 - description of the failure
# Outputs:   "FAIL | <description>" through log
record_fail() {
  : "$(( FAIL_COUNT += 1 ))"
  local reason="$1"
  FAILURES+=("${reason}")
  log "FAIL | ${reason}"
}

# Count one warning, remember its message, and print it.
# Globals:   WARN_COUNT (incremented), WARNINGS (message appended)
# Arguments: $1 - description of the warning
# Outputs:   "WARN | <description>" through log
record_warn() {
  : "$(( WARN_COUNT += 1 ))"
  local note="$1"
  WARNINGS+=("${note}")
  log "WARN | ${note}"
}

# Request a URL, following redirects, and report status code and timing.
# Arguments: $1 - URL to request
# Outputs:   "<http_code> <time_total>" on stdout, without a trailing newline
# Returns:   curl's exit status
http_status() {
  local url="$1"
  # Only the write-out line is used by callers, so the response body is
  # discarded instead of being written to a predictable path in /tmp that
  # every call would overwrite and nothing would read or clean up.
  curl -sS -L -o /dev/null -w '%{http_code} %{time_total}' --max-time 120 "$url"
}

# Record a pass when a URL answers HTTP 200, a failure otherwise.
# Arguments: $1 - label used in the report
#            $2 - URL to request
# Outputs:   one PASS or FAIL record
check_status_200() {
  local name="$1" url="$2"
  local code="" t=""

  # http_status prints "<code> <seconds>". Its exit status is ignored so a
  # transport error simply leaves the fields empty (reported as N/A).
  read -r code t _ <<<"$(http_status "$url" || true)"

  case "$code" in
    200)
      record_pass "${name} (${url}) code=${code} t=${t}s"
      ;;
    *)
      record_fail "${name} (${url}) expected 200 got ${code:-N/A} t=${t:-N/A}s"
      ;;
  esac
}

# Fetch a page and scan it, case-insensitively, for terms that must not
# appear publicly.
# Globals:   STRICT_CONFIDENTIALITY (read) - "1" records a hit as a failure,
#            any other value records it as a warning
# Arguments: $1 - URL to fetch (redirects are followed)
# Outputs:   one PASS, WARN or FAIL record
check_not_confidential_terms() {
  local url="$1"
  local body status
  local -a scanner
  local pattern='McKinsey|PwC|Deloitte|OpenAI|Anthropic|Abbott|AbbVie|J&J|CX3|DoubleM|89\.167\.40\.150|88\.198\.4\.195|\b646\b|\b604\b'

  body="$(curl -sS -L --max-time 60 "$url" || true)"
  if [[ -z "$body" ]]; then
    record_fail "Confidentiality scan cannot fetch ${url}"
    return
  fi

  # Prefer ripgrep when it is installed and fall back to grep -E otherwise.
  # A missing scanner must never be read as "no match": that would report
  # a clean scan without having scanned anything.
  # NOTE(review): the fallback relies on grep -E understanding "\b"
  # (GNU grep does) - confirm on other platforms.
  if command -v rg >/dev/null 2>&1; then
    scanner=(rg -q -i -e "$pattern")
  else
    scanner=(grep -E -q -i -e "$pattern")
  fi

  # Exit status: 0 = match, 1 = no match, anything else = scanner error.
  status=0
  "${scanner[@]}" <<<"$body" || status=$?

  case "$status" in
    0)
      if [[ "$STRICT_CONFIDENTIALITY" == "1" ]]; then
        record_fail "Confidentiality terms detected in ${url}"
      else
        record_warn "Confidentiality terms detected in ${url} (strict mode disabled)"
      fi
      ;;
    1)
      record_pass "Confidentiality scan clean for ${url}"
      ;;
    *)
      record_fail "Confidentiality scan could not run for ${url} (scanner exit ${status})"
      ;;
  esac
}

# Fetch a page with an embedded Python script and flag encoding artefacts,
# emoji and a list of known-bad French copy strings.
# Arguments: $1 - label used in the report
#            $2 - URL to fetch
# Outputs:   one PASS or FAIL record
# The Python script prints exactly one of:
#   OK
#   ERR<TAB><reason>
#   ISSUES<TAB><issue>|<issue>|...
check_content_quality() {
  local name="$1"
  local url="$2"
  local out
  local tab=$'\t'

  # The quoted heredoc delimiter passes the Python source through verbatim.
  # A non-zero exit (python3 missing, interpreter crash) is turned into an
  # ERR result so that "set -e" does not abort the whole run mid-way.
  out="$(python3 - "$url" <<'PY'
import re
import sys

url = sys.argv[1]

# Report a missing dependency as a check error rather than a traceback.
try:
    import requests
except ImportError as e:
    print(f"ERR\tmissing_dependency:{e}")
    sys.exit(0)

issues = []

try:
    resp = requests.get(url, timeout=45)
    # Force UTF-8 decode to avoid false mojibake from missing charset headers.
    html = resp.content.decode("utf-8", errors="replace")
except Exception as e:
    print(f"ERR\tfetch_failed:{e}")
    sys.exit(0)

if re.search(r'd00e[0-9a-fA-F]{1,3}', html):
    issues.append("encoding_hex_artifact")
# errors="replace" above turns undecodable bytes into U+FFFD.
# NOTE(review): the literal "<22>" this check used to look for is presumably
# a garbled U+FFFD; both are tested until that is confirmed.
if "\ufffd" in html or "<22>" in html:
    issues.append("replacement_char")
if re.search(r'Ã.|Â.', html):
    issues.append("utf8_mojibake")
if re.search(r'[\U0001F300-\U0001FAFF]', html):
    issues.append("emoji_found")

# Known FR issues to prevent regressions on product pages.
lowered = html.lower()
for bad in [
    "plan de d00e9ploiement",
    "ce que nos concurrents ne peuvent pas faire",
    "0 couverture international",
    "proximite international",
]:
    if bad in lowered:
        issues.append(f"copy_issue:{bad}")

if issues:
    print("ISSUES\t" + "|".join(sorted(set(issues))))
else:
    print("OK")
PY
)" || out="ERR${tab}checker_exit_$?"

  if [[ "$out" == "OK" ]]; then
    record_pass "Content quality clean ${name} (${url})"
  elif [[ "$out" == ERR* ]]; then
    record_fail "Content quality check failed ${name} (${url}) ${out#ERR${tab}}"
  else
    record_fail "Content quality issues ${name} (${url}) ${out#ISSUES${tab}}"
  fi
}

# POST a short greeting to the WEVIA fast endpoint and check that it
# answers HTTP 200 in strictly less than MAX_GREETING_SECONDS.
# Globals:   BASE_URL, RUN_ID, MAX_GREETING_SECONDS (read)
# Outputs:   one PASS or FAIL record; the response body is written to
#            /tmp/nonreg_wevia_<RUN_ID>.json (not read back here; presumably
#            kept for inspection after the run)
check_wevia_greeting() {
  local out code t
  local numeric_re='^[0-9]+([.][0-9]+)?$'

  out="$(curl -sS -o "/tmp/nonreg_wevia_${RUN_ID}.json" -w '%{http_code} %{time_total}' \
    --max-time 60 \
    -H "Content-Type: application/json" \
    -d '{"message":"Bonjour","mode":"fast"}' \
    "${BASE_URL}/api/weval-ia" || true)"
  read -r code t _ <<<"$out"

  if [[ "$code" != "200" ]]; then
    record_fail "WEVIA greeting expected 200 got ${code:-N/A}"
    return
  fi

  # A decimal comma (which some locales produce) is normalised to a dot,
  # and the value is validated before it reaches awk.
  t="${t/,/.}"
  if [[ ! "$t" =~ $numeric_re ]]; then
    record_fail "WEVIA greeting latency unreadable (got '${t}')"
    return
  fi

  # Values go in through -v rather than being pasted into the awk program,
  # so an odd value cannot change the program text; "+ 0" forces a numeric
  # comparison.
  if LC_ALL=C awk -v elapsed="$t" -v limit="$MAX_GREETING_SECONDS" \
    'BEGIN { exit !(elapsed + 0 < limit + 0) }'; then
    record_pass "WEVIA greeting latency ${t}s < ${MAX_GREETING_SECONDS}s"
  else
    record_fail "WEVIA greeting latency ${t}s >= ${MAX_GREETING_SECONDS}s"
  fi
}

# POST an analysis prompt to the WEVIA deep endpoint and check that it
# answers HTTP 200 in strictly less than MAX_DEEP_SECONDS.
# Globals:   BASE_URL, RUN_ID, MAX_DEEP_SECONDS (read)
# Outputs:   one PASS or FAIL record; the response body is written to
#            /tmp/nonreg_wevia_full_<RUN_ID>.json (not read back here;
#            presumably kept for inspection after the run)
check_wevia_deep() {
  local out code t
  local numeric_re='^[0-9]+([.][0-9]+)?$'

  out="$(curl -sS -o "/tmp/nonreg_wevia_full_${RUN_ID}.json" -w '%{http_code} %{time_total}' \
    --max-time 120 \
    -H "Content-Type: application/json" \
    -d '{"message":"Fais une analyse concise supply chain internationale.","mode":"deep"}' \
    "${BASE_URL}/api/weval-ia-full" || true)"
  read -r code t _ <<<"$out"

  if [[ "$code" != "200" ]]; then
    record_fail "WEVIA deep expected 200 got ${code:-N/A}"
    return
  fi

  # A decimal comma (which some locales produce) is normalised to a dot,
  # and the value is validated before it reaches awk.
  t="${t/,/.}"
  if [[ ! "$t" =~ $numeric_re ]]; then
    record_fail "WEVIA deep latency unreadable (got '${t}')"
    return
  fi

  # Values go in through -v rather than being pasted into the awk program,
  # so an odd value cannot change the program text; "+ 0" forces a numeric
  # comparison.
  if LC_ALL=C awk -v elapsed="$t" -v limit="$MAX_DEEP_SECONDS" \
    'BEGIN { exit !(elapsed + 0 < limit + 0) }'; then
    record_pass "WEVIA deep latency ${t}s < ${MAX_DEEP_SECONDS}s"
  else
    record_fail "WEVIA deep latency ${t}s >= ${MAX_DEEP_SECONDS}s"
  fi
}

# Send one chat request to the GPU API and verify that it works.
# Globals:   API_KEY, GPU_MODEL, BASE_URL, RUN_ID (read)
# Outputs:   one WARN record when API_KEY is empty (check skipped),
#            otherwise one PASS or FAIL record; the response body is
#            written to /tmp/nonreg_gpu_<RUN_ID>.json
check_gpu_chat() {
  if [[ -z "$API_KEY" ]]; then
    record_warn "GPU chat check skipped (API_KEY not set)"
    return
  fi

  local payload code
  local body_file="/tmp/nonreg_gpu_${RUN_ID}.json"

  # NOTE(review): GPU_MODEL is inserted into the JSON as-is, which assumes
  # it contains no double quote or backslash - confirm.
  payload="$(printf '{"model":"%s","messages":[{"role":"user","content":"Donne 3 points pour optimiser une campagne email."}]}' "$GPU_MODEL")"
  code="$(curl -sS -o "$body_file" -w '%{http_code}' \
    --max-time 120 \
    -H "Content-Type: application/json" \
    -H "X-API-Key: ${API_KEY}" \
    -d "$payload" \
    "${BASE_URL}/api/gpu/chat.php" || true)"

  if [[ "$code" != "200" ]]; then
    record_fail "GPU chat expected 200 got ${code:-N/A}"
    return
  fi

  # grep -F is used for this fixed-string search so the result does not
  # depend on ripgrep being installed: a missing scanner would otherwise
  # look like "string not found" and report the chat as functional.
  if [[ -f "$body_file" ]] && grep -q -i -F -- "Model not available" "$body_file"; then
    record_fail "GPU chat returned model-not-available despite HTTP 200"
  else
    record_pass "GPU chat functional (model=${GPU_MODEL})"
  fi
}

# Probe the two tracking endpoints; 200, 301 and 302 count as reachable.
# Globals:   TRACKING_BASE_URL, TRACKING_DOMAIN_URL (read)
# Outputs:   two records (PASS or FAIL), base first, then domain
check_tracking_smoke() {
  local base_code domain_code
  # Shared curl invocation: body discarded, only the status code printed.
  # Redirects are not followed, so a 301/302 is reported as such.
  local -a probe=(curl -sS -o /dev/null -w "%{http_code}" --max-time 30)

  # Both requests are issued before either result is recorded.
  base_code="$("${probe[@]}" "${TRACKING_BASE_URL}" || true)"
  domain_code="$("${probe[@]}" "${TRACKING_DOMAIN_URL}" || true)"

  case "$base_code" in
    200 | 301 | 302)
      record_pass "Tracking base reachable (${TRACKING_BASE_URL}) code=${base_code}"
      ;;
    *)
      record_fail "Tracking base unreachable (${TRACKING_BASE_URL}) code=${base_code:-N/A}"
      ;;
  esac

  case "$domain_code" in
    200 | 301 | 302)
      record_pass "Tracking domain reachable (${TRACKING_DOMAIN_URL}) code=${domain_code}"
      ;;
    *)
      record_fail "Tracking domain unreachable (${TRACKING_DOMAIN_URL}) code=${domain_code:-N/A}"
      ;;
  esac
}

# Smoke-test the DeliverScore scan endpoint for gmail.com.
# Globals:   BASE_URL, API_KEY, RUN_ID (read)
# Outputs:   PASS on 200, WARN on 429 or 401/403, FAIL on anything else;
#            the response body is written to /tmp/nonreg_deliver_<RUN_ID>.json
check_deliverscore_smoke() {
  local endpoint="${BASE_URL}/api/deliverscore/scan.php?domain=gmail.com"
  local reply status elapsed

  # The key, when one is configured, travels as a query parameter.
  if [[ -n "$API_KEY" ]]; then
    endpoint+="&api_key=${API_KEY}"
  fi

  reply="$(curl -sS -o "/tmp/nonreg_deliver_${RUN_ID}.json" -w "%{http_code} %{time_total}" \
    --max-time 120 \
    "$endpoint" || true)"
  read -r status elapsed _ <<<"$reply"

  case "$status" in
    200)
      record_pass "DeliverScore smoke code=${status} t=${elapsed}s"
      ;;
    429)
      record_warn "DeliverScore rate-limited code=429 t=${elapsed}s"
      ;;
    401 | 403)
      record_warn "DeliverScore auth required code=${status} (provide API_KEY for strict test)"
      ;;
    *)
      record_fail "DeliverScore smoke unexpected code=${status:-N/A} t=${elapsed:-N/A}s"
      ;;
  esac
}

# Print the Markdown run report on stdout.
# Globals: RUN_ID, BASE_URL, TRACKING_BASE_URL, TRACKING_DOMAIN_URL,
#          PASS_COUNT, WARN_COUNT, FAIL_COUNT, WARNINGS, FAILURES (all read)
_render_report() {
  cat <<EOF
# Rapport anti-regression ${RUN_ID}

- Base URL: ${BASE_URL}
- Tracking base: ${TRACKING_BASE_URL}
- Tracking domain: ${TRACKING_DOMAIN_URL}

## Resume

- PASS: ${PASS_COUNT}
- WARN: ${WARN_COUNT}
- FAIL: ${FAIL_COUNT}

EOF
  # The arrays are only expanded when their counter says they are non-empty.
  if (( WARN_COUNT > 0 )); then
    echo "## Warnings"
    printf -- "- %s\n" "${WARNINGS[@]}"
    echo
  fi
  if (( FAIL_COUNT > 0 )); then
    echo "## Failures"
    printf -- "- %s\n" "${FAILURES[@]}"
    echo
  fi
  echo "## Verdict"
  if (( FAIL_COUNT == 0 )); then
    echo "GO (no hard regression detected)."
  else
    echo "NO-GO (${FAIL_COUNT} hard failures)."
  fi
}

# Run every check in order, write the report to REPORT_FILE, and exit with
# status 1 when at least one failure was recorded.
# Environment (optional; the previously hard-coded addresses stay the defaults):
#   SENTINEL_API_URL - Sentinel API endpoint expected to answer 200
#   CONSENT_URL      - consent site expected to answer 200
# Arguments: none are used
main() {
  local sentinel_url="${SENTINEL_API_URL:-http://89.167.40.150:5890/api/sentinel-brain.php}"
  local consent_url="${CONSENT_URL:-http://consent.wevup.app}"
  local page path

  log "=== NON-REG FRAMEWORK START (${RUN_ID}) ==="
  log "BASE_URL=${BASE_URL}"
  log "TRACKING_BASE_URL=${TRACKING_BASE_URL}"
  log "TRACKING_DOMAIN_URL=${TRACKING_DOMAIN_URL}"

  # Core pages
  check_status_200 "Home" "${BASE_URL}/"
  check_status_200 "Products hub" "${BASE_URL}/products/"
  check_status_200 "WEVIA page" "${BASE_URL}/wevia"
  check_status_200 "Platform" "${BASE_URL}/platform/"

  # Product pages (27 entries), each served as /products/<name>.html
  local -a product_pages=(
    "academy"
    "affiliates"
    "arsenal"
    "blueprintai"
    "cloud-providers"
    "content-factory"
    "dashboard"
    "deliverads"
    "deliverscore"
    "formbuilder"
    "gpu-inference"
    "index"
    "leadforge"
    "mailforge"
    "mailwarm"
    "medreach"
    "outreachai"
    "proposalai"
    "sentinel"
    "services"
    "signup"
    "storeforge"
    "wevads"
    "wevads-ia"
    "wevia-enterprise"
    "wevia-whitelabel"
    "workspace"
  )
  for page in "${product_pages[@]}"; do
    check_status_200 "Product ${page}" "${BASE_URL}/products/${page}.html"
  done

  # Confidentiality scans on strategic pages
  for path in "/" "/products/" "/products/wevads-ia.html" "/products/workspace.html"; do
    check_not_confidential_terms "${BASE_URL}${path}"
  done

  # Copy/encoding quality checks (FR text + no emoji regressions).
  check_content_quality "Products hub" "${BASE_URL}/products/"
  check_content_quality "Academy" "${BASE_URL}/products/academy.html"
  check_content_quality "Workspace" "${BASE_URL}/products/workspace.html"
  check_content_quality "WEVADS IA" "${BASE_URL}/products/wevads-ia.html"
  check_content_quality "DeliverScore" "${BASE_URL}/products/deliverscore.html"

  # WEVIA performance checks
  check_wevia_greeting
  check_wevia_deep

  # SaaS API checks (smoke)
  check_deliverscore_smoke
  check_status_200 "MedReach smoke" "${BASE_URL}/api/medreach/search.php?specialty=cardiologue&country=FR&limit=3"
  check_gpu_chat
  check_tracking_smoke

  # Sentinel API and consent site: a plain 200 is expected from both.
  check_status_200 "Sentinel API" "${sentinel_url}"
  check_status_200 "Consent wevup" "${consent_url}"

  _render_report >"${REPORT_FILE}"

  log "Report written: ${REPORT_FILE}"
  log "=== NON-REG FRAMEWORK END (${RUN_ID}) ==="

  if (( FAIL_COUNT > 0 )); then
    exit 1
  fi
}

main "$@"