html/top-ia/rgpd_forget.sh

#!/bin/bash
# RGPD Right to be Forgotten: purge data by identifier (email/phone/name)
# Usage: rgpd_forget.sh <identifier> [--dry-run]
# Positional arguments: $1 = identifier to purge (required),
# $2 = optional flag; only the exact value "--dry-run" enables dry-run mode.
ID="$1"
DRYRUN="${2:-}"
if [ -z "$ID" ]; then
  echo '{"error":"need identifier"}'
  exit 1
fi

# Safety: require at least 5 chars so a short fragment cannot match (and
# purge) a broad set of unrelated records.
if [ "${#ID}" -lt 5 ]; then
  echo '{"error":"identifier too short (min 5 chars for safety)"}'
  exit 1
fi

# Audit the request.
# The flag is caller-supplied, so escape it before embedding it in the JSON
# detail: backslashes and double quotes are escaped, control characters are
# replaced by spaces. For the expected values ("" or "--dry-run") the
# resulting JSON is unchanged.
dryrun_json=${DRYRUN//\\/\\\\}
dryrun_json=${dryrun_json//\"/\\\"}
dryrun_json=${dryrun_json//[[:cntrl:]]/ }
# NOTE(review): the identifier itself is handed to the audit logger here —
# confirm that storing it in the audit trail is intended.
# Audit logging is best-effort: its output and failures are deliberately
# discarded so they never corrupt the JSON printed on stdout.
/opt/weval-ops/top-ia/audit_log.sh "rgpd" "forget_request" "$ID" \
  "{\"dry_run\":\"${dryrun_json}\"}" >/dev/null 2>&1

#######################################
# Purge (or, in dry-run mode, only count) stored data matching an identifier.
#
# The identifier and flag are handed to the embedded Python program through
# environment variables and the heredoc delimiter is quoted, so neither the
# shell nor Python ever interprets the caller-supplied text as code.
#
# Arguments:
#   $1 - identifier to search for (case-insensitive substring match)
#   $2 - optional; exactly "--dry-run" reports matches without deleting
# Outputs:
#   One JSON object on stdout: identifier, dry_run, and per-location counts
#   or error strings under "locations".
# Returns:
#   1 if the identifier is empty, otherwise the exit status of python3.
#######################################
purge_identifier() {
  local ident="${1:-}"
  local dry_flag="${2:-}"

  # An empty needle is a substring of every line and would match everything.
  if [[ -z "$ident" ]]; then
    echo '{"error":"need identifier"}'
    return 1
  fi

  RGPD_IDENT="$ident" RGPD_DRY_RUN="$dry_flag" python3 <<'PY'
import json
import os
import urllib.request

# Inputs come from the environment; they are data, never part of this source.
ident = os.environ.get("RGPD_IDENT", "")
dry = os.environ.get("RGPD_DRY_RUN", "") == "--dry-run"
needle = ident.lower()
results = {"identifier": ident, "dry_run": dry, "locations": {}}
found = results["locations"]

QDRANT = "http://localhost:6333/collections/wevia_memory_768"


def post_json(url, payload):
    """POST payload as JSON and return the raw response body (8 s timeout)."""
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=8) as resp:
        return resp.read()


def read_lines(path):
    """Return the lines of path, or [] when the file does not exist."""
    if not os.path.exists(path):
        return []
    with open(path) as fh:
        return fh.readlines()


# 1. Qdrant wevia_memory_768: embed the identifier, run a vector search, keep
#    only hits whose payload text contains the identifier, then delete them.
# NOTE(review): the search is capped at 50 hits with score_threshold 0.4, so
# a single run may not reach every point that mentions the identifier.
try:
    emb = json.loads(post_json(
        "http://localhost:11434/api/embeddings",
        {"model": "nomic-embed-text", "prompt": ident},
    )).get("embedding", [])
    if emb:
        hits = json.loads(post_json(
            QDRANT + "/points/search",
            {"vector": emb, "limit": 50, "with_payload": True, "score_threshold": 0.4},
        )).get("result", [])
        ids = [h["id"] for h in hits if needle in str(h.get("payload", {})).lower()]
        found["qdrant_matches"] = len(ids)
        if ids and not dry:
            post_json(QDRANT + "/points/delete?wait=true", {"points": ids})
            found["qdrant_deleted"] = len(ids)
except Exception as e:
    # Best effort: report the failure and continue with the other locations.
    found["qdrant_error"] = str(e)[:100]

# 2. Procedural memory JSONL: drop every line that contains the identifier.
try:
    path = "/opt/weval-ops/top-ia/procedural.jsonl"
    lines = read_lines(path)
    keep = [l for l in lines if needle not in l.lower()]
    removed = len(lines) - len(keep)
    found["procedural_matches"] = removed
    if removed and not dry:
        with open(path, "w") as fh:
            fh.writelines(keep)
        found["procedural_deleted"] = removed
except Exception as e:
    found["procedural_error"] = str(e)[:100]

# 3. Audit trail: never modified here; only the number of lines that mention
#    the identifier is reported.
try:
    audit_lines = read_lines("/var/log/weval/audit/audit.jsonl")
    found["audit_occurrences"] = sum(1 for l in audit_lines if needle in l.lower())
    found["audit_note"] = "audit trail is immutable by design (RGPD Art.30 balance)"
except Exception as e:
    # Surface the failure instead of silently omitting the audit counts.
    found["audit_error"] = str(e)[:100]

print(json.dumps(results, ensure_ascii=False))
PY
}

purge_identifier "$ID" "$DRYRUN"

# Record in the audit trail that the forget run has finished.
# Output and failures of the logger are discarded (best-effort logging).
completion_event=("rgpd" "forget_completed" "$ID" "{}")
/opt/weval-ops/top-ia/audit_log.sh "${completion_event[@]}" &>/dev/null