76 lines
3.2 KiB
Bash
Executable File
76 lines
3.2 KiB
Bash
Executable File
#!/bin/bash
# RGPD Right to be Forgotten: purge data by identifier (email/phone/name).
#
# Usage: rgpd_forget.sh <identifier> [--dry-run]
#
# Output: a single JSON document on stdout describing, per storage location,
#         how many records matched the identifier and how many were deleted.
#         With --dry-run nothing is deleted; only match counts are reported.
#
# Locations handled:
#   1. Qdrant collection "wevia_memory_768" (vector search, then delete)
#   2. /opt/weval-ops/top-ia/procedural.jsonl  (matching lines removed)
#   3. /var/log/weval/audit/audit.jsonl        (occurrences reported only)
#
# Exit status: 1 on invalid arguments, otherwise the exit status of the
#              Python purge step.

set -u

readonly AUDIT_LOG="/opt/weval-ops/top-ia/audit_log.sh"

ID="${1:-}"
DRYRUN="${2:-}"

if [[ -z "$ID" ]]; then
  echo '{"error":"need identifier"}'
  exit 1
fi

# Safety: require at least 5 chars to avoid purging too broadly.
if (( ${#ID} < 5 )); then
  echo '{"error":"identifier too short (min 5 chars for safety)"}'
  exit 1
fi

# Escape backslashes and double quotes so the audit payload stays valid JSON
# even when the second argument contains them.
dryrun_json=${DRYRUN//\\/\\\\}
dryrun_json=${dryrun_json//\"/\\\"}

# Audit the request. Best effort: a failing audit logger must not block the
# purge, so its output and status are deliberately ignored.
"$AUDIT_LOG" "rgpd" "forget_request" "$ID" "{\"dry_run\":\"${dryrun_json}\"}" >/dev/null 2>&1

# The identifier is user-supplied. It is handed to Python through the
# environment and the heredoc delimiter is quoted, so the shell never
# interpolates it into Python source (which would allow code injection).
RGPD_IDENT="$ID" RGPD_DRYRUN="$DRYRUN" python3 <<'PY'
import json
import os
import urllib.request

EMBED_URL = "http://localhost:11434/api/embeddings"
QDRANT_COLLECTION = "http://localhost:6333/collections/wevia_memory_768"
PROCEDURAL_FILE = "/opt/weval-ops/top-ia/procedural.jsonl"
AUDIT_FILE = "/var/log/weval/audit/audit.jsonl"
JSON_HEADERS = {"Content-Type": "application/json"}
HTTP_TIMEOUT = 8  # seconds, applied to every HTTP call below

ident = os.environ["RGPD_IDENT"]
dry = os.environ.get("RGPD_DRYRUN", "") == "--dry-run"
needle = ident.lower()  # all matching below is case-insensitive substring
results = {"identifier": ident, "dry_run": dry, "locations": {}}
locations = results["locations"]


def post_json(url, payload):
    """POST `payload` as JSON to `url` and return the decoded JSON response."""
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers=JSON_HEADERS,
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT) as resp:
        return json.loads(resp.read())


# 1. Qdrant wevia_memory_768 (search then delete)
# NOTE(review): only the 50 nearest points above score 0.4 are inspected, so
# a single run may not find every point that mentions the identifier.
try:
    # Embed the identifier so it can be used as the search vector.
    emb = post_json(
        EMBED_URL, {"model": "nomic-embed-text", "prompt": ident}
    ).get("embedding", [])
    if emb:
        matches = post_json(
            QDRANT_COLLECTION + "/points/search",
            {"vector": emb, "limit": 50, "with_payload": True, "score_threshold": 0.4},
        ).get("result", [])
        # Keep only hits whose payload text really contains the identifier;
        # vector similarity alone is not proof of a match.
        ids = [m["id"] for m in matches if needle in str(m.get("payload", {})).lower()]
        locations["qdrant_matches"] = len(ids)
        if ids and not dry:
            post_json(QDRANT_COLLECTION + "/points/delete?wait=true", {"points": ids})
            locations["qdrant_deleted"] = len(ids)
except Exception as e:
    locations["qdrant_error"] = str(e)[:100]

# 2. Procedural memory JSONL: drop every line that mentions the identifier.
try:
    lines = []
    if os.path.exists(PROCEDURAL_FILE):
        with open(PROCEDURAL_FILE) as fh:
            lines = fh.readlines()
    keep = [line for line in lines if needle not in line.lower()]
    matched = len(lines) - len(keep)
    locations["procedural_matches"] = matched
    if matched and not dry:
        with open(PROCEDURAL_FILE, "w") as fh:
            fh.writelines(keep)
        locations["procedural_deleted"] = matched
except Exception as e:
    locations["procedural_error"] = str(e)[:100]

# 3. Audit trail: never modified (immutability principle). Only the number
#    of lines mentioning the identifier is reported.
try:
    occur = 0
    if os.path.exists(AUDIT_FILE):
        with open(AUDIT_FILE) as fh:
            occur = sum(1 for line in fh if needle in line.lower())
    locations["audit_occurrences"] = occur
    locations["audit_note"] = "audit trail is immutable by design (RGPD Art.30 balance)"
except Exception as e:
    # Report the failure instead of silently omitting the audit section.
    locations["audit_error"] = str(e)[:100]

print(json.dumps(results, ensure_ascii=False))
PY
rc=$?

# Log completion (best effort, same as the request entry above).
"$AUDIT_LOG" "rgpd" "forget_completed" "$ID" "{}" >/dev/null 2>&1

# Report the purge step's status, not the audit logger's.
exit "$rc"