#!/usr/bin/env python3
"""Purge GOLD backups: keep max 5 per basename (doctrine 59 no-delete = archive tar).

Files directly inside ``VAULT`` are grouped by a "basename" obtained by
stripping GOLD/date/timestamp suffixes from the file name.  For every group
holding more than ``KEEP`` files, the newest ``KEEP`` (by mtime) stay in place
and the older ones are packed into one ``.tar.gz`` under ``ARCHIVE_DIR``.

Safety rules enforced by this script:

* an original is removed only after the archive containing it has been
  written and closed without error;
* an existing archive is never overwritten (a numeric suffix is appended when
  the dated name is already taken, e.g. on a second run the same day);
* if anything goes wrong while building a group's archive, the partial
  archive is discarded and every original of that group is left untouched.
"""
import datetime
import glob
import os
import re
import sys
import tarfile
from collections import defaultdict
from pathlib import Path

VAULT = "/opt/wevads/vault"
ARCHIVE_DIR = f"{VAULT}/archived_gold_purge"
KEEP = 5
LOG = "/var/log/weval/gold-purge.log"

# Vault entries whose name starts with one of these are never considered.
_SKIP_PREFIXES = ("archived_", "auto-")

# Suffix-stripping patterns, applied in this order by group_basename().
# NOTE(review): the date alternative only matches stamps starting with
# "20260" -- confirm whether other years/months are meant to be covered.
# It is left unchanged here because it decides which files get purged.
_GOLD_OR_DATE_RE = re.compile(
    r"[._-]?(gold|GOLD|20260\d{3}[-_][\d]+|pre[-_].*)[-_.].*$"
)
_TRAILING_TIMESTAMP_RE = re.compile(r"[._-]?\d{14,}$")
_DOT_GOLD_RE = re.compile(r"\.gold-.*$", re.IGNORECASE)


def log(m):
    """Append *m* (timestamped) to ``LOG`` and echo it on stdout.

    A failure to write the log file is reported on stderr instead of being
    raised, so that a logging problem cannot abort a purge half-way.
    """
    line = f"[{datetime.datetime.now().isoformat()}] {m}\n"
    try:
        with open(LOG, "a") as f:
            f.write(line)
    except OSError as e:
        print(f"LOG_ERR {LOG}: {e}", file=sys.stderr)
    print(m)


def group_basename(name: str) -> str:
    """Return the grouping key for file name *name*.

    Strips, in order: a gold/GOLD/date/"pre-" marker and everything after it,
    a trailing run of 14+ digits (timestamp), and a case-insensitive
    ``.gold-...`` tail.
    """
    base = _GOLD_OR_DATE_RE.sub("", name, count=1)
    base = _TRAILING_TIMESTAMP_RE.sub("", base)
    # Case-insensitive, so it also covers the ".GOLD-" spelling.
    return _DOT_GOLD_RE.sub("", base)


def collect_groups(vault: str = VAULT):
    """Group the regular entries of *vault* by :func:`group_basename`.

    Returns a dict mapping basename -> list of ``(path, mtime)`` tuples.
    Directories and names starting with ``archived_`` / ``auto-`` are
    skipped; entries whose mtime cannot be read are logged and skipped.
    """
    groups = defaultdict(list)
    for path in glob.glob(os.path.join(glob.escape(vault), "*")):
        if os.path.isdir(path):
            continue
        name = os.path.basename(path)
        if name.startswith(_SKIP_PREFIXES):
            continue
        try:
            mtime = os.path.getmtime(path)
        except OSError as e:
            # e.g. broken symlink or file removed since the listing
            log(f"STAT_ERR {path}: {e}")
            continue
        groups[group_basename(name)].append((path, mtime))
    return groups


def _unique_archive_path(archive_dir: str, base: str, keep: int) -> str:
    """Return a not-yet-existing archive path for *base* dated today."""
    stem = (
        f"{archive_dir}/{base.replace('/','_')}-olderthan-top{keep}-"
        f"{datetime.date.today().isoformat()}"
    )
    candidate = f"{stem}.tar.gz"
    counter = 1
    while os.path.exists(candidate):
        candidate = f"{stem}-{counter}.tar.gz"
        counter += 1
    return candidate


def _archive_group(base: str, paths, archive_dir: str, keep: int):
    """Archive *paths* into one tarball, then remove the originals.

    Returns ``(removed_count, removed_bytes)``.  All-or-nothing per group:
    if the archive cannot be completed, it is discarded and no original is
    removed.
    """
    tar_name = _unique_archive_path(archive_dir, base, keep)
    archived = []  # (path, size) for every file safely stored in the tar
    created = False
    try:
        # "x:gz" = exclusive creation: never truncates an existing archive.
        with tarfile.open(tar_name, "x:gz") as tar:
            created = True
            for path in paths:
                size = os.path.getsize(path)
                tar.add(path, arcname=os.path.basename(path))
                archived.append((path, size))
    except Exception as e:  # per-group boundary: log and keep the originals
        log(f"ARCH_ERR {base}: {e}")
        if created:
            # Drop the incomplete archive; the originals are still in place.
            try:
                os.remove(tar_name)
            except OSError as rm_err:
                log(f"ARCH_CLEANUP_ERR {tar_name}: {rm_err}")
        return 0, 0

    # The archive is closed and complete: now the originals may go.
    removed_count = 0
    removed_bytes = 0
    for path, size in archived:
        try:
            os.remove(path)
        except OSError as e:
            log(f"RM_ERR {path}: {e}")
            continue
        removed_count += 1
        removed_bytes += size
    return removed_count, removed_bytes


def purge(vault: str = VAULT, archive_dir: str = ARCHIVE_DIR, keep: int = KEEP):
    """Archive all but the *keep* newest files of every group in *vault*.

    Returns ``(total_archived, saved_bytes)`` counting only files that were
    both archived and removed from the vault.

    Raises:
        ValueError: if *keep* is negative.
    """
    if keep < 0:
        raise ValueError(f"keep must be >= 0, got {keep}")
    Path(archive_dir).mkdir(parents=True, exist_ok=True)

    total_archived = 0
    saved_bytes = 0
    for base, files in collect_groups(vault).items():
        if len(files) <= keep:
            continue
        # Newest first; everything past the first `keep` entries is archived.
        files.sort(key=lambda entry: entry[1], reverse=True)
        stale_paths = [path for path, _ in files[keep:]]
        count, freed = _archive_group(base, stale_paths, archive_dir, keep)
        total_archived += count
        saved_bytes += freed
    return total_archived, saved_bytes


def main():
    """Run the purge with the module-level settings and report totals."""
    Path(LOG).parent.mkdir(parents=True, exist_ok=True)
    total_archived, saved_bytes = purge()
    log(f"TOTAL_ARCHIVED {total_archived} files, saved_bytes={saved_bytes/1024/1024:.1f}MB")
    print(f"Archived {total_archived} old GOLDs, freed {saved_bytes/1024/1024:.1f}MB")


if __name__ == "__main__":
    main()