Files
wevads-platform/wevia-vectorize.py

28 lines
1.1 KiB
Python
Executable File

#!/usr/bin/env python3
import json
import os
import sys
import urllib.request

import psycopg2
# Connection settings for the vectorizer.
#
# Both values may be overridden through the environment so that credentials
# and hosts do not have to live in source control. The literals below are the
# original hard-coded values, kept as defaults for backward compatibility.
# SECURITY NOTE(review): the default DSN embeds a plaintext password and the
# default embedding endpoint is plain HTTP on a fixed IP; set the environment
# variables in production and rotate the password.

# libpq-style DSN passed to psycopg2.connect().
DB = os.environ.get(
    "WEVIA_DB_DSN",
    "host=localhost dbname=adx_system user=admin password=admin123",
)

# URL of the embeddings endpoint that embed() POSTs to.
GPU = os.environ.get(
    "WEVIA_EMBED_URL",
    "http://88.198.4.195:11434/api/embeddings",
)
def embed(text):
    """Return the embedding vector for *text*, or ``None`` on failure.

    Sends a JSON POST to the ``GPU`` endpoint asking the
    ``nomic-embed-text`` model to embed the first 2000 characters of
    *text*, waits at most 30 seconds, and returns the ``"embedding"`` field
    of the JSON reply.

    This is deliberately best-effort: any ordinary error (bad input type,
    network failure, timeout, malformed reply) is reported on stderr and
    turned into ``None`` so the caller can skip the row.
    ``KeyboardInterrupt`` and ``SystemExit`` are *not* swallowed, so the
    job can still be interrupted.

    Args:
        text: The string to embed; only ``text[:2000]`` is sent.

    Returns:
        The value stored under ``"embedding"`` in the response, or ``None``
        if the key is absent or the request failed.
    """
    # The whole request lives inside the try block on purpose: the original
    # contract is "never raise, return None", including for bad inputs.
    try:
        payload = {"model": "nomic-embed-text", "prompt": text[:2000]}
        req = urllib.request.Request(
            GPU,
            data=json.dumps(payload).encode(),
            headers={"Content-Type": "application/json"},
        )
        # Context manager guarantees the HTTP response is closed.
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read()).get("embedding")
    except Exception as exc:
        print(f"embed failed: {exc!r}", file=sys.stderr)
        return None
# --- Batch job -------------------------------------------------------------
# Select up to 50 of the most recently created rows in admin.wevia_memory
# that have content longer than 20 characters but no embedding yet, embed
# each one via embed(), and store the result. Rows whose embedding could not
# be obtained are skipped and remain eligible for a later run.
conn = psycopg2.connect(DB)
try:
    cur = conn.cursor()
    try:
        cur.execute(
            "SELECT id, content FROM admin.wevia_memory "
            "WHERE embedding IS NULL AND content IS NOT NULL AND length(content)>20 "
            "ORDER BY created_at DESC LIMIT 50"
        )
        rows = cur.fetchall()
        if not rows:
            print("Nothing to vectorize")
            # SystemExit still runs the finally blocks below, so the cursor
            # and connection are closed before the process exits.
            sys.exit(0)

        ok = 0  # number of rows successfully updated
        for rid, content in rows:
            emb = embed(content)
            if not emb:
                # embed() failed or returned an empty vector: skip this row.
                continue
            # str() of the embedding is sent as text and cast to vector
            # server-side by the ::vector cast in the statement.
            cur.execute(
                "UPDATE admin.wevia_memory SET embedding=%s::vector, updated_at=NOW() WHERE id=%s",
                (str(emb), rid),
            )
            ok += 1
            # Commit every 10 successful updates so a late failure does not
            # discard the whole batch.
            if ok % 10 == 0:
                conn.commit()
        # Flush whatever remains from the last partial batch.
        conn.commit()
    finally:
        cur.close()
finally:
    # Always release the connection, including when a query raises.
    conn.close()
print(f"Vectorized {ok}/{len(rows)}")