28 lines
1.1 KiB
Python
Executable File
28 lines
1.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import http.client
import json
import os
import sys
import urllib.request

import psycopg2
|
|
# PostgreSQL DSN passed to psycopg2.connect().
# NOTE(review): the fallback embeds a plaintext password in source. Set
# WEVIA_DB_DSN in the environment so credentials need not live in the file;
# the literal is kept only so existing deployments keep working unchanged.
DB = os.environ.get("WEVIA_DB_DSN") or (
    "host=localhost dbname=adx_system user=admin password=admin123"
)

# URL of the embeddings HTTP endpoint that embed() POSTs to.
# Override with WEVIA_EMBED_URL; the fallback is the original hard-coded host.
GPU = os.environ.get("WEVIA_EMBED_URL") or "http://88.198.4.195:11434/api/embeddings"
|
|
def embed(text):
    """Return the embedding for *text*, or None when it cannot be obtained.

    POSTs a JSON body with model ``nomic-embed-text`` and the first 2000
    characters of *text* as the prompt to the endpoint in ``GPU``, then
    returns the ``"embedding"`` field of the JSON reply.

    Args:
        text: The string to embed. Anything that is not a ``str`` is
            rejected without making a request.

    Returns:
        The value stored under ``"embedding"`` in the reply, or None if the
        input is not a string, the request or JSON decoding fails, or the
        reply is not a JSON object containing that key.
    """
    if not isinstance(text, str):
        return None

    payload = json.dumps(
        {"model": "nomic-embed-text", "prompt": text[:2000]}
    ).encode()

    try:
        req = urllib.request.Request(
            GPU, data=payload, headers={"Content-Type": "application/json"}
        )
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(req, timeout=30) as resp:
            reply = json.loads(resp.read())
    except (OSError, ValueError, http.client.HTTPException) as exc:
        # OSError covers URLError/HTTPError and socket timeouts; ValueError
        # covers malformed URLs and invalid JSON. KeyboardInterrupt and
        # SystemExit are deliberately NOT caught so the job can be stopped.
        print(f"embed failed: {exc}", file=sys.stderr)
        return None

    if not isinstance(reply, dict):
        return None
    return reply.get("embedding")
|
|
# Backfill embeddings: select up to 50 of the most recently created rows of
# admin.wevia_memory that have content longer than 20 characters but no
# embedding, embed each one, and store the result.
conn = psycopg2.connect(DB)
try:
    # The cursor context manager closes the cursor on every exit path,
    # including the early sys.exit() and any exception raised below.
    with conn.cursor() as cur:
        cur.execute("SELECT id, content FROM admin.wevia_memory WHERE embedding IS NULL AND content IS NOT NULL AND length(content)>20 ORDER BY created_at DESC LIMIT 50")
        rows = cur.fetchall()

        if not rows:
            print("Nothing to vectorize")
            sys.exit(0)

        ok = 0
        for rid, content in rows:
            emb = embed(content)
            if not emb:
                # embed() failed or returned an empty result; leave the row
                # untouched so a later run can pick it up again.
                continue
            # str(emb) is the text form handed to the ::vector cast.
            cur.execute("UPDATE admin.wevia_memory SET embedding=%s::vector, updated_at=NOW() WHERE id=%s", (str(emb), rid))
            ok += 1
            # Commit in batches of 10 so an interruption loses little work.
            if ok % 10 == 0:
                conn.commit()

        # Flush the final partial batch.
        conn.commit()
finally:
    # Always release the connection, whether we finished, exited early,
    # or failed part-way through.
    conn.close()

print(f"Vectorized {ok}/{len(rows)}")
|