Files
weval-l99/qdrant-mini-sync.py

61 lines
2.0 KiB
Python

import os, json, hashlib, urllib.request, time, sys
# Base URL of the Ollama HTTP API that embed() posts to.
OLLAMA = "http://127.0.0.1:11434"
# Base URL of the Qdrant HTTP API used by upsert() and the final count check.
QDRANT = "http://127.0.0.1:6333"
# Name of the Qdrant collection the skill vectors are written to.
COLLECTION = "weval_skills"
# NOTE(review): BATCH is not referenced anywhere in the visible code; points
# are upserted one at a time. Presumably a leftover from a batched version —
# confirm before removing.
BATCH = 50
def embed(text):
    """Return an embedding vector for *text* from the local Ollama server.

    Posts the first 500 characters of *text* to ``{OLLAMA}/api/embed`` with
    the ``all-minilm`` model and returns the first entry of the response's
    ``"embeddings"`` list.

    Args:
        text: The string to embed; only ``text[:500]`` is sent.

    Returns:
        The first embedding as decoded from the JSON response, or an empty
        list when the response has no (or an empty/null) ``"embeddings"``
        field.

    Raises:
        OSError: If the request fails or times out after 15 seconds
            (``urllib.error.URLError`` is a subclass).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    body = json.dumps({"model": "all-minilm", "input": text[:500]}).encode()
    req = urllib.request.Request(
        f"{OLLAMA}/api/embed",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req, timeout=15) as resp:
        reply = json.loads(resp.read())
    # A missing, null, or empty "embeddings" value all mean "no vector";
    # return [] so callers can test truthiness instead of catching IndexError.
    embeddings = reply.get("embeddings") or [[]]
    return embeddings[0]
def upsert(pid, vec, payload):
    """Upsert a single point into the Qdrant collection ``COLLECTION``.

    Sends ``PUT {QDRANT}/collections/{COLLECTION}/points`` with one point.

    Args:
        pid: Point id to write.
        vec: Vector to store for the point.
        payload: JSON-serialisable payload stored alongside the vector.

    Returns:
        None.

    Raises:
        OSError: If the request fails, times out after 10 seconds, or the
            server answers with an HTTP error status
            (``urllib.error.HTTPError`` is a subclass).
    """
    body = json.dumps(
        {"points": [{"id": pid, "vector": vec, "payload": payload}]}
    ).encode()
    req = urllib.request.Request(
        f"{QDRANT}/collections/{COLLECTION}/points",
        data=body,
        headers={"Content-Type": "application/json"},
        method="PUT",
    )
    # The response body is not needed; the context manager just guarantees
    # the connection is closed rather than left to the garbage collector.
    with urllib.request.urlopen(req, timeout=10):
        pass
# Collect skills: every SKILL.md under /opt whose first 500 characters hold
# more than 30 characters of text. Each entry is (path, parent-dir name, text).
skills = []
for root, _dirs, files in os.walk("/opt"):
    # A directory holds at most one file with a given name.
    if "SKILL.md" not in files:
        continue
    path = os.path.join(root, "SKILL.md")
    try:
        # Read only the prefix that is actually used, and close the handle.
        with open(path, encoding="utf-8") as fh:
            content = fh.read(500)
    except (OSError, UnicodeDecodeError) as exc:
        # Best-effort scan: skip unreadable files, but say so instead of
        # swallowing every exception (including KeyboardInterrupt).
        print(f"Skipping {path}: {exc}", file=sys.stderr)
        continue
    if len(content) > 30:
        name = os.path.basename(os.path.dirname(path))
        skills.append((path, name, content))
print(f"Found {len(skills)} SKILL.md files")
# Embed + upsert each collected skill, one request pair per skill.
synced = 0
errors = 0
for path, name, content in skills[:200]:  # Limit to 200 for speed
    try:
        vec = embed(content)
        if not vec:
            # An empty embedding means nothing can be stored; report it as a
            # failure rather than dropping the skill from both counters.
            raise ValueError("empty embedding returned")
        # Point id is the first 32 bits of the path's MD5, so re-running the
        # script overwrites the same point for the same path.
        pid = int(hashlib.md5(path.encode()).hexdigest()[:8], 16)
        upsert(pid, vec, {"path": path, "name": name, "content": content[:200]})
    except Exception as e:
        # Keep going on per-skill failures, but surface what went wrong.
        errors += 1
        print(f" Error syncing {path}: {e}", file=sys.stderr)
    else:
        synced += 1
        # Report progress only when the counter actually advances, so a run
        # of failures does not repeat the same "Synced N..." line.
        if synced % 25 == 0:
            print(f" Synced {synced}...")
print(f"DONE: {synced} synced, {errors} errors")
# Check count: ask Qdrant how many points the collection now reports.
try:
    # Context manager closes the HTTP response instead of leaking it.
    with urllib.request.urlopen(f"{QDRANT}/collections/{COLLECTION}", timeout=5) as r:
        d = json.loads(r.read())
    # "result" may be absent or null in the reply; treat both as zero points.
    points = (d.get("result") or {}).get("points_count", 0)
    print(f"Qdrant now: {points} vectors")
except Exception as e:
    # Purely informational step: report the failure and exit normally.
    print(f"Check error: {e}")