61 lines
2.0 KiB
Python
61 lines
2.0 KiB
Python
|
|
import os, json, hashlib, urllib.request, time, sys
|
|
|
|
# Base URL of the local Ollama HTTP API; embed() posts to its /api/embed route.
OLLAMA = "http://127.0.0.1:11434"
# Base URL of the local Qdrant HTTP API that receives the vectors.
QDRANT = "http://127.0.0.1:6333"
# Name of the Qdrant collection the skill vectors are written to.
COLLECTION = "weval_skills"
# Batch size. NOTE(review): nothing visible in this file reads BATCH; points
# are embedded and upserted one at a time -- confirm before relying on it.
BATCH = 50
|
|
|
|
def embed(text):
    """Return the embedding vector for *text* from the Ollama server.

    Only the first 500 characters of ``text`` are sent, using the
    ``all-minilm`` model, via ``POST {OLLAMA}/api/embed``.

    Args:
        text: The string to embed.

    Returns:
        The first entry of the ``"embeddings"`` list in the JSON reply, or
        an empty list when that key is missing or the list is empty.

    Raises:
        urllib.error.URLError: If the request fails or times out (15 s).
        json.JSONDecodeError: If the reply body is not valid JSON.
    """
    body = json.dumps({"model": "all-minilm", "input": text[:500]}).encode()
    req = urllib.request.Request(
        f"{OLLAMA}/api/embed",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(req, timeout=15) as resp:
        reply = json.loads(resp.read())
    # Treat an empty "embeddings" list like a missing key instead of letting
    # the [0] lookup raise IndexError; callers already skip falsy vectors.
    embeddings = reply.get("embeddings", [[]])
    if not embeddings:
        return []
    return embeddings[0]
|
|
|
|
def upsert(pid, vec, payload):
    """Write a single point to the Qdrant collection.

    Sends ``PUT {QDRANT}/collections/{COLLECTION}/points`` with a JSON body
    holding one point.

    Args:
        pid: Point id.
        vec: Embedding vector for the point.
        payload: JSON-serialisable metadata stored with the point.

    Raises:
        urllib.error.URLError: If the request fails, times out (10 s), or
            the server answers with an HTTP error status.
    """
    body = json.dumps(
        {"points": [{"id": pid, "vector": vec, "payload": payload}]}
    ).encode()
    req = urllib.request.Request(
        f"{QDRANT}/collections/{COLLECTION}/points",
        body,
        headers={"Content-Type": "application/json"},
        method="PUT",
    )
    # The reply body is not needed; the context manager only guarantees
    # that the connection is released.
    with urllib.request.urlopen(req, timeout=10):
        pass
|
|
|
|
# Collect skills
def collect_skills(top="/opt"):
    """Find SKILL.md files under *top* and return their leading text.

    Args:
        top: Directory tree to walk. Defaults to ``/opt``.

    Returns:
        A list of ``(path, name, content)`` tuples, where ``name`` is the
        name of the directory containing the file and ``content`` is at
        most the first 500 characters of the file. Files whose content is
        30 characters or shorter are left out.

    Unreadable files (I/O errors, or bytes that are not valid UTF-8 in the
    part that gets decoded) are skipped with a note on stderr, so one bad
    file does not abort the scan.
    """
    found = []
    for root, _dirs, files in os.walk(top):
        if "SKILL.md" not in files:
            continue
        path = os.path.join(root, "SKILL.md")
        try:
            # Read only what is kept, and close the handle deterministically.
            with open(path, encoding="utf-8") as fh:
                content = fh.read(500)
        except (OSError, UnicodeDecodeError) as exc:
            print(f"Skipping {path}: {exc}", file=sys.stderr)
            continue
        if len(content) > 30:
            name = os.path.basename(os.path.dirname(path))
            found.append((path, name, content))
    return found


skills = collect_skills()

print(f"Found {len(skills)} SKILL.md files")
|
|
|
|
# Batch embed + upsert
SYNC_LIMIT = 200  # Limit to 200 for speed
synced = 0
errors = 0
for path, name, content in skills[:SYNC_LIMIT]:
    try:
        vec = embed(content)
        if not vec:
            # No embedding came back: skip the item; neither counter moves.
            continue
        # Point id = first 32 bits of the path's MD5. It is derived from the
        # path so that re-running the script overwrites the same point.
        pid = int(hashlib.md5(path.encode()).hexdigest()[:8], 16)
        upsert(pid, vec, {"path": path, "name": name, "content": content[:200]})
    except Exception as e:
        # Best effort: count the failure, report it, and keep going.
        errors += 1
        print(f"  Error syncing {path}: {e}", file=sys.stderr)
        continue
    synced += 1
    # Checked only right after a successful sync, so each milestone is
    # printed once even when later items fail or are skipped.
    if synced % 25 == 0:
        print(f"  Synced {synced}...")

print(f"DONE: {synced} synced, {errors} errors")
|
|
|
|
# Check count
# Best-effort report of how many points the collection holds after the sync;
# any failure here is printed and does not affect the work already done.
try:
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(f"{QDRANT}/collections/{COLLECTION}", timeout=5) as r:
        d = json.loads(r.read())
    print(f"Qdrant now: {d.get('result',{}).get('points_count',0)} vectors")
except Exception as e:
    print(f"Check error: {e}")
|