91 lines
3.1 KiB
Python
91 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
Sync SKILL.md files to the Qdrant weval_skills collection.

Reads Markdown and JSON files from the skill directories under /opt,
embeds them via Ollama, and upserts them to Qdrant.
"""
|
|
import hashlib
import json
import os
import sys
import time
import urllib.request
|
|
|
|
# Base URLs of the local Ollama (embeddings) and Qdrant (vector store)
# HTTP services, and the Qdrant collection that receives the points.
OLLAMA_URL = "http://127.0.0.1:11434"
QDRANT_URL = "http://127.0.0.1:6333"
COLLECTION = "weval_skills"

# Root directories that are walked recursively for files to sync.
# Directories that do not exist are skipped at run time.
SKILL_DIRS = [
    "/opt/antigravity-awesome-skills",
    "/opt/deer-flow/skills",
    "/opt/paperclip-skills",
    "/opt/mxyhi_ok-skills",
    "/opt/huggingface-skills",
    "/opt/wevia-brain/cognitive",
    "/opt/wevia-brain/knowledge",
]
|
|
|
|
def embed(text):
    """Return an embedding vector for *text* from the Ollama server.

    Posts the first 500 characters of *text* to ``{OLLAMA_URL}/api/embed``
    with the ``all-minilm`` model and returns the first entry of the
    ``embeddings`` list found in the JSON reply.

    Returns ``None`` when the request fails, the reply cannot be parsed, or
    the reply contains no embeddings, so the caller can count the item as an
    error and carry on.
    """
    body = json.dumps({"model": "all-minilm", "input": text[:500]}).encode()
    req = urllib.request.Request(
        f"{OLLAMA_URL}/api/embed",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    try:
        # The context manager closes the HTTP response (and its socket) even
        # when reading or decoding the body fails.
        with urllib.request.urlopen(req, timeout=10) as resp:
            reply = json.loads(resp.read())
        vecs = reply.get("embeddings", [])
        return vecs[0] if vecs else None
    except Exception:
        # Deliberately best-effort: any network, HTTP or parsing problem maps
        # to None. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit stop the script.
        return None
|
|
|
|
def upsert_point(point_id, vector, payload):
    """Upsert one point into the Qdrant collection.

    Sends a PUT request to ``{QDRANT_URL}/collections/{COLLECTION}/points``
    whose JSON body holds a single point built from *point_id*, *vector*
    and *payload*.

    Returns ``True`` when the request completes without raising (``urlopen``
    raises on HTTP error statuses), ``False`` otherwise.
    """
    body = json.dumps(
        {"points": [{"id": point_id, "vector": vector, "payload": payload}]}
    ).encode()
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points",
        data=body,
        headers={"Content-Type": "application/json"},
        method="PUT",
    )
    try:
        # Open the response in a context manager so the connection is closed
        # instead of being left for the garbage collector.
        with urllib.request.urlopen(req, timeout=10):
            return True
    except Exception:
        # Best-effort: report failure to the caller rather than aborting the
        # whole sync. KeyboardInterrupt / SystemExit are not swallowed.
        return False
|
|
|
|
def main():
    """Walk every directory in SKILL_DIRS and sync its files to Qdrant.

    For each ``.md`` / ``.json`` file with at least 50 characters of content:
    embed the first 500 characters, then upsert a point whose payload holds
    the file path, the basename of its root directory, the first 1000
    characters of content and a type (``"skill"`` for files named
    ``SKILL.md``, ``"knowledge"`` otherwise).

    Failures are counted, not raised; a summary line is printed at the end.
    """
    synced = 0
    errors = 0
    for skill_dir in SKILL_DIRS:
        if not os.path.exists(skill_dir):
            continue
        source = os.path.basename(skill_dir)
        for root, _dirs, files in os.walk(skill_dir):
            for fname in files:
                # "SKILL.md" itself ends with ".md", so a single suffix test
                # selects exactly the same files as the original filter.
                if not fname.endswith((".md", ".json")):
                    continue
                fpath = os.path.join(root, fname)
                try:
                    # Read only the 2000 characters that are used, and close
                    # the file promptly.
                    with open(fpath, encoding="utf-8") as fh:
                        content = fh.read(2000)
                    if len(content) < 50:
                        continue

                    # Stable ID from the path: the first 8 hex digits of the
                    # MD5 digest as a 32-bit integer, so re-running the sync
                    # overwrites the same point. NOTE: 32 bits can collide
                    # for distinct paths; the scheme is kept because changing
                    # it would alter the IDs of points already stored.
                    pid = int(hashlib.md5(fpath.encode()).hexdigest()[:8], 16)

                    vec = embed(content[:500])
                    if not vec:
                        errors += 1
                        continue

                    payload = {
                        "file": fpath,
                        "source": source,
                        "content": content[:1000],
                        "type": "skill" if fname == "SKILL.md" else "knowledge",
                    }

                    if upsert_point(pid, vec, payload):
                        synced += 1
                        # Report progress (and pause briefly) only when the
                        # counter actually reaches a new multiple of 50;
                        # checking after failed upserts too would repeat the
                        # same message and sleep again.
                        if synced % 50 == 0:
                            print(f" Synced {synced}...")
                            time.sleep(0.5)
                    else:
                        errors += 1
                except Exception as exc:
                    # Per-file boundary: one unreadable or undecodable file
                    # must not abort the run, but say which one failed.
                    errors += 1
                    print(f"Error processing {fpath}: {exc}", file=sys.stderr)

    print(f"Sync complete: {synced} upserted, {errors} errors")
|
|
|
|
# Run the sync only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|