Files
weval-l99/tools/vault-embed.py
2026-04-13 12:43:21 +02:00

73 lines
2.0 KiB
Python

#!/usr/bin/env python3
"""Embed obsidian vault markdown into Qdrant for semantic search"""
import os, json, hashlib, requests
VAULT = "/opt/obsidian-vault"
QDRANT = "http://127.0.0.1:6333"
OLLAMA = "http://127.0.0.1:11435"
COLLECTION = "obsidian_vault"
# Create collection if not exists
try:
    r = requests.get(f"{QDRANT}/collections/{COLLECTION}", timeout=5)
    if r.status_code != 200:
        # Any non-200 answer to the lookup is treated as "collection missing".
        # The vector size of 384 is the same length the scan loop requires
        # of every embedding before it accepts it.
        created = requests.put(
            f"{QDRANT}/collections/{COLLECTION}",
            json={"vectors": {"size": 384, "distance": "Cosine"}},
            timeout=10,
        )
        # Fail loudly instead of announcing a collection that Qdrant refused
        # to create; the HTTPError is reported by the handler below.
        created.raise_for_status()
        print(f"Created collection {COLLECTION}")
except Exception as e:
    # Top-level boundary of the script: report the problem and stop, since
    # nothing later can work without the collection.
    print(f"Qdrant error: {e}")
    # SystemExit is raised directly because the exit() helper is injected by
    # the site module and is not guaranteed to exist (e.g. under python -S).
    raise SystemExit(1)
# Get embedding from Ollama
def embed(text):
    """Return the embedding vector for *text* from the Ollama embed endpoint.

    Only the first 2000 characters of *text* are sent, using the
    "all-minilm" model.

    Returns the first entry of the response's "embeddings" list, or ``None``
    when the request fails, the server answers with an HTTP error status, the
    body is not a JSON object, or no embedding is present. Callers should
    treat any falsy result as "no embedding".
    """
    try:
        r = requests.post(
            f"{OLLAMA}/api/embed",
            json={"model": "all-minilm", "input": text[:2000]},
            timeout=10,
        )
        r.raise_for_status()
        embeddings = r.json().get("embeddings")
    except (requests.RequestException, ValueError, AttributeError):
        # RequestException: transport failure or HTTP error status.
        # ValueError: body is not valid JSON.
        # AttributeError: JSON decoded to something other than an object.
        # KeyboardInterrupt / SystemExit are deliberately NOT swallowed.
        return None
    if not isinstance(embeddings, list) or not embeddings:
        return None
    return embeddings[0]
# Scan vault
# Walk every directory below VAULT, embed each Markdown note and collect one
# Qdrant point per note in `points`.
points = []
idx = 0
for root, dirs, files in os.walk(VAULT):
    for f in files:
        if not f.endswith(".md"):
            continue
        path = os.path.join(root, f)
        # relpath strips only the leading vault directory; str.replace would
        # also delete any later occurrence of the prefix inside the path.
        rel = os.path.relpath(path, VAULT)
        try:
            # Explicit encoding so the result does not depend on the locale,
            # and a context manager so the handle is closed promptly.
            with open(path, encoding="utf-8") as fh:
                content = fh.read()
        except (OSError, UnicodeDecodeError) as e:
            # One unreadable note should not abort the whole run.
            print(f"Skipping {rel}: {e}")
            continue
        # Skip frontmatter for embedding
        if content.startswith("---"):
            parts = content.split("---", 2)
            # Only strip when a closing '---' exists; otherwise keep the text.
            if len(parts) >= 3:
                content = parts[2].strip()
        # The embedded text is the relative path plus the first 500 characters.
        vec = embed(f"{rel}: {content[:500]}")
        # Drop notes whose embedding failed or has an unexpected dimension.
        if not vec or len(vec) != 384:
            continue
        # NOTE(review): ids are positional, so the id assigned to a given note
        # changes between runs whenever files are added, removed or skipped.
        # Confirm whether stable per-file ids are wanted before changing this,
        # since it would alter the ids already stored in the collection.
        idx += 1
        points.append({
            "id": idx,
            "vector": vec,
            "payload": {
                "file": rel,
                "content": content[:500],
                # Character count of the note after frontmatter removal.
                "size": len(content),
                "source": "obsidian_vault",
            },
        })
# Upsert to Qdrant
# Send every collected point in a single request and report the outcome.
if points:
    try:
        r = requests.put(
            f"{QDRANT}/collections/{COLLECTION}/points",
            json={"points": points},
            timeout=30,
        )
    except requests.RequestException as e:
        # Transport-level failure (connection refused, timeout, ...).
        print(f"Qdrant error: {e}")
        raise SystemExit(1)
    if r.ok:
        print(f"Embedded {len(points)} notes into Qdrant ({r.status_code})")
    else:
        # Do not claim success when Qdrant rejected the upsert; exit non-zero
        # so callers such as cron or CI can detect the failure.
        print(f"Qdrant upsert failed ({r.status_code}): {r.text[:200]}")
        raise SystemExit(1)
else:
    print("No points to embed")