#!/usr/bin/env python3
"""Index a directory of text files into a Qdrant collection.

Every ``.md``, ``.json`` and ``.txt`` file below the knowledge directory is
split into overlapping character windows, each window is embedded through
the HTTP endpoint configured in ``OLLAMA``, and the resulting vector is
upserted into the ``wevia_kb`` collection of the server configured in
``QDRANT``.
"""
import hashlib
import http.client
import json
import os
import sys
import urllib.error
import urllib.request

OLLAMA = "http://127.0.0.1:11434/api/embeddings"
QDRANT = "http://127.0.0.1:6333"

_JSON_HEADERS = {"Content-Type": "application/json"}


def _send_json(url, payload, method, timeout):
    """Send *payload* as a JSON body to *url* and return the raw reply bytes.

    The response object is closed before returning so sockets are not left
    to the garbage collector.
    """
    request = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers=_JSON_HEADERS,
        method=method,
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read()


def chunk_text(content, size=1500, step=1200):
    """Split *content* into windows of *size* characters taken every *step*.

    With the defaults consecutive windows overlap by 300 characters. An
    empty string yields an empty list.
    """
    return [content[i:i + size] for i in range(0, len(content), step)]


def point_id(fp, ci):
    """Return the point id for chunk number *ci* of the file at *fp*.

    The id is the first 32 bits of the MD5 digest of the path concatenated
    with the chunk index. Re-running the indexer over the same path
    therefore produces the same ids.
    """
    digest = hashlib.md5((fp + str(ci)).encode()).hexdigest()
    return int(digest[:8], 16)


def embed(text):
    """Return the embedding for *text*, or ``None`` when it cannot be obtained.

    Only the first 2000 characters of *text* are sent. Transport errors,
    HTTP errors and malformed replies are reported on stderr and turned
    into ``None``; interrupts such as Ctrl-C are no longer swallowed.
    """
    payload = {"model": "nomic-embed-text", "prompt": text[:2000]}
    try:
        reply = json.loads(_send_json(OLLAMA, payload, "POST", 30))
    except (OSError, ValueError, http.client.HTTPException) as exc:
        # URLError/HTTPError/timeouts are OSError; bad JSON is ValueError.
        print("embed error: " + str(exc), file=sys.stderr)
        return None
    if not isinstance(reply, dict):
        return None
    return reply.get("embedding")


def qdrant_upsert(col, pid, vec, payload):
    """Upsert one point into collection *col* and return the decoded reply.

    The request uses ``wait=true``. Network and HTTP errors propagate to
    the caller.
    """
    url = QDRANT + "/collections/" + col + "/points?wait=true"
    body = {"points": [{"id": pid, "vector": vec, "payload": payload}]}
    return json.loads(_send_json(url, body, "PUT", 10))


def ensure_col(name, size):
    """Create collection *name* with cosine vectors of *size* if it is absent.

    The collection is created only when the existence probe answers
    HTTP 404. Any other failure (server unreachable, timeout, 5xx) is
    raised instead of being mistaken for "collection missing".
    """
    url = QDRANT + "/collections/" + name
    try:
        with urllib.request.urlopen(url, timeout=5):
            return
    except urllib.error.HTTPError as exc:
        if exc.code != 404:
            raise
    _send_json(url, {"vectors": {"size": size, "distance": "Cosine"}}, "PUT", 10)
    print("Created: " + name)


def run(kb_dir="/opt/wevia-brain/knowledge"):
    """Embed and upsert every supported file found below *kb_dir*.

    Files shorter than 50 characters are ignored, as are chunks whose
    embedding could not be obtained. A summary line is printed at the end.
    Returns ``None``.
    """
    files = [
        os.path.join(root, fname)
        for root, _dirs, fnames in os.walk(kb_dir)
        for fname in fnames
        if fname.endswith((".md", ".json", ".txt"))
    ]

    # A probe embedding both checks the embedding service and supplies the
    # vector dimension needed to create the collection.
    test = embed("test")
    if not test:
        print("Ollama embed failed")
        return
    ensure_col("wevia_kb", len(test))

    ok = 0
    for fp in files:
        try:
            # Undecodable bytes are replaced so one bad file cannot abort
            # the whole run.
            with open(fp, encoding="utf-8", errors="replace") as handle:
                content = handle.read()
        except OSError as exc:
            print("skip " + fp + ": " + str(exc), file=sys.stderr)
            continue
        if len(content) < 50:
            continue
        for ci, chunk in enumerate(chunk_text(content)):
            vec = embed(chunk)
            if not vec:
                continue
            qdrant_upsert(
                "wevia_kb",
                point_id(fp, ci),
                vec,
                {"file": os.path.basename(fp), "chunk": ci, "text": chunk[:500]},
            )
            ok += 1
    print("Done: " + str(ok) + " chunks from " + str(len(files)) + " files")


if __name__ == "__main__":
    # An optional first argument overrides the default knowledge directory.
    if len(sys.argv) > 1:
        run(sys.argv[1])
    else:
        run()