#!/usr/bin/env python3
import json, urllib.request, sys, os, hashlib

# Ollama endpoint that embed() POSTs its {"model", "prompt"} JSON request to.
OLLAMA = "http://127.0.0.1:11434/api/embeddings"
# Base URL of the Qdrant REST API; "/collections/<name>..." paths are appended to it.
QDRANT = "http://127.0.0.1:6333"

def embed(text):
    """Return the embedding vector for *text*, or ``None`` on failure.

    POSTs the first 2000 characters of ``text`` to the ``OLLAMA`` endpoint
    using the ``nomic-embed-text`` model and returns whatever is stored under
    the ``"embedding"`` key of the JSON reply (``None`` if the key is absent).

    The call is deliberately best-effort: any ordinary error (bad input,
    network failure, timeout, malformed reply) is reported on stderr and
    turned into ``None`` so callers can skip the item. ``KeyboardInterrupt``
    and ``SystemExit`` are *not* swallowed, so the script can be interrupted.
    """
    try:
        body = json.dumps({"model": "nomic-embed-text", "prompt": text[:2000]}).encode()
        req = urllib.request.Request(
            OLLAMA, data=body, headers={"Content-Type": "application/json"}
        )
        # Context manager closes the HTTP response (and its socket) promptly.
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read()).get("embedding")
    except Exception as exc:
        # Keep the "None means failed" contract, but leave a trace of why.
        print("embed failed: " + repr(exc), file=sys.stderr)
        return None

def qdrant_upsert(col, pid, vec, payload):
    """Upsert a single point into collection *col* and return the JSON reply.

    Sends ``PUT {QDRANT}/collections/{col}/points?wait=true`` with a body
    containing one point built from ``pid`` (id), ``vec`` (vector) and
    ``payload``.

    Returns the decoded JSON response. Errors raised by ``urlopen`` or by
    JSON (de)serialisation propagate to the caller.
    """
    body = json.dumps(
        {"points": [{"id": pid, "vector": vec, "payload": payload}]}
    ).encode()
    req = urllib.request.Request(
        QDRANT + "/collections/" + col + "/points?wait=true",
        data=body,
        headers={"Content-Type": "application/json"},
        method="PUT",
    )
    # Close the response explicitly: this runs once per chunk, so leaked
    # sockets would otherwise pile up over a long ingestion.
    with urllib.request.urlopen(req, timeout=10) as resp:
        return json.loads(resp.read())

def ensure_col(name, size):
    """Create the Qdrant collection *name* if it does not exist yet.

    Probes ``GET {QDRANT}/collections/{name}``. If that succeeds, nothing is
    done. If it answers HTTP 404, the collection is created with vectors of
    dimension ``size`` and ``Cosine`` distance, and ``Created: <name>`` is
    printed.

    Any other failure of the probe (connection error, timeout, non-404 HTTP
    status) is re-raised instead of being mistaken for "collection missing".
    Errors from the creation request propagate as well.
    """
    import urllib.error  # local import keeps this block self-contained

    url = QDRANT + "/collections/" + name
    try:
        with urllib.request.urlopen(url, timeout=5):
            return  # collection already exists
    except urllib.error.HTTPError as exc:
        if exc.code != 404:
            raise
    body = json.dumps({"vectors": {"size": size, "distance": "Cosine"}}).encode()
    req = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": "application/json"},
        method="PUT",
    )
    with urllib.request.urlopen(req, timeout=10):
        pass  # only the status matters; the body is not used
    print("Created: "+name)
def run(kb_dir="/opt/wevia-brain/knowledge"):
    """Embed every knowledge file under *kb_dir* and upsert it into Qdrant.

    Steps:
      1. Recursively collect files ending in ``.md``, ``.json`` or ``.txt``.
      2. Embed the probe string ``"test"``; if that fails, print
         ``Ollama embed failed`` and return. Otherwise the probe's length is
         used as the vector size for the ``wevia_kb`` collection.
      3. For each file with at least 50 characters, cut 1500-character
         windows starting every 1200 characters, embed each window and
         upsert it with a payload holding the file's base name, the chunk
         index and the first 500 characters of the chunk.

    Files that cannot be read are skipped with a warning on stderr, and
    undecodable bytes are replaced rather than aborting the run. Chunks whose
    embedding fails are skipped. Errors from ``ensure_col`` and
    ``qdrant_upsert`` propagate.

    Prints ``Done: <n> chunks from <m> files`` at the end, where ``m`` is the
    number of matching files found (including skipped ones).
    """
    files = []
    for root, _dirs, fnames in os.walk(kb_dir):
        for fname in fnames:
            if fname.endswith((".md", ".json", ".txt")):
                files.append(os.path.join(root, fname))

    probe = embed("test")
    if not probe:
        print("Ollama embed failed")
        return
    ensure_col("wevia_kb", len(probe))

    ok = 0
    for fp in files:
        try:
            # Explicit encoding + errors="replace": one oddly-encoded file
            # must not abort the whole ingestion.
            with open(fp, encoding="utf-8", errors="replace") as fh:
                content = fh.read()
        except OSError as exc:
            print("Skipping " + fp + ": " + str(exc), file=sys.stderr)
            continue
        if len(content) < 50:
            continue
        for ci, start in enumerate(range(0, len(content), 1200)):
            chunk = content[start:start + 1500]
            vec = embed(chunk)
            if not vec:
                continue
            # Point id: first 8 hex digits (32 bits) of md5(path + chunk index),
            # so re-running on the same path overwrites the same points.
            # Kept unchanged on purpose so ids stay compatible with points
            # that were already stored.
            pid = int(hashlib.md5((fp+str(ci)).encode()).hexdigest()[:8],16)
            qdrant_upsert(
                "wevia_kb",
                pid,
                vec,
                {"file": os.path.basename(fp), "chunk": ci, "text": chunk[:500]},
            )
            ok += 1
    print("Done: "+str(ok)+" chunks from "+str(len(files))+" files")

if __name__ == "__main__":
    # Script entry point: ingest the default knowledge directory.
    run()