#!/bin/bash
# weval: send one question to three persona "branches" (NVIDIA NIM) and
# synthesize them into a single answer. Emits one JSON object on stdout.
# Usage: <script> <question words...>

Q="$*"

# Refuse to run without a question; emit machine-readable JSON like the rest
# of the script so callers can always parse stdout.
[ -z "$Q" ] && { echo '{"error":"need question"}'; exit 1; }

# Best-effort: the secrets file may be absent (e.g. dev machines); the Python
# side tolerates empty keys, so suppression here is intentional.
# shellcheck disable=SC1091
source /etc/weval/secrets.env 2>/dev/null

# Hand the question and API keys to the embedded Python via the environment
# (avoids quoting/injection problems of interpolating "$Q" into the heredoc).
# NOTE(review): GEMINI_KEY and HF_TOKEN are exported but unused below —
# presumably consumed by other weval scripts; confirm before removing.
export Q NVIDIA_NIM_KEY GEMINI_KEY HF_TOKEN
python3 <<'PY'
import os, json, urllib.request
q = os.environ['Q']
def ask(url, key, model, system, q, max_tok=200):
    """POST one chat-completion request and return the reply text.

    Args:
        url:     OpenAI-compatible /chat/completions endpoint.
        key:     bearer token (may be empty; the server will then reject).
        model:   model identifier string.
        system:  system-role prompt steering the persona.
        q:       user question text.
        max_tok: completion token cap (default 200).

    Returns:
        Assistant message content truncated to 500 chars, or an
        "ERR: ..." string on any failure -- callers never see an exception.
    """
    try:
        body = json.dumps({
            "model": model,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": q},
            ],
            "max_tokens": max_tok,
        }).encode()
        req = urllib.request.Request(
            url,
            data=body,
            headers={
                "Authorization": "Bearer " + key,
                "Content-Type": "application/json",
            },
        )
        # Close the HTTP response deterministically (original leaked it).
        with urllib.request.urlopen(req, timeout=15) as resp:
            d = json.loads(resp.read())
        # Missing keys degrade to '' rather than raising; an empty 'choices'
        # list would raise IndexError and fall into the except path.
        return d.get('choices', [{}])[0].get('message', {}).get('content', '')[:500]
    except Exception as e:
        # Deliberate catch-all: the script must always emit JSON, never crash.
        return f"ERR: {str(e)[:60]}"
nv_url = "https://integrate.api.nvidia.com/v1/chat/completions"
nv_key = os.environ.get('NVIDIA_NIM_KEY', '')
# Single source of truth for the model (was repeated in 4 call sites).
nv_model = "meta/llama-3.1-8b-instruct"

# Fan the same question out to three persona branches (analytical, creative,
# practical). Dict insertion order is preserved (Python 3.7+), so the JSON
# output keeps the original key order.
personas = {
    "analytical": "You are analytical: decompose logically step-by-step.",
    "creative": "You are creative: find unexpected angles.",
    "practical": "You are practical: focus on actionable, concrete steps.",
}
branches = {name: ask(nv_url, nv_key, nv_model, sys_prompt, q)
            for name, sys_prompt in personas.items()}

# Second pass: ask the model to synthesize the three branch answers.
sel_prompt = (
    f"QUESTION: {q}\n\nANALYTICAL:\n{branches['analytical']}\n\n"
    f"CREATIVE:\n{branches['creative']}\n\nPRACTICAL:\n{branches['practical']}\n\n"
    "Synthesize the BEST answer combining strengths of all 3 (max 150 words):"
)
final = ask(nv_url, nv_key, nv_model,
            "You synthesize multiple perspectives into best answer.",
            sel_prompt, 250)

# ensure_ascii=False keeps non-ASCII answers readable in the emitted JSON.
print(json.dumps({"question": q, "branches": branches, "synthesis": final},
                 ensure_ascii=False))
PY