#!/bin/bash
#
# Self-refining Q&A: sends a question to an LLM, has the model critique its
# own answer, and iterates until the critique score clears a threshold.
#
# Usage:  script.sh <question words...>   (all arguments joined into one question)
# Output: a single JSON object on stdout.
# Env:    NVIDIA_NIM_KEY — API key; loaded from /etc/weval/secrets.env if present.
set -u

# All positional arguments, joined with spaces, form the question.
Q="$*"

# Keep the error machine-readable, matching the script's JSON output contract.
if [ -z "$Q" ]; then
  printf '%s\n' '{"error":"need question"}'
  exit 1
fi

# Best-effort load of the API key; the file may legitimately be absent
# (key already exported in the environment), hence the explicit || true.
# shellcheck disable=SC1091
source /etc/weval/secrets.env 2>/dev/null || true

# Hand the question and key to the embedded Python via the environment,
# avoiding shell-quoting/interpolation issues inside the quoted heredoc.
export Q NVIDIA_NIM_KEY
# Everything below runs in an embedded Python interpreter; the quoted 'PY'
# delimiter stops the shell from expanding anything inside the heredoc.
python3 <<'PY'
import os
import json
import re
import urllib.request

# Question text handed over by the wrapper shell script.
q = os.environ['Q']

# NVIDIA NIM OpenAI-compatible chat-completions endpoint.
nv_url = "https://integrate.api.nvidia.com/v1/chat/completions"

# Empty string when the key is unset; requests will then fail and be
# reported in-band by llm() rather than crashing the script.
nv_key = os.environ.get('NVIDIA_NIM_KEY','')
def llm(prompt, max_tok=400):
    """Send a single-turn chat request to NVIDIA NIM and return the reply text.

    Args:
        prompt:  user message content.
        max_tok: completion token budget for this call.

    Returns:
        The assistant message content, '' when the response carries no
        choices, or a short in-band 'ERR:<reason>' string on any failure
        (network, HTTP, bad JSON).  Callers treat the result as plain text,
        so errors are returned rather than raised.
    """
    try:
        body = json.dumps({
            "model": "meta/llama-3.1-8b-instruct",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tok,
        }).encode()
        req = urllib.request.Request(
            nv_url,
            data=body,
            headers={
                "Authorization": "Bearer " + nv_key,
                "Content-Type": "application/json",
            },
        )
        # Context manager closes the HTTP response even if json.loads raises;
        # the original leaked the response object.
        with urllib.request.urlopen(req, timeout=15) as resp:
            d = json.loads(resp.read())
        return d.get('choices', [{}])[0].get('message', {}).get('content', '')
    except Exception as e:
        # Truncated in-band error marker keeps the final JSON output compact.
        return f"ERR:{str(e)[:60]}"
iterations = []

# First draft of the answer.
answer = llm(q, 300)

MAX_ITERS = 3  # upper bound on critique/refine rounds
for i in range(MAX_ITERS):
    # Ask the model to grade its own answer; strict-JSON instruction lets a
    # regex pull the score back out even if the model adds surrounding prose.
    critique = llm(f"Score this answer 1-10 for accuracy/completeness/clarity. Output JSON ONLY: {{\"score\":N,\"issues\":\"...\"}}\n\nAnswer: {answer[:500]}", 150)

    # Tolerant extraction; default to a middling 5 when no score is found.
    m = re.search(r'"score"\s*:\s*(\d+)', critique)
    score = int(m.group(1)) if m else 5

    iterations.append({"iter": i + 1, "answer": answer[:300],
                       "score": score, "critique": critique[:200]})

    if score >= 7:
        break

    # Refine only when another scoring pass remains.  The original refined on
    # the last pass too, which produced a final_answer that was never scored
    # (final_score then described the previous answer) and spent an extra
    # LLM call whose result was never evaluated.
    if i + 1 < MAX_ITERS:
        answer = llm(f"Original question: {q}\n\nPrevious answer: {answer[:400]}\n\nCritique: {critique[:200]}\n\nWrite IMPROVED answer addressing the critique:", 350)

# Single JSON object on stdout: full iteration trail plus the last scored
# answer and whether it met the >=7 convergence threshold.
print(json.dumps({
    "question": q,
    "iterations": iterations,
    "final_answer": answer[:600],
    "final_score": iterations[-1]["score"] if iterations else 0,
    "converged": iterations[-1]["score"] >= 7 if iterations else False,
}, ensure_ascii=False))
PY