#!/bin/bash
#
# Self-refining Q&A: sends a question to an LLM, has the model critique its
# own answer, and iterates until the critique score clears a threshold.
#
# Usage:  script.sh <question words...>   (all arguments joined into one question)
# Output: a single JSON object on stdout.
# Env:    NVIDIA_NIM_KEY — API key; loaded from /etc/weval/secrets.env if present.
set -u

# All positional arguments, joined with spaces, form the question.
Q="$*"

# Keep the error machine-readable, matching the script's JSON output contract.
if [ -z "$Q" ]; then
  printf '%s\n' '{"error":"need question"}'
  exit 1
fi

# Best-effort load of the API key; the file may legitimately be absent
# (key already exported in the environment), hence the explicit || true.
# shellcheck disable=SC1091
source /etc/weval/secrets.env 2>/dev/null || true

# Hand the question and key to the embedded Python via the environment,
# avoiding shell-quoting/interpolation issues inside the quoted heredoc.
export Q NVIDIA_NIM_KEY
# Everything below runs in an embedded Python interpreter; the quoted 'PY'
# delimiter stops the shell from expanding anything inside the heredoc.
python3 <<'PY'
import os
import json
import re
import urllib.request

# Question text handed over by the wrapper shell script.
q = os.environ['Q']

# NVIDIA NIM OpenAI-compatible chat-completions endpoint.
nv_url = "https://integrate.api.nvidia.com/v1/chat/completions"

# Empty string when the key is unset; requests will then fail and be
# reported in-band by llm() rather than crashing the script.
nv_key = os.environ.get('NVIDIA_NIM_KEY','')
def llm(prompt, max_tok=400):
    """Send a single-turn chat request to NVIDIA NIM and return the reply text.

    Args:
        prompt:  user message content.
        max_tok: completion token budget for this call.

    Returns:
        The assistant message content, '' when the response carries no
        choices, or a short in-band 'ERR:<reason>' string on any failure
        (network, HTTP, bad JSON).  Callers treat the result as plain text,
        so errors are returned rather than raised.
    """
    try:
        body = json.dumps({
            "model": "meta/llama-3.1-8b-instruct",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tok,
        }).encode()
        req = urllib.request.Request(
            nv_url,
            data=body,
            headers={
                "Authorization": "Bearer " + nv_key,
                "Content-Type": "application/json",
            },
        )
        # Context manager closes the HTTP response even if json.loads raises;
        # the original leaked the response object.
        with urllib.request.urlopen(req, timeout=15) as resp:
            d = json.loads(resp.read())
        return d.get('choices', [{}])[0].get('message', {}).get('content', '')
    except Exception as e:
        # Truncated in-band error marker keeps the final JSON output compact.
        return f"ERR:{str(e)[:60]}"
iterations = []

# First draft of the answer.
answer = llm(q, 300)

MAX_ITERS = 3  # upper bound on critique/refine rounds
for i in range(MAX_ITERS):
    # Ask the model to grade its own answer; strict-JSON instruction lets a
    # regex pull the score back out even if the model adds surrounding prose.
    critique = llm(f"Score this answer 1-10 for accuracy/completeness/clarity. Output JSON ONLY: {{\"score\":N,\"issues\":\"...\"}}\n\nAnswer: {answer[:500]}", 150)

    # Tolerant extraction; default to a middling 5 when no score is found.
    m = re.search(r'"score"\s*:\s*(\d+)', critique)
    score = int(m.group(1)) if m else 5

    iterations.append({"iter": i + 1, "answer": answer[:300],
                       "score": score, "critique": critique[:200]})

    if score >= 7:
        break

    # Refine only when another scoring pass remains.  The original refined on
    # the last pass too, which produced a final_answer that was never scored
    # (final_score then described the previous answer) and spent an extra
    # LLM call whose result was never evaluated.
    if i + 1 < MAX_ITERS:
        answer = llm(f"Original question: {q}\n\nPrevious answer: {answer[:400]}\n\nCritique: {critique[:200]}\n\nWrite IMPROVED answer addressing the critique:", 350)

# Single JSON object on stdout: full iteration trail plus the last scored
# answer and whether it met the >=7 convergence threshold.
print(json.dumps({
    "question": q,
    "iterations": iterations,
    "final_answer": answer[:600],
    "final_score": iterations[-1]["score"] if iterations else 0,
    "converged": iterations[-1]["score"] >= 7 if iterations else False,
}, ensure_ascii=False))
PY