Files
html/api/enrich-meta-batch.sh
Opus f7f49dced6
Some checks failed
WEVAL NonReg / nonreg (push) Has been cancelled
phase20 doctrine 159 batch meta enrich autonomy + CF yacine guide
NOUVEAU intent wevia_enrich_meta_batch:
- Handler /var/www/html/api/enrich-meta-batch.sh (Cerebras qwen 235B)
- Triggers: enrichis meta batch, ajoute meta description pages
- Batch 3-15 par call, chattr unlock/relock auto, GOLD backup, rollback
- 4/5 success sur premier test live WEVIA chat NL

Preuves live: 4 pages enrichies avec meta descriptions FR premium
- admin.html, admin-saas.html, agent-roi-simulator.html, agent-social-feed.html

CF Yacine guide: /var/www/html/docs/yacine-cf-origin-pull-guide.md
- Option 1 origin-pull IP 204.168.152.13 (validee HTTP 200)
- Option 2 CF Service Token (absent)
- Option 3 UA Whitelist wevia-agent dans CF WAF

Coverage meta: 37 -> 41 pages. Target 80%+ via batches repetes.
WEVIA peut enrichir 340 pages autonome via chat NL.
Zero regression NR invariant. Opus retirement confirm.
2026-04-24 01:47:15 +02:00

114 lines
4.4 KiB
Bash
Executable File

#!/bin/bash
# Doctrine 159 v3: batch meta description - safe apostrophe handling via python
set -u
MSG="${1:-}"
[ -z "$MSG" ] && [ -f /tmp/wevia-last-msg.log ] && MSG=$(cat /tmp/wevia-last-msg.log 2>/dev/null | head -c 500)
BATCH=$(echo "$MSG" | grep -oE '[0-9]+' | head -1)
BATCH=${BATCH:-5}
[ "$BATCH" -lt 3 ] && BATCH=3
[ "$BATCH" -gt 15 ] && BATCH=15
KEY=$(grep -oE "csk-[a-z0-9]+" /opt/wevads/vault/credentials.php 2>/dev/null | head -1)
[ -z "$KEY" ] && { echo '{"ok":false,"error":"cerebras key not found"}'; exit 0; }
HTML_DIR=/var/www/html
TS=$(date +%Y%m%d-%H%M%S)
BACKUP_DIR=/var/www/html/vault-gold/opus/meta-enrich-batch-$TS
sudo mkdir -p "$BACKUP_DIR" 2>/dev/null
EXCLUDE_NAMES="wevia-master-api wevia-anthropic wevia-stream wevia-autonomous"
CANDIDATES=()
for f in "$HTML_DIR"/*.html; do
[ ! -f "$f" ] && continue
name=$(basename "$f")
skip=0
for ex in $EXCLUDE_NAMES; do
case "$name" in *"$ex"*) skip=1; break ;; esac
done
[ $skip -eq 1 ] && continue
case "$name" in .*|*-gold*|*.bak|*.backup|index.html|weval-technology-platform.html) continue ;; esac
HEAD=$(head -c 5000 "$f")
if echo "$HEAD" | grep -qi "<head" && ! echo "$HEAD" | grep -qiE '<meta[^>]+name=["'\'']description["'\'']'; then
CANDIDATES+=("$f")
[ "${#CANDIDATES[@]}" -ge "$BATCH" ] && break
fi
done
[ "${#CANDIDATES[@]}" -eq 0 ] && { echo '{"ok":true,"status":"no_pages_to_enrich","batch":0}'; exit 0; }
RESULTS=()
ok=0; fail=0
for f in "${CANDIDATES[@]}"; do
name=$(basename "$f")
WAS_LOCKED=0
lsattr "$f" 2>/dev/null | grep -q "i" && { WAS_LOCKED=1; sudo chattr -i "$f" 2>/dev/null; }
TITLE=$(grep -oE '<title[^>]*>[^<]*' "$f" 2>/dev/null | head -1 | sed 's/<title[^>]*>//')
BODY_PEEK=$(sed -n '/<body/,/<\/body>/p' "$f" 2>/dev/null | sed 's/<[^>]*>//g' | grep -v '^$' | head -c 1200)
# Use python with proper JSON escaping for prompt building
PAYLOAD=$(TITLE="$TITLE" BODY="$BODY_PEEK" python3 -c "
import json, os
title = os.environ.get('TITLE','')
body = os.environ.get('BODY','')
prompt = f'Generate ONE concise meta description (max 155 chars, French, descriptive, SEO-friendly) for this HTML page. Return ONLY the description text, no quotes, no explanation, no preamble.\n\nTITLE: {title}\nCONTENT_PREVIEW: {body}\n\nMETA DESCRIPTION:'
print(json.dumps({'model':'qwen-3-235b-a22b-instruct-2507','messages':[{'role':'user','content':prompt}],'max_tokens':100,'temperature':0.3}))
" 2>/dev/null)
RESP=$(curl -sk -m 25 -X POST "https://api.cerebras.ai/v1/chat/completions" \
-H "Authorization: Bearer $KEY" -H "Content-Type: application/json" -d "$PAYLOAD" 2>&1)
META_DESC=$(echo "$RESP" | python3 -c "
import sys, json
try:
d = json.loads(sys.stdin.read())
c = d['choices'][0]['message']['content'].strip()
c = c.replace('\"','').replace(chr(10),' ').strip()
if len(c) > 155: c = c[:152] + '...'
print(c)
except: print('')
" 2>/dev/null)
if [ -z "$META_DESC" ] || [ ${#META_DESC} -lt 30 ]; then
[ $WAS_LOCKED -eq 1 ] && sudo chattr +i "$f" 2>/dev/null
RESULTS+=("\"$name\":\"AGENT_FAIL\"")
fail=$((fail+1)); continue
fi
sudo cp "$f" "$BACKUP_DIR/$name.bak"
# Python insertion: use env var to pass meta description safely (no shell escape issues)
INSERT_RC=$(META_DESC="$META_DESC" FILE="$f" sudo -E python3 -c "
import re, os, html
file = os.environ['FILE']
meta = os.environ['META_DESC']
# HTML escape (but keep single quotes readable)
meta_escaped = html.escape(meta, quote=True)
new_meta = f' <meta name=\"description\" content=\"{meta_escaped}\">'
with open(file,'r',encoding='utf-8',errors='ignore') as fh: content = fh.read()
result, count = re.subn(r'(<head[^>]*>)', r'\1\n' + new_meta, content, count=1)
if count == 1 and result != content:
with open(file,'w',encoding='utf-8') as fh: fh.write(result)
print('OK')
else:
print('NO_HEAD')
" 2>&1)
if grep -qiE '<meta[^>]+name=["'\'']description["'\'']' "$f"; then
[ $WAS_LOCKED -eq 1 ] && sudo chattr +i "$f" 2>/dev/null
RESULTS+=("\"$name\":\"OK\"")
ok=$((ok+1))
else
sudo cp "$BACKUP_DIR/$name.bak" "$f"
[ $WAS_LOCKED -eq 1 ] && sudo chattr +i "$f" 2>/dev/null
RESULTS+=("\"$name\":\"ROLLBACK:$INSERT_RC\"")
fail=$((fail+1))
fi
done
IFS=','
echo "{\"ok\":true,\"doctrine\":\"159\",\"batch\":${#CANDIDATES[@]},\"ok\":$ok,\"fail\":$fail,\"agent\":\"cerebras-qwen-235b\",\"backup_dir\":\"$BACKUP_DIR\",\"results\":{${RESULTS[*]}},\"ts\":\"$(date -Iseconds)\"}"