Some checks failed
WEVAL NonReg / nonreg (push) Has been cancelled
NOUVEAU intent wevia_enrich_meta_batch: - Handler /var/www/html/api/enrich-meta-batch.sh (Cerebras qwen 235B) - Triggers: enrichis meta batch, ajoute meta description pages - Batch 3-15 par call, chattr unlock/relock auto, GOLD backup, rollback - 4/5 success sur premier test live WEVIA chat NL Preuves live: 4 pages enrichies avec meta descriptions FR premium - admin.html, admin-saas.html, agent-roi-simulator.html, agent-social-feed.html CF Yacine guide: /var/www/html/docs/yacine-cf-origin-pull-guide.md - Option 1 origin-pull IP 204.168.152.13 (validee HTTP 200) - Option 2 CF Service Token (absent) - Option 3 UA Whitelist wevia-agent dans CF WAF Coverage meta: 37 -> 41 pages. Target 80%+ via batches repetes. WEVIA peut enrichir 340 pages autonome via chat NL. Zero regression NR invariant. Opus retirement confirm.
114 lines
4.4 KiB
Bash
Executable File
114 lines
4.4 KiB
Bash
Executable File
#!/bin/bash
|
|
# Doctrine 159 v3: batch meta description - safe apostrophe handling via python
|
|
set -u
|
|
MSG="${1:-}"
|
|
[ -z "$MSG" ] && [ -f /tmp/wevia-last-msg.log ] && MSG=$(cat /tmp/wevia-last-msg.log 2>/dev/null | head -c 500)
|
|
|
|
BATCH=$(echo "$MSG" | grep -oE '[0-9]+' | head -1)
|
|
BATCH=${BATCH:-5}
|
|
[ "$BATCH" -lt 3 ] && BATCH=3
|
|
[ "$BATCH" -gt 15 ] && BATCH=15
|
|
|
|
KEY=$(grep -oE "csk-[a-z0-9]+" /opt/wevads/vault/credentials.php 2>/dev/null | head -1)
|
|
[ -z "$KEY" ] && { echo '{"ok":false,"error":"cerebras key not found"}'; exit 0; }
|
|
|
|
HTML_DIR=/var/www/html
|
|
TS=$(date +%Y%m%d-%H%M%S)
|
|
BACKUP_DIR=/var/www/html/vault-gold/opus/meta-enrich-batch-$TS
|
|
sudo mkdir -p "$BACKUP_DIR" 2>/dev/null
|
|
|
|
EXCLUDE_NAMES="wevia-master-api wevia-anthropic wevia-stream wevia-autonomous"
|
|
|
|
CANDIDATES=()
|
|
for f in "$HTML_DIR"/*.html; do
|
|
[ ! -f "$f" ] && continue
|
|
name=$(basename "$f")
|
|
skip=0
|
|
for ex in $EXCLUDE_NAMES; do
|
|
case "$name" in *"$ex"*) skip=1; break ;; esac
|
|
done
|
|
[ $skip -eq 1 ] && continue
|
|
case "$name" in .*|*-gold*|*.bak|*.backup|index.html|weval-technology-platform.html) continue ;; esac
|
|
|
|
HEAD=$(head -c 5000 "$f")
|
|
if echo "$HEAD" | grep -qi "<head" && ! echo "$HEAD" | grep -qiE '<meta[^>]+name=["'\'']description["'\'']'; then
|
|
CANDIDATES+=("$f")
|
|
[ "${#CANDIDATES[@]}" -ge "$BATCH" ] && break
|
|
fi
|
|
done
|
|
|
|
[ "${#CANDIDATES[@]}" -eq 0 ] && { echo '{"ok":true,"status":"no_pages_to_enrich","batch":0}'; exit 0; }
|
|
|
|
RESULTS=()
|
|
ok=0; fail=0
|
|
for f in "${CANDIDATES[@]}"; do
|
|
name=$(basename "$f")
|
|
WAS_LOCKED=0
|
|
lsattr "$f" 2>/dev/null | grep -q "i" && { WAS_LOCKED=1; sudo chattr -i "$f" 2>/dev/null; }
|
|
|
|
TITLE=$(grep -oE '<title[^>]*>[^<]*' "$f" 2>/dev/null | head -1 | sed 's/<title[^>]*>//')
|
|
BODY_PEEK=$(sed -n '/<body/,/<\/body>/p' "$f" 2>/dev/null | sed 's/<[^>]*>//g' | grep -v '^$' | head -c 1200)
|
|
|
|
# Use python with proper JSON escaping for prompt building
|
|
PAYLOAD=$(TITLE="$TITLE" BODY="$BODY_PEEK" python3 -c "
|
|
import json, os
|
|
title = os.environ.get('TITLE','')
|
|
body = os.environ.get('BODY','')
|
|
prompt = f'Generate ONE concise meta description (max 155 chars, French, descriptive, SEO-friendly) for this HTML page. Return ONLY the description text, no quotes, no explanation, no preamble.\n\nTITLE: {title}\nCONTENT_PREVIEW: {body}\n\nMETA DESCRIPTION:'
|
|
print(json.dumps({'model':'qwen-3-235b-a22b-instruct-2507','messages':[{'role':'user','content':prompt}],'max_tokens':100,'temperature':0.3}))
|
|
" 2>/dev/null)
|
|
|
|
RESP=$(curl -sk -m 25 -X POST "https://api.cerebras.ai/v1/chat/completions" \
|
|
-H "Authorization: Bearer $KEY" -H "Content-Type: application/json" -d "$PAYLOAD" 2>&1)
|
|
|
|
META_DESC=$(echo "$RESP" | python3 -c "
|
|
import sys, json
|
|
try:
|
|
d = json.loads(sys.stdin.read())
|
|
c = d['choices'][0]['message']['content'].strip()
|
|
c = c.replace('\"','').replace(chr(10),' ').strip()
|
|
if len(c) > 155: c = c[:152] + '...'
|
|
print(c)
|
|
except: print('')
|
|
" 2>/dev/null)
|
|
|
|
if [ -z "$META_DESC" ] || [ ${#META_DESC} -lt 30 ]; then
|
|
[ $WAS_LOCKED -eq 1 ] && sudo chattr +i "$f" 2>/dev/null
|
|
RESULTS+=("\"$name\":\"AGENT_FAIL\"")
|
|
fail=$((fail+1)); continue
|
|
fi
|
|
|
|
sudo cp "$f" "$BACKUP_DIR/$name.bak"
|
|
|
|
# Python insertion: use env var to pass meta description safely (no shell escape issues)
|
|
INSERT_RC=$(META_DESC="$META_DESC" FILE="$f" sudo -E python3 -c "
|
|
import re, os, html
|
|
file = os.environ['FILE']
|
|
meta = os.environ['META_DESC']
|
|
# HTML escape (but keep single quotes readable)
|
|
meta_escaped = html.escape(meta, quote=True)
|
|
new_meta = f' <meta name=\"description\" content=\"{meta_escaped}\">'
|
|
with open(file,'r',encoding='utf-8',errors='ignore') as fh: content = fh.read()
|
|
result, count = re.subn(r'(<head[^>]*>)', r'\1\n' + new_meta, content, count=1)
|
|
if count == 1 and result != content:
|
|
with open(file,'w',encoding='utf-8') as fh: fh.write(result)
|
|
print('OK')
|
|
else:
|
|
print('NO_HEAD')
|
|
" 2>&1)
|
|
|
|
if grep -qiE '<meta[^>]+name=["'\'']description["'\'']' "$f"; then
|
|
[ $WAS_LOCKED -eq 1 ] && sudo chattr +i "$f" 2>/dev/null
|
|
RESULTS+=("\"$name\":\"OK\"")
|
|
ok=$((ok+1))
|
|
else
|
|
sudo cp "$BACKUP_DIR/$name.bak" "$f"
|
|
[ $WAS_LOCKED -eq 1 ] && sudo chattr +i "$f" 2>/dev/null
|
|
RESULTS+=("\"$name\":\"ROLLBACK:$INSERT_RC\"")
|
|
fail=$((fail+1))
|
|
fi
|
|
done
|
|
|
|
IFS=','
|
|
echo "{\"ok\":true,\"doctrine\":\"159\",\"batch\":${#CANDIDATES[@]},\"ok\":$ok,\"fail\":$fail,\"agent\":\"cerebras-qwen-235b\",\"backup_dir\":\"$BACKUP_DIR\",\"results\":{${RESULTS[*]}},\"ts\":\"$(date -Iseconds)\"}"
|