html/api/ambre-tool-docx.php

<?php
/**
 * ambre-tool-docx.php — Premium Word document generation
 * Input: JSON {topic: "..."}
 * Output: JSON {ok:true, url:"/files/xxx.docx", title, sections, size, size_kb}
 *         on failure: JSON {ok:false, error:"..."} (some errors add a debug field)
 *
 * Pipeline:
 * 1. Call sovereign LLM cascade to generate structured JSON content
 * 2. Python python-docx renders professional .docx with heading styles, TOC, tables
 * 3. The .docx is expected under /var/www/html/files/; its site-relative /files/ URL is returned
 */
// Every response of this endpoint, success or failure, is a JSON document.
header('Content-Type: application/json');
if (($_SERVER['REQUEST_METHOD'] ?? '') !== 'POST') {
    echo json_encode(['ok'=>false, 'error'=>'POST only']); exit;
}

// The request body is expected to be a JSON object carrying a "topic" entry.
$input = json_decode(file_get_contents('php://input'), true);
$rawTopic = is_array($input) ? ($input['topic'] ?? '') : '';
// A non-scalar topic (array/object) would make trim() raise a TypeError on
// PHP 8, so it is treated like a missing topic. Scalars are cast to string,
// which matches what trim() did with them implicitly.
$topic = is_scalar($rawTopic) ? trim((string) $rawTopic) : '';
if (strlen($topic) < 3) {
    echo json_encode(['ok'=>false, 'error'=>'topic too short']); exit;
}
// Cap the topic at 500 bytes. mb_strcut() never cuts inside a multi-byte
// UTF-8 character; a split character would make the later json_encode() of
// the LLM request fail. Plain substr() is the fallback without mbstring.
if (strlen($topic) > 500) {
    $topic = function_exists('mb_strcut')
        ? mb_strcut($topic, 0, 500, 'UTF-8')
        : substr($topic, 0, 500);
}

// Step 1: Generate content via sovereign LLM
// The prompt is French text sent verbatim to the model. It embeds the
// requested topic, shows the JSON skeleton the model must fill in, and lists
// the constraints on the output. Lines are joined with "\n"; there is no
// trailing newline.
$promptLines = [
    'Genere un document Word professionnel structure sur: "' . $topic . '"',
    '',
    'Retourne UNIQUEMENT du JSON valide (sans markdown code fence) avec:',
    '{',
    '  "title": "Titre principal",',
    '  "subtitle": "Sous-titre",',
    '  "author": "WEVAL Consulting",',
    '  "executive_summary": "Paragraphe de synthese de 4-6 phrases",',
    '  "sections": [',
    '    {',
    '      "heading": "1. Titre section",',
    '      "paragraphs": ["Paragraphe 1...", "Paragraphe 2..."],',
    '      "bullets": ["Point cle 1", "Point cle 2"],',
    '      "table": {"headers":["Col1","Col2"], "rows":[["v1","v2"]]}',
    '    }',
    '  ],',
    '  "conclusion": "Paragraphe de conclusion"',
    '}',
    '',
    'IMPORTANT:',
    '- 5 a 7 sections completes',
    '- Chaque section a 2-3 paragraphes detailes (60-120 mots chacun)',
    '- 3-5 bullets par section quand pertinent',
    '- Ajouter une table dans au moins 2 sections',
    '- Francais professionnel sans accents probematiques',
    "- Pas d'info confidentielle WEVAL, generique et factuelle",
    '- JSON valide uniquement, aucun texte avant ou apres',
];
$prompt = implode("\n", $promptLines);

// Use sovereign cascade: POST the prompt as a single user message to the
// local OpenAI-style chat-completions endpoint.
$llmPayload = json_encode([
    'model' => 'auto',
    'messages' => [['role'=>'user', 'content'=>$prompt]],
    'max_tokens' => 4000,
    'temperature' => 0.7
]);
// json_encode() returns false when the prompt is not valid UTF-8; posting
// that would send an empty body, so fail with an explicit error instead.
if ($llmPayload === false) {
    echo json_encode(['ok'=>false, 'error'=>'LLM request encoding failed: ' . json_last_error_msg()]); exit;
}

$ch = curl_init('http://127.0.0.1:4000/v1/chat/completions');
if ($ch === false) {
    echo json_encode(['ok'=>false, 'error'=>'LLM HTTP 0: curl init failed']); exit;
}
curl_setopt_array($ch, [
    CURLOPT_POST => true,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POSTFIELDS => $llmPayload,
    CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
    CURLOPT_TIMEOUT => 90,
]);
$resp = curl_exec($ch);
$http = curl_getinfo($ch, CURLINFO_HTTP_CODE);
// Read the error text before the handle is closed.
$curlErr = curl_error($ch);
curl_close($ch);

// A transfer can fail after the status line arrived (e.g. timeout while the
// body is streaming): curl_exec() is then false although the code is 200.
if ($resp === false) {
    echo json_encode(['ok'=>false, 'error'=>"LLM HTTP $http: $curlErr"], JSON_PARTIAL_OUTPUT_ON_ERROR); exit;
}
if ($http !== 200) {
    echo json_encode(['ok'=>false, 'error'=>"LLM HTTP $http"]); exit;
}

// Pull the assistant message text out of the chat-completions response.
$data = json_decode($resp, true);
$content_raw = $data['choices'][0]['message']['content'] ?? '';
if (!is_string($content_raw)) {
    // Anything but plain text cannot be scanned below; treat it as empty.
    $content_raw = '';
}
// Extract JSON from markdown fences if any
/* BALANCED_JSON_V2 */
if (preg_match('/```(?:json)?\s*\n?(.*?)\n?```/s', $content_raw, $fence)) {
    $content_raw = $fence[1];
}
// Keep only the first balanced {...} object, dropping any text before or
// after it. Braces inside JSON string values are skipped, so a "}" in a
// paragraph does not end the object early. The scan is byte-wise, which is
// safe for UTF-8 because the four characters tested are ASCII.
$objStart = strpos($content_raw, '{');
if ($objStart !== false) {
    $objEnd = -1;
    $depth = 0;
    $inString = false;
    $escaped = false;
    $rawLen = strlen($content_raw);
    for ($pos = $objStart; $pos < $rawLen; $pos++) {
        $char = $content_raw[$pos];
        if ($inString) {
            if ($escaped) {
                // The character after a backslash is never a terminator.
                $escaped = false;
            } elseif ($char === '\\') {
                $escaped = true;
            } elseif ($char === '"') {
                $inString = false;
            }
            continue;
        }
        if ($char === '"') {
            $inString = true;
        } elseif ($char === '{') {
            $depth++;
        } elseif ($char === '}') {
            $depth--;
            if ($depth === 0) {
                $objEnd = $pos;
                break;
            }
        }
    }
    if ($objEnd > $objStart) {
        $content_raw = substr($content_raw, $objStart, $objEnd - $objStart + 1);
    }
}
$doc = json_decode($content_raw, true);
if (!is_array($doc) || !isset($doc['title'])) {
    // Debug excerpt of what the model returned, cut on a character boundary
    // when mbstring is available. JSON_PARTIAL_OUTPUT_ON_ERROR guarantees a
    // JSON body is still printed if the excerpt is not valid UTF-8.
    $rawExcerpt = function_exists('mb_strcut')
        ? mb_strcut($content_raw, 0, 500, 'UTF-8')
        : substr($content_raw, 0, 500);
    echo json_encode(['ok'=>false, 'error'=>'LLM returned invalid JSON', 'raw'=>$rawExcerpt], JSON_PARTIAL_OUTPUT_ON_ERROR);
    exit;
}

// Step 2: Python docx generation
// The decoded document is staged as a JSON file, then the external renderer
// script is run with two arguments: the JSON path and the .docx output path.
$filesDir = '/var/www/html/files';
if (!is_dir($filesDir) && !mkdir($filesDir, 0755, true) && !is_dir($filesDir)) {
    echo json_encode(['ok'=>false, 'error'=>'output directory unavailable']); exit;
}

// tempnam() creates a placeholder file to reserve a unique name. The JSON is
// written next to it with a ".json" suffix, and both files are removed below
// so no empty placeholder is left behind in /tmp.
$tmpbase = tempnam('/tmp', 'docx_');
$docJson = json_encode($doc);
if ($tmpbase === false || $docJson === false) {
    if ($tmpbase !== false) { @unlink($tmpbase); }
    echo json_encode(['ok'=>false, 'error'=>'cannot stage document JSON']); exit;
}
$tmpjson = $tmpbase . '.json';
if (file_put_contents($tmpjson, $docJson) === false) {
    @unlink($tmpjson);
    @unlink($tmpbase);
    echo json_encode(['ok'=>false, 'error'=>'cannot stage document JSON']); exit;
}

// Output name: "weval-" + 10 hex characters derived from topic and time.
$filename = 'weval-' . substr(md5($topic . microtime(true)), 0, 10) . '.docx';
$outpath = $filesDir . '/' . $filename;

$pyScript = '/var/www/html/api/ambre-tool-docx-render.py';

// Every argument is shell-escaped; stderr is merged into the captured output.
$cmd = "python3 " . escapeshellarg($pyScript) . " " . escapeshellarg($tmpjson) . " " . escapeshellarg($outpath) . " 2>&1";
$out = shell_exec($cmd);
@unlink($tmpjson);
@unlink($tmpbase);

if (!file_exists($outpath)) {
    // shell_exec() may return null/false, hence the cast. The partial-output
    // flag keeps the response valid JSON if the renderer printed bytes that
    // are not UTF-8.
    echo json_encode(['ok'=>false, 'error'=>'docx render failed', 'py_out'=>substr((string) $out, 0, 500)], JSON_PARTIAL_OUTPUT_ON_ERROR);
    exit;
}

// Final success payload: public URL plus a few facts about the document.
$size = filesize($outpath);
if ($size === false) {
    // Report 0 rather than emitting `false` as the size.
    $size = 0;
}
// "sections" comes from the model; count() on a non-array raises a TypeError
// on PHP 8, so anything that is not a list counts as zero sections.
$sections = $doc['sections'] ?? [];
$n_sections = is_array($sections) ? count($sections) : 0;

echo json_encode([
    'ok' => true,
    'url' => '/files/' . $filename,
    'title' => $doc['title'],
    'sections' => $n_sections,
    'size' => $size,
    'size_kb' => round($size/1024, 1),
]);