Files
html/api/wtp-pages-enriched.php
Opus 23ecdc659d
Some checks failed
WEVAL NonReg / nonreg (push) Has been cancelled
phase16 doctrine 155 api wtp-pages-enriched vraies integrations
NOUVEAU endpoint /api/wtp-pages-enriched.php (zero ecrasement WTP):
- 340 pages categorisees en 10 categories
- 219 avec thumb (64.4% coverage) via md5(path) lookup
- 37 avec meta description extraite
- Titles HTML extraits
- Categories: Hubs/Dashboards/Agents/Admin/Business/WEVADS/APIs/Proofs/Public/Autre

WTP renderAllPages() peut consommer cette API pour cards enrichies.
Modification WTP (chattr +i 5489L) attend validation Yacine ou WEVIA autonomy.

Zero regression. Zero ecrasement. NR invariant.
Doctrine 155 ecrite au vault.
2026-04-24 00:36:59 +02:00

120 lines
4.5 KiB
PHP

<?php
/**
 * wtp-pages-enriched.php v2 — Doctrine 155
 *
 * JSON endpoint describing WTP pages: thumb_url, title, description, category.
 * Thumbnail convention (wem-thumb-worker): key = path, file name = md5(key) . '.png'.
 */

// Filesystem root that is scanned for pages, and the thumbnail location
// expressed both as a public URL prefix and as the matching directory on disk.
$HTML_ROOT = '/var/www/html';
$THUMB_URL = '/api/screenshots/wem';
$THUMB_DIR = $HTML_ROOT . $THUMB_URL;

// The body is JSON and may be cached publicly for 300 seconds.
header('Cache-Control: public, max-age=300');
header('Content-Type: application/json');
/**
 * Extract lightweight metadata from the head of an HTML file.
 *
 * Only the first 50000 bytes are inspected. An empty or unreadable file is
 * treated as having no title/description instead of aborting the request
 * (fread() with a zero length throws a ValueError on PHP 8).
 *
 * @param string $path Absolute path of the HTML file.
 * @return array|null  ['title' => string (max 120 chars),
 *                      'description' => string (max 200 chars),
 *                      'last_modified' => 'Y-m-d' string],
 *                     or null when the file does not exist.
 */
function getPageMeta($path) {
    if (!file_exists($path)) {
        return null;
    }

    // Best-effort read: metadata enrichment must never break the listing.
    $content = is_readable($path) ? file_get_contents($path, false, null, 0, 50000) : false;
    if ($content === false) {
        $content = '';
    }

    $title = '';
    $desc = '';

    if (preg_match('/<title[^>]*>([^<]+)<\/title>/i', $content, $m)) {
        $title = trim(html_entity_decode($m[1], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    }

    // The closing quote must be the same character as the opening one, so an
    // apostrophe inside a double-quoted value (content="L'offre") is kept.
    if (preg_match('/<meta\b[^>]*?\bname=(["\'])description\1[^>]*?\bcontent=(["\'])(.*?)\2/is', $content, $m)) {
        // name="description" ... content="..."
        $desc = $m[3];
    } elseif (preg_match('/<meta\b[^>]*?\bcontent=(["\'])([^<>]*?)\1[^>]*?\bname=(["\'])description\3/i', $content, $m)) {
        // content="..." ... name="description" (attributes in the other order)
        $desc = $m[2];
    }
    $desc = trim(html_entity_decode($desc, ENT_QUOTES | ENT_HTML5, 'UTF-8'));

    return [
        'title' => mb_substr($title, 0, 120),
        'description' => mb_substr($desc, 0, 200),
        'last_modified' => date('Y-m-d', filemtime($path)),
    ];
}
/**
 * Resolve the public URL of a page thumbnail, if one exists on disk.
 *
 * Thumbnails are stored as "<md5 of the page name>.png" inside $thumbDir
 * (per the original comment, wem-thumb-worker.py hashes the path without a
 * leading slash).
 *
 * @param string $name     Page key that gets hashed (e.g. "index.html").
 * @param string $thumbDir Directory on disk holding the PNG thumbnails.
 * @param string $thumbUrl URL prefix under which that directory is served.
 * @return string|null     "$thumbUrl/<hash>.png" when the file exists, else null.
 */
function findThumbMd5($name, $thumbDir, $thumbUrl) {
    $fileName = md5($name) . '.png';

    return file_exists($thumbDir . '/' . $fileName)
        ? $thumbUrl . '/' . $fileName
        : null;
}
// Scan every top-level *.html file under $HTML_ROOT and build one enriched
// record per page, counting thumbnail and description coverage on the way.
$start = microtime(true);

// glob() returns false on error; fall back to an empty list so the loop and
// the statistics below still produce a valid (empty) response.
$html_files = glob($HTML_ROOT . '/*.html') ?: [];

$all_pages = [];
$total = 0;
$with_thumb = 0;
$with_meta = 0;

foreach ($html_files as $f) {
    $name = basename($f);

    // Files whose name contains "-gold" or ".bak" are left out of the listing.
    if (strpos($name, '-gold') !== false || strpos($name, '.bak') !== false) {
        continue;
    }

    $meta = getPageMeta($f);
    if ($meta === null) {
        // The file disappeared between glob() and the read: skip it rather
        // than emit PHP warnings (which would corrupt the JSON body) and a
        // record with bogus size/date values.
        continue;
    }

    $thumb = findThumbMd5($name, $THUMB_DIR, $THUMB_URL);

    $all_pages[] = [
        'name' => $name,
        'url' => '/' . $name,
        'size_kb' => round(filesize($f) / 1024, 1),
        'thumb_url' => $thumb,
        // No usable <title>: derive a readable label from the file name.
        'title' => $meta['title'] ?: str_replace(['-', '_', '.html'], [' ', ' ', ''], $name),
        'description' => $meta['description'],
        'last_modified' => $meta['last_modified'],
    ];

    $total++;
    if ($thumb) {
        $with_thumb++;
    }
    if ($meta['description'] !== '') {
        $with_meta++;
    }
}
// Categorisation: each page goes into the FIRST category whose pattern matches
// its lower-cased file name; the order of this table is therefore significant.
// Pages matching no pattern end up in 'Autre'.
$rules = [
    'Hubs & Centers' => '/hub|center\b|registry/',
    'Dashboards & Monitoring' => '/dashboard|monitor|metric|kpi|analytics/',
    'Agents & AI' => '/agent|bot|brain|cortex|l99|wevia|claude|ia-|-ia|gpt/',
    'Admin & Ops' => '/admin|ops|config|setup|install|deploy/',
    'Business & CRM' => '/crm|sales|customer|client|lead|opportunity/',
    'WEVADS Email Legacy' => '/wevads|pmta|email-|-email|smtp|mail-|deliv/',
    'Integration & APIs' => '/api-|integration|connect|webhook|bridge/',
    'Proofs & Tests' => '/proof|test-|-test|playwright|e2e|verify/',
    'Public & Marketing' => '/landing|home|about|contact|pricing|index|intro/',
];

// Pre-create every bucket (rule order, then the fallback) so that empty
// categories still appear in the output.
$categories = array_fill_keys(array_keys($rules), []);
$categories['Autre'] = [];

foreach ($all_pages as $page) {
    $lowerName = strtolower($page['name']);
    $bucket = 'Autre';

    foreach ($rules as $label => $pattern) {
        if (preg_match($pattern, $lowerName)) {
            $bucket = $label;
            break;
        }
    }

    $categories[$bucket][] = $page;
}
// Within each category, list the largest pages first.
foreach (array_keys($categories) as $k) {
    usort($categories[$k], fn($a, $b) => $b['size_kb'] <=> $a['size_kb']);
}

$elapsed = round((microtime(true) - $start) * 1000, 1);

$category_counts = array_map('count', $categories);
// array_filter() without a callback drops the zero counts, leaving only the
// categories that actually contain pages.
$populated = count(array_filter($category_counts));

// Titles/descriptions come from arbitrary HTML files and may hold malformed
// UTF-8 (non-UTF-8 page, multibyte character cut at the read limit). Without
// JSON_INVALID_UTF8_SUBSTITUTE json_encode() would return false and the
// endpoint would answer with an empty body.
$payload = json_encode([
    'ok' => true,
    'doctrine' => '155',
    'ts' => date('c'),
    'elapsed_ms' => $elapsed,
    'stats' => [
        'total_pages' => $total,
        'with_thumb' => $with_thumb,
        'with_meta' => $with_meta,
        'thumb_coverage_pct' => round($with_thumb / max(1, $total) * 100, 1),
        'meta_coverage_pct' => round($with_meta / max(1, $total) * 100, 1),
        // Number of defined categories (previously this duplicated
        // categories_populated because empty categories were filtered out).
        'categories_count' => count($categories),
        'categories_populated' => $populated,
    ],
    'category_counts' => $category_counts,
    'categories' => $categories,
], JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE);

if ($payload === false) {
    // Any remaining encoding failure is reported explicitly instead of
    // sending an empty 200 response.
    http_response_code(500);
    $payload = json_encode(['ok' => false, 'error' => json_last_error_msg()]);
}

echo $payload;