]*>([^<]+)<\/title>/i', $content, $m)) { $title = trim(html_entity_decode($m[1])); } if (preg_match('/]+name=["\']description["\'][^>]+content=["\']([^"\']+)["\']/i', $content, $m)) { $desc = trim(html_entity_decode($m[1])); } return [ 'title' => mb_substr($title, 0, 120), 'description' => mb_substr($desc, 0, 200), 'last_modified' => date('Y-m-d', filemtime($path)) ]; }

/**
 * Look up the thumbnail URL for a page.
 *
 * The thumbnail file name is the md5 of $name with a ".png" extension.
 * Original note: wem-thumb-worker.py uses key = path (name without
 * leading /), hash = md5(key).
 *
 * @param string $name     Page file name used as the hash key.
 * @param string $thumbDir Directory that is checked for "<md5>.png".
 * @param string $thumbUrl URL prefix prepended to "<md5>.png" in the result.
 *
 * @return string|null "$thumbUrl/<md5>.png" when the file exists, otherwise null.
 */
function findThumbMd5($name, $thumbDir, $thumbUrl)
{
    $file = md5($name) . '.png';

    if (!file_exists($thumbDir . '/' . $file)) {
        return null;
    }

    return $thumbUrl . '/' . $file;
}

// ---------------------------------------------------------------------------
// Build the page index: scan the HTML root, collect per-page metadata,
// group pages into categories and emit the whole result as JSON.
// ---------------------------------------------------------------------------

$start = microtime(true);

// glob() returns false on error; fall back to an empty list so the loop
// below simply does nothing instead of warning on a non-array.
$html_files = glob($HTML_ROOT . '/*.html') ?: [];

$all_pages  = [];
$total      = 0;
$with_thumb = 0;
$with_meta  = 0;

foreach ($html_files as $f) {
    $name = basename($f);

    // Files whose name contains "-gold" or ".bak" are left out of the index.
    if (strpos($name, '-gold') !== false || strpos($name, '.bak') !== false) {
        continue;
    }

    $meta  = getPageMeta($f);
    $thumb = findThumbMd5($name, $THUMB_DIR, $THUMB_URL);

    $page = [
        'name'          => $name,
        'url'           => '/' . $name,
        'size_kb'       => round(filesize($f) / 1024, 1),
        'thumb_url'     => $thumb,
        // When the page has no usable title, derive one from the file name.
        'title'         => $meta['title'] ?: str_replace(['-', '_', '.html'], [' ', ' ', ''], $name),
        'description'   => $meta['description'] ?? '',
        'last_modified' => $meta['last_modified'] ?? date('Y-m-d', filemtime($f)),
    ];

    $all_pages[] = $page;
    $total++;

    if ($thumb) {
        $with_thumb++;
    }
    if (!empty($meta['description'])) {
        $with_meta++;
    }
}

// Category label => pattern tested against the lower-cased file name.
// Order matters: a page goes into the FIRST category whose pattern matches.
$category_patterns = [
    'Hubs & Centers'          => '/hub|center\b|registry/',
    'Dashboards & Monitoring' => '/dashboard|monitor|metric|kpi|analytics/',
    'Agents & AI'             => '/agent|bot|brain|cortex|l99|wevia|claude|ia-|-ia|gpt/',
    'Admin & Ops'             => '/admin|ops|config|setup|install|deploy/',
    'Business & CRM'          => '/crm|sales|customer|client|lead|opportunity/',
    'WEVADS Email Legacy'     => '/wevads|pmta|email-|-email|smtp|mail-|deliv/',
    'Integration & APIs'      => '/api-|integration|connect|webhook|bridge/',
    'Proofs & Tests'          => '/proof|test-|-test|playwright|e2e|verify/',
    'Public & Marketing'      => '/landing|home|about|contact|pricing|index|intro/',
];

// Every category is present in the output even when empty; "Autre" is the
// catch-all bucket and always comes last.
$categories = array_fill_keys(array_keys($category_patterns), []);
$categories['Autre'] = [];

foreach ($all_pages as $p) {
    $n = strtolower($p['name']);

    $label = 'Autre';
    foreach ($category_patterns as $candidate => $pattern) {
        if (preg_match($pattern, $n)) {
            $label = $candidate;
            break;
        }
    }

    $categories[$label][] = $p;
}

// Sort each category by size, largest first.
foreach (array_keys($categories) as $label) {
    usort($categories[$label], fn($a, $b) => $b['size_kb'] <=> $a['size_kb']);
}

$elapsed = round((microtime(true) - $start) * 1000, 1);

// Number of categories that contain at least one page (an empty array is
// falsy, so array_filter() without a callback drops the empty ones).
// NOTE(review): 'categories_count' and 'categories_populated' have always
// reported this same value; if 'categories_count' was meant to be the total
// number of defined categories it should be count($categories) - confirm
// with the consumers of this endpoint before changing it.
$populated = count(array_filter($categories));

$payload = [
    'ok'         => true,
    'doctrine'   => '155',
    'ts'         => date('c'),
    'elapsed_ms' => $elapsed,
    'stats'      => [
        'total_pages'          => $total,
        'with_thumb'           => $with_thumb,
        'with_meta'            => $with_meta,
        // max(1, ...) avoids a division by zero when no page was indexed.
        'thumb_coverage_pct'   => round($with_thumb / max(1, $total) * 100, 1),
        'meta_coverage_pct'    => round($with_meta / max(1, $total) * 100, 1),
        'categories_count'     => $populated,
        'categories_populated' => $populated,
    ],
    'category_counts' => array_map('count', $categories),
    'categories'      => $categories,
];

// Titles and descriptions are scraped from arbitrary HTML files and may not
// be valid UTF-8. Without the SUBSTITUTE flag json_encode() would return
// false and the endpoint would answer with an empty body.
$json = json_encode($payload, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE);

if ($json === false) {
    // Last resort: report the encoding failure instead of printing nothing.
    $json = json_encode(
        ['ok' => false, 'error' => json_last_error_msg()],
        JSON_UNESCAPED_SLASHES
    );
}

echo $json;