Files
html/api/wevia-pages-registry.php
opus a80a3ffd6e
Some checks failed
WEVAL NonReg / nonreg (push) Has been cancelled
auto-commit via WEVIA vault_git intent 2026-04-19T14:36:24+00:00
2026-04-19 16:36:24 +02:00

207 lines
8.3 KiB
PHP

<?php
/**
* V79 — Pages Registry & Orphans Enricher
*
* Builds a live catalog of all HTML pages, classifies them,
* identifies orphans, exposes via API & chat.
*
* Does NOT modify any existing page.
* ADDITIVE ONLY: adds a new source of truth for the page catalog.
*
* Endpoints:
* GET /api/wevia-pages-registry.php?action=summary
* GET /api/wevia-pages-registry.php?action=orphans
* GET /api/wevia-pages-registry.php?action=full
* GET /api/wevia-pages-registry.php?action=by_class&class=hub
* GET /api/wevia-pages-registry.php?action=links_of&page=foo.html
*/
// Every response of this endpoint is declared as JSON and may be read from
// any origin (wildcard CORS header).
header('Content-Type: application/json');
header('Access-Control-Allow-Origin: *');
// Directory whose top-level *.html files are catalogued by build_registry().
$HTML_DIR = '/var/www/html';
// File in which the JSON-encoded registry is kept between requests.
$CACHE_FILE = '/tmp/wevia-pages-registry-cache.json';
// Maximum age of the cache file, in seconds, before the registry is rebuilt.
$CACHE_TTL = 300; // 5 min
/**
 * Scan a directory for HTML pages and build the page/link registry.
 *
 * Only files matching "<dir>/*.html" are catalogued (no recursion). Link
 * targets are resolved by basename, so any href ending in "foo.html" is
 * counted as a reference to the catalogued page "foo.html".
 *
 * Counting rules:
 *  - incoming_links counts every matching href occurrence (repeats and
 *    self-links included);
 *  - outgoing_links counts distinct catalogued targets;
 *  - a page is an orphan unless at least one OTHER page links to it
 *    (index.html is never an orphan).
 *
 * @param string $dir Directory to scan, without trailing slash.
 * @return array Registry with the keys ts, total_pages, orphans_count,
 *               referenced_pages, orphans, top_referenced, by_class,
 *               links_count, pages and hrefs_by_page.
 */
function build_registry($dir) {
    $pages = [];
    $hrefs_by_page = [];
    $all_hrefs = [];

    // glob() returns false on error (and, on some systems, for an empty
    // match). Normalise to an empty list so the loops never see a boolean.
    $files = glob($dir . '/*.html');
    if ($files === false) {
        $files = [];
    }

    // Pass 1: create one catalog entry per page, initially marked orphan.
    foreach ($files as $file) {
        $name = basename($file);
        $pages[$name] = [
            'name' => $name,
            'path' => '/' . $name,
            'size_kb' => round(filesize($file) / 1024, 1),
            'mtime' => date('c', filemtime($file)),
            'class' => classify($name),
            'incoming_links' => 0,
            'outgoing_links' => 0,
            'orphan' => true, // flipped to false once another page links here
            'title' => extract_title($file),
        ];
    }

    // Pass 2: extract href targets and wire up the link graph.
    foreach ($files as $file) {
        $name = basename($file);
        // Best effort: unreadable or empty files simply contribute no links.
        $content = @file_get_contents($file);
        if (!$content) {
            continue;
        }
        if (!preg_match_all('/href=["\']([^"\'#?]+\.html)/', $content, $m)) {
            continue;
        }

        $outgoing = [];
        foreach ($m[1] as $target) {
            $target_name = basename($target);
            if (!isset($pages[$target_name])) {
                continue; // link to something that is not a catalogued page
            }
            $pages[$target_name]['incoming_links']++;
            // A self-link does not make a page reachable from elsewhere,
            // so it must not clear the orphan flag.
            if ($target_name !== $name) {
                $pages[$target_name]['orphan'] = false;
            }
            $outgoing[] = $target_name;
            $all_hrefs[] = [$name, $target_name];
        }

        $distinct_targets = array_values(array_unique($outgoing));
        $pages[$name]['outgoing_links'] = count($distinct_targets);
        $hrefs_by_page[$name] = $distinct_targets;
    }

    // index.html is the root entry point, never an orphan.
    if (isset($pages['index.html'])) {
        $pages['index.html']['orphan'] = false;
    }

    $orphans = array_filter($pages, function ($p) { return $p['orphan']; });

    // Rank pages by number of incoming links, most referenced first.
    $top_referenced = $pages;
    uasort($top_referenced, function ($a, $b) {
        return $b['incoming_links'] <=> $a['incoming_links'];
    });

    return [
        'ts' => date('c'),
        'total_pages' => count($pages),
        'orphans_count' => count($orphans),
        'referenced_pages' => count($pages) - count($orphans),
        'orphans' => array_keys($orphans),
        'top_referenced' => array_slice(array_keys($top_referenced), 0, 20),
        'by_class' => group_by_class($pages),
        'links_count' => count($all_hrefs),
        'pages' => $pages,
        'hrefs_by_page' => $hrefs_by_page,
    ];
}
/**
 * Assign a category label to a page from substrings of its file name.
 *
 * Rules are tried from top to bottom and the first rule containing a
 * matching (case-sensitive) substring wins. A name matching no rule is
 * labelled 'module'.
 *
 * @param string $name Page file name, e.g. "sales-dashboard.html".
 * @return string Category label.
 */
function classify($name) {
    // Ordered rule table: label => substrings that select it.
    $rules = [
        'entry'         => ['index.html'],
        'hub'           => ['hub'],
        'dashboard'     => ['dashboard'],
        'architecture'  => ['archi', 'enterprise'],
        'visualization' => ['chart', 'visual', 'cartograph'],
        'admin'         => ['admin'],
        'wevia'         => ['wevia'],
        'ethica'        => ['ethica'],
        'paperclip'     => ['paperclip'],
        'strategy'      => ['plan', 'strateg'],
        'operations'    => ['scout', 'arena', 'ops'],
        'business'      => ['crm', 'sales', 'deal'],
        'test'          => ['test', 'demo'],
        'tools'         => ['tool'],
        'office'        => ['office'],
        'deerflow'      => ['deerflow'],
        'security'      => ['security'],
        'monitoring'    => ['monitoring', 'monitor'],
        'agents'        => ['agent'],
        'api_tools'     => ['api'],
    ];

    foreach ($rules as $label => $needles) {
        foreach ($needles as $needle) {
            if (strpos($name, $needle) !== false) {
                return $label;
            }
        }
    }

    // Fallback for names that match none of the rules.
    return 'module';
}
/**
 * Aggregate page entries by their 'class' label.
 *
 * @param array $pages Page entries; each must provide 'class', 'orphan'
 *                     and 'name'.
 * @return array Map of class label => ['count' => int, 'orphans' => int,
 *               'pages' => list of page names], in order of first
 *               appearance of each label.
 */
function group_by_class($pages) {
    $buckets = [];
    foreach ($pages as $entry) {
        $label = $entry['class'];

        // Start from the existing bucket, or an empty one for a new label.
        $bucket = isset($buckets[$label])
            ? $buckets[$label]
            : ['count' => 0, 'orphans' => 0, 'pages' => []];

        $bucket['count'] += 1;
        if ($entry['orphan']) {
            $bucket['orphans'] += 1;
        }
        $bucket['pages'][] = $entry['name'];

        $buckets[$label] = $bucket;
    }
    return $buckets;
}
/**
 * Read the <title> of an HTML file.
 *
 * Only the first 2000 bytes of the file are inspected, so a title that
 * is not fully contained in that window is not found.
 *
 * @param string $file Path of the HTML file.
 * @return string Entity-decoded, trimmed title, or '' when the file cannot
 *                be read or no title is found in the inspected window.
 */
function extract_title($file) {
    // Best effort: a read failure yields an empty title rather than a
    // warning leaking into the JSON response.
    $head = @file_get_contents($file, false, null, 0, 2000);
    if ($head === false || $head === '') {
        return '';
    }

    // Tolerate attributes on the opening tag (<title lang="en">) and
    // whitespace before the closing bracket; \b keeps tags such as
    // <titlebar> from matching.
    if (preg_match('/<title\b[^>]*>(.*?)<\/title\s*>/is', $head, $m)) {
        return trim(html_entity_decode($m[1], ENT_QUOTES, 'UTF-8'));
    }

    return '';
}
// ---------------------------------------------------------------------------
// Request handling: load the cached registry (or rebuild it), then dispatch
// on ?action=. Passing a non-empty ?rebuild= forces a fresh scan.
// ---------------------------------------------------------------------------

// Query parameters can arrive as arrays (?action[]=x). A non-string action
// is mapped to '' so it falls through to the "unknown action" branch.
$action = $_GET['action'] ?? 'summary';
if (!is_string($action)) {
    $action = '';
}

// Flags shared by all successful responses. JSON_PARTIAL_OUTPUT_ON_ERROR
// substitutes unencodable values (e.g. a title with invalid UTF-8) instead
// of making json_encode() fail and the response body come out empty.
$json_flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_PARTIAL_OUTPUT_ON_ERROR;

// Cache layer: reuse the cached registry while it is fresh, unless a rebuild
// was requested (in which case the cache is not even read).
$data = null;
$force_rebuild = !empty($_GET['rebuild']);
if (!$force_rebuild && file_exists($CACHE_FILE) && (time() - filemtime($CACHE_FILE)) < $CACHE_TTL) {
    $raw = @file_get_contents($CACHE_FILE);
    $decoded = ($raw === false) ? null : json_decode($raw, true);
    // Accept the cache only if it has the structure the routing code reads;
    // a truncated or foreign file triggers a rebuild instead of notices.
    if (is_array($decoded) && isset(
        $decoded['ts'],
        $decoded['total_pages'],
        $decoded['orphans_count'],
        $decoded['referenced_pages'],
        $decoded['links_count'],
        $decoded['orphans'],
        $decoded['top_referenced'],
        $decoded['by_class'],
        $decoded['pages'],
        $decoded['hrefs_by_page']
    )) {
        $data = $decoded;
        $data['cached'] = true;
    }
}
if ($data === null) {
    $data = build_registry($HTML_DIR);
    $data['cached'] = false;
    $encoded = json_encode($data, JSON_PRETTY_PRINT | JSON_PARTIAL_OUTPUT_ON_ERROR);
    // Never overwrite the cache with an empty file when encoding failed.
    // LOCK_EX keeps concurrent writers from interleaving their output.
    if ($encoded !== false) {
        @file_put_contents($CACHE_FILE, $encoded, LOCK_EX);
    }
}

// Action routing
switch ($action) {
    case 'summary':
        echo json_encode([
            'ts' => $data['ts'],
            'total_pages' => $data['total_pages'],
            'orphans_count' => $data['orphans_count'],
            'referenced_pages' => $data['referenced_pages'],
            'links_count' => $data['links_count'],
            'cached' => $data['cached'],
            // Per-class counters only; the page lists are available via by_class.
            'classes' => array_map(function ($c) {
                return ['count' => $c['count'], 'orphans' => $c['orphans']];
            }, $data['by_class']),
            'top_referenced' => array_slice($data['top_referenced'], 0, 10),
        ], $json_flags);
        break;

    case 'orphans':
        $orphan_details = [];
        foreach ($data['orphans'] as $o) {
            $orphan_details[$o] = [
                'class' => $data['pages'][$o]['class'],
                'size_kb' => $data['pages'][$o]['size_kb'],
                'title' => $data['pages'][$o]['title'],
                'mtime' => $data['pages'][$o]['mtime'],
                'outgoing_links' => $data['pages'][$o]['outgoing_links'],
            ];
        }
        echo json_encode(['count' => count($data['orphans']), 'orphans' => $orphan_details], $json_flags);
        break;

    case 'by_class':
        $cls = $_GET['class'] ?? 'hub';
        // is_string() guards against ?class[]=x, which would otherwise be
        // used as an array offset and throw a TypeError on PHP 8.
        if (!is_string($cls) || !isset($data['by_class'][$cls])) {
            echo json_encode(['error'=>'class not found','classes'=>array_keys($data['by_class'])]);
            break;
        }
        echo json_encode($data['by_class'][$cls], $json_flags);
        break;

    case 'links_of':
        $page = $_GET['page'] ?? '';
        // Same array-parameter guard as for by_class.
        if (!is_string($page) || $page === '' || !isset($data['pages'][$page])) {
            echo json_encode(['error'=>'page not found']);
            break;
        }
        // Collect every page whose distinct outgoing targets include $page.
        $incoming = [];
        foreach ($data['hrefs_by_page'] as $src => $targets) {
            if (in_array($page, $targets, true)) {
                $incoming[] = $src;
            }
        }
        echo json_encode([
            'page' => $page,
            'class' => $data['pages'][$page]['class'],
            'orphan' => $data['pages'][$page]['orphan'],
            'incoming_links' => $data['pages'][$page]['incoming_links'],
            'incoming_from' => $incoming,
            'outgoing_links' => $data['pages'][$page]['outgoing_links'],
            'outgoing_to' => $data['hrefs_by_page'][$page] ?? [],
        ], $json_flags);
        break;

    case 'full':
        echo json_encode($data, $json_flags);
        break;

    default:
        echo json_encode(['error'=>'unknown action','actions'=>['summary','orphans','by_class','links_of','full']]);
}