207 lines
8.3 KiB
PHP
207 lines
8.3 KiB
PHP
<?php
|
|
/**
 * V79 — Pages Registry & Orphans Enricher
 *
 * Builds a live catalog of all HTML pages, classifies them,
 * identifies orphans, and exposes the result via API & chat.
 *
 * Makes ZERO modifications to existing pages.
 * ADDITIVE ONLY: a new source of truth for pages.
 *
 * Endpoints:
 *   GET /api/wevia-pages-registry.php?action=summary
 *   GET /api/wevia-pages-registry.php?action=orphans
 *   GET /api/wevia-pages-registry.php?action=full
 *   GET /api/wevia-pages-registry.php?action=by_class&class=hub
 *   GET /api/wevia-pages-registry.php?action=links_of&page=foo.html
 */
|
|
|
|
// Every response produced by this script is JSON and may be read cross-origin.
header('Content-Type: application/json');
header('Access-Control-Allow-Origin: *');

// Registry settings consumed by the scan and by the cache logic of this script.
$HTML_DIR   = '/var/www/html';                        // directory whose *.html files are catalogued
$CACHE_FILE = '/tmp/wevia-pages-registry-cache.json'; // on-disk snapshot of the last build
$CACHE_TTL  = 5 * 60;                                 // seconds a snapshot is considered fresh
|
|
|
|
/**
 * Scan the top-level *.html files of a directory and build the pages registry.
 *
 * Pass 1 creates one entry per file (name, path, size, mtime, class, title).
 * Pass 2 reads each file, extracts every href ending in ".html" and matches it
 * against the known pages by basename only, so "sub/foo.html" or an absolute
 * URL ending in "/foo.html" both resolve to the local "foo.html".
 *
 * Counting rules:
 *  - incoming_links counts every matching href occurrence (duplicates and
 *    self-links included);
 *  - outgoing_links counts the distinct local targets of a page;
 *  - a page stops being an orphan only when a DIFFERENT page links to it;
 *    a page that merely links to itself is still unreachable from the rest
 *    of the site. index.html is never reported as an orphan.
 *
 * @param string $dir Directory to scan (non-recursive).
 * @return array Registry with keys: ts, total_pages, orphans_count,
 *               referenced_pages, orphans, top_referenced (max 20 names),
 *               by_class, links_count, pages, hrefs_by_page.
 */
function build_registry($dir) {
    $pages = [];
    $hrefs_by_page = [];
    $all_hrefs = [];

    // glob() returns false on error; treat that as "no pages" instead of
    // handing a boolean to foreach.
    $files = glob($dir . '/*.html');
    if ($files === false) {
        $files = [];
    }

    // Pass 1: register every page so link targets can be resolved in pass 2.
    foreach ($files as $file) {
        $name = basename($file);
        $pages[$name] = [
            'name' => $name,
            'path' => '/' . $name,
            'size_kb' => round(filesize($file) / 1024, 1),
            'mtime' => date('c', filemtime($file)),
            'class' => classify($name),
            'incoming_links' => 0,
            'outgoing_links' => 0,
            'orphan' => true, // flipped to false once another page links here
            'title' => extract_title($file),
        ];
    }

    // Pass 2: collect hrefs and update link counters.
    foreach ($files as $file) {
        $name = basename($file);
        $content = @file_get_contents($file);
        if (!$content) {
            continue;
        }
        if (!preg_match_all('/href=["\']([^"\'#?]+\.html)/', $content, $m)) {
            continue;
        }

        $outgoing = [];
        foreach ($m[1] as $target) {
            $target_name = basename($target);
            if (!isset($pages[$target_name])) {
                continue;
            }
            $pages[$target_name]['incoming_links']++;
            // A self-link does not make the page reachable from anywhere else.
            if ($target_name !== $name) {
                $pages[$target_name]['orphan'] = false;
            }
            $outgoing[] = $target_name;
            $all_hrefs[] = [$name, $target_name];
        }

        $unique_targets = array_values(array_unique($outgoing));
        $pages[$name]['outgoing_links'] = count($unique_targets);
        $hrefs_by_page[$name] = $unique_targets;
    }

    // index.html is the root entry point, never an orphan.
    if (isset($pages['index.html'])) {
        $pages['index.html']['orphan'] = false;
    }

    $orphans = array_filter($pages, function ($p) { return $p['orphan']; });

    // Most-referenced pages first.
    $top_referenced = $pages;
    uasort($top_referenced, function ($a, $b) {
        return $b['incoming_links'] <=> $a['incoming_links'];
    });

    return [
        'ts' => date('c'),
        'total_pages' => count($pages),
        'orphans_count' => count($orphans),
        'referenced_pages' => count($pages) - count($orphans),
        'orphans' => array_keys($orphans),
        'top_referenced' => array_slice(array_keys($top_referenced), 0, 20),
        'by_class' => group_by_class($pages),
        'links_count' => count($all_hrefs),
        'pages' => $pages,
        'hrefs_by_page' => $hrefs_by_page,
    ];
}
|
|
|
|
/**
 * Map a page file name to a category label using substring rules.
 *
 * Rules are evaluated top to bottom and the first label with a matching
 * needle wins, so order matters (e.g. "wevia-hub.html" is a "hub", not
 * "wevia"). Matching is case-sensitive. Names matching no rule are "module".
 *
 * @param string $name Page file name, e.g. "sales-dashboard.html".
 * @return string Category label.
 */
function classify($name) {
    // label => needles; PHP arrays keep insertion order, which is the precedence.
    $rules = [
        'entry'         => ['index.html'],
        'hub'           => ['hub'],
        'dashboard'     => ['dashboard'],
        'architecture'  => ['archi', 'enterprise'],
        'visualization' => ['chart', 'visual', 'cartograph'],
        'admin'         => ['admin'],
        'wevia'         => ['wevia'],
        'ethica'        => ['ethica'],
        'paperclip'     => ['paperclip'],
        'strategy'      => ['plan', 'strateg'],
        'operations'    => ['scout', 'arena', 'ops'],
        'business'      => ['crm', 'sales', 'deal'],
        'test'          => ['test', 'demo'],
        'tools'         => ['tool'],
        'office'        => ['office'],
        'deerflow'      => ['deerflow'],
        'security'      => ['security'],
        'monitoring'    => ['monitoring', 'monitor'],
        'agents'        => ['agent'],
        'api_tools'     => ['api'],
    ];

    foreach ($rules as $label => $needles) {
        foreach ($needles as $needle) {
            if (strpos($name, $needle) !== false) {
                return $label;
            }
        }
    }

    return 'module';
}
|
|
|
|
/**
 * Aggregate page entries per category.
 *
 * @param array $pages Page entries; each must carry 'class', 'orphan' and 'name'.
 * @return array Map of class label => ['count' => int, 'orphans' => int,
 *               'pages' => list of page names], in order of first appearance.
 */
function group_by_class($pages) {
    $empty_bucket = ['count' => 0, 'orphans' => 0, 'pages' => []];
    $buckets = [];

    foreach ($pages as $entry) {
        $label = $entry['class'];
        $bucket = isset($buckets[$label]) ? $buckets[$label] : $empty_bucket;

        $bucket['count'] += 1;
        $bucket['orphans'] += $entry['orphan'] ? 1 : 0;
        $bucket['pages'][] = $entry['name'];

        // Re-assigning an existing key keeps its original position.
        $buckets[$label] = $bucket;
    }

    return $buckets;
}
|
|
|
|
/**
 * Read the <title> of an HTML file.
 *
 * Only the first 2000 bytes of the file are inspected; the match is
 * case-insensitive and may span lines. Entities are decoded (quotes included)
 * and surrounding whitespace is trimmed.
 *
 * @param string $file Path to the HTML file.
 * @return string The title, or '' when the file is unreadable, empty, or has
 *                no complete <title>…</title> within the inspected bytes.
 */
function extract_title($file) {
    $head = @file_get_contents($file, false, null, 0, 2000);

    $found = [];
    $has_title = $head && preg_match('/<title>(.*?)<\/title>/is', $head, $found);
    if (!$has_title) {
        return '';
    }

    $decoded = html_entity_decode($found[1], ENT_QUOTES);
    return trim($decoded);
}
|
|
|
|
// Cache layer
//
// Serve the registry from $CACHE_FILE while it is younger than $CACHE_TTL
// seconds; otherwise (or when ?rebuild is non-empty) rescan $HTML_DIR and
// refresh the cache. $data['cached'] tells the caller which path was taken.
$action = $_GET['action'] ?? 'summary';
$cached = null;

if (file_exists($CACHE_FILE) && (time() - filemtime($CACHE_FILE)) < $CACHE_TTL) {
    $cache_raw = @file_get_contents($CACHE_FILE);
    $cache_decoded = is_string($cache_raw) ? json_decode($cache_raw, true) : null;
    // Accept only a decoded array: a truncated or foreign file may decode to
    // null or to a scalar, which must not be indexed as a registry.
    if (is_array($cache_decoded)) {
        $cached = $cache_decoded;
    }
}

if ($cached && empty($_GET['rebuild'])) {
    $data = $cached;
    $data['cached'] = true;
} else {
    $data = build_registry($HTML_DIR);
    $data['cached'] = false;

    // json_encode() returns false when any string (e.g. a page title) is not
    // valid UTF-8. Substitute the bad bytes (PHP 7.2+) and never write a
    // failed encode, which would leave an empty, useless cache file behind.
    $cache_json = json_encode($data, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE);
    if ($cache_json !== false) {
        @file_put_contents($CACHE_FILE, $cache_json);
    }
}
|
|
|
|
// Action routing
//
// Emits exactly one JSON document depending on $action. Query parameters are
// untrusted: PHP turns "?page[]=x" into an array, and using an array as an
// offset inside isset() is a fatal TypeError on PHP 8, so non-string values
// are rejected up front with the regular "not found" answers.

// Invalid UTF-8 in titles or file names would make json_encode() return false
// and the response body empty; substitute the offending bytes (PHP 7.2+).
$json_flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE;

switch ($action) {
    case 'summary':
        echo json_encode([
            'ts' => $data['ts'],
            'total_pages' => $data['total_pages'],
            'orphans_count' => $data['orphans_count'],
            'referenced_pages' => $data['referenced_pages'],
            'links_count' => $data['links_count'],
            'cached' => $data['cached'],
            // Per-class counters only; the page lists stay in by_class/full.
            'classes' => array_map(function ($c) {
                return ['count' => $c['count'], 'orphans' => $c['orphans']];
            }, $data['by_class']),
            'top_referenced' => array_slice($data['top_referenced'], 0, 10),
        ], $json_flags);
        break;

    case 'orphans':
        $orphan_details = [];
        foreach ($data['orphans'] as $o) {
            $orphan_details[$o] = [
                'class' => $data['pages'][$o]['class'],
                'size_kb' => $data['pages'][$o]['size_kb'],
                'title' => $data['pages'][$o]['title'],
                'mtime' => $data['pages'][$o]['mtime'],
                'outgoing_links' => $data['pages'][$o]['outgoing_links'],
            ];
        }
        echo json_encode(['count' => count($data['orphans']), 'orphans' => $orphan_details], $json_flags);
        break;

    case 'by_class':
        $cls = $_GET['class'] ?? 'hub';
        if (!is_string($cls) || !isset($data['by_class'][$cls])) {
            echo json_encode(['error'=>'class not found','classes'=>array_keys($data['by_class'])]);
            break;
        }
        echo json_encode($data['by_class'][$cls], $json_flags);
        break;

    case 'links_of':
        $page = $_GET['page'] ?? '';
        if (!is_string($page) || !$page || !isset($data['pages'][$page])) {
            echo json_encode(['error'=>'page not found']);
            break;
        }
        // Reverse lookup: every page whose outgoing list contains $page.
        $incoming = [];
        foreach ($data['hrefs_by_page'] as $src => $targets) {
            if (in_array($page, $targets, true)) {
                $incoming[] = $src;
            }
        }
        echo json_encode([
            'page' => $page,
            'class' => $data['pages'][$page]['class'],
            'orphan' => $data['pages'][$page]['orphan'],
            'incoming_links' => $data['pages'][$page]['incoming_links'],
            'incoming_from' => $incoming,
            'outgoing_links' => $data['pages'][$page]['outgoing_links'],
            'outgoing_to' => $data['hrefs_by_page'][$page] ?? [],
        ], $json_flags);
        break;

    case 'full':
        echo json_encode($data, $json_flags);
        break;

    default:
        echo json_encode(['error'=>'unknown action','actions'=>['summary','orphans','by_class','links_of','full']]);
}
|