Files
html/api/opus5-orphans-classifier.php

147 lines
6.3 KiB
PHP

<?php
// OPUS5 — Orphans Classifier (doctrine 91)
// Loads the orphan-page list published by /api/wevia-pages-registry.php?action=orphans
// (from the registry's on-disk cache when it is usable) and sorts every page into one bucket:
// - LEGITIMATE_ARCHIVE : 404.html, test-*, v2/v3/legacy, hidden, deprecated → safe to archive
// - ACTIVE_ORPHAN      : live page that is not linked from anywhere → must be re-linked
// - DORMANT_CANDIDATE  : old page that may still be useful → left to the user's decision

if (!function_exists('opus5_decode_orphans')) {
    /**
     * Extract the `orphans` collection from a raw registry payload.
     *
     * @param mixed $raw JSON text, or false/null when the read or the HTTP call failed.
     * @return array|null The array stored under `orphans`, or null when the payload is
     *                    empty, is not valid JSON, or carries no array under that key.
     */
    function opus5_decode_orphans($raw)
    {
        if (!is_string($raw) || $raw === '') {
            return null;
        }
        $decoded = json_decode($raw, true);
        if (!is_array($decoded) || !isset($decoded['orphans']) || !is_array($decoded['orphans'])) {
            return null;
        }
        return $decoded['orphans'];
    }
}

header('Content-Type: application/json');
header('Access-Control-Allow-Origin: *');
$t0 = microtime(true);
$R = ['ts' => date('c'), 'source' => 'opus5-orphans-classifier'];

// Preferred source: the registry cache file, read straight from disk. Per the original
// note this sidesteps the nginx 301 redirect that the HTTP route goes through.
$cache_path = '/var/www/html/api/wevia-pages-registry.cache.json';
$orphans = null;
$orphans_source = 'none';

if (is_readable($cache_path)) {
    $orphans = opus5_decode_orphans(file_get_contents($cache_path));
    if ($orphans !== null) {
        $orphans_source = 'cache';
    }
}

// Fallback: ask the live endpoint. This now also runs when the cache exists but is
// unreadable, truncated or lacks an `orphans` array, instead of silently reporting
// zero orphans.
if ($orphans === null && function_exists('curl_init')) {
    $ch = curl_init('http://127.0.0.1/api/wevia-pages-registry.php?action=orphans');
    if ($ch !== false) {
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 10,
            CURLOPT_FOLLOWLOCATION => true,
        ]);
        // curl_exec() yields false on transport errors; the decoder maps that to null.
        $orphans = opus5_decode_orphans(curl_exec($ch));
        curl_close($ch);
        if ($orphans !== null) {
            $orphans_source = 'http';
        }
    }
}

// Downstream code iterates and counts $orphans, so it must always be an array.
if ($orphans === null) {
    $orphans = [];
}
// Lets a consumer tell "no orphans" apart from "no source could be read" ('none').
$R['orphans_source'] = $orphans_source;
// Archive table: PCRE pattern => reason label for pages that are orphaned on purpose.
// Within this table the first matching pattern supplies the reason, so entry order is
// significant when a name matches several rules (e.g. "test-foo-v2.html").
// NOTE(review): the ^-anchored rules presumably expect bare file names as page keys —
// confirm against the registry output.
$archive_patterns = [
    '/^404\.html$/'              => 'error_page_default',
    '/-v\d+\.html$/'             => 'versioned_legacy',
    '/-legacy\.html$/'           => 'marked_legacy',
    '/^test-/'                   => 'test_page',
    '/-test\.html$/'             => 'test_page',
    '/-hidden\.html$/'           => 'hidden_intentional',
    '/^google[a-z0-9]+\.html$/'  => 'google_verification',
    '/-iso3d\.html$/'            => 'experimental_3d',
    '/-alive\.html$/'            => 'demo_variant',
    '/-final\.html$/'            => 'deprecated_final',
    '/-hd2\.html$/'              => 'hd_variant',
    '/-3d\.html$/'               => 'demo_3d',
    '/^data-deletion\.html$/'    => 'gdpr_required_page',
    '/-demo-/'                   => 'demo_page',
    '/^dormant-/'                => 'explicitly_dormant',
    '/-offline\.html$/'          => 'offline_variant',
    '/^all-screens-live\.html$/' => 'dev_internal',
];
// Active whitelist: PCRE pattern => reason label for business pages that are still in
// use and therefore have to be linked again. Every rule is anchored on both ends, i.e.
// it names exact files (optionally with a small alternation).
$active_patterns = [
    '/^ethica-(login|chatbot)\.html$/'       => 'ethica_module_active',
    '/^office-login\.html$/'                 => 'office_sso_active',
    '/^qa-hub\.html$/'                       => 'qa_hub_active',
    '/^infra-monitor\.html$/'                => 'infra_live_active',
    '/^monitoring\.html$/'                   => 'monitoring_active',
    '/^sso-monitor\.html$/'                  => 'sso_monitoring_active',
    '/^cron-control\.html$/'                 => 'cron_admin_active',
    '/^wevia-widget\.html$/'                 => 'wevia_widget_consumer_facing',
    '/^plan-du-site\.html$/'                 => 'sitemap_required_seo',
    '/^claw-(chat|code)\.html$/'             => 'openclaw_active',
    '/^droid-terminal\.html$/'               => 'terminal_active',
    '/^dmaic-workbench\.html$/'              => 'leansigma_active',
    '/^bpmn-studio-live\.html$/'             => 'process_modeler_active',
    '/^candidate-detail\.html$/'             => 'hr_candidate_active',
    '/^ecosysteme-ia-maroc\.html$/'          => 'ia_ecosystem_active',
    '/^weval-(arena|ops-screens)\.html$/'    => 'internal_ops_active',
    '/^acquired-dashboard\.html$/'           => 'acquired_dashboard_active',
    '/^weval-enterprise-management\.html$/'  => 'enterprise_mgmt_active',
    // NOTE(review): this file name also satisfies the archive rule /-v\d+\.html$/, so the
    // bucket it lands in depends on which table the classifier consults first.
    '/^enterprise-complete-v73\.html$/'      => 'enterprise_v73_retained',
];
// Classification
if (!function_exists('opus5_first_reason')) {
    /**
     * Return the reason attached to the first pattern that matches a page name.
     *
     * @param array  $patterns Map of PCRE pattern => reason label, tried in array order.
     * @param string $page     Page name to test.
     * @return string|null The reason of the first match, or null when nothing matches
     *                     (a pattern that fails to compile counts as "no match").
     */
    function opus5_first_reason(array $patterns, string $page)
    {
        foreach ($patterns as $pattern => $reason) {
            if (preg_match($pattern, $page) === 1) {
                return $reason;
            }
        }
        return null;
    }
}

if (!function_exists('opus5_classify_orphans')) {
    /**
     * Sort orphan pages into LEGITIMATE_ARCHIVE / ACTIVE_ORPHAN / DORMANT_CANDIDATE.
     *
     * The active whitelist is consulted BEFORE the archive rules. The whitelist names
     * exact files, whereas the archive rules are broad prefix/suffix patterns; testing
     * the archive rules first made the whitelist entry for enterprise-complete-v73.html
     * unreachable, because that name also matches /-v\d+\.html$/.
     *
     * @param array $orphans          Page name => metadata array (keys read: class,
     *                                size_kb, mtime). A plain list of page names, or a
     *                                list of arrays carrying a `page` key, is accepted
     *                                too — the registry schema is not visible from this
     *                                file, so those forms are handled defensively.
     * @param array $archive_patterns PCRE pattern => reason for intentional orphans.
     * @param array $active_patterns  PCRE pattern => reason for pages to re-link.
     * @return array The three buckets, each a list of entry arrays.
     */
    function opus5_classify_orphans(array $orphans, array $archive_patterns, array $active_patterns)
    {
        $buckets = [
            'LEGITIMATE_ARCHIVE' => [],
            'ACTIVE_ORPHAN' => [],
            'DORMANT_CANDIDATE' => [],
        ];

        foreach ($orphans as $page => $meta) {
            // List-shaped input: the integer key is only a position, the name is in the value.
            if (is_int($page)) {
                if (is_string($meta)) {
                    $page = $meta;
                } elseif (is_array($meta) && isset($meta['page']) && is_string($meta['page'])) {
                    $page = $meta['page'];
                }
            }
            if (!is_array($meta)) {
                $meta = [];
            }

            // Fields shared by every bucket, with the same defaults as before.
            $details = [
                'class' => $meta['class'] ?? '?',
                'size_kb' => $meta['size_kb'] ?? 0,
                'mtime' => $meta['mtime'] ?? '',
            ];
            $name = (string) $page;

            // 1) Explicit whitelist wins over the generic archive rules.
            $reason = opus5_first_reason($active_patterns, $name);
            if ($reason !== null) {
                $buckets['ACTIVE_ORPHAN'][] = ['page' => $page, 'reason' => $reason]
                    + $details
                    + ['action_required' => 'LINK_FROM_WTP_OR_HUB'];
                continue;
            }

            // 2) Intentional orphans (tests, legacy versions, verification files, ...).
            $reason = opus5_first_reason($archive_patterns, $name);
            if ($reason !== null) {
                $buckets['LEGITIMATE_ARCHIVE'][] = ['page' => $page, 'reason' => $reason] + $details;
                continue;
            }

            // 3) Anything else is left to the user's decision.
            $buckets['DORMANT_CANDIDATE'][] = ['page' => $page]
                + $details
                + ['action_required' => 'USER_DECISION'];
        }

        return $buckets;
    }
}

// The guards make a missing or malformed input degrade to an empty classification
// rather than a fatal TypeError in the middle of a JSON response.
$out = opus5_classify_orphans(
    is_array($orphans ?? null) ? $orphans : [],
    $archive_patterns ?? [],
    $active_patterns ?? []
);
// Response assembly: bucket contents, headline counters, next-step hints, then output.
// Each size is computed once and reused.
$n_archive = count($out['LEGITIMATE_ARCHIVE']);
$n_active = count($out['ACTIVE_ORPHAN']);
$n_dormant = count($out['DORMANT_CANDIDATE']);
$n_total = count($orphans);

// Share of orphans that are intentional, in percent with one decimal; 0 when the list is empty.
$archive_share = 0;
if ($n_total > 0) {
    $archive_share = round($n_archive / $n_total * 100, 1);
}

$R['classification'] = $out;
$R['summary'] = [
    'total_orphans' => $n_total,
    'legitimate_archive' => $n_archive,
    'active_orphan_to_link' => $n_active,
    'dormant_candidate' => $n_dormant,
    // legit_pct and archived_pct carry the same value; both keys are emitted.
    'legit_pct' => $archive_share,
    'actionable_count' => $n_active + $n_dormant,
    'archived_pct' => $archive_share,
];

// One hint per non-empty bucket, in fixed order: link, review, archive.
$hints = [];
if ($n_active > 0) {
    $hints[] = 'LINK ' . $n_active . ' active orphans depuis WTP drawer ou Unified Hub';
}
if ($n_dormant > 0) {
    $hints[] = 'REVIEW ' . $n_dormant . ' dormant candidates : decide keep+link ou archive';
}
if ($n_archive > 0) {
    $hints[] = 'ARCHIVE OK ' . $n_archive . ' pages : test/legacy/deprecated (decision user si move vers /archive/)';
}
$R['recommendation'] = $hints;

$R['doctrine'] = '91 — orphans classifier : separe archive legitime / active orphan / dormant candidate';
// Wall-clock time since $t0 was taken at the top of the script, in whole milliseconds.
$R['total_ms'] = round((microtime(true) - $t0) * 1000);

echo json_encode($R, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);