100 lines
3.4 KiB
PHP
100 lines
3.4 KiB
PHP
<?php
// Wave 209 · /api/duplicates-registry.php
// Duplicate screens registry. Non-destructive audit: returns groups with
// canonical file + legacy candidates + recommendations. Zero delete.
//
// Flow:
//   1. List the *.html files sitting directly in the document root.
//   2. Strip version/legacy/timestamp suffixes from each name to get a "base".
//   3. Every base shared by two or more files becomes a duplicate group.
//   4. Per group, pick a canonical file and flag legacy files whose size is
//      within 10% of it as "close copies".
//   5. Emit the report as JSON. Nothing on disk is modified.

// `@` silences diagnostics from the include; require_once still halts the
// script if the file cannot be loaded.
// NOTE(review): what the guard itself does is not visible from this file.
@require_once __DIR__ . '/wevia-sanitizer-guard.php';

header('Content-Type: application/json; charset=utf-8');
header('Access-Control-Allow-Origin: *');

$DOCROOT = '/var/www/html';

// scandir() returns false when the directory is missing or unreadable.
// Report that explicitly rather than feeding false into array_filter().
$listing = is_dir($DOCROOT) ? scandir($DOCROOT) : false;
if ($listing === false) {
    http_response_code(500);
    echo json_encode([
        'ok' => false,
        'ts' => date('c'),
        'error' => 'docroot_unreadable',
        'version' => 'wave-209',
    ]);
    exit;
}

// Top-level regular files ending in ".html" only (original keys preserved).
$pages = array_filter($listing, function ($f) use ($DOCROOT) {
    return substr($f, -5) === '.html' && is_file("$DOCROOT/$f");
});

// Normalize base name (strip version/legacy/saas suffixes).
// - The timestamp alternative also consumes an optional leading separator so
//   "page-20240101" becomes "page" instead of "page-".
// - Stripping repeats until the name stops changing, so stacked suffixes
//   ("-old-20240101", "-v2-new") are fully removed.
// - If stripping would leave nothing, the original stem is kept so unrelated
//   files are not lumped together under an empty base.
$normalize_base = function ($stem) {
    $base = $stem;
    do {
        $before = $base;
        $base = preg_replace('/-v\d+$|-new$|-legacy$|-old$|[-_.]?\d{8,14}$|-saas$|-v\d+-\w+$/', '', $base);
        $base = preg_replace('/-pre-\w+$/', '', $base);
    } while ($base !== $before && $base !== '');

    return $base === '' ? $stem : $base;
};

$base_groups = [];
foreach ($pages as $p) {
    // substr(..., 0, -5) drops the ".html" extension.
    $base_groups[$normalize_base(substr($p, 0, -5))][] = $p;
}

// Duplicates only
$dups = [];
$total_dups = 0;
$total_close = 0;
$suffix_re = '/-v\d+|-legacy|-old|-saas|-new/';

foreach ($base_groups as $base => $files) {
    if (count($files) < 2) {
        continue;
    }

    // Stat every file of the group once; the sort, the per-file entries and
    // the size comparison below all read from this snapshot. A failed stat
    // (false) is recorded as 0.
    $meta = [];
    foreach ($files as $f) {
        $path = "$DOCROOT/$f";
        $meta[$f] = [
            'size' => (int) filesize($path),
            'mtime' => (int) filemtime($path),
        ];
    }

    // Canonical = first file after sorting by:
    //   1. no version/legacy marker in the name before one that has it,
    //   2. shorter name,
    //   3. newest mtime,
    //   4. name order (tie-break so the result is deterministic; usort is
    //      not stable before PHP 8.0).
    usort($files, function ($a, $b) use ($meta, $suffix_re) {
        $has_suffix_a = preg_match($suffix_re, $a);
        $has_suffix_b = preg_match($suffix_re, $b);
        if ($has_suffix_a !== $has_suffix_b) {
            return $has_suffix_a - $has_suffix_b;
        }
        if (strlen($a) !== strlen($b)) {
            return strlen($a) - strlen($b);
        }
        if ($meta[$a]['mtime'] !== $meta[$b]['mtime']) {
            return $meta[$b]['mtime'] > $meta[$a]['mtime'] ? 1 : -1;
        }
        return strcmp($a, $b);
    });

    $canonical = $files[0];
    $legacy = array_slice($files, 1);

    $entries = [];
    foreach ($files as $f) {
        $entries[] = [
            'file' => $f,
            'size_bytes' => $meta[$f]['size'],
            'last_modified' => date('c', $meta[$f]['mtime']),
            'is_canonical' => $f === $canonical,
            'url' => "/$f"
        ];
    }

    // Size similarity between canonical and legacy: a legacy file is a
    // "close copy" when the size difference is under 10% of the larger file.
    // This compares sizes only, not content. An empty canonical never matches.
    $can_size = $meta[$canonical]['size'];
    $close_copies = [];
    foreach ($legacy as $l) {
        $l_size = $meta[$l]['size'];
        if ($can_size > 0 && abs($can_size - $l_size) / max($can_size, $l_size) < 0.1) {
            $close_copies[] = $l;
        }
    }

    $recommendation = count($close_copies) > 0
        ? 'Archive legacy copies (add HTTP 301 to canonical) - content quasi-identical'
        : 'Keep for now - legacy has distinct content, verify if still referenced';

    $dups[] = [
        // PHP turns numeric-string array keys into ints; cast back so the
        // JSON type of "base" is always a string.
        'base' => (string) $base,
        'count' => count($files),
        'canonical' => $canonical,
        'canonical_url' => "/$canonical",
        'legacy_files' => $legacy,
        'close_copies' => $close_copies,
        'recommendation' => $recommendation,
        'files' => $entries
    ];
    $total_dups += count($legacy);
    $total_close += count($close_copies);
}

// Sort by count desc; equal counts are ordered by base name so the output is
// deterministic.
usort($dups, function ($a, $b) {
    if ($a['count'] !== $b['count']) {
        return $b['count'] - $a['count'];
    }
    return strcmp($a['base'], $b['base']);
});

$out = [
    'ok' => true,
    'ts' => date('c'),
    'total_pages' => count($pages),
    'total_duplicate_groups' => count($dups),
    'total_legacy_files' => $total_dups,
    'total_close_copies' => $total_close,
    'groups' => $dups,
    'version' => 'wave-209',
    'doctrine' => 'non-destructive · document canonical + legacy · zero delete'
];

// json_encode() returns false on failure (e.g. a filename that is not valid
// UTF-8). Send an explicit error payload instead of an empty body.
$json = json_encode($out, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
if ($json === false) {
    $json_error = json_last_error();
    http_response_code(500);
    $json = json_encode([
        'ok' => false,
        'ts' => date('c'),
        'error' => 'json_encode_failed',
        'json_error_code' => $json_error,
        'version' => 'wave-209',
    ]);
}

echo $json;