60 lines
2.3 KiB
PHP
60 lines
2.3 KiB
PHP
<?php
|
|
// opus-arch-infinite-context.php - Cap 15 (Doctrine 99)
|
|
// Every response produced by this endpoint is a JSON document.
header('Content-Type: application/json');

// Requested operation, taken from the query string; "analyze" when none is given.
$action = isset($_GET['action']) ? $_GET['action'] : 'analyze';
|
|
|
|
// "analyze" action.
//
// Input:  `url` (GET or POST) — document to download, or `text` (POST) — the
//         document itself. When both are given, the downloaded document wins.
// Output: one JSON object with chunk statistics and, per chunk, the longest
//         sentence (truncated to 200 bytes). The script terminates afterwards.
//
// All sizes are byte counts: strlen()/str_split()/substr() are byte-based.
if ($action === 'analyze') {
    $url = $_GET['url'] ?? $_POST['url'] ?? '';
    $text = $_POST['text'] ?? '';

    // Array-style parameters (e.g. url[]=x) would make curl_init()/strlen() throw
    // a TypeError, so anything that is not a string is treated as missing.
    if (!is_string($url)) {
        $url = '';
    }
    if (!is_string($text)) {
        $text = '';
    }

    // Compare against '' explicitly: a truthiness test would reject the text "0".
    if ($url === '' && $text === '') {
        echo json_encode(['ok' => false, 'error' => 'url or text required']);
        exit;
    }

    if ($url !== '') {
        $ch = curl_init($url);
        if ($ch === false) {
            echo json_encode(['ok' => false, 'error' => 'could not fetch url']);
            exit;
        }

        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 10,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 5,
            // The URL is caller-controlled. Without these two options curl also
            // honors file://, gopher://, dict:// ... (directly or via a redirect),
            // which would expose local files and services.
            CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        ]);
        // NOTE(review): http(s) requests to internal/private hosts are still
        // possible; consider a host allow-list if this endpoint is public.

        $body = curl_exec($ch);
        curl_close($ch);

        // curl_exec() returns false on failure; report it instead of silently
        // "analyzing" an empty document.
        if ($body === false) {
            echo json_encode(['ok' => false, 'error' => 'could not fetch url']);
            exit;
        }

        // Strip HTML
        $text = strip_tags($body);
    }

    // Chunking
    $chunk_size = 2000; // bytes
    $total_len = strlen($text);
    // str_split('') yields [''] before PHP 8.2; an empty document has no chunks.
    $chunks = $total_len > 0 ? str_split($text, $chunk_size) : [];

    // Iterative summarization (simulated — would call LLM each chunk)
    $summaries = [];
    foreach ($chunks as $idx => $chunk) {
        // Naive sentence split on ., ! or ? followed by whitespace.
        $sentences = preg_split('/[.!?]\s+/', $chunk);
        if ($sentences === false) {
            // PCRE error: fall back to treating the whole chunk as one sentence.
            $sentences = [$chunk];
        }

        // Pick the longest sentence (first one wins on ties); a linear scan is
        // enough, no need to sort the whole list.
        $longest = '';
        foreach ($sentences as $sentence) {
            if (strlen($sentence) > strlen($longest)) {
                $longest = $sentence;
            }
        }

        $summaries[] = [
            'chunk' => $idx,
            'chars' => strlen($chunk),
            'top_sentence' => substr($longest, 0, 200),
        ];
    }

    // Meta-summary (would concatenate + re-summarize via LLM)
    $meta = 'Document analyzed: ' . $total_len . ' chars in ' . count($chunks) . ' chunks of ~' . $chunk_size . ' chars each.';

    // Byte-based cuts can split a multi-byte UTF-8 sequence, and fetched pages
    // are not guaranteed to be UTF-8 at all. Without the substitute flag
    // json_encode() would return false and the response body would be empty.
    echo json_encode([
        'ok' => true,
        'total_chars' => $total_len,
        'total_chunks' => count($chunks),
        'chunk_size' => $chunk_size,
        'summaries' => $summaries,
        'meta_summary' => $meta,
        'note' => 'Full RAG: chunks to Qdrant upsert + query semantic search',
    ], JSON_INVALID_UTF8_SUBSTITUTE);
    exit;
}
|
|
// "qdrant_upsert" action (placeholder).
//
// Input:  POST `text` (required) and POST `collection` (optional, defaults to
//         "wevia_kb").
// Output: one JSON object acknowledging the request; nothing is embedded or
//         stored here. The script terminates afterwards.
if ($action === 'qdrant_upsert') {
    $collection = $_POST['collection'] ?? 'wevia_kb';
    $text = $_POST['text'] ?? '';

    // Array-style parameters (collection[]=x) are rejected rather than echoed back.
    if (!is_string($collection)) {
        echo json_encode(['ok' => false, 'error' => 'collection must be a string']);
        exit;
    }

    // is_string() keeps strlen() below from throwing a TypeError on text[]=x;
    // the explicit '' comparison accepts the text "0", which a truthiness test
    // would treat as missing.
    if (!is_string($text) || $text === '') {
        echo json_encode(['ok' => false, 'error' => 'text required']);
        exit;
    }

    // Would call Ollama embed + Qdrant upsert
    // The collection name is echoed back verbatim; the substitute flag keeps
    // json_encode() from returning false when it contains invalid UTF-8.
    echo json_encode([
        'ok' => true,
        'note' => 'upsert queued (requires ollama embed endpoint + qdrant connection)',
        'target_collection' => $collection,
        'chars' => strlen($text),
    ], JSON_INVALID_UTF8_SUBSTITUTE);
    exit;
}
|
|
// No branch above handled the request: answer with a failure flag and the list
// of action names this endpoint understands.
$supported_actions = ['analyze', 'qdrant_upsert'];
echo json_encode(['ok' => false, 'actions' => $supported_actions]);
|