false,'error'=>'url or text required']); exit; }

    if ($url) {
        // NOTE(review): $url is client-supplied and fetched server-side — this is
        // an SSRF vector (internal hosts, cloud metadata endpoints, file:// …).
        // Restrict allowed schemes/hosts before this point.
        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => 10,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS      => 5, // bound redirect chains; unbounded by default with FOLLOWLOCATION
        ]);
        $text = curl_exec($ch);
        curl_close($ch);

        // Fix: curl_exec() returns false on failure. The original passed that
        // false straight into strip_tags() (deprecated for non-string args in
        // PHP 8.1) and then answered ok=true with 0 chunks. Fail explicitly
        // instead, matching the error style of the other handlers.
        if ($text === false) {
            echo json_encode(['ok'=>false,'error'=>'failed to fetch url']); exit;
        }

        // Strip HTML so chunking operates on visible text only.
        $text = strip_tags($text);
    }

    // Chunking: fixed-size windows. strlen/substr are byte-wise, so multi-byte
    // characters can be split at chunk boundaries — acceptable for these
    // rough per-chunk statistics, but worth knowing for a real RAG pipeline.
    $chunk_size = 2000; // chars
    $chunks = [];
    $total_len = strlen($text);
    for ($i = 0; $i < $total_len; $i += $chunk_size) {
        $chunks[] = substr($text, $i, $chunk_size);
    }

    // Iterative summarization (simulated — would call LLM each chunk).
    // Heuristic stand-in: report the longest sentence of each chunk.
    $summaries = [];
    foreach ($chunks as $idx => $chunk) {
        // preg_split() can return false on a PCRE error; fall back to an
        // empty list so usort() below never receives a non-array.
        $sentences = preg_split('/[.!?]\s+/', $chunk) ?: [];
        // Longest-first ordering; spaceship keeps the comparator's sign
        // semantics identical to the original subtraction form.
        usort($sentences, fn($a, $b) => strlen($b) <=> strlen($a));
        $summaries[] = [
            'chunk'        => $idx,
            'chars'        => strlen($chunk),
            'top_sentence' => substr($sentences[0] ?? '', 0, 200),
        ];
    }

    // Meta-summary (would concatenate + re-summarize via LLM)
    $meta = 'Document analyzed: ' . $total_len . ' chars in ' . count($chunks)
        . ' chunks of ~' . $chunk_size . ' chars each.';

    echo json_encode([
        'ok'           => true,
        'total_chars'  => $total_len,
        'total_chunks' => count($chunks),
        'chunk_size'   => $chunk_size,
        'summaries'    => $summaries,
        'meta_summary' => $meta,
        'note'         => 'Full RAG: chunks to Qdrant upsert + query semantic search',
    ]);
    exit;
}

if ($action === 'qdrant_upsert') {
    $collection = $_POST['collection'] ?? 'wevia_kb';
    $text = $_POST['text'] ?? '';
    if (!$text) { echo json_encode(['ok'=>false,'error'=>'text required']); exit; }

    // Would call Ollama embed + Qdrant upsert — currently acknowledges only.
    echo json_encode([
        'ok'                => true,
        'note'              => 'upsert queued (requires ollama embed endpoint + qdrant connection)',
        'target_collection' => $collection,
        'chars'             => strlen($text),
    ]);
    exit;
}

// Unknown action: advertise the supported ones.
echo json_encode(['ok'=>false, 'actions'=>['analyze','qdrant_upsert']]);