Files
html/api/opus5-gpu-grid.php
2026-04-17 18:19:00 +02:00

182 lines
6.8 KiB
PHP

<?php
// OPUS5 — Grid GPU v2 (doctrine 80) — MULTI-ENDPOINT PARALLEL via curl_multi
// Uses the 3 existing internal LLM endpoints in parallel for ~3x throughput
// Endpoints: sovereign-proxy (SSE), weval-ia-fast-safe, opus5-autonomous-orchestrator
// NOTE(review): the endpoint list above does not match $PROVIDERS below, which targets
// wevia-master-dispatch.php rather than a sovereign-proxy URL — confirm which is current.
// Every response from this script is JSON.
header('Content-Type: application/json');
// Base envelope shared by every action's response: ISO-8601 timestamp plus a source tag.
$R = ['ts'=>date('c'), 'source'=>'opus5-gpu-grid-v2'];
// The request body is read as JSON; anything falsy after decoding (invalid JSON, empty body,
// empty array, 0, ...) is replaced by an empty array.
$raw = file_get_contents('php://input');
$d = json_decode($raw, true) ?: [];
// Action resolution order: query string first, then JSON body, then the 'health' default.
$action = $_GET['action'] ?? ($d['action'] ?? 'health');
// Provider table consumed by every action below. Each entry carries:
//   id   — label echoed back in responses
//   url  — loopback HTTP endpoint that receives the POSTed JSON payload
//   type — tag that no code visible in this file reads; presumably informational (TODO confirm)
$PROVIDERS = [
[
'id' => 'safe_wrapper',
'url' => 'http://127.0.0.1/api/opus5-weval-ia-fast-safe.php',
'type' => 'internal'
],
[
'id' => 'meta_orchestrator',
'url' => 'http://127.0.0.1/api/opus5-autonomous-orchestrator.php',
'type' => 'internal_orch'
],
[
'id' => 'dispatch_proxy',
'url' => 'http://127.0.0.1/api/wevia-master-dispatch.php',
'type' => 'internal_disp'
]
];
/**
 * Create a cURL easy handle configured to POST a JSON payload.
 *
 * The handle is only configured, not executed; callers attach it to a
 * curl_multi handle and read the body back once the transfer is done.
 *
 * @param string $url     Target URL.
 * @param mixed  $payload Request body, sent as-is with a JSON content type.
 * @param int    $timeout Maximum transfer time in seconds (default 20).
 * @return resource|\CurlHandle|false Whatever curl_init() returned.
 */
function build_ch($url, $payload, $timeout = 20) {
    $options = [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        // Return the body to the caller instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => $timeout,
        // Certificate and host-name verification are both switched off.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => 0,
    ];

    $handle = curl_init($url);
    curl_setopt_array($handle, $options);

    return $handle;
}
/**
 * Reduce a provider's raw HTTP body to a text/provider pair.
 *
 * Lookup order for the text is `response`, `content`, `final_response`;
 * for the provider it is `provider`, `final_provider`. When the body is not
 * JSON (or decodes to something falsy) the first 500 bytes of the raw body
 * are returned as text; when it is JSON without any of the text keys, the
 * first 300 bytes are.
 *
 * The returned text is always a string: callers feed it to substr()/strlen(),
 * which throw a TypeError on PHP 8 if a provider returns a structured value
 * (array/object) under one of the text keys. Structured values are therefore
 * re-encoded as JSON and other scalars are cast.
 *
 * @param mixed $body Raw response body; cast to string before decoding.
 * @return array{text: string, provider: mixed}
 */
function parse_response($body) {
    $raw = (string)$body;
    $decoded = json_decode($raw, true);

    if (!$decoded) {
        // The cast covers PHP 7, where substr() on an empty string yields false.
        return ['text' => (string)substr($raw, 0, 500), 'provider' => '?'];
    }

    // A truthy scalar (e.g. the JSON document `"hello"` or `42`) has no keys to look up.
    $fields = is_array($decoded) ? $decoded : [];

    $text = $fields['response']
        ?? $fields['content']
        ?? $fields['final_response']
        ?? substr($raw, 0, 300);

    if (!is_string($text)) {
        $text = is_scalar($text)
            ? (string)$text
            : (string)json_encode($text, JSON_UNESCAPED_UNICODE);
    }

    return [
        'text' => $text,
        'provider' => $fields['provider'] ?? $fields['final_provider'] ?? '?',
    ];
}
if ($action === 'health') {
    // Probe every provider concurrently with a short "ping" POST (5 s timeout)
    // and report which ones answered with HTTP 200.
    $multi = curl_multi_init();
    $probes = [];
    $startedAt = microtime(true);

    foreach ($PROVIDERS as $provider) {
        $probe = build_ch($provider['url'], json_encode(['message'=>'ping']), 5);
        curl_multi_add_handle($multi, $probe);
        $probes[] = ['ch'=>$probe, 'provider'=>$provider];
    }

    // Drive all transfers until none is still active.
    $active = null;
    do {
        curl_multi_exec($multi, $active);
        curl_multi_select($multi, 0.1);
    } while ($active > 0);

    $statuses = [];
    $upCount = 0;
    foreach ($probes as $entry) {
        $code = curl_getinfo($entry['ch'], CURLINFO_HTTP_CODE);
        $isUp = ($code === 200);
        if ($isUp) {
            $upCount++;
        }
        $statuses[] = ['id'=>$entry['provider']['id'], 'up'=>$isUp, 'http'=>$code];

        curl_multi_remove_handle($multi, $entry['ch']);
        curl_close($entry['ch']);
    }
    curl_multi_close($multi);

    $R['providers'] = $statuses;
    $R['up_count'] = $upCount;
    // Wall-clock time of the whole parallel probe, in milliseconds.
    $R['parallel_ms'] = round((microtime(true) - $startedAt) * 1000);

    echo json_encode($R, JSON_PRETTY_PRINT);
    exit;
}
if ($action === 'parallel_query') {
    // Send the same query to every provider in parallel, then report the
    // fastest usable answer ("winner") alongside all results for comparison.
    $query = (string)($d['query'] ?? '');
    // Compare against '' explicitly: a loose falsy check would also reject the query "0".
    if ($query === '') { http_response_code(400); echo json_encode(['err'=>'no_query']); exit; }

    // Truncate on character boundaries when mbstring is available. A byte-wise cut can
    // split a multi-byte UTF-8 sequence, which makes the final json_encode() return false.
    $clip = static function ($text, $max) {
        return function_exists('mb_substr')
            ? mb_substr($text, 0, $max, 'UTF-8')
            : substr($text, 0, $max);
    };
    // Safety net for bodies that are not valid UTF-8 to begin with (constant exists since PHP 7.2).
    $jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE
        | (defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0);

    $mh = curl_multi_init();
    $handles = [];
    $t0 = microtime(true);
    foreach ($PROVIDERS as $p) {
        $ch = build_ch($p['url'], json_encode(['message'=>$query]), 25);
        curl_multi_add_handle($mh, $ch);
        $handles[] = ['ch'=>$ch, 'provider'=>$p];
    }

    $running = null;
    do {
        curl_multi_exec($mh, $running);
        // Some PHP/libcurl builds return -1 immediately from select; back off
        // briefly so the loop does not spin at full CPU.
        if (curl_multi_select($mh, 0.1) === -1) {
            usleep(1000);
        }
    } while ($running > 0);

    $results = [];
    foreach ($handles as $h) {
        $body = curl_multi_getcontent($h['ch']);
        $http = curl_getinfo($h['ch'], CURLINFO_HTTP_CODE);
        $parsed = parse_response($body);
        $results[] = [
            'provider_id' => $h['provider']['id'],
            'http' => $http,
            // Per-transfer duration as measured by cURL. Timing with microtime() here
            // would run only after *all* transfers finished, giving every provider
            // roughly the same value and making "fastest wins" arbitrary.
            'ms' => round(curl_getinfo($h['ch'], CURLINFO_TOTAL_TIME) * 1000),
            'response_preview' => $clip($parsed['text'] ?? '', 300),
            'response_len' => strlen($parsed['text'] ?? ''),
            'engine' => $parsed['provider'] ?? '?'
        ];
        curl_multi_remove_handle($mh, $h['ch']);
        curl_close($h['ch']);
    }
    curl_multi_close($mh);

    // Pick winner: fastest provider that answered 200 with more than 10 bytes of text.
    $valid = array_filter($results, static function ($r) {
        return $r['http'] === 200 && $r['response_len'] > 10;
    });
    usort($valid, static function ($a, $b) { return $a['ms'] <=> $b['ms']; });
    $R['winner'] = $valid[0] ?? null;
    $R['all_results'] = $results;

    $wallMs = round((microtime(true) - $t0) * 1000);
    $R['total_wall_ms'] = $wallMs;
    // Speedup = sum of individual transfer times / wall time. Guard the division:
    // a wall time that rounds to 0 ms would raise DivisionByZeroError on PHP 8.
    $R['speedup'] = (count($results) > 0 && $wallMs > 0)
        ? round(array_sum(array_column($results, 'ms')) / $wallMs, 1) . 'x'
        : 'N/A';
    $R['doctrine'] = '77 — grid GPU parallel (curl_multi multi-endpoint + fastest-wins)';

    echo json_encode($R, $jsonFlags);
    exit;
}
if ($action === 'shard_text') {
    // Split a large text into chunks and process each chunk on a different
    // provider in parallel (round-robin over $PROVIDERS).
    $text = (string)($d['text'] ?? '');
    $instruction = (string)($d['instruction'] ?? 'Résume');
    // Compare against '' explicitly: a loose falsy check would also reject the text "0".
    if ($text === '') { http_response_code(400); echo json_encode(['err'=>'no_text']); exit; }

    $chunk_size = 600; // characters per shard (bytes when mbstring is unavailable)
    $max_shards = 6;   // hard cap; any text beyond chunk_size * max_shards is not processed

    // Slice on character boundaries when mbstring is available. A byte-wise cut can split
    // a multi-byte UTF-8 sequence, after which json_encode() of the payload returns false
    // and the provider receives an empty body.
    $slice = function_exists('mb_substr')
        ? static function ($s, $start, $len) { return mb_substr($s, $start, $len, 'UTF-8'); }
        : static function ($s, $start, $len) { return (string)substr($s, $start, $len); };
    // Safety net for invalid UTF-8 in payloads and output (constant exists since PHP 7.2).
    $utf8Safe = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;

    // Only the head of the text can ever be used, so cut it once up front.
    $head = $slice($text, 0, $chunk_size * $max_shards);
    $chunks = [];
    for ($i = 0; count($chunks) < $max_shards; $i += $chunk_size) {
        $piece = $slice($head, $i, $chunk_size);
        if ($piece === '') break;
        $chunks[] = $piece;
    }

    $t0 = microtime(true);
    $mh = curl_multi_init();
    $handles = [];
    foreach ($chunks as $idx => $chunk) {
        $p = $PROVIDERS[$idx % count($PROVIDERS)];
        $full = "$instruction\n\n$chunk";
        $ch = build_ch($p['url'], json_encode(['message'=>$full], $utf8Safe), 20);
        curl_multi_add_handle($mh, $ch);
        $handles[] = ['ch'=>$ch, 'provider'=>$p, 'idx'=>$idx];
    }

    $running = null;
    do {
        curl_multi_exec($mh, $running);
        // Some PHP/libcurl builds return -1 immediately from select; back off
        // briefly so the loop does not spin at full CPU.
        if (curl_multi_select($mh, 0.1) === -1) {
            usleep(1000);
        }
    } while ($running > 0);

    $shards = [];
    foreach ($handles as $h) {
        $body = curl_multi_getcontent($h['ch']);
        $parsed = parse_response($body);
        $shards[] = [
            'idx' => $h['idx'],
            'provider' => $h['provider']['id'],
            'response_preview' => $slice($parsed['text'] ?? '', 0, 200),
            'response_len' => strlen($parsed['text'] ?? '')
        ];
        curl_multi_remove_handle($mh, $h['ch']);
        curl_close($h['ch']);
    }
    curl_multi_close($mh);

    // Restore the original chunk order before merging.
    usort($shards, static function ($a, $b) { return $a['idx'] <=> $b['idx']; });

    $R['chunks_count'] = count($chunks);
    $R['shards'] = $shards;
    $R['total_ms'] = round((microtime(true) - $t0) * 1000);
    // The merged output is built from the truncated previews, not the full responses.
    $R['merged'] = implode("\n---\n", array_column($shards, 'response_preview'));
    $R['providers_used'] = array_values(array_unique(array_column($shards, 'provider')));

    echo json_encode($R, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | $utf8Safe);
    exit;
}
// Reached only when no action branch above matched (each branch exits):
// answer 400 and list the supported actions.
http_response_code(400);
$unknownAction = [
    'err' => 'unknown_action',
    'available' => ['health', 'parallel_query', 'shard_text'],
];
echo json_encode($unknownAction);