'cerebras', 'tier' => 0, 'model' => 'qwen-3-235b-a22b-instruct-2507', 'url' => 'https://api.cerebras.ai/v1/chat/completions', 'key_name' => 'CEREBRAS_API_KEY', 'params' => 235],
    ['name' => 'groq', 'tier' => 0, 'model' => 'llama-3.3-70b-versatile', 'url' => 'https://api.groq.com/openai/v1/chat/completions', 'key_name' => 'GROQ_KEY', 'params' => 70],
    ['name' => 'kimi-k2', 'tier' => 0, 'model' => 'moonshotai/kimi-k2-instruct', 'url' => 'https://api.groq.com/openai/v1/chat/completions', 'key_name' => 'GROQ_KEY', 'params' => 1000],
    ['name' => 'sambanova', 'tier' => 0, 'model' => 'DeepSeek-V3.2', 'url' => 'https://api.sambanova.ai/v1/chat/completions', 'key_name' => 'SAMBANOVA_KEY', 'params' => 671],
    // TIER 1 — free, but rate-limited
    ['name' => 'nvidia', 'tier' => 1, 'model' => 'meta/llama-3.3-70b-instruct', 'url' => 'https://integrate.api.nvidia.com/v1/chat/completions', 'key_name' => 'NVIDIA_NIM_KEY', 'params' => 70],
    ['name' => 'mistral', 'tier' => 1, 'model' => 'mistral-large-latest', 'url' => 'https://api.mistral.ai/v1/chat/completions', 'key_name' => 'MISTRAL_KEY', 'params' => 123],
    ['name' => 'together', 'tier' => 1, 'model' => 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo', 'url' => 'https://api.together.xyz/v1/chat/completions', 'key_name' => 'TOGETHER_KEY', 'params' => 70],
    ['name' => 'cohere', 'tier' => 1, 'model' => 'command-r-plus', 'url' => 'https://api.cohere.ai/v1/chat', 'key_name' => 'COHERE_KEY', 'params' => 104, 'format' => 'cohere'],
    ['name' => 'deepseek', 'tier' => 1, 'model' => 'deepseek-chat', 'url' => 'https://api.deepseek.com/v1/chat/completions', 'key_name' => 'DEEPSEEK_KEY', 'params' => 671],
    ['name' => 'openrouter', 'tier' => 1, 'model' => 'meta-llama/llama-3.3-70b-instruct:free', 'url' => 'https://openrouter.ai/api/v1/chat/completions', 'key_name' => 'OPENROUTER_KEY', 'params' => 70],
    // TIER 2 — sovereign + paid
    ['name' => 'gemini', 'tier' => 2, 'model' => 'gemini-2.5-flash', 'url' => 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent', 'key_name' => 'GEMINI_KEY', 'params' => '?', 'format' => 'gemini'],
    ['name' => 'alibaba', 'tier' => 2, 'model' => 'qwen-turbo', 'url' => 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions', 'key_name' => 'ALIBABA_KEY', 'params' => 72],
    ['name' => 'zhipu', 'tier' => 2, 'model' => 'glm-4-flash', 'url' => 'https://open.bigmodel.cn/api/paas/v4/chat/completions', 'key_name' => 'ZHIPU_KEY', 'params' => 130],
    // TIER 3 — local sovereign fallback
    ['name' => 'ollama', 'tier' => 3, 'model' => 'weval-brain-v3', 'url' => 'http://127.0.0.1:11434/v1/chat/completions', 'key_name' => '', 'params' => 8],
];

// ═══ FUNCTIONS ═══

/**
 * Build the (url, json body, headers) triple for one provider request.
 *
 * Three wire formats are supported:
 *   - 'gemini': auth via ?key= query param, Google "contents/parts" body
 *   - 'cohere': legacy /v1/chat body ('message' + 'model'), Bearer auth
 *   - 'openai' (default): chat/completions body, Bearer auth
 *
 * @param array  $provider Provider descriptor from $ALL_PROVIDERS.
 * @param string $prompt   User prompt.
 * @param string $key      API key ('local' for keyless local providers).
 * @return array{0:string,1:string,2:array} [url, body, headers]
 */
function wevia_build_request($provider, $prompt, $key)
{
    $format = $provider['format'] ?? 'openai';
    if ($format === 'gemini') {
        // Gemini authenticates via the query string, not an Authorization header.
        return [
            $provider['url'] . '?key=' . urlencode($key),
            json_encode(['contents' => [['parts' => [['text' => $prompt]]]]]),
            ['Content-Type: application/json'],
        ];
    }
    if ($format === 'cohere') {
        return [
            $provider['url'],
            json_encode(['message' => $prompt, 'model' => $provider['model']]),
            ['Content-Type: application/json', "Authorization: Bearer $key"],
        ];
    }
    // OpenAI-compatible default (covers the vast majority of providers above).
    return [
        $provider['url'],
        json_encode([
            'model' => $provider['model'],
            'messages' => [['role' => 'user', 'content' => $prompt]],
            'max_tokens' => 300,
            'temperature' => 0.3,
        ]),
        ['Content-Type: application/json', "Authorization: Bearer $key"],
    ];
}

/**
 * Extract the assistant text from a raw HTTP response, per wire format.
 *
 * @param string|false|null $resp   Raw response body (false/null on transport failure).
 * @param int               $code   HTTP status code.
 * @param string            $format 'gemini' | 'cohere' | 'openai'.
 * @return string|null Text on success, null on any failure.
 */
function wevia_extract_text($resp, $code, $format)
{
    if ($code < 200 || $code >= 300 || !$resp) return null;
    $d = json_decode($resp, true);
    if ($format === 'gemini') return $d['candidates'][0]['content']['parts'][0]['text'] ?? null;
    if ($format === 'cohere') return $d['text'] ?? null;
    return $d['choices'][0]['message']['content'] ?? null;
}

/**
 * Call a single provider synchronously.
 *
 * @param array  $provider Provider descriptor from $ALL_PROVIDERS.
 * @param string $prompt   User prompt.
 * @param array  $secrets  key_name => API key map.
 * @param int    $timeout  Per-request timeout in seconds.
 * @return string|null Assistant text, or null if no key / HTTP error / bad payload.
 */
function call_provider($provider, $prompt, $secrets, $timeout = 15)
{
    // Keyless providers (ollama) use the sentinel 'local'; everything else
    // is skipped when its key is missing from $secrets.
    $key = $provider['key_name'] ? ($secrets[$provider['key_name']] ?? '') : 'local';
    if (!$key && $provider['name'] !== 'ollama') return null;

    $format = $provider['format'] ?? 'openai';
    [$url, $body, $headers] = wevia_build_request($provider, $prompt, $key);

    $ch = curl_init($url);
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $body,
        CURLOPT_HTTPHEADER => $headers,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => $timeout,
        // WARNING(review): TLS peer verification is disabled — this allows
        // man-in-the-middle interception of API keys. Should be true once the
        // CA bundle is configured on the host.
        CURLOPT_SSL_VERIFYPEER => false,
    ]);
    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    return wevia_extract_text($resp, $code, $format);
}

/**
 * Fan a prompt out to up to $max_providers providers in parallel (curl_multi)
 * and collect every successful response.
 *
 * @param string $prompt        User prompt.
 * @param array  $providers     Ordered provider list (fastest tiers first).
 * @param array  $secrets       key_name => API key map.
 * @param int    $max_providers Cap on simultaneous providers.
 * @return array List of {provider, model, params, tier, latency_ms, response}.
 */
function consensus_parallel($prompt, $providers, $secrets, $max_providers = 5)
{
    $results = [];
    $mh = curl_multi_init();
    $handles = [];
    $count = 0;

    foreach ($providers as $p) {
        if ($count >= $max_providers) break;
        $key = $p['key_name'] ? ($secrets[$p['key_name']] ?? '') : 'local';
        if (!$key && $p['name'] !== 'ollama') continue;

        $format = $p['format'] ?? 'openai';
        [$url, $body, $headers] = wevia_build_request($p, $prompt, $key);

        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 15,
            // WARNING(review): TLS verification disabled — see call_provider().
            CURLOPT_SSL_VERIFYPEER => false,
        ]);
        curl_multi_add_handle($mh, $ch);
        $handles[] = ['ch' => $ch, 'provider' => $p, 'format' => $format];
        $count++;
    }

    // Drive all transfers to completion.
    do {
        $status = curl_multi_exec($mh, $active);
    } while ($status === CURLM_CALL_MULTI_PERFORM);
    while ($active && $status === CURLM_OK) {
        // select() may return -1 (nothing to wait on yet); back off briefly
        // instead of busy-spinning.
        if (curl_multi_select($mh, 1) === -1) {
            usleep(1000);
        }
        do {
            $status = curl_multi_exec($mh, $active);
        } while ($status === CURLM_CALL_MULTI_PERFORM);
    }

    // Collect successful responses; failures are silently dropped (best-effort fan-out).
    foreach ($handles as $h) {
        $resp = curl_multi_getcontent($h['ch']);
        $code = curl_getinfo($h['ch'], CURLINFO_HTTP_CODE);
        $latency = round(curl_getinfo($h['ch'], CURLINFO_TOTAL_TIME) * 1000);
        curl_multi_remove_handle($mh, $h['ch']);
        curl_close($h['ch']);

        $text = wevia_extract_text($resp, $code, $h['format']);
        if ($text) {
            $results[] = [
                'provider' => $h['provider']['name'],
                'model' => $h['provider']['model'],
                'params' => $h['provider']['params'],
                'tier' => $h['provider']['tier'],
                'latency_ms' => $latency,
                'response' => $text,
            ];
        }
    }
    curl_multi_close($mh);
    return $results;
}

/**
 * Synthesize a consensus answer from multiple provider responses, using the
 * fastest available synthesis provider (Cerebras, then Groq).
 *
 * @param string $question Original user question.
 * @param array  $results  Output of consensus_parallel().
 * @param array  $secrets  key_name => API key map.
 * @return string Consensus text, or a French fallback message.
 */
function synthesize_consensus($question, $results, $secrets)
{
    if (count($results) < 2) return $results[0]['response'] ?? 'Pas assez de réponses pour un consensus.';

    $synthesis_prompt = "Tu es WEVIA Consensus Engine. Voici les réponses de " . count($results) . " IA différentes à la question: \"$question\"\n\n";
    foreach ($results as $r) {
        $synthesis_prompt .= "--- " . strtoupper($r['provider']) . " ({$r['model']}, {$r['params']}B, {$r['latency_ms']}ms) ---\n{$r['response']}\n\n";
    }
    $synthesis_prompt .= "Synthétise un CONSENSUS en identifiant: 1) Points d'accord unanimes 2) Divergences 3) Ta recommandation finale. Sois concis.";

    // Prefer Cerebras (fastest inference), fall back to Groq. If neither key
    // is configured there is nothing to call — return the fallback directly
    // instead of firing a request with an empty bearer token.
    if (!empty($secrets['CEREBRAS_API_KEY'])) {
        $key = $secrets['CEREBRAS_API_KEY'];
        $synth_url = 'https://api.cerebras.ai/v1/chat/completions';
        $synth_model = 'qwen-3-235b-a22b-instruct-2507';
    } elseif (!empty($secrets['GROQ_KEY'])) {
        $key = $secrets['GROQ_KEY'];
        $synth_url = 'https://api.groq.com/openai/v1/chat/completions';
        $synth_model = 'llama-3.3-70b-versatile';
    } else {
        return 'Synthèse indisponible.';
    }

    $body = json_encode([
        'model' => $synth_model,
        'messages' => [['role' => 'user', 'content' => $synthesis_prompt]],
        'max_tokens' => 500,
        'temperature' => 0.2,
    ]);
    $ch = curl_init($synth_url);
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $body,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json', "Authorization: Bearer $key"],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => 20,
        // WARNING(review): TLS verification disabled — see call_provider().
        CURLOPT_SSL_VERIFYPEER => false,
    ]);
    $resp = curl_exec($ch);
    curl_close($ch);
    $d = json_decode($resp, true);
    return $d['choices'][0]['message']['content'] ?? 'Synthèse indisponible.';
}

// ═══ IA DISCOVERY — auto-detect new free providers ═══

/**
 * Report which configured providers are active (have a key, or are local)
 * plus a static list of candidate free-tier providers to evaluate.
 *
 * Reads $ALL_PROVIDERS and $secrets from globals (script-level state).
 *
 * @return array {total_providers, active, inactive, providers[], recommendations[]}
 */
function ia_discovery()
{
    global $ALL_PROVIDERS, $secrets;
    $discovery = ['total_providers' => count($ALL_PROVIDERS), 'active' => 0, 'inactive' => 0, 'providers' => []];
    foreach ($ALL_PROVIDERS as $p) {
        $key = $p['key_name'] ? ($secrets[$p['key_name']] ?? '') : 'local';
        // ollama is always considered active (local, keyless).
        $active = (bool)$key || $p['name'] === 'ollama';
        $discovery['providers'][] = [
            'name' => $p['name'],
            'tier' => $p['tier'],
            'model' => $p['model'],
            'params' => $p['params'],
            'active' => $active,
        ];
        if ($active) $discovery['active']++;
        else $discovery['inactive']++;
    }
    // Known free-tier endpoints to check
    $discovery['recommendations'] = [
        "Fireworks AI (free tier) — llama-3.3-70b, fast inference",
        "Lepton AI (free tier) — llama-3.3-70b, no rate limit",
        "HuggingFace Inference API — free, 1000+ models",
        "Cloudflare Workers AI — free tier, @cf/meta/llama-3-8b",
    ];
    return $discovery;
}

// ═══ ROUTING ═══
// Guard against invalid/empty JSON bodies: json_decode() returns null, which
// would otherwise trigger "array offset on null" warnings below.
$input = json_decode(file_get_contents("php://input"), true);
if (!is_array($input)) $input = [];
$action = $_GET['action'] ?? $input['action'] ?? 'consensus';

switch ($action) {
    case 'consensus':
        $question = $input['question'] ?? $input['message'] ?? '';
        $max = intval($input['max_providers'] ?? 5);
        if (!$question) { echo json_encode(['error' => 'question required']); exit; }
        $t0 = microtime(true);
        $results = consensus_parallel($question, $ALL_PROVIDERS, $secrets, $max);
        $consensus = synthesize_consensus($question, $results, $secrets);
        $total_time = round((microtime(true) - $t0) * 1000);
        echo json_encode([
            'consensus' => $consensus,
            'providers_queried' => count($results),
            'total_time_ms' => $total_time,
            'individual_responses' => $results,
        ], JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
        break;

    case 'discovery':
        echo json_encode(ia_discovery(), JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
        break;

    case 'providers':
        $list = [];
        foreach ($ALL_PROVIDERS as $p) {
            $key = $p['key_name'] ? ($secrets[$p['key_name']] ?? '') : 'local';
            $list[] = ['name' => $p['name'], 'tier' => $p['tier'], 'model' => $p['model'], 'params' => $p['params'], 'active' => (bool)$key || $p['name'] === 'ollama'];
        }
        echo json_encode(['total' => count($list), 'providers' => $list], JSON_PRETTY_PRINT);
        break;

    case 'benchmark':
        // Sequentially time every active provider on one question.
        $question = $input['question'] ?? 'Quelle est la meilleure approche pour le sovereign AI?';
        $results = [];
        foreach ($ALL_PROVIDERS as $p) {
            $key = $p['key_name'] ? ($secrets[$p['key_name']] ?? '') : 'local';
            if (!$key && $p['name'] !== 'ollama') continue;
            $t0 = microtime(true);
            $resp = call_provider($p, $question, $secrets, 10);
            $latency = round((microtime(true) - $t0) * 1000);
            $results[] = ['provider' => $p['name'], 'model' => $p['model'], 'params' => $p['params'], 'latency_ms' => $latency, 'chars' => $resp ? strlen($resp) : 0, 'ok' => $resp !== null];
        }
        usort($results, fn($a, $b) => $a['latency_ms'] <=> $b['latency_ms']);
        echo json_encode(['benchmark' => $results, 'total' => count($results)], JSON_PRETTY_PRINT);
        break;

    case 'health':
        echo json_encode(['status' => 'ok', 'version' => '1.0', 'providers' => count($ALL_PROVIDERS), 'active' => count(array_filter($ALL_PROVIDERS, fn($p) => ($p['key_name'] ? ($secrets[$p['key_name']] ?? '') : 'local')))]);
        break;

    default:
        echo json_encode(['error' => 'Unknown action. Use: consensus, discovery, providers, benchmark, health']);
}