Files
html/api/wevia-cascade-config.json
opus 5f015fb49a
Some checks failed
WEVAL NonReg / nonreg (push) Has been cancelled
auto-sync-0305
2026-04-17 03:05:02 +02:00

264 lines
6.6 KiB
JSON

{
"version": "v4.0",
"date": "2026-04-10",
"strategy": "6sigma_zero_variability",
"cascades": {
"exec": {
"desc": "Shell commands - no LLM needed",
"providers": [
"shell_exec"
],
"fallback": "none",
"timeout": 15,
"rate_limit": "unlimited"
},
"agent_status": {
"desc": "Agent status checks - static + live data",
"providers": [
"static_json",
"shell_exec",
"http_api"
],
"fallback": "static_response",
"timeout": 10,
"rate_limit": "unlimited"
},
"code_gen": {
"desc": "Code generation - needs powerful LLM",
"providers": [
"cerebras_235b",
"nvidia_49b",
"groq_70b",
"hf_72b",
"mistral_small",
"ollama_qwen3"
],
"fallback": "ollama_local",
"timeout": 60,
"rate_limit": "cascade_failover"
},
"long_gen": {
"desc": "Long code (300+ lines) - needs high max_tokens",
"providers": [
"nvidia_49b",
"groq_70b",
"hf_72b",
"cerebras_235b",
"mistral_small"
],
"fallback": "ollama_local",
"timeout": 120,
"max_tokens": 4096,
"rate_limit": "cascade_failover"
},
"chat": {
"desc": "Conversational - fast response needed",
"providers": [
"groq_70b",
"nvidia_49b",
"hf_72b",
"cerebras_235b",
"mistral_small",
"ollama_qwen3"
],
"fallback": "ollama_local",
"timeout": 15,
"rate_limit": "cascade_failover"
},
"translate": {
"desc": "Translation - accuracy matters",
"providers": [
"cerebras_235b",
"mistral_small",
"groq_70b",
"nvidia_49b"
],
"fallback": "ollama_local",
"timeout": 20,
"rate_limit": "cascade_failover"
},
"search": {
"desc": "Web search via SearXNG",
"providers": [
"searxng_local"
],
"fallback": "deerflow",
"timeout": 10,
"rate_limit": "unlimited"
},
"data": {
"desc": "Database queries (Ethica, CRM)",
"providers": [
"postgresql_s95",
"postgresql_s204"
],
"fallback": "static_cache",
"timeout": 10,
"rate_limit": "unlimited"
},
"security": {
"desc": "Security scans and checks",
"providers": [
"nuclei",
"crowdsec",
"guardian_sh"
],
"fallback": "static_status",
"timeout": 30,
"rate_limit": "unlimited"
},
"infra": {
"desc": "Infrastructure checks",
"providers": [
"shell_exec",
"docker_api",
"systemd"
],
"fallback": "none",
"timeout": 10,
"rate_limit": "unlimited"
},
"cognitive": {
"desc": "Complex reasoning (analyst, architect, debugger)",
"providers": [
"cerebras_235b",
"nvidia_49b",
"groq_70b",
"mistral_small"
],
"fallback": "groq_70b",
"timeout": 30,
"rate_limit": "cascade_failover"
},
"creative": {
"desc": "Design, diagram, email, blog",
"providers": [
"cerebras_235b",
"groq_70b",
"nvidia_49b",
"hf_72b"
],
"fallback": "ollama_local",
"timeout": 30,
"rate_limit": "cascade_failover"
},
"multiagent": {
"desc": "Multi-agent orchestration",
"providers": [
"paperclip_api",
"deerflow_api",
"consensus_moa"
],
"fallback": "static_fleet_status",
"timeout": 30,
"rate_limit": "unlimited"
}
},
"providers": {
"groq_70b": {
"url": "api.groq.com",
"model": "llama-3.3-70b-versatile",
"key": "GROQ_KEY",
"speed": "<1s",
"limit": "rate-limited",
"cost": "0"
},
"nvidia_49b": {
"url": "integrate.api.nvidia.com",
"model": "nvidia/llama-3.3-nemotron-super-49b-v1",
"key": "NVIDIA_NIM_KEY",
"speed": "2-5s",
"limit": "unlimited",
"cost": "0"
},
"hf_72b": {
"url": "router.huggingface.co",
"model": "Qwen/Qwen2.5-72B-Instruct",
"key": "HF_TOKEN",
"speed": "3-8s",
"limit": "rate-limited",
"cost": "0"
},
"cerebras_235b": {
"url": "api.cerebras.ai",
"model": "qwen-3-235b-a22b-instruct-2507",
"key": "CEREBRAS_API_KEY",
"speed": "1-3s",
"limit": "rate-limited",
"cost": "0"
},
"mistral_small": {
"url": "api.mistral.ai",
"model": "mistral-small-latest",
"key": "MISTRAL_KEY",
"speed": "1-2s",
"limit": "rate-limited",
"cost": "0"
},
"ollama_qwen3": {
"url": "127.0.0.1:11434",
"model": "qwen3:4b",
"key": "none",
"speed": "5-15s",
"limit": "unlimited",
"cost": "0"
},
"searxng_local": {
"url": "127.0.0.1:8080",
"key": "none",
"speed": "<1s",
"limit": "unlimited",
"cost": "0"
},
"shell_exec": {
"type": "local",
"speed": "<1s",
"limit": "unlimited",
"cost": "0"
},
"postgresql_s95": {
"host": "10.1.0.3:5432",
"db": "adx_system",
"speed": "<1s",
"limit": "unlimited",
"cost": "0"
},
"paperclip_api": {
"url": "127.0.0.1:3201",
"speed": "1s",
"limit": "unlimited",
"cost": "0"
},
"deerflow_api": {
"url": "127.0.0.1:2024",
"speed": "2-10s",
"limit": "unlimited",
"cost": "0"
},
"nuclei": {
"type": "binary",
"speed": "10-60s",
"limit": "unlimited",
"cost": "0"
}
},
"intent_to_cascade": {
"audit|diagnostic|ram|disk": "exec",
"auto-fix|nonreg|guardian|git|crons|vacuum|restart|execute": "exec",
"ethica|pharma|hcp": "data",
"paperclip|deerflow|blade|director|wedroid|wevcode|openclaw|nuclei|consensus|hamid|sentinel|benchmark|wiring": "agent_status",
"code|fonction|script|python|php|api|react|fullstack|devops": "code_gen",
"complet.*html|from.*scratch|ecommerce|landing.*page": "long_gen",
"traduis|translate": "translate",
"cherche|search|actualit": "search",
"analyst|architect|debugger|planner|qa|reviewer": "cognitive",
"writer|email|blog|pdf|diagram|design|logo": "creative",
"multi.*agent|fleet|orchestr": "multiagent",
"securise|pentest|trust|crowdsec|ssl|sso": "security",
"docker|ram|process|network|log|dns|snapshot|provider|uptime": "infra",
"default": "chat"
},
"total_cost": "0€",
"6sigma_target": "99.7% availability (three-sigma level; strict Six Sigma would be 99.99966%)",
"guarantee": "Chat cascade: if Groq is rate-limited, fail over to NVIDIA (unlimited) → HF → Cerebras → Mistral → local Ollama"
}