#!/usr/bin/env python3
# WEVAL GPU Free Compute — actual state as of April 7, 2026.
# Inventory of free-compute endpoints; module-level constants only, no logic.

# === WHAT ACTUALLY RUNS (0 EUR inference) ===
# sovereign-api.py port 4000 — UP, PID 2777776
# Route: Ollama S204 (port 11435) -> Cerebras -> Groq -> Gemini
# Local models: granite4, qwen3:8b, qwen3:4b, qwen2.5:7b, mistral, weval-brain-v2
# + cloud: cerebras qwen-3-235b | groq llama-3.3-70b | gemini-2.0-flash
SOVEREIGN = "http://127.0.0.1:4000/v1"  # drop-in OpenAI-compatible endpoint — UP

# === KAGGLE (keys configured, training usage only) ===
# KAGGLE_USERNAME + KAGGLE_API_TOKEN live in /etc/weval/secrets.env
# T4/P100, 30h/week — NO real-time inference API
# Usage: finetune, data processing, batch jobs
KAGGLE = "kernel_only"  # no external HTTP endpoint

# === HF SPACE (deployed, GPU not active) ===
# Space: https://huggingface.co/spaces/yace222/weval-vllm
# Build: DONE (commit 5b4b422) — cpu-basic tier
# Free T4 requires an HF PRO plan — not available
# HF serverless Inference API: REMOVED since 2024 (returns HTTP 410)
HF_SPACE_URL = "https://yace222-weval-vllm.hf.space"  # cpu-basic; vLLM unusable without a GPU

# === RENDER / RAILWAY (configured, CPU only) ===
# RENDER_WORKSPACE + RAILWAY_WORKSPACE in secrets.env
# Neither platform offers a free GPU tier.
RENDER = None
RAILWAY = None

# === CONCLUSION ===
# The only free, real-time AI inference = sovereign-api on port 4000.
# Kaggle is reserved for training/finetune batch work.
# For free GPU inference: needs HF PRO ($9/month) or Colab Pro.
STATUS = {
    "sovereign_local": "UP",  # port 4000, 6 providers, 0 EUR
    "kaggle": "TRAINING_ONLY",
    "hf_space": "NO_GPU",
    "render": "CPU_ONLY",
    "railway": "CPU_ONLY",
}