Files
weval-guardian/weval-gpu-deploy.py
2026-04-07 23:38:56 +02:00

41 lines
1.7 KiB
Python

#!/usr/bin/env python3
"""WEVAL GPU free-compute inventory — actual state as of 7 April 2026.

Records which zero-cost inference/training endpoints are genuinely usable,
as module-level constants consumed elsewhere in the deployment tooling.
"""

# --- What actually runs (0 EUR inference) ---
# sovereign-api.py on port 4000 is UP (PID 2777776).
# Routing order: Ollama S204 (port 11435) -> Cerebras -> Groq -> Gemini.
# Local models: granite4, qwen3:8b, qwen3:4b, qwen2.5:7b, mistral, weval-brain-v2.
# Cloud models: cerebras qwen-3-235b | groq llama-3.3-70b | gemini-2.0-flash.
SOVEREIGN = "http://127.0.0.1:4000/v1"  # drop-in OpenAI-compatible endpoint — UP

# --- Kaggle (keys configured, training use only) ---
# KAGGLE_USERNAME + KAGGLE_API_TOKEN live in /etc/weval/secrets.env.
# T4/P100, 30 h/week — no real-time inference API.
# Intended use: fine-tuning, data processing, batch jobs.
KAGGLE = "kernel_only"  # no external HTTP endpoint

# --- HF Space (deployed, GPU not active) ---
# Space: https://huggingface.co/spaces/yace222/weval-vllm
# Build: DONE (commit 5b4b422) — cpu-basic tier.
# Free T4 requires an HF PRO plan — not available here.
# HF serverless Inference API: removed since 2024 (HTTP 410).
HF_SPACE_URL = "https://yace222-weval-vllm.hf.space"  # cpu-basic; vLLM unusable without GPU

# --- Render / Railway (configured, CPU only) ---
# RENDER_WORKSPACE + RAILWAY_WORKSPACE are in secrets.env.
# Neither platform offers free GPUs.
RENDER = None
RAILWAY = None

# --- Conclusion ---
# The only free, real-time AI inference is sovereign-api on port 4000.
# Kaggle is reserved for training / fine-tune batch work.
# Free GPU inference would require HF PRO (9 $/month) or Colab Pro.
STATUS = dict(
    sovereign_local="UP",  # port 4000, 6 providers, 0 EUR
    kaggle="TRAINING_ONLY",
    hf_space="NO_GPU",
    render="CPU_ONLY",
    railway="CPU_ONLY",
)