#!/usr/bin/env python3
# WEVAL GPU Free Compute — actual state as of 7 April 2026.
# Module-level constants describing which free-compute providers are
# usable for real-time inference vs. training-only batch work.

# === WHAT IS ACTUALLY RUNNING (0 EUR inference) ===
# sovereign-api.py on port 4000 — UP, PID 2777776
# Route: Ollama S204 (port 11435) -> Cerebras -> Groq -> Gemini
# Local models: granite4, qwen3:8b, qwen3:4b, qwen2.5:7b, mistral, weval-brain-v2
# + cloud: cerebras qwen-3-235b | groq llama-3.3-70b | gemini-2.0-flash
SOVEREIGN = "http://127.0.0.1:4000/v1"  # drop-in OpenAI-compatible endpoint — UP

# === KAGGLE (keys configured, training use only) ===
# KAGGLE_USERNAME + KAGGLE_API_TOKEN live in /etc/weval/secrets.env
# T4/P100 GPUs, 30 h/week — NO real-time inference API
# Use for: finetuning, data processing, batch jobs
KAGGLE = "kernel_only"  # no external HTTP endpoint

# === HF SPACE (deployed, GPU not active) ===
# Space: https://huggingface.co/spaces/yace222/weval-vllm
# Build: DONE (commit 5b4b422) — cpu-basic tier
# Free T4 requires an HF PRO plan — not available here
# HF serverless Inference API: REMOVED since 2024 (returns HTTP 410)
HF_SPACE_URL = "https://yace222-weval-vllm.hf.space"  # cpu-basic; vLLM non-functional without GPU

# === RENDER / RAILWAY (configured, CPU only) ===
# RENDER_WORKSPACE + RAILWAY_WORKSPACE live in secrets.env
# No free GPU on either platform
RENDER = None
RAILWAY = None

# === CONCLUSION ===
# The only free, real-time AI inference = sovereign-api on port 4000.
# Kaggle is reserved for training/finetune batch jobs.
# For free GPU inference: requires HF PRO ($9/month) or Colab Pro.
STATUS = {
    "sovereign_local": "UP",  # port 4000, 6 providers, 0 EUR
    "kaggle": "TRAINING_ONLY",
    "hf_space": "NO_GPU",
    "render": "CPU_ONLY",
    "railway": "CPU_ONLY",
}