# NOTE: the lines below are file-browser export residue (not shell code);
# they are commented out so the script still parses when run via `bash`.
# Files
# wevads-platform/gpu-deploy.sh
#
# 289 lines
# 15 KiB
# Bash
# Executable File
#!/bin/bash
###############################################################################
# WEVIA SOVEREIGN FINE-TUNING PIPELINE — gpu-deploy.sh
# Server: Hetzner GEX44 (88.198.4.195) — RTX 4000 Ada 20GB
#
# Pipeline: Export DB → Format → QLoRA Fine-tune → GGUF → Ollama deploy
# Usage: bash gpu-deploy.sh [export|deps|train|convert|deploy|all]
###############################################################################
set -euo pipefail

# Pipeline constants — readonly so an accidental reassignment fails loudly.
readonly GPU_SERVER="88.198.4.195"    # fine-tuning host (informational)
readonly DB_SERVER="95.216.167.89"    # WEVADS PostgreSQL host (informational)
readonly WORK_DIR="/opt/wevia-finetune"
readonly MODEL_OUT="wevia-sovereign"
readonly OLLAMA_PORT=11434            # informational; ollama CLI uses its default

# log MSG — timestamped green status line on stdout.
log() { printf '\033[0;32m[%s]\033[0m %s\n' "$(date '+%H:%M:%S')" "$1"; }
# warn MSG — yellow warning on stderr (diagnostics belong on stderr; still
# captured in the log file via the `2>&1 | tee` at the bottom of the script).
warn() { printf '\033[1;33m[WARN]\033[0m %s\n' "$1" >&2; }
# ═══════════════════════════════════════════════════════════════
# STEP 1: EXPORT TRAINING DATA FROM WEVADS
# ═══════════════════════════════════════════════════════════════
#######################################
# Export WEVADS Postgres tables into Alpaca-format JSON by generating and
# running a Python script at $WORK_DIR/data/export_training.py.
# The pasted source had lost all Python indentation (a SyntaxError as-is);
# the heredoc below restores the intended structure, logic unchanged.
# Globals:  WORK_DIR (read), log (called)
# Outputs:  $WORK_DIR/data/training_data.json
#######################################
export_data() {
  log "📦 Exporting training data from WEVADS..."
  mkdir -p "$WORK_DIR/data"
  # Quoted delimiter ('PYEOF') — no shell expansion inside the heredoc.
  cat > "$WORK_DIR/data/export_training.py" << 'PYEOF'
#!/usr/bin/env python3
"""Export WEVADS data into Alpaca training format"""
import json, os, sys, re
try:
    import psycopg2
except ImportError:
    # Self-heal on first run (Debian 12+ requires --break-system-packages).
    os.system("pip install psycopg2-binary --break-system-packages -q")
    import psycopg2

# SECURITY NOTE(review): credentials are hardcoded — move to env vars/secrets.
DB = "host=95.216.167.89 port=5432 dbname=adx_system user=admin password=admin123"
OUT = "/opt/wevia-finetune/data/training_data.json"

data = []
conn = psycopg2.connect(DB)
cur = conn.cursor()

# Each section is best-effort: a missing table only skips that source.
# 1. Knowledge Base
print(" Exporting KB...")
try:
    cur.execute("SELECT topic, content, category FROM admin.knowledge_base WHERE content IS NOT NULL AND length(content) > 50 LIMIT 2000")
    for topic, content, cat in cur.fetchall():
        data.append({"instruction": f"En tant qu'expert WEVAL Consulting, explique : {topic}", "input": "", "output": content[:2000]})
except Exception as e:
    print(f" KB skip: {e}")

# 2. HAMID conversations
print(" Exporting HAMID conversations...")
try:
    cur.execute("SELECT user_message, assistant_response FROM admin.hamid_conversations WHERE assistant_response IS NOT NULL AND length(assistant_response) > 50 ORDER BY created_at DESC LIMIT 3000")
    for q, a in cur.fetchall():
        data.append({"instruction": q[:500], "input": "", "output": a[:2000]})
except Exception as e:
    print(f" HAMID skip: {e}")

# 3. Claude KB
print(" Exporting Claude KB...")
try:
    cur.execute("SELECT topic, content FROM admin.claude_conversations_kb WHERE content IS NOT NULL AND length(content) > 100 LIMIT 1000")
    for t, c in cur.fetchall():
        data.append({"instruction": f"Explique en détail : {t}", "input": "", "output": c[:2000]})
except Exception as e:
    print(f" Claude KB skip: {e}")

# 4. Chatbot history
print(" Exporting chatbot history...")
try:
    cur.execute("SELECT user_message, bot_response FROM admin.chatbot_conversations_history WHERE bot_response IS NOT NULL AND length(bot_response) > 50 ORDER BY created_at DESC LIMIT 2000")
    for q, a in cur.fetchall():
        data.append({"instruction": q[:500], "input": "", "output": a[:2000]})
except Exception as e:
    print(f" Chatbot skip: {e}")

# 5. Creative templates → email marketing expertise
print(" Exporting email expertise...")
try:
    cur.execute("SELECT subject, html_body FROM admin.creatives WHERE subject IS NOT NULL AND html_body IS NOT NULL LIMIT 500")
    for subj, body in cur.fetchall():
        # Crude tag strip — good enough for training text, not general HTML.
        text = re.sub('<[^<]+?>', '', body)[:500]
        data.append({"instruction": f"Génère un email marketing professionnel avec le sujet : {subj}", "input": "", "output": f"Sujet: {subj}\n\n{text}"})
except Exception as e:
    print(f" Creatives skip: {e}")

# 6. Hardcoded personality + expertise examples
personality = [
    {"instruction": "Qui es-tu?", "input": "",
     "output": "🧠 Je suis HAMID NEXUS, l'IA souveraine de WEVAL Consulting à Casablanca. Version SINGULARITY 2026, 1418+ modules, 7 serveurs interconnectés, providers IA en failover automatique. Expert en email marketing, consulting stratégique, IA et infrastructure cloud. ⚡"},
    {"instruction": "Explique PowerMTA simplement", "input": "",
     "output": "📧 PowerMTA est le moteur d'envoi d'emails le plus puissant du marché. Il gère des milliers d'IPs et domaines simultanément, route intelligemment selon l'ISP (Gmail, Outlook, Yahoo), et respecte les limites de chaque fournisseur. La clé : bon warmup + rotation IPs + headers propres = 95%+ inbox rate. 🚀"},
    {"instruction": "Diagnostique un problème de délivrabilité email", "input": "",
     "output": "🔍 DIAGNOSTIC:\n\n1. Vérifie les bases: `pmta show queue` et `pmta show status`\n2. Top 3 hypothèses: IP blacklistée (check mxtoolbox), SPF/DKIM/DMARC mal config (`dig TXT domain`), warmup trop agressif\n3. Fix rapide: bounce > 5% → flush queue + switch IP. Spam > 2% → change contenu\n\n🎯 Objectifs: bounce < 2%, spam < 0.5%, inbox > 90%"},
    {"instruction": "Stratégie WEVAL Consulting?", "input": "",
     "output": "🎯 WEVAL opère sur 3 axes:\n\n1. Transformation Digitale — Migration SAP S/4HANA, cloud, IA pour entreprises marocaines/africaines\n2. Intelligence Marketing — Plateforme WEVADS (6.6M+ contacts, 128 offres actives, scraping intelligent + IA)\n3. Recrutement Tech — WEVAL Recruitment place des talents IT avec pré-qualification IA\n\n💡 Notre avantage: on livre des résultats mesurables avec notre propre stack technologique."},
    {"instruction": "Comment optimiser le taux d'inbox Gmail?", "input": "",
     "output": "📧 Optimisation Gmail:\n\n1. **Warmup progressif** — Commence à 5 emails/jour, monte de 20%/jour max\n2. **SPF+DKIM+DMARC** — Les 3 doivent être alignés et valides\n3. **Headers propres** — Pas de X-Mailer, pas de headers suspects\n4. **Engagement** — Envoie d'abord aux contacts engagés (opens/clicks récents)\n5. **Contenu** — Ratio texte/image > 60/40, pas de tracking pixels excessifs\n\n```bash\n# Vérifier la config\ndig TXT _dmarc.tondomaine.com\ndig TXT tondomaine.com | grep spf\n```\n\n🎯 Cible: >95% inbox sur Gmail avec ces pratiques."},
    {"instruction": "Écris un script bash pour monitorer PMTA", "input": "",
     "output": "💻 Script monitoring PMTA:\n\n```bash\n#!/bin/bash\nset -euo pipefail\n\n# Check PMTA status\nif ! pgrep -x pmtad > /dev/null; then\n echo '❌ PMTA DOWN — restarting...'\n systemctl restart pmta\n sleep 5\nfi\n\n# Queue check\nQUEUE=$(pmta show queue 2>/dev/null | grep -c 'active' || echo 0)\nif [ \"$QUEUE\" -gt 500 ]; then\n echo \"⚠️ Queue haute: $QUEUE — investigation...\"\n pmta show queue | head -20\nfi\n\n# Bounce rate\nBOUNCES=$(pmta show counters | grep 'softBounce\\|hardBounce' | awk '{sum+=$2}END{print sum}')\necho \"📊 Queue: $QUEUE | Bounces: $BOUNCES\"\n```\n\n🔧 Ajoute en cron: `*/5 * * * *` pour monitoring continu."},
]
data.extend(personality)

conn.close()
os.makedirs(os.path.dirname(OUT), exist_ok=True)
with open(OUT, 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
print(f"\n✅ Exported {len(data)} training examples to {OUT}")
PYEOF
  python3 "$WORK_DIR/data/export_training.py"
  log "✅ Data export complete"
}
# ═══════════════════════════════════════════════════════════════
# STEP 2: INSTALL DEPENDENCIES
# ═══════════════════════════════════════════════════════════════
#######################################
# Install the fine-tuning Python stack.
# Tries the Unsloth fused-kernel build first; on any failure falls back to
# the plain HuggingFace PEFT stack (slower, but functionally sufficient).
#######################################
install_deps() {
  log "📦 Installing dependencies..."
  if ! pip install --break-system-packages -q \
      "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" \
      datasets transformers trl peft accelerate bitsandbytes \
      2>/dev/null; then
    warn "Unsloth failed, using standard PEFT..."
    pip install --break-system-packages -q \
      transformers datasets trl peft accelerate bitsandbytes torch
  fi
  log "✅ Dependencies installed"
}
# ═══════════════════════════════════════════════════════════════
# STEP 3: FINE-TUNE WITH QLoRA
# ═══════════════════════════════════════════════════════════════
#######################################
# QLoRA fine-tune Llama-3.1-8B on the exported dataset via a generated
# Python script. The pasted source had lost all Python indentation (a
# SyntaxError as-is); the heredoc restores the intended structure unchanged.
# Globals:  WORK_DIR (read), log (called)
# Inputs:   $WORK_DIR/data/training_data.json
# Outputs:  $WORK_DIR/model-out/{lora-adapter,merged}
#######################################
train_model() {
  log "🧠 Fine-tuning Llama3.1-8b on RTX 4000 Ada 20GB..."
  cat > "$WORK_DIR/train.py" << 'PYEOF'
import json, os, torch

WORK = "/opt/wevia-finetune"
DATA = f"{WORK}/data/training_data.json"
OUT = f"{WORK}/model-out"
os.makedirs(OUT, exist_ok=True)

with open(DATA) as f:
    raw = json.load(f)
print(f"📊 {len(raw)} training examples")

# Prefer Unsloth (pre-quantized 4-bit checkpoint, fused kernels); fall back
# to standard transformers + PEFT with bitsandbytes NF4 quantization.
try:
    from unsloth import FastLanguageModel
    model, tok = FastLanguageModel.from_pretrained(
        "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        max_seq_length=2048, dtype=None, load_in_4bit=True)
    model = FastLanguageModel.get_peft_model(model,
        r=16, target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
        lora_alpha=16, lora_dropout=0, bias="none",
        use_gradient_checkpointing="unsloth", random_state=42)
    UNSLOTH = True
    print("✅ Unsloth loaded")
except ImportError:
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True)
    # NOTE(review): meta-llama checkpoints are gated — requires HF auth token.
    model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct",
        quantization_config=bnb, device_map="auto")
    tok = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
    tok.pad_token = tok.eos_token
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, LoraConfig(r=16, lora_alpha=16, lora_dropout=0.05,
        target_modules=["q_proj","k_proj","v_proj","o_proj"], bias="none", task_type="CAUSAL_LM"))
    UNSLOTH = False
    print("✅ Standard PEFT loaded")

from datasets import Dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# Alpaca-style prompt template — must match the stop sequences in the
# Ollama Modelfile ("### Instruction:", "### Input:").
tmpl = "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n{output}"
ds = Dataset.from_list(raw).map(lambda x: {"text": tmpl.format(**x)})

# NOTE(review): tokenizer=/dataset_text_field=/max_seq_length= as SFTTrainer
# kwargs require trl < 0.12; newer trl moved these into SFTConfig. Pin trl
# or migrate — confirm against the installed version.
trainer = SFTTrainer(
    model=model, tokenizer=tok, train_dataset=ds,
    args=TrainingArguments(
        output_dir=OUT, num_train_epochs=3, per_device_train_batch_size=2,
        gradient_accumulation_steps=4, learning_rate=2e-4, weight_decay=0.01,
        warmup_steps=50, logging_steps=10, save_steps=100, save_total_limit=2,
        fp16=True, optim="adamw_8bit", lr_scheduler_type="cosine",
        seed=42, report_to="none"),
    dataset_text_field="text", max_seq_length=2048,
    packing=UNSLOTH)
print("\n🚀 Training: 3 epochs | batch 2 | grad_accum 4 | lr 2e-4")
trainer.train()

# Save the LoRA adapter, then a merged full-weight model for GGUF conversion.
model.save_pretrained(f"{OUT}/lora-adapter")
tok.save_pretrained(f"{OUT}/lora-adapter")
if UNSLOTH:
    model.save_pretrained_merged(f"{OUT}/merged", tok, save_method="merged_16bit")
else:
    merged = model.merge_and_unload()
    merged.save_pretrained(f"{OUT}/merged")
    tok.save_pretrained(f"{OUT}/merged")
print(f"\n✅ Model saved to {OUT}/merged")
PYEOF
  python3 "$WORK_DIR/train.py"
  log "✅ Fine-tuning complete"
}
# ═══════════════════════════════════════════════════════════════
# STEP 4: CONVERT TO GGUF
# ═══════════════════════════════════════════════════════════════
#######################################
# Convert the merged HF model to a Q4_K_M GGUF for Ollama.
# BUG FIX: convert_hf_to_gguf.py only supports f32/f16/bf16/q8_0 (and tq)
# output types — `--outtype q4_k_m` is invalid and the original call would
# fail. Correct flow: convert to f16, then quantize with llama-quantize.
# Globals:  WORK_DIR (read), log (called)
# Outputs:  $WORK_DIR/wevia-sovereign.gguf
#######################################
convert_gguf() {
  log "🔄 Converting to GGUF Q4_K_M..."
  local llama_dir="$WORK_DIR/llama.cpp"
  if [ ! -d "$llama_dir" ]; then
    # Clone to an explicit path — the original cd'd and leaked the cwd change.
    git clone --depth 1 https://github.com/ggerganov/llama.cpp.git "$llama_dir"
    pip install --break-system-packages -q -r "$llama_dir/requirements.txt" 2>/dev/null
  fi

  # Step 1: HF safetensors → f16 GGUF (intermediate, ~16 GB for an 8B model).
  local f16_gguf="$WORK_DIR/wevia-sovereign-f16.gguf"
  python3 "$llama_dir/convert_hf_to_gguf.py" \
    "$WORK_DIR/model-out/merged" \
    --outfile "$f16_gguf" \
    --outtype f16

  # Step 2: f16 → Q4_K_M via llama-quantize (built on demand, CPU-only target).
  local quantize_bin="$llama_dir/build/bin/llama-quantize"
  if [ ! -x "$quantize_bin" ]; then
    log "🔧 Building llama-quantize..."
    cmake -S "$llama_dir" -B "$llama_dir/build" -DCMAKE_BUILD_TYPE=Release >/dev/null
    cmake --build "$llama_dir/build" --target llama-quantize -j"$(nproc)" >/dev/null
  fi
  "$quantize_bin" "$f16_gguf" "$WORK_DIR/wevia-sovereign.gguf" Q4_K_M
  rm -f -- "$f16_gguf"  # reclaim the large intermediate

  log "✅ GGUF: $(du -h "$WORK_DIR/wevia-sovereign.gguf" | awk '{print $1}')"
}
# ═══════════════════════════════════════════════════════════════
# STEP 5: DEPLOY TO OLLAMA
# ═══════════════════════════════════════════════════════════════
#######################################
# Register the quantized GGUF with Ollama as model "wevia-sovereign" and
# smoke-test it with a single prompt.
# Globals:  WORK_DIR (read), log (called)
# Requires: $WORK_DIR/wevia-sovereign.gguf (produced by convert_gguf),
#           ollama installed and its daemon running.
#######################################
deploy_ollama() {
log "🚀 Deploying wevia-sovereign to Ollama..."
# Quoted delimiter ('EOF') — Modelfile content is written verbatim.
# Stop sequences match the Alpaca prompt template used during training.
cat > "$WORK_DIR/Modelfile" << 'EOF'
FROM /opt/wevia-finetune/wevia-sovereign.gguf
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER num_ctx 4096
PARAMETER stop "### Instruction:"
PARAMETER stop "### Input:"
SYSTEM """Tu es HAMID NEXUS — l'IA souveraine de WEVAL Consulting, Casablanca, Maroc.
Version: SINGULARITY 2026 | 1418+ modules | 7 serveurs | Fine-tuné sur données WEVAL
RÈGLES:
1. Pense avant de parler — décompose, vérifie, actionne
2. Sois concret — commandes, code, chiffres
3. Code = excellence — try/catch, validation, prepared statements
4. Emojis naturels — 🧠⚡🚀✅❌💡📧🔧
5. Expert: email marketing, PowerMTA, PostgreSQL, SAP, cloud, IA"""
EOF
ollama create wevia-sovereign -f "$WORK_DIR/Modelfile"
log "Testing..."
# Smoke test: first 5 lines of a short generation (errors included via 2>&1).
ollama run wevia-sovereign "Qui es-tu? 2 phrases avec emoji." 2>&1 | head -5
log "✅ wevia-sovereign LIVE on Ollama"
}
# ═══════════════════════════════════════════════════════════════
#######################################
# Entry point: dispatch on the requested pipeline stage (default: all).
# Arguments: $1 - one of export|deps|train|convert|deploy|all (optional)
# Returns:   exit 1 with a usage line on an unknown stage.
#######################################
main() {
  local action="${1:-all}"
  log "════════════════════════════════════════"
  log " WEVIA SOVEREIGN FINE-TUNING PIPELINE"
  log " GPU: RTX 4000 Ada 20GB"
  log "════════════════════════════════════════"
  mkdir -p "$WORK_DIR/logs"
  case "$action" in
    export)  export_data ;;
    deps)    install_deps ;;
    train)   train_model ;;
    convert) convert_gguf ;;
    deploy)  deploy_ollama ;;
    all)
      export_data
      install_deps
      train_model
      convert_gguf
      deploy_ollama
      log "🏆 PIPELINE COMPLETE — wevia-sovereign is LIVE!"
      ;;
    *)
      echo "Usage: $0 [export|deps|train|convert|deploy|all]"
      exit 1
      ;;
  esac
}
main "$@" 2>&1 | tee "$WORK_DIR/logs/finetune-$(date +%Y%m%d-%H%M).log"