# NOTE: the lines below are file-browser export residue (not shell code);
# they are commented out so the script still parses when run via `bash`.
# Files
# wevads-platform/gpu-deploy.sh
#
# 289 lines
# 15 KiB
# Bash
# Executable File
#!/bin/bash
###############################################################################
# WEVIA SOVEREIGN FINE-TUNING PIPELINE — gpu-deploy.sh
# Server: Hetzner GEX44 (88.198.4.195) — RTX 4000 Ada 20GB
#
# Pipeline: Export DB → Format → QLoRA Fine-tune → GGUF → Ollama deploy
# Usage: bash gpu-deploy.sh [export|deps|train|convert|deploy|all]
###############################################################################
set -euo pipefail

# Pipeline constants — readonly so an accidental reassignment fails loudly.
readonly GPU_SERVER="88.198.4.195"    # fine-tuning host (informational)
readonly DB_SERVER="95.216.167.89"    # WEVADS PostgreSQL host (informational)
readonly WORK_DIR="/opt/wevia-finetune"
readonly MODEL_OUT="wevia-sovereign"
readonly OLLAMA_PORT=11434            # informational; ollama CLI uses its default

# log MSG — timestamped green status line on stdout.
log() { printf '\033[0;32m[%s]\033[0m %s\n' "$(date '+%H:%M:%S')" "$1"; }
# warn MSG — yellow warning on stderr (diagnostics belong on stderr; still
# captured in the log file via the `2>&1 | tee` at the bottom of the script).
warn() { printf '\033[1;33m[WARN]\033[0m %s\n' "$1" >&2; }
# ═══════════════════════════════════════════════════════════════
# STEP 1: EXPORT TRAINING DATA FROM WEVADS
# ═══════════════════════════════════════════════════════════════
#######################################
# Export WEVADS Postgres tables into Alpaca-format JSON by generating and
# running a Python script at $WORK_DIR/data/export_training.py.
# The pasted source had lost all Python indentation (a SyntaxError as-is);
# the heredoc below restores the intended structure, logic unchanged.
# Globals:  WORK_DIR (read), log (called)
# Outputs:  $WORK_DIR/data/training_data.json
#######################################
export_data() {
  log "📦 Exporting training data from WEVADS..."
  mkdir -p "$WORK_DIR/data"
  # Quoted delimiter ('PYEOF') — no shell expansion inside the heredoc.
  cat > "$WORK_DIR/data/export_training.py" << 'PYEOF'
#!/usr/bin/env python3
"""Export WEVADS data into Alpaca training format"""
import json, os, sys, re
try:
    import psycopg2
except ImportError:
    # Self-heal on first run (Debian 12+ requires --break-system-packages).
    os.system("pip install psycopg2-binary --break-system-packages -q")
    import psycopg2

# SECURITY NOTE(review): credentials are hardcoded — move to env vars/secrets.
DB = "host=95.216.167.89 port=5432 dbname=adx_system user=admin password=admin123"
OUT = "/opt/wevia-finetune/data/training_data.json"

data = []
conn = psycopg2.connect(DB)
cur = conn.cursor()

# Each section is best-effort: a missing table only skips that source.
# 1. Knowledge Base
print(" Exporting KB...")
try:
    cur.execute("SELECT topic, content, category FROM admin.knowledge_base WHERE content IS NOT NULL AND length(content) > 50 LIMIT 2000")
    for topic, content, cat in cur.fetchall():
        data.append({"instruction": f"En tant qu'expert WEVAL Consulting, explique : {topic}", "input": "", "output": content[:2000]})
except Exception as e:
    print(f" KB skip: {e}")

# 2. HAMID conversations
print(" Exporting HAMID conversations...")
try:
    cur.execute("SELECT user_message, assistant_response FROM admin.hamid_conversations WHERE assistant_response IS NOT NULL AND length(assistant_response) > 50 ORDER BY created_at DESC LIMIT 3000")
    for q, a in cur.fetchall():
        data.append({"instruction": q[:500], "input": "", "output": a[:2000]})
except Exception as e:
    print(f" HAMID skip: {e}")

# 3. Claude KB
print(" Exporting Claude KB...")
try:
    cur.execute("SELECT topic, content FROM admin.claude_conversations_kb WHERE content IS NOT NULL AND length(content) > 100 LIMIT 1000")
    for t, c in cur.fetchall():
        data.append({"instruction": f"Explique en détail : {t}", "input": "", "output": c[:2000]})
except Exception as e:
    print(f" Claude KB skip: {e}")

# 4. Chatbot history
print(" Exporting chatbot history...")
try:
    cur.execute("SELECT user_message, bot_response FROM admin.chatbot_conversations_history WHERE bot_response IS NOT NULL AND length(bot_response) > 50 ORDER BY created_at DESC LIMIT 2000")
    for q, a in cur.fetchall():
        data.append({"instruction": q[:500], "input": "", "output": a[:2000]})
except Exception as e:
    print(f" Chatbot skip: {e}")

# 5. Creative templates → email marketing expertise
print(" Exporting email expertise...")
try:
    cur.execute("SELECT subject, html_body FROM admin.creatives WHERE subject IS NOT NULL AND html_body IS NOT NULL LIMIT 500")
    for subj, body in cur.fetchall():
        # Crude tag strip — good enough for training text, not general HTML.
        text = re.sub('<[^<]+?>', '', body)[:500]
        data.append({"instruction": f"Génère un email marketing professionnel avec le sujet : {subj}", "input": "", "output": f"Sujet: {subj}\n\n{text}"})
except Exception as e:
    print(f" Creatives skip: {e}")

# 6. Hardcoded personality + expertise examples
personality = [
    {"instruction": "Qui es-tu?", "input": "",
     "output": "🧠 Je suis HAMID NEXUS, l'IA souveraine de WEVAL Consulting à Casablanca. Version SINGULARITY 2026, 1418+ modules, 7 serveurs interconnectés, providers IA en failover automatique. Expert en email marketing, consulting stratégique, IA et infrastructure cloud. ⚡"},
    {"instruction": "Explique PowerMTA simplement", "input": "",
     "output": "📧 PowerMTA est le moteur d'envoi d'emails le plus puissant du marché. Il gère des milliers d'IPs et domaines simultanément, route intelligemment selon l'ISP (Gmail, Outlook, Yahoo), et respecte les limites de chaque fournisseur. La clé : bon warmup + rotation IPs + headers propres = 95%+ inbox rate. 🚀"},
    {"instruction": "Diagnostique un problème de délivrabilité email", "input": "",
     "output": "🔍 DIAGNOSTIC:\n\n1. Vérifie les bases: `pmta show queue` et `pmta show status`\n2. Top 3 hypothèses: IP blacklistée (check mxtoolbox), SPF/DKIM/DMARC mal config (`dig TXT domain`), warmup trop agressif\n3. Fix rapide: bounce > 5% → flush queue + switch IP. Spam > 2% → change contenu\n\n🎯 Objectifs: bounce < 2%, spam < 0.5%, inbox > 90%"},
    {"instruction": "Stratégie WEVAL Consulting?", "input": "",
     "output": "🎯 WEVAL opère sur 3 axes:\n\n1. Transformation Digitale — Migration SAP S/4HANA, cloud, IA pour entreprises marocaines/africaines\n2. Intelligence Marketing — Plateforme WEVADS (6.6M+ contacts, 128 offres actives, scraping intelligent + IA)\n3. Recrutement Tech — WEVAL Recruitment place des talents IT avec pré-qualification IA\n\n💡 Notre avantage: on livre des résultats mesurables avec notre propre stack technologique."},
    {"instruction": "Comment optimiser le taux d'inbox Gmail?", "input": "",
     "output": "📧 Optimisation Gmail:\n\n1. **Warmup progressif** — Commence à 5 emails/jour, monte de 20%/jour max\n2. **SPF+DKIM+DMARC** — Les 3 doivent être alignés et valides\n3. **Headers propres** — Pas de X-Mailer, pas de headers suspects\n4. **Engagement** — Envoie d'abord aux contacts engagés (opens/clicks récents)\n5. **Contenu** — Ratio texte/image > 60/40, pas de tracking pixels excessifs\n\n```bash\n# Vérifier la config\ndig TXT _dmarc.tondomaine.com\ndig TXT tondomaine.com | grep spf\n```\n\n🎯 Cible: >95% inbox sur Gmail avec ces pratiques."},
    {"instruction": "Écris un script bash pour monitorer PMTA", "input": "",
     "output": "💻 Script monitoring PMTA:\n\n```bash\n#!/bin/bash\nset -euo pipefail\n\n# Check PMTA status\nif ! pgrep -x pmtad > /dev/null; then\n echo '❌ PMTA DOWN — restarting...'\n systemctl restart pmta\n sleep 5\nfi\n\n# Queue check\nQUEUE=$(pmta show queue 2>/dev/null | grep -c 'active' || echo 0)\nif [ \"$QUEUE\" -gt 500 ]; then\n echo \"⚠️ Queue haute: $QUEUE — investigation...\"\n pmta show queue | head -20\nfi\n\n# Bounce rate\nBOUNCES=$(pmta show counters | grep 'softBounce\\|hardBounce' | awk '{sum+=$2}END{print sum}')\necho \"📊 Queue: $QUEUE | Bounces: $BOUNCES\"\n```\n\n🔧 Ajoute en cron: `*/5 * * * *` pour monitoring continu."},
]
data.extend(personality)

conn.close()
os.makedirs(os.path.dirname(OUT), exist_ok=True)
with open(OUT, 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
print(f"\n✅ Exported {len(data)} training examples to {OUT}")
PYEOF
  python3 "$WORK_DIR/data/export_training.py"
  log "✅ Data export complete"
}
# ═══════════════════════════════════════════════════════════════
# STEP 2: INSTALL DEPENDENCIES
# ═══════════════════════════════════════════════════════════════
#######################################
# Install the fine-tuning Python stack.
# Tries the Unsloth fused-kernel build first; on any failure falls back to
# the plain HuggingFace PEFT stack (slower, but functionally sufficient).
#######################################
install_deps() {
  log "📦 Installing dependencies..."
  if ! pip install --break-system-packages -q \
      "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" \
      datasets transformers trl peft accelerate bitsandbytes \
      2>/dev/null; then
    warn "Unsloth failed, using standard PEFT..."
    pip install --break-system-packages -q \
      transformers datasets trl peft accelerate bitsandbytes torch
  fi
  log "✅ Dependencies installed"
}
# ═══════════════════════════════════════════════════════════════
# STEP 3: FINE-TUNE WITH QLoRA
# ═══════════════════════════════════════════════════════════════
#######################################
# QLoRA fine-tune Llama-3.1-8B on the exported dataset via a generated
# Python script. The pasted source had lost all Python indentation (a
# SyntaxError as-is); the heredoc restores the intended structure unchanged.
# Globals:  WORK_DIR (read), log (called)
# Inputs:   $WORK_DIR/data/training_data.json
# Outputs:  $WORK_DIR/model-out/{lora-adapter,merged}
#######################################
train_model() {
  log "🧠 Fine-tuning Llama3.1-8b on RTX 4000 Ada 20GB..."
  cat > "$WORK_DIR/train.py" << 'PYEOF'
import json, os, torch

WORK = "/opt/wevia-finetune"
DATA = f"{WORK}/data/training_data.json"
OUT = f"{WORK}/model-out"
os.makedirs(OUT, exist_ok=True)

with open(DATA) as f:
    raw = json.load(f)
print(f"📊 {len(raw)} training examples")

# Prefer Unsloth (pre-quantized 4-bit checkpoint, fused kernels); fall back
# to standard transformers + PEFT with bitsandbytes NF4 quantization.
try:
    from unsloth import FastLanguageModel
    model, tok = FastLanguageModel.from_pretrained(
        "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        max_seq_length=2048, dtype=None, load_in_4bit=True)
    model = FastLanguageModel.get_peft_model(model,
        r=16, target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
        lora_alpha=16, lora_dropout=0, bias="none",
        use_gradient_checkpointing="unsloth", random_state=42)
    UNSLOTH = True
    print("✅ Unsloth loaded")
except ImportError:
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True)
    # NOTE(review): meta-llama checkpoints are gated — requires HF auth token.
    model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct",
        quantization_config=bnb, device_map="auto")
    tok = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
    tok.pad_token = tok.eos_token
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, LoraConfig(r=16, lora_alpha=16, lora_dropout=0.05,
        target_modules=["q_proj","k_proj","v_proj","o_proj"], bias="none", task_type="CAUSAL_LM"))
    UNSLOTH = False
    print("✅ Standard PEFT loaded")

from datasets import Dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# Alpaca-style prompt template — must match the stop sequences in the
# Ollama Modelfile ("### Instruction:", "### Input:").
tmpl = "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n{output}"
ds = Dataset.from_list(raw).map(lambda x: {"text": tmpl.format(**x)})

# NOTE(review): tokenizer=/dataset_text_field=/max_seq_length= as SFTTrainer
# kwargs require trl < 0.12; newer trl moved these into SFTConfig. Pin trl
# or migrate — confirm against the installed version.
trainer = SFTTrainer(
    model=model, tokenizer=tok, train_dataset=ds,
    args=TrainingArguments(
        output_dir=OUT, num_train_epochs=3, per_device_train_batch_size=2,
        gradient_accumulation_steps=4, learning_rate=2e-4, weight_decay=0.01,
        warmup_steps=50, logging_steps=10, save_steps=100, save_total_limit=2,
        fp16=True, optim="adamw_8bit", lr_scheduler_type="cosine",
        seed=42, report_to="none"),
    dataset_text_field="text", max_seq_length=2048,
    packing=UNSLOTH)
print("\n🚀 Training: 3 epochs | batch 2 | grad_accum 4 | lr 2e-4")
trainer.train()

# Save the LoRA adapter, then a merged full-weight model for GGUF conversion.
model.save_pretrained(f"{OUT}/lora-adapter")
tok.save_pretrained(f"{OUT}/lora-adapter")
if UNSLOTH:
    model.save_pretrained_merged(f"{OUT}/merged", tok, save_method="merged_16bit")
else:
    merged = model.merge_and_unload()
    merged.save_pretrained(f"{OUT}/merged")
    tok.save_pretrained(f"{OUT}/merged")
print(f"\n✅ Model saved to {OUT}/merged")
PYEOF
  python3 "$WORK_DIR/train.py"
  log "✅ Fine-tuning complete"
}
# ═══════════════════════════════════════════════════════════════
# STEP 4: CONVERT TO GGUF
# ═══════════════════════════════════════════════════════════════
#######################################
# Convert the merged HF model to a Q4_K_M GGUF for Ollama.
# BUG FIX: convert_hf_to_gguf.py only supports f32/f16/bf16/q8_0 (and tq)
# output types — `--outtype q4_k_m` is invalid and the original call would
# fail. Correct flow: convert to f16, then quantize with llama-quantize.
# Globals:  WORK_DIR (read), log (called)
# Outputs:  $WORK_DIR/wevia-sovereign.gguf
#######################################
convert_gguf() {
  log "🔄 Converting to GGUF Q4_K_M..."
  local llama_dir="$WORK_DIR/llama.cpp"
  if [ ! -d "$llama_dir" ]; then
    # Clone to an explicit path — the original cd'd and leaked the cwd change.
    git clone --depth 1 https://github.com/ggerganov/llama.cpp.git "$llama_dir"
    pip install --break-system-packages -q -r "$llama_dir/requirements.txt" 2>/dev/null
  fi

  # Step 1: HF safetensors → f16 GGUF (intermediate, ~16 GB for an 8B model).
  local f16_gguf="$WORK_DIR/wevia-sovereign-f16.gguf"
  python3 "$llama_dir/convert_hf_to_gguf.py" \
    "$WORK_DIR/model-out/merged" \
    --outfile "$f16_gguf" \
    --outtype f16

  # Step 2: f16 → Q4_K_M via llama-quantize (built on demand, CPU-only target).
  local quantize_bin="$llama_dir/build/bin/llama-quantize"
  if [ ! -x "$quantize_bin" ]; then
    log "🔧 Building llama-quantize..."
    cmake -S "$llama_dir" -B "$llama_dir/build" -DCMAKE_BUILD_TYPE=Release >/dev/null
    cmake --build "$llama_dir/build" --target llama-quantize -j"$(nproc)" >/dev/null
  fi
  "$quantize_bin" "$f16_gguf" "$WORK_DIR/wevia-sovereign.gguf" Q4_K_M
  rm -f -- "$f16_gguf"  # reclaim the large intermediate

  log "✅ GGUF: $(du -h "$WORK_DIR/wevia-sovereign.gguf" | awk '{print $1}')"
}
# ═══════════════════════════════════════════════════════════════
# STEP 5: DEPLOY TO OLLAMA
# ═══════════════════════════════════════════════════════════════
#######################################
# Register the quantized GGUF with Ollama as model "wevia-sovereign" and
# smoke-test it with a single prompt.
# Globals:  WORK_DIR (read), log (called)
# Requires: $WORK_DIR/wevia-sovereign.gguf (produced by convert_gguf),
#           ollama installed and its daemon running.
#######################################
deploy_ollama() {
log "🚀 Deploying wevia-sovereign to Ollama..."
# Quoted delimiter ('EOF') — Modelfile content is written verbatim.
# Stop sequences match the Alpaca prompt template used during training.
cat > "$WORK_DIR/Modelfile" << 'EOF'
FROM /opt/wevia-finetune/wevia-sovereign.gguf
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER num_ctx 4096
PARAMETER stop "### Instruction:"
PARAMETER stop "### Input:"
SYSTEM """Tu es HAMID NEXUS — l'IA souveraine de WEVAL Consulting, Casablanca, Maroc.
Version: SINGULARITY 2026 | 1418+ modules | 7 serveurs | Fine-tuné sur données WEVAL
RÈGLES:
1. Pense avant de parler — décompose, vérifie, actionne
2. Sois concret — commandes, code, chiffres
3. Code = excellence — try/catch, validation, prepared statements
4. Emojis naturels — 🧠⚡🚀✅❌💡📧🔧
5. Expert: email marketing, PowerMTA, PostgreSQL, SAP, cloud, IA"""
EOF
ollama create wevia-sovereign -f "$WORK_DIR/Modelfile"
log "Testing..."
# Smoke test: first 5 lines of a short generation (errors included via 2>&1).
ollama run wevia-sovereign "Qui es-tu? 2 phrases avec emoji." 2>&1 | head -5
log "✅ wevia-sovereign LIVE on Ollama"
}
# ═══════════════════════════════════════════════════════════════
#######################################
# Entry point: dispatch on the requested pipeline stage (default: all).
# Arguments: $1 - one of export|deps|train|convert|deploy|all (optional)
# Returns:   exit 1 with a usage line on an unknown stage.
#######################################
main() {
  local action="${1:-all}"
  log "════════════════════════════════════════"
  log " WEVIA SOVEREIGN FINE-TUNING PIPELINE"
  log " GPU: RTX 4000 Ada 20GB"
  log "════════════════════════════════════════"
  mkdir -p "$WORK_DIR/logs"
  case "$action" in
    export)  export_data ;;
    deps)    install_deps ;;
    train)   train_model ;;
    convert) convert_gguf ;;
    deploy)  deploy_ollama ;;
    all)
      export_data
      install_deps
      train_model
      convert_gguf
      deploy_ollama
      log "🏆 PIPELINE COMPLETE — wevia-sovereign is LIVE!"
      ;;
    *)
      echo "Usage: $0 [export|deps|train|convert|deploy|all]"
      exit 1
      ;;
  esac
}
main "$@" 2>&1 | tee "$WORK_DIR/logs/finetune-$(date +%Y%m%d-%H%M).log"