Compare commits
1 Commits
merge/miss
...
cursor/sta
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c2dcc941b8 |
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Generated execution artifacts
|
||||
reports/multiinstall_preflight_20*.csv
|
||||
|
||||
# Local temp files
|
||||
*.tmp
|
||||
*.swp
|
||||
28
README.md
28
README.md
@@ -1,7 +1,31 @@
|
||||
# WEVADS GPU Server
|
||||
- **IP**: 88.198.4.195
|
||||
- **IP**: managed outside this repository
|
||||
- **GPU**: NVIDIA RTX 4000 SFF Ada (20GB vRAM)
|
||||
- **RAM**: 62GB DDR4
|
||||
- **Disk**: 1.7TB NVMe
|
||||
- **Ollama**: localhost:11434
|
||||
- **Models**: deepseek-r1:8b, deepseek-r1:32b, llama3.1:8b
|
||||
- **Legacy local models**: deepseek-r1:8b, deepseek-r1:32b, llama3.1:8b
|
||||
|
||||
## Multi-install safe preflight
|
||||
|
||||
This repository now includes a lightweight preflight to avoid launching blocked or
|
||||
fragile multi-install batches. The script does not modify PMTA, SSH global
|
||||
configuration, or Java/JAR files. It only checks whether a server is ready before
|
||||
you include it in a batch.
|
||||
|
||||
### Included files
|
||||
|
||||
- `multiinstall-safe-preflight.sh`: validates SSH reachability/auth, free disk,
|
||||
RAM, dpkg locks, and apt health
|
||||
- `servers.example.csv`: sample input format for batch candidates
|
||||
- `reports/README.md`: explains generated readiness reports
|
||||
|
||||
### Usage
|
||||
|
||||
```bash
|
||||
chmod +x multiinstall-safe-preflight.sh
|
||||
./multiinstall-safe-preflight.sh servers.example.csv
|
||||
```
|
||||
|
||||
The script writes a timestamped CSV report into `reports/` and prints the subset
|
||||
of servers marked `ready=YES`. Launch the multi-install only with those servers.
|
||||
|
||||
162
multiinstall-safe-preflight.sh
Executable file
162
multiinstall-safe-preflight.sh
Executable file
@@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Multi-install SAFE preflight
|
||||
# Goal: reduce failed batches without touching PMTA/SSH/global config.
|
||||
#
|
||||
# Input file format (CSV-like, no header):
|
||||
# server_id,ip,username,password
|
||||
# Example:
|
||||
# 180,101.46.69.207,root,CHANGE_ME
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
# -------------------------------------------------------------------
# Runtime configuration. Everything except INPUT_FILE can be overridden
# through an environment variable of the same name.
# -------------------------------------------------------------------
INPUT_FILE="${1:-}"                      # path to the server list (first CLI argument)
CONNECT_TIMEOUT="${CONNECT_TIMEOUT:-5}"  # used for the TCP probe and ssh ConnectTimeout
SSH_BIN="${SSH_BIN:-ssh}"
SSHPASS_BIN="${SSHPASS_BIN:-sshpass}"
OUT_DIR="${OUT_DIR:-./reports}"
RUN_ID="$(date +%Y%m%d_%H%M%S)"          # timestamp that makes each report name unique per run
OUT_CSV="${OUT_DIR}/multiinstall_preflight_${RUN_ID}.csv"

# Refuse to run without a readable regular file as input. Diagnostics go to
# stderr so they never get mixed into captured/piped stdout.
if [[ -z "${INPUT_FILE}" || ! -f "${INPUT_FILE}" ]]; then
  echo "Usage: $0 <servers.csv>" >&2
  echo "Missing input file: ${INPUT_FILE:-<empty>}" >&2
  exit 1
fi

# Start a fresh report: create the output directory and write the CSV header.
mkdir -p "${OUT_DIR}"
echo "server_id,ip,ssh_tcp,ssh_auth,disk_ok,ram_ok,dpkg_lock,apt_health,ready,notes" > "${OUT_CSV}"
|
||||
|
||||
#######################################
# Probe whether TCP port 22 on a host accepts connections, using bash's
# /dev/tcp redirection inside a child shell bounded by `timeout`.
# Globals:   CONNECT_TIMEOUT (read) - time limit handed to `timeout`;
#            falls back to 5 when unset.
# Arguments: $1 - host name or IP address to probe
# Outputs:   nothing (stdout/stderr of the probe are discarded)
# Returns:   0 if the connection was opened in time, non-zero otherwise
#######################################
check_tcp_22() {
  local ip="$1"
  # The host comes from the input file, so it is handed to the child shell as
  # a positional parameter instead of being spliced into the command string;
  # this way it can never be interpreted as shell code.
  timeout "${CONNECT_TIMEOUT:-5}" \
    bash -c 'exec 3<>"/dev/tcp/$1/22"' _ "${ip}" >/dev/null 2>&1
}
|
||||
|
||||
#######################################
# Run a single command on a remote host over SSH with password auth,
# feeding the password through sshpass.
# Globals:   SSHPASS_BIN, SSH_BIN, CONNECT_TIMEOUT (all read)
# Arguments: $1 - remote user
#            $2 - remote host/IP
#            $3 - password
#            $4 - command line to execute remotely
# Outputs:   whatever the remote command / ssh writes to stdout and stderr
# Returns:   exit status of the sshpass/ssh invocation
#######################################
run_ssh_password() {
  local user="$1" ip="$2" pass="$3" cmd="$4"
  # The password travels in the SSHPASS environment variable (sshpass -e)
  # rather than on the command line, where `ps` would expose it to every
  # local user.
  # NOTE: host keys are deliberately not verified or stored (throwaway probe).
  # stdin is /dev/null so ssh cannot swallow data that the caller is still
  # reading from its own stdin (e.g. the remaining rows of a `while read` loop).
  SSHPASS="${pass}" "${SSHPASS_BIN}" -e "${SSH_BIN}" \
    -o StrictHostKeyChecking=no \
    -o UserKnownHostsFile=/dev/null \
    -o ConnectTimeout="${CONNECT_TIMEOUT}" \
    "${user}@${ip}" "${cmd}" </dev/null
}
|
||||
|
||||
#######################################
# Run a single command on a remote host over SSH relying on key-based
# (non-interactive) authentication.
# Globals:   SSH_BIN, CONNECT_TIMEOUT (both read)
# Arguments: $1 - remote user
#            $2 - remote host/IP
#            $3 - command line to execute remotely
# Outputs:   whatever the remote command / ssh writes to stdout and stderr
# Returns:   exit status of the ssh invocation
#######################################
run_ssh_key() {
  local user="$1" ip="$2" cmd="$3"
  # BatchMode=yes makes ssh fail immediately instead of stopping the whole
  # batch on an interactive password/passphrase prompt.
  # NOTE: host keys are deliberately not verified or stored (throwaway probe).
  # stdin is /dev/null so ssh cannot swallow data that the caller is still
  # reading from its own stdin (e.g. the remaining rows of a `while read` loop).
  "${SSH_BIN}" \
    -o BatchMode=yes \
    -o StrictHostKeyChecking=no \
    -o UserKnownHostsFile=/dev/null \
    -o ConnectTimeout="${CONNECT_TIMEOUT}" \
    "${user}@${ip}" "${cmd}" </dev/null
}
|
||||
|
||||
# Detect sshpass once. When it is unavailable every row falls back to
# key-based authentication (and the report row says so in its notes).
HAVE_SSHPASS=0
if command -v "${SSHPASS_BIN}" >/dev/null 2>&1; then
  HAVE_SSHPASS=1
fi

# Append the result row of the server currently being processed to the report.
# Reads the per-row globals assigned inside the loop below and OUT_CSV.
_emit_row() {
  printf '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' \
    "${server_id}" "${ip}" "${ssh_tcp}" "${ssh_auth}" "${disk_ok}" \
    "${ram_ok}" "${dpkg_lock}" "${apt_health}" "${ready}" "${notes}" \
    >> "${OUT_CSV}"
}

ROW_NUM=0
# The input file is read on fd 9 rather than stdin: ssh children inherit
# stdin and would otherwise swallow the remaining rows after the first host.
# `|| [[ -n "${line}" ]]` keeps a final row that lacks a trailing newline.
while IFS= read -r -u 9 line || [[ -n "${line}" ]]; do
  ROW_NUM=$((ROW_NUM + 1))

  # Tolerate CRLF files (e.g. exported from a spreadsheet).
  line="${line%$'\r'}"
  IFS=',' read -r c1 c2 c3 c4 <<< "${line}"

  # Skip blank rows and comment rows.
  [[ -z "${c1}" ]] && continue
  [[ "${c1}" =~ ^# ]] && continue

  # Skip common header rows.
  if [[ "${c1}" == "server_id" && "${c2}" == "ip" ]]; then
    continue
  fi
  if [[ "${c1}" == "ip" && "${c2}" == "username" ]]; then
    continue
  fi

  # Accept both formats:
  #   1) server_id,ip,username,password
  #   2) ip,username,password
  if [[ "${c1}" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ && -n "${c2}" && -n "${c3}" ]]; then
    server_id="row_${ROW_NUM}"
    ip="${c1}"
    username="${c2}"
    # In the 3-column format anything that landed in c4 is the tail of a
    # password that itself contains commas; glue it back on.
    password="${c3}${c4:+,${c4}}"
  else
    server_id="${c1}"
    ip="${c2}"
    username="${c3}"
    password="${c4}"
  fi

  # Pessimistic defaults; each check below upgrades its own column.
  ssh_tcp="FAIL"
  ssh_auth="FAIL"
  disk_ok="FAIL"
  ram_ok="FAIL"
  dpkg_lock="UNKNOWN"
  apt_health="UNKNOWN"
  ready="NO"
  notes=""

  # A row without a target host or user cannot be probed at all.
  if [[ -z "${ip}" || -z "${username}" ]]; then
    notes="malformed_row"
    _emit_row
    continue
  fi

  # 1) TCP reachability of port 22; nothing else is possible without it.
  if check_tcp_22 "${ip}"; then
    ssh_tcp="PASS"
  else
    notes="port22_unreachable"
    _emit_row
    continue
  fi

  # 2) Pick the SSH runner; the remote command is appended at call time.
  if [[ "${HAVE_SSHPASS}" == "1" ]]; then
    SSH_RUN=(run_ssh_password "${username}" "${ip}" "${password}")
  else
    SSH_RUN=(run_ssh_key "${username}" "${ip}")
    notes="${notes:+${notes}|}sshpass_missing_using_key_auth"
  fi

  # 3) Authentication smoke test; remaining checks need a working session.
  if "${SSH_RUN[@]}" "echo ok" >/dev/null 2>&1; then
    ssh_auth="PASS"
  else
    # Append so an earlier note (e.g. the key-auth fallback) is not lost.
    notes="${notes:+${notes}|}ssh_auth_failed"
    _emit_row
    continue
  fi

  # Disk check: >= 8GB free on /
  if "${SSH_RUN[@]}" \
    "avail=\$(df -BG / | awk 'NR==2 {gsub(\"G\",\"\",\$4); print \$4}'); [ \"\${avail:-0}\" -ge 8 ]"; then
    disk_ok="PASS"
  else
    notes="${notes:+${notes}|}low_disk"
  fi

  # RAM check: >= 2GB
  if "${SSH_RUN[@]}" \
    "mem=\$(awk '/MemTotal/ {print int(\$2/1024/1024)}' /proc/meminfo); [ \"\${mem:-0}\" -ge 2 ]"; then
    ram_ok="PASS"
  else
    notes="${notes:+${notes}|}low_ram"
  fi

  # dpkg/apt lock check: the remote command succeeds only when no process
  # holds either dpkg lock file.
  if "${SSH_RUN[@]}" \
    "if fuser /var/lib/dpkg/lock >/dev/null 2>&1 || fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; then exit 1; else exit 0; fi"; then
    dpkg_lock="PASS"
  else
    dpkg_lock="FAIL"
    notes="${notes:+${notes}|}dpkg_lock_detected"
  fi

  # apt health check (read-only)
  if "${SSH_RUN[@]}" "apt-cache policy >/dev/null 2>&1"; then
    apt_health="PASS"
  else
    apt_health="FAIL"
    notes="${notes:+${notes}|}apt_health_failed"
  fi

  # A server is ready only when every individual check passed.
  if [[ "${ssh_tcp}" == "PASS" && "${ssh_auth}" == "PASS" && "${disk_ok}" == "PASS" \
    && "${ram_ok}" == "PASS" && "${dpkg_lock}" == "PASS" && "${apt_health}" == "PASS" ]]; then
    ready="YES"
  fi

  _emit_row
done 9< "${INPUT_FILE}"

# Summary: report location plus the servers whose `ready` column (field 9) is YES.
echo "Preflight report generated: ${OUT_CSV}"
echo "Ready servers:"
awk -F',' 'NR>1 && $9=="YES" {print " - " $1 " (" $2 ")"}' "${OUT_CSV}"
|
||||
10
reports/README.md
Normal file
10
reports/README.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Reports output
|
||||
|
||||
This folder stores generated artifacts from:
|
||||
|
||||
- `multiinstall-safe-preflight.sh`
|
||||
|
||||
Examples:
|
||||
|
||||
- `multiinstall_preflight_*.csv`: server readiness reports generated before
|
||||
multi-install batches
|
||||
4
servers.example.csv
Normal file
4
servers.example.csv
Normal file
@@ -0,0 +1,4 @@
|
||||
# server_id,ip,username,password
|
||||
180,101.46.69.207,root,CHANGE_ME
|
||||
181,101.46.69.121,root,CHANGE_ME
|
||||
182,101.46.65.209,root,CHANGE_ME
|
||||
|
Reference in New Issue
Block a user