feat: add optional health check auto-restart for crashed model sessions (#7855)

* feat: add optional health check auto-restart for crashed model sessions

* fix: update

* fix: update

* fix: lint

* fix: lint

* fix: tauri

* fix: build

* fix: update

* fix: update
This commit is contained in:
Clayton
2026-03-31 02:04:23 -05:00
committed by GitHub
parent 813260a365
commit 1c3a03557c
13 changed files with 300 additions and 33 deletions

View File

@@ -36,6 +36,13 @@
"controllerType": "checkbox",
"controllerProps": { "value": true }
},
{
"key": "auto_restart_on_crash",
"title": "Auto-Restart Crashed Models",
"description": "Automatically reloads a model if its llama-server process crashes.",
"controllerType": "checkbox",
"controllerProps": { "value": false }
},
{
"key": "timeout",
"title": "Timeout for llamacpp",

View File

@@ -1857,28 +1857,17 @@ export default class llamacpp_extension extends AIEngine {
}
}
/**
 * Ask the llamacpp plugin for a live session for `modelId`; the backend may
 * transparently restart the llama-server process if it has crashed.
 */
private async ensureHealthySession(modelId: string): Promise<SessionInfo> {
  const session = await invoke<SessionInfo>(
    'plugin:llamacpp|ensure_session_ready',
    { modelId }
  )
  return session
}
override async chat(
opts: chatCompletionRequest,
abortController?: AbortController
): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
const sessionInfo = await this.findSessionByModel(opts.model)
if (!sessionInfo) {
throw new Error(`No active session found for model: ${opts.model}`)
}
// check if the process is alive
const result = await invoke<boolean>('plugin:llamacpp|is_process_running', {
pid: sessionInfo.pid,
})
if (result) {
try {
await fetch(`http://localhost:${sessionInfo.port}/health`)
} catch (e) {
this.unload(sessionInfo.model_id)
throw new Error('Model appears to have crashed! Please reload!')
}
} else {
throw new Error('Model have crashed! Please reload!')
}
const sessionInfo = await this.ensureHealthySession(opts.model)
const baseUrl = `http://localhost:${sessionInfo.port}/v1`
const url = `${baseUrl}/chat/completions`
const headers = {
@@ -2260,21 +2249,25 @@ export default class llamacpp_extension extends AIEngine {
throw new Error(`No active session found for model: ${opts.model}`)
}
// Check if the process is alive
const result = await invoke<boolean>('plugin:llamacpp|is_process_running', {
// Token counting should be side-effect free (no auto-restart/unload).
const isRunning = await invoke<boolean>('plugin:llamacpp|is_process_running', {
pid: sessionInfo.pid,
})
if (result) {
try {
await fetch(`http://localhost:${sessionInfo.port}/health`)
} catch (e) {
this.unload(sessionInfo.model_id)
throw new Error('Model appears to have crashed! Please reload!')
}
} else {
if (!isRunning) {
throw new Error('Model has crashed! Please reload!')
}
try {
const healthResponse = await fetch(
`http://localhost:${sessionInfo.port}/health`
)
if (!healthResponse.ok) {
throw new Error('unhealthy')
}
} catch (_e) {
throw new Error('Model appears to have crashed! Please reload!')
}
const baseUrl = `http://localhost:${sessionInfo.port}`
const headers = {
'Content-Type': 'application/json',

View File

@@ -7,6 +7,7 @@ const COMMANDS: &[&str] = &[
"get_devices",
"generate_api_key",
"is_process_running",
"ensure_session_ready",
"get_random_port",
"find_session_by_model",
"get_loaded_models",

View File

@@ -36,6 +36,7 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {
version_backend: asString(config.version_backend),
auto_update_engine: asBool(config.auto_update_engine),
auto_unload: asBool(config.auto_unload),
auto_restart_on_crash: asBool(config.auto_restart_on_crash),
timeout: asNumber(config.timeout, 600),
llamacpp_env: asString(config.llamacpp_env),

View File

@@ -31,6 +31,7 @@ export type LlamacppConfig = {
version_backend: string
auto_update_engine: boolean
auto_unload: boolean
auto_restart_on_crash: boolean
timeout: number
llamacpp_env: string
fit: boolean

View File

@@ -10,6 +10,7 @@ Default permissions for the llamacpp plugin
- `allow-get-devices`
- `allow-generate-api-key`
- `allow-is-process-running`
- `allow-ensure-session-ready`
- `allow-get-random-port`
- `allow-find-session-by-model`
- `allow-get-loaded-models`
@@ -125,6 +126,32 @@ Denies the determine_supported_backends command without any pre-configured scope
<tr>
<td>
`llamacpp:allow-ensure-session-ready`
</td>
<td>
Enables the ensure_session_ready command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-ensure-session-ready`
</td>
<td>
Denies the ensure_session_ready command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-estimate-kv-cache-size`
</td>

View File

@@ -10,6 +10,7 @@ permissions = [
"allow-get-devices",
"allow-generate-api-key",
"allow-is-process-running",
"allow-ensure-session-ready",
"allow-get-random-port",
"allow-find-session-by-model",
"allow-get-loaded-models",

View File

@@ -330,6 +330,18 @@
"const": "deny-determine-supported-backends",
"markdownDescription": "Denies the determine_supported_backends command without any pre-configured scope."
},
{
"description": "Enables the ensure_session_ready command without any pre-configured scope.",
"type": "string",
"const": "allow-ensure-session-ready",
"markdownDescription": "Enables the ensure_session_ready command without any pre-configured scope."
},
{
"description": "Denies the ensure_session_ready command without any pre-configured scope.",
"type": "string",
"const": "deny-ensure-session-ready",
"markdownDescription": "Denies the ensure_session_ready command without any pre-configured scope."
},
{
"description": "Enables the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
@@ -655,10 +667,10 @@
"markdownDescription": "Denies the validate_backend_string command without any pre-configured scope."
},
{
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`",
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-ensure-session-ready`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`",
"type": "string",
"const": "default",
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`"
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-ensure-session-ready`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`"
}
]
}

View File

@@ -8,6 +8,8 @@ pub struct LlamacppConfig {
pub version_backend: String,
pub auto_update_engine: bool,
pub auto_unload: bool,
#[serde(default)]
pub auto_restart_on_crash: bool,
pub timeout: i32,
pub llamacpp_env: String,
pub fit: bool,
@@ -403,6 +405,7 @@ mod tests {
version_backend: "v1.0/standard".to_string(),
auto_update_engine: false,
auto_unload: false,
auto_restart_on_crash: false,
timeout: 120,
llamacpp_env: String::new(),
fit: false,

View File

@@ -4,8 +4,9 @@ use sha2::Sha256;
use std::collections::HashMap;
use std::process::Stdio;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use std::time::Duration;
use tauri::{Manager, Runtime, State};
use tauri::{Emitter, Manager, Runtime, State};
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tokio::sync::{mpsc, Mutex};
@@ -31,6 +32,15 @@ use crate::process::graceful_terminate_process;
use crate::process::force_terminate_process;
type HmacSha256 = Hmac<Sha256>;
// Auto-restart rate limit: at most this many automatic restarts per model...
const AUTO_RESTART_MAX_ATTEMPTS: usize = 3;
// ...within this sliding window (5 minutes, expressed in milliseconds).
const AUTO_RESTART_WINDOW_MS: u64 = 5 * 60 * 1000;
/// Payload emitted on the `llamacpp://session-*` lifecycle events so the
/// frontend can surface crash / restart status for a model.
#[derive(Clone, serde::Serialize)]
struct SessionLifecycleEvent {
    // Model the event refers to.
    model_id: String,
    // Pid of the affected process, when known.
    pid: Option<i32>,
    // Human-readable status detail for display/logging.
    message: String,
}
#[derive(serde::Serialize, serde::Deserialize)]
pub struct UnloadResult {
@@ -52,6 +62,17 @@ pub async fn load_llama_model_impl(
timeout: u64,
) -> ServerResult<SessionInfo> {
let mut process_map = process_map_arc.lock().await;
let launch_config = crate::state::SessionLaunchConfig {
backend_path: backend_path.to_string(),
model_id: model_id.clone(),
model_path: model_path.clone(),
port,
config: config.clone(),
envs: envs.clone(),
mmproj_path: mmproj_path.clone(),
is_embedding,
timeout,
};
log::info!("Attempting to launch server at path: {:?}", backend_path);
log::info!("Using configuration: {:?}", config);
@@ -269,12 +290,68 @@ pub async fn load_llama_model_impl(
LLamaBackendSession {
child,
info: session_info.clone(),
launch: launch_config,
restart_attempt_timestamps_ms: Vec::new(),
},
);
Ok(session_info)
}
/// Current wall-clock time as milliseconds since the Unix epoch.
/// Returns 0 if the system clock reports a time before the epoch.
fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
/// Prune restart timestamps that fell out of the sliding rate-limit window,
/// then report whether another automatic restart is still allowed.
/// Side effect: mutates `session.restart_attempt_timestamps_ms` in place.
fn can_attempt_restart(session: &mut LLamaBackendSession) -> bool {
    let window_start = now_ms().saturating_sub(AUTO_RESTART_WINDOW_MS);
    let attempts = &mut session.restart_attempt_timestamps_ms;
    attempts.retain(|&ts| ts >= window_start);
    attempts.len() < AUTO_RESTART_MAX_ATTEMPTS
}
/// Spawn a background task that watches the llama-server process for
/// `model_id` and triggers `ensure_session_ready` (which may auto-restart the
/// model) as soon as the child process exits.
///
/// The task polls once per second and stops watching when:
/// - the session disappears from the process map (manual unload/cleanup), or
/// - `ensure_session_ready` returns an error (auto-restart disabled, retry
///   limit reached, or the relaunch itself failed).
fn start_session_exit_monitor<R: Runtime>(app_handle: tauri::AppHandle<R>, model_id: String) {
    tokio::spawn(async move {
        loop {
            tokio::time::sleep(Duration::from_millis(1000)).await;
            // Inspect the child handle under the map lock, but keep the lock
            // scope tight: ensure_session_ready re-acquires the same lock.
            let maybe_exited = {
                let state: State<LlamacppState> = app_handle.state();
                let mut map = state.llama_server_process.lock().await;
                let maybe_session = map
                    .values_mut()
                    .find(|session| session.info.model_id == model_id);
                let Some(session) = maybe_session else {
                    // Session no longer exists (unloaded manually or cleaned up); stop watching.
                    return;
                };
                match session.child.try_wait() {
                    Ok(Some(_status)) => true,
                    Ok(None) => false,
                    Err(err) => {
                        log::warn!(
                            "Failed to inspect process state for model '{}': {}",
                            model_id,
                            err
                        );
                        false
                    }
                }
            };
            if maybe_exited {
                // Fix: the Err result was previously discarded, so a crashed
                // session that could not be restarted (auto-restart disabled,
                // retry limit reached, or relaunch failure) was re-processed —
                // and re-emitted lifecycle events — every second, forever.
                // Stop monitoring once recovery is impossible.
                if ensure_session_ready(app_handle.clone(), model_id.clone())
                    .await
                    .is_err()
                {
                    return;
                }
            }
        }
    });
}
/// Load a llama model and start the server
#[tauri::command]
pub async fn load_llama_model<R: Runtime>(
@@ -290,7 +367,7 @@ pub async fn load_llama_model<R: Runtime>(
timeout: u64,
) -> ServerResult<SessionInfo> {
let state: State<LlamacppState> = app_handle.state();
load_llama_model_impl(
let session_info = load_llama_model_impl(
state.llama_server_process.clone(),
backend_path,
model_id,
@@ -302,7 +379,12 @@ pub async fn load_llama_model<R: Runtime>(
is_embedding,
timeout,
)
.await
.await?;
// Observe process exit from plugin side immediately after load.
start_session_exit_monitor(app_handle, session_info.model_id.clone());
Ok(session_info)
}
/// Unload a llama model by terminating its process
@@ -370,6 +452,128 @@ pub async fn is_process_running<R: Runtime>(
is_process_running_by_pid(app_handle, pid).await
}
/// Tauri command: return a healthy `SessionInfo` for `model_id`, transparently
/// restarting the llama-server process when it has crashed.
///
/// Flow:
/// 1. Look the session up in the shared process map and poll the child handle;
///    a still-running child short-circuits with the existing session info.
/// 2. If the child has exited, emit `llamacpp://session-exited`, then bail out
///    unless `auto_restart_on_crash` is enabled and the rate limit
///    (`AUTO_RESTART_MAX_ATTEMPTS` within `AUTO_RESTART_WINDOW_MS`) permits a
///    retry.
/// 3. Remove the dead entry and relaunch using the captured
///    `SessionLaunchConfig`, emitting restart lifecycle events along the way.
///
/// Errors are returned as user-facing strings (Tauri command convention).
#[tauri::command]
pub async fn ensure_session_ready<R: Runtime>(
    app_handle: tauri::AppHandle<R>,
    model_id: String,
) -> Result<SessionInfo, String> {
    let state: State<LlamacppState> = app_handle.state();
    let process_map_arc = state.llama_server_process.clone();
    // Step 1: resolve session by model_id and check if alive from source-of-truth child handle.
    // The lock is held only for this block; it must be released before the
    // restart below, because load_llama_model_impl re-acquires the same lock.
    let (dead_pid, maybe_restart_launch) = {
        let mut map = process_map_arc.lock().await;
        let maybe_entry = map
            .iter_mut()
            .find(|(_, session)| session.info.model_id == model_id);
        let Some((pid, session)) = maybe_entry else {
            return Err(format!("No active session found for model: {}", model_id));
        };
        match session.child.try_wait() {
            // Child still running: nothing to do, hand back the current info.
            Ok(None) => {
                return Ok(session.info.clone());
            }
            // Child has exited: decide whether an automatic restart is allowed.
            Ok(Some(status)) => {
                // Event delivery is best-effort; emit failures are ignored.
                let _ = app_handle.emit(
                    "llamacpp://session-exited",
                    SessionLifecycleEvent {
                        model_id: model_id.clone(),
                        pid: Some(*pid),
                        message: format!("Process exited with status: {}", status),
                    },
                );
                // Auto-restart is opt-in per the extension settings.
                if !session.launch.config.auto_restart_on_crash {
                    return Err("Model appears to have crashed! Please reload!".to_string());
                }
                // Rate-limit restarts to avoid a tight crash/relaunch loop.
                if !can_attempt_restart(session) {
                    let _ = app_handle.emit(
                        "llamacpp://session-restart-failed",
                        SessionLifecycleEvent {
                            model_id: model_id.clone(),
                            pid: Some(*pid),
                            message: "Auto-restart attempt limit reached".to_string(),
                        },
                    );
                    return Err(format!(
                        "Model \"{}\" crashed repeatedly. Auto-restart limit reached ({} attempts in {} minutes). Please reload manually.",
                        model_id,
                        AUTO_RESTART_MAX_ATTEMPTS,
                        AUTO_RESTART_WINDOW_MS / 60000
                    ));
                }
                log::warn!(
                    "Model '{}' exited with status {:?}. Attempting automatic restart.",
                    model_id,
                    status
                );
                let _ = app_handle.emit(
                    "llamacpp://session-restarting",
                    SessionLifecycleEvent {
                        model_id: model_id.clone(),
                        pid: Some(*pid),
                        message: "Attempting automatic restart".to_string(),
                    },
                );
                // Record this attempt inside the rate-limit window, then carry
                // the dead pid and a clone of the launch config out of the lock.
                session.restart_attempt_timestamps_ms.push(now_ms());
                (Some(*pid), Some(session.launch.clone()))
            }
            Err(err) => {
                return Err(format!("Failed to inspect session process state: {}", err));
            }
        }
    };
    // Step 2: remove dead session before restart.
    if let Some(pid) = dead_pid {
        let mut map = process_map_arc.lock().await;
        map.remove(&pid);
    }
    // Step 3: restart using original launch config.
    let launch = maybe_restart_launch
        .ok_or_else(|| "Unable to restart model session: launch configuration missing".to_string())?;
    // NOTE(review): this reuses the original port; assumes the OS has released
    // it after the crash — confirm relaunch behavior when the port is held.
    let restarted = load_llama_model_impl(
        process_map_arc,
        &launch.backend_path,
        launch.model_id,
        launch.model_path,
        launch.port,
        launch.config,
        launch.envs,
        launch.mmproj_path,
        launch.is_embedding,
        launch.timeout,
    )
    .await
    .map_err(|e| {
        let _ = app_handle.emit(
            "llamacpp://session-restart-failed",
            SessionLifecycleEvent {
                model_id: model_id.clone(),
                pid: dead_pid,
                message: format!("Automatic restart failed: {}", e),
            },
        );
        format!("Model crashed and automatic restart failed: {}", e)
    })?;
    let _ = app_handle.emit(
        "llamacpp://session-restarted",
        SessionLifecycleEvent {
            model_id: model_id.clone(),
            pid: Some(restarted.pid),
            message: "Automatic restart successful".to_string(),
        },
    );
    Ok(restarted)
}
/// Get a random available port
#[tauri::command]
pub async fn get_random_port<R: Runtime>(app_handle: tauri::AppHandle<R>) -> Result<u16, String> {

View File

@@ -30,6 +30,7 @@ pub fn init<R: Runtime>() -> TauriPlugin<R> {
commands::get_devices,
commands::generate_api_key,
commands::is_process_running,
commands::ensure_session_ready,
commands::get_random_port,
commands::find_session_by_model,
commands::get_loaded_models,

View File

@@ -16,9 +16,24 @@ pub struct SessionInfo {
pub mmproj_path: Option<String>,
}
/// Everything needed to relaunch a llama-server session exactly as it was
/// originally started; captured at load time and replayed by the crash
/// auto-restart path (`ensure_session_ready`).
#[derive(Debug, Clone)]
pub struct SessionLaunchConfig {
    // Path to the llama-server backend binary used for the original launch.
    pub backend_path: String,
    // Identifier of the model this session serves.
    pub model_id: String,
    // Filesystem path of the model file.
    pub model_path: String,
    // Port the server was started on (reused on restart).
    pub port: u16,
    // Full llamacpp configuration, including `auto_restart_on_crash`.
    pub config: crate::args::LlamacppConfig,
    // Environment variables passed to the server process.
    pub envs: HashMap<String, String>,
    // Optional multimodal projector file path, when one was supplied.
    pub mmproj_path: Option<String>,
    // Whether the session was started as an embedding server.
    pub is_embedding: bool,
    // Launch timeout forwarded to `load_llama_model_impl`.
    pub timeout: u64,
}
/// A running llama-server process plus the bookkeeping needed to monitor and
/// automatically restart it after a crash.
pub struct LLamaBackendSession {
    // Handle to the spawned server child process; source of truth for liveness.
    pub child: Child,
    // Session metadata (model id, pid, port, ...) handed back to callers.
    pub info: SessionInfo,
    // Launch parameters captured at load time so the session can be relaunched.
    pub launch: SessionLaunchConfig,
    // Unix-ms timestamps of recent automatic restart attempts (rate-limit window).
    pub restart_attempt_timestamps_ms: Vec<u64>,
}
/// LlamaCpp plugin state

View File

@@ -1305,6 +1305,7 @@ fn build_llamacpp_config(n_gpu_layers: i32, ctx_size: i32, timeout: i32, fit: bo
version_backend: "cli/llama-server".to_string(),
auto_update_engine: false,
auto_unload: false,
auto_restart_on_crash: false,
timeout,
llamacpp_env: String::new(),
fit,