feat: add optional health check auto-restart for crashed model sessions (#7855)

* feat: add optional health check auto-restart for crashed model sessions

* fix: update

* fix: update

* fix: lint

* fix: lint

* fix: tauri

* fix: build

* fix: update

* fix: update
This commit is contained in:
Clayton
2026-03-31 02:04:23 -05:00
committed by GitHub
parent 813260a365
commit 1c3a03557c
13 changed files with 300 additions and 33 deletions

View File

@@ -36,6 +36,13 @@
"controllerType": "checkbox",
"controllerProps": { "value": true }
},
{
"key": "auto_restart_on_crash",
"title": "Auto-Restart Crashed Models",
"description": "Automatically reloads a model if its llama-server process crashes.",
"controllerType": "checkbox",
"controllerProps": { "value": false }
},
{
"key": "timeout",
"title": "Timeout for llamacpp",

View File

@@ -1857,28 +1857,17 @@ export default class llamacpp_extension extends AIEngine {
}
}
/**
 * Ask the llamacpp plugin for a live session for `modelId`; the backend may
 * transparently restart the llama-server process if it has crashed.
 */
private async ensureHealthySession(modelId: string): Promise<SessionInfo> {
  const session = await invoke<SessionInfo>(
    'plugin:llamacpp|ensure_session_ready',
    { modelId }
  )
  return session
}
override async chat(
opts: chatCompletionRequest,
abortController?: AbortController
): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
const sessionInfo = await this.findSessionByModel(opts.model)
if (!sessionInfo) {
throw new Error(`No active session found for model: ${opts.model}`)
}
// check if the process is alive
const result = await invoke<boolean>('plugin:llamacpp|is_process_running', {
pid: sessionInfo.pid,
})
if (result) {
try {
await fetch(`http://localhost:${sessionInfo.port}/health`)
} catch (e) {
this.unload(sessionInfo.model_id)
throw new Error('Model appears to have crashed! Please reload!')
}
} else {
throw new Error('Model have crashed! Please reload!')
}
const sessionInfo = await this.ensureHealthySession(opts.model)
const baseUrl = `http://localhost:${sessionInfo.port}/v1`
const url = `${baseUrl}/chat/completions`
const headers = {
@@ -2260,21 +2249,25 @@ export default class llamacpp_extension extends AIEngine {
throw new Error(`No active session found for model: ${opts.model}`)
}
// Check if the process is alive
const result = await invoke<boolean>('plugin:llamacpp|is_process_running', {
// Token counting should be side-effect free (no auto-restart/unload).
const isRunning = await invoke<boolean>('plugin:llamacpp|is_process_running', {
pid: sessionInfo.pid,
})
if (result) {
try {
await fetch(`http://localhost:${sessionInfo.port}/health`)
} catch (e) {
this.unload(sessionInfo.model_id)
throw new Error('Model appears to have crashed! Please reload!')
}
} else {
if (!isRunning) {
throw new Error('Model has crashed! Please reload!')
}
try {
const healthResponse = await fetch(
`http://localhost:${sessionInfo.port}/health`
)
if (!healthResponse.ok) {
throw new Error('unhealthy')
}
} catch (_e) {
throw new Error('Model appears to have crashed! Please reload!')
}
const baseUrl = `http://localhost:${sessionInfo.port}`
const headers = {
'Content-Type': 'application/json',

View File

@@ -7,6 +7,7 @@ const COMMANDS: &[&str] = &[
"get_devices",
"generate_api_key",
"is_process_running",
"ensure_session_ready",
"get_random_port",
"find_session_by_model",
"get_loaded_models",

View File

@@ -36,6 +36,7 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {
version_backend: asString(config.version_backend),
auto_update_engine: asBool(config.auto_update_engine),
auto_unload: asBool(config.auto_unload),
auto_restart_on_crash: asBool(config.auto_restart_on_crash),
timeout: asNumber(config.timeout, 600),
llamacpp_env: asString(config.llamacpp_env),

View File

@@ -31,6 +31,7 @@ export type LlamacppConfig = {
version_backend: string
auto_update_engine: boolean
auto_unload: boolean
auto_restart_on_crash: boolean
timeout: number
llamacpp_env: string
fit: boolean

View File

@@ -10,6 +10,7 @@ Default permissions for the llamacpp plugin
- `allow-get-devices`
- `allow-generate-api-key`
- `allow-is-process-running`
- `allow-ensure-session-ready`
- `allow-get-random-port`
- `allow-find-session-by-model`
- `allow-get-loaded-models`
@@ -125,6 +126,32 @@ Denies the determine_supported_backends command without any pre-configured scope
<tr>
<td>
`llamacpp:allow-ensure-session-ready`
</td>
<td>
Enables the ensure_session_ready command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-ensure-session-ready`
</td>
<td>
Denies the ensure_session_ready command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-estimate-kv-cache-size`
</td>

View File

@@ -10,6 +10,7 @@ permissions = [
"allow-get-devices",
"allow-generate-api-key",
"allow-is-process-running",
"allow-ensure-session-ready",
"allow-get-random-port",
"allow-find-session-by-model",
"allow-get-loaded-models",

View File

@@ -330,6 +330,18 @@
"const": "deny-determine-supported-backends",
"markdownDescription": "Denies the determine_supported_backends command without any pre-configured scope."
},
{
"description": "Enables the ensure_session_ready command without any pre-configured scope.",
"type": "string",
"const": "allow-ensure-session-ready",
"markdownDescription": "Enables the ensure_session_ready command without any pre-configured scope."
},
{
"description": "Denies the ensure_session_ready command without any pre-configured scope.",
"type": "string",
"const": "deny-ensure-session-ready",
"markdownDescription": "Denies the ensure_session_ready command without any pre-configured scope."
},
{
"description": "Enables the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
@@ -655,10 +667,10 @@
"markdownDescription": "Denies the validate_backend_string command without any pre-configured scope."
},
{
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`",
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-ensure-session-ready`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`",
"type": "string",
"const": "default",
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`"
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-ensure-session-ready`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`"
}
]
}

View File

@@ -8,6 +8,8 @@ pub struct LlamacppConfig {
pub version_backend: String,
pub auto_update_engine: bool,
pub auto_unload: bool,
#[serde(default)]
pub auto_restart_on_crash: bool,
pub timeout: i32,
pub llamacpp_env: String,
pub fit: bool,
@@ -403,6 +405,7 @@ mod tests {
version_backend: "v1.0/standard".to_string(),
auto_update_engine: false,
auto_unload: false,
auto_restart_on_crash: false,
timeout: 120,
llamacpp_env: String::new(),
fit: false,

View File

@@ -4,8 +4,9 @@ use sha2::Sha256;
use std::collections::HashMap;
use std::process::Stdio;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use std::time::Duration;
use tauri::{Manager, Runtime, State};
use tauri::{Emitter, Manager, Runtime, State};
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tokio::sync::{mpsc, Mutex};
@@ -31,6 +32,15 @@ use crate::process::graceful_terminate_process;
use crate::process::force_terminate_process;
type HmacSha256 = Hmac<Sha256>;
// Auto-restart rate limit: at most this many automatic restarts per model...
const AUTO_RESTART_MAX_ATTEMPTS: usize = 3;
// ...within this sliding window (5 minutes, expressed in milliseconds).
const AUTO_RESTART_WINDOW_MS: u64 = 5 * 60 * 1000;
/// Payload emitted on the `llamacpp://session-*` lifecycle events so the
/// frontend can surface crash / restart status for a model.
#[derive(Clone, serde::Serialize)]
struct SessionLifecycleEvent {
    // Model the event refers to.
    model_id: String,
    // Pid of the affected process, when known.
    pid: Option<i32>,
    // Human-readable status detail for display/logging.
    message: String,
}
#[derive(serde::Serialize, serde::Deserialize)]
pub struct UnloadResult {
@@ -52,6 +62,17 @@ pub async fn load_llama_model_impl(
timeout: u64,
) -> ServerResult<SessionInfo> {
let mut process_map = process_map_arc.lock().await;
let launch_config = crate::state::SessionLaunchConfig {
backend_path: backend_path.to_string(),
model_id: model_id.clone(),
model_path: model_path.clone(),
port,
config: config.clone(),
envs: envs.clone(),
mmproj_path: mmproj_path.clone(),
is_embedding,
timeout,
};
log::info!("Attempting to launch server at path: {:?}", backend_path);
log::info!("Using configuration: {:?}", config);
@@ -269,12 +290,68 @@ pub async fn load_llama_model_impl(
LLamaBackendSession {
child,
info: session_info.clone(),
launch: launch_config,
restart_attempt_timestamps_ms: Vec::new(),
},
);
Ok(session_info)
}
/// Current wall-clock time as milliseconds since the Unix epoch.
/// Returns 0 if the system clock reports a time before the epoch.
fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
/// Prune restart timestamps that fell out of the sliding rate-limit window,
/// then report whether another automatic restart is still allowed.
/// Side effect: mutates `session.restart_attempt_timestamps_ms` in place.
fn can_attempt_restart(session: &mut LLamaBackendSession) -> bool {
    let window_start = now_ms().saturating_sub(AUTO_RESTART_WINDOW_MS);
    let attempts = &mut session.restart_attempt_timestamps_ms;
    attempts.retain(|&ts| ts >= window_start);
    attempts.len() < AUTO_RESTART_MAX_ATTEMPTS
}
/// Spawn a background task that watches the llama-server process for
/// `model_id` and triggers `ensure_session_ready` (which may auto-restart the
/// model) as soon as the child process exits.
///
/// The task polls once per second and stops watching when:
/// - the session disappears from the process map (manual unload/cleanup), or
/// - `ensure_session_ready` returns an error (auto-restart disabled, retry
///   limit reached, or the relaunch itself failed).
fn start_session_exit_monitor<R: Runtime>(app_handle: tauri::AppHandle<R>, model_id: String) {
    tokio::spawn(async move {
        loop {
            tokio::time::sleep(Duration::from_millis(1000)).await;
            // Inspect the child handle under the map lock, but keep the lock
            // scope tight: ensure_session_ready re-acquires the same lock.
            let maybe_exited = {
                let state: State<LlamacppState> = app_handle.state();
                let mut map = state.llama_server_process.lock().await;
                let maybe_session = map
                    .values_mut()
                    .find(|session| session.info.model_id == model_id);
                let Some(session) = maybe_session else {
                    // Session no longer exists (unloaded manually or cleaned up); stop watching.
                    return;
                };
                match session.child.try_wait() {
                    Ok(Some(_status)) => true,
                    Ok(None) => false,
                    Err(err) => {
                        log::warn!(
                            "Failed to inspect process state for model '{}': {}",
                            model_id,
                            err
                        );
                        false
                    }
                }
            };
            if maybe_exited {
                // Fix: the Err result was previously discarded, so a crashed
                // session that could not be restarted (auto-restart disabled,
                // retry limit reached, or relaunch failure) was re-processed —
                // and re-emitted lifecycle events — every second, forever.
                // Stop monitoring once recovery is impossible.
                if ensure_session_ready(app_handle.clone(), model_id.clone())
                    .await
                    .is_err()
                {
                    return;
                }
            }
        }
    });
}
/// Load a llama model and start the server
#[tauri::command]
pub async fn load_llama_model<R: Runtime>(
@@ -290,7 +367,7 @@ pub async fn load_llama_model<R: Runtime>(
timeout: u64,
) -> ServerResult<SessionInfo> {
let state: State<LlamacppState> = app_handle.state();
load_llama_model_impl(
let session_info = load_llama_model_impl(
state.llama_server_process.clone(),
backend_path,
model_id,
@@ -302,7 +379,12 @@ pub async fn load_llama_model<R: Runtime>(
is_embedding,
timeout,
)
.await
.await?;
// Observe process exit from plugin side immediately after load.
start_session_exit_monitor(app_handle, session_info.model_id.clone());
Ok(session_info)
}
/// Unload a llama model by terminating its process
@@ -370,6 +452,128 @@ pub async fn is_process_running<R: Runtime>(
is_process_running_by_pid(app_handle, pid).await
}
/// Tauri command: return a healthy `SessionInfo` for `model_id`, transparently
/// restarting the llama-server process when it has crashed.
///
/// Flow:
/// 1. Look the session up in the shared process map and poll the child handle;
///    a still-running child short-circuits with the existing session info.
/// 2. If the child has exited, emit `llamacpp://session-exited`, then bail out
///    unless `auto_restart_on_crash` is enabled and the rate limit
///    (`AUTO_RESTART_MAX_ATTEMPTS` within `AUTO_RESTART_WINDOW_MS`) permits a
///    retry.
/// 3. Remove the dead entry and relaunch using the captured
///    `SessionLaunchConfig`, emitting restart lifecycle events along the way.
///
/// Errors are returned as user-facing strings (Tauri command convention).
#[tauri::command]
pub async fn ensure_session_ready<R: Runtime>(
    app_handle: tauri::AppHandle<R>,
    model_id: String,
) -> Result<SessionInfo, String> {
    let state: State<LlamacppState> = app_handle.state();
    let process_map_arc = state.llama_server_process.clone();
    // Step 1: resolve session by model_id and check if alive from source-of-truth child handle.
    // The lock is held only for this block; it must be released before the
    // restart below, because load_llama_model_impl re-acquires the same lock.
    let (dead_pid, maybe_restart_launch) = {
        let mut map = process_map_arc.lock().await;
        let maybe_entry = map
            .iter_mut()
            .find(|(_, session)| session.info.model_id == model_id);
        let Some((pid, session)) = maybe_entry else {
            return Err(format!("No active session found for model: {}", model_id));
        };
        match session.child.try_wait() {
            // Child still running: nothing to do, hand back the current info.
            Ok(None) => {
                return Ok(session.info.clone());
            }
            // Child has exited: decide whether an automatic restart is allowed.
            Ok(Some(status)) => {
                // Event delivery is best-effort; emit failures are ignored.
                let _ = app_handle.emit(
                    "llamacpp://session-exited",
                    SessionLifecycleEvent {
                        model_id: model_id.clone(),
                        pid: Some(*pid),
                        message: format!("Process exited with status: {}", status),
                    },
                );
                // Auto-restart is opt-in per the extension settings.
                if !session.launch.config.auto_restart_on_crash {
                    return Err("Model appears to have crashed! Please reload!".to_string());
                }
                // Rate-limit restarts to avoid a tight crash/relaunch loop.
                if !can_attempt_restart(session) {
                    let _ = app_handle.emit(
                        "llamacpp://session-restart-failed",
                        SessionLifecycleEvent {
                            model_id: model_id.clone(),
                            pid: Some(*pid),
                            message: "Auto-restart attempt limit reached".to_string(),
                        },
                    );
                    return Err(format!(
                        "Model \"{}\" crashed repeatedly. Auto-restart limit reached ({} attempts in {} minutes). Please reload manually.",
                        model_id,
                        AUTO_RESTART_MAX_ATTEMPTS,
                        AUTO_RESTART_WINDOW_MS / 60000
                    ));
                }
                log::warn!(
                    "Model '{}' exited with status {:?}. Attempting automatic restart.",
                    model_id,
                    status
                );
                let _ = app_handle.emit(
                    "llamacpp://session-restarting",
                    SessionLifecycleEvent {
                        model_id: model_id.clone(),
                        pid: Some(*pid),
                        message: "Attempting automatic restart".to_string(),
                    },
                );
                // Record this attempt inside the rate-limit window, then carry
                // the dead pid and a clone of the launch config out of the lock.
                session.restart_attempt_timestamps_ms.push(now_ms());
                (Some(*pid), Some(session.launch.clone()))
            }
            Err(err) => {
                return Err(format!("Failed to inspect session process state: {}", err));
            }
        }
    };
    // Step 2: remove dead session before restart.
    if let Some(pid) = dead_pid {
        let mut map = process_map_arc.lock().await;
        map.remove(&pid);
    }
    // Step 3: restart using original launch config.
    let launch = maybe_restart_launch
        .ok_or_else(|| "Unable to restart model session: launch configuration missing".to_string())?;
    // NOTE(review): this reuses the original port; assumes the OS has released
    // it after the crash — confirm relaunch behavior when the port is held.
    let restarted = load_llama_model_impl(
        process_map_arc,
        &launch.backend_path,
        launch.model_id,
        launch.model_path,
        launch.port,
        launch.config,
        launch.envs,
        launch.mmproj_path,
        launch.is_embedding,
        launch.timeout,
    )
    .await
    .map_err(|e| {
        let _ = app_handle.emit(
            "llamacpp://session-restart-failed",
            SessionLifecycleEvent {
                model_id: model_id.clone(),
                pid: dead_pid,
                message: format!("Automatic restart failed: {}", e),
            },
        );
        format!("Model crashed and automatic restart failed: {}", e)
    })?;
    let _ = app_handle.emit(
        "llamacpp://session-restarted",
        SessionLifecycleEvent {
            model_id: model_id.clone(),
            pid: Some(restarted.pid),
            message: "Automatic restart successful".to_string(),
        },
    );
    Ok(restarted)
}
/// Get a random available port
#[tauri::command]
pub async fn get_random_port<R: Runtime>(app_handle: tauri::AppHandle<R>) -> Result<u16, String> {

View File

@@ -30,6 +30,7 @@ pub fn init<R: Runtime>() -> TauriPlugin<R> {
commands::get_devices,
commands::generate_api_key,
commands::is_process_running,
commands::ensure_session_ready,
commands::get_random_port,
commands::find_session_by_model,
commands::get_loaded_models,

View File

@@ -16,9 +16,24 @@ pub struct SessionInfo {
pub mmproj_path: Option<String>,
}
/// Everything needed to relaunch a llama-server session exactly as it was
/// originally started; captured at load time and replayed by the crash
/// auto-restart path (`ensure_session_ready`).
#[derive(Debug, Clone)]
pub struct SessionLaunchConfig {
    // Path to the llama-server backend binary used for the original launch.
    pub backend_path: String,
    // Identifier of the model this session serves.
    pub model_id: String,
    // Filesystem path of the model file.
    pub model_path: String,
    // Port the server was started on (reused on restart).
    pub port: u16,
    // Full llamacpp configuration, including `auto_restart_on_crash`.
    pub config: crate::args::LlamacppConfig,
    // Environment variables passed to the server process.
    pub envs: HashMap<String, String>,
    // Optional multimodal projector file path, when one was supplied.
    pub mmproj_path: Option<String>,
    // Whether the session was started as an embedding server.
    pub is_embedding: bool,
    // Launch timeout forwarded to `load_llama_model_impl`.
    pub timeout: u64,
}
/// A running llama-server process plus the bookkeeping needed to monitor and
/// automatically restart it after a crash.
pub struct LLamaBackendSession {
    // Handle to the spawned server child process; source of truth for liveness.
    pub child: Child,
    // Session metadata (model id, pid, port, ...) handed back to callers.
    pub info: SessionInfo,
    // Launch parameters captured at load time so the session can be relaunched.
    pub launch: SessionLaunchConfig,
    // Unix-ms timestamps of recent automatic restart attempts (rate-limit window).
    pub restart_attempt_timestamps_ms: Vec<u64>,
}
/// LlamaCpp plugin state

View File

@@ -1305,6 +1305,7 @@ fn build_llamacpp_config(n_gpu_layers: i32, ctx_size: i32, timeout: i32, fit: bo
version_backend: "cli/llama-server".to_string(),
auto_update_engine: false,
auto_unload: false,
auto_restart_on_crash: false,
timeout,
llamacpp_env: String::new(),
fit,