feat: add optional health check auto-restart for crashed model sessions (#7855)
* feat: add optional health check auto-restart for crashed model sessions * fix: update * fix: update * fix: lint * fix: lint * fix: tauri * fix: build * fix: update * fix: update
This commit is contained in:
@@ -36,6 +36,13 @@
|
||||
"controllerType": "checkbox",
|
||||
"controllerProps": { "value": true }
|
||||
},
|
||||
{
|
||||
"key": "auto_restart_on_crash",
|
||||
"title": "Auto-Restart Crashed Models",
|
||||
"description": "Automatically reloads a model if its llama-server process crashes.",
|
||||
"controllerType": "checkbox",
|
||||
"controllerProps": { "value": false }
|
||||
},
|
||||
{
|
||||
"key": "timeout",
|
||||
"title": "Timeout for llamacpp",
|
||||
|
||||
@@ -1857,28 +1857,17 @@ export default class llamacpp_extension extends AIEngine {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Resolves the session to use for `modelId` by delegating to the
 * `ensure_session_ready` plugin command.
 *
 * @param modelId - Identifier of the model whose session is requested.
 * @returns The session info reported by the plugin command.
 * @throws Whatever error the plugin command rejects with.
 */
private async ensureHealthySession(modelId: string): Promise<SessionInfo> {
  const readySession = await invoke<SessionInfo>(
    'plugin:llamacpp|ensure_session_ready',
    { modelId }
  )
  return readySession
}
|
||||
|
||||
override async chat(
|
||||
opts: chatCompletionRequest,
|
||||
abortController?: AbortController
|
||||
): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
|
||||
const sessionInfo = await this.findSessionByModel(opts.model)
|
||||
if (!sessionInfo) {
|
||||
throw new Error(`No active session found for model: ${opts.model}`)
|
||||
}
|
||||
// check if the process is alive
|
||||
const result = await invoke<boolean>('plugin:llamacpp|is_process_running', {
|
||||
pid: sessionInfo.pid,
|
||||
})
|
||||
if (result) {
|
||||
try {
|
||||
await fetch(`http://localhost:${sessionInfo.port}/health`)
|
||||
} catch (e) {
|
||||
this.unload(sessionInfo.model_id)
|
||||
throw new Error('Model appears to have crashed! Please reload!')
|
||||
}
|
||||
} else {
|
||||
throw new Error('Model have crashed! Please reload!')
|
||||
}
|
||||
const sessionInfo = await this.ensureHealthySession(opts.model)
|
||||
const baseUrl = `http://localhost:${sessionInfo.port}/v1`
|
||||
const url = `${baseUrl}/chat/completions`
|
||||
const headers = {
|
||||
@@ -2260,21 +2249,25 @@ export default class llamacpp_extension extends AIEngine {
|
||||
throw new Error(`No active session found for model: ${opts.model}`)
|
||||
}
|
||||
|
||||
// Check if the process is alive
|
||||
const result = await invoke<boolean>('plugin:llamacpp|is_process_running', {
|
||||
// Token counting should be side-effect free (no auto-restart/unload).
|
||||
const isRunning = await invoke<boolean>('plugin:llamacpp|is_process_running', {
|
||||
pid: sessionInfo.pid,
|
||||
})
|
||||
if (result) {
|
||||
try {
|
||||
await fetch(`http://localhost:${sessionInfo.port}/health`)
|
||||
} catch (e) {
|
||||
this.unload(sessionInfo.model_id)
|
||||
throw new Error('Model appears to have crashed! Please reload!')
|
||||
}
|
||||
} else {
|
||||
if (!isRunning) {
|
||||
throw new Error('Model has crashed! Please reload!')
|
||||
}
|
||||
|
||||
try {
|
||||
const healthResponse = await fetch(
|
||||
`http://localhost:${sessionInfo.port}/health`
|
||||
)
|
||||
if (!healthResponse.ok) {
|
||||
throw new Error('unhealthy')
|
||||
}
|
||||
} catch (_e) {
|
||||
throw new Error('Model appears to have crashed! Please reload!')
|
||||
}
|
||||
|
||||
const baseUrl = `http://localhost:${sessionInfo.port}`
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
|
||||
@@ -7,6 +7,7 @@ const COMMANDS: &[&str] = &[
|
||||
"get_devices",
|
||||
"generate_api_key",
|
||||
"is_process_running",
|
||||
"ensure_session_ready",
|
||||
"get_random_port",
|
||||
"find_session_by_model",
|
||||
"get_loaded_models",
|
||||
|
||||
@@ -36,6 +36,7 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {
|
||||
version_backend: asString(config.version_backend),
|
||||
auto_update_engine: asBool(config.auto_update_engine),
|
||||
auto_unload: asBool(config.auto_unload),
|
||||
auto_restart_on_crash: asBool(config.auto_restart_on_crash),
|
||||
timeout: asNumber(config.timeout, 600),
|
||||
|
||||
llamacpp_env: asString(config.llamacpp_env),
|
||||
|
||||
@@ -31,6 +31,7 @@ export type LlamacppConfig = {
|
||||
version_backend: string
|
||||
auto_update_engine: boolean
|
||||
auto_unload: boolean
|
||||
auto_restart_on_crash: boolean
|
||||
timeout: number
|
||||
llamacpp_env: string
|
||||
fit: boolean
|
||||
|
||||
@@ -10,6 +10,7 @@ Default permissions for the llamacpp plugin
|
||||
- `allow-get-devices`
|
||||
- `allow-generate-api-key`
|
||||
- `allow-is-process-running`
|
||||
- `allow-ensure-session-ready`
|
||||
- `allow-get-random-port`
|
||||
- `allow-find-session-by-model`
|
||||
- `allow-get-loaded-models`
|
||||
@@ -125,6 +126,32 @@ Denies the determine_supported_backends command without any pre-configured scope
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
`llamacpp:allow-ensure-session-ready`
|
||||
|
||||
</td>
|
||||
<td>
|
||||
|
||||
Enables the ensure_session_ready command without any pre-configured scope.
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
`llamacpp:deny-ensure-session-ready`
|
||||
|
||||
</td>
|
||||
<td>
|
||||
|
||||
Denies the ensure_session_ready command without any pre-configured scope.
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
`llamacpp:allow-estimate-kv-cache-size`
|
||||
|
||||
</td>
|
||||
|
||||
@@ -10,6 +10,7 @@ permissions = [
|
||||
"allow-get-devices",
|
||||
"allow-generate-api-key",
|
||||
"allow-is-process-running",
|
||||
"allow-ensure-session-ready",
|
||||
"allow-get-random-port",
|
||||
"allow-find-session-by-model",
|
||||
"allow-get-loaded-models",
|
||||
|
||||
@@ -330,6 +330,18 @@
|
||||
"const": "deny-determine-supported-backends",
|
||||
"markdownDescription": "Denies the determine_supported_backends command without any pre-configured scope."
|
||||
},
|
||||
{
|
||||
"description": "Enables the ensure_session_ready command without any pre-configured scope.",
|
||||
"type": "string",
|
||||
"const": "allow-ensure-session-ready",
|
||||
"markdownDescription": "Enables the ensure_session_ready command without any pre-configured scope."
|
||||
},
|
||||
{
|
||||
"description": "Denies the ensure_session_ready command without any pre-configured scope.",
|
||||
"type": "string",
|
||||
"const": "deny-ensure-session-ready",
|
||||
"markdownDescription": "Denies the ensure_session_ready command without any pre-configured scope."
|
||||
},
|
||||
{
|
||||
"description": "Enables the estimate_kv_cache_size command without any pre-configured scope.",
|
||||
"type": "string",
|
||||
@@ -655,10 +667,10 @@
|
||||
"markdownDescription": "Denies the validate_backend_string command without any pre-configured scope."
|
||||
},
|
||||
{
|
||||
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`",
|
||||
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-ensure-session-ready`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`",
|
||||
"type": "string",
|
||||
"const": "default",
|
||||
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`"
|
||||
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-ensure-session-ready`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`\n- `allow-map-old-backend-to-new`\n- `allow-get-local-installed-backends`\n- `allow-list-supported-backends`\n- `allow-determine-supported-backends`\n- `allow-get-supported-features`\n- `allow-is-cuda-installed`\n- `allow-find-latest-version-for-backend`\n- `allow-prioritize-backends`\n- `allow-parse-backend-version`\n- `allow-check-backend-for-updates`\n- `allow-remove-old-backend-versions`\n- `allow-validate-backend-string`\n- `allow-should-migrate-backend`\n- `allow-handle-setting-update`"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -8,6 +8,8 @@ pub struct LlamacppConfig {
|
||||
pub version_backend: String,
|
||||
pub auto_update_engine: bool,
|
||||
pub auto_unload: bool,
|
||||
#[serde(default)]
|
||||
pub auto_restart_on_crash: bool,
|
||||
pub timeout: i32,
|
||||
pub llamacpp_env: String,
|
||||
pub fit: bool,
|
||||
@@ -403,6 +405,7 @@ mod tests {
|
||||
version_backend: "v1.0/standard".to_string(),
|
||||
auto_update_engine: false,
|
||||
auto_unload: false,
|
||||
auto_restart_on_crash: false,
|
||||
timeout: 120,
|
||||
llamacpp_env: String::new(),
|
||||
fit: false,
|
||||
|
||||
@@ -4,8 +4,9 @@ use sha2::Sha256;
|
||||
use std::collections::HashMap;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use std::time::Duration;
|
||||
use tauri::{Manager, Runtime, State};
|
||||
use tauri::{Emitter, Manager, Runtime, State};
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
@@ -31,6 +32,15 @@ use crate::process::graceful_terminate_process;
|
||||
use crate::process::force_terminate_process;
|
||||
|
||||
type HmacSha256 = Hmac<Sha256>;
|
||||
/// Maximum number of automatic restart attempts allowed inside one
/// `AUTO_RESTART_WINDOW_MS` window.
const AUTO_RESTART_MAX_ATTEMPTS: usize = 3;

/// Length of the sliding window (five minutes, in milliseconds) over which
/// restart attempts are counted.
const AUTO_RESTART_WINDOW_MS: u64 = 5 * 60 * 1_000;
|
||||
|
||||
#[derive(Clone, serde::Serialize)]
|
||||
struct SessionLifecycleEvent {
|
||||
model_id: String,
|
||||
pid: Option<i32>,
|
||||
message: String,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
pub struct UnloadResult {
|
||||
@@ -52,6 +62,17 @@ pub async fn load_llama_model_impl(
|
||||
timeout: u64,
|
||||
) -> ServerResult<SessionInfo> {
|
||||
let mut process_map = process_map_arc.lock().await;
|
||||
let launch_config = crate::state::SessionLaunchConfig {
|
||||
backend_path: backend_path.to_string(),
|
||||
model_id: model_id.clone(),
|
||||
model_path: model_path.clone(),
|
||||
port,
|
||||
config: config.clone(),
|
||||
envs: envs.clone(),
|
||||
mmproj_path: mmproj_path.clone(),
|
||||
is_embedding,
|
||||
timeout,
|
||||
};
|
||||
|
||||
log::info!("Attempting to launch server at path: {:?}", backend_path);
|
||||
log::info!("Using configuration: {:?}", config);
|
||||
@@ -269,12 +290,68 @@ pub async fn load_llama_model_impl(
|
||||
LLamaBackendSession {
|
||||
child,
|
||||
info: session_info.clone(),
|
||||
launch: launch_config,
|
||||
restart_attempt_timestamps_ms: Vec::new(),
|
||||
},
|
||||
);
|
||||
|
||||
Ok(session_info)
|
||||
}
|
||||
|
||||
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Falls back to `0` when the system clock reports a time before the epoch,
/// and saturates at `u64::MAX` instead of silently truncating the `u128`
/// millisecond count.
fn now_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| {
            u64::try_from(elapsed.as_millis()).unwrap_or(u64::MAX)
        })
}
|
||||
|
||||
fn can_attempt_restart(session: &mut LLamaBackendSession) -> bool {
|
||||
let cutoff = now_ms().saturating_sub(AUTO_RESTART_WINDOW_MS);
|
||||
session
|
||||
.restart_attempt_timestamps_ms
|
||||
.retain(|ts| *ts >= cutoff);
|
||||
session.restart_attempt_timestamps_ms.len() < AUTO_RESTART_MAX_ATTEMPTS
|
||||
}
|
||||
|
||||
fn start_session_exit_monitor<R: Runtime>(app_handle: tauri::AppHandle<R>, model_id: String) {
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::time::sleep(Duration::from_millis(1000)).await;
|
||||
|
||||
let maybe_exited = {
|
||||
let state: State<LlamacppState> = app_handle.state();
|
||||
let mut map = state.llama_server_process.lock().await;
|
||||
|
||||
let maybe_session = map
|
||||
.values_mut()
|
||||
.find(|session| session.info.model_id == model_id);
|
||||
|
||||
let Some(session) = maybe_session else {
|
||||
// Session no longer exists (unloaded manually or cleaned up); stop watching.
|
||||
return;
|
||||
};
|
||||
|
||||
match session.child.try_wait() {
|
||||
Ok(Some(_status)) => true,
|
||||
Ok(None) => false,
|
||||
Err(err) => {
|
||||
log::warn!(
|
||||
"Failed to inspect process state for model '{}': {}",
|
||||
model_id,
|
||||
err
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if maybe_exited {
|
||||
let _ = ensure_session_ready(app_handle.clone(), model_id.clone()).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Load a llama model and start the server
|
||||
#[tauri::command]
|
||||
pub async fn load_llama_model<R: Runtime>(
|
||||
@@ -290,7 +367,7 @@ pub async fn load_llama_model<R: Runtime>(
|
||||
timeout: u64,
|
||||
) -> ServerResult<SessionInfo> {
|
||||
let state: State<LlamacppState> = app_handle.state();
|
||||
load_llama_model_impl(
|
||||
let session_info = load_llama_model_impl(
|
||||
state.llama_server_process.clone(),
|
||||
backend_path,
|
||||
model_id,
|
||||
@@ -302,7 +379,12 @@ pub async fn load_llama_model<R: Runtime>(
|
||||
is_embedding,
|
||||
timeout,
|
||||
)
|
||||
.await
|
||||
.await?;
|
||||
|
||||
// Observe process exit from plugin side immediately after load.
|
||||
start_session_exit_monitor(app_handle, session_info.model_id.clone());
|
||||
|
||||
Ok(session_info)
|
||||
}
|
||||
|
||||
/// Unload a llama model by terminating its process
|
||||
@@ -370,6 +452,128 @@ pub async fn is_process_running<R: Runtime>(
|
||||
is_process_running_by_pid(app_handle, pid).await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn ensure_session_ready<R: Runtime>(
|
||||
app_handle: tauri::AppHandle<R>,
|
||||
model_id: String,
|
||||
) -> Result<SessionInfo, String> {
|
||||
let state: State<LlamacppState> = app_handle.state();
|
||||
let process_map_arc = state.llama_server_process.clone();
|
||||
|
||||
// Step 1: resolve session by model_id and check if alive from source-of-truth child handle.
|
||||
let (dead_pid, maybe_restart_launch) = {
|
||||
let mut map = process_map_arc.lock().await;
|
||||
let maybe_entry = map
|
||||
.iter_mut()
|
||||
.find(|(_, session)| session.info.model_id == model_id);
|
||||
|
||||
let Some((pid, session)) = maybe_entry else {
|
||||
return Err(format!("No active session found for model: {}", model_id));
|
||||
};
|
||||
|
||||
match session.child.try_wait() {
|
||||
Ok(None) => {
|
||||
return Ok(session.info.clone());
|
||||
}
|
||||
Ok(Some(status)) => {
|
||||
let _ = app_handle.emit(
|
||||
"llamacpp://session-exited",
|
||||
SessionLifecycleEvent {
|
||||
model_id: model_id.clone(),
|
||||
pid: Some(*pid),
|
||||
message: format!("Process exited with status: {}", status),
|
||||
},
|
||||
);
|
||||
if !session.launch.config.auto_restart_on_crash {
|
||||
return Err("Model appears to have crashed! Please reload!".to_string());
|
||||
}
|
||||
if !can_attempt_restart(session) {
|
||||
let _ = app_handle.emit(
|
||||
"llamacpp://session-restart-failed",
|
||||
SessionLifecycleEvent {
|
||||
model_id: model_id.clone(),
|
||||
pid: Some(*pid),
|
||||
message: "Auto-restart attempt limit reached".to_string(),
|
||||
},
|
||||
);
|
||||
return Err(format!(
|
||||
"Model \"{}\" crashed repeatedly. Auto-restart limit reached ({} attempts in {} minutes). Please reload manually.",
|
||||
model_id,
|
||||
AUTO_RESTART_MAX_ATTEMPTS,
|
||||
AUTO_RESTART_WINDOW_MS / 60000
|
||||
));
|
||||
}
|
||||
|
||||
log::warn!(
|
||||
"Model '{}' exited with status {:?}. Attempting automatic restart.",
|
||||
model_id,
|
||||
status
|
||||
);
|
||||
let _ = app_handle.emit(
|
||||
"llamacpp://session-restarting",
|
||||
SessionLifecycleEvent {
|
||||
model_id: model_id.clone(),
|
||||
pid: Some(*pid),
|
||||
message: "Attempting automatic restart".to_string(),
|
||||
},
|
||||
);
|
||||
|
||||
session.restart_attempt_timestamps_ms.push(now_ms());
|
||||
(Some(*pid), Some(session.launch.clone()))
|
||||
}
|
||||
Err(err) => {
|
||||
return Err(format!("Failed to inspect session process state: {}", err));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Step 2: remove dead session before restart.
|
||||
if let Some(pid) = dead_pid {
|
||||
let mut map = process_map_arc.lock().await;
|
||||
map.remove(&pid);
|
||||
}
|
||||
|
||||
// Step 3: restart using original launch config.
|
||||
let launch = maybe_restart_launch
|
||||
.ok_or_else(|| "Unable to restart model session: launch configuration missing".to_string())?;
|
||||
|
||||
let restarted = load_llama_model_impl(
|
||||
process_map_arc,
|
||||
&launch.backend_path,
|
||||
launch.model_id,
|
||||
launch.model_path,
|
||||
launch.port,
|
||||
launch.config,
|
||||
launch.envs,
|
||||
launch.mmproj_path,
|
||||
launch.is_embedding,
|
||||
launch.timeout,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
let _ = app_handle.emit(
|
||||
"llamacpp://session-restart-failed",
|
||||
SessionLifecycleEvent {
|
||||
model_id: model_id.clone(),
|
||||
pid: dead_pid,
|
||||
message: format!("Automatic restart failed: {}", e),
|
||||
},
|
||||
);
|
||||
format!("Model crashed and automatic restart failed: {}", e)
|
||||
})?;
|
||||
|
||||
let _ = app_handle.emit(
|
||||
"llamacpp://session-restarted",
|
||||
SessionLifecycleEvent {
|
||||
model_id: model_id.clone(),
|
||||
pid: Some(restarted.pid),
|
||||
message: "Automatic restart successful".to_string(),
|
||||
},
|
||||
);
|
||||
|
||||
Ok(restarted)
|
||||
}
|
||||
|
||||
/// Get a random available port
|
||||
#[tauri::command]
|
||||
pub async fn get_random_port<R: Runtime>(app_handle: tauri::AppHandle<R>) -> Result<u16, String> {
|
||||
|
||||
@@ -30,6 +30,7 @@ pub fn init<R: Runtime>() -> TauriPlugin<R> {
|
||||
commands::get_devices,
|
||||
commands::generate_api_key,
|
||||
commands::is_process_running,
|
||||
commands::ensure_session_ready,
|
||||
commands::get_random_port,
|
||||
commands::find_session_by_model,
|
||||
commands::get_loaded_models,
|
||||
|
||||
@@ -16,9 +16,24 @@ pub struct SessionInfo {
|
||||
pub mmproj_path: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SessionLaunchConfig {
|
||||
pub backend_path: String,
|
||||
pub model_id: String,
|
||||
pub model_path: String,
|
||||
pub port: u16,
|
||||
pub config: crate::args::LlamacppConfig,
|
||||
pub envs: HashMap<String, String>,
|
||||
pub mmproj_path: Option<String>,
|
||||
pub is_embedding: bool,
|
||||
pub timeout: u64,
|
||||
}
|
||||
|
||||
pub struct LLamaBackendSession {
|
||||
pub child: Child,
|
||||
pub info: SessionInfo,
|
||||
pub launch: SessionLaunchConfig,
|
||||
pub restart_attempt_timestamps_ms: Vec<u64>,
|
||||
}
|
||||
|
||||
/// LlamaCpp plugin state
|
||||
|
||||
@@ -1305,6 +1305,7 @@ fn build_llamacpp_config(n_gpu_layers: i32, ctx_size: i32, timeout: i32, fit: bo
|
||||
version_backend: "cli/llama-server".to_string(),
|
||||
auto_update_engine: false,
|
||||
auto_unload: false,
|
||||
auto_restart_on_crash: false,
|
||||
timeout,
|
||||
llamacpp_env: String::new(),
|
||||
fit,
|
||||
|
||||
Reference in New Issue
Block a user