fix: ctx_size overflow causing model load failure after reopening chat (#7879)
* feat: paragraph-level Edit with AI for assistant markdown (#7812)
* fix: ctx_size overflow causing model load failure after reopening chat
* fix linter issue

---------

Co-authored-by: Clayton <118192227+claytonlin1110@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
f5432ddfa8
commit
98f1139852
@@ -21,6 +21,16 @@ function asNumber(v: any, defaultValue = 0): number {
|
||||
return isFinite(n) ? n : defaultValue
|
||||
}
|
||||
|
||||
// Inclusive upper and lower bounds of a signed 32-bit integer.
const I32_MAX = 2 ** 31 - 1
const I32_MIN = -(2 ** 31)
|
||||
|
||||
/**
 * Coerces `v` to a whole number inside the signed 32-bit integer range.
 *
 * The value is converted with `asNumber` (which yields `defaultValue` when
 * its result is not finite), the fractional part is dropped with
 * `Math.trunc`, and the result is clamped to `[I32_MIN, I32_MAX]`.
 *
 * @param v - Raw value to coerce.
 * @param defaultValue - Fallback forwarded to `asNumber`; defaults to 0.
 * @returns The truncated value, limited to the i32 bounds.
 */
function asI32(v: any, defaultValue = 0): number {
  const truncated = Math.trunc(asNumber(v, defaultValue))
  // Cap at the upper bound first, then raise to the lower bound if needed.
  return Math.max(I32_MIN, Math.min(I32_MAX, truncated))
}
|
||||
|
||||
function asBool(v: any): boolean {
|
||||
if (v === '' || v === null || v === undefined) return false
|
||||
return v === true || v === 'true' || v === 1 || v === '1'
|
||||
@@ -37,7 +47,7 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {
|
||||
auto_update_engine: asBool(config.auto_update_engine),
|
||||
auto_unload: asBool(config.auto_unload),
|
||||
auto_restart_on_crash: asBool(config.auto_restart_on_crash),
|
||||
timeout: asNumber(config.timeout, 600),
|
||||
timeout: asI32(config.timeout, 600),
|
||||
|
||||
llamacpp_env: asString(config.llamacpp_env),
|
||||
fit: asBool(config.fit),
|
||||
@@ -45,23 +55,23 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {
|
||||
fit_ctx: asString(config.fit_ctx),
|
||||
chat_template: asString(config.chat_template),
|
||||
|
||||
n_gpu_layers: asNumber(config.n_gpu_layers),
|
||||
n_gpu_layers: asI32(config.n_gpu_layers),
|
||||
offload_mmproj: asBool(config.offload_mmproj),
|
||||
cpu_moe: asBool(config.cpu_moe),
|
||||
n_cpu_moe: asNumber(config.n_cpu_moe),
|
||||
n_cpu_moe: asI32(config.n_cpu_moe),
|
||||
|
||||
override_tensor_buffer_t: asString(config.override_tensor_buffer_t),
|
||||
|
||||
ctx_size: asNumber(config.ctx_size),
|
||||
threads: asNumber(config.threads),
|
||||
threads_batch: asNumber(config.threads_batch),
|
||||
n_predict: asNumber(config.n_predict),
|
||||
batch_size: asNumber(config.batch_size),
|
||||
ubatch_size: asNumber(config.ubatch_size),
|
||||
ctx_size: asI32(config.ctx_size),
|
||||
threads: asI32(config.threads),
|
||||
threads_batch: asI32(config.threads_batch),
|
||||
n_predict: asI32(config.n_predict),
|
||||
batch_size: asI32(config.batch_size),
|
||||
ubatch_size: asI32(config.ubatch_size),
|
||||
|
||||
device: asString(config.device),
|
||||
split_mode: asString(config.split_mode),
|
||||
main_gpu: asNumber(config.main_gpu),
|
||||
main_gpu: asI32(config.main_gpu),
|
||||
|
||||
flash_attn: asString(config.flash_attn),
|
||||
cont_batching: asBool(config.cont_batching),
|
||||
@@ -81,7 +91,7 @@ export function normalizeLlamacppConfig(config: any): LlamacppConfig {
|
||||
rope_freq_scale: asNumber(config.rope_freq_scale, 1.0),
|
||||
|
||||
ctx_shift: asBool(config.ctx_shift),
|
||||
parallel: asNumber(config.parallel, 1),
|
||||
parallel: asI32(config.parallel, 1),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user