feat(mcp): optional lightweight router model for MCP tool routing (#7846)

* feat(mcp): optional dedicated model for smart tool routing

* refactor: update mcp router model filter and settings

* fix: typescript error
This commit is contained in:
dataCenter430
2026-03-31 22:09:42 -07:00
committed by GitHub
parent b79d5cd447
commit a68dcc8ccd
9 changed files with 545 additions and 6 deletions

View File

@@ -64,6 +64,9 @@ pub const DEFAULT_MCP_CONFIG: &str = r#"{
"baseRestartDelayMs": 1000,
"maxRestartDelayMs": 30000,
"backoffMultiplier": 2.0,
"enableSmartToolRouting": true
"enableSmartToolRouting": true,
"useLightweightRouterModel": false,
"routerModelProvider": "",
"routerModelId": ""
}
}"#;

View File

@@ -35,6 +35,18 @@ fn default_enable_smart_tool_routing() -> bool {
true
}
/// Serde fallback for `McpSettings::use_lightweight_router_model`, used when the
/// field is missing from the deserialized settings: the dedicated router model
/// is off by default.
fn default_use_lightweight_router_model() -> bool {
false
}
/// Serde fallback for `McpSettings::router_model_provider` when the field is
/// missing from the deserialized settings: an empty provider name.
fn default_router_model_provider() -> String {
    String::default()
}
/// Serde fallback for `McpSettings::router_model_id` when the field is missing
/// from the deserialized settings: an empty model id.
fn default_router_model_id() -> String {
    String::default()
}
/// Runtime MCP settings that can be adjusted via UI
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
@@ -49,6 +61,12 @@ pub struct McpSettings {
pub backoff_multiplier: f64,
#[serde(default = "default_enable_smart_tool_routing")]
pub enable_smart_tool_routing: bool,
#[serde(default = "default_use_lightweight_router_model")]
pub use_lightweight_router_model: bool,
#[serde(default = "default_router_model_provider")]
pub router_model_provider: String,
#[serde(default = "default_router_model_id")]
pub router_model_id: String,
}
impl Default for McpSettings {
@@ -59,6 +77,9 @@ impl Default for McpSettings {
max_restart_delay_ms: super::constants::DEFAULT_MCP_MAX_RESTART_DELAY_MS,
backoff_multiplier: super::constants::DEFAULT_MCP_BACKOFF_MULTIPLIER,
enable_smart_tool_routing: true,
use_lightweight_router_model: false,
router_model_provider: String::new(),
router_model_id: String::new(),
}
}
}

View File

@@ -0,0 +1,245 @@
import { useMemo, useRef, useState, useCallback } from 'react'
import {
Popover,
PopoverContent,
PopoverTrigger,
} from '@/components/ui/popover'
import { Button } from '@/components/ui/button'
import { cn, getModelDisplayName, isLocalProvider } from '@/lib/utils'
import { IconChevronDown, IconX } from '@tabler/icons-react'
import ProvidersAvatar from '@/containers/ProvidersAvatar'
import Capabilities from '@/containers/Capabilities'
import { isRouterModelSelectable } from '@/lib/mcp-router-model-filter'
/** One selectable row in the router model picker: a model plus facts about its provider. */
type Entry = {
/** The model itself; its `id` is what gets reported through `onSelect`. */
model: Model
/** Value of the owning provider's `provider` field; also used as the grouping key. */
providerName: string
/** Whether `isLocalProvider` reported the owning provider as local; drives the badge color. */
isLocal: boolean
/** Whether the owning provider has a non-empty `api_key`. Not read by the picker's rendering. */
hasApiKey: boolean
}
/** Props for `McpRouterModelPicker`. All user-visible strings are supplied by the caller. */
export type McpRouterModelPickerProps = {
/** Optional label for the trigger control (accessibility). */
ariaLabel?: string
/** All known providers; only active ones with models passing `isRouterModelSelectable` are listed. */
providers: ModelProvider[]
/** Provider name of the current selection; an empty string means nothing is selected. */
selectedProvider: string
/** Model id of the current selection; an empty string means nothing is selected. */
selectedModelId: string
/** Disables the trigger button. Defaults to `false`. */
disabled?: boolean
/** Called with the provider name and model id of the row the user clicked. */
onSelect: (provider: string, modelId: string) => void
/** Trigger text shown when the current selection is not among the listed models. */
placeholder: string
/** Placeholder text of the search input inside the popover. */
searchPlaceholder: string
/** Message shown when there are no rows and the search box is empty. */
emptyListMessage: string
/** Called with trimmed search text when the filter returns no rows. */
formatEmptySearch: (query: string) => string
}
/**
 * Popover picker for choosing the provider + model used for MCP tool routing.
 *
 * Lists every model of every active provider that passes
 * `isRouterModelSelectable`, grouped by provider, with a free-text search over
 * model id, display name and provider name. Clicking a row reports the choice
 * through `onSelect`, closes the popover and clears the search text.
 *
 * The trigger shows the current selection only when it is present in the list
 * of selectable models; otherwise it shows `placeholder`.
 */
export function McpRouterModelPicker({
  ariaLabel,
  providers,
  selectedProvider,
  selectedModelId,
  disabled = false,
  onSelect,
  placeholder,
  searchPlaceholder,
  emptyListMessage,
  formatEmptySearch,
}: McpRouterModelPickerProps) {
  const [open, setOpen] = useState(false)
  const [searchValue, setSearchValue] = useState('')
  const searchInputRef = useRef<HTMLInputElement>(null)

  // Flatten active providers into one row per selectable model.
  const availableModels = useMemo((): Entry[] => {
    return providers
      .filter((p) => p.active)
      .flatMap((p) =>
        p.models
          .filter((m) => isRouterModelSelectable(p, m))
          .map((m) => ({
            model: m,
            providerName: p.provider,
            isLocal: !!isLocalProvider(p.provider),
            hasApiKey: !!p.api_key?.length,
          }))
      )
  }, [providers])

  const filteredModels = useMemo(() => {
    // Match on the trimmed query. The empty-state message below is built from
    // the trimmed text, so matching on the raw value made " haiku" report
    // "no matches for haiku" even when a haiku model was listed.
    const search = searchValue.trim().toLowerCase()
    if (!search) return availableModels
    return availableModels.filter(
      (e) =>
        e.model.id.toLowerCase().includes(search) ||
        (e.model.displayName?.toLowerCase() ?? '').includes(search) ||
        e.providerName.toLowerCase().includes(search)
    )
  }, [availableModels, searchValue])

  // Bucket the visible rows by provider name, preserving encounter order.
  const groupedModels = useMemo(() => {
    const groups: Record<string, Entry[]> = {}
    filteredModels.forEach((e) => {
      if (!groups[e.providerName]) groups[e.providerName] = []
      groups[e.providerName].push(e)
    })
    return groups
  }, [filteredModels])

  // The selected row, looked up in the unfiltered list so that typing in the
  // search box never changes what the trigger displays.
  const current = useMemo(() => {
    if (!selectedModelId || !selectedProvider) return undefined
    return availableModels.find(
      (e) =>
        e.providerName === selectedProvider && e.model.id === selectedModelId
    )
  }, [availableModels, selectedProvider, selectedModelId])

  const handleSelect = useCallback(
    (entry: Entry) => {
      onSelect(entry.providerName, entry.model.id)
      setOpen(false)
      setSearchValue('')
    },
    [onSelect]
  )

  const handleOpenChange = useCallback((isOpen: boolean) => {
    setOpen(isOpen)
    if (!isOpen) {
      setSearchValue('')
    } else {
      // Focus the search box shortly after opening; the input is read through
      // the ref at that time, so a not-yet-mounted input is simply skipped.
      setTimeout(() => searchInputRef.current?.focus(), 100)
    }
  }, [])

  return (
    <Popover open={open} onOpenChange={handleOpenChange}>
      <PopoverTrigger asChild>
        <Button
          variant="outline"
          size="sm"
          disabled={disabled}
          {...(ariaLabel ? { 'aria-label': ariaLabel } : {})}
          className="max-w-[min(100%,320px)] justify-between"
        >
          <span className="flex items-center gap-2 truncate leading-normal">
            {current ? (
              <>
                <span
                  className={cn(
                    'text-[10px] px-1.5 py-0.5 rounded-full shrink-0',
                    current.isLocal
                      ? 'bg-emerald-500/10 text-emerald-600'
                      : 'bg-blue-500/10 text-blue-600'
                  )}
                >
                  {current.providerName}
                </span>
                <span className="truncate" title={current.model.id}>
                  {getModelDisplayName(current.model)}
                </span>
              </>
            ) : (
              <span className="text-muted-foreground">{placeholder}</span>
            )}
          </span>
          <IconChevronDown className="size-4 shrink-0 text-muted-foreground" />
        </Button>
      </PopoverTrigger>
      <PopoverContent
        className="w-[min(100vw-2rem,320px)] p-0 bg-background/95 border"
        align="end"
        sideOffset={8}
      >
        <div className="flex flex-col size-full">
          <div className="relative p-2 border-b">
            <input
              ref={searchInputRef}
              value={searchValue}
              onChange={(e) => setSearchValue(e.target.value)}
              placeholder={searchPlaceholder}
              className="text-sm font-normal outline-0 w-full bg-transparent"
            />
            {searchValue.length > 0 && (
              <div className="absolute right-2 top-0 bottom-0 flex items-center justify-center">
                <IconX
                  size={16}
                  className="text-muted-foreground cursor-pointer"
                  onClick={() => setSearchValue('')}
                />
              </div>
            )}
          </div>
          <div className="max-h-[300px] overflow-y-auto">
            {Object.keys(groupedModels).length === 0 ? (
              <div className="py-3 px-4 text-sm text-muted-foreground">
                {searchValue.trim()
                  ? formatEmptySearch(searchValue.trim())
                  : emptyListMessage}
              </div>
            ) : (
              <div className="py-1">
                {Object.entries(groupedModels).map(([providerKey, entries]) => {
                  const providerInfo = providers.find(
                    (p) => p.provider === providerKey
                  )
                  if (!providerInfo) return null
                  return (
                    <div
                      key={providerKey}
                      className="bg-secondary/30 rounded-sm my-1.5 mx-1.5 first:mt-1 py-1"
                    >
                      <div className="flex items-center gap-1.5 px-2 py-1">
                        <ProvidersAvatar provider={providerInfo} />
                        <span className="capitalize text-sm font-medium text-muted-foreground">
                          {providerKey}
                        </span>
                      </div>
                      {entries.map((e) => {
                        const isSelected =
                          selectedModelId === e.model.id &&
                          selectedProvider === e.providerName
                        const capabilities = e.model.capabilities || []
                        return (
                          <div
                            key={`${e.providerName}:${e.model.id}`}
                            title={e.model.id}
                            onClick={() => handleSelect(e)}
                            className={cn(
                              'mx-1 mb-1 px-2 py-1.5 rounded-sm cursor-pointer flex items-center gap-2 transition-all duration-200',
                              'hover:bg-secondary/40',
                              isSelected &&
                                'bg-secondary/60 hover:bg-secondary/60'
                            )}
                          >
                            <div className="flex items-center gap-2 flex-1 min-w-0">
                              <span
                                className="text-sm truncate"
                                title={e.model.id}
                              >
                                {getModelDisplayName(e.model)}
                              </span>
                              <div className="flex-1" />
                              {capabilities.length > 0 && (
                                <div className="shrink-0 -mr-1.5">
                                  <Capabilities capabilities={capabilities} />
                                </div>
                              )}
                            </div>
                          </div>
                        )
                      })}
                    </div>
                  )
                })}
              </div>
            )}
          </div>
        </div>
      </PopoverContent>
    </Popover>
  )
}

View File

@@ -29,6 +29,10 @@ export type MCPSettings = {
maxRestartDelayMs: number
backoffMultiplier: number
enableSmartToolRouting: boolean
/** When smart routing is on, use a dedicated (e.g. smaller) model for routing instead of the chat model. */
useLightweightRouterModel: boolean
routerModelProvider: string
routerModelId: string
}
export const DEFAULT_MCP_SETTINGS: MCPSettings = {
@@ -37,6 +41,9 @@ export const DEFAULT_MCP_SETTINGS: MCPSettings = {
maxRestartDelayMs: 30000,
backoffMultiplier: 2,
enableSmartToolRouting: true,
useLightweightRouterModel: false,
routerModelProvider: '',
routerModelId: '',
}
type MCPServerStoreState = {

View File

@@ -0,0 +1,34 @@
import { describe, it, expect } from 'vitest'
import { isLikelyLightweightRouterModel } from '../mcp-router-model-filter'
/** Builds a minimal `Model` fixture carrying only an id and an optional display name. */
function m(id: string, displayName?: string): Model {
  return { id, displayName }
}
describe('isLikelyLightweightRouterModel', () => {
  // Asserts the same verdict for every fixture in the list, in order.
  const expectVerdict = (fixtures: Model[], verdict: boolean): void => {
    for (const fixture of fixtures) {
      expect(isLikelyLightweightRouterModel(fixture)).toBe(verdict)
    }
  }

  it('allows small / flash / haiku style names', () => {
    expectVerdict(
      [m('claude-3-5-haiku'), m('gemini-2.0-flash'), m('gpt-4o-mini')],
      true
    )
  })

  it('rejects opus and heavy reasoning tiers', () => {
    expectVerdict(
      [
        m('claude-3-opus'),
        m('x', 'Opus 4'),
        m('o1-preview'),
        m('gpt-4-turbo'),
      ],
      false
    )
  })

  it('rejects unknown midsize ids with no signals', () => {
    expectVerdict([m('company-prod-v2')], false)
  })

  it('allows typical GGUF paths with small param counts', () => {
    expectVerdict([m('models/Meta-Llama-3-8B-Q4_K_M.gguf')], true)
  })
})

View File

@@ -120,6 +120,8 @@ function prependTextDeltaToUIStream(
export class CustomChatTransport implements ChatTransport<UIMessage> {
public model: LanguageModel | null = null
private routerModel: LanguageModel | null = null
private routerModelKey = ''
private tools: Record<string, Tool> = {}
private onTokenUsage?: TokenUsageCallback
private hasDocuments = false
@@ -261,14 +263,20 @@ export class CustomChatTransport implements ChatTransport<UIMessage> {
try {
const mcpService = this.serviceHub.mcp()
let mcpTools: MCPTool[]
const routingEnabled =
useMCPServers.getState().settings.enableSmartToolRouting
const mcpSettings = useMCPServers.getState().settings
const routingEnabled = mcpSettings.enableSmartToolRouting
if (
routingEnabled &&
mcpService.getToolsForServers &&
mcpService.getServerSummaries
) {
const routerModel =
mcpSettings.useLightweightRouterModel &&
mcpSettings.routerModelProvider.trim() &&
mcpSettings.routerModelId.trim()
? (await this.resolveRouterModel(mcpSettings)) ?? this.model
: this.model
mcpTools = await mcpOrchestrator.getRelevantTools(
this.lastUserMessage,
{
@@ -279,7 +287,7 @@ export class CustomChatTransport implements ChatTransport<UIMessage> {
},
disabledToolKeys,
{
routerModel: this.model,
routerModel,
abortSignal,
}
)
@@ -309,6 +317,45 @@ export class CustomChatTransport implements ChatTransport<UIMessage> {
this.tools = toolsRecord
}
/**
 * Resolves the dedicated routing model described by the MCP settings.
 *
 * The last successfully created model is cached together with its
 * `provider::model` key and reused while that key stays the same.
 *
 * @param settings - Router-model portion of the MCP settings.
 * @returns The router model, or `null` when the feature is off, the selection
 * is blank, the provider is unknown, or model creation fails. A warning is
 * logged for the last two cases.
 */
private async resolveRouterModel(settings: {
  useLightweightRouterModel: boolean
  routerModelProvider: string
  routerModelId: string
}): Promise<LanguageModel | null> {
  if (!settings.useLightweightRouterModel) return null

  const providerName = settings.routerModelProvider.trim()
  const modelId = settings.routerModelId.trim()
  if (!(providerName && modelId)) return null

  const cacheKey = `${providerName}::${modelId}`
  const cached = this.routerModel
  if (cached && this.routerModelKey === cacheKey) {
    return cached
  }

  const provider = useModelProvider.getState().getProviderByName(providerName)
  if (!provider) {
    console.warn(
      `[MCP] Router model provider '${providerName}' not found; using chat model for routing.`
    )
    return null
  }

  try {
    const created = await ModelFactory.createModel(modelId, provider, {})
    // Remember the instance together with the key it was built for.
    this.routerModel = created
    this.routerModelKey = cacheKey
    return created
  } catch (error) {
    console.warn(
      `[MCP] Failed to create router model '${cacheKey}'; using chat model for routing.`,
      error
    )
    // Drop any stale cache entry so the next call starts from scratch.
    this.routerModel = null
    this.routerModelKey = ''
    return null
  }
}
/**
* Get current tools
*/

View File

@@ -0,0 +1,67 @@
import { isLocalProvider } from '@/lib/utils'
/**
 * Models that are a poor fit for cheap structured routing (short generateObject calls).
 * Heuristic only — no API to “ask” providers for tier yet.
 *
 * Flags:
 * - `i`: ids and display names are matched case-insensitively.
 * - `m`: the haystack this is tested against has the form
 *   `"<id>\n<displayName>"`, so `^gpt-4$` has to anchor on a single line.
 *   Without `m` the newline after the id meant that alternative could never
 *   match. No other alternative uses `^` or `$`.
 */
const ROUTER_MODEL_DENY = new RegExp(
  [
    'opus',
    'claude-opus',
    'claude-3-opus',
    '\\bo1\\b',
    'o1-preview',
    '\\bo3\\b',
    'gpt-4-turbo',
    '^gpt-4$',
    // Full-size gpt-5 is denied; its small tiers (mini, nano) named later on
    // the same line stay eligible.
    'gpt-5(?!.*\\b(?:mini|nano)\\b)',
  ].join('|'),
  'im'
)
/**
 * Case-insensitive pattern of names/sizes that usually indicate small,
 * routing-friendly models. Built from grouped alternatives joined with `|`.
 */
const ROUTER_MODEL_ALLOW: RegExp = (() => {
  // Vendor tier names used for the cheapest variants.
  const tierNames = ['\\bmini\\b', '\\bnano\\b', 'flash', 'haiku']
  // Explicit small parameter counts.
  const parameterCounts = [
    '\\b8b\\b',
    '\\b7b\\b',
    '\\b3b\\b',
    '\\b2b\\b',
    '\\b1b\\b',
    '\\b1\\.6b\\b',
  ]
  // Generic size words.
  const sizeWords = ['\\bsmall\\b', '\\btiny\\b']
  // Specific model families / ids.
  const families = [
    'phi[-_]?3',
    'gemma[-_]2b',
    'gpt-3\\.5',
    'gpt-4o-mini',
    'deepseek',
  ]
  // Models explicitly labelled for routing.
  const purposeWords = ['\\brouting\\b']

  const alternatives = [
    ...tierNames,
    ...parameterCounts,
    ...sizeWords,
    ...families,
    ...purposeWords,
  ]
  return new RegExp(alternatives.join('|'), 'i')
})()
/**
 * True if this model is likely cheap enough to use for MCP server routing only.
 *
 * The model id and display name are joined with a newline, lowercased, and
 * checked against the deny list first, so a heavy-tier marker always wins over
 * an allow marker. A model matching neither list is rejected.
 *
 * @param model - Model whose `id` and optional `displayName` are inspected.
 * @returns `true` only when no deny pattern matches and an allow pattern does.
 */
export function isLikelyLightweightRouterModel(model: Model): boolean {
  const hay = `${model.id}\n${model.displayName ?? ''}`.toLowerCase()
  if (ROUTER_MODEL_DENY.test(hay)) return false
  // Quantized GGUF names (e.g. "...-8B-Q4_K_M.gguf") are accepted through the
  // parameter-count markers (1b/2b/3b/7b/8b) in ROUTER_MODEL_ALLOW. The former
  // quantization-suffix branch re-tested those same markers only after the
  // allow list had already failed, so it could never return true; it has been
  // removed without changing any result.
  return ROUTER_MODEL_ALLOW.test(hay)
}
/**
 * Decides whether a model appears in the router picker.
 *
 * A model qualifies when the lightweight heuristic accepts it and its provider
 * is either local or has a non-empty API key.
 *
 * @param provider - Provider that owns the model.
 * @param model - Candidate model.
 * @returns `true` when the model may be offered for routing.
 */
export function isRouterModelSelectable(
  provider: ModelProvider,
  model: Model
): boolean {
  const lightweight = isLikelyLightweightRouterModel(model)
  if (!lightweight) return false
  return isLocalProvider(provider.provider)
    ? true
    : Boolean(provider.api_key?.length)
}

View File

@@ -60,6 +60,14 @@
"toolCallTimeout": "Tool call timeout (seconds)",
"toolCallTimeoutDesc": "Maximum time to wait for an MCP tool response before timing out.",
"smartToolRouting": "Smart MCP tool routing",
"smartToolRoutingDesc": "When enabled, Jan selects relevant MCP servers before loading tools. Disable to always load the full MCP tool list."
"smartToolRoutingDesc": "When enabled, Jan selects relevant MCP servers before loading tools. Disable to always load the full MCP tool list.",
"useLightweightRouterModel": "Use a dedicated model for routing",
"useLightweightRouterModelDesc": "When smart routing is on, run the routing step with a separate (often smaller) model instead of the chat model. Turn off to always use the active chat model for routing.",
"routerModel": "Routing model",
"routerModelDesc": "Choose provider and model in one place. Only lightweight models are listed so routing stays fast and cheap.",
"selectRouterModelPlaceholder": "Select routing model…",
"routerModelSearchPlaceholder": "Search models…",
"routerModelEmptyList": "No lightweight models available. Add a smaller model or configure API keys in Models.",
"routerModelEmptySearch": "No matches for “{{query}}”."
}
}

View File

@@ -31,6 +31,9 @@ import { listen } from '@tauri-apps/api/event'
import { SystemEvent } from '@/types/events'
import { Button } from '@/components/ui/button'
import { cn } from '@/lib/utils'
import { useModelProvider } from '@/hooks/useModelProvider'
import { McpRouterModelPicker } from '@/containers/McpRouterModelPicker'
import { isRouterModelSelectable } from '@/lib/mcp-router-model-filter'
// Function to mask sensitive URL parameters
@@ -150,6 +153,41 @@ function MCPServersDesktop() {
}
}
const modelProviders = useModelProvider((state) => state.providers)
const routerPickerDisabled =
!settings.enableSmartToolRouting || !settings.useLightweightRouterModel
useEffect(() => {
if (
!settings.useLightweightRouterModel ||
!settings.routerModelProvider ||
!settings.routerModelId
) {
return
}
if (modelProviders.length === 0) return
const provider = modelProviders.find(
(p) => p.provider === settings.routerModelProvider && p.active
)
const model = provider?.models.find((m) => m.id === settings.routerModelId)
if (
!provider ||
!model ||
!isRouterModelSelectable(provider, model)
) {
updateSettings({ routerModelProvider: '', routerModelId: '' })
}
}, [
settings.useLightweightRouterModel,
settings.routerModelProvider,
settings.routerModelId,
modelProviders,
updateSettings,
])
const handleOpenDialog = (serverKey?: string) => {
if (serverKey) {
// Edit mode
@@ -460,13 +498,82 @@ function MCPServersDesktop() {
<Switch
checked={settings.enableSmartToolRouting}
onCheckedChange={(checked) => {
updateSettings({ enableSmartToolRouting: checked })
if (checked) {
updateSettings({ enableSmartToolRouting: true })
} else {
updateSettings({
enableSmartToolRouting: false,
useLightweightRouterModel: false,
routerModelProvider: '',
routerModelId: '',
})
}
void syncServers()
}}
/>
</div>
}
/>
<CardItem
title={t('mcp-servers:runtimeSettings.useLightweightRouterModel')}
description={t(
'mcp-servers:runtimeSettings.useLightweightRouterModelDesc'
)}
actions={
<div className="shrink-0 ml-4">
<Switch
checked={settings.useLightweightRouterModel}
disabled={!settings.enableSmartToolRouting}
onCheckedChange={(checked) => {
updateSettings(
checked
? { useLightweightRouterModel: true }
: {
useLightweightRouterModel: false,
routerModelProvider: '',
routerModelId: '',
}
)
void syncServers()
}}
/>
</div>
}
/>
<CardItem
title={t('mcp-servers:runtimeSettings.routerModel')}
description={t('mcp-servers:runtimeSettings.routerModelDesc')}
actions={
<McpRouterModelPicker
ariaLabel={t('mcp-servers:runtimeSettings.routerModel')}
providers={modelProviders}
selectedProvider={settings.routerModelProvider}
selectedModelId={settings.routerModelId}
disabled={routerPickerDisabled}
onSelect={(providerName, modelId) => {
updateSettings({
routerModelProvider: providerName,
routerModelId: modelId,
})
void syncServers()
}}
placeholder={t(
'mcp-servers:runtimeSettings.selectRouterModelPlaceholder'
)}
searchPlaceholder={t(
'mcp-servers:runtimeSettings.routerModelSearchPlaceholder'
)}
emptyListMessage={t(
'mcp-servers:runtimeSettings.routerModelEmptyList'
)}
formatEmptySearch={(q) =>
t('mcp-servers:runtimeSettings.routerModelEmptySearch', {
query: q,
})
}
/>
}
/>
</Card>
{Object.keys(mcpServers).length === 0 ? (