From a68dcc8ccd6ffdb05a0af348cb43f99eb0a9aa8a Mon Sep 17 00:00:00 2001 From: dataCenter430 <161712630+dataCenter430@users.noreply.github.com> Date: Tue, 31 Mar 2026 22:09:42 -0700 Subject: [PATCH] feat(mcp): optional lightweight router model for MCP tool routing (#7846) * feat(mcp): optional dedicated model for smart tool routing * refactor: update mcp router model filter and settings * fix: typescript error --- src-tauri/src/core/mcp/constants.rs | 5 +- src-tauri/src/core/mcp/models.rs | 21 ++ .../src/containers/McpRouterModelPicker.tsx | 245 ++++++++++++++++++ web-app/src/hooks/useMCPServers.ts | 7 + .../__tests__/mcp-router-model-filter.test.ts | 34 +++ web-app/src/lib/custom-chat-transport.ts | 53 +++- web-app/src/lib/mcp-router-model-filter.ts | 67 +++++ web-app/src/locales/en/mcp-servers.json | 10 +- web-app/src/routes/settings/mcp-servers.tsx | 109 +++++++- 9 files changed, 545 insertions(+), 6 deletions(-) create mode 100644 web-app/src/containers/McpRouterModelPicker.tsx create mode 100644 web-app/src/lib/__tests__/mcp-router-model-filter.test.ts create mode 100644 web-app/src/lib/mcp-router-model-filter.ts diff --git a/src-tauri/src/core/mcp/constants.rs b/src-tauri/src/core/mcp/constants.rs index befde5fd4..658cc4aaa 100644 --- a/src-tauri/src/core/mcp/constants.rs +++ b/src-tauri/src/core/mcp/constants.rs @@ -64,6 +64,9 @@ pub const DEFAULT_MCP_CONFIG: &str = r#"{ "baseRestartDelayMs": 1000, "maxRestartDelayMs": 30000, "backoffMultiplier": 2.0, - "enableSmartToolRouting": true + "enableSmartToolRouting": true, + "useLightweightRouterModel": false, + "routerModelProvider": "", + "routerModelId": "" } }"#; diff --git a/src-tauri/src/core/mcp/models.rs b/src-tauri/src/core/mcp/models.rs index 6555eef28..5b12bed96 100644 --- a/src-tauri/src/core/mcp/models.rs +++ b/src-tauri/src/core/mcp/models.rs @@ -35,6 +35,18 @@ fn default_enable_smart_tool_routing() -> bool { true } +fn default_use_lightweight_router_model() -> bool { + false +} + +fn default_router_model_provider() -> String { + String::new() +} + +fn default_router_model_id() -> String { + String::new() +} + /// Runtime MCP settings that can be adjusted via UI #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] @@ -49,6 +61,12 @@ pub struct McpSettings { pub backoff_multiplier: f64, #[serde(default = "default_enable_smart_tool_routing")] pub enable_smart_tool_routing: bool, + #[serde(default = "default_use_lightweight_router_model")] + pub use_lightweight_router_model: bool, + #[serde(default = "default_router_model_provider")] + pub router_model_provider: String, + #[serde(default = "default_router_model_id")] + pub router_model_id: String, } impl Default for McpSettings { @@ -59,6 +77,9 @@ impl Default for McpSettings { max_restart_delay_ms: super::constants::DEFAULT_MCP_MAX_RESTART_DELAY_MS, backoff_multiplier: super::constants::DEFAULT_MCP_BACKOFF_MULTIPLIER, enable_smart_tool_routing: true, + use_lightweight_router_model: false, + router_model_provider: String::new(), + router_model_id: String::new(), } } } diff --git a/web-app/src/containers/McpRouterModelPicker.tsx b/web-app/src/containers/McpRouterModelPicker.tsx new file mode 100644 index 000000000..7549ca479 --- /dev/null +++ b/web-app/src/containers/McpRouterModelPicker.tsx @@ -0,0 +1,245 @@ +import { useMemo, useRef, useState, useCallback } from 'react' +import { + Popover, + PopoverContent, + PopoverTrigger, +} from '@/components/ui/popover' +import { Button } from '@/components/ui/button' +import { cn, getModelDisplayName, isLocalProvider } from '@/lib/utils' +import { IconChevronDown, IconX } from '@tabler/icons-react' +import ProvidersAvatar from '@/containers/ProvidersAvatar' +import Capabilities from '@/containers/Capabilities' +import { isRouterModelSelectable } from '@/lib/mcp-router-model-filter' + +type Entry = { + model: Model + providerName: string + isLocal: boolean + hasApiKey: boolean +} + +export type McpRouterModelPickerProps = { + /** Optional label for the trigger control (accessibility). */ + ariaLabel?: string + providers: ModelProvider[] + selectedProvider: string + selectedModelId: string + disabled?: boolean + onSelect: (provider: string, modelId: string) => void + placeholder: string + searchPlaceholder: string + emptyListMessage: string + /** Called with trimmed search text when the filter returns no rows. */ + formatEmptySearch: (query: string) => string +} + +export function McpRouterModelPicker({ + ariaLabel, + providers, + selectedProvider, + selectedModelId, + disabled = false, + onSelect, + placeholder, + searchPlaceholder, + emptyListMessage, + formatEmptySearch, +}: McpRouterModelPickerProps) { + const [open, setOpen] = useState(false) + const [searchValue, setSearchValue] = useState('') + const searchInputRef = useRef(null) + + const availableModels = useMemo((): Entry[] => { + return providers + .filter((p) => p.active) + .flatMap((p) => + p.models + .filter((m) => isRouterModelSelectable(p, m)) + .map((m) => ({ + model: m, + providerName: p.provider, + isLocal: !!isLocalProvider(p.provider), + hasApiKey: !!p.api_key?.length, + })) + ) + }, [providers]) + + const filteredModels = useMemo(() => { + if (!searchValue.trim()) return availableModels + const search = searchValue.toLowerCase() + return availableModels.filter( + (e) => + e.model.id.toLowerCase().includes(search) || + (e.model.displayName?.toLowerCase() ?? '').includes(search) || + e.providerName.toLowerCase().includes(search) + ) + }, [availableModels, searchValue]) + + const groupedModels = useMemo(() => { + const groups: Record = {} + filteredModels.forEach((e) => { + if (!groups[e.providerName]) groups[e.providerName] = [] + groups[e.providerName].push(e) + }) + return groups + }, [filteredModels]) + + const current = useMemo(() => { + if (!selectedModelId || !selectedProvider) return undefined + return availableModels.find( + (e) => + e.providerName === selectedProvider && e.model.id === selectedModelId + ) + }, [availableModels, selectedProvider, selectedModelId]) + + const handleSelect = useCallback( + (entry: Entry) => { + onSelect(entry.providerName, entry.model.id) + setOpen(false) + setSearchValue('') + }, + [onSelect] + ) + + const handleOpenChange = useCallback((isOpen: boolean) => { + setOpen(isOpen) + if (!isOpen) { + setSearchValue('') + } else { + setTimeout(() => searchInputRef.current?.focus(), 100) + } + }, []) + + return ( + + + + + + +
+
+ setSearchValue(e.target.value)} + placeholder={searchPlaceholder} + className="text-sm font-normal outline-0 w-full bg-transparent" + /> + {searchValue.length > 0 && ( +
+ setSearchValue('')} + /> +
+ )} +
+ +
+ {Object.keys(groupedModels).length === 0 ? ( +
+ {searchValue.trim() + ? formatEmptySearch(searchValue.trim()) + : emptyListMessage} +
+ ) : ( +
+ {Object.entries(groupedModels).map(([providerKey, entries]) => { + const providerInfo = providers.find( + (p) => p.provider === providerKey + ) + if (!providerInfo) return null + + return ( +
+
+ + + {providerKey} + +
+ + {entries.map((e) => { + const isSelected = + selectedModelId === e.model.id && + selectedProvider === e.providerName + const capabilities = e.model.capabilities || [] + + return ( +
handleSelect(e)} + className={cn( + 'mx-1 mb-1 px-2 py-1.5 rounded-sm cursor-pointer flex items-center gap-2 transition-all duration-200', + 'hover:bg-secondary/40', + isSelected && + 'bg-secondary/60 hover:bg-secondary/60' + )} + > +
+ + {getModelDisplayName(e.model)} + +
+ {capabilities.length > 0 && ( +
+ +
+ )} +
+
+ ) + })} +
+ ) + })} +
+ )} +
+
+ + + ) +} diff --git a/web-app/src/hooks/useMCPServers.ts b/web-app/src/hooks/useMCPServers.ts index b92b0361d..95b3df2b3 100644 --- a/web-app/src/hooks/useMCPServers.ts +++ b/web-app/src/hooks/useMCPServers.ts @@ -29,6 +29,10 @@ export type MCPSettings = { maxRestartDelayMs: number backoffMultiplier: number enableSmartToolRouting: boolean + /** When smart routing is on, use a dedicated (e.g. smaller) model for routing instead of the chat model. */ + useLightweightRouterModel: boolean + routerModelProvider: string + routerModelId: string } export const DEFAULT_MCP_SETTINGS: MCPSettings = { @@ -37,6 +41,9 @@ export const DEFAULT_MCP_SETTINGS: MCPSettings = { maxRestartDelayMs: 30000, backoffMultiplier: 2, enableSmartToolRouting: true, + useLightweightRouterModel: false, + routerModelProvider: '', + routerModelId: '', } type MCPServerStoreState = { diff --git a/web-app/src/lib/__tests__/mcp-router-model-filter.test.ts b/web-app/src/lib/__tests__/mcp-router-model-filter.test.ts new file mode 100644 index 000000000..192ed3ff0 --- /dev/null +++ b/web-app/src/lib/__tests__/mcp-router-model-filter.test.ts @@ -0,0 +1,34 @@ +import { describe, it, expect } from 'vitest' +import { isLikelyLightweightRouterModel } from '../mcp-router-model-filter' + +const m = (id: string, displayName?: string): Model => ({ + id, + displayName, +}) + +describe('isLikelyLightweightRouterModel', () => { + it('allows small / flash / haiku style names', () => { + expect(isLikelyLightweightRouterModel(m('claude-3-5-haiku'))).toBe(true) + expect(isLikelyLightweightRouterModel(m('gemini-2.0-flash'))).toBe(true) + expect(isLikelyLightweightRouterModel(m('gpt-4o-mini'))).toBe(true) + }) + + it('rejects opus and heavy reasoning tiers', () => { + expect(isLikelyLightweightRouterModel(m('claude-3-opus'))).toBe(false) + expect(isLikelyLightweightRouterModel(m('x', 'Opus 4'))).toBe(false) + expect(isLikelyLightweightRouterModel(m('o1-preview'))).toBe(false) + expect(isLikelyLightweightRouterModel(m('gpt-4-turbo'))).toBe(false) + }) + + it('rejects unknown midsize ids with no signals', () => { + expect(isLikelyLightweightRouterModel(m('company-prod-v2'))).toBe(false) + }) + + it('allows typical GGUF paths with small param counts', () => { + expect( + isLikelyLightweightRouterModel( + m('models/Meta-Llama-3-8B-Q4_K_M.gguf') + ) + ).toBe(true) + }) +}) diff --git a/web-app/src/lib/custom-chat-transport.ts b/web-app/src/lib/custom-chat-transport.ts index a26712090..bd2d9958f 100644 --- a/web-app/src/lib/custom-chat-transport.ts +++ b/web-app/src/lib/custom-chat-transport.ts @@ -120,6 +120,8 @@ function prependTextDeltaToUIStream( export class CustomChatTransport implements ChatTransport { public model: LanguageModel | null = null + private routerModel: LanguageModel | null = null + private routerModelKey = '' private tools: Record = {} private onTokenUsage?: TokenUsageCallback private hasDocuments = false @@ -261,14 +263,20 @@ export class CustomChatTransport implements ChatTransport { try { const mcpService = this.serviceHub.mcp() let mcpTools: MCPTool[] - const routingEnabled = - useMCPServers.getState().settings.enableSmartToolRouting + const mcpSettings = useMCPServers.getState().settings + const routingEnabled = mcpSettings.enableSmartToolRouting if ( routingEnabled && mcpService.getToolsForServers && mcpService.getServerSummaries ) { + const routerModel = + mcpSettings.useLightweightRouterModel && + mcpSettings.routerModelProvider.trim() && + mcpSettings.routerModelId.trim() + ? (await this.resolveRouterModel(mcpSettings)) ?? this.model + : this.model mcpTools = await mcpOrchestrator.getRelevantTools( this.lastUserMessage, { @@ -279,7 +287,7 @@ export class CustomChatTransport implements ChatTransport { }, disabledToolKeys, { - routerModel: this.model, + routerModel, abortSignal, } ) @@ -309,6 +317,45 @@ export class CustomChatTransport implements ChatTransport { this.tools = toolsRecord } + private async resolveRouterModel(settings: { + useLightweightRouterModel: boolean + routerModelProvider: string + routerModelId: string + }): Promise { + if (!settings.useLightweightRouterModel) return null + const providerName = settings.routerModelProvider.trim() + const modelId = settings.routerModelId.trim() + if (!providerName || !modelId) return null + + const key = `${providerName}::${modelId}` + if (this.routerModel && this.routerModelKey === key) { + return this.routerModel + } + + const provider = useModelProvider.getState().getProviderByName(providerName) + if (!provider) { + console.warn( + `[MCP] Router model provider '${providerName}' not found; using chat model for routing.` + ) + return null + } + + try { + const model = await ModelFactory.createModel(modelId, provider, {}) + this.routerModel = model + this.routerModelKey = key + return model + } catch (error) { + console.warn( + `[MCP] Failed to create router model '${key}'; using chat model for routing.`, + error + ) + this.routerModel = null + this.routerModelKey = '' + return null + } + } + /** * Get current tools */ diff --git a/web-app/src/lib/mcp-router-model-filter.ts b/web-app/src/lib/mcp-router-model-filter.ts new file mode 100644 index 000000000..f82d91c25 --- /dev/null +++ b/web-app/src/lib/mcp-router-model-filter.ts @@ -0,0 +1,67 @@ +import { isLocalProvider } from '@/lib/utils' + +/** + * Models that are a poor fit for cheap structured routing (short generateObject calls). + * Heuristic only — no API to “ask” providers for tier yet. + */ +const ROUTER_MODEL_DENY = new RegExp( + [ + 'opus', + 'claude-opus', + 'claude-3-opus', + '\\bo1\\b', + 'o1-preview', + '\\bo3\\b', + 'gpt-4-turbo', + '^gpt-4$', + 'gpt-5(?!.*\\bmini\\b)', + ].join('|'), + 'i' +) + +/** Names/sizes that usually indicate small / routing-friendly models. */ +const ROUTER_MODEL_ALLOW = new RegExp( + [ + '\\bmini\\b', + '\\bnano\\b', + 'flash', + 'haiku', + '\\b8b\\b', + '\\b7b\\b', + '\\b3b\\b', + '\\b2b\\b', + '\\b1b\\b', + '\\b1\\.6b\\b', + '\\bsmall\\b', + '\\btiny\\b', + 'phi[-_]?3', + 'gemma[-_]2b', + 'gpt-3\\.5', + 'gpt-4o-mini', + 'deepseek', + '\\brouting\\b', + ].join('|'), + 'i' +) + +/** True if this model is likely cheap enough to use for MCP server routing only. */ +export function isLikelyLightweightRouterModel(model: Model): boolean { + const hay = `${model.id}\n${model.displayName ?? ''}`.toLowerCase() + if (ROUTER_MODEL_DENY.test(hay)) return false + if (ROUTER_MODEL_ALLOW.test(hay)) return true + + if (/\b(q4|q5|q6|q8)[-_]?([0-9]+)?k?_[a-z0-9+]*\b/i.test(hay)) { + if (/\b(1|2|3|7|8)b\b/i.test(hay)) return true + } + return false +} + +/** Shown in the router picker: lightweight heuristic + local or API-keyed remote. */ +export function isRouterModelSelectable( + provider: ModelProvider, + model: Model +): boolean { + if (!isLikelyLightweightRouterModel(model)) return false + if (isLocalProvider(provider.provider)) return true + return !!provider.api_key?.length +} diff --git a/web-app/src/locales/en/mcp-servers.json b/web-app/src/locales/en/mcp-servers.json index 5ff909005..f4cbae94b 100644 --- a/web-app/src/locales/en/mcp-servers.json +++ b/web-app/src/locales/en/mcp-servers.json @@ -60,6 +60,14 @@ "toolCallTimeout": "Tool call timeout (seconds)", "toolCallTimeoutDesc": "Maximum time to wait for an MCP tool response before timing out.", "smartToolRouting": "Smart MCP tool routing", - "smartToolRoutingDesc": "When enabled, Jan selects relevant MCP servers before loading tools. Disable to always load the full MCP tool list." + "smartToolRoutingDesc": "When enabled, Jan selects relevant MCP servers before loading tools. Disable to always load the full MCP tool list.", + "useLightweightRouterModel": "Use a dedicated model for routing", + "useLightweightRouterModelDesc": "When smart routing is on, run the routing step with a separate (often smaller) model instead of the chat model. Turn off to always use the active chat model for routing.", + "routerModel": "Routing model", + "routerModelDesc": "Choose provider and model in one place. Only lightweight models are listed so routing stays fast and cheap.", + "selectRouterModelPlaceholder": "Select routing model…", + "routerModelSearchPlaceholder": "Search models…", + "routerModelEmptyList": "No lightweight models available. Add a smaller model or configure API keys in Models.", + "routerModelEmptySearch": "No matches for “{{query}}”." } } diff --git a/web-app/src/routes/settings/mcp-servers.tsx b/web-app/src/routes/settings/mcp-servers.tsx index dccf345be..bb08fcadc 100644 --- a/web-app/src/routes/settings/mcp-servers.tsx +++ b/web-app/src/routes/settings/mcp-servers.tsx @@ -31,6 +31,9 @@ import { listen } from '@tauri-apps/api/event' import { SystemEvent } from '@/types/events' import { Button } from '@/components/ui/button' import { cn } from '@/lib/utils' +import { useModelProvider } from '@/hooks/useModelProvider' +import { McpRouterModelPicker } from '@/containers/McpRouterModelPicker' +import { isRouterModelSelectable } from '@/lib/mcp-router-model-filter' // Function to mask sensitive URL parameters @@ -150,6 +153,41 @@ function MCPServersDesktop() { } } + const modelProviders = useModelProvider((state) => state.providers) + + const routerPickerDisabled = + !settings.enableSmartToolRouting || !settings.useLightweightRouterModel + + useEffect(() => { + if ( + !settings.useLightweightRouterModel || + !settings.routerModelProvider || + !settings.routerModelId + ) { + return + } + if (modelProviders.length === 0) return + + const provider = modelProviders.find( + (p) => p.provider === settings.routerModelProvider && p.active + ) + const model = provider?.models.find((m) => m.id === settings.routerModelId) + + if ( + !provider || + !model || + !isRouterModelSelectable(provider, model) + ) { + updateSettings({ routerModelProvider: '', routerModelId: '' }) + } + }, [ + settings.useLightweightRouterModel, + settings.routerModelProvider, + settings.routerModelId, + modelProviders, + updateSettings, + ]) + const handleOpenDialog = (serverKey?: string) => { if (serverKey) { // Edit mode @@ -460,13 +498,82 @@ function MCPServersDesktop() { { - updateSettings({ enableSmartToolRouting: checked }) + if (checked) { + updateSettings({ enableSmartToolRouting: true }) + } else { + updateSettings({ + enableSmartToolRouting: false, + useLightweightRouterModel: false, + routerModelProvider: '', + routerModelId: '', + }) + } void syncServers() }} />
} /> + + { + updateSettings( + checked + ? { useLightweightRouterModel: true } + : { + useLightweightRouterModel: false, + routerModelProvider: '', + routerModelId: '', + } + ) + void syncServers() + }} + /> + + } + /> + { + updateSettings({ + routerModelProvider: providerName, + routerModelId: modelId, + }) + void syncServers() + }} + placeholder={t( + 'mcp-servers:runtimeSettings.selectRouterModelPlaceholder' + )} + searchPlaceholder={t( + 'mcp-servers:runtimeSettings.routerModelSearchPlaceholder' + )} + emptyListMessage={t( + 'mcp-servers:runtimeSettings.routerModelEmptyList' + )} + formatEmptySearch={(q) => + t('mcp-servers:runtimeSettings.routerModelEmptySearch', { + query: q, + }) + } + /> + } + /> {Object.keys(mcpServers).length === 0 ? (