Merge branch 'main' of https://github.com/janhq/jan into feat/assistant-chat-button

This commit is contained in:
markmehere
2026-03-24 09:17:45 +11:00
27 changed files with 815 additions and 116 deletions

5
.gitignore vendored
View File

@@ -55,6 +55,11 @@ docs/.next/
**/yarn-error.log*
**/pnpm-debug.log*
## Swift Package Manager (Foundation Models server)
src-tauri/plugins/tauri-plugin-foundation-models/swift-server/.build/
src-tauri/plugins/tauri-plugin-foundation-models/swift-server/.swiftpm/
src-tauri/plugins/tauri-plugin-foundation-models/swift-server/Package.resolved
## cargo
target
Cargo.lock

View File

@@ -6,15 +6,28 @@ REPORT_PORTAL_PROJECT_NAME ?= ""
REPORT_PORTAL_LAUNCH_NAME ?= "Jan App"
REPORT_PORTAL_DESCRIPTION ?= "Jan App report"
# Detect OS
ifeq ($(OS),Windows_NT)
DETECTED_OS := Windows
else
DETECTED_OS := $(shell uname -s)
endif
ifeq ($(OS),Windows_NT)
MKDIR = if not exist "$(1)" mkdir "$(1)"
else
MKDIR = mkdir -p $(1)
endif
# Default target, does nothing
all:
@echo "Specify a target to run"
# Installs yarn dependencies and builds core and extensions
install-and-build:
ifeq ($(OS),Windows_NT)
ifeq ($(DETECTED_OS),Windows)
echo "skip"
else ifeq ($(shell uname -s),Linux)
else ifeq ($(DETECTED_OS),Linux)
chmod +x src-tauri/build-utils/*
endif
yarn install
@@ -24,7 +37,7 @@ endif
# Install required Rust targets for macOS universal builds
install-rust-targets:
ifeq ($(shell uname -s),Darwin)
ifeq ($(DETECTED_OS),Darwin)
@echo "Detected macOS, installing universal build targets..."
rustup target add x86_64-apple-darwin
rustup target add aarch64-apple-darwin
@@ -88,7 +101,7 @@ dev-android: install-and-build install-android-rust-targets
dev-ios: install-and-build install-ios-rust-targets
@echo "Setting up iOS development environment..."
ifeq ($(shell uname -s),Darwin)
ifeq ($(DETECTED_OS),Darwin)
@if [ ! -d "src-tauri/gen/ios" ]; then \
echo "iOS app not initialized. Initializing..."; \
yarn tauri ios init; \
@@ -110,7 +123,7 @@ lint: install-and-build
# Testing
test: lint install-rust-targets
yarn download:bin
ifeq ($(OS),Windows_NT)
ifeq ($(DETECTED_OS),Windows)
endif
yarn test
yarn copy:assets:tauri
@@ -124,7 +137,7 @@ endif
# Build MLX server (macOS Apple Silicon only) - always builds
build-mlx-server:
ifeq ($(shell uname -s),Darwin)
ifeq ($(DETECTED_OS),Darwin)
@echo "Building MLX server for Apple Silicon..."
cd mlx-server && swift build -c release
@echo "Copying build products..."
@@ -162,7 +175,7 @@ endif
# Build MLX server only if not already present (for dev)
build-mlx-server-if-exists:
ifeq ($(shell uname -s),Darwin)
ifeq ($(DETECTED_OS),Darwin)
@if [ -f "src-tauri/resources/bin/mlx-server" ]; then \
echo "MLX server already exists at src-tauri/resources/bin/mlx-server, skipping build..."; \
else \
@@ -172,10 +185,9 @@ else
@echo "Skipping MLX server build (macOS only)"
endif
# Build jan CLI (release, platform-aware) → src-tauri/resources/bin/jan[.exe]
build-cli:
ifeq ($(shell uname -s),Darwin)
ifeq ($(DETECTED_OS),Darwin)
cd src-tauri && cargo build --release --features cli --bin jan-cli --target aarch64-apple-darwin
cd src-tauri && cargo build --release --features cli --bin jan-cli --target x86_64-apple-darwin
lipo -create \
@@ -183,7 +195,7 @@ ifeq ($(shell uname -s),Darwin)
src-tauri/target/x86_64-apple-darwin/release/jan-cli \
-output src-tauri/resources/bin/jan-cli
chmod +x src-tauri/resources/bin/jan-cli
mkdir -p src-tauri/target/universal-apple-darwin/release
$(call MKDIR,'src-tauri/target/universal-apple-darwin/release')
echo "Checking for code signing identity..."; \
SIGNING_IDENTITY=$$(security find-identity -v -p codesigning | grep "Developer ID Application" | head -1 | sed 's/.*"\(.*\)".*/\1/'); \
@@ -196,7 +208,7 @@ ifeq ($(shell uname -s),Darwin)
fi
cp src-tauri/resources/bin/jan-cli src-tauri/target/universal-apple-darwin/release/jan-cli
else ifeq ($(OS),Windows_NT)
else ifeq ($(DETECTED_OS),Windows)
cd src-tauri && cargo build --release --features cli --bin jan-cli
cp src-tauri/target/release/jan-cli.exe src-tauri/resources/bin/jan-cli.exe
else
@@ -206,16 +218,20 @@ endif
# Debug build for local dev (faster, native arch only)
build-cli-dev:
mkdir -p src-tauri/resources/bin
$(call MKDIR,'src-tauri/resources/bin')
cd src-tauri && cargo build --features cli --bin jan-cli
ifeq ($(DETECTED_OS),Windows)
copy src-tauri\target\debug\jan-cli.exe src-tauri\resources\bin\jan-cli.exe
else
install -m755 src-tauri/target/debug/jan-cli src-tauri/resources/bin/jan-cli
endif
# Build
build: install-and-build install-rust-targets
yarn build
clean:
ifeq ($(OS),Windows_NT)
ifeq ($(DETECTED_OS),Windows)
-powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist, build, out, .turbo, .yarn -Recurse -Directory | Remove-Item -Recurse -Force"
-powershell -Command "Get-ChildItem -Path . -Include package-lock.json, tsconfig.tsbuildinfo -Recurse -File | Remove-Item -Recurse -Force"
-powershell -Command "Remove-Item -Recurse -Force ./pre-install/*.tgz"
@@ -224,7 +240,7 @@ ifeq ($(OS),Windows_NT)
-powershell -Command "Remove-Item -Recurse -Force ./src-tauri/resources"
-powershell -Command "Remove-Item -Recurse -Force ./src-tauri/target"
-powershell -Command "if (Test-Path \"$($env:USERPROFILE)\jan\extensions\") { Remove-Item -Path \"$($env:USERPROFILE)\jan\extensions\" -Recurse -Force }"
else ifeq ($(shell uname -s),Linux)
else ifeq ($(DETECTED_OS),Linux)
find . -name "node_modules" -type d -prune -exec rm -rf '{}' +
find . -name ".next" -type d -exec rm -rf '{}' +
find . -name "dist" -type d -exec rm -rf '{}' +

View File

@@ -0,0 +1,33 @@
---
title: "Jan v0.7.9: CLI on Windows, Smarter Context Management & Safer Data Location Management"
version: 0.7.9
description: "Jan v0.7.9 fetches the latest models during onboarding, caps context length to avoid high RAM usage, fixes CLI on Windows, and improves data location management."
date: 2026-03-23
---
import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
import { Callout } from 'nextra/components'
<ChangelogHeader
title="Jan v0.7.9: CLI on Windows, Smarter Context Management & Safer Data Location Management"
date="2026-03-23"
/>
# Highlights
**Remove OpenClaw Integration**
We recognize that the OpenClaw integration has been causing issues for some users. OpenClaw is a popular agent that has received significant attention from the community. However, it is a resource-heavy product that consumes a large amount of memory and tokens, and can introduce critical security vulnerabilities if not carefully handled in a local AI environment. Therefore, we have decided to remove OpenClaw from Jan and shift our focus toward exploring and building a simpler, safer, and more practical agent experience for Jan users in the near future.
- Jan now remotely fetches the latest models during onboarding, ready for upcoming new Jan models
- Context length is now automatically capped and increased reasonably, avoiding excessively large context sizes that caused high RAM usage
- Fixed CLI path installation on Windows
- Fixed changing Jan data location
- Safely remove Jan data folder with a registered list
---
Update your Jan or [download the latest](https://jan.ai/).
For the complete list of changes, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.7.9).

View File

@@ -64,7 +64,6 @@ import {
import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
// Error message constant - matches web-app/src/utils/error.ts
const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'
/**
* Override the default app.log function to use Jan's logging system.
@@ -1830,12 +1829,6 @@ export default class llamacpp_extension extends AIEngine {
const data = JSON.parse(jsonStr)
const chunk = data as chatCompletionChunk
// Check for out-of-context error conditions
if (chunk.choices?.[0]?.finish_reason === 'length') {
// finish_reason 'length' indicates context limit was hit
throw new Error(OUT_OF_CONTEXT_SIZE)
}
yield chunk
} catch (e) {
logger.error('Error parsing JSON from stream or server error:', e)
@@ -1923,12 +1916,6 @@ export default class llamacpp_extension extends AIEngine {
const completionResponse = (await response.json()) as chatCompletion
// Check for out-of-context error conditions
if (completionResponse.choices?.[0]?.finish_reason === 'length') {
// finish_reason 'length' indicates context limit was hit
throw new Error(OUT_OF_CONTEXT_SIZE)
}
return completionResponse
}

View File

@@ -376,6 +376,18 @@ __metadata:
languageName: node
linkType: hard
"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Ffoundation-models-extension%40workspace%3Afoundation-models-extension":
version: 0.1.10
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=6c8a42&locator=%40janhq%2Ffoundation-models-extension%40workspace%3Afoundation-models-extension"
dependencies:
rxjs: "npm:^7.8.1"
ulidx: "npm:^2.3.0"
peerDependencies:
react: 19.0.0
checksum: 10c0/c80736877d9b0d9498d76588a1e92664f467a84265aee7cb6be497b89ed4b1e1c9379c12686c4676bb438eb268ba97caae607b01686e3dc0d485933a4ab69af7
languageName: node
linkType: hard
"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fllamacpp-extension%40workspace%3Allamacpp-extension":
version: 0.1.10
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=6c8a42&locator=%40janhq%2Fllamacpp-extension%40workspace%3Allamacpp-extension"
@@ -439,6 +451,22 @@ __metadata:
languageName: unknown
linkType: soft
"@janhq/foundation-models-extension@workspace:foundation-models-extension":
version: 0.0.0-use.local
resolution: "@janhq/foundation-models-extension@workspace:foundation-models-extension"
dependencies:
"@janhq/core": ../../core/package.tgz
"@janhq/tauri-plugin-foundation-models-api": "link:../../src-tauri/plugins/tauri-plugin-foundation-models"
"@tauri-apps/api": "npm:2.8.0"
"@tauri-apps/plugin-http": "npm:2.5.0"
"@tauri-apps/plugin-log": "npm:^2.6.0"
cpx: "npm:1.5.0"
rimraf: "npm:3.0.2"
rolldown: "npm:1.0.0-beta.1"
typescript: "npm:5.9.2"
languageName: unknown
linkType: soft
"@janhq/llamacpp-extension@workspace:llamacpp-extension":
version: 0.0.0-use.local
resolution: "@janhq/llamacpp-extension@workspace:llamacpp-extension"
@@ -493,6 +521,12 @@ __metadata:
languageName: unknown
linkType: soft
"@janhq/tauri-plugin-foundation-models-api@link:../../src-tauri/plugins/tauri-plugin-foundation-models::locator=%40janhq%2Ffoundation-models-extension%40workspace%3Afoundation-models-extension":
version: 0.0.0-use.local
resolution: "@janhq/tauri-plugin-foundation-models-api@link:../../src-tauri/plugins/tauri-plugin-foundation-models::locator=%40janhq%2Ffoundation-models-extension%40workspace%3Afoundation-models-extension"
languageName: node
linkType: soft
"@janhq/tauri-plugin-hardware-api@link:../../src-tauri/plugins/tauri-plugin-hardware::locator=%40janhq%2Fllamacpp-extension%40workspace%3Allamacpp-extension":
version: 0.0.0-use.local
resolution: "@janhq/tauri-plugin-hardware-api@link:../../src-tauri/plugins/tauri-plugin-hardware::locator=%40janhq%2Fllamacpp-extension%40workspace%3Allamacpp-extension"

View File

@@ -0,0 +1,30 @@
// swift-tools-version: 6.2
import PackageDescription

// Manifest for the standalone Foundation Models HTTP server that Jan bundles
// via tauri-plugin-foundation-models. Requires macOS 26+ because the
// FoundationModels (Apple Intelligence) framework is only available there.
let package = Package(
    name: "foundation-models-server",
    platforms: [
        .macOS(.v26)
    ],
    products: [
        // Single executable product; the binary name matches what the plugin spawns.
        .executable(name: "foundation-models-server", targets: ["FoundationModelsServer"])
    ],
    dependencies: [
        // CLI argument parsing (--port, --api-key, --check)
        .package(url: "https://github.com/apple/swift-argument-parser", from: "1.7.0"),
        // Lightweight HTTP server framework backing the OpenAI-compatible API
        .package(url: "https://github.com/hummingbird-project/hummingbird", from: "2.19.0"),
    ],
    targets: [
        .executableTarget(
            name: "FoundationModelsServer",
            dependencies: [
                .product(name: "ArgumentParser", package: "swift-argument-parser"),
                .product(name: "Hummingbird", package: "hummingbird"),
            ],
            path: "Sources/FoundationModelsServer",
            swiftSettings: [
                // Opt in to Swift 6 language mode (strict concurrency checking).
                .swiftLanguageMode(.v6)
            ]
        )
    ]
)

View File

@@ -0,0 +1,40 @@
# foundation-models-server
A lightweight OpenAI-compatible HTTP server that wraps Apple's Foundation Models framework, enabling Jan to use on-device Apple Intelligence models on macOS 26+.
## Requirements
- macOS 26 (Tahoe) or later
- Apple Silicon Mac with Apple Intelligence enabled
- Xcode 26 or later
## Building
```bash
swift build -c release
```
The binary will be at `.build/release/foundation-models-server`.
## Usage
```bash
# Check availability
foundation-models-server --check
# Start server on default port
foundation-models-server --port 8080
# Start server with API key
foundation-models-server --port 8080 --api-key <key>
```
## API
The server exposes an OpenAI-compatible API:
- `GET /health` — health check
- `GET /v1/models` — lists the `apple/on-device` model
- `POST /v1/chat/completions` — chat completions (streaming and non-streaming)
The model ID is always `apple/on-device`.

View File

@@ -0,0 +1,78 @@
import ArgumentParser
import Foundation
import Hummingbird
import FoundationModels
@main
/// Entry point for the Foundation Models server.
///
/// Modes:
/// - `--check`: print a machine-readable availability token and exit 0.
/// - default: verify availability, then serve an OpenAI-compatible API on
///   127.0.0.1 at the given `--port`, optionally guarded by `--api-key`.
struct FoundationModelsServerCommand: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        commandName: "foundation-models-server",
        abstract: "Apple Foundation Models inference server with OpenAI-compatible API"
    )

    @Option(name: .long, help: "Port to listen on")
    var port: Int = 8080

    @Option(name: .long, help: "API key for authentication (optional)")
    var apiKey: String = ""

    @Flag(name: .long, help: "Check availability and exit with status 0 if available")
    var check: Bool = false

    /// Run the command: either report availability (`--check`) or start the server.
    /// Throws `ExitCode(1)` when the model is unavailable in server mode.
    func run() async throws {
        let availability = SystemLanguageModel.default.availability
        // In --check mode, always print a machine-readable status token and exit 0.
        // Callers (e.g. the Tauri plugin) parse this string to decide visibility.
        if check {
            switch availability {
            case .available:
                print("available")
            case .unavailable(.deviceNotEligible):
                print("notEligible")
            case .unavailable(.appleIntelligenceNotEnabled):
                print("appleIntelligenceNotEnabled")
            case .unavailable(.modelNotReady):
                print("modelNotReady")
            default:
                print("unavailable")
            }
            return
        }
        // In server mode, refuse to start unless the on-device model is usable;
        // print the specific reason to stderr and exit non-zero.
        guard case .available = availability else {
            let reason: String
            switch availability {
            case .unavailable(.deviceNotEligible):
                reason = "Device is not eligible for Apple Intelligence"
            case .unavailable(.appleIntelligenceNotEnabled):
                reason = "Apple Intelligence is not enabled in System Settings"
            case .unavailable(.modelNotReady):
                reason = "Foundation model is downloading or not yet ready"
            default:
                reason = "Foundation model is unavailable on this system"
            }
            fputs("[foundation-models] ERROR: \(reason)\n", stderr)
            throw ExitCode(1)
        }
        log("[foundation-models] Foundation Models Server starting...")
        log("[foundation-models] Port: \(port)")
        let server = FoundationModelsHTTPServer(
            modelId: "apple/on-device",
            apiKey: apiKey
        )
        let router = server.buildRouter()
        // Bind to loopback only — this server is a local sidecar, never exposed
        // to the network.
        let app = Application(
            router: router,
            configuration: .init(address: .hostname("127.0.0.1", port: port))
        )
        // NOTE(review): these two startup lines are near-duplicates. If either is a
        // readiness sentinel parsed from stdout by the plugin, the other can likely
        // be removed — confirm against the plugin's process handling before deleting.
        log("[foundation-models] http server listening on http://127.0.0.1:\(port)")
        log("[foundation-models] server is listening on 127.0.0.1:\(port)")
        try await app.run()
    }
}

View File

@@ -0,0 +1,6 @@
import Foundation
/// Write one line to standard output and flush immediately, so a parent
/// process capturing stdout (e.g. the Tauri plugin) sees it in real time.
func log(_ message: String) {
    print(message, terminator: "\n")
    fflush(stdout)
}

View File

@@ -0,0 +1,299 @@
import Foundation
import Hummingbird
import FoundationModels
/// HTTP server exposing an OpenAI-compatible API backed by Apple Foundation Models
struct FoundationModelsHTTPServer: Sendable {
    /// Model identifier advertised by /v1/models and echoed in responses.
    let modelId: String
    /// Bearer token required on /v1/chat/completions; empty string disables auth.
    let apiKey: String

    /// Build the Hummingbird router exposing:
    /// - `GET /health`        — liveness probe
    /// - `GET /v1/models`     — single-model listing
    /// - `POST /v1/chat/completions` — streaming and non-streaming completions
    func buildRouter() -> Router<BasicRequestContext> {
        let router = Router()
        // Health check
        router.get("/health") { _, _ in
            let response = HealthResponse(status: "ok")
            return try encodeJSONResponse(response)
        }
        // List available models
        router.get("/v1/models") { _, _ in
            let response = ModelsListResponse(
                object: "list",
                data: [
                    ModelData(
                        id: self.modelId,
                        object: "model",
                        created: currentTimestamp(),
                        owned_by: "apple"
                    )
                ]
            )
            return try encodeJSONResponse(response)
        }
        // Chat completions (OpenAI-compatible)
        router.post("/v1/chat/completions") { request, _ in
            // Validate API key when configured
            if !self.apiKey.isEmpty {
                let authHeader = request.headers[.authorization]
                guard authHeader == "Bearer \(self.apiKey)" else {
                    let errorResp = ErrorResponse(
                        error: ErrorDetail(
                            message: "Unauthorized: invalid or missing API key",
                            type: "authentication_error",
                            code: "unauthorized"
                        )
                    )
                    return try Response(
                        status: .unauthorized,
                        headers: [.contentType: "application/json"],
                        body: .init(byteBuffer: encodeJSONBuffer(errorResp))
                    )
                }
            }
            // Bound request body size to 10 MiB to cap memory usage.
            let body = try await request.body.collect(upTo: 10 * 1024 * 1024)
            let chatRequest: ChatCompletionRequest
            do {
                chatRequest = try JSONDecoder().decode(ChatCompletionRequest.self, from: body)
            } catch {
                let errorResp = ErrorResponse(
                    error: ErrorDetail(
                        message: "Invalid request body: \(error.localizedDescription)",
                        type: "invalid_request_error",
                        code: nil
                    )
                )
                return try Response(
                    status: .badRequest,
                    headers: [.contentType: "application/json"],
                    body: .init(byteBuffer: encodeJSONBuffer(errorResp))
                )
            }
            let isStreaming = chatRequest.stream ?? false
            log("[foundation-models] Request: messages=\(chatRequest.messages.count), stream=\(isStreaming)")
            if isStreaming {
                return try await self.handleStreamingRequest(chatRequest)
            } else {
                return try await self.handleNonStreamingRequest(chatRequest)
            }
        }
        return router
    }

    // MARK: - Non-streaming

    /// Run one full inference turn and return a single `chat.completion`
    /// JSON response with `finish_reason: "stop"`.
    private func handleNonStreamingRequest(_ chatRequest: ChatCompletionRequest) async throws -> Response {
        let session = buildSession(from: chatRequest.messages)
        let lastUserMessage = extractLastUserMessage(from: chatRequest.messages)
        let response = try await session.respond(to: lastUserMessage)
        let content = response.content
        let completionResponse = ChatCompletionResponse(
            id: "chatcmpl-\(UUID().uuidString)",
            object: "chat.completion",
            created: currentTimestamp(),
            model: modelId,
            choices: [
                ChatCompletionChoice(
                    index: 0,
                    message: ChatResponseMessage(role: "assistant", content: content),
                    finish_reason: "stop"
                )
            ],
            // Token counts are not computed here; zeros keep the field
            // present for OpenAI-client compatibility.
            usage: UsageInfo(
                prompt_tokens: 0,
                completion_tokens: 0,
                total_tokens: 0
            )
        )
        return try encodeJSONResponse(completionResponse)
    }

    // MARK: - Streaming

    /// Stream the completion as OpenAI-style SSE: a role delta first, then
    /// content deltas, a stop chunk, and finally `data: [DONE]`.
    private func handleStreamingRequest(_ chatRequest: ChatCompletionRequest) async throws -> Response {
        let requestId = "chatcmpl-\(UUID().uuidString)"
        let created = currentTimestamp()
        let modelId = self.modelId
        let messages = chatRequest.messages
        // Bridge the generation Task to the HTTP response body via AsyncStream.
        let (stream, continuation) = AsyncStream<ByteBuffer>.makeStream()
        let task = Task { [self] in
            do {
                let session = self.buildSession(from: messages)
                let lastUserMessage = self.extractLastUserMessage(from: messages)
                // First chunk carries only the assistant role, per OpenAI convention.
                let roleDelta = ChatCompletionChunk(
                    id: requestId,
                    object: "chat.completion.chunk",
                    created: created,
                    model: modelId,
                    choices: [
                        ChunkChoice(
                            index: 0,
                            delta: DeltaContent(role: "assistant", content: nil),
                            finish_reason: nil
                        )
                    ]
                )
                if let buffer = encodeSSEBuffer(roleDelta) {
                    continuation.yield(buffer)
                }
                // streamResponse yields cumulative snapshots; diff against the
                // previous text so each SSE chunk carries only the new suffix.
                var previousText = ""
                for try await snapshot in session.streamResponse(to: lastUserMessage) {
                    let currentText = snapshot.content
                    let delta = String(currentText.dropFirst(previousText.count))
                    previousText = currentText
                    if delta.isEmpty { continue }
                    let chunk = ChatCompletionChunk(
                        id: requestId,
                        object: "chat.completion.chunk",
                        created: created,
                        model: modelId,
                        choices: [
                            ChunkChoice(
                                index: 0,
                                delta: DeltaContent(role: nil, content: delta),
                                finish_reason: nil
                            )
                        ]
                    )
                    if let buffer = encodeSSEBuffer(chunk) {
                        continuation.yield(buffer)
                    }
                }
                // Send stop chunk
                let stopChunk = ChatCompletionChunk(
                    id: requestId,
                    object: "chat.completion.chunk",
                    created: created,
                    model: modelId,
                    choices: [
                        ChunkChoice(
                            index: 0,
                            delta: DeltaContent(role: nil, content: nil),
                            finish_reason: "stop"
                        )
                    ]
                )
                if let buffer = encodeSSEBuffer(stopChunk) {
                    continuation.yield(buffer)
                }
                // SSE terminator
                var doneBuffer = ByteBufferAllocator().buffer(capacity: 16)
                doneBuffer.writeString("data: [DONE]\n\n")
                continuation.yield(doneBuffer)
            } catch {
                // Surface the failure in-band; clients treat `error:` lines as fatal.
                log("[foundation-models] Streaming error: \(error.localizedDescription)")
                var errBuffer = ByteBufferAllocator().buffer(capacity: 256)
                errBuffer.writeString("error: {\"message\":\"\(error.localizedDescription)\"}\n\n")
                continuation.yield(errBuffer)
            }
            continuation.finish()
        }
        // Cancel the generation task when the client disconnects
        continuation.onTermination = { @Sendable _ in
            log("[foundation-models] SSE continuation terminated by client disconnect")
            task.cancel()
        }
        return Response(
            status: .ok,
            headers: [
                .contentType: "text/event-stream",
                .cacheControl: "no-cache",
                // Disable reverse-proxy buffering so chunks reach the client promptly.
                .init("X-Accel-Buffering")!: "no"
            ],
            body: .init(asyncSequence: stream)
        )
    }

    // MARK: - Session Construction

    /// Build a `LanguageModelSession` from the OpenAI message list.
    ///
    /// System messages become the session instructions.
    /// Prior user/assistant turns are serialised into the instructions block so
    /// the model has full conversation context without re-running inference.
    /// (The Foundation Models `Transcript` API is not used for history injection
    /// because it is designed for observing an already-live session's state, not
    /// for priming a fresh one with arbitrary history.)
    private func buildSession(from messages: [ChatMessage]) -> LanguageModelSession {
        // NOTE(review): only the FIRST system message is honoured; any later
        // system messages are silently dropped — confirm clients never send more.
        let systemContent = messages.first(where: { $0.role == "system" })?.content ?? ""
        let nonSystem = messages.filter { $0.role != "system" }
        let history = nonSystem.dropLast() // all turns except the last user message
        var instructionsText: String
        if systemContent.isEmpty {
            instructionsText = "You are a helpful assistant."
        } else {
            instructionsText = systemContent
        }
        // Append prior turns so the model understands conversation context
        if !history.isEmpty {
            instructionsText += "\n\n[Previous conversation]\n"
            for msg in history {
                let label = msg.role == "assistant" ? "Assistant" : "User"
                instructionsText += "\(label): \(msg.content ?? "")\n"
            }
            instructionsText += "[End of previous conversation]"
        }
        return LanguageModelSession(instructions: instructionsText)
    }

    /// Content of the final non-system message (assumed to be the user's
    /// latest turn); empty string when there is none.
    private func extractLastUserMessage(from messages: [ChatMessage]) -> String {
        let nonSystem = messages.filter { $0.role != "system" }
        return nonSystem.last?.content ?? ""
    }
}
// MARK: - Helpers

/// Current Unix time in whole seconds, used for OpenAI `created` fields.
private func currentTimestamp() -> Int {
    Int(Date().timeIntervalSince1970)
}

/// Encode `value` as JSON and wrap it in a 200 response with
/// `Content-Type: application/json`. Throws if encoding fails.
private func encodeJSONResponse<T: Encodable>(_ value: T) throws -> Response {
    let data = try JSONEncoder().encode(value)
    var buffer = ByteBufferAllocator().buffer(capacity: data.count)
    buffer.writeBytes(data)
    return Response(
        status: .ok,
        headers: [.contentType: "application/json"],
        body: .init(byteBuffer: buffer)
    )
}

/// Encode `value` as JSON into a ByteBuffer. On encoding failure the buffer
/// is empty — deliberate best-effort, since this is used for error payloads.
private func encodeJSONBuffer<T: Encodable>(_ value: T) -> ByteBuffer {
    let data = (try? JSONEncoder().encode(value)) ?? Data()
    var buffer = ByteBufferAllocator().buffer(capacity: data.count)
    buffer.writeBytes(data)
    return buffer
}

/// Encode `value` as a single SSE event (`data: <json>\n\n`), or nil when
/// JSON encoding fails (the caller then skips the event).
private func encodeSSEBuffer<T: Encodable>(_ value: T) -> ByteBuffer? {
    guard let json = try? JSONEncoder().encode(value),
          let jsonString = String(data: json, encoding: .utf8) else {
        return nil
    }
    let line = "data: \(jsonString)\n\n"
    var buffer = ByteBufferAllocator().buffer(capacity: line.utf8.count)
    buffer.writeString(line)
    return buffer
}

View File

@@ -0,0 +1,98 @@
import Foundation
// MARK: - OpenAI Request Types
// Field names deliberately use snake_case: they are the OpenAI JSON wire
// format and must not be renamed.

/// Incoming `/v1/chat/completions` request body.
struct ChatCompletionRequest: Codable, Sendable {
    let model: String
    let messages: [ChatMessage]
    // Generation knobs accepted for wire compatibility.
    // NOTE(review): nothing in this server visibly applies temperature/top_p/
    // max_tokens/n_predict/stop to the session — confirm whether that is intended.
    var temperature: Double?
    var top_p: Double?
    var max_tokens: Int?
    var n_predict: Int?
    var stream: Bool?
    var stop: [String]?
}

/// One conversation turn; `role` is "system", "user", or "assistant".
struct ChatMessage: Codable, Sendable {
    let role: String
    let content: String?
}

// MARK: - OpenAI Response Types

/// Non-streaming `chat.completion` response envelope.
struct ChatCompletionResponse: Codable, Sendable {
    let id: String
    let object: String
    let created: Int
    let model: String
    let choices: [ChatCompletionChoice]
    let usage: UsageInfo
}

/// One completion choice inside a non-streaming response.
struct ChatCompletionChoice: Codable, Sendable {
    let index: Int
    let message: ChatResponseMessage
    let finish_reason: String
}

/// Assistant message carried by a non-streaming choice.
struct ChatResponseMessage: Codable, Sendable {
    let role: String
    let content: String
}

/// Token accounting block (reported as zeros by this server).
struct UsageInfo: Codable, Sendable {
    let prompt_tokens: Int
    let completion_tokens: Int
    let total_tokens: Int
}

// MARK: - Streaming Types

/// One SSE `chat.completion.chunk` event.
struct ChatCompletionChunk: Codable, Sendable {
    let id: String
    let object: String
    let created: Int
    let model: String
    let choices: [ChunkChoice]
}

/// Choice inside a streaming chunk; `finish_reason` is nil until the final chunk.
struct ChunkChoice: Codable, Sendable {
    let index: Int
    let delta: DeltaContent
    let finish_reason: String?
}

/// Incremental delta: role appears only in the first chunk, content in the rest.
struct DeltaContent: Codable, Sendable {
    let role: String?
    let content: String?
}

// MARK: - Model List Types

/// `GET /v1/models` response envelope.
struct ModelsListResponse: Codable, Sendable {
    let object: String
    let data: [ModelData]
}

/// One model entry in the models listing.
struct ModelData: Codable, Sendable {
    let id: String
    let object: String
    let created: Int
    let owned_by: String
}

// MARK: - Health / Error Types

/// `GET /health` response body.
struct HealthResponse: Codable, Sendable {
    let status: String
}

/// OpenAI-style error detail carried inside `ErrorResponse`.
struct ErrorDetail: Codable, Sendable {
    let message: String
    let type: String
    let code: String?
}

/// Top-level error envelope (`{"error": {...}}`), matching OpenAI's format.
struct ErrorResponse: Codable, Sendable {
    let error: ErrorDetail
}

View File

@@ -108,7 +108,7 @@ export const ToolHeader = memo(
return (
<CollapsibleTrigger
className={cn(
'flex w-full items-center gap-2 text-muted-foreground text-sm transition-colors capitalize',
'cursor-pointer flex w-full items-center gap-2 text-muted-foreground text-sm transition-colors capitalize', !isOpen && 'hover:bg-secondary',
className
)}
>

View File

@@ -27,6 +27,7 @@ export const localStorageKey = {
recentSearches: 'recent-searches',
janModelPromptDismissed: 'jan-model-prompt-dismissed',
agentMode: 'agent-mode',
latestJanModel: 'latest-jan-model',
}
export const CACHE_EXPIRY_MS = 1000 * 60 * 60 * 24

View File

@@ -2,7 +2,6 @@
* Model-related constants
*/
export const NEW_JAN_MODEL_HF_REPO = 'janhq/Jan-v3-4B-base-instruct-GGUF'
export const JAN_CODE_HF_REPO = 'janhq/Jan-Code-4b-Gguf'
export const DEFAULT_MODEL_QUANTIZATIONS = ['iq4_xs', 'q4_k_m']

View File

@@ -3,12 +3,9 @@ import { useJanModelPromptDismissed } from '@/hooks/useJanModelPrompt'
import { useServiceHub } from '@/hooks/useServiceHub'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { useEffect, useState, useMemo, useCallback, useRef } from 'react'
import type { CatalogModel } from '@/services/models/types'
import {
NEW_JAN_MODEL_HF_REPO,
SETUP_SCREEN_QUANTIZATIONS,
} from '@/constants/models'
import { useMemo } from 'react'
import { SETUP_SCREEN_QUANTIZATIONS } from '@/constants/models'
import { useLatestJanModel } from '@/hooks/useLatestJanModel'
export function PromptJanModel() {
@@ -18,35 +15,7 @@ export function PromptJanModel() {
useDownloadStore()
const huggingfaceToken = useGeneralSetting((state) => state.huggingfaceToken)
const [janNewModel, setJanNewModel] = useState<CatalogModel | null>(null)
const [isLoading, setIsLoading] = useState(true)
const fetchAttempted = useRef(false)
const fetchJanModel = useCallback(async () => {
if (fetchAttempted.current) return
fetchAttempted.current = true
try {
const repo = await serviceHub
.models()
.fetchHuggingFaceRepo(NEW_JAN_MODEL_HF_REPO, huggingfaceToken)
if (repo) {
const catalogModel = serviceHub
.models()
.convertHfRepoToCatalogModel(repo)
setJanNewModel(catalogModel)
}
} catch (error) {
console.error('Error fetching Jan Model:', error)
} finally {
setIsLoading(false)
}
}, [serviceHub, huggingfaceToken])
useEffect(() => {
fetchJanModel()
}, [fetchJanModel])
const { model: janNewModel, loading: isLoading } = useLatestJanModel()
const defaultVariant = useMemo(() => {
if (!janNewModel) return null
@@ -91,14 +60,14 @@ export function PromptJanModel() {
setDismissed(true)
}
if (isLoading) return null
if (isLoading || !janNewModel) return null
return (
<div className="fixed bottom-4 right-4 z-50 p-4 shadow-lg bg-background w-4/5 md:w-100 border rounded-lg">
<div className="flex items-center gap-2">
<img src="/images/jan-logo.png" alt="Jan" className="size-5" />
<h2 className="font-medium">
Jan v3 Model
{janNewModel?.display_name ?? janNewModel?.model_name ?? 'Jan Model'}
{defaultVariant && (
<span className="text-muted-foreground">
{' '}
@@ -108,7 +77,7 @@ export function PromptJanModel() {
</h2>
</div>
<p className="mt-2 text-sm text-muted-foreground">
Get started with Jan v3, our recommended local AI model optimized for your device.
Get started with {janNewModel?.display_name ?? 'Jan'}, our recommended local AI model optimized for your device.
</p>
<div className="mt-4 flex justify-end space-x-2">
<Button

View File

@@ -7,11 +7,8 @@ import { useDownloadStore } from '@/hooks/useDownloadStore'
import { useServiceHub } from '@/hooks/useServiceHub'
import { useEffect, useMemo, useCallback, useState, useRef } from 'react'
import { AppEvent, events } from '@janhq/core'
import type { CatalogModel } from '@/services/models/types'
import {
NEW_JAN_MODEL_HF_REPO,
SETUP_SCREEN_QUANTIZATIONS,
} from '@/constants/models'
import { SETUP_SCREEN_QUANTIZATIONS } from '@/constants/models'
import { useLatestJanModel } from '@/hooks/useLatestJanModel'
import { toast } from 'sonner'
import { Button } from '@/components/ui/button'
import { IconEye, IconSquareCheck } from '@tabler/icons-react'
@@ -92,37 +89,19 @@ function SetupScreen() {
const llamaProvider = getProviderByName('llamacpp')
const [quickStartInitiated, setQuickStartInitiated] = useState(false)
const [quickStartQueued, setQuickStartQueued] = useState(false)
const [janNewModel, setJanNewModel] = useState<CatalogModel | null>(null)
const {
model: janNewModel,
error: metadataFetchFailed,
fetchLatestJanModel,
} = useLatestJanModel()
const [supportedVariants, setSupportedVariants] = useState<
Map<string, 'RED' | 'YELLOW' | 'GREEN' | 'GREY'>
>(new Map())
const [metadataFetchFailed, setMetadataFetchFailed] = useState(false)
const supportCheckInProgress = useRef(false)
const checkedModelId = useRef<string | null>(null)
const [isSupportCheckComplete, setIsSupportCheckComplete] = useState(false)
const huggingfaceToken = useGeneralSetting((state) => state.huggingfaceToken)
const fetchJanModel = useCallback(async () => {
setMetadataFetchFailed(false)
try {
const repo = await serviceHub
.models()
.fetchHuggingFaceRepo(NEW_JAN_MODEL_HF_REPO, huggingfaceToken)
if (repo) {
const catalogModel = serviceHub
.models()
.convertHfRepoToCatalogModel(repo)
setJanNewModel(catalogModel)
} else {
setMetadataFetchFailed(true)
}
} catch (error) {
console.error('Error fetching Jan Model V2:', error)
setMetadataFetchFailed(true)
}
}, [serviceHub, huggingfaceToken])
// Check model support for variants when janNewModel is available
useEffect(() => {
const checkModelSupport = async () => {
@@ -188,8 +167,8 @@ function SetupScreen() {
}, [janNewModel, serviceHub])
useEffect(() => {
fetchJanModel()
}, [fetchJanModel])
fetchLatestJanModel(true)
}, [fetchLatestJanModel])
const defaultVariant = useMemo(() => {
if (!janNewModel) return null
@@ -459,7 +438,7 @@ function SetupScreen() {
<div className="flex flex-col w-full h-full justify-center">
<div className="flex flex-1 items-center justify-between">
<h1 className="font-semibold text-sm mb-1">
<span>Jan v3</span>&nbsp;<span className='text-xs text-muted-foreground'>· {defaultVariant?.file_size}</span>
<span>{janNewModel?.display_name ?? janNewModel?.model_name ?? 'Jan Model'}</span>&nbsp;<span className='text-xs text-muted-foreground'>· {defaultVariant?.file_size}</span>
</h1>
{(isDownloading) && (
<div className="flex items-center gap-1.5 text-xs text-muted-foreground">

View File

@@ -3,6 +3,7 @@ import { create } from 'zustand'
import { createJSONStorage, persist } from 'zustand/middleware'
import { useModelProvider } from './useModelProvider'
import { useDownloadStore } from './useDownloadStore'
import { useLatestJanModel } from './useLatestJanModel'
import { predefinedProviders } from '@/constants/providers'
export type JanModelPromptDismissedState = {
@@ -30,6 +31,7 @@ export const useJanModelPrompt = () => {
const { dismissed, setDismissed } = useJanModelPromptDismissed()
const { getProviderByName, providers } = useModelProvider()
const { localDownloadingModels } = useDownloadStore()
const latestModel = useLatestJanModel((state) => state.model)
const llamaProvider = getProviderByName('llamacpp')
const setupCompleted =
@@ -54,26 +56,31 @@ export const useJanModelPrompt = () => {
})
const isOnSetupScreen = !hasValidProviders
// Check if any Jan v3 variant is downloaded
const isJanModelDownloaded =
llamaProvider?.models.some(
(m: { id: string }) =>
m.id.toLowerCase().includes('jan-v3') ||
m.id.toLowerCase().includes('jan_v3')
) ?? false
// Check if currently downloading
const isDownloading = Array.from(localDownloadingModels).some(
(id) =>
id.toLowerCase().includes('jan-v3') ||
id.toLowerCase().includes('jan_v3')
// Build set of known quant model IDs from the latest Jan model
const latestModelQuantIds = new Set(
latestModel?.quants?.map((q) => q.model_id.toLowerCase()) ?? []
)
// Check if any variant of the latest Jan model is downloaded
const isJanModelDownloaded =
latestModelQuantIds.size > 0 &&
(llamaProvider?.models.some(
(m: { id: string }) => latestModelQuantIds.has(m.id.toLowerCase())
) ?? false)
// Check if currently downloading any variant
const isDownloading =
latestModelQuantIds.size > 0 &&
Array.from(localDownloadingModels).some(
(id) => latestModelQuantIds.has(id.toLowerCase())
)
const showJanModelPrompt =
isTargetVersion &&
!isOnSetupScreen &&
!setupCompleted &&
!dismissed &&
latestModel != null &&
!isJanModelDownloaded &&
!isDownloading

View File

@@ -0,0 +1,65 @@
import { create } from 'zustand'
import { persist, createJSONStorage } from 'zustand/middleware'
import { localStorageKey, CACHE_EXPIRY_MS } from '@/constants/localStorage'
import { getServiceHub } from '@/hooks/useServiceHub'
import type { CatalogModel } from '@/services/models/types'
type LatestJanModelState = {
  model: CatalogModel | null
  lastFetchedAt: number | null
  loading: boolean
  error: boolean
  fetchLatestJanModel: (force?: boolean) => Promise<void>
}

/**
 * Persisted store for the most recently published Jan model descriptor.
 * The model payload and fetch timestamp survive reloads via localStorage;
 * `loading`/`error` are transient and reset on each app start.
 */
export const useLatestJanModel = create<LatestJanModelState>()(
  persist(
    (set, get) => ({
      model: null,
      lastFetchedAt: null,
      loading: false,
      error: false,
      fetchLatestJanModel: async (force = false) => {
        const state = get()
        // Ignore re-entrant calls while a fetch is already in flight.
        if (state.loading) return
        // Serve the cached copy unless forced or the cache has expired.
        const cacheIsFresh =
          state.lastFetchedAt != null &&
          Date.now() - state.lastFetchedAt < CACHE_EXPIRY_MS
        if (cacheIsFresh && !force) return
        set({ loading: true, error: false })
        const markFailed = () => set({ error: true, loading: false })
        try {
          const fetched = await getServiceHub()
            .models()
            .fetchLatestJanModel()
          fetched
            ? set({ model: fetched, lastFetchedAt: Date.now(), loading: false })
            : markFailed()
        } catch {
          markFailed()
        }
      },
    }),
    {
      name: localStorageKey.latestJanModel,
      storage: createJSONStorage(() => localStorage),
      // Persist only the payload and its timestamp; transient flags
      // (loading/error) are recomputed at runtime.
      partialize: ({ model, lastFetchedAt }) => ({ model, lastFetchedAt }),
    }
  )
)

View File

@@ -51,7 +51,7 @@ export const useLocalApiServer = create<LocalApiServerState>()(
set({ defaultModelLocalApiServer: model }),
lastServerModels: [],
setLastServerModels: (models) => set({ lastServerModels: models }),
serverHost: '0.0.0.0',
serverHost: '127.0.0.1',
setServerHost: (value) => set({ serverHost: value }),
// Use port 0 (auto-assign) for mobile to avoid conflicts, 1337 for desktop
serverPort: (typeof window !== 'undefined' && (window as { IS_ANDROID?: boolean }).IS_ANDROID) || (typeof window !== 'undefined' && (window as { IS_IOS?: boolean }).IS_IOS) ? 0 : 1337,

View File

@@ -15,6 +15,7 @@ import { useAnalytic } from '@/hooks/useAnalytic'
import { PromptAnalytic } from '@/containers/analytics/PromptAnalytic'
import { useJanModelPrompt } from '@/hooks/useJanModelPrompt'
import { PromptJanModel } from '@/containers/PromptJanModel'
import { useLatestJanModel } from '@/hooks/useLatestJanModel'
import { AnalyticProvider } from '@/providers/AnalyticProvider'
import { useLeftPanel } from '@/hooks/useLeftPanel'
import ToolApproval from '@/containers/dialogs/ToolApproval'
@@ -43,6 +44,13 @@ const AppLayout = () => {
width: sidebarWidth,
setLeftPanelWidth,
} = useLeftPanel()
const fetchLatestJanModel = useLatestJanModel(
(state) => state.fetchLatestJanModel
)
useEffect(() => {
fetchLatestJanModel()
}, [fetchLatestJanModel])
return (
<div className="bg-neutral-50 dark:bg-background size-full relative">

View File

@@ -145,7 +145,9 @@ function ThreadDetail() {
// context-limit hit, so the user sees it instead of a blank gap.
const [pendingContinueMessage, setPendingContinueMessage] =
useState<UIMessage | null>(null)
const [isAutoIncreasingContext, setIsAutoIncreasingContext] = useState(false)
const [autoIncreaseAttempts, setAutoIncreaseAttempts] = useState(0)
const MAX_AUTO_INCREASE_ATTEMPTS = 3
const isAutoIncreasingContext = autoIncreaseAttempts > 0 && autoIncreaseAttempts < MAX_AUTO_INCREASE_ATTEMPTS
const [contextLimitError, setContextLimitError] = useState<Error | null>(null)
// Refs so onFinish (captured in closure) always calls the latest callbacks
@@ -765,6 +767,9 @@ function ThreadDetail() {
// Increase context length in steps: <8192 -> 8192 -> 32768 -> x1.5
const currentCtxLen =
(model.settings?.ctx_len?.controller_props?.value as number) ?? 8192
const maxCtxLen =
(model.settings?.ctx_len?.controller_props?.max as number) || 131072
let newCtxLen: number
if (currentCtxLen < 8192) {
newCtxLen = 8192
@@ -774,6 +779,12 @@ function ThreadDetail() {
newCtxLen = Math.round(currentCtxLen * 1.5)
}
newCtxLen = Math.min(newCtxLen, maxCtxLen)
if (newCtxLen <= currentCtxLen) {
setContextLimitError(new Error(OUT_OF_CONTEXT_SIZE))
return
}
const updatedModel = {
...model,
settings: {
@@ -818,6 +829,7 @@ function ThreadDetail() {
)
useEffect(() => {
if (!error || agentModeActive) return
if (autoIncreaseAttempts >= MAX_AUTO_INCREASE_ATTEMPTS) return
const autoIncrease =
selectedModel?.settings?.auto_increase_ctx_len?.controller_props?.value ??
true
@@ -829,7 +841,7 @@ function ThreadDetail() {
error.message?.toLowerCase().includes('limit'))) ||
error.message === OUT_OF_CONTEXT_SIZE
if (isContextError) {
setIsAutoIncreasingContext(true)
setAutoIncreaseAttempts((prev) => prev + 1)
handleContextSizeIncrease()
}
}, [error]) // eslint-disable-line react-hooks/exhaustive-deps
@@ -838,8 +850,8 @@ function ThreadDetail() {
if (status === 'streaming' || status === 'submitted') {
setContextLimitError(null)
}
if (isAutoIncreasingContext && (status === 'streaming' || status === 'error')) {
setIsAutoIncreasingContext(false)
if (status === 'streaming' && autoIncreaseAttempts > 0) {
setAutoIncreaseAttempts(0)
}
if (status === 'error' && pendingContinueMessage) {
setPendingContinueMessage(null)

View File

@@ -60,6 +60,27 @@ export class DefaultModelsService implements ModelsService {
}
}
async fetchLatestJanModel(): Promise<CatalogModel | null> {
try {
const response = await fetch(LATEST_JAN_MODEL_URL)
if (!response.ok) {
console.error(
`Failed to fetch latest Jan model: ${response.status} ${response.statusText}`
)
return null
}
const data = await response.json()
const model: CatalogModel = Array.isArray(data) ? data[0] : data
return model ?? null
} catch (error) {
console.error('Error fetching latest Jan model:', error)
return null
}
}
async fetchHuggingFaceRepo(
repoId: string,
hfToken?: string

View File

@@ -27,6 +27,7 @@ export interface SafetensorsFile {
export interface CatalogModel {
model_name: string
display_name?: string
description: string
library_name?: string
developer?: string
@@ -105,6 +106,7 @@ export interface ModelsService {
getModel(modelId: string): Promise<modelInfo | undefined>
fetchModels(): Promise<modelInfo[]>
fetchModelCatalog(): Promise<ModelCatalog>
fetchLatestJanModel(): Promise<CatalogModel | null>
fetchHuggingFaceRepo(
repoId: string,
hfToken?: string

View File

@@ -45,8 +45,11 @@ export class TauriProvidersService extends DefaultProvidersService {
}
}).filter(Boolean)
// TODO: Re-enable foundation-models once migrated to apple-foundation-models crate
const hiddenProviders = new Set(['foundation-models'])
const runtimeProviders: ModelProvider[] = []
for (const [providerName, value] of EngineManager.instance().engines) {
if (hiddenProviders.has(providerName)) continue
const models = await value.list() ?? []
const provider: ModelProvider = {
active: false,

View File

@@ -20,6 +20,7 @@ declare global {
declare const POSTHOG_KEY: string
declare const POSTHOG_HOST: string
declare const MODEL_CATALOG_URL: string
declare const LATEST_JAN_MODEL_URL: string
declare const AUTO_UPDATER_DISABLED: boolean
declare const UPDATE_CHECK_INTERVAL_MS: number
declare const GA_MEASUREMENT_ID: string

View File

@@ -8,6 +8,9 @@ type ControllerProps = {
options?: Array<{ value: number | string; name: string }>
input_actions?: string[]
recommended?: string
min?: number
max?: number
step?: number
}
/**

View File

@@ -96,6 +96,9 @@ export default defineConfig(({ mode }) => {
MODEL_CATALOG_URL: JSON.stringify(
'https://raw.githubusercontent.com/janhq/model-catalog/main/model_catalog_v2.json'
),
LATEST_JAN_MODEL_URL: JSON.stringify(
'https://raw.githubusercontent.com/janhq/model-catalog/main/latest_jan_model.json'
),
AUTO_UPDATER_DISABLED: JSON.stringify(
env.AUTO_UPDATER_DISABLED === 'true'
),