Feat/improve file attachments (#7080)

* embedding works for large files
* attachment as inline
* update TanStack Router
* attachment works with proper selection
* fix test
* wait for model to start before doing things
* Token Count now counts inline
* Revert "embedding works for large files"

  This reverts commit 85184860cde0729a7a795ea6b9caf2bf66754930.
* refactor: add batch processing to embedTexts

  Implemented batch‑based embedding for both rag-extension and vector-db-extension.
  - Introduced a `batchSize` parameter with a sensible default.
  - Processed texts in chunks to avoid large single calls to the LlamaCPP embed API.
  - Mapped batch results to global indices and added per‑batch error handling.
  - Logged failures and re‑threw errors with contextual information.

  This change improves memory usage, resilience to API timeouts, and overall scalability of the embedding pipeline.
* Update web-app/src/locales/fr/common.json

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update web-app/src/locales/ru/common.json

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update web-app/src/locales/pt-BR/common.json

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* fix lint
* attachment works properly now
* update padding

---------

Co-authored-by: Akarshan <akarshan@menlo.ai>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-12-03 15:49:54 +07:00
parent 97876b55b5
commit c74bbbd9e9
35 changed files with 1740 additions and 492 deletions
--- a/core/src/browser/extensions/rag.ts
+++ b/core/src/browser/extensions/rag.ts
@@ -33,4 +33,9 @@ export abstract class RAGExtension extends BaseExtension {
  abstract callTool(toolName: string, args: Record<string, unknown>): Promise<MCPToolCallResult>

  abstract ingestAttachments(threadId: string, files: AttachmentInput[]): Promise<IngestAttachmentsResult>
+
+  /**
+   * Parse a document into plain text for inline ingestion or preprocessing.
+   */
+  abstract parseDocument(path: string, type?: string): Promise<string>
 }
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -35,7 +35,12 @@ import {
  mapOldBackendToNew,
 } from './backend'
 import { invoke } from '@tauri-apps/api/core'
-import { getProxyConfig } from './util'
+import {
+  getProxyConfig,
+  buildEmbedBatches,
+  mergeEmbedResponses,
+  type EmbedBatchResult,
+} from './util'
 import { basename } from '@tauri-apps/api/path'
 import {
  loadLlamaModel,
@@ -2331,14 +2336,20 @@ export default class llamacpp_extension extends AIEngine {
      sInfo = await this.load('sentence-transformer-mini', undefined, true)
    }

-    const attemptRequest = async (session: SessionInfo) => {
+    const ubatchSize =
+      (this.config?.ubatch_size && this.config.ubatch_size > 0
+        ? this.config.ubatch_size
+        : 512) || 512
+    const batches = buildEmbedBatches(text, ubatchSize)
+
+    const attemptRequest = async (session: SessionInfo, batchInput: string[]) => {
      const baseUrl = `http://localhost:${session.port}/v1/embeddings`
      const headers = {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${session.api_key}`,
      }
      const body = JSON.stringify({
-        input: text,
+        input: batchInput,
        model: session.model_id,
        encoding_format: 'float',
      })
@@ -2350,26 +2361,38 @@ export default class llamacpp_extension extends AIEngine {
      return response
    }

-    // First try with the existing session (may have been started without --embedding previously)
-    let response = await attemptRequest(sInfo)
+    const sendBatch = async (batchInput: string[]) => {
+      let response = await attemptRequest(sInfo as SessionInfo, batchInput)

-    // If embeddings endpoint is not available (501), reload with embedding mode and retry once
-    if (response.status === 501) {
-      try {
-        await this.unload('sentence-transformer-mini')
-      } catch {}
-      sInfo = await this.load('sentence-transformer-mini', undefined, true)
-      response = await attemptRequest(sInfo)
+      // If embeddings endpoint is not available (501), reload with embedding mode and retry once
+      if (response.status === 501) {
+        try {
+          await this.unload('sentence-transformer-mini')
+        } catch {}
+        sInfo = await this.load('sentence-transformer-mini', undefined, true)
+        response = await attemptRequest(sInfo as SessionInfo, batchInput)
+      }
+
+      if (!response.ok) {
+        const errorData = await response.json().catch(() => null)
+        throw new Error(
+          `API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
+        )
+      }
+      const responseData = (await response.json()) as EmbedBatchResult
+      return responseData
    }

-    if (!response.ok) {
-      const errorData = await response.json().catch(() => null)
-      throw new Error(
-        `API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
-      )
+    const batchResults: Array<{ result: EmbedBatchResult; offset: number }> = []
+    for (const { batch, offset } of batches) {
+      const result = await sendBatch(batch)
+      batchResults.push({ result, offset })
    }
-    const responseData = await response.json()
-    return responseData as EmbeddingResponse
+
+    return mergeEmbedResponses(
+      (sInfo as SessionInfo).model_id,
+      batchResults
+    ) as EmbeddingResponse
  }

  /**
--- a/extensions/llamacpp-extension/src/util.ts
+++ b/extensions/llamacpp-extension/src/util.ts
@@ -106,3 +106,79 @@ export function getProxyConfig(): Record<
    throw error
  }
 }
+
+// --- Embedding batching helpers ---
+
+export type EmbedBatch = { batch: string[]; offset: number }
+export type EmbedUsage = { prompt_tokens?: number; total_tokens?: number }
+export type EmbedData = { embedding: number[]; index: number }
+
+export type EmbedBatchResult = {
+  data: EmbedData[]
+  usage?: EmbedUsage
+}
+
+export function estimateTokensFromText(text: string, charsPerToken = 3): number {
+  return Math.max(1, Math.ceil(text.length / Math.max(charsPerToken, 1)))
+}
+
+export function buildEmbedBatches(
+  inputs: string[],
+  ubatchSize: number,
+  charsPerToken = 3
+): EmbedBatch[] {
+  const batches: EmbedBatch[] = []
+  let current: string[] = []
+  let currentTokens = 0
+  let offset = 0
+
+  const push = () => {
+    if (current.length) {
+      batches.push({ batch: current, offset })
+      offset += current.length
+      current = []
+      currentTokens = 0
+    }
+  }
+
+  for (const text of inputs) {
+    const estTokens = estimateTokensFromText(text, charsPerToken)
+    if (!current.length && estTokens > ubatchSize) {
+      batches.push({ batch: [text], offset })
+      offset += 1
+      continue
+    }
+
+    if (currentTokens + estTokens > ubatchSize && current.length) {
+      push()
+    }
+
+    current.push(text)
+    currentTokens += estTokens
+  }
+
+  push()
+  return batches
+}
+
+export function mergeEmbedResponses(
+  model: string,
+  batchResults: Array<{ result: EmbedBatchResult; offset: number }>
+) {
+  const aggregated = {
+    model,
+    object: 'list',
+    usage: { prompt_tokens: 0, total_tokens: 0 },
+    data: [] as EmbedData[],
+  }
+
+  for (const { result, offset } of batchResults) {
+    aggregated.usage.prompt_tokens += result.usage?.prompt_tokens ?? 0
+    aggregated.usage.total_tokens += result.usage?.total_tokens ?? 0
+    for (const item of result.data || []) {
+      aggregated.data.push({ ...item, index: item.index + offset })
+    }
+  }
+
+  return aggregated
+}
--- a/extensions/rag-extension/settings.json
+++ b/extensions/rag-extension/settings.json
@@ -6,6 +6,28 @@
    "controllerType": "checkbox",
    "controllerProps": { "value": true }
  },
+  {
+    "key": "parse_mode",
+    "titleKey": "settings:attachments.parseMode",
+    "descriptionKey": "settings:attachments.parseModeDesc",
+    "controllerType": "dropdown",
+    "controllerProps": {
+      "value": "auto",
+      "options": [
+        { "name": "Auto", "value": "auto" },
+        { "name": "Include in chat", "value": "inline" },
+        { "name": "Ingest as embeddings", "value": "embeddings" },
+        { "name": "Ask every time", "value": "prompt" }
+      ]
+    }
+  },
+  {
+    "key": "auto_inline_context_ratio",
+    "titleKey": "settings:attachments.autoInlineThreshold",
+    "descriptionKey": "settings:attachments.autoInlineThresholdDesc",
+    "controllerType": "input",
+    "controllerProps": { "value": 0.75, "type": "number", "min": 0.05, "max": 1, "step": 0.05, "textAlign": "right" }
+  },
  {
    "key": "max_file_size_mb",
    "titleKey": "settings:attachments.maxFile",
@@ -28,14 +50,14 @@
    "controllerProps": { "value": 0.3, "type": "number", "min": 0, "max": 1, "step": 0.01, "textAlign": "right" }
  },
  {
-    "key": "chunk_size_tokens",
+    "key": "chunk_size_chars",
    "titleKey": "settings:attachments.chunkSize",
    "descriptionKey": "settings:attachments.chunkSizeDesc",
    "controllerType": "input",
    "controllerProps": { "value": 512, "type": "number", "min": 64, "max": 8192, "step": 64, "textAlign": "right" }
  },
  {
-    "key": "overlap_tokens",
+    "key": "overlap_chars",
    "titleKey": "settings:attachments.chunkOverlap",
    "descriptionKey": "settings:attachments.chunkOverlapDesc",
    "controllerType": "input",
--- a/extensions/rag-extension/src/index.ts
+++ b/extensions/rag-extension/src/index.ts
@@ -1,16 +1,19 @@
 import { RAGExtension, MCPTool, MCPToolCallResult, ExtensionTypeEnum, VectorDBExtension, type AttachmentInput, type SettingComponentProps, AIEngine, type AttachmentFileInfo } from '@janhq/core'
 import './env.d'
 import { getRAGTools, RETRIEVE, LIST_ATTACHMENTS, GET_CHUNKS } from './tools'
+import * as ragApi from '@janhq/tauri-plugin-rag-api'

 export default class RagExtension extends RAGExtension {
  private config = {
    enabled: true,
    retrievalLimit: 3,
    retrievalThreshold: 0.3,
-    chunkSizeTokens: 512,
-    overlapTokens: 64,
+    chunkSizeChars: 512,
+    overlapChars: 64,
    searchMode: 'auto' as 'auto' | 'ann' | 'linear',
    maxFileSizeMB: 20,
+    parseMode: 'auto' as 'auto' | 'inline' | 'embeddings' | 'prompt',
+    autoInlineContextRatio: 0.75,
  }

  async onLoad(): Promise<void> {
@@ -20,9 +23,19 @@ export default class RagExtension extends RAGExtension {
    this.config.maxFileSizeMB = await this.getSetting('max_file_size_mb', this.config.maxFileSizeMB)
    this.config.retrievalLimit = await this.getSetting('retrieval_limit', this.config.retrievalLimit)
    this.config.retrievalThreshold = await this.getSetting('retrieval_threshold', this.config.retrievalThreshold)
-    this.config.chunkSizeTokens = await this.getSetting('chunk_size_tokens', this.config.chunkSizeTokens)
-    this.config.overlapTokens = await this.getSetting('overlap_tokens', this.config.overlapTokens)
+    // Prefer char-based keys; fall back to legacy token keys for backward compatibility
+    this.config.chunkSizeChars =
+      (await this.getSetting('chunk_size_chars', this.config.chunkSizeChars)) ||
+      (await this.getSetting('chunk_size_tokens', this.config.chunkSizeChars))
+    this.config.overlapChars =
+      (await this.getSetting('overlap_chars', this.config.overlapChars)) ||
+      (await this.getSetting('overlap_tokens', this.config.overlapChars))
    this.config.searchMode = await this.getSetting('search_mode', this.config.searchMode)
+    this.config.parseMode = await this.getSetting('parse_mode', this.config.parseMode)
+    this.config.autoInlineContextRatio = await this.getSetting(
+      'auto_inline_context_ratio',
+      this.config.autoInlineContextRatio
+    )

    // Check ANN availability on load
    try {
@@ -234,8 +247,8 @@ export default class RagExtension extends RAGExtension {
    // Load settings
    const s = this.config
    const maxSize = (s?.enabled === false ? 0 : s?.maxFileSizeMB) || undefined
-    const chunkSize = s?.chunkSizeTokens as number | undefined
-    const chunkOverlap = s?.overlapTokens as number | undefined
+    const chunkSize = s?.chunkSizeChars as number | undefined
+    const chunkOverlap = s?.overlapChars as number | undefined

    let totalChunks = 0
    const processedFiles: AttachmentFileInfo[] = []
@@ -267,39 +280,53 @@ export default class RagExtension extends RAGExtension {
  }

  onSettingUpdate<T>(key: string, value: T): void {
-    switch (key) {
-      case 'enabled':
-        this.config.enabled = Boolean(value)
-        break
-      case 'max_file_size_mb':
-        this.config.maxFileSizeMB = Number(value)
-        break
-      case 'retrieval_limit':
-        this.config.retrievalLimit = Number(value)
-        break
-      case 'retrieval_threshold':
-        this.config.retrievalThreshold = Number(value)
-        break
-      case 'chunk_size_tokens':
-        this.config.chunkSizeTokens = Number(value)
-        break
-      case 'overlap_tokens':
-        this.config.overlapTokens = Number(value)
-        break
-      case 'search_mode':
-        this.config.searchMode = String(value) as 'auto' | 'ann' | 'linear'
-        break
-    }
+      switch (key) {
+        case 'enabled':
+          this.config.enabled = Boolean(value)
+          break
+        case 'max_file_size_mb':
+          this.config.maxFileSizeMB = Number(value)
+          break
+        case 'auto_inline_context_ratio':
+          this.config.autoInlineContextRatio = Number(value)
+          break
+        case 'retrieval_limit':
+          this.config.retrievalLimit = Number(value)
+          break
+        case 'retrieval_threshold':
+          this.config.retrievalThreshold = Number(value)
+          break
+        case 'chunk_size_chars':
+          this.config.chunkSizeChars = Number(value)
+          break
+        case 'overlap_chars':
+          this.config.overlapChars = Number(value)
+          break
+        case 'search_mode':
+          this.config.searchMode = String(value) as 'auto' | 'ann' | 'linear'
+          break
+        case 'parse_mode':
+          this.config.parseMode = String(value) as 'auto' | 'inline' | 'embeddings' | 'prompt'
+          break
+      }
+  }
+
+  async parseDocument(path: string, type?: string): Promise<string> {
+    return await ragApi.parseDocument(path, type || 'application/octet-stream')
  }

  // Locally implement embedding logic (previously in embeddings-extension)
  private async embedTexts(texts: string[]): Promise<number[][]> {
-    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & { embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }> }
+    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & {
+      embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }>
+    }
    if (!llm?.embed) throw new Error('llamacpp extension not available')
    const res = await llm.embed(texts)
    const data: Array<{ embedding: number[]; index: number }> = res?.data || []
    const out: number[][] = new Array(texts.length)
-    for (const item of data) out[item.index] = item.embedding
+    for (const item of data) {
+      out[item.index] = item.embedding
+    }
    return out
  }
 }
--- a/extensions/vector-db-extension/src/index.ts
+++ b/extensions/vector-db-extension/src/index.ts
@@ -50,12 +50,17 @@ export default class VectorDBExt extends VectorDBExtension {
  }

  private async embedTexts(texts: string[]): Promise<number[][]> {
-    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & { embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }> }
+    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & {
+      embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }>
+    }
    if (!llm?.embed) throw new Error('llamacpp extension not available')
+
    const res = await llm.embed(texts)
    const data: Array<{ embedding: number[]; index: number }> = res?.data || []
    const out: number[][] = new Array(texts.length)
-    for (const item of data) out[item.index] = item.embedding
+    for (const item of data) {
+      out[item.index] = item.embedding
+    }
    return out
  }

--- a/web-app/package.json
+++ b/web-app/package.json
@@ -37,7 +37,7 @@
    "@radix-ui/react-tooltip": "1.2.4",
    "@tabler/icons-react": "3.34.0",
    "@tailwindcss/vite": "4.1.4",
-    "@tanstack/react-router": "1.117.0",
+    "@tanstack/react-router": "^1.121.34",
    "@tanstack/react-router-devtools": "1.121.34",
    "@tanstack/react-virtual": "3.13.12",
    "@tauri-apps/api": "2.8.0",
--- a/web-app/src/components/ui/button.tsx
+++ b/web-app/src/components/ui/button.tsx
@@ -17,6 +17,10 @@ const buttonVariants = cva(
          'bg-primary text-primary-fg shadow-xs hover:bg-primary/90 focus-visible:ring-primary/60 focus:ring-primary/60 focus:border-primary focus-visible:border-primary',
        destructive:
          'bg-destructive shadow-xs hover:bg-destructive/90 focus-visible:ring-destructive/60 text-destructive-fg focus:border-destructive focus:ring-destructive/60',
+        outline:
+          'border border-main-view-fg/20 bg-transparent text-main-view-fg hover:bg-main-view/60 hover:text-main-view-fg focus-visible:ring-main-view-fg/40 focus:border-main-view-fg',
+        ghost:
+          'bg-transparent text-main-view-fg hover:bg-main-view/60 focus-visible:ring-main-view-fg/40 focus:border-main-view-fg',
        link: 'underline-offset-4 hover:no-underline',
      },
      size: {
--- a/web-app/src/containers/ChatInput.tsx
+++ b/web-app/src/containers/ChatInput.tsx
@@ -15,6 +15,7 @@ import {
 import { ArrowRight } from 'lucide-react'
 import {
  IconPhoto,
+  IconAtom,
  IconTool,
  IconCodeCircle2,
  IconPlayerStopFilled,
@@ -40,13 +41,27 @@ import { TokenCounter } from '@/components/TokenCounter'
 import { useMessages } from '@/hooks/useMessages'
 import { useShallow } from 'zustand/react/shallow'
 import { McpExtensionToolLoader } from './McpExtensionToolLoader'
-import { ExtensionTypeEnum, MCPExtension, fs, RAGExtension, VectorDBExtension } from '@janhq/core'
+import {
+  ContentType,
+  ExtensionTypeEnum,
+  MCPExtension,
+  MessageStatus,
+  ThreadMessage,
+  fs,
+  VectorDBExtension,
+} from '@janhq/core'
 import { ExtensionManager } from '@/lib/extension'
 import { useAttachments } from '@/hooks/useAttachments'
 import { toast } from 'sonner'
 import { PlatformFeatures } from '@/lib/platform/const'
 import { PlatformFeature } from '@/lib/platform/types'
 import { isPlatformTauri } from '@/lib/platform/utils'
+import { processAttachmentsForSend } from '@/lib/attachmentProcessing'
+import { useAttachmentIngestionPrompt } from '@/hooks/useAttachmentIngestionPrompt'
+import {
+  NEW_THREAD_ATTACHMENT_KEY,
+  useChatAttachments,
+} from '@/hooks/useChatAttachments'

 import {
  Attachment,
@@ -77,8 +92,10 @@ const ChatInput = ({
  const streamingContent = useAppState((state) => state.streamingContent)
  const abortControllers = useAppState((state) => state.abortControllers)
  const loadingModel = useAppState((state) => state.loadingModel)
+  const updateLoadingModel = useAppState((state) => state.updateLoadingModel)
  const tools = useAppState((state) => state.tools)
  const cancelToolCall = useAppState((state) => state.cancelToolCall)
+  const setActiveModels = useAppState((state) => state.setActiveModels)
  const prompt = usePrompt((state) => state.prompt)
  const setPrompt = usePrompt((state) => state.setPrompt)
  const currentThreadId = useThreads((state) => state.currentThreadId)
@@ -99,6 +116,7 @@ const ChatInput = ({
  )

  const maxRows = 10
+  const ATTACHMENT_AUTO_INLINE_FALLBACK_BYTES = 512 * 1024

  const selectedModel = useModelProvider((state) => state.selectedModel)
  const selectedProvider = useModelProvider((state) => state.selectedProvider)
@@ -106,7 +124,6 @@ const ChatInput = ({
  const [message, setMessage] = useState('')
  const [dropdownToolsAvailable, setDropdownToolsAvailable] = useState(false)
  const [tooltipToolsAvailable, setTooltipToolsAvailable] = useState(false)
-  const [attachments, setAttachments] = useState<Attachment[]>([])
  const [isDragOver, setIsDragOver] = useState(false)
  const [hasMmproj, setHasMmproj] = useState(false)
  const activeModels = useAppState(useShallow((state) => state.activeModels))
@@ -132,15 +149,93 @@ const ChatInput = ({
  } = useJanBrowserExtension()

  const attachmentsEnabled = useAttachments((s) => s.enabled)
+  const parsePreference = useAttachments((s) => s.parseMode)
+  const autoInlineContextRatio = useAttachments((s) => s.autoInlineContextRatio)
  // Determine whether to show the Attach documents button (simple gating)
  const showAttachmentButton =
    attachmentsEnabled && PlatformFeatures[PlatformFeature.FILE_ATTACHMENTS]
  // Derived: any document currently processing (ingestion in progress)
+  const attachmentsKey = currentThreadId ?? NEW_THREAD_ATTACHMENT_KEY
+  const attachments = useChatAttachments(
+    useCallback((state) => state.getAttachments(attachmentsKey), [attachmentsKey])
+  )
+  const attachmentsKeyRef = useRef(attachmentsKey)
+  const setAttachmentsForThread = useChatAttachments(
+    (state) => state.setAttachments
+  )
+  const clearAttachmentsForThread = useChatAttachments(
+    (state) => state.clearAttachments
+  )
+  const transferAttachments = useChatAttachments(
+    (state) => state.transferAttachments
+  )
+  const getProviderByName = useModelProvider((state) => state.getProviderByName)
+
+  useEffect(() => {
+    attachmentsKeyRef.current = attachmentsKey
+  }, [attachmentsKey])
+
  const ingestingDocs = attachments.some(
    (a) => a.type === 'document' && a.processing
  )
  const ingestingAny = attachments.some((a) => a.processing)

+  const lastTransferredThreadId = useRef<string | null>(null)
+
+  useEffect(() => {
+    if (currentThreadId && lastTransferredThreadId.current !== currentThreadId) {
+      transferAttachments(NEW_THREAD_ATTACHMENT_KEY, currentThreadId)
+      lastTransferredThreadId.current = currentThreadId
+    }
+  }, [currentThreadId, transferAttachments])
+
+  const updateAttachmentProcessing = useCallback(
+    (
+      fileName: string,
+      status: 'processing' | 'done' | 'error' | 'clear_docs' | 'clear_all'
+    ) => {
+      const targetKey = attachmentsKeyRef.current
+      const storeState = useChatAttachments.getState()
+
+      // Find all keys that have this attachment (including NEW_THREAD_ATTACHMENT_KEY)
+      const allMatchingKeys = Object.entries(storeState.attachmentsByThread)
+        .filter(([, list]) => list?.some((att) => att.name === fileName))
+        .map(([key]) => key)
+
+      // Always include targetKey and all matching keys
+      const keysToUpdate = new Set([targetKey, ...allMatchingKeys])
+
+      const applyUpdate = (key: string) => {
+        if (status === 'clear_docs') {
+          setAttachmentsForThread(key, (prev) =>
+            prev.filter((a) => a.type !== 'document')
+          )
+          return
+        }
+
+        if (status === 'clear_all') {
+          clearAttachmentsForThread(key)
+          return
+        }
+
+        setAttachmentsForThread(key, (prev) =>
+          prev.map((att) =>
+            att.name === fileName
+              ? {
+                  ...att,
+                  processing: status === 'processing',
+                  processed: status === 'done' ? true : att.processed,
+                }
+              : att
+          )
+        )
+      }
+
+      keysToUpdate.forEach((key) => applyUpdate(key as string))
+    },
+    [clearAttachmentsForThread, setAttachmentsForThread]
+  )
+
  // Check for mmproj existence or vision capability when model changes
  useEffect(() => {
    const checkMmprojSupport = async () => {
@@ -178,35 +273,13 @@ const ChatInput = ({
    if (!prompt.trim()) {
      return
    }
+    if (ingestingAny) {
+      toast.info('Please wait for attachments to finish processing')
+      return
+    }

    setMessage('')

-    // Callback to update attachment processing state
-    const updateAttachmentProcessing = (
-      fileName: string,
-      status: 'processing' | 'done' | 'error' | 'clear_docs' | 'clear_all'
-    ) => {
-      if (status === 'clear_docs') {
-        setAttachments((prev) => prev.filter((a) => a.type !== 'document'))
-        return
-      }
-      if (status === 'clear_all') {
-        setAttachments([])
-        return
-      }
-      setAttachments((prev) =>
-        prev.map((att) =>
-          att.name === fileName
-            ? {
-              ...att,
-              processing: status === 'processing',
-              processed: status === 'done' ? true : att.processed,
-            }
-            : att
-        )
-      )
-    }
-
    sendMessage(
      prompt,
      true,
@@ -278,6 +351,184 @@ const ChatInput = ({

  const fileInputRef = useRef<HTMLInputElement>(null)

+  const processNewDocumentAttachments = useCallback(
+    async (docs: Attachment[]) => {
+      if (!docs.length || !currentThreadId) return
+
+      const modelReady = await (async () => {
+        if (!selectedModel?.id) return false
+        if (activeModels.includes(selectedModel.id)) return true
+        const provider = getProviderByName(selectedProvider)
+        if (!provider) return false
+        try {
+          updateLoadingModel(true)
+          await serviceHub.models().startModel(provider, selectedModel.id)
+          const active = await serviceHub.models().getActiveModels()
+          setActiveModels(active || [])
+          return active?.includes(selectedModel.id) ?? false
+        } catch (err) {
+          console.warn('Failed to start model before attachment validation', err)
+          return false
+        } finally {
+          updateLoadingModel(false)
+        }
+      })()
+
+      const modelContextLength = (() => {
+        const ctx = selectedModel?.settings?.ctx_len?.controller_props?.value
+        if (typeof ctx === 'number') return ctx
+        if (typeof ctx === 'string') {
+          const parsed = parseInt(ctx, 10)
+          return Number.isFinite(parsed) ? parsed : undefined
+        }
+        return undefined
+      })()
+
+      const rawContextThreshold =
+        typeof modelContextLength === 'number' && modelContextLength > 0
+          ? Math.floor(
+              modelContextLength *
+                (typeof autoInlineContextRatio === 'number'
+                  ? autoInlineContextRatio
+                  : 0.75)
+            )
+          : undefined
+
+      const contextThreshold =
+        typeof rawContextThreshold === 'number' &&
+        Number.isFinite(rawContextThreshold) &&
+        rawContextThreshold > 0
+          ? rawContextThreshold
+          : undefined
+
+      const hasContextEstimate =
+        modelReady &&
+        typeof contextThreshold === 'number' &&
+        Number.isFinite(contextThreshold) &&
+        contextThreshold > 0
+      const docsNeedingPrompt = docs.filter((doc) => {
+        if (doc.processed || doc.injectionMode) return false
+        const preference = doc.parseMode ?? parsePreference
+        return preference === 'prompt' || (preference === 'auto' && !hasContextEstimate)
+      })
+
+      // Map to store individual choices for each document
+      const docChoices = new Map<string, 'inline' | 'embeddings'>()
+
+      if (docsNeedingPrompt.length > 0) {
+        // Ask for each file individually
+        for (let i = 0; i < docsNeedingPrompt.length; i++) {
+          const doc = docsNeedingPrompt[i]
+          const choice = await useAttachmentIngestionPrompt
+            .getState()
+            .showPrompt(doc, ATTACHMENT_AUTO_INLINE_FALLBACK_BYTES, i, docsNeedingPrompt.length)
+
+          if (!choice) {
+            // User cancelled - remove all pending docs
+            setAttachmentsForThread(attachmentsKey, (prev) =>
+              prev.filter(
+                (att) =>
+                  !docsNeedingPrompt.some(
+                    (doc) => doc.path && att.path && doc.path === att.path
+                  )
+              )
+            )
+            return
+          }
+
+          // Store the choice for this specific document
+          if (doc.path) {
+            docChoices.set(doc.path, choice)
+          }
+        }
+      }
+
+      const estimateTokens = async (text: string): Promise<number | undefined> => {
+        try {
+          if (!selectedModel?.id || !modelReady) return undefined
+          const tokenCount = await serviceHub
+            .models()
+            .getTokensCount(selectedModel.id, [
+              {
+                id: 'inline-attachment',
+                object: 'thread.message',
+                thread_id: currentThreadId,
+                role: 'user',
+                content: [
+                  {
+                    type: ContentType.Text,
+                    text: { value: text, annotations: [] },
+                  },
+                ],
+                status: MessageStatus.Ready,
+                created_at: Date.now(),
+                completed_at: Date.now(),
+              } as ThreadMessage,
+            ])
+          if (
+            typeof tokenCount !== 'number' ||
+            !Number.isFinite(tokenCount) ||
+            tokenCount <= 0
+          ) {
+            return undefined
+          }
+          return tokenCount
+        } catch (e) {
+          console.debug('Failed to estimate tokens for attachment content', e)
+          return undefined
+        }
+      }
+
+      try {
+        const { processedAttachments, hasEmbeddedDocuments } =
+          await processAttachmentsForSend({
+            attachments: docs,
+            threadId: currentThreadId,
+            serviceHub,
+            selectedProvider,
+            contextThreshold,
+            estimateTokens,
+            parsePreference,
+            perFileChoices: docChoices.size > 0 ? docChoices : undefined,
+            updateAttachmentProcessing,
+          })
+
+        if (processedAttachments.length > 0) {
+          setAttachmentsForThread(attachmentsKey, (prev) =>
+            prev.map((att) => {
+              const match = processedAttachments.find(
+                (p) => p.path && att.path && p.path === att.path
+              )
+              return match ? { ...att, ...match } : att
+            })
+          )
+        }
+
+        if (hasEmbeddedDocuments) {
+          useThreads.getState().updateThread(currentThreadId, {
+            metadata: { hasDocuments: true },
+          })
+        }
+      } catch (e) {
+        console.error('Failed to process attachments:', e)
+      }
+    },
+    [
+      autoInlineContextRatio,
+      activeModels,
+      currentThreadId,
+      getProviderByName,
+      parsePreference,
+      selectedModel?.id,
+      selectedModel?.settings?.ctx_len?.controller_props?.value,
+      selectedProvider,
+      serviceHub,
+      setActiveModels,
+      updateAttachmentProcessing,
+      updateLoadingModel,
+    ]
+  )
+
  const handleAttachDocsIngest = async () => {
    try {
      if (!attachmentsEnabled) {
@@ -331,6 +582,7 @@ const ChatInput = ({
            path: p,
            fileType,
            size,
+            parseMode: parsePreference,
          })
        )
      }
@@ -338,7 +590,7 @@ const ChatInput = ({
      let duplicates: string[] = []
      let newDocAttachments: Attachment[] = []

-      setAttachments((currentAttachments) => {
+      setAttachmentsForThread(attachmentsKey, (currentAttachments) => {
        const existingPaths = new Set(
          currentAttachments
            .filter((a) => a.type === 'document' && a.path)
@@ -356,10 +608,9 @@ const ChatInput = ({
          newDocAttachments.push(att)
        }

-        if (newDocAttachments.length > 0) {
-          return [...currentAttachments, ...newDocAttachments]
-        }
-        return currentAttachments
+        return newDocAttachments.length > 0
+          ? [...currentAttachments, ...newDocAttachments]
+          : currentAttachments
      })

      if (duplicates.length > 0) {
@@ -369,72 +620,7 @@ const ChatInput = ({
      }

      if (newDocAttachments.length > 0) {
-        if (currentThreadId) {
-          const ragExtension = ExtensionManager.getInstance().get(
-            ExtensionTypeEnum.RAG
-          ) as RAGExtension | undefined
-          if (!ragExtension) {
-            toast.error('RAG extension not available')
-            return
-          }
-
-          for (const doc of newDocAttachments) {
-            try {
-              // Mark as processing
-              setAttachments((prev) =>
-                prev.map((a) =>
-                  a.path === doc.path && a.type === 'document'
-                    ? { ...a, processing: true }
-                    : a
-                )
-              )
-
-              const result = await ragExtension.ingestAttachments(
-                currentThreadId,
-                [
-                  {
-                    path: doc.path!,
-                    name: doc.name,
-                    type: doc.fileType,
-                    size: doc.size,
-                  },
-                ]
-              )
-
-              const fileInfo = result.files?.[0]
-              if (fileInfo?.id) {
-                // Mark as processed with ID
-                setAttachments((prev) =>
-                  prev.map((a) =>
-                    a.path === doc.path && a.type === 'document'
-                      ? {
-                        ...a,
-                        processing: false,
-                        processed: true,
-                        id: fileInfo.id,
-                        chunkCount: fileInfo.chunk_count,
-                      }
-                      : a
-                  )
-                )
-              } else {
-                throw new Error('No file ID returned from ingestion')
-              }
-            } catch (error) {
-              console.error('Failed to ingest document:', error)
-              // Remove failed document
-              setAttachments((prev) =>
-                prev.filter(
-                  (a) => !(a.path === doc.path && a.type === 'document')
-                )
-              )
-              toast.error(`Failed to ingest ${doc.name}`, {
-                description:
-                  error instanceof Error ? error.message : String(error),
-              })
-            }
-          }
-        }
+        await processNewDocumentAttachments(newDocAttachments)
      }
    } catch (e) {
      console.error('Failed to attach documents:', e)
@@ -467,7 +653,9 @@ const ChatInput = ({
      }
    }

-    setAttachments((prev) => prev.filter((_, index) => index !== indexToRemove))
+    setAttachmentsForThread(attachmentsKey, (prev) =>
+      prev.filter((_, index) => index !== indexToRemove)
+    )
  }

  const getFileTypeFromExtension = (fileName: string): string => {
@@ -554,7 +742,7 @@ const ChatInput = ({
    let duplicates: string[] = []
    let newFiles: Attachment[] = []

-    setAttachments((currentAttachments) => {
+    setAttachmentsForThread(attachmentsKey, (currentAttachments) => {
      const existingImageNames = new Set(
        currentAttachments.filter((a) => a.type === 'image').map((a) => a.name)
      )
@@ -581,7 +769,7 @@ const ChatInput = ({
        for (const img of newFiles) {
          try {
            // Mark as processing
-            setAttachments((prev) =>
+            setAttachmentsForThread(attachmentsKey, (prev) =>
              prev.map((a) =>
                a.name === img.name && a.type === 'image'
                  ? { ...a, processing: true }
@@ -595,7 +783,7 @@ const ChatInput = ({

            if (result?.id) {
              // Mark as processed with ID
-              setAttachments((prev) =>
+              setAttachmentsForThread(attachmentsKey, (prev) =>
                prev.map((a) =>
                  a.name === img.name && a.type === 'image'
                    ? {
@@ -613,7 +801,7 @@ const ChatInput = ({
          } catch (error) {
            console.error('Failed to ingest image:', error)
            // Remove failed image
-            setAttachments((prev) =>
+            setAttachmentsForThread(attachmentsKey, (prev) =>
              prev.filter((a) => !(a.name === img.name && a.type === 'image'))
            )
            toast.error(`Failed to ingest ${img.name}`, {
@@ -938,102 +1126,105 @@ const ChatInput = ({
            onDrop={hasMmproj ? handleDrop : undefined}
          >
            {attachments.length > 0 && (
-              <div className="flex gap-3 items-center p-2 pb-0">
-                {attachments
-                  .map((att, idx) => ({ att, idx }))
-                  .map(({ att, idx }) => {
-                    const isImage = att.type === 'image'
-                    const ext = att.fileType || att.mimeType?.split('/')[1]
-                    return (
-                      <div
-                        key={`${att.type}-${idx}-${att.name}`}
-                        className="relative"
-                      >
-                        <TooltipProvider>
-                          <Tooltip>
-                            <TooltipTrigger asChild>
-                              <div
-                                className={cn(
-                                  'relative border border-main-view-fg/5 rounded-lg size-14 overflow-hidden bg-main-view/40',
-                                  'flex items-center justify-center'
-                                )}
-                              >
-                                {/* Inner content by state */}
-                                {isImage && att.dataUrl ? (
-                                  <img
-                                    className="object-cover w-full h-full"
-                                    src={att.dataUrl}
-                                    alt={`${att.name}`}
-                                  />
-                                ) : (
-                                  <div className="flex flex-col items-center justify-center text-main-view-fg/70">
-                                    <IconPaperclip size={18} />
-                                    {ext && (
-                                      <span className="text-[10px] leading-none mt-0.5 uppercase opacity-70">
-                                        .{ext}
-                                      </span>
-                                    )}
-                                  </div>
-                                )}
-
-                                {/* Overlay spinner when processing */}
-                                {att.processing && (
-                                  <div className="absolute inset-0 flex items-center justify-center bg-black/10">
-                                    <IconLoader2
-                                      size={18}
-                                      className="text-main-view-fg/80 animate-spin"
+              <div className="flex flex-col gap-2 p-2 pb-0">
+                <div className="flex gap-3 items-center">
+                  {attachments
+                    .map((att, idx) => ({ att, idx }))
+                    .map(({ att, idx }) => {
+                      const isImage = att.type === 'image'
+                      const ext = att.fileType || att.mimeType?.split('/')[1]
+                      return (
+                        <div
+                          key={`${att.type}-${idx}-${att.name}`}
+                          className="relative"
+                        >
+                          <TooltipProvider>
+                            <Tooltip>
+                              <TooltipTrigger asChild>
+                                <div
+                                  className={cn(
+                                    'relative border border-main-view-fg/5 rounded-lg size-14 overflow-hidden bg-main-view/40',
+                                    'flex items-center justify-center'
+                                  )}
+                                >
+                                  {/* Inner content by state */}
+                                  {isImage && att.dataUrl ? (
+                                    <img
+                                      className="object-cover w-full h-full"
+                                      src={att.dataUrl}
+                                      alt={`${att.name}`}
                                    />
-                                  </div>
-                                )}
+                                  ) : (
+                                    <div className="flex flex-col items-center justify-center text-main-view-fg/70">
+                                      <IconPaperclip size={18} />
+                                      {ext && (
+                                        <span className="text-[10px] leading-none mt-0.5 uppercase opacity-70">
+                                          .{ext}
+                                        </span>
+                                      )}
+                                    </div>
+                                  )}

-                                {/* Overlay success check when processed */}
-                                {att.processed && !att.processing && (
-                                  <div className="absolute inset-0 flex items-center justify-center bg-black/5">
-                                    <div className="bg-green-600/90 rounded-full p-1">
-                                      <IconCheck
-                                        size={14}
-                                        className="text-white"
+                                  {/* Overlay spinner when processing */}
+                                  {att.processing && (
+                                    <div className="absolute inset-0 flex items-center justify-center bg-black/10">
+                                      <IconLoader2
+                                        size={18}
+                                        className="text-main-view-fg/80 animate-spin"
                                      />
                                    </div>
-                                  </div>
-                                )}
-                              </div>
-                            </TooltipTrigger>
-                            <TooltipContent>
-                              <div className="text-xs">
-                                <div
-                                  className="font-medium truncate max-w-52"
-                                  title={att.name}
-                                >
-                                  {att.name}
-                                </div>
-                                <div className="opacity-70">
-                                  {isImage
-                                    ? att.mimeType || 'image'
-                                    : ext
-                                      ? `.${ext}`
-                                      : 'document'}
-                                  {att.size
-                                    ? ` · ${formatBytes(att.size)}`
-                                    : ''}
-                                </div>
-                              </div>
-                            </TooltipContent>
-                          </Tooltip>
-                        </TooltipProvider>
+                                  )}
+
+                                  {/* Overlay success check when processed */}
+                                  {att.processed && !att.processing && (
+                                    <div className="absolute inset-0 flex items-center justify-center bg-black/5">
+                                      <div className="bg-green-600/90 rounded-full p-1">
+                                        <IconCheck
+                                          size={14}
+                                          className="text-white"
+                                        />
+                                      </div>
+                                    </div>
+                                  )}
+                                </div>
+                              </TooltipTrigger>
+                              <TooltipContent>
+                                <div className="text-xs">
+                                  <div
+                                    className="font-medium truncate max-w-52"
+                                    title={att.name}
+                                  >
+                                    {att.name}
+                                  </div>
+                                  <div className="opacity-70">
+                                    {isImage
+                                      ? att.mimeType || 'image'
+                                      : ext
+                                        ? `.${ext}`
+                                        : 'document'}
+                                    {att.size
+                                      ? ` · ${formatBytes(att.size)}`
+                                      : ''}
+                                  </div>
+                                </div>
+                              </TooltipContent>
+                            </Tooltip>
+                          </TooltipProvider>
+
+                          {/* Remove button disabled while processing - outside overflow-hidden container */}
+                          {!att.processing && (
+                            <div
+                              className="absolute -top-1 -right-2.5 bg-destructive size-5 flex rounded-full items-center justify-center cursor-pointer"
+                              onClick={() => handleRemoveAttachment(idx)}
+                            >
+                              <IconX className="text-destructive-fg" size={16} />
+                            </div>
+                          )}
+                        </div>
+                      )
+                    })}
+                </div>

-                        {/* Remove button disabled while processing - outside overflow-hidden container */}
-                        {!att.processing && (
-                          <div
-                            className="absolute -top-1 -right-2.5 bg-destructive size-5 flex rounded-full items-center justify-center cursor-pointer"
-                            onClick={() => handleRemoveAttachment(idx)}
-                          >
-                            <IconX className="text-destructive-fg" size={16} />
-                          </div>
-                        )}
-                      </div>
-                    )
-                  })}
              </div>
            )}
            <TextareaAutosize
@@ -1059,7 +1250,7 @@ const ChatInput = ({
                  // - Enter is pressed without Shift
                  // - The streaming content has finished
                  // - Prompt is not empty
-                  if (!streamingContent && prompt.trim()) {
+                  if (!streamingContent && prompt.trim() && !ingestingAny) {
                    handleSendMessage(prompt)
                  }
                  // When Shift+Enter is pressed, a new line is added (default behavior)
@@ -1287,6 +1478,40 @@ const ChatInput = ({
                      </Tooltip>
                    </TooltipProvider>
                  ))}
+                {selectedModel?.capabilities?.includes('web_search') && (
+                  <TooltipProvider>
+                    <Tooltip>
+                      <TooltipTrigger asChild>
+                        <div className="h-7 p-1 flex items-center justify-center rounded-sm hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out gap-1">
+                          <IconWorld
+                            size={18}
+                            className="text-main-view-fg/50"
+                          />
+                        </div>
+                      </TooltipTrigger>
+                      <TooltipContent>
+                        <p>Web Search</p>
+                      </TooltipContent>
+                    </Tooltip>
+                  </TooltipProvider>
+                )}
+                {selectedModel?.capabilities?.includes('reasoning') && (
+                  <TooltipProvider>
+                    <Tooltip>
+                      <TooltipTrigger asChild>
+                        <div className="h-7 p-1 flex items-center justify-center rounded-sm hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out gap-1">
+                          <IconAtom
+                            size={18}
+                            className="text-main-view-fg/50"
+                          />
+                        </div>
+                      </TooltipTrigger>
+                      <TooltipContent>
+                        <p>{t('reasoning')}</p>
+                      </TooltipContent>
+                    </Tooltip>
+                  </TooltipProvider>
+                )}
              </div>
            </div>

--- a/web-app/src/containers/ThreadContent.tsx
+++ b/web-app/src/containers/ThreadContent.tsx
@@ -2,7 +2,13 @@
 import { ThreadMessage } from '@janhq/core'
 import { RenderMarkdown } from './RenderMarkdown'
 import React, { Fragment, memo, useCallback, useMemo, useState } from 'react'
-import { IconCopy, IconCopyCheck, IconRefresh } from '@tabler/icons-react'
+import {
+  IconCopy,
+  IconCopyCheck,
+  IconDatabase,
+  IconFileText,
+  IconRefresh,
+} from '@tabler/icons-react'
 import { useAppState } from '@/hooks/useAppState'
 import { cn } from '@/lib/utils'
 import { useMessages } from '@/hooks/useMessages'
@@ -28,6 +34,13 @@ import { useTranslation } from '@/i18n/react-i18next-compat'
 import { useModelProvider } from '@/hooks/useModelProvider'
 import { extractFilesFromPrompt } from '@/lib/fileMetadata'
 import { createImageAttachment } from '@/types/attachment'
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogHeader,
+  DialogTitle,
+} from '@/components/ui/dialog'

 const CopyButton = ({ text }: { text: string }) => {
  const [copied, setCopied] = useState(false)
@@ -83,6 +96,9 @@ export const ThreadContent = memo(
  ) => {
    const { t } = useTranslation()
    const selectedModel = useModelProvider((state) => state.selectedModel)
+    const [inlinePreview, setInlinePreview] = useState<
+      { name: string; content: string } | null
+    >(null)

    // Use useMemo to stabilize the components prop
    const linkComponents = useMemo(
@@ -112,6 +128,20 @@ export const ThreadContent = memo(
      return { files: [], cleanPrompt: text }
    }, [text, item.role])

+    // Index entries of metadata.inline_file_contents by file name; malformed entries are skipped.
+    const inlineFileContents = useMemo(() => {
+      const byName = new Map<string, string>()
+      const contents = (item.metadata as any)?.inline_file_contents
+      if (!Array.isArray(contents)) return byName
+      for (const entry of contents) {
+        const { name, content } = entry ?? {}
+        if (typeof name === 'string' && typeof content === 'string') {
+          byName.set(name, content)
+        }
+      }
+      return byName
+    }, [item.metadata])
+
    const { reasoningSegment, textSegment } = useMemo(() => {
      // Check for thinking formats
      const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
@@ -249,32 +279,80 @@ export const ThreadContent = memo(
            {attachedFiles.length > 0 && (
              <div className="flex justify-end w-full mt-2 mb-2">
                <div className="flex flex-wrap gap-2 max-w-[80%] justify-end">
-                  {attachedFiles.map((file, index) => (
-                    <div
-                      key={file.id || index}
-                      className="flex items-center gap-2 px-3 py-2 bg-main-view-fg/5 rounded-md border border-main-view-fg/10 text-xs"
-                    >
-                      <svg
-                        className="w-4 h-4 text-main-view-fg/50"
-                        fill="none"
-                        viewBox="0 0 24 24"
-                        stroke="currentColor"
+                  {attachedFiles.map((file, index) => {
+                    const inlineContent =
+                      file.injectionMode === 'inline'
+                        ? inlineFileContents.get(file.name) || undefined
+                        : undefined
+                    const indicator =
+                      file.injectionMode ||
+                      (inlineContent ? 'inline' : undefined)
+                    const canPreview = Boolean(
+                      indicator === 'inline' && inlineContent
+                    )
+
+                    return (
+                      <div
+                        key={file.id || index}
+                        className="flex items-center gap-2 px-3 py-2 bg-main-view-fg/5 rounded-md border border-main-view-fg/10 text-xs"
                      >
-                        <path
-                          strokeLinecap="round"
-                          strokeLinejoin="round"
-                          strokeWidth={2}
-                          d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"
-                        />
-                      </svg>
-                      <span className="text-main-view-fg">{file.name}</span>
-                      {file.type && (
-                        <span className="text-main-view-fg/40 text-[10px]">
-                          .{file.type}
-                        </span>
-                      )}
-                    </div>
-                  ))}
+                        {indicator && (
+                          <Tooltip>
+                            <TooltipTrigger asChild>
+                              <span
+                                className="inline-flex items-center justify-center size-6 rounded-full bg-main-view/70 text-main-view-fg/80"
+                                aria-label={
+                                  indicator === 'inline'
+                                    ? t('common:attachmentInjectedIndicator')
+                                    : t('common:attachmentEmbeddedIndicator')
+                                }
+                              >
+                                {indicator === 'inline' ? (
+                                  <IconFileText size={14} />
+                                ) : (
+                                  <IconDatabase size={14} />
+                                )}
+                              </span>
+                            </TooltipTrigger>
+                            <TooltipContent>
+                              {indicator === 'inline'
+                                ? t('common:attachmentInjectedIndicator')
+                                : t('common:attachmentEmbeddedIndicator')}
+                            </TooltipContent>
+                          </Tooltip>
+                        )}
+
+                        <button
+                          type="button"
+                          disabled={!canPreview}
+                          onClick={() =>
+                            canPreview &&
+                            setInlinePreview({
+                              name: file.name,
+                              content: inlineContent!,
+                            })
+                          }
+                          className={cn(
+                            'text-main-view-fg text-left truncate max-w-48',
+                            canPreview && 'hover:underline'
+                          )}
+                          title={
+                            canPreview
+                              ? t('common:viewInjectedContent')
+                              : file.name
+                          }
+                        >
+                          {file.name}
+                        </button>
+
+                        {file.type && (
+                          <span className="text-main-view-fg/40 text-[10px]">
+                            .{file.type}
+                          </span>
+                        )}
+                      </div>
+                    )
+                  })}
                </div>
              </div>
            )}
@@ -458,6 +536,23 @@ export const ThreadContent = memo(
          </div>
        )}
        {item.contextOverflowModal && item.contextOverflowModal}
+
+        <Dialog
+          open={Boolean(inlinePreview)}
+          onOpenChange={(open) => {
+            if (!open) setInlinePreview(null)
+          }}
+        >
+          <DialogContent className="sm:max-w-3xl">
+            <DialogHeader>
+              <DialogTitle>{t('common:injectedContentTitle')}</DialogTitle>
+              <DialogDescription>{inlinePreview?.name}</DialogDescription>
+            </DialogHeader>
+            <div className="max-h-[60vh] overflow-auto whitespace-pre-wrap text-sm font-mono bg-muted px-3 py-2 rounded-md">
+              {inlinePreview?.content}
+            </div>
+          </DialogContent>
+        </Dialog>
      </Fragment>
    )
  }
--- a/web-app/src/containers/dialogs/AttachmentIngestionDialog.tsx
+++ b/web-app/src/containers/dialogs/AttachmentIngestionDialog.tsx
@@ -0,0 +1,73 @@
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogFooter,
+  DialogHeader,
+  DialogTitle,
+} from '@/components/ui/dialog'
+import { Button } from '@/components/ui/button'
+import { useAttachmentIngestionPrompt } from '@/hooks/useAttachmentIngestionPrompt'
+import { useTranslation } from '@/i18n'
+/** Byte count as e.g. "1.5 MB" ('' when missing or <= 0); the unit index is clamped to B..GB. */
+const formatBytes = (bytes?: number) => {
+  if (!bytes || bytes <= 0) return ''
+  const units = ['B', 'KB', 'MB', 'GB']
+  const exponent = Math.max(0, Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1))
+  const value = bytes / Math.pow(1024, exponent)
+  return `${value.toFixed(value >= 10 || exponent === 0 ? 0 : 1)} ${units[exponent]}`
+}
+
+/** Modal that asks whether the attachment held by the prompt store is inlined or embedded. */
+export default function AttachmentIngestionDialog() {
+  const { t } = useTranslation()
+  const prompt = useAttachmentIngestionPrompt()
+  const attachment = prompt.currentAttachment
+
+  // Render nothing unless a prompt is open for a concrete attachment.
+  if (!prompt.isModalOpen || !attachment) return null
+
+  // A close request from the dialog counts as a cancel; open requests are ignored.
+  const handleOpenChange = (open: boolean) => {
+    if (!open) prompt.cancel()
+  }
+
+  return (
+    <Dialog open={prompt.isModalOpen} onOpenChange={handleOpenChange}>
+      <DialogContent onInteractOutside={(e) => e.preventDefault()}>
+        <DialogHeader>
+          <DialogTitle>
+            {t('common:attachmentsIngestion.title')}
+            {prompt.totalCount > 1 && (
+              <span className="text-sm font-normal text-main-view-fg/70 ml-2">
+                ({prompt.currentIndex + 1} of {prompt.totalCount})
+              </span>
+            )}
+          </DialogTitle>
+          <DialogDescription>{t('common:attachmentsIngestion.description')}</DialogDescription>
+        </DialogHeader>
+
+        <div className="border border-main-view-fg/10 rounded-md p-3 bg-main-view/40">
+          <div className="flex items-center justify-between gap-2">
+            <span className="truncate font-medium" title={attachment.name}>
+              {attachment.name}
+            </span>
+            <span className="text-xs text-main-view-fg/70 flex-shrink-0">
+              {formatBytes(attachment.size)}
+            </span>
+          </div>
+        </div>
+
+        <DialogFooter className="flex gap-2 sm:justify-end">
+          <Button variant="ghost" onClick={prompt.cancel}>{t('common:cancel')}</Button>
+          <Button variant="outline" className="border-main-view-fg/20" onClick={() => prompt.choose('embeddings')}>
+            {t('common:attachmentsIngestion.embeddings')}
+          </Button>
+          <Button onClick={() => prompt.choose('inline')}>
+            {t('common:attachmentsIngestion.inline')}
+          </Button>
+        </DialogFooter>
+      </DialogContent>
+    </Dialog>
+  )
+}
--- a/web-app/src/hooks/tests/useModelProvider.test.ts
+++ b/web-app/src/hooks/tests/useModelProvider.test.ts
@@ -20,10 +20,12 @@ vi.mock('@/constants/localStorage', () => ({

 // Mock localStorage
 const localStorageMock = {
-  getItem: vi.fn(),
+  getItem: vi.fn(() => null),
  setItem: vi.fn(),
  removeItem: vi.fn(),
  clear: vi.fn(),
+  length: 0,
+  key: vi.fn(() => null),
 }
 Object.defineProperty(window, 'localStorage', {
  value: localStorageMock,
@@ -32,8 +34,11 @@ Object.defineProperty(window, 'localStorage', {

 describe('useModelProvider - displayName functionality', () => {
  beforeEach(() => {
-    vi.clearAllMocks()
+    // Reset the mock implementations instead of clearing them
    localStorageMock.getItem.mockReturnValue(null)
+    localStorageMock.setItem.mockClear()
+    localStorageMock.removeItem.mockClear()
+    localStorageMock.clear.mockClear()

    // Reset Zustand store to default state
    act(() => {
--- a/web-app/src/hooks/useAttachmentIngestionPrompt.ts
+++ b/web-app/src/hooks/useAttachmentIngestionPrompt.ts
@@ -0,0 +1,54 @@
+import { Attachment } from '@/types/attachment'
+import { create } from 'zustand'
+
+type AttachmentSummary = Pick<Attachment, 'name' | 'size'> // the only attachment fields the prompt keeps
+
+type AttachmentIngestionState = { // state and actions of the attachment ingestion prompt store
+  isModalOpen: boolean // set true by showPrompt, false again by choose/cancel
+  currentAttachment: AttachmentSummary | null // attachment being asked about; null when no prompt is open
+  currentIndex: number // index passed to showPrompt for the current attachment
+  totalCount: number // total passed to showPrompt alongside currentIndex
+  sizeThreshold: number // stored by showPrompt; the store itself never reads it
+  resolver: ((choice: 'inline' | 'embeddings' | undefined) => void) | null // resolve callback of the pending showPrompt promise; null when idle
+  showPrompt: (
+    attachment: AttachmentSummary,
+    sizeThreshold: number,
+    currentIndex: number,
+    totalCount: number
+  ) => Promise<'inline' | 'embeddings' | undefined> // settles with the chosen mode, or undefined on cancel
+  choose: (choice: 'inline' | 'embeddings') => void // resolves the pending prompt with the choice and closes it
+  cancel: () => void // resolves the pending prompt with undefined and closes it
+}
+
+/** Zustand store driving the per-attachment inline-vs-embeddings prompt dialog. */
+export const useAttachmentIngestionPrompt = create<AttachmentIngestionState>()(
+  (set, get) => ({
+    isModalOpen: false,
+    currentAttachment: null,
+    currentIndex: 0,
+    totalCount: 0,
+    sizeThreshold: 0,
+    resolver: null,
+    // Opens the prompt; the promise resolves with the chosen mode, or undefined when cancelled.
+    showPrompt: (attachment, sizeThreshold, currentIndex, totalCount) =>
+      new Promise<'inline' | 'embeddings' | undefined>((resolve) => {
+        get().resolver?.(undefined) // cancel a still-pending prompt so its awaiter never hangs
+        set({
+          isModalOpen: true,
+          currentAttachment: attachment,
+          currentIndex,
+          totalCount,
+          sizeThreshold,
+          resolver: resolve,
+        })
+      }),
+    choose: (choice) => {
+      get().resolver?.(choice)
+      set({ isModalOpen: false, currentAttachment: null, resolver: null })
+    },
+    cancel: () => {
+      get().resolver?.(undefined)
+      set({ isModalOpen: false, currentAttachment: null, resolver: null })
+    },
+  })
+)
--- a/web-app/src/hooks/useAttachments.ts
+++ b/web-app/src/hooks/useAttachments.ts
@@ -9,22 +9,26 @@ export type AttachmentsSettings = {
  maxFileSizeMB: number
  retrievalLimit: number
  retrievalThreshold: number
-  chunkSizeTokens: number
-  overlapTokens: number
+  chunkSizeChars: number
+  overlapChars: number
  searchMode: 'auto' | 'ann' | 'linear'
+  parseMode: 'auto' | 'inline' | 'embeddings' | 'prompt'
+  autoInlineContextRatio: number
 }

 type AttachmentsStore = AttachmentsSettings & {
  // Dynamic controller definitions for rendering UI
  settingsDefs: SettingComponentProps[]
-  loadSettingsDefs: () => Promise<void>
+  loadSettingsDefs: () => Promise<boolean>
  setEnabled: (v: boolean) => void
  setMaxFileSizeMB: (v: number) => void
  setRetrievalLimit: (v: number) => void
  setRetrievalThreshold: (v: number) => void
-  setChunkSizeTokens: (v: number) => void
-  setOverlapTokens: (v: number) => void
+  setChunkSizeChars: (v: number) => void
+  setOverlapChars: (v: number) => void
  setSearchMode: (v: 'auto' | 'ann' | 'linear') => void
+  setParseMode: (v: 'auto' | 'inline' | 'embeddings' | 'prompt') => void
+  setAutoInlineContextRatio: (v: number) => void
 }

 const getRagExtension = (): RAGExtension | undefined => {
@@ -43,18 +47,56 @@ export const useAttachments = create<AttachmentsStore>()((set) => ({
  maxFileSizeMB: 20,
  retrievalLimit: 3,
  retrievalThreshold: 0.3,
-  chunkSizeTokens: 512,
-  overlapTokens: 64,
+  chunkSizeChars: 512,
+  overlapChars: 64,
  searchMode: 'auto',
+  parseMode: 'auto',
+  autoInlineContextRatio: 0.75,
  settingsDefs: [],
  loadSettingsDefs: async () => {
    const ext = getRagExtension()
-    if (!ext?.getSettings) return
+    if (!ext?.getSettings) return false // extension (or its getSettings) not available: report failure so the import-time loop can retry
    try {
      const defs = await ext.getSettings()
-      if (Array.isArray(defs)) set({ settingsDefs: defs })
+      if (!Array.isArray(defs)) return false // unexpected shape is treated as a failed load
+
+      const map = new Map<string, unknown>() // setting key -> current controller value
+      defs.forEach((setting) => map.set(setting.key, setting?.controllerProps?.value))
+
+      set((prev) => ({ // each field keeps its previous value when the extension has no value for its key
+        settingsDefs: defs,
+        enabled:
+          fileAttachmentsFeatureEnabled && // a falsy platform flag forces enabled to be falsy regardless of the stored value
+          ((map.get('enabled') as boolean | undefined) ?? prev.enabled),
+        maxFileSizeMB:
+          (map.get('max_file_size_mb') as number | undefined) ?? prev.maxFileSizeMB,
+        retrievalLimit:
+          (map.get('retrieval_limit') as number | undefined) ?? prev.retrievalLimit,
+        retrievalThreshold:
+          (map.get('retrieval_threshold') as number | undefined) ?? prev.retrievalThreshold,
+        chunkSizeChars:
+          (map.get('chunk_size_chars') as number | undefined) ??
+          (map.get('chunk_size_tokens') as number | undefined) ?? // falls back to the token-named key; NOTE(review): value is reused as a char count without conversion — confirm
+          prev.chunkSizeChars,
+        overlapChars:
+          (map.get('overlap_chars') as number | undefined) ??
+          (map.get('overlap_tokens') as number | undefined) ?? // same token-named fallback as chunkSizeChars
+          prev.overlapChars,
+        searchMode:
+          (map.get('search_mode') as 'auto' | 'ann' | 'linear' | undefined) ??
+          prev.searchMode,
+        parseMode:
+          (map.get('parse_mode') as 'auto' | 'inline' | 'embeddings' | 'prompt' | undefined) ?? // cast only; the stored value is not validated against the union
+          prev.parseMode,
+        autoInlineContextRatio:
+          (map.get('auto_inline_context_ratio') as number | undefined) ??
+          prev.autoInlineContextRatio,
+      }))
+
+      return true
    } catch (e) {
      console.debug('Failed to load attachment settings defs:', e)
+      return false // errors are logged and reported as a failed load, never thrown
    }
  },
  setEnabled: async (v) => {
@@ -138,35 +180,35 @@ export const useAttachments = create<AttachmentsStore>()((set) => ({
      ),
    }))
  },
-  setChunkSizeTokens: async (val) => {
+  setChunkSizeChars: async (val) => { // writes the value to the RAG extension when available, then mirrors it in the store
    if (!fileAttachmentsFeatureEnabled) return
    const ext = getRagExtension()
    if (ext?.updateSettings) {
      await ext.updateSettings([
-        { key: 'chunk_size_tokens', controllerProps: { value: val } } as Partial<SettingComponentProps>,
+        { key: 'chunk_size_chars', controllerProps: { value: val } } as Partial<SettingComponentProps>, // partial def: only key and value are sent
      ])
    }
    set((s) => ({
-      chunkSizeTokens: val,
+      chunkSizeChars: val, // store update happens even when no extension was found above
      settingsDefs: s.settingsDefs.map((d) =>
-        d.key === 'chunk_size_tokens'
+        d.key === 'chunk_size_chars' // only the matching definition gets the new controller value
          ? ({ ...d, controllerProps: { ...d.controllerProps, value: val } } as SettingComponentProps)
          : d
      ),
    }))
  },
-  setOverlapTokens: async (val) => {
+  setOverlapChars: async (val) => {
    if (!fileAttachmentsFeatureEnabled) return
    const ext = getRagExtension()
    if (ext?.updateSettings) {
      await ext.updateSettings([
-        { key: 'overlap_tokens', controllerProps: { value: val } } as Partial<SettingComponentProps>,
+        { key: 'overlap_chars', controllerProps: { value: val } } as Partial<SettingComponentProps>,
      ])
    }
    set((s) => ({
-      overlapTokens: val,
+      overlapChars: val,
      settingsDefs: s.settingsDefs.map((d) =>
-        d.key === 'overlap_tokens'
+        d.key === 'overlap_chars'
          ? ({ ...d, controllerProps: { ...d.controllerProps, value: val } } as SettingComponentProps)
          : d
      ),
@@ -189,33 +231,49 @@ export const useAttachments = create<AttachmentsStore>()((set) => ({
      ),
    }))
  },
+  setParseMode: async (v) => { // writes the parse mode to the RAG extension when available, then mirrors it in the store
+    if (!fileAttachmentsFeatureEnabled) return // feature flag off: neither the extension nor the store is touched
+    const ext = getRagExtension()
+    if (ext?.updateSettings) {
+      await ext.updateSettings([
+        { key: 'parse_mode', controllerProps: { value: v } } as Partial<SettingComponentProps>,
+      ])
+    }
+    set((s) => ({
+      parseMode: v, // store update happens even when no extension was found above
+      settingsDefs: s.settingsDefs.map((d) =>
+        d.key === 'parse_mode' // only the matching definition gets the new controller value
+          ? ({ ...d, controllerProps: { ...d.controllerProps, value: v } } as SettingComponentProps)
+          : d
+      ),
+    }))
+  },
+  setAutoInlineContextRatio: async (val) => { // writes the ratio to the RAG extension when available, then mirrors it in the store
+    if (!fileAttachmentsFeatureEnabled) return // feature flag off: neither the extension nor the store is touched
+    const ext = getRagExtension()
+    if (ext?.updateSettings) {
+      await ext.updateSettings([
+        { key: 'auto_inline_context_ratio', controllerProps: { value: val } } as Partial<SettingComponentProps>,
+      ])
+    }
+    set((s) => ({
+      autoInlineContextRatio: val, // NOTE(review): val is stored as given; no range check is applied here — confirm callers validate
+      settingsDefs: s.settingsDefs.map((d) =>
+        d.key === 'auto_inline_context_ratio' // only the matching definition gets the new controller value
+          ? ({ ...d, controllerProps: { ...d.controllerProps, value: val } } as SettingComponentProps)
+          : d
+      ),
+    }))
+  },
 }))

-// Initialize from extension settings once on import
+// Attempt to hydrate settings from the RAG extension, retrying briefly until it is registered
+const MAX_INIT_ATTEMPTS = 5 // upper bound on loadSettingsDefs calls made at import time
+const INIT_RETRY_DELAY_MS = 300 // pause between attempts, in milliseconds
 ;(async () => {
-  try {
-    const ext = getRagExtension()
-    if (!ext?.getSettings) return
-    const settings = await ext.getSettings()
-    if (!Array.isArray(settings)) return
-    const map = new Map<string, unknown>()
-    for (const s of settings) map.set(s.key, s?.controllerProps?.value)
-    // seed defs and values
-    useAttachments.setState((prev) => ({
-      settingsDefs: settings,
-      enabled:
-        fileAttachmentsFeatureEnabled &&
-        ((map.get('enabled') as boolean | undefined) ?? prev.enabled),
-      maxFileSizeMB: (map.get('max_file_size_mb') as number | undefined) ?? prev.maxFileSizeMB,
-      retrievalLimit: (map.get('retrieval_limit') as number | undefined) ?? prev.retrievalLimit,
-      retrievalThreshold:
-        (map.get('retrieval_threshold') as number | undefined) ?? prev.retrievalThreshold,
-      chunkSizeTokens: (map.get('chunk_size_tokens') as number | undefined) ?? prev.chunkSizeTokens,
-      overlapTokens: (map.get('overlap_tokens') as number | undefined) ?? prev.overlapTokens,
-      searchMode:
-        (map.get('search_mode') as 'auto' | 'ann' | 'linear' | undefined) ?? prev.searchMode,
-    }))
-  } catch (e) {
-    console.debug('Failed to initialize attachment settings from extension:', e)
+  for (let i = 0; i < MAX_INIT_ATTEMPTS; i += 1) {
+    const success = await useAttachments.getState().loadSettingsDefs() // loadSettingsDefs catches its own errors and returns false
+    if (success) return // stop retrying once settings are hydrated
+    await new Promise((resolve) => setTimeout(resolve, INIT_RETRY_DELAY_MS)) // also waits after the final failed attempt; harmless since the IIFE result is discarded
  }
 })()
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -19,6 +19,7 @@ import {
  captureProactiveScreenshots,
 } from '@/lib/completion'
 import { CompletionMessagesBuilder } from '@/lib/messages'
+import { processAttachmentsForSend } from '@/lib/attachmentProcessing'
 import { renderInstructions } from '@/lib/instructionTemplate'
 import {
  ChatCompletionMessageToolCall,
@@ -41,10 +42,10 @@ import {
 import { useAssistant } from './useAssistant'
 import { useShallow } from 'zustand/shallow'
 import { TEMPORARY_CHAT_QUERY_ID, TEMPORARY_CHAT_ID } from '@/constants/chat'
-import { toast } from 'sonner'
 import { Attachment } from '@/types/attachment'
 import { MCPTool } from '@/types/completion'
 import { useMCPServers } from '@/hooks/useMCPServers'
+import { useAttachmentIngestionPrompt } from './useAttachmentIngestionPrompt'

 // Helper to create thread content with consistent structure
 const createThreadContent = (
@@ -76,6 +77,8 @@ const cancelFrame = (handle: number | undefined) => {
  }
 }

+const ATTACHMENT_AUTO_INLINE_FALLBACK_BYTES = 512 * 1024 // 512 KiB; passed to the ingestion prompt as its sizeThreshold argument
+
 // Helper to finalize and save a message
 const finalizeMessage = (
  finalContent: ThreadMessage,
@@ -380,6 +383,30 @@ export const useChat = () => {
    [updateLoadingModel, serviceHub, setActiveModels]
  )

+  const ensureModelLoaded = useCallback( // resolves true when modelId is active (starting it if needed), false otherwise
+    async (provider?: ProviderObject, modelId?: string | null) => {
+      if (!provider || !modelId) return false // nothing to load without both a provider and a model id
+      try {
+        const active = await serviceHub.models().getActiveModels()
+        if (Array.isArray(active) && active.includes(modelId)) {
+          setActiveModels(active) // already active: only sync the active-model list
+          return true
+        }
+        updateLoadingModel(true)
+        await serviceHub.models().startModel(provider, modelId)
+        const refreshed = await serviceHub.models().getActiveModels()
+        setActiveModels(refreshed || [])
+        return refreshed?.includes(modelId) ?? false // true only if the model is listed as active after startModel
+      } catch (err) {
+        console.warn('Failed to start model before attachment validation', err)
+        return false // start failures are logged and reported as "not ready", never thrown
+      } finally {
+        updateLoadingModel(false) // runs on every exit from the try block, including the already-active early return
+      }
+    },
+    [serviceHub, setActiveModels, updateLoadingModel]
+  )
+
  const increaseModelContextSize = useCallback(
    async (modelId: string, provider: ProviderObject) => {
      /**
@@ -481,120 +508,139 @@ export const useChat = () => {
    ) => {
      const activeThread = await getCurrentThread(projectId)
      const selectedProvider = useModelProvider.getState().selectedProvider
+      let selectedModel = useModelProvider.getState().selectedModel
      let activeProvider = getProviderByName(selectedProvider)

      resetTokenSpeed()
      if (!activeThread || !activeProvider) return

-      // Separate images and documents
-      const fileAttachmentsFeatureEnabled =
-        PlatformFeatures[PlatformFeature.FILE_ATTACHMENTS]
      const allAttachments = attachments ?? []
+      const parsePreference = useAttachments.getState().parseMode
+      const autoInlineContextRatio = useAttachments.getState().autoInlineContextRatio
+      const modelReady = await ensureModelLoaded(activeProvider, selectedModel?.id)

-      const images = allAttachments.filter((a) => a.type === 'image')
-      const documents = fileAttachmentsFeatureEnabled
-        ? allAttachments.filter((a) => a.type === 'document')
-        : []
+      const modelContextLength = (() => {
+        const ctx = selectedModel?.settings?.ctx_len?.controller_props?.value
+        if (typeof ctx === 'number') return ctx
+        if (typeof ctx === 'string') {
+          const parsed = parseInt(ctx, 10)
+          return Number.isFinite(parsed) ? parsed : undefined
+        }
+        return undefined
+      })()

-      // Process attachments BEFORE sending
-      const processedAttachments: Attachment[] = []
+      const rawContextThreshold =
+        typeof modelContextLength === 'number' && modelContextLength > 0
+          ? Math.floor(
+              modelContextLength *
+                (typeof autoInlineContextRatio === 'number'
+                  ? autoInlineContextRatio
+                  : 0.75)
+            )
+          : undefined

-      // 1) Images ingestion (placeholder/no-op for now)
-      // Track attachment ingestion; all must succeed before sending
+      const contextThreshold =
+        typeof rawContextThreshold === 'number' &&
+        Number.isFinite(rawContextThreshold) &&
+        rawContextThreshold > 0
+          ? rawContextThreshold
+          : undefined

-      if (images.length > 0) {
-        for (const img of images) {
-          try {
-            // Skip if already processed (ingested in ChatInput when thread existed)
-            if (img.processed && img.id) {
-              processedAttachments.push(img)
-              continue
-            }
+      const hasContextEstimate =
+        modelReady &&
+        typeof contextThreshold === 'number' &&
+        Number.isFinite(contextThreshold) &&
+        contextThreshold > 0
+      const docsNeedingPrompt = allAttachments.filter((doc) => {
+        if (doc.type !== 'document') return false
+        // Skip already processed/ingested documents to avoid repeated prompts
+        if (doc.processed || doc.injectionMode) return false
+        const preference = doc.parseMode ?? parsePreference
+        return preference === 'prompt' || (preference === 'auto' && !hasContextEstimate)
+      })

-            if (updateAttachmentProcessing) {
-              updateAttachmentProcessing(img.name, 'processing')
-            }
-            // Upload image, get id/URL
-            const res = await serviceHub
-              .uploads()
-              .ingestImage(activeThread.id, img)
-            processedAttachments.push({
-              ...img,
-              id: res.id,
-              processed: true,
-              processing: false,
-            })
-            if (updateAttachmentProcessing) {
-              updateAttachmentProcessing(img.name, 'done')
-            }
-          } catch (err) {
-            console.error(`Failed to ingest image ${img.name}:`, err)
-            if (updateAttachmentProcessing) {
-              updateAttachmentProcessing(img.name, 'error')
-            }
-            const desc = err instanceof Error ? err.message : String(err)
-            toast.error('Failed to ingest image attachment', {
-              description: desc,
-            })
-            return
+      // Map to store individual choices for each document
+      const docChoices = new Map<string, 'inline' | 'embeddings'>()
+
+      if (docsNeedingPrompt.length > 0) {
+        // Ask for each file individually
+        for (let i = 0; i < docsNeedingPrompt.length; i++) {
+          const doc = docsNeedingPrompt[i]
+          const choice = await useAttachmentIngestionPrompt
+            .getState()
+            .showPrompt(doc, ATTACHMENT_AUTO_INLINE_FALLBACK_BYTES, i, docsNeedingPrompt.length)
+
+          if (!choice) return
+
+          // Store the choice for this specific document
+          if (doc.path) {
+            docChoices.set(doc.path, choice)
          }
        }
      }

-      if (documents.length > 0) {
+      const estimateTokens = async (text: string): Promise<number | undefined> => {
        try {
-          for (const doc of documents) {
-            // Skip if already processed (ingested in ChatInput when thread existed)
-            if (doc.processed && doc.id) {
-              processedAttachments.push(doc)
-              continue
-            }
-
-            // Update UI to show spinner on this file
-            if (updateAttachmentProcessing) {
-              updateAttachmentProcessing(doc.name, 'processing')
-            }
-
-            try {
-              const res = await serviceHub
-                .uploads()
-                .ingestFileAttachment(activeThread.id, doc)
-
-              // Add processed document with ID
-              processedAttachments.push({
-                ...doc,
-                id: res.id,
-                size: res.size ?? doc.size,
-                chunkCount: res.chunkCount ?? doc.chunkCount,
-                processing: false,
-                processed: true,
-              })
-
-              // Update UI to show done state
-              if (updateAttachmentProcessing) {
-                updateAttachmentProcessing(doc.name, 'done')
-              }
-            } catch (err) {
-              console.error(`Failed to ingest ${doc.name}:`, err)
-              if (updateAttachmentProcessing) {
-                updateAttachmentProcessing(doc.name, 'error')
-              }
-              throw err // Re-throw to handle in outer catch
-            }
+          if (!selectedModel?.id || !modelReady) return undefined
+          const tokenCount = await serviceHub
+            .models()
+            .getTokensCount(selectedModel.id, [
+              {
+                id: 'inline-attachment',
+                object: 'thread.message',
+                thread_id: activeThread.id,
+                role: 'user',
+                content: [
+                  {
+                    type: ContentType.Text,
+                    text: { value: text, annotations: [] },
+                  },
+                ],
+                status: MessageStatus.Ready,
+                created_at: Date.now(),
+                completed_at: Date.now(),
+              } as ThreadMessage,
+            ])
+          if (
+            typeof tokenCount !== 'number' ||
+            !Number.isFinite(tokenCount) ||
+            tokenCount <= 0
+          ) {
+            return undefined
          }
-          // Update thread since documents attached
-          useThreads.getState().updateThread(activeThread.id, {
-            metadata: { hasDocuments: true },
-          })
-        } catch (err) {
-          console.error('Failed to ingest documents:', err)
-          const desc = err instanceof Error ? err.message : String(err)
-          toast.error('Failed to index attachments', { description: desc })
-          // Don't continue with message send if ingestion failed
-          return
+          return tokenCount
+        } catch (e) {
+          console.debug('Failed to estimate tokens for attachment content', e)
+          return undefined
        }
      }

+      let processedAttachments: Attachment[] = []
+      let hasEmbeddedDocuments = false
+      try {
+        const result = await processAttachmentsForSend({
+          attachments: allAttachments,
+          threadId: activeThread.id,
+          serviceHub,
+          selectedProvider,
+          contextThreshold,
+          estimateTokens,
+          parsePreference,
+          perFileChoices: docChoices.size > 0 ? docChoices : undefined,
+          updateAttachmentProcessing,
+        })
+        processedAttachments = result.processedAttachments
+        hasEmbeddedDocuments = result.hasEmbeddedDocuments
+      } catch {
+        return
+      }
+
+      if (hasEmbeddedDocuments) {
+        useThreads.getState().updateThread(activeThread.id, {
+          metadata: { hasDocuments: true },
+        })
+      }
+
      // All attachments prepared successfully

      const messages = getMessages(activeThread.id)
@@ -625,7 +671,7 @@ export const useChat = () => {
      }
      updateThreadTimestamp(activeThread.id)
      usePrompt.getState().setPrompt('')
-      const selectedModel = useModelProvider.getState().selectedModel
+      selectedModel = useModelProvider.getState().selectedModel

      // If continuing, start with the previous content
      const accumulatedTextRef = {
@@ -634,7 +680,7 @@ export const useChat = () => {
      let currentAssistant: Assistant | undefined | null

      try {
-        if (selectedModel?.id) {
+        if (selectedModel?.id && !modelReady) {
          updateLoadingModel(true)
          await serviceHub.models().startModel(activeProvider, selectedModel.id)
          updateLoadingModel(false)
@@ -643,6 +689,11 @@ export const useChat = () => {
            .models()
            .getActiveModels()
            .then((models) => setActiveModels(models || []))
+        } else if (selectedModel?.id) {
+          serviceHub
+            .models()
+            .getActiveModels()
+            .then((models) => setActiveModels(models || []))
        }
        currentAssistant = useAssistant.getState().currentAssistant

@@ -1090,6 +1141,7 @@ export const useChat = () => {
      updateTokenSpeed,
      showIncreaseContextSizeModal,
      increaseModelContextSize,
+      ensureModelLoaded,
      toggleOnContextShifting,
      setModelLoadError,
      serviceHub,
--- a/web-app/src/hooks/useChatAttachments.ts
+++ b/web-app/src/hooks/useChatAttachments.ts
@@ -0,0 +1,63 @@
+import { create } from 'zustand'
+
+import { Attachment } from '@/types/attachment'
+
+export const NEW_THREAD_ATTACHMENT_KEY = '__new-thread__' // default key getAttachments uses when no threadId is given
+
+const EMPTY_ATTACHMENTS: Attachment[] = [] // single shared array returned for keys with no entry; presumably kept stable for selector equality — confirm
+
+type AttachmentStore = { // per-thread attachment lists plus the operations that mutate them
+  attachmentsByThread: Record<string, Attachment[]> // keyed by thread id, or NEW_THREAD_ATTACHMENT_KEY
+  getAttachments: (threadId?: string) => Attachment[] // returns the shared empty array when the key has no entry
+  setAttachments: (
+    threadId: string,
+    updater: Attachment[] | ((prev: Attachment[]) => Attachment[]) // either a replacement list or a function of the current list
+  ) => void
+  clearAttachments: (threadId: string) => void // removes the key entirely
+  transferAttachments: (fromKey: string, toKey: string) => void // removes fromKey; its list lands on toKey only if toKey is empty
+}
+
+export const useChatAttachments = create<AttachmentStore>()((set, get) => ({ // zustand store of attachments grouped by thread key
+  attachmentsByThread: {},
+  getAttachments: (threadId = NEW_THREAD_ATTACHMENT_KEY) => {
+    return get().attachmentsByThread[threadId] ?? EMPTY_ATTACHMENTS // missing key yields the shared empty array, not a fresh one
+  },
+  setAttachments: (threadId, updater) => {
+    set((state) => {
+      const current = state.attachmentsByThread[threadId] ?? [] // functional updaters see [] for a missing key
+      const next = typeof updater === 'function' ? updater(current) : updater
+      return {
+        attachmentsByThread: {
+          ...state.attachmentsByThread,
+          [threadId]: next,
+        },
+      }
+    })
+  },
+  clearAttachments: (threadId) => {
+    set((state) => {
+      // eslint-disable-next-line @typescript-eslint/no-unused-vars
+      const { [threadId]: _, ...rest } = state.attachmentsByThread // rest is a copy of the map without threadId
+      return { attachmentsByThread: rest }
+    })
+  },
+  transferAttachments: (fromKey, toKey) => {
+    set((state) => {
+      const fromAttachments = state.attachmentsByThread[fromKey]
+      if (!fromAttachments?.length) return state // nothing to move: state is returned unchanged
+
+      const existingDestination = state.attachmentsByThread[toKey]
+      const attachmentsByThread = { ...state.attachmentsByThread }
+      delete attachmentsByThread[fromKey] // the source key is always removed from the copy
+
+      return {
+        attachmentsByThread: {
+          ...attachmentsByThread,
+          [toKey]: existingDestination?.length
+            ? existingDestination // NOTE(review): a non-empty destination wins and the source list is dropped, not merged — confirm intended
+            : fromAttachments,
+        },
+      }
+    })
+  },
+}))
--- a/web-app/src/hooks/useTokensCount.ts
+++ b/web-app/src/hooks/useTokensCount.ts
@@ -14,6 +14,32 @@ export interface TokenCountData {
  error?: string
 }

+type InlineFileContent = { // one inline file entry as read from message metadata
+  name?: string
+  content: string
+}
+
+const getInlineFileContents = ( // returns the valid entries of metadata.inline_file_contents, or [] when absent or not an array
+  metadata: ThreadMessage['metadata']
+): InlineFileContent[] => {
+  const inlineFileContents = (
+    metadata as { inline_file_contents?: unknown }
+  )?.inline_file_contents // optional chaining tolerates null/undefined metadata
+
+  if (!Array.isArray(inlineFileContents)) return []
+
+  return inlineFileContents.filter((file): file is InlineFileContent => {
+    if (!file || typeof file !== 'object') return false // drop null and primitive entries
+    const { content, name } = file as { content?: unknown; name?: unknown }
+
+    const hasContent = typeof content === 'string' && content.length > 0 // empty-string content is rejected
+    const hasValidName =
+      typeof name === 'string' || typeof name === 'undefined' // name may be omitted, but must be a string when present
+
+    return hasContent && hasValidName
+  })
+}
+
 export const useTokensCount = (
  messages: ThreadMessage[] = [],
  uploadedFiles?: Array<{
@@ -38,7 +64,10 @@ export const useTokensCount = (
  const { selectedModel, selectedProvider } = useModelProvider()
  const { prompt } = usePrompt()

-  // Create messages with current prompt for live calculation
+  // Create messages with current prompt for live calculation.
+  // This mirrors the payload sent to token counting by appending the draft
+  // user message (text plus any uploaded images) to the existing thread
+  // history so the model sees the full context that will be submitted.
  const messagesWithPrompt = useMemo(() => {
    const result = [...messages]
    if (prompt.trim() || (uploadedFiles && uploadedFiles.length > 0)) {
@@ -72,19 +101,34 @@ export const useTokensCount = (
        } as ThreadMessage)
      }
    }
-    return result.map((e) => ({
-      ...e,
-      content: e.content.map((c) => ({
-        ...c,
-        text:
-          c.type === 'text'
-            ? {
-                value: removeReasoningContent(c.text?.value ?? '.'),
-                annotations: [],
-              }
-            : c.text,
-      })),
-    }))
+    return result.map((e) => {
+      // Pull inline file contents stored on the message metadata
+      const inlineFileContents = getInlineFileContents(e.metadata)
+
+      const buildInlineText = (base: string) => {
+        if (!inlineFileContents.length) return base
+        const formatted = inlineFileContents
+          .map((f) => `File: ${f.name || 'attachment'}\n${f.content ?? ''}`)
+          .join('\n\n')
+        return base ? `${base}\n\n${formatted}` : formatted
+      }
+
+      return {
+        ...e,
+        content: e.content.map((c) => ({
+          ...c,
+          text:
+            c.type === 'text'
+              ? {
+                  value: removeReasoningContent(
+                    buildInlineText(c.text?.value ?? '.')
+                  ),
+                  annotations: [],
+                }
+              : c.text,
+        })),
+      }
+    })
  }, [messages, prompt, uploadedFiles])

  // Debounced calculation that includes current prompt
--- a/web-app/src/lib/attachmentProcessing.ts
+++ b/web-app/src/lib/attachmentProcessing.ts
@@ -0,0 +1,215 @@
+import { PlatformFeatures } from '@/lib/platform/const'
+import { PlatformFeature } from '@/lib/platform/types'
+import { ServiceHub } from '@/services'
+import { Attachment } from '@/types/attachment'
+import { toast } from 'sonner'
+
+type AttachmentProcessingStatus = // status values accepted by the updateAttachmentProcessing callback
+  | 'processing'
+  | 'done'
+  | 'error'
+  | 'clear_docs' // not emitted by processAttachmentsForSend in this file
+  | 'clear_all' // not emitted by processAttachmentsForSend in this file
+
+type AttachmentProcessingOptions = { // inputs to processAttachmentsForSend
+  attachments: Attachment[]
+  threadId: string
+  serviceHub: ServiceHub
+  selectedProvider?: string // NOTE(review): accepted but never read by processAttachmentsForSend — confirm whether it is still needed
+  contextThreshold?: number // token budget for auto mode; ignored unless it is a finite number > 0
+  estimateTokens?: (text: string) => Promise<number | undefined> // used only in 'auto' mode when no per-file choice exists
+  parsePreference: 'auto' | 'inline' | 'embeddings' | 'prompt' // default mode; a document's own parseMode takes precedence
+  autoFallbackMode?: 'inline' | 'embeddings' // used for 'auto'/'prompt' without a per-file choice; 'embeddings' when omitted
+  perFileChoices?: Map<string, 'inline' | 'embeddings'> // looked up by the attachment's path
+  updateAttachmentProcessing?: (name: string, status: AttachmentProcessingStatus) => void // called with the attachment's name
+}
+
+export type AttachmentProcessingResult = { // value resolved by processAttachmentsForSend
+  processedAttachments: Attachment[] // images first, then documents, each in input order
+  hasEmbeddedDocuments: boolean // true when at least one document was ingested as embeddings or was already processed non-inline
+}
+
+export const processAttachmentsForSend = async ( // ingests images, then inlines or embeds each document; on failure it toasts and rethrows
+  options: AttachmentProcessingOptions
+): Promise<AttachmentProcessingResult> => {
+  const {
+    attachments,
+    threadId,
+    serviceHub,
+    contextThreshold,
+    estimateTokens,
+    parsePreference,
+    autoFallbackMode,
+    perFileChoices,
+    updateAttachmentProcessing,
+  } = options
+
+  const fileAttachmentsFeatureEnabled = PlatformFeatures[PlatformFeature.FILE_ATTACHMENTS] // documents are skipped entirely when this is falsy
+  const processedAttachments: Attachment[] = []
+  let hasEmbeddedDocuments = false
+  const effectiveContextThreshold = // undefined unless contextThreshold is a finite number > 0
+    typeof contextThreshold === 'number' &&
+    Number.isFinite(contextThreshold) &&
+    contextThreshold > 0
+      ? contextThreshold
+      : undefined
+
+  // Images: ingest before sending
+  const images = attachments.filter((a) => a.type === 'image')
+  if (images.length > 0) {
+    for (const img of images) {
+      try {
+        if (img.processed && img.id) { // already ingested: pass through unchanged
+          processedAttachments.push(img)
+          continue
+        }
+
+        if (updateAttachmentProcessing) {
+          updateAttachmentProcessing(img.name, 'processing')
+        }
+
+        const res = await serviceHub.uploads().ingestImage(threadId, img)
+        processedAttachments.push({
+          ...img,
+          id: res.id,
+          processed: true,
+          processing: false,
+        })
+        if (updateAttachmentProcessing) {
+          updateAttachmentProcessing(img.name, 'done')
+        }
+      } catch (err) {
+        console.error(`Failed to ingest image ${img.name}:`, err)
+        if (updateAttachmentProcessing) {
+          updateAttachmentProcessing(img.name, 'error')
+        }
+        const desc = err instanceof Error ? err.message : String(err)
+        toast.error('Failed to ingest image attachment', { description: desc })
+        throw err // first failing image aborts the whole run; later attachments are not processed
+      }
+    }
+  }
+
+  if (fileAttachmentsFeatureEnabled) {
+    const documents = attachments.filter((a) => a.type === 'document')
+    for (const doc of documents) {
+      try {
+        if (doc.processed && (doc.id || doc.injectionMode === 'inline')) { // already handled earlier: pass through unchanged
+          hasEmbeddedDocuments = hasEmbeddedDocuments || doc.injectionMode !== 'inline' // any processed non-inline doc counts as embedded
+          processedAttachments.push(doc)
+          continue
+        }
+
+        if (updateAttachmentProcessing) {
+          updateAttachmentProcessing(doc.name, 'processing')
+        }
+
+        const targetPreference = doc.parseMode ?? parsePreference // per-document mode overrides the global preference
+        let targetMode: 'inline' | 'embeddings' =
+          targetPreference === 'inline' ? 'inline' : 'embeddings' // initial value; reassigned below for 'auto' and 'prompt'
+        let parsedContent: string | undefined
+
+        const canInline =
+          targetPreference !== 'embeddings' && !!doc.path // parsing is attempted for every preference except 'embeddings', given a path
+
+        if (canInline) {
+          try {
+            parsedContent = await serviceHub.rag().parseDocument?.(doc.path!, doc.fileType) // stays undefined when parseDocument is not provided
+          } catch (err) {
+            console.warn(`Failed to parse ${doc.name} for inline use`, err) // parse failure is non-fatal; the doc is embedded instead
+          }
+        }
+
+        if (targetPreference === 'auto') {
+          // Check if user made a per-file choice for this document
+          const userChoice = perFileChoices?.get(doc.path || '') // a doc without a path is looked up under the empty-string key
+          targetMode = userChoice ?? autoFallbackMode ?? 'embeddings'
+
+          // Only do auto-detection if no user choice was made
+          if (!userChoice && parsedContent && estimateTokens) {
+            const estimatedTokens = await estimateTokens(parsedContent)
+            const tokenCount = // undefined unless the estimate is a finite number > 0
+              typeof estimatedTokens === 'number' &&
+              Number.isFinite(estimatedTokens) &&
+              estimatedTokens > 0
+                ? estimatedTokens
+                : undefined
+            if (!effectiveContextThreshold) {
+              console.debug(
+                `Attachment ${doc.name}: no context threshold available; defaulting to ${targetMode}`
+              )
+            } else if (typeof tokenCount === 'number') {
+              targetMode =
+                tokenCount <= effectiveContextThreshold ? 'inline' : 'embeddings' // inline only when the estimate fits within the threshold
+            } else {
+              console.debug(
+                `Attachment ${doc.name}: token estimate unavailable or non-positive; defaulting to ${targetMode}`
+              )
+            }
+          } else if (!userChoice && !parsedContent) {
+            console.debug(
+              `Attachment ${doc.name}: parsed content unavailable for token estimation; defaulting to ${targetMode}`
+            )
+          } else if (!userChoice) {
+            console.debug(
+              `Attachment ${doc.name}: token estimator unavailable; defaulting to ${targetMode}`
+            )
+          }
+        } else if (targetPreference === 'prompt') {
+          // Check if user made a per-file choice for this document
+          const userChoice = perFileChoices?.get(doc.path || '')
+          targetMode = userChoice ?? autoFallbackMode ?? 'embeddings'
+        }
+
+        if (targetMode === 'inline' && parsedContent) { // 'inline' with no (or empty) parsed content falls through to embeddings
+          processedAttachments.push({
+            ...doc,
+            processing: false,
+            processed: true,
+            inlineContent: parsedContent,
+            injectionMode: 'inline',
+          })
+
+          if (updateAttachmentProcessing) {
+            updateAttachmentProcessing(doc.name, 'done')
+          }
+          continue
+        }
+
+        // Default: ingest as embeddings
+        if (updateAttachmentProcessing) {
+          updateAttachmentProcessing(doc.name, 'processing') // NOTE(review): repeats the 'processing' update already sent above for this doc
+        }
+
+        const res = await serviceHub
+          .uploads()
+          .ingestFileAttachment(threadId, doc)
+
+        processedAttachments.push({
+          ...doc,
+          id: res.id,
+          size: res.size ?? doc.size, // prefer values reported by ingestion, else keep the doc's own
+          chunkCount: res.chunkCount ?? doc.chunkCount,
+          processing: false,
+          processed: true,
+          injectionMode: 'embeddings',
+        })
+        hasEmbeddedDocuments = true
+
+        if (updateAttachmentProcessing) {
+          updateAttachmentProcessing(doc.name, 'done')
+        }
+      } catch (err) {
+        console.error(`Failed to ingest ${doc.name}:`, err)
+        if (updateAttachmentProcessing) {
+          updateAttachmentProcessing(doc.name, 'error')
+        }
+        const desc = err instanceof Error ? err.message : String(err)
+        toast.error('Failed to index attachments', { description: desc })
+        throw err // first failing document aborts the whole run; the partial result is discarded
+      }
+    }
+  }
+
+  return { processedAttachments, hasEmbeddedDocuments }
+}
--- a/web-app/src/lib/completion.ts
+++ b/web-app/src/lib/completion.ts
@@ -65,15 +65,19 @@ export const newUserThreadContent = (
  const images = attachments?.filter((a) => a.type === 'image') || []
  const documents = attachments?.filter((a) => a.type === 'document') || []

+  const inlineDocuments = documents.filter(
+    (doc) => doc.injectionMode === 'inline' && doc.inlineContent
+  )
+
  // Inject document metadata into the text content (id, name, fileType only - no path)
  const docMetadata = documents
-    .filter((doc) => doc.id) // Only include processed documents
    .map((doc) => ({
-      id: doc.id!,
+      id: doc.id ?? doc.name,
      name: doc.name,
      type: doc.fileType,
      size: typeof doc.size === 'number' ? doc.size : undefined,
      chunkCount: typeof doc.chunkCount === 'number' ? doc.chunkCount : undefined,
+      injectionMode: doc.injectionMode,
    }))

  const textWithFiles =
@@ -112,6 +116,15 @@ export const newUserThreadContent = (
    status: MessageStatus.Ready,
    created_at: 0,
    completed_at: 0,
+    metadata:
+      inlineDocuments.length > 0
+        ? {
+            inline_file_contents: inlineDocuments.map((doc) => ({
+              name: doc.name,
+              content: doc.inlineContent,
+            })),
+          }
+        : undefined,
  }
 }
 /**
--- a/web-app/src/lib/fileMetadata.ts
+++ b/web-app/src/lib/fileMetadata.ts
@@ -8,6 +8,7 @@ export interface FileMetadata {
  type?: string
  size?: number
  chunkCount?: number
+  injectionMode?: 'inline' | 'embeddings'
 }

 const FILE_METADATA_START = '[ATTACHED_FILES]'
@@ -31,6 +32,7 @@ export function injectFilesIntoPrompt(
      if (file.type) parts.push(`type: ${file.type}`)
      if (typeof file.size === 'number') parts.push(`size: ${file.size}`)
      if (typeof file.chunkCount === 'number') parts.push(`chunks: ${file.chunkCount}`)
+      if (file.injectionMode) parts.push(`mode: ${file.injectionMode}`)
      return `- ${parts.join(', ')}`
    })
    .join('\n')
@@ -94,6 +96,10 @@ export function extractFilesFromPrompt(prompt: string): {
    if (typeof chunkCount === 'number' && !Number.isNaN(chunkCount)) {
      fileObj.chunkCount = chunkCount;
    }
+    const injectionMode = map['mode']
+    if (injectionMode === 'inline' || injectionMode === 'embeddings') {
+      fileObj.injectionMode = injectionMode
+    }
    files.push(fileObj);
  }

--- a/web-app/src/lib/messages.ts
+++ b/web-app/src/lib/messages.ts
@@ -85,6 +85,23 @@ export class CompletionMessagesBuilder {
  private toCompletionParamFromThread(
    msg: ThreadMessage
  ): ChatCompletionMessageParam {
+    const inlineFileContents = Array.isArray(
+      (msg.metadata as any)?.inline_file_contents
+    )
+      ? ((msg.metadata as any)?.inline_file_contents as Array<{
+          name?: string
+          content?: string
+        }>).filter((f) => f?.content)
+      : []
+
+    const buildInlineText = (base: string) => {
+      if (!inlineFileContents.length) return base
+      const formatted = inlineFileContents
+        .map((f) => `File: ${f.name || 'attachment'}\n${f.content ?? ''}`)
+        .join('\n\n')
+      return base ? `${base}\n\n${formatted}` : formatted
+    }
+
    if (msg.role === 'assistant') {
      return {
        role: 'assistant',
@@ -104,7 +121,10 @@ export class CompletionMessagesBuilder {
    if (Array.isArray(msg.content) && msg.content.length > 1) {
      const content = msg.content.map((part: ThreadContent) => {
        if (part.type === ContentType.Text) {
-          return { type: 'text' as const, text: part.text?.value ?? '' }
+          return {
+            type: 'text' as const,
+            text: buildInlineText(part.text?.value ?? ''),
+          }
        }
        if (part.type === ContentType.Image) {
          return {
@@ -122,7 +142,7 @@ export class CompletionMessagesBuilder {
    }
    // Single text part
    const text = msg?.content?.[0]?.text?.value ?? '.'
-    return { role: 'user', content: text }
+    return { role: 'user', content: buildInlineText(text) }
  }

  /**
--- a/web-app/src/locales/en/common.json
+++ b/web-app/src/locales/en/common.json
@@ -292,6 +292,16 @@
    "noProjectsFound": "No projects found",
    "tryDifferentSearch": "Try a different search term"
  },
+  "attachmentsIngestion": {
+    "title": "Choose how to ingest attachments",
+    "description": "Choose whether to include these files directly in the chat or index them as embeddings instead.",
+    "inline": "Inject into chat",
+    "embeddings": "Use embeddings"
+  },
+  "attachmentInjectedIndicator": "Injected into chat",
+  "attachmentEmbeddedIndicator": "Embedded for RAG",
+  "viewInjectedContent": "View injected content",
+  "injectedContentTitle": "Injected file content",
  "toast": {
    "allThreadsUnfavorited": {
      "title": "All Threads Unfavorited",
--- a/web-app/src/locales/en/settings.json
+++ b/web-app/src/locales/en/settings.json
@@ -269,6 +269,14 @@
    "featureTitle": "Feature",
    "enable": "Enable Attachments",
    "enableDesc": "Allow uploading and indexing documents for retrieval.",
+    "parseMode": "Parse preference",
+    "parseModeDesc": "Choose how parsed documents are added to conversations.",
+    "parseModeAuto": "Auto",
+    "parseModeInline": "Include in chat",
+    "parseModeEmbeddings": "Ingest as embeddings",
+    "parseModePrompt": "Ask every time",
+    "autoInlineThreshold": "Auto inline threshold",
+    "autoInlineThresholdDesc": "Fraction of the model context used as the cutoff for inlining parsed files on local models.",
    "limitsTitle": "Limits",
    "maxFile": "Max File Size (MB)",
    "maxFileDesc": "Maximum size per file. Enforced at upload and processing time.",
@@ -283,10 +291,10 @@
    "searchModeAnn": "ANN (sqlite-vec)",
    "searchModeLinear": "Linear",
    "chunkingTitle": "Chunking",
-    "chunkSize": "Chunk Size (tokens)",
-    "chunkSizeDesc": "Approximate max tokens per chunk for embeddings.",
-    "chunkOverlap": "Overlap (tokens)",
-    "chunkOverlapDesc": "Token overlap between consecutive chunks."
+    "chunkSize": "Chunk Size (characters)",
+    "chunkSizeDesc": "Approximate max characters per chunk for embeddings.",
+    "chunkOverlap": "Overlap (characters)",
+    "chunkOverlapDesc": "Character overlap between consecutive chunks."
  },
  "dialogs": {
    "changeDataFolder": {
--- a/web-app/src/locales/fr/common.json
+++ b/web-app/src/locales/fr/common.json
@@ -372,5 +372,9 @@
      "title": "Discussion retirée",
      "description": "Discussion retirée de \"{{projectName}}\" avec succès"
    }
-  }
+  },
+  "attachmentInjectedIndicator": "Injecté dans la discussion",
+  "attachmentEmbeddedIndicator": "Intégré pour RAG",
+  "viewInjectedContent": "Voir le contenu injecté",
+  "injectedContentTitle": "Contenu du fichier injecté"
 }
--- a/web-app/src/locales/fr/settings.json
+++ b/web-app/src/locales/fr/settings.json
@@ -267,6 +267,14 @@
    "featureTitle": "Fonctionnalité",
    "enable": "Activer les pièces jointes",
    "enableDesc": "Autorisez le téléchargement et l'indexation de documents pour la récupération.",
+    "parseMode": "Préférence d'analyse",
+    "parseModeDesc": "Choisissez comment les documents analysés sont ajoutés aux conversations.",
+    "parseModeAuto": "Auto",
+    "parseModeInline": "Inclure dans le chat",
+    "parseModeEmbeddings": "Ingestion en embeddings",
+    "parseModePrompt": "Demander à chaque fois",
+    "autoInlineThreshold": "Seuil d'inclusion auto",
+    "autoInlineThresholdDesc": "Part du contexte du modèle utilisée comme limite pour inclure les fichiers analysés sur les modèles locaux.",
    "limitsTitle": "Limites",
    "maxFile": "Taille de fichier max (Mo)",
    "maxFileDesc": "Taille maximale par fichier. Appliquée lors du téléchargement et du traitement.",
@@ -281,10 +289,10 @@
    "searchModeAnn": "ANN (sqlite-vec)",
    "searchModeLinear": "Linéaire",
    "chunkingTitle": "Découpage",
-    "chunkSize": "Taille du bloc (jetons)",
-    "chunkSizeDesc": "Nombre maximum approximatif de jetons par bloc pour les embeddings.",
-    "chunkOverlap": "Chevauchement (jetons)",
-    "chunkOverlapDesc": "Chevauchement de jetons entre les blocs consécutifs."
+    "chunkSize": "Taille du bloc (caractères)",
+    "chunkSizeDesc": "Nombre maximum approximatif de caractères par bloc pour les embeddings.",
+    "chunkOverlap": "Chevauchement (caractères)",
+    "chunkOverlapDesc": "Chevauchement de caractères entre les blocs consécutifs."
  },
  "dialogs": {
    "changeDataFolder": {
--- a/web-app/src/locales/pt-BR/common.json
+++ b/web-app/src/locales/pt-BR/common.json
@@ -371,5 +371,9 @@
      "title": "Conversa Removida",
      "description": "Conversa removida de \"{{projectName}}\" com sucesso"
    }
-  }
+  },
+  "attachmentInjectedIndicator": "Injetado no chat",
+  "attachmentEmbeddedIndicator": "Incorporado para RAG",
+  "viewInjectedContent": "Ver conteúdo injetado",
+  "injectedContentTitle": "Conteúdo de arquivo injetado"
 }
--- a/web-app/src/locales/pt-BR/settings.json
+++ b/web-app/src/locales/pt-BR/settings.json
@@ -261,6 +261,14 @@
    "featureTitle": "Recurso",
    "enable": "Habilitar Anexos",
    "enableDesc": "Permitir upload e indexação de documentos para recuperação.",
+    "parseMode": "Preferência de análise",
+    "parseModeDesc": "Escolha como os documentos processados são adicionados às conversas.",
+    "parseModeAuto": "Auto",
+    "parseModeInline": "Incluir no chat",
+    "parseModeEmbeddings": "Ingerir como embeddings",
+    "parseModePrompt": "Perguntar sempre",
+    "autoInlineThreshold": "Limite de inclusão automática",
+    "autoInlineThresholdDesc": "Fração do contexto do modelo usada como limite para incluir arquivos processados em modelos locais.",
    "limitsTitle": "Limites",
    "maxFile": "Tamanho Máximo do Arquivo (MB)",
    "maxFileDesc": "Tamanho máximo por arquivo. Aplicado no upload e processamento.",
@@ -275,10 +283,10 @@
    "searchModeAnn": "ANN (sqlite-vec)",
    "searchModeLinear": "Linear",
    "chunkingTitle": "Fragmentação",
-    "chunkSize": "Tamanho do Fragmento (tokens)",
-    "chunkSizeDesc": "Máximo aproximado de tokens por fragmento para embeddings.",
-    "chunkOverlap": "Sobreposição (tokens)",
-    "chunkOverlapDesc": "Sobreposição de tokens entre fragmentos consecutivos."
+    "chunkSize": "Tamanho do Fragmento (caracteres)",
+    "chunkSizeDesc": "Máximo aproximado de caracteres por fragmento para embeddings.",
+    "chunkOverlap": "Sobreposição (caracteres)",
+    "chunkOverlapDesc": "Sobreposição de caracteres entre fragmentos consecutivos."
  },
  "dialogs": {
    "changeDataFolder": {
--- a/web-app/src/locales/ru/common.json
+++ b/web-app/src/locales/ru/common.json
@@ -372,5 +372,9 @@
      "title": "Ответы удалены",
      "description": "Ответы успешно удалены из \"{{projectName}}\""
    }
-  }
+  },
+  "attachmentInjectedIndicator": "Вставлено в чат",
+  "attachmentEmbeddedIndicator": "Встроено для RAG",
+  "viewInjectedContent": "Просмотреть вставленный контент",
+  "injectedContentTitle": "Содержимое вставленного файла"
 }
--- a/web-app/src/locales/ru/settings.json
+++ b/web-app/src/locales/ru/settings.json
@@ -269,6 +269,14 @@
    "featureTitle": "Функция",
    "enable": "Включить вложения",
    "enableDesc": "Разрешить загрузку и индексацию документов для поиска.",
+    "parseMode": "Настройка разбора",
+    "parseModeDesc": "Выберите, как добавлять разобранные документы в чат.",
+    "parseModeAuto": "Авто",
+    "parseModeInline": "Включить в чат",
+    "parseModeEmbeddings": "Индексировать как embeddings",
+    "parseModePrompt": "Спрашивать каждый раз",
+    "autoInlineThreshold": "Порог авто включения",
+    "autoInlineThresholdDesc": "Доля контекста модели, используемая как предел для встроения разобранных файлов на локальных моделях.",
    "limitsTitle": "Ограничения",
    "maxFile": "Макс. размер файла (МБ)",
    "maxFileDesc": "Максимальный размер файла. Принудительно применяется при загрузке и обработке.",
@@ -283,10 +291,10 @@
    "searchModeAnn": "ANN (sqlite-vec)",
    "searchModeLinear": "Линейный",
    "chunkingTitle": "Чанкинг",
-    "chunkSize": "Размер фрагмента (токенов)",
-    "chunkSizeDesc": "Ориентировочное макс. количество токенов во фрагменте для эмбеддингов.",
-    "chunkOverlap": "Перекрытие (токенов)",
-    "chunkOverlapDesc": "Перекрытие токенов между последовательными фрагментами."
+    "chunkSize": "Размер фрагмента (символов)",
+    "chunkSizeDesc": "Ориентировочное макс. количество символов во фрагменте для эмбеддингов.",
+    "chunkOverlap": "Перекрытие (символов)",
+    "chunkOverlapDesc": "Перекрытие символов между последовательными фрагментами."
  },
  "dialogs": {
    "changeDataFolder": {
--- a/web-app/src/routes/__root.tsx
+++ b/web-app/src/routes/__root.tsx
@@ -23,6 +23,7 @@ import { TranslationProvider } from '@/i18n/TranslationContext'
 import OutOfContextPromiseModal from '@/containers/dialogs/OutOfContextDialog'
 import LoadModelErrorDialog from '@/containers/dialogs/LoadModelErrorDialog'
 import { useSmallScreen } from '@/hooks/useMediaQuery'
+import AttachmentIngestionDialog from '@/containers/dialogs/AttachmentIngestionDialog'
 import {
  ResizablePanelGroup,
  ResizablePanel,
@@ -251,6 +252,7 @@ function RootLayout() {
          <ToolApproval />
          <LoadModelErrorDialog />
          <ErrorDialog />
+          <AttachmentIngestionDialog />
          <OutOfContextPromiseModal />
        </TranslationProvider>
      </ServiceHubProvider>
--- a/web-app/src/routes/settings/attachments.tsx
+++ b/web-app/src/routes/settings/attachments.tsx
@@ -58,16 +58,20 @@ function AttachmentsSettings() {
      maxFileSizeMB: s.maxFileSizeMB,
      retrievalLimit: s.retrievalLimit,
      retrievalThreshold: s.retrievalThreshold,
-      chunkSizeTokens: s.chunkSizeTokens,
-      overlapTokens: s.overlapTokens,
+      chunkSizeChars: s.chunkSizeChars,
+      overlapChars: s.overlapChars,
      searchMode: s.searchMode,
+      parseMode: s.parseMode,
+      autoInlineContextRatio: s.autoInlineContextRatio,
      setEnabled: s.setEnabled,
      setMaxFileSizeMB: s.setMaxFileSizeMB,
      setRetrievalLimit: s.setRetrievalLimit,
      setRetrievalThreshold: s.setRetrievalThreshold,
-      setChunkSizeTokens: s.setChunkSizeTokens,
-      setOverlapTokens: s.setOverlapTokens,
+      setChunkSizeChars: s.setChunkSizeChars,
+      setOverlapChars: s.setOverlapChars,
      setSearchMode: s.setSearchMode,
+      setParseMode: s.setParseMode,
+      setAutoInlineContextRatio: s.setAutoInlineContextRatio,
    }))
  )

@@ -99,9 +103,11 @@ function AttachmentsSettings() {
    }))

    // For non-numeric inputs, apply immediately without debounce
-    if (key === 'enabled' || key === 'search_mode') {
+    if (key === 'enabled' || key === 'search_mode' || key === 'parse_mode') {
      if (key === 'enabled') sel.setEnabled(!!val)
      else if (key === 'search_mode') sel.setSearchMode(val as 'auto' | 'ann' | 'linear')
+      else if (key === 'parse_mode')
+        sel.setParseMode(val as 'auto' | 'inline' | 'embeddings' | 'prompt')
      return
    }

@@ -109,12 +115,20 @@ function AttachmentsSettings() {
    timersRef.current[key] = setTimeout(() => {
      const currentStoreValue = (() => {
        switch (key) {
-          case 'max_file_size_mb': return sel.maxFileSizeMB
-          case 'retrieval_limit': return sel.retrievalLimit
-          case 'retrieval_threshold': return sel.retrievalThreshold
-          case 'chunk_size_tokens': return sel.chunkSizeTokens
-          case 'overlap_tokens': return sel.overlapTokens
-          default: return 0
+          case 'max_file_size_mb':
+            return sel.maxFileSizeMB
+          case 'retrieval_limit':
+            return sel.retrievalLimit
+          case 'retrieval_threshold':
+            return sel.retrievalThreshold
+          case 'chunk_size_chars':
+            return sel.chunkSizeChars
+          case 'overlap_chars':
+            return sel.overlapChars
+          case 'auto_inline_context_ratio':
+            return sel.autoInlineContextRatio
+          default:
+            return 0
        }
      })()

@@ -130,11 +144,14 @@ function AttachmentsSettings() {
        case 'retrieval_threshold':
          sel.setRetrievalThreshold(validated)
          break
-        case 'chunk_size_tokens':
-          sel.setChunkSizeTokens(validated)
+        case 'chunk_size_chars':
+          sel.setChunkSizeChars(validated)
          break
-        case 'overlap_tokens':
-          sel.setOverlapTokens(validated)
+        case 'overlap_chars':
+          sel.setOverlapChars(validated)
+          break
+        case 'auto_inline_context_ratio':
+          sel.setAutoInlineContextRatio(validated)
          break
      }

@@ -169,12 +186,16 @@ function AttachmentsSettings() {
                        return sel.retrievalLimit
                      case 'retrieval_threshold':
                        return sel.retrievalThreshold
-                      case 'chunk_size_tokens':
-                        return sel.chunkSizeTokens
-                      case 'overlap_tokens':
-                        return sel.overlapTokens
+                      case 'chunk_size_chars':
+                        return sel.chunkSizeChars
+                      case 'overlap_chars':
+                        return sel.overlapChars
                      case 'search_mode':
                        return sel.searchMode
+                      case 'parse_mode':
+                        return sel.parseMode
+                      case 'auto_inline_context_ratio':
+                        return sel.autoInlineContextRatio
                      default:
                        return d?.controllerProps?.value
                    }
--- a/web-app/src/services/rag/default.ts
+++ b/web-app/src/services/rag/default.ts
@@ -47,4 +47,16 @@ export class DefaultRAGService implements RAGService {
      return []
    }
  }
+
+  /** Parse a document to text via the RAG extension; resolves to '' if unavailable or on error. */
+  async parseDocument(path: string, type?: string): Promise<string> {
+    let text = ''
+    try {
+      const ragExtension = ExtensionManager.getInstance().get<RAGExtension>(ExtensionTypeEnum.RAG)
+      text = (await ragExtension?.parseDocument?.(path, type)) ?? ''
+    } catch (error) {
+      console.debug('RAG parseDocument unavailable', error)
+    }
+    return text
+  }
 }
--- a/web-app/src/services/rag/types.ts
+++ b/web-app/src/services/rag/types.ts
@@ -8,4 +8,6 @@ export interface RAGService {
  callTool(args: { toolName: string; arguments: object; threadId?: string }): Promise<MCPToolCallResult>
  // Convenience: return tool names for routing
  getToolNames(): Promise<string[]>
+  // Parse a document to text for inline injection decisions
+  parseDocument?: (path: string, type?: string) => Promise<string>
 }
--- a/web-app/src/types/attachment.ts
+++ b/web-app/src/types/attachment.ts
@@ -20,9 +20,12 @@ export type Attachment = {
  // For documents (local files)
  path?: string
  fileType?: string // e.g., 'pdf', 'docx'
+  parseMode?: 'auto' | 'inline' | 'embeddings' | 'prompt'

  // After processing (images uploaded, documents ingested)
  id?: string
+  injectionMode?: 'inline' | 'embeddings'
+  inlineContent?: string
 }

 /**
@@ -49,6 +52,7 @@ export function createDocumentAttachment(data: {
  path: string
  fileType?: string
  size?: number
+  parseMode?: 'auto' | 'inline' | 'embeddings' | 'prompt'
 }): Attachment {
  return {
    ...data,
--- a/yarn.lock
+++ b/yarn.lock
@@ -3532,7 +3532,7 @@ __metadata:
    "@radix-ui/react-tooltip": "npm:1.2.4"
    "@tabler/icons-react": "npm:3.34.0"
    "@tailwindcss/vite": "npm:4.1.4"
-    "@tanstack/react-router": "npm:1.117.0"
+    "@tanstack/react-router": "npm:^1.121.34"
    "@tanstack/react-router-devtools": "npm:1.121.34"
    "@tanstack/react-virtual": "npm:3.13.12"
    "@tanstack/router-plugin": "npm:1.117.0"
@@ -7081,6 +7081,13 @@ __metadata:
  languageName: node
  linkType: hard

+"@tanstack/history@npm:1.139.0":
+  version: 1.139.0
+  resolution: "@tanstack/history@npm:1.139.0"
+  checksum: 10c0/000fe41d3c3d7f0384e74fcfb1ecda25800906220925d1ab715e4fad7dab081e81c738238a3c09bfb1203ffd4ab0e1f24da1384ded322a154b9eba58405e3e90
+  languageName: node
+  linkType: hard
+
 "@tanstack/react-router-devtools@npm:1.121.34":
  version: 1.121.34
  resolution: "@tanstack/react-router-devtools@npm:1.121.34"
@@ -7094,33 +7101,33 @@ __metadata:
  languageName: node
  linkType: hard

-"@tanstack/react-router@npm:1.117.0":
-  version: 1.117.0
-  resolution: "@tanstack/react-router@npm:1.117.0"
+"@tanstack/react-router@npm:^1.121.34":
+  version: 1.139.12
+  resolution: "@tanstack/react-router@npm:1.139.12"
  dependencies:
-    "@tanstack/history": "npm:1.115.0"
-    "@tanstack/react-store": "npm:^0.7.0"
-    "@tanstack/router-core": "npm:1.117.0"
-    jsesc: "npm:^3.1.0"
+    "@tanstack/history": "npm:1.139.0"
+    "@tanstack/react-store": "npm:^0.8.0"
+    "@tanstack/router-core": "npm:1.139.12"
+    isbot: "npm:^5.1.22"
    tiny-invariant: "npm:^1.3.3"
    tiny-warning: "npm:^1.0.3"
  peerDependencies:
    react: ">=18.0.0 || >=19.0.0"
    react-dom: ">=18.0.0 || >=19.0.0"
-  checksum: 10c0/743d9ddcb57e48cc877e0fa2b1f7440891792c3866260dff70c76141f945c985499fa872b2a235fa775a4ca52ce74257afae9a7afa5228b8426e6eeb71c5e67c
+  checksum: 10c0/71d846684d902909b78ba8bf3156f4582bdbe7cfae2d1e9153516fa09f069a89ab60034a30eb2da7d145381e8867f12927743aceba92626c5eede991eba7a9c8
  languageName: node
  linkType: hard

-"@tanstack/react-store@npm:^0.7.0":
-  version: 0.7.5
-  resolution: "@tanstack/react-store@npm:0.7.5"
+"@tanstack/react-store@npm:^0.8.0":
+  version: 0.8.0
+  resolution: "@tanstack/react-store@npm:0.8.0"
  dependencies:
-    "@tanstack/store": "npm:0.7.5"
-    use-sync-external-store: "npm:^1.5.0"
+    "@tanstack/store": "npm:0.8.0"
+    use-sync-external-store: "npm:^1.6.0"
  peerDependencies:
    react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
    react-dom: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
-  checksum: 10c0/a23e870f43d42bbe4797ff7cd5d576a5634491585162f61ce651edcf2a8145645f46fabb7dc36a939a872dc6d0d0507925e4ed3f7e6d6feec51e29e60a7809a1
+  checksum: 10c0/ecf7ad81d97810336d0a808a41442f235a444e98599c6e7e026efd3c4360548b84af9a23612f1d0da85e32a4d9e207632b2ee2cec6f635109a256209caa3bc59
  languageName: node
  linkType: hard

@@ -7136,7 +7143,22 @@ __metadata:
  languageName: node
  linkType: hard

-"@tanstack/router-core@npm:1.117.0, @tanstack/router-core@npm:^1.117.0":
+"@tanstack/router-core@npm:1.139.12":
+  version: 1.139.12
+  resolution: "@tanstack/router-core@npm:1.139.12"
+  dependencies:
+    "@tanstack/history": "npm:1.139.0"
+    "@tanstack/store": "npm:^0.8.0"
+    cookie-es: "npm:^2.0.0"
+    seroval: "npm:^1.4.0"
+    seroval-plugins: "npm:^1.4.0"
+    tiny-invariant: "npm:^1.3.3"
+    tiny-warning: "npm:^1.0.3"
+  checksum: 10c0/4823343c5bc8bdd17f7b5a2ff60bdceefa76cc78800721d0d46cee96b8de220ff48f3dfc3b7cc49a760b4e6a0243d561e84700e878906442ed139a7676bf6584
+  languageName: node
+  linkType: hard
+
+"@tanstack/router-core@npm:^1.117.0":
  version: 1.117.0
  resolution: "@tanstack/router-core@npm:1.117.0"
  dependencies:
@@ -7237,7 +7259,14 @@ __metadata:
  languageName: node
  linkType: hard

-"@tanstack/store@npm:0.7.5, @tanstack/store@npm:^0.7.0":
+"@tanstack/store@npm:0.8.0, @tanstack/store@npm:^0.8.0":
+  version: 0.8.0
+  resolution: "@tanstack/store@npm:0.8.0"
+  checksum: 10c0/71841a7a7653f744bdea457d2c41768b8d5e5aed1d5ff22bd068e28ced9bf658208c730963809c2223b26b753e19da987c0d98acb7c543abd97de14e0d58991f
+  languageName: node
+  linkType: hard
+
+"@tanstack/store@npm:^0.7.0":
  version: 0.7.5
  resolution: "@tanstack/store@npm:0.7.5"
  checksum: 10c0/69a83ce95db823d98e9949e6632781e819b7b01f9846f3c80a0ab4a318d974eee625ae7cab90ffeb02447c8e7108db2e7f78a82276a1b7dc3e40fac30ebf4917
@@ -9819,6 +9848,13 @@ __metadata:
  languageName: node
  linkType: hard

+"cookie-es@npm:^2.0.0":
+  version: 2.0.0
+  resolution: "cookie-es@npm:2.0.0"
+  checksum: 10c0/3b2459030a5ad2bc715aeb27a32f274340670bfc5031ac29e1fba804212517411bb617880d3fe66ace2b64dfb28f3049e2d1ff40d4bec342154ccdd124deaeaa
+  languageName: node
+  linkType: hard
+
 "cookie-signature@npm:^1.2.1":
  version: 1.2.2
  resolution: "cookie-signature@npm:1.2.2"
@@ -13617,6 +13653,13 @@ __metadata:
  languageName: node
  linkType: hard

+"isbot@npm:^5.1.22":
+  version: 5.1.32
+  resolution: "isbot@npm:5.1.32"
+  checksum: 10c0/e5aa9c5c92dae4879cf49956797c46ef77fa919230183cd6254628667ca5e22f15b24bc4d63b0e88cb96da3d7a51e33f847ef7114fa542e3e066f78178c8d97e
+  languageName: node
+  linkType: hard
+
 "isexe@npm:^2.0.0":
  version: 2.0.0
  resolution: "isexe@npm:2.0.0"
@@ -13942,7 +13985,7 @@ __metadata:
  languageName: node
  linkType: hard

-"jsesc@npm:^3.0.2, jsesc@npm:^3.1.0":
+"jsesc@npm:^3.0.2":
  version: 3.1.0
  resolution: "jsesc@npm:3.1.0"
  bin:
@@ -18296,6 +18339,15 @@ __metadata:
  languageName: node
  linkType: hard

+"seroval-plugins@npm:^1.4.0":
+  version: 1.4.0
+  resolution: "seroval-plugins@npm:1.4.0"
+  peerDependencies:
+    seroval: ^1.0
+  checksum: 10c0/d774b8a23bec45f1fefe314e38e26d2fffc0733ad50253a760a10f46cbb0be3a28ed9fcf60aadc0b3f1d2873f4118453a47e84145e858736944dbcd93b42437e
+  languageName: node
+  linkType: hard
+
 "seroval-plugins@npm:~1.3.0":
  version: 1.3.3
  resolution: "seroval-plugins@npm:1.3.3"
@@ -18305,6 +18357,13 @@ __metadata:
  languageName: node
  linkType: hard

+"seroval@npm:^1.4.0":
+  version: 1.4.0
+  resolution: "seroval@npm:1.4.0"
+  checksum: 10c0/020262db5572c16ae5d22ecefa089112a0b1b9a9c78229dbc9c6059c172ed7f0b5005c7990b80714ff8638ac86274195c2084537e0c2a9178690acacff4b705f
+  languageName: node
+  linkType: hard
+
 "seroval@npm:~1.3.0":
  version: 1.3.2
  resolution: "seroval@npm:1.3.2"
@@ -20249,6 +20308,15 @@ __metadata:
  languageName: node
  linkType: hard

+"use-sync-external-store@npm:^1.6.0":
+  version: 1.6.0
+  resolution: "use-sync-external-store@npm:1.6.0"
+  peerDependencies:
+    react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
+  checksum: 10c0/35e1179f872a53227bdf8a827f7911da4c37c0f4091c29b76b1e32473d1670ebe7bcd880b808b7549ba9a5605c233350f800ffab963ee4a4ee346ee983b6019b
+  languageName: node
+  linkType: hard
+
 "use@npm:^3.1.0":
  version: 3.1.1
  resolution: "use@npm:3.1.1"