project init

This commit is contained in:
HyunjunJeon
2025-12-31 11:32:36 +09:00
commit 9cb01f4abe
212 changed files with 64609 additions and 0 deletions

215
deepagents_sourcecode/.gitignore vendored Normal file
View File

@@ -0,0 +1,215 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# LangGraph
.langgraph_api
#claude
.claude
.idea

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Harrison Chase
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,320 @@
# 🚀🧠 Deep Agents
Agents can increasingly tackle long-horizon tasks, [with agent task length doubling every 7 months](https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/)! But, long horizon tasks often span dozens of tool calls, which present cost and reliability challenges. Popular agents such as [Claude Code](https://code.claude.com/docs) and [Manus](https://www.youtube.com/watch?v=6_BcCthVvb8) use some common principles to address these challenges, including **planning** (prior to task execution), **computer access** (giving the agent access to a shell and a filesystem), and **sub-agent delegation** (isolated task execution). `deepagents` is a simple agent harness that implements these tools, but is open source and easily extendable with your own custom tools and instructions.
<img src=".github/images/deepagents_banner.png" alt="deep agent" width="100%"/>
## 📚 Resources
- **[Documentation](https://docs.langchain.com/oss/python/deepagents/overview)** - Full overview and API reference
- **[Korean Documentation](docs/DeepAgents_Documentation_KR.md)** - DeepAgents Technical Documentation (KR)
- **[Quickstarts Repo](https://github.com/langchain-ai/deepagents-quickstarts)** - Examples and use-cases
- **[CLI](libs/deepagents-cli/)** - Interactive command-line interface with skills, memory, and HITL workflows
## 🚀 Quickstart
You can give `deepagents` custom tools. Below, we'll optionally provide the `tavily` tool to search the web. This tool will be added to the `deepagents` built-in tools (see below).
```bash
pip install deepagents tavily-python
```
Set `TAVILY_API_KEY` in your environment ([get one here](https://www.tavily.com/)):
```python
import os
from deepagents import create_deep_agent
from tavily import TavilyClient
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
def internet_search(query: str, max_results: int = 5):
"""Run a web search"""
return tavily_client.search(query, max_results=max_results)
agent = create_deep_agent(
tools=[internet_search],
system_prompt="Conduct research and write a polished report.",
)
result = agent.invoke({"messages": [{"role": "user", "content": "What is LangGraph?"}]})
```
The agent created with `create_deep_agent` is compiled [LangGraph StateGraph](https://docs.langchain.com/oss/python/langgraph/overview), so it can be used with streaming, human-in-the-loop, memory, or Studio just like any LangGraph agent. See our [quickstarts repo](https://github.com/langchain-ai/deepagents-quickstarts) for more examples.
## Customizing Deep Agents
There are several parameters you can pass to `create_deep_agent`.
### `model`
By default, `deepagents` uses `"claude-sonnet-4-5-20250929"`. You can customize this by passing any [LangChain model object](https://python.langchain.com/docs/integrations/chat/).
```python
from langchain.chat_models import init_chat_model
from deepagents import create_deep_agent
model = init_chat_model("openai:gpt-4o")
agent = create_deep_agent(
model=model,
)
```
### `system_prompt`
You can provide a `system_prompt` parameter to `create_deep_agent()`. This custom prompt is **appended to** default instructions that are automatically injected by middleware.
When writing a custom system prompt, you should:
- ✅ Define domain-specific workflows (e.g., research methodology, data analysis steps)
- ✅ Provide concrete examples for your use case
- ✅ Add specialized guidance (e.g., "batch similar research tasks into a single TODO")
- ✅ Define stopping criteria and resource limits
- ✅ Explain how tools work together in your workflow
**Don't:**
- ❌ Re-explain what standard tools do (already covered by middleware)
- ❌ Duplicate middleware instructions about tool usage
- ❌ Contradict default instructions (work with them, not against them)
```python
from deepagents import create_deep_agent
research_instructions = """your custom system prompt"""
agent = create_deep_agent(
system_prompt=research_instructions,
)
```
See our [quickstarts repo](https://github.com/langchain-ai/deepagents-quickstarts) for more examples.
### `tools`
Provide custom tools to your agent (in addition to [Built-in Tools](#built-in-tools)):
```python
from deepagents import create_deep_agent
def internet_search(query: str) -> str:
"""Run a web search"""
return tavily_client.search(query)
agent = create_deep_agent(tools=[internet_search])
```
You can also connect MCP tools via [langchain-mcp-adapters](https://github.com/langchain-ai/langchain-mcp-adapters):
```python
from langchain_mcp_adapters.client import MultiServerMCPClient
from deepagents import create_deep_agent
async def main():
mcp_client = MultiServerMCPClient(...)
mcp_tools = await mcp_client.get_tools()
agent = create_deep_agent(tools=mcp_tools)
async for chunk in agent.astream({"messages": [{"role": "user", "content": "..."}]}):
chunk["messages"][-1].pretty_print()
```
### `middleware`
Deep agents use [middleware](https://docs.langchain.com/oss/python/langchain/middleware) for extensibility (see [Built-in Tools](#built-in-tools) for defaults). Add custom middleware to inject tools, modify prompts, or hook into the agent lifecycle:
```python
from langchain_core.tools import tool
from deepagents import create_deep_agent
from langchain.agents.middleware import AgentMiddleware
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
class WeatherMiddleware(AgentMiddleware):
tools = [get_weather]
agent = create_deep_agent(middleware=[WeatherMiddleware()])
```
### `subagents`
The main agent can delegate work to sub-agents via the `task` tool (see [Built-in Tools](#built-in-tools)). You can supply custom sub-agents for context isolation and custom instructions:
```python
from deepagents import create_deep_agent
research_subagent = {
"name": "research-agent",
"description": "Used to research in-depth questions",
"system_prompt": "You are an expert researcher",
"tools": [internet_search],
"model": "openai:gpt-4o", # Optional, defaults to main agent model
}
agent = create_deep_agent(subagents=[research_subagent])
```
For complex cases, pass a pre-built LangGraph graph:
```python
from deepagents import CompiledSubAgent, create_deep_agent
custom_graph = create_agent(model=..., tools=..., system_prompt=...)
agent = create_deep_agent(
subagents=[CompiledSubAgent(
name="data-analyzer",
description="Specialized agent for data analysis",
runnable=custom_graph
)]
)
```
See the [subagents documentation](https://docs.langchain.com/oss/python/deepagents/subagents) for more details.
### `interrupt_on`
Some tools may be sensitive and require human approval before execution. Deepagents supports human-in-the-loop workflows through LangGraph's interrupt capabilities. You can configure which tools require approval; note that a checkpointer must be attached to the agent for interrupts to work.
These tool configs are passed to our prebuilt [HITL middleware](https://docs.langchain.com/oss/python/langchain/middleware#human-in-the-loop) so that the agent pauses execution and waits for feedback from the user before executing configured tools.
```python
from langchain_core.tools import tool
from deepagents import create_deep_agent
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
tools=[get_weather],
interrupt_on={
"get_weather": {
"allowed_decisions": ["approve", "edit", "reject"]
},
}
)
```
See the [human-in-the-loop documentation](https://docs.langchain.com/oss/python/deepagents/human-in-the-loop) for more details.
### `backend`
Deep agents use pluggable backends to control how filesystem operations work. By default, files are stored in the agent's ephemeral state. You can configure different backends for local disk access, persistent cross-conversation storage, or hybrid routing.
```python
from deepagents import create_deep_agent
from deepagents.backends import FilesystemBackend
agent = create_deep_agent(
backend=FilesystemBackend(root_dir="/path/to/project"),
)
```
Available backends include:
- **StateBackend** (default): Ephemeral files stored in agent state
- **FilesystemBackend**: Real disk operations under a root directory
- **StoreBackend**: Persistent storage using LangGraph Store
- **CompositeBackend**: Route different paths to different backends
See the [backends documentation](https://docs.langchain.com/oss/python/deepagents/backends) for more details.
### Long-term Memory
Deep agents can maintain persistent memory across conversations using a `CompositeBackend` that routes specific paths to durable storage.
This enables hybrid memory where working files remain ephemeral while important data (like user preferences or knowledge bases) persists across threads.
```python
from deepagents import create_deep_agent
from deepagents.backends import CompositeBackend, StateBackend, StoreBackend
from langgraph.store.memory import InMemoryStore
agent = create_deep_agent(
backend=CompositeBackend(
default=StateBackend(),
routes={"/memories/": StoreBackend(store=InMemoryStore())},
),
)
```
Files under `/memories/` will persist across all conversations, while other paths remain temporary. Use cases include:
- Preserving user preferences across sessions
- Building knowledge bases from multiple conversations
- Self-improving instructions based on feedback
- Maintaining research progress across sessions
See the [long-term memory documentation](https://docs.langchain.com/oss/python/deepagents/long-term-memory) for more details.
## Built-in Tools
<img src=".github/images/deepagents_tools.png" alt="deep agent" width="600"/>
Every deep agent created with `create_deep_agent` comes with a standard set of tools:
| Tool Name | Description | Provided By |
|-----------|-------------|-------------|
| `write_todos` | Create and manage structured task lists for tracking progress through complex workflows | TodoListMiddleware |
| `read_todos` | Read the current todo list state | TodoListMiddleware |
| `ls` | List all files in a directory (requires absolute path) | FilesystemMiddleware |
| `read_file` | Read content from a file with optional pagination (offset/limit parameters) | FilesystemMiddleware |
| `write_file` | Create a new file or completely overwrite an existing file | FilesystemMiddleware |
| `edit_file` | Perform exact string replacements in files | FilesystemMiddleware |
| `glob` | Find files matching a pattern (e.g., `**/*.py`) | FilesystemMiddleware |
| `grep` | Search for text patterns within files | FilesystemMiddleware |
| `execute`* | Run shell commands in a sandboxed environment | FilesystemMiddleware |
| `task` | Delegate tasks to specialized sub-agents with isolated context windows | SubAgentMiddleware |
The `execute` tool is only available if the backend implements `SandboxBackendProtocol`. By default, the in-memory state backend is used, which does not support command execution. As shown in the table above, these tools (along with other capabilities) are provided by the default middleware.
See the [agent harness documentation](https://docs.langchain.com/oss/python/deepagents/harness) for more details on built-in tools and capabilities.
## Built-in Middleware
`deepagents` uses middleware under the hood. Here is the list of the middleware used.
| Middleware | Purpose |
|------------|---------|
| **TodoListMiddleware** | Task planning and progress tracking |
| **FilesystemMiddleware** | File operations and context offloading (auto-saves large results) |
| **SubAgentMiddleware** | Delegate tasks to isolated sub-agents |
| **SummarizationMiddleware** | Auto-summarizes when context exceeds 170k tokens |
| **AnthropicPromptCachingMiddleware** | Caches system prompts to reduce costs (Anthropic only) |
| **PatchToolCallsMiddleware** | Fixes dangling tool calls from interruptions |
| **HumanInTheLoopMiddleware** | Pauses execution for human approval (requires `interrupt_on` config) |
## Built-in prompts
The middleware automatically adds instructions about the standard tools. Your custom instructions should **complement, not duplicate** these defaults:
#### From [TodoListMiddleware](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/middleware/todo.py)
- Explains when to use `write_todos` and `read_todos`
- Guidance on marking tasks completed
- Best practices for todo list management
- When NOT to use todos (simple tasks)
#### From [FilesystemMiddleware](libs/deepagents/deepagents/middleware/filesystem.py)
- Lists all filesystem tools (`ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`, `execute`*)
- Explains that file paths must start with `/`
- Describes each tool's purpose and parameters
- Notes about context offloading for large tool results
#### From [SubAgentMiddleware](libs/deepagents/deepagents/middleware/subagents.py)
- Explains the `task()` tool for delegating to sub-agents
- When to use sub-agents vs when NOT to use them
- Guidance on parallel execution
- Subagent lifecycle (spawn → run → return → reconcile)
## Security Considerations
### Trust Model
Deepagents follows a "trust the LLM" model similar to Claude Code. The agent can perform any action the underlying tools allow. Security boundaries should be enforced at the tool/sandbox level, not by expecting the LLM to self-police.

View File

@@ -0,0 +1,56 @@
# Declare every command target as phony so a stray file with the same name
# (e.g. ./toad or ./test) can never shadow it.
.PHONY: all lint lint_diff format format_diff test test_watch toad help

# Default target executed when no arguments are given to make.
all: help

######################
# TESTING AND COVERAGE
######################

# Define a variable for the test file path.
TEST_FILE ?= tests/

test:
	uv run pytest --disable-socket --allow-unix-socket $(TEST_FILE) --timeout 10

test_watch:
	uv run ptw . -- $(TEST_FILE)

toad:
	uv run toad acp "deepacp"

######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
lint format: PYTHON_FILES=deepagents_acp/ tests/
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=. --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')

lint lint_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff check $(PYTHON_FILES) --diff
#	[ "$(PYTHON_FILES)" = "" ] || uv run mypy $(PYTHON_FILES)

format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff check --fix $(PYTHON_FILES)

######################
# HELP
######################

help:
	@echo '===================='
	@echo '-- LINTING --'
	@echo 'format - run code formatters'
	@echo 'lint - run linters'
	@echo '-- TESTS --'
	@echo 'test - run unit tests'
	@echo 'test TEST_FILE=<test_file> - run all tests in file'
	@echo '-- DOCUMENTATION tasks are from the top-level Makefile --'

View File

@@ -0,0 +1,3 @@
# ACP
Work in progress support for Agent Client Protocol

View File

@@ -0,0 +1,655 @@
"""DeepAgents ACP server implementation."""
from __future__ import annotations
import asyncio
import uuid
from typing import Any, Literal
from acp import (
Agent,
AgentSideConnection,
PROTOCOL_VERSION,
stdio_streams,
)
from acp.schema import (
AgentMessageChunk,
InitializeRequest,
InitializeResponse,
NewSessionRequest,
NewSessionResponse,
PromptRequest,
PromptResponse,
SessionNotification,
TextContentBlock,
Implementation,
AgentThoughtChunk,
ToolCallProgress,
ContentToolCallContent,
LoadSessionResponse,
SetSessionModeResponse,
SetSessionModelResponse,
CancelNotification,
LoadSessionRequest,
SetSessionModeRequest,
SetSessionModelRequest,
AgentPlanUpdate,
PlanEntry,
PermissionOption,
RequestPermissionRequest,
AllowedOutcome,
DeniedOutcome,
ToolCall as ACPToolCall,
)
from deepagents import create_deep_agent
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import AIMessage, AIMessageChunk, ToolMessage
from langchain_core.messages.content import ToolCall
from langchain_core.tools import tool
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph.state import CompiledStateGraph
from langgraph.types import Command, Interrupt
class DeepagentsACP(Agent):
"""ACP Agent implementation wrapping deepagents."""
def __init__(
    self,
    connection: AgentSideConnection,
    agent_graph: CompiledStateGraph,
) -> None:
    """Wrap a pre-compiled deepagents graph behind an ACP agent.

    Args:
        connection: ACP connection used to talk to the client.
        agent_graph: Compiled LangGraph StateGraph (the output of
            create_deep_agent), shared across all sessions.
    """
    # Per-session bookkeeping: session_id -> {"agent": ..., "thread_id": ...}.
    self._sessions: dict[str, dict[str, Any]] = {}
    # Maps tool_call_id -> ToolCall TypedDict so that a later ToolMessage
    # can be matched back to the AI message that issued the call.
    self._tool_calls: dict[str, ToolCall] = {}
    self._connection = connection
    self._agent_graph = agent_graph
async def initialize(
    self,
    params: InitializeRequest,
) -> InitializeResponse:
    """Answer the ACP handshake with the protocol version and server identity."""
    server_info = Implementation(
        name="DeepAgents ACP Server",
        version="0.1.0",
        title="DeepAgents ACP Server",
    )
    return InitializeResponse(
        protocolVersion=PROTOCOL_VERSION,
        agentInfo=server_info,
    )
async def newSession(
    self,
    params: NewSessionRequest,
) -> NewSessionResponse:
    """Register a fresh session backed by the shared agent graph."""
    # Each session gets its own LangGraph thread id so checkpointed state
    # never leaks between conversations; the compiled graph itself is shared.
    session_id = str(uuid.uuid4())
    self._sessions[session_id] = {
        "agent": self._agent_graph,
        "thread_id": str(uuid.uuid4()),
    }
    return NewSessionResponse(sessionId=session_id)
async def _handle_ai_message_chunk(
    self,
    params: PromptRequest,
    message: AIMessageChunk,
) -> None:
    """Forward a streamed model chunk to the client as session updates.

    Text blocks become ``agent_message_chunk`` updates and reasoning blocks
    become ``agent_thought_chunk`` updates; empty payloads and every other
    block type (tool call chunks, media, ...) are silently skipped.

    Args:
        params: The prompt request parameters (supplies the session id).
        message: One streamed AIMessageChunk of the model response.

    Note:
        message.content_blocks yields ContentBlock TypedDicts discriminated
        by their "type" field ("text" carries "text", "reasoning" carries
        "reasoning", plus tool_call_chunk/image/audio/... variants).
    """
    for part in message.content_blocks:
        kind = part.get("type")
        if kind == "text":
            payload = part.get("text", "")
            if not payload:  # drop empty text chunks
                continue
            update = AgentMessageChunk(
                content=TextContentBlock(text=payload, type="text"),
                sessionUpdate="agent_message_chunk",
            )
        elif kind == "reasoning":
            payload = part.get("reasoning", "")
            if not payload:  # "reasoning" is NotRequired and may be empty
                continue
            update = AgentThoughtChunk(
                content=TextContentBlock(text=payload, type="text"),
                sessionUpdate="agent_thought_chunk",
            )
        else:
            continue
        await self._connection.sessionUpdate(
            SessionNotification(update=update, sessionId=params.sessionId)
        )
async def _handle_completed_tool_calls(
    self,
    params: PromptRequest,
    message: AIMessage,
) -> None:
    """Emit a pending ``tool_call_update`` for each tool call on an AIMessage.

    Each call is also cached in ``self._tool_calls`` so the matching
    ToolMessage can later be correlated by ``tool_call_id``.

    Args:
        params: The prompt request parameters
        message: An AIMessage containing tool_calls

    Note:
        According to LangChain's AIMessage type, message.tool_calls is a
        list[ToolCall] where ToolCall is a TypedDict with required
        "name" (str), "args" (dict) and "id" (str | None) keys, plus an
        optional "type" key.
    """
    # tool_calls is a defined field on AIMessage; nothing to do when empty.
    if not message.tool_calls:
        return
    for tool_call in message.tool_calls:
        tool_call_id = tool_call["id"]  # str | None
        tool_name = tool_call["name"]  # str
        tool_args = tool_call["args"]  # dict[str, Any]
        # Skip tool calls without an ID (shouldn't happen in practice)
        if tool_call_id is None:
            continue
        # Todo updates are surfaced to the client as plan updates by
        # _handle_todo_update, so skip them here instead of raising
        # (the previous NotImplementedError would have aborted the stream).
        # NOTE(review): deepagents' todo tools are named "write_todos"/
        # "read_todos"; confirm whether this guard should match those names.
        if tool_name == "todo":
            continue
        # Announce the call to the client as pending.
        await self._connection.sessionUpdate(
            SessionNotification(
                update=ToolCallProgress(
                    sessionUpdate="tool_call_update",
                    toolCallId=tool_call_id,
                    title=tool_name,
                    rawInput=tool_args,
                    status="pending",
                ),
                sessionId=params.sessionId,
            )
        )
        # Store the tool call for later matching with its ToolMessage.
        self._tool_calls[tool_call_id] = tool_call
async def _handle_tool_message(
    self,
    params: PromptRequest,
    tool_call: ToolCall,
    message: ToolMessage,
) -> None:
    """Report a tool execution result to the client.

    Sends a ``tool_call_update`` whose status is "failed" when the
    ToolMessage carries an error status and "completed" otherwise, with
    any non-empty text blocks re-packaged as ACP content.

    Args:
        params: The prompt request parameters.
        tool_call: The original ToolCall this message is responding to.
        message: The ToolMessage holding the tool execution result.

    Note:
        ToolMessage inherits content from BaseMessage and additionally
        carries tool_call_id (str) and a status attribute (e.g. "error"
        for failed tool calls).
    """
    # LangChain marks failed tool calls with status == "error".
    failed = hasattr(message, "status") and message.status == "error"
    status: Literal["completed", "failed"] = "failed" if failed else "completed"
    # Re-package every non-empty text block from the tool output.
    content_blocks = [
        ContentToolCallContent(
            type="content",
            content=TextContentBlock(text=block.get("text", ""), type="text"),
        )
        for block in message.content_blocks
        if block.get("type") == "text" and block.get("text", "")
    ]
    await self._connection.sessionUpdate(
        SessionNotification(
            update=ToolCallProgress(
                sessionUpdate="tool_call_update",
                toolCallId=message.tool_call_id,
                title=tool_call["name"],
                content=content_blocks,
                rawOutput=message.content,
                status=status,
            ),
            sessionId=params.sessionId,
        )
    )
async def _handle_todo_update(
    self,
    params: PromptRequest,
    todos: list[dict[str, Any]],
) -> None:
    """Mirror the agent's todo list to the client as an ACP plan update.

    Args:
        params: The prompt request parameters.
        todos: Todo dicts produced by the deepagents write_todos tool,
            shaped like
            ``{"content": str, "status": "pending"|"in_progress"|"completed"}``.
    """
    valid_statuses = ("pending", "in_progress", "completed")
    entries = []
    for item in todos:
        raw_status = item.get("status", "pending")
        # Unknown statuses are coerced to "pending" rather than rejected.
        safe_status = raw_status if raw_status in valid_statuses else "pending"
        entries.append(
            PlanEntry(
                content=item.get("content", ""),
                status=safe_status,  # type: ignore
                # Todos carry no priority, so default every entry to "medium".
                priority="medium",
            )
        )
    await self._connection.sessionUpdate(
        SessionNotification(
            update=AgentPlanUpdate(sessionUpdate="plan", entries=entries),
            sessionId=params.sessionId,
        )
    )
async def _handle_interrupt(
    self,
    params: PromptRequest,
    interrupt: Interrupt,
) -> list[dict[str, Any]]:
    """Translate a LangGraph HITL interrupt into ACP permission requests.

    For every pending action request in the interrupt, asks the client for
    permission (sequentially, one request per action) and converts the
    client's outcome into a HITL decision dict.

    Args:
        params: The prompt request parameters (supplies the session id).
        interrupt: The LangGraph interrupt; its ``value`` is expected to hold
            - action_requests: [{'name': str, 'args': dict, ...}, ...]
            - review_configs: [{'action_name': str,
                                'allowed_decisions': list[str]}, ...]

    Returns:
        One decision dict per action request, in order, suitable for
        ``Command(resume={...})`` — either {"type": "approve"} or
        {"type": "reject", "message": ...}.
    """
    interrupt_data = interrupt.value
    action_requests = interrupt_data.get("action_requests", [])
    review_configs = interrupt_data.get("review_configs", [])
    # Create a mapping of action names to their allowed decisions.
    allowed_decisions_map = {}
    for review_config in review_configs:
        action_name = review_config.get("action_name")
        allowed_decisions = review_config.get("allowed_decisions", [])
        allowed_decisions_map[action_name] = allowed_decisions
    # Collect decisions for all action requests.
    decisions = []
    for action_request in action_requests:
        tool_name = action_request.get("name")
        tool_args = action_request.get("args", {})
        # Fall back to approve/reject when no review config names this tool.
        allowed_decisions = allowed_decisions_map.get(
            tool_name, ["approve", "reject"]
        )
        # Build permission options based on allowed decisions.
        # NOTE(review): an "edit" entry in allowed_decisions currently adds
        # no option, so the client is never offered an edit choice.
        options = []
        if "approve" in allowed_decisions:
            options.append(
                PermissionOption(
                    optionId="allow-once",
                    name="Allow once",
                    kind="allow_once",
                )
            )
        if "reject" in allowed_decisions:
            options.append(
                PermissionOption(
                    optionId="reject-once",
                    name="Reject",
                    kind="reject_once",
                )
            )
        # Generate a fresh tool call ID for this permission request; the
        # original call's ID is not looked up from self._tool_calls yet.
        tool_call_id = f"perm_{uuid.uuid4().hex[:8]}"
        # Create the ACP ToolCall object shown alongside the permission prompt.
        acp_tool_call = ACPToolCall(
            toolCallId=tool_call_id,
            title=tool_name,
            rawInput=tool_args,
            status="pending",
        )
        # Block until the client answers this permission request.
        response = await self._connection.requestPermission(
            RequestPermissionRequest(
                sessionId=params.sessionId,
                toolCall=acp_tool_call,
                options=options,
            )
        )
        # Convert the ACP outcome into a LangGraph HITL decision.
        outcome = response.outcome
        if isinstance(outcome, AllowedOutcome):
            option_id = outcome.optionId
            if option_id == "allow-once":
                # Check whether the selected option carried edit metadata.
                # NOTE(review): field_meta is never set when options are
                # built above, so both branches approve unchanged.
                selected_option = next(
                    (opt for opt in options if opt.optionId == option_id), None
                )
                if selected_option and selected_option.field_meta:
                    # This is an edit - for now, just approve
                    # TODO: Implement actual edit functionality
                    decisions.append({"type": "approve"})
                else:
                    decisions.append({"type": "approve"})
            elif option_id == "edit":
                # Unreachable today: no option with optionId "edit" is created.
                # TODO: Implement actual edit functionality to collect edited args
                decisions.append({"type": "approve"})
        elif isinstance(outcome, DeniedOutcome):
            decisions.append(
                {
                    "type": "reject",
                    "message": "Action rejected by user",
                }
            )
    return decisions
async def _stream_and_handle_updates(
    self,
    params: PromptRequest,
    agent: Any,
    stream_input: dict[str, Any] | Command,
    config: dict[str, Any],
) -> list[Interrupt]:
    """Stream agent execution and handle updates, returning any interrupts.

    Streams with both "messages" and "updates" modes so that token-level
    chunks and node-level state updates can each be forwarded to the ACP
    client as they arrive.

    Args:
        params: The prompt request parameters
        agent: The agent to stream from
        stream_input: Input to pass to agent.astream (initial message or Command)
        config: Configuration with thread_id

    Returns:
        List of interrupts that occurred during streaming
    """
    interrupts = []
    async for stream_mode, data in agent.astream(
        stream_input,
        config=config,
        stream_mode=["messages", "updates"],
    ):
        if stream_mode == "messages":
            # "messages" mode yields (message, metadata) tuples; only
            # streaming AIMessageChunk instances are forwarded to the client.
            message, metadata = data
            if isinstance(message, AIMessageChunk):
                await self._handle_ai_message_chunk(params, message)
        elif stream_mode == "updates":
            # "updates" mode yields {node_name: state_update} mappings for
            # each graph node that completed.
            for node_name, update in data.items():
                # "__interrupt__" is a pseudo-node whose update is the list
                # of pending interrupts (e.g. HITL approval requests).
                if node_name == "__interrupt__":
                    interrupts.extend(update)
                    continue
                # Only process model and tools nodes
                if node_name not in ("model", "tools"):
                    continue
                # Forward todo-list changes produced by the tools node as an
                # ACP plan update.
                if node_name == "tools" and "todos" in update:
                    todos = update.get("todos", [])
                    if todos:
                        await self._handle_todo_update(params, todos)
                # Get messages from the update
                messages = update.get("messages", [])
                if not messages:
                    continue
                # Process the last message from this node
                last_message = messages[-1]
                # Handle completed AI messages from model node
                if node_name == "model" and isinstance(last_message, AIMessage):
                    # Check if this AIMessage has tool calls
                    if last_message.tool_calls:
                        await self._handle_completed_tool_calls(
                            params, last_message
                        )
                # Handle tool execution results from tools node
                elif node_name == "tools" and isinstance(last_message, ToolMessage):
                    # Look up the original tool call by ID so the ACP update
                    # can reference the matching toolCallId.
                    tool_call = self._tool_calls.get(last_message.tool_call_id)
                    if tool_call:
                        await self._handle_tool_message(
                            params, tool_call, last_message
                        )
    return interrupts
async def prompt(
    self,
    params: PromptRequest,
) -> PromptResponse:
    """Handle a user prompt and stream responses.

    Extracts the text from the prompt content blocks, streams the agent's
    response, and loops through any human-in-the-loop interrupts (asking
    the client for permission each time) until the run completes.

    Args:
        params: The prompt request, including session id and content blocks.

    Returns:
        A PromptResponse with stopReason "end_turn" once streaming finishes.

    Raises:
        ValueError: If the session id does not refer to a known session.
    """
    session_id = params.sessionId
    session = self._sessions.get(session_id)
    if session is None:
        # Fail fast with a clear error instead of an opaque TypeError when
        # subscripting None below.
        raise ValueError(f"Unknown session: {session_id}")
    # Extract text from prompt content blocks; blocks may be objects with a
    # .text attribute or plain dicts.
    prompt_text = ""
    for block in params.prompt:
        if hasattr(block, "text"):
            prompt_text += block.text
        elif isinstance(block, dict) and "text" in block:
            prompt_text += block["text"]
    # Stream the agent's response
    agent = session["agent"]
    thread_id = session["thread_id"]
    config = {"configurable": {"thread_id": thread_id}}
    # Start with the initial user message
    stream_input: dict[str, Any] | Command = {
        "messages": [{"role": "user", "content": prompt_text}]
    }
    # Loop until there are no more interrupts
    while True:
        # Stream and collect any interrupts
        interrupts = await self._stream_and_handle_updates(
            params, agent, stream_input, config
        )
        # If no interrupts, we're done
        if not interrupts:
            break
        # Collect a decision for every interrupt, then resume the graph
        # with them.
        all_decisions = []
        for interrupt in interrupts:
            decisions = await self._handle_interrupt(params, interrupt)
            all_decisions.extend(decisions)
        stream_input = Command(resume={"decisions": all_decisions})
    return PromptResponse(stopReason="end_turn")
async def authenticate(self, params: Any) -> Any | None:
    """Authenticate (optional).

    Args:
        params: Authentication parameters from the client (unused).

    Returns:
        None — authentication is not required by this server.
    """
    # Authentication not required for now
    return None
async def extMethod(self, method: str, params: dict[str, Any]) -> dict[str, Any]:
    """Handle extension methods (optional).

    Args:
        method: Name of the requested extension method.
        params: Arguments for the extension method.

    Raises:
        NotImplementedError: Always — no extension methods are supported.
    """
    raise NotImplementedError(f"Extension method {method} not supported")
async def extNotification(self, method: str, params: dict[str, Any]) -> None:
    """Handle extension notifications (optional).

    Notifications are fire-and-forget, so unsupported ones are ignored
    rather than raising.
    """
    pass
async def cancel(self, params: CancelNotification) -> None:
    """Cancel a running session.

    Args:
        params: Notification identifying the session to cancel.
    """
    # TODO: Implement cancellation logic (currently a no-op).
    pass
async def loadSession(
    self,
    params: LoadSessionRequest,
) -> LoadSessionResponse | None:
    """Load an existing session (optional).

    Returns:
        None — session persistence is not implemented yet; it would require
        serializing/deserializing session state.
    """
    return None
async def setSessionMode(
    self,
    params: SetSessionModeRequest,
) -> SetSessionModeResponse | None:
    """Set session mode (optional).

    Returns:
        None — mode switching is not supported. Could be used to switch
        between different agent modes in the future.
    """
    return None
async def setSessionModel(
    self,
    params: SetSessionModelRequest,
) -> SetSessionModelResponse | None:
    """Set session model (optional).

    Returns:
        None — not supported; the model is fixed at agent graph creation
        time.
    """
    return None
async def main() -> None:
    """Main entry point for running the ACP server.

    Builds a demo deep agent (Anthropic model plus a sample weather tool
    gated by human-in-the-loop approval) and serves it over ACP on stdio.
    """
    # Example of wiring the full CLI agent instead of the demo agent below:
    # from deepagents_cli.agent import create_agent_with_config
    # from deepagents_cli.config import create_model
    # from deepagents_cli.tools import fetch_url, http_request, web_search
    #
    # # Create model using CLI configuration
    # model = create_model()
    #
    # # Setup tools - conditionally include web_search if Tavily is available
    # tools = [http_request, fetch_url]
    # if os.environ.get("TAVILY_API_KEY"):
    #     tools.append(web_search)
    #
    # # Create CLI agent with shell access and other CLI features
    # # Using default assistant_id "agent" for ACP server
    # agent_graph, composite_backend = create_agent_with_config(
    #     model=model,
    #     assistant_id="agent",
    #     tools=tools,
    #     sandbox=None,  # Local mode
    #     sandbox_type=None,
    #     system_prompt=None,  # Use default CLI system prompt
    #     auto_approve=False,  # Require user approval for destructive operations
    #     enable_memory=True,  # Enable persistent memory
    #     enable_skills=True,  # Enable custom skills
    #     enable_shell=True,  # Enable shell access
    # )
    #
    # Define default tools
    from langchain.agents.middleware import HumanInTheLoopMiddleware

    @tool()
    def get_weather(location: str) -> str:
        """Get the weather for a given location."""
        return f"The weather in {location} is sunny with a high of 75°F."

    # Create the agent graph with default configuration
    model = ChatAnthropic(
        model_name="claude-sonnet-4-5-20250929",
        max_tokens=20000,
    )
    agent_graph = create_deep_agent(
        model=model,
        tools=[get_weather],
        checkpointer=InMemorySaver(),
        # Require user approval before each get_weather call; this exercises
        # the ACP permission-request flow end to end.
        middleware=[
            HumanInTheLoopMiddleware(
                interrupt_on={
                    "get_weather": True,
                }
            )
        ],
    )
    # Start the ACP server over stdio and block until the process is killed.
    reader, writer = await stdio_streams()
    AgentSideConnection(lambda conn: DeepagentsACP(conn, agent_graph), writer, reader)
    await asyncio.Event().wait()
def cli_main() -> None:
    """Synchronous CLI entry point for the ACP server.

    Runs the async `main` server loop until the process is terminated.
    """
    asyncio.run(main())


if __name__ == "__main__":
    cli_main()

View File

@@ -0,0 +1,58 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "deepagents-acp"
version = "0.0.1"
description = "Agent Client Protocol integration for DeepAgents"
readme = "README.md"
requires-python = ">=3.14"
license = {text = "MIT"}
authors = [
]
maintainers = [
]
keywords = ["agent", "acp", "agent-client-protocol", "deepagents", "ai-agents"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.14",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"agent-client-protocol>=0.6.2",
"deepagents",
"deepagents-cli",
]
[dependency-groups]
dev = [
"batrachian-toad>=0.5.2",
]
test = [
"pytest>=8.3.4",
"pytest-asyncio>=0.25.3",
"pytest-cov>=6.0.0",
"pytest-mock>=3.14.0",
"pytest-socket>=0.7.0",
"pytest-timeout>=2.3.1",
"ruff>=0.9.7",
"dirty-equals>=0.11",
]
[project.urls]
Homepage = "https://github.com/langchain-ai/deepagents"
Repository = "https://github.com/langchain-ai/deepagents"
Issues = "https://github.com/langchain-ai/deepagents/issues"
[project.scripts]
deepacp = "deepagents_acp.server:cli_main"
[tool.pytest.ini_options]
asyncio_mode = "auto" # or "strict"

View File

@@ -0,0 +1,231 @@
"""Fake chat models for testing purposes."""
import re
from collections.abc import Callable, Iterator, Sequence
from typing import Any, Literal, cast
from typing_extensions import override
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
class GenericFakeChatModel(BaseChatModel):
    """Generic fake chat model that can be used to test the chat model interface.

    * Chat model should be usable in both sync and async tests
    * Invokes `on_llm_new_token` to allow for testing of callback related code for new
      tokens.
    * Includes configurable logic to break messages into chunks for streaming.

    Args:
        messages: An iterator over messages (use `iter()` to convert a list)
        stream_delimiter: How to chunk content when streaming. The value is
            passed directly to `re.split`. Options:

            - None (default): Return content in a single chunk (no streaming)
            - A plain string delimiter (e.g., " "): Split content on this
              delimiter. Note that without a capture group the delimiter
              itself is discarded, not yielded as a chunk.
            - A regex pattern with a capture group (e.g., r"(\\s)"): Split
              using the pattern while preserving the delimiters as separate
              chunks.

    Examples:
        # No streaming - single chunk
        model = GenericFakeChatModel(messages=iter([AIMessage(content="Hello world")]))

        # Split on whitespace, discarding the spaces
        model = GenericFakeChatModel(
            messages=iter([AIMessage(content="Hello world")]),
            stream_delimiter=" "
        )
        # Yields: "Hello", "world"

        # Split on whitespace with a capture group to keep the delimiters
        model = GenericFakeChatModel(
            messages=iter([AIMessage(content="Hello world")]),
            stream_delimiter=r"(\s)"
        )
        # Yields: "Hello", " ", "world"
    """

    messages: Iterator[AIMessage | str]
    """Get an iterator over messages.

    This can be expanded to accept other types like Callables / dicts / strings
    to make the interface more generic if needed.

    !!! note
        if you want to pass a list, you can use `iter` to convert it to an iterator.
    """

    stream_delimiter: str | None = None
    """Delimiter for chunking content during streaming.

    - None (default): No chunking, returns content in a single chunk
    - String or regex pattern: Passed to `re.split()`; use a capture group
      (e.g. r"(\\s)") if the delimiters themselves should be preserved as
      chunks.
    """

    @override
    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Return the next scripted message as a single generation."""
        message = next(self.messages)
        # Allow plain strings in the script by wrapping them as AIMessages.
        message_ = AIMessage(content=message) if isinstance(message, str) else message
        generation = ChatGeneration(message=message_)
        return ChatResult(generations=[generation])

    def _stream(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Yield the next scripted message as a sequence of chunks.

        Content is chunked according to `stream_delimiter`; tool calls are
        attached only to the final content chunk, and `additional_kwargs`
        are streamed as trailing chunks.
        """
        chat_result = self._generate(
            messages, stop=stop, run_manager=run_manager, **kwargs
        )
        if not isinstance(chat_result, ChatResult):
            msg = (
                f"Expected generate to return a ChatResult, "
                f"but got {type(chat_result)} instead."
            )
            raise ValueError(msg)  # noqa: TRY004
        message = chat_result.generations[0].message
        if not isinstance(message, AIMessage):
            msg = (
                f"Expected invoke to return an AIMessage, "
                f"but got {type(message)} instead."
            )
            raise ValueError(msg)  # noqa: TRY004
        content = message.content
        tool_calls = message.tool_calls if hasattr(message, "tool_calls") else []
        if content:
            if not isinstance(content, str):
                msg = "Expected content to be a string."
                raise ValueError(msg)
            # Chunk content based on stream_delimiter configuration
            if self.stream_delimiter is None:
                # No streaming - return entire content in a single chunk
                content_chunks = [content]
            else:
                # Split content using the delimiter
                # Use re.split to support both string and regex patterns
                content_chunks = cast(
                    "list[str]", re.split(self.stream_delimiter, content)
                )
                # Remove empty strings that can result from splitting
                content_chunks = [chunk for chunk in content_chunks if chunk]
            for idx, token in enumerate(content_chunks):
                # Include tool_calls only in the last chunk
                is_last = idx == len(content_chunks) - 1
                chunk_tool_calls = tool_calls if is_last else []
                chunk = ChatGenerationChunk(
                    message=AIMessageChunk(
                        content=token,
                        id=message.id,
                        tool_calls=chunk_tool_calls,
                    )
                )
                # Mark the final chunk, unless additional_kwargs still need
                # to be streamed afterwards.
                if (
                    is_last
                    and isinstance(chunk.message, AIMessageChunk)
                    and not message.additional_kwargs
                ):
                    chunk.message.chunk_position = "last"
                if run_manager:
                    run_manager.on_llm_new_token(token, chunk=chunk)
                yield chunk
        elif tool_calls:
            # If there's no content but there are tool_calls, yield a single chunk with them
            chunk = ChatGenerationChunk(
                message=AIMessageChunk(
                    content="",
                    id=message.id,
                    tool_calls=tool_calls,
                    chunk_position="last",
                )
            )
            if run_manager:
                run_manager.on_llm_new_token("", chunk=chunk)
            yield chunk
        if message.additional_kwargs:
            for key, value in message.additional_kwargs.items():
                # We should further break down the additional kwargs into chunks
                # Special case for function call
                if key == "function_call":
                    for fkey, fvalue in value.items():
                        if isinstance(fvalue, str):
                            # Break function call by `,`
                            fvalue_chunks = cast("list[str]", re.split(r"(,)", fvalue))
                            for fvalue_chunk in fvalue_chunks:
                                chunk = ChatGenerationChunk(
                                    message=AIMessageChunk(
                                        id=message.id,
                                        content="",
                                        additional_kwargs={
                                            "function_call": {fkey: fvalue_chunk}
                                        },
                                    )
                                )
                                if run_manager:
                                    run_manager.on_llm_new_token(
                                        "",
                                        chunk=chunk,  # No token for function call
                                    )
                                yield chunk
                        else:
                            chunk = ChatGenerationChunk(
                                message=AIMessageChunk(
                                    id=message.id,
                                    content="",
                                    additional_kwargs={"function_call": {fkey: fvalue}},
                                )
                            )
                            if run_manager:
                                run_manager.on_llm_new_token(
                                    "",
                                    chunk=chunk,  # No token for function call
                                )
                            yield chunk
                else:
                    chunk = ChatGenerationChunk(
                        message=AIMessageChunk(
                            id=message.id, content="", additional_kwargs={key: value}
                        )
                    )
                    if run_manager:
                        run_manager.on_llm_new_token(
                            "",
                            chunk=chunk,  # No token for function call
                        )
                    yield chunk

    @property
    def _llm_type(self) -> str:
        """Identifier for this chat model type."""
        return "generic-fake-chat-model"

    def bind_tools(
        self,
        tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
        *,
        tool_choice: str | None = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, AIMessage]:
        """Override bind_tools to return self for testing purposes."""
        return self

View File

@@ -0,0 +1,544 @@
from contextlib import asynccontextmanager
from typing import Any
from acp.schema import NewSessionRequest, PromptRequest
from acp.schema import (
TextContentBlock,
RequestPermissionRequest,
RequestPermissionResponse,
AllowedOutcome,
)
from dirty_equals import IsUUID
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.tools import tool
from langgraph.checkpoint.memory import InMemorySaver
from deepagents_acp.server import DeepagentsACP
from tests.chat_model import GenericFakeChatModel
class FakeAgentSideConnection:
    """Minimal in-memory stand-in for AgentSideConnection used in tests.

    Records every session update and permission request it receives so
    tests can assert on the exact notification traffic.
    """

    def __init__(self) -> None:
        """Set up empty recording buffers and no canned permission response."""
        self.calls: list[dict[str, Any]] = []
        self.permission_requests: list[RequestPermissionRequest] = []
        self.permission_response: RequestPermissionResponse | None = None

    async def sessionUpdate(self, notification: Any) -> None:
        """Record a sessionUpdate notification."""
        self.calls.append(notification)

    async def requestPermission(
        self, request: RequestPermissionRequest
    ) -> RequestPermissionResponse:
        """Record the permission request and answer with the canned response.

        Falls back to approving via the "allow-once" option when no
        response has been configured.
        """
        self.permission_requests.append(request)
        if self.permission_response is not None:
            return self.permission_response
        approve = AllowedOutcome(
            outcome="selected",
            optionId="allow-once",
        )
        return RequestPermissionResponse(outcome=approve)
@tool(description="Get the current weather for a location")
def get_weather_tool(location: str) -> str:
    """Get the current weather for a location.

    Args:
        location: The city and state, e.g. "San Francisco, CA"

    Returns:
        A string describing the current weather
    """
    # Return fake weather data for testing; tests assert on this exact string.
    return f"The weather in {location} is sunny and 72°F"
@asynccontextmanager
async def deepagents_acp_test_context(
    messages: list[BaseMessage],
    prompt_request: PromptRequest,
    tools: list[Any] | None = None,
    stream_delimiter: str | None = r"(\s)",
    middleware: list[Any] | None = None,
):
    """Drive a DeepagentsACP instance through one prompt for testing.

    Builds a fake model that replays *messages*, wraps it in a deep agent,
    opens a session, runs *prompt_request* through it, and yields the fake
    connection so callers can inspect the recorded sessionUpdate calls.

    Args:
        messages: Messages the fake model should emit, in order
        prompt_request: The prompt request to send (its sessionId is filled in)
        tools: Tools to register on the agent (defaults to [])
        stream_delimiter: How to chunk content when streaming (default: r"(\\s)" for whitespace)
        middleware: Optional middleware to add to the agent graph

    Yields:
        FakeAgentSideConnection: The connection object that tracks sessionUpdate calls
    """
    from deepagents.graph import create_deep_agent

    fake_connection = FakeAgentSideConnection()
    fake_model = GenericFakeChatModel(
        messages=iter(messages),
        stream_delimiter=stream_delimiter,
    )
    # Build the agent graph around the scripted fake model.
    agent_graph = create_deep_agent(
        model=fake_model,
        tools=[] if tools is None else tools,
        checkpointer=InMemorySaver(),
        middleware=middleware if middleware else [],
    )
    acp_server = DeepagentsACP(
        connection=fake_connection,
        agent_graph=agent_graph,
    )
    # Open a fresh session and point the prompt request at it.
    new_session = await acp_server.newSession(
        NewSessionRequest(cwd="/tmp", mcpServers=[])
    )
    prompt_request.sessionId = new_session.sessionId
    # Run the prompt before yielding so the connection already holds the
    # full notification trace when the caller inspects it.
    await acp_server.prompt(prompt_request)
    yield fake_connection
class TestDeepAgentsACP:
    """Tests for DeepagentsACP session creation and prompt streaming."""

    async def test_initialization(self) -> None:
        """Test that DeepagentsACP can be initialized without errors."""
        prompt_request = PromptRequest(
            sessionId="",  # Will be set by context manager
            prompt=[TextContentBlock(text="Hi!", type="text")],
        )
        async with deepagents_acp_test_context(
            messages=[AIMessage(content="Hello!")],
            prompt_request=prompt_request,
            tools=[get_weather_tool],
        ) as connection:
            # "Hello!" contains no whitespace, so the whitespace delimiter
            # produces exactly one agent_message_chunk update.
            assert len(connection.calls) == 1
            first_call = connection.calls[0].model_dump()
            assert first_call == {
                "field_meta": None,
                "sessionId": IsUUID,
                "update": {
                    "content": {
                        "annotations": None,
                        "field_meta": None,
                        "text": "Hello!",
                        "type": "text",
                    },
                    "field_meta": None,
                    "sessionUpdate": "agent_message_chunk",
                },
            }

    async def test_tool_call_and_response(self) -> None:
        """Test that DeepagentsACP handles tool calls correctly.

        This test verifies that when an AI message contains tool_calls, the agent:
        1. Detects and executes the tool call
        2. Sends tool call progress notifications (pending and completed)
        3. Streams the AI response content as chunks after tool execution

        Note: The FakeChat model streams messages but the agent graph must actually
        execute the tools for the flow to complete.
        """
        prompt_request = PromptRequest(
            sessionId="",  # Will be set by context manager
            prompt=[TextContentBlock(text="What's the weather in Paris?", type="text")],
        )
        # The fake model will be called multiple times by the agent graph:
        # 1. First call: AI decides to use the tool (with tool_calls)
        # 2. After tool execution: AI responds with the result
        async with deepagents_acp_test_context(
            messages=[
                AIMessage(
                    content="",
                    tool_calls=[
                        {
                            "name": "get_weather_tool",
                            "args": {"location": "Paris, France"},
                            "id": "call_123",
                            "type": "tool_call",
                        }
                    ],
                ),
                AIMessage(content="The weather in Paris is sunny and 72°F today!"),
            ],
            prompt_request=prompt_request,
            tools=[get_weather_tool],
        ) as connection:
            # Expected call sequence:
            # Call 0: Tool call progress (status="pending")
            # Call 1: Tool call progress (status="completed")
            # Calls 2+: Message chunks for "The weather in Paris is sunny and 72°F today!"
            tool_call_updates = [
                call.model_dump()
                for call in connection.calls
                if call.model_dump()["update"]["sessionUpdate"] == "tool_call_update"
            ]
            # Verify we have exactly 2 tool call updates
            assert len(tool_call_updates) == 2
            # Verify tool call pending with full structure
            assert tool_call_updates[0]["update"] == {
                "sessionUpdate": "tool_call_update",
                "status": "pending",
                "toolCallId": "call_123",
                "title": "get_weather_tool",
                "rawInput": {"location": "Paris, France"},
                "content": None,
                "rawOutput": None,
                "kind": None,
                "locations": None,
                "field_meta": None,
            }
            # Verify tool call completed with full structure
            assert tool_call_updates[1]["update"] == {
                "sessionUpdate": "tool_call_update",
                "status": "completed",
                "toolCallId": "call_123",
                "title": "get_weather_tool",
                "rawInput": None,  # rawInput not included in completed status
                "content": [
                    {
                        "type": "content",
                        "content": {
                            "type": "text",
                            "text": "The weather in Paris, France is sunny and 72°F",
                            "annotations": None,
                            "field_meta": None,
                        },
                    }
                ],
                "rawOutput": "The weather in Paris, France is sunny and 72°F",
                "kind": None,
                "locations": None,
                "field_meta": None,
            }
            # Verify all non-tool-call updates are message chunks
            message_chunks = [
                call.model_dump()
                for call in connection.calls
                if call.model_dump()["update"]["sessionUpdate"] == "agent_message_chunk"
            ]
            assert len(message_chunks) > 0
            for chunk in message_chunks:
                assert chunk["update"]["sessionUpdate"] == "agent_message_chunk"
                assert chunk["update"]["content"]["type"] == "text"
async def test_todo_list_handling() -> None:
    """Test that DeepagentsACP handles todo list updates correctly.

    A tools-node update containing "todos" should be forwarded to the client
    as a single ACP "plan" session update.
    """
    from deepagents.graph import create_deep_agent

    prompt_request = PromptRequest(
        sessionId="",  # Will be set by context manager
        prompt=[TextContentBlock(text="Create a shopping list", type="text")],
    )
    # Create a mock connection to track calls
    connection = FakeAgentSideConnection()
    model = GenericFakeChatModel(
        messages=iter([AIMessage(content="I'll create that shopping list for you.")]),
        stream_delimiter=r"(\s)",
    )
    # Create agent graph
    agent_graph = create_deep_agent(
        model=model,
        tools=[get_weather_tool],
        checkpointer=InMemorySaver(),
    )
    deepagents_acp = DeepagentsACP(
        connection=connection,
        agent_graph=agent_graph,
    )
    # Create a new session
    session_response = await deepagents_acp.newSession(
        NewSessionRequest(cwd="/tmp", mcpServers=[])
    )
    session_id = session_response.sessionId
    prompt_request.sessionId = session_id
    # Manually inject a tools update with todos into the agent stream
    # Simulate the graph's behavior by patching the astream method
    agent = deepagents_acp._sessions[session_id]["agent"]
    original_astream = agent.astream

    async def mock_astream(*args, **kwargs):
        # First yield the normal message chunks
        async for item in original_astream(*args, **kwargs):
            yield item
        # Then inject a tools update with todos
        yield (
            "updates",
            {
                "tools": {
                    "todos": [
                        {"content": "Buy fresh bananas", "status": "pending"},
                        {"content": "Buy whole grain bread", "status": "in_progress"},
                        {"content": "Buy organic eggs", "status": "completed"},
                    ],
                    "messages": [],
                }
            },
        )

    agent.astream = mock_astream
    # Call prompt
    await deepagents_acp.prompt(prompt_request)
    # Find the plan update in the calls
    plan_updates = [
        call.model_dump()
        for call in connection.calls
        if call.model_dump()["update"]["sessionUpdate"] == "plan"
    ]
    # Verify we got exactly one plan update with correct structure.
    # Each todo maps to a plan entry; priority defaults to "medium".
    assert len(plan_updates) == 1
    assert plan_updates[0]["update"] == {
        "sessionUpdate": "plan",
        "entries": [
            {
                "content": "Buy fresh bananas",
                "status": "pending",
                "priority": "medium",
                "field_meta": None,
            },
            {
                "content": "Buy whole grain bread",
                "status": "in_progress",
                "priority": "medium",
                "field_meta": None,
            },
            {
                "content": "Buy organic eggs",
                "status": "completed",
                "priority": "medium",
                "field_meta": None,
            },
        ],
        "field_meta": None,
    }
async def test_fake_chat_model_streaming() -> None:
    """Test to verify GenericFakeChatModel stream_delimiter API.

    This test demonstrates the different streaming modes available via stream_delimiter.
    """
    # Test 1: No streaming (stream_delimiter=None, the model default) - single chunk
    model_no_stream = GenericFakeChatModel(
        messages=iter([AIMessage(content="Hello world")]),
        stream_delimiter=None,
    )
    chunks = []
    async for chunk in model_no_stream.astream("test"):
        chunks.append(chunk)
    assert len(chunks) == 1
    assert chunks[0].content == "Hello world"
    # Test 2: Stream on whitespace using a regex with a capture group, so the
    # delimiter itself is preserved as its own chunk
    model_whitespace = GenericFakeChatModel(
        messages=iter([AIMessage(content="Hello world")]),
        stream_delimiter=r"(\s)",
    )
    chunks = []
    async for chunk in model_whitespace.astream("test"):
        chunks.append(chunk)
    # Should split into: "Hello", " ", "world"
    assert len(chunks) == 3
    assert chunks[0].content == "Hello"
    assert chunks[1].content == " "
    assert chunks[2].content == "world"
    # Test 3: Stream with tool_calls
    model_with_tools = GenericFakeChatModel(
        messages=iter(
            [
                AIMessage(
                    content="Checking weather",
                    tool_calls=[
                        {
                            "name": "get_weather_tool",
                            "args": {"location": "paris, france"},
                            "id": "call_123",
                            "type": "tool_call",
                        }
                    ],
                ),
            ]
        ),
        stream_delimiter=r"(\s)",
    )
    chunks = []
    async for chunk in model_with_tools.astream("test"):
        chunks.append(chunk)
    # Tool calls should only be in the last chunk
    assert len(chunks) > 0
    assert chunks[-1].tool_calls == [
        {
            "name": "get_weather_tool",
            "args": {"location": "paris, france"},
            "id": "call_123",
            "type": "tool_call",
        }
    ]
    # Earlier chunks should not have tool_calls
    for chunk in chunks[:-1]:
        assert chunk.tool_calls == []
async def test_human_in_the_loop_approval() -> None:
    """Test that DeepagentsACP handles HITL interrupts and permission requests correctly.

    With HumanInTheLoopMiddleware installed, a tool call should trigger a
    requestPermission round-trip; after the client approves ("allow-once"),
    the tool executes and the final AI message streams as usual.
    """
    from langchain.agents.middleware import HumanInTheLoopMiddleware

    from deepagents.graph import create_deep_agent

    prompt_request = PromptRequest(
        sessionId="",  # Will be set below
        prompt=[TextContentBlock(text="What's the weather in Tokyo?", type="text")],
    )
    # Create connection with permission response configured
    connection = FakeAgentSideConnection()
    # Set up the connection to approve the tool call
    connection.permission_response = RequestPermissionResponse(
        outcome=AllowedOutcome(
            outcome="selected",
            optionId="allow-once",
        )
    )
    model = GenericFakeChatModel(
        messages=iter(
            [
                # First message: AI decides to call the tool
                AIMessage(
                    content="",
                    tool_calls=[
                        {
                            "name": "get_weather_tool",
                            "args": {"location": "Tokyo, Japan"},
                            "id": "call_tokyo_123",
                            "type": "tool_call",
                        }
                    ],
                ),
                # Second message: AI responds with the weather result after tool execution
                AIMessage(content="The weather in Tokyo is sunny and 72°F!"),
            ]
        ),
        stream_delimiter=r"(\s)",
    )
    # Create agent graph with HITL middleware
    agent_graph = create_deep_agent(
        model=model,
        tools=[get_weather_tool],
        checkpointer=InMemorySaver(),
        middleware=[HumanInTheLoopMiddleware(interrupt_on={"get_weather_tool": True})],
    )
    deepagents_acp = DeepagentsACP(
        connection=connection,
        agent_graph=agent_graph,
    )
    # Create a new session
    session_response = await deepagents_acp.newSession(
        NewSessionRequest(cwd="/tmp", mcpServers=[])
    )
    session_id = session_response.sessionId
    prompt_request.sessionId = session_id
    # Call prompt - this should trigger HITL
    await deepagents_acp.prompt(prompt_request)
    # Verify that a permission request was made with correct structure
    assert len(connection.permission_requests) == 1
    perm_request = connection.permission_requests[0]
    assert {
        "sessionId": perm_request.sessionId,
        "toolCall": {
            "title": perm_request.toolCall.title,
            "rawInput": perm_request.toolCall.rawInput,
            "status": perm_request.toolCall.status,
        },
        "option_ids": [opt.optionId for opt in perm_request.options],
    } == {
        "sessionId": session_id,
        "toolCall": {
            "title": "get_weather_tool",
            "rawInput": {"location": "Tokyo, Japan"},
            "status": "pending",
        },
        "option_ids": ["allow-once", "reject-once"],
    }
    # Verify that tool execution happened after approval
    tool_call_updates = [
        call.model_dump()
        for call in connection.calls
        if call.model_dump()["update"]["sessionUpdate"] == "tool_call_update"
    ]
    assert len(tool_call_updates) == 2
    assert tool_call_updates[0]["update"] == {
        "sessionUpdate": "tool_call_update",
        "status": "pending",
        "title": "get_weather_tool",
        "toolCallId": "call_tokyo_123",
        "rawInput": {"location": "Tokyo, Japan"},
        "content": None,
        "rawOutput": None,
        "kind": None,
        "locations": None,
        "field_meta": None,
    }
    # Check completed status
    completed_update = tool_call_updates[1]["update"]
    assert completed_update["sessionUpdate"] == "tool_call_update"
    assert completed_update["status"] == "completed"
    assert completed_update["title"] == "get_weather_tool"
    assert "Tokyo, Japan" in completed_update["rawOutput"]
    # Verify final AI message was streamed
    message_chunks = [
        call
        for call in connection.calls
        if call.model_dump()["update"]["sessionUpdate"] == "agent_message_chunk"
    ]
    assert len(message_chunks) > 0

1872
deepagents_sourcecode/libs/acp/uv.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,66 @@
.PHONY: all lint format test help run test_integration test_watch
# Default target executed when no arguments are given to make.
all: help
######################
# TESTING AND COVERAGE
######################
# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests
INTEGRATION_FILES ?= tests/integration_tests
test:
uv run pytest --disable-socket --allow-unix-socket $(TEST_FILE)
test_integration:
uv run pytest $(INTEGRATION_FILES)
test_watch:
uv run ptw . -- $(TEST_FILE)
run:
uvx --no-cache --reinstall .
######################
# LINTING AND FORMATTING
######################
# Define a variable for Python and notebook files.
lint format: PYTHON_FILES=deepagents_acp/ tests/
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=. --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint lint_diff:
[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES) --diff
@if [ "$(LINT)" != "minimal" ]; then \
if [ "$(PYTHON_FILES)" != "" ]; then \
uv run ruff check $(PYTHON_FILES) --diff; \
fi; \
fi
# [ "$(PYTHON_FILES)" = "" ] || uv run mypy $(PYTHON_FILES)
format format_diff:
[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || uv run ruff check --fix $(PYTHON_FILES)
format_unsafe:
[ "$(PYTHON_FILES)" = "" ] || uv run ruff format --unsafe-fixes $(PYTHON_FILES)
######################
# HELP
######################
help:
@echo '===================='
@echo '-- LINTING --'
@echo 'format - run code formatters'
@echo 'lint - run linters'
@echo '-- TESTS --'
@echo 'test - run unit tests'
@echo 'test TEST_FILE=<test_file> - run all tests in file'
@echo '-- DOCUMENTATION tasks are from the top-level Makefile --'

View File

@@ -0,0 +1,369 @@
# 🚀🧠 Deep Agents CLI
The [deepagents](https://github.com/langchain-ai/deepagents) CLI is an open source coding assistant that runs in your terminal, similar to Claude Code.
**Key Features:**
- **Built-in Tools**: File operations (read, write, edit, glob, grep), shell commands, web search, and subagent delegation
- **Customizable Skills**: Add domain-specific capabilities through a progressive disclosure skill system
- **Persistent Memory**: Agent remembers your preferences, coding style, and project context across sessions
- **Project-Aware**: Automatically detects project roots and loads project-specific configurations
<img src="cli-banner.jpg" alt="deep agent" width="100%"/>
## 🚀 Quickstart
`deepagents-cli` is a Python package that can be installed via pip or uv.
**Install via pip:**
```bash
pip install deepagents-cli
```
**Or using uv (recommended):**
```bash
# Create a virtual environment
uv venv
# Install the package
uv pip install deepagents-cli
```
**Run the agent in your terminal:**
```bash
deepagents
```
**Get help:**
```bash
deepagents help
```
**Common options:**
```bash
# Use a specific agent configuration
deepagents --agent mybot
# Use a specific model (auto-detects provider)
deepagents --model claude-sonnet-4-5-20250929
deepagents --model gpt-4o
# Auto-approve tool usage (skip human-in-the-loop prompts)
deepagents --auto-approve
# Execute code in a remote sandbox
deepagents --sandbox modal # or runloop, daytona
deepagents --sandbox-id dbx_123 # reuse existing sandbox
```
Type naturally as you would in a chat interface. The agent will use its built-in tools, skills, and memory to help you with tasks.
## Model Configuration
The CLI supports three LLM providers with automatic provider detection based on model name:
**Supported Providers:**
- **OpenAI** - Models like `gpt-4o`, `gpt-5-mini`, `o1-preview`, `o3-mini` (default: `gpt-5-mini`)
- **Anthropic** - Models like `claude-sonnet-4-5-20250929`, `claude-3-opus-20240229` (default: `claude-sonnet-4-5-20250929`)
- **Google** - Models like `gemini-3-pro-preview`, `gemini-1.5-pro` (default: `gemini-3-pro-preview`)
**Specify model at startup:**
```bash
# Auto-detects Anthropic from model name pattern
deepagents --model claude-sonnet-4-5-20250929
# Auto-detects OpenAI from model name pattern
deepagents --model gpt-4o
```
**Or use environment variables:**
```bash
# Set provider-specific model defaults
export ANTHROPIC_MODEL="claude-sonnet-4-5-20250929"
export OPENAI_MODEL="gpt-4o"
export GOOGLE_MODEL="gemini-1.5-pro"
# Set API keys (required)
export ANTHROPIC_API_KEY="your-key"
export OPENAI_API_KEY="your-key"
export GOOGLE_API_KEY="your-key"
```
**Model name conventions:**
Model names follow each provider's official naming convention:
- **OpenAI**: See [OpenAI Models Documentation](https://platform.openai.com/docs/models)
- **Anthropic**: See [Anthropic Models Documentation](https://docs.anthropic.com/en/docs/about-claude/models)
- **Google**: See [Google Gemini Models Documentation](https://ai.google.dev/gemini-api/docs/models/gemini)
The active model is displayed at startup in the CLI interface.
## Built-in Tools
The agent comes with the following built-in tools (always available without configuration):
| Tool | Description |
|------|-------------|
| `ls` | List files and directories |
| `read_file` | Read contents of a file |
| `write_file` | Create or overwrite a file |
| `edit_file` | Make targeted edits to existing files |
| `glob` | Find files matching a pattern (e.g., `**/*.py`) |
| `grep` | Search for text patterns across files |
| `shell` | Execute shell commands (local mode) |
| `execute` | Execute commands in remote sandbox (sandbox mode) |
| `web_search` | Search the web using Tavily API |
| `fetch_url` | Fetch and convert web pages to markdown |
| `task` | Delegate work to subagents for parallel execution |
| `write_todos` | Create and manage task lists for complex work |
> [!WARNING]
> **Human-in-the-Loop (HITL) Approval Required**
>
> Potentially destructive operations require user approval before execution:
> - **File operations**: `write_file`, `edit_file`
> - **Command execution**: `shell`, `execute`
> - **External requests**: `web_search`, `fetch_url`
> - **Delegation**: `task` (subagents)
>
> Each operation will prompt for approval showing the action details. Use `--auto-approve` to skip prompts:
> ```bash
> deepagents --auto-approve
> ```
## Agent Configuration
Each agent has its own configuration directory at `~/.deepagents/<agent_name>/`, with default `agent`.
```bash
# List all configured agents
deepagents list
# Create a new agent
deepagents create <agent_name>
```
### Environment Variables
#### LangSmith Tracing
The CLI supports separate LangSmith project configuration for agent tracing vs user code tracing:
**Agent Tracing** - Traces deepagents operations (tool calls, agent decisions):
```bash
export DEEPAGENTS_LANGSMITH_PROJECT="my-agent-project"
```
**User Code Tracing** - Traces code executed via shell commands:
```bash
export LANGSMITH_PROJECT="my-user-code-project"
```
**Complete Setup Example:**
```bash
# Enable LangSmith tracing
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY="your-api-key"
# Configure separate projects
export DEEPAGENTS_LANGSMITH_PROJECT="agent-traces"
export LANGSMITH_PROJECT="user-code-traces"
# Run deepagents
deepagents
```
When both are configured, the CLI displays:
```
✓ LangSmith tracing enabled: Deepagents → 'agent-traces'
User code (shell) → 'user-code-traces'
```
**Why separate projects?**
- Keep agent operations separate from your application code traces
- Easier debugging by isolating agent vs user code behavior
- Different retention policies or access controls per project
**Backwards Compatibility:**
If `DEEPAGENTS_LANGSMITH_PROJECT` is not set, both agent and user code trace to the same project specified by `LANGSMITH_PROJECT`.
## Customization
There are two primary ways to customize any agent: **memory** and **skills**.
Each agent has its own global configuration directory at `~/.deepagents/<agent_name>/`:
```
~/.deepagents/<agent_name>/
├── agent.md # Auto-loaded global personality/style
└── skills/ # Auto-loaded agent-specific skills
├── web-research/
│ └── SKILL.md
└── langgraph-docs/
└── SKILL.md
```
Projects can extend the global configuration with project-specific instructions and skills:
```
my-project/
├── .git/
└── .deepagents/
├── agent.md # Project-specific instructions
└── skills/ # Project-specific skills
└── custom-tool/
└── SKILL.md
```
The CLI automatically detects project roots (via `.git`) and loads:
- Project-specific `agent.md` from `[project-root]/.deepagents/agent.md`
- Project-specific skills from `[project-root]/.deepagents/skills/`
Both global and project configurations are loaded together, allowing you to:
- Keep general coding style/preferences in global agent.md
- Add project-specific context, conventions, or guidelines in project agent.md
- Share project-specific skills with your team (committed to version control)
- Override global skills with project-specific versions (when skill names match)
### agent.md files
`agent.md` files provide persistent memory that is always loaded at session start. Both global and project-level `agent.md` files are loaded together and injected into the system prompt.
**Global `agent.md`** (`~/.deepagents/agent/agent.md`)
- Your personality, style, and universal coding preferences
- General tone and communication style
- Universal coding preferences (formatting, type hints, etc.)
- Tool usage patterns that apply everywhere
- Workflows and methodologies that don't change per-project
**Project `agent.md`** (`.deepagents/agent.md` in project root)
- Project-specific context and conventions
- Project architecture and design patterns
- Coding conventions specific to this codebase
- Testing strategies and deployment processes
- Team guidelines and project structure
**How it works (AgentMemoryMiddleware):**
- Loads both files at startup and injects into system prompt as `<user_memory>` and `<project_memory>`
- Appends [memory management instructions](deepagents_cli/agent_memory.py#L44-L158) on when/how to update memory files
**When the agent updates memory:**
- IMMEDIATELY when you describe how it should behave
- IMMEDIATELY when you give feedback on its work
- When you explicitly ask it to remember something
- When patterns or preferences emerge from your interactions
The agent uses `edit_file` to update memories when learning preferences or receiving feedback.
### Project memory files
Beyond `agent.md`, you can create additional memory files in `.deepagents/` for structured project knowledge. These work similarly to [Anthropic's Memory Tool](https://platform.claude.com/docs/en/agents-and-tools/tool-use/memory-tool). The agent receives [detailed instructions](deepagents_cli/agent_memory.py#L123-L158) on when to read and update these files.
**How it works:**
1. Create markdown files in `[project-root]/.deepagents/` (e.g., `api-design.md`, `architecture.md`, `deployment.md`)
2. The agent checks these files when relevant to a task (not auto-loaded into every prompt)
3. The agent uses `write_file` or `edit_file` to create/update memory files when learning project patterns
**Example workflow:**
```bash
# Agent discovers deployment pattern and saves it
.deepagents/
├── agent.md # Always loaded (personality + conventions)
├── architecture.md # Loaded on-demand (system design)
└── deployment.md # Loaded on-demand (deploy procedures)
```
**When the agent reads memory files:**
- At the start of new sessions (checks what files exist)
- Before answering questions about project-specific topics
- When you reference past work or patterns
- When performing tasks that match saved knowledge domains
**Benefits:**
- **Persistent learning**: Agent remembers project patterns across sessions
- **Team collaboration**: Share project knowledge through version control
- **Contextual retrieval**: Load only relevant memory when needed (reduces token usage)
- **Structured knowledge**: Organize information by domain (APIs, architecture, deployment, etc.)
### Skills
Skills are reusable agent capabilities that provide specialized workflows and domain knowledge. Example skills are provided in the `examples/skills/` directory:
- **web-research** - Structured web research workflow with planning, parallel delegation, and synthesis
- **langgraph-docs** - LangGraph documentation lookup and guidance
To use an example skill with the default agent, just copy it into the agent's global or project-level skills directory:
```bash
mkdir -p ~/.deepagents/agent/skills
cp -r examples/skills/web-research ~/.deepagents/agent/skills/
```
To manage skills:
```bash
# List all skills (global + project)
deepagents skills list
# List only project skills
deepagents skills list --project
# Create a new global skill from template
deepagents skills create my-skill
# Create a new project skill
deepagents skills create my-tool --project
# View detailed information about a skill
deepagents skills info web-research
# View info for a project skill only
deepagents skills info my-tool --project
```
To use skills (e.g., the langgraph-docs skill), just type a request relevant to a skill and the skill will be used automatically.
```bash
$ deepagents
$ "create an agent.py script that implements a LangGraph agent"
```
Skills follow Anthropic's [progressive disclosure pattern](https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills) - the agent knows skills exist but only reads full instructions when needed.
1. **At startup** - SkillsMiddleware scans `~/.deepagents/agent/skills/` and `.deepagents/skills/` directories
2. **Parse metadata** - Extracts YAML frontmatter (name + description) from each `SKILL.md` file
3. **Inject into prompt** - Adds skill list with descriptions to system prompt: "Available Skills: web-research - Use for web research tasks..."
4. **Progressive loading** - Agent reads full `SKILL.md` content with `read_file` only when a task matches the skill's description
5. **Execute workflow** - Agent follows the step-by-step instructions in the skill file
## Development
### Running Tests
To run the test suite:
```bash
uv sync --all-groups
make test
```
### Running During Development
```bash
# From libs/deepagents-cli directory
uv run deepagents
# Or install in editable mode
uv pip install -e .
deepagents
```
### Modifying the CLI
- **UI changes** → Edit `ui.py` or `input.py`
- **Add new tools** → Edit `tools.py`
- **Change execution flow** → Edit `execution.py`
- **Add commands** → Edit `commands.py`
- **Agent configuration** → Edit `agent.py`
- **Skills system** → Edit `skills/` modules
- **Constants/colors** → Edit `config.py`

Binary file not shown.

After

Width:  |  Height:  |  Size: 200 KiB

View File

@@ -0,0 +1,5 @@
"""DeepAgents CLI - Interactive AI coding assistant."""

from deepagents_cli.main import cli_main

# Public API: re-export the console entry point only.
__all__ = ["cli_main"]

View File

@@ -0,0 +1,6 @@
"""Allow running the CLI as: python -m deepagents_cli."""

from deepagents_cli.main import cli_main

if __name__ == "__main__":
    cli_main()

View File

@@ -0,0 +1,3 @@
"""Version information for deepagents-cli."""

# NOTE(review): presumably kept in sync with the package metadata on release — confirm.
__version__ = "0.0.12"

View File

@@ -0,0 +1,454 @@
"""CLI를 위한 에이전트 관리 및 생성."""
import os
import shutil
from pathlib import Path
from deepagents import create_deep_agent
from deepagents.backends import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.sandbox import SandboxBackendProtocol
from langchain.agents.middleware import (
InterruptOnConfig,
)
from langchain.agents.middleware.types import AgentState
from langchain.messages import ToolCall
from langchain.tools import BaseTool
from langchain_core.language_models import BaseChatModel
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.pregel import Pregel
from langgraph.runtime import Runtime
from deepagents_cli.agent_memory import AgentMemoryMiddleware
from deepagents_cli.config import COLORS, config, console, get_default_coding_instructions, settings
from deepagents_cli.integrations.sandbox_factory import get_default_working_dir
from deepagents_cli.shell import ShellMiddleware
from deepagents_cli.skills import SkillsMiddleware
def list_agents() -> None:
    """Print every agent directory found under ~/.deepagents/.

    Agents whose directory lacks an ``agent.md`` are flagged as incomplete.
    """
    agents_dir = settings.user_deepagents_dir

    # Guard clause: nothing to list when the directory is absent or empty.
    if not (agents_dir.exists() and any(agents_dir.iterdir())):
        console.print("[yellow]에이전트를 찾을 수 없습니다.[/yellow]")
        console.print(
            "[dim]처음 사용할 때 ~/.deepagents/에 에이전트가 생성됩니다.[/dim]",
            style=COLORS["dim"],
        )
        return

    console.print("\n[bold]사용 가능한 에이전트:[/bold]\n", style=COLORS["primary"])
    for entry in sorted(agents_dir.iterdir()):
        if not entry.is_dir():
            continue
        agent_name = entry.name
        # An agent is considered complete once its agent.md exists.
        if (entry / "agent.md").exists():
            console.print(f" • [bold]{agent_name}[/bold]", style=COLORS["primary"])
        else:
            console.print(f" • [bold]{agent_name}[/bold] [dim](미완성)[/dim]", style=COLORS["tool"])
        console.print(f" {entry}", style=COLORS["dim"])
    console.print()
def reset_agent(agent_name: str, source_agent: str | None = None) -> None:
    """Reset an agent's configuration to the defaults or to a copy of another agent.

    Args:
        agent_name: Agent whose configuration directory is recreated.
        source_agent: Optional agent to copy ``agent.md`` from. When omitted,
            the default coding instructions are used.
    """
    agents_dir = settings.user_deepagents_dir
    agent_dir = agents_dir / agent_name

    # Resolve the replacement agent.md content before touching the target.
    if not source_agent:
        source_content = get_default_coding_instructions()
        action_desc = "default"
    else:
        source_md = agents_dir / source_agent / "agent.md"
        if not source_md.exists():
            console.print(
                f"[bold red]오류:[/bold red] 소스 에이전트 '{source_agent}'를 찾을 수 없거나 agent.md가 없습니다"
            )
            return
        source_content = source_md.read_text()
        action_desc = f"contents of agent '{source_agent}'"

    # Remove any existing configuration so the reset starts from a clean slate.
    if agent_dir.exists():
        shutil.rmtree(agent_dir)
        console.print(f"기존 에이전트 디렉터리를 제거했습니다: {agent_dir}", style=COLORS["tool"])

    agent_dir.mkdir(parents=True, exist_ok=True)
    (agent_dir / "agent.md").write_text(source_content)
    console.print(f"✓ 에이전트 '{agent_name}'{action_desc}(으)로 재설정되었습니다", style=COLORS["primary"])
    console.print(f"Location: {agent_dir}\n", style=COLORS["dim"])
def get_system_prompt(assistant_id: str, sandbox_type: str | None = None) -> str:
"""에이전트에 대한 기본 시스템 프롬프트를 가져옵니다.
Args:
assistant_id: 경로 참조를 위한 에이전트 식별자
sandbox_type: 샌드박스 공급자 유형("modal", "runloop", "daytona").
None인 경우 에이전트는 로컬 모드에서 작동합니다.
Returns:
시스템 프롬프트 문자열 (agent.md 내용 제외)
"""
agent_dir_path = f"~/.deepagents/{assistant_id}"
if sandbox_type:
# Get provider-specific working directory
working_dir = get_default_working_dir(sandbox_type)
working_dir_section = f"""### Current Working Directory
You are working in a **remote Linux sandbox** at `{working_dir}`.
All code execution and file operations happen in this sandbox environment.
**IMPORTANT:**
- The CLI runs locally on the user's machine, but executes code remotely.
- Use `{working_dir}` as your working directory for all operations.
"""
else:
cwd = Path.cwd()
working_dir_section = f"""<env>
WORKING_DIRECTORY: {cwd}
</env>
### Current Working Directory
The filesystem backend is currently operating at: `{cwd}`
### File System and Paths
**IMPORTANT - Path Handling:**
- All file paths MUST be absolute (e.g. `{cwd}/file.txt`).
- Use the WORKING_DIRECTORY from <env> to construct absolute paths.
- Example: To create a file in the working directory, use `{cwd}/research_project/file.md`
- Do NOT use relative paths - always construct the full absolute path.
"""
return (
working_dir_section
+ f"""### Skills Directory
Your skills are stored at: `{agent_dir_path}/skills/`
Skills may contain scripts or support files. Use the physical filesystem path when running skill scripts with bash:
Example: `bash python {agent_dir_path}/skills/web-research/script.py`
### Human-in-the-Loop Tool Approvals
Some tool calls require user approval before execution. If a tool call is rejected by the user:
1. Accept the decision immediately - do NOT try the same command again.
2. Explain that you understand the user rejected the operation.
3. Propose an alternative or ask for clarification.
4. NEVER try to bypass a rejection by retrying the exact same command.
Respect user decisions and work collaboratively.
### Web Search Tool Usage
When using the web_search tool:
1. The tool returns search results with titles, URLs, and content snippets.
2. You MUST read and process these results, then respond to the user naturally.
3. Do NOT show raw JSON or tool results directly to the user.
4. Synthesize information from multiple sources into a coherent answer.
5. Cite sources by mentioning page titles or URLs when relevant.
6. If you don't find what you need in the search, explain what you found and ask clarifying questions.
The user ONLY sees your text response, not the tool results. Always provide a complete, natural language answer after using web_search.
### Todo List Management
When using the write_todos tool:
1. Keep the todo list minimal - aim for 3-6 items max.
2. Only create todos for complex, multi-step tasks that really need tracking.
3. Break down tasks into clear, actionable items without being overly granular.
4. For simple tasks (1-2 steps), just do them - don't create a todo.
5. When first creating a todo list for a task, ALWAYS ask the user if the plan looks good before starting work.
- Create the todos so they render, then ask "Does this plan look good?" or similar.
- Wait for the user's response before marking the first todo in_progress.
- Adjust the plan if they want changes.
6. Update todo status promptly as you complete each item.
The todo list is a planning tool - use it judiciously to avoid overwhelming the user with excessive task tracking."""
)
def _format_write_file_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a write_file tool call for the approval prompt."""
    args = tool_call["args"]
    target = args.get("file_path", "unknown")
    body = args.get("content", "")
    # Report whether the call creates a new file or clobbers an existing one.
    verb = "덮어쓰기(Overwrite)" if Path(target).exists() else "생성(Create)"
    return f"파일: {target}\n작업: 파일 {verb}\n줄 수: {len(body.splitlines())}"
def _format_edit_file_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render an edit_file tool call for the approval prompt."""
    args = tool_call["args"]
    target = args.get("file_path", "unknown")
    # replace_all toggles between replacing every occurrence and just one.
    scope = "모든 항목" if args.get("replace_all", False) else "단일 항목"
    return f"파일: {target}\n작업: 텍스트 교체 ({scope})"
def _format_web_search_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a web_search tool call for the approval prompt."""
    args = tool_call["args"]
    return (
        f"쿼리: {args.get('query', 'unknown')}\n"
        f"최대 결과: {args.get('max_results', 5)}\n"
        f"\n⚠️ 이 작업은 Tavily API 크레딧을 사용합니다"
    )
def _format_fetch_url_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a fetch_url tool call for the approval prompt."""
    args = tool_call["args"]
    return (
        f"URL: {args.get('url', 'unknown')}\n"
        f"시간 제한: {args.get('timeout', 30)}\n"
        f"\n⚠️ 웹 콘텐츠를 가져와 마크다운으로 변환합니다"
    )
def _format_task_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a task (subagent) tool call for the approval prompt.

    The task tool signature is ``task(description: str, subagent_type: str)``;
    ``description`` carries the full instructions forwarded to the subagent.
    """
    args = tool_call["args"]
    description = args.get("description", "unknown")
    subagent_type = args.get("subagent_type", "unknown")
    # Truncate description if too long for display
    description_preview = description
    if len(description) > 500:
        description_preview = description[:500] + "..."
    # BUG FIX: the divider used to be "'' * 40" (an empty string repeated 40
    # times, i.e. nothing); render a visible 40-character rule instead.
    divider = "─" * 40
    return (
        f"서브 에이전트 유형: {subagent_type}\n\n"
        f"작업 지침:\n"
        f"{divider}\n"
        f"{description_preview}\n"
        f"{divider}\n\n"
        f"⚠️ 서브 에이전트는 파일 작업 및 셸 명령에 접근할 수 있습니다"
    )
def _format_shell_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a shell tool call for the approval prompt."""
    command = tool_call["args"].get("command", "없음")
    return f"셸 명령: {command}\n작업 디렉터리: {Path.cwd()}"
def _format_execute_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render an execute (remote sandbox) tool call for the approval prompt."""
    command = tool_call["args"].get("command", "없음")
    return f"명령 실행: {command}\n위치: 원격 샌드박스"
def _add_interrupt_on() -> dict[str, InterruptOnConfig]:
    """Build human-in-the-loop interrupt configs for destructive tools.

    Every guarded tool gets the same approve/reject decision set, paired with
    a tool-specific formatter that renders its approval prompt.
    """
    formatters = {
        "shell": _format_shell_description,
        "execute": _format_execute_description,
        "write_file": _format_write_file_description,
        "edit_file": _format_edit_file_description,
        "web_search": _format_web_search_description,
        "fetch_url": _format_fetch_url_description,
        "task": _format_task_description,
    }
    configs: dict[str, InterruptOnConfig] = {}
    for tool_name, formatter in formatters.items():
        configs[tool_name] = {
            "allowed_decisions": ["approve", "reject"],
            "description": formatter,
        }
    return configs
def create_cli_agent(
    model: str | BaseChatModel,
    assistant_id: str,
    *,
    tools: list[BaseTool] | None = None,
    sandbox: SandboxBackendProtocol | None = None,
    sandbox_type: str | None = None,
    system_prompt: str | None = None,
    auto_approve: bool = False,
    enable_memory: bool = True,
    enable_skills: bool = True,
    enable_shell: bool = True,
) -> tuple[Pregel, CompositeBackend]:
    """Create a CLI-configured agent with flexible options.

    This is the main entry point for deepagents CLI agent creation; it is used
    internally and may also be called from external code (e.g. benchmarking
    frameworks, Harbor).

    Args:
        model: LLM model to use (e.g. "anthropic:claude-sonnet-4-5-20250929").
        assistant_id: Agent identifier for memory/state storage.
        tools: Extra tools to expose to the agent (default: empty list).
        sandbox: Optional sandbox backend for remote execution (e.g.
            ModalBackend). When None, the local filesystem + shell are used.
        sandbox_type: Sandbox provider type ("modal", "runloop", "daytona");
            used when generating the system prompt.
        system_prompt: Override for the default system prompt. When None it is
            generated from sandbox_type and assistant_id.
        auto_approve: When True, auto-approve every tool call without human
            confirmation. Useful for automated workflows.
        enable_memory: Enable AgentMemoryMiddleware for persistent memory.
        enable_skills: Enable SkillsMiddleware for custom agent skills.
        enable_shell: Enable ShellMiddleware for local shell execution
            (local mode only).

    Returns:
        A 2-tuple of (agent_graph, composite_backend):
            - agent_graph: configured LangGraph Pregel instance, ready to run
            - composite_backend: CompositeBackend for file operations
    """
    if tools is None:
        tools = []

    # Seed the agent directory with default instructions on first use.
    if enable_memory or enable_skills:
        agent_dir = settings.ensure_agent_dir(assistant_id)
        agent_md = agent_dir / "agent.md"
        if not agent_md.exists():
            agent_md.write_text(get_default_coding_instructions())

    # Skills directories (if enabled)
    skills_dir = None
    project_skills_dir = None
    if enable_skills:
        skills_dir = settings.ensure_user_skills_dir(assistant_id)
        project_skills_dir = settings.get_project_skills_dir()

    # File operations route to the local filesystem by default, or to the
    # remote sandbox backend (ModalBackend, etc.) when one is provided.
    # No virtualization in either case - real paths are used.
    composite_backend = CompositeBackend(
        default=FilesystemBackend() if sandbox is None else sandbox,
        routes={},
    )

    # Build the middleware stack. Memory and skills middleware are identical
    # in local and sandbox mode (previously duplicated across both branches);
    # order is preserved: memory, skills, then shell (local mode only).
    agent_middleware = []
    if enable_memory:
        agent_middleware.append(AgentMemoryMiddleware(settings=settings, assistant_id=assistant_id))
    if enable_skills:
        agent_middleware.append(
            SkillsMiddleware(
                skills_dir=skills_dir,
                assistant_id=assistant_id,
                project_skills_dir=project_skills_dir,
            )
        )
    # Shell middleware is local-only: in sandbox mode, file operations and the
    # execute tool are provided by the sandbox backend instead.
    if sandbox is None and enable_shell:
        # Restore the user's original LANGSMITH_PROJECT so code they run via
        # the shell traces to their own project, not the agent's.
        shell_env = os.environ.copy()
        if settings.user_langchain_project:
            shell_env["LANGSMITH_PROJECT"] = settings.user_langchain_project
        agent_middleware.append(
            ShellMiddleware(
                workspace_root=str(Path.cwd()),
                env=shell_env,
            )
        )

    # Get or use custom system prompt
    if system_prompt is None:
        system_prompt = get_system_prompt(assistant_id=assistant_id, sandbox_type=sandbox_type)

    # auto_approve disables every interrupt; otherwise destructive tools
    # require human-in-the-loop approval.
    interrupt_on = {} if auto_approve else _add_interrupt_on()

    # Create the agent
    agent = create_deep_agent(
        model=model,
        system_prompt=system_prompt,
        tools=tools,
        backend=composite_backend,
        middleware=agent_middleware,
        interrupt_on=interrupt_on,
        checkpointer=InMemorySaver(),
    ).with_config(config)
    return agent, composite_backend

View File

@@ -0,0 +1,328 @@
"""에이전트별 장기 메모리를 시스템 프롬프트에 로드하기 위한 미들웨어."""
import contextlib
from collections.abc import Awaitable, Callable
from typing import NotRequired, TypedDict, cast
from langchain.agents.middleware.types import (
AgentMiddleware,
AgentState,
ModelRequest,
ModelResponse,
)
from langgraph.runtime import Runtime
from deepagents_cli.config import Settings
class AgentMemoryState(AgentState):
    """State for the agent memory middleware."""

    user_memory: NotRequired[str]
    """Personal settings from ~/.deepagents/{agent}/ (applied everywhere)."""

    project_memory: NotRequired[str]
    """Project-specific context (loaded from the project root)."""
class AgentMemoryStateUpdate(TypedDict):
    """State update emitted by the agent memory middleware."""

    user_memory: NotRequired[str]
    """Personal settings from ~/.deepagents/{agent}/ (applied everywhere)."""

    project_memory: NotRequired[str]
    """Project-specific context (loaded from the project root)."""
# Long-term Memory Documentation
# Note: Claude Code loads CLAUDE.md files hierarchically and combines them (not precedence-based):
# - Loads recursively from cwd up to (but not including) root directory
# - Multiple files are combined hierarchically: enterprise → project → user
# - Both [project-root]/CLAUDE.md and [project-root]/.claude/CLAUDE.md are loaded if both exist
# - Files higher in hierarchy load first, providing foundation for more specific memories
# We will follow that pattern for deepagents-cli
LONGTERM_MEMORY_SYSTEM_PROMPT = """
## Long-term Memory
Long-term memory is stored in files on the filesystem and persists across sessions.
**User Memory Location**: `{agent_dir_absolute}` (display: `{agent_dir_display}`)
**Project Memory Location**: {project_memory_info}
The system prompt is loaded from two sources at startup:
1. **User agent.md**: `{agent_dir_absolute}/agent.md` - personal settings that apply everywhere
2. **Project agent.md**: loaded from the project root if available - project-specific instructions
Project-specific agent.md files are loaded from the following locations (combined if both exist):
- `[project-root]/.deepagents/agent.md` (preferred)
- `[project-root]/agent.md` (fallback, included if both exist)
**When you should check/read memory (IMPORTANT - do this first):**
- **At the start of every new session**: Check both user and project memory
- User: `ls {agent_dir_absolute}`
- Project: `ls {project_deepagents_dir}` (if inside a project)
- **Before answering a question**: If asked "What do you know about X?" or "How do I do Y?", check project memory first, then user.
- **When the user asks you to do a task**: Check for project-specific guides or examples.
- **When the user refers to past work**: Search project memory files for relevant context.
**Memory-First Response Pattern:**
1. User asks question -> Check project directory first: `ls {project_deepagents_dir}`
2. If relevant files exist -> Read them: `read_file '{project_deepagents_dir}/[filename]'`
3. If needed, check user memory -> `ls {agent_dir_absolute}`
4. Answer by supplementing general knowledge with stored knowledge.
**When you should update memory:**
- **Immediately when the user describes your role or how you should behave**
- **Immediately when the user gives you feedback** - record what went wrong and how to do better in memory.
- When the user explicitly asks you to remember something.
- When patterns or preferences emerge (coding style, conventions, workflow).
- After a significant task where the context would be helpful for future sessions.
**Learning from Feedback:**
- When the user tells you something is better or worse, figure out why and encode it as a pattern.
- Every correction is an opportunity to improve permanently - don't just fix the immediate issue, update your instructions.
- If the user says "You should remember X" or "Pay attention to Y", treat this as highest priority and update memory immediately.
- Look for the underlying principles behind corrections, not just the specific mistakes.
## Deciding Where to Store Memory
When writing or updating agent memory, decide where each fact, configuration, or behavior belongs:
### User Agent File: `{agent_dir_absolute}/agent.md`
-> Describes the agent's **personality, style, and universal behaviors** across all projects.
**Store here:**
- General tone and communication style
- Universal coding preferences (formatting, commenting style, etc.)
- General workflows and methodologies to follow
- Tool usage patterns that apply everywhere
- Personal preferences that don't change between projects
**Examples:**
- "Be concise and direct in your answers"
- "Always use type hints in Python"
- "Prefer functional programming patterns"
### Project Agent File: `{project_deepagents_dir}/agent.md`
-> Describes **how this specific project works** and **how the agent should behave here only**.
**Store here:**
- Project-specific architecture and design patterns
- Coding conventions specific to this codebase
- Project structure and organization
- Testing strategies for this project
- Deployment processes and workflows
- Team conventions and guidelines
**Examples:**
- "This project uses FastAPI with SQLAlchemy"
- "Tests are located in tests/ directory mirroring src structure"
- "All API changes require updating OpenAPI specs"
### Project Memory Files: `{project_deepagents_dir}/*.md`
-> Use for **project-specific reference information** and structured notes.
**Store here:**
- API design documentation
- Architecture decisions and reasoning
- Deployment procedures
- Common debugging patterns
- Onboarding information
**Examples:**
- `{project_deepagents_dir}/api-design.md` - REST API patterns used
- `{project_deepagents_dir}/architecture.md` - System architecture overview
- `{project_deepagents_dir}/deployment.md` - How to deploy this project
### File Operations:
**User Memory:**
```
ls {agent_dir_absolute} # List user memory files
read_file '{agent_dir_absolute}/agent.md' # Read user preferences
edit_file '{agent_dir_absolute}/agent.md' ... # Update user preferences
```
**Project Memory (Preferred for project-specific info):**
```
ls {project_deepagents_dir} # List project memory files
read_file '{project_deepagents_dir}/agent.md' # Read project guidelines
edit_file '{project_deepagents_dir}/agent.md' ... # Update project guidelines
write_file '{project_deepagents_dir}/agent.md' ... # Create project memory file
```
**IMPORTANT**:
- Project memory files are stored in `.deepagents/` inside the project root.
- Always use absolute paths for file operations.
- Determine if info is project-specific (check user vs project memory) before answering."""
DEFAULT_MEMORY_SNIPPET = """<user_memory>
{user_memory}
</user_memory>
<project_memory>
{project_memory}
</project_memory>"""
class AgentMemoryMiddleware(AgentMiddleware):
    """Middleware that loads per-agent long-term memory.

    Loads the agent's long-term memory from files (agent.md) and injects it
    into the system prompt. Memory is loaded once at the start of a
    conversation and kept in state.
    """

    state_schema = AgentMemoryState

    def __init__(
        self,
        *,
        settings: Settings,
        assistant_id: str,
        system_prompt_template: str | None = None,
    ) -> None:
        """Initialize the agent memory middleware.

        Args:
            settings: Global Settings instance with project detection and paths.
            assistant_id: Agent identifier.
            system_prompt_template: Optional custom template used to inject the
                agent memory into the system prompt. Defaults to
                DEFAULT_MEMORY_SNIPPET.
        """
        self.settings = settings
        self.assistant_id = assistant_id
        # User paths
        self.agent_dir = settings.get_agent_dir(assistant_id)
        # Store both display path (with ~) and absolute path for file operations
        self.agent_dir_display = f"~/.deepagents/{assistant_id}"
        self.agent_dir_absolute = str(self.agent_dir)
        # Project paths (from settings)
        self.project_root = settings.project_root
        self.system_prompt_template = system_prompt_template or DEFAULT_MEMORY_SNIPPET

    def before_agent(
        self,
        state: AgentMemoryState,
        runtime: Runtime,
    ) -> AgentMemoryStateUpdate:
        """Load agent memory from files before the agent runs.

        Loads the user agent.md and the project-specific agent.md when they
        exist, but only for keys not already present in state, so memory is
        read once per conversation. File existence is re-checked whenever a
        key is missing, which picks up files created between sessions.

        Args:
            state: Current agent state.
            runtime: Runtime context (unused here).

        Returns:
            A state update populating user_memory and/or project_memory.
        """
        result: AgentMemoryStateUpdate = {}
        # Load user memory if not already in state
        if "user_memory" not in state:
            user_path = self.settings.get_user_agent_md_path(self.assistant_id)
            if user_path.exists():
                # Best effort: an unreadable/undecodable file simply leaves the
                # key unset rather than aborting the run.
                with contextlib.suppress(OSError, UnicodeDecodeError):
                    result["user_memory"] = user_path.read_text()
        # Load project memory if not already in state
        if "project_memory" not in state:
            project_path = self.settings.get_project_agent_md_path()
            if project_path and project_path.exists():
                with contextlib.suppress(OSError, UnicodeDecodeError):
                    result["project_memory"] = project_path.read_text()
        return result

    def _build_system_prompt(self, request: ModelRequest) -> str:
        """Build the full system prompt including the memory sections.

        Args:
            request: Model request carrying state and the base system prompt.

        Returns:
            The full system prompt with the memory sections injected: memory
            snippet first, then the base prompt, then the long-term-memory
            usage instructions.
        """
        # Extract memory from state
        state = cast("AgentMemoryState", request.state)
        user_memory = state.get("user_memory")
        project_memory = state.get("project_memory")
        base_system_prompt = request.system_prompt
        # Build project memory info for documentation
        if self.project_root and project_memory:
            project_memory_info = f"`{self.project_root}` (detected)"
        elif self.project_root:
            project_memory_info = f"`{self.project_root}` (no agent.md found)"
        else:
            project_memory_info = "None (not in a git project)"
        # Build project deepagents directory path
        if self.project_root:
            project_deepagents_dir = str(self.project_root / ".deepagents")
        else:
            project_deepagents_dir = "[project-root]/.deepagents (not in a project)"
        # Format memory section with both memories
        memory_section = self.system_prompt_template.format(
            user_memory=user_memory if user_memory else "(No user agent.md)",
            project_memory=project_memory if project_memory else "(No project agent.md)",
        )
        system_prompt = memory_section
        if base_system_prompt:
            system_prompt += "\n\n" + base_system_prompt
        system_prompt += "\n\n" + LONGTERM_MEMORY_SYSTEM_PROMPT.format(
            agent_dir_absolute=self.agent_dir_absolute,
            agent_dir_display=self.agent_dir_display,
            project_memory_info=project_memory_info,
            project_deepagents_dir=project_deepagents_dir,
        )
        return system_prompt

    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Inject the agent memory into the system prompt.

        Args:
            request: Model request being processed.
            handler: Handler function to invoke with the modified request.

        Returns:
            The model response from the handler.
        """
        system_prompt = self._build_system_prompt(request)
        return handler(request.override(system_prompt=system_prompt))

    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelResponse:
        """(Async) Inject the agent memory into the system prompt.

        Args:
            request: Model request being processed.
            handler: Handler function to await with the modified request.

        Returns:
            The model response from the handler.
        """
        system_prompt = self._build_system_prompt(request)
        return await handler(request.override(system_prompt=system_prompt))

View File

@@ -0,0 +1,87 @@
"""슬래시 명령 및 bash 실행을 위한 명령 처리기."""
import subprocess
from pathlib import Path
from langgraph.checkpoint.memory import InMemorySaver
from .config import COLORS, DEEP_AGENTS_ASCII, console
from .ui import TokenTracker, show_interactive_help
def handle_command(command: str, agent, token_tracker: TokenTracker) -> str | bool:
    """Handle slash commands.

    Args:
        command: Raw user input (may or may not start with "/").
        agent: Agent whose conversation checkpointer can be reset.
        token_tracker: Session token usage tracker.

    Returns:
        "exit" to quit, True if the command was handled here, or False to
        pass the input through to the agent.
    """
    cmd = command.lower().strip().lstrip("/")
    if cmd in ["quit", "exit", "q"]:
        return "exit"
    if cmd == "clear":
        # Reset agent conversation state
        agent.checkpointer = InMemorySaver()
        # Reset token tracking to baseline
        token_tracker.reset()
        # Clear screen and show fresh UI
        console.clear()
        console.print(DEEP_AGENTS_ASCII, style=f"bold {COLORS['primary']}")
        console.print()
        console.print("... 새로 시작! 화면이 지워지고 대화가 초기화되었습니다.", style=COLORS["agent"])
        console.print()
        return True
    if cmd == "help":
        show_interactive_help()
        return True
    if cmd == "tokens":
        token_tracker.display_session()
        return True
    # BUG FIX: the unknown-command warning previously ran unconditionally and
    # returned True, which made the final `return False` unreachable — plain
    # (non-slash) input could never fall through to the agent as the
    # documented contract promises. Only treat actual "/..." input as an
    # unknown slash command.
    if command.strip().startswith("/"):
        console.print()
        console.print(f"[yellow]알 수 없는 명령: /{cmd}[/yellow]")
        console.print("[dim]사용 가능한 명령을 보려면 /help를 입력하세요.[/dim]")
        console.print()
        return True
    return False
def execute_bash_command(command: str) -> bool:
    """Execute a bash command and render its output.

    Args:
        command: Raw user input, typically prefixed with "!".

    Returns:
        True in every case — the input is always considered handled.
    """
    stripped = command.strip().lstrip("!")
    if not stripped:
        # Nothing left after removing the "!" prefix; treat as handled.
        return True
    try:
        console.print()
        console.print(f"[dim]$ {stripped}[/dim]")
        # Run the command in a shell with a hard 30s budget.
        proc = subprocess.run(
            stripped,
            check=False,
            shell=True,
            capture_output=True,
            text=True,
            timeout=30,
            cwd=Path.cwd(),
        )
        # Show captured stdout (dimmed) and stderr (red), when non-empty.
        for text, style_name in ((proc.stdout, COLORS["dim"]), (proc.stderr, "red")):
            if text:
                console.print(text, style=style_name, markup=False)
        # Surface a non-zero exit status explicitly.
        if proc.returncode != 0:
            console.print(f"[dim]Exit code: {proc.returncode}[/dim]")
    except subprocess.TimeoutExpired:
        console.print("[red]30초 후 명령 시간 초과[/red]")
    except Exception as exc:  # best-effort passthrough: report, never crash
        console.print(f"[red]명령 실행 오류: {exc}[/red]")
    console.print()
    return True

View File

@@ -0,0 +1,509 @@
"""CLI를 위한 구성, 상수 밎 모델 생성."""
import os
import re
import sys
import uuid
from dataclasses import dataclass
from pathlib import Path
import dotenv
from rich.console import Console
from deepagents_cli._version import __version__
dotenv.load_dotenv()
# CRITICAL: Override LANGSMITH_PROJECT to route agent traces to separate project
# LangSmith reads LANGSMITH_PROJECT at invocation time, so we override it here
# and preserve the user's original value for shell commands
_deepagents_project = os.environ.get("DEEPAGENTS_LANGSMITH_PROJECT")
# Saved BEFORE the override below; consumed later by Settings.from_environment()
# so user code still sees its original project name.
_original_langsmith_project = os.environ.get("LANGSMITH_PROJECT")
if _deepagents_project:
    # Override LANGSMITH_PROJECT for agent traces
    os.environ["LANGSMITH_PROJECT"] = _deepagents_project
# Now safe to import LangChain modules
from langchain_core.language_models import BaseChatModel
# Color scheme
# Semantic role -> hex color used by the Rich-based UI.
COLORS = {
    "primary": "#10b981",
    "dim": "#6b7280",
    "user": "#ffffff",
    "agent": "#10b981",
    "thinking": "#34d399",
    "tool": "#fbbf24",
}
# ASCII art banner
DEEP_AGENTS_ASCII = f"""
██████╗ ███████╗ ███████╗ ██████╗
██╔══██╗ ██╔════╝ ██╔════╝ ██╔══██╗
██║ ██║ █████╗ █████╗ ██████╔╝
██║ ██║ ██╔══╝ ██╔══╝ ██╔═══╝
██████╔╝ ███████╗ ███████╗ ██║
╚═════╝ ╚══════╝ ╚══════╝ ╚═╝
█████╗ ██████╗ ███████╗ ███╗ ██╗ ████████╗ ███████╗
██╔══██╗ ██╔════╝ ██╔════╝ ████╗ ██║ ╚══██╔══╝ ██╔════╝
███████║ ██║ ███╗ █████╗ ██╔██╗ ██║ ██║ ███████╗
██╔══██║ ██║ ██║ ██╔══╝ ██║╚██╗██║ ██║ ╚════██║
██║ ██║ ╚██████╔╝ ███████╗ ██║ ╚████║ ██║ ███████║
╚═╝ ╚═╝ ╚═════╝ ╚══════╝ ╚═╝ ╚═══╝ ╚═╝ ╚══════╝
v{__version__}
"""
# Interactive commands: slash-command name -> help text shown by /help.
COMMANDS = {
    "clear": "화면을 지우고 대화를 재설정합니다",
    "help": "도움말 정보를 표시합니다",
    "tokens": "현재 세션의 토큰 사용량을 표시합니다",
    "quit": "CLI를 종료합니다",
    "exit": "CLI를 종료합니다",
}
# Maximum argument length for display
MAX_ARG_LENGTH = 150
# Agent configuration
config = {"recursion_limit": 1000}
# Rich console instance
console = Console(highlight=False)
def _find_project_root(start_path: Path | None = None) -> Path | None:
"""git 디렉터리를 찾아 프로젝트 루트를 찾습니다.
start_path(또는 cwd)에서 디렉터리 트리를 따라 올라가며 프로젝트 루트를 나타내는
.git 디렉터리를 찾습니다.
Args:
start_path: 검색을 시작할 디렉터리. 기본값은 현재 작업 디렉터리입니다.
Returns:
찾은 경우 프로젝트 루트의 경로, 그렇지 않으면 None입니다.
"""
current = Path(start_path or Path.cwd()).resolve()
# Walk up the directory tree
for parent in [current, *list(current.parents)]:
git_dir = parent / ".git"
if git_dir.exists():
return parent
return None
def _find_project_agent_md(project_root: Path) -> list[Path]:
    """Find the project-specific agent.md file(s).

    Checks two locations and returns every one that exists, in priority
    order:

    1. ``project_root/.deepagents/agent.md`` (preferred)
    2. ``project_root/agent.md`` (fallback; included alongside the first
       when both exist, so both get loaded and combined)

    Args:
        project_root: Path of the project root directory.

    Returns:
        List of project agent.md paths (may contain 0, 1, or 2 entries).
    """
    candidates = (
        project_root / ".deepagents" / "agent.md",  # preferred location
        project_root / "agent.md",  # repository-root fallback
    )
    return [candidate for candidate in candidates if candidate.exists()]
@dataclass
class Settings:
    """Global settings and environment detection for deepagents-cli.

    Initialized once at startup; provides access to:
    - Available models and API keys
    - Current project information
    - Tool availability (e.g. Tavily)
    - Filesystem paths

    Attributes:
        project_root: Current project root directory (when inside a git project)
        openai_api_key: OpenAI API key (when available)
        anthropic_api_key: Anthropic API key (when available)
        tavily_api_key: Tavily API key (when available)
        deepagents_langchain_project: LangSmith project name for deepagents agent tracing
        user_langchain_project: Original LANGSMITH_PROJECT from the environment (for user code)
    """

    # API keys (None when the corresponding environment variable is unset)
    openai_api_key: str | None
    anthropic_api_key: str | None
    google_api_key: str | None
    tavily_api_key: str | None
    # LangSmith configuration
    deepagents_langchain_project: str | None  # For deepagents agent tracing
    user_langchain_project: str | None  # Original LANGSMITH_PROJECT for user code
    # Model configuration (populated later by create_model())
    model_name: str | None = None  # Currently active model name
    model_provider: str | None = None  # Provider (openai, anthropic, google)
    # Project information
    project_root: Path | None = None

    @classmethod
    def from_environment(cls, *, start_path: Path | None = None) -> "Settings":
        """Create settings by probing the current environment.

        Args:
            start_path: Directory to start project detection from (defaults to cwd)

        Returns:
            A Settings instance with the detected configuration
        """
        # Detect API keys
        openai_key = os.environ.get("OPENAI_API_KEY")
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
        google_key = os.environ.get("GOOGLE_API_KEY")
        tavily_key = os.environ.get("TAVILY_API_KEY")
        # Detect LangSmith configuration
        # DEEPAGENTS_LANGSMITH_PROJECT: Project for deepagents agent tracing
        # user_langchain_project: User's ORIGINAL LANGSMITH_PROJECT (before override)
        # Note: LANGSMITH_PROJECT was already overridden at module import time (above)
        # so we use the saved original value, not the current os.environ value
        deepagents_langchain_project = os.environ.get("DEEPAGENTS_LANGSMITH_PROJECT")
        user_langchain_project = _original_langsmith_project  # Use saved original!
        # Detect project
        project_root = _find_project_root(start_path)
        return cls(
            openai_api_key=openai_key,
            anthropic_api_key=anthropic_key,
            google_api_key=google_key,
            tavily_api_key=tavily_key,
            deepagents_langchain_project=deepagents_langchain_project,
            user_langchain_project=user_langchain_project,
            project_root=project_root,
        )

    @property
    def has_openai(self) -> bool:
        """Whether an OpenAI API key is configured."""
        return self.openai_api_key is not None

    @property
    def has_anthropic(self) -> bool:
        """Whether an Anthropic API key is configured."""
        return self.anthropic_api_key is not None

    @property
    def has_google(self) -> bool:
        """Whether a Google API key is configured."""
        return self.google_api_key is not None

    @property
    def has_tavily(self) -> bool:
        """Whether a Tavily API key is configured."""
        return self.tavily_api_key is not None

    @property
    def has_deepagents_langchain_project(self) -> bool:
        """Whether a DeepAgents LangChain project name is configured."""
        return self.deepagents_langchain_project is not None

    @property
    def has_project(self) -> bool:
        """Whether we are currently inside a git project."""
        return self.project_root is not None

    @property
    def user_deepagents_dir(self) -> Path:
        """Get the base user-level .deepagents directory.

        Returns:
            Path to ~/.deepagents
        """
        return Path.home() / ".deepagents"

    def get_user_agent_md_path(self, agent_name: str) -> Path:
        """Get the user-level agent.md path for a specific agent.

        Returns the path regardless of whether the file exists.
        NOTE(review): unlike get_agent_dir, this does not validate
        agent_name — confirm whether that is intended.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}/agent.md
        """
        return Path.home() / ".deepagents" / agent_name / "agent.md"

    def get_project_agent_md_path(self) -> Path | None:
        """Get the project-level agent.md path.

        Returns the path regardless of whether the file exists.

        Returns:
            Path to {project_root}/.deepagents/agent.md, or None when not
            inside a project
        """
        if not self.project_root:
            return None
        return self.project_root / ".deepagents" / "agent.md"

    @staticmethod
    def _is_valid_agent_name(agent_name: str) -> bool:
        """Validate to prevent invalid filesystem paths and security issues."""
        if not agent_name or not agent_name.strip():
            return False
        # Allow only alphanumeric, hyphens, underscores, and whitespace
        return bool(re.match(r"^[a-zA-Z0-9_\-\s]+$", agent_name))

    def get_agent_dir(self, agent_name: str) -> Path:
        """Get the global agent directory path.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}

        Raises:
            ValueError: If the agent name contains disallowed characters.
        """
        if not self._is_valid_agent_name(agent_name):
            msg = (
                f"Invalid agent name: {agent_name!r}. "
                "Agent names can only contain letters, numbers, hyphens, underscores, and spaces."
            )
            raise ValueError(msg)
        return Path.home() / ".deepagents" / agent_name

    def ensure_agent_dir(self, agent_name: str) -> Path:
        """Ensure the global agent directory exists and return its path.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}

        Raises:
            ValueError: If the agent name contains disallowed characters.
        """
        if not self._is_valid_agent_name(agent_name):
            msg = (
                f"Invalid agent name: {agent_name!r}. "
                "Agent names can only contain letters, numbers, hyphens, underscores, and spaces."
            )
            raise ValueError(msg)
        agent_dir = self.get_agent_dir(agent_name)
        agent_dir.mkdir(parents=True, exist_ok=True)
        return agent_dir

    def ensure_project_deepagents_dir(self) -> Path | None:
        """Ensure the project .deepagents directory exists and return its path.

        Returns:
            Path to the project .deepagents directory, or None when not
            inside a project
        """
        if not self.project_root:
            return None
        project_deepagents_dir = self.project_root / ".deepagents"
        project_deepagents_dir.mkdir(parents=True, exist_ok=True)
        return project_deepagents_dir

    def get_user_skills_dir(self, agent_name: str) -> Path:
        """Get the user-level skills directory path for a specific agent.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}/skills/
        """
        return self.get_agent_dir(agent_name) / "skills"

    def ensure_user_skills_dir(self, agent_name: str) -> Path:
        """Ensure the user-level skills directory exists and return its path.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}/skills/
        """
        skills_dir = self.get_user_skills_dir(agent_name)
        skills_dir.mkdir(parents=True, exist_ok=True)
        return skills_dir

    def get_project_skills_dir(self) -> Path | None:
        """Get the project-level skills directory path.

        Returns:
            Path to {project_root}/.deepagents/skills/, or None when not
            inside a project
        """
        if not self.project_root:
            return None
        return self.project_root / ".deepagents" / "skills"

    def ensure_project_skills_dir(self) -> Path | None:
        """Ensure the project-level skills directory exists and return its path.

        Returns:
            Path to {project_root}/.deepagents/skills/, or None when not
            inside a project
        """
        if not self.project_root:
            return None
        # Non-None here: project_root was checked above.
        skills_dir = self.get_project_skills_dir()
        skills_dir.mkdir(parents=True, exist_ok=True)
        return skills_dir
# Global settings instance (initialized once at import time; create_model()
# later records the active model_name/model_provider on it).
settings = Settings.from_environment()
class SessionState:
    """Holds mutable session state (auto-approve mode, splash flag, etc.)."""

    def __init__(self, auto_approve: bool = False, no_splash: bool = False) -> None:
        # A fresh conversation thread id is minted per session.
        self.thread_id = str(uuid.uuid4())
        self.auto_approve = auto_approve
        self.no_splash = no_splash
        # Exit-hint bookkeeping (presumably consumed by the UI layer —
        # nothing in this class reads them).
        self.exit_hint_until: float | None = None
        self.exit_hint_handle = None

    def toggle_auto_approve(self) -> bool:
        """Flip auto-approve mode and return the new state."""
        flipped = not self.auto_approve
        self.auto_approve = flipped
        return flipped
def get_default_coding_instructions() -> str:
    """Return the default coding-agent instructions.

    These are immutable base instructions the agent cannot modify;
    long-term memory (agent.md) is handled separately by middleware.

    Returns:
        Contents of the bundled default_agent_prompt.md file.
    """
    return (Path(__file__).parent / "default_agent_prompt.md").read_text()
def _detect_provider(model_name: str) -> str | None:
"""모델 이름에서 공급자를 자동 감지합니다.
Args:
model_name: 공급자를 감지할 모델 이름
Returns:
공급자 이름(openai, anthropic, google) 또는 감지할 수 없는 경우 None
"""
model_lower = model_name.lower()
if any(x in model_lower for x in ["gpt", "o1", "o3"]):
return "openai"
if "claude" in model_lower:
return "anthropic"
if "gemini" in model_lower:
return "google"
return None
def create_model(model_name_override: str | None = None) -> BaseChatModel:
    """Create an appropriate chat model based on the available API keys.

    Uses the global ``settings`` instance to decide which model to build,
    and records the chosen model name/provider back onto ``settings``.

    Args:
        model_name_override: Optional model name to use instead of the
            environment-variable defaults.

    Returns:
        A chat model instance (OpenAI, Anthropic, or Google).

    Raises:
        SystemExit: If no API key is configured or the model provider
            cannot be determined.
    """
    # Determine provider and model
    if model_name_override:
        # Use provided model, auto-detect provider
        provider = _detect_provider(model_name_override)
        if not provider:
            console.print(
                f"[bold red]오류:[/bold red] 모델 이름에서 공급자를 감지할 수 없습니다: {model_name_override}"
            )
            console.print("\n지원되는 모델 이름 패턴:")
            console.print("  - OpenAI: gpt-*, o1-*, o3-*")
            console.print("  - Anthropic: claude-*")
            console.print("  - Google: gemini-*")
            sys.exit(1)
        # Check if API key for detected provider is available
        if provider == "openai" and not settings.has_openai:
            console.print(f"[bold red]오류:[/bold red] 모델 '{model_name_override}'은(는) OPENAI_API_KEY가 필요합니다")
            sys.exit(1)
        elif provider == "anthropic" and not settings.has_anthropic:
            console.print(
                f"[bold red]오류:[/bold red] 모델 '{model_name_override}'은(는) ANTHROPIC_API_KEY가 필요합니다"
            )
            sys.exit(1)
        elif provider == "google" and not settings.has_google:
            console.print(f"[bold red]오류:[/bold red] 모델 '{model_name_override}'은(는) GOOGLE_API_KEY가 필요합니다")
            sys.exit(1)
        model_name = model_name_override
    # Use environment variable defaults, detect provider by API key priority
    elif settings.has_openai:
        provider = "openai"
        model_name = os.environ.get("OPENAI_MODEL", "gpt-5-mini")
    elif settings.has_anthropic:
        provider = "anthropic"
        model_name = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-5-20250929")
    elif settings.has_google:
        provider = "google"
        model_name = os.environ.get("GOOGLE_MODEL", "gemini-3-pro-preview")
    else:
        console.print("[bold red]오류:[/bold red] API 키가 구성되지 않았습니다.")
        console.print("\n다음 환경 변수 중 하나를 설정하십시오:")
        console.print("  - OPENAI_API_KEY (OpenAI 모델용, 예: gpt-5-mini)")
        console.print("  - ANTHROPIC_API_KEY (Claude 모델용)")
        console.print("  - GOOGLE_API_KEY (Google Gemini 모델용)")
        console.print("\n예시:")
        console.print("  export OPENAI_API_KEY=your_api_key_here")
        console.print("\n또는 .env 파일에 추가하십시오.")
        sys.exit(1)
    # Store model info in settings for display
    settings.model_name = model_name
    settings.model_provider = provider
    # Create and return the model
    # (imports are deferred so only the selected provider's package is needed)
    if provider == "openai":
        from langchain_openai import ChatOpenAI

        return ChatOpenAI(model=model_name)
    if provider == "anthropic":
        from langchain_anthropic import ChatAnthropic

        return ChatAnthropic(
            model_name=model_name,
            max_tokens=20_000,  # type: ignore[arg-type]
        )
    if provider == "google":
        from langchain_google_genai import ChatGoogleGenerativeAI

        return ChatGoogleGenerativeAI(
            model=model_name,
            temperature=0,
            max_tokens=None,
        )
    # Unreachable: provider is always one of the three values set above
    # (sys.exit was called otherwise), so control never falls through.

View File

@@ -0,0 +1,111 @@
You are an AI assistant that helps users with various tasks such as coding, research, and analysis.
# Core Role
Your core role and behavior can be updated based on user feedback and instructions. If the user instructs you on how to behave or about your role, immediately update this memory file to reflect those instructions.
## Memory-First Protocol
You have access to a persistent memory system. Always follow this protocol:
**At the start of a session:**
- Check `ls /memories/` to see what knowledge is stored.
- If a specific topic is mentioned in the role description, check related guides in `/memories/`.
**Before answering a question:**
- When asked "What do you know about X?" or "How do I do Y?" → Check `ls /memories/` first.
- If a relevant memory file exists → Read it and answer based on the saved knowledge.
- Prioritize stored knowledge over general knowledge.
**When learning new information:**
- If the user teaches you something or asks you to remember something → Save it to `/memories/[topic].md`.
- Use descriptive filenames: Use `/memories/deep-agents-guide.md` instead of `/memories/notes.md`.
- After saving, read specific content again to verify.
**Important:** Your memory persists between sessions. Information stored in `/memories/` is more reliable than general knowledge for topics you have specifically learned.
# Tone and Style
Be concise and direct. Answer within 4 lines unless the user asks for details.
Stop after finishing file operations - Do not explain what you did unless asked.
Avoid unnecessary introductions or conclusions.
When executing unimportant bash commands, briefly explain what you are doing.
## Proactiveness
Take action when requested, but do not surprise the user with unrequested actions.
If asked about an approach, answer first before taking action.
## Following Conventions
- Check existing code before assuming the availability of libraries and frameworks.
- Mimic existing code style, naming conventions, and patterns.
- Do not add comments unless requested.
## Task Management
Use `write_todos` for complex multi-step tasks (3 or more steps). Mark tasks as `in_progress` before starting, and `completed` immediately after finishing.
Perform simple 1-2 step tasks immediately without todos.
## File Reading Best Practices
**Important**: When navigating the codebase or reading multiple files, always use pagination to prevent context overflow.
**Codebase Navigation Patterns:**
1. First Scan: `read_file(path, limit=100)` - Check file structure and key sections
2. Targeted Reading: `read_file(path, offset=100, limit=200)` - Read specific sections if needed
3. Full Reading: Use `read_file(path)` without limits only when needed for editing
**When to use pagination:**
- Reading any file exceeding 500 lines
- Exploring unfamiliar codebases (Always start with limit=100)
- Reading multiple files in succession
- All research or investigation tasks
**When full reading is allowed:**
- Small files (under 500 lines)
- Files required to be edited immediately after reading
- After verifying file size with a first scan
**Workflow Example:**
```
Bad: read_file(/src/large_module.py) # Fills context with 2000+ lines of code
Good: read_file(/src/large_module.py, limit=100) # Scan structure first
read_file(/src/large_module.py, offset=100, limit=100) # Read relevant section
```
## Working with Subagents (Task Tools)
When delegating to subagents:
- **Use Filesystem for Large I/O**: If input instructions are large (500+ words) or expected output is large, communicate via files.
- Write input context/instructions to a file, and instruct the subagent to read it.
- Ask the subagent to write output to a file, and read it after the subagent returns.
- This prevents token bloat in both directions and keeps context manageable.
- **Parallelize Independent Tasks**: When tasks are independent, create parallel subagents to work simultaneously.
- **Clear Specifications**: Precisely inform the subagent of the required format/structure in their response or output file.
- **Main Agent Synthesis**: Once subagents collect/execute, the main agent integrates results into the final output.
## Tools
### execute_bash
Executes shell commands. Always quote paths that contain spaces.
bash commands are executed in the current working directory.
Example: `pytest /foo/bar/tests` (Good), `cd /foo/bar && pytest tests` (Bad)
### File Tools
- read_file: Read file content (use absolute path)
- edit_file: Exact string replacement in file (must read first, provide unique old_string)
- write_file: Create or overwrite file
- ls: List directory contents
- glob: Find files by pattern (e.g., "**/*.py")
- grep: Search file content
Always use absolute paths starting with /.
### web_search
Search for documentation, error solutions, and code examples.
### http_request
Sends HTTP requests to an API (GET, POST, etc.).
## Code References
When referencing code, use the following format: `file_path:line_number`
## Documentation
- Do not create excessive markdown summary/documentation files after completing tasks.
- Focus on the task itself, not documenting what you did.
- Write documentation only when explicitly requested.

View File

@@ -0,0 +1,672 @@
"""CLI를 위한 작업 실행 및 스트리밍 로직."""
import asyncio
import json
import sys
import termios
import tty
from langchain.agents.middleware.human_in_the_loop import (
ActionRequest,
ApproveDecision,
Decision,
HITLRequest,
HITLResponse,
RejectDecision,
)
from langchain_core.messages import HumanMessage, ToolMessage
from langgraph.types import Command, Interrupt
from pydantic import TypeAdapter, ValidationError
from rich import box
from rich.markdown import Markdown
from rich.panel import Panel
from deepagents_cli.config import COLORS, console
from deepagents_cli.file_ops import FileOpTracker, build_approval_preview
from deepagents_cli.image_utils import create_multimodal_content
from deepagents_cli.input import ImageTracker, parse_file_mentions
from deepagents_cli.ui import (
TokenTracker,
format_tool_display,
format_tool_message_content,
render_diff_block,
render_file_operation,
render_todo_list,
)
_HITL_REQUEST_ADAPTER = TypeAdapter(HITLRequest)
def prompt_for_tool_approval(
    action_request: ActionRequest,
    assistant_id: str | None,
) -> Decision | dict:
    """Ask the user to approve/reject a tool action using arrow-key navigation.

    Renders an approval panel (with an optional diff preview), then runs a
    small raw-mode terminal menu driven by arrow keys / Enter / 'a' / 'r',
    with a plain input() fallback on non-Unix terminals.

    Returns:
        A Decision (ApproveDecision or RejectDecision), or the dict
        {"type": "auto_approve_all"} to switch into auto-approve mode.
    """
    description = action_request.get("description", "No description available")
    name = action_request["name"]
    args = action_request["args"]
    # Build a rich preview of the pending tool call when a name is available.
    preview = build_approval_preview(name, args, assistant_id) if name else None
    body_lines = []
    if preview:
        body_lines.append(f"[bold]{preview.title}[/bold]")
        body_lines.extend(preview.details)
        if preview.error:
            body_lines.append(f"[red]{preview.error}[/red]")
    else:
        body_lines.append(description)
    # Display action info first
    console.print(
        Panel(
            "[bold yellow]⚠️  도구 작업 승인 필요[/bold yellow]\n\n" + "\n".join(body_lines),
            border_style="yellow",
            box=box.ROUNDED,
            padding=(0, 1),
        )
    )
    if preview and preview.diff and not preview.error:
        console.print()
        render_diff_block(preview.diff, preview.diff_title or preview.title)
    options = ["approve", "reject", "auto-accept all going forward"]
    selected = 0  # Start with approve selected
    try:
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)
        try:
            tty.setraw(fd)
            # Hide cursor during menu interaction
            sys.stdout.write("\033[?25l")
            sys.stdout.flush()
            # Initial render flag
            first_render = True
            while True:
                if not first_render:
                    # Move cursor back to start of menu (up 3 lines, then to start of line)
                    sys.stdout.write("\033[3A\r")
                first_render = False
                # Display options vertically with ANSI color codes
                for i, option in enumerate(options):
                    sys.stdout.write("\r\033[K")  # Clear line from cursor to end
                    if i == selected:
                        if option == "approve":
                            # Green bold with filled checkbox
                            sys.stdout.write("\033[1;32m☑ 승인 (Approve)\033[0m\n")
                        elif option == "reject":
                            # Red bold with filled checkbox
                            sys.stdout.write("\033[1;31m☑ 거부 (Reject)\033[0m\n")
                        else:
                            # Blue bold with filled checkbox for auto-accept
                            sys.stdout.write("\033[1;34m☑ 이후 모두 자동 승인 (Auto-accept all)\033[0m\n")
                    elif option == "approve":
                        # Dim with empty checkbox
                        sys.stdout.write("\033[2m☐ 승인 (Approve)\033[0m\n")
                    elif option == "reject":
                        # Dim with empty checkbox
                        sys.stdout.write("\033[2m☐ 거부 (Reject)\033[0m\n")
                    else:
                        # Dim with empty checkbox
                        sys.stdout.write("\033[2m☐ 이후 모두 자동 승인 (Auto-accept all)\033[0m\n")
                sys.stdout.flush()
                # Read key
                char = sys.stdin.read(1)
                if char == "\x1b":  # ESC sequence (arrow keys)
                    # NOTE(review): a bare ESC press blocks here until two more
                    # bytes arrive — confirm that is acceptable for this UI.
                    next1 = sys.stdin.read(1)
                    next2 = sys.stdin.read(1)
                    if next1 == "[":
                        if next2 == "B":  # Down arrow
                            selected = (selected + 1) % len(options)
                        elif next2 == "A":  # Up arrow
                            selected = (selected - 1) % len(options)
                elif char in {"\r", "\n"}:  # Enter
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break
                elif char == "\x03":  # Ctrl+C
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    raise KeyboardInterrupt
                elif char.lower() == "a":  # Shortcut: approve
                    selected = 0
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break
                elif char.lower() == "r":  # Shortcut: reject
                    selected = 1
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break
        finally:
            # Show cursor again and restore the original terminal mode.
            sys.stdout.write("\033[?25h")
            sys.stdout.flush()
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
    except (termios.error, AttributeError):
        # Fallback for non-Unix systems
        console.print("  ☐ (A)승인 (기본값)")
        console.print("  ☐ (R)거부")
        console.print("  ☐ (Auto)이후 모두 자동 승인")
        choice = input("\n선택 (A/R/Auto, 기본값=Approve): ").strip().lower()
        if choice in {"r", "reject"}:
            selected = 1
        elif choice in {"auto", "auto-accept"}:
            selected = 2
        else:
            selected = 0
    # Return decision based on selection
    if selected == 0:
        return ApproveDecision(type="approve")
    if selected == 1:
        return RejectDecision(type="reject", message="User rejected the command")
    # Return special marker for auto-approve mode
    return {"type": "auto_approve_all"}
async def execute_task(
user_input: str,
agent,
assistant_id: str | None,
session_state,
token_tracker: TokenTracker | None = None,
backend=None,
image_tracker: ImageTracker | None = None,
) -> None:
"""모든 작업을 AI 에이전트에게 직접 전달하여 실행합니다."""
# Parse file mentions and inject content if any
prompt_text, mentioned_files = parse_file_mentions(user_input)
if mentioned_files:
context_parts = [prompt_text, "\n\n## 참조된 파일 (Referenced Files)\n"]
for file_path in mentioned_files:
try:
content = file_path.read_text()
# Limit file content to reasonable size
if len(content) > 50000:
content = content[:50000] + "\n... (파일 잘림)"
context_parts.append(f"\n### {file_path.name}\nPath: `{file_path}`\n```\n{content}\n```")
except Exception as e:
context_parts.append(f"\n### {file_path.name}\n[파일 읽기 오류: {e}]")
final_input = "\n".join(context_parts)
else:
final_input = prompt_text
# Include images in the message content
images_to_send = []
if image_tracker:
images_to_send = image_tracker.get_images()
if images_to_send:
message_content = create_multimodal_content(final_input, images_to_send)
else:
message_content = final_input
config = {
"configurable": {"thread_id": session_state.thread_id},
"metadata": {"assistant_id": assistant_id} if assistant_id else {},
}
has_responded = False
captured_input_tokens = 0
captured_output_tokens = 0
current_todos = None # Track current todo list state
status = console.status(f"[bold {COLORS['thinking']}]에이전트가 생각 중...", spinner="dots")
status.start()
spinner_active = True
tool_icons = {
"read_file": "📖",
"write_file": "✏️",
"edit_file": "✂️",
"ls": "📁",
"glob": "🔍",
"grep": "🔎",
"shell": "",
"execute": "🔧",
"web_search": "🌐",
"http_request": "🌍",
"task": "🤖",
"write_todos": "📋",
}
file_op_tracker = FileOpTracker(assistant_id=assistant_id, backend=backend)
# Track which tool calls we've displayed to avoid duplicates
displayed_tool_ids = set()
# Buffer partial tool-call chunks keyed by streaming index
tool_call_buffers: dict[str | int, dict] = {}
# Buffer assistant text so we can render complete markdown segments
pending_text = ""
def flush_text_buffer(*, final: bool = False) -> None:
"""Flush accumulated assistant text as rendered markdown when appropriate."""
nonlocal pending_text, spinner_active, has_responded
if not final or not pending_text.strip():
return
if spinner_active:
status.stop()
spinner_active = False
if not has_responded:
console.print("", style=COLORS["agent"], markup=False, end=" ")
has_responded = True
markdown = Markdown(pending_text.rstrip())
console.print(markdown, style=COLORS["agent"])
pending_text = ""
# Clear images from tracker after creating the message
# (they've been encoded into the message content)
if image_tracker:
image_tracker.clear()
# Stream input - may need to loop if there are interrupts
stream_input = {"messages": [{"role": "user", "content": message_content}]}
try:
while True:
interrupt_occurred = False
hitl_response: dict[str, HITLResponse] = {}
suppress_resumed_output = False
# Track all pending interrupts: {interrupt_id: request_data}
pending_interrupts: dict[str, HITLRequest] = {}
async for chunk in agent.astream(
stream_input,
stream_mode=["messages", "updates"], # Dual-mode for HITL support
subgraphs=True,
config=config,
durability="exit",
):
# Unpack chunk - with subgraphs=True and dual-mode, it's (namespace, stream_mode, data)
if not isinstance(chunk, tuple) or len(chunk) != 3:
continue
_namespace, current_stream_mode, data = chunk
# Handle UPDATES stream - for interrupts and todos
if current_stream_mode == "updates":
if not isinstance(data, dict):
continue
# Check for interrupts - collect ALL pending interrupts
if "__interrupt__" in data:
interrupts: list[Interrupt] = data["__interrupt__"]
if interrupts:
for interrupt_obj in interrupts:
# Interrupt has required fields: value (HITLRequest) and id (str)
# Validate the HITLRequest using TypeAdapter
try:
validated_request = _HITL_REQUEST_ADAPTER.validate_python(interrupt_obj.value)
pending_interrupts[interrupt_obj.id] = validated_request
interrupt_occurred = True
except ValidationError as e:
console.print(
f"[yellow]경고: 유효하지 않은 HITL 요청 데이터: {e}[/yellow]",
style="dim",
)
raise
# Extract chunk_data from updates for todo checking
chunk_data = next(iter(data.values())) if data else None
if chunk_data and isinstance(chunk_data, dict):
# Check for todo updates
if "todos" in chunk_data:
new_todos = chunk_data["todos"]
if new_todos != current_todos:
current_todos = new_todos
# Stop spinner before rendering todos
if spinner_active:
status.stop()
spinner_active = False
console.print()
render_todo_list(new_todos)
console.print()
# Handle MESSAGES stream - for content and tool calls
elif current_stream_mode == "messages":
# Messages stream returns (message, metadata) tuples
if not isinstance(data, tuple) or len(data) != 2:
continue
message, _metadata = data
if isinstance(message, HumanMessage):
content = message.text
if content:
flush_text_buffer(final=True)
if spinner_active:
status.stop()
spinner_active = False
if not has_responded:
console.print("", style=COLORS["agent"], markup=False, end=" ")
has_responded = True
markdown = Markdown(content)
console.print(markdown, style=COLORS["agent"])
console.print()
continue
if isinstance(message, ToolMessage):
# Tool results are sent to the agent, not displayed to users
# Exception: show shell command errors to help with debugging
tool_name = getattr(message, "name", "")
tool_status = getattr(message, "status", "success")
tool_content = format_tool_message_content(message.content)
record = file_op_tracker.complete_with_message(message)
# Reset spinner message after tool completes
if spinner_active:
status.update(f"[bold {COLORS['thinking']}]에이전트가 생각 중...")
if tool_name == "shell" and tool_status != "success":
flush_text_buffer(final=True)
if tool_content:
if spinner_active:
status.stop()
spinner_active = False
console.print()
console.print(tool_content, style="red", markup=False)
console.print()
elif tool_content and isinstance(tool_content, str):
stripped = tool_content.lstrip()
if stripped.lower().startswith("error"):
flush_text_buffer(final=True)
if spinner_active:
status.stop()
spinner_active = False
console.print()
console.print(tool_content, style="red", markup=False)
console.print()
if record:
flush_text_buffer(final=True)
if spinner_active:
status.stop()
spinner_active = False
console.print()
render_file_operation(record)
console.print()
if not spinner_active:
status.start()
spinner_active = True
# For all other tools (web_search, http_request, etc.),
# results are hidden from user - agent will process and respond
continue
# Check if this is an AIMessageChunk
if not hasattr(message, "content_blocks"):
# Fallback for messages without content_blocks
continue
# Extract token usage if available
if token_tracker and hasattr(message, "usage_metadata"):
usage = message.usage_metadata
if usage:
input_toks = usage.get("input_tokens", 0)
output_toks = usage.get("output_tokens", 0)
if input_toks or output_toks:
captured_input_tokens = max(captured_input_tokens, input_toks)
captured_output_tokens = max(captured_output_tokens, output_toks)
# Process content blocks (this is the key fix!)
for block in message.content_blocks:
block_type = block.get("type")
# Handle text blocks
if block_type == "text":
text = block.get("text", "")
if text:
pending_text += text
# Handle reasoning blocks
elif block_type == "reasoning":
flush_text_buffer(final=True)
reasoning = block.get("reasoning", "")
if reasoning and spinner_active:
status.stop()
spinner_active = False
# Could display reasoning differently if desired
# For now, skip it or handle minimally
# Handle tool call chunks
# Some models (OpenAI, Anthropic) stream tool_call_chunks
# Others (Gemini) don't stream them and just return the full tool_call
elif block_type in ("tool_call_chunk", "tool_call"):
chunk_name = block.get("name")
chunk_args = block.get("args")
chunk_id = block.get("id")
chunk_index = block.get("index")
# Use index as stable buffer key; fall back to id if needed
buffer_key: str | int
if chunk_index is not None:
buffer_key = chunk_index
elif chunk_id is not None:
buffer_key = chunk_id
else:
buffer_key = f"unknown-{len(tool_call_buffers)}"
buffer = tool_call_buffers.setdefault(
buffer_key,
{"name": None, "id": None, "args": None, "args_parts": []},
)
if chunk_name:
buffer["name"] = chunk_name
if chunk_id:
buffer["id"] = chunk_id
if isinstance(chunk_args, dict):
buffer["args"] = chunk_args
buffer["args_parts"] = []
elif isinstance(chunk_args, str):
if chunk_args:
parts: list[str] = buffer.setdefault("args_parts", [])
if not parts or chunk_args != parts[-1]:
parts.append(chunk_args)
buffer["args"] = "".join(parts)
elif chunk_args is not None:
buffer["args"] = chunk_args
buffer_name = buffer.get("name")
buffer_id = buffer.get("id")
if buffer_name is None:
continue
parsed_args = buffer.get("args")
if isinstance(parsed_args, str):
if not parsed_args:
continue
try:
parsed_args = json.loads(parsed_args)
except json.JSONDecodeError:
# Wait for more chunks to form valid JSON
continue
elif parsed_args is None:
continue
# Ensure args are in dict form for formatter
if not isinstance(parsed_args, dict):
parsed_args = {"value": parsed_args}
flush_text_buffer(final=True)
if buffer_id is not None:
if buffer_id not in displayed_tool_ids:
displayed_tool_ids.add(buffer_id)
file_op_tracker.start_operation(buffer_name, parsed_args, buffer_id)
else:
file_op_tracker.update_args(buffer_id, parsed_args)
tool_call_buffers.pop(buffer_key, None)
icon = tool_icons.get(buffer_name, "🔧")
if spinner_active:
status.stop()
if has_responded:
console.print()
display_str = format_tool_display(buffer_name, parsed_args)
console.print(
f" {icon} {display_str}",
style=f"dim {COLORS['tool']}",
markup=False,
)
# Restart spinner with context about which tool is executing
status.update(f"[bold {COLORS['thinking']}]{display_str} 실행 중...")
status.start()
spinner_active = True
if getattr(message, "chunk_position", None) == "last":
flush_text_buffer(final=True)
# After streaming loop - handle interrupt if it occurred
flush_text_buffer(final=True)
# Handle human-in-the-loop after stream completes
if interrupt_occurred:
any_rejected = False
for interrupt_id, hitl_request in pending_interrupts.items():
# Check if auto-approve is enabled
if session_state.auto_approve:
# Auto-approve all commands without prompting
decisions = []
for action_request in hitl_request["action_requests"]:
# Show what's being auto-approved (brief, dim message)
if spinner_active:
status.stop()
spinner_active = False
description = action_request.get("description", "tool action")
console.print()
console.print(f" [dim]⚡ {description}[/dim]")
decisions.append({"type": "approve"})
hitl_response[interrupt_id] = {"decisions": decisions}
# Restart spinner for continuation
if not spinner_active:
status.start()
spinner_active = True
else:
# Normal HITL flow - stop spinner and prompt user
if spinner_active:
status.stop()
spinner_active = False
# Handle human-in-the-loop approval
decisions = []
for action_index, action_request in enumerate(hitl_request["action_requests"]):
decision = prompt_for_tool_approval(
action_request,
assistant_id,
)
# Check if user wants to switch to auto-approve mode
if isinstance(decision, dict) and decision.get("type") == "auto_approve_all":
# Switch to auto-approve mode
session_state.auto_approve = True
console.print()
console.print("[bold blue]✓ 자동 승인 모드 활성화됨[/bold blue]")
console.print("[dim]향후 모든 도구 작업이 자동으로 승인됩니다.[/dim]")
console.print()
# Approve this action and all remaining actions in the batch
decisions.append({"type": "approve"})
for _remaining_action in hitl_request["action_requests"][action_index + 1 :]:
decisions.append({"type": "approve"})
break
decisions.append(decision)
# Mark file operations as HIL-approved if user approved
if decision.get("type") == "approve":
tool_name = action_request.get("name")
if tool_name in {"write_file", "edit_file"}:
file_op_tracker.mark_hitl_approved(tool_name, action_request.get("args", {}))
if any(decision.get("type") == "reject" for decision in decisions):
any_rejected = True
hitl_response[interrupt_id] = {"decisions": decisions}
suppress_resumed_output = any_rejected
if interrupt_occurred and hitl_response:
if suppress_resumed_output:
if spinner_active:
status.stop()
spinner_active = False
console.print("[yellow]명령이 거부되었습니다.[/yellow]", style="bold")
console.print("에이전트에게 다르게 수행할 작업을 알려주세요.")
console.print()
return
# Resume the agent with the human decision
stream_input = Command(resume=hitl_response)
# Continue the while loop to restream
else:
# No interrupt, break out of while loop
break
except asyncio.CancelledError:
# Event loop cancelled the task (e.g. Ctrl+C during streaming) - clean up and return
if spinner_active:
status.stop()
console.print("\n[yellow]사용자에 의해 중단됨[/yellow]")
console.print("에이전트 상태 업데이트 중...", style="dim")
try:
await agent.aupdate_state(
config=config,
values={"messages": [HumanMessage(content="[이전 요청이 시스템에 의해 취소되었습니다]")]},
)
console.print("다음 명령 준비 완료.\n", style="dim")
except Exception as e:
console.print(f"[red]경고: 에이전트 상태 업데이트 실패: {e}[/red]\n")
return
except KeyboardInterrupt:
# User pressed Ctrl+C - clean up and exit gracefully
if spinner_active:
status.stop()
console.print("\n[yellow]사용자에 의해 중단됨[/yellow]")
console.print("에이전트 상태 업데이트 중...", style="dim")
# Inform the agent synchronously (in async context)
try:
await agent.aupdate_state(
config=config,
values={"messages": [HumanMessage(content="[사용자가 Ctrl+C로 이전 요청을 중단했습니다]")]},
)
console.print("다음 명령 준비 완료.\n", style="dim")
except Exception as e:
console.print(f"[red]경고: 에이전트 상태 업데이트 실패: {e}[/red]\n")
return
if spinner_active:
status.stop()
if has_responded:
console.print()
# Track token usage (display only via /tokens command)
if token_tracker and (captured_input_tokens or captured_output_tokens):
token_tracker.add(captured_input_tokens, captured_output_tokens)

View File

@@ -0,0 +1,408 @@
"""CLI 표시를 위한 파일 작업 추적 및 diff 계산 도움말."""
from __future__ import annotations
import difflib
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
from deepagents.backends.utils import perform_string_replacement
from deepagents_cli.config import settings
if TYPE_CHECKING:
from deepagents.backends.protocol import BACKEND_TYPES
# Lifecycle status of a tracked file operation.
FileOpStatus = Literal["pending", "success", "error"]
@dataclass
class ApprovalPreview:
    """Data used to render a human-in-the-loop (HITL) approval preview."""

    title: str  # Headline shown in the approval prompt
    details: list[str]  # Bullet-style summary lines
    diff: str | None = None  # Optional unified diff to display
    diff_title: str | None = None  # Heading for the diff panel
    error: str | None = None  # Error message when a preview cannot be built
def _safe_read(path: Path) -> str | None:
"""파일 내용을 읽고, 실패 시 None을 반환합니다."""
try:
return path.read_text()
except (OSError, UnicodeDecodeError):
return None
def _count_lines(text: str) -> int:
"""빈 문자열을 0줄로 취급하여 텍스트의 줄 수를 셉니다."""
if not text:
return 0
return len(text.splitlines())
def compute_unified_diff(
    before: str,
    after: str,
    display_path: str,
    *,
    max_lines: int | None = 800,
    context_lines: int = 3,
) -> str | None:
    """Compute a unified diff between two text contents.

    Args:
        before: Original content.
        after: New content.
        display_path: Path shown in the diff headers.
        max_lines: Maximum number of diff lines (None for no limit).
        context_lines: Context lines kept around each change (default 3).

    Returns:
        The unified diff as a single string, or None when nothing changed.
    """
    lines = list(
        difflib.unified_diff(
            before.splitlines(),
            after.splitlines(),
            fromfile=f"{display_path} (before)",
            tofile=f"{display_path} (after)",
            lineterm="",
            n=context_lines,
        )
    )
    if not lines:
        return None
    # Cap very long diffs, marking the cut with a trailing ellipsis line.
    if max_lines is not None and len(lines) > max_lines:
        lines = lines[: max_lines - 1] + ["..."]
    return "\n".join(lines)
@dataclass
class FileOpMetrics:
    """Line- and byte-level metrics for a file operation."""

    lines_read: int = 0  # Lines returned by a read_file call
    start_line: int | None = None  # 1-based first line shown (reads)
    end_line: int | None = None  # 1-based last line shown (reads)
    lines_written: int = 0  # Total lines in the written file
    lines_added: int = 0  # "+" lines counted from the computed diff
    lines_removed: int = 0  # "-" lines counted from the computed diff
    bytes_written: int = 0  # UTF-8 byte size of the written content
@dataclass
class FileOperationRecord:
    """Tracks a single filesystem tool call from start to completion."""

    tool_name: str  # "read_file", "write_file", or "edit_file"
    display_path: str  # Human-friendly path for rendering
    physical_path: Path | None  # Resolved on-disk path, if known
    tool_call_id: str | None  # Streaming tool-call id used for correlation
    args: dict[str, Any] = field(default_factory=dict)  # Raw tool arguments
    status: FileOpStatus = "pending"
    error: str | None = None  # Error text when status == "error"
    metrics: FileOpMetrics = field(default_factory=FileOpMetrics)
    diff: str | None = None  # Unified diff of before/after content
    before_content: str | None = None  # Content captured before the op
    after_content: str | None = None  # Content read back after the op
    read_output: str | None = None  # Raw content returned by read_file
    hitl_approved: bool = False  # True when the user explicitly approved
def resolve_physical_path(path_str: str | None, assistant_id: str | None) -> Path | None:
    """Translate a virtual/relative path into a real filesystem path.

    ``/memories/...`` paths map into the agent's storage directory when an
    assistant id is available; other relative paths resolve against the
    current working directory. Returns None when resolution fails.
    """
    if not path_str:
        return None
    try:
        if assistant_id and path_str.startswith("/memories/"):
            # The virtual memories namespace lives under the agent directory.
            relative = path_str.removeprefix("/memories/").lstrip("/")
            return (settings.get_agent_dir(assistant_id) / relative).resolve()
        candidate = Path(path_str)
        if not candidate.is_absolute():
            candidate = (Path.cwd() / candidate).resolve()
        return candidate
    except (OSError, ValueError):
        return None
def format_display_path(path_str: str | None) -> str:
    """Format a path for display: basename for absolute paths, as-is otherwise."""
    if not path_str:
        return "(알 수 없음)"
    try:
        path = Path(path_str)
        if not path.is_absolute():
            return str(path)
        # Prefer the basename; roots like "/" have no name, so show the path.
        return path.name or str(path)
    except (OSError, ValueError):
        return str(path_str)
def build_approval_preview(
    tool_name: str,
    args: dict[str, Any],
    assistant_id: str | None,
) -> ApprovalPreview | None:
    """Collect summary details and a diff for HITL approval.

    Args:
        tool_name: Tool being approved; only "write_file" and "edit_file"
            produce a preview.
        args: Raw tool-call arguments (reads "file_path"/"path" plus
            tool-specific keys such as "content" or "old_string").
        assistant_id: Assistant id used to resolve virtual /memories/ paths.

    Returns:
        An ApprovalPreview for write/edit operations, or None for tools
        that need no preview.
    """
    path_str = str(args.get("file_path") or args.get("path") or "")
    display_path = format_display_path(path_str)
    physical_path = resolve_physical_path(path_str, assistant_id)
    if tool_name == "write_file":
        content = str(args.get("content", ""))
        # Existing file content (if any) so the diff can show an overwrite.
        before = _safe_read(physical_path) if physical_path and physical_path.exists() else ""
        after = content
        diff = compute_unified_diff(before or "", after, display_path, max_lines=100)
        additions = 0
        if diff:
            # Count "+" lines, excluding the "+++" header.
            additions = sum(1 for line in diff.splitlines() if line.startswith("+") and not line.startswith("+++"))
        total_lines = _count_lines(after)
        details = [
            f"파일: {path_str}",
            "작업: 새 파일 생성" + (" (기존 내용 덮어씀)" if before else ""),
            f"작성할 줄 수: {additions or total_lines}",
        ]
        return ApprovalPreview(
            title=f"{display_path} 쓰기",
            details=details,
            diff=diff,
            diff_title=f"{display_path} 차이(Diff)",
        )
    if tool_name == "edit_file":
        if physical_path is None:
            return ApprovalPreview(
                title=f"{display_path} 업데이트",
                details=[f"파일: {path_str}", "작업: 텍스트 교체"],
                error="파일 경로를 확인할 수 없습니다.",
            )
        before = _safe_read(physical_path)
        if before is None:
            return ApprovalPreview(
                title=f"{display_path} 업데이트",
                details=[f"파일: {path_str}", "작업: 텍스트 교체"],
                error="현재 파일 내용을 읽을 수 없습니다.",
            )
        old_string = str(args.get("old_string", ""))
        new_string = str(args.get("new_string", ""))
        replace_all = bool(args.get("replace_all", False))
        # perform_string_replacement returns an error string on failure, or
        # an (updated_content, occurrence_count) tuple on success.
        replacement = perform_string_replacement(before, old_string, new_string, replace_all)
        if isinstance(replacement, str):
            return ApprovalPreview(
                title=f"{display_path} 업데이트",
                details=[f"파일: {path_str}", "작업: 텍스트 교체"],
                error=replacement,
            )
        after, occurrences = replacement
        # Untruncated diff so the user can review the entire edit.
        diff = compute_unified_diff(before, after, display_path, max_lines=None)
        additions = 0
        deletions = 0
        if diff:
            additions = sum(1 for line in diff.splitlines() if line.startswith("+") and not line.startswith("+++"))
            deletions = sum(1 for line in diff.splitlines() if line.startswith("-") and not line.startswith("---"))
        details = [
            f"파일: {path_str}",
            f"작업: 텍스트 교체 ({'모든 발생' if replace_all else '단일 발생'})",
            f"일치하는 발생: {occurrences}",
            f"변경된 줄: +{additions} / -{deletions}",
        ]
        return ApprovalPreview(
            title=f"{display_path} 업데이트",
            details=details,
            diff=diff,
            diff_title=f"{display_path} 차이(Diff)",
        )
    return None
class FileOpTracker:
    """Collects file-operation metrics during a CLI interaction."""

    def __init__(self, *, assistant_id: str | None, backend: BACKEND_TYPES | None = None) -> None:
        """Initialize the tracker.

        Args:
            assistant_id: Used to resolve virtual /memories/ paths.
            backend: Optional backend used to read file contents; when
                absent, the local filesystem is used instead.
        """
        self.assistant_id = assistant_id
        self.backend = backend
        # Operations in flight, keyed by tool_call_id (None is a valid key).
        self.active: dict[str | None, FileOperationRecord] = {}
        # Finished operations, in completion order.
        self.completed: list[FileOperationRecord] = []

    def start_operation(self, tool_name: str, args: dict[str, Any], tool_call_id: str | None) -> None:
        """Begin tracking a file tool call; non-file tools are ignored."""
        if tool_name not in {"read_file", "write_file", "edit_file"}:
            return
        path_str = str(args.get("file_path") or args.get("path") or "")
        display_path = format_display_path(path_str)
        record = FileOperationRecord(
            tool_name=tool_name,
            display_path=display_path,
            physical_path=resolve_physical_path(path_str, self.assistant_id),
            tool_call_id=tool_call_id,
            args=args,
        )
        if tool_name in {"write_file", "edit_file"}:
            # Snapshot the current content now so a diff can be computed
            # after the operation completes.
            if self.backend and path_str:
                try:
                    responses = self.backend.download_files([path_str])
                    if responses and responses[0].content is not None and responses[0].error is None:
                        record.before_content = responses[0].content.decode("utf-8")
                    else:
                        record.before_content = ""
                except Exception:
                    record.before_content = ""
            elif record.physical_path:
                record.before_content = _safe_read(record.physical_path) or ""
        self.active[tool_call_id] = record

    def update_args(self, tool_call_id: str, args: dict[str, Any]) -> None:
        """Merge newly streamed args and retry the before-content capture."""
        record = self.active.get(tool_call_id)
        if not record:
            return
        record.args.update(args)
        # If we haven't captured before_content yet, try again now that we might have the path
        if record.before_content is None and record.tool_name in {"write_file", "edit_file"}:
            path_str = str(record.args.get("file_path") or record.args.get("path") or "")
            if path_str:
                record.display_path = format_display_path(path_str)
                record.physical_path = resolve_physical_path(path_str, self.assistant_id)
                if self.backend:
                    try:
                        responses = self.backend.download_files([path_str])
                        if responses and responses[0].content is not None and responses[0].error is None:
                            record.before_content = responses[0].content.decode("utf-8")
                        else:
                            record.before_content = ""
                    except Exception:
                        record.before_content = ""
                elif record.physical_path:
                    record.before_content = _safe_read(record.physical_path) or ""

    def complete_with_message(self, tool_message: Any) -> FileOperationRecord | None:
        """Finalize the matching active record from the tool's result message.

        Returns:
            The completed record, or None when the message does not
            correspond to a tracked file operation.
        """
        tool_call_id = getattr(tool_message, "tool_call_id", None)
        record = self.active.get(tool_call_id)
        if record is None:
            return None
        content = tool_message.content
        if isinstance(content, list):
            # Some tool messages may return list segments; join them for analysis.
            joined = []
            for item in content:
                if isinstance(item, str):
                    joined.append(item)
                else:
                    joined.append(str(item))
            content_text = "\n".join(joined)
        else:
            content_text = str(content) if content is not None else ""
        if getattr(tool_message, "status", "success") != "success" or content_text.lower().startswith("error"):
            record.status = "error"
            record.error = content_text
            self._finalize(record)
            return record
        record.status = "success"
        if record.tool_name == "read_file":
            record.read_output = content_text
            lines = _count_lines(content_text)
            record.metrics.lines_read = lines
            offset = record.args.get("offset")
            limit = record.args.get("limit")
            if isinstance(offset, int):
                # An offset past the end is treated as reading from the top.
                if offset > lines:
                    offset = 0
                record.metrics.start_line = offset + 1
                if lines:
                    record.metrics.end_line = offset + lines
            elif lines:
                record.metrics.start_line = 1
                record.metrics.end_line = lines
            if isinstance(limit, int) and lines > limit:
                record.metrics.end_line = (record.metrics.start_line or 1) + limit - 1
        else:
            # For write/edit operations, read back from backend (or local filesystem)
            self._populate_after_content(record)
            if record.after_content is None:
                record.status = "error"
                record.error = "업데이트된 파일 내용을 읽을 수 없습니다."
                self._finalize(record)
                return record
            record.metrics.lines_written = _count_lines(record.after_content)
            before_lines = _count_lines(record.before_content or "")
            diff = compute_unified_diff(
                record.before_content or "",
                record.after_content,
                record.display_path,
                max_lines=100,
            )
            record.diff = diff
            if diff:
                # Count change lines, excluding the "+++" / "---" headers.
                additions = sum(1 for line in diff.splitlines() if line.startswith("+") and not line.startswith("+++"))
                deletions = sum(1 for line in diff.splitlines() if line.startswith("-") and not line.startswith("---"))
                record.metrics.lines_added = additions
                record.metrics.lines_removed = deletions
            elif record.tool_name == "write_file" and (record.before_content or "") == "":
                # Brand-new file: every written line counts as an addition.
                record.metrics.lines_added = record.metrics.lines_written
            record.metrics.bytes_written = len(record.after_content.encode("utf-8"))
            if record.diff is None and (record.before_content or "") != record.after_content:
                record.diff = compute_unified_diff(
                    record.before_content or "",
                    record.after_content,
                    record.display_path,
                    max_lines=100,
                )
            if record.diff is None and before_lines != record.metrics.lines_written:
                record.metrics.lines_added = max(record.metrics.lines_written - before_lines, 0)
        self._finalize(record)
        return record

    def mark_hitl_approved(self, tool_name: str, args: dict[str, Any]) -> None:
        """Mark active operations matching tool_name and file_path as HIL-approved."""
        file_path = args.get("file_path") or args.get("path")
        if not file_path:
            return
        # Mark all active records that match
        for record in self.active.values():
            if record.tool_name == tool_name:
                record_path = record.args.get("file_path") or record.args.get("path")
                if record_path == file_path:
                    record.hitl_approved = True

    def _populate_after_content(self, record: FileOperationRecord) -> None:
        # Read the post-operation file content, preferring the backend.
        # Use backend if available (works for any BackendProtocol implementation)
        if self.backend:
            try:
                file_path = record.args.get("file_path") or record.args.get("path")
                if file_path:
                    responses = self.backend.download_files([file_path])
                    if responses and responses[0].content is not None and responses[0].error is None:
                        record.after_content = responses[0].content.decode("utf-8")
                    else:
                        record.after_content = None
                else:
                    record.after_content = None
            except Exception:
                record.after_content = None
        else:
            # Fallback: direct filesystem read when no backend provided
            if record.physical_path is None:
                record.after_content = None
                return
            record.after_content = _safe_read(record.physical_path)

    def _finalize(self, record: FileOperationRecord) -> None:
        # Move the record from the active map to the completed list.
        self.completed.append(record)
        self.active.pop(record.tool_call_id, None)

View File

@@ -0,0 +1,209 @@
"""Utilities for handling image paste from clipboard."""
import base64
import io
import os
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from PIL import Image
@dataclass
class ImageData:
    """A pasted clipboard image captured as base64 text.

    Attributes:
        base64_data: Base64-encoded image bytes.
        format: Image format name such as "png" or "jpeg".
        placeholder: Display text shown in the prompt, e.g. "[image 1]".
    """

    base64_data: str
    format: str
    placeholder: str

    def to_message_content(self) -> dict:
        """Return this image as a LangChain ``image_url`` content block."""
        data_url = f"data:image/{self.format};base64,{self.base64_data}"
        return {"type": "image_url", "image_url": {"url": data_url}}
def get_clipboard_image() -> ImageData | None:
    """Attempt to read an image from the system clipboard.

    Only macOS is currently supported (via ``pngpaste`` or ``osascript``).

    Returns:
        ImageData if an image is found, None otherwise
    """
    if sys.platform != "darwin":
        # Linux/Windows support could be added here
        return None
    return _get_macos_clipboard_image()
def _get_macos_clipboard_image() -> ImageData | None:
    """Get clipboard image on macOS using pngpaste or osascript.

    First tries pngpaste (faster if installed), then falls back to osascript.

    Returns:
        ImageData if an image is found, None otherwise
    """
    # Try pngpaste first (fast if installed)
    try:
        result = subprocess.run(
            ["pngpaste", "-"],
            capture_output=True,
            check=False,
            timeout=2,
        )
        if result.returncode == 0 and result.stdout:
            # Successfully got PNG data
            try:
                # PIL parse is used purely as validation; the raw stdout
                # bytes are what get base64-encoded.
                Image.open(io.BytesIO(result.stdout))  # Validate it's a real image
                base64_data = base64.b64encode(result.stdout).decode("utf-8")
                return ImageData(
                    base64_data=base64_data,
                    format="png",  # 'pngpaste -' always outputs PNG
                    placeholder="[image]",
                )
            except Exception:
                pass  # Invalid image data - fall through to osascript
    except (FileNotFoundError, subprocess.TimeoutExpired):
        pass  # pngpaste not installed or timed out
    # Fallback to osascript with temp file (built-in but slower)
    return _get_clipboard_via_osascript()
def _get_clipboard_via_osascript() -> ImageData | None:
    """Get clipboard image via osascript using a temp file.

    osascript outputs data in a special format that can't be captured as raw binary,
    so we write to a temp file instead.

    Returns:
        ImageData if an image is found, None otherwise
    """
    # Create a temp file for the image; the AppleScript below writes into it.
    fd, temp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        # First check if clipboard has PNG data
        check_result = subprocess.run(
            ["osascript", "-e", "clipboard info"],
            capture_output=True,
            check=False,
            timeout=2,
            text=True,
        )
        if check_result.returncode != 0:
            return None
        # Check for PNG or TIFF in clipboard info
        # ("pngf"/"tiff" are the lowercase clipboard type codes).
        clipboard_info = check_result.stdout.lower()
        if "pngf" not in clipboard_info and "tiff" not in clipboard_info:
            return None
        # Try to get PNG first, fall back to TIFF
        if "pngf" in clipboard_info:
            get_script = f"""
            set pngData to the clipboard as «class PNGf»
            set theFile to open for access POSIX file "{temp_path}" with write permission
            write pngData to theFile
            close access theFile
            return "success"
            """
        else:
            get_script = f"""
            set tiffData to the clipboard as TIFF picture
            set theFile to open for access POSIX file "{temp_path}" with write permission
            write tiffData to theFile
            close access theFile
            return "success"
            """
        result = subprocess.run(
            ["osascript", "-e", get_script],
            capture_output=True,
            check=False,
            timeout=3,
            text=True,
        )
        if result.returncode != 0 or "success" not in result.stdout:
            return None
        # Check if file was created and has content
        if not os.path.exists(temp_path) or os.path.getsize(temp_path) == 0:
            return None
        # Read and validate the image
        with open(temp_path, "rb") as f:
            image_data = f.read()
        try:
            image = Image.open(io.BytesIO(image_data))
            # Convert to PNG if it's not already (e.g., if we got TIFF)
            buffer = io.BytesIO()
            image.save(buffer, format="PNG")
            buffer.seek(0)
            base64_data = base64.b64encode(buffer.getvalue()).decode("utf-8")
            return ImageData(
                base64_data=base64_data,
                format="png",
                placeholder="[image]",
            )
        except Exception:
            return None
    except (subprocess.TimeoutExpired, OSError):
        return None
    finally:
        # Clean up temp file
        try:
            os.unlink(temp_path)
        except OSError:
            pass
def encode_image_to_base64(image_bytes: bytes) -> str:
    """Encode raw image bytes as a base64 string.

    Args:
        image_bytes: Raw image bytes.

    Returns:
        The base64-encoded text representation.
    """
    encoded = base64.b64encode(image_bytes)
    return encoded.decode("utf-8")
def create_multimodal_content(text: str, images: list[ImageData]) -> list[dict]:
    """Build multimodal message content from text plus pasted images.

    Args:
        text: Text content of the message; skipped entirely when blank.
        images: Images to append as image content blocks.

    Returns:
        Content blocks in LangChain format (text block first, then images).
    """
    blocks: list[dict] = []
    if text.strip():
        blocks.append({"type": "text", "text": text})
    blocks.extend(image.to_message_content() for image in images)
    return blocks

View File

@@ -0,0 +1,420 @@
"""CLI를 위한 입력 처리, 완성 및 프롬프트 세션."""
import asyncio
import os
import re
import time
from collections.abc import Callable
from pathlib import Path
from prompt_toolkit import PromptSession
from prompt_toolkit.completion import (
Completer,
Completion,
PathCompleter,
merge_completers,
)
from prompt_toolkit.document import Document
from prompt_toolkit.enums import EditingMode
from prompt_toolkit.formatted_text import HTML
from prompt_toolkit.key_binding import KeyBindings
from .config import COLORS, COMMANDS, SessionState, console
from .image_utils import ImageData, get_clipboard_image
# Regex patterns for context-aware completion
# "@path" mention at the end of the input (allows backslash-escaped spaces).
AT_MENTION_RE = re.compile(r"@(?P<path>(?:[^\s@]|(?<=\\)\s)*)$")
# "/command" prefix at the very start of the line.
SLASH_COMMAND_RE = re.compile(r"^/(?P<command>[a-z]*)$")
# Seconds within which a second Ctrl+C confirms exit.
EXIT_CONFIRM_WINDOW = 3.0
class ImageTracker:
    """Tracks images pasted during the current conversation."""

    def __init__(self) -> None:
        # Pasted images in paste order; ids are 1-based and reset on clear().
        self.images: list[ImageData] = []
        self.next_id = 1

    def add_image(self, image_data: ImageData) -> str:
        """Register an image and return its placeholder text.

        Args:
            image_data: The image data to track.

        Returns:
            A placeholder string such as "[image 1]".
        """
        label = f"[image {self.next_id}]"
        self.next_id += 1
        image_data.placeholder = label
        self.images.append(image_data)
        return label

    def get_images(self) -> list[ImageData]:
        """Return a shallow copy of all tracked images."""
        return list(self.images)

    def clear(self) -> None:
        """Drop all tracked images and reset the id counter."""
        self.images.clear()
        self.next_id = 1
class FilePathCompleter(Completer):
    """Enables filesystem completion only when the cursor follows an '@'."""

    def __init__(self) -> None:
        # Delegate the filesystem walking to prompt_toolkit's PathCompleter;
        # this class only gates on the "@" context and re-escapes results.
        self.path_completer = PathCompleter(
            expanduser=True,
            min_input_len=0,
            only_directories=False,
        )

    def get_completions(self, document, complete_event):
        """Yield file path completions when an @mention is detected."""
        text = document.text_before_cursor
        # Use regex to detect @path pattern at end of line
        m = AT_MENTION_RE.search(text)
        if not m:
            return  # Not in an @path context
        path_fragment = m.group("path")
        # Unescape the path for PathCompleter (it doesn't understand escape sequences)
        unescaped_fragment = path_fragment.replace("\\ ", " ")
        # Strip trailing backslash if present (user is in the process of typing an escape)
        unescaped_fragment = unescaped_fragment.removesuffix("\\")
        # Create temporary document for the unescaped path fragment
        temp_doc = Document(text=unescaped_fragment, cursor_position=len(unescaped_fragment))
        # Get completions from PathCompleter and use its start_position
        # PathCompleter returns suffix text with start_position=0 (insert at cursor)
        for comp in self.path_completer.get_completions(temp_doc, complete_event):
            # Add trailing / for directories so users can continue navigating
            completed_path = Path(unescaped_fragment + comp.text).expanduser()
            # Re-escape spaces in the completion text for the command line
            completion_text = comp.text.replace(" ", "\\ ")
            if completed_path.is_dir() and not completion_text.endswith("/"):
                completion_text += "/"
            yield Completion(
                text=completion_text,
                start_position=comp.start_position,  # Use PathCompleter's position (usually 0)
                display=comp.display,
                display_meta=comp.display_meta,
            )
class CommandCompleter(Completer):
    """Offers command completions only when the line starts with '/'."""

    def get_completions(self, document, _complete_event):
        """Yield matching command completions for a leading "/" fragment."""
        match = SLASH_COMMAND_RE.match(document.text_before_cursor)
        if match is None:
            return  # Not in a /command context
        fragment = match.group("command")
        prefix = fragment.lower()
        replace_from = -len(fragment)
        # Offer every known command whose name extends the typed prefix.
        for name, description in COMMANDS.items():
            if not name.startswith(prefix):
                continue
            yield Completion(
                text=name,
                start_position=replace_from,  # Replace the typed fragment
                display=name,
                display_meta=description,
            )
def parse_file_mentions(text: str) -> tuple[str, list[Path]]:
    """Extract @file mentions and return the text plus resolved file paths.

    Mentions that do not point at an existing file emit a console warning
    and are omitted from the returned list; the text is returned unchanged.
    """
    # @filename, allowing backslash-escaped spaces inside the name
    mention_pattern = r"@((?:[^\s@]|(?<=\\)\s)+)"
    resolved: list[Path] = []
    for raw in re.findall(mention_pattern, text):
        # Drop escape backslashes before treating the mention as a path.
        candidate = Path(raw.replace("\\ ", " ")).expanduser()
        if not candidate.is_absolute():
            candidate = Path.cwd() / candidate
        try:
            candidate = candidate.resolve()
            if candidate.exists() and candidate.is_file():
                resolved.append(candidate)
            else:
                console.print(f"[yellow]경고: 파일을 찾을 수 없습니다: {raw}[/yellow]")
        except Exception as e:
            console.print(f"[yellow]경고: 유효하지 않은 경로 {raw}: {e}[/yellow]")
    return text, resolved
def parse_image_placeholders(text: str) -> tuple[str, int]:
    """Count image placeholders within the text.

    Args:
        text: Input that may contain [image] or [image N] placeholders.

    Returns:
        A (text, count) tuple where count is the number of placeholders.
    """
    # Matches [image] as well as [image N]; case does not matter.
    placeholder_re = re.compile(r"\[image(?:\s+\d+)?\]", re.IGNORECASE)
    return text, sum(1 for _ in placeholder_re.finditer(text))
def get_bottom_toolbar(session_state: SessionState, session_ref: dict) -> Callable[[], list[tuple[str, str]]]:
    """Return a toolbar callable that shows auto-approve state and BASH mode."""

    def toolbar() -> list[tuple[str, str]]:
        fragments: list[tuple[str, str]] = []
        # BASH mode indicator: shown while the current input starts with "!".
        try:
            prompt_session = session_ref.get("session")
            if prompt_session and prompt_session.default_buffer.text.startswith("!"):
                fragments.append(("bg:#ff1493 fg:#ffffff bold", " BASH MODE "))
                fragments.append(("", " | "))
        except (AttributeError, TypeError):
            # Swallow silently - the toolbar is non-critical and called often.
            pass
        # Base status message reflecting the approval mode.
        if session_state.auto_approve:
            fragments.append(("class:toolbar-green", "자동 승인 켜짐 (CTRL+T로 전환)"))
        else:
            fragments.append(("class:toolbar-orange", "수동 승인 (CTRL+T로 전환)"))
        # Exit-confirmation hint, shown only while its deadline is in the future.
        deadline = session_state.exit_hint_until
        if deadline is not None:
            if time.monotonic() < deadline:
                fragments.append(("", " | "))
                fragments.append(("class:toolbar-exit", " 종료하려면 Ctrl+C를 한번 더 누르세요 "))
            else:
                session_state.exit_hint_until = None
        return fragments

    return toolbar
def create_prompt_session(
    _assistant_id: str, session_state: SessionState, image_tracker: ImageTracker | None = None
) -> PromptSession:
    """Create a PromptSession configured with all CLI input features.

    Args:
        _assistant_id: Agent identifier (unused here; kept for call-site compatibility).
        session_state: Session state holding auto-approve and Ctrl+C exit-hint fields.
        image_tracker: Optional tracker for images pasted from the clipboard;
            when provided, paste key bindings check the clipboard for images.

    Returns:
        A fully configured ``PromptSession``.
    """
    # Set default editor if not already set
    if "EDITOR" not in os.environ:
        os.environ["EDITOR"] = "nano"
    # Create key bindings
    kb = KeyBindings()

    @kb.add("c-c")
    def _(event) -> None:
        """Require Ctrl+C twice within a short window to exit."""
        app = event.app
        now = time.monotonic()
        if session_state.exit_hint_until is not None and now < session_state.exit_hint_until:
            # Second Ctrl+C inside the window: cancel the pending hint timer and exit.
            handle = session_state.exit_hint_handle
            if handle:
                handle.cancel()
            session_state.exit_hint_handle = None
            session_state.exit_hint_until = None
            app.invalidate()
            app.exit(exception=KeyboardInterrupt())
            return
        # First Ctrl+C: arm the confirmation window and schedule its expiry.
        session_state.exit_hint_until = now + EXIT_CONFIRM_WINDOW
        handle = session_state.exit_hint_handle
        if handle:
            handle.cancel()
        loop = asyncio.get_running_loop()
        app_ref = app

        def clear_hint() -> None:
            # Clear the hint only if it has truly expired; a newer Ctrl+C may
            # have extended the deadline after this callback was scheduled.
            if session_state.exit_hint_until is not None and time.monotonic() >= session_state.exit_hint_until:
                session_state.exit_hint_until = None
                session_state.exit_hint_handle = None
                app_ref.invalidate()

        session_state.exit_hint_handle = loop.call_later(EXIT_CONFIRM_WINDOW, clear_hint)
        app.invalidate()

    # Bind Ctrl+T to toggle auto-approve
    @kb.add("c-t")
    def _(event) -> None:
        """Toggle auto-approve mode."""
        session_state.toggle_auto_approve()
        # Force UI refresh to update toolbar
        event.app.invalidate()

    # Custom paste handler to detect images
    if image_tracker:
        from prompt_toolkit.keys import Keys

        def _handle_paste_with_image_check(event, pasted_text: str = "") -> None:
            """Check the clipboard for an image; otherwise insert the pasted text."""
            # Try to get an image from clipboard
            clipboard_image = get_clipboard_image()
            if clipboard_image:
                # Found an image! Add it to tracker and insert placeholder
                placeholder = image_tracker.add_image(clipboard_image)
                # Insert placeholder (no confirmation message)
                event.current_buffer.insert_text(placeholder)
            elif pasted_text:
                # No image, insert the pasted text
                event.current_buffer.insert_text(pasted_text)
            else:
                # Fallback: try to get text from prompt_toolkit clipboard
                clipboard_data = event.app.clipboard.get_data()
                if clipboard_data and clipboard_data.text:
                    event.current_buffer.insert_text(clipboard_data.text)

        @kb.add(Keys.BracketedPaste)
        def _(event) -> None:
            """Handle bracketed paste (Cmd+V on macOS) - check for images first."""
            # Bracketed paste provides the pasted text in event.data
            pasted_text = event.data if hasattr(event, "data") else ""
            _handle_paste_with_image_check(event, pasted_text)

        @kb.add("c-v")
        def _(event) -> None:
            """Handle Ctrl+V paste - check for images first."""
            _handle_paste_with_image_check(event)

    # Bind regular Enter to submit (intuitive behavior)
    @kb.add("enter")
    def _(event) -> None:
        """Enter submits the input unless the completion menu is active."""
        buffer = event.current_buffer
        # If completion menu is showing, apply the current completion
        if buffer.complete_state:
            # Get the current completion (the highlighted one)
            current_completion = buffer.complete_state.current_completion
            # If no completion is selected (user hasn't navigated), select and apply the first one
            if not current_completion and buffer.complete_state.completions:
                # Move to the first completion
                buffer.complete_next()
                # Now apply it
                buffer.apply_completion(buffer.complete_state.current_completion)
            elif current_completion:
                # Apply the already-selected completion
                buffer.apply_completion(current_completion)
            else:
                # No completions available, close menu
                buffer.complete_state = None
        # Don't submit if buffer is empty or only whitespace
        elif buffer.text.strip():
            # Normal submit
            buffer.validate_and_handle()
        # If empty, do nothing (don't submit)

    # Alt+Enter for newlines (press ESC then Enter, or Option+Enter on Mac)
    @kb.add("escape", "enter")
    def _(event) -> None:
        """Alt+Enter inserts a newline for multi-line input."""
        event.current_buffer.insert_text("\n")

    # Ctrl+E to open in external editor
    @kb.add("c-e")
    def _(event) -> None:
        """Open the current input in the external editor (defaults to nano)."""
        event.current_buffer.open_in_editor()

    # Backspace handler to retrigger completions and delete image tags as units
    @kb.add("backspace")
    def _(event) -> None:
        """Handle backspace: delete image tags as single units and retrigger completion."""
        buffer = event.current_buffer
        text_before = buffer.document.text_before_cursor
        # Check if cursor is right after an image tag like [image 1] or [image 12]
        image_tag_pattern = r"\[image \d+\]$"
        match = re.search(image_tag_pattern, text_before)
        if match and image_tracker:
            # Delete the entire tag
            tag_length = len(match.group(0))
            buffer.delete_before_cursor(count=tag_length)
            # Remove the image from tracker and reset counter
            tag_text = match.group(0)
            image_num_match = re.search(r"\d+", tag_text)
            if image_num_match:
                image_num = int(image_num_match.group(0))
                # Remove image at index (1-based to 0-based)
                if 0 < image_num <= len(image_tracker.images):
                    image_tracker.images.pop(image_num - 1)
                # Reset counter to next available number.
                # NOTE(review): remaining [image N] tags left in the buffer are
                # not renumbered, so tag numbers can go stale after deleting a
                # tag in the middle - confirm this is acceptable upstream.
                image_tracker.next_id = len(image_tracker.images) + 1
        else:
            # Normal backspace
            buffer.delete_before_cursor(count=1)
        # Check if we're in a completion context (@ or /)
        text = buffer.document.text_before_cursor
        if AT_MENTION_RE.search(text) or SLASH_COMMAND_RE.match(text):
            # Retrigger completion
            buffer.start_completion(select_first=False)

    from prompt_toolkit.styles import Style

    # Define styles for the toolbar with full-width background colors
    toolbar_style = Style.from_dict({
        "bottom-toolbar": "noreverse",  # Disable default reverse video
        "toolbar-green": "bg:#10b981 #000000",  # Green for auto-accept ON
        "toolbar-orange": "bg:#f59e0b #000000",  # Orange for manual accept
        "toolbar-exit": "bg:#2563eb #ffffff",  # Blue for exit hint
    })
    # Create session reference dict for toolbar to access session
    session_ref = {}
    # Create the session
    session = PromptSession(
        message=HTML(f'<style fg="{COLORS["user"]}">></style> '),
        multiline=True,  # Keep multiline support but Enter submits
        key_bindings=kb,
        completer=merge_completers([CommandCompleter(), FilePathCompleter()]),
        editing_mode=EditingMode.EMACS,
        complete_while_typing=True,  # Show completions as you type
        complete_in_thread=True,  # Async completion prevents menu freezing
        mouse_support=False,
        enable_open_in_editor=True,  # Allow Ctrl+X Ctrl+E to open external editor
        bottom_toolbar=get_bottom_toolbar(session_state, session_ref),  # Persistent status bar at bottom
        style=toolbar_style,  # Apply toolbar styling
        reserve_space_for_menu=7,  # Reserve space for completion menu to show 5-6 results
    )
    # Store session reference for toolbar to access
    session_ref["session"] = session
    return session

View File

@@ -0,0 +1 @@
"""DeepAgents CLI를 위한 샌드박스 연동."""

View File

@@ -0,0 +1,115 @@
"""Daytona 샌드박스 백엔드 구현."""
from __future__ import annotations
from typing import TYPE_CHECKING
from deepagents.backends.protocol import (
ExecuteResponse,
FileDownloadResponse,
FileUploadResponse,
)
from deepagents.backends.sandbox import BaseSandbox
if TYPE_CHECKING:
from daytona import Sandbox
class DaytonaBackend(BaseSandbox):
    """Daytona backend implementation conforming to SandboxBackendProtocol.

    All file-operation methods are inherited from BaseSandbox; this class
    implements execute() (plus batched upload/download) on top of Daytona's API.
    """

    def __init__(self, sandbox: Sandbox) -> None:
        """Initialize the backend with a Daytona sandbox client.

        Args:
            sandbox: Daytona sandbox instance.
        """
        self._sandbox = sandbox
        self._timeout: int = 30 * 60  # 30 minutes

    @property
    def id(self) -> str:
        """Unique identifier of the sandbox backend."""
        return self._sandbox.id

    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Run a command in the sandbox and return an ExecuteResponse.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code and truncation flag.
        """
        completed = self._sandbox.process.exec(command, timeout=self._timeout)
        # Daytona already combines stdout/stderr into a single stream.
        return ExecuteResponse(
            output=completed.result,
            exit_code=completed.exit_code,
            truncated=False,
        )

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the Daytona sandbox.

        Uses Daytona's native batch-download API for efficiency. Partial
        success is supported, so one failed download should not affect the
        others.

        Args:
            paths: File paths to download.

        Returns:
            One FileDownloadResponse per input path, in input order.

        TODO: Map Daytona API error strings to standardized FileOperationError
        codes. Only the happy path is implemented for now.
        """
        from daytona import FileDownloadRequest

        # Build one batched request per path using Daytona's native bulk API.
        batch = [FileDownloadRequest(source=src) for src in paths]
        daytona_results = self._sandbox.fs.download_files(batch)
        # Translate Daytona results to our response type, preserving order.
        # TODO: map resp.error to FileOperationError when available.
        translated: list[FileDownloadResponse] = []
        for resp in daytona_results:
            translated.append(
                FileDownloadResponse(path=resp.source, content=resp.result, error=None)
            )
        return translated

    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the Daytona sandbox.

        Uses Daytona's native batch-upload API for efficiency. Partial success
        is supported, so one failed upload should not affect the others.

        Args:
            files: (path, content) tuples to upload.

        Returns:
            One FileUploadResponse per input file, in input order.

        TODO: Map Daytona API error strings to standardized FileOperationError
        codes. Only the happy path is implemented for now.
        """
        from daytona import FileUpload

        # Batch all uploads through Daytona's native bulk API.
        batch = [FileUpload(source=data, destination=dest) for dest, data in files]
        self._sandbox.fs.upload_files(batch)
        # TODO: check whether Daytona reports errors and map them accordingly.
        return [FileUploadResponse(path=dest, error=None) for dest, _ in files]

View File

@@ -0,0 +1,124 @@
"""Modal 샌드박스 백엔드 구현."""
from __future__ import annotations
from typing import TYPE_CHECKING
from deepagents.backends.protocol import (
ExecuteResponse,
FileDownloadResponse,
FileUploadResponse,
)
from deepagents.backends.sandbox import BaseSandbox
if TYPE_CHECKING:
import modal
class ModalBackend(BaseSandbox):
    """Modal backend implementation conforming to SandboxBackendProtocol.

    All file-operation methods are inherited from BaseSandbox; this class
    implements execute() (plus per-file upload/download) on top of Modal's API.
    """

    def __init__(self, sandbox: modal.Sandbox) -> None:
        """Initialize the backend with a Modal sandbox instance.

        Args:
            sandbox: An active Modal sandbox instance.
        """
        self._sandbox = sandbox
        self._timeout = 30 * 60  # 30 minutes

    @property
    def id(self) -> str:
        """Unique identifier of the sandbox backend."""
        return self._sandbox.object_id

    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Run a command in the sandbox and return an ExecuteResponse.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code and truncation flag.
        """
        # Run the command through bash via Modal's exec API, then wait.
        proc = self._sandbox.exec("bash", "-c", command, timeout=self._timeout)
        proc.wait()
        out_text = proc.stdout.read() or ""
        err_text = proc.stderr.read()
        # Merge stdout and stderr into one stream (matches the Runloop backend).
        if err_text:
            out_text = f"{out_text}\n{err_text}" if out_text else err_text
        return ExecuteResponse(
            output=out_text,
            exit_code=proc.returncode,
            truncated=False,  # Modal does not report truncation
        )

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the Modal sandbox.

        Partial success is supported, so one failed download should not affect
        the others.

        Args:
            paths: File paths to download.

        Returns:
            One FileDownloadResponse per input path, in input order.

        TODO: Add proper error handling with standardized FileOperationError
        codes; which exceptions Modal's sandbox.open() raises still needs to be
        confirmed. Only the happy path is implemented for now.
        """
        # Relies on the Modal sandbox file API (currently alpha and not
        # recommended for production; acceptable for this CLI application):
        # https://modal.com/doc/guide/sandbox-files
        results: list[FileDownloadResponse] = []
        for file_path in paths:
            with self._sandbox.open(file_path, "rb") as handle:
                payload = handle.read()
            results.append(FileDownloadResponse(path=file_path, content=payload, error=None))
        return results

    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the Modal sandbox.

        Partial success is supported, so one failed upload should not affect
        the others.

        Args:
            files: (path, content) tuples to upload.

        Returns:
            One FileUploadResponse per input file, in input order.

        TODO: Add proper error handling with standardized FileOperationError
        codes; which exceptions Modal's sandbox.open() raises still needs to be
        confirmed. Only the happy path is implemented for now.
        """
        # Relies on the Modal sandbox file API (currently alpha and not
        # recommended for production; acceptable for this CLI application):
        # https://modal.com/doc/guide/sandbox-files
        results: list[FileUploadResponse] = []
        for file_path, payload in files:
            with self._sandbox.open(file_path, "wb") as handle:
                handle.write(payload)
            results.append(FileUploadResponse(path=file_path, error=None))
        return results

View File

@@ -0,0 +1,121 @@
"""Runloop을 위한 BackendProtocol 구현."""
try:
import runloop_api_client
except ImportError:
msg = (
"RunloopBackend를 위해서는 runloop_api_client 패키지가 필요합니다. "
"`pip install runloop_api_client`로 설치하십시오."
)
raise ImportError(msg)
import os
from deepagents.backends.protocol import ExecuteResponse, FileDownloadResponse, FileUploadResponse
from deepagents.backends.sandbox import BaseSandbox
from runloop_api_client import Runloop
class RunloopBackend(BaseSandbox):
    """Backend operating on files inside a Runloop devbox.

    Uses the Runloop API client to execute commands and manipulate files
    within the remote devbox environment.
    """

    def __init__(
        self,
        devbox_id: str,
        client: Runloop | None = None,
        api_key: str | None = None,
    ) -> None:
        """Initialize the Runloop protocol.

        Args:
            devbox_id: ID of the Runloop devbox to operate on.
            client: Optional existing Runloop client instance.
            api_key: Optional API key used to create a new client
                (defaults to the RUNLOOP_API_KEY environment variable).

        Raises:
            ValueError: If both client and api_key are given, or neither a
                client nor any API key is available.
        """
        if client and api_key:
            msg = "client 또는 bearer_token 중 하나만 제공해야 하며, 둘 다 제공할 수는 없습니다."
            raise ValueError(msg)
        if client is None:
            token = api_key or os.environ.get("RUNLOOP_API_KEY", None)
            if token is None:
                msg = "client 또는 bearer_token 중 하나는 제공되어야 합니다."
                raise ValueError(msg)
            client = Runloop(bearer_token=token)
        self._client = client
        self._devbox_id = devbox_id
        self._timeout = 30 * 60  # 30 minutes

    @property
    def id(self) -> str:
        """Unique identifier of the sandbox backend."""
        return self._devbox_id

    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Run a command in the devbox and return an ExecuteResponse.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code and truncation flag.
        """
        outcome = self._client.devboxes.execute_and_await_completion(
            devbox_id=self._devbox_id,
            command=command,
            timeout=self._timeout,
        )
        # Merge stdout and stderr into a single combined stream.
        combined = outcome.stdout or ""
        if outcome.stderr:
            combined = f"{combined}\n{outcome.stderr}" if combined else outcome.stderr
        return ExecuteResponse(
            output=combined,
            exit_code=outcome.exit_status,
            truncated=False,  # Runloop does not report truncation
        )

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the Runloop devbox.

        Files are fetched one by one via the Runloop API. Order is preserved,
        and per-file errors are meant to be reported in the responses rather
        than raised.

        TODO: Add proper error handling with standardized FileOperationError
        codes. Only the happy path is implemented for now.
        """
        results: list[FileDownloadResponse] = []
        for file_path in paths:
            # devboxes.download_file returns a BinaryAPIResponse exposing .read()
            api_response = self._client.devboxes.download_file(self._devbox_id, path=file_path)
            results.append(
                FileDownloadResponse(path=file_path, content=api_response.read(), error=None)
            )
        return results

    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the Runloop devbox.

        Files are sent one by one via the Runloop API. Order is preserved, and
        per-file errors are meant to be reported in the responses rather than
        raised.

        TODO: Add proper error handling with standardized FileOperationError
        codes. Only the happy path is implemented for now.
        """
        results: list[FileUploadResponse] = []
        for file_path, payload in files:
            # The Runloop client accepts bytes or a file-like object for 'file'
            self._client.devboxes.upload_file(self._devbox_id, path=file_path, file=payload)
            results.append(FileUploadResponse(path=file_path, error=None))
        return results

View File

@@ -0,0 +1,345 @@
"""컨텍스트 매니저를 통한 샌드박스 수명 주기 관리."""
import os
import shlex
import string
import time
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
from deepagents.backends.protocol import SandboxBackendProtocol
from deepagents_cli.config import console
def _run_sandbox_setup(backend: SandboxBackendProtocol, setup_script_path: str) -> None:
    """Run the user's setup script in the sandbox, expanding env variables.

    Args:
        backend: Sandbox backend instance.
        setup_script_path: Path to the setup script file.

    Raises:
        FileNotFoundError: If the setup script does not exist.
        RuntimeError: If the script exits with a non-zero status.
    """
    script_file = Path(setup_script_path)
    if not script_file.exists():
        msg = f"설정 스크립트를 찾을 수 없습니다: {setup_script_path}"
        raise FileNotFoundError(msg)
    console.print(f"[dim]설정 스크립트 실행 중: {setup_script_path}...[/dim]")
    # Expand ${VAR} references using the local environment before shipping the
    # script to the sandbox; unknown variables are left as-is.
    expanded_script = string.Template(script_file.read_text()).safe_substitute(os.environ)
    # Run inside the sandbox, quoting the whole script for bash -c.
    outcome = backend.execute(f"bash -c {shlex.quote(expanded_script)}")
    if outcome.exit_code != 0:
        console.print(f"[red]❌ 설정 스크립트 실패 (종료 코드 {outcome.exit_code}):[/red]")
        console.print(f"[dim]{outcome.output}[/dim]")
        msg = "설정 실패 - 중단됨"
        raise RuntimeError(msg)
    console.print("[green]✓ 설정 완료[/green]")
@contextmanager
def create_modal_sandbox(
    *, sandbox_id: str | None = None, setup_script_path: str | None = None
) -> Generator[SandboxBackendProtocol, None, None]:
    """Create or attach to a Modal sandbox.

    Args:
        sandbox_id: Existing sandbox ID to reuse (optional). Reused sandboxes
            are not terminated on exit.
        setup_script_path: Path to a setup script to run after startup (optional).

    Yields:
        A ModalBackend connected to the running sandbox.

    Raises:
        ImportError: Modal SDK is not installed.
        RuntimeError: The sandbox exited unexpectedly, failed to start within
            the timeout, or the setup script failed.
        FileNotFoundError: Setup script not found.
    """
    import modal

    from deepagents_cli.integrations.modal import ModalBackend

    console.print("[yellow]Modal 샌드박스 시작 중...[/yellow]")
    # Ephemeral app (cleaned up automatically on exit)
    app = modal.App("deepagents-sandbox")
    with app.run():
        if sandbox_id:
            sandbox = modal.Sandbox.from_id(sandbox_id=sandbox_id, app=app)
            should_cleanup = False
        else:
            sandbox = modal.Sandbox.create(app=app, workdir="/workspace")
            # FIX: record the new sandbox's ID. Previously sandbox_id stayed
            # None for freshly created sandboxes, so the cleanup messages in
            # the finally-block printed "None" instead of the real ID.
            sandbox_id = sandbox.object_id
            should_cleanup = True
        # Poll until running (required for Modal): 180s timeout (90 * 2s)
        for _ in range(90):
            if sandbox.poll() is not None:  # sandbox terminated unexpectedly
                msg = "시작 중 Modal 샌드박스가 예기치 않게 종료되었습니다"
                raise RuntimeError(msg)
            # Probe readiness with a trivial command
            try:
                process = sandbox.exec("echo", "ready", timeout=5)
                process.wait()
                if process.returncode == 0:
                    break
            except Exception:
                pass
            time.sleep(2)
        else:
            # Timed out - clean up and fail
            sandbox.terminate()
            msg = "180초 이내에 Modal 샌드박스를 시작하지 못했습니다"
            raise RuntimeError(msg)
        backend = ModalBackend(sandbox)
        console.print(f"[green]✓ Modal 샌드박스 준비 완료: {backend.id}[/green]")
        # Run setup script if provided
        if setup_script_path:
            _run_sandbox_setup(backend, setup_script_path)
        try:
            yield backend
        finally:
            if should_cleanup:
                try:
                    console.print(f"[dim]Modal 샌드박스 {sandbox_id} 종료 중...[/dim]")
                    sandbox.terminate()
                    console.print(f"[dim]✓ Modal 샌드박스 {sandbox_id} 종료됨[/dim]")
                except Exception as e:
                    console.print(f"[yellow]⚠ 정리 실패: {e}[/yellow]")
@contextmanager
def create_runloop_sandbox(
    *, sandbox_id: str | None = None, setup_script_path: str | None = None
) -> Generator[SandboxBackendProtocol, None, None]:
    """Create or attach to a Runloop devbox.

    Args:
        sandbox_id: Existing devbox ID to reuse (optional). Reused devboxes
            are not shut down on exit.
        setup_script_path: Path to a setup script to run after startup (optional).

    Yields:
        A RunloopBackend connected to the running devbox.

    Raises:
        ImportError: Runloop SDK is not installed.
        ValueError: RUNLOOP_API_KEY is not set.
        RuntimeError: The devbox failed to start within the timeout, or the
            setup script failed.
        FileNotFoundError: Setup script not found.
    """
    from runloop_api_client import Runloop

    from deepagents_cli.integrations.runloop import RunloopBackend

    api_key = os.environ.get("RUNLOOP_API_KEY")
    if not api_key:
        msg = "RUNLOOP_API_KEY 환경 변수가 설정되지 않았습니다"
        raise ValueError(msg)
    runloop_client = Runloop(bearer_token=api_key)
    console.print("[yellow]Runloop devbox 시작 중...[/yellow]")
    if sandbox_id:
        devbox = runloop_client.devboxes.retrieve(id=sandbox_id)
        should_cleanup = False
    else:
        devbox = runloop_client.devboxes.create()
        sandbox_id = devbox.id
        should_cleanup = True
    # Poll until the devbox reports "running" (180s total: 90 * 2s).
    for _ in range(90):
        if runloop_client.devboxes.retrieve(id=devbox.id).status == "running":
            break
        time.sleep(2)
    else:
        # Timed out - shut the devbox down and fail.
        runloop_client.devboxes.shutdown(id=devbox.id)
        msg = "180초 이내에 devbox를 시작하지 못했습니다"
        raise RuntimeError(msg)
    console.print(f"[green]✓ Runloop devbox 준비 완료: {sandbox_id}[/green]")
    backend = RunloopBackend(devbox_id=devbox.id, client=runloop_client)
    # Run the user's setup script, if one was provided.
    if setup_script_path:
        _run_sandbox_setup(backend, setup_script_path)
    try:
        yield backend
    finally:
        if should_cleanup:
            try:
                console.print(f"[dim]Runloop devbox {sandbox_id} 종료 중...[/dim]")
                runloop_client.devboxes.shutdown(id=devbox.id)
                console.print(f"[dim]✓ Runloop devbox {sandbox_id} 종료됨[/dim]")
            except Exception as e:
                console.print(f"[yellow]⚠ 정리 실패: {e}[/yellow]")
@contextmanager
def create_daytona_sandbox(
    *, sandbox_id: str | None = None, setup_script_path: str | None = None
) -> Generator[SandboxBackendProtocol, None, None]:
    """Create a Daytona sandbox.

    Args:
        sandbox_id: Existing sandbox ID to reuse (optional).
        setup_script_path: Path to a setup script to run after startup (optional).

    Yields:
        A DaytonaBackend connected to the running sandbox.

    Note:
        Attaching to an existing Daytona sandbox by ID may not be supported
        yet; NotImplementedError is raised when sandbox_id is given.
    """
    from daytona import Daytona, DaytonaConfig

    from deepagents_cli.integrations.daytona import DaytonaBackend

    api_key = os.environ.get("DAYTONA_API_KEY")
    if not api_key:
        msg = "DAYTONA_API_KEY 환경 변수가 설정되지 않았습니다"
        raise ValueError(msg)
    if sandbox_id:
        msg = (
            "ID로 기존 Daytona 샌드박스에 연결하는 기능은 아직 지원되지 않습니다. "
            "--sandbox-id를 생략하여 새 샌드박스를 생성하십시오."
        )
        raise NotImplementedError(msg)
    console.print("[yellow]Daytona 샌드박스 시작 중...[/yellow]")
    daytona_client = Daytona(DaytonaConfig(api_key=api_key))
    sandbox = daytona_client.create()
    sandbox_id = sandbox.id
    # Poll until the sandbox answers a trivial command (180s total: 90 * 2s).
    for _ in range(90):
        try:
            probe = sandbox.process.exec("echo ready", timeout=5)
            if probe.exit_code == 0:
                break
        except Exception:
            pass
        time.sleep(2)
    else:
        try:
            # Best-effort cleanup before failing.
            sandbox.delete()
        finally:
            msg = "180초 이내에 Daytona 샌드박스를 시작하지 못했습니다"
            raise RuntimeError(msg)
    backend = DaytonaBackend(sandbox)
    console.print(f"[green]✓ Daytona 샌드박스 준비 완료: {backend.id}[/green]")
    # Run the user's setup script, if one was provided.
    if setup_script_path:
        _run_sandbox_setup(backend, setup_script_path)
    try:
        yield backend
    finally:
        console.print(f"[dim]Daytona 샌드박스 {sandbox_id} 삭제 중...[/dim]")
        try:
            sandbox.delete()
            console.print(f"[dim]✓ Daytona 샌드박스 {sandbox_id} 종료됨[/dim]")
        except Exception as e:
            console.print(f"[yellow]⚠ 정리 실패: {e}[/yellow]")
# Default working directory inside the sandbox, per provider.
_PROVIDER_TO_WORKING_DIR = {
    "modal": "/workspace",
    "runloop": "/home/user",
    "daytona": "/home/daytona",
}
# Context-manager factory for each supported sandbox type.
_SANDBOX_PROVIDERS = {
    "modal": create_modal_sandbox,
    "runloop": create_runloop_sandbox,
    "daytona": create_daytona_sandbox,
}
@contextmanager
def create_sandbox(
    provider: str,
    *,
    sandbox_id: str | None = None,
    setup_script_path: str | None = None,
) -> Generator[SandboxBackendProtocol, None, None]:
    """Create or attach to a sandbox for the given provider.

    Unified entry point for sandbox creation that delegates to the
    provider-specific context manager.

    Args:
        provider: Sandbox provider ("modal", "runloop", "daytona").
        sandbox_id: Existing sandbox ID to reuse (optional).
        setup_script_path: Path to a setup script to run after startup (optional).

    Yields:
        A running SandboxBackendProtocol instance.

    Raises:
        ValueError: If the provider name is unknown.
    """
    if provider not in _SANDBOX_PROVIDERS:
        msg = f"알 수 없는 샌드박스 공급자: {provider}. 사용 가능한 공급자: {', '.join(get_available_sandbox_types())}"
        raise ValueError(msg)
    factory = _SANDBOX_PROVIDERS[provider]
    with factory(sandbox_id=sandbox_id, setup_script_path=setup_script_path) as sandbox_backend:
        yield sandbox_backend
def get_available_sandbox_types() -> list[str]:
    """List the available sandbox provider types.

    Returns:
        Provider type names, e.g. ["modal", "runloop", "daytona"].
    """
    # Unpack the registry's keys in their insertion order.
    return [*_SANDBOX_PROVIDERS]
def get_default_working_dir(provider: str) -> str:
    """Get the default working directory for the given sandbox provider.

    Args:
        provider: Sandbox provider name ("modal", "runloop", "daytona").

    Returns:
        The default working directory path as a string.

    Raises:
        ValueError: If the provider is unknown.
    """
    working_dir = _PROVIDER_TO_WORKING_DIR.get(provider)
    if working_dir is None:
        msg = f"알 수 없는 샌드박스 공급자: {provider}"
        raise ValueError(msg)
    return working_dir
# Public API of the sandbox factory module.
__all__ = [
    "create_sandbox",
    "get_available_sandbox_types",
    "get_default_working_dir",
]

View File

@@ -0,0 +1,468 @@
"""DeepAgents를 위한 메인 진입점 및 CLI 루프."""
import argparse
import asyncio
import os
import sys
from pathlib import Path
from deepagents.backends.protocol import SandboxBackendProtocol
# Now safe to import agent (which imports LangChain modules)
from deepagents_cli.agent import create_cli_agent, list_agents, reset_agent
from deepagents_cli.commands import execute_bash_command, handle_command
# CRITICAL: Import config FIRST to set LANGSMITH_PROJECT before LangChain loads
from deepagents_cli.config import (
COLORS,
DEEP_AGENTS_ASCII,
SessionState,
console,
create_model,
settings,
)
from deepagents_cli.execution import execute_task
from deepagents_cli.input import ImageTracker, create_prompt_session
from deepagents_cli.integrations.sandbox_factory import (
create_sandbox,
get_default_working_dir,
)
from deepagents_cli.skills import execute_skills_command, setup_skills_parser
from deepagents_cli.tools import fetch_url, http_request, web_search
from deepagents_cli.ui import TokenTracker, show_help
def check_cli_dependencies() -> None:
    """Verify that the CLI's optional dependencies are installed.

    Prints installation hints and exits with status 1 when any required
    package is missing.
    """
    import importlib

    # (import name, PyPI package name) pairs required by the CLI.
    # Replaces five copy-pasted try/except blocks with one data-driven loop.
    required = [
        ("rich", "rich"),
        ("requests", "requests"),
        ("dotenv", "python-dotenv"),
        ("tavily", "tavily-python"),
        ("prompt_toolkit", "prompt-toolkit"),
    ]
    missing = []
    for module_name, package_name in required:
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing.append(package_name)
    if missing:
        print("\n❌ 필수 CLI 종속성이 누락되었습니다!")
        print("\nDeepAgents CLI를 사용하려면 다음 패키지가 필요합니다:")
        for pkg in missing:
            print(f" - {pkg}")
        print("\n다음 명령으로 설치하십시오:")
        print(" pip install deepagents[cli]")
        print("\n또는 모든 종속성을 설치하십시오:")
        print(" pip install 'deepagents[cli]'")
        sys.exit(1)
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    Subcommands (list/help/reset/skills) are exposed via ``command``;
    interactive-mode flags are exposed under their option names.
    """
    parser = argparse.ArgumentParser(
        description="DeepAgents - AI 코딩 도우미",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False,
    )
    subparsers = parser.add_subparsers(dest="command", help="실행할 명령")
    # List command
    subparsers.add_parser("list", help="사용 가능한 모든 에이전트 나열")
    # Help command
    subparsers.add_parser("help", help="도움말 정보 표시")
    # Reset command
    reset_parser = subparsers.add_parser("reset", help="에이전트 초기화")
    reset_parser.add_argument("--agent", required=True, help="초기화할 에이전트 이름")
    reset_parser.add_argument("--target", dest="source_agent", help="다른 에이전트에서 프롬프트 복사")
    # Skills command - setup delegated to skills module
    setup_skills_parser(subparsers)
    # Default interactive mode
    parser.add_argument(
        "--agent",
        default="agent",
        help="별도의 메모리 저장소를 위한 에이전트 식별자 (기본값: agent).",
    )
    parser.add_argument(
        "--model",
        help="사용할 모델 (예: claude-sonnet-4-5-20250929, gpt-5-mini, gemini-3-pro-preview). 모델 이름에서 공급자가 자동 감지됩니다.",
    )
    parser.add_argument(
        "--auto-approve",
        action="store_true",
        help="프롬프트 없이 도구 사용 자동 승인 (human-in-the-loop 비활성화)",
    )
    parser.add_argument(
        "--sandbox",
        choices=["none", "modal", "daytona", "runloop"],
        default="none",
        help="코드 실행을 위한 원격 샌드박스 (기본값: none - 로컬 전용)",
    )
    parser.add_argument(
        "--sandbox-id",
        help="재사용할 기존 샌드박스 ID (생성 및 정리 건너뜀)",
    )
    parser.add_argument(
        "--sandbox-setup",
        help="생성 후 샌드박스에서 실행할 설정 스크립트 경로",
    )
    parser.add_argument(
        "--no-splash",
        action="store_true",
        help="시작 스플래시 화면 비활성화",
    )
    return parser.parse_args()
async def simple_cli(
    agent,
    assistant_id: str | None,
    session_state,
    baseline_tokens: int = 0,
    backend=None,
    sandbox_type: str | None = None,
    setup_script_path: str | None = None,
    no_splash: bool = False,
) -> None:
    """Main CLI loop: print startup status, then read and dispatch user input.

    Args:
        agent: The agent instance that executes user tasks.
        assistant_id: Agent identifier used for per-agent memory storage.
        session_state: Session state holding auto-approve and exit-hint fields.
        baseline_tokens: Token count of the system-prompt baseline, so token
            tracking reports conversation usage only.
        backend: Backend for file operations (CompositeBackend).
        sandbox_type: Sandbox type in use (e.g. "modal", "runloop", "daytona").
            None means running in local mode.
        setup_script_path: Path of the setup script that was executed, if any.
        no_splash: If True, skip the startup splash screen.
    """
    console.clear()
    if not no_splash:
        console.print(DEEP_AGENTS_ASCII, style=f"bold {COLORS['primary']}")
        console.print()
    # Extract sandbox ID from backend if using sandbox mode
    sandbox_id: str | None = None
    if backend:
        from deepagents.backends.composite import CompositeBackend

        # Check if it's a CompositeBackend with a sandbox default backend
        if isinstance(backend, CompositeBackend):
            if isinstance(backend.default, SandboxBackendProtocol):
                sandbox_id = backend.default.id
        elif isinstance(backend, SandboxBackendProtocol):
            sandbox_id = backend.id
    # Display sandbox info persistently (survives console.clear())
    if sandbox_type and sandbox_id:
        console.print(f"[yellow]⚡ {sandbox_type.capitalize()} 샌드박스: {sandbox_id}[/yellow]")
        if setup_script_path:
            console.print(f"[green]✓ 설정 스크립트 ({setup_script_path}) 완료됨[/green]")
        console.print()
    # Display model info
    if settings.model_name and settings.model_provider:
        provider_display = {
            "openai": "OpenAI",
            "anthropic": "Anthropic",
            "google": "Google",
        }.get(settings.model_provider, settings.model_provider)
        console.print(
            f"[green]✓ Model:[/green] {provider_display}'{settings.model_name}'",
            style=COLORS["dim"],
        )
        console.print()
    # Warn when web search is unavailable (no Tavily API key configured)
    if not settings.has_tavily:
        console.print(
            "[yellow]⚠ 웹 검색 비활성화됨:[/yellow] TAVILY_API_KEY를 찾을 수 없습니다.",
            style=COLORS["dim"],
        )
        console.print(" 웹 검색을 활성화하려면 Tavily API 키를 설정하세요:", style=COLORS["dim"])
        console.print(" export TAVILY_API_KEY=your_api_key_here", style=COLORS["dim"])
        console.print(
            " 또는 .env 파일에 추가하세요. 키 발급: https://tavily.com",
            style=COLORS["dim"],
        )
        console.print()
    # Show LangSmith tracing status when enabled
    if settings.has_deepagents_langchain_project:
        console.print(
            f"[green]✓ LangSmith 추적 활성화됨:[/green] Deepagents → '{settings.deepagents_langchain_project}'",
            style=COLORS["dim"],
        )
        if settings.user_langchain_project:
            console.print(f" [dim]사용자 코드 (shell) → '{settings.user_langchain_project}'[/dim]")
        console.print()
    console.print("... 코딩 준비 완료! 무엇을 만들고 싶으신가요?", style=COLORS["agent"])
    # Show where code actually runs: remote sandbox dir vs. local cwd
    if sandbox_type:
        working_dir = get_default_working_dir(sandbox_type)
        console.print(f" [dim]로컬 CLI 디렉터리: {Path.cwd()}[/dim]")
        console.print(f" [dim]코드 실행: 원격 샌드박스 ({working_dir})[/dim]")
    else:
        console.print(f" [dim]작업 디렉터리: {Path.cwd()}[/dim]")
    console.print()
    if session_state.auto_approve:
        console.print(" [yellow]⚡ 자동 승인: 켜짐[/yellow] [dim](확인 없이 도구 실행)[/dim]")
        console.print()
    # Localize modifier names and show key symbols (macOS vs others)
    if sys.platform == "darwin":
        tips = (
            " 팁: ⏎ Enter로 제출, ⌥ Option + ⏎ Enter로 줄바꿈 (또는 Esc+Enter), "
            "⌃E로 편집기 열기, ⌃T로 자동 승인 전환, ⌃C로 중단"
        )
    else:
        tips = (
            " 팁: Enter로 제출, Alt+Enter (또는 Esc+Enter)로 줄바꿈, "
            "Ctrl+E로 편집기 열기, Ctrl+T로 자동 승인 전환, Ctrl+C로 중단"
        )
    console.print(tips, style=f"dim {COLORS['dim']}")
    console.print()
    # Create prompt session, image tracker, and token tracker
    image_tracker = ImageTracker()
    session = create_prompt_session(assistant_id, session_state, image_tracker=image_tracker)
    token_tracker = TokenTracker()
    token_tracker.set_baseline(baseline_tokens)
    while True:
        try:
            user_input = await session.prompt_async()
            # A submitted prompt cancels any pending Ctrl+C exit hint.
            if session_state.exit_hint_handle:
                session_state.exit_hint_handle.cancel()
                session_state.exit_hint_handle = None
                session_state.exit_hint_until = None
            user_input = user_input.strip()
        except EOFError:
            break
        except KeyboardInterrupt:
            console.print("\n안녕히 가세요!", style=COLORS["primary"])
            break
        if not user_input:
            continue
        # Check for slash commands first
        if user_input.startswith("/"):
            result = handle_command(user_input, agent, token_tracker)
            if result == "exit":
                console.print("\n안녕히 가세요!", style=COLORS["primary"])
                break
            if result:
                # Command was handled, continue to next input
                continue
        # Check for bash commands (!)
        if user_input.startswith("!"):
            execute_bash_command(user_input)
            continue
        # Handle regular quit keywords
        if user_input.lower() in ["quit", "exit", "q"]:
            console.print("\n안녕히 가세요!", style=COLORS["primary"])
            break
        await execute_task(
            user_input,
            agent,
            assistant_id,
            session_state,
            token_tracker,
            backend=backend,
            image_tracker=image_tracker,
        )
async def _run_agent_session(
    model,
    assistant_id: str,
    session_state,
    sandbox_backend=None,
    sandbox_type: str | None = None,
    setup_script_path: str | None = None,
) -> None:
    """Helper that creates the agent and runs the CLI session.

    Extracted to avoid duplication between sandbox mode and local mode.

    Args:
        model: The LLM model to use.
        assistant_id: Agent identifier for memory storage.
        session_state: Session state carrying the auto-approve setting.
        sandbox_backend: Optional sandbox backend for remote execution.
        sandbox_type: Type of sandbox in use, if any.
        setup_script_path: Path to the setup script that was run, if any.
    """
    # Create agent with conditional tools (web_search only when Tavily is configured)
    tools = [http_request, fetch_url]
    if settings.has_tavily:
        tools.append(web_search)
    agent, composite_backend = create_cli_agent(
        model=model,
        assistant_id=assistant_id,
        tools=tools,
        sandbox=sandbox_backend,
        sandbox_type=sandbox_type,
        auto_approve=session_state.auto_approve,
    )
    # Calculate baseline token count for accurate token tracking.
    # Local imports keep module import time low and avoid import cycles.
    from .agent import get_system_prompt
    from .token_utils import calculate_baseline_tokens
    agent_dir = settings.get_agent_dir(assistant_id)
    system_prompt = get_system_prompt(assistant_id=assistant_id, sandbox_type=sandbox_type)
    baseline_tokens = calculate_baseline_tokens(model, agent_dir, system_prompt, assistant_id)
    await simple_cli(
        agent,
        assistant_id,
        session_state,
        baseline_tokens,
        backend=composite_backend,
        sandbox_type=sandbox_type,
        setup_script_path=setup_script_path,
    )
async def main(
    assistant_id: str,
    session_state,
    sandbox_type: str = "none",
    sandbox_id: str | None = None,
    setup_script_path: str | None = None,
    model_name: str | None = None,
) -> None:
    """Main entry point with conditional sandbox support.

    Args:
        assistant_id: Agent identifier for memory storage.
        session_state: Session state carrying the auto-approve setting.
        sandbox_type: Sandbox type ("none", "modal", "runloop", "daytona").
        sandbox_id: Optional existing sandbox ID to reuse.
        setup_script_path: Optional path to a setup script to run in the sandbox.
        model_name: Optional model name to use instead of the environment variable.
    """
    model = create_model(model_name)
    # Branch 1: User wants a sandbox
    if sandbox_type != "none":
        # Try to create sandbox
        try:
            console.print()
            with create_sandbox(
                sandbox_type, sandbox_id=sandbox_id, setup_script_path=setup_script_path
            ) as sandbox_backend:
                console.print(f"[yellow]⚡ 원격 실행 활성화됨 ({sandbox_type})[/yellow]")
                console.print()
                await _run_agent_session(
                    model,
                    assistant_id,
                    session_state,
                    sandbox_backend,
                    sandbox_type=sandbox_type,
                    setup_script_path=setup_script_path,
                )
        # Sandbox creation failed - fail hard (no silent fallback).
        # NOTE(review): this clause also catches these exception types when they
        # are raised later, inside the running session, and reports them as a
        # creation failure - confirm whether that is intended.
        except (ImportError, ValueError, RuntimeError, NotImplementedError) as e:
            console.print()
            console.print("[red]❌ 샌드박스 생성 실패[/red]")
            console.print(f"[dim]{e}[/dim]")
            sys.exit(1)
        except KeyboardInterrupt:
            console.print("\n\n[yellow]중단됨[/yellow]")
            sys.exit(0)
        except Exception as e:
            console.print(f"\n[bold red]❌ 오류:[/bold red] {e}\n")
            console.print_exception()
            sys.exit(1)
    # Branch 2: User wants local mode (none or default)
    else:
        try:
            await _run_agent_session(model, assistant_id, session_state, sandbox_backend=None)
        except KeyboardInterrupt:
            console.print("\n\n[yellow]중단됨[/yellow]")
            sys.exit(0)
        except Exception as e:
            console.print(f"\n[bold red]❌ 오류:[/bold red] {e}\n")
            console.print_exception()
            sys.exit(1)
def cli_main() -> None:
    """Console-script entry point.

    Parses CLI arguments, dispatches the non-interactive subcommands
    (help/list/reset/skills), and otherwise starts the async agent session.
    """
    # Fix for gRPC fork issue on macOS
    # https://github.com/grpc/grpc/issues/37642
    if sys.platform == "darwin":
        os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "0"
    # Note: LANGSMITH_PROJECT is already overridden in config.py (before LangChain imports)
    # This ensures agent traces → DEEPAGENTS_LANGSMITH_PROJECT
    # Shell commands → user's original LANGSMITH_PROJECT (via ShellMiddleware env)
    # Check dependencies first
    check_cli_dependencies()
    try:
        args = parse_args()
        if args.command == "help":
            show_help()
        elif args.command == "list":
            list_agents()
        elif args.command == "reset":
            reset_agent(args.agent, args.source_agent)
        elif args.command == "skills":
            execute_skills_command(args)
        else:
            # Create session state from args
            session_state = SessionState(auto_approve=args.auto_approve, no_splash=args.no_splash)
            # API key validation happens in create_model()
            asyncio.run(
                main(
                    args.agent,
                    session_state,
                    args.sandbox,
                    args.sandbox_id,
                    args.sandbox_setup,
                    getattr(args, "model", None),
                )
            )
    except KeyboardInterrupt:
        # Clean exit on Ctrl+C - suppress ugly traceback
        console.print("\n\n[yellow]중단됨[/yellow]")
        sys.exit(0)
if __name__ == "__main__":
cli_main()

View File

@@ -0,0 +1,56 @@
"""Utilities for project root detection and project-specific configuration."""
from pathlib import Path
def find_project_root(start_path: Path | None = None) -> Path | None:
"""Find the project root by looking for .git directory.
Walks up the directory tree from start_path (or cwd) looking for a .git
directory, which indicates the project root.
Args:
start_path: Directory to start searching from. Defaults to current working directory.
Returns:
Path to the project root if found, None otherwise.
"""
current = Path(start_path or Path.cwd()).resolve()
# Walk up the directory tree
for parent in [current, *list(current.parents)]:
git_dir = parent / ".git"
if git_dir.exists():
return parent
return None
def find_project_agent_md(project_root: Path) -> list[Path]:
    """Collect project-specific agent.md file(s).

    Two locations are checked, and every one that exists is returned:

    1. ``project_root/.deepagents/agent.md`` (preferred)
    2. ``project_root/agent.md`` (fallback, also included when both exist)

    Callers load and combine all returned files.

    Args:
        project_root: Path to the project root directory.

    Returns:
        Paths to project agent.md files (may contain 0, 1, or 2 entries).
    """
    candidates = (
        project_root / ".deepagents" / "agent.md",
        project_root / "agent.md",
    )
    return [candidate for candidate in candidates if candidate.exists()]

View File

@@ -0,0 +1,138 @@
"""에이전트에 기본 셸 도구를 노출하는 단순화된 미들웨어."""
from __future__ import annotations
import os
import subprocess
from typing import Any
from langchain.agents.middleware.types import AgentMiddleware, AgentState
from langchain.tools import ToolRuntime, tool
from langchain_core.messages import ToolMessage
from langchain_core.tools.base import ToolException
class ShellMiddleware(AgentMiddleware[AgentState, Any]):
    """Grants the agent basic shell access through a ``shell`` tool.

    The shell runs on the local machine with no safeguards beyond the
    human-in-the-loop protections provided by the CLI itself.
    """
    def __init__(
        self,
        *,
        workspace_root: str,
        timeout: float = 120.0,
        max_output_bytes: int = 100_000,
        env: dict[str, str] | None = None,
    ) -> None:
        """Initialize an instance of `ShellMiddleware`.

        Args:
            workspace_root: Working directory for shell commands.
            timeout: Maximum time in seconds to wait for a command to
                complete. Defaults to 120 seconds.
            max_output_bytes: Maximum number of bytes to capture from
                command output. Defaults to 100,000 bytes.
            env: Environment variables to pass to the subprocess. If None,
                the current process's environment is used. Defaults to None.
        """
        super().__init__()
        self._timeout = timeout
        self._max_output_bytes = max_output_bytes
        self._tool_name = "shell"
        # Snapshot of the environment taken at construction time
        self._env = env if env is not None else os.environ.copy()
        self._workspace_root = workspace_root
        # Build description with workspace info
        description = (
            f"Execute shell commands directly on the host. Commands run in this working directory: "
            f"{workspace_root}. Each command runs in a fresh shell environment with the "
            f"current process's environment variables. Commands may be truncated if they exceed "
            f"configured timeout or output limits."
        )
        @tool(self._tool_name, description=description)
        def shell_tool(
            command: str,
            runtime: ToolRuntime[None, AgentState],
        ) -> ToolMessage | str:
            """Execute a shell command.

            Args:
                command: The shell command to execute.
                runtime: The tool runtime context.
            """
            return self._run_shell_command(command, tool_call_id=runtime.tool_call_id)
        self._shell_tool = shell_tool
        self.tools = [self._shell_tool]
    def _run_shell_command(
        self,
        command: str,
        *,
        tool_call_id: str | None,
    ) -> ToolMessage | str:
        """Run a shell command and return the result.

        Args:
            command: The shell command to execute.
            tool_call_id: Tool call ID used to build the ToolMessage.

        Returns:
            A ToolMessage containing the command output or an error message.
        """
        if not command or not isinstance(command, str):
            msg = "Shell 도구는 비어 있지 않은 명령 문자열을 필요로 합니다."
            raise ToolException(msg)
        try:
            # NOTE(review): shell=True executes the agent-supplied string via the
            # system shell; safety relies entirely on the CLI's approval flow.
            result = subprocess.run(
                command,
                check=False,
                shell=True,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                env=self._env,
                cwd=self._workspace_root,
            )
            # Combine stdout and stderr
            output_parts = []
            if result.stdout:
                output_parts.append(result.stdout)
            if result.stderr:
                # Prefix each stderr line so the two streams stay distinguishable
                stderr_lines = result.stderr.strip().split("\n")
                for line in stderr_lines:
                    output_parts.append(f"[stderr] {line}")
            output = "\n".join(output_parts) if output_parts else "<no output>"
            # Truncate the output if needed (counts characters of the decoded
            # text, not raw bytes, despite the parameter name)
            if len(output) > self._max_output_bytes:
                output = output[: self._max_output_bytes]
                output += f"\n\n... 출력이 {self._max_output_bytes}바이트에서 잘렸습니다."
            # Append exit-code info when non-zero
            if result.returncode != 0:
                output = f"{output.rstrip()}\n\n종료 코드: {result.returncode}"
                status = "error"
            else:
                status = "success"
        except subprocess.TimeoutExpired:
            output = f"오류: 명령이 {self._timeout:.1f}초 후에 시간 초과되었습니다."
            status = "error"
        return ToolMessage(
            content=output,
            tool_call_id=tool_call_id,
            name=self._tool_name,
            status=status,
        )
__all__ = ["ShellMiddleware"]

View File

@@ -0,0 +1,21 @@
"""deepagents CLI를 위한 Skills 모듈.
공개 API:
- SkillsMiddleware: 기술을 에이전트 실행에 통합하기 위한 미들웨어
- execute_skills_command: 기술 하위 명령(list/create/info) 실행
- setup_skills_parser: 기술 명령을 위한 argparse 설정
기타 모든 구성 요소는 내부 구현 세부 사항입니다.
"""
from deepagents_cli.skills.commands import (
execute_skills_command,
setup_skills_parser,
)
from deepagents_cli.skills.middleware import SkillsMiddleware
__all__ = [
"SkillsMiddleware",
"execute_skills_command",
"setup_skills_parser",
]

View File

@@ -0,0 +1,486 @@
"""기술 관리를 위한 CLI 명령.
이 명령들은 cli.py를 통해 CLI에 등록됩니다:
- deepagents skills list --agent <agent> [--project]
- deepagents skills create <name>
- deepagents skills info <name>
"""
import argparse
import re
from pathlib import Path
from typing import Any
from deepagents_cli.config import COLORS, Settings, console
from deepagents_cli.skills.load import MAX_SKILL_NAME_LENGTH, list_skills
def _validate_name(name: str) -> tuple[bool, str]:
    """Validate a name against the Agent Skills specification.

    Requirements (https://agentskills.io/specification):
    - At most 64 characters
    - Lowercase alphanumerics and hyphens only (a-z, 0-9, -)
    - Must not start or end with a hyphen
    - No consecutive hyphens
    - No path traversal sequences

    Args:
        name: The name to validate.

    Returns:
        Tuple of (is_valid, error_message). The error message is empty when valid.
    """
    # Reject empty or whitespace-only names
    if not name or not name.strip():
        return False, "비어 있을 수 없습니다"
    # Length check (spec: at most 64 characters). Use the shared constant in
    # the message so it stays correct if the limit ever changes.
    if len(name) > MAX_SKILL_NAME_LENGTH:
        return False, f"{MAX_SKILL_NAME_LENGTH}자를 초과할 수 없습니다"
    # Reject path traversal sequences
    if ".." in name or "/" in name or "\\" in name:
        return False, "경로 요소를 포함할 수 없습니다"
    # Spec: lowercase alphanumerics and hyphens only.
    # The pattern also guarantees: no leading/trailing hyphen, no consecutive hyphens.
    if not re.match(r"^[a-z0-9]+(-[a-z0-9]+)*$", name):
        return (
            False,
            "소문자, 숫자, 하이픈만 사용해야 합니다 (대문자, 밑줄 불가능, 하이픈으로 시작하거나 끝날 수 없음)",
        )
    return True, ""
def _validate_skill_path(skill_dir: Path, base_dir: Path) -> tuple[bool, str]:
    """Validate that the resolved skill directory stays inside the base directory.

    Args:
        skill_dir: Skill directory path to validate.
        base_dir: Base skills directory that must contain skill_dir.

    Returns:
        Tuple of (is_valid, error_message). The error message is empty when valid.
    """
    try:
        # Resolve both paths to canonical form (follows symlinks) so traversal
        # via ".." segments or symlinks is caught.
        resolved_skill = skill_dir.resolve()
        resolved_base = base_dir.resolve()
        # Path.is_relative_to exists on every Python version this code can run
        # on (3.9+; the package already uses newer syntax elsewhere), so the
        # previous hasattr/relative_to fallback was dead code.
        if not resolved_skill.is_relative_to(resolved_base):
            return False, f"기술 디렉토리는 {base_dir} 내에 있어야 합니다"
        return True, ""
    except (OSError, RuntimeError) as e:
        # Resolution failed (e.g. symlink loops): report instead of raising.
        return False, f"잘못된 경로: {e}"
def _list(agent: str, *, project: bool = False) -> None:
    """List every skill available for the given agent.

    Args:
        agent: Agent identifier for the skills (default: agent).
        project: When True, show project skills only.
            When False, show all skills (user + project).
    """
    settings = Settings.from_environment()
    user_skills_dir = settings.get_user_skills_dir(agent)
    project_skills_dir = settings.get_project_skills_dir()
    # When the --project flag is used, show project skills only
    if project:
        if not project_skills_dir:
            console.print("[yellow]프로젝트 디렉토리가 아닙니다.[/yellow]")
            console.print(
                "[dim]프로젝트 기술을 사용하려면 프로젝트 루트에 .git 디렉토리가 필요합니다.[/dim]",
                style=COLORS["dim"],
            )
            return
        if not project_skills_dir.exists() or not any(project_skills_dir.iterdir()):
            console.print("[yellow]프로젝트 기술을 찾을 수 없습니다.[/yellow]")
            console.print(
                f"[dim]프로젝트 기술을 추가하면 {project_skills_dir}/ 에 생성됩니다.[/dim]",
                style=COLORS["dim"],
            )
            console.print(
                "\n[dim]프로젝트 기술 생성:\n deepagents skills create my-skill --project[/dim]",
                style=COLORS["dim"],
            )
            return
        skills = list_skills(user_skills_dir=None, project_skills_dir=project_skills_dir)
        console.print("\n[bold]프로젝트 기술:[/bold]\n", style=COLORS["primary"])
    else:
        # Load both user and project skills
        skills = list_skills(user_skills_dir=user_skills_dir, project_skills_dir=project_skills_dir)
        if not skills:
            console.print("[yellow]기술을 찾을 수 없습니다.[/yellow]")
            console.print(
                "[dim]기술을 추가하면 ~/.deepagents/agent/skills/ 에 생성됩니다.[/dim]",
                style=COLORS["dim"],
            )
            console.print(
                "\n[dim]첫 번째 기술 생성:\n deepagents skills create my-skill[/dim]",
                style=COLORS["dim"],
            )
            return
        console.print("\n[bold]사용 가능한 기술:[/bold]\n", style=COLORS["primary"])
    # Group skills by source
    user_skills = [s for s in skills if s["source"] == "user"]
    project_skills_list = [s for s in skills if s["source"] == "project"]
    # Show user skills
    if user_skills and not project:
        console.print("[bold cyan]사용자 기술:[/bold cyan]", style=COLORS["primary"])
        for skill in user_skills:
            skill_path = Path(skill["path"])
            console.print(f" • [bold]{skill['name']}[/bold]", style=COLORS["primary"])
            console.print(f" {skill['description']}", style=COLORS["dim"])
            console.print(f" 위치: {skill_path.parent}/", style=COLORS["dim"])
            console.print()
    # Show project skills
    if project_skills_list:
        # Blank separator line between the two groups
        if not project and user_skills:
            console.print()
        console.print("[bold green]프로젝트 기술:[/bold green]", style=COLORS["primary"])
        for skill in project_skills_list:
            skill_path = Path(skill["path"])
            console.print(f" • [bold]{skill['name']}[/bold]", style=COLORS["primary"])
            console.print(f" {skill['description']}", style=COLORS["dim"])
            console.print(f" 위치: {skill_path.parent}/", style=COLORS["dim"])
            console.print()
def _create(skill_name: str, agent: str, project: bool = False) -> None:
    """Create a new skill with a template SKILL.md file.

    Args:
        skill_name: Name of the skill to create.
        agent: Agent identifier for the skills.
        project: When True, create in the project skills directory.
            When False, create in the user skills directory.
    """
    # Validate the skill name first (per the Agent Skills specification)
    is_valid, error_msg = _validate_name(skill_name)
    if not is_valid:
        console.print(f"[bold red]오류:[/bold red] 잘못된 기술 이름: {error_msg}")
        console.print(
            "[dim]Agent Skills 사양에 따라: 이름은 소문자 영숫자와 하이픈만 사용해야 합니다.\n"
            "예시: web-research, code-review, data-analysis[/dim]",
            style=COLORS["dim"],
        )
        return
    # Determine the target directory
    settings = Settings.from_environment()
    if project:
        if not settings.project_root:
            console.print("[bold red]오류:[/bold red] 프로젝트 디렉토리가 아닙니다.")
            console.print(
                "[dim]프로젝트 기술을 사용하려면 프로젝트 루트에 .git 디렉토리가 필요합니다.[/dim]",
                style=COLORS["dim"],
            )
            return
        skills_dir = settings.ensure_project_skills_dir()
    else:
        skills_dir = settings.ensure_user_skills_dir(agent)
    skill_dir = skills_dir / skill_name
    # Ensure the resolved path stays inside skills_dir
    is_valid_path, path_error = _validate_skill_path(skill_dir, skills_dir)
    if not is_valid_path:
        console.print(f"[bold red]오류:[/bold red] {path_error}")
        return
    if skill_dir.exists():
        console.print(f"[bold red]오류:[/bold red] '{skill_name}' 기술이 이미 {skill_dir} 에 존재합니다")
        return
    # Create the skill directory
    skill_dir.mkdir(parents=True, exist_ok=True)
    # Create the template SKILL.md (spec: https://agentskills.io/specification)
    template = f"""---
name: {skill_name}
description: 이 기술이 수행하는 작업과 사용 시기에 대한 간략한 설명.
# Agent Skills 사양에 따른 선택적 필드:
# license: Apache-2.0
# compatibility: Designed for deepagents CLI
# metadata:
# author: your-org
# version: "1.0"
# allowed-tools: Bash(git:*) Read
---
# {skill_name.title().replace("-", " ")} 기술
## 설명
[이 기술이 수행하는 작업과 사용해야 하는 시기에 대한 자세한 설명을 제공하십시오]
## 사용 시기
- [시나리오 1: 사용자가 ...를 요청할 때]
- [시나리오 2: ...가 필요할 때]
- [시나리오 3: 태스크에 ...가 포함될 때]
## 사용 방법
### 1단계: [첫 번째 작업]
[먼저 수행할 작업을 설명하십시오]
### 2단계: [두 번째 작업]
[다음에 수행할 작업을 설명하십시오]
### 3단계: [최종 작업]
[태스크를 완료하는 방법을 설명하십시오]
## 권장 사항
- [권장 사항 1]
- [권장 사항 2]
- [권장 사항 3]
## 지원 파일
이 기술 디렉토리에는 지침에서 참조하는 지원 파일이 포함될 수 있습니다:
- `helper.py` - 자동화를 위한 Python 스크립트
- `config.json` - 설정 파일
- `reference.md` - 추가 참조 문서
## 예시
### 예시 1: [시나리오 이름]
**사용자 요청:** "[사용자 요청 예시]"
**접근 방식:**
1. [단계별 분석]
2. [도구 및 명령 사용]
3. [예상 결과]
### 예시 2: [다른 시나리오]
**사용자 요청:** "[다른 예시]"
**접근 방식:**
1. [다른 접근 방식]
2. [관련 명령]
3. [예상 결과]
## 참고 사항
- [추가 팁, 경고 또는 컨텍스트]
- [알려진 제한 사항 또는 예외 케이스]
- [도움이 되는 외부 리소스 링크]
"""
    skill_md = skill_dir / "SKILL.md"
    skill_md.write_text(template)
    console.print(f"'{skill_name}' 기술이 성공적으로 생성되었습니다!", style=COLORS["primary"])
    console.print(f"위치: {skill_dir}\n", style=COLORS["dim"])
    console.print(
        "[dim]SKILL.md 파일을 편집하여 사용자 정의하십시오:\n"
        " 1. YAML frontmatter에서 설명을 업데이트하십시오\n"
        " 2. 지침과 예시를 채우십시오\n"
        " 3. 지원 파일(스크립트, 설정 등)을 추가하십시오\n"
        "\n"
        f" nano {skill_md}\n"
        "\n"
        "💡 기술 예시는 deepagents 저장소의 examples/skills/ 를 참조하십시오:\n"
        " - web-research: 구조화된 연구 워크플로우\n"
        " - langgraph-docs: LangGraph 문서 조회\n"
        "\n"
        " 예시 복사: cp -r examples/skills/web-research ~/.deepagents/agent/skills/\n",
        style=COLORS["dim"],
    )
def _info(skill_name: str, *, agent: str = "agent", project: bool = False) -> None:
    """Show detailed information about a specific skill.

    Args:
        skill_name: Name of the skill to show details for.
        agent: Agent identifier for the skills (default: agent).
        project: When True, search project skills only. When False, search
            both user and project skills.
    """
    settings = Settings.from_environment()
    user_skills_dir = settings.get_user_skills_dir(agent)
    project_skills_dir = settings.get_project_skills_dir()
    # Load skills according to the --project flag
    if project:
        if not project_skills_dir:
            console.print("[bold red]오류:[/bold red] 프로젝트 디렉토리가 아닙니다.")
            return
        skills = list_skills(user_skills_dir=None, project_skills_dir=project_skills_dir)
    else:
        skills = list_skills(user_skills_dir=user_skills_dir, project_skills_dir=project_skills_dir)
    # Find the skill
    skill = next((s for s in skills if s["name"] == skill_name), None)
    if not skill:
        console.print(f"[bold red]오류:[/bold red] '{skill_name}' 기술을 찾을 수 없습니다.")
        console.print("\n[dim]사용 가능한 기술:[/dim]", style=COLORS["dim"])
        for s in skills:
            console.print(f" - {s['name']}", style=COLORS["dim"])
        return
    # Read the full SKILL.md file
    skill_path = Path(skill["path"])
    skill_content = skill_path.read_text()
    # Determine the source label
    source_label = "프로젝트 기술" if skill["source"] == "project" else "사용자 기술"
    source_color = "green" if skill["source"] == "project" else "cyan"
    console.print(
        f"\n[bold]기술: {skill['name']}[/bold] [bold {source_color}]({source_label})[/bold {source_color}]\n",
        style=COLORS["primary"],
    )
    console.print(f"[bold]설명:[/bold] {skill['description']}\n", style=COLORS["dim"])
    console.print(f"[bold]위치:[/bold] {skill_path.parent}/\n", style=COLORS["dim"])
    # List the supporting files
    skill_dir = skill_path.parent
    supporting_files = [f for f in skill_dir.iterdir() if f.name != "SKILL.md"]
    if supporting_files:
        console.print("[bold]지원 파일:[/bold]", style=COLORS["dim"])
        for file in supporting_files:
            console.print(f" - {file.name}", style=COLORS["dim"])
        console.print()
    # Show the full SKILL.md content
    console.print("[bold]전체 SKILL.md 내용:[/bold]\n", style=COLORS["primary"])
    console.print(skill_content, style=COLORS["dim"])
    console.print()
def setup_skills_parser(
    subparsers: Any,
) -> argparse.ArgumentParser:
    """Register the ``skills`` subcommand parser together with its subcommands.

    Args:
        subparsers: The top-level argparse subparsers action to attach to.

    Returns:
        The newly created ``skills`` argument parser.
    """
    skills_parser = subparsers.add_parser(
        "skills",
        help="에이전트 기술 관리",
        description="에이전트 기술 관리 - 기술 정보 생성, 나열 및 보기",
    )
    skills_subparsers = skills_parser.add_subparsers(dest="skills_command", help="기술 명령")

    def _add_agent_option(target: argparse.ArgumentParser) -> None:
        # Every subcommand accepts the same --agent option.
        target.add_argument(
            "--agent",
            default="agent",
            help="기술을 위한 에이전트 식별자 (기본값: agent)",
        )

    # skills list
    list_parser = skills_subparsers.add_parser(
        "list", help="사용 가능한 모든 기술 나열", description="사용 가능한 모든 기술 나열"
    )
    _add_agent_option(list_parser)
    list_parser.add_argument(
        "--project",
        action="store_true",
        help="프로젝트 수준 기술만 표시",
    )
    # skills create
    create_parser = skills_subparsers.add_parser(
        "create",
        help="새 기술 생성",
        description="템플릿 SKILL.md 파일을 사용하여 새 기술 생성",
    )
    create_parser.add_argument("name", help="생성할 기술 이름 (예: web-research)")
    _add_agent_option(create_parser)
    create_parser.add_argument(
        "--project",
        action="store_true",
        help="사용자 디렉토리 대신 프로젝트 디렉토리에 기술 생성",
    )
    # skills info
    info_parser = skills_subparsers.add_parser(
        "info",
        help="기술에 대한 자세한 정보 표시",
        description="특정 기술에 대한 자세한 정보 표시",
    )
    info_parser.add_argument("name", help="정보를 표시할 기술 이름")
    _add_agent_option(info_parser)
    info_parser.add_argument(
        "--project",
        action="store_true",
        help="프로젝트 기술만 검색",
    )
    return skills_parser
def execute_skills_command(args: argparse.Namespace) -> None:
    """Execute a skills subcommand based on the parsed arguments.

    Args:
        args: Parsed command-line arguments with a skills_command attribute.
    """
    # Validate the agent argument. Use getattr: when no subcommand was given,
    # the namespace has no "agent" attribute at all, and accessing it directly
    # would raise AttributeError before the usage help below could be shown.
    agent = getattr(args, "agent", None)
    if agent:
        is_valid, error_msg = _validate_name(agent)
        if not is_valid:
            console.print(f"[bold red]오류:[/bold red] 잘못된 에이전트 이름: {error_msg}")
            # Hint matches _validate_name: lowercase letters, digits, hyphens
            # (the old text wrongly claimed underscores were allowed).
            console.print(
                "[dim]에이전트 이름은 소문자, 숫자, 하이픈만 포함할 수 있습니다.[/dim]",
                style=COLORS["dim"],
            )
            return
    if args.skills_command == "list":
        _list(agent=args.agent, project=args.project)
    elif args.skills_command == "create":
        _create(args.name, agent=args.agent, project=args.project)
    elif args.skills_command == "info":
        _info(args.name, agent=args.agent, project=args.project)
    else:
        # No subcommand given - show usage help
        console.print("[yellow]기술 하위 명령을 지정하십시오: list, create, 또는 info[/yellow]")
        console.print("\n[bold]사용법:[/bold]", style=COLORS["primary"])
        console.print(" deepagents skills <command> [options]\n")
        console.print("[bold]사용 가능한 명령:[/bold]", style=COLORS["primary"])
        console.print(" list 사용 가능한 모든 기술 나열")
        console.print(" create <name> 새 기술 생성")
        console.print(" info <name> 기술에 대한 자세한 정보 표시")
        console.print("\n[bold]예시:[/bold]", style=COLORS["primary"])
        console.print(" deepagents skills list")
        console.print(" deepagents skills create web-research")
        console.print(" deepagents skills info web-research")
        console.print("\n[dim]특정 명령에 대한 추가 도움말:[/dim]", style=COLORS["dim"])
        console.print(" deepagents skills <command> --help", style=COLORS["dim"])
__all__ = [
"execute_skills_command",
"setup_skills_parser",
]

View File

@@ -0,0 +1,319 @@
"""SKILL.md 파일에서 에이전트 기술을 파싱하고 로드하기 위한 기술 로더.
이 모듈은 YAML frontmatter 파싱을 통해 Anthropic의 에이전트 기술 패턴을 구현합니다.
각 기술은 다음을 포함하는 SKILL.md 파일이 있는 디렉토리입니다:
- YAML frontmatter (이름, 설명 필수)
- 에이전트를 위한 마크다운 지침
- 선택적 지원 파일 (스크립트, 설정 등)
SKILL.md 구조 예시:
```markdown
---
name: web-research
description: 철저한 웹 조사를 수행하기 위한 구조화된 접근 방식
---
# 웹 조사 기술
## 사용 시기
- 사용자가 주제 조사를 요청할 때
...
```
"""
from __future__ import annotations
import logging
import re
from typing import TYPE_CHECKING, NotRequired, TypedDict
import yaml
if TYPE_CHECKING:
from pathlib import Path
logger = logging.getLogger(__name__)
# SKILL.md 파일의 최대 크기 (10MB)
MAX_SKILL_FILE_SIZE = 10 * 1024 * 1024
# Agent Skills 사양 제약 조건 (https://agentskills.io/specification)
MAX_SKILL_NAME_LENGTH = 64
MAX_SKILL_DESCRIPTION_LENGTH = 1024
class SkillMetadata(TypedDict):
    """Skill metadata per the Agent Skills specification (https://agentskills.io/specification)."""
    name: str
    """Skill name (max 64 chars, lowercase alphanumerics and hyphens)."""
    description: str
    """Description of what the skill does (max 1024 chars)."""
    path: str
    """Path to the SKILL.md file."""
    source: str
    """Origin of the skill ('user' or 'project')."""
    # Optional fields per the Agent Skills specification
    license: NotRequired[str | None]
    """License name or a reference to a bundled license file."""
    compatibility: NotRequired[str | None]
    """Environment requirements (max 500 chars)."""
    metadata: NotRequired[dict[str, str] | None]
    """Arbitrary key-value mapping for additional metadata."""
    allowed_tools: NotRequired[str | None]
    """Space-separated list of pre-approved tools."""
def _is_safe_path(path: Path, base_dir: Path) -> bool:
    """Check whether *path* is safely contained inside *base_dir*.

    Guards against directory-traversal attacks via symlinks or path
    manipulation: both paths are resolved to canonical form (following
    symlinks) before the containment check.

    Args:
        path: Path to validate.
        base_dir: Base directory that must contain the path.

    Returns:
        True when the path is safely inside base_dir, False otherwise.

    Examples:
        >>> base = Path("/home/user/.deepagents/skills")
        >>> safe = Path("/home/user/.deepagents/skills/web-research/SKILL.md")
        >>> unsafe = Path("/home/user/.deepagents/skills/../../.ssh/id_rsa")
        >>> _is_safe_path(safe, base)
        True
        >>> _is_safe_path(unsafe, base)
        False
    """
    try:
        canonical_target = path.resolve()
        canonical_base = base_dir.resolve()
    except (OSError, RuntimeError):
        # Resolution failed (e.g. symlink loops, too many levels): treat as unsafe.
        return False
    # is_relative_to succeeds exactly when relative_to would not raise
    # ValueError, i.e. when the resolved target lives under the resolved base.
    return canonical_target.is_relative_to(canonical_base)
def _validate_skill_name(name: str, directory_name: str) -> tuple[bool, str]:
    """Validate a skill name against the Agent Skills specification.

    Requirements:
    - At most 64 characters
    - Lowercase alphanumerics and hyphens only (a-z, 0-9, -)
    - Must not start or end with a hyphen
    - No consecutive hyphens
    - Must match the parent directory name

    Args:
        name: Skill name from the YAML frontmatter.
        directory_name: Name of the parent directory.

    Returns:
        Tuple of (is_valid, error_message). The error message is empty when valid.
    """
    if not name:
        return False, "이름은 필수입니다"
    # Use the shared constant in the message so it stays correct if the
    # limit ever changes (it was previously hardcoded as "64").
    if len(name) > MAX_SKILL_NAME_LENGTH:
        return False, f"이름이 {MAX_SKILL_NAME_LENGTH}자를 초과합니다"
    # Pattern: lowercase alphanumerics, single hyphens between segments,
    # no leading/trailing hyphen
    if not re.match(r"^[a-z0-9]+(-[a-z0-9]+)*$", name):
        return False, "이름은 소문자 영숫자와 단일 하이픈만 사용해야 합니다"
    if name != directory_name:
        return False, f"이름 '{name}'은 디렉토리 이름 '{directory_name}'과 일치해야 합니다"
    return True, ""
def _parse_skill_metadata(skill_md_path: Path, source: str) -> SkillMetadata | None:
    """Parse YAML frontmatter from a SKILL.md file per the Agent Skills spec.

    Args:
        skill_md_path: Path to the SKILL.md file.
        source: Origin of the skill ('user' or 'project').

    Returns:
        SkillMetadata with all fields, or None when parsing fails.
    """
    try:
        # Security: check the file size to prevent DoS attacks
        file_size = skill_md_path.stat().st_size
        if file_size > MAX_SKILL_FILE_SIZE:
            logger.warning("건너뛰는 중 %s: 파일이 너무 큼 (%d 바이트)", skill_md_path, file_size)
            return None
        content = skill_md_path.read_text(encoding="utf-8")
        # Match the YAML frontmatter between --- delimiters
        frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n"
        match = re.match(frontmatter_pattern, content, re.DOTALL)
        if not match:
            logger.warning("건너뛰는 중 %s: 유효한 YAML frontmatter를 찾을 수 없음", skill_md_path)
            return None
        frontmatter_str = match.group(1)
        # Parse the YAML with safe_load for proper nested-structure support
        try:
            frontmatter_data = yaml.safe_load(frontmatter_str)
        except yaml.YAMLError as e:
            logger.warning("%s의 잘못된 YAML: %s", skill_md_path, e)
            return None
        if not isinstance(frontmatter_data, dict):
            logger.warning("건너뛰는 중 %s: frontmatter가 매핑이 아님", skill_md_path)
            return None
        # Validate the required fields
        name = frontmatter_data.get("name")
        description = frontmatter_data.get("description")
        if not name or not description:
            logger.warning("건너뛰는 중 %s: 필수 'name' 또는 'description'이 누락됨", skill_md_path)
            return None
        # Validate the name format per the spec (warn, but still load the
        # skill for backward compatibility)
        directory_name = skill_md_path.parent.name
        is_valid, error = _validate_skill_name(str(name), directory_name)
        if not is_valid:
            logger.warning(
                "%s'%s' 기술이 Agent Skills 사양을 따르지 않음: %s. "
                "사양을 준수하도록 이름을 변경하는 것을 고려하십시오.",
                skill_md_path,
                name,
                error,
            )
        # Validate the description length (spec: at most 1024 characters)
        description_str = str(description)
        if len(description_str) > MAX_SKILL_DESCRIPTION_LENGTH:
            logger.warning(
                "%s의 설명이 %d자를 초과하여 잘림",
                skill_md_path,
                MAX_SKILL_DESCRIPTION_LENGTH,
            )
            description_str = description_str[:MAX_SKILL_DESCRIPTION_LENGTH]
        return SkillMetadata(
            name=str(name),
            description=description_str,
            path=str(skill_md_path),
            source=source,
            license=frontmatter_data.get("license"),
            compatibility=frontmatter_data.get("compatibility"),
            metadata=frontmatter_data.get("metadata"),
            allowed_tools=frontmatter_data.get("allowed-tools"),
        )
    except (OSError, UnicodeDecodeError) as e:
        logger.warning("%s 읽기 오류: %s", skill_md_path, e)
        return None
def _list_skills(skills_dir: Path, source: str) -> list[SkillMetadata]:
    """List every skill found in a single skills directory (internal helper).

    Scans the skills directory for subdirectories containing a SKILL.md file,
    parses their YAML frontmatter, and returns the skill metadata.

    Skill layout:
        skills/
        ├── skill-name/
        │   ├── SKILL.md     # required: instructions with YAML frontmatter
        │   ├── script.py    # optional: supporting file
        │   └── config.json  # optional: supporting file

    Args:
        skills_dir: Path to the skills directory.
        source: Origin of the skills ('user' or 'project').

    Returns:
        List of skill metadata dicts with name, description, path, and source.
    """
    skills_dir = skills_dir.expanduser()
    if not skills_dir.exists():
        return []
    # Canonicalize the base directory once for the containment checks below.
    try:
        resolved_base = skills_dir.resolve()
    except (OSError, RuntimeError):
        # The base directory itself cannot be resolved; bail out safely.
        return []
    found: list[SkillMetadata] = []
    for entry in skills_dir.iterdir():
        skill_file = entry / "SKILL.md"
        # Security: reject entries (or SKILL.md files) that are symlinks
        # escaping the skills directory; skip non-directories and
        # directories without a SKILL.md.
        if (
            _is_safe_path(entry, resolved_base)
            and entry.is_dir()
            and skill_file.exists()
            and _is_safe_path(skill_file, resolved_base)
        ):
            parsed = _parse_skill_metadata(skill_file, source=source)
            if parsed:
                found.append(parsed)
    return found
def list_skills(*, user_skills_dir: Path | None = None, project_skills_dir: Path | None = None) -> list[SkillMetadata]:
    """List skills from the user and/or project directories.

    When both directories are given, a project skill whose name matches a
    user skill overrides the user skill.

    Args:
        user_skills_dir: Path to the user-level skills directory.
        project_skills_dir: Path to the project-level skills directory.

    Returns:
        Merged list of skill metadata from both sources, with project skills
        taking precedence over user skills on name collisions.
    """
    merged: dict[str, SkillMetadata] = {}
    # User skills form the base layer; project skills are loaded second and
    # override/extend entries with the same name.
    layers = (
        (user_skills_dir, "user"),
        (project_skills_dir, "project"),
    )
    for directory, origin in layers:
        if directory:
            for entry in _list_skills(directory, source=origin):
                merged[entry["name"]] = entry
    return list(merged.values())

View File

@@ -0,0 +1,273 @@
"""에이전트 기술을 시스템 프롬프트에 로드하고 노출하기 위한 미들웨어.
이 미들웨어는 점진적 노출(progressive disclosure)을 통해 Anthropic의 "Agent Skills" 패턴을 구현합니다:
1. 세션 시작 시 SKILL.md 파일에서 YAML frontmatter 파싱
2. 시스템 프롬프트에 기술 메타데이터(이름 + 설명) 주입
3. 에이전트는 작업과 관련이 있을 때 SKILL.md의 전체 내용을 읽음
기술 디렉토리 구조 (에이전트별 + 프로젝트):
사용자 수준: ~/.deepagents/{AGENT_NAME}/skills/
프로젝트 수준: {PROJECT_ROOT}/.deepagents/skills/
구조 예시:
~/.deepagents/{AGENT_NAME}/skills/
├── web-research/
│ ├── SKILL.md # 필수: YAML frontmatter + 지침
│ └── helper.py # 선택 사항: 지원 파일
├── code-review/
│ ├── SKILL.md
│ └── checklist.md
.deepagents/skills/
├── project-specific/
│ └── SKILL.md # 프로젝트 전용 기술
"""
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import NotRequired, TypedDict, cast
from langchain.agents.middleware.types import (
AgentMiddleware,
AgentState,
ModelRequest,
ModelResponse,
)
from langgraph.runtime import Runtime
from deepagents_cli.skills.load import SkillMetadata, list_skills
class SkillsState(AgentState):
    """State for the skills middleware."""
    skills_metadata: NotRequired[list[SkillMetadata]]
    """List of loaded skill metadata (name, description, path)."""
class SkillsStateUpdate(TypedDict):
    """State update emitted by the skills middleware."""
    skills_metadata: list[SkillMetadata]
    """List of loaded skill metadata (name, description, path)."""
# Skills-system documentation injected into the agent's system prompt.
# Placeholders: {skills_locations} and {skills_list} are filled in at runtime.
SKILLS_SYSTEM_PROMPT = """
## 기술 시스템 (Skills System)
당신은 전문적인 능력과 도메인 지식을 제공하는 기술 라이브러리에 접근할 수 있습니다.
{skills_locations}
**사용 가능한 기술:**
{skills_list}
**기술 사용 방법 (점진적 노출):**
기술은 **점진적 노출(progressive disclosure)** 패턴을 따릅니다. 당신은 기술이 존재한다는 것(위의 이름 + 설명)은 알고 있지만, 필요할 때만 전체 지침을 읽습니다:
1. **기술이 적용되는 시기 파악**: 사용자의 작업이 기술의 설명과 일치하는지 확인하십시오.
2. **기술의 전체 지침 읽기**: 위의 기술 목록은 read_file과 함께 사용할 정확한 경로를 보여줍니다.
3. **기술의 지침 따르기**: SKILL.md에는 단계별 워크플로우, 권장 사항 및 예시가 포함되어 있습니다.
4. **지원 파일 접근**: 기술에는 Python 스크립트, 설정 또는 참조 문서가 포함될 수 있습니다. 절대 경로를 사용하십시오.
**기술을 사용해야 하는 경우:**
- 사용자의 요청이 기술의 도메인과 일치할 때 (예: "X 조사해줘" → web-research 기술)
- 전문 지식이나 구조화된 워크플로우가 필요할 때
- 기술이 복잡한 작업에 대해 검증된 패턴을 제공할 때
**기술은 자체 문서화됨:**
- 각 SKILL.md는 기술이 수행하는 작업과 사용 방법을 정확하게 알려줍니다.
- 위의 기술 목록은 각 기술의 SKILL.md 파일에 대한 전체 경로를 보여줍니다.
**기술 스크립트 실행:**
기술에는 Python 스크립트나 기타 실행 파일이 포함될 수 있습니다. 항상 기술 목록의 절대 경로를 사용하십시오.
**워크플로우 예시:**
사용자: "양자 컴퓨팅의 최신 개발 동향을 조사해 줄 수 있어?"
1. 위에서 사용 가능한 기술 확인 → 전체 경로와 함께 "web-research" 기술 확인
2. 목록에 표시된 경로를 사용하여 기술 읽기
3. 기술의 조사 워크플로우 따르기 (조사 → 정리 → 합성)
4. 절대 경로와 함께 헬퍼 스크립트 사용
주의: 기술은 당신을 더 유능하고 일관성 있게 만드는 도구입니다. 의심스러울 때는 해당 작업에 대한 기술이 있는지 확인하십시오!
"""
class SkillsMiddleware(AgentMiddleware):
    """Middleware that loads agent skills and exposes them to the model.

    Implements Anthropic's agent-skills pattern:
    - Loads skill metadata (name, description) from YAML frontmatter before each run.
    - Injects a skills listing into the system prompt for discoverability.
    - The agent reads the full SKILL.md only when relevant (progressive disclosure).

    Supports both user-level and project-level skills:
    - User skills: ``~/.deepagents/{AGENT_NAME}/skills/``
    - Project skills: ``{PROJECT_ROOT}/.deepagents/skills/``
    - Project skills override user skills with the same name.

    Args:
        skills_dir: Path to the user-level skills directory (per agent).
        assistant_id: Agent identifier used for path references in prompts.
        project_skills_dir: Optional path to the project-level skills directory.
    """

    state_schema = SkillsState

    def __init__(
        self,
        *,
        skills_dir: str | Path,
        assistant_id: str,
        project_skills_dir: str | Path | None = None,
    ) -> None:
        """Initialize the skills middleware.

        Args:
            skills_dir: Path to the user-level skills directory.
            assistant_id: Agent identifier.
            project_skills_dir: Optional path to the project-level skills directory.
        """
        self.skills_dir = Path(skills_dir).expanduser()
        self.assistant_id = assistant_id
        self.project_skills_dir = (
            Path(project_skills_dir).expanduser() if project_skills_dir else None
        )
        # Shortened user-skills path used only for prompt display.
        self.user_skills_display = f"~/.deepagents/{assistant_id}/skills"
        self.system_prompt_template = SKILLS_SYSTEM_PROMPT

    def _format_skills_locations(self) -> str:
        """Format the skill locations for display in the system prompt."""
        locations = [f"**사용자 기술**: `{self.user_skills_display}`"]
        if self.project_skills_dir:
            locations.append(f"**프로젝트 기술**: `{self.project_skills_dir}` (사용자 기술을 오버라이드함)")
        return "\n".join(locations)

    def _format_skills_list(self, skills: list[SkillMetadata]) -> str:
        """Format skill metadata for display in the system prompt."""
        if not skills:
            locations = [f"{self.user_skills_display}/"]
            if self.project_skills_dir:
                locations.append(f"{self.project_skills_dir}/")
            return f"(현재 사용 가능한 기술이 없습니다. {' 또는 '.join(locations)} 에 기술을 생성할 수 있습니다)"
        # Group skills by their source directory.
        user_skills = [s for s in skills if s["source"] == "user"]
        project_skills = [s for s in skills if s["source"] == "project"]
        lines = []
        if user_skills:
            lines.append("**사용자 기술:**")
            for skill in user_skills:
                lines.append(f"- **{skill['name']}**: {skill['description']}")
                lines.append(f" → 전체 지침을 보려면 `{skill['path']}` 읽기")
            lines.append("")
        if project_skills:
            lines.append("**프로젝트 기술:**")
            for skill in project_skills:
                lines.append(f"- **{skill['name']}**: {skill['description']}")
                lines.append(f" → 전체 지침을 보려면 `{skill['path']}` 읽기")
        return "\n".join(lines)

    def _augmented_system_prompt(self, request: ModelRequest) -> str:
        """Build the system prompt with the skills section appended.

        Shared by the sync and async wrap paths, which previously duplicated
        this logic (and accessed request.state inconsistently).
        """
        # state_schema guarantees the state conforms to SkillsState.
        state = cast("SkillsState", request.state)
        skills_metadata = state.get("skills_metadata", [])
        skills_section = self.system_prompt_template.format(
            skills_locations=self._format_skills_locations(),
            skills_list=self._format_skills_list(skills_metadata),
        )
        if request.system_prompt:
            return request.system_prompt + "\n\n" + skills_section
        return skills_section

    def before_agent(self, state: SkillsState, runtime: Runtime) -> SkillsStateUpdate | None:
        """Load skill metadata before the agent runs.

        Skills are reloaded on every interaction with the agent so that
        changes in the skills directories are picked up.

        Args:
            state: Current agent state.
            runtime: Runtime context.

        Returns:
            State update with ``skills_metadata`` populated.
        """
        skills = list_skills(
            user_skills_dir=self.skills_dir,
            project_skills_dir=self.project_skills_dir,
        )
        return SkillsStateUpdate(skills_metadata=skills)

    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Inject the skills documentation into the system prompt (sync path).

        Runs on every model call so skills information is always available.

        Args:
            request: Model request being processed.
            handler: Handler to invoke with the modified request.

        Returns:
            The handler's model response.
        """
        return handler(request.override(system_prompt=self._augmented_system_prompt(request)))

    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelResponse:
        """Inject the skills documentation into the system prompt (async path).

        Args:
            request: Model request being processed.
            handler: Handler to invoke with the modified request.

        Returns:
            The handler's model response.
        """
        return await handler(request.override(system_prompt=self._augmented_system_prompt(request)))

View File

@@ -0,0 +1,116 @@
"""Utilities for accurate token counting using LangChain models."""
from pathlib import Path
from langchain_core.messages import SystemMessage
from deepagents_cli.config import console, settings
def calculate_baseline_tokens(model, agent_dir: Path, system_prompt: str, assistant_id: str) -> int:
    """Calculate baseline context tokens using the model's official tokenizer.

    Uses the model's ``get_num_tokens_from_messages()`` method to get accurate
    token counts for the initial context (system prompt + agent.md).

    Note: Tool definitions cannot be accurately counted before the first API
    call due to LangChain limitations. They will be included in the total
    after the first message is sent (~5,000 tokens).

    Args:
        model: LangChain model instance (ChatAnthropic or ChatOpenAI)
        agent_dir: Path to agent directory containing agent.md
        system_prompt: The base system prompt string
        assistant_id: The agent identifier for path references

    Returns:
        Token count for system prompt + agent.md (tools not included), or 0
        when token counting fails.
    """
    # Load user-level agent.md content, if present.
    agent_md_path = agent_dir / "agent.md"
    user_memory = agent_md_path.read_text() if agent_md_path.exists() else ""

    # Function-scoped import mirrors the original; presumably avoids an import
    # cycle with .config — keep it local.
    from .config import _find_project_agent_md, _find_project_root

    # Load project-level agent.md content (best effort).
    project_memory = ""
    project_root = _find_project_root()
    if project_root:
        project_md_paths = _find_project_agent_md(project_root)
        if project_md_paths:
            try:
                # Combine all project agent.md files (if multiple exist).
                project_memory = "\n\n".join(path.read_text() for path in project_md_paths)
            except Exception as e:
                # Previously this swallowed errors silently; keep the
                # best-effort behavior but surface a warning so an
                # unreadable project agent.md is noticed.
                console.print(f"[yellow]Warning: Could not read project agent.md: {e}[/yellow]")

    # Build the complete system prompt as it will be sent.
    # This mimics what AgentMemoryMiddleware.wrap_model_call() does.
    memory_section = (
        f"<user_memory>\n{user_memory or '(No user agent.md)'}\n</user_memory>\n\n"
        f"<project_memory>\n{project_memory or '(No project agent.md)'}\n</project_memory>"
    )
    # Get the long-term memory system prompt.
    memory_system_prompt = get_memory_system_prompt(
        assistant_id, project_root, bool(project_memory)
    )
    # Combine all parts in the same order as the middleware.
    full_system_prompt = memory_section + "\n\n" + system_prompt + "\n\n" + memory_system_prompt

    # Count tokens using the model's official method.
    messages = [SystemMessage(content=full_system_prompt)]
    try:
        # Note: the tools parameter is not supported by LangChain's token
        # counting; tool tokens appear in API usage after the first message.
        return model.get_num_tokens_from_messages(messages)
    except Exception as e:
        # Fallback if token counting fails.
        console.print(f"[yellow]Warning: Could not calculate baseline tokens: {e}[/yellow]")
        return 0
def get_memory_system_prompt(
    assistant_id: str, project_root: Path | None = None, has_project_memory: bool = False
) -> str:
    """Build the long-term memory system prompt text.

    Args:
        assistant_id: The agent identifier for path references
        project_root: Path to the detected project root (if any)
        has_project_memory: Whether project memory was loaded
    """
    # Import from the agent_memory middleware at call time.
    from .agent_memory import LONGTERM_MEMORY_SYSTEM_PROMPT

    agent_dir = settings.get_agent_dir(assistant_id)

    # Describe the project memory situation and its .deepagents directory.
    if project_root is None:
        project_memory_info = "None (not in a git project)"
        project_deepagents_dir = "[project-root]/.deepagents (not in a project)"
    else:
        suffix = "(detected)" if has_project_memory else "(no agent.md found)"
        project_memory_info = f"`{project_root}` {suffix}"
        project_deepagents_dir = f"{project_root}/.deepagents"

    return LONGTERM_MEMORY_SYSTEM_PROMPT.format(
        agent_dir_absolute=str(agent_dir),
        agent_dir_display=f"~/.deepagents/{assistant_id}",
        project_memory_info=project_memory_info,
        project_deepagents_dir=project_deepagents_dir,
    )

View File

@@ -0,0 +1,183 @@
"""CLI 에이전트를 위한 사용자 정의 도구."""
from typing import Any, Literal
import requests # type: ignore
from markdownify import markdownify # type: ignore
from tavily import TavilyClient # type: ignore
from deepagents_cli.config import settings
# Initialize Tavily client if API key is available.
# Left as None when no key is configured; web_search below checks for this
# and returns a configuration-error payload instead of raising.
tavily_client = TavilyClient(api_key=settings.tavily_api_key) if settings.has_tavily else None
def http_request(
    url: str,
    method: str = "GET",
    headers: dict[str, str] | None = None,
    data: str | dict | None = None,
    params: dict[str, str] | None = None,
    timeout: int = 30,
) -> dict[str, Any]:
    """Sends an HTTP request to an API or web service.

    Args:
        url: The URL to target
        method: HTTP method (GET, POST, PUT, DELETE, etc.)
        headers: HTTP headers to include
        data: Request body data (string or dict); dicts are sent as JSON
        params: URL query parameters
        timeout: Request timeout in seconds

    Returns:
        Dictionary containing success, status_code, headers, content, and url.
        On failure, status_code is 0 and content holds an error description.
    """
    try:
        kwargs: dict[str, Any] = {"url": url, "method": method.upper(), "timeout": timeout}
        if headers:
            kwargs["headers"] = headers
        if params:
            kwargs["params"] = params
        if data:
            # Dicts are serialized as a JSON body; strings are sent verbatim.
            if isinstance(data, dict):
                kwargs["json"] = data
            else:
                kwargs["data"] = data
        response = requests.request(**kwargs)
        try:
            content = response.json()
        except ValueError:
            # Non-JSON body: response.json() raises a JSONDecodeError, which
            # is a ValueError subclass. (Was a bare `except:`, which would
            # also have swallowed KeyboardInterrupt/SystemExit.)
            content = response.text
        return {
            "success": response.status_code < 400,
            "status_code": response.status_code,
            "headers": dict(response.headers),
            "content": content,
            "url": response.url,
        }
    except requests.exceptions.Timeout:
        return {
            "success": False,
            "status_code": 0,
            "headers": {},
            "content": f"{timeout}초 후 요청 시간이 초과되었습니다",
            "url": url,
        }
    except requests.exceptions.RequestException as e:
        return {
            "success": False,
            "status_code": 0,
            "headers": {},
            "content": f"요청 오류: {e!s}",
            "url": url,
        }
    except Exception as e:
        return {
            "success": False,
            "status_code": 0,
            "headers": {},
            "content": f"요청 생성 오류: {e!s}",
            "url": url,
        }
def web_search(
    query: str,
    max_results: int = 5,
    topic: Literal["general", "news", "finance"] = "general",
    include_raw_content: bool = False,
) -> dict[str, Any]:
    """Performs a web search using Tavily for current information and documents.

    This tool searches the web and returns relevant results. After receiving results,
    you should synthesize the information into a natural response that helps the user.

    Args:
        query: The search query (specific and detailed)
        max_results: Number of results to return (default: 5)
        topic: The topic type of the search - "general" for most queries, "news" for current events
        include_raw_content: Include full page content (Warning: uses more tokens)

    Returns:
        Dictionary containing:
        - results: List of search results, each containing:
            - title: Page title
            - url: Page URL
            - content: Relevant snippet from the page
            - score: Relevance score (0-1)
        - query: Original search query
        On failure, a dictionary with "error" and "query" keys instead.

    IMPORTANT: After using this tool:
    1. Read the 'content' field of each result
    2. Extract relevant information that answers the user's question
    3. Synthesize this into a clear, natural language response
    4. Cite sources by mentioning the page title or URL
    5. Do NOT show raw JSON to the user - always provide a formatted response
    """
    # No client means the API key was never configured; report instead of raising.
    if tavily_client is None:
        return {
            "error": "Tavily API 키가 구성되지 않았습니다. TAVILY_API_KEY 환경 변수를 설정하십시오.",
            "query": query,
        }
    try:
        return tavily_client.search(
            query,
            max_results=max_results,
            include_raw_content=include_raw_content,
            topic=topic,
        )
    except Exception as e:
        return {"error": f"웹 검색 오류: {e!s}", "query": query}
def fetch_url(url: str, timeout: int = 30) -> dict[str, Any]:
    """Fetches content from a URL and converts HTML to markdown format.

    This tool fetches web page content and converts it to clean markdown text,
    making it easier to read and process HTML content. After receiving markdown,
    you should synthesize the information into a natural response that helps the user.

    Args:
        url: The URL to fetch (must be a valid HTTP/HTTPS URL)
        timeout: Request timeout in seconds (default: 30)

    Returns:
        Dictionary containing:
        - success: Whether the request was successful
        - url: Final URL after redirects
        - markdown_content: The page content converted to markdown
        - status_code: HTTP status code
        - content_length: Length of markdown content (in characters)
        On failure: success, error, and url keys.

    IMPORTANT: After using this tool:
    1. Read the markdown_content
    2. Extract relevant information that answers the user's question
    3. Synthesize this into a clear, natural language response
    4. Do NOT show raw markdown to the user unless specifically requested
    """
    try:
        response = requests.get(
            url,
            timeout=timeout,
            headers={"User-Agent": "Mozilla/5.0 (compatible; DeepAgents/1.0)"},
        )
        response.raise_for_status()
        # Convert HTML content to markdown.
        markdown_content = markdownify(response.text)
        return {
            # The docstring always promised a `success` flag, but the return
            # dicts never included it — add it (backward-compatible).
            "success": True,
            "url": str(response.url),
            "markdown_content": markdown_content,
            "status_code": response.status_code,
            "content_length": len(markdown_content),
        }
    except Exception as e:
        return {"success": False, "error": f"URL 가져오기 오류: {e!s}", "url": url}

View File

@@ -0,0 +1,644 @@
"""CLI를 위한 UI 렌더링 및 디스플레이 유틸리티."""
import json
import re
import shutil
from pathlib import Path
from typing import Any
from rich import box
from rich.markup import escape
from rich.panel import Panel
from rich.text import Text
from .config import COLORS, COMMANDS, DEEP_AGENTS_ASCII, MAX_ARG_LENGTH, console
from .file_ops import FileOperationRecord
def truncate_value(value: str, max_length: int = MAX_ARG_LENGTH) -> str:
    """Truncate *value* with a trailing ellipsis when it exceeds *max_length*."""
    if len(value) <= max_length:
        return value
    return value[:max_length] + "..."
def format_tool_display(tool_name: str, tool_args: dict) -> str:
    """Render a tool call with tool-aware smart formatting.

    Shows the most relevant information for each tool type rather than
    dumping every argument.

    Args:
        tool_name: Name of the tool being invoked
        tool_args: Dictionary of tool arguments

    Returns:
        A string formatted for display (e.g. "read_file(config.py)")

    Examples:
        read_file(path="/long/path/file.py") → "read_file(file.py)"
        web_search(query="how to code", max_results=5) → 'web_search("how to code")'
        shell(command="pip install foo") → 'shell("pip install foo")'
    """
    # The three filesystem tools share one formatter.
    if tool_name in ("read_file", "write_file", "edit_file"):
        return _format_file_tool(tool_name, tool_args)
    # Per-tool formatters, dispatched by name.
    dispatch = {
        "web_search": _format_web_search_tool,
        "grep": _format_grep_tool,
        "shell": _format_shell_tool,
        "ls": _format_ls_tool,
        "glob": _format_glob_tool,
        "http_request": _format_http_request_tool,
        "fetch_url": _format_fetch_url_tool,
        "task": _format_task_tool,
        "write_todos": _format_write_todos_tool,
    }
    formatter = dispatch.get(tool_name)
    if formatter is not None:
        return formatter(tool_name, tool_args)
    # Fallback: generic key=value rendering with truncated values.
    rendered = ", ".join(f"{k}={truncate_value(str(v), 20)}" for k, v in tool_args.items())
    return f"{tool_name}({rendered})"
def _abbreviate_path(path_str: str, max_length: int = 60) -> str:
"""파일 경로를 지능적으로 축약합니다 - 베이스네임 또는 상대 경로를 표시합니다."""
try:
path = Path(path_str)
# If it's just a filename (no directory parts), return as-is
if len(path.parts) == 1:
return path_str
# Try to get relative path from current working directory
try:
rel_path = path.relative_to(Path.cwd())
rel_str = str(rel_path)
# Use relative if it's shorter and not too long
if len(rel_str) < len(path_str) and len(rel_str) <= max_length:
return rel_str
except (ValueError, Exception):
pass
# If absolute path is reasonable length, use it
if len(path_str) <= max_length:
return path_str
# Otherwise, just show basename (filename only)
return path.name
except Exception:
# Fallback to original string if any error
return truncate_value(path_str, max_length)
def _format_file_tool(tool_name: str, tool_args: dict) -> str:
    """Render a filesystem tool call as name(abbreviated-path)."""
    # Accept either "file_path" or "path" as the argument key.
    target = tool_args.get("file_path")
    if target is None:
        target = tool_args.get("path")
    if target is None:
        return f"{tool_name}(...)"
    return f"{tool_name}({_abbreviate_path(str(target))})"
def _format_web_search_tool(tool_name: str, tool_args: dict) -> str:
    """Render a web_search call as name("truncated query")."""
    if "query" not in tool_args:
        return f"{tool_name}()"
    query = truncate_value(str(tool_args["query"]), 100)
    return f'{tool_name}("{query}")'
def _format_grep_tool(tool_name: str, tool_args: dict) -> str:
    """Render a grep call as name("truncated pattern")."""
    if "pattern" not in tool_args:
        return f"{tool_name}()"
    pattern = truncate_value(str(tool_args["pattern"]), 70)
    return f'{tool_name}("{pattern}")'
def _format_shell_tool(tool_name: str, tool_args: dict) -> str:
    """Render a shell call as name("truncated command")."""
    if "command" not in tool_args:
        return f"{tool_name}()"
    command = truncate_value(str(tool_args["command"]), 120)
    return f'{tool_name}("{command}")'
def _format_ls_tool(tool_name: str, tool_args: dict) -> str:
    """Render an ls call as name(abbreviated-path), or name() without one."""
    target = tool_args.get("path")
    if not target:
        return f"{tool_name}()"
    return f"{tool_name}({_abbreviate_path(str(target))})"
def _format_glob_tool(tool_name: str, tool_args: dict) -> str:
    """Render a glob call as name("truncated pattern")."""
    if "pattern" not in tool_args:
        return f"{tool_name}()"
    pattern = truncate_value(str(tool_args["pattern"]), 80)
    return f'{tool_name}("{pattern}")'
def _format_http_request_tool(tool_name: str, tool_args: dict) -> str:
    """Render an http_request call as name(METHOD truncated-url)."""
    pieces: list[str] = []
    if "method" in tool_args:
        pieces.append(str(tool_args["method"]).upper())
    if "url" in tool_args:
        pieces.append(truncate_value(str(tool_args["url"]), 80))
    if not pieces:
        return f"{tool_name}()"
    return f"{tool_name}({' '.join(pieces)})"
def _format_fetch_url_tool(tool_name: str, tool_args: dict) -> str:
    """Render a fetch_url call as name("truncated url")."""
    if "url" not in tool_args:
        return f"{tool_name}()"
    url = truncate_value(str(tool_args["url"]), 80)
    return f'{tool_name}("{url}")'
def _format_task_tool(tool_name: str, tool_args: dict) -> str:
    """Render a task call as name("truncated description")."""
    if "description" not in tool_args:
        return f"{tool_name}()"
    description = truncate_value(str(tool_args["description"]), 100)
    return f'{tool_name}("{description}")'
def _format_write_todos_tool(tool_name: str, tool_args: dict) -> str:
if "todos" in tool_args and isinstance(tool_args["todos"], list):
count = len(tool_args["todos"])
return f"{tool_name}({count} items)"
return f"{tool_name}()"
def format_tool_message_content(content: Any) -> str:
    """Normalize ToolMessage content into a printable string."""
    if content is None:
        return ""
    if not isinstance(content, list):
        return str(content)
    # Lists are rendered one item per line; non-strings are JSON-encoded
    # when possible, falling back to str().
    rendered: list[str] = []
    for item in content:
        if isinstance(item, str):
            rendered.append(item)
            continue
        try:
            rendered.append(json.dumps(item))
        except Exception:
            rendered.append(str(item))
    return "\n".join(rendered)
class TokenTracker:
    """Tracks token usage across the conversation."""

    def __init__(self) -> None:
        # Baseline system context (system prompt + agent.md; tools counted
        # only after the first API call).
        self.baseline_context = 0
        # Total context including conversation messages.
        self.current_context = 0
        # Output tokens from the most recent response.
        self.last_output = 0

    def set_baseline(self, tokens: int) -> None:
        """Record the baseline context size and reset the running total to it.

        Args:
            tokens: Baseline token count (system prompt + agent.md + tools)
        """
        self.baseline_context = tokens
        self.current_context = tokens

    def reset(self) -> None:
        """Drop back to the baseline (used by the /clear command)."""
        self.current_context = self.baseline_context
        self.last_output = 0

    def add(self, input_tokens: int, output_tokens: int) -> None:
        """Record token usage from one response.

        input_tokens IS the current context size (what was sent to the model).
        """
        self.current_context = input_tokens
        self.last_output = output_tokens

    def display_last(self) -> None:
        """Show the current context size after this turn."""
        if self.last_output >= 1000:
            console.print(f" 생성됨: {self.last_output:,} 토큰", style="dim")
        if self.current_context:
            console.print(f" 현재 컨텍스트: {self.current_context:,} 토큰", style="dim")

    def display_session(self) -> None:
        """Show a session-level token usage summary."""
        console.print("\n[bold]토큰 사용량:[/bold]", style=COLORS["primary"])
        # current > baseline implies at least one real API call has happened.
        has_conversation = self.current_context > self.baseline_context
        if self.baseline_context > 0:
            console.print(
                f" 기준(Baseline): {self.baseline_context:,} 토큰 [dim](시스템 + agent.md)[/dim]",
                style=COLORS["dim"],
            )
            if not has_conversation:
                # Before the first message, tool definitions are not counted yet.
                console.print(" [dim]참고: 도구 정의(~5k 토큰)는 첫 번째 메시지 이후에 포함됩니다[/dim]")
        if has_conversation:
            delta = self.current_context - self.baseline_context
            console.print(f" 도구 + 대화: {delta:,} 토큰", style=COLORS["dim"])
        console.print(f" 합계: {self.current_context:,} 토큰", style="bold " + COLORS["dim"])
        console.print()
def render_todo_list(todos: list[dict]) -> None:
    """Render the todo list as a rich panel with status checkboxes."""
    if not todos:
        return
    # Map each status to its (icon, style) pair; unknown statuses fall back
    # to the "pending" rendering.
    appearance = {
        "completed": ("", "green"),
        "in_progress": ("", "yellow"),
        "pending": ("", "dim"),
    }
    rendered = []
    for todo in todos:
        icon, style = appearance.get(todo.get("status", "pending"), ("", "dim"))
        rendered.append(f"[{style}]{icon} {todo.get('content', '')}[/{style}]")
    console.print(
        Panel(
            "\n".join(rendered),
            title="[bold]작업 목록[/bold]",
            border_style="cyan",
            box=box.ROUNDED,
            padding=(0, 1),
        )
    )
def _format_line_span(start: int | None, end: int | None) -> str:
if start is None and end is None:
return ""
if start is not None and end is None:
return f"({start}행부터)"
if start is None and end is not None:
return f"({end}행까지)"
if start == end:
return f"({start}행)"
return f"({start}-{end}행)"
def render_file_operation(record: FileOperationRecord) -> None:
    """Render a brief summary of a filesystem tool call (plus its diff, if any)."""
    labels = {
        "read_file": "읽기",
        "write_file": "쓰기",
        "edit_file": "업데이트",
    }
    label = labels.get(record.tool_name, record.tool_name)
    header = Text()
    header.append("", style=COLORS["tool"])
    header.append(f"{label}({record.display_path})", style=f"bold {COLORS['tool']}")
    console.print(header)

    def _print_detail(message: str, *, style: str = COLORS["dim"]) -> None:
        # One indented detail line under the header.
        line = Text()
        line.append("", style=style)
        line.append(message, style=style)
        console.print(line)

    if record.status == "error":
        _print_detail(record.error or "파일 작업 실행 오류", style="red")
        return

    metrics = record.metrics
    if record.tool_name == "read_file":
        detail = f"{metrics.lines_read}줄 읽음"
        span = _format_line_span(metrics.start_line, metrics.end_line)
        if span:
            detail = f"{detail} {span}"
    else:
        # write_file and edit_file share the added/removed suffix.
        if record.tool_name == "write_file":
            detail = f"{metrics.lines_written}줄 씀"
        else:
            detail = f"{metrics.lines_written}줄 편집됨"
        if metrics.lines_added or metrics.lines_removed:
            detail = f"{detail} (+{metrics.lines_added} / -{metrics.lines_removed})"
    _print_detail(detail)

    # Skip the diff for HIL-approved successful operations — the user
    # already saw it during approval.
    if record.diff and not (record.hitl_approved and record.status == "success"):
        render_diff(record)
def render_diff(record: FileOperationRecord) -> None:
    """Render the diff attached to a file operation, if one exists."""
    if record.diff:
        render_diff_block(record.diff, f"{record.display_path} 차이(Diff)")
def _wrap_diff_line(
    code: str,
    marker: str,
    color: str,
    line_num: int | None,
    width: int,
    term_width: int,
) -> list[str]:
    """Wrap a long diff line with proper indentation.

    Args:
        code: Code content to wrap
        marker: Diff marker ('+', '-', ' ')
        color: Color for this line
        line_num: Line number to display (None for continuation lines)
        width: Width of the line-number column
        term_width: Terminal width

    Returns:
        List of formatted lines (may be multiple when wrapped)
    """
    # Escape Rich markup in code content so it renders literally.
    code = escape(code)
    prefix_len = width + 4  # line_num + space + marker + 2 spaces
    # Clamp to at least one column: on a very narrow terminal the raw value
    # could be <= 0, which previously produced empty chunks and made the
    # while-loop below spin forever.
    available_width = max(term_width - prefix_len, 1)
    if len(code) <= available_width:
        if line_num is not None:
            return [f"[dim]{line_num:>{width}}[/dim] [{color}]{marker} {code}[/{color}]"]
        return [f"{' ' * width} [{color}]{marker} {code}[/{color}]"]
    lines = []
    remaining = code
    first = True
    while remaining:
        if len(remaining) <= available_width:
            chunk = remaining
            remaining = ""
        else:
            # Try to break at a good point (space, comma, etc.).
            chunk = remaining[:available_width]
            # Look for a good break point near the end of the chunk.
            break_point = max(
                chunk.rfind(" "),
                chunk.rfind(","),
                chunk.rfind("("),
                chunk.rfind(")"),
            )
            if break_point > available_width - 20:
                # Found a good break point.
                chunk = remaining[: break_point + 1]
                remaining = remaining[break_point + 1 :]
            else:
                # No good break point, just split.
                chunk = remaining[:available_width]
                remaining = remaining[available_width:]
        if first and line_num is not None:
            lines.append(f"[dim]{line_num:>{width}}[/dim] [{color}]{marker} {chunk}[/{color}]")
            first = False
        else:
            lines.append(f"{' ' * width} [{color}]{marker} {chunk}[/{color}]")
    return lines
def format_diff_rich(diff_lines: list[str]) -> str:
    """Format unified-diff lines with line numbers and colors.

    Args:
        diff_lines: Lines of a unified diff

    Returns:
        A single Rich-markup string with numbered, colorized diff lines.
    """
    if not diff_lines:
        return "[dim]감지된 변경 사항 없음[/dim]"
    # Get terminal width
    term_width = shutil.get_terminal_size().columns
    # Find max line number for width calculation
    # NOTE(review): only hunk *start* numbers are inspected (not start+count),
    # so the column width can be slightly narrow when a hunk crosses a power
    # of ten — confirm whether that matters for display.
    max_line = max(
        (
            int(m.group(i))
            for line in diff_lines
            if (m := re.match(r"@@ -(\d+)(?:,\d+)? \+(\d+)", line))
            for i in (1, 2)
        ),
        default=0,
    )
    width = max(3, len(str(max_line)))
    formatted_lines = []
    # old_num / new_num track the current line number on each side of the
    # diff; reset by every @@ hunk header.
    old_num = new_num = 0
    # Rich colors with backgrounds for better visibility
    # White text on dark backgrounds for additions/deletions
    addition_color = "white on dark_green"
    deletion_color = "white on dark_red"
    context_color = "dim"
    for line in diff_lines:
        if line.strip() == "...":
            # Elision marker between hunks is passed through dimmed.
            formatted_lines.append(f"[{context_color}]...[/{context_color}]")
        elif line.startswith(("---", "+++")):
            # File-name header lines are not displayed.
            continue
        elif m := re.match(r"@@ -(\d+)(?:,\d+)? \+(\d+)", line):
            old_num, new_num = int(m.group(1)), int(m.group(2))
        elif line.startswith("-"):
            # Deletion: numbered on the old side only.
            formatted_lines.extend(_wrap_diff_line(line[1:], "-", deletion_color, old_num, width, term_width))
            old_num += 1
        elif line.startswith("+"):
            # Addition: numbered on the new side only.
            formatted_lines.extend(_wrap_diff_line(line[1:], "+", addition_color, new_num, width, term_width))
            new_num += 1
        elif line.startswith(" "):
            # Context line advances both counters.
            formatted_lines.extend(_wrap_diff_line(line[1:], " ", context_color, old_num, width, term_width))
            old_num += 1
            new_num += 1
    return "\n".join(formatted_lines)
def render_diff_block(diff: str, title: str) -> None:
    """Render a diff string with line numbers and colors, with a plain fallback."""
    try:
        formatted = format_diff_rich(diff.splitlines())
        console.print()
        console.print(f"[bold {COLORS['primary']}]═══ {title} ═══[/bold {COLORS['primary']}]")
        console.print(formatted)
        console.print()
    except (ValueError, AttributeError, IndexError, OSError):
        # Fall back to unformatted output if rich formatting fails.
        console.print()
        console.print(f"[bold {COLORS['primary']}]{title}[/bold {COLORS['primary']}]")
        console.print(diff)
        console.print()
def show_interactive_help() -> None:
    """Display the commands available during an interactive session."""
    primary = COLORS["primary"]
    dim = COLORS["dim"]
    console.print()
    console.print()
    console.print("[bold]대화형 명령:[/bold]", style=primary)
    console.print()
    for cmd, desc in COMMANDS.items():
        console.print(f" /{cmd:<12} {desc}", style=dim)
    console.print()
    console.print("[bold]편집 기능:[/bold]", style=primary)
    for entry in (
        " Enter 메시지 제출",
        " Alt+Enter 줄바꿈 삽입 (Mac의 경우 Option+Enter, 또는 ESC 후 Enter)",
        " Ctrl+E 외부 편집기에서 열기 (기본값 nano)",
        " Ctrl+T 자동 승인 모드 전환",
        " 방향키 입력 탐색",
        " Ctrl+C 입력 취소 또는 작업 중인 에이전트 중단",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]특수 기능:[/bold]", style=primary)
    for entry in (
        " @filename @를 입력하여 파일 자동 완성 및 콘텐츠 주입",
        " /command /를 입력하여 사용 가능한 명령 확인",
        " !command !를 입력하여 bash 명령 실행 (예: !ls, !git status)",
        " 입력하면 완성이 자동으로 나타납니다",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]자동 승인 모드:[/bold]", style=primary)
    for entry in (
        " Ctrl+T 자동 승인 모드 전환",
        " --auto-approve 자동 승인이 활성화된 상태로 CLI 시작 (명령줄을 통해)",
        " 활성화되면 도구 작업이 확인 프롬프트 없이 실행됩니다",
    ):
        console.print(entry, style=dim)
    console.print()
def show_help() -> None:
    """Display the top-level CLI help text."""
    primary = COLORS["primary"]
    dim = COLORS["dim"]
    console.print()
    console.print(DEEP_AGENTS_ASCII, style=f"bold {primary}")
    console.print()
    console.print("[bold]사용법:[/bold]", style=primary)
    # Usage and option lines are printed unstyled, matching the original.
    for entry in (
        " deepagents [OPTIONS] 대화형 세션 시작",
        " deepagents list 사용 가능한 모든 에이전트 나열",
        " deepagents reset --agent AGENT 에이전트를 기본 프롬프트로 초기화",
        " deepagents reset --agent AGENT --target SOURCE 에이전트를 다른 에이전트의 복사본으로 초기화",
        " deepagents help 이 도움말 메시지 표시",
    ):
        console.print(entry)
    console.print()
    console.print("[bold]옵션:[/bold]", style=primary)
    for entry in (
        " --agent NAME 에이전트 식별자 (기본값: agent)",
        " --model MODEL 사용할 모델 (예: claude-sonnet-4-5-20250929, gpt-4o)",
        " --auto-approve 프롬프트 없이 도구 사용 자동 승인",
        " --sandbox TYPE 실행을 위한 원격 샌드박스 (modal, runloop, daytona)",
        " --sandbox-id ID 기존 샌드박스 재사용 (생성/정리 건너뜀)",
    ):
        console.print(entry)
    console.print()
    console.print("[bold]예시:[/bold]", style=primary)
    for entry in (
        " deepagents # 기본 에이전트로 시작",
        " deepagents --agent mybot # 'mybot'이라는 이름의 에이전트로 시작",
        " deepagents --model gpt-4o # 특정 모델 사용 (공급자 자동 감지)",
        " deepagents --auto-approve # 자동 승인이 활성화된 상태로 시작",
        " deepagents --sandbox runloop # Runloop 샌드박스에서 코드 실행",
        " deepagents --sandbox modal # Modal 샌드박스에서 코드 실행",
        " deepagents --sandbox runloop --sandbox-id dbx_123 # 기존 샌드박스 재사용",
        " deepagents list # 모든 에이전트 나열",
        " deepagents reset --agent mybot # mybot을 기본값으로 초기화",
        " deepagents reset --agent mybot --target other # mybot을 'other' 에이전트의 복사본으로 초기화",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]장기 기억(Long-term Memory):[/bold]", style=primary)
    for entry in (
        " 기본적으로 장기 기억은 'agent'라는 에이전트 이름을 사용하여 활성화됩니다.",
        " 기억에는 다음이 포함됩니다:",
        " - 지침이 포함된 영구 agent.md 파일",
        " - 세션 간 컨텍스트 저장을 위한 /memories/ 폴더",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]에이전트 저장소:[/bold]", style=primary)
    for entry in (
        " 에이전트는 다음 경로에 저장됩니다: ~/.deepagents/AGENT_NAME/",
        " 각 에이전트에는 프롬프트가 포함된 agent.md 파일이 있습니다",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]대화형 기능:[/bold]", style=primary)
    for entry in (
        " Enter 메시지 제출",
        " Alt+Enter 여러 줄 입력을 위한 줄바꿈 (Option+Enter 또는 ESC 후 Enter)",
        " Ctrl+J 줄바꿈 삽입 (대안)",
        " Ctrl+T 자동 승인 모드 전환",
        " 방향키 입력 탐색",
        " @filename @를 입력하여 파일 자동 완성 및 콘텐츠 주입",
        " /command /를 입력하여 사용 가능한 명령 확인 (자동 완성)",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]대화형 명령:[/bold]", style=primary)
    for entry in (
        " /help 사용 가능한 명령 및 기능 표시",
        " /clear 화면 지우기 및 대화 초기화",
        " /tokens 현재 세션의 토큰 사용량 표시",
        " /quit, /exit 세션 종료",
        " quit, exit, q 세션 종료 (입력하고 Enter 누름)",
    ):
        console.print(entry, style=dim)
    console.print()

View File

@@ -0,0 +1,102 @@
---
name: arxiv-search
description: Search arXiv preprint repository for papers in physics, mathematics, computer science, quantitative biology, and related fields
---
# arXiv Search Skill
This skill provides access to arXiv, a free distribution service and open-access archive for scholarly articles in physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering, systems science, and economics.
## When to Use This Skill
Use this skill when you need to:
- Find preprints and recent research papers before journal publication
- Search for papers in computational biology, bioinformatics, or systems biology
- Access mathematical or statistical methods papers relevant to biology
- Find machine learning papers applied to biological problems
- Get the latest research that may not yet be in PubMed
## How to Use
The skill provides a Python script that searches arXiv and returns formatted results.
### Basic Usage
**Note:** Always use the absolute path from your skills directory (shown in the system prompt above).
If running deepagents from a virtual environment:
```bash
.venv/bin/python [YOUR_SKILLS_DIR]/arxiv-search/arxiv_search.py "your search query" [--max-papers N]
```
Or for system Python:
```bash
python3 [YOUR_SKILLS_DIR]/arxiv-search/arxiv_search.py "your search query" [--max-papers N]
```
Replace `[YOUR_SKILLS_DIR]` with the absolute skills directory path from your system prompt (e.g., `~/.deepagents/agent/skills` or the full absolute path).
**Arguments:**
- `query` (required): The search query string (e.g., "neural networks protein structure", "single cell RNA-seq")
- `--max-papers` (optional): Maximum number of papers to retrieve (default: 10)
### Examples
Search for machine learning papers:
```bash
.venv/bin/python ~/.deepagents/agent/skills/arxiv-search/arxiv_search.py "deep learning drug discovery" --max-papers 5
```
Search for computational biology papers:
```bash
.venv/bin/python ~/.deepagents/agent/skills/arxiv-search/arxiv_search.py "protein folding prediction"
```
Search for bioinformatics methods:
```bash
.venv/bin/python ~/.deepagents/agent/skills/arxiv-search/arxiv_search.py "genome assembly algorithms"
```
## Output Format
The script returns formatted results with:
- **Title**: Paper title
- **Summary**: Abstract/summary text
Each paper is separated by blank lines for readability.
## Features
- **Relevance sorting**: Results ordered by relevance to query
- **Fast retrieval**: Direct API access with no authentication required
- **Simple interface**: Clean, easy-to-parse output
- **No API key required**: Free access to arXiv database
## Dependencies
This skill requires the `arxiv` Python package. The script will detect if it's missing and show an error.
**If you see "Error: arxiv package not installed":**
If running deepagents from a virtual environment (recommended), use the venv's Python:
```bash
.venv/bin/python -m pip install arxiv
```
Or for system-wide install:
```bash
python3 -m pip install arxiv
```
The package is not included in deepagents by default since it's skill-specific. Install it on-demand when first using this skill.
## Notes
- arXiv is particularly strong for:
- Computer science (cs.LG, cs.AI, cs.CV)
- Quantitative biology (q-bio)
- Statistics (stat.ML)
- Physics and mathematics
- Papers are preprints and may not be peer-reviewed
- Results include both recent uploads and older papers
- Best for computational/theoretical work in biology

View File

@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""arXiv Search.
Searches the arXiv preprint repository for research papers.
"""
import argparse
def query_arxiv(query: str, max_papers: int = 10) -> str:
    """Search arXiv and return formatted titles and summaries.

    Parameters
    ----------
    query : str
        The search query string.
    max_papers : int
        The maximum number of papers to retrieve (default: 10).

    Returns
    -------
    str
        The formatted search results, or an error message if the ``arxiv``
        package is missing or the query fails.
    """
    # The arxiv dependency is skill-specific, so import lazily and report a
    # helpful install hint instead of crashing when it is absent.
    try:
        import arxiv
    except ImportError:
        return "Error: arxiv package not installed. Install with: pip install arxiv"
    try:
        search = arxiv.Search(
            query=query,
            max_results=max_papers,
            sort_by=arxiv.SortCriterion.Relevance,
        )
        # One "Title/Summary" entry per paper, separated by blank lines.
        entries = (
            f"Title: {paper.title}\nSummary: {paper.summary}"
            for paper in arxiv.Client().results(search)
        )
        formatted = "\n\n".join(entries)
        return formatted if formatted else "No papers found on arXiv."
    except Exception as e:
        # Network/API failures are reported as text rather than raised, since
        # the caller simply prints whatever string comes back.
        return f"Error querying arXiv: {e}"
def main() -> None:
    """Parse CLI arguments, run the arXiv search, and print the results.

    Bug fix: the search result was previously computed but never printed,
    so the script produced no output at all.
    """
    parser = argparse.ArgumentParser(description="Search arXiv for research papers")
    parser.add_argument("query", type=str, help="Search query string")
    parser.add_argument(
        "--max-papers",
        type=int,
        default=10,
        help="Maximum number of papers to retrieve (default: 10)",
    )
    args = parser.parse_args()
    # Emit the formatted results (or error message) to stdout so callers of
    # this script actually see them.
    print(query_arxiv(args.query, max_papers=args.max_papers))
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,35 @@
---
name: langgraph-docs
description: Use this skill for requests related to LangGraph in order to fetch relevant documentation to provide accurate, up-to-date guidance.
---
# langgraph-docs
## Overview
This skill explains how to access LangGraph Python documentation to help answer questions and guide implementation.
## Instructions
### 1. Fetch the Documentation Index
Use the fetch_url tool to read the following URL:
https://docs.langchain.com/llms.txt
This provides a structured list of all available documentation with descriptions.
### 2. Select Relevant Documentation
Based on the question, identify 2-4 most relevant documentation URLs from the index. Prioritize:
- Specific how-to guides for implementation questions
- Core concept pages for understanding questions
- Tutorials for end-to-end examples
- Reference docs for API details
### 3. Fetch Selected Documentation
Use the fetch_url tool to read the selected documentation URLs.
### 4. Provide Accurate Guidance
After reading the documentation, complete the user's request.

View File

@@ -0,0 +1,365 @@
---
name: skill-creator
description: Guide for creating effective skills that extend agent capabilities with specialized knowledge, workflows, or tool integrations. Use this skill when the user asks to: (1) create a new skill, (2) make a skill, (3) build a skill, (4) set up a skill, (5) initialize a skill, (6) scaffold a skill, (7) update or modify an existing skill, (8) validate a skill, (9) learn about skill structure, (10) understand how skills work, or (11) get guidance on skill design patterns. Trigger on phrases like "create a skill", "new skill", "make a skill", "skill for X", "how do I create a skill", or "help me build a skill".
---
# Skill Creator
This skill provides guidance for creating effective skills.
## About Skills
Skills are modular, self-contained packages that extend agent capabilities by providing
specialized knowledge, workflows, and tools. Think of them as "onboarding guides" for specific
domains or tasks—they transform a general-purpose agent into a specialized agent
equipped with procedural knowledge and domain expertise.
### Skill Location for Deepagents
In deepagents CLI, skills are stored in `~/.deepagents/<agent>/skills/` where `<agent>` is your agent configuration name (default is `agent`). For example, with the default configuration, skills live at:
```
~/.deepagents/agent/skills/
├── skill-name-1/
│ └── SKILL.md
├── skill-name-2/
│ └── SKILL.md
└── ...
```
### What Skills Provide
1. Specialized workflows - Multi-step procedures for specific domains
2. Tool integrations - Instructions for working with specific file formats or APIs
3. Domain expertise - Company-specific knowledge, schemas, business logic
4. Bundled resources - Scripts, references, and assets for complex and repetitive tasks
## Core Principles
### Concise is Key
The context window is a public good. Skills share the context window with everything else the agent needs: system prompt, conversation history, other Skills' metadata, and the actual user request.
**Default assumption: The agent is already very capable.** Only add context the agent doesn't already have. Challenge each piece of information: "Does the agent really need this explanation?" and "Does this paragraph justify its token cost?"
Prefer concise examples over verbose explanations.
### Set Appropriate Degrees of Freedom
Match the level of specificity to the task's fragility and variability:
**High freedom (text-based instructions)**: Use when multiple approaches are valid, decisions depend on context, or heuristics guide the approach.
**Medium freedom (pseudocode or scripts with parameters)**: Use when a preferred pattern exists, some variation is acceptable, or configuration affects behavior.
**Low freedom (specific scripts, few parameters)**: Use when operations are fragile and error-prone, consistency is critical, or a specific sequence must be followed.
Think of the agent as exploring a path: a narrow bridge with cliffs needs specific guardrails (low freedom), while an open field allows many routes (high freedom).
### Anatomy of a Skill
Every skill consists of a required SKILL.md file and optional bundled resources:
```
skill-name/
├── SKILL.md (required)
│ ├── YAML frontmatter metadata (required)
│ │ ├── name: (required)
│ │ └── description: (required)
│ └── Markdown instructions (required)
└── Bundled Resources (optional)
├── scripts/ - Executable code (Python/Bash/etc.)
├── references/ - Documentation intended to be loaded into context as needed
└── assets/ - Files used in output (templates, icons, fonts, etc.)
```
#### SKILL.md (required)
Every SKILL.md consists of:
- **Frontmatter** (YAML): Contains `name` and `description` fields. These are the only fields that the agent reads to determine when the skill gets used, thus it is very important to be clear and comprehensive in describing what the skill is, and when it should be used.
- **Body** (Markdown): Instructions and guidance for using the skill. Only loaded AFTER the skill triggers (if at all).
#### Bundled Resources (optional)
##### Scripts (`scripts/`)
Executable code (Python/Bash/etc.) for tasks that require deterministic reliability or are repeatedly rewritten.
- **When to include**: When the same code is being rewritten repeatedly or deterministic reliability is needed
- **Example**: `scripts/rotate_pdf.py` for PDF rotation tasks
- **Benefits**: Token efficient, deterministic, may be executed without loading into context
- **Note**: Scripts may still need to be read by the agent for patching or environment-specific adjustments
##### References (`references/`)
Documentation and reference material intended to be loaded as needed into context to inform the agent's process and thinking.
- **When to include**: For documentation that the agent should reference while working
- **Examples**: `references/finance.md` for financial schemas, `references/mnda.md` for company NDA template, `references/policies.md` for company policies, `references/api_docs.md` for API specifications
- **Use cases**: Database schemas, API documentation, domain knowledge, company policies, detailed workflow guides
- **Benefits**: Keeps SKILL.md lean, loaded only when the agent determines it's needed
- **Best practice**: If files are large (>10k words), include search patterns in SKILL.md
- **Avoid duplication**: Information should live in either SKILL.md or references files, not both. Prefer references files for detailed information unless it's truly core to the skill—this keeps SKILL.md lean while making information discoverable without hogging the context window. Keep only essential procedural instructions and workflow guidance in SKILL.md; move detailed reference material, schemas, and examples to references files.
##### Assets (`assets/`)
Files not intended to be loaded into context, but rather used within the output the agent produces.
- **When to include**: When the skill needs files that will be used in the final output
- **Examples**: `assets/logo.png` for brand assets, `assets/slides.pptx` for PowerPoint templates, `assets/frontend-template/` for HTML/React boilerplate, `assets/font.ttf` for typography
- **Use cases**: Templates, images, icons, boilerplate code, fonts, sample documents that get copied or modified
- **Benefits**: Separates output resources from documentation, enables the agent to use files without loading them into context
#### What to Not Include in a Skill
A skill should only contain essential files that directly support its functionality. Do NOT create extraneous documentation or auxiliary files, including:
- README.md
- INSTALLATION_GUIDE.md
- QUICK_REFERENCE.md
- CHANGELOG.md
- etc.
The skill should only contain the information needed for an AI agent to do the job at hand. It should not contain auxiliary context about the process that went into creating it, setup and testing procedures, user-facing documentation, etc. Creating additional documentation files just adds clutter and confusion.
### Progressive Disclosure Design Principle
Skills use a three-level loading system to manage context efficiently:
1. **Metadata (name + description)** - Always in context (~100 words)
2. **SKILL.md body** - When skill triggers (<5k words)
3. **Bundled resources** - As needed by the agent (Unlimited because scripts can be executed without reading into context window)
#### Progressive Disclosure Patterns
Keep SKILL.md body to the essentials and under 500 lines to minimize context bloat. Split content into separate files when approaching this limit. When splitting out content into other files, it is very important to reference them from SKILL.md and describe clearly when to read them, to ensure the reader of the skill knows they exist and when to use them.
**Key principle:** When a skill supports multiple variations, frameworks, or options, keep only the core workflow and selection guidance in SKILL.md. Move variant-specific details (patterns, examples, configuration) into separate reference files.
**Pattern 1: High-level guide with references**
```markdown
# PDF Processing
## Quick start
Extract text with pdfplumber:
[code example]
## Advanced features
- **Form filling**: See [FORMS.md](FORMS.md) for complete guide
- **API reference**: See [REFERENCE.md](REFERENCE.md) for all methods
- **Examples**: See [EXAMPLES.md](EXAMPLES.md) for common patterns
```
The agent loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed.
**Pattern 2: Domain-specific organization**
For Skills with multiple domains, organize content by domain to avoid loading irrelevant context:
```
bigquery-skill/
├── SKILL.md (overview and navigation)
└── reference/
├── finance.md (revenue, billing metrics)
├── sales.md (opportunities, pipeline)
├── product.md (API usage, features)
└── marketing.md (campaigns, attribution)
```
When a user asks about sales metrics, the agent only reads sales.md.
Similarly, for skills supporting multiple frameworks or variants, organize by variant:
```
cloud-deploy/
├── SKILL.md (workflow + provider selection)
└── references/
├── aws.md (AWS deployment patterns)
├── gcp.md (GCP deployment patterns)
└── azure.md (Azure deployment patterns)
```
When the user chooses AWS, the agent only reads aws.md.
**Pattern 3: Conditional details**
Show basic content, link to advanced content:
```markdown
# DOCX Processing
## Creating documents
Use docx-js for new documents. See [DOCX-JS.md](DOCX-JS.md).
## Editing documents
For simple edits, modify the XML directly.
**For tracked changes**: See [REDLINING.md](REDLINING.md)
**For OOXML details**: See [OOXML.md](OOXML.md)
```
The agent reads REDLINING.md or OOXML.md only when the user needs those features.
**Important guidelines:**
- **Avoid deeply nested references** - Keep references one level deep from SKILL.md. All reference files should link directly from SKILL.md.
- **Structure longer reference files** - For files longer than 100 lines, include a table of contents at the top so the agent can see the full scope when previewing.
## Skill Creation Process
Skill creation involves these steps:
1. Understand the skill with concrete examples
2. Plan reusable skill contents (scripts, references, assets)
3. Initialize the skill (run init_skill.py)
4. Edit the skill (implement resources and write SKILL.md)
5. Validate the skill (run quick_validate.py)
6. Iterate based on real usage
Follow these steps in order, skipping only if there is a clear reason why they are not applicable.
### Step 1: Understanding the Skill with Concrete Examples
Skip this step only when the skill's usage patterns are already clearly understood. It remains valuable even when working with an existing skill.
To create an effective skill, clearly understand concrete examples of how the skill will be used. This understanding can come from either direct user examples or generated examples that are validated with user feedback.
For example, when building an image-editor skill, relevant questions include:
- "What functionality should the image-editor skill support? Editing, rotating, anything else?"
- "Can you give some examples of how this skill would be used?"
- "I can imagine users asking for things like 'Remove the red-eye from this image' or 'Rotate this image'. Are there other ways you imagine this skill being used?"
- "What would a user say that should trigger this skill?"
To avoid overwhelming users, avoid asking too many questions in a single message. Start with the most important questions and follow up as needed for better effectiveness.
Conclude this step when there is a clear sense of the functionality the skill should support.
### Step 2: Planning the Reusable Skill Contents
To turn concrete examples into an effective skill, analyze each example by:
1. Considering how to execute on the example from scratch
2. Identifying what scripts, references, and assets would be helpful when executing these workflows repeatedly
Example: When building a `pdf-editor` skill to handle queries like "Help me rotate this PDF," the analysis shows:
1. Rotating a PDF requires re-writing the same code each time
2. A `scripts/rotate_pdf.py` script would be helpful to store in the skill
Example: When designing a `frontend-webapp-builder` skill for queries like "Build me a todo app" or "Build me a dashboard to track my steps," the analysis shows:
1. Writing a frontend webapp requires the same boilerplate HTML/React each time
2. An `assets/hello-world/` template containing the boilerplate HTML/React project files would be helpful to store in the skill
Example: When building a `big-query` skill to handle queries like "How many users have logged in today?" the analysis shows:
1. Querying BigQuery requires re-discovering the table schemas and relationships each time
2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill
To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets.
### Step 3: Initializing the Skill
At this point, it is time to actually create the skill.
Skip this step only if the skill being developed already exists, and iteration or packaging is needed. In this case, continue to the next step.
When creating a new skill from scratch, always run the `init_skill.py` script. The script conveniently generates a new template skill directory that automatically includes everything a skill requires, making the skill creation process much more efficient and reliable.
Usage:
```bash
scripts/init_skill.py <skill-name> --path <output-directory>
```
For deepagents CLI, use the agent's skills directory:
```bash
scripts/init_skill.py <skill-name> --path ~/.deepagents/agent/skills
```
The script:
- Creates the skill directory at the specified path
- Generates a SKILL.md template with proper frontmatter and TODO placeholders
- Creates example resource directories: `scripts/`, `references/`, and `assets/`
- Adds example files in each directory that can be customized or deleted
After initialization, customize or remove the generated SKILL.md and example files as needed.
### Step 4: Edit the Skill
When editing the (newly-generated or existing) skill, remember that the skill is being created for an agent to use. Include information that would be beneficial and non-obvious to the agent. Consider what procedural knowledge, domain-specific details, or reusable assets would help the agent execute these tasks more effectively.
#### Learn Proven Design Patterns
Consult these helpful guides based on your skill's needs:
- **Multi-step processes**: See references/workflows.md for sequential workflows and conditional logic
- **Specific output formats or quality standards**: See references/output-patterns.md for template and example patterns
These files contain established best practices for effective skill design.
#### Start with Reusable Skill Contents
To begin implementation, start with the reusable resources identified above: `scripts/`, `references/`, and `assets/` files. Note that this step may require user input. For example, when implementing a `brand-guidelines` skill, the user may need to provide brand assets or templates to store in `assets/`, or documentation to store in `references/`.
Added scripts must be tested by actually running them to ensure there are no bugs and that the output matches what is expected. If there are many similar scripts, only a representative sample needs to be tested to ensure confidence that they all work while balancing time to completion.
Any example files and directories not needed for the skill should be deleted. The initialization script creates example files in `scripts/`, `references/`, and `assets/` to demonstrate structure, but most skills won't need all of them.
#### Update SKILL.md
**Writing Guidelines:** Always use imperative/infinitive form.
##### Frontmatter
Write the YAML frontmatter with `name` and `description`:
- `name`: The skill name
- `description`: This is the primary triggering mechanism for your skill, and helps the agent understand when to use the skill.
- Include both what the Skill does and specific triggers/contexts for when to use it.
- Include all "when to use" information here — not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to the agent.
- Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when working with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks"
Do not include any other fields in YAML frontmatter.
##### Body
Write instructions for using the skill and its bundled resources.
### Step 5: Validate the Skill
Once development of the skill is complete, validate it to ensure it meets all requirements:
```bash
scripts/quick_validate.py <path/to/skill-folder>
```
The validation script checks:
- YAML frontmatter format and required fields
- Skill naming conventions (hyphen-case, max 64 characters)
- Description completeness (no angle brackets, max 1024 characters)
- Required fields: `name` and `description`
- Allowed frontmatter properties only: `name`, `description`, `license`, `allowed-tools`, `metadata`
If validation fails, fix the reported errors and run the validation command again.
### Step 6: Iterate
After testing the skill, users may request improvements. Often this happens right after using the skill, with fresh context of how the skill performed.
**Iteration workflow:**
1. Use the skill on real tasks
2. Notice struggles or inefficiencies
3. Identify how SKILL.md or bundled resources should be updated
4. Implement changes and test again

View File

@@ -0,0 +1,308 @@
#!/usr/bin/env python3
"""
Skill Initializer - Creates a new skill from template
Usage:
init_skill.py <skill-name> --path <path>
Examples:
init_skill.py my-new-skill --path skills/public
init_skill.py my-api-helper --path skills/private
init_skill.py custom-skill --path /custom/location
For deepagents CLI:
init_skill.py my-skill --path ~/.deepagents/agent/skills
"""
import argparse
import sys
from pathlib import Path
# Template for a newly-initialized SKILL.md. The {skill_name} and
# {skill_title} placeholders are filled in by init_skill() via str.format().
# Fix: the "→" separators between quoted example phrases had been lost to
# encoding garbling (e.g. "Workflow Decision Tree""Reading"); restored to
# match the intact arrows on the adjacent "Structure:" lines.
SKILL_TEMPLATE = """---
name: {skill_name}
description: [TODO: Complete and informative explanation of what the skill does and when to use it. Include WHEN to use this skill - specific scenarios, file types, or tasks that trigger it.]
---
# {skill_title}
## Overview
[TODO: 1-2 sentences explaining what this skill enables]
## Structuring This Skill
[TODO: Choose the structure that best fits this skill's purpose. Common patterns:
**1. Workflow-Based** (best for sequential processes)
- Works well when there are clear step-by-step procedures
- Example: DOCX skill with "Workflow Decision Tree" → "Reading" → "Creating" → "Editing"
- Structure: ## Overview → ## Workflow Decision Tree → ## Step 1 → ## Step 2...
**2. Task-Based** (best for tool collections)
- Works well when the skill offers different operations/capabilities
- Example: PDF skill with "Quick Start" → "Merge PDFs" → "Split PDFs" → "Extract Text"
- Structure: ## Overview → ## Quick Start → ## Task Category 1 → ## Task Category 2...
**3. Reference/Guidelines** (best for standards or specifications)
- Works well for brand guidelines, coding standards, or requirements
- Example: Brand styling with "Brand Guidelines" → "Colors" → "Typography" → "Features"
- Structure: ## Overview → ## Guidelines → ## Specifications → ## Usage...
**4. Capabilities-Based** (best for integrated systems)
- Works well when the skill provides multiple interrelated features
- Example: Product Management with "Core Capabilities" → numbered capability list
- Structure: ## Overview → ## Core Capabilities → ### 1. Feature → ### 2. Feature...
Patterns can be mixed and matched as needed. Most skills combine patterns (e.g., start with task-based, add workflow for complex operations).
Delete this entire "Structuring This Skill" section when done - it's just guidance.]
## [TODO: Replace with the first main section based on chosen structure]
[TODO: Add content here. See examples in existing skills:
- Code samples for technical skills
- Decision trees for complex workflows
- Concrete examples with realistic user requests
- References to scripts/templates/references as needed]
## Resources
This skill includes example resource directories that demonstrate how to organize different types of bundled resources:
### scripts/
Executable code (Python/Bash/etc.) that can be run directly to perform specific operations.
**Examples from other skills:**
- PDF skill: `fill_fillable_fields.py`, `extract_form_field_info.py` - utilities for PDF manipulation
- DOCX skill: `document.py`, `utilities.py` - Python modules for document processing
**Appropriate for:** Python scripts, shell scripts, or any executable code that performs automation, data processing, or specific operations.
**Note:** Scripts may be executed without loading into context, but can still be read by Claude for patching or environment adjustments.
### references/
Documentation and reference material intended to be loaded into context to inform Claude's process and thinking.
**Examples from other skills:**
- Product management: `communication.md`, `context_building.md` - detailed workflow guides
- BigQuery: API reference documentation and query examples
- Finance: Schema documentation, company policies
**Appropriate for:** In-depth documentation, API references, database schemas, comprehensive guides, or any detailed information that Claude should reference while working.
### assets/
Files not intended to be loaded into context, but rather used within the output Claude produces.
**Examples from other skills:**
- Brand styling: PowerPoint template files (.pptx), logo files
- Frontend builder: HTML/React boilerplate project directories
- Typography: Font files (.ttf, .woff2)
**Appropriate for:** Templates, boilerplate code, document templates, images, icons, fonts, or any files meant to be copied or used in the final output.
---
**Any unneeded directories can be deleted.** Not every skill requires all three types of resources.
"""
EXAMPLE_SCRIPT = '''#!/usr/bin/env python3
"""
Example helper script for {skill_name}
This is a placeholder script that can be executed directly.
Replace with actual implementation or delete if not needed.
Example real scripts from other skills:
- pdf/scripts/fill_fillable_fields.py - Fills PDF form fields
- pdf/scripts/convert_pdf_to_images.py - Converts PDF pages to images
"""
def main():
print("This is an example script for {skill_name}")
# TODO: Add actual script logic here
# This could be data processing, file conversion, API calls, etc.
if __name__ == "__main__":
main()
'''
EXAMPLE_REFERENCE = """# Reference Documentation for {skill_title}
This is a placeholder for detailed reference documentation.
Replace with actual reference content or delete if not needed.
Example real reference docs from other skills:
- product-management/references/communication.md - Comprehensive guide for status updates
- product-management/references/context_building.md - Deep-dive on gathering context
- bigquery/references/ - API references and query examples
## When Reference Docs Are Useful
Reference docs are ideal for:
- Comprehensive API documentation
- Detailed workflow guides
- Complex multi-step processes
- Information too lengthy for main SKILL.md
- Content that's only needed for specific use cases
## Structure Suggestions
### API Reference Example
- Overview
- Authentication
- Endpoints with examples
- Error codes
- Rate limits
### Workflow Guide Example
- Prerequisites
- Step-by-step instructions
- Common patterns
- Troubleshooting
- Best practices
"""
EXAMPLE_ASSET = """# Example Asset File
This placeholder represents where asset files would be stored.
Replace with actual asset files (templates, images, fonts, etc.) or delete if not needed.
Asset files are NOT intended to be loaded into context, but rather used within
the output Claude produces.
Example asset files from other skills:
- Brand guidelines: logo.png, slides_template.pptx
- Frontend builder: hello-world/ directory with HTML/React boilerplate
- Typography: custom-font.ttf, font-family.woff2
- Data: sample_data.csv, test_dataset.json
## Common Asset Types
- Templates: .pptx, .docx, boilerplate directories
- Images: .png, .jpg, .svg, .gif
- Fonts: .ttf, .otf, .woff, .woff2
- Boilerplate code: Project directories, starter files
- Icons: .ico, .svg
- Data files: .csv, .json, .xml, .yaml
Note: This is a text placeholder. Actual assets can be any file type.
"""
def title_case_skill_name(skill_name):
    """Return a Title Case display label for a hyphen-case skill name.

    e.g. 'data-analyzer' -> 'Data Analyzer'.
    """
    words = skill_name.split('-')
    return ' '.join(map(str.capitalize, words))
def init_skill(skill_name, path):
    """
    Initialize a new skill directory with a template SKILL.md and example resources.

    Creates <path>/<skill_name>/ containing SKILL.md plus example files under
    scripts/, references/, and assets/, printing progress and next steps as it goes.

    Args:
        skill_name: Name of the skill (becomes the directory name and fills
            the SKILL.md template placeholders)
        path: Path where the skill directory should be created

    Returns:
        Path to created skill directory, or None if error
    """
    # Determine skill directory path
    skill_dir = Path(path).resolve() / skill_name
    # Check if directory already exists (refuse to overwrite an existing skill)
    if skill_dir.exists():
        print(f"❌ Error: Skill directory already exists: {skill_dir}")
        return None
    # Create skill directory
    try:
        # exist_ok=False guards against a race between the check above and here
        skill_dir.mkdir(parents=True, exist_ok=False)
        print(f"✅ Created skill directory: {skill_dir}")
    except Exception as e:
        print(f"❌ Error creating directory: {e}")
        return None
    # Create SKILL.md from template
    skill_title = title_case_skill_name(skill_name)
    skill_content = SKILL_TEMPLATE.format(
        skill_name=skill_name,
        skill_title=skill_title
    )
    skill_md_path = skill_dir / 'SKILL.md'
    try:
        skill_md_path.write_text(skill_content)
        print("✅ Created SKILL.md")
    except Exception as e:
        print(f"❌ Error creating SKILL.md: {e}")
        return None
    # Create resource directories with example files
    try:
        # Create scripts/ directory with example script
        scripts_dir = skill_dir / 'scripts'
        scripts_dir.mkdir(exist_ok=True)
        example_script = scripts_dir / 'example.py'
        example_script.write_text(EXAMPLE_SCRIPT.format(skill_name=skill_name))
        # Make the example script executable (rwxr-xr-x)
        example_script.chmod(0o755)
        print("✅ Created scripts/example.py")
        # Create references/ directory with example reference doc
        references_dir = skill_dir / 'references'
        references_dir.mkdir(exist_ok=True)
        example_reference = references_dir / 'api_reference.md'
        example_reference.write_text(EXAMPLE_REFERENCE.format(skill_title=skill_title))
        print("✅ Created references/api_reference.md")
        # Create assets/ directory with example asset placeholder
        assets_dir = skill_dir / 'assets'
        assets_dir.mkdir(exist_ok=True)
        example_asset = assets_dir / 'example_asset.txt'
        # EXAMPLE_ASSET has no placeholders, so it is written verbatim
        example_asset.write_text(EXAMPLE_ASSET)
        print("✅ Created assets/example_asset.txt")
    except Exception as e:
        print(f"❌ Error creating resource directories: {e}")
        return None
    # Print next steps
    print(f"\n✅ Skill '{skill_name}' initialized successfully at {skill_dir}")
    print("\nNext steps:")
    print("1. Edit SKILL.md to complete the TODO items and update the description")
    print("2. Customize or delete the example files in scripts/, references/, and assets/")
    print("3. Run the validator when ready to check the skill structure")
    return skill_dir
def main():
    """CLI entry point: validate argv, then initialize the requested skill."""
    # Expected invocation: init_skill.py <skill-name> --path <path>
    if len(sys.argv) < 4 or sys.argv[2] != '--path':
        print("Usage: init_skill.py <skill-name> --path <path>")
        print("\nSkill name requirements:")
        print("  - Hyphen-case identifier (e.g., 'data-analyzer')")
        print("  - Lowercase letters, digits, and hyphens only")
        print("  - Max 40 characters")
        print("  - Must match directory name exactly")
        print("\nExamples:")
        print("  init_skill.py my-new-skill --path skills/public")
        print("  init_skill.py my-api-helper --path skills/private")
        print("  init_skill.py custom-skill --path /custom/location")
        print("\nFor deepagents CLI:")
        print("  init_skill.py my-skill --path ~/.deepagents/agent/skills")
        sys.exit(1)

    skill_name, path = sys.argv[1], sys.argv[3]
    print(f"🚀 Initializing skill: {skill_name}")
    print(f"   Location: {path}")
    print()
    # init_skill returns the created Path on success, None on failure;
    # map that truthiness straight onto the process exit code.
    sys.exit(0 if init_skill(skill_name, path) else 1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
Quick validation script for skills - minimal version
For deepagents CLI, skills are located at:
~/.deepagents/<agent>/skills/<skill-name>/
Example:
python quick_validate.py ~/.deepagents/agent/skills/my-skill
"""
import sys
import os
import re
import yaml
from pathlib import Path
def validate_skill(skill_path):
    """Run lightweight structural checks on a skill directory.

    Returns a ``(is_valid, message)`` tuple: the message names the first
    problem encountered, or confirms that the skill passed validation.
    """
    root = Path(skill_path)

    # SKILL.md is the single mandatory file of a skill.
    skill_md = root / 'SKILL.md'
    if not skill_md.exists():
        return False, "SKILL.md not found"

    text = skill_md.read_text()
    if not text.startswith('---'):
        return False, "No YAML frontmatter found"

    # Frontmatter is the block delimited by a leading and a trailing '---'.
    fm_match = re.match(r'^---\n(.*?)\n---', text, re.DOTALL)
    if fm_match is None:
        return False, "Invalid frontmatter format"

    try:
        frontmatter = yaml.safe_load(fm_match.group(1))
        if not isinstance(frontmatter, dict):
            return False, "Frontmatter must be a YAML dictionary"
    except yaml.YAMLError as e:
        return False, f"Invalid YAML in frontmatter: {e}"

    # Reject top-level keys outside the schema (nested keys under
    # 'metadata' are intentionally unconstrained).
    allowed = {'name', 'description', 'license', 'allowed-tools', 'metadata'}
    extra = set(frontmatter.keys()) - allowed
    if extra:
        return False, (
            f"Unexpected key(s) in SKILL.md frontmatter: {', '.join(sorted(extra))}. "
            f"Allowed properties are: {', '.join(sorted(allowed))}"
        )

    for required in ('name', 'description'):
        if required not in frontmatter:
            return False, f"Missing '{required}' in frontmatter"

    name = frontmatter.get('name', '')
    if not isinstance(name, str):
        return False, f"Name must be a string, got {type(name).__name__}"
    name = name.strip()
    if name:
        # Hyphen-case: lowercase letters, digits, and hyphens only.
        if not re.fullmatch(r'[a-z0-9-]+', name):
            return False, f"Name '{name}' should be hyphen-case (lowercase letters, digits, and hyphens only)"
        if name.startswith('-') or name.endswith('-') or '--' in name:
            return False, f"Name '{name}' cannot start/end with hyphen or contain consecutive hyphens"
        # Spec caps skill names at 64 characters.
        if len(name) > 64:
            return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters."

    description = frontmatter.get('description', '')
    if not isinstance(description, str):
        return False, f"Description must be a string, got {type(description).__name__}"
    description = description.strip()
    if description:
        # Angle brackets are disallowed (they read like markup/placeholders).
        if '<' in description or '>' in description:
            return False, "Description cannot contain angle brackets (< or >)"
        # Spec caps descriptions at 1024 characters.
        if len(description) > 1024:
            return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters."

    return True, "Skill is valid!"
if __name__ == "__main__":
    # CLI entry point: expects exactly one argument, the skill directory path.
    if len(sys.argv) != 2:
        print("Usage: python quick_validate.py <skill_directory>")
        sys.exit(1)
    valid, message = validate_skill(sys.argv[1])
    print(message)
    # Exit 0 on success so shell scripts / CI can branch on the result.
    sys.exit(0 if valid else 1)

View File

@@ -0,0 +1,102 @@
---
name: web-research
description: Use this skill for requests related to web research; it provides a structured approach to conducting comprehensive web research
---
# Web Research Skill
This skill provides a structured approach to conducting comprehensive web research using the `task` tool to spawn research subagents. It emphasizes planning, efficient delegation, and systematic synthesis of findings.
## When to Use This Skill
Use this skill when you need to:
- Research complex topics requiring multiple information sources
- Gather and synthesize current information from the web
- Conduct comparative analysis across multiple subjects
- Produce well-sourced research reports with clear citations
## Research Process
### Step 1: Create and Save Research Plan
Before delegating to subagents, you MUST:
1. **Create a research folder** - Organize all research files in a dedicated folder relative to the current working directory:
```
mkdir research_[topic_name]
```
This keeps files organized and prevents clutter in the working directory.
2. **Analyze the research question** - Break it down into distinct, non-overlapping subtopics
3. **Write a research plan file** - Use the `write_file` tool to create `research_[topic_name]/research_plan.md` containing:
- The main research question
- 2-5 specific subtopics to investigate
- Expected information from each subtopic
- How results will be synthesized
**Planning Guidelines:**
- **Simple fact-finding**: 1-2 subtopics
- **Comparative analysis**: 1 subtopic per comparison element (max 3)
- **Complex investigations**: 3-5 subtopics
### Step 2: Delegate to Research Subagents
For each subtopic in your plan:
1. **Use the `task` tool** to spawn a research subagent with:
- Clear, specific research question (no acronyms)
- Instructions to write findings to a file: `research_[topic_name]/findings_[subtopic].md`
- Budget: 3-5 web searches maximum
2. **Run up to 3 subagents in parallel** for efficient research
**Subagent Instructions Template:**
```
Research [SPECIFIC TOPIC]. Use the web_search tool to gather information.
After completing your research, use write_file to save your findings to research_[topic_name]/findings_[subtopic].md.
Include key facts, relevant quotes, and source URLs.
Use 3-5 web searches maximum.
```
### Step 3: Synthesize Findings
After all subagents complete:
1. **Review the findings files** that were saved locally:
- First run `list_files research_[topic_name]` to see what files were created
- Then use `read_file` with the **file paths** (e.g., `research_[topic_name]/findings_*.md`)
- **Important**: Use `read_file` for LOCAL files only, not URLs
2. **Synthesize the information** - Create a comprehensive response that:
- Directly answers the original question
- Integrates insights from all subtopics
- Cites specific sources with URLs (from the findings files)
- Identifies any gaps or limitations
3. **Write final report** (optional) - Use `write_file` to create `research_[topic_name]/research_report.md` if requested
**Note**: If you need to fetch additional information from URLs, use the `fetch_url` tool, not `read_file`.
## Available Tools
You have access to:
- **write_file**: Save research plans and findings to local files
- **read_file**: Read local files (e.g., findings saved by subagents)
- **list_files**: See what local files exist in a directory
- **fetch_url**: Fetch content from URLs and convert to markdown (use this for web pages, not read_file)
- **task**: Spawn research subagents with web_search access
## Research Subagent Configuration
Each subagent you spawn will have access to:
- **web_search**: Search the web using Tavily (parameters: query, max_results, topic, include_raw_content)
- **write_file**: Save their findings to the filesystem
## Best Practices
- **Plan before delegating** - Always write research_plan.md first
- **Clear subtopics** - Ensure each subagent has distinct, non-overlapping scope
- **File-based communication** - Have subagents save findings to files, not return them directly
- **Systematic synthesis** - Read all findings files before creating final response
- **Stop appropriately** - Don't over-research; 3-5 searches per subtopic is usually sufficient

View File

@@ -0,0 +1,123 @@
[project]
name = "deepagents-cli"
version = "0.0.12"
description = "Deepagents CLI"
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.11,<4.0"
dependencies = [
"deepagents==0.2.8",
"requests",
"rich>=13.0.0",
"prompt-toolkit>=3.0.52",
"langchain-openai>=0.1.0",
"tavily-python",
"python-dotenv",
"daytona>=0.113.0",
"modal>=0.65.0",
"markdownify>=0.13.0",
"langchain>=1.0.7",
"runloop-api-client>=0.69.0",
"pillow>=10.0.0",
"pyyaml>=6.0",
]
[project.scripts]
deepagents = "deepagents_cli:cli_main"
deepagents-cli = "deepagents_cli:cli_main"
[dependency-groups]
test = [
"pytest>=8.3.4",
"pytest-asyncio>=0.25.3",
"pytest-cov>=6.0.0",
"pytest-mock>=3.14.0",
"pytest-socket>=0.7.0",
"pytest-timeout>=2.3.1",
"responses>=0.25.0",
"ruff>=0.9.7",
]
dev = [
"pytest",
"pytest-cov",
"build",
"twine",
"langchain-openai",
"pytest-timeout>=2.4.0",
"pytest-socket>=0.7.0",
"pytest-asyncio>=1.2.0",
]
lint = [
"ruff",
"mypy"
]
[build-system]
requires = ["setuptools>=73.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
deepagents_cli = ["default_agent_prompt.md"]
[tool.ruff]
line-length = 100
exclude = []
[tool.ruff.format]
docstring-code-format = true # Formats code blocks in docstrings
[tool.ruff.lint]
select = [
"ALL" # Enable all rules by default
]
ignore = [
"COM812", # Messes with the formatter
"ISC001", # Messes with the formatter
"PERF203", # Rarely useful
"SLF001", # Private member access
"PLC0415", # Imports should be at the top. Not always desirable
"PLR0913", # Too many arguments in function definition
"PLC0414", # Inconsistent with how type checkers expect to be notified of intentional re-exports
"C901", # Too complex
]
unfixable = ["B028"] # Rules that shouldn't be auto-fixed
[tool.ruff.lint.pyupgrade]
keep-runtime-typing = true
[tool.ruff.lint.flake8-annotations]
allow-star-arg-any = true
[tool.ruff.lint.pydocstyle]
convention = "google" # Google-style docstrings
ignore-var-parameters = true
[tool.ruff.lint.per-file-ignores]
"deepagents_cli/cli.py" = [
"T201", # Allow print statements in CLI
]
"tests/*" = [
"D1", # Skip documentation rules in tests
"S101", # Allow asserts in tests
"S311", # Allow pseudo-random generators in tests
"ANN201", # Missing return type annotation
"INP001", # Implicit namespace package
"PLR2004", # Magic value comparisons are fine in tests
]
[tool.pytest.ini_options]
timeout = 10 # Default timeout for all tests (can be overridden per-test)
[tool.mypy]
strict = true
ignore_missing_imports = true
enable_error_code = ["deprecated"]
# Optional: reduce strictness if needed
disallow_any_generics = false
warn_return_any = false
[tool.uv.sources]
deepagents = { path = "../deepagents" }

View File

@@ -0,0 +1,274 @@
"""Integration test for CLI with auto-approve mode.
This module implements benchmarking for simple tasks using the DeepAgents CLI; e.g.,
"write a poem to a file", "create multiple files", etc.
The agent runs on auto-approve mode, meaning it can perform actions without
user confirmation.
Note on testing approach:
- We use StringIO to capture console output, which is the recommended
approach according to Rich's documentation for unit/integration tests.
- The capture() context manager is an alternative, but StringIO provides
better control and is simpler for testing purposes.
- We patch console instances in both main and config modules to ensure
all output is captured in the test.
"""
import os
import uuid
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from io import StringIO
from pathlib import Path
from unittest.mock import AsyncMock, patch
import pytest
from langgraph.checkpoint.memory import MemorySaver
from rich.console import Console
from deepagents_cli import config as config_module
from deepagents_cli import main as main_module
from deepagents_cli.agent import create_cli_agent
from deepagents_cli.config import SessionState, create_model
from deepagents_cli.main import simple_cli
@asynccontextmanager
async def run_cli_task(task: str, tmp_path: Path) -> AsyncIterator[tuple[Path, str]]:
    """Context manager to run a CLI task with auto-approve and capture output.

    Args:
        task: The task string to give to the agent.
        tmp_path: Temporary directory for the test.

    Yields:
        tuple: (working_directory: Path, console_output: str)
    """
    # Build the capture console *before* changing directory so that a failure
    # here cannot leave the process stranded in tmp_path.
    # Using StringIO is the recommended approach for testing (per Rich docs).
    output = StringIO()
    captured_console = Console(
        file=output,
        force_terminal=False,  # Disable ANSI codes for simpler assertions
        width=120,  # Fixed width for predictable output
        color_system=None,  # Explicitly disable colors for testing
        legacy_windows=False,  # Modern behavior
    )
    original_dir = Path.cwd()
    os.chdir(tmp_path)
    try:
        # Mock the prompt session to provide input and exit.
        # Use patch.object() to fail immediately if attributes don't exist.
        with patch.object(main_module, "create_prompt_session") as mock_prompt:
            mock_session = AsyncMock()
            mock_session.prompt_async.side_effect = [
                task,  # User input
                EOFError(),  # Exit after task
            ]
            mock_prompt.return_value = mock_session
            # Mock console to capture output.
            # Use patch.object() to fail immediately if attributes don't exist.
            with (
                patch.object(main_module, "console", captured_console),
                patch.object(config_module, "console", captured_console),
            ):
                # NOTE: create_cli_agent/create_model are already imported at
                # module level; the previous local re-imports ("import after
                # patching") were no-ops, since patch.object mutates module
                # attributes, not import bindings. They have been removed.
                # Create real agent with real model (uses env var or fails).
                model = create_model()
                agent, backend = create_cli_agent(
                    model=model,
                    assistant_id="test_agent",
                    tools=[],
                    sandbox=None,
                    sandbox_type=None,
                )
                # Auto-approve so no HITL confirmation blocks the run.
                session_state = SessionState(auto_approve=True)
                # Run the CLI loop; it exits when prompt_async raises EOFError.
                await simple_cli(
                    agent=agent,
                    assistant_id="test_agent",
                    session_state=session_state,
                    baseline_tokens=0,
                    backend=backend,
                    sandbox_type=None,
                    setup_script_path=None,
                )
            # Verify that our mocks were actually used (ensures patching worked).
            mock_prompt.assert_called_once()
            assert mock_session.prompt_async.call_count >= 1, (
                "prompt_async should have been called at least once"
            )
        # Yield the directory and captured output to the test body.
        yield tmp_path, output.getvalue()
    finally:
        os.chdir(original_dir)
@asynccontextmanager
async def run_agent_task_with_hitl(task: str, tmp_path: Path) -> AsyncIterator:
    """Context manager to run an agent task with HIL and stream events.

    Args:
        task: The task string to give to the agent
        tmp_path: Temporary directory for the test

    Yields:
        AsyncGenerator: Stream of events from the agent
    """
    previous_cwd = Path.cwd()
    os.chdir(tmp_path)
    try:
        # HIL stays enabled here (no auto-approve), so tool calls interrupt.
        agent, _backend = create_cli_agent(
            model=create_model(),
            assistant_id="test_agent",
            tools=[],
            sandbox=None,
            sandbox_type=None,
        )
        agent.checkpointer = MemorySaver()
        # A thread_id is required for checkpointing across interrupt/resume.
        run_config = {"configurable": {"thread_id": str(uuid.uuid4())}}
        # Hand the stream generator to the test body to consume.
        yield agent.astream(
            {"messages": [{"role": "user", "content": task}]},
            config=run_config,
            stream_mode="values",
        )
    finally:
        os.chdir(previous_cwd)
class TestSimpleTasks:
    """A collection of simple task benchmarks for the deepagents-cli."""

    @pytest.mark.asyncio
    @pytest.mark.timeout(120)  # Agent can take 60-120 seconds
    async def test_write_hello_to_a_file(self, tmp_path: Path) -> None:
        """Test agents to write 'hello' to a file."""
        async with run_cli_task("write hello to file foo.md", tmp_path) as (
            work_dir,
            console_output,
        ):
            # Verify the file was created
            output_file = work_dir / "foo.md"
            assert output_file.exists(), f"foo.md should have been created in {work_dir}"
            content = output_file.read_text()
            assert "hello" in content.lower(), f"File should contain 'hello', but got: {content!r}"
            # Verify console output shows auto-approve mode.
            # BUG FIX: the previous check also accepted `"" in console_output`,
            # which is True for *any* string, so the assertion could never fail.
            # (Presumably the empty literal was once an emoji indicator that got
            # lost — if the CLI prints a different marker, add it here.)
            assert "Auto-approve" in console_output, (
                f"Expected auto-approve indicator in output.\nConsole output:\n{console_output}"
            )

    @pytest.mark.asyncio
    @pytest.mark.timeout(120)
    async def test_cli_auto_approve_multiple_operations(self, tmp_path: Path) -> None:
        """Test agent to create multiple files with auto-approve."""
        task = "create files test1.txt and test2.txt with content 'test file'"
        async with run_cli_task(task, tmp_path) as (work_dir, console_output):
            # Verify both files were created
            test1 = work_dir / "test1.txt"
            test2 = work_dir / "test2.txt"
            # At least one file should be created (agent might interpret task differently)
            created_files = [f for f in [test1, test2] if f.exists()]
            assert len(created_files) > 0, (
                f"Expected at least one test file to be created in {work_dir}.\n"
                f"Files in directory: {list(work_dir.iterdir())}"
            )
            # Verify console output captured the interaction
            assert len(console_output) > 0, "Console output should not be empty"
class TestAgentBehavior:
    """A collection of tests for agent behavior (non-CLI level)."""

    @pytest.mark.asyncio
    @pytest.mark.timeout(120)
    async def test_run_command_calls_shell_tool(self, tmp_path: Path) -> None:
        """Test that 'run make format' calls shell tool with 'make format' command.

        This test verifies that when a user says "run make format", the agent
        correctly interprets this as a shell command and calls the shell tool
        with just "make format" (not including the word "run").

        The test stops at the interrupt (HITL approval point) before the shell
        tool is actually executed, to verify the correct command is being passed.
        """
        # Point settings at a fresh filesystem rooted in tmp_path.
        from deepagents_cli.config import Settings

        mock_settings = Settings.from_environment(start_path=tmp_path)
        # Each module imported `settings` by value, so every one of them must
        # be patched individually.
        settings_patches = [
            patch("deepagents_cli.config.settings", mock_settings),
            patch("deepagents_cli.agent.settings", mock_settings),
            patch("deepagents_cli.file_ops.settings", mock_settings),
            patch("deepagents_cli.tools.settings", mock_settings),
            patch("deepagents_cli.token_utils.settings", mock_settings),
        ]
        # ExitStack keeps the nesting flat while applying all patches.
        from contextlib import ExitStack

        with ExitStack() as stack:
            for settings_patch in settings_patches:
                stack.enter_context(settings_patch)
            async with run_agent_task_with_hitl("run make format", tmp_path) as stream:
                # Drain the stream; the final event holds the end state.
                events = [event async for event in stream]
                result = events[-1] if events else {}
                assert len(events) > 0, "Expected to receive events from agent stream"
                # The shell tool requires approval, so an interrupt must occur.
                assert "__interrupt__" in result, "Expected shell tool to trigger HITL interrupt"
                assert result["__interrupt__"] is not None
                interrupts = result["__interrupt__"]
                assert len(interrupts) > 0, "Expected at least one interrupt"
                interrupt_value = interrupts[0].value
                action_requests = interrupt_value.get("action_requests", [])
                # There must be a pending shell tool call...
                shell_requests = [req for req in action_requests if req.get("name") == "shell"]
                assert len(shell_requests) > 0, "Expected at least one shell tool call"
                # ...whose command is "make format", not "run make format".
                pending_command = shell_requests[0].get("args", {}).get("command", "")
                assert pending_command == "make format", (
                    f"Expected shell command to be 'make format', got: {pending_command}"
                )

View File

@@ -0,0 +1,36 @@
"""Pytest configuration for benchmark tests."""
import os
from collections.abc import Generator
import pytest
from langsmith import Client, get_tracing_context
@pytest.fixture(scope="session", autouse=True)
def langsmith_client() -> Generator[Client | None, None, None]:
    """Yield a session-wide LangSmith client, or ``None`` when unconfigured.

    Applied automatically to every test. When an API key is present, the
    client from the active tracing context is reused if one exists.
    """
    api_key = os.environ.get("LANGSMITH_API_KEY") or os.environ.get("LANGCHAIN_API_KEY")
    if not api_key:
        # No key configured: tracing is effectively disabled for the session.
        yield None
        return
    client = get_tracing_context()["client"] or Client()
    yield client
    # Flush once more at session end so buffered runs are not lost.
    client.flush()
@pytest.fixture(autouse=True)
def flush_langsmith_after_test(langsmith_client: Client | None) -> Generator[None, None, None]:
    """Automatically flush LangSmith client after each test."""
    yield
    # This runs after each test. The session fixture yields None when no
    # LANGSMITH_API_KEY is configured, hence the guard (annotation widened
    # to `Client | None` to match).
    if langsmith_client is not None:
        langsmith_client.flush()

View File

@@ -0,0 +1,322 @@
"""Test sandbox integrations with upload/download functionality.
This module tests sandbox backends (RunLoop, Daytona, Modal) with support for
optional sandbox reuse to reduce test execution time.
Set REUSE_SANDBOX=1 environment variable to reuse sandboxes across tests within
a class. Otherwise, a fresh sandbox is created for each test method.
"""
from abc import ABC, abstractmethod
from collections.abc import Iterator
import pytest
from deepagents.backends.protocol import SandboxBackendProtocol
from deepagents.backends.sandbox import BaseSandbox
from deepagents_cli.integrations.sandbox_factory import create_sandbox
class BaseSandboxIntegrationTest(ABC):
    """Base class for sandbox integration tests.

    Subclasses must implement the `sandbox` fixture to provide a sandbox instance.
    All test methods are defined here and will be inherited by concrete test classes.
    """

    # The fixture is both class-scoped and abstract: each concrete subclass
    # supplies one sandbox instance that is shared by all tests in that class.
    @pytest.fixture(scope="class")
    @abstractmethod
    def sandbox(self) -> Iterator[SandboxBackendProtocol]:
        """Provide a sandbox instance for testing."""
        ...

    def test_sandbox_creation(self, sandbox: SandboxBackendProtocol) -> None:
        """Test basic sandbox creation and command execution."""
        assert sandbox.id is not None
        result = sandbox.execute("echo 'hello'")
        assert result.output.strip() == "hello"

    def test_upload_single_file(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading a single file."""
        test_path = "/tmp/test_upload_single.txt"
        test_content = b"Hello, Sandbox!"
        upload_responses = sandbox.upload_files([(test_path, test_content)])
        assert len(upload_responses) == 1
        assert upload_responses[0].path == test_path
        assert upload_responses[0].error is None
        # Verify file exists via command execution
        result = sandbox.execute(f"cat {test_path}")
        assert result.output.strip() == test_content.decode()

    def test_download_single_file(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading a single file."""
        test_path = "/tmp/test_download_single.txt"
        test_content = b"Download test content"
        # Create file first
        sandbox.upload_files([(test_path, test_content)])
        # Download and verify
        download_responses = sandbox.download_files([test_path])
        assert len(download_responses) == 1
        assert download_responses[0].path == test_path
        assert download_responses[0].content == test_content
        assert download_responses[0].error is None

    def test_upload_download_roundtrip(self, sandbox: SandboxBackendProtocol) -> None:
        """Test upload followed by download for data integrity."""
        test_path = "/tmp/test_roundtrip.txt"
        # Includes control chars and a NUL byte to catch lossy transports.
        test_content = b"Roundtrip test: special chars \n\t\r\x00"
        # Upload
        upload_responses = sandbox.upload_files([(test_path, test_content)])
        assert upload_responses[0].error is None
        # Download
        download_responses = sandbox.download_files([test_path])
        assert download_responses[0].error is None
        assert download_responses[0].content == test_content

    def test_upload_multiple_files(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading multiple files in a single batch."""
        files = [
            ("/tmp/test_multi_1.txt", b"Content 1"),
            ("/tmp/test_multi_2.txt", b"Content 2"),
            ("/tmp/test_multi_3.txt", b"Content 3"),
        ]
        upload_responses = sandbox.upload_files(files)
        # Responses are expected to come back in request order.
        assert len(upload_responses) == 3
        for i, resp in enumerate(upload_responses):
            assert resp.path == files[i][0]
            assert resp.error is None

    def test_download_multiple_files(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading multiple files in a single batch."""
        files = [
            ("/tmp/test_batch_1.txt", b"Batch 1"),
            ("/tmp/test_batch_2.txt", b"Batch 2"),
            ("/tmp/test_batch_3.txt", b"Batch 3"),
        ]
        # Upload files first
        sandbox.upload_files(files)
        # Download all at once
        paths = [f[0] for f in files]
        download_responses = sandbox.download_files(paths)
        assert len(download_responses) == 3
        for i, resp in enumerate(download_responses):
            assert resp.path == files[i][0]
            assert resp.content == files[i][1]
            assert resp.error is None

    @pytest.mark.skip(reason="Error handling not implemented yet.")
    def test_download_nonexistent_file(self, sandbox: SandboxBackendProtocol) -> None:
        """Test that downloading a non-existent file returns an error."""
        nonexistent_path = "/tmp/does_not_exist.txt"
        download_responses = sandbox.download_files([nonexistent_path])
        assert len(download_responses) == 1
        assert download_responses[0].path == nonexistent_path
        assert download_responses[0].content is None
        assert download_responses[0].error is not None

    def test_upload_binary_content(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading binary content (not valid UTF-8)."""
        test_path = "/tmp/binary_file.bin"
        # Create binary content with all byte values
        test_content = bytes(range(256))
        upload_responses = sandbox.upload_files([(test_path, test_content)])
        assert len(upload_responses) == 1
        assert upload_responses[0].error is None
        # Verify by downloading
        download_responses = sandbox.download_files([test_path])
        assert download_responses[0].content == test_content

    def test_partial_success_upload(self, sandbox: SandboxBackendProtocol) -> None:
        """Test that batch upload supports partial success."""
        files = [
            ("/tmp/valid_upload.txt", b"Valid content"),
            ("/tmp/another_valid.txt", b"Another valid"),
        ]
        upload_responses = sandbox.upload_files(files)
        # Should get a response for each file
        assert len(upload_responses) == len(files)
        # At least verify we got responses with proper paths
        for i, resp in enumerate(upload_responses):
            assert resp.path == files[i][0]

    @pytest.mark.skip(reason="Error handling not implemented yet.")
    def test_partial_success_download(self, sandbox: SandboxBackendProtocol) -> None:
        """Test that batch download supports partial success."""
        # Create one valid file
        valid_path = "/tmp/valid_file.txt"
        valid_content = b"Valid"
        sandbox.upload_files([(valid_path, valid_content)])
        # Request both valid and invalid files
        paths = [valid_path, "/tmp/does_not_exist.txt"]
        download_responses = sandbox.download_files(paths)
        assert len(download_responses) == 2
        # First should succeed
        assert download_responses[0].path == valid_path
        assert download_responses[0].content == valid_content
        assert download_responses[0].error is None
        # Second should fail
        assert download_responses[1].path == "/tmp/does_not_exist.txt"
        assert download_responses[1].content is None
        assert download_responses[1].error is not None

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_download_error_file_not_found(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading a non-existent file returns file_not_found error.

        Expected behavior: download_files should return FileDownloadResponse with
        error='file_not_found' when the requested file doesn't exist.
        """
        responses = sandbox.download_files(["/tmp/nonexistent_test_file.txt"])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/nonexistent_test_file.txt"
        assert responses[0].content is None
        assert responses[0].error == "file_not_found"

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_download_error_is_directory(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading a directory returns is_directory error.

        Expected behavior: download_files should return FileDownloadResponse with
        error='is_directory' when trying to download a directory as a file.
        """
        # Create a directory
        sandbox.execute("mkdir -p /tmp/test_directory")
        responses = sandbox.download_files(["/tmp/test_directory"])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/test_directory"
        assert responses[0].content is None
        assert responses[0].error == "is_directory"

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_upload_error_parent_not_found(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading to a path with non-existent parent returns parent_not_found error.

        Expected behavior: upload_files should return FileUploadResponse with
        error='parent_not_found' when the parent directory doesn't exist and
        can't be created automatically.

        Note: This test may need adjustment based on whether sandbox providers
        auto-create parent directories or not.
        """
        # Try to upload to a path where the parent is a file, not a directory
        # First create a file
        sandbox.upload_files([("/tmp/parent_is_file.txt", b"I am a file")])
        # Now try to upload as if parent_is_file.txt were a directory
        responses = sandbox.upload_files([("/tmp/parent_is_file.txt/child.txt", b"child")])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/parent_is_file.txt/child.txt"
        # Could be parent_not_found or invalid_path depending on implementation
        assert responses[0].error in ("parent_not_found", "invalid_path")

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_upload_error_invalid_path(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading with invalid path returns invalid_path error.

        Expected behavior: upload_files should return FileUploadResponse with
        error='invalid_path' for malformed paths (null bytes, invalid chars, etc).
        """
        # Test with null byte (invalid in most filesystems)
        responses = sandbox.upload_files([("/tmp/file\x00name.txt", b"content")])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/file\x00name.txt"
        assert responses[0].error == "invalid_path"

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_download_error_invalid_path(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading with invalid path returns invalid_path error.

        Expected behavior: download_files should return FileDownloadResponse with
        error='invalid_path' for malformed paths (null bytes, invalid chars, etc).
        """
        # Test with null byte (invalid in most filesystems)
        responses = sandbox.download_files(["/tmp/file\x00name.txt"])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/file\x00name.txt"
        assert responses[0].content is None
        assert responses[0].error == "invalid_path"

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_upload_to_existing_directory_path(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading to a path that is an existing directory.

        Expected behavior: This should either succeed by overwriting or return
        an appropriate error. The exact behavior depends on the sandbox provider.
        """
        # Create a directory
        sandbox.execute("mkdir -p /tmp/test_dir_upload")
        # Try to upload a file with the same name as the directory
        responses = sandbox.upload_files([("/tmp/test_dir_upload", b"file content")])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/test_dir_upload"
        # Behavior depends on implementation - just verify we get a response
class TestRunLoopIntegration(BaseSandboxIntegrationTest):
    """Run the shared sandbox integration suite against the RunLoop backend."""

    @pytest.fixture(scope="class")
    def sandbox(self) -> Iterator[BaseSandbox]:
        """Create one RunLoop sandbox, shared by all tests in this class."""
        with create_sandbox("runloop") as runloop_sandbox:
            yield runloop_sandbox
class TestDaytonaIntegration(BaseSandboxIntegrationTest):
    """Run the shared sandbox integration suite against the Daytona backend."""

    @pytest.fixture(scope="class")
    def sandbox(self) -> Iterator[BaseSandbox]:
        """Create one Daytona sandbox, shared by all tests in this class."""
        with create_sandbox("daytona") as daytona_sandbox:
            yield daytona_sandbox
class TestModalIntegration(BaseSandboxIntegrationTest):
    """Run the shared sandbox integration suite against the Modal backend."""

    @pytest.fixture(scope="class")
    def sandbox(self) -> Iterator[BaseSandbox]:
        """Yield one class-scoped Modal sandbox; the context manager tears it down."""
        with create_sandbox("modal") as modal_sandbox:
            yield modal_sandbox

View File

@@ -0,0 +1,139 @@
"""Tests for project-specific memory and dual agent.md loading."""
import os
from pathlib import Path
import pytest
from deepagents_cli.agent_memory import AgentMemoryMiddleware
from deepagents_cli.config import Settings
from deepagents_cli.skills import SkillsMiddleware
class TestAgentMemoryMiddleware:
    """Test dual memory loading in AgentMemoryMiddleware.

    The middleware is expected to populate ``user_memory`` from
    ``~/.deepagents/<agent>/agent.md`` and, when a project root is detected,
    ``project_memory`` from ``<project>/.deepagents/agent.md``.
    """

    def test_load_user_memory_only(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Test loading user agent.md when no project memory exists."""
        # Mock Path.home() so the middleware resolves ~/.deepagents under tmp_path.
        monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)
        agent_dir = tmp_path / ".deepagents" / "test_agent"
        agent_dir.mkdir(parents=True)
        user_md = agent_dir / "agent.md"
        user_md.write_text("User instructions")
        # A directory without .git must not be detected as a project root.
        non_project_dir = tmp_path / "not-a-project"
        non_project_dir.mkdir()
        # monkeypatch.chdir restores the previous cwd automatically on teardown,
        # even when an assertion fails (replaces manual os.chdir + try/finally).
        monkeypatch.chdir(non_project_dir)
        test_settings = Settings.from_environment(start_path=non_project_dir)
        middleware = AgentMemoryMiddleware(settings=test_settings, assistant_id="test_agent")
        result = middleware.before_agent({}, None)
        assert result["user_memory"] == "User instructions"
        assert "project_memory" not in result

    def test_load_both_memories(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Test loading both user and project agent.md."""
        monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)
        agent_dir = tmp_path / ".deepagents" / "test_agent"
        agent_dir.mkdir(parents=True)
        (agent_dir / "agent.md").write_text("User instructions")
        # A .git directory marks project_root as a project; its .deepagents/
        # folder carries the project-level memory file.
        project_root = tmp_path / "project"
        project_root.mkdir()
        (project_root / ".git").mkdir()
        (project_root / ".deepagents").mkdir()
        (project_root / ".deepagents" / "agent.md").write_text("Project instructions")
        monkeypatch.chdir(project_root)
        test_settings = Settings.from_environment(start_path=project_root)
        middleware = AgentMemoryMiddleware(settings=test_settings, assistant_id="test_agent")
        result = middleware.before_agent({}, None)
        assert result["user_memory"] == "User instructions"
        assert result["project_memory"] == "Project instructions"

    def test_memory_not_reloaded_if_already_in_state(self, tmp_path: Path) -> None:
        """Test that memory is not reloaded if already in state."""
        agent_dir = tmp_path / ".deepagents" / "test_agent"
        agent_dir.mkdir(parents=True)
        test_settings = Settings.from_environment(start_path=tmp_path)
        middleware = AgentMemoryMiddleware(settings=test_settings, assistant_id="test_agent")
        # State already carries both memories: before_agent must return no updates.
        state = {"user_memory": "Existing memory", "project_memory": "Existing project"}
        assert middleware.before_agent(state, None) == {}
class TestSkillsPathResolution:
    """Test skills path resolution with per-agent structure."""

    def test_skills_middleware_paths(self, tmp_path: Path) -> None:
        """Test that skills middleware uses correct per-agent paths."""
        agent_dir = tmp_path / ".deepagents" / "test_agent"
        skills_dir = agent_dir / "skills"
        skills_dir.mkdir(parents=True)
        middleware = SkillsMiddleware(skills_dir=skills_dir, assistant_id="test_agent")
        # The display path is a stable, user-facing "~" form independent of tmp_path.
        assert middleware.skills_dir == skills_dir
        assert middleware.user_skills_display == "~/.deepagents/test_agent/skills"

    def test_skills_dir_per_agent(self, tmp_path: Path) -> None:
        """Test that different agents have separate skills directories."""
        # SkillsMiddleware is imported at module level; the redundant
        # function-local re-import was removed.
        agent1_skills = tmp_path / ".deepagents" / "agent1" / "skills"
        agent1_skills.mkdir(parents=True)
        middleware1 = SkillsMiddleware(skills_dir=agent1_skills, assistant_id="agent1")
        agent2_skills = tmp_path / ".deepagents" / "agent2" / "skills"
        agent2_skills.mkdir(parents=True)
        middleware2 = SkillsMiddleware(skills_dir=agent2_skills, assistant_id="agent2")
        # Each agent gets its own directory and display path.
        assert middleware1.skills_dir != middleware2.skills_dir
        assert "agent1" in middleware1.user_skills_display
        assert "agent2" in middleware2.user_skills_display

View File

@@ -0,0 +1 @@
"""Skills unit tests."""

View File

@@ -0,0 +1,217 @@
"""Unit tests for skills command sanitization and validation."""
from pathlib import Path
import pytest
from deepagents_cli.skills.commands import _validate_name, _validate_skill_path
class TestValidateSkillName:
    """Test skill name validation per Agent Skills spec (https://agentskills.io/specification)."""

    def test_valid_skill_names(self):
        """Spec-compliant names are accepted.

        Per spec: lowercase alphanumeric plus hyphens, no leading/trailing
        hyphen, no consecutive hyphens, at most 64 characters.
        """
        for candidate in (
            "web-research",
            "langgraph-docs",
            "skill123",
            "skill-with-many-parts",
            "a",
            "a1",
            "code-review",
            "data-analysis",
        ):
            ok, message = _validate_name(candidate)
            assert ok, f"Valid name '{candidate}' was rejected: {message}"
            assert message == ""

    def test_invalid_names_per_spec(self):
        """Names violating the spec's lexical rules are rejected."""
        cases = (
            ("MySkill", "uppercase not allowed"),
            ("my_skill", "underscores not allowed"),
            ("skill_with_underscores", "underscores not allowed"),
            ("-skill", "cannot start with hyphen"),
            ("skill-", "cannot end with hyphen"),
            ("skill--name", "consecutive hyphens not allowed"),
        )
        for candidate, why in cases:
            ok, message = _validate_name(candidate)
            assert not ok, f"Invalid name '{candidate}' ({why}) was accepted"
            assert message != ""

    def test_path_traversal_attacks(self):
        """Names attempting directory traversal are rejected with a path-related error."""
        for candidate in (
            "../../../etc/passwd",
            "../../.ssh/authorized_keys",
            "../.bashrc",
            "..\\..\\windows\\system32",
            "skill/../../../etc",
            "../../tmp/exploit",
            "../..",
            "..",
        ):
            ok, message = _validate_name(candidate)
            assert not ok, f"Malicious name '{candidate}' was accepted"
            assert message != ""
            assert "path" in message.lower() or ".." in message

    def test_absolute_paths(self):
        """Absolute (POSIX and Windows style) paths are rejected."""
        for candidate in (
            "/etc/passwd",
            "/home/user/.ssh",
            "\\Windows\\System32",
            "/tmp/exploit",
        ):
            ok, message = _validate_name(candidate)
            assert not ok, f"Absolute path '{candidate}' was accepted"
            assert message != ""

    def test_path_separators(self):
        """Names containing forward or back slashes are rejected."""
        for candidate in (
            "skill/name",
            "skill\\name",
            "path/to/skill",
            "parent\\child",
        ):
            ok, message = _validate_name(candidate)
            assert not ok, f"Path with separator '{candidate}' was accepted"
            assert message != ""

    def test_invalid_characters(self):
        """Shell metacharacters and other forbidden characters are rejected."""
        for candidate in (
            "skill name",  # space
            "skill;rm -rf /",  # command injection
            "skill`whoami`",  # command substitution
            "skill$(whoami)",  # command substitution
            "skill&ls",  # command chaining
            "skill|cat",  # pipe
            "skill>file",  # redirect
            "skill<file",  # redirect
            "skill*",  # wildcard
            "skill?",  # wildcard
            "skill[a]",  # pattern
            "skill{a,b}",  # brace expansion
            "skill$VAR",  # variable expansion
            "skill@host",  # at sign
            "skill#comment",  # hash
            "skill!event",  # exclamation
            "skill'quote",  # single quote
            'skill"quote',  # double quote
        ):
            ok, message = _validate_name(candidate)
            assert not ok, f"Invalid character in '{candidate}' was accepted"
            assert message != ""

    def test_empty_names(self):
        """Empty and whitespace-only names are rejected."""
        for candidate in ("", " ", "\t", "\n"):
            ok, message = _validate_name(candidate)
            assert not ok, f"Empty/whitespace name '{candidate}' was accepted"
            assert message != ""
class TestValidateSkillPath:
    """Test skill path validation to ensure paths stay within bounds."""

    def test_valid_path_within_base(self, tmp_path: Path) -> None:
        """A child of the base directory is accepted."""
        base = tmp_path / "skills"
        base.mkdir()
        ok, message = _validate_skill_path(base / "my-skill", base)
        assert ok, f"Valid path was rejected: {message}"
        assert message == ""

    def test_path_traversal_outside_base(self, tmp_path: Path) -> None:
        """A sibling outside the base directory is rejected."""
        base = tmp_path / "skills"
        base.mkdir()
        escape_target = tmp_path / "malicious"
        ok, message = _validate_skill_path(escape_target, base)
        assert not ok, "Path outside base directory was accepted"
        assert message != ""

    def test_symlink_path_traversal(self, tmp_path: Path) -> None:
        """A symlink inside base that resolves outside of it is rejected."""
        base = tmp_path / "skills"
        base.mkdir()
        outside = tmp_path / "outside"
        outside.mkdir()
        link = base / "evil-link"
        try:
            link.symlink_to(outside)
        except OSError:
            # Some platforms (or unprivileged users) cannot create symlinks.
            pytest.skip("Symlink creation not supported")
        ok, message = _validate_skill_path(link, base)
        assert not ok, "Symlink to outside directory was accepted"
        assert message != ""

    def test_nonexistent_path_validation(self, tmp_path: Path) -> None:
        """A not-yet-created path under base is still considered valid."""
        base = tmp_path / "skills"
        base.mkdir()
        ok, message = _validate_skill_path(base / "new-skill", base)
        assert ok, f"Valid non-existent path was rejected: {message}"
        assert message == ""
class TestIntegrationSecurity:
    """Integration tests for security across the command flow."""

    def test_combined_validation(self, tmp_path: Path) -> None:
        """Every attack vector must be stopped by name OR path validation."""
        base = tmp_path / "skills"
        base.mkdir()
        attack_vectors = (
            ("../../../etc/passwd", "path traversal"),
            ("/etc/passwd", "absolute path"),
            ("skill/../../../tmp", "hidden traversal"),
            ("skill;rm -rf", "command injection"),
        )
        for candidate, attack_type in attack_vectors:
            name_ok, name_error = _validate_name(candidate)
            if not name_ok:
                # Name validation caught it - this is good
                assert name_error != "", f"No error message for {attack_type}"
                continue
            # Name validation let it through: path validation is the last line
            # of defense and must reject the resolved location.
            path_ok, _ = _validate_skill_path(base / candidate, base)
            assert not path_ok, f"{attack_type} bypassed both validations: {candidate}"

View File

@@ -0,0 +1,292 @@
"""Unit tests for skills loading functionality."""
from pathlib import Path
from deepagents_cli.skills.load import list_skills
class TestListSkillsSingleDirectory:
    """Test list_skills function for loading skills from a single directory."""

    def test_list_skills_empty_directory(self, tmp_path: Path) -> None:
        """An existing but empty directory yields no skills."""
        empty_dir = tmp_path / "skills"
        empty_dir.mkdir()
        assert list_skills(user_skills_dir=empty_dir, project_skills_dir=None) == []

    def test_list_skills_with_valid_skill(self, tmp_path: Path) -> None:
        """A skill with complete YAML frontmatter is discovered and described."""
        root = tmp_path / "skills"
        root.mkdir()
        skill_home = root / "test-skill"
        skill_home.mkdir()
        manifest = skill_home / "SKILL.md"
        manifest.write_text("""---
name: test-skill
description: A test skill
---
# Test Skill
This is a test skill.
""")
        found = list_skills(user_skills_dir=root, project_skills_dir=None)
        assert len(found) == 1
        entry = found[0]
        assert entry["name"] == "test-skill"
        assert entry["description"] == "A test skill"
        assert entry["source"] == "user"
        assert Path(entry["path"]) == manifest

    def test_list_skills_source_parameter(self, tmp_path: Path) -> None:
        """Skills discovered via project_skills_dir are tagged source='project'."""
        root = tmp_path / "skills"
        root.mkdir()
        skill_home = root / "project-skill"
        skill_home.mkdir()
        (skill_home / "SKILL.md").write_text("""---
name: project-skill
description: A project skill
---
# Project Skill
""")
        found = list_skills(user_skills_dir=None, project_skills_dir=root)
        assert len(found) == 1
        assert found[0]["source"] == "project"

    def test_list_skills_missing_frontmatter(self, tmp_path: Path) -> None:
        """A SKILL.md without YAML frontmatter is silently skipped."""
        root = tmp_path / "skills"
        root.mkdir()
        skill_home = root / "invalid-skill"
        skill_home.mkdir()
        (skill_home / "SKILL.md").write_text("# Invalid Skill\n\nNo frontmatter here.")
        assert list_skills(user_skills_dir=root, project_skills_dir=None) == []

    def test_list_skills_missing_required_fields(self, tmp_path: Path) -> None:
        """Frontmatter lacking either 'name' or 'description' disqualifies a skill."""
        root = tmp_path / "skills"
        root.mkdir()
        # Frontmatter without a description.
        no_desc = root / "incomplete-1"
        no_desc.mkdir()
        (no_desc / "SKILL.md").write_text("""---
name: incomplete-1
---
Content
""")
        # Frontmatter without a name.
        no_name = root / "incomplete-2"
        no_name.mkdir()
        (no_name / "SKILL.md").write_text("""---
description: Missing name
---
Content
""")
        assert list_skills(user_skills_dir=root, project_skills_dir=None) == []

    def test_list_skills_nonexistent_directory(self, tmp_path: Path) -> None:
        """A directory that does not exist yields no skills rather than raising."""
        missing = tmp_path / "nonexistent"
        assert list_skills(user_skills_dir=missing, project_skills_dir=None) == []
class TestListSkillsMultipleDirectories:
    """Test list_skills function for loading from multiple directories."""

    @staticmethod
    def _write_skill(parent: Path, name: str, description: str | None = None) -> None:
        """Create ``parent/name/SKILL.md`` with minimal frontmatter.

        When *description* is None the frontmatter deliberately omits the
        description field, producing an invalid skill.
        """
        skill_dir = parent / name
        skill_dir.mkdir()
        if description is None:
            body = f"---\nname: {name}\n---\nContent\n"
        else:
            body = f"---\nname: {name}\ndescription: {description}\n---\nContent\n"
        (skill_dir / "SKILL.md").write_text(body)

    def test_list_skills_user_only(self, tmp_path: Path) -> None:
        """Skills found only in the user directory carry source='user'."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        self._write_skill(user_dir, "user-skill", "A user skill")
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=None)
        assert len(found) == 1
        assert found[0]["name"] == "user-skill"
        assert found[0]["source"] == "user"

    def test_list_skills_project_only(self, tmp_path: Path) -> None:
        """Skills found only in the project directory carry source='project'."""
        project_dir = tmp_path / "project_skills"
        project_dir.mkdir()
        self._write_skill(project_dir, "project-skill", "A project skill")
        found = list_skills(user_skills_dir=None, project_skills_dir=project_dir)
        assert len(found) == 1
        assert found[0]["name"] == "project-skill"
        assert found[0]["source"] == "project"

    def test_list_skills_both_sources(self, tmp_path: Path) -> None:
        """Distinctly named skills from both directories are merged."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        project_dir = tmp_path / "project_skills"
        project_dir.mkdir()
        self._write_skill(user_dir, "user-skill", "A user skill")
        self._write_skill(project_dir, "project-skill", "A project skill")
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=project_dir)
        assert len(found) == 2
        names = {entry["name"] for entry in found}
        assert "user-skill" in names
        assert "project-skill" in names
        # Each entry keeps the source of the directory it came from.
        by_name = {entry["name"]: entry for entry in found}
        assert by_name["user-skill"]["source"] == "user"
        assert by_name["project-skill"]["source"] == "project"

    def test_list_skills_project_overrides_user(self, tmp_path: Path) -> None:
        """On a name collision, the project-level skill wins."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        project_dir = tmp_path / "project_skills"
        project_dir.mkdir()
        self._write_skill(user_dir, "shared-skill", "User version")
        self._write_skill(project_dir, "shared-skill", "Project version")
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=project_dir)
        assert len(found) == 1  # Only one skill with this name
        winner = found[0]
        assert winner["name"] == "shared-skill"
        assert winner["description"] == "Project version"
        assert winner["source"] == "project"

    def test_list_skills_empty_directories(self, tmp_path: Path) -> None:
        """Two empty directories yield an empty result."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        project_dir = tmp_path / "project_skills"
        project_dir.mkdir()
        assert list_skills(user_skills_dir=user_dir, project_skills_dir=project_dir) == []

    def test_list_skills_no_directories(self):
        """Passing no directories at all yields an empty result."""
        assert list_skills(user_skills_dir=None, project_skills_dir=None) == []

    def test_list_skills_multiple_user_skills(self, tmp_path: Path) -> None:
        """All valid skills in a directory are discovered."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        for i in range(3):
            self._write_skill(user_dir, f"skill-{i}", f"Skill number {i}")
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=None)
        assert len(found) == 3
        assert {entry["name"] for entry in found} == {"skill-0", "skill-1", "skill-2"}

    def test_list_skills_mixed_valid_invalid(self, tmp_path: Path) -> None:
        """Invalid skills are skipped without hiding valid siblings."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        self._write_skill(user_dir, "valid-skill", "A valid skill")
        # Missing description makes this one invalid.
        self._write_skill(user_dir, "invalid-skill", None)
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=None)
        assert len(found) == 1
        assert found[0]["name"] == "valid-skill"

View File

@@ -0,0 +1,267 @@
"""Unit tests for agent formatting functions."""
from pathlib import Path
from unittest.mock import Mock
from deepagents_cli.agent import (
_format_edit_file_description,
_format_execute_description,
_format_fetch_url_description,
_format_shell_description,
_format_task_description,
_format_web_search_description,
_format_write_file_description,
)
def test_format_write_file_description_create_new_file(tmp_path: Path) -> None:
    """A write_file call targeting a nonexistent path is described as a creation."""
    target = tmp_path / "new_file.py"
    call = {
        "name": "write_file",
        "args": {"file_path": str(target), "content": "def hello():\n return 'world'\n"},
        "id": "call-1",
    }
    rendered = _format_write_file_description(call, Mock(), Mock())
    assert f"File: {target}" in rendered
    assert "Action: Create file" in rendered
    assert "Lines: 2" in rendered
def test_format_write_file_description_overwrite_existing_file(tmp_path: Path) -> None:
    """A write_file call targeting an existing path is described as an overwrite."""
    target = tmp_path / "existing.py"
    target.write_text("old content")  # pre-existing file triggers the overwrite wording
    call = {
        "name": "write_file",
        "args": {"file_path": str(target), "content": "line1\nline2\nline3\n"},
        "id": "call-2",
    }
    rendered = _format_write_file_description(call, Mock(), Mock())
    assert f"File: {target}" in rendered
    assert "Action: Overwrite file" in rendered
    assert "Lines: 3" in rendered
def test_format_edit_file_description_single_occurrence():
    """replace_all=False is rendered as a single-occurrence replacement."""
    call = {
        "name": "edit_file",
        "args": {
            "file_path": "/path/to/file.py",
            "old_string": "foo",
            "new_string": "bar",
            "replace_all": False,
        },
        "id": "call-3",
    }
    rendered = _format_edit_file_description(call, Mock(), Mock())
    assert "File: /path/to/file.py" in rendered
    assert "Action: Replace text (single occurrence)" in rendered
def test_format_edit_file_description_all_occurrences():
    """replace_all=True is rendered as an all-occurrences replacement."""
    call = {
        "name": "edit_file",
        "args": {
            "file_path": "/path/to/file.py",
            "old_string": "foo",
            "new_string": "bar",
            "replace_all": True,
        },
        "id": "call-4",
    }
    rendered = _format_edit_file_description(call, Mock(), Mock())
    assert "File: /path/to/file.py" in rendered
    assert "Action: Replace text (all occurrences)" in rendered
def test_format_web_search_description():
    """The web_search description echoes query, max_results, and a cost warning."""
    call = {
        "name": "web_search",
        "args": {"query": "python async programming", "max_results": 10},
        "id": "call-5",
    }
    rendered = _format_web_search_description(call, Mock(), Mock())
    assert "Query: python async programming" in rendered
    assert "Max results: 10" in rendered
    assert "⚠️ This will use Tavily API credits" in rendered
def test_format_web_search_description_default_max_results():
    """Omitting max_results falls back to the documented default of 5."""
    call = {
        "name": "web_search",
        "args": {"query": "langchain tutorial"},
        "id": "call-6",
    }
    rendered = _format_web_search_description(call, Mock(), Mock())
    assert "Query: langchain tutorial" in rendered
    assert "Max results: 5" in rendered
def test_format_fetch_url_description():
    """The fetch_url description echoes the URL, timeout, and a fetch warning."""
    call = {
        "name": "fetch_url",
        "args": {"url": "https://example.com/docs", "timeout": 60},
        "id": "call-7",
    }
    rendered = _format_fetch_url_description(call, Mock(), Mock())
    assert "URL: https://example.com/docs" in rendered
    assert "Timeout: 60s" in rendered
    assert "⚠️ Will fetch and convert web content to markdown" in rendered
def test_format_fetch_url_description_default_timeout():
    """Omitting timeout falls back to the documented default of 30 seconds."""
    call = {
        "name": "fetch_url",
        "args": {"url": "https://api.example.com"},
        "id": "call-8",
    }
    rendered = _format_fetch_url_description(call, Mock(), Mock())
    assert "URL: https://api.example.com" in rendered
    assert "Timeout: 30s" in rendered
def test_format_task_description():
    """The task description shows subagent type, instructions, and an access warning."""
    instructions = "Analyze code structure and identify the main components."
    call = {
        "name": "task",
        "args": {"description": instructions, "subagent_type": "general-purpose"},
        "id": "call-9",
    }
    rendered = _format_task_description(call, Mock(), Mock())
    assert "Subagent Type: general-purpose" in rendered
    assert "Task Instructions:" in rendered
    assert instructions in rendered
    assert "⚠️ Subagent will have access to file operations and shell commands" in rendered
def test_format_task_description_truncates_long_description():
    """Overlong task instructions are truncated with an ellipsis."""
    long_description = "x" * 600  # well past the 500-char display budget
    call = {
        "name": "task",
        "args": {"description": long_description, "subagent_type": "general-purpose"},
        "id": "call-10",
    }
    rendered = _format_task_description(call, Mock(), Mock())
    assert "Subagent Type: general-purpose" in rendered
    assert "..." in rendered
    # Truncation to ~500 chars plus boilerplate keeps the total well under
    # the original length plus headroom.
    assert len(rendered) < len(long_description) + 300
def test_format_shell_description():
    """The shell description echoes the command and names the working directory."""
    call = {
        "name": "shell",
        "args": {"command": "ls -la /tmp"},
        "id": "call-11",
    }
    rendered = _format_shell_description(call, Mock(), Mock())
    assert "Shell Command: ls -la /tmp" in rendered
    assert "Working Directory:" in rendered
def test_format_execute_description():
    """The execute description echoes the command and marks the sandbox location."""
    call = {
        "name": "execute",
        "args": {"command": "python script.py"},
        "id": "call-12",
    }
    rendered = _format_execute_description(call, Mock(), Mock())
    assert "Execute Command: python script.py" in rendered
    assert "Location: Remote Sandbox" in rendered

View File

@@ -0,0 +1,109 @@
"""Tests for config module including project discovery utilities."""
from pathlib import Path
from deepagents_cli.config import _find_project_agent_md, _find_project_root
class TestProjectRootDetection:
    """Test project root detection via .git directory."""

    def test_find_project_root_with_git(self, tmp_path: Path) -> None:
        """Walking up from a nested directory finds the repo containing .git."""
        repo = tmp_path / "my-project"
        repo.mkdir()
        (repo / ".git").mkdir()
        # Start the search two levels below the repo root.
        nested = repo / "src" / "components"
        nested.mkdir(parents=True)
        assert _find_project_root(nested) == repo

    def test_find_project_root_no_git(self, tmp_path: Path) -> None:
        """Without any .git ancestor, detection returns None."""
        plain_dir = tmp_path / "no-git"
        plain_dir.mkdir()
        assert _find_project_root(plain_dir) is None

    def test_find_project_root_nested_git(self, tmp_path: Path) -> None:
        """The nearest enclosing repo wins over an outer one."""
        outer = tmp_path / "outer"
        outer.mkdir()
        (outer / ".git").mkdir()
        inner = outer / "inner"
        inner.mkdir()
        (inner / ".git").mkdir()
        # Searching from the inner repo must not skip up to the outer one.
        assert _find_project_root(inner) == inner
class TestProjectAgentMdFinding:
    """Test finding project-specific agent.md files."""

    def test_find_agent_md_in_deepagents_dir(self, tmp_path: Path) -> None:
        """agent.md inside .deepagents/ is discovered."""
        root = tmp_path / "project"
        root.mkdir()
        hidden_dir = root / ".deepagents"
        hidden_dir.mkdir()
        preferred_md = hidden_dir / "agent.md"
        preferred_md.write_text("Project instructions")
        assert _find_project_agent_md(root) == [preferred_md]

    def test_find_agent_md_in_root(self, tmp_path: Path) -> None:
        """With no .deepagents/, a root-level agent.md is the fallback."""
        root = tmp_path / "project"
        root.mkdir()
        fallback_md = root / "agent.md"
        fallback_md.write_text("Project instructions")
        assert _find_project_agent_md(root) == [fallback_md]

    def test_both_agent_md_files_combined(self, tmp_path: Path) -> None:
        """When both exist, both are returned with .deepagents/ first."""
        root = tmp_path / "project"
        root.mkdir()
        hidden_dir = root / ".deepagents"
        hidden_dir.mkdir()
        preferred_md = hidden_dir / "agent.md"
        preferred_md.write_text("In .deepagents/")
        fallback_md = root / "agent.md"
        fallback_md.write_text("In root")
        # Order encodes precedence: the .deepagents/ copy comes first.
        assert _find_project_agent_md(root) == [preferred_md, fallback_md]

    def test_find_agent_md_not_found(self, tmp_path: Path) -> None:
        """A project without any agent.md yields an empty list."""
        root = tmp_path / "project"
        root.mkdir()
        assert _find_project_agent_md(root) == []

View File

@@ -0,0 +1,335 @@
"""End-to-end unit tests for deepagents-cli with fake LLM models."""
import uuid
from collections.abc import Callable, Generator, Sequence
from contextlib import contextmanager
from pathlib import Path
from typing import Any
from unittest.mock import patch
from deepagents.backends import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool, tool
from deepagents_cli.agent import create_cli_agent
# Identity tool: echoes its argument back, so tests can assert that tool
# arguments supplied by the fake model round-trip unchanged.
@tool(description="Sample tool")
def sample_tool(sample_input: str) -> str:
    """A sample tool that returns the input string."""
    return sample_input
class FixedGenericFakeChatModel(GenericFakeChatModel):
    """Fixed version of GenericFakeChatModel that properly handles bind_tools."""

    def bind_tools(
        self,
        tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
        *,
        tool_choice: str | None = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, AIMessage]:
        """Override bind_tools to return self.

        Returning ``self`` lets agent code bind tools without changing the
        scripted message stream; the ``tools`` and ``tool_choice`` arguments
        are intentionally ignored.

        NOTE(review): presumably the stock GenericFakeChatModel does not
        support bind_tools — confirm against the installed langchain-core
        version.
        """
        return self
@contextmanager
def mock_settings(tmp_path: Path, assistant_id: str = "test-agent") -> Generator[Path, None, None]:
    """Patch CLI settings to point at throwaway directories under *tmp_path*.

    Args:
        tmp_path: Temporary directory path (typically pytest's tmp_path fixture).
        assistant_id: Agent identifier used to lay out the directory tree.

    Yields:
        The per-agent directory containing a pre-written agent.md.
    """
    agents_root = tmp_path / "agents"
    agent_dir = agents_root / assistant_id
    agent_dir.mkdir(parents=True)
    (agent_dir / "agent.md").write_text("# Test Agent\nTest agent instructions.")
    skills_dir = tmp_path / "skills"
    skills_dir.mkdir(parents=True)

    # These helpers return real Path objects so nothing MagicMock-shaped ends
    # up in agent state (MagicMocks would break state serialization).
    def _agent_md_for(agent_id: str) -> Path:
        return agents_root / agent_id / "agent.md"

    def _agent_dir_for(agent_id: str) -> Path:
        return agents_root / agent_id

    with patch("deepagents_cli.agent.settings") as patched:
        patched.user_deepagents_dir = agents_root
        patched.ensure_agent_dir.return_value = agent_dir
        patched.ensure_user_skills_dir.return_value = skills_dir
        patched.get_project_skills_dir.return_value = None
        patched.get_user_agent_md_path = _agent_md_for
        patched.get_agent_dir = _agent_dir_for
        patched.get_project_agent_md_path.return_value = None
        patched.project_root = None
        yield agent_dir
class TestDeepAgentsCLIEndToEnd:
    """Test suite for end-to-end deepagents-cli functionality with fake LLM.

    Each test scripts a fixed sequence of AIMessages into a
    FixedGenericFakeChatModel: the agent loop consumes one message per model
    turn, so the iterator order must match the expected tool-call flow.
    """

    def test_cli_agent_with_fake_llm_basic(self, tmp_path: Path) -> None:
        """Test basic CLI agent functionality with a fake LLM model.

        This test verifies that a CLI agent can be created and invoked with
        a fake LLM model that returns predefined responses.
        """
        with mock_settings(tmp_path):
            # Create a fake model that returns predefined messages:
            # turn 1 calls write_todos, turn 2 produces the final answer.
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(
                            content="I'll help you with that.",
                            tool_calls=[
                                {
                                    "name": "write_todos",
                                    "args": {"todos": []},
                                    "id": "call_1",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="Task completed successfully!",
                        ),
                    ]
                )
            )
            # Create a CLI agent with the fake model
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[],
            )
            # Invoke the agent with a simple message; a fresh UUID thread id
            # keeps checkpointer state isolated from other tests.
            result = agent.invoke(
                {"messages": [HumanMessage(content="Hello, agent!")]},
                {"configurable": {"thread_id": str(uuid.uuid4())}},
            )
            # Verify the agent executed correctly
            assert "messages" in result
            assert len(result["messages"]) > 0
            # Verify we got AI responses
            ai_messages = [msg for msg in result["messages"] if msg.type == "ai"]
            assert len(ai_messages) > 0
            # Verify the final AI message contains our expected content
            final_ai_message = ai_messages[-1]
            assert "Task completed successfully!" in final_ai_message.content

    def test_cli_agent_with_fake_llm_with_tools(self, tmp_path: Path) -> None:
        """Test CLI agent with tools using a fake LLM model.

        This test verifies that a CLI agent can handle tool calls correctly
        when using a fake LLM model.
        """
        with mock_settings(tmp_path):
            # Create a fake model that calls sample_tool, then wraps up.
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(
                            content="",
                            tool_calls=[
                                {
                                    "name": "sample_tool",
                                    "args": {"sample_input": "test input"},
                                    "id": "call_1",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="I called the sample_tool with 'test input'.",
                        ),
                    ]
                )
            )
            # Create a CLI agent with the fake model and sample_tool
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[sample_tool],
            )
            # Invoke the agent
            result = agent.invoke(
                {"messages": [HumanMessage(content="Use the sample tool")]},
                {"configurable": {"thread_id": "test-thread-2"}},
            )
            # Verify the agent executed correctly
            assert "messages" in result
            # Verify tool was called (tool results show up as type "tool")
            tool_messages = [msg for msg in result["messages"] if msg.type == "tool"]
            assert len(tool_messages) > 0
            # Verify the tool message contains our expected input
            assert any("test input" in msg.content for msg in tool_messages)

    def test_cli_agent_with_fake_llm_filesystem_tool(self, tmp_path: Path) -> None:
        """Test CLI agent with filesystem tools using a fake LLM model.

        This test verifies that a CLI agent can use the built-in filesystem
        tools (ls, read_file, etc.) with a fake LLM model.
        """
        with mock_settings(tmp_path):
            # Create a test file to list
            test_file = tmp_path / "test.txt"
            test_file.write_text("test content")
            # Create a fake model that uses the built-in `ls` filesystem tool;
            # no extra tools are passed, so `ls` must come from the agent itself.
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(
                            content="",
                            tool_calls=[
                                {
                                    "name": "ls",
                                    "args": {"path": str(tmp_path)},
                                    "id": "call_1",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="I've listed the files in the directory.",
                        ),
                    ]
                )
            )
            # Create a CLI agent with the fake model
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[],
            )
            # Invoke the agent
            result = agent.invoke(
                {"messages": [HumanMessage(content="List files")]},
                {"configurable": {"thread_id": "test-thread-3"}},
            )
            # Verify the agent executed correctly
            assert "messages" in result
            # Verify ls tool was called
            tool_messages = [msg for msg in result["messages"] if msg.type == "tool"]
            assert len(tool_messages) > 0

    def test_cli_agent_with_fake_llm_multiple_tool_calls(self, tmp_path: Path) -> None:
        """Test CLI agent with multiple tool calls using a fake LLM model.

        This test verifies that a CLI agent can handle multiple sequential
        tool calls with a fake LLM model.
        """
        with mock_settings(tmp_path):
            # Create a fake model that makes two sequential tool calls
            # (one per model turn) before producing the final answer.
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(
                            content="",
                            tool_calls=[
                                {
                                    "name": "sample_tool",
                                    "args": {"sample_input": "first call"},
                                    "id": "call_1",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="",
                            tool_calls=[
                                {
                                    "name": "sample_tool",
                                    "args": {"sample_input": "second call"},
                                    "id": "call_2",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="I completed both tool calls successfully.",
                        ),
                    ]
                )
            )
            # Create a CLI agent with the fake model and sample_tool
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[sample_tool],
            )
            # Invoke the agent
            result = agent.invoke(
                {"messages": [HumanMessage(content="Use sample tool twice")]},
                {"configurable": {"thread_id": "test-thread-4"}},
            )
            # Verify the agent executed correctly
            assert "messages" in result
            # Verify multiple tool calls occurred
            tool_messages = [msg for msg in result["messages"] if msg.type == "tool"]
            assert len(tool_messages) >= 2
            # Verify both inputs were used
            tool_contents = [msg.content for msg in tool_messages]
            assert any("first call" in content for content in tool_contents)
            assert any("second call" in content for content in tool_contents)

    def test_cli_agent_backend_setup(self, tmp_path: Path) -> None:
        """Test that CLI agent creates the correct backend setup.

        This test verifies that the backend is properly configured with
        a CompositeBackend containing a FilesystemBackend.
        """
        with mock_settings(tmp_path):
            # Create a simple fake model (one final answer, no tool calls).
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(content="Done."),
                    ]
                )
            )
            # Create a CLI agent
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[],
            )
            # The default backend should be a filesystem-backed composite.
            assert isinstance(backend, CompositeBackend)
            assert isinstance(backend.default, FilesystemBackend)

View File

@@ -0,0 +1,120 @@
import textwrap
from pathlib import Path
from langchain_core.messages import ToolMessage
from deepagents_cli.file_ops import FileOpTracker, build_approval_preview
def test_tracker_records_read_lines(tmp_path: Path) -> None:
    """A completed read_file operation records line count and line range."""
    tracker = FileOpTracker(assistant_id=None)
    target = tmp_path / "example.py"
    tracker.start_operation(
        "read_file",
        {"file_path": str(target), "offset": 0, "limit": 100},
        "read-1",
    )
    tool_message = ToolMessage(
        content=" 1\tline one\n 2\tline two\n",
        tool_call_id="read-1",
        name="read_file",
    )
    completed = tracker.complete_with_message(tool_message)
    assert completed is not None
    assert completed.metrics.lines_read == 2
    assert completed.metrics.start_line == 1
    assert completed.metrics.end_line == 2
def test_tracker_records_write_diff(tmp_path: Path) -> None:
    """A completed write_file operation records written/added lines and a diff."""
    tracker = FileOpTracker(assistant_id=None)
    target = tmp_path / "created.txt"
    tracker.start_operation(
        "write_file",
        {"file_path": str(target)},
        "write-1",
    )
    # Write the file after start_operation so the tracker sees the new content.
    target.write_text("hello world\nsecond line\n")
    completed = tracker.complete_with_message(
        ToolMessage(
            content=f"Updated file {target}",
            tool_call_id="write-1",
            name="write_file",
        )
    )
    assert completed is not None
    assert completed.metrics.lines_written == 2
    assert completed.metrics.lines_added == 2
    assert completed.diff is not None
    assert "+hello world" in completed.diff
def test_tracker_records_edit_diff(tmp_path: Path) -> None:
    """A completed edit_file operation records added/removed lines and a diff."""
    tracker = FileOpTracker(assistant_id=None)
    target = tmp_path / "functions.py"
    # Initial file contents, captured when the operation starts.
    target.write_text(
        textwrap.dedent(
            """\
            def greet():
                return "hello"
            """
        )
    )
    tracker.start_operation(
        "edit_file",
        {"file_path": str(target)},
        "edit-1",
    )
    # Simulate the edit: one changed line plus a newly added function.
    target.write_text(
        textwrap.dedent(
            """\
            def greet():
                return "hi"
            def wave():
                return "wave"
            """
        )
    )
    completed = tracker.complete_with_message(
        ToolMessage(
            content=f"Successfully replaced 1 instance(s) of the string in '{target}'",
            tool_call_id="edit-1",
            name="edit_file",
        )
    )
    assert completed is not None
    assert completed.metrics.lines_added >= 1
    assert completed.metrics.lines_removed >= 1
    assert completed.diff is not None
    assert '- return "hello"' in completed.diff
    assert '+ return "hi"' in completed.diff
def test_build_approval_preview_generates_diff(tmp_path: Path) -> None:
    """build_approval_preview produces a diff for a pending edit_file call."""
    target = tmp_path / "notes.txt"
    target.write_text("alpha\nbeta\n")
    edit_args = {
        "file_path": str(target),
        "old_string": "beta",
        "new_string": "gamma",
        "replace_all": False,
    }
    preview = build_approval_preview("edit_file", edit_args, assistant_id=None)
    assert preview is not None
    assert preview.diff is not None
    assert "+gamma" in preview.diff

View File

@@ -0,0 +1,267 @@
"""Tests for image utilities (clipboard detection, base64 encoding, multimodal content)."""
import base64
import io
from unittest.mock import MagicMock, patch
from PIL import Image
from deepagents_cli.image_utils import (
ImageData,
create_multimodal_content,
encode_image_to_base64,
get_clipboard_image,
)
from deepagents_cli.input import ImageTracker
class TestImageData:
    """Tests for the ImageData dataclass."""

    def test_to_message_content_png(self) -> None:
        """PNG data converts to an image_url block with a data: PNG URL."""
        png = ImageData(
            base64_data="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==",
            format="png",
            placeholder="[image 1]",
        )
        content = png.to_message_content()
        assert content["type"] == "image_url"
        assert "image_url" in content
        assert content["image_url"]["url"].startswith("data:image/png;base64,")

    def test_to_message_content_jpeg(self) -> None:
        """JPEG data converts to an image_url block with a data: JPEG URL."""
        jpeg = ImageData(
            base64_data="abc123",
            format="jpeg",
            placeholder="[image 2]",
        )
        content = jpeg.to_message_content()
        assert content["type"] == "image_url"
        assert content["image_url"]["url"].startswith("data:image/jpeg;base64,")
class TestImageTracker:
    """Tests for the ImageTracker class."""

    def test_add_image_increments_counter(self) -> None:
        """Each added image receives the next sequential placeholder."""
        tracker = ImageTracker()
        first = ImageData(base64_data="abc", format="png", placeholder="")
        second = ImageData(base64_data="def", format="png", placeholder="")
        assert tracker.add_image(first) == "[image 1]"
        assert tracker.add_image(second) == "[image 2]"
        # The placeholder is also written back onto the ImageData itself.
        assert first.placeholder == "[image 1]"
        assert second.placeholder == "[image 2]"

    def test_get_images_returns_copy(self) -> None:
        """Mutating the list from get_images leaves the tracker untouched."""
        tracker = ImageTracker()
        tracker.add_image(ImageData(base64_data="abc", format="png", placeholder=""))
        snapshot = tracker.get_images()
        snapshot.clear()  # Must not affect the tracker's own list
        assert len(tracker.get_images()) == 1

    def test_clear_resets_counter(self) -> None:
        """clear() drops the stored images and rewinds the id counter."""
        tracker = ImageTracker()
        image = ImageData(base64_data="abc", format="png", placeholder="")
        tracker.add_image(image)
        tracker.add_image(image)
        assert tracker.next_id == 3
        assert len(tracker.images) == 2
        tracker.clear()
        assert tracker.next_id == 1
        assert len(tracker.images) == 0

    def test_add_after_clear_starts_at_one(self) -> None:
        """The first image added after clear() is numbered [image 1] again."""
        tracker = ImageTracker()
        image = ImageData(base64_data="abc", format="png", placeholder="")
        tracker.add_image(image)
        tracker.add_image(image)
        tracker.clear()
        fresh = ImageData(base64_data="xyz", format="png", placeholder="")
        assert tracker.add_image(fresh) == "[image 1]"

    def test_remove_image_and_reset_counter(self) -> None:
        """Removing an image (backspace delete) allows the counter to shrink."""
        tracker = ImageTracker()
        tracker.add_image(ImageData(base64_data="abc", format="png", placeholder=""))
        tracker.add_image(ImageData(base64_data="def", format="png", placeholder=""))
        # Mirror the backspace-delete code path: drop image 2, recompute next_id.
        tracker.images.pop(1)
        tracker.next_id = len(tracker.images) + 1
        assert tracker.next_id == 2
        assert len(tracker.images) == 1
class TestEncodeImageToBase64:
    """Tests for base64 encoding of image bytes."""

    def test_encode_image_bytes(self) -> None:
        """Arbitrary raw bytes round-trip through the base64 encoder."""
        payload = b"test image data"
        encoded = encode_image_to_base64(payload)
        assert base64.b64decode(encoded) == payload

    def test_encode_png_bytes(self) -> None:
        """Real PNG bytes round-trip through the base64 encoder."""
        # Build a tiny PNG entirely in memory.
        buffer = io.BytesIO()
        Image.new("RGB", (10, 10), color="red").save(buffer, format="PNG")
        payload = buffer.getvalue()
        encoded = encode_image_to_base64(payload)
        assert base64.b64decode(encoded) == payload
class TestCreateMultimodalContent:
    """Tests for assembling multimodal message content."""

    def test_text_only(self) -> None:
        """Text with no images yields a single text block."""
        content = create_multimodal_content("Hello world", [])
        assert len(content) == 1
        assert content[0]["type"] == "text"
        assert content[0]["text"] == "Hello world"

    def test_text_and_one_image(self) -> None:
        """Text plus one image yields a text block followed by an image block."""
        image = ImageData(base64_data="abc123", format="png", placeholder="[image 1]")
        content = create_multimodal_content("Describe this:", [image])
        assert len(content) == 2
        assert content[0]["type"] == "text"
        assert content[0]["text"] == "Describe this:"
        assert content[1]["type"] == "image_url"

    def test_text_and_multiple_images(self) -> None:
        """Every image contributes its own image_url block after the text."""
        images = [
            ImageData(base64_data="abc", format="png", placeholder="[image 1]"),
            ImageData(base64_data="def", format="png", placeholder="[image 2]"),
        ]
        content = create_multimodal_content("Compare these:", images)
        assert len(content) == 3
        assert content[0]["type"] == "text"
        assert content[1]["type"] == "image_url"
        assert content[2]["type"] == "image_url"

    def test_empty_text_with_image(self) -> None:
        """An empty prompt contributes no empty text block."""
        image = ImageData(base64_data="abc", format="png", placeholder="[image 1]")
        content = create_multimodal_content("", [image])
        assert len(content) == 1
        assert content[0]["type"] == "image_url"

    def test_whitespace_only_text(self) -> None:
        """A whitespace-only prompt contributes no text block."""
        image = ImageData(base64_data="abc", format="png", placeholder="[image 1]")
        content = create_multimodal_content(" \n\t ", [image])
        assert len(content) == 1
        assert content[0]["type"] == "image_url"
class TestGetClipboardImage:
    """Tests for clipboard image detection."""

    @patch("deepagents_cli.image_utils.sys.platform", "linux")
    def test_unsupported_platform_returns_none(self) -> None:
        """Platforms other than macOS yield no clipboard image."""
        assert get_clipboard_image() is None

    @patch("deepagents_cli.image_utils.sys.platform", "darwin")
    @patch("deepagents_cli.image_utils._get_macos_clipboard_image")
    def test_macos_calls_macos_function(self, mock_macos_fn: MagicMock) -> None:
        """On macOS the platform-specific helper is dispatched exactly once."""
        mock_macos_fn.return_value = None
        get_clipboard_image()
        mock_macos_fn.assert_called_once()

    @patch("deepagents_cli.image_utils.sys.platform", "darwin")
    @patch("deepagents_cli.image_utils.subprocess.run")
    def test_pngpaste_success(self, mock_run: MagicMock) -> None:
        """A zero exit from pngpaste produces a base64-encoded PNG result."""
        # Build a small valid PNG in memory to stand in for clipboard data.
        buffer = io.BytesIO()
        Image.new("RGB", (10, 10), color="blue").save(buffer, format="PNG")
        mock_run.return_value = MagicMock(
            returncode=0,
            stdout=buffer.getvalue(),
        )
        clipboard = get_clipboard_image()
        assert clipboard is not None
        assert clipboard.format == "png"
        assert len(clipboard.base64_data) > 0

    @patch("deepagents_cli.image_utils.sys.platform", "darwin")
    @patch("deepagents_cli.image_utils.subprocess.run")
    def test_pngpaste_not_installed_falls_back(self, mock_run: MagicMock) -> None:
        """A missing pngpaste binary falls through to the osascript path."""
        # Call 1 (pngpaste) is absent; call 2 (osascript clipboard info)
        # reports text-only contents, i.e. no image class ("pngf") present.
        mock_run.side_effect = [
            FileNotFoundError("pngpaste not found"),
            MagicMock(returncode=0, stdout="text data"),
        ]
        assert get_clipboard_image() is None
        # Both retrieval strategies must have been attempted.
        assert mock_run.call_count == 2

    @patch("deepagents_cli.image_utils.sys.platform", "darwin")
    @patch("deepagents_cli.image_utils._get_clipboard_via_osascript")
    @patch("deepagents_cli.image_utils.subprocess.run")
    def test_no_image_in_clipboard(self, mock_run: MagicMock, mock_osascript: MagicMock) -> None:
        """When neither pngpaste nor osascript finds an image, None comes back."""
        mock_run.return_value = MagicMock(returncode=1, stdout=b"")
        mock_osascript.return_value = None
        assert get_clipboard_image() is None

View File

@@ -0,0 +1,11 @@
"""Test importing files."""


def test_imports() -> None:
    """Smoke test: the top-level deepagents_cli modules are importable.

    The imports themselves are the assertion — any ImportError (missing
    dependency, syntax error, circular import) fails the test.
    """
    from deepagents_cli import (
        agent,  # noqa: F401
        agent_memory,  # noqa: F401
        integrations,  # noqa: F401
    )
    from deepagents_cli.main import cli_main  # noqa: F401

View File

@@ -0,0 +1,24 @@
"""Test version consistency between _version.py and pyproject.toml."""

import tomllib
from pathlib import Path

from deepagents_cli._version import __version__


def test_version_matches_pyproject() -> None:
    """Verify that __version__ in _version.py matches version in pyproject.toml."""
    # pyproject.toml sits three directories above this test file.
    pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
    with pyproject_path.open("rb") as handle:
        declared_version = tomllib.load(handle)["project"]["version"]
    assert __version__ == declared_version, (
        f"Version mismatch: _version.py has '{__version__}' "
        f"but pyproject.toml has '{declared_version}'"
    )

View File

@@ -0,0 +1,72 @@
"""Tests for tools module."""
import requests
import responses
from deepagents_cli.tools import fetch_url
@responses.activate
def test_fetch_url_success() -> None:
    """A 200 HTML response is converted to markdown with request metadata."""
    responses.add(
        responses.GET,
        "http://example.com",
        body="<html><body><h1>Test</h1><p>Content</p></body></html>",
        status=200,
    )
    payload = fetch_url("http://example.com")
    assert payload["status_code"] == 200
    assert "Test" in payload["markdown_content"]
    assert payload["url"].startswith("http://example.com")
    assert payload["content_length"] > 0
@responses.activate
def test_fetch_url_http_error() -> None:
    """A 404 response surfaces as an error payload, not an exception."""
    responses.add(
        responses.GET,
        "http://example.com/notfound",
        status=404,
    )
    payload = fetch_url("http://example.com/notfound")
    assert "error" in payload
    assert "Fetch URL error" in payload["error"]
    assert payload["url"] == "http://example.com/notfound"
@responses.activate
def test_fetch_url_timeout() -> None:
    """A request timeout surfaces as an error payload."""
    # `responses` raises the exception given as body when the request is made.
    responses.add(
        responses.GET,
        "http://example.com/slow",
        body=requests.exceptions.Timeout(),
    )
    payload = fetch_url("http://example.com/slow", timeout=1)
    assert "error" in payload
    assert "Fetch URL error" in payload["error"]
    assert payload["url"] == "http://example.com/slow"
@responses.activate
def test_fetch_url_connection_error() -> None:
    """A connection failure surfaces as an error payload."""
    responses.add(
        responses.GET,
        "http://example.com/error",
        body=requests.exceptions.ConnectionError(),
    )
    payload = fetch_url("http://example.com/error")
    assert "error" in payload
    assert "Fetch URL error" in payload["error"]
    assert payload["url"] == "http://example.com/error"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
# Default scope for lint/format targets: the whole package directory.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache

# Target-specific overrides: full tree, only files changed vs master,
# the package, or just the tests.
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/deepagents --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=.
lint_tests: PYTHON_FILES=tests

# Lint: format check (diff only), then ruff + mypy unless LINT=minimal.
# All steps are skipped when PYTHON_FILES is empty (no changed files).
lint lint_diff lint_package lint_tests:
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES) --diff
	@if [ "$(LINT)" != "minimal" ]; then \
		if [ "$(PYTHON_FILES)" != "" ]; then \
			uv run --all-groups ruff check $(PYTHON_FILES); \
		fi; \
	fi
	@if [ "$(LINT)" != "minimal" ]; then \
		if [ "$(PYTHON_FILES)" != "" ]; then \
			mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); \
		fi; \
	fi

# Format: apply ruff formatting and auto-fixable lint rules in place.
format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check --fix $(PYTHON_FILES)

# Unit tests with coverage.
test:
	uv run pytest tests/unit_tests --cov=deepagents --cov-report=term-missing

# Integration tests with coverage.
integration_test:
	uv run pytest tests/integration_tests --cov=deepagents --cov-report=term-missing

View File

@@ -0,0 +1,495 @@
# 🧠🤖Deep Agents
Using an LLM to call tools in a loop is the simplest form of an agent.
This architecture, however, can yield agents that are “shallow” and fail to plan and act over longer, more complex tasks.
Applications like “Deep Research”, "Manus", and “Claude Code” have gotten around this limitation by implementing a combination of four things:
a **planning tool**, **sub agents**, access to a **file system**, and a **detailed prompt**.
<img src="../../deep_agents.png" alt="deep agent" width="600"/>
`deepagents` is a Python package that implements these in a general purpose way so that you can easily create a Deep Agent for your application. For a full overview and quickstart of `deepagents`, the best resource is our [docs](https://docs.langchain.com/oss/python/deepagents/overview).
**Acknowledgements: This project was primarily inspired by Claude Code, and initially was largely an attempt to see what made Claude Code general purpose, and make it even more so.**
## Installation
```bash
# pip
pip install deepagents
# uv
uv add deepagents
# poetry
poetry add deepagents
```
## Usage
(To run the example below, you will need to `pip install tavily-python`).
Make sure to set `TAVILY_API_KEY` in your environment. You can generate one [here](https://www.tavily.com/).
```python
import os
from typing import Literal
from tavily import TavilyClient
from deepagents import create_deep_agent
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
# Web search tool
def internet_search(
query: str,
max_results: int = 5,
topic: Literal["general", "news", "finance"] = "general",
include_raw_content: bool = False,
):
"""Run a web search"""
return tavily_client.search(
query,
max_results=max_results,
include_raw_content=include_raw_content,
topic=topic,
)
# System prompt to steer the agent to be an expert researcher
research_instructions = """You are an expert researcher. Your job is to conduct thorough research, and then write a polished report.
You have access to an internet search tool as your primary means of gathering information.
## `internet_search`
Use this to run an internet search for a given query. You can specify the max number of results to return, the topic, and whether raw content should be included.
"""
# Create the deep agent
agent = create_deep_agent(
tools=[internet_search],
system_prompt=research_instructions,
)
# Invoke the agent
result = agent.invoke({"messages": [{"role": "user", "content": "What is langgraph?"}]})
```
See [examples/research/research_agent.py](examples/research/research_agent.py) for a more complex example.
The agent created with `create_deep_agent` is just a LangGraph graph - so you can interact with it (streaming, human-in-the-loop, memory, studio)
in the same way you would any LangGraph agent.
## Core Capabilities
**Planning & Task Decomposition**
Deep Agents include a built-in `write_todos` tool that enables agents to break down complex tasks into discrete steps, track progress, and adapt plans as new information emerges.
**Context Management**
File system tools (`ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`) allow agents to offload large context to memory, preventing context window overflow and enabling work with variable-length tool results.
**Subagent Spawning**
A built-in `task` tool enables agents to spawn specialized subagents for context isolation. This keeps the main agent's context clean while still going deep on specific subtasks.
**Long-term Memory**
Extend agents with persistent memory across threads using LangGraph's Store. Agents can save and retrieve information from previous conversations.
## Customizing Deep Agents
There are several parameters you can pass to `create_deep_agent` to create your own custom deep agent.
### `model`
By default, `deepagents` uses `"claude-sonnet-4-5-20250929"`. You can customize this by passing any [LangChain model object](https://python.langchain.com/docs/integrations/chat/).
```python
from langchain.chat_models import init_chat_model
from deepagents import create_deep_agent
model = init_chat_model("openai:gpt-4o")
agent = create_deep_agent(
model=model,
)
```
### `system_prompt`
Deep Agents come with a built-in system prompt. This is a relatively detailed prompt that is heavily based on and inspired by [attempts](https://github.com/kn1026/cc/blob/main/claudecode.md) to [replicate](https://github.com/asgeirtj/system_prompts_leaks/blob/main/Anthropic/claude-code.md)
Claude Code's system prompt. It was made more general purpose than Claude Code's system prompt. The default prompt contains detailed instructions for how to use the built-in planning tool, file system tools, and sub agents.
Each deep agent tailored to a use case should include a custom system prompt specific to that use case as well. The importance of prompting for creating a successful deep agent cannot be overstated.
```python
from deepagents import create_deep_agent
research_instructions = """You are an expert researcher. Your job is to conduct thorough research, and then write a polished report.
"""
agent = create_deep_agent(
system_prompt=research_instructions,
)
```
### `tools`
Just like with tool-calling agents, you can provide a deep agent with a set of tools that it has access to.
```python
import os
from typing import Literal
from tavily import TavilyClient
from deepagents import create_deep_agent
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
def internet_search(
query: str,
max_results: int = 5,
topic: Literal["general", "news", "finance"] = "general",
include_raw_content: bool = False,
):
"""Run a web search"""
return tavily_client.search(
query,
max_results=max_results,
include_raw_content=include_raw_content,
topic=topic,
)
agent = create_deep_agent(
tools=[internet_search]
)
```
### `middleware`
`create_deep_agent` is implemented with middleware that can be customized. You can provide additional middleware to extend functionality, add tools, or implement custom hooks.
```python
from langchain_core.tools import tool
from deepagents import create_deep_agent
from langchain.agents.middleware import AgentMiddleware
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
@tool
def get_temperature(city: str) -> str:
"""Get the temperature in a city."""
return f"The temperature in {city} is 70 degrees Fahrenheit."
class WeatherMiddleware(AgentMiddleware):
tools = [get_weather, get_temperature]
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
middleware=[WeatherMiddleware()]
)
```
### `subagents`
A main feature of Deep Agents is their ability to spawn subagents. You can specify custom subagents that your agent can hand off work to in the subagents parameter. Sub agents are useful for context quarantine (to help not pollute the overall context of the main agent) as well as custom instructions.
`subagents` should be a list of dictionaries, where each dictionary follows this schema:
```python
class SubAgent(TypedDict):
name: str
description: str
system_prompt: str
tools: Sequence[BaseTool | Callable | dict[str, Any]]
model: NotRequired[str | BaseChatModel]
middleware: NotRequired[list[AgentMiddleware]]
interrupt_on: NotRequired[dict[str, bool | InterruptOnConfig]]
class CompiledSubAgent(TypedDict):
name: str
description: str
runnable: Runnable
```
**SubAgent fields:**
- **name**: This is the name of the subagent, and how the main agent will call the subagent
- **description**: This is the description of the subagent that is shown to the main agent
- **system_prompt**: This is the system prompt used for the subagent
- **tools**: This is the list of tools that the subagent has access to.
- **model**: Optional model name or model instance.
- **middleware**: Additional middleware to attach to the subagent. See [here](https://docs.langchain.com/oss/python/langchain/middleware) for an introduction to middleware and how it works with create_agent.
- **interrupt_on**: A custom interrupt config that specifies human-in-the-loop interactions for your tools.
**CompiledSubAgent fields:**
- **name**: This is the name of the subagent, and how the main agent will call the subagent
- **description**: This is the description of the subagent that is shown to the main agent
- **runnable**: A pre-built LangGraph graph/agent that will be used as the subagent
#### Using SubAgent
```python
import os
from typing import Literal
from tavily import TavilyClient
from deepagents import create_deep_agent
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
def internet_search(
query: str,
max_results: int = 5,
topic: Literal["general", "news", "finance"] = "general",
include_raw_content: bool = False,
):
"""Run a web search"""
return tavily_client.search(
query,
max_results=max_results,
include_raw_content=include_raw_content,
topic=topic,
)
research_subagent = {
"name": "research-agent",
"description": "Used to research more in depth questions",
"system_prompt": "You are a great researcher",
"tools": [internet_search],
"model": "openai:gpt-4o", # Optional override, defaults to main agent model
}
subagents = [research_subagent]
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
subagents=subagents
)
```
#### Using CustomSubAgent
For more complex use cases, you can provide your own pre-built LangGraph graph as a subagent:
```python
# Create a custom agent graph
custom_graph = create_agent(
model=your_model,
tools=specialized_tools,
prompt="You are a specialized agent for data analysis..."
)
# Use it as a custom subagent
custom_subagent = CompiledSubAgent(
name="data-analyzer",
description="Specialized agent for complex data analysis tasks",
runnable=custom_graph
)
subagents = [custom_subagent]
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
tools=[internet_search],
system_prompt=research_instructions,
subagents=subagents
)
```
### `interrupt_on`
A common reality for agents is that some tool operations may be sensitive and require human approval before execution. Deep Agents supports human-in-the-loop workflows through LangGraph's interrupt capabilities. You can configure which tools require approval using a checkpointer.
These tool configs are passed to our prebuilt [HITL middleware](https://docs.langchain.com/oss/python/langchain/middleware#human-in-the-loop) so that the agent pauses execution and waits for feedback from the user before executing configured tools.
```python
from langchain_core.tools import tool
from deepagents import create_deep_agent
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
tools=[get_weather],
interrupt_on={
"get_weather": {
"allowed_decisions": ["approve", "edit", "reject"]
},
}
)
```
## Deep Agents Middleware
Deep Agents are built with a modular middleware architecture. As a reminder, Deep Agents have access to:
- A planning tool
- A filesystem for storing context and long-term memories
- The ability to spawn subagents
Each of these features is implemented as separate middleware. When you create a deep agent with `create_deep_agent`, we automatically attach **TodoListMiddleware**, **FilesystemMiddleware** and **SubAgentMiddleware** to your agent.
Middleware is a composable concept, and you can choose to add as many or as few middleware to an agent depending on your use case. That means that you can also use any of the aforementioned middleware independently!
### TodoListMiddleware
Planning is integral to solving complex problems. If you've used Claude Code recently, you'll notice how it writes out a To-Do list before tackling complex, multi-part tasks. You'll also notice how it can adapt and update this To-Do list on the fly as more information comes in.
**TodoListMiddleware** provides your agent with a tool specifically for updating this To-Do list. Before, and while it executes a multi-part task, the agent is prompted to use the write_todos tool to keep track of what it's doing, and what still needs to be done.
```python
from langchain.agents import create_agent
from langchain.agents.middleware import TodoListMiddleware
# TodoListMiddleware is included by default in create_deep_agent
# You can customize it if building a custom agent
agent = create_agent(
model="anthropic:claude-sonnet-4-20250514",
# Custom planning instructions can be added via middleware
middleware=[
TodoListMiddleware(
system_prompt="Use the write_todos tool to..." # Optional: Custom addition to the system prompt
),
],
)
```
### FilesystemMiddleware
Context engineering is one of the main challenges in building effective agents. This can be particularly hard when using tools that can return variable length results (ex. web_search, rag), as long ToolResults can quickly fill up your context window.
**FilesystemMiddleware** provides four tools to your agent to interact with both short-term and long-term memory.
- **ls**: List the files in your filesystem
- **read_file**: Read an entire file, or a certain number of lines from a file
- **write_file**: Write a new file to your filesystem
- **edit_file**: Edit an existing file in your filesystem
```python
from langchain.agents import create_agent
from deepagents.middleware.filesystem import FilesystemMiddleware
# FilesystemMiddleware is included by default in create_deep_agent
# You can customize it if building a custom agent
agent = create_agent(
model="anthropic:claude-sonnet-4-20250514",
middleware=[
FilesystemMiddleware(
backend=..., # Optional: customize storage backend
system_prompt="Write to the filesystem when...", # Optional custom system prompt override
custom_tool_descriptions={
"ls": "Use the ls tool when...",
"read_file": "Use the read_file tool to..."
} # Optional: Custom descriptions for filesystem tools
),
],
)
```
### SubAgentMiddleware
Handing off tasks to subagents is a great way to isolate context, keeping the context window of the main (supervisor) agent clean while still going deep on a task. The subagents middleware allows you to supply subagents through a task tool.
A subagent is defined with a name, description, system prompt, and tools. You can also provide a subagent with a custom model, or with additional middleware. This can be particularly useful when you want to give the subagent an additional state key to share with the main agent.
```python
from langchain_core.tools import tool
from langchain.agents import create_agent
from deepagents.middleware.subagents import SubAgentMiddleware
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
agent = create_agent(
model="claude-sonnet-4-20250514",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [get_weather],
"model": "gpt-4.1",
"middleware": [],
}
],
)
],
)
```
For more complex use cases, you can also provide your own pre-built LangGraph graph as a subagent.
```python
# Create a custom LangGraph graph
def create_weather_graph():
workflow = StateGraph(...)
# Build your custom graph
return workflow.compile()
weather_graph = create_weather_graph()
# Wrap it in a CompiledSubAgent
weather_subagent = CompiledSubAgent(
name="weather",
description="This subagent can get weather in cities.",
runnable=weather_graph
)
agent = create_agent(
model="anthropic:claude-sonnet-4-20250514",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[weather_subagent],
)
],
)
```
## Sync vs Async
Prior versions of deepagents separated sync and async agent factories.
`async_create_deep_agent` has been folded in to `create_deep_agent`.
**You should use `create_deep_agent` as the factory for both sync and async agents**
## MCP
The `deepagents` library can be run with MCP tools. This can be achieved by using the [LangChain MCP Adapter library](https://github.com/langchain-ai/langchain-mcp-adapters).
**NOTE:** MCP tools are async, so you'll need to use `agent.ainvoke()` or `agent.astream()` for invocation.
(To run the example below, you will need to `pip install langchain-mcp-adapters`.)
```python
import asyncio
from langchain_mcp_adapters.client import MultiServerMCPClient
from deepagents import create_deep_agent
async def main():
# Collect MCP tools
mcp_client = MultiServerMCPClient(...)
mcp_tools = await mcp_client.get_tools()
# Create agent
agent = create_deep_agent(tools=mcp_tools, ....)
# Stream the agent
async for chunk in agent.astream(
{"messages": [{"role": "user", "content": "what is langgraph?"}]},
stream_mode="values"
):
if "messages" in chunk:
chunk["messages"][-1].pretty_print()
asyncio.run(main())
```

View File

@@ -0,0 +1,7 @@
"""DeepAgents package."""
from deepagents.graph import create_deep_agent
from deepagents.middleware.filesystem import FilesystemMiddleware
from deepagents.middleware.subagents import CompiledSubAgent, SubAgent, SubAgentMiddleware
__all__ = ["CompiledSubAgent", "FilesystemMiddleware", "SubAgent", "SubAgentMiddleware", "create_deep_agent"]

View File

@@ -0,0 +1,15 @@
"""Memory backends for pluggable file storage."""
from deepagents.backends.composite import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import BackendProtocol
from deepagents.backends.state import StateBackend
from deepagents.backends.store import StoreBackend
__all__ = [
"BackendProtocol",
"CompositeBackend",
"FilesystemBackend",
"StateBackend",
"StoreBackend",
]

View File

@@ -0,0 +1,557 @@
"""CompositeBackend: 경로 접두사(prefix)를 기반으로 작업을 다른 백엔드로 라우팅합니다."""
from collections import defaultdict
from deepagents.backends.protocol import (
BackendProtocol,
EditResult,
ExecuteResponse,
FileDownloadResponse,
FileInfo,
FileUploadResponse,
GrepMatch,
SandboxBackendProtocol,
WriteResult,
)
from deepagents.backends.state import StateBackend
class CompositeBackend:
    """Route file operations to different backends based on path prefixes.

    Paths matching a configured route prefix (e.g. ``/memories/``) are
    dispatched to that route's backend with the prefix stripped; all other
    paths go to the default backend. Listing/search results coming back from
    a routed backend get the route prefix re-applied so callers always see
    the composite (virtual) paths.
    """
    def __init__(
        self,
        default: BackendProtocol | StateBackend,
        routes: dict[str, BackendProtocol],
    ) -> None:
        """Initialize the composite backend.

        Args:
            default: Backend used for all paths that match no route prefix.
            routes: Mapping of path prefix (e.g. ``"/memories/"``) to the
                backend that handles keys under that prefix.
        """
        # Default backend
        self.default = default
        # Virtual routes
        self.routes = routes
        # Sort routes by length (longest first) for correct prefix matching
        self.sorted_routes = sorted(routes.items(), key=lambda x: len(x[0]), reverse=True)
    def _get_backend_and_key(self, key: str) -> tuple[BackendProtocol, str]:
        """Determine which backend handles this key and strip the route prefix.

        Args:
            key: Original file path.

        Returns:
            Tuple of (backend, stripped_key). The stripped key has the route
            prefix removed but keeps a leading slash.
        """
        # Check routes in order of length (longest first)
        for prefix, backend in self.sorted_routes:
            if key.startswith(prefix):
                # Strip full prefix and ensure a leading slash remains
                # e.g., "/memories/notes.txt" → "/notes.txt"; "/memories/" → "/"
                suffix = key[len(prefix) :]
                stripped_key = f"/{suffix}" if suffix else "/"
                return backend, stripped_key
        return self.default, key
    def ls_info(self, path: str) -> list[FileInfo]:
        """List files and directories in the given directory (non-recursive).

        Args:
            path: Absolute directory path.

        Returns:
            List of FileInfo-like dicts for entries directly under the
            directory, with route prefixes re-applied. Directories end with
            a trailing ``/`` and have ``is_dir=True``.
        """
        # Check if path matches a specific route
        # NOTE(review): startswith on the rstrip'd prefix also matches sibling
        # paths such as "/memoriesX" for a "/memories/" route — confirm intended.
        for route_prefix, backend in self.sorted_routes:
            if path.startswith(route_prefix.rstrip("/")):
                # Query only the matching routed backend
                suffix = path[len(route_prefix) :]
                search_path = f"/{suffix}" if suffix else "/"
                infos = backend.ls_info(search_path)
                prefixed: list[FileInfo] = []
                for fi in infos:
                    fi = dict(fi)
                    fi["path"] = f"{route_prefix[:-1]}{fi['path']}"
                    prefixed.append(fi)
                return prefixed
        # At root, aggregate default and all routed backends
        if path == "/":
            results: list[FileInfo] = []
            results.extend(self.default.ls_info(path))
            for route_prefix, backend in self.sorted_routes:
                # Add the route itself as a directory (e.g., /memories/)
                results.append({
                    "path": route_prefix,
                    "is_dir": True,
                    "size": 0,
                    "modified_at": "",
                })
            results.sort(key=lambda x: x.get("path", ""))
            return results
        # Path doesn't match a route: query only default backend
        return self.default.ls_info(path)
    async def als_info(self, path: str) -> list[FileInfo]:
        """Async version of ls_info."""
        # Check if path matches a specific route
        for route_prefix, backend in self.sorted_routes:
            if path.startswith(route_prefix.rstrip("/")):
                # Query only the matching routed backend
                suffix = path[len(route_prefix) :]
                search_path = f"/{suffix}" if suffix else "/"
                infos = await backend.als_info(search_path)
                prefixed: list[FileInfo] = []
                for fi in infos:
                    fi = dict(fi)
                    fi["path"] = f"{route_prefix[:-1]}{fi['path']}"
                    prefixed.append(fi)
                return prefixed
        # At root, aggregate default and all routed backends
        if path == "/":
            results: list[FileInfo] = []
            results.extend(await self.default.als_info(path))
            for route_prefix, backend in self.sorted_routes:
                # Add the route itself as a directory (e.g., /memories/)
                results.append({
                    "path": route_prefix,
                    "is_dir": True,
                    "size": 0,
                    "modified_at": "",
                })
            results.sort(key=lambda x: x.get("path", ""))
            return results
        # Path doesn't match a route: query only default backend
        return await self.default.als_info(path)
    def read(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Read file contents, routing to the appropriate backend.

        Args:
            file_path: Absolute file path.
            offset: Line offset to start reading from (0-indexed).
            limit: Maximum number of lines to read.

        Returns:
            Formatted file content with line numbers, or an error message.
        """
        backend, stripped_key = self._get_backend_and_key(file_path)
        return backend.read(stripped_key, offset=offset, limit=limit)
    async def aread(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Async version of read."""
        backend, stripped_key = self._get_backend_and_key(file_path)
        return await backend.aread(stripped_key, offset=offset, limit=limit)
    def grep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list[GrepMatch] | str:
        """Search for a regex pattern, merging matches across backends.

        When *path* targets a specific route, only that backend is searched.
        Otherwise the default backend and every routed backend are searched
        and the matches merged, with route prefixes re-applied.

        Returns:
            List of GrepMatch dicts, or an error string on failure.
        """
        # If path targets a specific route, search only that backend
        for route_prefix, backend in self.sorted_routes:
            if path is not None and path.startswith(route_prefix.rstrip("/")):
                search_path = path[len(route_prefix) - 1 :]
                raw = backend.grep_raw(pattern, search_path if search_path else "/", glob)
                if isinstance(raw, str):
                    return raw
                return [{**m, "path": f"{route_prefix[:-1]}{m['path']}"} for m in raw]
        # Otherwise, search default and all routed backends and merge
        all_matches: list[GrepMatch] = []
        raw_default = self.default.grep_raw(pattern, path, glob)  # type: ignore[attr-defined]
        if isinstance(raw_default, str):
            # This happens if error occurs
            return raw_default
        all_matches.extend(raw_default)
        for route_prefix, backend in self.routes.items():
            raw = backend.grep_raw(pattern, "/", glob)
            if isinstance(raw, str):
                # This happens if error occurs
                return raw
            all_matches.extend({**m, "path": f"{route_prefix[:-1]}{m['path']}"} for m in raw)
        return all_matches
    async def agrep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list[GrepMatch] | str:
        """Async version of grep_raw."""
        # If path targets a specific route, search only that backend
        for route_prefix, backend in self.sorted_routes:
            if path is not None and path.startswith(route_prefix.rstrip("/")):
                search_path = path[len(route_prefix) - 1 :]
                raw = await backend.agrep_raw(pattern, search_path if search_path else "/", glob)
                if isinstance(raw, str):
                    return raw
                return [{**m, "path": f"{route_prefix[:-1]}{m['path']}"} for m in raw]
        # Otherwise, search default and all routed backends and merge
        all_matches: list[GrepMatch] = []
        raw_default = await self.default.agrep_raw(pattern, path, glob)  # type: ignore[attr-defined]
        if isinstance(raw_default, str):
            # This happens if error occurs
            return raw_default
        all_matches.extend(raw_default)
        for route_prefix, backend in self.routes.items():
            raw = await backend.agrep_raw(pattern, "/", glob)
            if isinstance(raw, str):
                # This happens if error occurs
                return raw
            all_matches.extend({**m, "path": f"{route_prefix[:-1]}{m['path']}"} for m in raw)
        return all_matches
    def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
        """Find files matching a glob pattern, merging results across backends.

        Returns:
            FileInfo dicts sorted by path, with route prefixes re-applied.
        """
        results: list[FileInfo] = []
        # Route based on path, not pattern
        for route_prefix, backend in self.sorted_routes:
            if path.startswith(route_prefix.rstrip("/")):
                search_path = path[len(route_prefix) - 1 :]
                infos = backend.glob_info(pattern, search_path if search_path else "/")
                return [{**fi, "path": f"{route_prefix[:-1]}{fi['path']}"} for fi in infos]
        # Path doesn't match any specific route - search default backend AND all routed backends
        results.extend(self.default.glob_info(pattern, path))
        for route_prefix, backend in self.routes.items():
            infos = backend.glob_info(pattern, "/")
            results.extend({**fi, "path": f"{route_prefix[:-1]}{fi['path']}"} for fi in infos)
        # Deterministic ordering
        results.sort(key=lambda x: x.get("path", ""))
        return results
    async def aglob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
        """Async version of glob_info."""
        results: list[FileInfo] = []
        # Route based on path, not pattern
        for route_prefix, backend in self.sorted_routes:
            if path.startswith(route_prefix.rstrip("/")):
                search_path = path[len(route_prefix) - 1 :]
                infos = await backend.aglob_info(pattern, search_path if search_path else "/")
                return [{**fi, "path": f"{route_prefix[:-1]}{fi['path']}"} for fi in infos]
        # Path doesn't match any specific route - search default backend AND all routed backends
        results.extend(await self.default.aglob_info(pattern, path))
        for route_prefix, backend in self.routes.items():
            infos = await backend.aglob_info(pattern, "/")
            results.extend({**fi, "path": f"{route_prefix[:-1]}{fi['path']}"} for fi in infos)
        # Deterministic ordering
        results.sort(key=lambda x: x.get("path", ""))
        return results
    def write(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Create a new file, routing to the appropriate backend.

        Args:
            file_path: Absolute file path.
            content: File content as a string.

        Returns:
            A success message or Command object, or an error if the file
            already exists.
        """
        backend, stripped_key = self._get_backend_and_key(file_path)
        res = backend.write(stripped_key, content)
        # If this is a state-backed update and default has state, merge so listings reflect changes
        if res.files_update:
            try:
                runtime = getattr(self.default, "runtime", None)
                if runtime is not None:
                    state = runtime.state
                    files = state.get("files", {})
                    files.update(res.files_update)
                    state["files"] = files
            except Exception:
                # Best-effort merge: state mirroring failures are deliberately swallowed.
                pass
        return res
    async def awrite(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Async version of write."""
        backend, stripped_key = self._get_backend_and_key(file_path)
        res = await backend.awrite(stripped_key, content)
        # If this is a state-backed update and default has state, merge so listings reflect changes
        if res.files_update:
            try:
                runtime = getattr(self.default, "runtime", None)
                if runtime is not None:
                    state = runtime.state
                    files = state.get("files", {})
                    files.update(res.files_update)
                    state["files"] = files
            except Exception:
                pass
        return res
    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Edit a file, routing to the appropriate backend.

        Args:
            file_path: Absolute file path.
            old_string: String to find and replace.
            new_string: Replacement string.
            replace_all: When True, replace every occurrence.

        Returns:
            A success message or Command object, or an error message on failure.
        """
        backend, stripped_key = self._get_backend_and_key(file_path)
        res = backend.edit(stripped_key, old_string, new_string, replace_all=replace_all)
        if res.files_update:
            try:
                runtime = getattr(self.default, "runtime", None)
                if runtime is not None:
                    state = runtime.state
                    files = state.get("files", {})
                    files.update(res.files_update)
                    state["files"] = files
            except Exception:
                pass
        return res
    async def aedit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Async version of edit."""
        backend, stripped_key = self._get_backend_and_key(file_path)
        res = await backend.aedit(stripped_key, old_string, new_string, replace_all=replace_all)
        if res.files_update:
            try:
                runtime = getattr(self.default, "runtime", None)
                if runtime is not None:
                    state = runtime.state
                    files = state.get("files", {})
                    files.update(res.files_update)
                    state["files"] = files
            except Exception:
                pass
        return res
    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Execute a command via the default backend.

        Execution is not path-specific, so it is always delegated to the
        default backend, which must implement SandboxBackendProtocol for
        this to work.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse containing combined output, exit code, and
            truncation flag.

        Raises:
            NotImplementedError: If the default backend doesn't support execution.
        """
        if isinstance(self.default, SandboxBackendProtocol):
            return self.default.execute(command)
        # This shouldn't be reached if the runtime check in the execute tool works correctly,
        # but we include it as a safety fallback.
        raise NotImplementedError(
            "Default backend doesn't support command execution (SandboxBackendProtocol). "
            "To enable execution, provide a default backend that implements SandboxBackendProtocol."
        )
    async def aexecute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Async version of execute."""
        if isinstance(self.default, SandboxBackendProtocol):
            return await self.default.aexecute(command)
        # This shouldn't be reached if the runtime check in the execute tool works correctly,
        # but we include it as a safety fallback.
        raise NotImplementedError(
            "Default backend doesn't support command execution (SandboxBackendProtocol). "
            "To enable execution, provide a default backend that implements SandboxBackendProtocol."
        )
    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files, batched per backend for efficiency.

        Groups files by target backend, calls each backend's upload_files
        once with all of its files, then merges the results back into the
        original order.

        Args:
            files: List of (path, content) tuples to upload.

        Returns:
            List of FileUploadResponse objects, one per input file, in the
            same order as the input.
        """
        # Pre-allocate result list
        results: list[FileUploadResponse | None] = [None] * len(files)
        # Group files by backend, tracking original indices
        # (local import is redundant with the module-level one; kept as-is)
        from collections import defaultdict
        backend_batches: dict[BackendProtocol, list[tuple[int, str, bytes]]] = defaultdict(list)
        for idx, (path, content) in enumerate(files):
            backend, stripped_path = self._get_backend_and_key(path)
            backend_batches[backend].append((idx, stripped_path, content))
        # Process each backend's batch
        for backend, batch in backend_batches.items():
            # Extract data for backend call
            indices, stripped_paths, contents = zip(*batch, strict=False)
            batch_files = list(zip(stripped_paths, contents, strict=False))
            # Call backend once with all its files
            batch_responses = backend.upload_files(batch_files)
            # Place responses at original indices with original paths
            for i, orig_idx in enumerate(indices):
                results[orig_idx] = FileUploadResponse(
                    path=files[orig_idx][0],  # Original path
                    error=batch_responses[i].error if i < len(batch_responses) else None,
                )
        return results  # type: ignore[return-value]
    async def aupload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Async version of upload_files."""
        # Pre-allocate result list
        results: list[FileUploadResponse | None] = [None] * len(files)
        # Group files by backend, tracking original indices
        backend_batches: dict[BackendProtocol, list[tuple[int, str, bytes]]] = defaultdict(list)
        for idx, (path, content) in enumerate(files):
            backend, stripped_path = self._get_backend_and_key(path)
            backend_batches[backend].append((idx, stripped_path, content))
        # Process each backend's batch
        for backend, batch in backend_batches.items():
            # Extract data for backend call
            indices, stripped_paths, contents = zip(*batch, strict=False)
            batch_files = list(zip(stripped_paths, contents, strict=False))
            # Call backend once with all its files
            batch_responses = await backend.aupload_files(batch_files)
            # Place responses at original indices with original paths
            for i, orig_idx in enumerate(indices):
                results[orig_idx] = FileUploadResponse(
                    path=files[orig_idx][0],  # Original path
                    error=batch_responses[i].error if i < len(batch_responses) else None,
                )
        return results  # type: ignore[return-value]
    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files, batched per backend for efficiency.

        Groups paths by target backend, calls each backend's download_files
        once with all of its paths, then merges the results back into the
        original order.

        Args:
            paths: List of file paths to download.

        Returns:
            List of FileDownloadResponse objects, one per input path, in the
            same order as the input.
        """
        # Pre-allocate result list
        results: list[FileDownloadResponse | None] = [None] * len(paths)
        backend_batches: dict[BackendProtocol, list[tuple[int, str]]] = defaultdict(list)
        for idx, path in enumerate(paths):
            backend, stripped_path = self._get_backend_and_key(path)
            backend_batches[backend].append((idx, stripped_path))
        # Process each backend's batch
        for backend, batch in backend_batches.items():
            # Extract data for backend call
            indices, stripped_paths = zip(*batch, strict=False)
            # Call backend once with all its paths
            batch_responses = backend.download_files(list(stripped_paths))
            # Place responses at original indices with original paths
            for i, orig_idx in enumerate(indices):
                results[orig_idx] = FileDownloadResponse(
                    path=paths[orig_idx],  # Original path
                    content=batch_responses[i].content if i < len(batch_responses) else None,
                    error=batch_responses[i].error if i < len(batch_responses) else None,
                )
        return results  # type: ignore[return-value]
    async def adownload_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Async version of download_files."""
        # Pre-allocate result list
        results: list[FileDownloadResponse | None] = [None] * len(paths)
        backend_batches: dict[BackendProtocol, list[tuple[int, str]]] = defaultdict(list)
        for idx, path in enumerate(paths):
            backend, stripped_path = self._get_backend_and_key(path)
            backend_batches[backend].append((idx, stripped_path))
        # Process each backend's batch
        for backend, batch in backend_batches.items():
            # Extract data for backend call
            indices, stripped_paths = zip(*batch, strict=False)
            # Call backend once with all its paths
            batch_responses = await backend.adownload_files(list(stripped_paths))
            # Place responses at original indices with original paths
            for i, orig_idx in enumerate(indices):
                results[orig_idx] = FileDownloadResponse(
                    path=paths[orig_idx],  # Original path
                    content=batch_responses[i].content if i < len(batch_responses) else None,
                    error=batch_responses[i].error if i < len(batch_responses) else None,
                )
        return results  # type: ignore[return-value]

View File

@@ -0,0 +1,544 @@
"""FilesystemBackend: 파일시스템에서 직접 파일을 읽고 씁니다.
보안 및 검색 업그레이드:
- virtual_mode일 때 루트 포함(root containment)을 통한 보안 경로 확인 (cwd로 샌드박싱됨)
- 가능한 경우 O_NOFOLLOW를 사용하여 파일 I/O 시 심볼릭 링크 따라가기 방지
- JSON 파싱을 포함한 Ripgrep 기반 검색과, 가상 경로 동작을 보존하면서
정규식 및 선택적 glob 포함 필터링을 지원하는 Python 폴백(fallback) 기능
"""
import json
import os
import re
import subprocess
from datetime import datetime
from pathlib import Path
import wcmatch.glob as wcglob
from deepagents.backends.protocol import (
BackendProtocol,
EditResult,
FileDownloadResponse,
FileInfo,
FileUploadResponse,
GrepMatch,
WriteResult,
)
from deepagents.backends.utils import (
check_empty_content,
format_content_with_line_numbers,
perform_string_replacement,
)
class FilesystemBackend(BackendProtocol):
"""파일시스템에서 직접 파일을 읽고 쓰는 백엔드.
파일은 실제 파일시스템 경로를 사용하여 접근합니다. 상대 경로는
현재 작업 디렉토리에 상대적으로 해결(resolve)됩니다. 내용은 일반 텍스트로
읽고 쓰이며, 메타데이터(타임스탬프)는 파일시스템 상태(stat)에서 파생됩니다.
"""
    def __init__(
        self,
        root_dir: str | Path | None = None,
        virtual_mode: bool = False,
        max_file_size_mb: int = 10,
    ) -> None:
        """Initialize the filesystem backend.

        Args:
            root_dir: Optional root directory for file operations. When
                provided, all file paths resolve relative to this directory;
                otherwise the current working directory is used.
            virtual_mode: When True, incoming paths are treated as virtual
                absolute paths sandboxed under the root directory (parent
                traversal is rejected and resolved paths must stay inside it).
            max_file_size_mb: Maximum per-file size, in megabytes, that the
                Python grep fallback will scan (larger files are skipped).
        """
        self.cwd = Path(root_dir).resolve() if root_dir else Path.cwd()
        self.virtual_mode = virtual_mode
        self.max_file_size_bytes = max_file_size_mb * 1024 * 1024
def _resolve_path(self, key: str) -> Path:
"""보안 검사를 포함하여 파일 경로를 해결(resolve)합니다.
virtual_mode=True일 때, 들어오는 경로를 self.cwd 하위의 가상 절대 경로로 취급하며,
상위 경로 탐색(.., ~)을 허용하지 않고 해결된 경로가 루트 내에 머물도록 보장합니다.
virtual_mode=False일 때, 레거시 동작을 유지합니다: 절대 경로는 그대료 허용되고,
상대 경로는 cwd 하위로 해결됩니다.
Args:
key: 파일 경로 (절대, 상대, 또는 virtual_mode=True일 때 가상 경로)
Returns:
해결된 절대 Path 객체
"""
if self.virtual_mode:
vpath = key if key.startswith("/") else "/" + key
if ".." in vpath or vpath.startswith("~"):
raise ValueError("Path traversal not allowed")
full = (self.cwd / vpath.lstrip("/")).resolve()
try:
full.relative_to(self.cwd)
except ValueError:
raise ValueError(f"Path:{full} outside root directory: {self.cwd}") from None
return full
path = Path(key)
if path.is_absolute():
return path
return (self.cwd / path).resolve()
    def ls_info(self, path: str) -> list[FileInfo]:
        """List files and directories in the given directory (non-recursive).

        Args:
            path: Absolute directory path to list.

        Returns:
            List of FileInfo-like dicts for entries directly under the
            directory. Directories get a trailing ``/`` and ``is_dir=True``.
            In virtual mode, paths are reported relative to the root with a
            leading ``/``; otherwise absolute filesystem paths are returned.
        """
        dir_path = self._resolve_path(path)
        if not dir_path.exists() or not dir_path.is_dir():
            return []
        results: list[FileInfo] = []
        # Convert cwd to string for comparison
        cwd_str = str(self.cwd)
        if not cwd_str.endswith("/"):
            cwd_str += "/"
        # List only direct children (non-recursive)
        try:
            for child_path in dir_path.iterdir():
                try:
                    is_file = child_path.is_file()
                    is_dir = child_path.is_dir()
                except OSError:
                    # Entry vanished or is unreadable; skip it.
                    continue
                abs_path = str(child_path)
                if not self.virtual_mode:
                    # Non-virtual mode: use absolute paths
                    if is_file:
                        try:
                            st = child_path.stat()
                            results.append({
                                "path": abs_path,
                                "is_dir": False,
                                "size": int(st.st_size),
                                "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                            })
                        except OSError:
                            # stat failed; report the entry without metadata.
                            results.append({"path": abs_path, "is_dir": False})
                    elif is_dir:
                        try:
                            st = child_path.stat()
                            results.append({
                                "path": abs_path + "/",
                                "is_dir": True,
                                "size": 0,
                                "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                            })
                        except OSError:
                            results.append({"path": abs_path + "/", "is_dir": True})
                else:
                    # Virtual mode: strip cwd prefix
                    if abs_path.startswith(cwd_str):
                        relative_path = abs_path[len(cwd_str) :]
                    elif abs_path.startswith(str(self.cwd)):
                        # Handle case where cwd doesn't end with /
                        relative_path = abs_path[len(str(self.cwd)) :].lstrip("/")
                    else:
                        # Path is outside cwd, return as-is or skip
                        relative_path = abs_path
                    virt_path = "/" + relative_path
                    if is_file:
                        try:
                            st = child_path.stat()
                            results.append({
                                "path": virt_path,
                                "is_dir": False,
                                "size": int(st.st_size),
                                "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                            })
                        except OSError:
                            results.append({"path": virt_path, "is_dir": False})
                    elif is_dir:
                        try:
                            st = child_path.stat()
                            results.append({
                                "path": virt_path + "/",
                                "is_dir": True,
                                "size": 0,
                                "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                            })
                        except OSError:
                            results.append({"path": virt_path + "/", "is_dir": True})
        except (OSError, PermissionError):
            # Directory iteration failed entirely; return whatever we gathered.
            pass
        # Keep deterministic order by path
        results.sort(key=lambda x: x.get("path", ""))
        return results
def read(
self,
file_path: str,
offset: int = 0,
limit: int = 2000,
) -> str:
"""파일 내용을 라인 번호와 함께 읽습니다.
Args:
file_path: 절대 또는 상대 파일 경로.
offset: 읽기 시작할 라인 오프셋 (0부터 시작).
limit: 읽을 최대 라인 수.
Returns:
라인 번호가 포함된 형식화된 파일 내용, 또는 에러 메시지.
"""
resolved_path = self._resolve_path(file_path)
if not resolved_path.exists() or not resolved_path.is_file():
return f"Error: File '{file_path}' not found"
try:
# Open with O_NOFOLLOW where available to avoid symlink traversal
fd = os.open(resolved_path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
with os.fdopen(fd, "r", encoding="utf-8") as f:
content = f.read()
empty_msg = check_empty_content(content)
if empty_msg:
return empty_msg
lines = content.splitlines()
start_idx = offset
end_idx = min(start_idx + limit, len(lines))
if start_idx >= len(lines):
return f"Error: Line offset {offset} exceeds file length ({len(lines)} lines)"
selected_lines = lines[start_idx:end_idx]
return format_content_with_line_numbers(selected_lines, start_line=start_idx + 1)
except (OSError, UnicodeDecodeError) as e:
return f"Error reading file '{file_path}': {e}"
def write(
self,
file_path: str,
content: str,
) -> WriteResult:
"""내용을 포함하는 새 파일을 생성합니다.
WriteResult를 반환합니다. 외부 저장소는 files_update=None을 설정합니다.
"""
resolved_path = self._resolve_path(file_path)
if resolved_path.exists():
return WriteResult(
error=f"Cannot write to {file_path} because it already exists. Read and then make an edit, or write to a new path."
)
try:
# Create parent directories if needed
resolved_path.parent.mkdir(parents=True, exist_ok=True)
# Prefer O_NOFOLLOW to avoid writing through symlinks
flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
fd = os.open(resolved_path, flags, 0o644)
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(content)
return WriteResult(path=file_path, files_update=None)
except (OSError, UnicodeEncodeError) as e:
return WriteResult(error=f"Error writing file '{file_path}': {e}")
def edit(
self,
file_path: str,
old_string: str,
new_string: str,
replace_all: bool = False,
) -> EditResult:
"""문자열 발생(occurrences)을 교체하여 파일을 편집합니다.
EditResult를 반환합니다. 외부 저장소는 files_update=None을 설정합니다.
"""
resolved_path = self._resolve_path(file_path)
if not resolved_path.exists() or not resolved_path.is_file():
return EditResult(error=f"Error: File '{file_path}' not found")
try:
# Read securely
fd = os.open(resolved_path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
with os.fdopen(fd, "r", encoding="utf-8") as f:
content = f.read()
result = perform_string_replacement(content, old_string, new_string, replace_all)
if isinstance(result, str):
return EditResult(error=result)
new_content, occurrences = result
# Write securely
flags = os.O_WRONLY | os.O_TRUNC
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
fd = os.open(resolved_path, flags)
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(new_content)
return EditResult(path=file_path, files_update=None, occurrences=int(occurrences))
except (OSError, UnicodeDecodeError, UnicodeEncodeError) as e:
return EditResult(error=f"Error editing file '{file_path}': {e}")
def grep_raw(
self,
pattern: str,
path: str | None = None,
glob: str | None = None,
) -> list[GrepMatch] | str:
# Validate regex
try:
re.compile(pattern)
except re.error as e:
return f"Invalid regex pattern: {e}"
# Resolve base path
try:
base_full = self._resolve_path(path or ".")
except ValueError:
return []
if not base_full.exists():
return []
# Try ripgrep first
results = self._ripgrep_search(pattern, base_full, glob)
if results is None:
results = self._python_search(pattern, base_full, glob)
matches: list[GrepMatch] = []
for fpath, items in results.items():
for line_num, line_text in items:
matches.append({"path": fpath, "line": int(line_num), "text": line_text})
return matches
    def _ripgrep_search(
        self, pattern: str, base_full: Path, include_glob: str | None
    ) -> dict[str, list[tuple[int, str]]] | None:
        """Search with the external ripgrep binary, parsing its JSON output.

        Args:
            pattern: Regular expression passed to ripgrep.
            base_full: Resolved base path to search under.
            include_glob: Optional glob filter forwarded via ``--glob``.

        Returns:
            Mapping of file path (virtual path in virtual mode, absolute
            otherwise) to (line_number, line_text) matches, or None when
            ripgrep is missing or times out so the caller can fall back to
            the Python implementation.
        """
        cmd = ["rg", "--json"]
        if include_glob:
            cmd.extend(["--glob", include_glob])
        # "--" prevents a pattern starting with "-" being parsed as a flag.
        cmd.extend(["--", pattern, str(base_full)])
        try:
            proc = subprocess.run(  # noqa: S603
                cmd,
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            # ripgrep unavailable or too slow: signal fallback to caller.
            return None
        results: dict[str, list[tuple[int, str]]] = {}
        # ripgrep --json emits one JSON object per line; keep only "match" events.
        for line in proc.stdout.splitlines():
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            if data.get("type") != "match":
                continue
            pdata = data.get("data", {})
            ftext = pdata.get("path", {}).get("text")
            if not ftext:
                continue
            p = Path(ftext)
            if self.virtual_mode:
                try:
                    # Re-express the match path relative to the sandbox root.
                    virt = "/" + str(p.resolve().relative_to(self.cwd))
                except Exception:
                    # Outside the root (e.g. via symlink): drop the match.
                    continue
            else:
                virt = str(p)
            ln = pdata.get("line_number")
            lt = pdata.get("lines", {}).get("text", "").rstrip("\n")
            if ln is None:
                continue
            results.setdefault(virt, []).append((int(ln), lt))
        return results
    def _python_search(
        self, pattern: str, base_full: Path, include_glob: str | None
    ) -> dict[str, list[tuple[int, str]]]:
        """Pure-Python fallback search used when ripgrep is unavailable.

        Args:
            pattern: Regular expression to search for.
            base_full: Resolved base path; a file's parent is used as the
                search root when a file is given.
            include_glob: Optional glob filter.

        Returns:
            Mapping of file path (virtual in virtual mode, absolute
            otherwise) to (line_number, line_text) matches.
        """
        try:
            regex = re.compile(pattern)
        except re.error:
            # Caller validated the pattern already; an empty result is the
            # defensive answer if it slips through invalid.
            return {}
        results: dict[str, list[tuple[int, str]]] = {}
        root = base_full if base_full.is_dir() else base_full.parent
        for fp in root.rglob("*"):
            if not fp.is_file():
                continue
            # NOTE(review): the glob is matched against the file NAME only,
            # while ripgrep's --glob presumably matches paths — confirm the
            # two code paths are meant to agree.
            if include_glob and not wcglob.globmatch(fp.name, include_glob, flags=wcglob.BRACE):
                continue
            try:
                # Skip oversized files to bound scan cost.
                if fp.stat().st_size > self.max_file_size_bytes:
                    continue
            except OSError:
                continue
            try:
                content = fp.read_text()
            except (UnicodeDecodeError, PermissionError, OSError):
                # Binary or unreadable file: skip silently.
                continue
            for line_num, line in enumerate(content.splitlines(), 1):
                if regex.search(line):
                    if self.virtual_mode:
                        try:
                            virt_path = "/" + str(fp.resolve().relative_to(self.cwd))
                        except Exception:
                            continue
                    else:
                        virt_path = str(fp)
                    results.setdefault(virt_path, []).append((line_num, line))
        return results
def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
    """Return FileInfo entries for files matching a glob pattern.

    The search is recursive (rglob) so patterns match in subdirectories.
    In virtual mode, returned paths are rewritten relative to self.cwd
    with a leading "/". Results are sorted by path; stat failures degrade
    to entries without size/modified_at rather than being dropped.
    """
    normalized = pattern.lstrip("/") if pattern.startswith("/") else pattern
    base = self.cwd if path == "/" else self._resolve_path(path)
    if not (base.exists() and base.is_dir()):
        return []
    entries: list[FileInfo] = []
    try:
        # Recursive globbing so files in subdirectories are matched too.
        for hit in base.rglob(normalized):
            try:
                if not hit.is_file():
                    continue
            except OSError:
                continue
            raw = str(hit)
            if self.virtual_mode:
                # Strip the cwd prefix to produce a "/"-rooted virtual path.
                prefix = str(self.cwd)
                if not prefix.endswith("/"):
                    prefix += "/"
                if raw.startswith(prefix):
                    rel = raw[len(prefix):]
                elif raw.startswith(str(self.cwd)):
                    rel = raw[len(str(self.cwd)):].lstrip("/")
                else:
                    rel = raw
                shown = "/" + rel
            else:
                shown = raw
            try:
                st = hit.stat()
            except OSError:
                entries.append({"path": shown, "is_dir": False})
            else:
                entries.append({
                    "path": shown,
                    "is_dir": False,
                    "size": int(st.st_size),
                    "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                })
    except (OSError, ValueError):
        pass
    entries.sort(key=lambda e: e.get("path", ""))
    return entries
def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
    """Upload multiple files to the filesystem.

    Args:
        files: List of (path, content) tuples where content is bytes.

    Returns:
        A list of FileUploadResponse objects, one per input file, in the
        same order as the input.
    """
    responses: list[FileUploadResponse] = []
    for path, content in files:
        try:
            resolved_path = self._resolve_path(path)
            # Create parent directories if needed
            resolved_path.parent.mkdir(parents=True, exist_ok=True)
            # O_NOFOLLOW (where available) refuses to write through a
            # symlink, preventing symlink-based escapes from the root.
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
            if hasattr(os, "O_NOFOLLOW"):
                flags |= os.O_NOFOLLOW
            fd = os.open(resolved_path, flags, 0o644)
            with os.fdopen(fd, "wb") as f:
                f.write(content)
            responses.append(FileUploadResponse(path=path, error=None))
        except FileNotFoundError:
            responses.append(FileUploadResponse(path=path, error="file_not_found"))
        except PermissionError:
            responses.append(FileUploadResponse(path=path, error="permission_denied"))
        except (ValueError, OSError):
            # ValueError comes from _resolve_path on path traversal; any
            # remaining OSError (e.g. ELOOP from O_NOFOLLOW) is reported the
            # same way. The previous version branched on the error message
            # here, but both arms appended "invalid_path" — dead code removed.
            responses.append(FileUploadResponse(path=path, error="invalid_path"))
    return responses
def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
    """Download multiple files from the filesystem.

    Args:
        paths: List of file paths to download.

    Returns:
        One FileDownloadResponse per input path, in input order.
    """
    results: list[FileDownloadResponse] = []
    # O_NOFOLLOW (when the OS supports it) refuses to read through symlinks.
    nofollow = getattr(os, "O_NOFOLLOW", 0)
    for requested in paths:
        try:
            target = self._resolve_path(requested)
            handle = os.open(target, os.O_RDONLY | nofollow)
            with os.fdopen(handle, "rb") as stream:
                data = stream.read()
        except FileNotFoundError:
            results.append(FileDownloadResponse(path=requested, content=None, error="file_not_found"))
        except PermissionError:
            results.append(FileDownloadResponse(path=requested, content=None, error="permission_denied"))
        except IsADirectoryError:
            results.append(FileDownloadResponse(path=requested, content=None, error="is_directory"))
        except ValueError:
            results.append(FileDownloadResponse(path=requested, content=None, error="invalid_path"))
        else:
            results.append(FileDownloadResponse(path=requested, content=data, error=None))
        # Any other OSError deliberately propagates to the caller.
    return results

View File

@@ -0,0 +1,453 @@
"""플러그형 메모리 백엔드를 위한 프로토콜 정의.
이 모듈은 모든 백엔드 구현이 따라야 하는 BackendProtocol을 정의합니다.
백엔드는 파일들을 서로 다른 위치(state, filesystem, database 등)에 저장할 수 있으며,
파일 작업에 대해 통일된 인터페이스를 제공합니다.
"""
import abc
import asyncio
from collections.abc import Callable
from dataclasses import dataclass
from typing import Any, Literal, NotRequired, TypeAlias
from langchain.tools import ToolRuntime
from typing_extensions import TypedDict
FileOperationError = Literal[
    "file_not_found",  # Download: file doesn't exist
    "permission_denied",  # Both: access denied
    "is_directory",  # Download: tried to download directory as file
    "invalid_path",  # Both: path syntax malformed (parent dir missing, invalid chars)
]
"""Standardized error codes for file upload/download operations.

These represent common, recoverable errors that an LLM can understand and
potentially fix:

- file_not_found: the requested file does not exist (download)
- permission_denied: access to the operation was denied
- is_directory: attempted to download a directory as a file
- invalid_path: the path syntax is malformed or contains invalid characters
  (this also covers upload failures such as a missing parent directory)

The previous docstring listed a ``parent_not_found`` code that is not a
member of the Literal; that entry has been folded into ``invalid_path``,
which is what backends actually report for a missing parent directory.
"""
@dataclass
class FileDownloadResponse:
    """Result of a single file download operation.

    Designed so batch operations can report partial success. Errors are
    standardized via the FileOperationError literal for the specific,
    recoverable conditions that arise when an LLM drives file operations.

    Attributes:
        path: The requested file path. Included so batch results are easy to
            cross-reference, especially in error messages.
        content: File content as bytes on success, None on failure.
        error: Standardized error code on failure, None on success.
            Uses the FileOperationError literal for structured,
            LLM-actionable error reporting.

    Examples:
        >>> # Success
        >>> FileDownloadResponse(path="/app/config.json", content=b"{...}", error=None)
        >>> # Failure
        >>> FileDownloadResponse(path="/wrong/path.txt", content=None, error="file_not_found")
    """

    path: str
    content: bytes | None = None
    error: FileOperationError | None = None
@dataclass
class FileUploadResponse:
    """Result of a single file upload operation.

    Designed so batch operations can report partial success. Errors are
    standardized via the FileOperationError literal for the specific,
    recoverable conditions that arise when an LLM drives file operations.

    Attributes:
        path: The requested file path. Included for easy cross-referencing
            of batch results and clear error messages.
        error: Standardized error code on failure, None on success.
            Uses the FileOperationError literal for structured,
            LLM-actionable error reporting.

    Examples:
        >>> # Success
        >>> FileUploadResponse(path="/app/data.txt", error=None)
        >>> # Failure
        >>> FileUploadResponse(path="/readonly/file.txt", error="permission_denied")
    """

    path: str
    error: FileOperationError | None = None
class FileInfo(TypedDict):
    """Structured file listing information.

    Minimal contract used across backends. Only "path" is required; the
    other fields are best-effort and may be absent depending on the backend.
    """

    path: str
    is_dir: NotRequired[bool]
    size: NotRequired[int]  # bytes (approx)
    modified_at: NotRequired[str]  # ISO timestamp if known
class GrepMatch(TypedDict):
    """A single structured grep match entry."""

    path: str  # absolute file path containing the match
    line: int  # 1-based line number
    text: str  # full text of the matching line
@dataclass
class WriteResult:
    """Result of a backend write operation.

    Attributes:
        error: Error message on failure, None on success.
        path: Absolute path of the written file, None on failure.
        files_update: State-update dict for checkpoint backends, None for
            external storage. Checkpoint backends fill this with
            {file_path: file_data} for LangGraph state; external backends
            set None (data is already persisted to disk/S3/database/etc.).

    Examples:
        >>> # Checkpoint storage
        >>> WriteResult(path="/f.txt", files_update={"/f.txt": {...}})
        >>> # External storage
        >>> WriteResult(path="/f.txt", files_update=None)
        >>> # Error
        >>> WriteResult(error="File exists")
    """

    error: str | None = None
    path: str | None = None
    files_update: dict[str, Any] | None = None
@dataclass
class EditResult:
    """Result of a backend edit operation.

    Attributes:
        error: Error message on failure, None on success.
        path: Absolute path of the edited file, None on failure.
        files_update: State-update dict for checkpoint backends, None for
            external storage. Checkpoint backends fill this with
            {file_path: file_data} for LangGraph state; external backends
            set None (data is already persisted to disk/S3/database/etc.).
        occurrences: Number of replacements made, None on failure.

    Examples:
        >>> # Checkpoint storage
        >>> EditResult(path="/f.txt", files_update={"/f.txt": {...}}, occurrences=1)
        >>> # External storage
        >>> EditResult(path="/f.txt", files_update=None, occurrences=2)
        >>> # Error
        >>> EditResult(error="File not found")
    """

    error: str | None = None
    path: str | None = None
    files_update: dict[str, Any] | None = None
    occurrences: int | None = None
class BackendProtocol(abc.ABC):
    """Protocol for pluggable memory backends (single unified interface).

    Backends may store files in different locations (state, filesystem,
    database, ...) while providing a unified interface for file operations.

    All file data is represented as a dict with the structure:

        {
            "content": list[str],   # lines of text content
            "created_at": str,      # ISO-format timestamp
            "modified_at": str,     # ISO-format timestamp
        }
    """

    # NOTE(review): the sync methods below are docstring-only interface stubs
    # without @abstractmethod; calling one on a backend that does not override
    # it implicitly returns None rather than raising.

    def ls_info(self, path: str) -> list["FileInfo"]:
        """List all files in a directory with their metadata.

        Args:
            path: Absolute path of the directory to list. Must start with '/'.

        Returns:
            List of FileInfo dicts containing file metadata:
                - `path` (required): absolute file path
                - `is_dir` (optional): True for a directory
                - `size` (optional): file size in bytes
                - `modified_at` (optional): ISO 8601 timestamp
        """

    async def als_info(self, path: str) -> list["FileInfo"]:
        """Async version of ls_info."""
        return await asyncio.to_thread(self.ls_info, path)

    def read(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Read file contents with line numbers.

        Args:
            file_path: Absolute path of the file to read. Must start with '/'.
            offset: Line number to start reading from (0-based). Default: 0.
            limit: Maximum number of lines to read. Default: 2000.

        Returns:
            File contents with line numbers (cat -n style), starting at line 1.
            Lines longer than 2000 characters are truncated.
            Returns an error string if the file does not exist or cannot be read.

        !!! note
            - Use pagination (offset/limit) on large files to avoid context overflow.
            - First scan: `read(path, limit=100)` to inspect the file structure.
            - Follow-up reads: `read(path, offset=100, limit=200)` for the next chunk.
            - ALWAYS read a file before editing it.
            - If the file exists but is empty, you will receive a system
              reminder warning instead of content.
        """

    async def aread(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Async version of read."""
        return await asyncio.to_thread(self.read, file_path, offset, limit)

    def grep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list["GrepMatch"] | str:
        """Search files for a literal text pattern.

        Args:
            pattern: Literal string to search for (not a regex).
                Performs exact substring matching within file contents.
                E.g. "TODO" matches every line containing "TODO".
            path: Directory path to search in (optional).
                When None, searches the current working directory.
                E.g. "/workspace/src".
            glob: Optional glob pattern to filter which files are searched.
                Filters by file name/path, not contents.
                Supports standard glob wildcards:
                - `*`: any characters within a file name
                - `**`: any directories, recursively
                - `?`: a single character
                - `[abc]`: one character from the set

                Examples:
                    - "*.py" - search only Python files
                    - "**/*.txt" - search all .txt files recursively
                    - "src/**/*.js" - search JS files under src/
                    - "test[0-9].txt" - matches test0.txt, test1.txt, etc.

        Returns:
            On success: structured results as list[GrepMatch] containing:
                - path: absolute file path
                - line: line number (1-based)
                - text: full content of the matching line
            On failure: an error message string (e.g. bad path, permission denied)
        """

    async def agrep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list["GrepMatch"] | str:
        """Async version of grep_raw."""
        return await asyncio.to_thread(self.grep_raw, pattern, path, glob)

    def glob_info(self, pattern: str, path: str = "/") -> list["FileInfo"]:
        """Find files matching a glob pattern.

        Args:
            pattern: Glob pattern with wildcards to match file paths.
                Supports standard glob syntax:
                - `*` matches any characters within a file/directory name
                - `**` matches any directories, recursively
                - `?` matches a single character
                - `[abc]` matches one character from the set
            path: Base directory to start the search from. Default: "/" (root).
                The pattern is applied relative to this path.

        Returns:
            list of FileInfo
        """

    async def aglob_info(self, pattern: str, path: str = "/") -> list["FileInfo"]:
        """Async version of glob_info."""
        return await asyncio.to_thread(self.glob_info, pattern, path)

    def write(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Write content to a new file in the filesystem. Errors if the file exists.

        Args:
            file_path: Absolute path where the file will be created.
                Must start with '/'.
            content: String content to write to the file.

        Returns:
            WriteResult
        """

    async def awrite(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Async version of write."""
        return await asyncio.to_thread(self.write, file_path, content)

    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Perform an exact string replacement in an existing file.

        Args:
            file_path: Absolute path of the file to edit. Must start with '/'.
            old_string: Exact string to find and replace.
                Must match exactly, including whitespace and indentation.
            new_string: String to replace old_string with.
                Must differ from old_string.
            replace_all: If True, replace all occurrences. If False (default),
                old_string must be unique within the file or the edit fails.

        Returns:
            EditResult
        """

    async def aedit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Async version of edit."""
        return await asyncio.to_thread(self.edit, file_path, old_string, new_string, replace_all)

    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the sandbox.

        This API is designed for direct use by developers, or to be exposed
        to an LLM via custom tools.

        Args:
            files: List of (path, content) tuples to upload.

        Returns:
            A list of FileUploadResponse objects, one per input file.
            Response order matches input order (response[i] for files[i]).
            Check the error field for per-file success/failure.

        Examples:
            ```python
            responses = sandbox.upload_files([
                ("/app/config.json", b"{...}"),
                ("/app/data.txt", b"content"),
            ])
            ```
        """

    async def aupload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Async version of upload_files."""
        return await asyncio.to_thread(self.upload_files, files)

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the sandbox.

        This API is designed for direct use by developers, or to be exposed
        to an LLM via custom tools.

        Args:
            paths: List of file paths to download.

        Returns:
            A list of FileDownloadResponse objects, one per input path.
            Response order matches input order (response[i] for paths[i]).
            Check the error field for per-file success/failure.
        """

    async def adownload_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Async version of download_files."""
        return await asyncio.to_thread(self.download_files, paths)
@dataclass
class ExecuteResponse:
    """Result of code execution.

    Simplified schema optimized for LLM consumption.
    """

    output: str
    """Combined stdout and stderr of the executed command."""

    exit_code: int | None = None
    """Process exit code. 0 means success; non-zero indicates failure."""

    truncated: bool = False
    """Whether the output was truncated due to backend limits."""
class SandboxBackendProtocol(BackendProtocol):
    """Protocol for sandbox backends with an isolated runtime.

    Sandbox backends run in an isolated environment (e.g. a separate process
    or a container) and communicate through a defined interface.
    """

    # NOTE(review): like the base protocol, execute() and id are
    # docstring-only stubs here; concrete sandboxes must override them.

    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Execute a command in a process.

        Simplified interface optimized for LLM consumption.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code, optional signal,
            and truncation flag.
        """

    async def aexecute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Async version of execute."""
        return await asyncio.to_thread(self.execute, command)

    @property
    def id(self) -> str:
        """Unique identifier for the sandbox backend instance."""
# Factory signature: builds a backend instance from the per-invocation ToolRuntime.
BackendFactory: TypeAlias = Callable[[ToolRuntime], BackendProtocol]
# Accepted backend configuration: a ready-made backend instance or a factory.
BACKEND_TYPES = BackendProtocol | BackendFactory

View File

@@ -0,0 +1,360 @@
"""execute()만을 추상 메서드로 가지는 기본 샌드박스 구현.
이 모듈은 execute()를 통해 쉘 명령을 실행하여 모든 SandboxBackendProtocol
메서드를 구현하는 기본 클래스를 제공합니다. 구체적인 구현체는
오직 execute() 메서드만 구현하면 됩니다.
"""
from __future__ import annotations
import base64
import json
import shlex
from abc import ABC, abstractmethod
from deepagents.backends.protocol import (
EditResult,
ExecuteResponse,
FileDownloadResponse,
FileInfo,
FileUploadResponse,
GrepMatch,
SandboxBackendProtocol,
WriteResult,
)
_GLOB_COMMAND_TEMPLATE = """python3 -c "
import glob
import os
import json
import base64
# base64 인코딩된 파라미터 디코딩
path = base64.b64decode('{path_b64}').decode('utf-8')
pattern = base64.b64decode('{pattern_b64}').decode('utf-8')
os.chdir(path)
matches = sorted(glob.glob(pattern, recursive=True))
for m in matches:
stat = os.stat(m)
result = {{
'path': m,
'size': stat.st_size,
'mtime': stat.st_mtime,
'is_dir': os.path.isdir(m)
}}
print(json.dumps(result))
" 2>/dev/null"""
_WRITE_COMMAND_TEMPLATE = """python3 -c "
import os
import sys
import base64
file_path = '{file_path}'
# 파일이 이미 존재하는지 확인 (쓰기와 원자적)
if os.path.exists(file_path):
print(f'Error: File \\'{file_path}\\' already exists', file=sys.stderr)
sys.exit(1)
# 필요시 부모 디렉토리 생성
parent_dir = os.path.dirname(file_path) or '.'
os.makedirs(parent_dir, exist_ok=True)
# 내용 디코딩 및 쓰기
content = base64.b64decode('{content_b64}').decode('utf-8')
with open(file_path, 'w') as f:
f.write(content)
" 2>&1"""
_EDIT_COMMAND_TEMPLATE = """python3 -c "
import sys
import base64
# 파일 내용 읽기
with open('{file_path}', 'r') as f:
text = f.read()
# base64 인코딩된 문자열 디코딩
old = base64.b64decode('{old_b64}').decode('utf-8')
new = base64.b64decode('{new_b64}').decode('utf-8')
# 발생 횟수 계산
count = text.count(old)
# 문제가 발견되면 에러 코드와 함께 종료
if count == 0:
sys.exit(1) # 문자열을 찾을 수 없음
elif count > 1 and not {replace_all}:
sys.exit(2) # replace_all 없이 여러 번 발생
# 교체 수행
if {replace_all}:
result = text.replace(old, new)
else:
result = text.replace(old, new, 1)
# 파일에 다시 쓰기
with open('{file_path}', 'w') as f:
f.write(result)
print(count)
" 2>&1"""
_READ_COMMAND_TEMPLATE = """python3 -c "
import os
import sys
file_path = '{file_path}'
offset = {offset}
limit = {limit}
# 파일이 존재하는지 확인
if not os.path.isfile(file_path):
print('Error: File not found')
sys.exit(1)
# 파일이 비어있는지 확인
if os.path.getsize(file_path) == 0:
print('System reminder: File exists but has empty contents')
sys.exit(0)
# offset과 limit으로 파일 읽기
with open(file_path, 'r') as f:
lines = f.readlines()
# offset과 limit 적용
start_idx = offset
end_idx = offset + limit
selected_lines = lines[start_idx:end_idx]
# 라인 번호로 포맷팅 (1부터 시작, offset + 1부터 시작)
for i, line in enumerate(selected_lines):
line_num = offset + i + 1
# 포맷팅을 위해 끝의 개행 문자 제거 후 다시 추가
line_content = line.rstrip('\\n')
print(f'{{line_num:6d}}\\t{{line_content}}')
" 2>&1"""
class BaseSandbox(SandboxBackendProtocol, ABC):
    """Base sandbox implementation with execute() as the abstract method.

    This class provides default implementations of all protocol methods in
    terms of shell commands, so concrete subclasses only need to implement
    execute() (plus id/upload_files/download_files, which remain abstract).
    """

    @abstractmethod
    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Run a command in the sandbox and return an ExecuteResponse.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code, optional signal,
            and truncation flag.
        """
        ...

    def ls_info(self, path: str) -> list[FileInfo]:
        """Return a structured listing with file metadata using os.scandir."""
        # NOTE(review): `path` is interpolated unquoted into the remote
        # script; a path containing a quote character would break the command.
        cmd = f"""python3 -c "
import os
import json
path = '{path}'
try:
    with os.scandir(path) as it:
        for entry in it:
            result = {{
                'path': entry.name,
                'is_dir': entry.is_dir(follow_symlinks=False)
            }}
            print(json.dumps(result))
except FileNotFoundError:
    pass
except PermissionError:
    pass
" 2>/dev/null"""
        result = self.execute(cmd)
        file_infos: list[FileInfo] = []
        # Each stdout line is one JSON object; malformed lines are skipped.
        for line in result.output.strip().split("\n"):
            if not line:
                continue
            try:
                data = json.loads(line)
                file_infos.append({"path": data["path"], "is_dir": data["is_dir"]})
            except json.JSONDecodeError:
                continue
        return file_infos

    def read(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Read file contents with line numbers using a single shell command."""
        # Use the template to read the file with offset and limit
        cmd = _READ_COMMAND_TEMPLATE.format(file_path=file_path, offset=offset, limit=limit)
        result = self.execute(cmd)
        output = result.output.rstrip()
        exit_code = result.exit_code
        # NOTE(review): a file whose content contains the literal text
        # "Error: File not found" would also trigger this branch.
        if exit_code != 0 or "Error: File not found" in output:
            return f"Error: File '{file_path}' not found"
        return output

    def write(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Create a new file. Returns a WriteResult; error is set on failure."""
        # Base64-encode the content to avoid shell escaping issues
        content_b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
        # Single atomic check-and-write command
        cmd = _WRITE_COMMAND_TEMPLATE.format(file_path=file_path, content_b64=content_b64)
        result = self.execute(cmd)
        # Check for errors (exit code or an error message in the output)
        if result.exit_code != 0 or "Error:" in result.output:
            error_msg = result.output.strip() or f"Failed to write file '{file_path}'"
            return WriteResult(error=error_msg)
        # External storage - no files_update needed
        return WriteResult(path=file_path, files_update=None)

    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Edit a file by replacing string occurrences. Returns an EditResult."""
        # Base64-encode the strings to avoid shell escaping issues
        old_b64 = base64.b64encode(old_string.encode("utf-8")).decode("ascii")
        new_b64 = base64.b64encode(new_string.encode("utf-8")).decode("ascii")
        # Use the template for the string replacement; replace_all renders
        # as the Python literal True/False inside the remote script
        cmd = _EDIT_COMMAND_TEMPLATE.format(
            file_path=file_path, old_b64=old_b64, new_b64=new_b64, replace_all=replace_all
        )
        result = self.execute(cmd)
        exit_code = result.exit_code
        output = result.output.strip()
        # Exit codes 1/2 are the template's sentinel values (see template).
        if exit_code == 1:
            return EditResult(error=f"Error: String not found in file: '{old_string}'")
        if exit_code == 2:
            return EditResult(
                error=f"Error: String '{old_string}' appears multiple times. Use replace_all=True to replace all occurrences."
            )
        if exit_code != 0:
            return EditResult(error=f"Error: File '{file_path}' not found")
        count = int(output)
        # External storage - no files_update needed
        return EditResult(path=file_path, files_update=None, occurrences=count)

    def grep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list[GrepMatch] | str:
        """Return structured search results, or an error string for bad input."""
        search_path = shlex.quote(path or ".")
        # Build the grep command for structured output
        grep_opts = "-rHnF"  # recursive, with filename, with line number, fixed strings (literal)
        # Add the glob pattern if provided
        glob_pattern = ""
        if glob:
            glob_pattern = f"--include='{glob}'"
        # Escape the pattern for the shell
        pattern_escaped = shlex.quote(pattern)
        # `|| true` keeps the exit code 0 when grep finds nothing.
        cmd = f"grep {grep_opts} {glob_pattern} -e {pattern_escaped} {search_path} 2>/dev/null || true"
        result = self.execute(cmd)
        output = result.output.rstrip()
        if not output:
            return []
        # Parse grep output into GrepMatch objects
        matches: list[GrepMatch] = []
        for line in output.split("\n"):
            # Format: path:line_number:text
            parts = line.split(":", 2)
            if len(parts) >= 3:
                matches.append({
                    "path": parts[0],
                    "line": int(parts[1]),
                    "text": parts[2],
                })
        return matches

    def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
        """Structured glob matching that returns FileInfo dicts."""
        # Base64-encode the pattern and path to avoid shell escaping issues
        pattern_b64 = base64.b64encode(pattern.encode("utf-8")).decode("ascii")
        path_b64 = base64.b64encode(path.encode("utf-8")).decode("ascii")
        cmd = _GLOB_COMMAND_TEMPLATE.format(path_b64=path_b64, pattern_b64=pattern_b64)
        result = self.execute(cmd)
        output = result.output.strip()
        if not output:
            return []
        # Parse the JSON output into FileInfo dicts
        file_infos: list[FileInfo] = []
        for line in output.split("\n"):
            try:
                data = json.loads(line)
                file_infos.append({
                    "path": data["path"],
                    "is_dir": data["is_dir"],
                })
            except json.JSONDecodeError:
                continue
        return file_infos

    @property
    @abstractmethod
    def id(self) -> str:
        """Unique identifier for the sandbox backend."""

    @abstractmethod
    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the sandbox.

        Implementations must support partial success - catch per-file
        exceptions and report errors in the FileUploadResponse objects
        instead of raising.
        """

    @abstractmethod
    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the sandbox.

        Implementations must support partial success - catch per-file
        exceptions and report errors in the FileDownloadResponse objects
        instead of raising.
        """

View File

@@ -0,0 +1,181 @@
"""StateBackend: 파일을 LangGraph 에이전트 상태(임시)에 저장되도록 합니다."""
from typing import TYPE_CHECKING
from deepagents.backends.protocol import BackendProtocol, EditResult, FileInfo, GrepMatch, WriteResult
from deepagents.backends.utils import (
_glob_search_files,
create_file_data,
file_data_to_string,
format_read_response,
grep_matches_from_files,
perform_string_replacement,
update_file_data,
)
if TYPE_CHECKING:
from langchain.tools import ToolRuntime
class StateBackend(BackendProtocol):
    """Backend that stores files in agent state (ephemeral).

    Uses LangGraph's state management and checkpointing. Files persist only
    within a single conversation thread and are not shared across threads.
    State is automatically checkpointed after each agent step.

    Special handling: because LangGraph state must be updated via Command
    objects (not direct mutation), operations may return Command objects
    instead of None. This is signaled by the uses_state=True flag.
    """

    def __init__(self, runtime: "ToolRuntime"):
        """Initialize the StateBackend with a runtime."""
        self.runtime = runtime

    def ls_info(self, path: str) -> list[FileInfo]:
        """List files and directories in the given directory (non-recursive).

        Args:
            path: Absolute path of the directory.

        Returns:
            FileInfo-like dicts for files and directories directly under the
            directory. Directories get a trailing / and is_dir=True.
        """
        files = self.runtime.state.get("files", {})
        infos: list[FileInfo] = []
        subdirs: set[str] = set()
        # Normalize path to have trailing slash for proper prefix matching
        normalized_path = path if path.endswith("/") else path + "/"
        for k, fd in files.items():
            # Check if file is in the specified directory or a subdirectory
            if not k.startswith(normalized_path):
                continue
            # Get the relative path after the directory
            relative = k[len(normalized_path) :]
            # If relative path contains '/', it's in a subdirectory
            if "/" in relative:
                # Extract the immediate subdirectory name
                subdir_name = relative.split("/")[0]
                subdirs.add(normalized_path + subdir_name + "/")
                continue
            # This is a file directly in the current directory
            size = len("\n".join(fd.get("content", [])))
            infos.append({
                "path": k,
                "is_dir": False,
                "size": int(size),
                "modified_at": fd.get("modified_at", ""),
            })
        # Add directories to the results
        for subdir in sorted(subdirs):
            infos.append({
                "path": subdir,
                "is_dir": True,
                "size": 0,
                "modified_at": "",
            })
        infos.sort(key=lambda x: x.get("path", ""))
        return infos

    def read(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Read file contents with line numbers.

        Args:
            file_path: Absolute file path.
            offset: Line offset to start reading from (0-based).
            limit: Maximum number of lines to read.

        Returns:
            Formatted file contents with line numbers, or an error message.
        """
        files = self.runtime.state.get("files", {})
        file_data = files.get(file_path)
        if file_data is None:
            return f"Error: File '{file_path}' not found"
        return format_read_response(file_data, offset, limit)

    def write(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Create a new file with the given content.

        Returns a WriteResult carrying files_update for the LangGraph state update.
        """
        files = self.runtime.state.get("files", {})
        if file_path in files:
            return WriteResult(
                error=f"Cannot write to {file_path} because it already exists. Read and then make an edit, or write to a new path."
            )
        new_file_data = create_file_data(content)
        return WriteResult(path=file_path, files_update={file_path: new_file_data})

    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Edit a file by replacing string occurrences.

        Returns an EditResult carrying files_update and occurrences.
        """
        files = self.runtime.state.get("files", {})
        file_data = files.get(file_path)
        if file_data is None:
            return EditResult(error=f"Error: File '{file_path}' not found")
        content = file_data_to_string(file_data)
        result = perform_string_replacement(content, old_string, new_string, replace_all)
        # The helper returns an error string, or (new_content, occurrences).
        if isinstance(result, str):
            return EditResult(error=result)
        new_content, occurrences = result
        new_file_data = update_file_data(file_data, new_content)
        return EditResult(path=file_path, files_update={file_path: new_file_data}, occurrences=int(occurrences))

    def grep_raw(
        self,
        pattern: str,
        path: str = "/",
        glob: str | None = None,
    ) -> list[GrepMatch] | str:
        # Delegate to the shared in-memory grep helper.
        files = self.runtime.state.get("files", {})
        return grep_matches_from_files(files, pattern, path, glob)

    def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
        """Get FileInfo for files matching a glob pattern."""
        files = self.runtime.state.get("files", {})
        result = _glob_search_files(files, pattern, path)
        # The helper returns a newline-joined path list, or this sentinel.
        if result == "No files found":
            return []
        paths = result.split("\n")
        infos: list[FileInfo] = []
        for p in paths:
            fd = files.get(p)
            size = len("\n".join(fd.get("content", []))) if fd else 0
            infos.append({
                "path": p,
                "is_dir": False,
                "size": int(size),
                "modified_at": fd.get("modified_at", "") if fd else "",
            })
        return infos

View File

@@ -0,0 +1,438 @@
"""StoreBackend: LangGraph의 BaseStore(영구적, 스레드 간 공유)를 위한 어댑터."""
from typing import Any
from langgraph.config import get_config
from langgraph.store.base import BaseStore, Item
from deepagents.backends.protocol import (
BackendProtocol,
EditResult,
FileDownloadResponse,
FileInfo,
FileUploadResponse,
GrepMatch,
WriteResult,
)
from deepagents.backends.utils import (
_glob_search_files,
create_file_data,
file_data_to_string,
format_read_response,
grep_matches_from_files,
perform_string_replacement,
update_file_data,
)
class StoreBackend(BackendProtocol):
"""파일을 LangGraph의 BaseStore(영구적)에 저장하는 백엔드.
LangGraph의 Store를 사용하여 영구적이고 대화 간 공유되는 저장소를 사용합니다.
파일은 네임스페이스를 통해 조직화되며 모든 스레드에서 지속됩니다.
네임스페이스는 다중 에이전트 격리를 위해 선택적 assistant_id를 포함할 수 있습니다.
"""
def __init__(self, runtime: "ToolRuntime"):
    """Initialize the StoreBackend with a runtime.

    Args:
        runtime: ToolRuntime instance providing store access and configuration.
    """
    self.runtime = runtime
def _get_store(self) -> BaseStore:
    """Return the store instance from the runtime.

    Returns:
        The runtime's BaseStore instance.

    Raises:
        ValueError: If no store is available on the runtime.
    """
    backing = self.runtime.store
    if backing is None:
        raise ValueError("Store is required but not available in runtime")
    return backing
def _get_namespace(self) -> tuple[str, ...]:
"""저장소 작업을 위한 네임스페이스를 가져옵니다.
우선순위:
1) 존재하는 경우 `self.runtime.config` 사용 (테스트에서 명시적으로 전달).
2) 가능한 경우 `langgraph.config.get_config()`로 폴백(fallback).
3) ("filesystem",)으로 기본 설정.
config 메타데이터에 assistant_id가 있는 경우,
에이전트별 격리를 제공하기 위해 (assistant_id, "filesystem")을 반환합니다.
"""
namespace = "filesystem"
# Prefer the runtime-provided config when present
runtime_cfg = getattr(self.runtime, "config", None)
if isinstance(runtime_cfg, dict):
assistant_id = runtime_cfg.get("metadata", {}).get("assistant_id")
if assistant_id:
return (assistant_id, namespace)
return (namespace,)
# Fallback to langgraph's context, but guard against errors when
# called outside of a runnable context
try:
cfg = get_config()
except Exception:
return (namespace,)
try:
assistant_id = cfg.get("metadata", {}).get("assistant_id") # type: ignore[assignment]
except Exception:
assistant_id = None
if assistant_id:
return (assistant_id, namespace)
return (namespace,)
def _convert_store_item_to_file_data(self, store_item: Item) -> dict[str, Any]:
    """Convert a store Item into the FileData dict shape.

    Args:
        store_item: Store Item holding the file data.

    Returns:
        FileData dict with content, created_at, and modified_at fields.

    Raises:
        ValueError: If a required field is missing or has the wrong type.
    """
    value = store_item.value
    # Validate the three required fields (and their types) before copying.
    for field, expected_type in (
        ("content", list),
        ("created_at", str),
        ("modified_at", str),
    ):
        if field not in value or not isinstance(value[field], expected_type):
            msg = f"Store item does not contain valid {field} field. Got: {value.keys()}"
            raise ValueError(msg)
    return {
        "content": value["content"],
        "created_at": value["created_at"],
        "modified_at": value["modified_at"],
    }
def _convert_file_data_to_store_value(self, file_data: dict[str, Any]) -> dict[str, Any]:
"""FileData를 store.put()에 적합한 dict로 변환합니다.
Args:
file_data: 변환할 FileData.
Returns:
content, created_at, modified_at 필드를 포함하는 딕셔너리.
"""
return {
"content": file_data["content"],
"created_at": file_data["created_at"],
"modified_at": file_data["modified_at"],
}
def _search_store_paginated(
    self,
    store: BaseStore,
    namespace: tuple[str, ...],
    *,
    query: str | None = None,
    filter: dict[str, Any] | None = None,
    page_size: int = 100,
) -> list[Item]:
    """Search the store, auto-paginating until every result is collected.

    Args:
        store: The store to search.
        namespace: Hierarchical path prefix to search under.
        query: Optional natural-language search query.
        filter: Key-value pairs for filtering results.
        page_size: Items fetched per page (default: 100).

    Returns:
        All items matching the search criteria.

    Example:
        ```python
        store = _get_store(runtime)
        namespace = _get_namespace()
        all_items = _search_store_paginated(store, namespace)
        ```
    """
    collected: list[Item] = []
    offset = 0
    while True:
        batch = store.search(
            namespace,
            query=query,
            filter=filter,
            limit=page_size,
            offset=offset,
        )
        if not batch:
            break
        collected.extend(batch)
        if len(batch) < page_size:
            # A short page means the store is exhausted.
            break
        offset += page_size
    return collected
def ls_info(self, path: str) -> list[FileInfo]:
    """List files and directories in the given directory (non-recursive).

    Args:
        path: Absolute path of the directory.

    Returns:
        FileInfo-like dicts for files and directories directly under the
        directory. Directories get a trailing / and is_dir=True.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    # Retrieve every item and filter by prefix locally, so we don't depend
    # on store-specific filter semantics.
    items = self._search_store_paginated(store, namespace)
    # Normalize to a trailing slash so prefix matching is unambiguous.
    prefix = path if path.endswith("/") else path + "/"
    listing: list[FileInfo] = []
    seen_dirs: set[str] = set()
    for item in items:
        key = str(item.key)
        if not key.startswith(prefix):
            continue
        remainder = key[len(prefix):]
        if "/" in remainder:
            # Entry lives in a subdirectory; record only its first segment.
            seen_dirs.add(prefix + remainder.split("/")[0] + "/")
            continue
        # A file directly inside the requested directory.
        try:
            data = self._convert_store_item_to_file_data(item)
        except ValueError:
            # Malformed store entries are skipped rather than failing the listing.
            continue
        listing.append({
            "path": item.key,
            "is_dir": False,
            "size": int(len("\n".join(data.get("content", [])))),
            "modified_at": data.get("modified_at", ""),
        })
    listing.extend(
        {"path": d, "is_dir": True, "size": 0, "modified_at": ""}
        for d in sorted(seen_dirs)
    )
    listing.sort(key=lambda info: info.get("path", ""))
    return listing
def read(
    self,
    file_path: str,
    offset: int = 0,
    limit: int = 2000,
) -> str:
    """Read a file's content, formatted with line numbers.

    Args:
        file_path: Absolute path of the file.
        offset: Line offset to start reading from (0-based).
        limit: Maximum number of lines to read.

    Returns:
        The file content formatted with line numbers, or an error message.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    found = store.get(namespace, file_path)
    if found is None:
        return f"Error: File '{file_path}' not found"
    try:
        data = self._convert_store_item_to_file_data(found)
    except ValueError as exc:
        # Malformed store entries are surfaced as user-facing errors.
        return f"Error: {exc}"
    return format_read_response(data, offset, limit)
def write(
    self,
    file_path: str,
    content: str,
) -> WriteResult:
    """Create a new file with the given content.

    Returns a WriteResult; external stores set files_update=None.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    # Refuse to clobber an existing file; callers must edit it instead.
    if store.get(namespace, file_path) is not None:
        return WriteResult(
            error=f"Cannot write to {file_path} because it already exists. Read and then make an edit, or write to a new path."
        )
    new_data = create_file_data(content)
    store.put(namespace, file_path, self._convert_file_data_to_store_value(new_data))
    return WriteResult(path=file_path, files_update=None)
def edit(
    self,
    file_path: str,
    old_string: str,
    new_string: str,
    replace_all: bool = False,
) -> EditResult:
    """Edit a file by replacing occurrences of a string.

    Returns an EditResult; external stores set files_update=None.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    stored = store.get(namespace, file_path)
    if stored is None:
        return EditResult(error=f"Error: File '{file_path}' not found")
    try:
        data = self._convert_store_item_to_file_data(stored)
    except ValueError as exc:
        return EditResult(error=f"Error: {exc}")
    outcome = perform_string_replacement(
        file_data_to_string(data), old_string, new_string, replace_all
    )
    if isinstance(outcome, str):
        # A string result is an error message from the replacement helper.
        return EditResult(error=outcome)
    new_content, occurrences = outcome
    updated = update_file_data(data, new_content)
    # Persist the updated file back to the store.
    store.put(namespace, file_path, self._convert_file_data_to_store_value(updated))
    return EditResult(path=file_path, files_update=None, occurrences=int(occurrences))
# Removed legacy grep() convenience to keep lean surface
def grep_raw(
    self,
    pattern: str,
    path: str = "/",
    glob: str | None = None,
) -> list[GrepMatch] | str:
    """Return structured grep matches for files stored under the namespace.

    Returns a list of GrepMatch dicts, or an error string (e.g. for an
    invalid regex) as produced by grep_matches_from_files.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    file_map: dict[str, Any] = {}
    for entry in self._search_store_paginated(store, namespace):
        try:
            file_map[entry.key] = self._convert_store_item_to_file_data(entry)
        except ValueError:
            # Ignore malformed entries; grep only sees valid files.
            continue
    return grep_matches_from_files(file_map, pattern, path, glob)
def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
    """Return FileInfo entries for files matching a glob pattern."""
    store = self._get_store()
    namespace = self._get_namespace()
    file_map: dict[str, Any] = {}
    for entry in self._search_store_paginated(store, namespace):
        try:
            file_map[entry.key] = self._convert_store_item_to_file_data(entry)
        except ValueError:
            continue
    matched = _glob_search_files(file_map, pattern, path)
    # The helper returns a sentinel string instead of an empty result.
    if matched == "No files found":
        return []
    results: list[FileInfo] = []
    for file_path in matched.split("\n"):
        data = file_map.get(file_path)
        results.append({
            "path": file_path,
            "is_dir": False,
            "size": int(len("\n".join(data.get("content", [])))) if data else 0,
            "modified_at": data.get("modified_at", "") if data else "",
        })
    return results
def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
    """Upload multiple files to the store.

    Args:
        files: List of (path, content) tuples where content is bytes.

    Returns:
        A list of FileUploadResponse objects, one per input file, in the
        same order as the input.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    responses: list[FileUploadResponse] = []
    for path, content in files:
        try:
            content_str = content.decode("utf-8")
        except UnicodeDecodeError:
            # Previously a single non-UTF-8 payload raised and aborted the
            # whole batch with no responses. Report a per-file error instead,
            # mirroring the per-file error style used by download_files.
            responses.append(FileUploadResponse(path=path, error="invalid_utf8"))
            continue
        # Create file data and persist it.
        file_data = create_file_data(content_str)
        store_value = self._convert_file_data_to_store_value(file_data)
        store.put(namespace, path, store_value)
        responses.append(FileUploadResponse(path=path, error=None))
    return responses
def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
    """Download multiple files from the store.

    Args:
        paths: List of file paths to download.

    Returns:
        A list of FileDownloadResponse objects, one per input path, in the
        same order as the input.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    responses: list[FileDownloadResponse] = []
    for path in paths:
        item = store.get(namespace, path)
        if item is None:
            responses.append(FileDownloadResponse(path=path, content=None, error="file_not_found"))
            continue
        try:
            file_data = self._convert_store_item_to_file_data(item)
        except ValueError:
            # Malformed store entries are reported per-file instead of
            # raising, consistent with ls_info/read/edit which also catch
            # ValueError from this conversion.
            responses.append(FileDownloadResponse(path=path, content=None, error="invalid_file_data"))
            continue
        # Serialize the file content back to UTF-8 bytes.
        content_bytes = file_data_to_string(file_data).encode("utf-8")
        responses.append(FileDownloadResponse(path=path, content=content_bytes, error=None))
    return responses

View File

@@ -0,0 +1,436 @@
"""메모리 백엔드 구현을 위한 공유 유틸리티 함수들.
이 모듈은 백엔드와 복합 라우터(composite router)에서 사용하는
사용자 대면 문자열 포맷터와 구조적 헬퍼 함수를 포함합니다.
구조적 헬퍼는 깨지기 쉬운 문자열 파싱 없이 구성을 가능하게 합니다.
"""
import re
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, Literal
import wcmatch.glob as wcglob
from deepagents.backends.protocol import FileInfo as _FileInfo
from deepagents.backends.protocol import GrepMatch as _GrepMatch
# Message returned by check_empty_content when a file has no usable content.
EMPTY_CONTENT_WARNING = "System reminder: File exists but has empty contents"
# Lines longer than this are split into continuation chunks when rendered.
MAX_LINE_LENGTH = 10000
# Column width of the line-number gutter in formatted output.
LINE_NUMBER_WIDTH = 6
TOOL_RESULT_TOKEN_LIMIT = 20000  # Same threshold as eviction
# Appended to truncated tool results to nudge callers toward narrower queries.
TRUNCATION_GUIDANCE = "... [results truncated, try being more specific with your parameters]"
# Re-export protocol types for backwards compatibility
FileInfo = _FileInfo
GrepMatch = _GrepMatch
def sanitize_tool_call_id(tool_call_id: str) -> str:
    r"""Sanitize a tool_call_id to avoid path traversal and separator issues.

    Replaces the risky characters (., /, \) with underscores.
    """
    # str.translate performs all three single-character replacements in one pass.
    return tool_call_id.translate(str.maketrans({".": "_", "/": "_", "\\": "_"}))
def format_content_with_line_numbers(
    content: str | list[str],
    start_line: int = 1,
) -> str:
    """Format file content with line numbers (cat -n style).

    Lines longer than MAX_LINE_LENGTH are emitted in chunks with
    continuation markers (e.g. 5.1, 5.2).

    Args:
        content: File content as a string or a list of lines.
        start_line: Number assigned to the first line (default: 1).

    Returns:
        The formatted content with line numbers and continuation markers.
    """
    if isinstance(content, str):
        lines = content.split("\n")
        # Drop the single trailing empty element produced by a final newline.
        if lines and lines[-1] == "":
            lines = lines[:-1]
    else:
        lines = content
    rendered: list[str] = []
    for line_num, line in enumerate(lines, start=start_line):
        if len(line) <= MAX_LINE_LENGTH:
            rendered.append(f"{line_num:{LINE_NUMBER_WIDTH}d}\t{line}")
            continue
        # Over-long line: emit MAX_LINE_LENGTH-sized chunks. The first chunk
        # keeps the plain number; later chunks get decimal markers (5.1, 5.2, ...).
        for chunk_idx, start in enumerate(range(0, len(line), MAX_LINE_LENGTH)):
            chunk = line[start : start + MAX_LINE_LENGTH]
            if chunk_idx == 0:
                rendered.append(f"{line_num:{LINE_NUMBER_WIDTH}d}\t{chunk}")
            else:
                marker = f"{line_num}.{chunk_idx}"
                rendered.append(f"{marker:>{LINE_NUMBER_WIDTH}}\t{chunk}")
    return "\n".join(rendered)
def check_empty_content(content: str) -> str | None:
"""내용이 비어 있는지 확인하고 경고 메시지를 반환합니다.
Args:
content: 확인할 내용
Returns:
비어 있는 경우 경고 메시지, 그렇지 않으면 None
"""
if not content or content.strip() == "":
return EMPTY_CONTENT_WARNING
return None
def file_data_to_string(file_data: dict[str, Any]) -> str:
    """Convert FileData into its plain string content.

    Args:
        file_data: FileData dict whose 'content' key holds a list of lines.

    Returns:
        The lines joined with newline characters.
    """
    lines = file_data["content"]
    return "\n".join(lines)
def create_file_data(content: str, created_at: str | None = None) -> dict[str, Any]:
"""타임스탬프를 포함하는 FileData 객체를 생성합니다.
Args:
content: 문자열 형태의 파일 내용
created_at: 선택적 생성 타임스탬프 (ISO 형식)
Returns:
내용과 타임스탬프를 포함하는 FileData dict
"""
lines = content.split("\n") if isinstance(content, str) else content
now = datetime.now(UTC).isoformat()
return {
"content": lines,
"created_at": created_at or now,
"modified_at": now,
}
def update_file_data(file_data: dict[str, Any], content: str) -> dict[str, Any]:
"""생성 타임스탬프를 유지하면서 새로운 내용으로 FileData를 업데이트합니다.
Args:
file_data: 기존 FileData dict
content: 문자열 형태의 새로운 내용
Returns:
업데이트된 FileData dict
"""
lines = content.split("\n") if isinstance(content, str) else content
now = datetime.now(UTC).isoformat()
return {
"content": lines,
"created_at": file_data["created_at"],
"modified_at": now,
}
def format_read_response(
    file_data: dict[str, Any],
    offset: int,
    limit: int,
) -> str:
    """Format file data with line numbers for a read response.

    Args:
        file_data: FileData dict.
        offset: Line offset (0-based).
        limit: Maximum number of lines.

    Returns:
        Formatted content or an error message.
    """
    text = file_data_to_string(file_data)
    warning = check_empty_content(text)
    if warning:
        return warning
    all_lines = text.splitlines()
    if offset >= len(all_lines):
        return f"Error: Line offset {offset} exceeds file length ({len(all_lines)} lines)"
    # Clamp the window to the end of the file before numbering it.
    window = all_lines[offset : min(offset + limit, len(all_lines))]
    return format_content_with_line_numbers(window, start_line=offset + 1)
def perform_string_replacement(
content: str,
old_string: str,
new_string: str,
replace_all: bool,
) -> tuple[str, int] | str:
"""발생(occurrence) 검증과 함께 문자열 교체를 수행합니다.
Args:
content: 원본 내용
old_string: 교체할 문자열
new_string: 새로운 문자열
replace_all: 모든 발생을 교체할지 여부
Returns:
성공 시 (new_content, occurrences) 튜플, 또는 에러 메시지 문자열
"""
occurrences = content.count(old_string)
if occurrences == 0:
return f"Error: String not found in file: '{old_string}'"
if occurrences > 1 and not replace_all:
return f"Error: String '{old_string}' appears {occurrences} times in file. Use replace_all=True to replace all instances, or provide a more specific string with surrounding context."
new_content = content.replace(old_string, new_string)
return new_content, occurrences
def truncate_if_too_long(result: list[str] | str) -> list[str] | str:
    """Truncate a list or string result that exceeds the token limit (rough estimate: 4 chars/token)."""
    char_budget = TOOL_RESULT_TOKEN_LIMIT * 4
    if isinstance(result, list):
        total_chars = sum(len(item) for item in result)
        if total_chars <= char_budget:
            return result
        # Keep a proportional prefix of the items, then append the guidance.
        keep = len(result) * char_budget // total_chars
        return result[:keep] + [TRUNCATION_GUIDANCE]
    # String input: truncate to the character budget and append the guidance.
    if len(result) <= char_budget:
        return result
    return result[:char_budget] + "\n" + TRUNCATION_GUIDANCE
def _validate_path(path: str | None) -> str:
"""경로를 검증하고 정규화합니다.
Args:
path: 검증할 경로
Returns:
/로 시작하는 정규화된 경로
Raises:
ValueError: 경로가 유효하지 않은 경우
"""
path = path or "/"
if not path or path.strip() == "":
raise ValueError("Path cannot be empty")
normalized = path if path.startswith("/") else "/" + path
if not normalized.endswith("/"):
normalized += "/"
return normalized
def _glob_search_files(
    files: dict[str, Any],
    pattern: str,
    path: str = "/",
) -> str:
    """Search a files dict for paths matching a glob pattern.

    Args:
        files: Mapping from file path to FileData.
        pattern: Glob pattern (e.g. "*.py", "**/*.ts").
        path: Base path to search from.

    Returns:
        Newline-separated file paths sorted by modification time (newest
        first), or "No files found" when nothing matches.
    """
    try:
        prefix = _validate_path(path)
    except ValueError:
        return "No files found"
    # Standard glob semantics: a pattern without separators (e.g. "*.py")
    # matches only directly under `path`; use "**" explicitly for recursion.
    hits: list[tuple[str, str]] = []
    for file_path, file_data in files.items():
        if not file_path.startswith(prefix):
            continue
        relative = file_path[len(prefix) :].lstrip("/")
        if not relative:
            relative = file_path.split("/")[-1]
        if wcglob.globmatch(relative, pattern, flags=wcglob.BRACE | wcglob.GLOBSTAR):
            hits.append((file_path, file_data["modified_at"]))
    if not hits:
        return "No files found"
    hits.sort(key=lambda entry: entry[1], reverse=True)
    return "\n".join(file_path for file_path, _ in hits)
def _format_grep_results(
results: dict[str, list[tuple[int, str]]],
output_mode: Literal["files_with_matches", "content", "count"],
) -> str:
"""출력 모드에 따라 grep 검색 결과를 포맷팅합니다.
Args:
results: 파일 경로에서 (line_num, line_content) 튜플 리스트로의 딕셔너리
output_mode: 출력 형식 - "files_with_matches", "content", 또는 "count"
Returns:
포맷팅된 문자열 출력
"""
if output_mode == "files_with_matches":
return "\n".join(sorted(results.keys()))
if output_mode == "count":
lines = []
for file_path in sorted(results.keys()):
count = len(results[file_path])
lines.append(f"{file_path}: {count}")
return "\n".join(lines)
lines = []
for file_path in sorted(results.keys()):
lines.append(f"{file_path}:")
for line_num, line in results[file_path]:
lines.append(f" {line_num}: {line}")
return "\n".join(lines)
def _grep_search_files(
    files: dict[str, Any],
    pattern: str,
    path: str | None = None,
    glob: str | None = None,
    output_mode: Literal["files_with_matches", "content", "count"] = "files_with_matches",
) -> str:
    """Search file contents for a regex pattern.

    Args:
        files: Mapping from file path to FileData.
        pattern: Regex pattern to search for.
        path: Base path to search from.
        glob: Optional glob pattern to filter file names (e.g. "*.py").
        output_mode: "files_with_matches", "content", or "count".

    Returns:
        Formatted search results, or "No matches found".
    """
    try:
        regex = re.compile(pattern)
    except re.error as e:
        return f"Invalid regex pattern: {e}"
    try:
        prefix = _validate_path(path)
    except ValueError:
        return "No matches found"
    hits: dict[str, list[tuple[int, str]]] = {}
    for file_path, file_data in files.items():
        if not file_path.startswith(prefix):
            continue
        if glob and not wcglob.globmatch(Path(file_path).name, glob, flags=wcglob.BRACE):
            continue
        matched_lines = [
            (line_num, line)
            for line_num, line in enumerate(file_data["content"], 1)
            if regex.search(line)
        ]
        if matched_lines:
            hits[file_path] = matched_lines
    if not hits:
        return "No matches found"
    return _format_grep_results(hits, output_mode)
# -------- Structured helpers for composition --------
def grep_matches_from_files(
    files: dict[str, Any],
    pattern: str,
    path: str | None = None,
    glob: str | None = None,
) -> list[GrepMatch] | str:
    """Return structured grep matches from an in-memory file mapping.

    Returns a list of GrepMatch on success, or a string for bad input (e.g.
    an invalid regex). Errors are deliberately returned instead of raised so
    backends in tool contexts can preserve user-facing error messages.
    """
    try:
        regex = re.compile(pattern)
    except re.error as e:
        return f"Invalid regex pattern: {e}"
    try:
        prefix = _validate_path(path)
    except ValueError:
        return []
    found: list[GrepMatch] = []
    for file_path, file_data in files.items():
        if not file_path.startswith(prefix):
            continue
        if glob and not wcglob.globmatch(Path(file_path).name, glob, flags=wcglob.BRACE):
            continue
        for line_num, line in enumerate(file_data["content"], 1):
            if regex.search(line):
                found.append({"path": file_path, "line": int(line_num), "text": line})
    return found
def build_grep_results_dict(matches: list[GrepMatch]) -> dict[str, list[tuple[int, str]]]:
    """Group structured matches into the legacy dict shape used by the formatter."""
    grouped: dict[str, list[tuple[int, str]]] = {}
    for match in matches:
        path = match["path"]
        # Preserve first-seen path order and per-path match order.
        if path not in grouped:
            grouped[path] = []
        grouped[path].append((match["line"], match["text"]))
    return grouped
def format_grep_matches(
    matches: list[GrepMatch],
    output_mode: Literal["files_with_matches", "content", "count"],
) -> str:
    """Format structured grep matches using the existing formatting logic."""
    if not matches:
        return "No matches found"
    grouped = build_grep_results_dict(matches)
    return _format_grep_results(grouped, output_mode)

View File

@@ -0,0 +1,158 @@
"""Deepagents는 계획(planning), 파일시스템(filesystem), 하위 에이전트(subagents) 기능을 포함합니다."""
from collections.abc import Callable, Sequence
from typing import Any
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware, InterruptOnConfig, TodoListMiddleware
from langchain.agents.middleware.summarization import SummarizationMiddleware
from langchain.agents.middleware.types import AgentMiddleware
from langchain.agents.structured_output import ResponseFormat
from langchain.chat_models import init_chat_model
from langchain_anthropic import ChatAnthropic
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.cache.base import BaseCache
from langgraph.graph.state import CompiledStateGraph
from langgraph.store.base import BaseStore
from langgraph.types import Checkpointer
from deepagents.backends.protocol import BackendFactory, BackendProtocol
from deepagents.middleware.filesystem import FilesystemMiddleware
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from deepagents.middleware.subagents import CompiledSubAgent, SubAgent, SubAgentMiddleware
# Baseline instructions appended to every deep agent's system prompt.
BASE_AGENT_PROMPT = "In order to complete the objective that the user asks of you, you have access to a number of standard tools."
def get_default_model() -> ChatAnthropic:
    """Return the default model for a Deep Agent.

    Returns:
        A ChatAnthropic instance configured for Claude Sonnet 4.5
        (claude-sonnet-4-5-20250929) with a 20k max output token budget.
    """
    return ChatAnthropic(
        model_name="claude-sonnet-4-5-20250929",
        max_tokens=20000,
    )
def create_deep_agent(
    model: str | BaseChatModel | None = None,
    tools: Sequence[BaseTool | Callable | dict[str, Any]] | None = None,
    *,
    system_prompt: str | None = None,
    middleware: Sequence[AgentMiddleware] = (),
    subagents: list[SubAgent | CompiledSubAgent] | None = None,
    response_format: ResponseFormat | None = None,
    context_schema: type[Any] | None = None,
    checkpointer: Checkpointer | None = None,
    store: BaseStore | None = None,
    backend: BackendProtocol | BackendFactory | None = None,
    interrupt_on: dict[str, bool | InterruptOnConfig] | None = None,
    debug: bool = False,
    name: str | None = None,
    cache: BaseCache | None = None,
) -> CompiledStateGraph:
    """Create a Deep Agent.

    By default the agent has a todo-list tool (write_todos), seven file and
    execution tools (ls, read_file, write_file, edit_file, glob, grep,
    execute), and a subagent-spawning tool. The execute tool can run shell
    commands when the backend implements SandboxBackendProtocol; for
    non-sandbox backends the execute tool returns an error message.

    Args:
        model: The model to use. Defaults to Claude Sonnet 4.5.
        tools: The tools the agent has access to.
        system_prompt: Extra instructions for the agent; included in the system prompt.
        middleware: Additional middleware applied after the standard middleware.
        subagents: Subagents to use. Each subagent should be a dict with keys:
            - `name`
            - `description` (used by the main agent to decide whether to call it)
            - `prompt` (used as the subagent's system prompt)
            - (optional) `tools`
            - (optional) `model` (LanguageModelLike instance or dict config)
            - (optional) `middleware` (List[AgentMiddleware])
        response_format: Structured-output response format for the agent.
        context_schema: Schema of the Deep Agent.
        checkpointer: Optional checkpointer for persisting agent state between runs.
        store: Optional store for persistence (required when the backend uses StoreBackend).
        backend: Optional backend for file storage and execution. Pass a Backend
            instance or a callable factory such as `lambda rt: StateBackend(rt)`.
            For execution support use a backend implementing SandboxBackendProtocol.
        interrupt_on: Optional Dict[str, bool | InterruptOnConfig] mapping tool
            names to interrupt configs.
        debug: Whether to enable debug mode. Forwarded to create_agent.
        name: Name of the agent. Forwarded to create_agent.
        cache: Cache to use for the agent. Forwarded to create_agent.

    Returns:
        The configured Deep Agent.
    """
    if model is None:
        model = get_default_model()
    elif isinstance(model, str):
        model = init_chat_model(model)
    # Size summarization thresholds from the model's declared context window
    # when available; otherwise fall back to fixed token/message counts.
    if (
        model.profile is not None
        and isinstance(model.profile, dict)
        and "max_input_tokens" in model.profile
        and isinstance(model.profile["max_input_tokens"], int)
    ):
        trigger = ("fraction", 0.85)
        keep = ("fraction", 0.10)
    else:
        trigger = ("tokens", 170000)
        keep = ("messages", 6)
    deepagent_middleware = [
        TodoListMiddleware(),
        FilesystemMiddleware(backend=backend),
        SubAgentMiddleware(
            default_model=model,
            default_tools=tools,
            subagents=subagents if subagents is not None else [],
            # Subagents get the same standard stack as the main agent.
            default_middleware=[
                TodoListMiddleware(),
                FilesystemMiddleware(backend=backend),
                SummarizationMiddleware(
                    model=model,
                    trigger=trigger,
                    keep=keep,
                    trim_tokens_to_summarize=None,
                ),
                AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
                PatchToolCallsMiddleware(),
            ],
            default_interrupt_on=interrupt_on,
            general_purpose_agent=True,
        ),
        SummarizationMiddleware(
            model=model,
            trigger=trigger,
            keep=keep,
            trim_tokens_to_summarize=None,
        ),
        AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
        PatchToolCallsMiddleware(),
    ]
    if middleware:
        deepagent_middleware.extend(middleware)
    # Human-in-the-loop interrupts run last so they see the final tool set.
    if interrupt_on is not None:
        deepagent_middleware.append(HumanInTheLoopMiddleware(interrupt_on=interrupt_on))
    return create_agent(
        model,
        system_prompt=system_prompt + "\n\n" + BASE_AGENT_PROMPT if system_prompt else BASE_AGENT_PROMPT,
        tools=tools,
        middleware=deepagent_middleware,
        response_format=response_format,
        context_schema=context_schema,
        checkpointer=checkpointer,
        store=store,
        debug=debug,
        name=name,
        cache=cache,
    ).with_config({"recursion_limit": 1000})

View File

@@ -0,0 +1,11 @@
"""Middleware for the DeepAgent."""
from deepagents.middleware.filesystem import FilesystemMiddleware
from deepagents.middleware.subagents import CompiledSubAgent, SubAgent, SubAgentMiddleware
__all__ = [
"CompiledSubAgent",
"FilesystemMiddleware",
"SubAgent",
"SubAgentMiddleware",
]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
"""Middleware to patch dangling tool calls in the messages history."""
from typing import Any
from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.messages import ToolMessage
from langgraph.runtime import Runtime
from langgraph.types import Overwrite
class PatchToolCallsMiddleware(AgentMiddleware):
    """Middleware that patches dangling tool calls in the message history."""

    def before_agent(self, state: AgentState, runtime: Runtime[Any]) -> dict[str, Any] | None:  # noqa: ARG002
        """Handle dangling tool calls on AIMessages before the agent runs."""
        history = state["messages"]
        if not history:
            return None
        repaired: list[Any] = []
        for index, message in enumerate(history):
            repaired.append(message)
            if message.type != "ai" or not message.tool_calls:
                continue
            # IDs of tool results appearing at or after this AI message.
            answered = {m.tool_call_id for m in history[index:] if m.type == "tool"}
            for call in message.tool_calls:
                if call["id"] in answered:
                    continue
                # Dangling tool call: synthesize a ToolMessage so the
                # history stays well-formed for the model.
                repaired.append(
                    ToolMessage(
                        content=(
                            f"도구 호출 {call['name']} (ID: {call['id']})이 취소되었습니다 - "
                            "완료되기 전에 다른 메시지가 도착했습니다."
                        ),
                        name=call["name"],
                        tool_call_id=call["id"],
                    )
                )
        return {"messages": Overwrite(repaired)}

View File

@@ -0,0 +1,498 @@
"""Middleware for providing subagents to an agent via a `task` tool."""
from collections.abc import Awaitable, Callable, Sequence
from typing import Any, NotRequired, TypedDict, cast
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware, InterruptOnConfig
from langchain.agents.middleware.types import AgentMiddleware, ModelRequest, ModelResponse
from langchain.tools import BaseTool, ToolRuntime
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import StructuredTool
from langgraph.types import Command
class SubAgent(TypedDict):
    """Specification for a subagent.

    When specifying custom agents, the `default_middleware` of
    `SubAgentMiddleware` is applied first, followed by the `middleware`
    declared in this specification. To use only custom middleware without
    the defaults, pass `default_middleware=[]` to `SubAgentMiddleware`.
    """
    name: str
    """Name of the agent."""
    description: str
    """Description of the agent."""
    system_prompt: str
    """System prompt to use for the agent."""
    tools: Sequence[BaseTool | Callable | dict[str, Any]]
    """Tools to use for the agent."""
    model: NotRequired[str | BaseChatModel]
    """Model for the agent. Defaults to `default_model`."""
    middleware: NotRequired[list[AgentMiddleware]]
    """Additional middleware appended after `default_middleware`."""
    interrupt_on: NotRequired[dict[str, bool | InterruptOnConfig]]
    """Tool interrupt configuration to use for the agent."""
class CompiledSubAgent(TypedDict):
    """Specification for a pre-compiled subagent."""
    name: str
    """Name of the agent."""
    description: str
    """Description of the agent."""
    runnable: Runnable
    """Runnable to use for the agent."""
# Fallback system prompt for subagents (Korean; runtime string — do not translate).
DEFAULT_SUBAGENT_PROMPT = "사용자가 요청하는 목표를 완료하기 위해, 당신은 여러 표준 도구에 접근할 수 있습니다."
# State keys that are excluded when passing state to subagents and when returning
# updates from subagents.
# When returning updates:
# 1. The messages key is handled explicitly to ensure only the final message is included
# 2. The todos and structured_response keys are excluded as they do not have a defined reducer
# and no clear meaning for returning them from a subagent to the main agent.
_EXCLUDED_STATE_KEYS = {"messages", "todos", "structured_response"}
TASK_TOOL_DESCRIPTION = """격리된 컨텍스트 창(isolated context windows)을 가진 복잡하고 다단계적인 독립 작업을 처리하기 위해 일회성(ephemeral) 서브 에이전트를 실행합니다.
사용 가능한 에이전트 유형과 그들이 접근할 수 있는 도구:
{available_agents}
Task 도구를 사용할 때는 subagent_type 매개변수를 지정하여 사용할 에이전트 유형을 선택해야 합니다.
## 사용 참고 사항:
1. 성능을 극대화하기 위해 가능한 경우 여러 에이전트를 동시에(concurrently) 실행하십시오. 이를 위해 다중 도구 사용(multiple tool uses)이 포함된 단일 메시지를 사용하십시오.
2. 에이전트가 완료되면 단일 메시지를 반환합니다. 에이전트가 반환한 결과는 사용자에게 보이지 않습니다. 사용자에게 결과를 보여주려면 결과에 대한 간결한 요약이 담긴 텍스트 메시지를 사용자에게 보내야 합니다.
3. 각 에이전트 호출은 상태비저장(stateless)입니다. 서브 에이전트에게 추가 메시지를 보낼 수 없으며, 서브 에이전트도 최종 보고서 이외에는 당신과 통신할 수 없습니다. 따라서 프롬프트에는 에이전트가 자율적으로 수행해야 할 작업에 대한 매우 자세한 설명이 포함되어야 하며, 에이전트가 최종적이고 유일한 메시지로 어떤 정보를 반환해야 하는지 정확히 지정해야 합니다.
4. 에이전트의 출력은 일반적으로 신뢰할 수 있어야 합니다.
5. 에이전트는 사용자의 의도를 알지 못하므로 콘텐츠 생성, 분석 수행, 또는 단순 연구(검색, 파일 읽기, 웹 가져오기 등) 중 무엇을 수행해야 하는지 명확하게 알려주십시오.
6. 에이전트 설명에 선제적으로(proactively) 사용해야 한다고 언급되어 있다면, 사용자가 먼저 요청하지 않아도 최선을 다해 사용해 보십시오. 판단력을 발휘하십시오.
7. 범용(general-purpose) 에이전트만 제공되는 경우 모든 작업에 해당 에이전트를 사용해야 합니다. 메인 에이전트와 동일한 모든 기능을 갖추고 있으므로, 컨텍스트와 토큰 사용을 격리하고 특정하고 복잡한 작업을 완료하는 데 매우 적합합니다.
### 범용 에이전트 사용 예시:
<example_agent_descriptions>
"general-purpose": use this agent for general purpose tasks, it has access to all tools as the main agent.
</example_agent_descriptions>
<example>
User: "I want to conduct research on the accomplishments of Lebron James, Michael Jordan, and Kobe Bryant, and then compare them."
Assistant: *Uses the task tool in parallel to conduct isolated research on each of the three players*
Assistant: *Synthesizes the results of the three isolated research tasks and responds to the User*
<commentary>
연구는 그 자체로 복잡하고 다단계적인 작업입니다.
각 개별 선수의 연구는 다른 선수의 연구에 의존하지 않습니다.
어시스턴트는 task 도구를 사용하여 복잡한 목표를 세 가지 독립적인 작업으로 나눕니다.
각 연구 작업은 한 선수에 대한 컨텍스트와 토큰만 신경 쓰면 되며, 도구 결과로 각 선수에 대한 종합된 정보를 반환합니다.
이는 각 연구 작업이 각 선수를 깊이 있게 연구하는 데 토큰과 컨텍스트를 사용할 수 있음을 의미하며, 최종 결과는 종합된 정보이므로 선수들을 서로 비교할 때 장기적으로 토큰을 절약할 수 있습니다.
</commentary>
</example>
<example>
User: "Analyze a single large code repository for security vulnerabilities and generate a report."
Assistant: *Launches a single `task` subagent for the repository analysis*
Assistant: *Receives report and integrates results into final summary*
<commentary>
서브 에이전트는 단 하나라도 크고 컨텍스트가 많은 작업을 격리하는 데 사용됩니다. 이는 메인 스레드가 세부 사항으로 과부하되는 것을 방지합니다.
사용자가 후속 질문을 하면 분석 및 도구 호출의 전체 기록 대신 참조할 간결한 보고서가 있으므로 시간과 비용을 절약할 수 있습니다.
</commentary>
</example>
<example>
User: "Schedule two meetings for me and prepare agendas for each."
Assistant: *Calls the task tool in parallel to launch two `task` subagents (one per meeting) to prepare agendas*
Assistant: *Returns final schedules and agendas*
<commentary>
작업은 개별적으로는 간단하지만, 서브 에이전트는 의제 준비를 격리하는 데 도움이 됩니다.
각 서브 에이전트는 한 회의의 의제만 신경 쓰면 됩니다.
</commentary>
</example>
<example>
User: "I want to order a pizza from Dominos, order a burger from McDonald's, and order a salad from Subway."
Assistant: *Calls tools directly in parallel to order a pizza from Dominos, a burger from McDonald's, and a salad from Subway*
<commentary>
목표가 매우 간단하고 명확하며 몇 가지 사소한 도구 호출만 필요하므로 어시스턴트는 task 도구를 사용하지 않았습니다.
작업을 직접 완료하고 `task` 도구를 사용하지 않는 것이 더 좋습니다.
</commentary>
</example>
### Example usage with custom agents:
<example_agent_descriptions>
"content-reviewer": use this agent after you are done creating significant content or documents
"greeting-responder": use this agent when to respond to user greetings with a friendly joke
"research-analyst": use this agent to conduct thorough research on complex topics
</example_agent_description>
<example>
user: "Please write a function that checks if a number is prime"
assistant: Sure let me write a function that checks if a number is prime
assistant: First let me use the Write tool to write a function that checks if a number is prime
assistant: I'm going to use the Write tool to write the following code:
<code>
function isPrime(n) {{
if (n <= 1) return false
for (let i = 2; i * i <= n; i++) {{
if (n % i === 0) return false
}}
return true
}}
</code>
<commentary>
상당한 콘텐츠가 생성되었고 작업이 완료되었으므로, 이제 content-reviewer 에이전트를 사용하여 작업을 검토합니다.
</commentary>
assistant: Now let me use the content-reviewer agent to review the code
assistant: Uses the Task tool to launch with the content-reviewer agent
</example>
<example>
user: "Can you help me research the environmental impact of different renewable energy sources and create a comprehensive report?"
<commentary>
이것은 철저한 분석을 수행하기 위해 research-analyst 에이전트를 사용하는 것이 도움이 되는 복잡한 연구 작업입니다.
</commentary>
assistant: I'll help you research the environmental impact of renewable energy sources. Let me use the research-analyst agent to conduct comprehensive research on this topic.
assistant: Uses the Task tool to launch with the research-analyst agent, providing detailed instructions about what research to conduct and what format the report should take
</example>
<example>
user: "Hello"
<commentary>
사용자가 인사를 하고 있으므로, greeting-responder 에이전트를 사용하여 친절한 농담으로 응답하십시오.
</commentary>
assistant: "I'm going to use the Task tool to launch with the greeting-responder agent"
</example>""" # noqa: E501
TASK_SYSTEM_PROMPT = """## `task` (서브 에이전트 스포너(spawner))
당신은 격리된 작업을 처리하는 일회성 서브 에이전트를 실행하기 위한 `task` 도구에 접근할 수 있습니다. 이 에이전트들은 일회적(ephemeral)입니다 — 작업 기간 동안에만 존재하며 단일 결과를 반환합니다.
task 도구를 사용해야 하는 경우:
- 작업이 복잡하고 다단계적이며 완전히 격리하여 위임할 수 있는 경우
- 작업이 다른 작업과 독립적이며 병렬로 실행할 수 있는 경우
- 작업에 집중적인 추론이나 많은 토큰/컨텍스트 사용이 필요하여 오케스트레이터 스레드를 부풀릴(bloat) 수 있는 경우
- 샌드박싱이 신뢰성을 향상시키는 경우 (예: 코드 실행, 구조화된 검색, 데이터 포맷팅)
- 서브 에이전트의 중간 단계가 아니라 출력에만 관심이 있는 경우 (예: 많은 연구를 수행한 후 종합된 보고서를 반환하거나, 간결하고 관련성 있는 답변을 얻기 위해 일련의 계산 또는 조회를 수행하는 경우)
서브 에이전트 생명주기:
1. **생성(Spawn)** → 명확한 역할, 지침 및 예상 출력 제공
2. **실행(Run)** → 서브 에이전트가 자율적으로 작업 완료
3. **반환(Return)** → 서브 에이전트가 단일 구조화된 결과를 제공
4. **조정(Reconcile)** → 결과를 메인 스레드에 통합하거나 합성
task 도구를 사용하지 말아야 하는 경우:
- 서브 에이전트가 완료된 후 중간 추론이나 단계를 확인해야 하는 경우 (task 도구는 이를 숨깁니다)
- 작업이 사소한 경우 (몇 번의 도구 호출 또는 간단한 조회)
- 위임이 토큰 사용량, 복잡성 또는 컨텍스트 전환을 줄이지 않는 경우
- 분할이 이점 없이 지연 시간만 추가하는 경우
## 기억해야 할 중요한 Task 도구 사용 참고 사항
- 가능하면 수행하는 작업을 병렬화하십시오. 이는 도구 호출(tool_calls)과 작업(tasks) 모두에 해당합니다. 완료해야 할 독립적인 단계가 있을 때마다 - 도구 호출을 하거나 작업을 병렬로 시작(kick off)하여 더 빠르게 완료하십시오. 이는 사용자에게 매우 중요한 시간을 절약해 줍니다.
- 다중 파트 목표 내에서 독립적인 작업을 격리(silo)하려면 `task` 도구를 사용하는 것을 기억하십시오.
- 여러 단계가 걸리고 에이전트가 완료해야 하는 다른 작업과 독립적인 복잡한 작업이 있을 때마다 `task` 도구를 사용해야 합니다. 이 에이전트들은 매우 유능하고 효율적입니다.""" # noqa: E501
# Tool-facing description of the built-in general-purpose subagent (Korean; runtime string).
DEFAULT_GENERAL_PURPOSE_DESCRIPTION = "복잡한 질문 연구, 파일 및 콘텐츠 검색, 다중 단계 작업 실행을 위한 범용 에이전트입니다. 키워드나 파일을 검색할 때 처음 몇 번의 시도로 올바른 일치 항목을 찾을 수 있을지 확신이 서지 않는다면, 이 에이전트를 사용하여 검색을 수행하십시오. 이 에이전트는 메인 에이전트와 동일한 모든 도구에 접근할 수 있습니다." # noqa: E501
def _get_subagents(
    *,
    default_model: str | BaseChatModel,
    default_tools: Sequence[BaseTool | Callable | dict[str, Any]],
    default_middleware: list[AgentMiddleware] | None,
    default_interrupt_on: dict[str, bool | InterruptOnConfig] | None,
    subagents: list[SubAgent | CompiledSubAgent],
    general_purpose_agent: bool,
) -> tuple[dict[str, Any], list[str]]:
    """Build subagent instances from their specifications.

    Args:
        default_model: Model used by subagents that do not specify one.
        default_tools: Tools used by subagents that do not specify any.
        default_middleware: Middleware applied to every subagent. When `None`,
            no default middleware is applied.
        default_interrupt_on: Tool interrupt configuration for the built-in
            general-purpose subagent; also the fallback for subagents that do
            not specify their own configuration.
        subagents: Agent specifications or pre-compiled agents.
        general_purpose_agent: Whether to include the general-purpose subagent.

    Returns:
        A ``(agent_dict, description_list)`` tuple mapping agent names to
        runnable instances, plus the formatted description lines.
    """
    # Treat None as "no default middleware at all".
    base_middleware = list(default_middleware or [])
    agents: dict[str, Any] = {}
    descriptions: list[str] = []

    # Built-in general-purpose subagent, if requested.
    if general_purpose_agent:
        gp_middleware = list(base_middleware)
        if default_interrupt_on:
            gp_middleware.append(HumanInTheLoopMiddleware(interrupt_on=default_interrupt_on))
        agents["general-purpose"] = create_agent(
            default_model,
            system_prompt=DEFAULT_SUBAGENT_PROMPT,
            tools=default_tools,
            middleware=gp_middleware,
        )
        descriptions.append(f"- general-purpose: {DEFAULT_GENERAL_PURPOSE_DESCRIPTION}")

    # User-provided subagents: either pre-compiled runnables or specs to build.
    for spec in subagents:
        descriptions.append(f"- {spec['name']}: {spec['description']}")
        if "runnable" in spec:
            # Pre-compiled agent: register as-is.
            compiled = cast("CompiledSubAgent", spec)
            agents[compiled["name"]] = compiled["runnable"]
            continue
        # Fresh middleware stack per subagent so appends never leak across specs.
        middleware_stack = list(base_middleware)
        if "middleware" in spec:
            middleware_stack.extend(spec["middleware"])
        hitl_config = spec.get("interrupt_on", default_interrupt_on)
        if hitl_config:
            middleware_stack.append(HumanInTheLoopMiddleware(interrupt_on=hitl_config))
        agents[spec["name"]] = create_agent(
            spec.get("model", default_model),
            system_prompt=spec["system_prompt"],
            tools=spec.get("tools", list(default_tools)),
            middleware=middleware_stack,
        )
    return agents, descriptions
def _create_task_tool(
    *,
    default_model: str | BaseChatModel,
    default_tools: Sequence[BaseTool | Callable | dict[str, Any]],
    default_middleware: list[AgentMiddleware] | None,
    default_interrupt_on: dict[str, bool | InterruptOnConfig] | None,
    subagents: list[SubAgent | CompiledSubAgent],
    general_purpose_agent: bool,
    task_description: str | None = None,
) -> BaseTool:
    """Create the `task` tool used to invoke subagents.

    Args:
        default_model: Default model for subagents.
        default_tools: Default tools for subagents.
        default_middleware: Middleware applied to every subagent.
        default_interrupt_on: Tool interrupt configuration for the built-in
            general-purpose subagent; also the fallback for subagents that do
            not specify their own configuration.
        subagents: List of subagent specifications.
        general_purpose_agent: Whether to include the general-purpose agent.
        task_description: Custom description for the task tool. Uses the
            default template when `None`. Supports the `{available_agents}`
            placeholder.

    Returns:
        A StructuredTool that can invoke subagents by type.
    """
    subagent_graphs, subagent_descriptions = _get_subagents(
        default_model=default_model,
        default_tools=default_tools,
        default_middleware=default_middleware,
        default_interrupt_on=default_interrupt_on,
        subagents=subagents,
        general_purpose_agent=general_purpose_agent,
    )
    subagent_description_str = "\n".join(subagent_descriptions)
    def _return_command_with_state_update(result: dict, tool_call_id: str) -> Command:
        # Propagate the subagent's final state to the parent graph, minus the
        # keys that must stay isolated per agent (see _EXCLUDED_STATE_KEYS).
        state_update = {k: v for k, v in result.items() if k not in _EXCLUDED_STATE_KEYS}
        # Strip trailing whitespace to prevent API errors with Anthropic
        message_text = result["messages"][-1].text.rstrip() if result["messages"][-1].text else ""
        return Command(
            update={
                **state_update,
                "messages": [ToolMessage(message_text, tool_call_id=tool_call_id)],
            }
        )
    def _validate_and_prepare_state(
        subagent_type: str, description: str, runtime: ToolRuntime
    ) -> tuple[Runnable, dict]:
        """Prepare state for invocation."""
        subagent = subagent_graphs[subagent_type]
        # Create a new state dict to avoid mutating the original
        subagent_state = {k: v for k, v in runtime.state.items() if k not in _EXCLUDED_STATE_KEYS}
        # The subagent starts from a clean conversation seeded with the task text.
        subagent_state["messages"] = [HumanMessage(content=description)]
        return subagent, subagent_state
    # Use custom description if provided, otherwise use default template
    if task_description is None:
        task_description = TASK_TOOL_DESCRIPTION.format(available_agents=subagent_description_str)
    elif "{available_agents}" in task_description:
        # If custom description has placeholder, format with agent descriptions
        task_description = task_description.format(available_agents=subagent_description_str)
    def task(
        description: str,
        subagent_type: str,
        runtime: ToolRuntime,
    ) -> str | Command:
        # Unknown subagent type: return a (Korean) error listing the valid types.
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return f"{subagent_type} 서브 에이전트는 존재하지 않으므로 호출할 수 없습니다. 허용된 유형은 다음과 같습니다: {allowed_types}"
        subagent, subagent_state = _validate_and_prepare_state(subagent_type, description, runtime)
        result = subagent.invoke(subagent_state, runtime.config)
        # A tool_call_id is required to attach the result as a ToolMessage.
        if not runtime.tool_call_id:
            value_error_msg = "서브 에이전트 호출에는 도구 호출 ID가 필요합니다"
            raise ValueError(value_error_msg)
        return _return_command_with_state_update(result, runtime.tool_call_id)
    async def atask(
        description: str,
        subagent_type: str,
        runtime: ToolRuntime,
    ) -> str | Command:
        # Async twin of `task`; keep the two implementations in sync.
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return f"{subagent_type} 서브 에이전트는 존재하지 않으므로 호출할 수 없습니다. 허용된 유형은 다음과 같습니다: {allowed_types}"
        subagent, subagent_state = _validate_and_prepare_state(subagent_type, description, runtime)
        result = await subagent.ainvoke(subagent_state, runtime.config)
        if not runtime.tool_call_id:
            value_error_msg = "서브 에이전트 호출에는 도구 호출 ID가 필요합니다"
            raise ValueError(value_error_msg)
        return _return_command_with_state_update(result, runtime.tool_call_id)
    # Expose both sync and async entry points under the single `task` tool.
    return StructuredTool.from_function(
        name="task",
        func=task,
        coroutine=atask,
        description=task_description,
    )
class SubAgentMiddleware(AgentMiddleware):
    """Middleware that gives an agent subagents via the `task` tool.

    This middleware adds a `task` tool the agent can use to invoke subagents.
    Subagents are useful for handling complex multi-step tasks, or tasks that
    require a lot of context to resolve. Their main benefit is that they can
    carry out a multi-step task and then return a clean, concise response to
    the main agent. Subagents are also a good fit for distinct specialities
    that need a narrow tool set and focus.

    The middleware ships with a default general-purpose subagent that can
    handle the same work as the main agent in an isolated context.

    Args:
        default_model: Model to use for subagents. May be a LanguageModelLike
            or a dict for init_chat_model.
        default_tools: Tools to use for the default general-purpose subagent.
        default_middleware: Default middleware applied to all subagents. When
            `None` (the default), no default middleware is applied. Pass a
            list to specify custom middleware.
        default_interrupt_on: Tool interrupt configuration for the default
            general-purpose subagent. Also the fallback for subagents that do
            not specify their own configuration.
        subagents: Additional subagents to provide to the agent.
        system_prompt: Subagent-usage instructions. When provided, they are
            appended to the agent's existing system prompt in
            `wrap_model_call` (or used as the whole prompt when none exists).
        general_purpose_agent: Whether to include the general-purpose agent.
            Defaults to `True`.
        task_description: Custom description for the task tool. Uses the
            default description template when `None`.

    Example:
        ```python
        from langchain.agents.middleware.subagents import SubAgentMiddleware
        from langchain.agents import create_agent

        # Basic usage with defaults (no default middleware)
        agent = create_agent(
            "openai:gpt-4o",
            middleware=[
                SubAgentMiddleware(
                    default_model="openai:gpt-4o",
                    subagents=[],
                )
            ],
        )

        # Add custom middleware to subagents
        agent = create_agent(
            "openai:gpt-4o",
            middleware=[
                SubAgentMiddleware(
                    default_model="openai:gpt-4o",
                    default_middleware=[TodoListMiddleware()],
                    subagents=[],
                )
            ],
        )
        ```
    """
    def __init__(
        self,
        *,
        default_model: str | BaseChatModel,
        default_tools: Sequence[BaseTool | Callable | dict[str, Any]] | None = None,
        default_middleware: list[AgentMiddleware] | None = None,
        default_interrupt_on: dict[str, bool | InterruptOnConfig] | None = None,
        subagents: list[SubAgent | CompiledSubAgent] | None = None,
        system_prompt: str | None = TASK_SYSTEM_PROMPT,
        general_purpose_agent: bool = True,
        task_description: str | None = None,
    ) -> None:
        """Initialize SubAgentMiddleware."""
        super().__init__()
        self.system_prompt = system_prompt
        # The `task` tool carries all subagent wiring; it is the middleware's
        # only contribution to the agent's tool set.
        task_tool = _create_task_tool(
            default_model=default_model,
            default_tools=default_tools or [],
            default_middleware=default_middleware,
            default_interrupt_on=default_interrupt_on,
            subagents=subagents or [],
            general_purpose_agent=general_purpose_agent,
            task_description=task_description,
        )
        self.tools = [task_tool]
    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Update the system prompt to include subagent usage instructions."""
        if self.system_prompt is not None:
            # Append to the existing prompt rather than replacing it.
            system_prompt = (
                request.system_prompt + "\n\n" + self.system_prompt if request.system_prompt else self.system_prompt
            )
            return handler(request.override(system_prompt=system_prompt))
        return handler(request)
    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelResponse:
        """(async) Update the system prompt to include subagent usage instructions."""
        if self.system_prompt is not None:
            system_prompt = (
                request.system_prompt + "\n\n" + self.system_prompt if request.system_prompt else self.system_prompt
            )
            return await handler(request.override(system_prompt=system_prompt))
        return await handler(request)

View File

@@ -0,0 +1,100 @@
[project]
name = "deepagents"
version = "0.3.1"
description = "General purpose 'deep agent' with sub-agent spawning, todo list capabilities, and mock file system. Built on LangGraph."
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.11,<4.0"
dependencies = [
"langchain-anthropic>=1.2.0,<2.0.0",
"langchain-google-genai",
"langchain>=1.1.0,<2.0.0",
"langchain-core>=1.1.0,<2.0.0",
"wcmatch",
]
[project.urls]
Homepage = "https://docs.langchain.com/oss/python/deepagents/overview"
Documentation = "https://reference.langchain.com/python/deepagents/"
Source = "https://github.com/langchain-ai/deepagents"
Twitter = "https://x.com/LangChainAI"
Slack = "https://www.langchain.com/join-community"
Reddit = "https://www.reddit.com/r/LangChain/"
[dependency-groups]
test = [
"pytest",
"pytest-cov",
"pytest-xdist",
"ruff>=0.12.2,<0.13.0",
"mypy>=1.18.1,<1.19.0",
"pytest-asyncio>=1.3.0",
]
dev = [
"langchain-openai",
"twine",
"build",
]
[build-system]
requires = ["setuptools>=73.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
"*" = ["py.typed", "*.md"]
[tool.ruff]
line-length = 150
# Exclude any files that shouldn't be linted
exclude = []
[tool.ruff.format]
docstring-code-format = true # Formats code blocks in docstrings
[tool.ruff.lint]
select = [
"ALL" # Enable all rules by default
]
ignore = [
"COM812", # Messes with the formatter
"ISC001", # Messes with the formatter
"PERF203", # Rarely useful
"SLF001", # Private member access
"PLC0415", # Imports should be at the top. Not always desirable
"PLR0913", # Too many arguments in function definition
"PLC0414", # Inconsistent with how type checkers expect to be notified of intentional re-exports
"C901", # Too complex
]
unfixable = ["B028"] # Rules that shouldn't be auto-fixed
[tool.ruff.lint.pyupgrade]
keep-runtime-typing = true
[tool.ruff.lint.flake8-annotations]
allow-star-arg-any = true
[tool.ruff.lint.pydocstyle]
convention = "google" # Google-style docstrings
ignore-var-parameters = true
[tool.ruff.lint.per-file-ignores]
"tests/*" = [
"D1", # Skip documentation rules in tests
"S101", # Allow asserts in tests
"S311", # Allow pseudo-random generators in tests
# Add more test-specific ignores
]
[tool.mypy]
strict = true
ignore_missing_imports = true
enable_error_code = ["deprecated"]
# Optional: reduce strictness if needed
disallow_any_generics = false
warn_return_any = false
[tool.pytest.ini_options]
asyncio_mode = "auto"

View File

@@ -0,0 +1 @@
# This file makes the integration_tests directory a Python package for relative imports

View File

@@ -0,0 +1,165 @@
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy
from langchain_core.messages import HumanMessage
from pydantic import BaseModel
from deepagents.graph import create_deep_agent
from ..utils import (
SAMPLE_MODEL,
TOY_BASKETBALL_RESEARCH,
ResearchMiddleware,
ResearchMiddlewareWithTools,
SampleMiddlewareWithTools,
SampleMiddlewareWithToolsAndState,
WeatherToolMiddleware,
assert_all_deepagent_qualities,
get_soccer_scores,
get_weather,
sample_tool,
)
class TestDeepAgents:
    """Integration tests for `create_deep_agent` composition and subagent wiring."""

    def test_base_deep_agent(self):
        """A bare deep agent has all default deep-agent qualities."""
        agent = create_deep_agent()
        assert_all_deepagent_qualities(agent)

    def test_deep_agent_with_tool(self):
        """Tools passed directly are registered on the tools node."""
        agent = create_deep_agent(tools=[sample_tool])
        assert_all_deepagent_qualities(agent)
        # Membership works directly on the mapping; `.keys()` is redundant (SIM118).
        assert "sample_tool" in agent.nodes["tools"].bound._tools_by_name

    def test_deep_agent_with_middleware_with_tool(self):
        """Middleware-contributed tools are registered on the tools node."""
        agent = create_deep_agent(middleware=[SampleMiddlewareWithTools()])
        assert_all_deepagent_qualities(agent)
        assert "sample_tool" in agent.nodes["tools"].bound._tools_by_name

    def test_deep_agent_with_middleware_with_tool_and_state(self):
        """Middleware can contribute both tools and extra state channels."""
        agent = create_deep_agent(middleware=[SampleMiddlewareWithToolsAndState()])
        assert_all_deepagent_qualities(agent)
        assert "sample_tool" in agent.nodes["tools"].bound._tools_by_name
        assert "sample_input" in agent.stream_channels

    def test_deep_agent_with_subagents(self):
        """A declared subagent is reachable through the `task` tool."""
        subagents = [
            {
                "name": "weather_agent",
                "description": "Use this agent to get the weather",
                "system_prompt": "You are a weather agent.",
                "tools": [get_weather],
                "model": SAMPLE_MODEL,
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke({"messages": [HumanMessage(content="What is the weather in Tokyo?")]})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        # Generator form instead of a list inside any() (C419).
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "weather_agent" for tool_call in tool_calls)

    def test_deep_agent_with_subagents_gen_purpose(self):
        """The built-in general-purpose subagent remains available alongside custom ones."""
        subagents = [
            {
                "name": "weather_agent",
                "description": "Use this agent to get the weather",
                "system_prompt": "You are a weather agent.",
                "tools": [get_weather],
                "model": SAMPLE_MODEL,
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke({"messages": [HumanMessage(content="Use the general purpose subagent to call the sample tool")]})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "general-purpose" for tool_call in tool_calls)

    def test_deep_agent_with_subagents_with_middleware(self):
        """Subagent middleware can supply the subagent's tools."""
        subagents = [
            {
                "name": "weather_agent",
                "description": "Use this agent to get the weather",
                "system_prompt": "You are a weather agent.",
                "tools": [],
                "model": SAMPLE_MODEL,
                "middleware": [WeatherToolMiddleware()],
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke({"messages": [HumanMessage(content="What is the weather in Tokyo?")]})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "weather_agent" for tool_call in tool_calls)

    def test_deep_agent_with_custom_subagents(self):
        """Spec-built and pre-compiled (runnable) subagents can be mixed."""
        subagents = [
            {
                "name": "weather_agent",
                "description": "Use this agent to get the weather",
                "system_prompt": "You are a weather agent.",
                "tools": [get_weather],
                "model": SAMPLE_MODEL,
            },
            {
                "name": "soccer_agent",
                "description": "Use this agent to get the latest soccer scores",
                "runnable": create_agent(
                    model=SAMPLE_MODEL,
                    tools=[get_soccer_scores],
                    system_prompt="You are a soccer agent.",
                ),
            },
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke({"messages": [HumanMessage(content="Look up the weather in Tokyo, and the latest scores for Manchester City!")]})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "weather_agent" for tool_call in tool_calls)
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "soccer_agent" for tool_call in tool_calls)

    def test_deep_agent_with_extended_state_and_subagents(self):
        """Subagent state updates flow back into the parent agent's extended state."""
        subagents = [
            {
                "name": "basketball_info_agent",
                "description": "Use this agent to get surface level info on any basketball topic",
                "system_prompt": "You are a basketball info agent.",
                "middleware": [ResearchMiddlewareWithTools()],
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents, middleware=[ResearchMiddleware()])
        assert_all_deepagent_qualities(agent)
        assert "research" in agent.stream_channels
        result = agent.invoke({"messages": [HumanMessage(content="Get surface level info on lebron james")]}, config={"recursion_limit": 100})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "basketball_info_agent" for tool_call in tool_calls)
        assert TOY_BASKETBALL_RESEARCH in result["research"]

    def test_deep_agent_with_subagents_no_tools(self):
        """A subagent without explicit tools falls back to the parent's default tools."""
        subagents = [
            {
                "name": "basketball_info_agent",
                "description": "Use this agent to get surface level info on any basketball topic",
                "system_prompt": "You are a basketball info agent.",
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke(
            {"messages": [HumanMessage(content="Use the basketball info subagent to call the sample tool")]}, config={"recursion_limit": 100}
        )
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "basketball_info_agent" for tool_call in tool_calls)

    def test_response_format_tool_strategy(self):
        """`response_format=ToolStrategy(...)` yields a parsed structured response."""
        class StructuredOutput(BaseModel):
            pokemon: list[str]

        agent = create_deep_agent(response_format=ToolStrategy(schema=StructuredOutput))
        response = agent.invoke({"messages": [{"role": "user", "content": "Who are all of the Kanto starters?"}]})
        structured_output = response["structured_response"]
        # Kanto has exactly three starter Pokemon.
        assert len(structured_output.pokemon) == 3

View File

@@ -0,0 +1,154 @@
import uuid
from langgraph.checkpoint.memory import MemorySaver
from langgraph.types import Command
from deepagents.graph import create_deep_agent
from ..utils import assert_all_deepagent_qualities, get_soccer_scores, get_weather, sample_tool
# Shared interrupt configuration for the HITL tests:
# - sample_tool: interrupt with the default decision set (approve/edit/reject)
# - get_weather: never interrupt
# - get_soccer_scores: interrupt, but only allow approve/reject
SAMPLE_TOOL_CONFIG = {
    "sample_tool": True,
    "get_weather": False,
    "get_soccer_scores": {"allowed_decisions": ["approve", "reject"]},
}
class TestHITL:
    """Integration tests for human-in-the-loop interrupts on deep agents and subagents."""

    def test_hitl_agent(self):
        """Interrupt-configured tools pause the run; approval resumes all calls."""
        checkpointer = MemorySaver()
        agent = create_deep_agent(tools=[sample_tool, get_weather, get_soccer_scores], interrupt_on=SAMPLE_TOOL_CONFIG, checkpointer=checkpointer)
        config = {"configurable": {"thread_id": uuid.uuid4()}}
        assert_all_deepagent_qualities(agent)
        result = agent.invoke(
            {
                "messages": [
                    {
                        "role": "user",
                        "content": "Call the sample tool, get the weather in New York and get scores for the latest soccer games in parallel",
                    }
                ]
            },
            config=config,
        )
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        # Generator form instead of a list inside any() (C419).
        assert any(tool_call["name"] == "sample_tool" for tool_call in tool_calls)
        assert any(tool_call["name"] == "get_weather" for tool_call in tool_calls)
        assert any(tool_call["name"] == "get_soccer_scores" for tool_call in tool_calls)
        assert result["__interrupt__"] is not None
        interrupts = result["__interrupt__"][0].value
        action_requests = interrupts["action_requests"]
        # Only the two interrupt-enabled tools should pause (get_weather is False).
        # Previously asserted len(interrupts) == 2, which merely counted the two
        # dict keys and was vacuous.
        assert len(action_requests) == 2
        assert any(action_request["name"] == "sample_tool" for action_request in action_requests)
        assert any(action_request["name"] == "get_soccer_scores" for action_request in action_requests)
        review_configs = interrupts["review_configs"]
        assert any(
            review_config["action_name"] == "sample_tool" and review_config["allowed_decisions"] == ["approve", "edit", "reject"]
            for review_config in review_configs
        )
        assert any(
            review_config["action_name"] == "get_soccer_scores" and review_config["allowed_decisions"] == ["approve", "reject"]
            for review_config in review_configs
        )
        result2 = agent.invoke(Command(resume={"decisions": [{"type": "approve"}, {"type": "approve"}]}), config=config)
        tool_results = [msg for msg in result2.get("messages", []) if msg.type == "tool"]
        assert any(tool_result.name == "sample_tool" for tool_result in tool_results)
        assert any(tool_result.name == "get_weather" for tool_result in tool_results)
        assert any(tool_result.name == "get_soccer_scores" for tool_result in tool_results)
        assert "__interrupt__" not in result2

    def test_subagent_with_hitl(self):
        """Interrupts raised inside a subagent surface on the parent thread."""
        checkpointer = MemorySaver()
        agent = create_deep_agent(tools=[sample_tool, get_weather, get_soccer_scores], interrupt_on=SAMPLE_TOOL_CONFIG, checkpointer=checkpointer)
        config = {"configurable": {"thread_id": uuid.uuid4()}}
        assert_all_deepagent_qualities(agent)
        result = agent.invoke(
            {
                "messages": [
                    {
                        "role": "user",
                        "content": "Use the task tool to kick off the general-purpose subagent. Tell it to call the sample tool, get the weather in New York and get scores for the latest soccer games in parallel",
                    }
                ]
            },
            config=config,
        )
        assert result["__interrupt__"] is not None
        interrupts = result["__interrupt__"][0].value
        action_requests = interrupts["action_requests"]
        # Two interrupted tool calls expected (see note in test_hitl_agent).
        assert len(action_requests) == 2
        assert any(action_request["name"] == "sample_tool" for action_request in action_requests)
        assert any(action_request["name"] == "get_soccer_scores" for action_request in action_requests)
        review_configs = interrupts["review_configs"]
        assert any(
            review_config["action_name"] == "sample_tool" and review_config["allowed_decisions"] == ["approve", "edit", "reject"]
            for review_config in review_configs
        )
        assert any(
            review_config["action_name"] == "get_soccer_scores" and review_config["allowed_decisions"] == ["approve", "reject"]
            for review_config in review_configs
        )
        result2 = agent.invoke(Command(resume={"decisions": [{"type": "approve"}, {"type": "approve"}]}), config=config)
        assert "__interrupt__" not in result2

    def test_subagent_with_custom_interrupt_on(self):
        """A subagent's own interrupt_on overrides the parent's configuration."""
        checkpointer = MemorySaver()
        agent = create_deep_agent(
            tools=[sample_tool, get_weather, get_soccer_scores],
            interrupt_on=SAMPLE_TOOL_CONFIG,
            checkpointer=checkpointer,
            subagents=[
                {
                    "name": "task_handler",
                    "description": "A subagent that can handle all sorts of tasks",
                    "system_prompt": "You are a task handler. You can handle all sorts of tasks.",
                    "tools": [sample_tool, get_weather, get_soccer_scores],
                    "interrupt_on": {"sample_tool": False, "get_weather": True, "get_soccer_scores": True},
                },
            ],
        )
        config = {"configurable": {"thread_id": uuid.uuid4()}}
        assert_all_deepagent_qualities(agent)
        result = agent.invoke(
            {
                "messages": [
                    {
                        "role": "user",
                        "content": "Use the task tool to kick off the task_handler subagent. Tell it to call the sample tool, get the weather in New York and get scores for the latest soccer games in parallel",
                    }
                ]
            },
            config=config,
        )
        assert result["__interrupt__"] is not None
        interrupts = result["__interrupt__"][0].value
        action_requests = interrupts["action_requests"]
        # The subagent's config interrupts get_weather/get_soccer_scores, not sample_tool.
        assert len(action_requests) == 2
        assert any(action_request["name"] == "get_weather" for action_request in action_requests)
        assert any(action_request["name"] == "get_soccer_scores" for action_request in action_requests)
        review_configs = interrupts["review_configs"]
        assert any(
            review_config["action_name"] == "get_weather" and review_config["allowed_decisions"] == ["approve", "edit", "reject"]
            for review_config in review_configs
        )
        assert any(
            review_config["action_name"] == "get_soccer_scores" and review_config["allowed_decisions"] == ["approve", "edit", "reject"]
            for review_config in review_configs
        )
        result2 = agent.invoke(Command(resume={"decisions": [{"type": "approve"}, {"type": "approve"}]}), config=config)
        assert "__interrupt__" not in result2

View File

@@ -0,0 +1,283 @@
import pytest
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from deepagents.graph import create_agent
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from deepagents.middleware.subagents import (
DEFAULT_GENERAL_PURPOSE_DESCRIPTION,
TASK_SYSTEM_PROMPT,
TASK_TOOL_DESCRIPTION,
SubAgentMiddleware,
)
# NOTE: the docstring below doubles as the tool's description at runtime via
# the @tool decorator, so it must stay accurate.
@tool
def get_weather(city: str) -> str:
    """Get the weather in a city."""
    return f"The weather in {city} is sunny."
class WeatherMiddleware(AgentMiddleware):
    """Middleware that contributes the `get_weather` tool to an agent."""

    # Tools injected into any agent this middleware is attached to.
    tools = [get_weather]
def assert_expected_subgraph_actions(expected_tool_calls, agent, inputs):
    """Stream an agent run and assert the expected tool calls occur in order.

    Args:
        expected_tool_calls: Ordered expectation dicts, each with "name", an
            "args" mapping (a subset of the actual call's args), and optionally
            "model" (expected `response_metadata["model_name"]`).
        agent: Agent whose `.stream(..., subgraphs=True, stream_mode="updates")`
            output is consumed.
        inputs: Inputs passed through to `agent.stream`.

    Raises:
        AssertionError: If an expected call is missing, an arg differs, or the
            model name does not match.
    """
    current_idx = 0
    for update in agent.stream(
        inputs,
        subgraphs=True,
        stream_mode="updates",
    ):
        # Only "model" node updates carry AI messages with tool calls.
        if "model" not in update[1]:
            continue
        ai_message = update[1]["model"]["messages"][-1]
        for tool_call in ai_message.tool_calls:
            if current_idx >= len(expected_tool_calls):
                # All expectations matched: ignore any further tool calls
                # instead of raising IndexError (bug in the original version).
                break
            expected = expected_tool_calls[current_idx]
            if tool_call["name"] != expected["name"]:
                continue
            if "model" in expected:
                assert ai_message.response_metadata["model_name"] == expected["model"]
            # Expected args must be a subset of the actual call args.
            for arg, value in expected["args"].items():
                assert arg in tool_call["args"]
                assert tool_call["args"][arg] == value
            current_idx += 1
    assert current_idx == len(expected_tool_calls)
@pytest.mark.requires("langchain_anthropic", "langchain_openai")
class TestSubagentMiddleware:
"""Integration tests for the SubagentMiddleware class."""
def test_general_purpose_subagent(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the general-purpose subagent to get the weather in a city.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[get_weather],
)
],
)
assert "task" in agent.nodes["tools"].bound._tools_by_name.keys()
response = agent.invoke({"messages": [HumanMessage(content="What is the weather in Tokyo?")]})
assert response["messages"][1].tool_calls[0]["name"] == "task"
assert response["messages"][1].tool_calls[0]["args"]["subagent_type"] == "general-purpose"
def test_defined_subagent(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [get_weather],
}
],
)
],
)
assert "task" in agent.nodes["tools"].bound._tools_by_name.keys()
response = agent.invoke({"messages": [HumanMessage(content="What is the weather in Tokyo?")]})
assert response["messages"][1].tool_calls[0]["name"] == "task"
assert response["messages"][1].tool_calls[0]["args"]["subagent_type"] == "weather"
def test_defined_subagent_tool_calls(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [get_weather],
}
],
)
],
)
expected_tool_calls = [
{"name": "task", "args": {"subagent_type": "weather"}},
{"name": "get_weather", "args": {}},
]
assert_expected_subgraph_actions(
expected_tool_calls,
agent,
{"messages": [HumanMessage(content="What is the weather in Tokyo?")]},
)
def test_defined_subagent_custom_model(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [get_weather],
"model": "gpt-4.1",
}
],
)
],
)
expected_tool_calls = [
{
"name": "task",
"args": {"subagent_type": "weather"},
"model": "claude-sonnet-4-20250514",
},
{"name": "get_weather", "args": {}, "model": "gpt-4.1-2025-04-14"},
]
assert_expected_subgraph_actions(
expected_tool_calls,
agent,
{"messages": [HumanMessage(content="What is the weather in Tokyo?")]},
)
def test_defined_subagent_custom_middleware(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [], # No tools, only in middleware
"model": "gpt-4.1",
"middleware": [WeatherMiddleware()],
}
],
)
],
)
expected_tool_calls = [
{
"name": "task",
"args": {"subagent_type": "weather"},
"model": "claude-sonnet-4-20250514",
},
{"name": "get_weather", "args": {}, "model": "gpt-4.1-2025-04-14"},
]
assert_expected_subgraph_actions(
expected_tool_calls,
agent,
{"messages": [HumanMessage(content="What is the weather in Tokyo?")]},
)
def test_defined_subagent_custom_runnable(self):
custom_subagent = create_agent(
model="gpt-4.1-2025-04-14",
system_prompt="Use the get_weather tool to get the weather in a city.",
tools=[get_weather],
)
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"runnable": custom_subagent,
}
],
)
],
)
expected_tool_calls = [
{
"name": "task",
"args": {"subagent_type": "weather"},
"model": "claude-sonnet-4-20250514",
},
{"name": "get_weather", "args": {}, "model": "gpt-4.1-2025-04-14"},
]
assert_expected_subgraph_actions(
expected_tool_calls,
agent,
{"messages": [HumanMessage(content="What is the weather in Tokyo?")]},
)
def test_multiple_subagents_with_interrupt_on_no_middleware_accumulation(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call subagents.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
default_middleware=[PatchToolCallsMiddleware()],
subagents=[
{
"name": "subagent1",
"description": "First subagent.",
"system_prompt": "You are subagent 1.",
"tools": [get_weather],
"interrupt_on": {"get_weather": True},
},
{
"name": "subagent2",
"description": "Second subagent.",
"system_prompt": "You are subagent 2.",
"tools": [get_weather],
"interrupt_on": {"get_weather": True},
},
],
)
],
)
# This would error if the default middleware was accumulated
assert True
def test_subagent_middleware_init(self):
middleware = SubAgentMiddleware(
default_model="gpt-4o-mini",
)
assert middleware is not None
assert middleware.system_prompt is TASK_SYSTEM_PROMPT
assert len(middleware.tools) == 1
assert middleware.tools[0].name == "task"
expected_desc = TASK_TOOL_DESCRIPTION.format(available_agents=f"- general-purpose: {DEFAULT_GENERAL_PURPOSE_DESCRIPTION}")
assert middleware.tools[0].description == expected_desc
def test_default_subagent_with_tools(self):
    """An explicit (empty) default tool list keeps the built-in task prompt."""
    mw = SubAgentMiddleware(default_model="gpt-4o-mini", default_tools=[])
    assert mw is not None
    assert mw.system_prompt == TASK_SYSTEM_PROMPT
def test_default_subagent_custom_system_prompt(self):
    """A caller-supplied system prompt overrides the built-in task prompt."""
    custom_prompt = "Use the task tool to call a subagent."
    mw = SubAgentMiddleware(
        default_model="gpt-4o-mini",
        default_tools=[],
        system_prompt=custom_prompt,
    )
    assert mw is not None
    assert mw.system_prompt == custom_prompt

View File

@@ -0,0 +1 @@
# This file makes the tests directory a Python package for relative imports

View File

@@ -0,0 +1,671 @@
from pathlib import Path
import pytest
from langchain.tools import ToolRuntime
from langgraph.store.memory import InMemoryStore
from deepagents.backends.composite import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import (
ExecuteResponse,
SandboxBackendProtocol,
WriteResult,
)
from deepagents.backends.state import StateBackend
from deepagents.backends.store import StoreBackend
def make_runtime(tid: str = "tc"):
    """Build a minimal ToolRuntime backed by an in-memory store for backend tests."""
    empty_state = {"messages": [], "files": {}}
    return ToolRuntime(
        state=empty_state,
        context=None,
        tool_call_id=tid,
        store=InMemoryStore(),
        stream_writer=lambda _: None,
        config={},
    )
def build_composite_state_backend(runtime: ToolRuntime, *, routes):
    """Create a CompositeBackend whose default is a StateBackend.

    Route values may be ready backend instances or factories that accept the
    runtime; factories are invoked here.
    """
    built = {
        prefix: target(runtime) if callable(target) else target
        for prefix, target in routes.items()
    }
    return CompositeBackend(default=StateBackend(runtime), routes=built)
def test_composite_state_backend_routes_and_search(tmp_path: Path):
    """Writes route by prefix; ls/grep/glob aggregate default and routed backends."""
    rt = make_runtime("t3")
    # route /memories/ to store
    be = build_composite_state_backend(rt, routes={"/memories/": (lambda r: StoreBackend(r))})
    # write to default (state)
    res = be.write("/file.txt", "alpha")
    # StateBackend reports written content via a files_update state delta.
    assert isinstance(res, WriteResult) and res.files_update is not None
    # write to routed (store)
    msg = be.write("/memories/readme.md", "beta")
    # StoreBackend persists directly, so no files_update comes back.
    assert isinstance(msg, WriteResult) and msg.error is None and msg.files_update is None
    # ls_info at root returns both
    infos = be.ls_info("/")
    paths = {i["path"] for i in infos}
    assert "/file.txt" in paths and "/memories/" in paths
    # grep across both
    matches = be.grep_raw("alpha", path="/")
    assert any(m["path"] == "/file.txt" for m in matches)
    matches2 = be.grep_raw("beta", path="/")
    assert any(m["path"] == "/memories/readme.md" for m in matches2)
    # glob across both
    g = be.glob_info("**/*.md", path="/")
    assert any(i["path"] == "/memories/readme.md" for i in g)
def test_composite_backend_filesystem_plus_store(tmp_path: Path):
    """Filesystem default plus store route: write/ls/grep/glob all route by prefix."""
    # default filesystem, route to store under /memories/
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    rt = make_runtime("t4")
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    # put files in both
    r1 = comp.write("/hello.txt", "hello")
    # Neither backend here is state-backed, so no files_update on either write.
    assert isinstance(r1, WriteResult) and r1.error is None and r1.files_update is None
    r2 = comp.write("/memories/notes.md", "note")
    assert isinstance(r2, WriteResult) and r2.error is None and r2.files_update is None
    # ls_info path routing
    infos_root = comp.ls_info("/")
    assert any(i["path"] == "/hello.txt" for i in infos_root)
    infos_mem = comp.ls_info("/memories/")
    assert any(i["path"] == "/memories/notes.md" for i in infos_mem)
    # grep_raw merges
    gm = comp.grep_raw("hello", path="/")
    assert any(m["path"] == "/hello.txt" for m in gm)
    gm2 = comp.grep_raw("note", path="/")
    assert any(m["path"] == "/memories/notes.md" for m in gm2)
    # glob_info
    gl = comp.glob_info("*.md", path="/")
    assert any(i["path"] == "/memories/notes.md" for i in gl)
def test_composite_backend_store_to_store():
    """Test composite with default store and routed store (two different stores)."""
    rt = make_runtime("t5")
    # Create two separate store backends (simulating different namespaces/stores)
    default_store = StoreBackend(rt)
    memories_store = StoreBackend(rt)
    comp = CompositeBackend(default=default_store, routes={"/memories/": memories_store})
    # Write to default store
    res1 = comp.write("/notes.txt", "default store content")
    assert isinstance(res1, WriteResult) and res1.error is None and res1.path == "/notes.txt"
    # Write to routed store
    res2 = comp.write("/memories/important.txt", "routed store content")
    # The routed backend reports the path with the route prefix stripped.
    assert isinstance(res2, WriteResult) and res2.error is None and res2.path == "/important.txt"
    # Read from both
    content1 = comp.read("/notes.txt")
    assert "default store content" in content1
    content2 = comp.read("/memories/important.txt")
    assert "routed store content" in content2
    # ls_info at root should show both
    infos = comp.ls_info("/")
    paths = {i["path"] for i in infos}
    assert "/notes.txt" in paths
    assert "/memories/" in paths
    # grep across both stores
    matches = comp.grep_raw("default", path="/")
    assert any(m["path"] == "/notes.txt" for m in matches)
    matches2 = comp.grep_raw("routed", path="/")
    assert any(m["path"] == "/memories/important.txt" for m in matches2)
def test_composite_backend_multiple_routes():
    """Test composite with state default and multiple store routes."""
    rt = make_runtime("t6")
    # State backend as default, multiple stores for different routes
    comp = build_composite_state_backend(
        rt,
        routes={
            "/memories/": (lambda r: StoreBackend(r)),
            "/archive/": (lambda r: StoreBackend(r)),
            "/cache/": (lambda r: StoreBackend(r)),
        },
    )
    # Write to state (default)
    res_state = comp.write("/temp.txt", "ephemeral data")
    assert res_state.files_update is not None  # State backend returns files_update
    assert res_state.path == "/temp.txt"
    # Write to /memories/ route
    res_mem = comp.write("/memories/important.md", "long-term memory")
    assert res_mem.files_update is None  # Store backend doesn't return files_update
    # Routed writes report the path with the route prefix stripped.
    assert res_mem.path == "/important.md"
    # Write to /archive/ route
    res_arch = comp.write("/archive/old.log", "archived log")
    assert res_arch.files_update is None
    assert res_arch.path == "/old.log"
    # Write to /cache/ route
    res_cache = comp.write("/cache/session.json", "cached session")
    assert res_cache.files_update is None
    assert res_cache.path == "/session.json"
    # ls_info at root should aggregate all
    infos = comp.ls_info("/")
    paths = {i["path"] for i in infos}
    assert "/temp.txt" in paths
    assert "/memories/" in paths
    assert "/archive/" in paths
    assert "/cache/" in paths
    # ls_info at specific route
    mem_infos = comp.ls_info("/memories/")
    mem_paths = {i["path"] for i in mem_infos}
    assert "/memories/important.md" in mem_paths
    assert "/temp.txt" not in mem_paths
    assert "/archive/old.log" not in mem_paths
    # grep across all backends
    all_matches = comp.grep_raw(".", path="/")  # Match any character
    paths_with_content = {m["path"] for m in all_matches}
    assert "/temp.txt" in paths_with_content
    assert "/memories/important.md" in paths_with_content
    assert "/archive/old.log" in paths_with_content
    assert "/cache/session.json" in paths_with_content
    # glob across all backends
    glob_results = comp.glob_info("**/*.md", path="/")
    assert any(i["path"] == "/memories/important.md" for i in glob_results)
    # Edit in routed backend
    edit_res = comp.edit("/memories/important.md", "long-term", "persistent", replace_all=False)
    assert edit_res.error is None
    assert edit_res.occurrences == 1
    updated_content = comp.read("/memories/important.md")
    assert "persistent memory" in updated_content
def test_composite_backend_ls_nested_directories(tmp_path: Path):
    """ls_info lists direct children only; nested content appears as directory markers."""
    rt = make_runtime("t7")
    root = tmp_path
    files = {
        root / "local.txt": "local file",
        root / "src" / "main.py": "code",
        root / "src" / "utils" / "helper.py": "utils",
    }
    for path, content in files.items():
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content)
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    comp.write("/memories/note1.txt", "note 1")
    comp.write("/memories/deep/note2.txt", "note 2")
    comp.write("/memories/deep/nested/note3.txt", "note 3")
    # Root: direct children plus the route mount shown as a directory.
    root_listing = comp.ls_info("/")
    root_paths = [fi["path"] for fi in root_listing]
    assert "/local.txt" in root_paths
    assert "/src/" in root_paths
    assert "/memories/" in root_paths
    assert "/src/main.py" not in root_paths
    assert "/memories/note1.txt" not in root_paths
    src_listing = comp.ls_info("/src/")
    src_paths = [fi["path"] for fi in src_listing]
    assert "/src/main.py" in src_paths
    assert "/src/utils/" in src_paths
    assert "/src/utils/helper.py" not in src_paths
    # Same one-level-deep behavior inside the routed store backend.
    mem_listing = comp.ls_info("/memories/")
    mem_paths = [fi["path"] for fi in mem_listing]
    assert "/memories/note1.txt" in mem_paths
    assert "/memories/deep/" in mem_paths
    assert "/memories/deep/note2.txt" not in mem_paths
    deep_listing = comp.ls_info("/memories/deep/")
    deep_paths = [fi["path"] for fi in deep_listing]
    assert "/memories/deep/note2.txt" in deep_paths
    assert "/memories/deep/nested/" in deep_paths
    assert "/memories/deep/nested/note3.txt" not in deep_paths
def test_composite_backend_ls_multiple_routes_nested():
    """ls_info stays one level deep across the default and every routed backend."""
    rt = make_runtime("t8")
    comp = build_composite_state_backend(
        rt,
        routes={
            "/memories/": (lambda r: StoreBackend(r)),
            "/archive/": (lambda r: StoreBackend(r)),
        },
    )
    state_files = {
        "/temp.txt": "temp",
        "/work/file1.txt": "work file 1",
        "/work/projects/proj1.txt": "project 1",
    }
    for path, content in state_files.items():
        res = comp.write(path, content)
        # State writes only return a delta; fold it back into the runtime state
        # so later listings can see the files.
        if res.files_update:
            rt.state["files"].update(res.files_update)
    memory_files = {
        "/memories/important.txt": "important",
        "/memories/diary/entry1.txt": "diary entry",
    }
    for path, content in memory_files.items():
        comp.write(path, content)
    archive_files = {
        "/archive/old.txt": "old",
        "/archive/2023/log.txt": "2023 log",
    }
    for path, content in archive_files.items():
        comp.write(path, content)
    root_listing = comp.ls_info("/")
    root_paths = [fi["path"] for fi in root_listing]
    assert "/temp.txt" in root_paths
    assert "/work/" in root_paths
    assert "/memories/" in root_paths
    assert "/archive/" in root_paths
    assert "/work/file1.txt" not in root_paths
    assert "/memories/important.txt" not in root_paths
    work_listing = comp.ls_info("/work/")
    work_paths = [fi["path"] for fi in work_listing]
    assert "/work/file1.txt" in work_paths
    assert "/work/projects/" in work_paths
    assert "/work/projects/proj1.txt" not in work_paths
    mem_listing = comp.ls_info("/memories/")
    mem_paths = [fi["path"] for fi in mem_listing]
    assert "/memories/important.txt" in mem_paths
    assert "/memories/diary/" in mem_paths
    assert "/memories/diary/entry1.txt" not in mem_paths
    arch_listing = comp.ls_info("/archive/")
    arch_paths = [fi["path"] for fi in arch_listing]
    assert "/archive/old.txt" in arch_paths
    assert "/archive/2023/" in arch_paths
    assert "/archive/2023/log.txt" not in arch_paths
def test_composite_backend_ls_trailing_slash(tmp_path: Path):
    """Listings are sorted, missing dirs list empty, and trailing slash is optional."""
    rt = make_runtime("t9")
    root = tmp_path
    (root / "file.txt").write_text("content")
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/store/": store})
    comp.write("/store/item.txt", "store content")
    listing = comp.ls_info("/")
    paths = [fi["path"] for fi in listing]
    # Aggregated listings come back sorted.
    assert paths == sorted(paths)
    # Nonexistent directories (routed or default) produce empty listings.
    empty_listing = comp.ls_info("/store/nonexistent/")
    assert empty_listing == []
    empty_listing2 = comp.ls_info("/nonexistent/")
    assert empty_listing2 == []
    # "/store/" and "/store" are equivalent.
    listing1 = comp.ls_info("/store/")
    listing2 = comp.ls_info("/store")
    assert [fi["path"] for fi in listing1] == [fi["path"] for fi in listing2]
def test_composite_backend_intercept_large_tool_result():
    """Oversized tool results are evicted to the default (state) backend as files."""
    from langchain_core.messages import ToolMessage
    from langgraph.types import Command
    from deepagents.middleware.filesystem import FilesystemMiddleware
    rt = make_runtime("t10")
    middleware = FilesystemMiddleware(
        backend=lambda r: build_composite_state_backend(r, routes={"/memories/": (lambda x: StoreBackend(x))}), tool_token_limit_before_evict=1000
    )
    # 5000 chars exceeds the 1000-token eviction limit.
    large_content = "z" * 5000
    tool_message = ToolMessage(content=large_content, tool_call_id="test_789")
    result = middleware._intercept_large_tool_result(tool_message, rt)
    # State-backed eviction returns a Command carrying the files update.
    assert isinstance(result, Command)
    assert "/large_tool_results/test_789" in result.update["files"]
    assert result.update["files"]["/large_tool_results/test_789"]["content"] == [large_content]
    assert "Tool result too large" in result.update["messages"][0].content
def test_composite_backend_intercept_large_tool_result_routed_to_store():
    """Test that large tool results can be routed to a specific backend like StoreBackend."""
    from langchain_core.messages import ToolMessage
    from deepagents.middleware.filesystem import FilesystemMiddleware
    rt = make_runtime("t11")
    middleware = FilesystemMiddleware(
        backend=lambda r: build_composite_state_backend(r, routes={"/large_tool_results/": (lambda x: StoreBackend(x))}),
        tool_token_limit_before_evict=1000,
    )
    large_content = "w" * 5000
    tool_message = ToolMessage(content=large_content, tool_call_id="test_routed_123")
    result = middleware._intercept_large_tool_result(tool_message, rt)
    # Store-backed eviction needs no state update, so a plain ToolMessage is returned.
    assert isinstance(result, ToolMessage)
    assert "Tool result too large" in result.content
    assert "/large_tool_results/test_routed_123" in result.content
    # The store key carries the route prefix stripped.
    stored_item = rt.store.get(("filesystem",), "/test_routed_123")
    assert stored_item is not None
    assert stored_item.value["content"] == [large_content]
# Mock sandbox backend for testing execute functionality
class MockSandboxBackend(SandboxBackendProtocol, StateBackend):
    """Mock sandbox backend that implements SandboxBackendProtocol."""

    def execute(self, command: str, *, timeout: int = 30 * 60) -> ExecuteResponse:
        """Echo the command back as a successful, untruncated execution."""
        echoed = f"Executed: {command}"
        return ExecuteResponse(output=echoed, exit_code=0, truncated=False)

    @property
    def id(self) -> str:
        # Stable identifier for this mock backend.
        return "mock_sandbox_backend"
def test_composite_backend_execute_with_sandbox_default():
    """Test that CompositeBackend.execute() delegates to sandbox default backend."""
    rt = make_runtime("t_exec1")
    comp = CompositeBackend(
        default=MockSandboxBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # Execution works because the default backend supports it.
    outcome = comp.execute("ls -la")
    assert isinstance(outcome, ExecuteResponse)
    assert outcome.output == "Executed: ls -la"
    assert outcome.exit_code == 0
    assert outcome.truncated is False
def test_composite_backend_execute_without_sandbox_default():
    """Test that CompositeBackend.execute() fails when default doesn't support execution."""
    rt = make_runtime("t_exec2")
    # StateBackend doesn't implement SandboxBackendProtocol, so the composite
    # must refuse execution.
    comp = CompositeBackend(
        default=StateBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    with pytest.raises(NotImplementedError, match="doesn't support command execution"):
        comp.execute("ls -la")
def test_composite_backend_supports_execution_check():
    """Test the isinstance check works correctly for CompositeBackend."""
    rt = make_runtime("t_exec3")
    # CompositeBackend always exposes execute(); actual support depends on the
    # default backend (a non-sandbox default raises NotImplementedError when
    # execute() is invoked). Both composites therefore pass hasattr().
    for default_backend in (MockSandboxBackend(rt), StateBackend(rt)):
        composite = CompositeBackend(default=default_backend, routes={})
        assert hasattr(composite, "execute")
def test_composite_backend_execute_with_routed_backends():
    """Test that execution doesn't interfere with file routing."""
    rt = make_runtime("t_exec4")
    comp = CompositeBackend(
        default=MockSandboxBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # Seed a file in each backend.
    comp.write("/local.txt", "local content")
    comp.write("/memories/persistent.txt", "persistent content")
    # Execution still delegates to the sandbox default.
    assert comp.execute("echo test").output == "Executed: echo test"
    # And reads route correctly afterwards.
    assert "local content" in comp.read("/local.txt")
    assert "persistent content" in comp.read("/memories/persistent.txt")
def test_composite_upload_routing(tmp_path: Path):
    """Test upload_files routing to different backends."""
    rt = make_runtime("t_upload1")
    root = tmp_path
    # Create composite with filesystem default and store route
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    # Upload files to default path (filesystem)
    default_files = [
        ("/file1.bin", b"Default content 1"),
        ("/file2.bin", b"Default content 2"),
    ]
    responses = comp.upload_files(default_files)
    assert len(responses) == 2
    assert all(r.error is None for r in responses)
    # Default-routed uploads land on disk under the filesystem root.
    assert (root / "file1.bin").exists()
    assert (root / "file2.bin").read_bytes() == b"Default content 2"
    # Upload files to routed path (store)
    routed_files = [
        ("/memories/note1.bin", b"Memory content 1"),
        ("/memories/note2.bin", b"Memory content 2"),
    ]
    responses = comp.upload_files(routed_files)
    assert len(responses) == 2
    assert all(r.error is None for r in responses)
    # Verify files are accessible in store
    content1 = comp.read("/memories/note1.bin")
    assert "Memory content 1" in content1
def test_composite_download_routing(tmp_path: Path):
    """Test download_files routing to different backends."""
    rt = make_runtime("t_download1")
    root = tmp_path
    # Create composite with filesystem default and store route
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    # Pre-populate filesystem backend
    (root / "local.bin").write_bytes(b"Local binary data")
    # Pre-populate store backend
    comp.write("/memories/stored.txt", "Stored text data")
    # Download from default path (filesystem)
    responses = comp.download_files(["/local.bin"])
    assert len(responses) == 1
    assert responses[0].path == "/local.bin"
    assert responses[0].content == b"Local binary data"
    assert responses[0].error is None
    # Download from routed path (store) - Note: store backend doesn't implement download yet
    # So this test focuses on routing logic
    # NOTE(review): the second batch re-downloads the default-path file only;
    # the routed store path is never actually downloaded — extend this once
    # StoreBackend supports download.
    paths_to_download = ["/local.bin"]
    responses = comp.download_files(paths_to_download)
    assert len(responses) == 1
    assert responses[0].path == "/local.bin"
def test_composite_upload_download_roundtrip(tmp_path: Path):
    """Test upload and download roundtrip through composite backend."""
    rt = make_runtime("t_roundtrip1")
    comp = CompositeBackend(
        default=FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True),
        routes={},
    )
    payload = bytes(range(128))  # Binary data
    # Upload, then fetch it back unchanged.
    (up,) = comp.upload_files([("/test.bin", payload)])
    assert up.error is None
    (down,) = comp.download_files(["/test.bin"])
    assert down.error is None
    assert down.content == payload
def test_composite_partial_success_upload(tmp_path: Path):
    """Test partial success in batch upload with mixed valid/invalid paths."""
    rt = make_runtime("t_partial_upload")
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    comp = CompositeBackend(default=fs, routes={})
    files = [
        ("/valid1.bin", b"Valid 1"),
        ("/../invalid.bin", b"Invalid path"),  # Path traversal
        ("/valid2.bin", b"Valid 2"),
    ]
    responses = comp.upload_files(files)
    # One response per requested file, in order, even when some fail.
    assert len(responses) == 3
    # First should succeed
    assert responses[0].error is None
    assert (root / "valid1.bin").exists()
    # Second should fail
    assert responses[1].error == "invalid_path"
    # Third should still succeed (partial success)
    assert responses[2].error is None
    assert (root / "valid2.bin").exists()
def test_composite_partial_success_download(tmp_path: Path):
    """Test partial success in batch download with mixed valid/invalid paths."""
    rt = make_runtime("t_partial_download")
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    comp = CompositeBackend(default=fs, routes={})
    # Create one valid file
    (root / "exists.bin").write_bytes(b"I exist!")
    paths = ["/exists.bin", "/doesnotexist.bin", "/../invalid"]
    responses = comp.download_files(paths)
    # One response per requested path, in order, regardless of failures.
    assert len(responses) == 3
    # First should succeed
    assert responses[0].error is None
    assert responses[0].content == b"I exist!"
    # Second should fail with file_not_found
    assert responses[1].error == "file_not_found"
    assert responses[1].content is None
    # Third should fail with invalid_path
    assert responses[2].error == "invalid_path"
    assert responses[2].content is None
def test_composite_upload_download_multiple_routes(tmp_path: Path):
    """Test upload/download with multiple routed backends."""
    rt = make_runtime("t_multi_route")
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store1 = StoreBackend(rt)
    store2 = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store1, "/archive/": store2})
    # Upload to different backends
    files = [
        ("/default.bin", b"Default backend"),
        ("/memories/mem.bin", b"Memory backend"),
        ("/archive/arch.bin", b"Archive backend"),
    ]
    responses = comp.upload_files(files)
    assert len(responses) == 3
    assert all(r.error is None for r in responses)
    # Verify routing worked (filesystem file should exist)
    assert (root / "default.bin").exists()
    assert (root / "default.bin").read_bytes() == b"Default backend"
def test_composite_download_preserves_original_paths(tmp_path: Path):
    """Test that download responses preserve original composite paths."""
    rt = make_runtime("t_path_preserve")
    fs = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    comp = CompositeBackend(default=fs, routes={})
    # Place a file one directory down.
    nested = tmp_path / "subdir" / "file.bin"
    nested.parent.mkdir()
    nested.write_bytes(b"Nested file")
    (resp,) = comp.download_files(["/subdir/file.bin"])
    # The response echoes the composite path, not a backend-stripped one.
    assert resp.path == "/subdir/file.bin"
    assert resp.content == b"Nested file"

View File

@@ -0,0 +1,582 @@
"""Async tests for CompositeBackend."""
from pathlib import Path
import pytest
from langchain.tools import ToolRuntime
from langgraph.store.memory import InMemoryStore
from deepagents.backends.composite import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import (
ExecuteResponse,
SandboxBackendProtocol,
WriteResult,
)
from deepagents.backends.state import StateBackend
from deepagents.backends.store import StoreBackend
def make_runtime(tid: str = "tc"):
    """Return a bare ToolRuntime wired to a fresh InMemoryStore."""

    def _discard(_chunk) -> None:
        # No-op stream writer; these tests never inspect streamed output.
        return None

    initial_state = {"messages": [], "files": {}}
    return ToolRuntime(
        state=initial_state,
        context=None,
        tool_call_id=tid,
        store=InMemoryStore(),
        stream_writer=_discard,
        config={},
    )
def build_composite_state_backend(runtime: ToolRuntime, *, routes):
    """Assemble a CompositeBackend whose default is state-backed.

    Each route value is either a ready backend instance or a factory taking
    the runtime; factories are resolved here.
    """
    resolved = {}
    for prefix, entry in routes.items():
        resolved[prefix] = entry(runtime) if callable(entry) else entry
    return CompositeBackend(default=StateBackend(runtime), routes=resolved)
# Mock sandbox backend for testing execute functionality
class MockSandboxBackend(SandboxBackendProtocol, StateBackend):
    """Mock sandbox backend that implements SandboxBackendProtocol."""

    def execute(self, command: str, *, timeout: int = 30 * 60) -> ExecuteResponse:
        """Mock execute that returns the command as output."""
        return ExecuteResponse(
            output=f"Executed: {command}",
            exit_code=0,
            truncated=False,
        )

    async def aexecute(self, command: str) -> ExecuteResponse:
        """Async mock execute that returns the command as output."""
        # The distinct "Async " prefix lets tests tell the async path from the
        # sync one.
        return ExecuteResponse(
            output=f"Async Executed: {command}",
            exit_code=0,
            truncated=False,
        )

    @property
    def id(self) -> str:
        # Stable identifier for this mock backend.
        return "mock_sandbox_backend"
async def test_composite_state_backend_routes_and_search_async(tmp_path: Path):
    """Test async operations with composite backend routing."""
    rt = make_runtime("t3")
    be = build_composite_state_backend(rt, routes={"/memories/": (lambda r: StoreBackend(r))})
    # write to default (state)
    res = await be.awrite("/file.txt", "alpha")
    # State-backed writes surface content as a files_update delta.
    assert isinstance(res, WriteResult) and res.files_update is not None
    # write to routed (store)
    msg = await be.awrite("/memories/readme.md", "beta")
    # Store-backed writes persist directly, so no files_update.
    assert isinstance(msg, WriteResult) and msg.error is None and msg.files_update is None
    # als_info at root returns both
    infos = await be.als_info("/")
    paths = {i["path"] for i in infos}
    assert "/file.txt" in paths and "/memories/" in paths
    # agrep across both
    matches = await be.agrep_raw("alpha", path="/")
    assert any(m["path"] == "/file.txt" for m in matches)
    matches2 = await be.agrep_raw("beta", path="/")
    assert any(m["path"] == "/memories/readme.md" for m in matches2)
    # aglob across both
    g = await be.aglob_info("**/*.md", path="/")
    assert any(i["path"] == "/memories/readme.md" for i in g)
async def test_composite_backend_filesystem_plus_store_async(tmp_path: Path):
    """Test async operations with filesystem and store backends."""
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    rt = make_runtime("t4")
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    # put files in both
    r1 = await comp.awrite("/hello.txt", "hello")
    # Neither backend here is state-backed, so no files_update on either write.
    assert isinstance(r1, WriteResult) and r1.error is None and r1.files_update is None
    r2 = await comp.awrite("/memories/notes.md", "note")
    assert isinstance(r2, WriteResult) and r2.error is None and r2.files_update is None
    # als_info path routing
    infos_root = await comp.als_info("/")
    assert any(i["path"] == "/hello.txt" for i in infos_root)
    infos_mem = await comp.als_info("/memories/")
    assert any(i["path"] == "/memories/notes.md" for i in infos_mem)
    # agrep_raw merges
    gm = await comp.agrep_raw("hello", path="/")
    assert any(m["path"] == "/hello.txt" for m in gm)
    gm2 = await comp.agrep_raw("note", path="/")
    assert any(m["path"] == "/memories/notes.md" for m in gm2)
    # aglob_info
    gl = await comp.aglob_info("*.md", path="/")
    assert any(i["path"] == "/memories/notes.md" for i in gl)
async def test_composite_backend_store_to_store_async():
    """Test async operations with default store and routed store."""
    rt = make_runtime("t5")
    # Create two separate store backends
    default_store = StoreBackend(rt)
    memories_store = StoreBackend(rt)
    comp = CompositeBackend(default=default_store, routes={"/memories/": memories_store})
    # Write to default store
    res1 = await comp.awrite("/notes.txt", "default store content")
    assert isinstance(res1, WriteResult) and res1.error is None and res1.path == "/notes.txt"
    # Write to routed store
    res2 = await comp.awrite("/memories/important.txt", "routed store content")
    # The routed backend reports the path with the route prefix stripped.
    assert isinstance(res2, WriteResult) and res2.error is None and res2.path == "/important.txt"
    # Read from both
    content1 = await comp.aread("/notes.txt")
    assert "default store content" in content1
    content2 = await comp.aread("/memories/important.txt")
    assert "routed store content" in content2
    # als_info at root should show both
    infos = await comp.als_info("/")
    paths = {i["path"] for i in infos}
    assert "/notes.txt" in paths
    assert "/memories/" in paths
    # agrep across both stores
    matches = await comp.agrep_raw("default", path="/")
    assert any(m["path"] == "/notes.txt" for m in matches)
    matches2 = await comp.agrep_raw("routed", path="/")
    assert any(m["path"] == "/memories/important.txt" for m in matches2)
async def test_composite_backend_multiple_routes_async():
    """Test async operations with state default and multiple store routes."""
    rt = make_runtime("t6")
    comp = build_composite_state_backend(
        rt,
        routes={
            "/memories/": (lambda r: StoreBackend(r)),
            "/archive/": (lambda r: StoreBackend(r)),
            "/cache/": (lambda r: StoreBackend(r)),
        },
    )
    # Write to state (default)
    res_state = await comp.awrite("/temp.txt", "ephemeral data")
    # State backend reports content via files_update.
    assert res_state.files_update is not None
    assert res_state.path == "/temp.txt"
    # Write to /memories/ route
    res_mem = await comp.awrite("/memories/important.md", "long-term memory")
    assert res_mem.files_update is None
    # Routed writes report the path with the route prefix stripped.
    assert res_mem.path == "/important.md"
    # Write to /archive/ route
    res_arch = await comp.awrite("/archive/old.log", "archived log")
    assert res_arch.files_update is None
    assert res_arch.path == "/old.log"
    # Write to /cache/ route
    res_cache = await comp.awrite("/cache/session.json", "cached session")
    assert res_cache.files_update is None
    assert res_cache.path == "/session.json"
    # als_info at root should aggregate all
    infos = await comp.als_info("/")
    paths = {i["path"] for i in infos}
    assert "/temp.txt" in paths
    assert "/memories/" in paths
    assert "/archive/" in paths
    assert "/cache/" in paths
    # als_info at specific route
    mem_infos = await comp.als_info("/memories/")
    mem_paths = {i["path"] for i in mem_infos}
    assert "/memories/important.md" in mem_paths
    assert "/temp.txt" not in mem_paths
    assert "/archive/old.log" not in mem_paths
    # agrep across all backends
    all_matches = await comp.agrep_raw(".", path="/")  # Match any character
    paths_with_content = {m["path"] for m in all_matches}
    assert "/temp.txt" in paths_with_content
    assert "/memories/important.md" in paths_with_content
    assert "/archive/old.log" in paths_with_content
    assert "/cache/session.json" in paths_with_content
    # aglob across all backends
    glob_results = await comp.aglob_info("**/*.md", path="/")
    assert any(i["path"] == "/memories/important.md" for i in glob_results)
    # Edit in routed backend
    edit_res = await comp.aedit("/memories/important.md", "long-term", "persistent", replace_all=False)
    assert edit_res.error is None
    assert edit_res.occurrences == 1
    updated_content = await comp.aread("/memories/important.md")
    assert "persistent memory" in updated_content
async def test_composite_backend_als_nested_directories_async(tmp_path: Path):
    """Test async ls operations with nested directories."""
    rt = make_runtime("t7")
    root = tmp_path
    files = {
        root / "local.txt": "local file",
        root / "src" / "main.py": "code",
        root / "src" / "utils" / "helper.py": "utils",
    }
    for path, content in files.items():
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content)
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    await comp.awrite("/memories/note1.txt", "note 1")
    await comp.awrite("/memories/deep/note2.txt", "note 2")
    await comp.awrite("/memories/deep/nested/note3.txt", "note 3")
    # Root: direct children plus the route mount shown as a directory.
    root_listing = await comp.als_info("/")
    root_paths = [fi["path"] for fi in root_listing]
    assert "/local.txt" in root_paths
    assert "/src/" in root_paths
    assert "/memories/" in root_paths
    assert "/src/main.py" not in root_paths
    assert "/memories/note1.txt" not in root_paths
    src_listing = await comp.als_info("/src/")
    src_paths = [fi["path"] for fi in src_listing]
    assert "/src/main.py" in src_paths
    assert "/src/utils/" in src_paths
    assert "/src/utils/helper.py" not in src_paths
    # Same one-level-deep behavior inside the routed store backend.
    mem_listing = await comp.als_info("/memories/")
    mem_paths = [fi["path"] for fi in mem_listing]
    assert "/memories/note1.txt" in mem_paths
    assert "/memories/deep/" in mem_paths
    assert "/memories/deep/note2.txt" not in mem_paths
    deep_listing = await comp.als_info("/memories/deep/")
    deep_paths = [fi["path"] for fi in deep_listing]
    assert "/memories/deep/note2.txt" in deep_paths
    assert "/memories/deep/nested/" in deep_paths
    assert "/memories/deep/nested/note3.txt" not in deep_paths
async def test_composite_backend_als_multiple_routes_nested_async():
    """Async ls merges nested listings across several routed backends."""
    rt = make_runtime("t8")
    composite = build_composite_state_backend(
        rt,
        routes={
            "/memories/": (lambda r: StoreBackend(r)),
            "/archive/": (lambda r: StoreBackend(r)),
        },
    )
    # Default (state) backend: writes surface file updates to fold into state.
    for target, text in {
        "/temp.txt": "temp",
        "/work/file1.txt": "work file 1",
        "/work/projects/proj1.txt": "project 1",
    }.items():
        outcome = await composite.awrite(target, text)
        if outcome.files_update:
            rt.state["files"].update(outcome.files_update)
    # Routed backends persist on their own; no state merge is needed.
    for target, text in {
        "/memories/important.txt": "important",
        "/memories/diary/entry1.txt": "diary entry",
    }.items():
        await composite.awrite(target, text)
    for target, text in {
        "/archive/old.txt": "old",
        "/archive/2023/log.txt": "2023 log",
    }.items():
        await composite.awrite(target, text)
    # Root: direct children plus one directory marker per route.
    top = [entry["path"] for entry in await composite.als_info("/")]
    assert "/temp.txt" in top
    assert "/work/" in top
    assert "/memories/" in top
    assert "/archive/" in top
    assert "/work/file1.txt" not in top
    assert "/memories/important.txt" not in top
    # Each subtree lists only its own immediate children.
    work = [entry["path"] for entry in await composite.als_info("/work/")]
    assert "/work/file1.txt" in work
    assert "/work/projects/" in work
    assert "/work/projects/proj1.txt" not in work
    mem = [entry["path"] for entry in await composite.als_info("/memories/")]
    assert "/memories/important.txt" in mem
    assert "/memories/diary/" in mem
    assert "/memories/diary/entry1.txt" not in mem
    arch = [entry["path"] for entry in await composite.als_info("/archive/")]
    assert "/archive/old.txt" in arch
    assert "/archive/2023/" in arch
    assert "/archive/2023/log.txt" not in arch
async def test_composite_backend_aexecute_with_sandbox_default_async():
    """Async execute succeeds when the default backend supports commands."""
    rt = make_runtime("t_exec1")
    composite = CompositeBackend(
        default=MockSandboxBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # The sandbox default handles execution, so this must not raise.
    outcome = await composite.aexecute("ls -la")
    assert isinstance(outcome, ExecuteResponse)
    assert outcome.output == "Async Executed: ls -la"
    assert outcome.exit_code == 0
    assert outcome.truncated is False
async def test_composite_backend_aexecute_without_sandbox_default_async():
    """Async execute raises when the default backend cannot run commands."""
    rt = make_runtime("t_exec2")
    composite = CompositeBackend(
        default=StateBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # A state backend has no execution support, so aexecute must refuse.
    with pytest.raises(NotImplementedError, match="doesn't support command execution"):
        await composite.aexecute("ls -la")
async def test_composite_backend_aexecute_with_routed_backends_async():
    """File routing and async execution must not interfere with each other."""
    rt = make_runtime("t_exec4")
    composite = CompositeBackend(
        default=MockSandboxBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # Populate both the default and the routed backend.
    await composite.awrite("/local.txt", "local content")
    await composite.awrite("/memories/persistent.txt", "persistent content")
    # Execution still reaches the sandbox default.
    outcome = await composite.aexecute("echo test")
    assert outcome.output == "Async Executed: echo test"
    # And reads still route correctly afterwards.
    assert "local content" in await composite.aread("/local.txt")
    assert "persistent content" in await composite.aread("/memories/persistent.txt")
async def test_composite_aupload_routing_async(tmp_path: Path):
    """Async upload_files dispatches each file to the backend owning its path."""
    rt = make_runtime("t_upload1")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={"/memories/": StoreBackend(rt)},
    )
    # Files outside any route land on the filesystem default.
    outcomes = await composite.aupload_files(
        [
            ("/file1.bin", b"Default content 1"),
            ("/file2.bin", b"Default content 2"),
        ]
    )
    assert len(outcomes) == 2
    assert all(o.error is None for o in outcomes)
    assert (base / "file1.bin").exists()
    assert (base / "file2.bin").read_bytes() == b"Default content 2"
    # Files under /memories/ go to the store backend instead.
    outcomes = await composite.aupload_files(
        [
            ("/memories/note1.bin", b"Memory content 1"),
            ("/memories/note2.bin", b"Memory content 2"),
        ]
    )
    assert len(outcomes) == 2
    assert all(o.error is None for o in outcomes)
    # And they are readable back through the composite path.
    assert "Memory content 1" in await composite.aread("/memories/note1.bin")
async def test_composite_adownload_routing_async(tmp_path: Path):
    """Async download_files reads from the backend owning each path."""
    rt = make_runtime("t_download1")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={"/memories/": StoreBackend(rt)},
    )
    # One file on disk (default backend), one in the routed store.
    (base / "local.bin").write_bytes(b"Local binary data")
    await composite.awrite("/memories/stored.txt", "Stored text data")
    # Downloading a default-path file hits the filesystem backend.
    outcomes = await composite.adownload_files(["/local.bin"])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/local.bin"
    assert outcomes[0].content == b"Local binary data"
    assert outcomes[0].error is None
async def test_composite_aupload_download_roundtrip_async(tmp_path: Path):
    """Binary data survives an async upload/download roundtrip intact."""
    rt = make_runtime("t_roundtrip1")
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True),
        routes={},
    )
    payload = bytes(range(128))  # arbitrary binary payload
    uploaded = await composite.aupload_files([("/test.bin", payload)])
    assert uploaded[0].error is None
    # Reading it back must return the identical bytes.
    fetched = await composite.adownload_files(["/test.bin"])
    assert fetched[0].error is None
    assert fetched[0].content == payload
async def test_composite_partial_success_aupload_async(tmp_path: Path):
    """One bad path in an async upload batch must not sink the good ones."""
    rt = make_runtime("t_partial_upload")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={},
    )
    outcomes = await composite.aupload_files(
        [
            ("/valid1.bin", b"Valid 1"),
            ("/../invalid.bin", b"Invalid path"),  # escapes the root via traversal
            ("/valid2.bin", b"Valid 2"),
        ]
    )
    assert len(outcomes) == 3
    # The good path before the failure succeeds.
    assert outcomes[0].error is None
    assert (base / "valid1.bin").exists()
    # The traversal attempt is rejected per-file.
    assert outcomes[1].error == "invalid_path"
    # The good path after the failure still goes through.
    assert outcomes[2].error is None
    assert (base / "valid2.bin").exists()
async def test_composite_partial_success_adownload_async(tmp_path: Path):
    """Async batch download reports per-path errors independently."""
    rt = make_runtime("t_partial_download")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={},
    )
    (base / "exists.bin").write_bytes(b"I exist!")
    outcomes = await composite.adownload_files(
        ["/exists.bin", "/doesnotexist.bin", "/../invalid"]
    )
    assert len(outcomes) == 3
    # The real file downloads fine.
    assert outcomes[0].error is None
    assert outcomes[0].content == b"I exist!"
    # The missing file is reported, not raised.
    assert outcomes[1].error == "file_not_found"
    assert outcomes[1].content is None
    # The traversal attempt is rejected.
    assert outcomes[2].error == "invalid_path"
    assert outcomes[2].content is None
async def test_composite_aupload_download_multiple_routes_async(tmp_path: Path):
    """Async uploads fan out across the default and every routed backend."""
    rt = make_runtime("t_multi_route")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={
            "/memories/": StoreBackend(rt),
            "/archive/": StoreBackend(rt),
        },
    )
    # One file aimed at each backend.
    outcomes = await composite.aupload_files(
        [
            ("/default.bin", b"Default backend"),
            ("/memories/mem.bin", b"Memory backend"),
            ("/archive/arch.bin", b"Archive backend"),
        ]
    )
    assert len(outcomes) == 3
    assert all(o.error is None for o in outcomes)
    # Only the default-path file should appear on disk.
    assert (base / "default.bin").exists()
    assert (base / "default.bin").read_bytes() == b"Default backend"
async def test_composite_adownload_preserves_original_paths_async(tmp_path: Path):
    """Async download responses echo the caller's composite paths verbatim."""
    rt = make_runtime("t_path_preserve")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={},
    )
    nested = base / "subdir" / "file.bin"
    nested.parent.mkdir()
    nested.write_bytes(b"Nested file")
    outcomes = await composite.adownload_files(["/subdir/file.bin"])
    # The response path is the composite path, not a backend-local one.
    assert outcomes[0].path == "/subdir/file.bin"
    assert outcomes[0].content == b"Nested file"

View File

@@ -0,0 +1,491 @@
from pathlib import Path
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import EditResult, WriteResult
def write_file(p: Path, content: str) -> None:
    """Write *content* to *p*, creating any missing parent directories first."""
    parent = p.parent
    parent.mkdir(parents=True, exist_ok=True)
    p.write_text(content)
def test_filesystem_backend_normal_mode(tmp_path: Path):
    """Core sync operations against real (non-virtual) absolute paths."""
    base = tmp_path
    file_a = base / "a.txt"
    file_b = base / "dir" / "b.py"
    write_file(file_a, "hello fs")
    write_file(file_b, "print('x')\nhello")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=False)
    # ls_info is shallow: root files and directory markers, no recursion.
    listed = {entry["path"] for entry in backend.ls_info(str(base))}
    assert str(file_a) in listed
    assert str(file_b) not in listed
    assert (str(base) + "/dir/") in listed
    # read / edit / write roundtrip on absolute paths.
    assert "hello fs" in backend.read(str(file_a))
    edited = backend.edit(str(file_a), "fs", "filesystem", replace_all=False)
    assert isinstance(edited, EditResult) and edited.error is None and edited.occurrences == 1
    written = backend.write(str(base / "new.txt"), "new content")
    assert isinstance(written, WriteResult) and written.error is None and written.path.endswith("new.txt")
    # grep_raw searches under the given path.
    hits = backend.grep_raw("hello", path=str(base))
    assert isinstance(hits, list) and any(h["path"].endswith("a.txt") for h in hits)
    # glob_info resolves patterns relative to the given path.
    globbed = backend.glob_info("*.py", path=str(base))
    assert any(entry["path"] == str(file_b) for entry in globbed)
def test_filesystem_backend_virtual_mode(tmp_path: Path):
    """Core sync operations through virtual ("/"-rooted) paths."""
    base = tmp_path
    write_file(base / "a.txt", "hello virtual")
    write_file(base / "dir" / "b.md", "content")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Virtual root listing is shallow: files plus directory markers only.
    listed = {entry["path"] for entry in backend.ls_info("/")}
    assert "/a.txt" in listed
    assert "/dir/b.md" not in listed
    assert "/dir/" in listed
    # read / edit through virtual paths.
    assert "hello virtual" in backend.read("/a.txt")
    edited = backend.edit("/a.txt", "virtual", "virt", replace_all=False)
    assert isinstance(edited, EditResult) and edited.error is None and edited.occurrences == 1
    # Writes via virtual paths land under the real root.
    written = backend.write("/new.txt", "x")
    assert isinstance(written, WriteResult) and written.error is None
    assert (base / "new.txt").exists()
    # grep_raw scoped to the virtual root reports virtual paths.
    hits = backend.grep_raw("virt", path="/")
    assert isinstance(hits, list) and any(h["path"] == "/a.txt" for h in hits)
    # glob_info likewise yields virtual paths.
    globbed = backend.glob_info("**/*.md", path="/")
    assert any(entry["path"] == "/dir/b.md" for entry in globbed)
    # A malformed regex is reported as an error string, not raised.
    bad = backend.grep_raw("[", path="/")
    assert isinstance(bad, str)
    # Escaping the virtual root must raise ValueError.
    try:
        backend.read("/../a.txt")
    except ValueError:
        pass
    else:
        raise AssertionError("expected ValueError for traversal")
def test_filesystem_backend_ls_nested_directories(tmp_path: Path):
    """ls_info lists one directory level at a time in virtual mode."""
    base = tmp_path
    tree = {
        base / "config.json": "config",
        base / "src" / "main.py": "code",
        base / "src" / "utils" / "helper.py": "utils code",
        base / "src" / "utils" / "common.py": "common utils",
        base / "docs" / "readme.md": "documentation",
        base / "docs" / "api" / "reference.md": "api docs",
    }
    for target, text in tree.items():
        write_file(target, text)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Root level: immediate files and directories only.
    top = [entry["path"] for entry in backend.ls_info("/")]
    assert "/config.json" in top
    assert "/src/" in top
    assert "/docs/" in top
    assert "/src/main.py" not in top
    assert "/src/utils/helper.py" not in top
    # One level down.
    src = [entry["path"] for entry in backend.ls_info("/src/")]
    assert "/src/main.py" in src
    assert "/src/utils/" in src
    assert "/src/utils/helper.py" not in src
    # Leaf directory: exactly its two files.
    utils = [entry["path"] for entry in backend.ls_info("/src/utils/")]
    assert "/src/utils/helper.py" in utils
    assert "/src/utils/common.py" in utils
    assert len(utils) == 2
    # A missing directory lists as empty rather than raising.
    assert backend.ls_info("/nonexistent/") == []
def test_filesystem_backend_ls_normal_mode_nested(tmp_path: Path):
    """ls_info stays shallow with nested directories in non-virtual mode."""
    base = tmp_path
    for target, text in {
        base / "file1.txt": "content1",
        base / "subdir" / "file2.txt": "content2",
        base / "subdir" / "nested" / "file3.txt": "content3",
    }.items():
        write_file(target, text)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=False)
    # Root listing: direct file plus the subdirectory marker only.
    top = [entry["path"] for entry in backend.ls_info(str(base))]
    assert str(base / "file1.txt") in top
    assert str(base / "subdir") + "/" in top
    assert str(base / "subdir" / "file2.txt") not in top
    # Listing the subdirectory surfaces its file and its nested dir marker.
    inner = [entry["path"] for entry in backend.ls_info(str(base / "subdir"))]
    assert str(base / "subdir" / "file2.txt") in inner
    assert str(base / "subdir" / "nested") + "/" in inner
    assert str(base / "subdir" / "nested" / "file3.txt") not in inner
def test_filesystem_backend_ls_trailing_slash(tmp_path: Path):
    """ls_info output is non-empty, sorted, and slash-insensitive."""
    base = tmp_path
    write_file(base / "file.txt", "content")
    write_file(base / "dir" / "nested.txt", "nested")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Root listing is non-empty.
    assert len(backend.ls_info("/")) > 0
    # And its paths come back sorted.
    top_paths = [entry["path"] for entry in backend.ls_info("/")]
    assert top_paths == sorted(top_paths)
    # Trailing slash on a directory path makes no difference.
    with_slash = backend.ls_info("/dir/")
    without_slash = backend.ls_info("/dir")
    assert len(with_slash) == len(without_slash)
    assert [e["path"] for e in with_slash] == [e["path"] for e in without_slash]
    # Nonexistent directories list as empty.
    assert backend.ls_info("/nonexistent/") == []
def test_filesystem_backend_intercept_large_tool_result(tmp_path: Path):
    """Oversized tool results are evicted to the filesystem backend."""
    from langchain.tools import ToolRuntime
    from langchain_core.messages import ToolMessage
    from deepagents.middleware.filesystem import FilesystemMiddleware
    base = tmp_path
    rt = ToolRuntime(
        state={"messages": [], "files": {}},
        context=None,
        tool_call_id="test_fs",
        store=None,
        stream_writer=lambda _: None,
        config={},
    )
    middleware = FilesystemMiddleware(
        backend=lambda r: FilesystemBackend(root_dir=str(base), virtual_mode=True),
        tool_token_limit_before_evict=1000,
    )
    # A 5000-char payload exceeds the 1000-token eviction limit.
    oversized = "f" * 5000
    intercepted = middleware._intercept_large_tool_result(
        ToolMessage(content=oversized, tool_call_id="test_fs_123"), rt
    )
    # The reply is a pointer message, and the payload lands on disk.
    assert isinstance(intercepted, ToolMessage)
    assert "Tool result too large" in intercepted.content
    assert "/large_tool_results/test_fs_123" in intercepted.content
    evicted = base / "large_tool_results" / "test_fs_123"
    assert evicted.exists()
    assert evicted.read_text() == oversized
def test_filesystem_upload_single_file(tmp_path: Path):
    """upload_files writes one binary file and reports success."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    payload = b"Hello, Binary World!"
    outcomes = backend.upload_files([("/test_upload.bin", payload)])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/test_upload.bin"
    assert outcomes[0].error is None
    # The bytes must land verbatim under the real root.
    target = tmp_path / "test_upload.bin"
    assert target.exists()
    assert target.read_bytes() == payload
def test_filesystem_upload_multiple_files(tmp_path: Path):
    """upload_files handles a batch, creating subdirectories as needed."""
    root = tmp_path
    be = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    files = [
        ("/file1.bin", b"Content 1"),
        ("/file2.bin", b"Content 2"),
        ("/subdir/file3.bin", b"Content 3"),
    ]
    responses = be.upload_files(files)
    assert len(responses) == 3
    # Each response echoes its request path and reports success.
    # (zip over request/response pairs instead of enumerate+indexing; the
    # original bound an unused `content` loop variable.)
    for response, (path, _content) in zip(responses, files):
        assert response.path == path
        assert response.error is None
    # Verify all files were created with the exact bytes.
    assert (root / "file1.bin").read_bytes() == b"Content 1"
    assert (root / "file2.bin").read_bytes() == b"Content 2"
    assert (root / "subdir" / "file3.bin").read_bytes() == b"Content 3"
def test_filesystem_download_single_file(tmp_path: Path):
    """download_files returns the raw bytes of one existing file."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Seed the file directly on disk.
    payload = b"Download me!"
    (tmp_path / "test_download.bin").write_bytes(payload)
    outcomes = backend.download_files(["/test_download.bin"])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/test_download.bin"
    assert outcomes[0].content == payload
    assert outcomes[0].error is None
def test_filesystem_download_multiple_files(tmp_path: Path):
    """download_files fetches a batch, including nested paths, in order."""
    base = tmp_path
    seed = {
        base / "file1.txt": b"File 1",
        base / "file2.txt": b"File 2",
        base / "subdir" / "file3.txt": b"File 3",
    }
    for target, payload in seed.items():
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(payload)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    outcomes = backend.download_files(["/file1.txt", "/file2.txt", "/subdir/file3.txt"])
    assert len(outcomes) == 3
    # Responses come back in request order with the exact bytes.
    expected = [
        ("/file1.txt", b"File 1"),
        ("/file2.txt", b"File 2"),
        ("/subdir/file3.txt", b"File 3"),
    ]
    for outcome, (path, payload) in zip(outcomes, expected):
        assert outcome.path == path
        assert outcome.content == payload
        assert outcome.error is None
def test_filesystem_upload_download_roundtrip(tmp_path: Path):
    """Every byte value must survive an upload/download roundtrip."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    payload = bytes(range(256))  # covers all 256 possible byte values
    uploaded = backend.upload_files([("/roundtrip.bin", payload)])
    assert uploaded[0].error is None
    # Reading it back must yield the identical bytes.
    fetched = backend.download_files(["/roundtrip.bin"])
    assert fetched[0].error is None
    assert fetched[0].content == payload
def test_filesystem_download_errors(tmp_path: Path):
    """download_files maps each failure mode to a per-file error code."""
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Missing file -> file_not_found.
    outcomes = backend.download_files(["/nonexistent.txt"])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/nonexistent.txt"
    assert outcomes[0].content is None
    assert outcomes[0].error == "file_not_found"
    # Directory target -> is_directory.
    (base / "testdir").mkdir()
    outcomes = backend.download_files(["/testdir"])
    assert outcomes[0].error == "is_directory"
    assert outcomes[0].content is None
    # Path traversal -> invalid_path.
    outcomes = backend.download_files(["/../etc/passwd"])
    assert len(outcomes) == 1
    assert outcomes[0].error == "invalid_path"
    assert outcomes[0].content is None
def test_filesystem_upload_errors(tmp_path: Path):
    """upload_files rejects paths that escape the virtual root."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # A traversal path must be rejected with invalid_path.
    outcomes = backend.upload_files([("/../bad/path.txt", b"content")])
    assert len(outcomes) == 1
    assert outcomes[0].error == "invalid_path"
def test_filesystem_partial_success_upload(tmp_path: Path):
    """A rejected path must not block the rest of an upload batch."""
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    outcomes = backend.upload_files(
        [
            ("/valid1.txt", b"Valid content 1"),
            ("/../invalid.txt", b"Invalid path"),  # traversal attempt
            ("/valid2.txt", b"Valid content 2"),
        ]
    )
    assert len(outcomes) == 3
    # The good path before the failure succeeds.
    assert outcomes[0].error is None
    assert (base / "valid1.txt").exists()
    # The traversal path is rejected.
    assert outcomes[1].error == "invalid_path"
    # The good path after the failure still goes through.
    assert outcomes[2].error is None
    assert (base / "valid2.txt").exists()
def test_filesystem_partial_success_download(tmp_path: Path):
    """A failing path must not block the rest of a download batch."""
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    payload = b"I exist!"
    (base / "exists.txt").write_bytes(payload)
    outcomes = backend.download_files(["/exists.txt", "/doesnotexist.txt", "/../invalid"])
    assert len(outcomes) == 3
    # The existing file downloads.
    assert outcomes[0].error is None
    assert outcomes[0].content == payload
    # The missing file reports file_not_found.
    assert outcomes[1].error == "file_not_found"
    assert outcomes[1].content is None
    # The traversal path reports invalid_path.
    assert outcomes[2].error == "invalid_path"
    assert outcomes[2].content is None
def test_filesystem_upload_to_existing_directory_path(tmp_path: Path):
    """Uploading over an existing directory name yields a response either way.

    With /existing_dir present as a directory, uploading a file to the same
    path may succeed or fail depending on OS/filesystem semantics. This test
    only documents that a response is always produced.
    """
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    (base / "existing_dir").mkdir()
    outcomes = backend.upload_files([("/existing_dir", b"file content")])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/existing_dir"
    # Success or failure here is OS-dependent; we only require a response.
def test_filesystem_upload_parent_is_file(tmp_path: Path):
    """Uploading beneath a path whose parent is a regular file must error.

    With /parent.txt existing as a file, /parent.txt/child.txt cannot be
    created, so the response must carry some error.
    """
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    (base / "parent.txt").write_text("I am a file, not a directory")
    outcomes = backend.upload_files([("/parent.txt/child.txt", b"child content")])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/parent.txt/child.txt"
    assert outcomes[0].error is not None
def test_filesystem_download_directory_as_file(tmp_path: Path):
    """Downloading a directory path yields the is_directory error.

    Partly covered by test_filesystem_download_errors; kept explicit to
    document this as a supported error scenario.
    """
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    (base / "mydir").mkdir()
    outcomes = backend.download_files(["/mydir"])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/mydir"
    assert outcomes[0].content is None
    assert outcomes[0].error == "is_directory"

View File

@@ -0,0 +1,520 @@
"""Async tests for FilesystemBackend."""
from pathlib import Path
import pytest
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import EditResult, WriteResult
def write_file(p: Path, content: str) -> None:
    """Write *content* to *p*, creating any missing parent directories first."""
    parent = p.parent
    parent.mkdir(parents=True, exist_ok=True)
    p.write_text(content)
async def test_filesystem_backend_async_normal_mode(tmp_path: Path):
    """Core async operations against real (non-virtual) absolute paths."""
    base = tmp_path
    file_a = base / "a.txt"
    file_b = base / "dir" / "b.py"
    write_file(file_a, "hello fs")
    write_file(file_b, "print('x')\nhello")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=False)
    # als_info is shallow: root files and directory markers, no recursion.
    listed = {entry["path"] for entry in await backend.als_info(str(base))}
    assert str(file_a) in listed
    assert str(file_b) not in listed
    assert (str(base) + "/dir/") in listed
    # aread / aedit / awrite roundtrip on absolute paths.
    assert "hello fs" in await backend.aread(str(file_a))
    edited = await backend.aedit(str(file_a), "fs", "filesystem", replace_all=False)
    assert isinstance(edited, EditResult) and edited.error is None and edited.occurrences == 1
    written = await backend.awrite(str(base / "new.txt"), "new content")
    assert isinstance(written, WriteResult) and written.error is None and written.path.endswith("new.txt")
    # agrep_raw searches under the given path.
    hits = await backend.agrep_raw("hello", path=str(base))
    assert isinstance(hits, list) and any(h["path"].endswith("a.txt") for h in hits)
    # aglob_info resolves patterns relative to the given path.
    globbed = await backend.aglob_info("*.py", path=str(base))
    assert any(entry["path"] == str(file_b) for entry in globbed)
async def test_filesystem_backend_async_virtual_mode(tmp_path: Path):
    """Core async operations through virtual ("/"-rooted) paths."""
    base = tmp_path
    write_file(base / "a.txt", "hello virtual")
    write_file(base / "dir" / "b.md", "content")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Virtual root listing is shallow: files plus directory markers only.
    listed = {entry["path"] for entry in await backend.als_info("/")}
    assert "/a.txt" in listed
    assert "/dir/b.md" not in listed
    assert "/dir/" in listed
    # aread / aedit through virtual paths.
    assert "hello virtual" in await backend.aread("/a.txt")
    edited = await backend.aedit("/a.txt", "virtual", "virt", replace_all=False)
    assert isinstance(edited, EditResult) and edited.error is None and edited.occurrences == 1
    # Writes via virtual paths land under the real root.
    written = await backend.awrite("/new.txt", "x")
    assert isinstance(written, WriteResult) and written.error is None
    assert (base / "new.txt").exists()
    # agrep_raw scoped to the virtual root reports virtual paths.
    hits = await backend.agrep_raw("virt", path="/")
    assert isinstance(hits, list) and any(h["path"] == "/a.txt" for h in hits)
    # aglob_info likewise yields virtual paths.
    globbed = await backend.aglob_info("**/*.md", path="/")
    assert any(entry["path"] == "/dir/b.md" for entry in globbed)
    # A malformed regex is reported as an error string, not raised.
    bad = await backend.agrep_raw("[", path="/")
    assert isinstance(bad, str)
    # Escaping the virtual root must raise ValueError.
    with pytest.raises(ValueError):
        await backend.aread("/../a.txt")
async def test_filesystem_backend_als_nested_directories(tmp_path: Path):
    """Async ls lists one directory level at a time in virtual mode."""
    base = tmp_path
    tree = {
        base / "config.json": "config",
        base / "src" / "main.py": "code",
        base / "src" / "utils" / "helper.py": "utils code",
        base / "src" / "utils" / "common.py": "common utils",
        base / "docs" / "readme.md": "documentation",
        base / "docs" / "api" / "reference.md": "api docs",
    }
    for target, text in tree.items():
        write_file(target, text)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Root level: immediate files and directories only.
    top = [entry["path"] for entry in await backend.als_info("/")]
    assert "/config.json" in top
    assert "/src/" in top
    assert "/docs/" in top
    assert "/src/main.py" not in top
    assert "/src/utils/helper.py" not in top
    # One level down.
    src = [entry["path"] for entry in await backend.als_info("/src/")]
    assert "/src/main.py" in src
    assert "/src/utils/" in src
    assert "/src/utils/helper.py" not in src
    # Leaf directory: exactly its two files.
    utils = [entry["path"] for entry in await backend.als_info("/src/utils/")]
    assert "/src/utils/helper.py" in utils
    assert "/src/utils/common.py" in utils
    assert len(utils) == 2
    # A missing directory lists as empty rather than raising.
    assert await backend.als_info("/nonexistent/") == []
async def test_filesystem_backend_als_normal_mode_nested(tmp_path: Path):
    """Async ls_info stays shallow with nested dirs in non-virtual mode."""
    base = tmp_path
    for target, text in {
        base / "file1.txt": "content1",
        base / "subdir" / "file2.txt": "content2",
        base / "subdir" / "nested" / "file3.txt": "content3",
    }.items():
        write_file(target, text)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=False)
    # Root listing: direct file plus the subdirectory marker only.
    top = [entry["path"] for entry in await backend.als_info(str(base))]
    assert str(base / "file1.txt") in top
    assert str(base / "subdir") + "/" in top
    assert str(base / "subdir" / "file2.txt") not in top
    # Listing the subdirectory surfaces its file and its nested dir marker.
    inner = [entry["path"] for entry in await backend.als_info(str(base / "subdir"))]
    assert str(base / "subdir" / "file2.txt") in inner
    assert str(base / "subdir" / "nested") + "/" in inner
    assert str(base / "subdir" / "nested" / "file3.txt") not in inner
async def test_filesystem_backend_als_trailing_slash(tmp_path: Path):
    """Async ls_info output is non-empty, sorted, and slash-insensitive."""
    base = tmp_path
    write_file(base / "file.txt", "content")
    write_file(base / "dir" / "nested.txt", "nested")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Root listing is non-empty.
    assert len(await backend.als_info("/")) > 0
    # And its paths come back sorted.
    top_paths = [entry["path"] for entry in await backend.als_info("/")]
    assert top_paths == sorted(top_paths)
    # Trailing slash on a directory path makes no difference.
    with_slash = await backend.als_info("/dir/")
    without_slash = await backend.als_info("/dir")
    assert len(with_slash) == len(without_slash)
    assert [e["path"] for e in with_slash] == [e["path"] for e in without_slash]
    # Nonexistent directories list as empty.
    assert await backend.als_info("/nonexistent/") == []
async def test_filesystem_backend_intercept_large_tool_result_async(tmp_path: Path):
    """Oversized tool results are evicted to disk in an async context too."""
    from langchain.tools import ToolRuntime
    from langchain_core.messages import ToolMessage
    from deepagents.middleware.filesystem import FilesystemMiddleware
    base = tmp_path
    rt = ToolRuntime(
        state={"messages": [], "files": {}},
        context=None,
        tool_call_id="test_fs",
        store=None,
        stream_writer=lambda _: None,
        config={},
    )
    middleware = FilesystemMiddleware(
        backend=lambda r: FilesystemBackend(root_dir=str(base), virtual_mode=True),
        tool_token_limit_before_evict=1000,
    )
    # A 5000-char payload exceeds the 1000-token eviction limit.
    oversized = "f" * 5000
    intercepted = middleware._intercept_large_tool_result(
        ToolMessage(content=oversized, tool_call_id="test_fs_123"), rt
    )
    # The reply is a pointer message, and the payload lands on disk.
    assert isinstance(intercepted, ToolMessage)
    assert "Tool result too large" in intercepted.content
    assert "/large_tool_results/test_fs_123" in intercepted.content
    evicted = base / "large_tool_results" / "test_fs_123"
    assert evicted.exists()
    assert evicted.read_text() == oversized
async def test_filesystem_aupload_single_file(tmp_path: Path):
    """Test async uploading a single binary file."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    upload_path = "/test_upload.bin"
    payload = b"Hello, Binary World!"
    results = await backend.aupload_files([(upload_path, payload)])
    # One request -> one response, echoing the path with no error.
    assert len(results) == 1
    assert results[0].path == upload_path
    assert results[0].error is None
    # The bytes must land on disk unchanged.
    on_disk = tmp_path / "test_upload.bin"
    assert on_disk.exists()
    assert on_disk.read_bytes() == payload
async def test_filesystem_aupload_multiple_files(tmp_path: Path):
    """Test async uploading multiple files in one call."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    batch = [
        ("/file1.bin", b"Content 1"),
        ("/file2.bin", b"Content 2"),
        ("/subdir/file3.bin", b"Content 3"),
    ]
    results = await backend.aupload_files(batch)
    assert len(results) == 3
    # Responses come back in request order, each error-free.
    for resp, (req_path, _req_bytes) in zip(results, batch):
        assert resp.path == req_path
        assert resp.error is None
    # Every file, including the one in a fresh subdirectory, is written.
    assert (tmp_path / "file1.bin").read_bytes() == b"Content 1"
    assert (tmp_path / "file2.bin").read_bytes() == b"Content 2"
    assert (tmp_path / "subdir" / "file3.bin").read_bytes() == b"Content 3"
async def test_filesystem_adownload_single_file(tmp_path: Path):
    """Test async downloading a single file."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Seed the file directly on disk, bypassing the backend.
    payload = b"Download me!"
    (tmp_path / "test_download.bin").write_bytes(payload)
    results = await backend.adownload_files(["/test_download.bin"])
    assert len(results) == 1
    # Response echoes the virtual path and carries the exact bytes.
    first = results[0]
    assert first.path == "/test_download.bin"
    assert first.content == payload
    assert first.error is None
async def test_filesystem_adownload_multiple_files(tmp_path: Path):
    """Test async downloading multiple files in one call."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Seed three files on disk, one inside a nested directory.
    seed = {
        tmp_path / "file1.txt": b"File 1",
        tmp_path / "file2.txt": b"File 2",
        tmp_path / "subdir" / "file3.txt": b"File 3",
    }
    for fpath, data in seed.items():
        fpath.parent.mkdir(parents=True, exist_ok=True)
        fpath.write_bytes(data)
    requested = ["/file1.txt", "/file2.txt", "/subdir/file3.txt"]
    results = await backend.adownload_files(requested)
    assert len(results) == 3
    # Responses arrive in request order with the expected bytes and no errors.
    expected = [
        ("/file1.txt", b"File 1"),
        ("/file2.txt", b"File 2"),
        ("/subdir/file3.txt", b"File 3"),
    ]
    for resp, (want_path, want_bytes) in zip(results, expected):
        assert resp.path == want_path
        assert resp.content == want_bytes
        assert resp.error is None
async def test_filesystem_aupload_download_roundtrip(tmp_path: Path):
    """Test async upload followed by download for data integrity."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    virtual_path = "/roundtrip.bin"
    # Exercise every possible byte value, including NUL and high bytes.
    payload = bytes(range(256))
    up = await backend.aupload_files([(virtual_path, payload)])
    assert up[0].error is None
    down = await backend.adownload_files([virtual_path])
    assert down[0].error is None
    # What comes back must be bit-for-bit what went in.
    assert down[0].content == payload
async def test_filesystem_adownload_errors(tmp_path: Path):
    """Test async download error handling."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Missing file -> file_not_found, content left unset.
    missing = await backend.adownload_files(["/nonexistent.txt"])
    assert len(missing) == 1
    assert missing[0].path == "/nonexistent.txt"
    assert missing[0].content is None
    assert missing[0].error == "file_not_found"
    # Directories cannot be downloaded -> is_directory.
    (tmp_path / "testdir").mkdir()
    as_dir = await backend.adownload_files(["/testdir"])
    assert as_dir[0].error == "is_directory"
    assert as_dir[0].content is None
    # Path traversal outside the root is rejected -> invalid_path.
    traversal = await backend.adownload_files(["/../etc/passwd"])
    assert len(traversal) == 1
    assert traversal[0].error == "invalid_path"
    assert traversal[0].content is None
async def test_filesystem_aupload_errors(tmp_path: Path):
    """Test async upload error handling."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Path traversal outside the root must be rejected, not written.
    results = await backend.aupload_files([("/../bad/path.txt", b"content")])
    assert len(results) == 1
    assert results[0].error == "invalid_path"
async def test_filesystem_partial_success_aupload(tmp_path: Path):
    """Test partial success in async batch upload."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    batch = [
        ("/valid1.txt", b"Valid content 1"),
        ("/../invalid.txt", b"Invalid path"),  # Path traversal
        ("/valid2.txt", b"Valid content 2"),
    ]
    results = await backend.aupload_files(batch)
    assert len(results) == 3
    ok_first, bad_middle, ok_last = results
    # A bad entry in the middle must not abort the batch: entries before
    # and after it still succeed, and only the traversal attempt errors.
    assert ok_first.error is None
    assert (tmp_path / "valid1.txt").exists()
    assert bad_middle.error == "invalid_path"
    assert ok_last.error is None
    assert (tmp_path / "valid2.txt").exists()
async def test_filesystem_partial_success_adownload(tmp_path: Path):
    """Test partial success in async batch download."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    payload = b"I exist!"
    (tmp_path / "exists.txt").write_bytes(payload)
    # One valid path, one missing file, one traversal attempt.
    results = await backend.adownload_files(
        ["/exists.txt", "/doesnotexist.txt", "/../invalid"]
    )
    assert len(results) == 3
    ok, missing, bad = results
    # The valid entry succeeds despite the failures around it.
    assert ok.error is None
    assert ok.content == payload
    # The missing file reports file_not_found with no content.
    assert missing.error == "file_not_found"
    assert missing.content is None
    # The traversal attempt reports invalid_path with no content.
    assert bad.error == "invalid_path"
    assert bad.content is None
async def test_filesystem_aedit_replace_all(tmp_path: Path):
    """Test async edit with replace_all option."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    (tmp_path / "test.txt").write_text("foo bar foo baz")
    # Ambiguous edit: "foo" occurs twice, so replace_all=False must error.
    ambiguous = await backend.aedit("/test.txt", "foo", "qux", replace_all=False)
    assert ambiguous.error is not None
    assert "appears 2 times" in ambiguous.error
    # replace_all=True rewrites every occurrence and reports the count.
    replaced = await backend.aedit("/test.txt", "foo", "qux", replace_all=True)
    assert replaced.error is None
    assert replaced.occurrences == 2
    after_all = await backend.aread("/test.txt")
    assert "qux bar qux baz" in after_all
    # A unique target works fine with replace_all=False.
    unique = await backend.aedit("/test.txt", "bar", "xyz", replace_all=False)
    assert unique.error is None
    assert unique.occurrences == 1
    after_unique = await backend.aread("/test.txt")
    assert "qux xyz qux baz" in after_unique
async def test_filesystem_aread_with_offset_and_limit(tmp_path: Path):
    """Test async read with offset and limit."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Ten numbered lines to slice a window out of.
    (tmp_path / "multi.txt").write_text(
        "\n".join(f"Line {i}" for i in range(1, 11))
    )
    # offset=2 skips the first two lines; limit=3 caps the window at three.
    window = await backend.aread("/multi.txt", offset=2, limit=3)
    for included in ("Line 3", "Line 4", "Line 5"):
        assert included in window
    for excluded in ("Line 1", "Line 6"):
        assert excluded not in window
async def test_filesystem_agrep_with_glob(tmp_path: Path):
    """Test async grep with glob filter."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Two .py files and one .txt file all containing the search term.
    (tmp_path / "test.py").write_text("import os")
    (tmp_path / "test.txt").write_text("import nothing")
    (tmp_path / "main.py").write_text("import sys")
    # The glob filter must restrict matches to Python files only.
    hits = await backend.agrep_raw("import", path="/", glob="*.py")
    assert isinstance(hits, list)
    matched_paths = [hit["path"] for hit in hits]
    assert any("test.py" in mp for mp in matched_paths)
    assert any("main.py" in mp for mp in matched_paths)
    assert not any("test.txt" in mp for mp in matched_paths)
async def test_filesystem_aglob_recursive(tmp_path: Path):
    """Test async glob with recursive patterns."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Python sources at several depths, plus one non-Python file.
    tree = {
        tmp_path / "src" / "main.py": "code",
        tmp_path / "src" / "utils" / "helper.py": "utils",
        tmp_path / "tests" / "test_main.py": "tests",
        tmp_path / "readme.txt": "docs",
    }
    for fpath, text in tree.items():
        write_file(fpath, text)
    # "**/*.py" should match every .py file regardless of nesting depth.
    infos = await backend.aglob_info("**/*.py", path="/")
    matched = [info["path"] for info in infos]
    for expected_name in ("main.py", "helper.py", "test_main.py"):
        assert any(expected_name in m for m in matched)
    assert not any("readme.txt" in m for m in matched)

Some files were not shown because too many files have changed in this diff Show More