project init

This commit is contained in:
HyunjunJeon
2025-12-31 11:32:36 +09:00
commit 9cb01f4abe
212 changed files with 64609 additions and 0 deletions

215
deepagents_sourcecode/.gitignore vendored Normal file
View File

@@ -0,0 +1,215 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# LangGraph
.langgraph_api
#claude
.claude
.idea

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Harrison Chase
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,320 @@
# 🚀🧠 Deep Agents
Agents can increasingly tackle long-horizon tasks, [with agent task length doubling every 7 months](https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/)! But, long horizon tasks often span dozens of tool calls, which present cost and reliability challenges. Popular agents such as [Claude Code](https://code.claude.com/docs) and [Manus](https://www.youtube.com/watch?v=6_BcCthVvb8) use some common principles to address these challenges, including **planning** (prior to task execution), **computer access** (giving the agent access to a shell and a filesystem), and **sub-agent delegation** (isolated task execution). `deepagents` is a simple agent harness that implements these tools, but is open source and easily extendable with your own custom tools and instructions.
<img src=".github/images/deepagents_banner.png" alt="deep agent" width="100%"/>
## 📚 Resources
- **[Documentation](https://docs.langchain.com/oss/python/deepagents/overview)** - Full overview and API reference
- **[Korean Documentation](docs/DeepAgents_Documentation_KR.md)** - DeepAgents Technical Documentation (KR)
- **[Quickstarts Repo](https://github.com/langchain-ai/deepagents-quickstarts)** - Examples and use-cases
- **[CLI](libs/deepagents-cli/)** - Interactive command-line interface with skills, memory, and HITL workflows
## 🚀 Quickstart
You can give `deepagents` custom tools. Below, we'll optionally provide the `tavily` tool to search the web. This tool will be added to the `deepagents` built-in tools (see below).
```bash
pip install deepagents tavily-python
```
Set `TAVILY_API_KEY` in your environment ([get one here](https://www.tavily.com/)):
```python
import os
from deepagents import create_deep_agent
from tavily import TavilyClient
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
def internet_search(query: str, max_results: int = 5):
"""Run a web search"""
return tavily_client.search(query, max_results=max_results)
agent = create_deep_agent(
tools=[internet_search],
system_prompt="Conduct research and write a polished report.",
)
result = agent.invoke({"messages": [{"role": "user", "content": "What is LangGraph?"}]})
```
The agent created with `create_deep_agent` is compiled [LangGraph StateGraph](https://docs.langchain.com/oss/python/langgraph/overview), so it can be used with streaming, human-in-the-loop, memory, or Studio just like any LangGraph agent. See our [quickstarts repo](https://github.com/langchain-ai/deepagents-quickstarts) for more examples.
## Customizing Deep Agents
There are several parameters you can pass to `create_deep_agent`.
### `model`
By default, `deepagents` uses `"claude-sonnet-4-5-20250929"`. You can customize this by passing any [LangChain model object](https://python.langchain.com/docs/integrations/chat/).
```python
from langchain.chat_models import init_chat_model
from deepagents import create_deep_agent
model = init_chat_model("openai:gpt-4o")
agent = create_deep_agent(
model=model,
)
```
### `system_prompt`
You can provide a `system_prompt` parameter to `create_deep_agent()`. This custom prompt is **appended to** default instructions that are automatically injected by middleware.
When writing a custom system prompt, you should:
- ✅ Define domain-specific workflows (e.g., research methodology, data analysis steps)
- ✅ Provide concrete examples for your use case
- ✅ Add specialized guidance (e.g., "batch similar research tasks into a single TODO")
- ✅ Define stopping criteria and resource limits
- ✅ Explain how tools work together in your workflow
**Don't:**
- ❌ Re-explain what standard tools do (already covered by middleware)
- ❌ Duplicate middleware instructions about tool usage
- ❌ Contradict default instructions (work with them, not against them)
```python
from deepagents import create_deep_agent
research_instructions = """your custom system prompt"""
agent = create_deep_agent(
system_prompt=research_instructions,
)
```
See our [quickstarts repo](https://github.com/langchain-ai/deepagents-quickstarts) for more examples.
### `tools`
Provide custom tools to your agent (in addition to [Built-in Tools](#built-in-tools)):
```python
from deepagents import create_deep_agent
def internet_search(query: str) -> str:
"""Run a web search"""
return tavily_client.search(query)
agent = create_deep_agent(tools=[internet_search])
```
You can also connect MCP tools via [langchain-mcp-adapters](https://github.com/langchain-ai/langchain-mcp-adapters):
```python
from langchain_mcp_adapters.client import MultiServerMCPClient
from deepagents import create_deep_agent
async def main():
mcp_client = MultiServerMCPClient(...)
mcp_tools = await mcp_client.get_tools()
agent = create_deep_agent(tools=mcp_tools)
async for chunk in agent.astream({"messages": [{"role": "user", "content": "..."}]}):
chunk["messages"][-1].pretty_print()
```
### `middleware`
Deep agents use [middleware](https://docs.langchain.com/oss/python/langchain/middleware) for extensibility (see [Built-in Tools](#built-in-tools) for defaults). Add custom middleware to inject tools, modify prompts, or hook into the agent lifecycle:
```python
from langchain_core.tools import tool
from deepagents import create_deep_agent
from langchain.agents.middleware import AgentMiddleware
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
class WeatherMiddleware(AgentMiddleware):
tools = [get_weather]
agent = create_deep_agent(middleware=[WeatherMiddleware()])
```
### `subagents`
The main agent can delegate work to sub-agents via the `task` tool (see [Built-in Tools](#built-in-tools)). You can supply custom sub-agents for context isolation and custom instructions:
```python
from deepagents import create_deep_agent
research_subagent = {
"name": "research-agent",
"description": "Used to research in-depth questions",
"system_prompt": "You are an expert researcher",
"tools": [internet_search],
"model": "openai:gpt-4o", # Optional, defaults to main agent model
}
agent = create_deep_agent(subagents=[research_subagent])
```
For complex cases, pass a pre-built LangGraph graph:
```python
from deepagents import CompiledSubAgent, create_deep_agent
custom_graph = create_agent(model=..., tools=..., system_prompt=...)
agent = create_deep_agent(
subagents=[CompiledSubAgent(
name="data-analyzer",
description="Specialized agent for data analysis",
runnable=custom_graph
)]
)
```
See the [subagents documentation](https://docs.langchain.com/oss/python/deepagents/subagents) for more details.
### `interrupt_on`
Some tools may be sensitive and require human approval before execution. Deepagents supports human-in-the-loop workflows through LangGraph's interrupt capabilities. You can configure which tools require approval; note that a checkpointer must be attached to the agent for interrupts to work.
These tool configs are passed to our prebuilt [HITL middleware](https://docs.langchain.com/oss/python/langchain/middleware#human-in-the-loop) so that the agent pauses execution and waits for feedback from the user before executing configured tools.
```python
from langchain_core.tools import tool
from deepagents import create_deep_agent
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
tools=[get_weather],
interrupt_on={
"get_weather": {
"allowed_decisions": ["approve", "edit", "reject"]
},
}
)
```
See the [human-in-the-loop documentation](https://docs.langchain.com/oss/python/deepagents/human-in-the-loop) for more details.
### `backend`
Deep agents use pluggable backends to control how filesystem operations work. By default, files are stored in the agent's ephemeral state. You can configure different backends for local disk access, persistent cross-conversation storage, or hybrid routing.
```python
from deepagents import create_deep_agent
from deepagents.backends import FilesystemBackend
agent = create_deep_agent(
backend=FilesystemBackend(root_dir="/path/to/project"),
)
```
Available backends include:
- **StateBackend** (default): Ephemeral files stored in agent state
- **FilesystemBackend**: Real disk operations under a root directory
- **StoreBackend**: Persistent storage using LangGraph Store
- **CompositeBackend**: Route different paths to different backends
See the [backends documentation](https://docs.langchain.com/oss/python/deepagents/backends) for more details.
### Long-term Memory
Deep agents can maintain persistent memory across conversations using a `CompositeBackend` that routes specific paths to durable storage.
This enables hybrid memory where working files remain ephemeral while important data (like user preferences or knowledge bases) persists across threads.
```python
from deepagents import create_deep_agent
from deepagents.backends import CompositeBackend, StateBackend, StoreBackend
from langgraph.store.memory import InMemoryStore
agent = create_deep_agent(
backend=CompositeBackend(
default=StateBackend(),
routes={"/memories/": StoreBackend(store=InMemoryStore())},
),
)
```
Files under `/memories/` will persist across all conversations, while other paths remain temporary. Use cases include:
- Preserving user preferences across sessions
- Building knowledge bases from multiple conversations
- Self-improving instructions based on feedback
- Maintaining research progress across sessions
See the [long-term memory documentation](https://docs.langchain.com/oss/python/deepagents/long-term-memory) for more details.
## Built-in Tools
<img src=".github/images/deepagents_tools.png" alt="deep agent" width="600"/>
Every deep agent created with `create_deep_agent` comes with a standard set of tools:
| Tool Name | Description | Provided By |
|-----------|-------------|-------------|
| `write_todos` | Create and manage structured task lists for tracking progress through complex workflows | TodoListMiddleware |
| `read_todos` | Read the current todo list state | TodoListMiddleware |
| `ls` | List all files in a directory (requires absolute path) | FilesystemMiddleware |
| `read_file` | Read content from a file with optional pagination (offset/limit parameters) | FilesystemMiddleware |
| `write_file` | Create a new file or completely overwrite an existing file | FilesystemMiddleware |
| `edit_file` | Perform exact string replacements in files | FilesystemMiddleware |
| `glob` | Find files matching a pattern (e.g., `**/*.py`) | FilesystemMiddleware |
| `grep` | Search for text patterns within files | FilesystemMiddleware |
| `execute`* | Run shell commands in a sandboxed environment | FilesystemMiddleware |
| `task` | Delegate tasks to specialized sub-agents with isolated context windows | SubAgentMiddleware |
The `execute` tool is only available if the backend implements `SandboxBackendProtocol`. By default, the in-memory state backend is used, which does not support command execution. As shown in the table above, these tools (along with other capabilities) are provided by the default middleware.
See the [agent harness documentation](https://docs.langchain.com/oss/python/deepagents/harness) for more details on built-in tools and capabilities.
## Built-in Middleware
`deepagents` uses middleware under the hood. Here is the list of the middleware used.
| Middleware | Purpose |
|------------|---------|
| **TodoListMiddleware** | Task planning and progress tracking |
| **FilesystemMiddleware** | File operations and context offloading (auto-saves large results) |
| **SubAgentMiddleware** | Delegate tasks to isolated sub-agents |
| **SummarizationMiddleware** | Auto-summarizes when context exceeds 170k tokens |
| **AnthropicPromptCachingMiddleware** | Caches system prompts to reduce costs (Anthropic only) |
| **PatchToolCallsMiddleware** | Fixes dangling tool calls from interruptions |
| **HumanInTheLoopMiddleware** | Pauses execution for human approval (requires `interrupt_on` config) |
## Built-in prompts
The middleware automatically adds instructions about the standard tools. Your custom instructions should **complement, not duplicate** these defaults:
#### From [TodoListMiddleware](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/middleware/todo.py)
- Explains when to use `write_todos` and `read_todos`
- Guidance on marking tasks completed
- Best practices for todo list management
- When NOT to use todos (simple tasks)
#### From [FilesystemMiddleware](libs/deepagents/deepagents/middleware/filesystem.py)
- Lists all filesystem tools (`ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`, `execute`*)
- Explains that file paths must start with `/`
- Describes each tool's purpose and parameters
- Notes about context offloading for large tool results
#### From [SubAgentMiddleware](libs/deepagents/deepagents/middleware/subagents.py)
- Explains the `task()` tool for delegating to sub-agents
- When to use sub-agents vs when NOT to use them
- Guidance on parallel execution
- Subagent lifecycle (spawn → run → return → reconcile)
## Security Considerations
### Trust Model
Deepagents follows a "trust the LLM" model similar to Claude Code. The agent can perform any action the underlying tools allow. Security boundaries should be enforced at the tool/sandbox level, not by expecting the LLM to self-police.

View File

@@ -0,0 +1,56 @@
# Declare every command target as phony so a stray file with the same name
# (e.g. ./toad or ./test) can never shadow it.
.PHONY: all lint lint_diff format format_diff test test_watch toad help

# Default target executed when no arguments are given to make.
all: help

######################
# TESTING AND COVERAGE
######################

# Define a variable for the test file path.
TEST_FILE ?= tests/

test:
	uv run pytest --disable-socket --allow-unix-socket $(TEST_FILE) --timeout 10

test_watch:
	uv run ptw . -- $(TEST_FILE)

toad:
	uv run toad acp "deepacp"

######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
lint format: PYTHON_FILES=deepagents_acp/ tests/
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=. --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')

lint lint_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff check $(PYTHON_FILES) --diff
#	[ "$(PYTHON_FILES)" = "" ] || uv run mypy $(PYTHON_FILES)

format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff check --fix $(PYTHON_FILES)

######################
# HELP
######################

help:
	@echo '===================='
	@echo '-- LINTING --'
	@echo 'format - run code formatters'
	@echo 'lint - run linters'
	@echo '-- TESTS --'
	@echo 'test - run unit tests'
	@echo 'test TEST_FILE=<test_file> - run all tests in file'
	@echo '-- DOCUMENTATION tasks are from the top-level Makefile --'

View File

@@ -0,0 +1,3 @@
# ACP
Work in progress support for Agent Client Protocol

View File

@@ -0,0 +1,655 @@
"""DeepAgents ACP server implementation."""
from __future__ import annotations
import asyncio
import uuid
from typing import Any, Literal
from acp import (
Agent,
AgentSideConnection,
PROTOCOL_VERSION,
stdio_streams,
)
from acp.schema import (
AgentMessageChunk,
InitializeRequest,
InitializeResponse,
NewSessionRequest,
NewSessionResponse,
PromptRequest,
PromptResponse,
SessionNotification,
TextContentBlock,
Implementation,
AgentThoughtChunk,
ToolCallProgress,
ContentToolCallContent,
LoadSessionResponse,
SetSessionModeResponse,
SetSessionModelResponse,
CancelNotification,
LoadSessionRequest,
SetSessionModeRequest,
SetSessionModelRequest,
AgentPlanUpdate,
PlanEntry,
PermissionOption,
RequestPermissionRequest,
AllowedOutcome,
DeniedOutcome,
ToolCall as ACPToolCall,
)
from deepagents import create_deep_agent
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import AIMessage, AIMessageChunk, ToolMessage
from langchain_core.messages.content import ToolCall
from langchain_core.tools import tool
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph.state import CompiledStateGraph
from langgraph.types import Command, Interrupt
class DeepagentsACP(Agent):
"""ACP Agent implementation wrapping deepagents."""
def __init__(
    self,
    connection: AgentSideConnection,
    agent_graph: CompiledStateGraph,
) -> None:
    """Wrap a pre-compiled deepagents graph behind an ACP agent.

    Args:
        connection: ACP connection used to talk to the client.
        agent_graph: Compiled LangGraph StateGraph (the output of
            create_deep_agent), shared across all sessions.
    """
    # Per-session bookkeeping: session_id -> {"agent": ..., "thread_id": ...}.
    self._sessions: dict[str, dict[str, Any]] = {}
    # Maps tool_call_id -> ToolCall TypedDict so that a later ToolMessage
    # can be matched back to the AI message that issued the call.
    self._tool_calls: dict[str, ToolCall] = {}
    self._connection = connection
    self._agent_graph = agent_graph
async def initialize(
    self,
    params: InitializeRequest,
) -> InitializeResponse:
    """Answer the ACP handshake with the protocol version and server identity."""
    server_info = Implementation(
        name="DeepAgents ACP Server",
        version="0.1.0",
        title="DeepAgents ACP Server",
    )
    return InitializeResponse(
        protocolVersion=PROTOCOL_VERSION,
        agentInfo=server_info,
    )
async def newSession(
    self,
    params: NewSessionRequest,
) -> NewSessionResponse:
    """Register a fresh session backed by the shared agent graph."""
    # Each session gets its own LangGraph thread id so checkpointed state
    # never leaks between conversations; the compiled graph itself is shared.
    session_id = str(uuid.uuid4())
    self._sessions[session_id] = {
        "agent": self._agent_graph,
        "thread_id": str(uuid.uuid4()),
    }
    return NewSessionResponse(sessionId=session_id)
async def _handle_ai_message_chunk(
    self,
    params: PromptRequest,
    message: AIMessageChunk,
) -> None:
    """Forward a streamed model chunk to the client as session updates.

    Text blocks become ``agent_message_chunk`` updates and reasoning blocks
    become ``agent_thought_chunk`` updates; empty payloads and every other
    block type (tool call chunks, media, ...) are silently skipped.

    Args:
        params: The prompt request parameters (supplies the session id).
        message: One streamed AIMessageChunk of the model response.

    Note:
        message.content_blocks yields ContentBlock TypedDicts discriminated
        by their "type" field ("text" carries "text", "reasoning" carries
        "reasoning", plus tool_call_chunk/image/audio/... variants).
    """
    for part in message.content_blocks:
        kind = part.get("type")
        if kind == "text":
            payload = part.get("text", "")
            if not payload:  # drop empty text chunks
                continue
            update = AgentMessageChunk(
                content=TextContentBlock(text=payload, type="text"),
                sessionUpdate="agent_message_chunk",
            )
        elif kind == "reasoning":
            payload = part.get("reasoning", "")
            if not payload:  # "reasoning" is NotRequired and may be empty
                continue
            update = AgentThoughtChunk(
                content=TextContentBlock(text=payload, type="text"),
                sessionUpdate="agent_thought_chunk",
            )
        else:
            continue
        await self._connection.sessionUpdate(
            SessionNotification(update=update, sessionId=params.sessionId)
        )
async def _handle_completed_tool_calls(
    self,
    params: PromptRequest,
    message: AIMessage,
) -> None:
    """Emit a pending ``tool_call_update`` for each tool call on an AIMessage.

    Each call is also cached in ``self._tool_calls`` so the matching
    ToolMessage can later be correlated by ``tool_call_id``.

    Args:
        params: The prompt request parameters
        message: An AIMessage containing tool_calls

    Note:
        According to LangChain's AIMessage type, message.tool_calls is a
        list[ToolCall] where ToolCall is a TypedDict with required
        "name" (str), "args" (dict) and "id" (str | None) keys, plus an
        optional "type" key.
    """
    # tool_calls is a defined field on AIMessage; nothing to do when empty.
    if not message.tool_calls:
        return
    for tool_call in message.tool_calls:
        tool_call_id = tool_call["id"]  # str | None
        tool_name = tool_call["name"]  # str
        tool_args = tool_call["args"]  # dict[str, Any]
        # Skip tool calls without an ID (shouldn't happen in practice)
        if tool_call_id is None:
            continue
        # Todo updates are surfaced to the client as plan updates by
        # _handle_todo_update, so skip them here instead of raising
        # (the previous NotImplementedError would have aborted the stream).
        # NOTE(review): deepagents' todo tools are named "write_todos"/
        # "read_todos"; confirm whether this guard should match those names.
        if tool_name == "todo":
            continue
        # Announce the call to the client as pending.
        await self._connection.sessionUpdate(
            SessionNotification(
                update=ToolCallProgress(
                    sessionUpdate="tool_call_update",
                    toolCallId=tool_call_id,
                    title=tool_name,
                    rawInput=tool_args,
                    status="pending",
                ),
                sessionId=params.sessionId,
            )
        )
        # Store the tool call for later matching with its ToolMessage.
        self._tool_calls[tool_call_id] = tool_call
async def _handle_tool_message(
    self,
    params: PromptRequest,
    tool_call: ToolCall,
    message: ToolMessage,
) -> None:
    """Report a tool execution result to the client.

    Sends a ``tool_call_update`` whose status is "failed" when the
    ToolMessage carries an error status and "completed" otherwise, with
    any non-empty text blocks re-packaged as ACP content.

    Args:
        params: The prompt request parameters.
        tool_call: The original ToolCall this message is responding to.
        message: The ToolMessage holding the tool execution result.

    Note:
        ToolMessage inherits content from BaseMessage and additionally
        carries tool_call_id (str) and a status attribute (e.g. "error"
        for failed tool calls).
    """
    # LangChain marks failed tool calls with status == "error".
    failed = hasattr(message, "status") and message.status == "error"
    status: Literal["completed", "failed"] = "failed" if failed else "completed"
    # Re-package every non-empty text block from the tool output.
    content_blocks = [
        ContentToolCallContent(
            type="content",
            content=TextContentBlock(text=block.get("text", ""), type="text"),
        )
        for block in message.content_blocks
        if block.get("type") == "text" and block.get("text", "")
    ]
    await self._connection.sessionUpdate(
        SessionNotification(
            update=ToolCallProgress(
                sessionUpdate="tool_call_update",
                toolCallId=message.tool_call_id,
                title=tool_call["name"],
                content=content_blocks,
                rawOutput=message.content,
                status=status,
            ),
            sessionId=params.sessionId,
        )
    )
async def _handle_todo_update(
    self,
    params: PromptRequest,
    todos: list[dict[str, Any]],
) -> None:
    """Mirror the agent's todo list to the client as an ACP plan update.

    Args:
        params: The prompt request parameters.
        todos: Todo dicts produced by the deepagents write_todos tool,
            shaped like
            ``{"content": str, "status": "pending"|"in_progress"|"completed"}``.
    """
    valid_statuses = ("pending", "in_progress", "completed")
    entries = []
    for item in todos:
        raw_status = item.get("status", "pending")
        # Unknown statuses are coerced to "pending" rather than rejected.
        safe_status = raw_status if raw_status in valid_statuses else "pending"
        entries.append(
            PlanEntry(
                content=item.get("content", ""),
                status=safe_status,  # type: ignore
                # Todos carry no priority, so default every entry to "medium".
                priority="medium",
            )
        )
    await self._connection.sessionUpdate(
        SessionNotification(
            update=AgentPlanUpdate(sessionUpdate="plan", entries=entries),
            sessionId=params.sessionId,
        )
    )
async def _handle_interrupt(
    self,
    params: PromptRequest,
    interrupt: Interrupt,
) -> list[dict[str, Any]]:
    """Translate a LangGraph HITL interrupt into ACP permission requests.

    For every pending action request in the interrupt, asks the client for
    permission (sequentially, one request per action) and converts the
    client's outcome into a HITL decision dict.

    Args:
        params: The prompt request parameters (supplies the session id).
        interrupt: The LangGraph interrupt; its ``value`` is expected to hold
            - action_requests: [{'name': str, 'args': dict, ...}, ...]
            - review_configs: [{'action_name': str,
                                'allowed_decisions': list[str]}, ...]

    Returns:
        One decision dict per action request, in order, suitable for
        ``Command(resume={...})`` — either {"type": "approve"} or
        {"type": "reject", "message": ...}.
    """
    interrupt_data = interrupt.value
    action_requests = interrupt_data.get("action_requests", [])
    review_configs = interrupt_data.get("review_configs", [])
    # Create a mapping of action names to their allowed decisions.
    allowed_decisions_map = {}
    for review_config in review_configs:
        action_name = review_config.get("action_name")
        allowed_decisions = review_config.get("allowed_decisions", [])
        allowed_decisions_map[action_name] = allowed_decisions
    # Collect decisions for all action requests.
    decisions = []
    for action_request in action_requests:
        tool_name = action_request.get("name")
        tool_args = action_request.get("args", {})
        # Fall back to approve/reject when no review config names this tool.
        allowed_decisions = allowed_decisions_map.get(
            tool_name, ["approve", "reject"]
        )
        # Build permission options based on allowed decisions.
        # NOTE(review): an "edit" entry in allowed_decisions currently adds
        # no option, so the client is never offered an edit choice.
        options = []
        if "approve" in allowed_decisions:
            options.append(
                PermissionOption(
                    optionId="allow-once",
                    name="Allow once",
                    kind="allow_once",
                )
            )
        if "reject" in allowed_decisions:
            options.append(
                PermissionOption(
                    optionId="reject-once",
                    name="Reject",
                    kind="reject_once",
                )
            )
        # Generate a fresh tool call ID for this permission request; the
        # original call's ID is not looked up from self._tool_calls yet.
        tool_call_id = f"perm_{uuid.uuid4().hex[:8]}"
        # Create the ACP ToolCall object shown alongside the permission prompt.
        acp_tool_call = ACPToolCall(
            toolCallId=tool_call_id,
            title=tool_name,
            rawInput=tool_args,
            status="pending",
        )
        # Block until the client answers this permission request.
        response = await self._connection.requestPermission(
            RequestPermissionRequest(
                sessionId=params.sessionId,
                toolCall=acp_tool_call,
                options=options,
            )
        )
        # Convert the ACP outcome into a LangGraph HITL decision.
        outcome = response.outcome
        if isinstance(outcome, AllowedOutcome):
            option_id = outcome.optionId
            if option_id == "allow-once":
                # Check whether the selected option carried edit metadata.
                # NOTE(review): field_meta is never set when options are
                # built above, so both branches approve unchanged.
                selected_option = next(
                    (opt for opt in options if opt.optionId == option_id), None
                )
                if selected_option and selected_option.field_meta:
                    # This is an edit - for now, just approve
                    # TODO: Implement actual edit functionality
                    decisions.append({"type": "approve"})
                else:
                    decisions.append({"type": "approve"})
            elif option_id == "edit":
                # Unreachable today: no option with optionId "edit" is created.
                # TODO: Implement actual edit functionality to collect edited args
                decisions.append({"type": "approve"})
        elif isinstance(outcome, DeniedOutcome):
            decisions.append(
                {
                    "type": "reject",
                    "message": "Action rejected by user",
                }
            )
    return decisions
async def _stream_and_handle_updates(
    self,
    params: PromptRequest,
    agent: Any,
    stream_input: dict[str, Any] | Command,
    config: dict[str, Any],
) -> list[Interrupt]:
    """Stream agent execution and handle updates, returning any interrupts.

    Streams with both "messages" and "updates" modes so that token-level
    chunks and node-level state updates can each be forwarded to the ACP
    client as they arrive.

    Args:
        params: The prompt request parameters
        agent: The agent to stream from
        stream_input: Input to pass to agent.astream (initial message or Command)
        config: Configuration with thread_id

    Returns:
        List of interrupts that occurred during streaming
    """
    interrupts = []
    async for stream_mode, data in agent.astream(
        stream_input,
        config=config,
        stream_mode=["messages", "updates"],
    ):
        if stream_mode == "messages":
            # "messages" mode yields (message, metadata) tuples; only
            # streaming AIMessageChunk instances are forwarded to the client.
            message, metadata = data
            if isinstance(message, AIMessageChunk):
                await self._handle_ai_message_chunk(params, message)
        elif stream_mode == "updates":
            # "updates" mode yields {node_name: state_update} mappings for
            # each graph node that completed.
            for node_name, update in data.items():
                # "__interrupt__" is a pseudo-node whose update is the list
                # of pending interrupts (e.g. HITL approval requests).
                if node_name == "__interrupt__":
                    interrupts.extend(update)
                    continue
                # Only process model and tools nodes
                if node_name not in ("model", "tools"):
                    continue
                # Forward todo-list changes produced by the tools node as an
                # ACP plan update.
                if node_name == "tools" and "todos" in update:
                    todos = update.get("todos", [])
                    if todos:
                        await self._handle_todo_update(params, todos)
                # Get messages from the update
                messages = update.get("messages", [])
                if not messages:
                    continue
                # Process the last message from this node
                last_message = messages[-1]
                # Handle completed AI messages from model node
                if node_name == "model" and isinstance(last_message, AIMessage):
                    # Check if this AIMessage has tool calls
                    if last_message.tool_calls:
                        await self._handle_completed_tool_calls(
                            params, last_message
                        )
                # Handle tool execution results from tools node
                elif node_name == "tools" and isinstance(last_message, ToolMessage):
                    # Look up the original tool call by ID so the ACP update
                    # can reference the matching toolCallId.
                    tool_call = self._tool_calls.get(last_message.tool_call_id)
                    if tool_call:
                        await self._handle_tool_message(
                            params, tool_call, last_message
                        )
    return interrupts
async def prompt(
    self,
    params: PromptRequest,
) -> PromptResponse:
    """Handle a user prompt and stream responses.

    Extracts the text from the prompt content blocks, streams the agent's
    response, and loops through any human-in-the-loop interrupts (asking
    the client for permission each time) until the run completes.

    Args:
        params: The prompt request, including session id and content blocks.

    Returns:
        A PromptResponse with stopReason "end_turn" once streaming finishes.

    Raises:
        ValueError: If the session id does not refer to a known session.
    """
    session_id = params.sessionId
    session = self._sessions.get(session_id)
    if session is None:
        # Fail fast with a clear error instead of an opaque TypeError when
        # subscripting None below.
        raise ValueError(f"Unknown session: {session_id}")
    # Extract text from prompt content blocks; blocks may be objects with a
    # .text attribute or plain dicts.
    prompt_text = ""
    for block in params.prompt:
        if hasattr(block, "text"):
            prompt_text += block.text
        elif isinstance(block, dict) and "text" in block:
            prompt_text += block["text"]
    # Stream the agent's response
    agent = session["agent"]
    thread_id = session["thread_id"]
    config = {"configurable": {"thread_id": thread_id}}
    # Start with the initial user message
    stream_input: dict[str, Any] | Command = {
        "messages": [{"role": "user", "content": prompt_text}]
    }
    # Loop until there are no more interrupts
    while True:
        # Stream and collect any interrupts
        interrupts = await self._stream_and_handle_updates(
            params, agent, stream_input, config
        )
        # If no interrupts, we're done
        if not interrupts:
            break
        # Collect a decision for every interrupt, then resume the graph
        # with them.
        all_decisions = []
        for interrupt in interrupts:
            decisions = await self._handle_interrupt(params, interrupt)
            all_decisions.extend(decisions)
        stream_input = Command(resume={"decisions": all_decisions})
    return PromptResponse(stopReason="end_turn")
async def authenticate(self, params: Any) -> Any | None:
    """Authenticate (optional).

    Args:
        params: Authentication parameters from the client (unused).

    Returns:
        None — authentication is not required by this server.
    """
    # Authentication not required for now
    return None
async def extMethod(self, method: str, params: dict[str, Any]) -> dict[str, Any]:
    """Handle extension methods (optional).

    Args:
        method: Name of the requested extension method.
        params: Arguments for the extension method.

    Raises:
        NotImplementedError: Always — no extension methods are supported.
    """
    raise NotImplementedError(f"Extension method {method} not supported")
async def extNotification(self, method: str, params: dict[str, Any]) -> None:
    """Handle extension notifications (optional).

    Notifications are fire-and-forget, so unsupported ones are ignored
    rather than raising.
    """
    pass
async def cancel(self, params: CancelNotification) -> None:
    """Cancel a running session.

    Args:
        params: Notification identifying the session to cancel.
    """
    # TODO: Implement cancellation logic (currently a no-op).
    pass
async def loadSession(
    self,
    params: LoadSessionRequest,
) -> LoadSessionResponse | None:
    """Load an existing session (optional).

    Returns:
        None — session persistence is not implemented yet; it would require
        serializing/deserializing session state.
    """
    return None
async def setSessionMode(
    self,
    params: SetSessionModeRequest,
) -> SetSessionModeResponse | None:
    """Set session mode (optional).

    Returns:
        None — mode switching is not supported. Could be used to switch
        between different agent modes in the future.
    """
    return None
async def setSessionModel(
    self,
    params: SetSessionModelRequest,
) -> SetSessionModelResponse | None:
    """Set session model (optional).

    Returns:
        None — not supported; the model is fixed at agent graph creation
        time.
    """
    return None
async def main() -> None:
    """Main entry point for running the ACP server.

    Builds a demo deep agent (Anthropic model plus a sample weather tool
    gated by human-in-the-loop approval) and serves it over ACP on stdio.
    """
    # Example of wiring the full CLI agent instead of the demo agent below:
    # from deepagents_cli.agent import create_agent_with_config
    # from deepagents_cli.config import create_model
    # from deepagents_cli.tools import fetch_url, http_request, web_search
    #
    # # Create model using CLI configuration
    # model = create_model()
    #
    # # Setup tools - conditionally include web_search if Tavily is available
    # tools = [http_request, fetch_url]
    # if os.environ.get("TAVILY_API_KEY"):
    #     tools.append(web_search)
    #
    # # Create CLI agent with shell access and other CLI features
    # # Using default assistant_id "agent" for ACP server
    # agent_graph, composite_backend = create_agent_with_config(
    #     model=model,
    #     assistant_id="agent",
    #     tools=tools,
    #     sandbox=None,  # Local mode
    #     sandbox_type=None,
    #     system_prompt=None,  # Use default CLI system prompt
    #     auto_approve=False,  # Require user approval for destructive operations
    #     enable_memory=True,  # Enable persistent memory
    #     enable_skills=True,  # Enable custom skills
    #     enable_shell=True,  # Enable shell access
    # )
    #
    # Define default tools
    from langchain.agents.middleware import HumanInTheLoopMiddleware

    @tool()
    def get_weather(location: str) -> str:
        """Get the weather for a given location."""
        return f"The weather in {location} is sunny with a high of 75°F."

    # Create the agent graph with default configuration
    model = ChatAnthropic(
        model_name="claude-sonnet-4-5-20250929",
        max_tokens=20000,
    )
    agent_graph = create_deep_agent(
        model=model,
        tools=[get_weather],
        checkpointer=InMemorySaver(),
        # Require user approval before each get_weather call; this exercises
        # the ACP permission-request flow end to end.
        middleware=[
            HumanInTheLoopMiddleware(
                interrupt_on={
                    "get_weather": True,
                }
            )
        ],
    )
    # Start the ACP server over stdio and block until the process is killed.
    reader, writer = await stdio_streams()
    AgentSideConnection(lambda conn: DeepagentsACP(conn, agent_graph), writer, reader)
    await asyncio.Event().wait()
def cli_main() -> None:
    """Synchronous CLI entry point for the ACP server.

    Runs the async `main` server loop until the process is terminated.
    """
    asyncio.run(main())


if __name__ == "__main__":
    cli_main()

View File

@@ -0,0 +1,58 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "deepagents-acp"
version = "0.0.1"
description = "Agent Client Protocol integration for DeepAgents"
readme = "README.md"
requires-python = ">=3.14"
license = {text = "MIT"}
authors = [
]
maintainers = [
]
keywords = ["agent", "acp", "agent-client-protocol", "deepagents", "ai-agents"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.14",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"agent-client-protocol>=0.6.2",
"deepagents",
"deepagents-cli",
]
[dependency-groups]
dev = [
"batrachian-toad>=0.5.2",
]
test = [
"pytest>=8.3.4",
"pytest-asyncio>=0.25.3",
"pytest-cov>=6.0.0",
"pytest-mock>=3.14.0",
"pytest-socket>=0.7.0",
"pytest-timeout>=2.3.1",
"ruff>=0.9.7",
"dirty-equals>=0.11",
]
[project.urls]
Homepage = "https://github.com/langchain-ai/deepagents"
Repository = "https://github.com/langchain-ai/deepagents"
Issues = "https://github.com/langchain-ai/deepagents/issues"
[project.scripts]
deepacp = "deepagents_acp.server:cli_main"
[tool.pytest.ini_options]
asyncio_mode = "auto" # or "strict"

View File

@@ -0,0 +1,231 @@
"""Fake chat models for testing purposes."""
import re
from collections.abc import Callable, Iterator, Sequence
from typing import Any, Literal, cast
from typing_extensions import override
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
class GenericFakeChatModel(BaseChatModel):
    """Generic fake chat model that can be used to test the chat model interface.

    * Chat model should be usable in both sync and async tests
    * Invokes `on_llm_new_token` to allow for testing of callback related code for new
      tokens.
    * Includes configurable logic to break messages into chunks for streaming.

    Args:
        messages: An iterator over messages (use `iter()` to convert a list)
        stream_delimiter: How to chunk content when streaming. The value is
            passed directly to `re.split`. Options:

            - None (default): Return content in a single chunk (no streaming)
            - A plain string delimiter (e.g., " "): Split content on this
              delimiter. Note that without a capture group the delimiter
              itself is discarded, not yielded as a chunk.
            - A regex pattern with a capture group (e.g., r"(\\s)"): Split
              using the pattern while preserving the delimiters as separate
              chunks.

    Examples:
        # No streaming - single chunk
        model = GenericFakeChatModel(messages=iter([AIMessage(content="Hello world")]))

        # Split on whitespace, discarding the spaces
        model = GenericFakeChatModel(
            messages=iter([AIMessage(content="Hello world")]),
            stream_delimiter=" "
        )
        # Yields: "Hello", "world"

        # Split on whitespace with a capture group to keep the delimiters
        model = GenericFakeChatModel(
            messages=iter([AIMessage(content="Hello world")]),
            stream_delimiter=r"(\s)"
        )
        # Yields: "Hello", " ", "world"
    """

    messages: Iterator[AIMessage | str]
    """Get an iterator over messages.

    This can be expanded to accept other types like Callables / dicts / strings
    to make the interface more generic if needed.

    !!! note
        if you want to pass a list, you can use `iter` to convert it to an iterator.
    """

    stream_delimiter: str | None = None
    """Delimiter for chunking content during streaming.

    - None (default): No chunking, returns content in a single chunk
    - String or regex pattern: Passed to `re.split()`; use a capture group
      (e.g. r"(\\s)") if the delimiters themselves should be preserved as
      chunks.
    """

    @override
    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Return the next scripted message as a single generation."""
        message = next(self.messages)
        # Allow plain strings in the script by wrapping them as AIMessages.
        message_ = AIMessage(content=message) if isinstance(message, str) else message
        generation = ChatGeneration(message=message_)
        return ChatResult(generations=[generation])

    def _stream(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Yield the next scripted message as a sequence of chunks.

        Content is chunked according to `stream_delimiter`; tool calls are
        attached only to the final content chunk, and `additional_kwargs`
        are streamed as trailing chunks.
        """
        chat_result = self._generate(
            messages, stop=stop, run_manager=run_manager, **kwargs
        )
        if not isinstance(chat_result, ChatResult):
            msg = (
                f"Expected generate to return a ChatResult, "
                f"but got {type(chat_result)} instead."
            )
            raise ValueError(msg)  # noqa: TRY004
        message = chat_result.generations[0].message
        if not isinstance(message, AIMessage):
            msg = (
                f"Expected invoke to return an AIMessage, "
                f"but got {type(message)} instead."
            )
            raise ValueError(msg)  # noqa: TRY004
        content = message.content
        tool_calls = message.tool_calls if hasattr(message, "tool_calls") else []
        if content:
            if not isinstance(content, str):
                msg = "Expected content to be a string."
                raise ValueError(msg)
            # Chunk content based on stream_delimiter configuration
            if self.stream_delimiter is None:
                # No streaming - return entire content in a single chunk
                content_chunks = [content]
            else:
                # Split content using the delimiter
                # Use re.split to support both string and regex patterns
                content_chunks = cast(
                    "list[str]", re.split(self.stream_delimiter, content)
                )
                # Remove empty strings that can result from splitting
                content_chunks = [chunk for chunk in content_chunks if chunk]
            for idx, token in enumerate(content_chunks):
                # Include tool_calls only in the last chunk
                is_last = idx == len(content_chunks) - 1
                chunk_tool_calls = tool_calls if is_last else []
                chunk = ChatGenerationChunk(
                    message=AIMessageChunk(
                        content=token,
                        id=message.id,
                        tool_calls=chunk_tool_calls,
                    )
                )
                # Mark the final chunk, unless additional_kwargs still need
                # to be streamed afterwards.
                if (
                    is_last
                    and isinstance(chunk.message, AIMessageChunk)
                    and not message.additional_kwargs
                ):
                    chunk.message.chunk_position = "last"
                if run_manager:
                    run_manager.on_llm_new_token(token, chunk=chunk)
                yield chunk
        elif tool_calls:
            # If there's no content but there are tool_calls, yield a single chunk with them
            chunk = ChatGenerationChunk(
                message=AIMessageChunk(
                    content="",
                    id=message.id,
                    tool_calls=tool_calls,
                    chunk_position="last",
                )
            )
            if run_manager:
                run_manager.on_llm_new_token("", chunk=chunk)
            yield chunk
        if message.additional_kwargs:
            for key, value in message.additional_kwargs.items():
                # We should further break down the additional kwargs into chunks
                # Special case for function call
                if key == "function_call":
                    for fkey, fvalue in value.items():
                        if isinstance(fvalue, str):
                            # Break function call by `,`
                            fvalue_chunks = cast("list[str]", re.split(r"(,)", fvalue))
                            for fvalue_chunk in fvalue_chunks:
                                chunk = ChatGenerationChunk(
                                    message=AIMessageChunk(
                                        id=message.id,
                                        content="",
                                        additional_kwargs={
                                            "function_call": {fkey: fvalue_chunk}
                                        },
                                    )
                                )
                                if run_manager:
                                    run_manager.on_llm_new_token(
                                        "",
                                        chunk=chunk,  # No token for function call
                                    )
                                yield chunk
                        else:
                            chunk = ChatGenerationChunk(
                                message=AIMessageChunk(
                                    id=message.id,
                                    content="",
                                    additional_kwargs={"function_call": {fkey: fvalue}},
                                )
                            )
                            if run_manager:
                                run_manager.on_llm_new_token(
                                    "",
                                    chunk=chunk,  # No token for function call
                                )
                            yield chunk
                else:
                    chunk = ChatGenerationChunk(
                        message=AIMessageChunk(
                            id=message.id, content="", additional_kwargs={key: value}
                        )
                    )
                    if run_manager:
                        run_manager.on_llm_new_token(
                            "",
                            chunk=chunk,  # No token for function call
                        )
                    yield chunk

    @property
    def _llm_type(self) -> str:
        """Identifier for this chat model type."""
        return "generic-fake-chat-model"

    def bind_tools(
        self,
        tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
        *,
        tool_choice: str | None = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, AIMessage]:
        """Override bind_tools to return self for testing purposes."""
        return self

View File

@@ -0,0 +1,544 @@
from contextlib import asynccontextmanager
from typing import Any
from acp.schema import NewSessionRequest, PromptRequest
from acp.schema import (
TextContentBlock,
RequestPermissionRequest,
RequestPermissionResponse,
AllowedOutcome,
)
from dirty_equals import IsUUID
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.tools import tool
from langgraph.checkpoint.memory import InMemorySaver
from deepagents_acp.server import DeepagentsACP
from tests.chat_model import GenericFakeChatModel
class FakeAgentSideConnection:
    """Minimal in-memory stand-in for AgentSideConnection used in tests.

    Records every session update and permission request it receives so
    tests can assert on the exact notification traffic.
    """

    def __init__(self) -> None:
        """Set up empty recording buffers and no canned permission response."""
        self.calls: list[dict[str, Any]] = []
        self.permission_requests: list[RequestPermissionRequest] = []
        self.permission_response: RequestPermissionResponse | None = None

    async def sessionUpdate(self, notification: Any) -> None:
        """Record a sessionUpdate notification."""
        self.calls.append(notification)

    async def requestPermission(
        self, request: RequestPermissionRequest
    ) -> RequestPermissionResponse:
        """Record the permission request and answer with the canned response.

        Falls back to approving via the "allow-once" option when no
        response has been configured.
        """
        self.permission_requests.append(request)
        if self.permission_response is not None:
            return self.permission_response
        approve = AllowedOutcome(
            outcome="selected",
            optionId="allow-once",
        )
        return RequestPermissionResponse(outcome=approve)
@tool(description="Get the current weather for a location")
def get_weather_tool(location: str) -> str:
    """Get the current weather for a location.

    Args:
        location: The city and state, e.g. "San Francisco, CA"

    Returns:
        A string describing the current weather
    """
    # Return fake weather data for testing; tests assert on this exact string.
    return f"The weather in {location} is sunny and 72°F"
@asynccontextmanager
async def deepagents_acp_test_context(
    messages: list[BaseMessage],
    prompt_request: PromptRequest,
    tools: list[Any] | None = None,
    stream_delimiter: str | None = r"(\s)",
    middleware: list[Any] | None = None,
):
    """Drive a DeepagentsACP instance through one prompt for testing.

    Builds a fake model that replays *messages*, wraps it in a deep agent,
    opens a session, runs *prompt_request* through it, and yields the fake
    connection so callers can inspect the recorded sessionUpdate calls.

    Args:
        messages: Messages the fake model should emit, in order
        prompt_request: The prompt request to send (its sessionId is filled in)
        tools: Tools to register on the agent (defaults to [])
        stream_delimiter: How to chunk content when streaming (default: r"(\\s)" for whitespace)
        middleware: Optional middleware to add to the agent graph

    Yields:
        FakeAgentSideConnection: The connection object that tracks sessionUpdate calls
    """
    from deepagents.graph import create_deep_agent

    fake_connection = FakeAgentSideConnection()
    fake_model = GenericFakeChatModel(
        messages=iter(messages),
        stream_delimiter=stream_delimiter,
    )
    # Build the agent graph around the scripted fake model.
    agent_graph = create_deep_agent(
        model=fake_model,
        tools=[] if tools is None else tools,
        checkpointer=InMemorySaver(),
        middleware=middleware if middleware else [],
    )
    acp_server = DeepagentsACP(
        connection=fake_connection,
        agent_graph=agent_graph,
    )
    # Open a fresh session and point the prompt request at it.
    new_session = await acp_server.newSession(
        NewSessionRequest(cwd="/tmp", mcpServers=[])
    )
    prompt_request.sessionId = new_session.sessionId
    # Run the prompt before yielding so the connection already holds the
    # full notification trace when the caller inspects it.
    await acp_server.prompt(prompt_request)
    yield fake_connection
class TestDeepAgentsACP:
    """Tests for DeepagentsACP session creation and prompt streaming."""

    async def test_initialization(self) -> None:
        """Test that DeepagentsACP can be initialized without errors."""
        prompt_request = PromptRequest(
            sessionId="",  # Will be set by context manager
            prompt=[TextContentBlock(text="Hi!", type="text")],
        )
        async with deepagents_acp_test_context(
            messages=[AIMessage(content="Hello!")],
            prompt_request=prompt_request,
            tools=[get_weather_tool],
        ) as connection:
            # "Hello!" contains no whitespace, so the whitespace delimiter
            # produces exactly one agent_message_chunk update.
            assert len(connection.calls) == 1
            first_call = connection.calls[0].model_dump()
            assert first_call == {
                "field_meta": None,
                "sessionId": IsUUID,
                "update": {
                    "content": {
                        "annotations": None,
                        "field_meta": None,
                        "text": "Hello!",
                        "type": "text",
                    },
                    "field_meta": None,
                    "sessionUpdate": "agent_message_chunk",
                },
            }

    async def test_tool_call_and_response(self) -> None:
        """Test that DeepagentsACP handles tool calls correctly.

        This test verifies that when an AI message contains tool_calls, the agent:
        1. Detects and executes the tool call
        2. Sends tool call progress notifications (pending and completed)
        3. Streams the AI response content as chunks after tool execution

        Note: The FakeChat model streams messages but the agent graph must actually
        execute the tools for the flow to complete.
        """
        prompt_request = PromptRequest(
            sessionId="",  # Will be set by context manager
            prompt=[TextContentBlock(text="What's the weather in Paris?", type="text")],
        )
        # The fake model will be called multiple times by the agent graph:
        # 1. First call: AI decides to use the tool (with tool_calls)
        # 2. After tool execution: AI responds with the result
        async with deepagents_acp_test_context(
            messages=[
                AIMessage(
                    content="",
                    tool_calls=[
                        {
                            "name": "get_weather_tool",
                            "args": {"location": "Paris, France"},
                            "id": "call_123",
                            "type": "tool_call",
                        }
                    ],
                ),
                AIMessage(content="The weather in Paris is sunny and 72°F today!"),
            ],
            prompt_request=prompt_request,
            tools=[get_weather_tool],
        ) as connection:
            # Expected call sequence:
            # Call 0: Tool call progress (status="pending")
            # Call 1: Tool call progress (status="completed")
            # Calls 2+: Message chunks for "The weather in Paris is sunny and 72°F today!"
            tool_call_updates = [
                call.model_dump()
                for call in connection.calls
                if call.model_dump()["update"]["sessionUpdate"] == "tool_call_update"
            ]
            # Verify we have exactly 2 tool call updates
            assert len(tool_call_updates) == 2
            # Verify tool call pending with full structure
            assert tool_call_updates[0]["update"] == {
                "sessionUpdate": "tool_call_update",
                "status": "pending",
                "toolCallId": "call_123",
                "title": "get_weather_tool",
                "rawInput": {"location": "Paris, France"},
                "content": None,
                "rawOutput": None,
                "kind": None,
                "locations": None,
                "field_meta": None,
            }
            # Verify tool call completed with full structure
            assert tool_call_updates[1]["update"] == {
                "sessionUpdate": "tool_call_update",
                "status": "completed",
                "toolCallId": "call_123",
                "title": "get_weather_tool",
                "rawInput": None,  # rawInput not included in completed status
                "content": [
                    {
                        "type": "content",
                        "content": {
                            "type": "text",
                            "text": "The weather in Paris, France is sunny and 72°F",
                            "annotations": None,
                            "field_meta": None,
                        },
                    }
                ],
                "rawOutput": "The weather in Paris, France is sunny and 72°F",
                "kind": None,
                "locations": None,
                "field_meta": None,
            }
            # Verify all non-tool-call updates are message chunks
            message_chunks = [
                call.model_dump()
                for call in connection.calls
                if call.model_dump()["update"]["sessionUpdate"] == "agent_message_chunk"
            ]
            assert len(message_chunks) > 0
            for chunk in message_chunks:
                assert chunk["update"]["sessionUpdate"] == "agent_message_chunk"
                assert chunk["update"]["content"]["type"] == "text"
async def test_todo_list_handling() -> None:
    """Test that DeepagentsACP handles todo list updates correctly.

    A tools-node update containing "todos" should be forwarded to the client
    as a single ACP "plan" session update.
    """
    from deepagents.graph import create_deep_agent

    prompt_request = PromptRequest(
        sessionId="",  # Will be set by context manager
        prompt=[TextContentBlock(text="Create a shopping list", type="text")],
    )
    # Create a mock connection to track calls
    connection = FakeAgentSideConnection()
    model = GenericFakeChatModel(
        messages=iter([AIMessage(content="I'll create that shopping list for you.")]),
        stream_delimiter=r"(\s)",
    )
    # Create agent graph
    agent_graph = create_deep_agent(
        model=model,
        tools=[get_weather_tool],
        checkpointer=InMemorySaver(),
    )
    deepagents_acp = DeepagentsACP(
        connection=connection,
        agent_graph=agent_graph,
    )
    # Create a new session
    session_response = await deepagents_acp.newSession(
        NewSessionRequest(cwd="/tmp", mcpServers=[])
    )
    session_id = session_response.sessionId
    prompt_request.sessionId = session_id
    # Manually inject a tools update with todos into the agent stream
    # Simulate the graph's behavior by patching the astream method
    agent = deepagents_acp._sessions[session_id]["agent"]
    original_astream = agent.astream

    async def mock_astream(*args, **kwargs):
        # First yield the normal message chunks
        async for item in original_astream(*args, **kwargs):
            yield item
        # Then inject a tools update with todos
        yield (
            "updates",
            {
                "tools": {
                    "todos": [
                        {"content": "Buy fresh bananas", "status": "pending"},
                        {"content": "Buy whole grain bread", "status": "in_progress"},
                        {"content": "Buy organic eggs", "status": "completed"},
                    ],
                    "messages": [],
                }
            },
        )

    agent.astream = mock_astream
    # Call prompt
    await deepagents_acp.prompt(prompt_request)
    # Find the plan update in the calls
    plan_updates = [
        call.model_dump()
        for call in connection.calls
        if call.model_dump()["update"]["sessionUpdate"] == "plan"
    ]
    # Verify we got exactly one plan update with correct structure.
    # Each todo maps to a plan entry; priority defaults to "medium".
    assert len(plan_updates) == 1
    assert plan_updates[0]["update"] == {
        "sessionUpdate": "plan",
        "entries": [
            {
                "content": "Buy fresh bananas",
                "status": "pending",
                "priority": "medium",
                "field_meta": None,
            },
            {
                "content": "Buy whole grain bread",
                "status": "in_progress",
                "priority": "medium",
                "field_meta": None,
            },
            {
                "content": "Buy organic eggs",
                "status": "completed",
                "priority": "medium",
                "field_meta": None,
            },
        ],
        "field_meta": None,
    }
async def test_fake_chat_model_streaming() -> None:
    """Test to verify GenericFakeChatModel stream_delimiter API.

    This test demonstrates the different streaming modes available via stream_delimiter.
    """
    # Test 1: No streaming (stream_delimiter=None, the model default) - single chunk
    model_no_stream = GenericFakeChatModel(
        messages=iter([AIMessage(content="Hello world")]),
        stream_delimiter=None,
    )
    chunks = []
    async for chunk in model_no_stream.astream("test"):
        chunks.append(chunk)
    assert len(chunks) == 1
    assert chunks[0].content == "Hello world"
    # Test 2: Stream on whitespace using a regex with a capture group, so the
    # delimiter itself is preserved as its own chunk
    model_whitespace = GenericFakeChatModel(
        messages=iter([AIMessage(content="Hello world")]),
        stream_delimiter=r"(\s)",
    )
    chunks = []
    async for chunk in model_whitespace.astream("test"):
        chunks.append(chunk)
    # Should split into: "Hello", " ", "world"
    assert len(chunks) == 3
    assert chunks[0].content == "Hello"
    assert chunks[1].content == " "
    assert chunks[2].content == "world"
    # Test 3: Stream with tool_calls
    model_with_tools = GenericFakeChatModel(
        messages=iter(
            [
                AIMessage(
                    content="Checking weather",
                    tool_calls=[
                        {
                            "name": "get_weather_tool",
                            "args": {"location": "paris, france"},
                            "id": "call_123",
                            "type": "tool_call",
                        }
                    ],
                ),
            ]
        ),
        stream_delimiter=r"(\s)",
    )
    chunks = []
    async for chunk in model_with_tools.astream("test"):
        chunks.append(chunk)
    # Tool calls should only be in the last chunk
    assert len(chunks) > 0
    assert chunks[-1].tool_calls == [
        {
            "name": "get_weather_tool",
            "args": {"location": "paris, france"},
            "id": "call_123",
            "type": "tool_call",
        }
    ]
    # Earlier chunks should not have tool_calls
    for chunk in chunks[:-1]:
        assert chunk.tool_calls == []
async def test_human_in_the_loop_approval() -> None:
    """Test that DeepagentsACP handles HITL interrupts and permission requests correctly.

    With HumanInTheLoopMiddleware installed, a tool call should trigger a
    requestPermission round-trip; after the client approves ("allow-once"),
    the tool executes and the final AI message streams as usual.
    """
    from langchain.agents.middleware import HumanInTheLoopMiddleware

    from deepagents.graph import create_deep_agent

    prompt_request = PromptRequest(
        sessionId="",  # Will be set below
        prompt=[TextContentBlock(text="What's the weather in Tokyo?", type="text")],
    )
    # Create connection with permission response configured
    connection = FakeAgentSideConnection()
    # Set up the connection to approve the tool call
    connection.permission_response = RequestPermissionResponse(
        outcome=AllowedOutcome(
            outcome="selected",
            optionId="allow-once",
        )
    )
    model = GenericFakeChatModel(
        messages=iter(
            [
                # First message: AI decides to call the tool
                AIMessage(
                    content="",
                    tool_calls=[
                        {
                            "name": "get_weather_tool",
                            "args": {"location": "Tokyo, Japan"},
                            "id": "call_tokyo_123",
                            "type": "tool_call",
                        }
                    ],
                ),
                # Second message: AI responds with the weather result after tool execution
                AIMessage(content="The weather in Tokyo is sunny and 72°F!"),
            ]
        ),
        stream_delimiter=r"(\s)",
    )
    # Create agent graph with HITL middleware
    agent_graph = create_deep_agent(
        model=model,
        tools=[get_weather_tool],
        checkpointer=InMemorySaver(),
        middleware=[HumanInTheLoopMiddleware(interrupt_on={"get_weather_tool": True})],
    )
    deepagents_acp = DeepagentsACP(
        connection=connection,
        agent_graph=agent_graph,
    )
    # Create a new session
    session_response = await deepagents_acp.newSession(
        NewSessionRequest(cwd="/tmp", mcpServers=[])
    )
    session_id = session_response.sessionId
    prompt_request.sessionId = session_id
    # Call prompt - this should trigger HITL
    await deepagents_acp.prompt(prompt_request)
    # Verify that a permission request was made with correct structure
    assert len(connection.permission_requests) == 1
    perm_request = connection.permission_requests[0]
    assert {
        "sessionId": perm_request.sessionId,
        "toolCall": {
            "title": perm_request.toolCall.title,
            "rawInput": perm_request.toolCall.rawInput,
            "status": perm_request.toolCall.status,
        },
        "option_ids": [opt.optionId for opt in perm_request.options],
    } == {
        "sessionId": session_id,
        "toolCall": {
            "title": "get_weather_tool",
            "rawInput": {"location": "Tokyo, Japan"},
            "status": "pending",
        },
        "option_ids": ["allow-once", "reject-once"],
    }
    # Verify that tool execution happened after approval
    tool_call_updates = [
        call.model_dump()
        for call in connection.calls
        if call.model_dump()["update"]["sessionUpdate"] == "tool_call_update"
    ]
    assert len(tool_call_updates) == 2
    assert tool_call_updates[0]["update"] == {
        "sessionUpdate": "tool_call_update",
        "status": "pending",
        "title": "get_weather_tool",
        "toolCallId": "call_tokyo_123",
        "rawInput": {"location": "Tokyo, Japan"},
        "content": None,
        "rawOutput": None,
        "kind": None,
        "locations": None,
        "field_meta": None,
    }
    # Check completed status
    completed_update = tool_call_updates[1]["update"]
    assert completed_update["sessionUpdate"] == "tool_call_update"
    assert completed_update["status"] == "completed"
    assert completed_update["title"] == "get_weather_tool"
    assert "Tokyo, Japan" in completed_update["rawOutput"]
    # Verify final AI message was streamed
    message_chunks = [
        call
        for call in connection.calls
        if call.model_dump()["update"]["sessionUpdate"] == "agent_message_chunk"
    ]
    assert len(message_chunks) > 0

1872
deepagents_sourcecode/libs/acp/uv.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,66 @@
.PHONY: all lint format test help run test_integration test_watch
# Default target executed when no arguments are given to make.
all: help
######################
# TESTING AND COVERAGE
######################
# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests
INTEGRATION_FILES ?= tests/integration_tests
test:
uv run pytest --disable-socket --allow-unix-socket $(TEST_FILE)
test_integration:
uv run pytest $(INTEGRATION_FILES)
test_watch:
uv run ptw . -- $(TEST_FILE)
run:
uvx --no-cache --reinstall .
######################
# LINTING AND FORMATTING
######################
# Define a variable for Python and notebook files.
lint format: PYTHON_FILES=deepagents_acp/ tests/
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=. --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint lint_diff:
[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES) --diff
@if [ "$(LINT)" != "minimal" ]; then \
if [ "$(PYTHON_FILES)" != "" ]; then \
uv run ruff check $(PYTHON_FILES) --diff; \
fi; \
fi
# [ "$(PYTHON_FILES)" = "" ] || uv run mypy $(PYTHON_FILES)
format format_diff:
[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || uv run ruff check --fix $(PYTHON_FILES)
format_unsafe:
[ "$(PYTHON_FILES)" = "" ] || uv run ruff format --unsafe-fixes $(PYTHON_FILES)
######################
# HELP
######################
help:
@echo '===================='
@echo '-- LINTING --'
@echo 'format - run code formatters'
@echo 'lint - run linters'
@echo '-- TESTS --'
@echo 'test - run unit tests'
@echo 'test TEST_FILE=<test_file> - run all tests in file'
@echo '-- DOCUMENTATION tasks are from the top-level Makefile --'

View File

@@ -0,0 +1,369 @@
# 🚀🧠 Deep Agents CLI
The [deepagents](https://github.com/langchain-ai/deepagents) CLI is an open source coding assistant that runs in your terminal, similar to Claude Code.
**Key Features:**
- **Built-in Tools**: File operations (read, write, edit, glob, grep), shell commands, web search, and subagent delegation
- **Customizable Skills**: Add domain-specific capabilities through a progressive disclosure skill system
- **Persistent Memory**: Agent remembers your preferences, coding style, and project context across sessions
- **Project-Aware**: Automatically detects project roots and loads project-specific configurations
<img src="cli-banner.jpg" alt="deep agent" width="100%"/>
## 🚀 Quickstart
`deepagents-cli` is a Python package that can be installed via pip or uv.
**Install via pip:**
```bash
pip install deepagents-cli
```
**Or using uv (recommended):**
```bash
# Create a virtual environment
uv venv
# Install the package
uv pip install deepagents-cli
```
**Run the agent in your terminal:**
```bash
deepagents
```
**Get help:**
```bash
deepagents help
```
**Common options:**
```bash
# Use a specific agent configuration
deepagents --agent mybot
# Use a specific model (auto-detects provider)
deepagents --model claude-sonnet-4-5-20250929
deepagents --model gpt-4o
# Auto-approve tool usage (skip human-in-the-loop prompts)
deepagents --auto-approve
# Execute code in a remote sandbox
deepagents --sandbox modal # or runloop, daytona
deepagents --sandbox-id dbx_123 # reuse existing sandbox
```
Type naturally as you would in a chat interface. The agent will use its built-in tools, skills, and memory to help you with tasks.
## Model Configuration
The CLI supports three LLM providers with automatic provider detection based on model name:
**Supported Providers:**
- **OpenAI** - Models like `gpt-4o`, `gpt-5-mini`, `o1-preview`, `o3-mini` (default: `gpt-5-mini`)
- **Anthropic** - Models like `claude-sonnet-4-5-20250929`, `claude-3-opus-20240229` (default: `claude-sonnet-4-5-20250929`)
- **Google** - Models like `gemini-3-pro-preview`, `gemini-1.5-pro` (default: `gemini-3-pro-preview`)
**Specify model at startup:**
```bash
# Auto-detects Anthropic from model name pattern
deepagents --model claude-sonnet-4-5-20250929
# Auto-detects OpenAI from model name pattern
deepagents --model gpt-4o
```
**Or use environment variables:**
```bash
# Set provider-specific model defaults
export ANTHROPIC_MODEL="claude-sonnet-4-5-20250929"
export OPENAI_MODEL="gpt-4o"
export GOOGLE_MODEL="gemini-1.5-pro"
# Set API keys (required)
export ANTHROPIC_API_KEY="your-key"
export OPENAI_API_KEY="your-key"
export GOOGLE_API_KEY="your-key"
```
**Model name conventions:**
Model names follow each provider's official naming convention:
- **OpenAI**: See [OpenAI Models Documentation](https://platform.openai.com/docs/models)
- **Anthropic**: See [Anthropic Models Documentation](https://docs.anthropic.com/en/docs/about-claude/models)
- **Google**: See [Google Gemini Models Documentation](https://ai.google.dev/gemini-api/docs/models/gemini)
The active model is displayed at startup in the CLI interface.
## Built-in Tools
The agent comes with the following built-in tools (always available without configuration):
| Tool | Description |
|------|-------------|
| `ls` | List files and directories |
| `read_file` | Read contents of a file |
| `write_file` | Create or overwrite a file |
| `edit_file` | Make targeted edits to existing files |
| `glob` | Find files matching a pattern (e.g., `**/*.py`) |
| `grep` | Search for text patterns across files |
| `shell` | Execute shell commands (local mode) |
| `execute` | Execute commands in remote sandbox (sandbox mode) |
| `web_search` | Search the web using Tavily API |
| `fetch_url` | Fetch and convert web pages to markdown |
| `task` | Delegate work to subagents for parallel execution |
| `write_todos` | Create and manage task lists for complex work |
> [!WARNING]
> **Human-in-the-Loop (HITL) Approval Required**
>
> Potentially destructive operations require user approval before execution:
> - **File operations**: `write_file`, `edit_file`
> - **Command execution**: `shell`, `execute`
> - **External requests**: `web_search`, `fetch_url`
> - **Delegation**: `task` (subagents)
>
> Each operation will prompt for approval showing the action details. Use `--auto-approve` to skip prompts:
> ```bash
> deepagents --auto-approve
> ```
## Agent Configuration
Each agent has its own configuration directory at `~/.deepagents/<agent_name>/`, with default `agent`.
```bash
# List all configured agents
deepagents list
# Create a new agent
deepagents create <agent_name>
```
### Environment Variables
#### LangSmith Tracing
The CLI supports separate LangSmith project configuration for agent tracing vs user code tracing:
**Agent Tracing** - Traces deepagents operations (tool calls, agent decisions):
```bash
export DEEPAGENTS_LANGSMITH_PROJECT="my-agent-project"
```
**User Code Tracing** - Traces code executed via shell commands:
```bash
export LANGSMITH_PROJECT="my-user-code-project"
```
**Complete Setup Example:**
```bash
# Enable LangSmith tracing
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY="your-api-key"
# Configure separate projects
export DEEPAGENTS_LANGSMITH_PROJECT="agent-traces"
export LANGSMITH_PROJECT="user-code-traces"
# Run deepagents
deepagents
```
When both are configured, the CLI displays:
```
✓ LangSmith tracing enabled: Deepagents → 'agent-traces'
User code (shell) → 'user-code-traces'
```
**Why separate projects?**
- Keep agent operations separate from your application code traces
- Easier debugging by isolating agent vs user code behavior
- Different retention policies or access controls per project
**Backwards Compatibility:**
If `DEEPAGENTS_LANGSMITH_PROJECT` is not set, both agent and user code trace to the same project specified by `LANGSMITH_PROJECT`.
## Customization
There are two primary ways to customize any agent: **memory** and **skills**.
Each agent has its own global configuration directory at `~/.deepagents/<agent_name>/`:
```
~/.deepagents/<agent_name>/
├── agent.md # Auto-loaded global personality/style
└── skills/ # Auto-loaded agent-specific skills
├── web-research/
│ └── SKILL.md
└── langgraph-docs/
└── SKILL.md
```
Projects can extend the global configuration with project-specific instructions and skills:
```
my-project/
├── .git/
└── .deepagents/
├── agent.md # Project-specific instructions
└── skills/ # Project-specific skills
└── custom-tool/
└── SKILL.md
```
The CLI automatically detects project roots (via `.git`) and loads:
- Project-specific `agent.md` from `[project-root]/.deepagents/agent.md`
- Project-specific skills from `[project-root]/.deepagents/skills/`
Both global and project configurations are loaded together, allowing you to:
- Keep general coding style/preferences in global agent.md
- Add project-specific context, conventions, or guidelines in project agent.md
- Share project-specific skills with your team (committed to version control)
- Override global skills with project-specific versions (when skill names match)
### agent.md files
`agent.md` files provide persistent memory that is always loaded at session start. Both global and project-level `agent.md` files are loaded together and injected into the system prompt.
**Global `agent.md`** (`~/.deepagents/agent/agent.md`)
- Your personality, style, and universal coding preferences
- General tone and communication style
- Universal coding preferences (formatting, type hints, etc.)
- Tool usage patterns that apply everywhere
- Workflows and methodologies that don't change per-project
**Project `agent.md`** (`.deepagents/agent.md` in project root)
- Project-specific context and conventions
- Project architecture and design patterns
- Coding conventions specific to this codebase
- Testing strategies and deployment processes
- Team guidelines and project structure
**How it works (AgentMemoryMiddleware):**
- Loads both files at startup and injects into system prompt as `<user_memory>` and `<project_memory>`
- Appends [memory management instructions](deepagents_cli/agent_memory.py#L44-L158) on when/how to update memory files
**When the agent updates memory:**
- IMMEDIATELY when you describe how it should behave
- IMMEDIATELY when you give feedback on its work
- When you explicitly ask it to remember something
- When patterns or preferences emerge from your interactions
The agent uses `edit_file` to update memories when learning preferences or receiving feedback.
### Project memory files
Beyond `agent.md`, you can create additional memory files in `.deepagents/` for structured project knowledge. These work similarly to [Anthropic's Memory Tool](https://platform.claude.com/docs/en/agents-and-tools/tool-use/memory-tool). The agent receives [detailed instructions](deepagents_cli/agent_memory.py#L123-L158) on when to read and update these files.
**How it works:**
1. Create markdown files in `[project-root]/.deepagents/` (e.g., `api-design.md`, `architecture.md`, `deployment.md`)
2. The agent checks these files when relevant to a task (not auto-loaded into every prompt)
3. The agent uses `write_file` or `edit_file` to create/update memory files when learning project patterns
**Example workflow:**
```bash
# Agent discovers deployment pattern and saves it
.deepagents/
├── agent.md # Always loaded (personality + conventions)
├── architecture.md # Loaded on-demand (system design)
└── deployment.md # Loaded on-demand (deploy procedures)
```
**When the agent reads memory files:**
- At the start of new sessions (checks what files exist)
- Before answering questions about project-specific topics
- When you reference past work or patterns
- When performing tasks that match saved knowledge domains
**Benefits:**
- **Persistent learning**: Agent remembers project patterns across sessions
- **Team collaboration**: Share project knowledge through version control
- **Contextual retrieval**: Load only relevant memory when needed (reduces token usage)
- **Structured knowledge**: Organize information by domain (APIs, architecture, deployment, etc.)
### Skills
Skills are reusable agent capabilities that provide specialized workflows and domain knowledge. Example skills are provided in the `examples/skills/` directory:
- **web-research** - Structured web research workflow with planning, parallel delegation, and synthesis
- **langgraph-docs** - LangGraph documentation lookup and guidance
To use an example skill with the default agent, just copy it into the agent's global or project-level skills directory:
```bash
mkdir -p ~/.deepagents/agent/skills
cp -r examples/skills/web-research ~/.deepagents/agent/skills/
```
To manage skills:
```bash
# List all skills (global + project)
deepagents skills list
# List only project skills
deepagents skills list --project
# Create a new global skill from template
deepagents skills create my-skill
# Create a new project skill
deepagents skills create my-tool --project
# View detailed information about a skill
deepagents skills info web-research
# View info for a project skill only
deepagents skills info my-tool --project
```
To use skills (e.g., the langgraph-docs skill), just type a request relevant to a skill and the skill will be used automatically.
```bash
$ deepagents
$ "create an agent.py script that implements a LangGraph agent"
```
Skills follow Anthropic's [progressive disclosure pattern](https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills) - the agent knows skills exist but only reads full instructions when needed.
1. **At startup** - SkillsMiddleware scans `~/.deepagents/agent/skills/` and `.deepagents/skills/` directories
2. **Parse metadata** - Extracts YAML frontmatter (name + description) from each `SKILL.md` file
3. **Inject into prompt** - Adds skill list with descriptions to system prompt: "Available Skills: web-research - Use for web research tasks..."
4. **Progressive loading** - Agent reads full `SKILL.md` content with `read_file` only when a task matches the skill's description
5. **Execute workflow** - Agent follows the step-by-step instructions in the skill file
## Development
### Running Tests
To run the test suite:
```bash
uv sync --all-groups
make test
```
### Running During Development
```bash
# From libs/deepagents-cli directory
uv run deepagents
# Or install in editable mode
uv pip install -e .
deepagents
```
### Modifying the CLI
- **UI changes** → Edit `ui.py` or `input.py`
- **Add new tools** → Edit `tools.py`
- **Change execution flow** → Edit `execution.py`
- **Add commands** → Edit `commands.py`
- **Agent configuration** → Edit `agent.py`
- **Skills system** → Edit `skills/` modules
- **Constants/colors** → Edit `config.py`

Binary file not shown.

After

Width:  |  Height:  |  Size: 200 KiB

View File

@@ -0,0 +1,5 @@
"""DeepAgents CLI - Interactive AI coding assistant."""

from deepagents_cli.main import cli_main

# Public API: re-export the console entry point only.
__all__ = ["cli_main"]

View File

@@ -0,0 +1,6 @@
"""Allow running the CLI as: python -m deepagents_cli."""

from deepagents_cli.main import cli_main

if __name__ == "__main__":
    cli_main()

View File

@@ -0,0 +1,3 @@
"""Version information for deepagents-cli."""

# NOTE(review): presumably kept in sync with the package metadata on release — confirm.
__version__ = "0.0.12"

View File

@@ -0,0 +1,454 @@
"""CLI를 위한 에이전트 관리 및 생성."""
import os
import shutil
from pathlib import Path
from deepagents import create_deep_agent
from deepagents.backends import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.sandbox import SandboxBackendProtocol
from langchain.agents.middleware import (
InterruptOnConfig,
)
from langchain.agents.middleware.types import AgentState
from langchain.messages import ToolCall
from langchain.tools import BaseTool
from langchain_core.language_models import BaseChatModel
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.pregel import Pregel
from langgraph.runtime import Runtime
from deepagents_cli.agent_memory import AgentMemoryMiddleware
from deepagents_cli.config import COLORS, config, console, get_default_coding_instructions, settings
from deepagents_cli.integrations.sandbox_factory import get_default_working_dir
from deepagents_cli.shell import ShellMiddleware
from deepagents_cli.skills import SkillsMiddleware
def list_agents() -> None:
    """Print every agent directory found under ~/.deepagents/.

    Agents whose directory lacks an ``agent.md`` are flagged as incomplete.
    """
    agents_dir = settings.user_deepagents_dir

    # Guard clause: nothing to list when the directory is absent or empty.
    if not (agents_dir.exists() and any(agents_dir.iterdir())):
        console.print("[yellow]에이전트를 찾을 수 없습니다.[/yellow]")
        console.print(
            "[dim]처음 사용할 때 ~/.deepagents/에 에이전트가 생성됩니다.[/dim]",
            style=COLORS["dim"],
        )
        return

    console.print("\n[bold]사용 가능한 에이전트:[/bold]\n", style=COLORS["primary"])
    for entry in sorted(agents_dir.iterdir()):
        if not entry.is_dir():
            continue
        agent_name = entry.name
        # An agent is considered complete once its agent.md exists.
        if (entry / "agent.md").exists():
            console.print(f" • [bold]{agent_name}[/bold]", style=COLORS["primary"])
        else:
            console.print(f" • [bold]{agent_name}[/bold] [dim](미완성)[/dim]", style=COLORS["tool"])
        console.print(f" {entry}", style=COLORS["dim"])
    console.print()
def reset_agent(agent_name: str, source_agent: str | None = None) -> None:
    """Reset an agent's configuration to the defaults or to a copy of another agent.

    Args:
        agent_name: Agent whose configuration directory is recreated.
        source_agent: Optional agent to copy ``agent.md`` from. When omitted,
            the default coding instructions are used.
    """
    agents_dir = settings.user_deepagents_dir
    agent_dir = agents_dir / agent_name

    # Resolve the replacement agent.md content before touching the target.
    if not source_agent:
        source_content = get_default_coding_instructions()
        action_desc = "default"
    else:
        source_md = agents_dir / source_agent / "agent.md"
        if not source_md.exists():
            console.print(
                f"[bold red]오류:[/bold red] 소스 에이전트 '{source_agent}'를 찾을 수 없거나 agent.md가 없습니다"
            )
            return
        source_content = source_md.read_text()
        action_desc = f"contents of agent '{source_agent}'"

    # Remove any existing configuration so the reset starts from a clean slate.
    if agent_dir.exists():
        shutil.rmtree(agent_dir)
        console.print(f"기존 에이전트 디렉터리를 제거했습니다: {agent_dir}", style=COLORS["tool"])

    agent_dir.mkdir(parents=True, exist_ok=True)
    (agent_dir / "agent.md").write_text(source_content)
    console.print(f"✓ 에이전트 '{agent_name}'{action_desc}(으)로 재설정되었습니다", style=COLORS["primary"])
    console.print(f"Location: {agent_dir}\n", style=COLORS["dim"])
def get_system_prompt(assistant_id: str, sandbox_type: str | None = None) -> str:
"""에이전트에 대한 기본 시스템 프롬프트를 가져옵니다.
Args:
assistant_id: 경로 참조를 위한 에이전트 식별자
sandbox_type: 샌드박스 공급자 유형("modal", "runloop", "daytona").
None인 경우 에이전트는 로컬 모드에서 작동합니다.
Returns:
시스템 프롬프트 문자열 (agent.md 내용 제외)
"""
agent_dir_path = f"~/.deepagents/{assistant_id}"
if sandbox_type:
# Get provider-specific working directory
working_dir = get_default_working_dir(sandbox_type)
working_dir_section = f"""### Current Working Directory
You are working in a **remote Linux sandbox** at `{working_dir}`.
All code execution and file operations happen in this sandbox environment.
**IMPORTANT:**
- The CLI runs locally on the user's machine, but executes code remotely.
- Use `{working_dir}` as your working directory for all operations.
"""
else:
cwd = Path.cwd()
working_dir_section = f"""<env>
WORKING_DIRECTORY: {cwd}
</env>
### Current Working Directory
The filesystem backend is currently operating at: `{cwd}`
### File System and Paths
**IMPORTANT - Path Handling:**
- All file paths MUST be absolute (e.g. `{cwd}/file.txt`).
- Use the WORKING_DIRECTORY from <env> to construct absolute paths.
- Example: To create a file in the working directory, use `{cwd}/research_project/file.md`
- Do NOT use relative paths - always construct the full absolute path.
"""
return (
working_dir_section
+ f"""### Skills Directory
Your skills are stored at: `{agent_dir_path}/skills/`
Skills may contain scripts or support files. Use the physical filesystem path when running skill scripts with bash:
Example: `bash python {agent_dir_path}/skills/web-research/script.py`
### Human-in-the-Loop Tool Approvals
Some tool calls require user approval before execution. If a tool call is rejected by the user:
1. Accept the decision immediately - do NOT try the same command again.
2. Explain that you understand the user rejected the operation.
3. Propose an alternative or ask for clarification.
4. NEVER try to bypass a rejection by retrying the exact same command.
Respect user decisions and work collaboratively.
### Web Search Tool Usage
When using the web_search tool:
1. The tool returns search results with titles, URLs, and content snippets.
2. You MUST read and process these results, then respond to the user naturally.
3. Do NOT show raw JSON or tool results directly to the user.
4. Synthesize information from multiple sources into a coherent answer.
5. Cite sources by mentioning page titles or URLs when relevant.
6. If you don't find what you need in the search, explain what you found and ask clarifying questions.
The user ONLY sees your text response, not the tool results. Always provide a complete, natural language answer after using web_search.
### Todo List Management
When using the write_todos tool:
1. Keep the todo list minimal - aim for 3-6 items max.
2. Only create todos for complex, multi-step tasks that really need tracking.
3. Break down tasks into clear, actionable items without being overly granular.
4. For simple tasks (1-2 steps), just do them - don't create a todo.
5. When first creating a todo list for a task, ALWAYS ask the user if the plan looks good before starting work.
- Create the todos so they render, then ask "Does this plan look good?" or similar.
- Wait for the user's response before marking the first todo in_progress.
- Adjust the plan if they want changes.
6. Update todo status promptly as you complete each item.
The todo list is a planning tool - use it judiciously to avoid overwhelming the user with excessive task tracking."""
)
def _format_write_file_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a write_file tool call for the approval prompt."""
    args = tool_call["args"]
    target = args.get("file_path", "unknown")
    body = args.get("content", "")
    # Report whether the call creates a new file or clobbers an existing one.
    verb = "덮어쓰기(Overwrite)" if Path(target).exists() else "생성(Create)"
    return f"파일: {target}\n작업: 파일 {verb}\n줄 수: {len(body.splitlines())}"
def _format_edit_file_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render an edit_file tool call for the approval prompt."""
    args = tool_call["args"]
    target = args.get("file_path", "unknown")
    # replace_all toggles between replacing every occurrence and just one.
    scope = "모든 항목" if args.get("replace_all", False) else "단일 항목"
    return f"파일: {target}\n작업: 텍스트 교체 ({scope})"
def _format_web_search_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a web_search tool call for the approval prompt."""
    args = tool_call["args"]
    return (
        f"쿼리: {args.get('query', 'unknown')}\n"
        f"최대 결과: {args.get('max_results', 5)}\n"
        f"\n⚠️ 이 작업은 Tavily API 크레딧을 사용합니다"
    )
def _format_fetch_url_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a fetch_url tool call for the approval prompt."""
    args = tool_call["args"]
    return (
        f"URL: {args.get('url', 'unknown')}\n"
        f"시간 제한: {args.get('timeout', 30)}\n"
        f"\n⚠️ 웹 콘텐츠를 가져와 마크다운으로 변환합니다"
    )
def _format_task_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a task (subagent) tool call for the approval prompt.

    The task tool signature is ``task(description: str, subagent_type: str)``;
    ``description`` carries the full instructions forwarded to the subagent.
    """
    args = tool_call["args"]
    description = args.get("description", "unknown")
    subagent_type = args.get("subagent_type", "unknown")
    # Truncate description if too long for display
    description_preview = description
    if len(description) > 500:
        description_preview = description[:500] + "..."
    # BUG FIX: the divider used to be "'' * 40" (an empty string repeated 40
    # times, i.e. nothing); render a visible 40-character rule instead.
    divider = "─" * 40
    return (
        f"서브 에이전트 유형: {subagent_type}\n\n"
        f"작업 지침:\n"
        f"{divider}\n"
        f"{description_preview}\n"
        f"{divider}\n\n"
        f"⚠️ 서브 에이전트는 파일 작업 및 셸 명령에 접근할 수 있습니다"
    )
def _format_shell_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render a shell tool call for the approval prompt."""
    command = tool_call["args"].get("command", "없음")
    return f"셸 명령: {command}\n작업 디렉터리: {Path.cwd()}"
def _format_execute_description(tool_call: ToolCall, _state: AgentState, _runtime: Runtime) -> str:
    """Render an execute (remote sandbox) tool call for the approval prompt."""
    command = tool_call["args"].get("command", "없음")
    return f"명령 실행: {command}\n위치: 원격 샌드박스"
def _add_interrupt_on() -> dict[str, InterruptOnConfig]:
    """Build human-in-the-loop interrupt configs for destructive tools.

    Every guarded tool gets the same approve/reject decision set, paired with
    a tool-specific formatter that renders its approval prompt.
    """
    formatters = {
        "shell": _format_shell_description,
        "execute": _format_execute_description,
        "write_file": _format_write_file_description,
        "edit_file": _format_edit_file_description,
        "web_search": _format_web_search_description,
        "fetch_url": _format_fetch_url_description,
        "task": _format_task_description,
    }
    configs: dict[str, InterruptOnConfig] = {}
    for tool_name, formatter in formatters.items():
        configs[tool_name] = {
            "allowed_decisions": ["approve", "reject"],
            "description": formatter,
        }
    return configs
def create_cli_agent(
    model: str | BaseChatModel,
    assistant_id: str,
    *,
    tools: list[BaseTool] | None = None,
    sandbox: SandboxBackendProtocol | None = None,
    sandbox_type: str | None = None,
    system_prompt: str | None = None,
    auto_approve: bool = False,
    enable_memory: bool = True,
    enable_skills: bool = True,
    enable_shell: bool = True,
) -> tuple[Pregel, CompositeBackend]:
    """Create a CLI-configured agent with flexible options.

    This is the main entry point for deepagents CLI agent creation; it is used
    internally and may also be called from external code (e.g. benchmarking
    frameworks, Harbor).

    Args:
        model: LLM model to use (e.g. "anthropic:claude-sonnet-4-5-20250929").
        assistant_id: Agent identifier for memory/state storage.
        tools: Extra tools to expose to the agent (default: empty list).
        sandbox: Optional sandbox backend for remote execution (e.g.
            ModalBackend). When None, the local filesystem + shell are used.
        sandbox_type: Sandbox provider type ("modal", "runloop", "daytona");
            used when generating the system prompt.
        system_prompt: Override for the default system prompt. When None it is
            generated from sandbox_type and assistant_id.
        auto_approve: When True, auto-approve every tool call without human
            confirmation. Useful for automated workflows.
        enable_memory: Enable AgentMemoryMiddleware for persistent memory.
        enable_skills: Enable SkillsMiddleware for custom agent skills.
        enable_shell: Enable ShellMiddleware for local shell execution
            (local mode only).

    Returns:
        A 2-tuple of (agent_graph, composite_backend):
            - agent_graph: configured LangGraph Pregel instance, ready to run
            - composite_backend: CompositeBackend for file operations
    """
    if tools is None:
        tools = []

    # Seed the agent directory with default instructions on first use.
    if enable_memory or enable_skills:
        agent_dir = settings.ensure_agent_dir(assistant_id)
        agent_md = agent_dir / "agent.md"
        if not agent_md.exists():
            agent_md.write_text(get_default_coding_instructions())

    # Skills directories (if enabled)
    skills_dir = None
    project_skills_dir = None
    if enable_skills:
        skills_dir = settings.ensure_user_skills_dir(assistant_id)
        project_skills_dir = settings.get_project_skills_dir()

    # File operations route to the local filesystem by default, or to the
    # remote sandbox backend (ModalBackend, etc.) when one is provided.
    # No virtualization in either case - real paths are used.
    composite_backend = CompositeBackend(
        default=FilesystemBackend() if sandbox is None else sandbox,
        routes={},
    )

    # Build the middleware stack. Memory and skills middleware are identical
    # in local and sandbox mode (previously duplicated across both branches);
    # order is preserved: memory, skills, then shell (local mode only).
    agent_middleware = []
    if enable_memory:
        agent_middleware.append(AgentMemoryMiddleware(settings=settings, assistant_id=assistant_id))
    if enable_skills:
        agent_middleware.append(
            SkillsMiddleware(
                skills_dir=skills_dir,
                assistant_id=assistant_id,
                project_skills_dir=project_skills_dir,
            )
        )
    # Shell middleware is local-only: in sandbox mode, file operations and the
    # execute tool are provided by the sandbox backend instead.
    if sandbox is None and enable_shell:
        # Restore the user's original LANGSMITH_PROJECT so code they run via
        # the shell traces to their own project, not the agent's.
        shell_env = os.environ.copy()
        if settings.user_langchain_project:
            shell_env["LANGSMITH_PROJECT"] = settings.user_langchain_project
        agent_middleware.append(
            ShellMiddleware(
                workspace_root=str(Path.cwd()),
                env=shell_env,
            )
        )

    # Get or use custom system prompt
    if system_prompt is None:
        system_prompt = get_system_prompt(assistant_id=assistant_id, sandbox_type=sandbox_type)

    # auto_approve disables every interrupt; otherwise destructive tools
    # require human-in-the-loop approval.
    interrupt_on = {} if auto_approve else _add_interrupt_on()

    # Create the agent
    agent = create_deep_agent(
        model=model,
        system_prompt=system_prompt,
        tools=tools,
        backend=composite_backend,
        middleware=agent_middleware,
        interrupt_on=interrupt_on,
        checkpointer=InMemorySaver(),
    ).with_config(config)
    return agent, composite_backend

View File

@@ -0,0 +1,328 @@
"""에이전트별 장기 메모리를 시스템 프롬프트에 로드하기 위한 미들웨어."""
import contextlib
from collections.abc import Awaitable, Callable
from typing import NotRequired, TypedDict, cast
from langchain.agents.middleware.types import (
AgentMiddleware,
AgentState,
ModelRequest,
ModelResponse,
)
from langgraph.runtime import Runtime
from deepagents_cli.config import Settings
class AgentMemoryState(AgentState):
    """State for the agent memory middleware."""

    user_memory: NotRequired[str]
    """Personal settings from ~/.deepagents/{agent}/ (applied everywhere)."""

    project_memory: NotRequired[str]
    """Project-specific context (loaded from the project root)."""
class AgentMemoryStateUpdate(TypedDict):
    """State update emitted by the agent memory middleware."""

    user_memory: NotRequired[str]
    """Personal settings from ~/.deepagents/{agent}/ (applied everywhere)."""

    project_memory: NotRequired[str]
    """Project-specific context (loaded from the project root)."""
# Long-term Memory Documentation
# Note: Claude Code loads CLAUDE.md files hierarchically and combines them (not precedence-based):
# - Loads recursively from cwd up to (but not including) root directory
# - Multiple files are combined hierarchically: enterprise → project → user
# - Both [project-root]/CLAUDE.md and [project-root]/.claude/CLAUDE.md are loaded if both exist
# - Files higher in hierarchy load first, providing foundation for more specific memories
# We will follow that pattern for deepagents-cli
LONGTERM_MEMORY_SYSTEM_PROMPT = """
## Long-term Memory
Long-term memory is stored in files on the filesystem and persists across sessions.
**User Memory Location**: `{agent_dir_absolute}` (display: `{agent_dir_display}`)
**Project Memory Location**: {project_memory_info}
The system prompt is loaded from two sources at startup:
1. **User agent.md**: `{agent_dir_absolute}/agent.md` - personal settings that apply everywhere
2. **Project agent.md**: loaded from the project root if available - project-specific instructions
Project-specific agent.md files are loaded from the following locations (combined if both exist):
- `[project-root]/.deepagents/agent.md` (preferred)
- `[project-root]/agent.md` (fallback, included if both exist)
**When you should check/read memory (IMPORTANT - do this first):**
- **At the start of every new session**: Check both user and project memory
- User: `ls {agent_dir_absolute}`
- Project: `ls {project_deepagents_dir}` (if inside a project)
- **Before answering a question**: If asked "What do you know about X?" or "How do I do Y?", check project memory first, then user.
- **When the user asks you to do a task**: Check for project-specific guides or examples.
- **When the user refers to past work**: Search project memory files for relevant context.
**Memory-First Response Pattern:**
1. User asks question -> Check project directory first: `ls {project_deepagents_dir}`
2. If relevant files exist -> Read them: `read_file '{project_deepagents_dir}/[filename]'`
3. If needed, check user memory -> `ls {agent_dir_absolute}`
4. Answer by supplementing general knowledge with stored knowledge.
**When you should update memory:**
- **Immediately when the user describes your role or how you should behave**
- **Immediately when the user gives you feedback** - record what went wrong and how to do better in memory.
- When the user explicitly asks you to remember something.
- When patterns or preferences emerge (coding style, conventions, workflow).
- After a significant task where the context would be helpful for future sessions.
**Learning from Feedback:**
- When the user tells you something is better or worse, figure out why and encode it as a pattern.
- Every correction is an opportunity to improve permanently - don't just fix the immediate issue, update your instructions.
- If the user says "You should remember X" or "Pay attention to Y", treat this as highest priority and update memory immediately.
- Look for the underlying principles behind corrections, not just the specific mistakes.
## Deciding Where to Store Memory
When writing or updating agent memory, decide where each fact, configuration, or behavior belongs:
### User Agent File: `{agent_dir_absolute}/agent.md`
-> Describes the agent's **personality, style, and universal behaviors** across all projects.
**Store here:**
- General tone and communication style
- Universal coding preferences (formatting, commenting style, etc.)
- General workflows and methodologies to follow
- Tool usage patterns that apply everywhere
- Personal preferences that don't change between projects
**Examples:**
- "Be concise and direct in your answers"
- "Always use type hints in Python"
- "Prefer functional programming patterns"
### Project Agent File: `{project_deepagents_dir}/agent.md`
-> Describes **how this specific project works** and **how the agent should behave here only**.
**Store here:**
- Project-specific architecture and design patterns
- Coding conventions specific to this codebase
- Project structure and organization
- Testing strategies for this project
- Deployment processes and workflows
- Team conventions and guidelines
**Examples:**
- "This project uses FastAPI with SQLAlchemy"
- "Tests are located in tests/ directory mirroring src structure"
- "All API changes require updating OpenAPI specs"
### Project Memory Files: `{project_deepagents_dir}/*.md`
-> Use for **project-specific reference information** and structured notes.
**Store here:**
- API design documentation
- Architecture decisions and reasoning
- Deployment procedures
- Common debugging patterns
- Onboarding information
**Examples:**
- `{project_deepagents_dir}/api-design.md` - REST API patterns used
- `{project_deepagents_dir}/architecture.md` - System architecture overview
- `{project_deepagents_dir}/deployment.md` - How to deploy this project
### File Operations:
**User Memory:**
```
ls {agent_dir_absolute} # List user memory files
read_file '{agent_dir_absolute}/agent.md' # Read user preferences
edit_file '{agent_dir_absolute}/agent.md' ... # Update user preferences
```
**Project Memory (Preferred for project-specific info):**
```
ls {project_deepagents_dir} # List project memory files
read_file '{project_deepagents_dir}/agent.md' # Read project guidelines
edit_file '{project_deepagents_dir}/agent.md' ... # Update project guidelines
write_file '{project_deepagents_dir}/agent.md' ... # Create project memory file
```
**IMPORTANT**:
- Project memory files are stored in `.deepagents/` inside the project root.
- Always use absolute paths for file operations.
- Determine if info is project-specific (check user vs project memory) before answering."""
DEFAULT_MEMORY_SNIPPET = """<user_memory>
{user_memory}
</user_memory>
<project_memory>
{project_memory}
</project_memory>"""
class AgentMemoryMiddleware(AgentMiddleware):
    """Middleware that loads per-agent long-term memory.

    Loads the agent's long-term memory from files (agent.md) and injects it
    into the system prompt. Memory is loaded once at the start of a
    conversation and kept in state.
    """

    state_schema = AgentMemoryState

    def __init__(
        self,
        *,
        settings: Settings,
        assistant_id: str,
        system_prompt_template: str | None = None,
    ) -> None:
        """Initialize the agent memory middleware.

        Args:
            settings: Global Settings instance with project detection and paths.
            assistant_id: Agent identifier.
            system_prompt_template: Optional custom template used to inject the
                agent memory into the system prompt. Defaults to
                DEFAULT_MEMORY_SNIPPET.
        """
        self.settings = settings
        self.assistant_id = assistant_id
        # User paths
        self.agent_dir = settings.get_agent_dir(assistant_id)
        # Store both display path (with ~) and absolute path for file operations
        self.agent_dir_display = f"~/.deepagents/{assistant_id}"
        self.agent_dir_absolute = str(self.agent_dir)
        # Project paths (from settings)
        self.project_root = settings.project_root
        self.system_prompt_template = system_prompt_template or DEFAULT_MEMORY_SNIPPET

    def before_agent(
        self,
        state: AgentMemoryState,
        runtime: Runtime,
    ) -> AgentMemoryStateUpdate:
        """Load agent memory from files before the agent runs.

        Loads the user agent.md and the project-specific agent.md when they
        exist, but only for keys not already present in state, so memory is
        read once per conversation. File existence is re-checked whenever a
        key is missing, which picks up files created between sessions.

        Args:
            state: Current agent state.
            runtime: Runtime context (unused here).

        Returns:
            A state update populating user_memory and/or project_memory.
        """
        result: AgentMemoryStateUpdate = {}
        # Load user memory if not already in state
        if "user_memory" not in state:
            user_path = self.settings.get_user_agent_md_path(self.assistant_id)
            if user_path.exists():
                # Best effort: an unreadable/undecodable file simply leaves the
                # key unset rather than aborting the run.
                with contextlib.suppress(OSError, UnicodeDecodeError):
                    result["user_memory"] = user_path.read_text()
        # Load project memory if not already in state
        if "project_memory" not in state:
            project_path = self.settings.get_project_agent_md_path()
            if project_path and project_path.exists():
                with contextlib.suppress(OSError, UnicodeDecodeError):
                    result["project_memory"] = project_path.read_text()
        return result

    def _build_system_prompt(self, request: ModelRequest) -> str:
        """Build the full system prompt including the memory sections.

        Args:
            request: Model request carrying state and the base system prompt.

        Returns:
            The full system prompt with the memory sections injected: memory
            snippet first, then the base prompt, then the long-term-memory
            usage instructions.
        """
        # Extract memory from state
        state = cast("AgentMemoryState", request.state)
        user_memory = state.get("user_memory")
        project_memory = state.get("project_memory")
        base_system_prompt = request.system_prompt
        # Build project memory info for documentation
        if self.project_root and project_memory:
            project_memory_info = f"`{self.project_root}` (detected)"
        elif self.project_root:
            project_memory_info = f"`{self.project_root}` (no agent.md found)"
        else:
            project_memory_info = "None (not in a git project)"
        # Build project deepagents directory path
        if self.project_root:
            project_deepagents_dir = str(self.project_root / ".deepagents")
        else:
            project_deepagents_dir = "[project-root]/.deepagents (not in a project)"
        # Format memory section with both memories
        memory_section = self.system_prompt_template.format(
            user_memory=user_memory if user_memory else "(No user agent.md)",
            project_memory=project_memory if project_memory else "(No project agent.md)",
        )
        system_prompt = memory_section
        if base_system_prompt:
            system_prompt += "\n\n" + base_system_prompt
        system_prompt += "\n\n" + LONGTERM_MEMORY_SYSTEM_PROMPT.format(
            agent_dir_absolute=self.agent_dir_absolute,
            agent_dir_display=self.agent_dir_display,
            project_memory_info=project_memory_info,
            project_deepagents_dir=project_deepagents_dir,
        )
        return system_prompt

    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Inject the agent memory into the system prompt.

        Args:
            request: Model request being processed.
            handler: Handler function to invoke with the modified request.

        Returns:
            The model response from the handler.
        """
        system_prompt = self._build_system_prompt(request)
        return handler(request.override(system_prompt=system_prompt))

    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelResponse:
        """(Async) Inject the agent memory into the system prompt.

        Args:
            request: Model request being processed.
            handler: Handler function to await with the modified request.

        Returns:
            The model response from the handler.
        """
        system_prompt = self._build_system_prompt(request)
        return await handler(request.override(system_prompt=system_prompt))

View File

@@ -0,0 +1,87 @@
"""슬래시 명령 및 bash 실행을 위한 명령 처리기."""
import subprocess
from pathlib import Path
from langgraph.checkpoint.memory import InMemorySaver
from .config import COLORS, DEEP_AGENTS_ASCII, console
from .ui import TokenTracker, show_interactive_help
def handle_command(command: str, agent, token_tracker: TokenTracker) -> str | bool:
    """Handle slash commands.

    Args:
        command: Raw user input (may or may not start with "/").
        agent: Agent whose conversation checkpointer can be reset.
        token_tracker: Session token usage tracker.

    Returns:
        "exit" to quit, True if the command was handled here, or False to
        pass the input through to the agent.
    """
    cmd = command.lower().strip().lstrip("/")
    if cmd in ["quit", "exit", "q"]:
        return "exit"
    if cmd == "clear":
        # Reset agent conversation state
        agent.checkpointer = InMemorySaver()
        # Reset token tracking to baseline
        token_tracker.reset()
        # Clear screen and show fresh UI
        console.clear()
        console.print(DEEP_AGENTS_ASCII, style=f"bold {COLORS['primary']}")
        console.print()
        console.print("... 새로 시작! 화면이 지워지고 대화가 초기화되었습니다.", style=COLORS["agent"])
        console.print()
        return True
    if cmd == "help":
        show_interactive_help()
        return True
    if cmd == "tokens":
        token_tracker.display_session()
        return True
    # BUG FIX: the unknown-command warning previously ran unconditionally and
    # returned True, which made the final `return False` unreachable — plain
    # (non-slash) input could never fall through to the agent as the
    # documented contract promises. Only treat actual "/..." input as an
    # unknown slash command.
    if command.strip().startswith("/"):
        console.print()
        console.print(f"[yellow]알 수 없는 명령: /{cmd}[/yellow]")
        console.print("[dim]사용 가능한 명령을 보려면 /help를 입력하세요.[/dim]")
        console.print()
        return True
    return False
def execute_bash_command(command: str) -> bool:
    """Execute a bash command and render its output.

    Args:
        command: Raw user input, typically prefixed with "!".

    Returns:
        True in every case — the input is always considered handled.
    """
    stripped = command.strip().lstrip("!")
    if not stripped:
        # Nothing left after removing the "!" prefix; treat as handled.
        return True
    try:
        console.print()
        console.print(f"[dim]$ {stripped}[/dim]")
        # Run the command in a shell with a hard 30s budget.
        proc = subprocess.run(
            stripped,
            check=False,
            shell=True,
            capture_output=True,
            text=True,
            timeout=30,
            cwd=Path.cwd(),
        )
        # Show captured stdout (dimmed) and stderr (red), when non-empty.
        for text, style_name in ((proc.stdout, COLORS["dim"]), (proc.stderr, "red")):
            if text:
                console.print(text, style=style_name, markup=False)
        # Surface a non-zero exit status explicitly.
        if proc.returncode != 0:
            console.print(f"[dim]Exit code: {proc.returncode}[/dim]")
    except subprocess.TimeoutExpired:
        console.print("[red]30초 후 명령 시간 초과[/red]")
    except Exception as exc:  # best-effort passthrough: report, never crash
        console.print(f"[red]명령 실행 오류: {exc}[/red]")
    console.print()
    return True

View File

@@ -0,0 +1,509 @@
"""CLI를 위한 구성, 상수 밎 모델 생성."""
import os
import re
import sys
import uuid
from dataclasses import dataclass
from pathlib import Path
import dotenv
from rich.console import Console
from deepagents_cli._version import __version__
dotenv.load_dotenv()
# CRITICAL: Override LANGSMITH_PROJECT to route agent traces to separate project
# LangSmith reads LANGSMITH_PROJECT at invocation time, so we override it here
# and preserve the user's original value for shell commands
_deepagents_project = os.environ.get("DEEPAGENTS_LANGSMITH_PROJECT")
# Saved BEFORE the override below; consumed later by Settings.from_environment()
# so user code still sees its original project name.
_original_langsmith_project = os.environ.get("LANGSMITH_PROJECT")
if _deepagents_project:
    # Override LANGSMITH_PROJECT for agent traces
    os.environ["LANGSMITH_PROJECT"] = _deepagents_project
# Now safe to import LangChain modules
from langchain_core.language_models import BaseChatModel
# Color scheme
# Semantic role -> hex color used by the Rich-based UI.
COLORS = {
    "primary": "#10b981",
    "dim": "#6b7280",
    "user": "#ffffff",
    "agent": "#10b981",
    "thinking": "#34d399",
    "tool": "#fbbf24",
}
# ASCII art banner
DEEP_AGENTS_ASCII = f"""
██████╗ ███████╗ ███████╗ ██████╗
██╔══██╗ ██╔════╝ ██╔════╝ ██╔══██╗
██║ ██║ █████╗ █████╗ ██████╔╝
██║ ██║ ██╔══╝ ██╔══╝ ██╔═══╝
██████╔╝ ███████╗ ███████╗ ██║
╚═════╝ ╚══════╝ ╚══════╝ ╚═╝
█████╗ ██████╗ ███████╗ ███╗ ██╗ ████████╗ ███████╗
██╔══██╗ ██╔════╝ ██╔════╝ ████╗ ██║ ╚══██╔══╝ ██╔════╝
███████║ ██║ ███╗ █████╗ ██╔██╗ ██║ ██║ ███████╗
██╔══██║ ██║ ██║ ██╔══╝ ██║╚██╗██║ ██║ ╚════██║
██║ ██║ ╚██████╔╝ ███████╗ ██║ ╚████║ ██║ ███████║
╚═╝ ╚═╝ ╚═════╝ ╚══════╝ ╚═╝ ╚═══╝ ╚═╝ ╚══════╝
v{__version__}
"""
# Interactive commands: slash-command name -> help text shown by /help.
COMMANDS = {
    "clear": "화면을 지우고 대화를 재설정합니다",
    "help": "도움말 정보를 표시합니다",
    "tokens": "현재 세션의 토큰 사용량을 표시합니다",
    "quit": "CLI를 종료합니다",
    "exit": "CLI를 종료합니다",
}
# Maximum argument length for display
MAX_ARG_LENGTH = 150
# Agent configuration
config = {"recursion_limit": 1000}
# Rich console instance
console = Console(highlight=False)
def _find_project_root(start_path: Path | None = None) -> Path | None:
"""git 디렉터리를 찾아 프로젝트 루트를 찾습니다.
start_path(또는 cwd)에서 디렉터리 트리를 따라 올라가며 프로젝트 루트를 나타내는
.git 디렉터리를 찾습니다.
Args:
start_path: 검색을 시작할 디렉터리. 기본값은 현재 작업 디렉터리입니다.
Returns:
찾은 경우 프로젝트 루트의 경로, 그렇지 않으면 None입니다.
"""
current = Path(start_path or Path.cwd()).resolve()
# Walk up the directory tree
for parent in [current, *list(current.parents)]:
git_dir = parent / ".git"
if git_dir.exists():
return parent
return None
def _find_project_agent_md(project_root: Path) -> list[Path]:
    """Find the project-specific agent.md file(s).

    Checks two locations and returns every one that exists, in priority
    order:

    1. ``project_root/.deepagents/agent.md`` (preferred)
    2. ``project_root/agent.md`` (fallback; included alongside the first
       when both exist, so both get loaded and combined)

    Args:
        project_root: Path of the project root directory.

    Returns:
        List of project agent.md paths (may contain 0, 1, or 2 entries).
    """
    candidates = (
        project_root / ".deepagents" / "agent.md",  # preferred location
        project_root / "agent.md",  # repository-root fallback
    )
    return [candidate for candidate in candidates if candidate.exists()]
@dataclass
class Settings:
    """Global settings and environment detection for deepagents-cli.

    Initialized once at startup; provides access to:
    - Available models and API keys
    - Current project information
    - Tool availability (e.g. Tavily)
    - Filesystem paths

    Attributes:
        project_root: Current project root directory (when inside a git project)
        openai_api_key: OpenAI API key (when available)
        anthropic_api_key: Anthropic API key (when available)
        tavily_api_key: Tavily API key (when available)
        deepagents_langchain_project: LangSmith project name for deepagents agent tracing
        user_langchain_project: Original LANGSMITH_PROJECT from the environment (for user code)
    """

    # API keys (None when the corresponding environment variable is unset)
    openai_api_key: str | None
    anthropic_api_key: str | None
    google_api_key: str | None
    tavily_api_key: str | None
    # LangSmith configuration
    deepagents_langchain_project: str | None  # For deepagents agent tracing
    user_langchain_project: str | None  # Original LANGSMITH_PROJECT for user code
    # Model configuration (populated later by create_model())
    model_name: str | None = None  # Currently active model name
    model_provider: str | None = None  # Provider (openai, anthropic, google)
    # Project information
    project_root: Path | None = None

    @classmethod
    def from_environment(cls, *, start_path: Path | None = None) -> "Settings":
        """Create settings by probing the current environment.

        Args:
            start_path: Directory to start project detection from (defaults to cwd)

        Returns:
            A Settings instance with the detected configuration
        """
        # Detect API keys
        openai_key = os.environ.get("OPENAI_API_KEY")
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
        google_key = os.environ.get("GOOGLE_API_KEY")
        tavily_key = os.environ.get("TAVILY_API_KEY")
        # Detect LangSmith configuration
        # DEEPAGENTS_LANGSMITH_PROJECT: Project for deepagents agent tracing
        # user_langchain_project: User's ORIGINAL LANGSMITH_PROJECT (before override)
        # Note: LANGSMITH_PROJECT was already overridden at module import time (above)
        # so we use the saved original value, not the current os.environ value
        deepagents_langchain_project = os.environ.get("DEEPAGENTS_LANGSMITH_PROJECT")
        user_langchain_project = _original_langsmith_project  # Use saved original!
        # Detect project
        project_root = _find_project_root(start_path)
        return cls(
            openai_api_key=openai_key,
            anthropic_api_key=anthropic_key,
            google_api_key=google_key,
            tavily_api_key=tavily_key,
            deepagents_langchain_project=deepagents_langchain_project,
            user_langchain_project=user_langchain_project,
            project_root=project_root,
        )

    @property
    def has_openai(self) -> bool:
        """Whether an OpenAI API key is configured."""
        return self.openai_api_key is not None

    @property
    def has_anthropic(self) -> bool:
        """Whether an Anthropic API key is configured."""
        return self.anthropic_api_key is not None

    @property
    def has_google(self) -> bool:
        """Whether a Google API key is configured."""
        return self.google_api_key is not None

    @property
    def has_tavily(self) -> bool:
        """Whether a Tavily API key is configured."""
        return self.tavily_api_key is not None

    @property
    def has_deepagents_langchain_project(self) -> bool:
        """Whether a DeepAgents LangChain project name is configured."""
        return self.deepagents_langchain_project is not None

    @property
    def has_project(self) -> bool:
        """Whether we are currently inside a git project."""
        return self.project_root is not None

    @property
    def user_deepagents_dir(self) -> Path:
        """Get the base user-level .deepagents directory.

        Returns:
            Path to ~/.deepagents
        """
        return Path.home() / ".deepagents"

    def get_user_agent_md_path(self, agent_name: str) -> Path:
        """Get the user-level agent.md path for a specific agent.

        Returns the path regardless of whether the file exists.
        NOTE(review): unlike get_agent_dir, this does not validate
        agent_name — confirm whether that is intended.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}/agent.md
        """
        return Path.home() / ".deepagents" / agent_name / "agent.md"

    def get_project_agent_md_path(self) -> Path | None:
        """Get the project-level agent.md path.

        Returns the path regardless of whether the file exists.

        Returns:
            Path to {project_root}/.deepagents/agent.md, or None when not
            inside a project
        """
        if not self.project_root:
            return None
        return self.project_root / ".deepagents" / "agent.md"

    @staticmethod
    def _is_valid_agent_name(agent_name: str) -> bool:
        """Validate to prevent invalid filesystem paths and security issues."""
        if not agent_name or not agent_name.strip():
            return False
        # Allow only alphanumeric, hyphens, underscores, and whitespace
        return bool(re.match(r"^[a-zA-Z0-9_\-\s]+$", agent_name))

    def get_agent_dir(self, agent_name: str) -> Path:
        """Get the global agent directory path.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}

        Raises:
            ValueError: If the agent name contains disallowed characters.
        """
        if not self._is_valid_agent_name(agent_name):
            msg = (
                f"Invalid agent name: {agent_name!r}. "
                "Agent names can only contain letters, numbers, hyphens, underscores, and spaces."
            )
            raise ValueError(msg)
        return Path.home() / ".deepagents" / agent_name

    def ensure_agent_dir(self, agent_name: str) -> Path:
        """Ensure the global agent directory exists and return its path.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}

        Raises:
            ValueError: If the agent name contains disallowed characters.
        """
        if not self._is_valid_agent_name(agent_name):
            msg = (
                f"Invalid agent name: {agent_name!r}. "
                "Agent names can only contain letters, numbers, hyphens, underscores, and spaces."
            )
            raise ValueError(msg)
        agent_dir = self.get_agent_dir(agent_name)
        agent_dir.mkdir(parents=True, exist_ok=True)
        return agent_dir

    def ensure_project_deepagents_dir(self) -> Path | None:
        """Ensure the project .deepagents directory exists and return its path.

        Returns:
            Path to the project .deepagents directory, or None when not
            inside a project
        """
        if not self.project_root:
            return None
        project_deepagents_dir = self.project_root / ".deepagents"
        project_deepagents_dir.mkdir(parents=True, exist_ok=True)
        return project_deepagents_dir

    def get_user_skills_dir(self, agent_name: str) -> Path:
        """Get the user-level skills directory path for a specific agent.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}/skills/
        """
        return self.get_agent_dir(agent_name) / "skills"

    def ensure_user_skills_dir(self, agent_name: str) -> Path:
        """Ensure the user-level skills directory exists and return its path.

        Args:
            agent_name: Agent name

        Returns:
            Path to ~/.deepagents/{agent_name}/skills/
        """
        skills_dir = self.get_user_skills_dir(agent_name)
        skills_dir.mkdir(parents=True, exist_ok=True)
        return skills_dir

    def get_project_skills_dir(self) -> Path | None:
        """Get the project-level skills directory path.

        Returns:
            Path to {project_root}/.deepagents/skills/, or None when not
            inside a project
        """
        if not self.project_root:
            return None
        return self.project_root / ".deepagents" / "skills"

    def ensure_project_skills_dir(self) -> Path | None:
        """Ensure the project-level skills directory exists and return its path.

        Returns:
            Path to {project_root}/.deepagents/skills/, or None when not
            inside a project
        """
        if not self.project_root:
            return None
        # Non-None here: project_root was checked above.
        skills_dir = self.get_project_skills_dir()
        skills_dir.mkdir(parents=True, exist_ok=True)
        return skills_dir
# Global settings instance (initialized once at import time; create_model()
# later records the active model_name/model_provider on it).
settings = Settings.from_environment()
class SessionState:
    """Holds mutable session state (auto-approve mode, splash flag, etc.)."""

    def __init__(self, auto_approve: bool = False, no_splash: bool = False) -> None:
        # A fresh conversation thread id is minted per session.
        self.thread_id = str(uuid.uuid4())
        self.auto_approve = auto_approve
        self.no_splash = no_splash
        # Exit-hint bookkeeping (presumably consumed by the UI layer —
        # nothing in this class reads them).
        self.exit_hint_until: float | None = None
        self.exit_hint_handle = None

    def toggle_auto_approve(self) -> bool:
        """Flip auto-approve mode and return the new state."""
        flipped = not self.auto_approve
        self.auto_approve = flipped
        return flipped
def get_default_coding_instructions() -> str:
    """Return the default coding-agent instructions.

    These are immutable base instructions the agent cannot modify;
    long-term memory (agent.md) is handled separately by middleware.

    Returns:
        Contents of the bundled default_agent_prompt.md file.
    """
    return (Path(__file__).parent / "default_agent_prompt.md").read_text()
def _detect_provider(model_name: str) -> str | None:
"""모델 이름에서 공급자를 자동 감지합니다.
Args:
model_name: 공급자를 감지할 모델 이름
Returns:
공급자 이름(openai, anthropic, google) 또는 감지할 수 없는 경우 None
"""
model_lower = model_name.lower()
if any(x in model_lower for x in ["gpt", "o1", "o3"]):
return "openai"
if "claude" in model_lower:
return "anthropic"
if "gemini" in model_lower:
return "google"
return None
def create_model(model_name_override: str | None = None) -> BaseChatModel:
    """Create an appropriate chat model based on the available API keys.

    Uses the global ``settings`` instance to decide which model to build,
    and records the chosen model name/provider back onto ``settings``.

    Args:
        model_name_override: Optional model name to use instead of the
            environment-variable defaults.

    Returns:
        A chat model instance (OpenAI, Anthropic, or Google).

    Raises:
        SystemExit: If no API key is configured or the model provider
            cannot be determined.
    """
    # Determine provider and model
    if model_name_override:
        # Use provided model, auto-detect provider
        provider = _detect_provider(model_name_override)
        if not provider:
            console.print(
                f"[bold red]오류:[/bold red] 모델 이름에서 공급자를 감지할 수 없습니다: {model_name_override}"
            )
            console.print("\n지원되는 모델 이름 패턴:")
            console.print("  - OpenAI: gpt-*, o1-*, o3-*")
            console.print("  - Anthropic: claude-*")
            console.print("  - Google: gemini-*")
            sys.exit(1)
        # Check if API key for detected provider is available
        if provider == "openai" and not settings.has_openai:
            console.print(f"[bold red]오류:[/bold red] 모델 '{model_name_override}'은(는) OPENAI_API_KEY가 필요합니다")
            sys.exit(1)
        elif provider == "anthropic" and not settings.has_anthropic:
            console.print(
                f"[bold red]오류:[/bold red] 모델 '{model_name_override}'은(는) ANTHROPIC_API_KEY가 필요합니다"
            )
            sys.exit(1)
        elif provider == "google" and not settings.has_google:
            console.print(f"[bold red]오류:[/bold red] 모델 '{model_name_override}'은(는) GOOGLE_API_KEY가 필요합니다")
            sys.exit(1)
        model_name = model_name_override
    # Use environment variable defaults, detect provider by API key priority
    elif settings.has_openai:
        provider = "openai"
        model_name = os.environ.get("OPENAI_MODEL", "gpt-5-mini")
    elif settings.has_anthropic:
        provider = "anthropic"
        model_name = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-5-20250929")
    elif settings.has_google:
        provider = "google"
        model_name = os.environ.get("GOOGLE_MODEL", "gemini-3-pro-preview")
    else:
        console.print("[bold red]오류:[/bold red] API 키가 구성되지 않았습니다.")
        console.print("\n다음 환경 변수 중 하나를 설정하십시오:")
        console.print("  - OPENAI_API_KEY (OpenAI 모델용, 예: gpt-5-mini)")
        console.print("  - ANTHROPIC_API_KEY (Claude 모델용)")
        console.print("  - GOOGLE_API_KEY (Google Gemini 모델용)")
        console.print("\n예시:")
        console.print("  export OPENAI_API_KEY=your_api_key_here")
        console.print("\n또는 .env 파일에 추가하십시오.")
        sys.exit(1)
    # Store model info in settings for display
    settings.model_name = model_name
    settings.model_provider = provider
    # Create and return the model
    # (imports are deferred so only the selected provider's package is needed)
    if provider == "openai":
        from langchain_openai import ChatOpenAI

        return ChatOpenAI(model=model_name)
    if provider == "anthropic":
        from langchain_anthropic import ChatAnthropic

        return ChatAnthropic(
            model_name=model_name,
            max_tokens=20_000,  # type: ignore[arg-type]
        )
    if provider == "google":
        from langchain_google_genai import ChatGoogleGenerativeAI

        return ChatGoogleGenerativeAI(
            model=model_name,
            temperature=0,
            max_tokens=None,
        )
    # Unreachable: provider is always one of the three values set above
    # (sys.exit was called otherwise), so control never falls through.

View File

@@ -0,0 +1,111 @@
You are an AI assistant that helps users with various tasks such as coding, research, and analysis.
# Core Role
Your core role and behavior can be updated based on user feedback and instructions. If the user instructs you on how to behave or about your role, immediately update this memory file to reflect those instructions.
## Memory-First Protocol
You have access to a persistent memory system. Always follow this protocol:
**At the start of a session:**
- Check `ls /memories/` to see what knowledge is stored.
- If a specific topic is mentioned in the role description, check related guides in `/memories/`.
**Before answering a question:**
- When asked "What do you know about X?" or "How do I do Y?" → Check `ls /memories/` first.
- If a relevant memory file exists → Read it and answer based on the saved knowledge.
- Prioritize stored knowledge over general knowledge.
**When learning new information:**
- If the user teaches you something or asks you to remember something → Save it to `/memories/[topic].md`.
- Use descriptive filenames: Use `/memories/deep-agents-guide.md` instead of `/memories/notes.md`.
- After saving, read specific content again to verify.
**Important:** Your memory persists between sessions. Information stored in `/memories/` is more reliable than general knowledge for topics you have specifically learned.
# Tone and Style
Be concise and direct. Answer within 4 lines unless the user asks for details.
Stop after finishing file operations - Do not explain what you did unless asked.
Avoid unnecessary introductions or conclusions.
When executing unimportant bash commands, briefly explain what you are doing.
## Proactiveness
Take action when requested, but do not surprise the user with unrequested actions.
If asked about an approach, answer first before taking action.
## Following Conventions
- Check existing code before assuming the availability of libraries and frameworks.
- Mimic existing code style, naming conventions, and patterns.
- Do not add comments unless requested.
## Task Management
Use `write_todos` for complex multi-step tasks (3 or more steps). Mark tasks as `in_progress` before starting, and `completed` immediately after finishing.
Perform simple 1-2 step tasks immediately without todos.
## File Reading Best Practices
**Important**: When navigating the codebase or reading multiple files, always use pagination to prevent context overflow.
**Codebase Navigation Patterns:**
1. First Scan: `read_file(path, limit=100)` - Check file structure and key sections
2. Targeted Reading: `read_file(path, offset=100, limit=200)` - Read specific sections if needed
3. Full Reading: Use `read_file(path)` without limits only when needed for editing
**When to use pagination:**
- Reading any file exceeding 500 lines
- Exploring unfamiliar codebases (Always start with limit=100)
- Reading multiple files in succession
- All research or investigation tasks
**When full reading is allowed:**
- Small files (under 500 lines)
- Files required to be edited immediately after reading
- After verifying file size with a first scan
**Workflow Example:**
```
Bad: read_file(/src/large_module.py) # Fills context with 2000+ lines of code
Good: read_file(/src/large_module.py, limit=100) # Scan structure first
read_file(/src/large_module.py, offset=100, limit=100) # Read relevant section
```
## Working with Subagents (Task Tools)
When delegating to subagents:
- **Use Filesystem for Large I/O**: If input instructions are large (500+ words) or expected output is large, communicate via files.
- Write input context/instructions to a file, and instruct the subagent to read it.
- Ask the subagent to write output to a file, and read it after the subagent returns.
- This prevents token bloat in both directions and keeps context manageable.
- **Parallelize Independent Tasks**: When tasks are independent, create parallel subagents to work simultaneously.
- **Clear Specifications**: Precisely inform the subagent of the required format/structure in their response or output file.
- **Main Agent Synthesis**: Once subagents collect/execute, the main agent integrates results into the final output.
## Tools
### execute_bash
Executes shell commands. Always quote paths that contain spaces.
bash commands are executed in the current working directory.
Example: `pytest /foo/bar/tests` (Good), `cd /foo/bar && pytest tests` (Bad)
### File Tools
- read_file: Read file content (use absolute path)
- edit_file: Exact string replacement in file (must read first, provide unique old_string)
- write_file: Create or overwrite file
- ls: List directory contents
- glob: Find files by pattern (e.g., "**/*.py")
- grep: Search file content
Always use absolute paths starting with /.
### web_search
Search for documentation, error solutions, and code examples.
### http_request
Sends HTTP requests to an API (GET, POST, etc.).
## Code References
When referencing code, use the following format: `file_path:line_number`
## Documentation
- Do not create excessive markdown summary/documentation files after completing tasks.
- Focus on the task itself, not documenting what you did.
- Write documentation only when explicitly requested.

View File

@@ -0,0 +1,672 @@
"""CLI를 위한 작업 실행 및 스트리밍 로직."""
import asyncio
import json
import sys
import termios
import tty
from langchain.agents.middleware.human_in_the_loop import (
ActionRequest,
ApproveDecision,
Decision,
HITLRequest,
HITLResponse,
RejectDecision,
)
from langchain_core.messages import HumanMessage, ToolMessage
from langgraph.types import Command, Interrupt
from pydantic import TypeAdapter, ValidationError
from rich import box
from rich.markdown import Markdown
from rich.panel import Panel
from deepagents_cli.config import COLORS, console
from deepagents_cli.file_ops import FileOpTracker, build_approval_preview
from deepagents_cli.image_utils import create_multimodal_content
from deepagents_cli.input import ImageTracker, parse_file_mentions
from deepagents_cli.ui import (
TokenTracker,
format_tool_display,
format_tool_message_content,
render_diff_block,
render_file_operation,
render_todo_list,
)
_HITL_REQUEST_ADAPTER = TypeAdapter(HITLRequest)
def prompt_for_tool_approval(
    action_request: ActionRequest,
    assistant_id: str | None,
) -> Decision | dict:
    """Ask the user to approve/reject a tool action using arrow-key navigation.

    Renders an approval panel (with an optional diff preview), then runs a
    small raw-mode terminal menu driven by arrow keys / Enter / 'a' / 'r',
    with a plain input() fallback on non-Unix terminals.

    Returns:
        A Decision (ApproveDecision or RejectDecision), or the dict
        {"type": "auto_approve_all"} to switch into auto-approve mode.
    """
    description = action_request.get("description", "No description available")
    name = action_request["name"]
    args = action_request["args"]
    # Build a rich preview of the pending tool call when a name is available.
    preview = build_approval_preview(name, args, assistant_id) if name else None
    body_lines = []
    if preview:
        body_lines.append(f"[bold]{preview.title}[/bold]")
        body_lines.extend(preview.details)
        if preview.error:
            body_lines.append(f"[red]{preview.error}[/red]")
    else:
        body_lines.append(description)
    # Display action info first
    console.print(
        Panel(
            "[bold yellow]⚠️  도구 작업 승인 필요[/bold yellow]\n\n" + "\n".join(body_lines),
            border_style="yellow",
            box=box.ROUNDED,
            padding=(0, 1),
        )
    )
    if preview and preview.diff and not preview.error:
        console.print()
        render_diff_block(preview.diff, preview.diff_title or preview.title)
    options = ["approve", "reject", "auto-accept all going forward"]
    selected = 0  # Start with approve selected
    try:
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)
        try:
            tty.setraw(fd)
            # Hide cursor during menu interaction
            sys.stdout.write("\033[?25l")
            sys.stdout.flush()
            # Initial render flag
            first_render = True
            while True:
                if not first_render:
                    # Move cursor back to start of menu (up 3 lines, then to start of line)
                    sys.stdout.write("\033[3A\r")
                first_render = False
                # Display options vertically with ANSI color codes
                for i, option in enumerate(options):
                    sys.stdout.write("\r\033[K")  # Clear line from cursor to end
                    if i == selected:
                        if option == "approve":
                            # Green bold with filled checkbox
                            sys.stdout.write("\033[1;32m☑ 승인 (Approve)\033[0m\n")
                        elif option == "reject":
                            # Red bold with filled checkbox
                            sys.stdout.write("\033[1;31m☑ 거부 (Reject)\033[0m\n")
                        else:
                            # Blue bold with filled checkbox for auto-accept
                            sys.stdout.write("\033[1;34m☑ 이후 모두 자동 승인 (Auto-accept all)\033[0m\n")
                    elif option == "approve":
                        # Dim with empty checkbox
                        sys.stdout.write("\033[2m☐ 승인 (Approve)\033[0m\n")
                    elif option == "reject":
                        # Dim with empty checkbox
                        sys.stdout.write("\033[2m☐ 거부 (Reject)\033[0m\n")
                    else:
                        # Dim with empty checkbox
                        sys.stdout.write("\033[2m☐ 이후 모두 자동 승인 (Auto-accept all)\033[0m\n")
                sys.stdout.flush()
                # Read key
                char = sys.stdin.read(1)
                if char == "\x1b":  # ESC sequence (arrow keys)
                    # NOTE(review): a bare ESC press blocks here until two more
                    # bytes arrive — confirm that is acceptable for this UI.
                    next1 = sys.stdin.read(1)
                    next2 = sys.stdin.read(1)
                    if next1 == "[":
                        if next2 == "B":  # Down arrow
                            selected = (selected + 1) % len(options)
                        elif next2 == "A":  # Up arrow
                            selected = (selected - 1) % len(options)
                elif char in {"\r", "\n"}:  # Enter
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break
                elif char == "\x03":  # Ctrl+C
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    raise KeyboardInterrupt
                elif char.lower() == "a":  # Shortcut: approve
                    selected = 0
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break
                elif char.lower() == "r":  # Shortcut: reject
                    selected = 1
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break
        finally:
            # Show cursor again and restore the original terminal mode.
            sys.stdout.write("\033[?25h")
            sys.stdout.flush()
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
    except (termios.error, AttributeError):
        # Fallback for non-Unix systems
        console.print("  ☐ (A)승인 (기본값)")
        console.print("  ☐ (R)거부")
        console.print("  ☐ (Auto)이후 모두 자동 승인")
        choice = input("\n선택 (A/R/Auto, 기본값=Approve): ").strip().lower()
        if choice in {"r", "reject"}:
            selected = 1
        elif choice in {"auto", "auto-accept"}:
            selected = 2
        else:
            selected = 0
    # Return decision based on selection
    if selected == 0:
        return ApproveDecision(type="approve")
    if selected == 1:
        return RejectDecision(type="reject", message="User rejected the command")
    # Return special marker for auto-approve mode
    return {"type": "auto_approve_all"}
async def execute_task(
user_input: str,
agent,
assistant_id: str | None,
session_state,
token_tracker: TokenTracker | None = None,
backend=None,
image_tracker: ImageTracker | None = None,
) -> None:
"""모든 작업을 AI 에이전트에게 직접 전달하여 실행합니다."""
# Parse file mentions and inject content if any
prompt_text, mentioned_files = parse_file_mentions(user_input)
if mentioned_files:
context_parts = [prompt_text, "\n\n## 참조된 파일 (Referenced Files)\n"]
for file_path in mentioned_files:
try:
content = file_path.read_text()
# Limit file content to reasonable size
if len(content) > 50000:
content = content[:50000] + "\n... (파일 잘림)"
context_parts.append(f"\n### {file_path.name}\nPath: `{file_path}`\n```\n{content}\n```")
except Exception as e:
context_parts.append(f"\n### {file_path.name}\n[파일 읽기 오류: {e}]")
final_input = "\n".join(context_parts)
else:
final_input = prompt_text
# Include images in the message content
images_to_send = []
if image_tracker:
images_to_send = image_tracker.get_images()
if images_to_send:
message_content = create_multimodal_content(final_input, images_to_send)
else:
message_content = final_input
config = {
"configurable": {"thread_id": session_state.thread_id},
"metadata": {"assistant_id": assistant_id} if assistant_id else {},
}
has_responded = False
captured_input_tokens = 0
captured_output_tokens = 0
current_todos = None # Track current todo list state
status = console.status(f"[bold {COLORS['thinking']}]에이전트가 생각 중...", spinner="dots")
status.start()
spinner_active = True
tool_icons = {
"read_file": "📖",
"write_file": "✏️",
"edit_file": "✂️",
"ls": "📁",
"glob": "🔍",
"grep": "🔎",
"shell": "",
"execute": "🔧",
"web_search": "🌐",
"http_request": "🌍",
"task": "🤖",
"write_todos": "📋",
}
file_op_tracker = FileOpTracker(assistant_id=assistant_id, backend=backend)
# Track which tool calls we've displayed to avoid duplicates
displayed_tool_ids = set()
# Buffer partial tool-call chunks keyed by streaming index
tool_call_buffers: dict[str | int, dict] = {}
# Buffer assistant text so we can render complete markdown segments
pending_text = ""
def flush_text_buffer(*, final: bool = False) -> None:
"""Flush accumulated assistant text as rendered markdown when appropriate."""
nonlocal pending_text, spinner_active, has_responded
if not final or not pending_text.strip():
return
if spinner_active:
status.stop()
spinner_active = False
if not has_responded:
console.print("", style=COLORS["agent"], markup=False, end=" ")
has_responded = True
markdown = Markdown(pending_text.rstrip())
console.print(markdown, style=COLORS["agent"])
pending_text = ""
# Clear images from tracker after creating the message
# (they've been encoded into the message content)
if image_tracker:
image_tracker.clear()
# Stream input - may need to loop if there are interrupts
stream_input = {"messages": [{"role": "user", "content": message_content}]}
try:
while True:
interrupt_occurred = False
hitl_response: dict[str, HITLResponse] = {}
suppress_resumed_output = False
# Track all pending interrupts: {interrupt_id: request_data}
pending_interrupts: dict[str, HITLRequest] = {}
async for chunk in agent.astream(
stream_input,
stream_mode=["messages", "updates"], # Dual-mode for HITL support
subgraphs=True,
config=config,
durability="exit",
):
# Unpack chunk - with subgraphs=True and dual-mode, it's (namespace, stream_mode, data)
if not isinstance(chunk, tuple) or len(chunk) != 3:
continue
_namespace, current_stream_mode, data = chunk
# Handle UPDATES stream - for interrupts and todos
if current_stream_mode == "updates":
if not isinstance(data, dict):
continue
# Check for interrupts - collect ALL pending interrupts
if "__interrupt__" in data:
interrupts: list[Interrupt] = data["__interrupt__"]
if interrupts:
for interrupt_obj in interrupts:
# Interrupt has required fields: value (HITLRequest) and id (str)
# Validate the HITLRequest using TypeAdapter
try:
validated_request = _HITL_REQUEST_ADAPTER.validate_python(interrupt_obj.value)
pending_interrupts[interrupt_obj.id] = validated_request
interrupt_occurred = True
except ValidationError as e:
console.print(
f"[yellow]경고: 유효하지 않은 HITL 요청 데이터: {e}[/yellow]",
style="dim",
)
raise
# Extract chunk_data from updates for todo checking
chunk_data = next(iter(data.values())) if data else None
if chunk_data and isinstance(chunk_data, dict):
# Check for todo updates
if "todos" in chunk_data:
new_todos = chunk_data["todos"]
if new_todos != current_todos:
current_todos = new_todos
# Stop spinner before rendering todos
if spinner_active:
status.stop()
spinner_active = False
console.print()
render_todo_list(new_todos)
console.print()
# Handle MESSAGES stream - for content and tool calls
elif current_stream_mode == "messages":
# Messages stream returns (message, metadata) tuples
if not isinstance(data, tuple) or len(data) != 2:
continue
message, _metadata = data
if isinstance(message, HumanMessage):
content = message.text
if content:
flush_text_buffer(final=True)
if spinner_active:
status.stop()
spinner_active = False
if not has_responded:
console.print("", style=COLORS["agent"], markup=False, end=" ")
has_responded = True
markdown = Markdown(content)
console.print(markdown, style=COLORS["agent"])
console.print()
continue
if isinstance(message, ToolMessage):
# Tool results are sent to the agent, not displayed to users
# Exception: show shell command errors to help with debugging
tool_name = getattr(message, "name", "")
tool_status = getattr(message, "status", "success")
tool_content = format_tool_message_content(message.content)
record = file_op_tracker.complete_with_message(message)
# Reset spinner message after tool completes
if spinner_active:
status.update(f"[bold {COLORS['thinking']}]에이전트가 생각 중...")
if tool_name == "shell" and tool_status != "success":
flush_text_buffer(final=True)
if tool_content:
if spinner_active:
status.stop()
spinner_active = False
console.print()
console.print(tool_content, style="red", markup=False)
console.print()
elif tool_content and isinstance(tool_content, str):
stripped = tool_content.lstrip()
if stripped.lower().startswith("error"):
flush_text_buffer(final=True)
if spinner_active:
status.stop()
spinner_active = False
console.print()
console.print(tool_content, style="red", markup=False)
console.print()
if record:
flush_text_buffer(final=True)
if spinner_active:
status.stop()
spinner_active = False
console.print()
render_file_operation(record)
console.print()
if not spinner_active:
status.start()
spinner_active = True
# For all other tools (web_search, http_request, etc.),
# results are hidden from user - agent will process and respond
continue
# Check if this is an AIMessageChunk
if not hasattr(message, "content_blocks"):
# Fallback for messages without content_blocks
continue
# Extract token usage if available
if token_tracker and hasattr(message, "usage_metadata"):
usage = message.usage_metadata
if usage:
input_toks = usage.get("input_tokens", 0)
output_toks = usage.get("output_tokens", 0)
if input_toks or output_toks:
captured_input_tokens = max(captured_input_tokens, input_toks)
captured_output_tokens = max(captured_output_tokens, output_toks)
# Process content blocks (this is the key fix!)
for block in message.content_blocks:
block_type = block.get("type")
# Handle text blocks
if block_type == "text":
text = block.get("text", "")
if text:
pending_text += text
# Handle reasoning blocks
elif block_type == "reasoning":
flush_text_buffer(final=True)
reasoning = block.get("reasoning", "")
if reasoning and spinner_active:
status.stop()
spinner_active = False
# Could display reasoning differently if desired
# For now, skip it or handle minimally
# Handle tool call chunks
# Some models (OpenAI, Anthropic) stream tool_call_chunks
# Others (Gemini) don't stream them and just return the full tool_call
elif block_type in ("tool_call_chunk", "tool_call"):
chunk_name = block.get("name")
chunk_args = block.get("args")
chunk_id = block.get("id")
chunk_index = block.get("index")
# Use index as stable buffer key; fall back to id if needed
buffer_key: str | int
if chunk_index is not None:
buffer_key = chunk_index
elif chunk_id is not None:
buffer_key = chunk_id
else:
buffer_key = f"unknown-{len(tool_call_buffers)}"
buffer = tool_call_buffers.setdefault(
buffer_key,
{"name": None, "id": None, "args": None, "args_parts": []},
)
if chunk_name:
buffer["name"] = chunk_name
if chunk_id:
buffer["id"] = chunk_id
if isinstance(chunk_args, dict):
buffer["args"] = chunk_args
buffer["args_parts"] = []
elif isinstance(chunk_args, str):
if chunk_args:
parts: list[str] = buffer.setdefault("args_parts", [])
if not parts or chunk_args != parts[-1]:
parts.append(chunk_args)
buffer["args"] = "".join(parts)
elif chunk_args is not None:
buffer["args"] = chunk_args
buffer_name = buffer.get("name")
buffer_id = buffer.get("id")
if buffer_name is None:
continue
parsed_args = buffer.get("args")
if isinstance(parsed_args, str):
if not parsed_args:
continue
try:
parsed_args = json.loads(parsed_args)
except json.JSONDecodeError:
# Wait for more chunks to form valid JSON
continue
elif parsed_args is None:
continue
# Ensure args are in dict form for formatter
if not isinstance(parsed_args, dict):
parsed_args = {"value": parsed_args}
flush_text_buffer(final=True)
if buffer_id is not None:
if buffer_id not in displayed_tool_ids:
displayed_tool_ids.add(buffer_id)
file_op_tracker.start_operation(buffer_name, parsed_args, buffer_id)
else:
file_op_tracker.update_args(buffer_id, parsed_args)
tool_call_buffers.pop(buffer_key, None)
icon = tool_icons.get(buffer_name, "🔧")
if spinner_active:
status.stop()
if has_responded:
console.print()
display_str = format_tool_display(buffer_name, parsed_args)
console.print(
f" {icon} {display_str}",
style=f"dim {COLORS['tool']}",
markup=False,
)
# Restart spinner with context about which tool is executing
status.update(f"[bold {COLORS['thinking']}]{display_str} 실행 중...")
status.start()
spinner_active = True
if getattr(message, "chunk_position", None) == "last":
flush_text_buffer(final=True)
# After streaming loop - handle interrupt if it occurred
flush_text_buffer(final=True)
# Handle human-in-the-loop after stream completes
if interrupt_occurred:
any_rejected = False
for interrupt_id, hitl_request in pending_interrupts.items():
# Check if auto-approve is enabled
if session_state.auto_approve:
# Auto-approve all commands without prompting
decisions = []
for action_request in hitl_request["action_requests"]:
# Show what's being auto-approved (brief, dim message)
if spinner_active:
status.stop()
spinner_active = False
description = action_request.get("description", "tool action")
console.print()
console.print(f" [dim]⚡ {description}[/dim]")
decisions.append({"type": "approve"})
hitl_response[interrupt_id] = {"decisions": decisions}
# Restart spinner for continuation
if not spinner_active:
status.start()
spinner_active = True
else:
# Normal HITL flow - stop spinner and prompt user
if spinner_active:
status.stop()
spinner_active = False
# Handle human-in-the-loop approval
decisions = []
for action_index, action_request in enumerate(hitl_request["action_requests"]):
decision = prompt_for_tool_approval(
action_request,
assistant_id,
)
# Check if user wants to switch to auto-approve mode
if isinstance(decision, dict) and decision.get("type") == "auto_approve_all":
# Switch to auto-approve mode
session_state.auto_approve = True
console.print()
console.print("[bold blue]✓ 자동 승인 모드 활성화됨[/bold blue]")
console.print("[dim]향후 모든 도구 작업이 자동으로 승인됩니다.[/dim]")
console.print()
# Approve this action and all remaining actions in the batch
decisions.append({"type": "approve"})
for _remaining_action in hitl_request["action_requests"][action_index + 1 :]:
decisions.append({"type": "approve"})
break
decisions.append(decision)
# Mark file operations as HIL-approved if user approved
if decision.get("type") == "approve":
tool_name = action_request.get("name")
if tool_name in {"write_file", "edit_file"}:
file_op_tracker.mark_hitl_approved(tool_name, action_request.get("args", {}))
if any(decision.get("type") == "reject" for decision in decisions):
any_rejected = True
hitl_response[interrupt_id] = {"decisions": decisions}
suppress_resumed_output = any_rejected
if interrupt_occurred and hitl_response:
if suppress_resumed_output:
if spinner_active:
status.stop()
spinner_active = False
console.print("[yellow]명령이 거부되었습니다.[/yellow]", style="bold")
console.print("에이전트에게 다르게 수행할 작업을 알려주세요.")
console.print()
return
# Resume the agent with the human decision
stream_input = Command(resume=hitl_response)
# Continue the while loop to restream
else:
# No interrupt, break out of while loop
break
except asyncio.CancelledError:
# Event loop cancelled the task (e.g. Ctrl+C during streaming) - clean up and return
if spinner_active:
status.stop()
console.print("\n[yellow]사용자에 의해 중단됨[/yellow]")
console.print("에이전트 상태 업데이트 중...", style="dim")
try:
await agent.aupdate_state(
config=config,
values={"messages": [HumanMessage(content="[이전 요청이 시스템에 의해 취소되었습니다]")]},
)
console.print("다음 명령 준비 완료.\n", style="dim")
except Exception as e:
console.print(f"[red]경고: 에이전트 상태 업데이트 실패: {e}[/red]\n")
return
except KeyboardInterrupt:
# User pressed Ctrl+C - clean up and exit gracefully
if spinner_active:
status.stop()
console.print("\n[yellow]사용자에 의해 중단됨[/yellow]")
console.print("에이전트 상태 업데이트 중...", style="dim")
# Inform the agent synchronously (in async context)
try:
await agent.aupdate_state(
config=config,
values={"messages": [HumanMessage(content="[사용자가 Ctrl+C로 이전 요청을 중단했습니다]")]},
)
console.print("다음 명령 준비 완료.\n", style="dim")
except Exception as e:
console.print(f"[red]경고: 에이전트 상태 업데이트 실패: {e}[/red]\n")
return
if spinner_active:
status.stop()
if has_responded:
console.print()
# Track token usage (display only via /tokens command)
if token_tracker and (captured_input_tokens or captured_output_tokens):
token_tracker.add(captured_input_tokens, captured_output_tokens)

View File

@@ -0,0 +1,408 @@
"""CLI 표시를 위한 파일 작업 추적 및 diff 계산 도움말."""
from __future__ import annotations
import difflib
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
from deepagents.backends.utils import perform_string_replacement
from deepagents_cli.config import settings
if TYPE_CHECKING:
from deepagents.backends.protocol import BACKEND_TYPES
# Lifecycle status of a tracked file operation.
FileOpStatus = Literal["pending", "success", "error"]
@dataclass
class ApprovalPreview:
    """Data used to render a human-in-the-loop (HITL) approval preview."""

    title: str  # Headline shown in the approval prompt
    details: list[str]  # Bullet-style summary lines
    diff: str | None = None  # Optional unified diff to display
    diff_title: str | None = None  # Heading for the diff panel
    error: str | None = None  # Error message when a preview cannot be built
def _safe_read(path: Path) -> str | None:
"""파일 내용을 읽고, 실패 시 None을 반환합니다."""
try:
return path.read_text()
except (OSError, UnicodeDecodeError):
return None
def _count_lines(text: str) -> int:
"""빈 문자열을 0줄로 취급하여 텍스트의 줄 수를 셉니다."""
if not text:
return 0
return len(text.splitlines())
def compute_unified_diff(
    before: str,
    after: str,
    display_path: str,
    *,
    max_lines: int | None = 800,
    context_lines: int = 3,
) -> str | None:
    """Compute a unified diff between two text contents.

    Args:
        before: Original content.
        after: New content.
        display_path: Path shown in the diff headers.
        max_lines: Maximum number of diff lines (None for no limit).
        context_lines: Context lines kept around each change (default 3).

    Returns:
        The unified diff as a single string, or None when nothing changed.
    """
    lines = list(
        difflib.unified_diff(
            before.splitlines(),
            after.splitlines(),
            fromfile=f"{display_path} (before)",
            tofile=f"{display_path} (after)",
            lineterm="",
            n=context_lines,
        )
    )
    if not lines:
        return None
    # Cap very long diffs, marking the cut with a trailing ellipsis line.
    if max_lines is not None and len(lines) > max_lines:
        lines = lines[: max_lines - 1] + ["..."]
    return "\n".join(lines)
@dataclass
class FileOpMetrics:
    """Line- and byte-level metrics for a file operation."""

    lines_read: int = 0  # Lines returned by a read_file call
    start_line: int | None = None  # 1-based first line shown (reads)
    end_line: int | None = None  # 1-based last line shown (reads)
    lines_written: int = 0  # Total lines in the written file
    lines_added: int = 0  # "+" lines counted from the computed diff
    lines_removed: int = 0  # "-" lines counted from the computed diff
    bytes_written: int = 0  # UTF-8 byte size of the written content
@dataclass
class FileOperationRecord:
    """Tracks a single filesystem tool call from start to completion."""

    tool_name: str  # "read_file", "write_file", or "edit_file"
    display_path: str  # Human-friendly path for rendering
    physical_path: Path | None  # Resolved on-disk path, if known
    tool_call_id: str | None  # Streaming tool-call id used for correlation
    args: dict[str, Any] = field(default_factory=dict)  # Raw tool arguments
    status: FileOpStatus = "pending"
    error: str | None = None  # Error text when status == "error"
    metrics: FileOpMetrics = field(default_factory=FileOpMetrics)
    diff: str | None = None  # Unified diff of before/after content
    before_content: str | None = None  # Content captured before the op
    after_content: str | None = None  # Content read back after the op
    read_output: str | None = None  # Raw content returned by read_file
    hitl_approved: bool = False  # True when the user explicitly approved
def resolve_physical_path(path_str: str | None, assistant_id: str | None) -> Path | None:
    """Translate a virtual/relative path into a real filesystem path.

    ``/memories/...`` paths map into the agent's storage directory when an
    assistant id is available; other relative paths resolve against the
    current working directory. Returns None when resolution fails.
    """
    if not path_str:
        return None
    try:
        if assistant_id and path_str.startswith("/memories/"):
            # The virtual memories namespace lives under the agent directory.
            relative = path_str.removeprefix("/memories/").lstrip("/")
            return (settings.get_agent_dir(assistant_id) / relative).resolve()
        candidate = Path(path_str)
        if not candidate.is_absolute():
            candidate = (Path.cwd() / candidate).resolve()
        return candidate
    except (OSError, ValueError):
        return None
def format_display_path(path_str: str | None) -> str:
    """Format a path for display: basename for absolute paths, as-is otherwise."""
    if not path_str:
        return "(알 수 없음)"
    try:
        path = Path(path_str)
        if not path.is_absolute():
            return str(path)
        # Prefer the basename; roots like "/" have no name, so show the path.
        return path.name or str(path)
    except (OSError, ValueError):
        return str(path_str)
def build_approval_preview(
    tool_name: str,
    args: dict[str, Any],
    assistant_id: str | None,
) -> ApprovalPreview | None:
    """Collect summary details and a diff for HITL approval.

    Args:
        tool_name: Tool being approved; only "write_file" and "edit_file"
            produce a preview.
        args: Raw tool-call arguments (reads "file_path"/"path" plus
            tool-specific keys such as "content" or "old_string").
        assistant_id: Assistant id used to resolve virtual /memories/ paths.

    Returns:
        An ApprovalPreview for write/edit operations, or None for tools
        that need no preview.
    """
    path_str = str(args.get("file_path") or args.get("path") or "")
    display_path = format_display_path(path_str)
    physical_path = resolve_physical_path(path_str, assistant_id)
    if tool_name == "write_file":
        content = str(args.get("content", ""))
        # Existing file content (if any) so the diff can show an overwrite.
        before = _safe_read(physical_path) if physical_path and physical_path.exists() else ""
        after = content
        diff = compute_unified_diff(before or "", after, display_path, max_lines=100)
        additions = 0
        if diff:
            # Count "+" lines, excluding the "+++" header.
            additions = sum(1 for line in diff.splitlines() if line.startswith("+") and not line.startswith("+++"))
        total_lines = _count_lines(after)
        details = [
            f"파일: {path_str}",
            "작업: 새 파일 생성" + (" (기존 내용 덮어씀)" if before else ""),
            f"작성할 줄 수: {additions or total_lines}",
        ]
        return ApprovalPreview(
            title=f"{display_path} 쓰기",
            details=details,
            diff=diff,
            diff_title=f"{display_path} 차이(Diff)",
        )
    if tool_name == "edit_file":
        if physical_path is None:
            return ApprovalPreview(
                title=f"{display_path} 업데이트",
                details=[f"파일: {path_str}", "작업: 텍스트 교체"],
                error="파일 경로를 확인할 수 없습니다.",
            )
        before = _safe_read(physical_path)
        if before is None:
            return ApprovalPreview(
                title=f"{display_path} 업데이트",
                details=[f"파일: {path_str}", "작업: 텍스트 교체"],
                error="현재 파일 내용을 읽을 수 없습니다.",
            )
        old_string = str(args.get("old_string", ""))
        new_string = str(args.get("new_string", ""))
        replace_all = bool(args.get("replace_all", False))
        # perform_string_replacement returns an error string on failure, or
        # an (updated_content, occurrence_count) tuple on success.
        replacement = perform_string_replacement(before, old_string, new_string, replace_all)
        if isinstance(replacement, str):
            return ApprovalPreview(
                title=f"{display_path} 업데이트",
                details=[f"파일: {path_str}", "작업: 텍스트 교체"],
                error=replacement,
            )
        after, occurrences = replacement
        # Untruncated diff so the user can review the entire edit.
        diff = compute_unified_diff(before, after, display_path, max_lines=None)
        additions = 0
        deletions = 0
        if diff:
            additions = sum(1 for line in diff.splitlines() if line.startswith("+") and not line.startswith("+++"))
            deletions = sum(1 for line in diff.splitlines() if line.startswith("-") and not line.startswith("---"))
        details = [
            f"파일: {path_str}",
            f"작업: 텍스트 교체 ({'모든 발생' if replace_all else '단일 발생'})",
            f"일치하는 발생: {occurrences}",
            f"변경된 줄: +{additions} / -{deletions}",
        ]
        return ApprovalPreview(
            title=f"{display_path} 업데이트",
            details=details,
            diff=diff,
            diff_title=f"{display_path} 차이(Diff)",
        )
    return None
class FileOpTracker:
    """Collects file-operation metrics during a CLI interaction."""

    def __init__(self, *, assistant_id: str | None, backend: BACKEND_TYPES | None = None) -> None:
        """Initialize the tracker.

        Args:
            assistant_id: Used to resolve virtual /memories/ paths.
            backend: Optional backend used to read file contents; when
                absent, the local filesystem is used instead.
        """
        self.assistant_id = assistant_id
        self.backend = backend
        # Operations in flight, keyed by tool_call_id (None is a valid key).
        self.active: dict[str | None, FileOperationRecord] = {}
        # Finished operations, in completion order.
        self.completed: list[FileOperationRecord] = []

    def start_operation(self, tool_name: str, args: dict[str, Any], tool_call_id: str | None) -> None:
        """Begin tracking a file tool call; non-file tools are ignored."""
        if tool_name not in {"read_file", "write_file", "edit_file"}:
            return
        path_str = str(args.get("file_path") or args.get("path") or "")
        display_path = format_display_path(path_str)
        record = FileOperationRecord(
            tool_name=tool_name,
            display_path=display_path,
            physical_path=resolve_physical_path(path_str, self.assistant_id),
            tool_call_id=tool_call_id,
            args=args,
        )
        if tool_name in {"write_file", "edit_file"}:
            # Snapshot the current content now so a diff can be computed
            # after the operation completes.
            if self.backend and path_str:
                try:
                    responses = self.backend.download_files([path_str])
                    if responses and responses[0].content is not None and responses[0].error is None:
                        record.before_content = responses[0].content.decode("utf-8")
                    else:
                        record.before_content = ""
                except Exception:
                    record.before_content = ""
            elif record.physical_path:
                record.before_content = _safe_read(record.physical_path) or ""
        self.active[tool_call_id] = record

    def update_args(self, tool_call_id: str, args: dict[str, Any]) -> None:
        """Merge newly streamed args and retry the before-content capture."""
        record = self.active.get(tool_call_id)
        if not record:
            return
        record.args.update(args)
        # If we haven't captured before_content yet, try again now that we might have the path
        if record.before_content is None and record.tool_name in {"write_file", "edit_file"}:
            path_str = str(record.args.get("file_path") or record.args.get("path") or "")
            if path_str:
                record.display_path = format_display_path(path_str)
                record.physical_path = resolve_physical_path(path_str, self.assistant_id)
                if self.backend:
                    try:
                        responses = self.backend.download_files([path_str])
                        if responses and responses[0].content is not None and responses[0].error is None:
                            record.before_content = responses[0].content.decode("utf-8")
                        else:
                            record.before_content = ""
                    except Exception:
                        record.before_content = ""
                elif record.physical_path:
                    record.before_content = _safe_read(record.physical_path) or ""

    def complete_with_message(self, tool_message: Any) -> FileOperationRecord | None:
        """Finalize the matching active record from the tool's result message.

        Returns:
            The completed record, or None when the message does not
            correspond to a tracked file operation.
        """
        tool_call_id = getattr(tool_message, "tool_call_id", None)
        record = self.active.get(tool_call_id)
        if record is None:
            return None
        content = tool_message.content
        if isinstance(content, list):
            # Some tool messages may return list segments; join them for analysis.
            joined = []
            for item in content:
                if isinstance(item, str):
                    joined.append(item)
                else:
                    joined.append(str(item))
            content_text = "\n".join(joined)
        else:
            content_text = str(content) if content is not None else ""
        if getattr(tool_message, "status", "success") != "success" or content_text.lower().startswith("error"):
            record.status = "error"
            record.error = content_text
            self._finalize(record)
            return record
        record.status = "success"
        if record.tool_name == "read_file":
            record.read_output = content_text
            lines = _count_lines(content_text)
            record.metrics.lines_read = lines
            offset = record.args.get("offset")
            limit = record.args.get("limit")
            if isinstance(offset, int):
                # An offset past the end is treated as reading from the top.
                if offset > lines:
                    offset = 0
                record.metrics.start_line = offset + 1
                if lines:
                    record.metrics.end_line = offset + lines
            elif lines:
                record.metrics.start_line = 1
                record.metrics.end_line = lines
            if isinstance(limit, int) and lines > limit:
                record.metrics.end_line = (record.metrics.start_line or 1) + limit - 1
        else:
            # For write/edit operations, read back from backend (or local filesystem)
            self._populate_after_content(record)
            if record.after_content is None:
                record.status = "error"
                record.error = "업데이트된 파일 내용을 읽을 수 없습니다."
                self._finalize(record)
                return record
            record.metrics.lines_written = _count_lines(record.after_content)
            before_lines = _count_lines(record.before_content or "")
            diff = compute_unified_diff(
                record.before_content or "",
                record.after_content,
                record.display_path,
                max_lines=100,
            )
            record.diff = diff
            if diff:
                # Count change lines, excluding the "+++" / "---" headers.
                additions = sum(1 for line in diff.splitlines() if line.startswith("+") and not line.startswith("+++"))
                deletions = sum(1 for line in diff.splitlines() if line.startswith("-") and not line.startswith("---"))
                record.metrics.lines_added = additions
                record.metrics.lines_removed = deletions
            elif record.tool_name == "write_file" and (record.before_content or "") == "":
                # Brand-new file: every written line counts as an addition.
                record.metrics.lines_added = record.metrics.lines_written
            record.metrics.bytes_written = len(record.after_content.encode("utf-8"))
            if record.diff is None and (record.before_content or "") != record.after_content:
                record.diff = compute_unified_diff(
                    record.before_content or "",
                    record.after_content,
                    record.display_path,
                    max_lines=100,
                )
            if record.diff is None and before_lines != record.metrics.lines_written:
                record.metrics.lines_added = max(record.metrics.lines_written - before_lines, 0)
        self._finalize(record)
        return record

    def mark_hitl_approved(self, tool_name: str, args: dict[str, Any]) -> None:
        """Mark active operations matching tool_name and file_path as HIL-approved."""
        file_path = args.get("file_path") or args.get("path")
        if not file_path:
            return
        # Mark all active records that match
        for record in self.active.values():
            if record.tool_name == tool_name:
                record_path = record.args.get("file_path") or record.args.get("path")
                if record_path == file_path:
                    record.hitl_approved = True

    def _populate_after_content(self, record: FileOperationRecord) -> None:
        # Read the post-operation file content, preferring the backend.
        # Use backend if available (works for any BackendProtocol implementation)
        if self.backend:
            try:
                file_path = record.args.get("file_path") or record.args.get("path")
                if file_path:
                    responses = self.backend.download_files([file_path])
                    if responses and responses[0].content is not None and responses[0].error is None:
                        record.after_content = responses[0].content.decode("utf-8")
                    else:
                        record.after_content = None
                else:
                    record.after_content = None
            except Exception:
                record.after_content = None
        else:
            # Fallback: direct filesystem read when no backend provided
            if record.physical_path is None:
                record.after_content = None
                return
            record.after_content = _safe_read(record.physical_path)

    def _finalize(self, record: FileOperationRecord) -> None:
        # Move the record from the active map to the completed list.
        self.completed.append(record)
        self.active.pop(record.tool_call_id, None)

View File

@@ -0,0 +1,209 @@
"""Utilities for handling image paste from clipboard."""
import base64
import io
import os
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from PIL import Image
@dataclass
class ImageData:
    """A pasted clipboard image captured as base64 text.

    Attributes:
        base64_data: Base64-encoded image bytes.
        format: Image format name such as "png" or "jpeg".
        placeholder: Display text shown in the prompt, e.g. "[image 1]".
    """

    base64_data: str
    format: str
    placeholder: str

    def to_message_content(self) -> dict:
        """Return this image as a LangChain ``image_url`` content block."""
        data_url = f"data:image/{self.format};base64,{self.base64_data}"
        return {"type": "image_url", "image_url": {"url": data_url}}
def get_clipboard_image() -> ImageData | None:
    """Attempt to read an image from the system clipboard.

    Only macOS is currently supported (via ``pngpaste`` or ``osascript``).

    Returns:
        ImageData if an image is found, None otherwise
    """
    if sys.platform != "darwin":
        # Linux/Windows support could be added here
        return None
    return _get_macos_clipboard_image()
def _get_macos_clipboard_image() -> ImageData | None:
    """Get clipboard image on macOS using pngpaste or osascript.

    First tries pngpaste (faster if installed), then falls back to osascript.

    Returns:
        ImageData if an image is found, None otherwise
    """
    # Try pngpaste first (fast if installed)
    try:
        result = subprocess.run(
            ["pngpaste", "-"],
            capture_output=True,
            check=False,
            timeout=2,
        )
        if result.returncode == 0 and result.stdout:
            # Successfully got PNG data
            try:
                # PIL parse is used purely as validation; the raw stdout
                # bytes are what get base64-encoded.
                Image.open(io.BytesIO(result.stdout))  # Validate it's a real image
                base64_data = base64.b64encode(result.stdout).decode("utf-8")
                return ImageData(
                    base64_data=base64_data,
                    format="png",  # 'pngpaste -' always outputs PNG
                    placeholder="[image]",
                )
            except Exception:
                pass  # Invalid image data - fall through to osascript
    except (FileNotFoundError, subprocess.TimeoutExpired):
        pass  # pngpaste not installed or timed out
    # Fallback to osascript with temp file (built-in but slower)
    return _get_clipboard_via_osascript()
def _get_clipboard_via_osascript() -> ImageData | None:
    """Get clipboard image via osascript using a temp file.

    osascript outputs data in a special format that can't be captured as raw binary,
    so we write to a temp file instead.

    Returns:
        ImageData if an image is found, None otherwise
    """
    # Create a temp file for the image; the AppleScript below writes into it.
    fd, temp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        # First check if clipboard has PNG data
        check_result = subprocess.run(
            ["osascript", "-e", "clipboard info"],
            capture_output=True,
            check=False,
            timeout=2,
            text=True,
        )
        if check_result.returncode != 0:
            return None
        # Check for PNG or TIFF in clipboard info
        # ("pngf"/"tiff" are the lowercase clipboard type codes).
        clipboard_info = check_result.stdout.lower()
        if "pngf" not in clipboard_info and "tiff" not in clipboard_info:
            return None
        # Try to get PNG first, fall back to TIFF
        if "pngf" in clipboard_info:
            get_script = f"""
            set pngData to the clipboard as «class PNGf»
            set theFile to open for access POSIX file "{temp_path}" with write permission
            write pngData to theFile
            close access theFile
            return "success"
            """
        else:
            get_script = f"""
            set tiffData to the clipboard as TIFF picture
            set theFile to open for access POSIX file "{temp_path}" with write permission
            write tiffData to theFile
            close access theFile
            return "success"
            """
        result = subprocess.run(
            ["osascript", "-e", get_script],
            capture_output=True,
            check=False,
            timeout=3,
            text=True,
        )
        if result.returncode != 0 or "success" not in result.stdout:
            return None
        # Check if file was created and has content
        if not os.path.exists(temp_path) or os.path.getsize(temp_path) == 0:
            return None
        # Read and validate the image
        with open(temp_path, "rb") as f:
            image_data = f.read()
        try:
            image = Image.open(io.BytesIO(image_data))
            # Convert to PNG if it's not already (e.g., if we got TIFF)
            buffer = io.BytesIO()
            image.save(buffer, format="PNG")
            buffer.seek(0)
            base64_data = base64.b64encode(buffer.getvalue()).decode("utf-8")
            return ImageData(
                base64_data=base64_data,
                format="png",
                placeholder="[image]",
            )
        except Exception:
            return None
    except (subprocess.TimeoutExpired, OSError):
        return None
    finally:
        # Clean up temp file
        try:
            os.unlink(temp_path)
        except OSError:
            pass
def encode_image_to_base64(image_bytes: bytes) -> str:
    """Encode raw image bytes as a base64 string.

    Args:
        image_bytes: Raw image bytes.

    Returns:
        The base64-encoded text representation.
    """
    encoded = base64.b64encode(image_bytes)
    return encoded.decode("utf-8")
def create_multimodal_content(text: str, images: list[ImageData]) -> list[dict]:
    """Build multimodal message content from text plus pasted images.

    Args:
        text: Text content of the message; skipped entirely when blank.
        images: Images to append as image content blocks.

    Returns:
        Content blocks in LangChain format (text block first, then images).
    """
    blocks: list[dict] = []
    if text.strip():
        blocks.append({"type": "text", "text": text})
    blocks.extend(image.to_message_content() for image in images)
    return blocks

View File

@@ -0,0 +1,420 @@
"""CLI를 위한 입력 처리, 완성 및 프롬프트 세션."""
import asyncio
import os
import re
import time
from collections.abc import Callable
from pathlib import Path
from prompt_toolkit import PromptSession
from prompt_toolkit.completion import (
Completer,
Completion,
PathCompleter,
merge_completers,
)
from prompt_toolkit.document import Document
from prompt_toolkit.enums import EditingMode
from prompt_toolkit.formatted_text import HTML
from prompt_toolkit.key_binding import KeyBindings
from .config import COLORS, COMMANDS, SessionState, console
from .image_utils import ImageData, get_clipboard_image
# Regex patterns for context-aware completion
# "@path" mention at the end of the input (allows backslash-escaped spaces).
AT_MENTION_RE = re.compile(r"@(?P<path>(?:[^\s@]|(?<=\\)\s)*)$")
# "/command" prefix at the very start of the line.
SLASH_COMMAND_RE = re.compile(r"^/(?P<command>[a-z]*)$")
# Seconds within which a second Ctrl+C confirms exit.
EXIT_CONFIRM_WINDOW = 3.0
class ImageTracker:
    """Tracks images pasted during the current conversation."""

    def __init__(self) -> None:
        # Pasted images in paste order; ids are 1-based and reset on clear().
        self.images: list[ImageData] = []
        self.next_id = 1

    def add_image(self, image_data: ImageData) -> str:
        """Register an image and return its placeholder text.

        Args:
            image_data: The image data to track.

        Returns:
            A placeholder string such as "[image 1]".
        """
        label = f"[image {self.next_id}]"
        self.next_id += 1
        image_data.placeholder = label
        self.images.append(image_data)
        return label

    def get_images(self) -> list[ImageData]:
        """Return a shallow copy of all tracked images."""
        return list(self.images)

    def clear(self) -> None:
        """Drop all tracked images and reset the id counter."""
        self.images.clear()
        self.next_id = 1
class FilePathCompleter(Completer):
    """Enables filesystem completion only when the cursor follows an '@'."""

    def __init__(self) -> None:
        # Delegate the filesystem walking to prompt_toolkit's PathCompleter;
        # this class only gates on the "@" context and re-escapes results.
        self.path_completer = PathCompleter(
            expanduser=True,
            min_input_len=0,
            only_directories=False,
        )

    def get_completions(self, document, complete_event):
        """Yield file path completions when an @mention is detected."""
        text = document.text_before_cursor
        # Use regex to detect @path pattern at end of line
        m = AT_MENTION_RE.search(text)
        if not m:
            return  # Not in an @path context
        path_fragment = m.group("path")
        # Unescape the path for PathCompleter (it doesn't understand escape sequences)
        unescaped_fragment = path_fragment.replace("\\ ", " ")
        # Strip trailing backslash if present (user is in the process of typing an escape)
        unescaped_fragment = unescaped_fragment.removesuffix("\\")
        # Create temporary document for the unescaped path fragment
        temp_doc = Document(text=unescaped_fragment, cursor_position=len(unescaped_fragment))
        # Get completions from PathCompleter and use its start_position
        # PathCompleter returns suffix text with start_position=0 (insert at cursor)
        for comp in self.path_completer.get_completions(temp_doc, complete_event):
            # Add trailing / for directories so users can continue navigating
            completed_path = Path(unescaped_fragment + comp.text).expanduser()
            # Re-escape spaces in the completion text for the command line
            completion_text = comp.text.replace(" ", "\\ ")
            if completed_path.is_dir() and not completion_text.endswith("/"):
                completion_text += "/"
            yield Completion(
                text=completion_text,
                start_position=comp.start_position,  # Use PathCompleter's position (usually 0)
                display=comp.display,
                display_meta=comp.display_meta,
            )
class CommandCompleter(Completer):
    """Offers command completions only when the line starts with '/'."""

    def get_completions(self, document, _complete_event):
        """Yield matching command completions for a leading "/" fragment."""
        match = SLASH_COMMAND_RE.match(document.text_before_cursor)
        if match is None:
            return  # Not in a /command context
        fragment = match.group("command")
        prefix = fragment.lower()
        replace_from = -len(fragment)
        # Offer every known command whose name extends the typed prefix.
        for name, description in COMMANDS.items():
            if not name.startswith(prefix):
                continue
            yield Completion(
                text=name,
                start_position=replace_from,  # Replace the typed fragment
                display=name,
                display_meta=description,
            )
def parse_file_mentions(text: str) -> tuple[str, list[Path]]:
    """Extract @file mentions and return the text plus resolved file paths.

    Mentions that do not point at an existing file emit a console warning
    and are omitted from the returned list; the text is returned unchanged.
    """
    # @filename, allowing backslash-escaped spaces inside the name
    mention_pattern = r"@((?:[^\s@]|(?<=\\)\s)+)"
    resolved: list[Path] = []
    for raw in re.findall(mention_pattern, text):
        # Drop escape backslashes before treating the mention as a path.
        candidate = Path(raw.replace("\\ ", " ")).expanduser()
        if not candidate.is_absolute():
            candidate = Path.cwd() / candidate
        try:
            candidate = candidate.resolve()
            if candidate.exists() and candidate.is_file():
                resolved.append(candidate)
            else:
                console.print(f"[yellow]경고: 파일을 찾을 수 없습니다: {raw}[/yellow]")
        except Exception as e:
            console.print(f"[yellow]경고: 유효하지 않은 경로 {raw}: {e}[/yellow]")
    return text, resolved
def parse_image_placeholders(text: str) -> tuple[str, int]:
    """Count image placeholders within the text.

    Args:
        text: Input that may contain [image] or [image N] placeholders.

    Returns:
        A (text, count) tuple where count is the number of placeholders.
    """
    # Matches [image] as well as [image N]; case does not matter.
    placeholder_re = re.compile(r"\[image(?:\s+\d+)?\]", re.IGNORECASE)
    return text, sum(1 for _ in placeholder_re.finditer(text))
def get_bottom_toolbar(session_state: SessionState, session_ref: dict) -> Callable[[], list[tuple[str, str]]]:
    """Return a toolbar callable that shows auto-approve state and BASH mode."""

    def toolbar() -> list[tuple[str, str]]:
        fragments: list[tuple[str, str]] = []
        # BASH mode indicator: shown while the current input starts with "!".
        try:
            prompt_session = session_ref.get("session")
            if prompt_session and prompt_session.default_buffer.text.startswith("!"):
                fragments.append(("bg:#ff1493 fg:#ffffff bold", " BASH MODE "))
                fragments.append(("", " | "))
        except (AttributeError, TypeError):
            # Swallow silently - the toolbar is non-critical and called often.
            pass
        # Base status message reflecting the approval mode.
        if session_state.auto_approve:
            fragments.append(("class:toolbar-green", "자동 승인 켜짐 (CTRL+T로 전환)"))
        else:
            fragments.append(("class:toolbar-orange", "수동 승인 (CTRL+T로 전환)"))
        # Exit-confirmation hint, shown only while its deadline is in the future.
        deadline = session_state.exit_hint_until
        if deadline is not None:
            if time.monotonic() < deadline:
                fragments.append(("", " | "))
                fragments.append(("class:toolbar-exit", " 종료하려면 Ctrl+C를 한번 더 누르세요 "))
            else:
                session_state.exit_hint_until = None
        return fragments

    return toolbar
def create_prompt_session(
    _assistant_id: str, session_state: SessionState, image_tracker: ImageTracker | None = None
) -> PromptSession:
    """Create a PromptSession configured with all CLI input features.

    Args:
        _assistant_id: Agent identifier (unused here; kept for call-site compatibility).
        session_state: Session state holding auto-approve and Ctrl+C exit-hint fields.
        image_tracker: Optional tracker for images pasted from the clipboard;
            when provided, paste key bindings check the clipboard for images.

    Returns:
        A fully configured ``PromptSession``.
    """
    # Set default editor if not already set
    if "EDITOR" not in os.environ:
        os.environ["EDITOR"] = "nano"
    # Create key bindings
    kb = KeyBindings()

    @kb.add("c-c")
    def _(event) -> None:
        """Require Ctrl+C twice within a short window to exit."""
        app = event.app
        now = time.monotonic()
        if session_state.exit_hint_until is not None and now < session_state.exit_hint_until:
            # Second Ctrl+C inside the window: cancel the pending hint timer and exit.
            handle = session_state.exit_hint_handle
            if handle:
                handle.cancel()
            session_state.exit_hint_handle = None
            session_state.exit_hint_until = None
            app.invalidate()
            app.exit(exception=KeyboardInterrupt())
            return
        # First Ctrl+C: arm the confirmation window and schedule its expiry.
        session_state.exit_hint_until = now + EXIT_CONFIRM_WINDOW
        handle = session_state.exit_hint_handle
        if handle:
            handle.cancel()
        loop = asyncio.get_running_loop()
        app_ref = app

        def clear_hint() -> None:
            # Clear the hint only if it has truly expired; a newer Ctrl+C may
            # have extended the deadline after this callback was scheduled.
            if session_state.exit_hint_until is not None and time.monotonic() >= session_state.exit_hint_until:
                session_state.exit_hint_until = None
                session_state.exit_hint_handle = None
                app_ref.invalidate()

        session_state.exit_hint_handle = loop.call_later(EXIT_CONFIRM_WINDOW, clear_hint)
        app.invalidate()

    # Bind Ctrl+T to toggle auto-approve
    @kb.add("c-t")
    def _(event) -> None:
        """Toggle auto-approve mode."""
        session_state.toggle_auto_approve()
        # Force UI refresh to update toolbar
        event.app.invalidate()

    # Custom paste handler to detect images
    if image_tracker:
        from prompt_toolkit.keys import Keys

        def _handle_paste_with_image_check(event, pasted_text: str = "") -> None:
            """Check the clipboard for an image; otherwise insert the pasted text."""
            # Try to get an image from clipboard
            clipboard_image = get_clipboard_image()
            if clipboard_image:
                # Found an image! Add it to tracker and insert placeholder
                placeholder = image_tracker.add_image(clipboard_image)
                # Insert placeholder (no confirmation message)
                event.current_buffer.insert_text(placeholder)
            elif pasted_text:
                # No image, insert the pasted text
                event.current_buffer.insert_text(pasted_text)
            else:
                # Fallback: try to get text from prompt_toolkit clipboard
                clipboard_data = event.app.clipboard.get_data()
                if clipboard_data and clipboard_data.text:
                    event.current_buffer.insert_text(clipboard_data.text)

        @kb.add(Keys.BracketedPaste)
        def _(event) -> None:
            """Handle bracketed paste (Cmd+V on macOS) - check for images first."""
            # Bracketed paste provides the pasted text in event.data
            pasted_text = event.data if hasattr(event, "data") else ""
            _handle_paste_with_image_check(event, pasted_text)

        @kb.add("c-v")
        def _(event) -> None:
            """Handle Ctrl+V paste - check for images first."""
            _handle_paste_with_image_check(event)

    # Bind regular Enter to submit (intuitive behavior)
    @kb.add("enter")
    def _(event) -> None:
        """Enter submits the input unless the completion menu is active."""
        buffer = event.current_buffer
        # If completion menu is showing, apply the current completion
        if buffer.complete_state:
            # Get the current completion (the highlighted one)
            current_completion = buffer.complete_state.current_completion
            # If no completion is selected (user hasn't navigated), select and apply the first one
            if not current_completion and buffer.complete_state.completions:
                # Move to the first completion
                buffer.complete_next()
                # Now apply it
                buffer.apply_completion(buffer.complete_state.current_completion)
            elif current_completion:
                # Apply the already-selected completion
                buffer.apply_completion(current_completion)
            else:
                # No completions available, close menu
                buffer.complete_state = None
        # Don't submit if buffer is empty or only whitespace
        elif buffer.text.strip():
            # Normal submit
            buffer.validate_and_handle()
        # If empty, do nothing (don't submit)

    # Alt+Enter for newlines (press ESC then Enter, or Option+Enter on Mac)
    @kb.add("escape", "enter")
    def _(event) -> None:
        """Alt+Enter inserts a newline for multi-line input."""
        event.current_buffer.insert_text("\n")

    # Ctrl+E to open in external editor
    @kb.add("c-e")
    def _(event) -> None:
        """Open the current input in the external editor (defaults to nano)."""
        event.current_buffer.open_in_editor()

    # Backspace handler to retrigger completions and delete image tags as units
    @kb.add("backspace")
    def _(event) -> None:
        """Handle backspace: delete image tags as single units and retrigger completion."""
        buffer = event.current_buffer
        text_before = buffer.document.text_before_cursor
        # Check if cursor is right after an image tag like [image 1] or [image 12]
        image_tag_pattern = r"\[image \d+\]$"
        match = re.search(image_tag_pattern, text_before)
        if match and image_tracker:
            # Delete the entire tag
            tag_length = len(match.group(0))
            buffer.delete_before_cursor(count=tag_length)
            # Remove the image from tracker and reset counter
            tag_text = match.group(0)
            image_num_match = re.search(r"\d+", tag_text)
            if image_num_match:
                image_num = int(image_num_match.group(0))
                # Remove image at index (1-based to 0-based)
                if 0 < image_num <= len(image_tracker.images):
                    image_tracker.images.pop(image_num - 1)
                # Reset counter to next available number.
                # NOTE(review): remaining [image N] tags left in the buffer are
                # not renumbered, so tag numbers can go stale after deleting a
                # tag in the middle - confirm this is acceptable upstream.
                image_tracker.next_id = len(image_tracker.images) + 1
        else:
            # Normal backspace
            buffer.delete_before_cursor(count=1)
        # Check if we're in a completion context (@ or /)
        text = buffer.document.text_before_cursor
        if AT_MENTION_RE.search(text) or SLASH_COMMAND_RE.match(text):
            # Retrigger completion
            buffer.start_completion(select_first=False)

    from prompt_toolkit.styles import Style

    # Define styles for the toolbar with full-width background colors
    toolbar_style = Style.from_dict({
        "bottom-toolbar": "noreverse",  # Disable default reverse video
        "toolbar-green": "bg:#10b981 #000000",  # Green for auto-accept ON
        "toolbar-orange": "bg:#f59e0b #000000",  # Orange for manual accept
        "toolbar-exit": "bg:#2563eb #ffffff",  # Blue for exit hint
    })
    # Create session reference dict for toolbar to access session
    session_ref = {}
    # Create the session
    session = PromptSession(
        message=HTML(f'<style fg="{COLORS["user"]}">></style> '),
        multiline=True,  # Keep multiline support but Enter submits
        key_bindings=kb,
        completer=merge_completers([CommandCompleter(), FilePathCompleter()]),
        editing_mode=EditingMode.EMACS,
        complete_while_typing=True,  # Show completions as you type
        complete_in_thread=True,  # Async completion prevents menu freezing
        mouse_support=False,
        enable_open_in_editor=True,  # Allow Ctrl+X Ctrl+E to open external editor
        bottom_toolbar=get_bottom_toolbar(session_state, session_ref),  # Persistent status bar at bottom
        style=toolbar_style,  # Apply toolbar styling
        reserve_space_for_menu=7,  # Reserve space for completion menu to show 5-6 results
    )
    # Store session reference for toolbar to access
    session_ref["session"] = session
    return session

View File

@@ -0,0 +1 @@
"""DeepAgents CLI를 위한 샌드박스 연동."""

View File

@@ -0,0 +1,115 @@
"""Daytona 샌드박스 백엔드 구현."""
from __future__ import annotations
from typing import TYPE_CHECKING
from deepagents.backends.protocol import (
ExecuteResponse,
FileDownloadResponse,
FileUploadResponse,
)
from deepagents.backends.sandbox import BaseSandbox
if TYPE_CHECKING:
from daytona import Sandbox
class DaytonaBackend(BaseSandbox):
    """Daytona backend implementation conforming to SandboxBackendProtocol.

    All file-operation methods are inherited from BaseSandbox; this class
    implements execute() (plus batched upload/download) on top of Daytona's API.
    """

    def __init__(self, sandbox: Sandbox) -> None:
        """Initialize the backend with a Daytona sandbox client.

        Args:
            sandbox: Daytona sandbox instance.
        """
        self._sandbox = sandbox
        self._timeout: int = 30 * 60  # 30 minutes

    @property
    def id(self) -> str:
        """Unique identifier of the sandbox backend."""
        return self._sandbox.id

    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Run a command in the sandbox and return an ExecuteResponse.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code and truncation flag.
        """
        completed = self._sandbox.process.exec(command, timeout=self._timeout)
        # Daytona already combines stdout/stderr into a single stream.
        return ExecuteResponse(
            output=completed.result,
            exit_code=completed.exit_code,
            truncated=False,
        )

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the Daytona sandbox.

        Uses Daytona's native batch-download API for efficiency. Partial
        success is supported, so one failed download should not affect the
        others.

        Args:
            paths: File paths to download.

        Returns:
            One FileDownloadResponse per input path, in input order.

        TODO: Map Daytona API error strings to standardized FileOperationError
        codes. Only the happy path is implemented for now.
        """
        from daytona import FileDownloadRequest

        # Build one batched request per path using Daytona's native bulk API.
        batch = [FileDownloadRequest(source=src) for src in paths]
        daytona_results = self._sandbox.fs.download_files(batch)
        # Translate Daytona results to our response type, preserving order.
        # TODO: map resp.error to FileOperationError when available.
        translated: list[FileDownloadResponse] = []
        for resp in daytona_results:
            translated.append(
                FileDownloadResponse(path=resp.source, content=resp.result, error=None)
            )
        return translated

    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the Daytona sandbox.

        Uses Daytona's native batch-upload API for efficiency. Partial success
        is supported, so one failed upload should not affect the others.

        Args:
            files: (path, content) tuples to upload.

        Returns:
            One FileUploadResponse per input file, in input order.

        TODO: Map Daytona API error strings to standardized FileOperationError
        codes. Only the happy path is implemented for now.
        """
        from daytona import FileUpload

        # Batch all uploads through Daytona's native bulk API.
        batch = [FileUpload(source=data, destination=dest) for dest, data in files]
        self._sandbox.fs.upload_files(batch)
        # TODO: check whether Daytona reports errors and map them accordingly.
        return [FileUploadResponse(path=dest, error=None) for dest, _ in files]

View File

@@ -0,0 +1,124 @@
"""Modal 샌드박스 백엔드 구현."""
from __future__ import annotations
from typing import TYPE_CHECKING
from deepagents.backends.protocol import (
ExecuteResponse,
FileDownloadResponse,
FileUploadResponse,
)
from deepagents.backends.sandbox import BaseSandbox
if TYPE_CHECKING:
import modal
class ModalBackend(BaseSandbox):
    """Modal backend implementation conforming to SandboxBackendProtocol.

    All file-operation methods are inherited from BaseSandbox; this class
    implements execute() (plus per-file upload/download) on top of Modal's API.
    """

    def __init__(self, sandbox: modal.Sandbox) -> None:
        """Initialize the backend with a Modal sandbox instance.

        Args:
            sandbox: An active Modal sandbox instance.
        """
        self._sandbox = sandbox
        self._timeout = 30 * 60  # 30 minutes

    @property
    def id(self) -> str:
        """Unique identifier of the sandbox backend."""
        return self._sandbox.object_id

    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Run a command in the sandbox and return an ExecuteResponse.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code and truncation flag.
        """
        # Run the command through bash via Modal's exec API, then wait.
        proc = self._sandbox.exec("bash", "-c", command, timeout=self._timeout)
        proc.wait()
        out_text = proc.stdout.read() or ""
        err_text = proc.stderr.read()
        # Merge stdout and stderr into one stream (matches the Runloop backend).
        if err_text:
            out_text = f"{out_text}\n{err_text}" if out_text else err_text
        return ExecuteResponse(
            output=out_text,
            exit_code=proc.returncode,
            truncated=False,  # Modal does not report truncation
        )

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the Modal sandbox.

        Partial success is supported, so one failed download should not affect
        the others.

        Args:
            paths: File paths to download.

        Returns:
            One FileDownloadResponse per input path, in input order.

        TODO: Add proper error handling with standardized FileOperationError
        codes; which exceptions Modal's sandbox.open() raises still needs to be
        confirmed. Only the happy path is implemented for now.
        """
        # Relies on the Modal sandbox file API (currently alpha and not
        # recommended for production; acceptable for this CLI application):
        # https://modal.com/doc/guide/sandbox-files
        results: list[FileDownloadResponse] = []
        for file_path in paths:
            with self._sandbox.open(file_path, "rb") as handle:
                payload = handle.read()
            results.append(FileDownloadResponse(path=file_path, content=payload, error=None))
        return results

    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the Modal sandbox.

        Partial success is supported, so one failed upload should not affect
        the others.

        Args:
            files: (path, content) tuples to upload.

        Returns:
            One FileUploadResponse per input file, in input order.

        TODO: Add proper error handling with standardized FileOperationError
        codes; which exceptions Modal's sandbox.open() raises still needs to be
        confirmed. Only the happy path is implemented for now.
        """
        # Relies on the Modal sandbox file API (currently alpha and not
        # recommended for production; acceptable for this CLI application):
        # https://modal.com/doc/guide/sandbox-files
        results: list[FileUploadResponse] = []
        for file_path, payload in files:
            with self._sandbox.open(file_path, "wb") as handle:
                handle.write(payload)
            results.append(FileUploadResponse(path=file_path, error=None))
        return results

View File

@@ -0,0 +1,121 @@
"""Runloop을 위한 BackendProtocol 구현."""
try:
import runloop_api_client
except ImportError:
msg = (
"RunloopBackend를 위해서는 runloop_api_client 패키지가 필요합니다. "
"`pip install runloop_api_client`로 설치하십시오."
)
raise ImportError(msg)
import os
from deepagents.backends.protocol import ExecuteResponse, FileDownloadResponse, FileUploadResponse
from deepagents.backends.sandbox import BaseSandbox
from runloop_api_client import Runloop
class RunloopBackend(BaseSandbox):
    """Backend operating on files inside a Runloop devbox.

    Uses the Runloop API client to execute commands and manipulate files
    within the remote devbox environment.
    """

    def __init__(
        self,
        devbox_id: str,
        client: Runloop | None = None,
        api_key: str | None = None,
    ) -> None:
        """Initialize the Runloop protocol.

        Args:
            devbox_id: ID of the Runloop devbox to operate on.
            client: Optional existing Runloop client instance.
            api_key: Optional API key used to create a new client
                (defaults to the RUNLOOP_API_KEY environment variable).

        Raises:
            ValueError: If both client and api_key are given, or neither a
                client nor any API key is available.
        """
        if client and api_key:
            msg = "client 또는 bearer_token 중 하나만 제공해야 하며, 둘 다 제공할 수는 없습니다."
            raise ValueError(msg)
        if client is None:
            token = api_key or os.environ.get("RUNLOOP_API_KEY", None)
            if token is None:
                msg = "client 또는 bearer_token 중 하나는 제공되어야 합니다."
                raise ValueError(msg)
            client = Runloop(bearer_token=token)
        self._client = client
        self._devbox_id = devbox_id
        self._timeout = 30 * 60  # 30 minutes

    @property
    def id(self) -> str:
        """Unique identifier of the sandbox backend."""
        return self._devbox_id

    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Run a command in the devbox and return an ExecuteResponse.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code and truncation flag.
        """
        outcome = self._client.devboxes.execute_and_await_completion(
            devbox_id=self._devbox_id,
            command=command,
            timeout=self._timeout,
        )
        # Merge stdout and stderr into a single combined stream.
        combined = outcome.stdout or ""
        if outcome.stderr:
            combined = f"{combined}\n{outcome.stderr}" if combined else outcome.stderr
        return ExecuteResponse(
            output=combined,
            exit_code=outcome.exit_status,
            truncated=False,  # Runloop does not report truncation
        )

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the Runloop devbox.

        Files are fetched one by one via the Runloop API. Order is preserved,
        and per-file errors are meant to be reported in the responses rather
        than raised.

        TODO: Add proper error handling with standardized FileOperationError
        codes. Only the happy path is implemented for now.
        """
        results: list[FileDownloadResponse] = []
        for file_path in paths:
            # devboxes.download_file returns a BinaryAPIResponse exposing .read()
            api_response = self._client.devboxes.download_file(self._devbox_id, path=file_path)
            results.append(
                FileDownloadResponse(path=file_path, content=api_response.read(), error=None)
            )
        return results

    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the Runloop devbox.

        Files are sent one by one via the Runloop API. Order is preserved, and
        per-file errors are meant to be reported in the responses rather than
        raised.

        TODO: Add proper error handling with standardized FileOperationError
        codes. Only the happy path is implemented for now.
        """
        results: list[FileUploadResponse] = []
        for file_path, payload in files:
            # The Runloop client accepts bytes or a file-like object for 'file'
            self._client.devboxes.upload_file(self._devbox_id, path=file_path, file=payload)
            results.append(FileUploadResponse(path=file_path, error=None))
        return results

View File

@@ -0,0 +1,345 @@
"""컨텍스트 매니저를 통한 샌드박스 수명 주기 관리."""
import os
import shlex
import string
import time
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
from deepagents.backends.protocol import SandboxBackendProtocol
from deepagents_cli.config import console
def _run_sandbox_setup(backend: SandboxBackendProtocol, setup_script_path: str) -> None:
    """Run the user's setup script in the sandbox, expanding env variables.

    Args:
        backend: Sandbox backend instance.
        setup_script_path: Path to the setup script file.

    Raises:
        FileNotFoundError: If the setup script does not exist.
        RuntimeError: If the script exits with a non-zero status.
    """
    script_file = Path(setup_script_path)
    if not script_file.exists():
        msg = f"설정 스크립트를 찾을 수 없습니다: {setup_script_path}"
        raise FileNotFoundError(msg)
    console.print(f"[dim]설정 스크립트 실행 중: {setup_script_path}...[/dim]")
    # Expand ${VAR} references using the local environment before shipping the
    # script to the sandbox; unknown variables are left as-is.
    expanded_script = string.Template(script_file.read_text()).safe_substitute(os.environ)
    # Run inside the sandbox, quoting the whole script for bash -c.
    outcome = backend.execute(f"bash -c {shlex.quote(expanded_script)}")
    if outcome.exit_code != 0:
        console.print(f"[red]❌ 설정 스크립트 실패 (종료 코드 {outcome.exit_code}):[/red]")
        console.print(f"[dim]{outcome.output}[/dim]")
        msg = "설정 실패 - 중단됨"
        raise RuntimeError(msg)
    console.print("[green]✓ 설정 완료[/green]")
@contextmanager
def create_modal_sandbox(
    *, sandbox_id: str | None = None, setup_script_path: str | None = None
) -> Generator[SandboxBackendProtocol, None, None]:
    """Create or attach to a Modal sandbox.

    Args:
        sandbox_id: Existing sandbox ID to reuse (optional). Reused sandboxes
            are not terminated on exit.
        setup_script_path: Path to a setup script to run after startup (optional).

    Yields:
        A ModalBackend connected to the running sandbox.

    Raises:
        ImportError: Modal SDK is not installed.
        RuntimeError: The sandbox exited unexpectedly, failed to start within
            the timeout, or the setup script failed.
        FileNotFoundError: Setup script not found.
    """
    import modal

    from deepagents_cli.integrations.modal import ModalBackend

    console.print("[yellow]Modal 샌드박스 시작 중...[/yellow]")
    # Ephemeral app (cleaned up automatically on exit)
    app = modal.App("deepagents-sandbox")
    with app.run():
        if sandbox_id:
            sandbox = modal.Sandbox.from_id(sandbox_id=sandbox_id, app=app)
            should_cleanup = False
        else:
            sandbox = modal.Sandbox.create(app=app, workdir="/workspace")
            # FIX: record the new sandbox's ID. Previously sandbox_id stayed
            # None for freshly created sandboxes, so the cleanup messages in
            # the finally-block printed "None" instead of the real ID.
            sandbox_id = sandbox.object_id
            should_cleanup = True
        # Poll until running (required for Modal): 180s timeout (90 * 2s)
        for _ in range(90):
            if sandbox.poll() is not None:  # sandbox terminated unexpectedly
                msg = "시작 중 Modal 샌드박스가 예기치 않게 종료되었습니다"
                raise RuntimeError(msg)
            # Probe readiness with a trivial command
            try:
                process = sandbox.exec("echo", "ready", timeout=5)
                process.wait()
                if process.returncode == 0:
                    break
            except Exception:
                pass
            time.sleep(2)
        else:
            # Timed out - clean up and fail
            sandbox.terminate()
            msg = "180초 이내에 Modal 샌드박스를 시작하지 못했습니다"
            raise RuntimeError(msg)
        backend = ModalBackend(sandbox)
        console.print(f"[green]✓ Modal 샌드박스 준비 완료: {backend.id}[/green]")
        # Run setup script if provided
        if setup_script_path:
            _run_sandbox_setup(backend, setup_script_path)
        try:
            yield backend
        finally:
            if should_cleanup:
                try:
                    console.print(f"[dim]Modal 샌드박스 {sandbox_id} 종료 중...[/dim]")
                    sandbox.terminate()
                    console.print(f"[dim]✓ Modal 샌드박스 {sandbox_id} 종료됨[/dim]")
                except Exception as e:
                    console.print(f"[yellow]⚠ 정리 실패: {e}[/yellow]")
@contextmanager
def create_runloop_sandbox(
    *, sandbox_id: str | None = None, setup_script_path: str | None = None
) -> Generator[SandboxBackendProtocol, None, None]:
    """Create or attach to a Runloop devbox.

    Args:
        sandbox_id: Existing devbox ID to reuse (optional). Reused devboxes
            are not shut down on exit.
        setup_script_path: Path to a setup script to run after startup (optional).

    Yields:
        A RunloopBackend connected to the running devbox.

    Raises:
        ImportError: Runloop SDK is not installed.
        ValueError: RUNLOOP_API_KEY is not set.
        RuntimeError: The devbox failed to start within the timeout, or the
            setup script failed.
        FileNotFoundError: Setup script not found.
    """
    from runloop_api_client import Runloop

    from deepagents_cli.integrations.runloop import RunloopBackend

    api_key = os.environ.get("RUNLOOP_API_KEY")
    if not api_key:
        msg = "RUNLOOP_API_KEY 환경 변수가 설정되지 않았습니다"
        raise ValueError(msg)
    runloop_client = Runloop(bearer_token=api_key)
    console.print("[yellow]Runloop devbox 시작 중...[/yellow]")
    if sandbox_id:
        devbox = runloop_client.devboxes.retrieve(id=sandbox_id)
        should_cleanup = False
    else:
        devbox = runloop_client.devboxes.create()
        sandbox_id = devbox.id
        should_cleanup = True
    # Poll until the devbox reports "running" (180s total: 90 * 2s).
    for _ in range(90):
        if runloop_client.devboxes.retrieve(id=devbox.id).status == "running":
            break
        time.sleep(2)
    else:
        # Timed out - shut the devbox down and fail.
        runloop_client.devboxes.shutdown(id=devbox.id)
        msg = "180초 이내에 devbox를 시작하지 못했습니다"
        raise RuntimeError(msg)
    console.print(f"[green]✓ Runloop devbox 준비 완료: {sandbox_id}[/green]")
    backend = RunloopBackend(devbox_id=devbox.id, client=runloop_client)
    # Run the user's setup script, if one was provided.
    if setup_script_path:
        _run_sandbox_setup(backend, setup_script_path)
    try:
        yield backend
    finally:
        if should_cleanup:
            try:
                console.print(f"[dim]Runloop devbox {sandbox_id} 종료 중...[/dim]")
                runloop_client.devboxes.shutdown(id=devbox.id)
                console.print(f"[dim]✓ Runloop devbox {sandbox_id} 종료됨[/dim]")
            except Exception as e:
                console.print(f"[yellow]⚠ 정리 실패: {e}[/yellow]")
@contextmanager
def create_daytona_sandbox(
    *, sandbox_id: str | None = None, setup_script_path: str | None = None
) -> Generator[SandboxBackendProtocol, None, None]:
    """Create a Daytona sandbox.

    Args:
        sandbox_id: Existing sandbox ID to reuse (optional).
        setup_script_path: Path to a setup script to run after startup (optional).

    Yields:
        A DaytonaBackend connected to the running sandbox.

    Note:
        Attaching to an existing Daytona sandbox by ID may not be supported
        yet; NotImplementedError is raised when sandbox_id is given.
    """
    from daytona import Daytona, DaytonaConfig

    from deepagents_cli.integrations.daytona import DaytonaBackend

    api_key = os.environ.get("DAYTONA_API_KEY")
    if not api_key:
        msg = "DAYTONA_API_KEY 환경 변수가 설정되지 않았습니다"
        raise ValueError(msg)
    if sandbox_id:
        msg = (
            "ID로 기존 Daytona 샌드박스에 연결하는 기능은 아직 지원되지 않습니다. "
            "--sandbox-id를 생략하여 새 샌드박스를 생성하십시오."
        )
        raise NotImplementedError(msg)
    console.print("[yellow]Daytona 샌드박스 시작 중...[/yellow]")
    daytona_client = Daytona(DaytonaConfig(api_key=api_key))
    sandbox = daytona_client.create()
    sandbox_id = sandbox.id
    # Poll until the sandbox answers a trivial command (180s total: 90 * 2s).
    for _ in range(90):
        try:
            probe = sandbox.process.exec("echo ready", timeout=5)
            if probe.exit_code == 0:
                break
        except Exception:
            pass
        time.sleep(2)
    else:
        try:
            # Best-effort cleanup before failing.
            sandbox.delete()
        finally:
            msg = "180초 이내에 Daytona 샌드박스를 시작하지 못했습니다"
            raise RuntimeError(msg)
    backend = DaytonaBackend(sandbox)
    console.print(f"[green]✓ Daytona 샌드박스 준비 완료: {backend.id}[/green]")
    # Run the user's setup script, if one was provided.
    if setup_script_path:
        _run_sandbox_setup(backend, setup_script_path)
    try:
        yield backend
    finally:
        console.print(f"[dim]Daytona 샌드박스 {sandbox_id} 삭제 중...[/dim]")
        try:
            sandbox.delete()
            console.print(f"[dim]✓ Daytona 샌드박스 {sandbox_id} 종료됨[/dim]")
        except Exception as e:
            console.print(f"[yellow]⚠ 정리 실패: {e}[/yellow]")
# Default working directory inside the sandbox, per provider.
_PROVIDER_TO_WORKING_DIR = {
    "modal": "/workspace",
    "runloop": "/home/user",
    "daytona": "/home/daytona",
}
# Context-manager factory for each supported sandbox type.
_SANDBOX_PROVIDERS = {
    "modal": create_modal_sandbox,
    "runloop": create_runloop_sandbox,
    "daytona": create_daytona_sandbox,
}
@contextmanager
def create_sandbox(
    provider: str,
    *,
    sandbox_id: str | None = None,
    setup_script_path: str | None = None,
) -> Generator[SandboxBackendProtocol, None, None]:
    """Create or attach to a sandbox for the given provider.

    Unified entry point for sandbox creation that delegates to the
    provider-specific context manager.

    Args:
        provider: Sandbox provider ("modal", "runloop", "daytona").
        sandbox_id: Existing sandbox ID to reuse (optional).
        setup_script_path: Path to a setup script to run after startup (optional).

    Yields:
        A running SandboxBackendProtocol instance.

    Raises:
        ValueError: If the provider name is unknown.
    """
    if provider not in _SANDBOX_PROVIDERS:
        msg = f"알 수 없는 샌드박스 공급자: {provider}. 사용 가능한 공급자: {', '.join(get_available_sandbox_types())}"
        raise ValueError(msg)
    factory = _SANDBOX_PROVIDERS[provider]
    with factory(sandbox_id=sandbox_id, setup_script_path=setup_script_path) as sandbox_backend:
        yield sandbox_backend
def get_available_sandbox_types() -> list[str]:
    """List the available sandbox provider types.

    Returns:
        Provider type names, e.g. ["modal", "runloop", "daytona"].
    """
    # Unpack the registry's keys in their insertion order.
    return [*_SANDBOX_PROVIDERS]
def get_default_working_dir(provider: str) -> str:
    """Get the default working directory for the given sandbox provider.

    Args:
        provider: Sandbox provider name ("modal", "runloop", "daytona").

    Returns:
        The default working directory path as a string.

    Raises:
        ValueError: If the provider is unknown.
    """
    working_dir = _PROVIDER_TO_WORKING_DIR.get(provider)
    if working_dir is None:
        msg = f"알 수 없는 샌드박스 공급자: {provider}"
        raise ValueError(msg)
    return working_dir
# Public API of the sandbox factory module.
__all__ = [
    "create_sandbox",
    "get_available_sandbox_types",
    "get_default_working_dir",
]

View File

@@ -0,0 +1,468 @@
"""DeepAgents를 위한 메인 진입점 및 CLI 루프."""
import argparse
import asyncio
import os
import sys
from pathlib import Path
from deepagents.backends.protocol import SandboxBackendProtocol
# Now safe to import agent (which imports LangChain modules)
from deepagents_cli.agent import create_cli_agent, list_agents, reset_agent
from deepagents_cli.commands import execute_bash_command, handle_command
# CRITICAL: Import config FIRST to set LANGSMITH_PROJECT before LangChain loads
from deepagents_cli.config import (
COLORS,
DEEP_AGENTS_ASCII,
SessionState,
console,
create_model,
settings,
)
from deepagents_cli.execution import execute_task
from deepagents_cli.input import ImageTracker, create_prompt_session
from deepagents_cli.integrations.sandbox_factory import (
create_sandbox,
get_default_working_dir,
)
from deepagents_cli.skills import execute_skills_command, setup_skills_parser
from deepagents_cli.tools import fetch_url, http_request, web_search
from deepagents_cli.ui import TokenTracker, show_help
def check_cli_dependencies() -> None:
    """Verify that the CLI's optional dependencies are installed.

    Prints installation hints and exits with status 1 when any required
    package is missing.
    """
    import importlib

    # (import name, PyPI package name) pairs required by the CLI.
    # Replaces five copy-pasted try/except blocks with one data-driven loop.
    required = [
        ("rich", "rich"),
        ("requests", "requests"),
        ("dotenv", "python-dotenv"),
        ("tavily", "tavily-python"),
        ("prompt_toolkit", "prompt-toolkit"),
    ]
    missing = []
    for module_name, package_name in required:
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing.append(package_name)
    if missing:
        print("\n❌ 필수 CLI 종속성이 누락되었습니다!")
        print("\nDeepAgents CLI를 사용하려면 다음 패키지가 필요합니다:")
        for pkg in missing:
            print(f" - {pkg}")
        print("\n다음 명령으로 설치하십시오:")
        print(" pip install deepagents[cli]")
        print("\n또는 모든 종속성을 설치하십시오:")
        print(" pip install 'deepagents[cli]'")
        sys.exit(1)
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    Subcommands (list/help/reset/skills) are exposed via ``command``;
    interactive-mode flags are exposed under their option names.
    """
    parser = argparse.ArgumentParser(
        description="DeepAgents - AI 코딩 도우미",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False,
    )
    subparsers = parser.add_subparsers(dest="command", help="실행할 명령")
    # List command
    subparsers.add_parser("list", help="사용 가능한 모든 에이전트 나열")
    # Help command
    subparsers.add_parser("help", help="도움말 정보 표시")
    # Reset command
    reset_parser = subparsers.add_parser("reset", help="에이전트 초기화")
    reset_parser.add_argument("--agent", required=True, help="초기화할 에이전트 이름")
    reset_parser.add_argument("--target", dest="source_agent", help="다른 에이전트에서 프롬프트 복사")
    # Skills command - setup delegated to skills module
    setup_skills_parser(subparsers)
    # Default interactive mode
    parser.add_argument(
        "--agent",
        default="agent",
        help="별도의 메모리 저장소를 위한 에이전트 식별자 (기본값: agent).",
    )
    parser.add_argument(
        "--model",
        help="사용할 모델 (예: claude-sonnet-4-5-20250929, gpt-5-mini, gemini-3-pro-preview). 모델 이름에서 공급자가 자동 감지됩니다.",
    )
    parser.add_argument(
        "--auto-approve",
        action="store_true",
        help="프롬프트 없이 도구 사용 자동 승인 (human-in-the-loop 비활성화)",
    )
    parser.add_argument(
        "--sandbox",
        choices=["none", "modal", "daytona", "runloop"],
        default="none",
        help="코드 실행을 위한 원격 샌드박스 (기본값: none - 로컬 전용)",
    )
    parser.add_argument(
        "--sandbox-id",
        help="재사용할 기존 샌드박스 ID (생성 및 정리 건너뜀)",
    )
    parser.add_argument(
        "--sandbox-setup",
        help="생성 후 샌드박스에서 실행할 설정 스크립트 경로",
    )
    parser.add_argument(
        "--no-splash",
        action="store_true",
        help="시작 스플래시 화면 비활성화",
    )
    return parser.parse_args()
async def simple_cli(
    agent,
    assistant_id: str | None,
    session_state,
    baseline_tokens: int = 0,
    backend=None,
    sandbox_type: str | None = None,
    setup_script_path: str | None = None,
    no_splash: bool = False,
) -> None:
    """Main CLI loop: print startup status, then read and dispatch user input.

    Args:
        agent: The agent instance that executes user tasks.
        assistant_id: Agent identifier used for per-agent memory storage.
        session_state: Session state holding auto-approve and exit-hint fields.
        baseline_tokens: Token count of the system-prompt baseline, so token
            tracking reports conversation usage only.
        backend: Backend for file operations (CompositeBackend).
        sandbox_type: Sandbox type in use (e.g. "modal", "runloop", "daytona").
            None means running in local mode.
        setup_script_path: Path of the setup script that was executed, if any.
        no_splash: If True, skip the startup splash screen.
    """
    console.clear()
    if not no_splash:
        console.print(DEEP_AGENTS_ASCII, style=f"bold {COLORS['primary']}")
        console.print()
    # Extract sandbox ID from backend if using sandbox mode
    sandbox_id: str | None = None
    if backend:
        from deepagents.backends.composite import CompositeBackend

        # Check if it's a CompositeBackend with a sandbox default backend
        if isinstance(backend, CompositeBackend):
            if isinstance(backend.default, SandboxBackendProtocol):
                sandbox_id = backend.default.id
        elif isinstance(backend, SandboxBackendProtocol):
            sandbox_id = backend.id
    # Display sandbox info persistently (survives console.clear())
    if sandbox_type and sandbox_id:
        console.print(f"[yellow]⚡ {sandbox_type.capitalize()} 샌드박스: {sandbox_id}[/yellow]")
        if setup_script_path:
            console.print(f"[green]✓ 설정 스크립트 ({setup_script_path}) 완료됨[/green]")
        console.print()
    # Display model info
    if settings.model_name and settings.model_provider:
        provider_display = {
            "openai": "OpenAI",
            "anthropic": "Anthropic",
            "google": "Google",
        }.get(settings.model_provider, settings.model_provider)
        console.print(
            f"[green]✓ Model:[/green] {provider_display}'{settings.model_name}'",
            style=COLORS["dim"],
        )
        console.print()
    # Warn when web search is unavailable (no Tavily API key configured)
    if not settings.has_tavily:
        console.print(
            "[yellow]⚠ 웹 검색 비활성화됨:[/yellow] TAVILY_API_KEY를 찾을 수 없습니다.",
            style=COLORS["dim"],
        )
        console.print(" 웹 검색을 활성화하려면 Tavily API 키를 설정하세요:", style=COLORS["dim"])
        console.print(" export TAVILY_API_KEY=your_api_key_here", style=COLORS["dim"])
        console.print(
            " 또는 .env 파일에 추가하세요. 키 발급: https://tavily.com",
            style=COLORS["dim"],
        )
        console.print()
    # Show LangSmith tracing status when enabled
    if settings.has_deepagents_langchain_project:
        console.print(
            f"[green]✓ LangSmith 추적 활성화됨:[/green] Deepagents → '{settings.deepagents_langchain_project}'",
            style=COLORS["dim"],
        )
        if settings.user_langchain_project:
            console.print(f" [dim]사용자 코드 (shell) → '{settings.user_langchain_project}'[/dim]")
        console.print()
    console.print("... 코딩 준비 완료! 무엇을 만들고 싶으신가요?", style=COLORS["agent"])
    # Show where code actually runs: remote sandbox dir vs. local cwd
    if sandbox_type:
        working_dir = get_default_working_dir(sandbox_type)
        console.print(f" [dim]로컬 CLI 디렉터리: {Path.cwd()}[/dim]")
        console.print(f" [dim]코드 실행: 원격 샌드박스 ({working_dir})[/dim]")
    else:
        console.print(f" [dim]작업 디렉터리: {Path.cwd()}[/dim]")
    console.print()
    if session_state.auto_approve:
        console.print(" [yellow]⚡ 자동 승인: 켜짐[/yellow] [dim](확인 없이 도구 실행)[/dim]")
        console.print()
    # Localize modifier names and show key symbols (macOS vs others)
    if sys.platform == "darwin":
        tips = (
            " 팁: ⏎ Enter로 제출, ⌥ Option + ⏎ Enter로 줄바꿈 (또는 Esc+Enter), "
            "⌃E로 편집기 열기, ⌃T로 자동 승인 전환, ⌃C로 중단"
        )
    else:
        tips = (
            " 팁: Enter로 제출, Alt+Enter (또는 Esc+Enter)로 줄바꿈, "
            "Ctrl+E로 편집기 열기, Ctrl+T로 자동 승인 전환, Ctrl+C로 중단"
        )
    console.print(tips, style=f"dim {COLORS['dim']}")
    console.print()
    # Create prompt session, image tracker, and token tracker
    image_tracker = ImageTracker()
    session = create_prompt_session(assistant_id, session_state, image_tracker=image_tracker)
    token_tracker = TokenTracker()
    token_tracker.set_baseline(baseline_tokens)
    while True:
        try:
            user_input = await session.prompt_async()
            # A submitted prompt cancels any pending Ctrl+C exit hint.
            if session_state.exit_hint_handle:
                session_state.exit_hint_handle.cancel()
                session_state.exit_hint_handle = None
                session_state.exit_hint_until = None
            user_input = user_input.strip()
        except EOFError:
            break
        except KeyboardInterrupt:
            console.print("\n안녕히 가세요!", style=COLORS["primary"])
            break
        if not user_input:
            continue
        # Check for slash commands first
        if user_input.startswith("/"):
            result = handle_command(user_input, agent, token_tracker)
            if result == "exit":
                console.print("\n안녕히 가세요!", style=COLORS["primary"])
                break
            if result:
                # Command was handled, continue to next input
                continue
        # Check for bash commands (!)
        if user_input.startswith("!"):
            execute_bash_command(user_input)
            continue
        # Handle regular quit keywords
        if user_input.lower() in ["quit", "exit", "q"]:
            console.print("\n안녕히 가세요!", style=COLORS["primary"])
            break
        await execute_task(
            user_input,
            agent,
            assistant_id,
            session_state,
            token_tracker,
            backend=backend,
            image_tracker=image_tracker,
        )
async def _run_agent_session(
    model,
    assistant_id: str,
    session_state,
    sandbox_backend=None,
    sandbox_type: str | None = None,
    setup_script_path: str | None = None,
) -> None:
    """Helper that creates the agent and runs the CLI session.

    Extracted to avoid duplication between sandbox mode and local mode.

    Args:
        model: The LLM model to use.
        assistant_id: Agent identifier for memory storage.
        session_state: Session state carrying the auto-approve setting.
        sandbox_backend: Optional sandbox backend for remote execution.
        sandbox_type: Type of sandbox in use, if any.
        setup_script_path: Path to the setup script that was run, if any.
    """
    # Create agent with conditional tools (web_search only when Tavily is configured)
    tools = [http_request, fetch_url]
    if settings.has_tavily:
        tools.append(web_search)
    agent, composite_backend = create_cli_agent(
        model=model,
        assistant_id=assistant_id,
        tools=tools,
        sandbox=sandbox_backend,
        sandbox_type=sandbox_type,
        auto_approve=session_state.auto_approve,
    )
    # Calculate baseline token count for accurate token tracking.
    # Local imports keep module import time low and avoid import cycles.
    from .agent import get_system_prompt
    from .token_utils import calculate_baseline_tokens
    agent_dir = settings.get_agent_dir(assistant_id)
    system_prompt = get_system_prompt(assistant_id=assistant_id, sandbox_type=sandbox_type)
    baseline_tokens = calculate_baseline_tokens(model, agent_dir, system_prompt, assistant_id)
    await simple_cli(
        agent,
        assistant_id,
        session_state,
        baseline_tokens,
        backend=composite_backend,
        sandbox_type=sandbox_type,
        setup_script_path=setup_script_path,
    )
async def main(
    assistant_id: str,
    session_state,
    sandbox_type: str = "none",
    sandbox_id: str | None = None,
    setup_script_path: str | None = None,
    model_name: str | None = None,
) -> None:
    """Main entry point with conditional sandbox support.

    Args:
        assistant_id: Agent identifier for memory storage.
        session_state: Session state carrying the auto-approve setting.
        sandbox_type: Sandbox type ("none", "modal", "runloop", "daytona").
        sandbox_id: Optional existing sandbox ID to reuse.
        setup_script_path: Optional path to a setup script to run in the sandbox.
        model_name: Optional model name to use instead of the environment variable.
    """
    model = create_model(model_name)
    # Branch 1: User wants a sandbox
    if sandbox_type != "none":
        # Try to create sandbox
        try:
            console.print()
            with create_sandbox(
                sandbox_type, sandbox_id=sandbox_id, setup_script_path=setup_script_path
            ) as sandbox_backend:
                console.print(f"[yellow]⚡ 원격 실행 활성화됨 ({sandbox_type})[/yellow]")
                console.print()
                await _run_agent_session(
                    model,
                    assistant_id,
                    session_state,
                    sandbox_backend,
                    sandbox_type=sandbox_type,
                    setup_script_path=setup_script_path,
                )
        # Sandbox creation failed - fail hard (no silent fallback).
        # NOTE(review): this clause also catches these exception types when they
        # are raised later, inside the running session, and reports them as a
        # creation failure - confirm whether that is intended.
        except (ImportError, ValueError, RuntimeError, NotImplementedError) as e:
            console.print()
            console.print("[red]❌ 샌드박스 생성 실패[/red]")
            console.print(f"[dim]{e}[/dim]")
            sys.exit(1)
        except KeyboardInterrupt:
            console.print("\n\n[yellow]중단됨[/yellow]")
            sys.exit(0)
        except Exception as e:
            console.print(f"\n[bold red]❌ 오류:[/bold red] {e}\n")
            console.print_exception()
            sys.exit(1)
    # Branch 2: User wants local mode (none or default)
    else:
        try:
            await _run_agent_session(model, assistant_id, session_state, sandbox_backend=None)
        except KeyboardInterrupt:
            console.print("\n\n[yellow]중단됨[/yellow]")
            sys.exit(0)
        except Exception as e:
            console.print(f"\n[bold red]❌ 오류:[/bold red] {e}\n")
            console.print_exception()
            sys.exit(1)
def cli_main() -> None:
    """Console-script entry point.

    Parses CLI arguments, dispatches the non-interactive subcommands
    (help/list/reset/skills), and otherwise starts the async agent session.
    """
    # Fix for gRPC fork issue on macOS
    # https://github.com/grpc/grpc/issues/37642
    if sys.platform == "darwin":
        os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "0"
    # Note: LANGSMITH_PROJECT is already overridden in config.py (before LangChain imports)
    # This ensures agent traces → DEEPAGENTS_LANGSMITH_PROJECT
    # Shell commands → user's original LANGSMITH_PROJECT (via ShellMiddleware env)
    # Check dependencies first
    check_cli_dependencies()
    try:
        args = parse_args()
        if args.command == "help":
            show_help()
        elif args.command == "list":
            list_agents()
        elif args.command == "reset":
            reset_agent(args.agent, args.source_agent)
        elif args.command == "skills":
            execute_skills_command(args)
        else:
            # Create session state from args
            session_state = SessionState(auto_approve=args.auto_approve, no_splash=args.no_splash)
            # API key validation happens in create_model()
            asyncio.run(
                main(
                    args.agent,
                    session_state,
                    args.sandbox,
                    args.sandbox_id,
                    args.sandbox_setup,
                    getattr(args, "model", None),
                )
            )
    except KeyboardInterrupt:
        # Clean exit on Ctrl+C - suppress ugly traceback
        console.print("\n\n[yellow]중단됨[/yellow]")
        sys.exit(0)
if __name__ == "__main__":
cli_main()

View File

@@ -0,0 +1,56 @@
"""Utilities for project root detection and project-specific configuration."""
from pathlib import Path
def find_project_root(start_path: Path | None = None) -> Path | None:
"""Find the project root by looking for .git directory.
Walks up the directory tree from start_path (or cwd) looking for a .git
directory, which indicates the project root.
Args:
start_path: Directory to start searching from. Defaults to current working directory.
Returns:
Path to the project root if found, None otherwise.
"""
current = Path(start_path or Path.cwd()).resolve()
# Walk up the directory tree
for parent in [current, *list(current.parents)]:
git_dir = parent / ".git"
if git_dir.exists():
return parent
return None
def find_project_agent_md(project_root: Path) -> list[Path]:
    """Collect project-specific agent.md file(s).

    Two locations are checked, and every one that exists is returned:

    1. ``project_root/.deepagents/agent.md`` (preferred)
    2. ``project_root/agent.md`` (fallback, also included when both exist)

    Callers load and combine all returned files.

    Args:
        project_root: Path to the project root directory.

    Returns:
        Paths to project agent.md files (may contain 0, 1, or 2 entries).
    """
    candidates = (
        project_root / ".deepagents" / "agent.md",
        project_root / "agent.md",
    )
    return [candidate for candidate in candidates if candidate.exists()]

View File

@@ -0,0 +1,138 @@
"""에이전트에 기본 셸 도구를 노출하는 단순화된 미들웨어."""
from __future__ import annotations
import os
import subprocess
from typing import Any
from langchain.agents.middleware.types import AgentMiddleware, AgentState
from langchain.tools import ToolRuntime, tool
from langchain_core.messages import ToolMessage
from langchain_core.tools.base import ToolException
class ShellMiddleware(AgentMiddleware[AgentState, Any]):
    """Grants the agent basic shell access through a ``shell`` tool.

    The shell runs on the local machine with no safeguards beyond the
    human-in-the-loop protections provided by the CLI itself.
    """
    def __init__(
        self,
        *,
        workspace_root: str,
        timeout: float = 120.0,
        max_output_bytes: int = 100_000,
        env: dict[str, str] | None = None,
    ) -> None:
        """Initialize an instance of `ShellMiddleware`.

        Args:
            workspace_root: Working directory for shell commands.
            timeout: Maximum time in seconds to wait for a command to
                complete. Defaults to 120 seconds.
            max_output_bytes: Maximum number of bytes to capture from
                command output. Defaults to 100,000 bytes.
            env: Environment variables to pass to the subprocess. If None,
                the current process's environment is used. Defaults to None.
        """
        super().__init__()
        self._timeout = timeout
        self._max_output_bytes = max_output_bytes
        self._tool_name = "shell"
        # Snapshot of the environment taken at construction time
        self._env = env if env is not None else os.environ.copy()
        self._workspace_root = workspace_root
        # Build description with workspace info
        description = (
            f"Execute shell commands directly on the host. Commands run in this working directory: "
            f"{workspace_root}. Each command runs in a fresh shell environment with the "
            f"current process's environment variables. Commands may be truncated if they exceed "
            f"configured timeout or output limits."
        )
        @tool(self._tool_name, description=description)
        def shell_tool(
            command: str,
            runtime: ToolRuntime[None, AgentState],
        ) -> ToolMessage | str:
            """Execute a shell command.

            Args:
                command: The shell command to execute.
                runtime: The tool runtime context.
            """
            return self._run_shell_command(command, tool_call_id=runtime.tool_call_id)
        self._shell_tool = shell_tool
        self.tools = [self._shell_tool]
    def _run_shell_command(
        self,
        command: str,
        *,
        tool_call_id: str | None,
    ) -> ToolMessage | str:
        """Run a shell command and return the result.

        Args:
            command: The shell command to execute.
            tool_call_id: Tool call ID used to build the ToolMessage.

        Returns:
            A ToolMessage containing the command output or an error message.
        """
        if not command or not isinstance(command, str):
            msg = "Shell 도구는 비어 있지 않은 명령 문자열을 필요로 합니다."
            raise ToolException(msg)
        try:
            # NOTE(review): shell=True executes the agent-supplied string via the
            # system shell; safety relies entirely on the CLI's approval flow.
            result = subprocess.run(
                command,
                check=False,
                shell=True,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                env=self._env,
                cwd=self._workspace_root,
            )
            # Combine stdout and stderr
            output_parts = []
            if result.stdout:
                output_parts.append(result.stdout)
            if result.stderr:
                # Prefix each stderr line so the two streams stay distinguishable
                stderr_lines = result.stderr.strip().split("\n")
                for line in stderr_lines:
                    output_parts.append(f"[stderr] {line}")
            output = "\n".join(output_parts) if output_parts else "<no output>"
            # Truncate the output if needed (counts characters of the decoded
            # text, not raw bytes, despite the parameter name)
            if len(output) > self._max_output_bytes:
                output = output[: self._max_output_bytes]
                output += f"\n\n... 출력이 {self._max_output_bytes}바이트에서 잘렸습니다."
            # Append exit-code info when non-zero
            if result.returncode != 0:
                output = f"{output.rstrip()}\n\n종료 코드: {result.returncode}"
                status = "error"
            else:
                status = "success"
        except subprocess.TimeoutExpired:
            output = f"오류: 명령이 {self._timeout:.1f}초 후에 시간 초과되었습니다."
            status = "error"
        return ToolMessage(
            content=output,
            tool_call_id=tool_call_id,
            name=self._tool_name,
            status=status,
        )
__all__ = ["ShellMiddleware"]

View File

@@ -0,0 +1,21 @@
"""deepagents CLI를 위한 Skills 모듈.
공개 API:
- SkillsMiddleware: 기술을 에이전트 실행에 통합하기 위한 미들웨어
- execute_skills_command: 기술 하위 명령(list/create/info) 실행
- setup_skills_parser: 기술 명령을 위한 argparse 설정
기타 모든 구성 요소는 내부 구현 세부 사항입니다.
"""
from deepagents_cli.skills.commands import (
execute_skills_command,
setup_skills_parser,
)
from deepagents_cli.skills.middleware import SkillsMiddleware
__all__ = [
"SkillsMiddleware",
"execute_skills_command",
"setup_skills_parser",
]

View File

@@ -0,0 +1,486 @@
"""기술 관리를 위한 CLI 명령.
이 명령들은 cli.py를 통해 CLI에 등록됩니다:
- deepagents skills list --agent <agent> [--project]
- deepagents skills create <name>
- deepagents skills info <name>
"""
import argparse
import re
from pathlib import Path
from typing import Any
from deepagents_cli.config import COLORS, Settings, console
from deepagents_cli.skills.load import MAX_SKILL_NAME_LENGTH, list_skills
def _validate_name(name: str) -> tuple[bool, str]:
    """Validate a name against the Agent Skills specification.

    Requirements (https://agentskills.io/specification):
    - At most 64 characters
    - Lowercase alphanumerics and hyphens only (a-z, 0-9, -)
    - Must not start or end with a hyphen
    - No consecutive hyphens
    - No path traversal sequences

    Args:
        name: The name to validate.

    Returns:
        Tuple of (is_valid, error_message). The error message is empty when valid.
    """
    # Reject empty or whitespace-only names
    if not name or not name.strip():
        return False, "비어 있을 수 없습니다"
    # Length check (spec: at most 64 characters). Use the shared constant in
    # the message so it stays correct if the limit ever changes.
    if len(name) > MAX_SKILL_NAME_LENGTH:
        return False, f"{MAX_SKILL_NAME_LENGTH}자를 초과할 수 없습니다"
    # Reject path traversal sequences
    if ".." in name or "/" in name or "\\" in name:
        return False, "경로 요소를 포함할 수 없습니다"
    # Spec: lowercase alphanumerics and hyphens only.
    # The pattern also guarantees: no leading/trailing hyphen, no consecutive hyphens.
    if not re.match(r"^[a-z0-9]+(-[a-z0-9]+)*$", name):
        return (
            False,
            "소문자, 숫자, 하이픈만 사용해야 합니다 (대문자, 밑줄 불가능, 하이픈으로 시작하거나 끝날 수 없음)",
        )
    return True, ""
def _validate_skill_path(skill_dir: Path, base_dir: Path) -> tuple[bool, str]:
    """Validate that the resolved skill directory stays inside the base directory.

    Args:
        skill_dir: Skill directory path to validate.
        base_dir: Base skills directory that must contain skill_dir.

    Returns:
        Tuple of (is_valid, error_message). The error message is empty when valid.
    """
    try:
        # Resolve both paths to canonical form (follows symlinks) so traversal
        # via ".." segments or symlinks is caught.
        resolved_skill = skill_dir.resolve()
        resolved_base = base_dir.resolve()
        # Path.is_relative_to exists on every Python version this code can run
        # on (3.9+; the package already uses newer syntax elsewhere), so the
        # previous hasattr/relative_to fallback was dead code.
        if not resolved_skill.is_relative_to(resolved_base):
            return False, f"기술 디렉토리는 {base_dir} 내에 있어야 합니다"
        return True, ""
    except (OSError, RuntimeError) as e:
        # Resolution failed (e.g. symlink loops): report instead of raising.
        return False, f"잘못된 경로: {e}"
def _list(agent: str, *, project: bool = False) -> None:
    """List every skill available for the given agent.

    Args:
        agent: Agent identifier for the skills (default: agent).
        project: When True, show project skills only.
            When False, show all skills (user + project).
    """
    settings = Settings.from_environment()
    user_skills_dir = settings.get_user_skills_dir(agent)
    project_skills_dir = settings.get_project_skills_dir()
    # When the --project flag is used, show project skills only
    if project:
        if not project_skills_dir:
            console.print("[yellow]프로젝트 디렉토리가 아닙니다.[/yellow]")
            console.print(
                "[dim]프로젝트 기술을 사용하려면 프로젝트 루트에 .git 디렉토리가 필요합니다.[/dim]",
                style=COLORS["dim"],
            )
            return
        if not project_skills_dir.exists() or not any(project_skills_dir.iterdir()):
            console.print("[yellow]프로젝트 기술을 찾을 수 없습니다.[/yellow]")
            console.print(
                f"[dim]프로젝트 기술을 추가하면 {project_skills_dir}/ 에 생성됩니다.[/dim]",
                style=COLORS["dim"],
            )
            console.print(
                "\n[dim]프로젝트 기술 생성:\n deepagents skills create my-skill --project[/dim]",
                style=COLORS["dim"],
            )
            return
        skills = list_skills(user_skills_dir=None, project_skills_dir=project_skills_dir)
        console.print("\n[bold]프로젝트 기술:[/bold]\n", style=COLORS["primary"])
    else:
        # Load both user and project skills
        skills = list_skills(user_skills_dir=user_skills_dir, project_skills_dir=project_skills_dir)
        if not skills:
            console.print("[yellow]기술을 찾을 수 없습니다.[/yellow]")
            console.print(
                "[dim]기술을 추가하면 ~/.deepagents/agent/skills/ 에 생성됩니다.[/dim]",
                style=COLORS["dim"],
            )
            console.print(
                "\n[dim]첫 번째 기술 생성:\n deepagents skills create my-skill[/dim]",
                style=COLORS["dim"],
            )
            return
        console.print("\n[bold]사용 가능한 기술:[/bold]\n", style=COLORS["primary"])
    # Group skills by source
    user_skills = [s for s in skills if s["source"] == "user"]
    project_skills_list = [s for s in skills if s["source"] == "project"]
    # Show user skills
    if user_skills and not project:
        console.print("[bold cyan]사용자 기술:[/bold cyan]", style=COLORS["primary"])
        for skill in user_skills:
            skill_path = Path(skill["path"])
            console.print(f" • [bold]{skill['name']}[/bold]", style=COLORS["primary"])
            console.print(f" {skill['description']}", style=COLORS["dim"])
            console.print(f" 위치: {skill_path.parent}/", style=COLORS["dim"])
            console.print()
    # Show project skills
    if project_skills_list:
        # Blank separator line between the two groups
        if not project and user_skills:
            console.print()
        console.print("[bold green]프로젝트 기술:[/bold green]", style=COLORS["primary"])
        for skill in project_skills_list:
            skill_path = Path(skill["path"])
            console.print(f" • [bold]{skill['name']}[/bold]", style=COLORS["primary"])
            console.print(f" {skill['description']}", style=COLORS["dim"])
            console.print(f" 위치: {skill_path.parent}/", style=COLORS["dim"])
            console.print()
def _create(skill_name: str, agent: str, project: bool = False) -> None:
    """Create a new skill with a template SKILL.md file.

    Args:
        skill_name: Name of the skill to create.
        agent: Agent identifier for the skills.
        project: When True, create in the project skills directory.
            When False, create in the user skills directory.
    """
    # Validate the skill name first (per the Agent Skills specification)
    is_valid, error_msg = _validate_name(skill_name)
    if not is_valid:
        console.print(f"[bold red]오류:[/bold red] 잘못된 기술 이름: {error_msg}")
        console.print(
            "[dim]Agent Skills 사양에 따라: 이름은 소문자 영숫자와 하이픈만 사용해야 합니다.\n"
            "예시: web-research, code-review, data-analysis[/dim]",
            style=COLORS["dim"],
        )
        return
    # Determine the target directory
    settings = Settings.from_environment()
    if project:
        if not settings.project_root:
            console.print("[bold red]오류:[/bold red] 프로젝트 디렉토리가 아닙니다.")
            console.print(
                "[dim]프로젝트 기술을 사용하려면 프로젝트 루트에 .git 디렉토리가 필요합니다.[/dim]",
                style=COLORS["dim"],
            )
            return
        skills_dir = settings.ensure_project_skills_dir()
    else:
        skills_dir = settings.ensure_user_skills_dir(agent)
    skill_dir = skills_dir / skill_name
    # Ensure the resolved path stays inside skills_dir
    is_valid_path, path_error = _validate_skill_path(skill_dir, skills_dir)
    if not is_valid_path:
        console.print(f"[bold red]오류:[/bold red] {path_error}")
        return
    if skill_dir.exists():
        console.print(f"[bold red]오류:[/bold red] '{skill_name}' 기술이 이미 {skill_dir} 에 존재합니다")
        return
    # Create the skill directory
    skill_dir.mkdir(parents=True, exist_ok=True)
    # Create the template SKILL.md (spec: https://agentskills.io/specification)
    template = f"""---
name: {skill_name}
description: 이 기술이 수행하는 작업과 사용 시기에 대한 간략한 설명.
# Agent Skills 사양에 따른 선택적 필드:
# license: Apache-2.0
# compatibility: Designed for deepagents CLI
# metadata:
# author: your-org
# version: "1.0"
# allowed-tools: Bash(git:*) Read
---
# {skill_name.title().replace("-", " ")} 기술
## 설명
[이 기술이 수행하는 작업과 사용해야 하는 시기에 대한 자세한 설명을 제공하십시오]
## 사용 시기
- [시나리오 1: 사용자가 ...를 요청할 때]
- [시나리오 2: ...가 필요할 때]
- [시나리오 3: 태스크에 ...가 포함될 때]
## 사용 방법
### 1단계: [첫 번째 작업]
[먼저 수행할 작업을 설명하십시오]
### 2단계: [두 번째 작업]
[다음에 수행할 작업을 설명하십시오]
### 3단계: [최종 작업]
[태스크를 완료하는 방법을 설명하십시오]
## 권장 사항
- [권장 사항 1]
- [권장 사항 2]
- [권장 사항 3]
## 지원 파일
이 기술 디렉토리에는 지침에서 참조하는 지원 파일이 포함될 수 있습니다:
- `helper.py` - 자동화를 위한 Python 스크립트
- `config.json` - 설정 파일
- `reference.md` - 추가 참조 문서
## 예시
### 예시 1: [시나리오 이름]
**사용자 요청:** "[사용자 요청 예시]"
**접근 방식:**
1. [단계별 분석]
2. [도구 및 명령 사용]
3. [예상 결과]
### 예시 2: [다른 시나리오]
**사용자 요청:** "[다른 예시]"
**접근 방식:**
1. [다른 접근 방식]
2. [관련 명령]
3. [예상 결과]
## 참고 사항
- [추가 팁, 경고 또는 컨텍스트]
- [알려진 제한 사항 또는 예외 케이스]
- [도움이 되는 외부 리소스 링크]
"""
    skill_md = skill_dir / "SKILL.md"
    skill_md.write_text(template)
    console.print(f"'{skill_name}' 기술이 성공적으로 생성되었습니다!", style=COLORS["primary"])
    console.print(f"위치: {skill_dir}\n", style=COLORS["dim"])
    console.print(
        "[dim]SKILL.md 파일을 편집하여 사용자 정의하십시오:\n"
        " 1. YAML frontmatter에서 설명을 업데이트하십시오\n"
        " 2. 지침과 예시를 채우십시오\n"
        " 3. 지원 파일(스크립트, 설정 등)을 추가하십시오\n"
        "\n"
        f" nano {skill_md}\n"
        "\n"
        "💡 기술 예시는 deepagents 저장소의 examples/skills/ 를 참조하십시오:\n"
        " - web-research: 구조화된 연구 워크플로우\n"
        " - langgraph-docs: LangGraph 문서 조회\n"
        "\n"
        " 예시 복사: cp -r examples/skills/web-research ~/.deepagents/agent/skills/\n",
        style=COLORS["dim"],
    )
def _info(skill_name: str, *, agent: str = "agent", project: bool = False) -> None:
    """Show detailed information about a specific skill.

    Args:
        skill_name: Name of the skill to show details for.
        agent: Agent identifier for the skills (default: agent).
        project: When True, search project skills only. When False, search
            both user and project skills.
    """
    settings = Settings.from_environment()
    user_skills_dir = settings.get_user_skills_dir(agent)
    project_skills_dir = settings.get_project_skills_dir()
    # Load skills according to the --project flag
    if project:
        if not project_skills_dir:
            console.print("[bold red]오류:[/bold red] 프로젝트 디렉토리가 아닙니다.")
            return
        skills = list_skills(user_skills_dir=None, project_skills_dir=project_skills_dir)
    else:
        skills = list_skills(user_skills_dir=user_skills_dir, project_skills_dir=project_skills_dir)
    # Find the skill
    skill = next((s for s in skills if s["name"] == skill_name), None)
    if not skill:
        console.print(f"[bold red]오류:[/bold red] '{skill_name}' 기술을 찾을 수 없습니다.")
        console.print("\n[dim]사용 가능한 기술:[/dim]", style=COLORS["dim"])
        for s in skills:
            console.print(f" - {s['name']}", style=COLORS["dim"])
        return
    # Read the full SKILL.md file
    skill_path = Path(skill["path"])
    skill_content = skill_path.read_text()
    # Determine the source label
    source_label = "프로젝트 기술" if skill["source"] == "project" else "사용자 기술"
    source_color = "green" if skill["source"] == "project" else "cyan"
    console.print(
        f"\n[bold]기술: {skill['name']}[/bold] [bold {source_color}]({source_label})[/bold {source_color}]\n",
        style=COLORS["primary"],
    )
    console.print(f"[bold]설명:[/bold] {skill['description']}\n", style=COLORS["dim"])
    console.print(f"[bold]위치:[/bold] {skill_path.parent}/\n", style=COLORS["dim"])
    # List the supporting files
    skill_dir = skill_path.parent
    supporting_files = [f for f in skill_dir.iterdir() if f.name != "SKILL.md"]
    if supporting_files:
        console.print("[bold]지원 파일:[/bold]", style=COLORS["dim"])
        for file in supporting_files:
            console.print(f" - {file.name}", style=COLORS["dim"])
        console.print()
    # Show the full SKILL.md content
    console.print("[bold]전체 SKILL.md 내용:[/bold]\n", style=COLORS["primary"])
    console.print(skill_content, style=COLORS["dim"])
    console.print()
def setup_skills_parser(
    subparsers: Any,
) -> argparse.ArgumentParser:
    """Register the ``skills`` subcommand parser together with its subcommands.

    Args:
        subparsers: The top-level argparse subparsers action to attach to.

    Returns:
        The newly created ``skills`` argument parser.
    """
    skills_parser = subparsers.add_parser(
        "skills",
        help="에이전트 기술 관리",
        description="에이전트 기술 관리 - 기술 정보 생성, 나열 및 보기",
    )
    skills_subparsers = skills_parser.add_subparsers(dest="skills_command", help="기술 명령")

    def _add_agent_option(target: argparse.ArgumentParser) -> None:
        # Every subcommand accepts the same --agent option.
        target.add_argument(
            "--agent",
            default="agent",
            help="기술을 위한 에이전트 식별자 (기본값: agent)",
        )

    # skills list
    list_parser = skills_subparsers.add_parser(
        "list", help="사용 가능한 모든 기술 나열", description="사용 가능한 모든 기술 나열"
    )
    _add_agent_option(list_parser)
    list_parser.add_argument(
        "--project",
        action="store_true",
        help="프로젝트 수준 기술만 표시",
    )
    # skills create
    create_parser = skills_subparsers.add_parser(
        "create",
        help="새 기술 생성",
        description="템플릿 SKILL.md 파일을 사용하여 새 기술 생성",
    )
    create_parser.add_argument("name", help="생성할 기술 이름 (예: web-research)")
    _add_agent_option(create_parser)
    create_parser.add_argument(
        "--project",
        action="store_true",
        help="사용자 디렉토리 대신 프로젝트 디렉토리에 기술 생성",
    )
    # skills info
    info_parser = skills_subparsers.add_parser(
        "info",
        help="기술에 대한 자세한 정보 표시",
        description="특정 기술에 대한 자세한 정보 표시",
    )
    info_parser.add_argument("name", help="정보를 표시할 기술 이름")
    _add_agent_option(info_parser)
    info_parser.add_argument(
        "--project",
        action="store_true",
        help="프로젝트 기술만 검색",
    )
    return skills_parser
def execute_skills_command(args: argparse.Namespace) -> None:
    """Execute a skills subcommand based on the parsed arguments.

    Args:
        args: Parsed command-line arguments with a skills_command attribute.
    """
    # Validate the agent argument. Use getattr: when no subcommand was given,
    # the namespace has no "agent" attribute at all, and accessing it directly
    # would raise AttributeError before the usage help below could be shown.
    agent = getattr(args, "agent", None)
    if agent:
        is_valid, error_msg = _validate_name(agent)
        if not is_valid:
            console.print(f"[bold red]오류:[/bold red] 잘못된 에이전트 이름: {error_msg}")
            # Hint matches _validate_name: lowercase letters, digits, hyphens
            # (the old text wrongly claimed underscores were allowed).
            console.print(
                "[dim]에이전트 이름은 소문자, 숫자, 하이픈만 포함할 수 있습니다.[/dim]",
                style=COLORS["dim"],
            )
            return
    if args.skills_command == "list":
        _list(agent=args.agent, project=args.project)
    elif args.skills_command == "create":
        _create(args.name, agent=args.agent, project=args.project)
    elif args.skills_command == "info":
        _info(args.name, agent=args.agent, project=args.project)
    else:
        # No subcommand given - show usage help
        console.print("[yellow]기술 하위 명령을 지정하십시오: list, create, 또는 info[/yellow]")
        console.print("\n[bold]사용법:[/bold]", style=COLORS["primary"])
        console.print(" deepagents skills <command> [options]\n")
        console.print("[bold]사용 가능한 명령:[/bold]", style=COLORS["primary"])
        console.print(" list 사용 가능한 모든 기술 나열")
        console.print(" create <name> 새 기술 생성")
        console.print(" info <name> 기술에 대한 자세한 정보 표시")
        console.print("\n[bold]예시:[/bold]", style=COLORS["primary"])
        console.print(" deepagents skills list")
        console.print(" deepagents skills create web-research")
        console.print(" deepagents skills info web-research")
        console.print("\n[dim]특정 명령에 대한 추가 도움말:[/dim]", style=COLORS["dim"])
        console.print(" deepagents skills <command> --help", style=COLORS["dim"])
__all__ = [
"execute_skills_command",
"setup_skills_parser",
]

View File

@@ -0,0 +1,319 @@
"""SKILL.md 파일에서 에이전트 기술을 파싱하고 로드하기 위한 기술 로더.
이 모듈은 YAML frontmatter 파싱을 통해 Anthropic의 에이전트 기술 패턴을 구현합니다.
각 기술은 다음을 포함하는 SKILL.md 파일이 있는 디렉토리입니다:
- YAML frontmatter (이름, 설명 필수)
- 에이전트를 위한 마크다운 지침
- 선택적 지원 파일 (스크립트, 설정 등)
SKILL.md 구조 예시:
```markdown
---
name: web-research
description: 철저한 웹 조사를 수행하기 위한 구조화된 접근 방식
---
# 웹 조사 기술
## 사용 시기
- 사용자가 주제 조사를 요청할 때
...
```
"""
from __future__ import annotations
import logging
import re
from typing import TYPE_CHECKING, NotRequired, TypedDict
import yaml
if TYPE_CHECKING:
from pathlib import Path
logger = logging.getLogger(__name__)
# SKILL.md 파일의 최대 크기 (10MB)
MAX_SKILL_FILE_SIZE = 10 * 1024 * 1024
# Agent Skills 사양 제약 조건 (https://agentskills.io/specification)
MAX_SKILL_NAME_LENGTH = 64
MAX_SKILL_DESCRIPTION_LENGTH = 1024
class SkillMetadata(TypedDict):
    """Skill metadata per the Agent Skills specification (https://agentskills.io/specification)."""
    name: str
    """Skill name (max 64 chars, lowercase alphanumerics and hyphens)."""
    description: str
    """Description of what the skill does (max 1024 chars)."""
    path: str
    """Path to the SKILL.md file."""
    source: str
    """Origin of the skill ('user' or 'project')."""
    # Optional fields per the Agent Skills specification
    license: NotRequired[str | None]
    """License name or a reference to a bundled license file."""
    compatibility: NotRequired[str | None]
    """Environment requirements (max 500 chars)."""
    metadata: NotRequired[dict[str, str] | None]
    """Arbitrary key-value mapping for additional metadata."""
    allowed_tools: NotRequired[str | None]
    """Space-separated list of pre-approved tools."""
def _is_safe_path(path: Path, base_dir: Path) -> bool:
    """Check whether *path* is safely contained inside *base_dir*.

    Guards against directory-traversal attacks via symlinks or path
    manipulation: both paths are resolved to canonical form (following
    symlinks) before the containment check.

    Args:
        path: Path to validate.
        base_dir: Base directory that must contain the path.

    Returns:
        True when the path is safely inside base_dir, False otherwise.

    Examples:
        >>> base = Path("/home/user/.deepagents/skills")
        >>> safe = Path("/home/user/.deepagents/skills/web-research/SKILL.md")
        >>> unsafe = Path("/home/user/.deepagents/skills/../../.ssh/id_rsa")
        >>> _is_safe_path(safe, base)
        True
        >>> _is_safe_path(unsafe, base)
        False
    """
    try:
        canonical_target = path.resolve()
        canonical_base = base_dir.resolve()
    except (OSError, RuntimeError):
        # Resolution failed (e.g. symlink loops, too many levels): treat as unsafe.
        return False
    # is_relative_to succeeds exactly when relative_to would not raise
    # ValueError, i.e. when the resolved target lives under the resolved base.
    return canonical_target.is_relative_to(canonical_base)
def _validate_skill_name(name: str, directory_name: str) -> tuple[bool, str]:
    """Validate a skill name against the Agent Skills specification.

    Requirements:
    - At most 64 characters
    - Lowercase alphanumerics and hyphens only (a-z, 0-9, -)
    - Must not start or end with a hyphen
    - No consecutive hyphens
    - Must match the parent directory name

    Args:
        name: Skill name from the YAML frontmatter.
        directory_name: Name of the parent directory.

    Returns:
        Tuple of (is_valid, error_message). The error message is empty when valid.
    """
    if not name:
        return False, "이름은 필수입니다"
    # Use the shared constant in the message so it stays correct if the
    # limit ever changes (it was previously hardcoded as "64").
    if len(name) > MAX_SKILL_NAME_LENGTH:
        return False, f"이름이 {MAX_SKILL_NAME_LENGTH}자를 초과합니다"
    # Pattern: lowercase alphanumerics, single hyphens between segments,
    # no leading/trailing hyphen
    if not re.match(r"^[a-z0-9]+(-[a-z0-9]+)*$", name):
        return False, "이름은 소문자 영숫자와 단일 하이픈만 사용해야 합니다"
    if name != directory_name:
        return False, f"이름 '{name}'은 디렉토리 이름 '{directory_name}'과 일치해야 합니다"
    return True, ""
def _parse_skill_metadata(skill_md_path: Path, source: str) -> SkillMetadata | None:
    """Parse YAML frontmatter from a SKILL.md file per the Agent Skills spec.

    Args:
        skill_md_path: Path to the SKILL.md file.
        source: Origin of the skill ('user' or 'project').

    Returns:
        SkillMetadata with all fields, or None when parsing fails.
    """
    try:
        # Security: check the file size to prevent DoS attacks
        file_size = skill_md_path.stat().st_size
        if file_size > MAX_SKILL_FILE_SIZE:
            logger.warning("건너뛰는 중 %s: 파일이 너무 큼 (%d 바이트)", skill_md_path, file_size)
            return None
        content = skill_md_path.read_text(encoding="utf-8")
        # Match the YAML frontmatter between --- delimiters
        frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n"
        match = re.match(frontmatter_pattern, content, re.DOTALL)
        if not match:
            logger.warning("건너뛰는 중 %s: 유효한 YAML frontmatter를 찾을 수 없음", skill_md_path)
            return None
        frontmatter_str = match.group(1)
        # Parse the YAML with safe_load for proper nested-structure support
        try:
            frontmatter_data = yaml.safe_load(frontmatter_str)
        except yaml.YAMLError as e:
            logger.warning("%s의 잘못된 YAML: %s", skill_md_path, e)
            return None
        if not isinstance(frontmatter_data, dict):
            logger.warning("건너뛰는 중 %s: frontmatter가 매핑이 아님", skill_md_path)
            return None
        # Validate the required fields
        name = frontmatter_data.get("name")
        description = frontmatter_data.get("description")
        if not name or not description:
            logger.warning("건너뛰는 중 %s: 필수 'name' 또는 'description'이 누락됨", skill_md_path)
            return None
        # Validate the name format per the spec (warn, but still load the
        # skill for backward compatibility)
        directory_name = skill_md_path.parent.name
        is_valid, error = _validate_skill_name(str(name), directory_name)
        if not is_valid:
            logger.warning(
                "%s'%s' 기술이 Agent Skills 사양을 따르지 않음: %s. "
                "사양을 준수하도록 이름을 변경하는 것을 고려하십시오.",
                skill_md_path,
                name,
                error,
            )
        # Validate the description length (spec: at most 1024 characters)
        description_str = str(description)
        if len(description_str) > MAX_SKILL_DESCRIPTION_LENGTH:
            logger.warning(
                "%s의 설명이 %d자를 초과하여 잘림",
                skill_md_path,
                MAX_SKILL_DESCRIPTION_LENGTH,
            )
            description_str = description_str[:MAX_SKILL_DESCRIPTION_LENGTH]
        return SkillMetadata(
            name=str(name),
            description=description_str,
            path=str(skill_md_path),
            source=source,
            license=frontmatter_data.get("license"),
            compatibility=frontmatter_data.get("compatibility"),
            metadata=frontmatter_data.get("metadata"),
            allowed_tools=frontmatter_data.get("allowed-tools"),
        )
    except (OSError, UnicodeDecodeError) as e:
        logger.warning("%s 읽기 오류: %s", skill_md_path, e)
        return None
def _list_skills(skills_dir: Path, source: str) -> list[SkillMetadata]:
    """List every skill found in a single skills directory (internal helper).

    Scans the skills directory for subdirectories containing a SKILL.md file,
    parses their YAML frontmatter, and returns the skill metadata.

    Skill layout:
        skills/
        ├── skill-name/
        │   ├── SKILL.md     # required: instructions with YAML frontmatter
        │   ├── script.py    # optional: supporting file
        │   └── config.json  # optional: supporting file

    Args:
        skills_dir: Path to the skills directory.
        source: Origin of the skills ('user' or 'project').

    Returns:
        List of skill metadata dicts with name, description, path, and source.
    """
    skills_dir = skills_dir.expanduser()
    if not skills_dir.exists():
        return []
    # Canonicalize the base directory once for the containment checks below.
    try:
        resolved_base = skills_dir.resolve()
    except (OSError, RuntimeError):
        # The base directory itself cannot be resolved; bail out safely.
        return []
    found: list[SkillMetadata] = []
    for entry in skills_dir.iterdir():
        skill_file = entry / "SKILL.md"
        # Security: reject entries (or SKILL.md files) that are symlinks
        # escaping the skills directory; skip non-directories and
        # directories without a SKILL.md.
        if (
            _is_safe_path(entry, resolved_base)
            and entry.is_dir()
            and skill_file.exists()
            and _is_safe_path(skill_file, resolved_base)
        ):
            parsed = _parse_skill_metadata(skill_file, source=source)
            if parsed:
                found.append(parsed)
    return found
def list_skills(*, user_skills_dir: Path | None = None, project_skills_dir: Path | None = None) -> list[SkillMetadata]:
    """List skills from the user and/or project directories.

    When both directories are given, a project skill whose name matches a
    user skill overrides the user skill.

    Args:
        user_skills_dir: Path to the user-level skills directory.
        project_skills_dir: Path to the project-level skills directory.

    Returns:
        Merged list of skill metadata from both sources, with project skills
        taking precedence over user skills on name collisions.
    """
    merged: dict[str, SkillMetadata] = {}
    # User skills form the base layer; project skills are loaded second and
    # override/extend entries with the same name.
    layers = (
        (user_skills_dir, "user"),
        (project_skills_dir, "project"),
    )
    for directory, origin in layers:
        if directory:
            for entry in _list_skills(directory, source=origin):
                merged[entry["name"]] = entry
    return list(merged.values())

View File

@@ -0,0 +1,273 @@
"""에이전트 기술을 시스템 프롬프트에 로드하고 노출하기 위한 미들웨어.
이 미들웨어는 점진적 노출(progressive disclosure)을 통해 Anthropic의 "Agent Skills" 패턴을 구현합니다:
1. 세션 시작 시 SKILL.md 파일에서 YAML frontmatter 파싱
2. 시스템 프롬프트에 기술 메타데이터(이름 + 설명) 주입
3. 에이전트는 작업과 관련이 있을 때 SKILL.md의 전체 내용을 읽음
기술 디렉토리 구조 (에이전트별 + 프로젝트):
사용자 수준: ~/.deepagents/{AGENT_NAME}/skills/
프로젝트 수준: {PROJECT_ROOT}/.deepagents/skills/
구조 예시:
~/.deepagents/{AGENT_NAME}/skills/
├── web-research/
│ ├── SKILL.md # 필수: YAML frontmatter + 지침
│ └── helper.py # 선택 사항: 지원 파일
├── code-review/
│ ├── SKILL.md
│ └── checklist.md
.deepagents/skills/
├── project-specific/
│ └── SKILL.md # 프로젝트 전용 기술
"""
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import NotRequired, TypedDict, cast
from langchain.agents.middleware.types import (
AgentMiddleware,
AgentState,
ModelRequest,
ModelResponse,
)
from langgraph.runtime import Runtime
from deepagents_cli.skills.load import SkillMetadata, list_skills
class SkillsState(AgentState):
    """State for the skills middleware."""
    skills_metadata: NotRequired[list[SkillMetadata]]
    """List of loaded skill metadata (name, description, path)."""
class SkillsStateUpdate(TypedDict):
    """State update emitted by the skills middleware."""
    skills_metadata: list[SkillMetadata]
    """List of loaded skill metadata (name, description, path)."""
# Skills-system documentation injected into the agent's system prompt.
# Placeholders: {skills_locations} and {skills_list} are filled in at runtime.
SKILLS_SYSTEM_PROMPT = """
## 기술 시스템 (Skills System)
당신은 전문적인 능력과 도메인 지식을 제공하는 기술 라이브러리에 접근할 수 있습니다.
{skills_locations}
**사용 가능한 기술:**
{skills_list}
**기술 사용 방법 (점진적 노출):**
기술은 **점진적 노출(progressive disclosure)** 패턴을 따릅니다. 당신은 기술이 존재한다는 것(위의 이름 + 설명)은 알고 있지만, 필요할 때만 전체 지침을 읽습니다:
1. **기술이 적용되는 시기 파악**: 사용자의 작업이 기술의 설명과 일치하는지 확인하십시오.
2. **기술의 전체 지침 읽기**: 위의 기술 목록은 read_file과 함께 사용할 정확한 경로를 보여줍니다.
3. **기술의 지침 따르기**: SKILL.md에는 단계별 워크플로우, 권장 사항 및 예시가 포함되어 있습니다.
4. **지원 파일 접근**: 기술에는 Python 스크립트, 설정 또는 참조 문서가 포함될 수 있습니다. 절대 경로를 사용하십시오.
**기술을 사용해야 하는 경우:**
- 사용자의 요청이 기술의 도메인과 일치할 때 (예: "X 조사해줘" → web-research 기술)
- 전문 지식이나 구조화된 워크플로우가 필요할 때
- 기술이 복잡한 작업에 대해 검증된 패턴을 제공할 때
**기술은 자체 문서화됨:**
- 각 SKILL.md는 기술이 수행하는 작업과 사용 방법을 정확하게 알려줍니다.
- 위의 기술 목록은 각 기술의 SKILL.md 파일에 대한 전체 경로를 보여줍니다.
**기술 스크립트 실행:**
기술에는 Python 스크립트나 기타 실행 파일이 포함될 수 있습니다. 항상 기술 목록의 절대 경로를 사용하십시오.
**워크플로우 예시:**
사용자: "양자 컴퓨팅의 최신 개발 동향을 조사해 줄 수 있어?"
1. 위에서 사용 가능한 기술 확인 → 전체 경로와 함께 "web-research" 기술 확인
2. 목록에 표시된 경로를 사용하여 기술 읽기
3. 기술의 조사 워크플로우 따르기 (조사 → 정리 → 합성)
4. 절대 경로와 함께 헬퍼 스크립트 사용
주의: 기술은 당신을 더 유능하고 일관성 있게 만드는 도구입니다. 의심스러울 때는 해당 작업에 대한 기술이 있는지 확인하십시오!
"""
class SkillsMiddleware(AgentMiddleware):
    """Middleware that loads agent skills and exposes them to the model.

    Implements Anthropic's agent-skills pattern:
    - Loads skill metadata (name, description) from YAML frontmatter before each run.
    - Injects a skills listing into the system prompt for discoverability.
    - The agent reads the full SKILL.md only when relevant (progressive disclosure).

    Supports both user-level and project-level skills:
    - User skills: ``~/.deepagents/{AGENT_NAME}/skills/``
    - Project skills: ``{PROJECT_ROOT}/.deepagents/skills/``
    - Project skills override user skills with the same name.

    Args:
        skills_dir: Path to the user-level skills directory (per agent).
        assistant_id: Agent identifier used for path references in prompts.
        project_skills_dir: Optional path to the project-level skills directory.
    """

    state_schema = SkillsState

    def __init__(
        self,
        *,
        skills_dir: str | Path,
        assistant_id: str,
        project_skills_dir: str | Path | None = None,
    ) -> None:
        """Initialize the skills middleware.

        Args:
            skills_dir: Path to the user-level skills directory.
            assistant_id: Agent identifier.
            project_skills_dir: Optional path to the project-level skills directory.
        """
        self.skills_dir = Path(skills_dir).expanduser()
        self.assistant_id = assistant_id
        self.project_skills_dir = (
            Path(project_skills_dir).expanduser() if project_skills_dir else None
        )
        # Shortened user-skills path used only for prompt display.
        self.user_skills_display = f"~/.deepagents/{assistant_id}/skills"
        self.system_prompt_template = SKILLS_SYSTEM_PROMPT

    def _format_skills_locations(self) -> str:
        """Format the skill locations for display in the system prompt."""
        locations = [f"**사용자 기술**: `{self.user_skills_display}`"]
        if self.project_skills_dir:
            locations.append(f"**프로젝트 기술**: `{self.project_skills_dir}` (사용자 기술을 오버라이드함)")
        return "\n".join(locations)

    def _format_skills_list(self, skills: list[SkillMetadata]) -> str:
        """Format skill metadata for display in the system prompt."""
        if not skills:
            locations = [f"{self.user_skills_display}/"]
            if self.project_skills_dir:
                locations.append(f"{self.project_skills_dir}/")
            return f"(현재 사용 가능한 기술이 없습니다. {' 또는 '.join(locations)} 에 기술을 생성할 수 있습니다)"
        # Group skills by their source directory.
        user_skills = [s for s in skills if s["source"] == "user"]
        project_skills = [s for s in skills if s["source"] == "project"]
        lines = []
        if user_skills:
            lines.append("**사용자 기술:**")
            for skill in user_skills:
                lines.append(f"- **{skill['name']}**: {skill['description']}")
                lines.append(f" → 전체 지침을 보려면 `{skill['path']}` 읽기")
            lines.append("")
        if project_skills:
            lines.append("**프로젝트 기술:**")
            for skill in project_skills:
                lines.append(f"- **{skill['name']}**: {skill['description']}")
                lines.append(f" → 전체 지침을 보려면 `{skill['path']}` 읽기")
        return "\n".join(lines)

    def _augmented_system_prompt(self, request: ModelRequest) -> str:
        """Build the system prompt with the skills section appended.

        Shared by the sync and async wrap paths, which previously duplicated
        this logic (and accessed request.state inconsistently).
        """
        # state_schema guarantees the state conforms to SkillsState.
        state = cast("SkillsState", request.state)
        skills_metadata = state.get("skills_metadata", [])
        skills_section = self.system_prompt_template.format(
            skills_locations=self._format_skills_locations(),
            skills_list=self._format_skills_list(skills_metadata),
        )
        if request.system_prompt:
            return request.system_prompt + "\n\n" + skills_section
        return skills_section

    def before_agent(self, state: SkillsState, runtime: Runtime) -> SkillsStateUpdate | None:
        """Load skill metadata before the agent runs.

        Skills are reloaded on every interaction with the agent so that
        changes in the skills directories are picked up.

        Args:
            state: Current agent state.
            runtime: Runtime context.

        Returns:
            State update with ``skills_metadata`` populated.
        """
        skills = list_skills(
            user_skills_dir=self.skills_dir,
            project_skills_dir=self.project_skills_dir,
        )
        return SkillsStateUpdate(skills_metadata=skills)

    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Inject the skills documentation into the system prompt (sync path).

        Runs on every model call so skills information is always available.

        Args:
            request: Model request being processed.
            handler: Handler to invoke with the modified request.

        Returns:
            The handler's model response.
        """
        return handler(request.override(system_prompt=self._augmented_system_prompt(request)))

    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelResponse:
        """Inject the skills documentation into the system prompt (async path).

        Args:
            request: Model request being processed.
            handler: Handler to invoke with the modified request.

        Returns:
            The handler's model response.
        """
        return await handler(request.override(system_prompt=self._augmented_system_prompt(request)))

View File

@@ -0,0 +1,116 @@
"""Utilities for accurate token counting using LangChain models."""
from pathlib import Path
from langchain_core.messages import SystemMessage
from deepagents_cli.config import console, settings
def calculate_baseline_tokens(model, agent_dir: Path, system_prompt: str, assistant_id: str) -> int:
    """Calculate baseline context tokens using the model's official tokenizer.

    Uses the model's ``get_num_tokens_from_messages()`` method to get accurate
    token counts for the initial context (system prompt + agent.md).

    Note: Tool definitions cannot be accurately counted before the first API
    call due to LangChain limitations. They will be included in the total
    after the first message is sent (~5,000 tokens).

    Args:
        model: LangChain model instance (ChatAnthropic or ChatOpenAI)
        agent_dir: Path to agent directory containing agent.md
        system_prompt: The base system prompt string
        assistant_id: The agent identifier for path references

    Returns:
        Token count for system prompt + agent.md (tools not included), or 0
        when token counting fails.
    """
    # Load user-level agent.md content, if present.
    agent_md_path = agent_dir / "agent.md"
    user_memory = agent_md_path.read_text() if agent_md_path.exists() else ""

    # Function-scoped import mirrors the original; presumably avoids an import
    # cycle with .config — keep it local.
    from .config import _find_project_agent_md, _find_project_root

    # Load project-level agent.md content (best effort).
    project_memory = ""
    project_root = _find_project_root()
    if project_root:
        project_md_paths = _find_project_agent_md(project_root)
        if project_md_paths:
            try:
                # Combine all project agent.md files (if multiple exist).
                project_memory = "\n\n".join(path.read_text() for path in project_md_paths)
            except Exception as e:
                # Previously this swallowed errors silently; keep the
                # best-effort behavior but surface a warning so an
                # unreadable project agent.md is noticed.
                console.print(f"[yellow]Warning: Could not read project agent.md: {e}[/yellow]")

    # Build the complete system prompt as it will be sent.
    # This mimics what AgentMemoryMiddleware.wrap_model_call() does.
    memory_section = (
        f"<user_memory>\n{user_memory or '(No user agent.md)'}\n</user_memory>\n\n"
        f"<project_memory>\n{project_memory or '(No project agent.md)'}\n</project_memory>"
    )
    # Get the long-term memory system prompt.
    memory_system_prompt = get_memory_system_prompt(
        assistant_id, project_root, bool(project_memory)
    )
    # Combine all parts in the same order as the middleware.
    full_system_prompt = memory_section + "\n\n" + system_prompt + "\n\n" + memory_system_prompt

    # Count tokens using the model's official method.
    messages = [SystemMessage(content=full_system_prompt)]
    try:
        # Note: the tools parameter is not supported by LangChain's token
        # counting; tool tokens appear in API usage after the first message.
        return model.get_num_tokens_from_messages(messages)
    except Exception as e:
        # Fallback if token counting fails.
        console.print(f"[yellow]Warning: Could not calculate baseline tokens: {e}[/yellow]")
        return 0
def get_memory_system_prompt(
    assistant_id: str, project_root: Path | None = None, has_project_memory: bool = False
) -> str:
    """Build the long-term memory system prompt text.

    Args:
        assistant_id: The agent identifier for path references
        project_root: Path to the detected project root (if any)
        has_project_memory: Whether project memory was loaded
    """
    # Import from the agent_memory middleware at call time.
    from .agent_memory import LONGTERM_MEMORY_SYSTEM_PROMPT

    agent_dir = settings.get_agent_dir(assistant_id)

    # Describe the project memory situation and its .deepagents directory.
    if project_root is None:
        project_memory_info = "None (not in a git project)"
        project_deepagents_dir = "[project-root]/.deepagents (not in a project)"
    else:
        suffix = "(detected)" if has_project_memory else "(no agent.md found)"
        project_memory_info = f"`{project_root}` {suffix}"
        project_deepagents_dir = f"{project_root}/.deepagents"

    return LONGTERM_MEMORY_SYSTEM_PROMPT.format(
        agent_dir_absolute=str(agent_dir),
        agent_dir_display=f"~/.deepagents/{assistant_id}",
        project_memory_info=project_memory_info,
        project_deepagents_dir=project_deepagents_dir,
    )

View File

@@ -0,0 +1,183 @@
"""CLI 에이전트를 위한 사용자 정의 도구."""
from typing import Any, Literal
import requests # type: ignore
from markdownify import markdownify # type: ignore
from tavily import TavilyClient # type: ignore
from deepagents_cli.config import settings
# Initialize Tavily client if API key is available.
# Left as None when no key is configured; web_search below checks for this
# and returns a configuration-error payload instead of raising.
tavily_client = TavilyClient(api_key=settings.tavily_api_key) if settings.has_tavily else None
def http_request(
    url: str,
    method: str = "GET",
    headers: dict[str, str] | None = None,
    data: str | dict | None = None,
    params: dict[str, str] | None = None,
    timeout: int = 30,
) -> dict[str, Any]:
    """Sends an HTTP request to an API or web service.

    Args:
        url: The URL to target
        method: HTTP method (GET, POST, PUT, DELETE, etc.)
        headers: HTTP headers to include
        data: Request body data (string or dict); dicts are sent as JSON
        params: URL query parameters
        timeout: Request timeout in seconds

    Returns:
        Dictionary containing success, status_code, headers, content, and url.
        On failure, status_code is 0 and content holds an error description.
    """
    try:
        kwargs: dict[str, Any] = {"url": url, "method": method.upper(), "timeout": timeout}
        if headers:
            kwargs["headers"] = headers
        if params:
            kwargs["params"] = params
        if data:
            # Dicts are serialized as a JSON body; strings are sent verbatim.
            if isinstance(data, dict):
                kwargs["json"] = data
            else:
                kwargs["data"] = data
        response = requests.request(**kwargs)
        try:
            content = response.json()
        except ValueError:
            # Non-JSON body: response.json() raises a JSONDecodeError, which
            # is a ValueError subclass. (Was a bare `except:`, which would
            # also have swallowed KeyboardInterrupt/SystemExit.)
            content = response.text
        return {
            "success": response.status_code < 400,
            "status_code": response.status_code,
            "headers": dict(response.headers),
            "content": content,
            "url": response.url,
        }
    except requests.exceptions.Timeout:
        return {
            "success": False,
            "status_code": 0,
            "headers": {},
            "content": f"{timeout}초 후 요청 시간이 초과되었습니다",
            "url": url,
        }
    except requests.exceptions.RequestException as e:
        return {
            "success": False,
            "status_code": 0,
            "headers": {},
            "content": f"요청 오류: {e!s}",
            "url": url,
        }
    except Exception as e:
        return {
            "success": False,
            "status_code": 0,
            "headers": {},
            "content": f"요청 생성 오류: {e!s}",
            "url": url,
        }
def web_search(
    query: str,
    max_results: int = 5,
    topic: Literal["general", "news", "finance"] = "general",
    include_raw_content: bool = False,
) -> dict[str, Any]:
    """Performs a web search using Tavily for current information and documents.

    This tool searches the web and returns relevant results. After receiving results,
    you should synthesize the information into a natural response that helps the user.

    Args:
        query: The search query (specific and detailed)
        max_results: Number of results to return (default: 5)
        topic: The topic type of the search - "general" for most queries, "news" for current events
        include_raw_content: Include full page content (Warning: uses more tokens)

    Returns:
        Dictionary containing:
        - results: List of search results, each containing:
            - title: Page title
            - url: Page URL
            - content: Relevant snippet from the page
            - score: Relevance score (0-1)
        - query: Original search query
        On failure, a dictionary with "error" and "query" keys instead.

    IMPORTANT: After using this tool:
    1. Read the 'content' field of each result
    2. Extract relevant information that answers the user's question
    3. Synthesize this into a clear, natural language response
    4. Cite sources by mentioning the page title or URL
    5. Do NOT show raw JSON to the user - always provide a formatted response
    """
    # No client means the API key was never configured; report instead of raising.
    if tavily_client is None:
        return {
            "error": "Tavily API 키가 구성되지 않았습니다. TAVILY_API_KEY 환경 변수를 설정하십시오.",
            "query": query,
        }
    try:
        return tavily_client.search(
            query,
            max_results=max_results,
            include_raw_content=include_raw_content,
            topic=topic,
        )
    except Exception as e:
        return {"error": f"웹 검색 오류: {e!s}", "query": query}
def fetch_url(url: str, timeout: int = 30) -> dict[str, Any]:
    """Fetches content from a URL and converts HTML to markdown format.

    This tool fetches web page content and converts it to clean markdown text,
    making it easier to read and process HTML content. After receiving markdown,
    you should synthesize the information into a natural response that helps the user.

    Args:
        url: The URL to fetch (must be a valid HTTP/HTTPS URL)
        timeout: Request timeout in seconds (default: 30)

    Returns:
        Dictionary containing:
        - success: Whether the request was successful
        - url: Final URL after redirects
        - markdown_content: The page content converted to markdown
        - status_code: HTTP status code
        - content_length: Length of markdown content (in characters)
        On failure: success, error, and url keys.

    IMPORTANT: After using this tool:
    1. Read the markdown_content
    2. Extract relevant information that answers the user's question
    3. Synthesize this into a clear, natural language response
    4. Do NOT show raw markdown to the user unless specifically requested
    """
    try:
        response = requests.get(
            url,
            timeout=timeout,
            headers={"User-Agent": "Mozilla/5.0 (compatible; DeepAgents/1.0)"},
        )
        response.raise_for_status()
        # Convert HTML content to markdown.
        markdown_content = markdownify(response.text)
        return {
            # The docstring always promised a `success` flag, but the return
            # dicts never included it — add it (backward-compatible).
            "success": True,
            "url": str(response.url),
            "markdown_content": markdown_content,
            "status_code": response.status_code,
            "content_length": len(markdown_content),
        }
    except Exception as e:
        return {"success": False, "error": f"URL 가져오기 오류: {e!s}", "url": url}

View File

@@ -0,0 +1,644 @@
"""CLI를 위한 UI 렌더링 및 디스플레이 유틸리티."""
import json
import re
import shutil
from pathlib import Path
from typing import Any
from rich import box
from rich.markup import escape
from rich.panel import Panel
from rich.text import Text
from .config import COLORS, COMMANDS, DEEP_AGENTS_ASCII, MAX_ARG_LENGTH, console
from .file_ops import FileOperationRecord
def truncate_value(value: str, max_length: int = MAX_ARG_LENGTH) -> str:
    """Truncate *value* with a trailing ellipsis when it exceeds *max_length*."""
    if len(value) <= max_length:
        return value
    return value[:max_length] + "..."
def format_tool_display(tool_name: str, tool_args: dict) -> str:
    """Render a tool call with tool-aware smart formatting.

    Shows the most relevant information for each tool type rather than
    dumping every argument.

    Args:
        tool_name: Name of the tool being invoked
        tool_args: Dictionary of tool arguments

    Returns:
        A string formatted for display (e.g. "read_file(config.py)")

    Examples:
        read_file(path="/long/path/file.py") → "read_file(file.py)"
        web_search(query="how to code", max_results=5) → 'web_search("how to code")'
        shell(command="pip install foo") → 'shell("pip install foo")'
    """
    # The three filesystem tools share one formatter.
    if tool_name in ("read_file", "write_file", "edit_file"):
        return _format_file_tool(tool_name, tool_args)
    # Per-tool formatters, dispatched by name.
    dispatch = {
        "web_search": _format_web_search_tool,
        "grep": _format_grep_tool,
        "shell": _format_shell_tool,
        "ls": _format_ls_tool,
        "glob": _format_glob_tool,
        "http_request": _format_http_request_tool,
        "fetch_url": _format_fetch_url_tool,
        "task": _format_task_tool,
        "write_todos": _format_write_todos_tool,
    }
    formatter = dispatch.get(tool_name)
    if formatter is not None:
        return formatter(tool_name, tool_args)
    # Fallback: generic key=value rendering with truncated values.
    rendered = ", ".join(f"{k}={truncate_value(str(v), 20)}" for k, v in tool_args.items())
    return f"{tool_name}({rendered})"
def _abbreviate_path(path_str: str, max_length: int = 60) -> str:
"""파일 경로를 지능적으로 축약합니다 - 베이스네임 또는 상대 경로를 표시합니다."""
try:
path = Path(path_str)
# If it's just a filename (no directory parts), return as-is
if len(path.parts) == 1:
return path_str
# Try to get relative path from current working directory
try:
rel_path = path.relative_to(Path.cwd())
rel_str = str(rel_path)
# Use relative if it's shorter and not too long
if len(rel_str) < len(path_str) and len(rel_str) <= max_length:
return rel_str
except (ValueError, Exception):
pass
# If absolute path is reasonable length, use it
if len(path_str) <= max_length:
return path_str
# Otherwise, just show basename (filename only)
return path.name
except Exception:
# Fallback to original string if any error
return truncate_value(path_str, max_length)
def _format_file_tool(tool_name: str, tool_args: dict) -> str:
    """Render a filesystem tool call as name(abbreviated-path)."""
    # Accept either "file_path" or "path" as the argument key.
    target = tool_args.get("file_path")
    if target is None:
        target = tool_args.get("path")
    if target is None:
        return f"{tool_name}(...)"
    return f"{tool_name}({_abbreviate_path(str(target))})"
def _format_web_search_tool(tool_name: str, tool_args: dict) -> str:
    """Render a web_search call as name("truncated query")."""
    if "query" not in tool_args:
        return f"{tool_name}()"
    query = truncate_value(str(tool_args["query"]), 100)
    return f'{tool_name}("{query}")'
def _format_grep_tool(tool_name: str, tool_args: dict) -> str:
    """Render a grep call as name("truncated pattern")."""
    if "pattern" not in tool_args:
        return f"{tool_name}()"
    pattern = truncate_value(str(tool_args["pattern"]), 70)
    return f'{tool_name}("{pattern}")'
def _format_shell_tool(tool_name: str, tool_args: dict) -> str:
    """Render a shell call as name("truncated command")."""
    if "command" not in tool_args:
        return f"{tool_name}()"
    command = truncate_value(str(tool_args["command"]), 120)
    return f'{tool_name}("{command}")'
def _format_ls_tool(tool_name: str, tool_args: dict) -> str:
    """Render an ls call as name(abbreviated-path), or name() without one."""
    target = tool_args.get("path")
    if not target:
        return f"{tool_name}()"
    return f"{tool_name}({_abbreviate_path(str(target))})"
def _format_glob_tool(tool_name: str, tool_args: dict) -> str:
    """Render a glob call as name("truncated pattern")."""
    if "pattern" not in tool_args:
        return f"{tool_name}()"
    pattern = truncate_value(str(tool_args["pattern"]), 80)
    return f'{tool_name}("{pattern}")'
def _format_http_request_tool(tool_name: str, tool_args: dict) -> str:
    """Render an http_request call as name(METHOD truncated-url)."""
    pieces: list[str] = []
    if "method" in tool_args:
        pieces.append(str(tool_args["method"]).upper())
    if "url" in tool_args:
        pieces.append(truncate_value(str(tool_args["url"]), 80))
    if not pieces:
        return f"{tool_name}()"
    return f"{tool_name}({' '.join(pieces)})"
def _format_fetch_url_tool(tool_name: str, tool_args: dict) -> str:
    """Render a fetch_url call as name("truncated url")."""
    if "url" not in tool_args:
        return f"{tool_name}()"
    url = truncate_value(str(tool_args["url"]), 80)
    return f'{tool_name}("{url}")'
def _format_task_tool(tool_name: str, tool_args: dict) -> str:
    """Render a task call as name("truncated description")."""
    if "description" not in tool_args:
        return f"{tool_name}()"
    description = truncate_value(str(tool_args["description"]), 100)
    return f'{tool_name}("{description}")'
def _format_write_todos_tool(tool_name: str, tool_args: dict) -> str:
if "todos" in tool_args and isinstance(tool_args["todos"], list):
count = len(tool_args["todos"])
return f"{tool_name}({count} items)"
return f"{tool_name}()"
def format_tool_message_content(content: Any) -> str:
    """Normalize ToolMessage content into a printable string."""
    if content is None:
        return ""
    if not isinstance(content, list):
        return str(content)
    # Lists are rendered one item per line; non-strings are JSON-encoded
    # when possible, falling back to str().
    rendered: list[str] = []
    for item in content:
        if isinstance(item, str):
            rendered.append(item)
            continue
        try:
            rendered.append(json.dumps(item))
        except Exception:
            rendered.append(str(item))
    return "\n".join(rendered)
class TokenTracker:
    """Tracks token usage across the conversation."""

    def __init__(self) -> None:
        # Baseline system context (system prompt + agent.md; tools counted
        # only after the first API call).
        self.baseline_context = 0
        # Total context including conversation messages.
        self.current_context = 0
        # Output tokens from the most recent response.
        self.last_output = 0

    def set_baseline(self, tokens: int) -> None:
        """Record the baseline context size and reset the running total to it.

        Args:
            tokens: Baseline token count (system prompt + agent.md + tools)
        """
        self.baseline_context = tokens
        self.current_context = tokens

    def reset(self) -> None:
        """Drop back to the baseline (used by the /clear command)."""
        self.current_context = self.baseline_context
        self.last_output = 0

    def add(self, input_tokens: int, output_tokens: int) -> None:
        """Record token usage from one response.

        input_tokens IS the current context size (what was sent to the model).
        """
        self.current_context = input_tokens
        self.last_output = output_tokens

    def display_last(self) -> None:
        """Show the current context size after this turn."""
        if self.last_output >= 1000:
            console.print(f" 생성됨: {self.last_output:,} 토큰", style="dim")
        if self.current_context:
            console.print(f" 현재 컨텍스트: {self.current_context:,} 토큰", style="dim")

    def display_session(self) -> None:
        """Show a session-level token usage summary."""
        console.print("\n[bold]토큰 사용량:[/bold]", style=COLORS["primary"])
        # current > baseline implies at least one real API call has happened.
        has_conversation = self.current_context > self.baseline_context
        if self.baseline_context > 0:
            console.print(
                f" 기준(Baseline): {self.baseline_context:,} 토큰 [dim](시스템 + agent.md)[/dim]",
                style=COLORS["dim"],
            )
            if not has_conversation:
                # Before the first message, tool definitions are not counted yet.
                console.print(" [dim]참고: 도구 정의(~5k 토큰)는 첫 번째 메시지 이후에 포함됩니다[/dim]")
        if has_conversation:
            delta = self.current_context - self.baseline_context
            console.print(f" 도구 + 대화: {delta:,} 토큰", style=COLORS["dim"])
        console.print(f" 합계: {self.current_context:,} 토큰", style="bold " + COLORS["dim"])
        console.print()
def render_todo_list(todos: list[dict]) -> None:
    """Render the todo list as a rich panel with status checkboxes."""
    if not todos:
        return
    # Map each status to its (icon, style) pair; unknown statuses fall back
    # to the "pending" rendering.
    appearance = {
        "completed": ("", "green"),
        "in_progress": ("", "yellow"),
        "pending": ("", "dim"),
    }
    rendered = []
    for todo in todos:
        icon, style = appearance.get(todo.get("status", "pending"), ("", "dim"))
        rendered.append(f"[{style}]{icon} {todo.get('content', '')}[/{style}]")
    console.print(
        Panel(
            "\n".join(rendered),
            title="[bold]작업 목록[/bold]",
            border_style="cyan",
            box=box.ROUNDED,
            padding=(0, 1),
        )
    )
def _format_line_span(start: int | None, end: int | None) -> str:
if start is None and end is None:
return ""
if start is not None and end is None:
return f"({start}행부터)"
if start is None and end is not None:
return f"({end}행까지)"
if start == end:
return f"({start}행)"
return f"({start}-{end}행)"
def render_file_operation(record: FileOperationRecord) -> None:
    """Render a brief summary of a filesystem tool call (plus its diff, if any)."""
    labels = {
        "read_file": "읽기",
        "write_file": "쓰기",
        "edit_file": "업데이트",
    }
    label = labels.get(record.tool_name, record.tool_name)
    header = Text()
    header.append("", style=COLORS["tool"])
    header.append(f"{label}({record.display_path})", style=f"bold {COLORS['tool']}")
    console.print(header)

    def _print_detail(message: str, *, style: str = COLORS["dim"]) -> None:
        # One indented detail line under the header.
        line = Text()
        line.append("", style=style)
        line.append(message, style=style)
        console.print(line)

    if record.status == "error":
        _print_detail(record.error or "파일 작업 실행 오류", style="red")
        return

    metrics = record.metrics
    if record.tool_name == "read_file":
        detail = f"{metrics.lines_read}줄 읽음"
        span = _format_line_span(metrics.start_line, metrics.end_line)
        if span:
            detail = f"{detail} {span}"
    else:
        # write_file and edit_file share the added/removed suffix.
        if record.tool_name == "write_file":
            detail = f"{metrics.lines_written}줄 씀"
        else:
            detail = f"{metrics.lines_written}줄 편집됨"
        if metrics.lines_added or metrics.lines_removed:
            detail = f"{detail} (+{metrics.lines_added} / -{metrics.lines_removed})"
    _print_detail(detail)

    # Skip the diff for HIL-approved successful operations — the user
    # already saw it during approval.
    if record.diff and not (record.hitl_approved and record.status == "success"):
        render_diff(record)
def render_diff(record: FileOperationRecord) -> None:
    """Render the diff attached to a file operation, if one exists."""
    if record.diff:
        render_diff_block(record.diff, f"{record.display_path} 차이(Diff)")
def _wrap_diff_line(
    code: str,
    marker: str,
    color: str,
    line_num: int | None,
    width: int,
    term_width: int,
) -> list[str]:
    """Wrap a long diff line with proper indentation.

    Args:
        code: Code content to wrap
        marker: Diff marker ('+', '-', ' ')
        color: Color for this line
        line_num: Line number to display (None for continuation lines)
        width: Width of the line-number column
        term_width: Terminal width

    Returns:
        List of formatted lines (may be multiple when wrapped)
    """
    # Escape Rich markup in code content so it renders literally.
    code = escape(code)
    prefix_len = width + 4  # line_num + space + marker + 2 spaces
    # Clamp to at least one column: on a very narrow terminal the raw value
    # could be <= 0, which previously produced empty chunks and made the
    # while-loop below spin forever.
    available_width = max(term_width - prefix_len, 1)
    if len(code) <= available_width:
        if line_num is not None:
            return [f"[dim]{line_num:>{width}}[/dim] [{color}]{marker} {code}[/{color}]"]
        return [f"{' ' * width} [{color}]{marker} {code}[/{color}]"]
    lines = []
    remaining = code
    first = True
    while remaining:
        if len(remaining) <= available_width:
            chunk = remaining
            remaining = ""
        else:
            # Try to break at a good point (space, comma, etc.).
            chunk = remaining[:available_width]
            # Look for a good break point near the end of the chunk.
            break_point = max(
                chunk.rfind(" "),
                chunk.rfind(","),
                chunk.rfind("("),
                chunk.rfind(")"),
            )
            if break_point > available_width - 20:
                # Found a good break point.
                chunk = remaining[: break_point + 1]
                remaining = remaining[break_point + 1 :]
            else:
                # No good break point, just split.
                chunk = remaining[:available_width]
                remaining = remaining[available_width:]
        if first and line_num is not None:
            lines.append(f"[dim]{line_num:>{width}}[/dim] [{color}]{marker} {chunk}[/{color}]")
            first = False
        else:
            lines.append(f"{' ' * width} [{color}]{marker} {chunk}[/{color}]")
    return lines
def format_diff_rich(diff_lines: list[str]) -> str:
    """Format unified-diff lines with line numbers and colors.

    Args:
        diff_lines: Lines of a unified diff

    Returns:
        A single Rich-markup string with numbered, colorized diff lines.
    """
    if not diff_lines:
        return "[dim]감지된 변경 사항 없음[/dim]"
    # Get terminal width
    term_width = shutil.get_terminal_size().columns
    # Find max line number for width calculation
    # NOTE(review): only hunk *start* numbers are inspected (not start+count),
    # so the column width can be slightly narrow when a hunk crosses a power
    # of ten — confirm whether that matters for display.
    max_line = max(
        (
            int(m.group(i))
            for line in diff_lines
            if (m := re.match(r"@@ -(\d+)(?:,\d+)? \+(\d+)", line))
            for i in (1, 2)
        ),
        default=0,
    )
    width = max(3, len(str(max_line)))
    formatted_lines = []
    # old_num / new_num track the current line number on each side of the
    # diff; reset by every @@ hunk header.
    old_num = new_num = 0
    # Rich colors with backgrounds for better visibility
    # White text on dark backgrounds for additions/deletions
    addition_color = "white on dark_green"
    deletion_color = "white on dark_red"
    context_color = "dim"
    for line in diff_lines:
        if line.strip() == "...":
            # Elision marker between hunks is passed through dimmed.
            formatted_lines.append(f"[{context_color}]...[/{context_color}]")
        elif line.startswith(("---", "+++")):
            # File-name header lines are not displayed.
            continue
        elif m := re.match(r"@@ -(\d+)(?:,\d+)? \+(\d+)", line):
            old_num, new_num = int(m.group(1)), int(m.group(2))
        elif line.startswith("-"):
            # Deletion: numbered on the old side only.
            formatted_lines.extend(_wrap_diff_line(line[1:], "-", deletion_color, old_num, width, term_width))
            old_num += 1
        elif line.startswith("+"):
            # Addition: numbered on the new side only.
            formatted_lines.extend(_wrap_diff_line(line[1:], "+", addition_color, new_num, width, term_width))
            new_num += 1
        elif line.startswith(" "):
            # Context line advances both counters.
            formatted_lines.extend(_wrap_diff_line(line[1:], " ", context_color, old_num, width, term_width))
            old_num += 1
            new_num += 1
    return "\n".join(formatted_lines)
def render_diff_block(diff: str, title: str) -> None:
    """Render a diff string with line numbers and colors, with a plain fallback."""
    try:
        formatted = format_diff_rich(diff.splitlines())
        console.print()
        console.print(f"[bold {COLORS['primary']}]═══ {title} ═══[/bold {COLORS['primary']}]")
        console.print(formatted)
        console.print()
    except (ValueError, AttributeError, IndexError, OSError):
        # Fall back to unformatted output if rich formatting fails.
        console.print()
        console.print(f"[bold {COLORS['primary']}]{title}[/bold {COLORS['primary']}]")
        console.print(diff)
        console.print()
def show_interactive_help() -> None:
    """Display the commands available during an interactive session."""
    primary = COLORS["primary"]
    dim = COLORS["dim"]
    console.print()
    console.print()
    console.print("[bold]대화형 명령:[/bold]", style=primary)
    console.print()
    for cmd, desc in COMMANDS.items():
        console.print(f" /{cmd:<12} {desc}", style=dim)
    console.print()
    console.print("[bold]편집 기능:[/bold]", style=primary)
    for entry in (
        " Enter 메시지 제출",
        " Alt+Enter 줄바꿈 삽입 (Mac의 경우 Option+Enter, 또는 ESC 후 Enter)",
        " Ctrl+E 외부 편집기에서 열기 (기본값 nano)",
        " Ctrl+T 자동 승인 모드 전환",
        " 방향키 입력 탐색",
        " Ctrl+C 입력 취소 또는 작업 중인 에이전트 중단",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]특수 기능:[/bold]", style=primary)
    for entry in (
        " @filename @를 입력하여 파일 자동 완성 및 콘텐츠 주입",
        " /command /를 입력하여 사용 가능한 명령 확인",
        " !command !를 입력하여 bash 명령 실행 (예: !ls, !git status)",
        " 입력하면 완성이 자동으로 나타납니다",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]자동 승인 모드:[/bold]", style=primary)
    for entry in (
        " Ctrl+T 자동 승인 모드 전환",
        " --auto-approve 자동 승인이 활성화된 상태로 CLI 시작 (명령줄을 통해)",
        " 활성화되면 도구 작업이 확인 프롬프트 없이 실행됩니다",
    ):
        console.print(entry, style=dim)
    console.print()
def show_help() -> None:
    """Display the top-level CLI help text."""
    primary = COLORS["primary"]
    dim = COLORS["dim"]
    console.print()
    console.print(DEEP_AGENTS_ASCII, style=f"bold {primary}")
    console.print()
    console.print("[bold]사용법:[/bold]", style=primary)
    # Usage and option lines are printed unstyled, matching the original.
    for entry in (
        " deepagents [OPTIONS] 대화형 세션 시작",
        " deepagents list 사용 가능한 모든 에이전트 나열",
        " deepagents reset --agent AGENT 에이전트를 기본 프롬프트로 초기화",
        " deepagents reset --agent AGENT --target SOURCE 에이전트를 다른 에이전트의 복사본으로 초기화",
        " deepagents help 이 도움말 메시지 표시",
    ):
        console.print(entry)
    console.print()
    console.print("[bold]옵션:[/bold]", style=primary)
    for entry in (
        " --agent NAME 에이전트 식별자 (기본값: agent)",
        " --model MODEL 사용할 모델 (예: claude-sonnet-4-5-20250929, gpt-4o)",
        " --auto-approve 프롬프트 없이 도구 사용 자동 승인",
        " --sandbox TYPE 실행을 위한 원격 샌드박스 (modal, runloop, daytona)",
        " --sandbox-id ID 기존 샌드박스 재사용 (생성/정리 건너뜀)",
    ):
        console.print(entry)
    console.print()
    console.print("[bold]예시:[/bold]", style=primary)
    for entry in (
        " deepagents # 기본 에이전트로 시작",
        " deepagents --agent mybot # 'mybot'이라는 이름의 에이전트로 시작",
        " deepagents --model gpt-4o # 특정 모델 사용 (공급자 자동 감지)",
        " deepagents --auto-approve # 자동 승인이 활성화된 상태로 시작",
        " deepagents --sandbox runloop # Runloop 샌드박스에서 코드 실행",
        " deepagents --sandbox modal # Modal 샌드박스에서 코드 실행",
        " deepagents --sandbox runloop --sandbox-id dbx_123 # 기존 샌드박스 재사용",
        " deepagents list # 모든 에이전트 나열",
        " deepagents reset --agent mybot # mybot을 기본값으로 초기화",
        " deepagents reset --agent mybot --target other # mybot을 'other' 에이전트의 복사본으로 초기화",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]장기 기억(Long-term Memory):[/bold]", style=primary)
    for entry in (
        " 기본적으로 장기 기억은 'agent'라는 에이전트 이름을 사용하여 활성화됩니다.",
        " 기억에는 다음이 포함됩니다:",
        " - 지침이 포함된 영구 agent.md 파일",
        " - 세션 간 컨텍스트 저장을 위한 /memories/ 폴더",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]에이전트 저장소:[/bold]", style=primary)
    for entry in (
        " 에이전트는 다음 경로에 저장됩니다: ~/.deepagents/AGENT_NAME/",
        " 각 에이전트에는 프롬프트가 포함된 agent.md 파일이 있습니다",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]대화형 기능:[/bold]", style=primary)
    for entry in (
        " Enter 메시지 제출",
        " Alt+Enter 여러 줄 입력을 위한 줄바꿈 (Option+Enter 또는 ESC 후 Enter)",
        " Ctrl+J 줄바꿈 삽입 (대안)",
        " Ctrl+T 자동 승인 모드 전환",
        " 방향키 입력 탐색",
        " @filename @를 입력하여 파일 자동 완성 및 콘텐츠 주입",
        " /command /를 입력하여 사용 가능한 명령 확인 (자동 완성)",
    ):
        console.print(entry, style=dim)
    console.print()
    console.print("[bold]대화형 명령:[/bold]", style=primary)
    for entry in (
        " /help 사용 가능한 명령 및 기능 표시",
        " /clear 화면 지우기 및 대화 초기화",
        " /tokens 현재 세션의 토큰 사용량 표시",
        " /quit, /exit 세션 종료",
        " quit, exit, q 세션 종료 (입력하고 Enter 누름)",
    ):
        console.print(entry, style=dim)
    console.print()

View File

@@ -0,0 +1,102 @@
---
name: arxiv-search
description: Search arXiv preprint repository for papers in physics, mathematics, computer science, quantitative biology, and related fields
---
# arXiv Search Skill
This skill provides access to arXiv, a free distribution service and open-access archive for scholarly articles in physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering, systems science, and economics.
## When to Use This Skill
Use this skill when you need to:
- Find preprints and recent research papers before journal publication
- Search for papers in computational biology, bioinformatics, or systems biology
- Access mathematical or statistical methods papers relevant to biology
- Find machine learning papers applied to biological problems
- Get the latest research that may not yet be in PubMed
## How to Use
The skill provides a Python script that searches arXiv and returns formatted results.
### Basic Usage
**Note:** Always use the absolute path from your skills directory (shown in the system prompt above).
If running deepagents from a virtual environment:
```bash
.venv/bin/python [YOUR_SKILLS_DIR]/arxiv-search/arxiv_search.py "your search query" [--max-papers N]
```
Or for system Python:
```bash
python3 [YOUR_SKILLS_DIR]/arxiv-search/arxiv_search.py "your search query" [--max-papers N]
```
Replace `[YOUR_SKILLS_DIR]` with the absolute skills directory path from your system prompt (e.g., `~/.deepagents/agent/skills` or the full absolute path).
**Arguments:**
- `query` (required): The search query string (e.g., "neural networks protein structure", "single cell RNA-seq")
- `--max-papers` (optional): Maximum number of papers to retrieve (default: 10)
### Examples
Search for machine learning papers:
```bash
.venv/bin/python ~/.deepagents/agent/skills/arxiv-search/arxiv_search.py "deep learning drug discovery" --max-papers 5
```
Search for computational biology papers:
```bash
.venv/bin/python ~/.deepagents/agent/skills/arxiv-search/arxiv_search.py "protein folding prediction"
```
Search for bioinformatics methods:
```bash
.venv/bin/python ~/.deepagents/agent/skills/arxiv-search/arxiv_search.py "genome assembly algorithms"
```
## Output Format
The script returns formatted results with:
- **Title**: Paper title
- **Summary**: Abstract/summary text
Each paper is separated by blank lines for readability.
## Features
- **Relevance sorting**: Results ordered by relevance to query
- **Fast retrieval**: Direct API access with no authentication required
- **Simple interface**: Clean, easy-to-parse output
- **No API key required**: Free access to arXiv database
## Dependencies
This skill requires the `arxiv` Python package. The script will detect if it's missing and show an error.
**If you see "Error: arxiv package not installed":**
If running deepagents from a virtual environment (recommended), use the venv's Python:
```bash
.venv/bin/python -m pip install arxiv
```
Or for system-wide install:
```bash
python3 -m pip install arxiv
```
The package is not included in deepagents by default since it's skill-specific. Install it on-demand when first using this skill.
## Notes
- arXiv is particularly strong for:
- Computer science (cs.LG, cs.AI, cs.CV)
- Quantitative biology (q-bio)
- Statistics (stat.ML)
- Physics and mathematics
- Papers are preprints and may not be peer-reviewed
- Results include both recent uploads and older papers
- Best for computational/theoretical work in biology

View File

@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""arXiv Search.
Searches the arXiv preprint repository for research papers.
"""
import argparse
def query_arxiv(query: str, max_papers: int = 10) -> str:
    """Search arXiv and return formatted titles and summaries.

    Parameters
    ----------
    query : str
        The search query string.
    max_papers : int
        The maximum number of papers to retrieve (default: 10).

    Returns
    -------
    str
        The formatted search results, or an error message if the ``arxiv``
        package is missing or the query fails.
    """
    # The arxiv dependency is skill-specific, so import lazily and report a
    # helpful install hint instead of crashing when it is absent.
    try:
        import arxiv
    except ImportError:
        return "Error: arxiv package not installed. Install with: pip install arxiv"
    try:
        search = arxiv.Search(
            query=query,
            max_results=max_papers,
            sort_by=arxiv.SortCriterion.Relevance,
        )
        # One "Title/Summary" entry per paper, separated by blank lines.
        entries = (
            f"Title: {paper.title}\nSummary: {paper.summary}"
            for paper in arxiv.Client().results(search)
        )
        formatted = "\n\n".join(entries)
        return formatted if formatted else "No papers found on arXiv."
    except Exception as e:
        # Network/API failures are reported as text rather than raised, since
        # the caller simply prints whatever string comes back.
        return f"Error querying arXiv: {e}"
def main() -> None:
    """Parse CLI arguments, run the arXiv search, and print the results.

    Bug fix: the search result was previously computed but never printed,
    so the script produced no output at all.
    """
    parser = argparse.ArgumentParser(description="Search arXiv for research papers")
    parser.add_argument("query", type=str, help="Search query string")
    parser.add_argument(
        "--max-papers",
        type=int,
        default=10,
        help="Maximum number of papers to retrieve (default: 10)",
    )
    args = parser.parse_args()
    # Emit the formatted results (or error message) to stdout so callers of
    # this script actually see them.
    print(query_arxiv(args.query, max_papers=args.max_papers))
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,35 @@
---
name: langgraph-docs
description: Use this skill for requests related to LangGraph in order to fetch relevant documentation to provide accurate, up-to-date guidance.
---
# langgraph-docs
## Overview
This skill explains how to access LangGraph Python documentation to help answer questions and guide implementation.
## Instructions
### 1. Fetch the Documentation Index
Use the fetch_url tool to read the following URL:
https://docs.langchain.com/llms.txt
This provides a structured list of all available documentation with descriptions.
### 2. Select Relevant Documentation
Based on the question, identify 2-4 most relevant documentation URLs from the index. Prioritize:
- Specific how-to guides for implementation questions
- Core concept pages for understanding questions
- Tutorials for end-to-end examples
- Reference docs for API details
### 3. Fetch Selected Documentation
Use the fetch_url tool to read the selected documentation URLs.
### 4. Provide Accurate Guidance
After reading the documentation, complete the user's request.

View File

@@ -0,0 +1,365 @@
---
name: skill-creator
description: Guide for creating effective skills that extend agent capabilities with specialized knowledge, workflows, or tool integrations. Use this skill when the user asks to: (1) create a new skill, (2) make a skill, (3) build a skill, (4) set up a skill, (5) initialize a skill, (6) scaffold a skill, (7) update or modify an existing skill, (8) validate a skill, (9) learn about skill structure, (10) understand how skills work, or (11) get guidance on skill design patterns. Trigger on phrases like "create a skill", "new skill", "make a skill", "skill for X", "how do I create a skill", or "help me build a skill".
---
# Skill Creator
This skill provides guidance for creating effective skills.
## About Skills
Skills are modular, self-contained packages that extend agent capabilities by providing
specialized knowledge, workflows, and tools. Think of them as "onboarding guides" for specific
domains or tasks—they transform a general-purpose agent into a specialized agent
equipped with procedural knowledge and domain expertise.
### Skill Location for Deepagents
In deepagents CLI, skills are stored in `~/.deepagents/<agent>/skills/` where `<agent>` is your agent configuration name (default is `agent`). For example, with the default configuration, skills live at:
```
~/.deepagents/agent/skills/
├── skill-name-1/
│ └── SKILL.md
├── skill-name-2/
│ └── SKILL.md
└── ...
```
### What Skills Provide
1. Specialized workflows - Multi-step procedures for specific domains
2. Tool integrations - Instructions for working with specific file formats or APIs
3. Domain expertise - Company-specific knowledge, schemas, business logic
4. Bundled resources - Scripts, references, and assets for complex and repetitive tasks
## Core Principles
### Concise is Key
The context window is a public good. Skills share the context window with everything else the agent needs: system prompt, conversation history, other Skills' metadata, and the actual user request.
**Default assumption: The agent is already very capable.** Only add context the agent doesn't already have. Challenge each piece of information: "Does the agent really need this explanation?" and "Does this paragraph justify its token cost?"
Prefer concise examples over verbose explanations.
### Set Appropriate Degrees of Freedom
Match the level of specificity to the task's fragility and variability:
**High freedom (text-based instructions)**: Use when multiple approaches are valid, decisions depend on context, or heuristics guide the approach.
**Medium freedom (pseudocode or scripts with parameters)**: Use when a preferred pattern exists, some variation is acceptable, or configuration affects behavior.
**Low freedom (specific scripts, few parameters)**: Use when operations are fragile and error-prone, consistency is critical, or a specific sequence must be followed.
Think of the agent as exploring a path: a narrow bridge with cliffs needs specific guardrails (low freedom), while an open field allows many routes (high freedom).
### Anatomy of a Skill
Every skill consists of a required SKILL.md file and optional bundled resources:
```
skill-name/
├── SKILL.md (required)
│ ├── YAML frontmatter metadata (required)
│ │ ├── name: (required)
│ │ └── description: (required)
│ └── Markdown instructions (required)
└── Bundled Resources (optional)
├── scripts/ - Executable code (Python/Bash/etc.)
├── references/ - Documentation intended to be loaded into context as needed
└── assets/ - Files used in output (templates, icons, fonts, etc.)
```
#### SKILL.md (required)
Every SKILL.md consists of:
- **Frontmatter** (YAML): Contains `name` and `description` fields. These are the only fields that the agent reads to determine when the skill gets used, thus it is very important to be clear and comprehensive in describing what the skill is, and when it should be used.
- **Body** (Markdown): Instructions and guidance for using the skill. Only loaded AFTER the skill triggers (if at all).
#### Bundled Resources (optional)
##### Scripts (`scripts/`)
Executable code (Python/Bash/etc.) for tasks that require deterministic reliability or are repeatedly rewritten.
- **When to include**: When the same code is being rewritten repeatedly or deterministic reliability is needed
- **Example**: `scripts/rotate_pdf.py` for PDF rotation tasks
- **Benefits**: Token efficient, deterministic, may be executed without loading into context
- **Note**: Scripts may still need to be read by the agent for patching or environment-specific adjustments
##### References (`references/`)
Documentation and reference material intended to be loaded as needed into context to inform the agent's process and thinking.
- **When to include**: For documentation that the agent should reference while working
- **Examples**: `references/finance.md` for financial schemas, `references/mnda.md` for company NDA template, `references/policies.md` for company policies, `references/api_docs.md` for API specifications
- **Use cases**: Database schemas, API documentation, domain knowledge, company policies, detailed workflow guides
- **Benefits**: Keeps SKILL.md lean, loaded only when the agent determines it's needed
- **Best practice**: If files are large (>10k words), include search patterns in SKILL.md
- **Avoid duplication**: Information should live in either SKILL.md or references files, not both. Prefer references files for detailed information unless it's truly core to the skill—this keeps SKILL.md lean while making information discoverable without hogging the context window. Keep only essential procedural instructions and workflow guidance in SKILL.md; move detailed reference material, schemas, and examples to references files.
##### Assets (`assets/`)
Files not intended to be loaded into context, but rather used within the output the agent produces.
- **When to include**: When the skill needs files that will be used in the final output
- **Examples**: `assets/logo.png` for brand assets, `assets/slides.pptx` for PowerPoint templates, `assets/frontend-template/` for HTML/React boilerplate, `assets/font.ttf` for typography
- **Use cases**: Templates, images, icons, boilerplate code, fonts, sample documents that get copied or modified
- **Benefits**: Separates output resources from documentation, enables the agent to use files without loading them into context
#### What to Not Include in a Skill
A skill should only contain essential files that directly support its functionality. Do NOT create extraneous documentation or auxiliary files, including:
- README.md
- INSTALLATION_GUIDE.md
- QUICK_REFERENCE.md
- CHANGELOG.md
- etc.
The skill should only contain the information needed for an AI agent to do the job at hand. It should not contain auxiliary context about the process that went into creating it, setup and testing procedures, user-facing documentation, etc. Creating additional documentation files just adds clutter and confusion.
### Progressive Disclosure Design Principle
Skills use a three-level loading system to manage context efficiently:
1. **Metadata (name + description)** - Always in context (~100 words)
2. **SKILL.md body** - When skill triggers (<5k words)
3. **Bundled resources** - As needed by the agent (Unlimited because scripts can be executed without reading into context window)
#### Progressive Disclosure Patterns
Keep SKILL.md body to the essentials and under 500 lines to minimize context bloat. Split content into separate files when approaching this limit. When splitting out content into other files, it is very important to reference them from SKILL.md and describe clearly when to read them, to ensure the reader of the skill knows they exist and when to use them.
**Key principle:** When a skill supports multiple variations, frameworks, or options, keep only the core workflow and selection guidance in SKILL.md. Move variant-specific details (patterns, examples, configuration) into separate reference files.
**Pattern 1: High-level guide with references**
```markdown
# PDF Processing
## Quick start
Extract text with pdfplumber:
[code example]
## Advanced features
- **Form filling**: See [FORMS.md](FORMS.md) for complete guide
- **API reference**: See [REFERENCE.md](REFERENCE.md) for all methods
- **Examples**: See [EXAMPLES.md](EXAMPLES.md) for common patterns
```
The agent loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed.
**Pattern 2: Domain-specific organization**
For Skills with multiple domains, organize content by domain to avoid loading irrelevant context:
```
bigquery-skill/
├── SKILL.md (overview and navigation)
└── reference/
├── finance.md (revenue, billing metrics)
├── sales.md (opportunities, pipeline)
├── product.md (API usage, features)
└── marketing.md (campaigns, attribution)
```
When a user asks about sales metrics, the agent only reads sales.md.
Similarly, for skills supporting multiple frameworks or variants, organize by variant:
```
cloud-deploy/
├── SKILL.md (workflow + provider selection)
└── references/
├── aws.md (AWS deployment patterns)
├── gcp.md (GCP deployment patterns)
└── azure.md (Azure deployment patterns)
```
When the user chooses AWS, the agent only reads aws.md.
**Pattern 3: Conditional details**
Show basic content, link to advanced content:
```markdown
# DOCX Processing
## Creating documents
Use docx-js for new documents. See [DOCX-JS.md](DOCX-JS.md).
## Editing documents
For simple edits, modify the XML directly.
**For tracked changes**: See [REDLINING.md](REDLINING.md)
**For OOXML details**: See [OOXML.md](OOXML.md)
```
The agent reads REDLINING.md or OOXML.md only when the user needs those features.
**Important guidelines:**
- **Avoid deeply nested references** - Keep references one level deep from SKILL.md. All reference files should link directly from SKILL.md.
- **Structure longer reference files** - For files longer than 100 lines, include a table of contents at the top so the agent can see the full scope when previewing.
## Skill Creation Process
Skill creation involves these steps:
1. Understand the skill with concrete examples
2. Plan reusable skill contents (scripts, references, assets)
3. Initialize the skill (run init_skill.py)
4. Edit the skill (implement resources and write SKILL.md)
5. Validate the skill (run quick_validate.py)
6. Iterate based on real usage
Follow these steps in order, skipping only if there is a clear reason why they are not applicable.
### Step 1: Understanding the Skill with Concrete Examples
Skip this step only when the skill's usage patterns are already clearly understood. It remains valuable even when working with an existing skill.
To create an effective skill, clearly understand concrete examples of how the skill will be used. This understanding can come from either direct user examples or generated examples that are validated with user feedback.
For example, when building an image-editor skill, relevant questions include:
- "What functionality should the image-editor skill support? Editing, rotating, anything else?"
- "Can you give some examples of how this skill would be used?"
- "I can imagine users asking for things like 'Remove the red-eye from this image' or 'Rotate this image'. Are there other ways you imagine this skill being used?"
- "What would a user say that should trigger this skill?"
To avoid overwhelming users, avoid asking too many questions in a single message. Start with the most important questions and follow up as needed for better effectiveness.
Conclude this step when there is a clear sense of the functionality the skill should support.
### Step 2: Planning the Reusable Skill Contents
To turn concrete examples into an effective skill, analyze each example by:
1. Considering how to execute on the example from scratch
2. Identifying what scripts, references, and assets would be helpful when executing these workflows repeatedly
Example: When building a `pdf-editor` skill to handle queries like "Help me rotate this PDF," the analysis shows:
1. Rotating a PDF requires re-writing the same code each time
2. A `scripts/rotate_pdf.py` script would be helpful to store in the skill
Example: When designing a `frontend-webapp-builder` skill for queries like "Build me a todo app" or "Build me a dashboard to track my steps," the analysis shows:
1. Writing a frontend webapp requires the same boilerplate HTML/React each time
2. An `assets/hello-world/` template containing the boilerplate HTML/React project files would be helpful to store in the skill
Example: When building a `big-query` skill to handle queries like "How many users have logged in today?" the analysis shows:
1. Querying BigQuery requires re-discovering the table schemas and relationships each time
2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill
To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets.
### Step 3: Initializing the Skill
At this point, it is time to actually create the skill.
Skip this step only if the skill being developed already exists, and iteration or packaging is needed. In this case, continue to the next step.
When creating a new skill from scratch, always run the `init_skill.py` script. The script conveniently generates a new template skill directory that automatically includes everything a skill requires, making the skill creation process much more efficient and reliable.
Usage:
```bash
scripts/init_skill.py <skill-name> --path <output-directory>
```
For deepagents CLI, use the agent's skills directory:
```bash
scripts/init_skill.py <skill-name> --path ~/.deepagents/agent/skills
```
The script:
- Creates the skill directory at the specified path
- Generates a SKILL.md template with proper frontmatter and TODO placeholders
- Creates example resource directories: `scripts/`, `references/`, and `assets/`
- Adds example files in each directory that can be customized or deleted
After initialization, customize or remove the generated SKILL.md and example files as needed.
### Step 4: Edit the Skill
When editing the (newly-generated or existing) skill, remember that the skill is being created for an agent to use. Include information that would be beneficial and non-obvious to the agent. Consider what procedural knowledge, domain-specific details, or reusable assets would help the agent execute these tasks more effectively.
#### Learn Proven Design Patterns
Consult these helpful guides based on your skill's needs:
- **Multi-step processes**: See references/workflows.md for sequential workflows and conditional logic
- **Specific output formats or quality standards**: See references/output-patterns.md for template and example patterns
These files contain established best practices for effective skill design.
#### Start with Reusable Skill Contents
To begin implementation, start with the reusable resources identified above: `scripts/`, `references/`, and `assets/` files. Note that this step may require user input. For example, when implementing a `brand-guidelines` skill, the user may need to provide brand assets or templates to store in `assets/`, or documentation to store in `references/`.
Added scripts must be tested by actually running them to ensure there are no bugs and that the output matches what is expected. If there are many similar scripts, only a representative sample needs to be tested to ensure confidence that they all work while balancing time to completion.
Any example files and directories not needed for the skill should be deleted. The initialization script creates example files in `scripts/`, `references/`, and `assets/` to demonstrate structure, but most skills won't need all of them.
#### Update SKILL.md
**Writing Guidelines:** Always use imperative/infinitive form.
##### Frontmatter
Write the YAML frontmatter with `name` and `description`:
- `name`: The skill name
- `description`: This is the primary triggering mechanism for your skill, and helps the agent understand when to use the skill.
- Include both what the Skill does and specific triggers/contexts for when to use it.
- Include all "when to use" information here — not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to the agent.
- Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when working with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks"
Do not include any other fields in YAML frontmatter.
##### Body
Write instructions for using the skill and its bundled resources.
### Step 5: Validate the Skill
Once development of the skill is complete, validate it to ensure it meets all requirements:
```bash
scripts/quick_validate.py <path/to/skill-folder>
```
The validation script checks:
- YAML frontmatter format and required fields
- Skill naming conventions (hyphen-case, max 64 characters)
- Description completeness (no angle brackets, max 1024 characters)
- Required fields: `name` and `description`
- Allowed frontmatter properties only: `name`, `description`, `license`, `allowed-tools`, `metadata`
If validation fails, fix the reported errors and run the validation command again.
### Step 6: Iterate
After testing the skill, users may request improvements. Often this happens right after using the skill, with fresh context of how the skill performed.
**Iteration workflow:**
1. Use the skill on real tasks
2. Notice struggles or inefficiencies
3. Identify how SKILL.md or bundled resources should be updated
4. Implement changes and test again

View File

@@ -0,0 +1,308 @@
#!/usr/bin/env python3
"""
Skill Initializer - Creates a new skill from template
Usage:
init_skill.py <skill-name> --path <path>
Examples:
init_skill.py my-new-skill --path skills/public
init_skill.py my-api-helper --path skills/private
init_skill.py custom-skill --path /custom/location
For deepagents CLI:
init_skill.py my-skill --path ~/.deepagents/agent/skills
"""
import argparse
import sys
from pathlib import Path
# Template for a newly-initialized SKILL.md. The {skill_name} and
# {skill_title} placeholders are filled in by init_skill() via str.format().
# Fix: the "→" separators between quoted example phrases had been lost to
# encoding garbling (e.g. "Workflow Decision Tree""Reading"); restored to
# match the intact arrows on the adjacent "Structure:" lines.
SKILL_TEMPLATE = """---
name: {skill_name}
description: [TODO: Complete and informative explanation of what the skill does and when to use it. Include WHEN to use this skill - specific scenarios, file types, or tasks that trigger it.]
---
# {skill_title}
## Overview
[TODO: 1-2 sentences explaining what this skill enables]
## Structuring This Skill
[TODO: Choose the structure that best fits this skill's purpose. Common patterns:
**1. Workflow-Based** (best for sequential processes)
- Works well when there are clear step-by-step procedures
- Example: DOCX skill with "Workflow Decision Tree" → "Reading" → "Creating" → "Editing"
- Structure: ## Overview → ## Workflow Decision Tree → ## Step 1 → ## Step 2...
**2. Task-Based** (best for tool collections)
- Works well when the skill offers different operations/capabilities
- Example: PDF skill with "Quick Start" → "Merge PDFs" → "Split PDFs" → "Extract Text"
- Structure: ## Overview → ## Quick Start → ## Task Category 1 → ## Task Category 2...
**3. Reference/Guidelines** (best for standards or specifications)
- Works well for brand guidelines, coding standards, or requirements
- Example: Brand styling with "Brand Guidelines" → "Colors" → "Typography" → "Features"
- Structure: ## Overview → ## Guidelines → ## Specifications → ## Usage...
**4. Capabilities-Based** (best for integrated systems)
- Works well when the skill provides multiple interrelated features
- Example: Product Management with "Core Capabilities" → numbered capability list
- Structure: ## Overview → ## Core Capabilities → ### 1. Feature → ### 2. Feature...
Patterns can be mixed and matched as needed. Most skills combine patterns (e.g., start with task-based, add workflow for complex operations).
Delete this entire "Structuring This Skill" section when done - it's just guidance.]
## [TODO: Replace with the first main section based on chosen structure]
[TODO: Add content here. See examples in existing skills:
- Code samples for technical skills
- Decision trees for complex workflows
- Concrete examples with realistic user requests
- References to scripts/templates/references as needed]
## Resources
This skill includes example resource directories that demonstrate how to organize different types of bundled resources:
### scripts/
Executable code (Python/Bash/etc.) that can be run directly to perform specific operations.
**Examples from other skills:**
- PDF skill: `fill_fillable_fields.py`, `extract_form_field_info.py` - utilities for PDF manipulation
- DOCX skill: `document.py`, `utilities.py` - Python modules for document processing
**Appropriate for:** Python scripts, shell scripts, or any executable code that performs automation, data processing, or specific operations.
**Note:** Scripts may be executed without loading into context, but can still be read by Claude for patching or environment adjustments.
### references/
Documentation and reference material intended to be loaded into context to inform Claude's process and thinking.
**Examples from other skills:**
- Product management: `communication.md`, `context_building.md` - detailed workflow guides
- BigQuery: API reference documentation and query examples
- Finance: Schema documentation, company policies
**Appropriate for:** In-depth documentation, API references, database schemas, comprehensive guides, or any detailed information that Claude should reference while working.
### assets/
Files not intended to be loaded into context, but rather used within the output Claude produces.
**Examples from other skills:**
- Brand styling: PowerPoint template files (.pptx), logo files
- Frontend builder: HTML/React boilerplate project directories
- Typography: Font files (.ttf, .woff2)
**Appropriate for:** Templates, boilerplate code, document templates, images, icons, fonts, or any files meant to be copied or used in the final output.
---
**Any unneeded directories can be deleted.** Not every skill requires all three types of resources.
"""
EXAMPLE_SCRIPT = '''#!/usr/bin/env python3
"""
Example helper script for {skill_name}
This is a placeholder script that can be executed directly.
Replace with actual implementation or delete if not needed.
Example real scripts from other skills:
- pdf/scripts/fill_fillable_fields.py - Fills PDF form fields
- pdf/scripts/convert_pdf_to_images.py - Converts PDF pages to images
"""
def main():
print("This is an example script for {skill_name}")
# TODO: Add actual script logic here
# This could be data processing, file conversion, API calls, etc.
if __name__ == "__main__":
main()
'''
EXAMPLE_REFERENCE = """# Reference Documentation for {skill_title}
This is a placeholder for detailed reference documentation.
Replace with actual reference content or delete if not needed.
Example real reference docs from other skills:
- product-management/references/communication.md - Comprehensive guide for status updates
- product-management/references/context_building.md - Deep-dive on gathering context
- bigquery/references/ - API references and query examples
## When Reference Docs Are Useful
Reference docs are ideal for:
- Comprehensive API documentation
- Detailed workflow guides
- Complex multi-step processes
- Information too lengthy for main SKILL.md
- Content that's only needed for specific use cases
## Structure Suggestions
### API Reference Example
- Overview
- Authentication
- Endpoints with examples
- Error codes
- Rate limits
### Workflow Guide Example
- Prerequisites
- Step-by-step instructions
- Common patterns
- Troubleshooting
- Best practices
"""
EXAMPLE_ASSET = """# Example Asset File
This placeholder represents where asset files would be stored.
Replace with actual asset files (templates, images, fonts, etc.) or delete if not needed.
Asset files are NOT intended to be loaded into context, but rather used within
the output Claude produces.
Example asset files from other skills:
- Brand guidelines: logo.png, slides_template.pptx
- Frontend builder: hello-world/ directory with HTML/React boilerplate
- Typography: custom-font.ttf, font-family.woff2
- Data: sample_data.csv, test_dataset.json
## Common Asset Types
- Templates: .pptx, .docx, boilerplate directories
- Images: .png, .jpg, .svg, .gif
- Fonts: .ttf, .otf, .woff, .woff2
- Boilerplate code: Project directories, starter files
- Icons: .ico, .svg
- Data files: .csv, .json, .xml, .yaml
Note: This is a text placeholder. Actual assets can be any file type.
"""
def title_case_skill_name(skill_name):
    """Return a Title Case display label for a hyphen-case skill name.

    e.g. 'data-analyzer' -> 'Data Analyzer'.
    """
    words = skill_name.split('-')
    return ' '.join(map(str.capitalize, words))
def init_skill(skill_name, path):
    """
    Initialize a new skill directory with a template SKILL.md and example resources.

    Creates <path>/<skill_name>/ containing SKILL.md plus example files under
    scripts/, references/, and assets/, printing progress and next steps as it goes.

    Args:
        skill_name: Name of the skill (becomes the directory name and fills
            the SKILL.md template placeholders)
        path: Path where the skill directory should be created

    Returns:
        Path to created skill directory, or None if error
    """
    # Determine skill directory path
    skill_dir = Path(path).resolve() / skill_name
    # Check if directory already exists (refuse to overwrite an existing skill)
    if skill_dir.exists():
        print(f"❌ Error: Skill directory already exists: {skill_dir}")
        return None
    # Create skill directory
    try:
        # exist_ok=False guards against a race between the check above and here
        skill_dir.mkdir(parents=True, exist_ok=False)
        print(f"✅ Created skill directory: {skill_dir}")
    except Exception as e:
        print(f"❌ Error creating directory: {e}")
        return None
    # Create SKILL.md from template
    skill_title = title_case_skill_name(skill_name)
    skill_content = SKILL_TEMPLATE.format(
        skill_name=skill_name,
        skill_title=skill_title
    )
    skill_md_path = skill_dir / 'SKILL.md'
    try:
        skill_md_path.write_text(skill_content)
        print("✅ Created SKILL.md")
    except Exception as e:
        print(f"❌ Error creating SKILL.md: {e}")
        return None
    # Create resource directories with example files
    try:
        # Create scripts/ directory with example script
        scripts_dir = skill_dir / 'scripts'
        scripts_dir.mkdir(exist_ok=True)
        example_script = scripts_dir / 'example.py'
        example_script.write_text(EXAMPLE_SCRIPT.format(skill_name=skill_name))
        # Make the example script executable (rwxr-xr-x)
        example_script.chmod(0o755)
        print("✅ Created scripts/example.py")
        # Create references/ directory with example reference doc
        references_dir = skill_dir / 'references'
        references_dir.mkdir(exist_ok=True)
        example_reference = references_dir / 'api_reference.md'
        example_reference.write_text(EXAMPLE_REFERENCE.format(skill_title=skill_title))
        print("✅ Created references/api_reference.md")
        # Create assets/ directory with example asset placeholder
        assets_dir = skill_dir / 'assets'
        assets_dir.mkdir(exist_ok=True)
        example_asset = assets_dir / 'example_asset.txt'
        # EXAMPLE_ASSET has no placeholders, so it is written verbatim
        example_asset.write_text(EXAMPLE_ASSET)
        print("✅ Created assets/example_asset.txt")
    except Exception as e:
        print(f"❌ Error creating resource directories: {e}")
        return None
    # Print next steps
    print(f"\n✅ Skill '{skill_name}' initialized successfully at {skill_dir}")
    print("\nNext steps:")
    print("1. Edit SKILL.md to complete the TODO items and update the description")
    print("2. Customize or delete the example files in scripts/, references/, and assets/")
    print("3. Run the validator when ready to check the skill structure")
    return skill_dir
def main():
    """CLI entry point: validate argv, then initialize the requested skill."""
    # Expected invocation: init_skill.py <skill-name> --path <path>
    if len(sys.argv) < 4 or sys.argv[2] != '--path':
        print("Usage: init_skill.py <skill-name> --path <path>")
        print("\nSkill name requirements:")
        print("  - Hyphen-case identifier (e.g., 'data-analyzer')")
        print("  - Lowercase letters, digits, and hyphens only")
        print("  - Max 40 characters")
        print("  - Must match directory name exactly")
        print("\nExamples:")
        print("  init_skill.py my-new-skill --path skills/public")
        print("  init_skill.py my-api-helper --path skills/private")
        print("  init_skill.py custom-skill --path /custom/location")
        print("\nFor deepagents CLI:")
        print("  init_skill.py my-skill --path ~/.deepagents/agent/skills")
        sys.exit(1)

    skill_name, path = sys.argv[1], sys.argv[3]
    print(f"🚀 Initializing skill: {skill_name}")
    print(f"   Location: {path}")
    print()
    # init_skill returns the created Path on success, None on failure;
    # map that truthiness straight onto the process exit code.
    sys.exit(0 if init_skill(skill_name, path) else 1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
Quick validation script for skills - minimal version
For deepagents CLI, skills are located at:
~/.deepagents/<agent>/skills/<skill-name>/
Example:
python quick_validate.py ~/.deepagents/agent/skills/my-skill
"""
import sys
import os
import re
import yaml
from pathlib import Path
def validate_skill(skill_path):
    """Run lightweight structural checks on a skill directory.

    Returns a ``(is_valid, message)`` tuple: the message names the first
    problem encountered, or confirms that the skill passed validation.
    """
    root = Path(skill_path)

    # SKILL.md is the single mandatory file of a skill.
    skill_md = root / 'SKILL.md'
    if not skill_md.exists():
        return False, "SKILL.md not found"

    text = skill_md.read_text()
    if not text.startswith('---'):
        return False, "No YAML frontmatter found"

    # Frontmatter is the block delimited by a leading and a trailing '---'.
    fm_match = re.match(r'^---\n(.*?)\n---', text, re.DOTALL)
    if fm_match is None:
        return False, "Invalid frontmatter format"

    try:
        frontmatter = yaml.safe_load(fm_match.group(1))
        if not isinstance(frontmatter, dict):
            return False, "Frontmatter must be a YAML dictionary"
    except yaml.YAMLError as e:
        return False, f"Invalid YAML in frontmatter: {e}"

    # Reject top-level keys outside the schema (nested keys under
    # 'metadata' are intentionally unconstrained).
    allowed = {'name', 'description', 'license', 'allowed-tools', 'metadata'}
    extra = set(frontmatter.keys()) - allowed
    if extra:
        return False, (
            f"Unexpected key(s) in SKILL.md frontmatter: {', '.join(sorted(extra))}. "
            f"Allowed properties are: {', '.join(sorted(allowed))}"
        )

    for required in ('name', 'description'):
        if required not in frontmatter:
            return False, f"Missing '{required}' in frontmatter"

    name = frontmatter.get('name', '')
    if not isinstance(name, str):
        return False, f"Name must be a string, got {type(name).__name__}"
    name = name.strip()
    if name:
        # Hyphen-case: lowercase letters, digits, and hyphens only.
        if not re.fullmatch(r'[a-z0-9-]+', name):
            return False, f"Name '{name}' should be hyphen-case (lowercase letters, digits, and hyphens only)"
        if name.startswith('-') or name.endswith('-') or '--' in name:
            return False, f"Name '{name}' cannot start/end with hyphen or contain consecutive hyphens"
        # Spec caps skill names at 64 characters.
        if len(name) > 64:
            return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters."

    description = frontmatter.get('description', '')
    if not isinstance(description, str):
        return False, f"Description must be a string, got {type(description).__name__}"
    description = description.strip()
    if description:
        # Angle brackets are disallowed (they read like markup/placeholders).
        if '<' in description or '>' in description:
            return False, "Description cannot contain angle brackets (< or >)"
        # Spec caps descriptions at 1024 characters.
        if len(description) > 1024:
            return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters."

    return True, "Skill is valid!"
if __name__ == "__main__":
    # CLI entry point: expects exactly one argument, the skill directory path.
    if len(sys.argv) != 2:
        print("Usage: python quick_validate.py <skill_directory>")
        sys.exit(1)
    valid, message = validate_skill(sys.argv[1])
    print(message)
    # Exit 0 on success so shell scripts / CI can branch on the result.
    sys.exit(0 if valid else 1)

View File

@@ -0,0 +1,102 @@
---
name: web-research
description: Use this skill for requests related to web research; it provides a structured approach to conducting comprehensive web research
---
# Web Research Skill
This skill provides a structured approach to conducting comprehensive web research using the `task` tool to spawn research subagents. It emphasizes planning, efficient delegation, and systematic synthesis of findings.
## When to Use This Skill
Use this skill when you need to:
- Research complex topics requiring multiple information sources
- Gather and synthesize current information from the web
- Conduct comparative analysis across multiple subjects
- Produce well-sourced research reports with clear citations
## Research Process
### Step 1: Create and Save Research Plan
Before delegating to subagents, you MUST:
1. **Create a research folder** - Organize all research files in a dedicated folder relative to the current working directory:
```
mkdir research_[topic_name]
```
This keeps files organized and prevents clutter in the working directory.
2. **Analyze the research question** - Break it down into distinct, non-overlapping subtopics
3. **Write a research plan file** - Use the `write_file` tool to create `research_[topic_name]/research_plan.md` containing:
- The main research question
- 2-5 specific subtopics to investigate
- Expected information from each subtopic
- How results will be synthesized
**Planning Guidelines:**
- **Simple fact-finding**: 1-2 subtopics
- **Comparative analysis**: 1 subtopic per comparison element (max 3)
- **Complex investigations**: 3-5 subtopics
### Step 2: Delegate to Research Subagents
For each subtopic in your plan:
1. **Use the `task` tool** to spawn a research subagent with:
- Clear, specific research question (no acronyms)
- Instructions to write findings to a file: `research_[topic_name]/findings_[subtopic].md`
- Budget: 3-5 web searches maximum
2. **Run up to 3 subagents in parallel** for efficient research
**Subagent Instructions Template:**
```
Research [SPECIFIC TOPIC]. Use the web_search tool to gather information.
After completing your research, use write_file to save your findings to research_[topic_name]/findings_[subtopic].md.
Include key facts, relevant quotes, and source URLs.
Use 3-5 web searches maximum.
```
### Step 3: Synthesize Findings
After all subagents complete:
1. **Review the findings files** that were saved locally:
- First run `list_files research_[topic_name]` to see what files were created
- Then use `read_file` with the **file paths** (e.g., `research_[topic_name]/findings_*.md`)
- **Important**: Use `read_file` for LOCAL files only, not URLs
2. **Synthesize the information** - Create a comprehensive response that:
- Directly answers the original question
- Integrates insights from all subtopics
- Cites specific sources with URLs (from the findings files)
- Identifies any gaps or limitations
3. **Write final report** (optional) - Use `write_file` to create `research_[topic_name]/research_report.md` if requested
**Note**: If you need to fetch additional information from URLs, use the `fetch_url` tool, not `read_file`.
## Available Tools
You have access to:
- **write_file**: Save research plans and findings to local files
- **read_file**: Read local files (e.g., findings saved by subagents)
- **list_files**: See what local files exist in a directory
- **fetch_url**: Fetch content from URLs and convert to markdown (use this for web pages, not read_file)
- **task**: Spawn research subagents with web_search access
## Research Subagent Configuration
Each subagent you spawn will have access to:
- **web_search**: Search the web using Tavily (parameters: query, max_results, topic, include_raw_content)
- **write_file**: Save their findings to the filesystem
## Best Practices
- **Plan before delegating** - Always write research_plan.md first
- **Clear subtopics** - Ensure each subagent has distinct, non-overlapping scope
- **File-based communication** - Have subagents save findings to files, not return them directly
- **Systematic synthesis** - Read all findings files before creating final response
- **Stop appropriately** - Don't over-research; 3-5 searches per subtopic is usually sufficient

View File

@@ -0,0 +1,123 @@
[project]
name = "deepagents-cli"
version = "0.0.12"
description = "Deepagents CLI"
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.11,<4.0"
dependencies = [
"deepagents==0.2.8",
"requests",
"rich>=13.0.0",
"prompt-toolkit>=3.0.52",
"langchain-openai>=0.1.0",
"tavily-python",
"python-dotenv",
"daytona>=0.113.0",
"modal>=0.65.0",
"markdownify>=0.13.0",
"langchain>=1.0.7",
"runloop-api-client>=0.69.0",
"pillow>=10.0.0",
"pyyaml>=6.0",
]
[project.scripts]
deepagents = "deepagents_cli:cli_main"
deepagents-cli = "deepagents_cli:cli_main"
[dependency-groups]
test = [
"pytest>=8.3.4",
"pytest-asyncio>=0.25.3",
"pytest-cov>=6.0.0",
"pytest-mock>=3.14.0",
"pytest-socket>=0.7.0",
"pytest-timeout>=2.3.1",
"responses>=0.25.0",
"ruff>=0.9.7",
]
dev = [
"pytest",
"pytest-cov",
"build",
"twine",
"langchain-openai",
"pytest-timeout>=2.4.0",
"pytest-socket>=0.7.0",
"pytest-asyncio>=1.2.0",
]
lint = [
"ruff",
"mypy"
]
[build-system]
requires = ["setuptools>=73.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
deepagents_cli = ["default_agent_prompt.md"]
[tool.ruff]
line-length = 100
exclude = []
[tool.ruff.format]
docstring-code-format = true # Formats code blocks in docstrings
[tool.ruff.lint]
select = [
"ALL" # Enable all rules by default
]
ignore = [
"COM812", # Messes with the formatter
"ISC001", # Messes with the formatter
"PERF203", # Rarely useful
"SLF001", # Private member access
"PLC0415", # Imports should be at the top. Not always desirable
"PLR0913", # Too many arguments in function definition
"PLC0414", # Inconsistent with how type checkers expect to be notified of intentional re-exports
"C901", # Too complex
]
unfixable = ["B028"] # Rules that shouldn't be auto-fixed
[tool.ruff.lint.pyupgrade]
keep-runtime-typing = true
[tool.ruff.lint.flake8-annotations]
allow-star-arg-any = true
[tool.ruff.lint.pydocstyle]
convention = "google" # Google-style docstrings
ignore-var-parameters = true
[tool.ruff.lint.per-file-ignores]
"deepagents_cli/cli.py" = [
"T201", # Allow print statements in CLI
]
"tests/*" = [
"D1", # Skip documentation rules in tests
"S101", # Allow asserts in tests
"S311", # Allow pseudo-random generators in tests
"ANN201", # Missing return type annotation
"INP001", # Implicit namespace package
"PLR2004", # Magic value comparisons are fine in tests
]
[tool.pytest.ini_options]
timeout = 10 # Default timeout for all tests (can be overridden per-test)
[tool.mypy]
strict = true
ignore_missing_imports = true
enable_error_code = ["deprecated"]
# Optional: reduce strictness if needed
disallow_any_generics = false
warn_return_any = false
[tool.uv.sources]
deepagents = { path = "../deepagents" }

View File

@@ -0,0 +1,274 @@
"""Integration test for CLI with auto-approve mode.
This module implements benchmarking for simple tasks using the DeepAgents CLI; e.g.,
"write a poem to a file", "create multiple files", etc.
The agent runs on auto-approve mode, meaning it can perform actions without
user confirmation.
Note on testing approach:
- We use StringIO to capture console output, which is the recommended
approach according to Rich's documentation for unit/integration tests.
- The capture() context manager is an alternative, but StringIO provides
better control and is simpler for testing purposes.
- We patch console instances in both main and config modules to ensure
all output is captured in the test.
"""
import os
import uuid
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from io import StringIO
from pathlib import Path
from unittest.mock import AsyncMock, patch
import pytest
from langgraph.checkpoint.memory import MemorySaver
from rich.console import Console
from deepagents_cli import config as config_module
from deepagents_cli import main as main_module
from deepagents_cli.agent import create_cli_agent
from deepagents_cli.config import SessionState, create_model
from deepagents_cli.main import simple_cli
@asynccontextmanager
async def run_cli_task(task: str, tmp_path: Path) -> AsyncIterator[tuple[Path, str]]:
    """Context manager to run a CLI task with auto-approve and capture output.

    Args:
        task: The task string to give to the agent.
        tmp_path: Temporary directory for the test.

    Yields:
        tuple: (working_directory: Path, console_output: str)
    """
    # Build the capture console *before* changing directory so that a failure
    # here cannot leave the process stranded in tmp_path.
    # Using StringIO is the recommended approach for testing (per Rich docs).
    output = StringIO()
    captured_console = Console(
        file=output,
        force_terminal=False,  # Disable ANSI codes for simpler assertions
        width=120,  # Fixed width for predictable output
        color_system=None,  # Explicitly disable colors for testing
        legacy_windows=False,  # Modern behavior
    )
    original_dir = Path.cwd()
    os.chdir(tmp_path)
    try:
        # Mock the prompt session to provide input and exit.
        # Use patch.object() to fail immediately if attributes don't exist.
        with patch.object(main_module, "create_prompt_session") as mock_prompt:
            mock_session = AsyncMock()
            mock_session.prompt_async.side_effect = [
                task,  # User input
                EOFError(),  # Exit after task
            ]
            mock_prompt.return_value = mock_session
            # Mock console to capture output.
            # Use patch.object() to fail immediately if attributes don't exist.
            with (
                patch.object(main_module, "console", captured_console),
                patch.object(config_module, "console", captured_console),
            ):
                # NOTE: create_cli_agent/create_model are already imported at
                # module level; the previous local re-imports ("import after
                # patching") were no-ops, since patch.object mutates module
                # attributes, not import bindings. They have been removed.
                # Create real agent with real model (uses env var or fails).
                model = create_model()
                agent, backend = create_cli_agent(
                    model=model,
                    assistant_id="test_agent",
                    tools=[],
                    sandbox=None,
                    sandbox_type=None,
                )
                # Auto-approve so no HITL confirmation blocks the run.
                session_state = SessionState(auto_approve=True)
                # Run the CLI loop; it exits when prompt_async raises EOFError.
                await simple_cli(
                    agent=agent,
                    assistant_id="test_agent",
                    session_state=session_state,
                    baseline_tokens=0,
                    backend=backend,
                    sandbox_type=None,
                    setup_script_path=None,
                )
            # Verify that our mocks were actually used (ensures patching worked).
            mock_prompt.assert_called_once()
            assert mock_session.prompt_async.call_count >= 1, (
                "prompt_async should have been called at least once"
            )
        # Yield the directory and captured output to the test body.
        yield tmp_path, output.getvalue()
    finally:
        os.chdir(original_dir)
@asynccontextmanager
async def run_agent_task_with_hitl(task: str, tmp_path: Path) -> AsyncIterator:
    """Context manager to run an agent task with HIL and stream events.

    Args:
        task: The task string to give to the agent
        tmp_path: Temporary directory for the test

    Yields:
        AsyncGenerator: Stream of events from the agent
    """
    previous_cwd = Path.cwd()
    os.chdir(tmp_path)
    try:
        # HIL stays enabled here (no auto-approve), so tool calls interrupt.
        agent, _backend = create_cli_agent(
            model=create_model(),
            assistant_id="test_agent",
            tools=[],
            sandbox=None,
            sandbox_type=None,
        )
        agent.checkpointer = MemorySaver()
        # A thread_id is required for checkpointing across interrupt/resume.
        run_config = {"configurable": {"thread_id": str(uuid.uuid4())}}
        # Hand the stream generator to the test body to consume.
        yield agent.astream(
            {"messages": [{"role": "user", "content": task}]},
            config=run_config,
            stream_mode="values",
        )
    finally:
        os.chdir(previous_cwd)
class TestSimpleTasks:
    """A collection of simple task benchmarks for the deepagents-cli."""

    @pytest.mark.asyncio
    @pytest.mark.timeout(120)  # Agent can take 60-120 seconds
    async def test_write_hello_to_a_file(self, tmp_path: Path) -> None:
        """Test agents to write 'hello' to a file."""
        async with run_cli_task("write hello to file foo.md", tmp_path) as (
            work_dir,
            console_output,
        ):
            # Verify the file was created
            output_file = work_dir / "foo.md"
            assert output_file.exists(), f"foo.md should have been created in {work_dir}"
            content = output_file.read_text()
            assert "hello" in content.lower(), f"File should contain 'hello', but got: {content!r}"
            # Verify console output shows auto-approve mode.
            # BUG FIX: the previous check also accepted `"" in console_output`,
            # which is True for *any* string, so the assertion could never fail.
            # (Presumably the empty literal was once an emoji indicator that got
            # lost — if the CLI prints a different marker, add it here.)
            assert "Auto-approve" in console_output, (
                f"Expected auto-approve indicator in output.\nConsole output:\n{console_output}"
            )

    @pytest.mark.asyncio
    @pytest.mark.timeout(120)
    async def test_cli_auto_approve_multiple_operations(self, tmp_path: Path) -> None:
        """Test agent to create multiple files with auto-approve."""
        task = "create files test1.txt and test2.txt with content 'test file'"
        async with run_cli_task(task, tmp_path) as (work_dir, console_output):
            # Verify both files were created
            test1 = work_dir / "test1.txt"
            test2 = work_dir / "test2.txt"
            # At least one file should be created (agent might interpret task differently)
            created_files = [f for f in [test1, test2] if f.exists()]
            assert len(created_files) > 0, (
                f"Expected at least one test file to be created in {work_dir}.\n"
                f"Files in directory: {list(work_dir.iterdir())}"
            )
            # Verify console output captured the interaction
            assert len(console_output) > 0, "Console output should not be empty"
class TestAgentBehavior:
    """A collection of tests for agent behavior (non-CLI level)."""

    @pytest.mark.asyncio
    @pytest.mark.timeout(120)
    async def test_run_command_calls_shell_tool(self, tmp_path: Path) -> None:
        """Test that 'run make format' calls shell tool with 'make format' command.

        This test verifies that when a user says "run make format", the agent
        correctly interprets this as a shell command and calls the shell tool
        with just "make format" (not including the word "run").

        The test stops at the interrupt (HITL approval point) before the shell
        tool is actually executed, to verify the correct command is being passed.
        """
        # Point settings at a fresh filesystem rooted in tmp_path.
        from deepagents_cli.config import Settings

        mock_settings = Settings.from_environment(start_path=tmp_path)
        # Each module imported `settings` by value, so every one of them must
        # be patched individually.
        settings_patches = [
            patch("deepagents_cli.config.settings", mock_settings),
            patch("deepagents_cli.agent.settings", mock_settings),
            patch("deepagents_cli.file_ops.settings", mock_settings),
            patch("deepagents_cli.tools.settings", mock_settings),
            patch("deepagents_cli.token_utils.settings", mock_settings),
        ]
        # ExitStack keeps the nesting flat while applying all patches.
        from contextlib import ExitStack

        with ExitStack() as stack:
            for settings_patch in settings_patches:
                stack.enter_context(settings_patch)
            async with run_agent_task_with_hitl("run make format", tmp_path) as stream:
                # Drain the stream; the final event holds the end state.
                events = [event async for event in stream]
                result = events[-1] if events else {}
                assert len(events) > 0, "Expected to receive events from agent stream"
                # The shell tool requires approval, so an interrupt must occur.
                assert "__interrupt__" in result, "Expected shell tool to trigger HITL interrupt"
                assert result["__interrupt__"] is not None
                interrupts = result["__interrupt__"]
                assert len(interrupts) > 0, "Expected at least one interrupt"
                interrupt_value = interrupts[0].value
                action_requests = interrupt_value.get("action_requests", [])
                # There must be a pending shell tool call...
                shell_requests = [req for req in action_requests if req.get("name") == "shell"]
                assert len(shell_requests) > 0, "Expected at least one shell tool call"
                # ...whose command is "make format", not "run make format".
                pending_command = shell_requests[0].get("args", {}).get("command", "")
                assert pending_command == "make format", (
                    f"Expected shell command to be 'make format', got: {pending_command}"
                )

View File

@@ -0,0 +1,36 @@
"""Pytest configuration for benchmark tests."""
import os
from collections.abc import Generator
import pytest
from langsmith import Client, get_tracing_context
@pytest.fixture(scope="session", autouse=True)
def langsmith_client() -> Generator[Client | None, None, None]:
    """Yield a session-wide LangSmith client, or ``None`` when unconfigured.

    Applied automatically to every test. When an API key is present, the
    client from the active tracing context is reused if one exists.
    """
    api_key = os.environ.get("LANGSMITH_API_KEY") or os.environ.get("LANGCHAIN_API_KEY")
    if not api_key:
        # No key configured: tracing is effectively disabled for the session.
        yield None
        return
    client = get_tracing_context()["client"] or Client()
    yield client
    # Flush once more at session end so buffered runs are not lost.
    client.flush()
@pytest.fixture(autouse=True)
def flush_langsmith_after_test(langsmith_client: Client | None) -> Generator[None, None, None]:
    """Automatically flush LangSmith client after each test."""
    yield
    # This runs after each test. The session fixture yields None when no
    # LANGSMITH_API_KEY is configured, hence the guard (annotation widened
    # to `Client | None` to match).
    if langsmith_client is not None:
        langsmith_client.flush()

View File

@@ -0,0 +1,322 @@
"""Test sandbox integrations with upload/download functionality.
This module tests sandbox backends (RunLoop, Daytona, Modal) with support for
optional sandbox reuse to reduce test execution time.
Set REUSE_SANDBOX=1 environment variable to reuse sandboxes across tests within
a class. Otherwise, a fresh sandbox is created for each test method.
"""
from abc import ABC, abstractmethod
from collections.abc import Iterator
import pytest
from deepagents.backends.protocol import SandboxBackendProtocol
from deepagents.backends.sandbox import BaseSandbox
from deepagents_cli.integrations.sandbox_factory import create_sandbox
class BaseSandboxIntegrationTest(ABC):
    """Base class for sandbox integration tests.

    Subclasses must implement the `sandbox` fixture to provide a sandbox instance.
    All test methods are defined here and will be inherited by concrete test classes.
    """

    # The fixture is both class-scoped and abstract: each concrete subclass
    # supplies one sandbox instance that is shared by all tests in that class.
    @pytest.fixture(scope="class")
    @abstractmethod
    def sandbox(self) -> Iterator[SandboxBackendProtocol]:
        """Provide a sandbox instance for testing."""
        ...

    def test_sandbox_creation(self, sandbox: SandboxBackendProtocol) -> None:
        """Test basic sandbox creation and command execution."""
        assert sandbox.id is not None
        result = sandbox.execute("echo 'hello'")
        assert result.output.strip() == "hello"

    def test_upload_single_file(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading a single file."""
        test_path = "/tmp/test_upload_single.txt"
        test_content = b"Hello, Sandbox!"
        upload_responses = sandbox.upload_files([(test_path, test_content)])
        assert len(upload_responses) == 1
        assert upload_responses[0].path == test_path
        assert upload_responses[0].error is None
        # Verify file exists via command execution
        result = sandbox.execute(f"cat {test_path}")
        assert result.output.strip() == test_content.decode()

    def test_download_single_file(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading a single file."""
        test_path = "/tmp/test_download_single.txt"
        test_content = b"Download test content"
        # Create file first
        sandbox.upload_files([(test_path, test_content)])
        # Download and verify
        download_responses = sandbox.download_files([test_path])
        assert len(download_responses) == 1
        assert download_responses[0].path == test_path
        assert download_responses[0].content == test_content
        assert download_responses[0].error is None

    def test_upload_download_roundtrip(self, sandbox: SandboxBackendProtocol) -> None:
        """Test upload followed by download for data integrity."""
        test_path = "/tmp/test_roundtrip.txt"
        # Includes control chars and a NUL byte to catch lossy transports.
        test_content = b"Roundtrip test: special chars \n\t\r\x00"
        # Upload
        upload_responses = sandbox.upload_files([(test_path, test_content)])
        assert upload_responses[0].error is None
        # Download
        download_responses = sandbox.download_files([test_path])
        assert download_responses[0].error is None
        assert download_responses[0].content == test_content

    def test_upload_multiple_files(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading multiple files in a single batch."""
        files = [
            ("/tmp/test_multi_1.txt", b"Content 1"),
            ("/tmp/test_multi_2.txt", b"Content 2"),
            ("/tmp/test_multi_3.txt", b"Content 3"),
        ]
        upload_responses = sandbox.upload_files(files)
        # Responses are expected to come back in request order.
        assert len(upload_responses) == 3
        for i, resp in enumerate(upload_responses):
            assert resp.path == files[i][0]
            assert resp.error is None

    def test_download_multiple_files(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading multiple files in a single batch."""
        files = [
            ("/tmp/test_batch_1.txt", b"Batch 1"),
            ("/tmp/test_batch_2.txt", b"Batch 2"),
            ("/tmp/test_batch_3.txt", b"Batch 3"),
        ]
        # Upload files first
        sandbox.upload_files(files)
        # Download all at once
        paths = [f[0] for f in files]
        download_responses = sandbox.download_files(paths)
        assert len(download_responses) == 3
        for i, resp in enumerate(download_responses):
            assert resp.path == files[i][0]
            assert resp.content == files[i][1]
            assert resp.error is None

    @pytest.mark.skip(reason="Error handling not implemented yet.")
    def test_download_nonexistent_file(self, sandbox: SandboxBackendProtocol) -> None:
        """Test that downloading a non-existent file returns an error."""
        nonexistent_path = "/tmp/does_not_exist.txt"
        download_responses = sandbox.download_files([nonexistent_path])
        assert len(download_responses) == 1
        assert download_responses[0].path == nonexistent_path
        assert download_responses[0].content is None
        assert download_responses[0].error is not None

    def test_upload_binary_content(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading binary content (not valid UTF-8)."""
        test_path = "/tmp/binary_file.bin"
        # Create binary content with all byte values
        test_content = bytes(range(256))
        upload_responses = sandbox.upload_files([(test_path, test_content)])
        assert len(upload_responses) == 1
        assert upload_responses[0].error is None
        # Verify by downloading
        download_responses = sandbox.download_files([test_path])
        assert download_responses[0].content == test_content

    def test_partial_success_upload(self, sandbox: SandboxBackendProtocol) -> None:
        """Test that batch upload supports partial success."""
        files = [
            ("/tmp/valid_upload.txt", b"Valid content"),
            ("/tmp/another_valid.txt", b"Another valid"),
        ]
        upload_responses = sandbox.upload_files(files)
        # Should get a response for each file
        assert len(upload_responses) == len(files)
        # At least verify we got responses with proper paths
        for i, resp in enumerate(upload_responses):
            assert resp.path == files[i][0]

    @pytest.mark.skip(reason="Error handling not implemented yet.")
    def test_partial_success_download(self, sandbox: SandboxBackendProtocol) -> None:
        """Test that batch download supports partial success."""
        # Create one valid file
        valid_path = "/tmp/valid_file.txt"
        valid_content = b"Valid"
        sandbox.upload_files([(valid_path, valid_content)])
        # Request both valid and invalid files
        paths = [valid_path, "/tmp/does_not_exist.txt"]
        download_responses = sandbox.download_files(paths)
        assert len(download_responses) == 2
        # First should succeed
        assert download_responses[0].path == valid_path
        assert download_responses[0].content == valid_content
        assert download_responses[0].error is None
        # Second should fail
        assert download_responses[1].path == "/tmp/does_not_exist.txt"
        assert download_responses[1].content is None
        assert download_responses[1].error is not None

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_download_error_file_not_found(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading a non-existent file returns file_not_found error.

        Expected behavior: download_files should return FileDownloadResponse with
        error='file_not_found' when the requested file doesn't exist.
        """
        responses = sandbox.download_files(["/tmp/nonexistent_test_file.txt"])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/nonexistent_test_file.txt"
        assert responses[0].content is None
        assert responses[0].error == "file_not_found"

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_download_error_is_directory(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading a directory returns is_directory error.

        Expected behavior: download_files should return FileDownloadResponse with
        error='is_directory' when trying to download a directory as a file.
        """
        # Create a directory
        sandbox.execute("mkdir -p /tmp/test_directory")
        responses = sandbox.download_files(["/tmp/test_directory"])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/test_directory"
        assert responses[0].content is None
        assert responses[0].error == "is_directory"

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_upload_error_parent_not_found(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading to a path with non-existent parent returns parent_not_found error.

        Expected behavior: upload_files should return FileUploadResponse with
        error='parent_not_found' when the parent directory doesn't exist and
        can't be created automatically.

        Note: This test may need adjustment based on whether sandbox providers
        auto-create parent directories or not.
        """
        # Try to upload to a path where the parent is a file, not a directory
        # First create a file
        sandbox.upload_files([("/tmp/parent_is_file.txt", b"I am a file")])
        # Now try to upload as if parent_is_file.txt were a directory
        responses = sandbox.upload_files([("/tmp/parent_is_file.txt/child.txt", b"child")])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/parent_is_file.txt/child.txt"
        # Could be parent_not_found or invalid_path depending on implementation
        assert responses[0].error in ("parent_not_found", "invalid_path")

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_upload_error_invalid_path(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading with invalid path returns invalid_path error.

        Expected behavior: upload_files should return FileUploadResponse with
        error='invalid_path' for malformed paths (null bytes, invalid chars, etc).
        """
        # Test with null byte (invalid in most filesystems)
        responses = sandbox.upload_files([("/tmp/file\x00name.txt", b"content")])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/file\x00name.txt"
        assert responses[0].error == "invalid_path"

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_download_error_invalid_path(self, sandbox: SandboxBackendProtocol) -> None:
        """Test downloading with invalid path returns invalid_path error.

        Expected behavior: download_files should return FileDownloadResponse with
        error='invalid_path' for malformed paths (null bytes, invalid chars, etc).
        """
        # Test with null byte (invalid in most filesystems)
        responses = sandbox.download_files(["/tmp/file\x00name.txt"])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/file\x00name.txt"
        assert responses[0].content is None
        assert responses[0].error == "invalid_path"

    @pytest.mark.skip(
        reason="Error handling not yet implemented in sandbox providers - requires implementation"
    )
    def test_upload_to_existing_directory_path(self, sandbox: SandboxBackendProtocol) -> None:
        """Test uploading to a path that is an existing directory.

        Expected behavior: This should either succeed by overwriting or return
        an appropriate error. The exact behavior depends on the sandbox provider.
        """
        # Create a directory
        sandbox.execute("mkdir -p /tmp/test_dir_upload")
        # Try to upload a file with the same name as the directory
        responses = sandbox.upload_files([("/tmp/test_dir_upload", b"file content")])
        assert len(responses) == 1
        assert responses[0].path == "/tmp/test_dir_upload"
        # Behavior depends on implementation - just verify we get a response
class TestRunLoopIntegration(BaseSandboxIntegrationTest):
    """Run the shared sandbox integration suite against the RunLoop backend."""

    @pytest.fixture(scope="class")
    def sandbox(self) -> Iterator[BaseSandbox]:
        """Create one RunLoop sandbox, shared by all tests in this class."""
        with create_sandbox("runloop") as runloop_sandbox:
            yield runloop_sandbox
class TestDaytonaIntegration(BaseSandboxIntegrationTest):
    """Run the shared sandbox integration suite against the Daytona backend."""

    @pytest.fixture(scope="class")
    def sandbox(self) -> Iterator[BaseSandbox]:
        """Create one Daytona sandbox, shared by all tests in this class."""
        with create_sandbox("daytona") as daytona_sandbox:
            yield daytona_sandbox
class TestModalIntegration(BaseSandboxIntegrationTest):
    """Run the shared sandbox integration suite against the Modal backend."""

    @pytest.fixture(scope="class")
    def sandbox(self) -> Iterator[BaseSandbox]:
        """Yield one class-scoped Modal sandbox; the context manager tears it down."""
        with create_sandbox("modal") as modal_sandbox:
            yield modal_sandbox

View File

@@ -0,0 +1,139 @@
"""Tests for project-specific memory and dual agent.md loading."""
import os
from pathlib import Path
import pytest
from deepagents_cli.agent_memory import AgentMemoryMiddleware
from deepagents_cli.config import Settings
from deepagents_cli.skills import SkillsMiddleware
class TestAgentMemoryMiddleware:
    """Test dual memory loading in AgentMemoryMiddleware.

    The middleware is expected to populate ``user_memory`` from
    ``~/.deepagents/<agent>/agent.md`` and, when a project root is detected,
    ``project_memory`` from ``<project>/.deepagents/agent.md``.
    """

    def test_load_user_memory_only(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Test loading user agent.md when no project memory exists."""
        # Mock Path.home() so the middleware resolves ~/.deepagents under tmp_path.
        monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)
        agent_dir = tmp_path / ".deepagents" / "test_agent"
        agent_dir.mkdir(parents=True)
        user_md = agent_dir / "agent.md"
        user_md.write_text("User instructions")
        # A directory without .git must not be detected as a project root.
        non_project_dir = tmp_path / "not-a-project"
        non_project_dir.mkdir()
        # monkeypatch.chdir restores the previous cwd automatically on teardown,
        # even when an assertion fails (replaces manual os.chdir + try/finally).
        monkeypatch.chdir(non_project_dir)
        test_settings = Settings.from_environment(start_path=non_project_dir)
        middleware = AgentMemoryMiddleware(settings=test_settings, assistant_id="test_agent")
        result = middleware.before_agent({}, None)
        assert result["user_memory"] == "User instructions"
        assert "project_memory" not in result

    def test_load_both_memories(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Test loading both user and project agent.md."""
        monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)
        agent_dir = tmp_path / ".deepagents" / "test_agent"
        agent_dir.mkdir(parents=True)
        (agent_dir / "agent.md").write_text("User instructions")
        # A .git directory marks project_root as a project; its .deepagents/
        # folder carries the project-level memory file.
        project_root = tmp_path / "project"
        project_root.mkdir()
        (project_root / ".git").mkdir()
        (project_root / ".deepagents").mkdir()
        (project_root / ".deepagents" / "agent.md").write_text("Project instructions")
        monkeypatch.chdir(project_root)
        test_settings = Settings.from_environment(start_path=project_root)
        middleware = AgentMemoryMiddleware(settings=test_settings, assistant_id="test_agent")
        result = middleware.before_agent({}, None)
        assert result["user_memory"] == "User instructions"
        assert result["project_memory"] == "Project instructions"

    def test_memory_not_reloaded_if_already_in_state(self, tmp_path: Path) -> None:
        """Test that memory is not reloaded if already in state."""
        agent_dir = tmp_path / ".deepagents" / "test_agent"
        agent_dir.mkdir(parents=True)
        test_settings = Settings.from_environment(start_path=tmp_path)
        middleware = AgentMemoryMiddleware(settings=test_settings, assistant_id="test_agent")
        # State already carries both memories: before_agent must return no updates.
        state = {"user_memory": "Existing memory", "project_memory": "Existing project"}
        assert middleware.before_agent(state, None) == {}
class TestSkillsPathResolution:
    """Test skills path resolution with per-agent structure."""

    def test_skills_middleware_paths(self, tmp_path: Path) -> None:
        """Test that skills middleware uses correct per-agent paths."""
        agent_dir = tmp_path / ".deepagents" / "test_agent"
        skills_dir = agent_dir / "skills"
        skills_dir.mkdir(parents=True)
        middleware = SkillsMiddleware(skills_dir=skills_dir, assistant_id="test_agent")
        # The display path is a stable, user-facing "~" form independent of tmp_path.
        assert middleware.skills_dir == skills_dir
        assert middleware.user_skills_display == "~/.deepagents/test_agent/skills"

    def test_skills_dir_per_agent(self, tmp_path: Path) -> None:
        """Test that different agents have separate skills directories."""
        # SkillsMiddleware is imported at module level; the redundant
        # function-local re-import was removed.
        agent1_skills = tmp_path / ".deepagents" / "agent1" / "skills"
        agent1_skills.mkdir(parents=True)
        middleware1 = SkillsMiddleware(skills_dir=agent1_skills, assistant_id="agent1")
        agent2_skills = tmp_path / ".deepagents" / "agent2" / "skills"
        agent2_skills.mkdir(parents=True)
        middleware2 = SkillsMiddleware(skills_dir=agent2_skills, assistant_id="agent2")
        # Each agent gets its own directory and display path.
        assert middleware1.skills_dir != middleware2.skills_dir
        assert "agent1" in middleware1.user_skills_display
        assert "agent2" in middleware2.user_skills_display

View File

@@ -0,0 +1 @@
"""Skills unit tests."""

View File

@@ -0,0 +1,217 @@
"""Unit tests for skills command sanitization and validation."""
from pathlib import Path
import pytest
from deepagents_cli.skills.commands import _validate_name, _validate_skill_path
class TestValidateSkillName:
    """Test skill name validation per Agent Skills spec (https://agentskills.io/specification)."""

    def test_valid_skill_names(self):
        """Spec-compliant names are accepted.

        Per spec: lowercase alphanumeric plus hyphens, no leading/trailing
        hyphen, no consecutive hyphens, at most 64 characters.
        """
        for candidate in (
            "web-research",
            "langgraph-docs",
            "skill123",
            "skill-with-many-parts",
            "a",
            "a1",
            "code-review",
            "data-analysis",
        ):
            ok, message = _validate_name(candidate)
            assert ok, f"Valid name '{candidate}' was rejected: {message}"
            assert message == ""

    def test_invalid_names_per_spec(self):
        """Names violating the spec's lexical rules are rejected."""
        cases = (
            ("MySkill", "uppercase not allowed"),
            ("my_skill", "underscores not allowed"),
            ("skill_with_underscores", "underscores not allowed"),
            ("-skill", "cannot start with hyphen"),
            ("skill-", "cannot end with hyphen"),
            ("skill--name", "consecutive hyphens not allowed"),
        )
        for candidate, why in cases:
            ok, message = _validate_name(candidate)
            assert not ok, f"Invalid name '{candidate}' ({why}) was accepted"
            assert message != ""

    def test_path_traversal_attacks(self):
        """Names attempting directory traversal are rejected with a path-related error."""
        for candidate in (
            "../../../etc/passwd",
            "../../.ssh/authorized_keys",
            "../.bashrc",
            "..\\..\\windows\\system32",
            "skill/../../../etc",
            "../../tmp/exploit",
            "../..",
            "..",
        ):
            ok, message = _validate_name(candidate)
            assert not ok, f"Malicious name '{candidate}' was accepted"
            assert message != ""
            assert "path" in message.lower() or ".." in message

    def test_absolute_paths(self):
        """Absolute (POSIX and Windows style) paths are rejected."""
        for candidate in (
            "/etc/passwd",
            "/home/user/.ssh",
            "\\Windows\\System32",
            "/tmp/exploit",
        ):
            ok, message = _validate_name(candidate)
            assert not ok, f"Absolute path '{candidate}' was accepted"
            assert message != ""

    def test_path_separators(self):
        """Names containing forward or back slashes are rejected."""
        for candidate in (
            "skill/name",
            "skill\\name",
            "path/to/skill",
            "parent\\child",
        ):
            ok, message = _validate_name(candidate)
            assert not ok, f"Path with separator '{candidate}' was accepted"
            assert message != ""

    def test_invalid_characters(self):
        """Shell metacharacters and other forbidden characters are rejected."""
        for candidate in (
            "skill name",  # space
            "skill;rm -rf /",  # command injection
            "skill`whoami`",  # command substitution
            "skill$(whoami)",  # command substitution
            "skill&ls",  # command chaining
            "skill|cat",  # pipe
            "skill>file",  # redirect
            "skill<file",  # redirect
            "skill*",  # wildcard
            "skill?",  # wildcard
            "skill[a]",  # pattern
            "skill{a,b}",  # brace expansion
            "skill$VAR",  # variable expansion
            "skill@host",  # at sign
            "skill#comment",  # hash
            "skill!event",  # exclamation
            "skill'quote",  # single quote
            'skill"quote',  # double quote
        ):
            ok, message = _validate_name(candidate)
            assert not ok, f"Invalid character in '{candidate}' was accepted"
            assert message != ""

    def test_empty_names(self):
        """Empty and whitespace-only names are rejected."""
        for candidate in ("", " ", "\t", "\n"):
            ok, message = _validate_name(candidate)
            assert not ok, f"Empty/whitespace name '{candidate}' was accepted"
            assert message != ""
class TestValidateSkillPath:
    """Test skill path validation to ensure paths stay within bounds."""

    def test_valid_path_within_base(self, tmp_path: Path) -> None:
        """A child of the base directory is accepted."""
        base = tmp_path / "skills"
        base.mkdir()
        ok, message = _validate_skill_path(base / "my-skill", base)
        assert ok, f"Valid path was rejected: {message}"
        assert message == ""

    def test_path_traversal_outside_base(self, tmp_path: Path) -> None:
        """A sibling outside the base directory is rejected."""
        base = tmp_path / "skills"
        base.mkdir()
        escape_target = tmp_path / "malicious"
        ok, message = _validate_skill_path(escape_target, base)
        assert not ok, "Path outside base directory was accepted"
        assert message != ""

    def test_symlink_path_traversal(self, tmp_path: Path) -> None:
        """A symlink inside base that resolves outside of it is rejected."""
        base = tmp_path / "skills"
        base.mkdir()
        outside = tmp_path / "outside"
        outside.mkdir()
        link = base / "evil-link"
        try:
            link.symlink_to(outside)
        except OSError:
            # Some platforms (or unprivileged users) cannot create symlinks.
            pytest.skip("Symlink creation not supported")
        ok, message = _validate_skill_path(link, base)
        assert not ok, "Symlink to outside directory was accepted"
        assert message != ""

    def test_nonexistent_path_validation(self, tmp_path: Path) -> None:
        """A not-yet-created path under base is still considered valid."""
        base = tmp_path / "skills"
        base.mkdir()
        ok, message = _validate_skill_path(base / "new-skill", base)
        assert ok, f"Valid non-existent path was rejected: {message}"
        assert message == ""
class TestIntegrationSecurity:
    """Integration tests for security across the command flow."""

    def test_combined_validation(self, tmp_path: Path) -> None:
        """Every attack vector must be stopped by name OR path validation."""
        base = tmp_path / "skills"
        base.mkdir()
        attack_vectors = (
            ("../../../etc/passwd", "path traversal"),
            ("/etc/passwd", "absolute path"),
            ("skill/../../../tmp", "hidden traversal"),
            ("skill;rm -rf", "command injection"),
        )
        for candidate, attack_type in attack_vectors:
            name_ok, name_error = _validate_name(candidate)
            if not name_ok:
                # Name validation caught it - this is good
                assert name_error != "", f"No error message for {attack_type}"
                continue
            # Name validation let it through: path validation is the last line
            # of defense and must reject the resolved location.
            path_ok, _ = _validate_skill_path(base / candidate, base)
            assert not path_ok, f"{attack_type} bypassed both validations: {candidate}"

View File

@@ -0,0 +1,292 @@
"""Unit tests for skills loading functionality."""
from pathlib import Path
from deepagents_cli.skills.load import list_skills
class TestListSkillsSingleDirectory:
    """Test list_skills function for loading skills from a single directory."""

    def test_list_skills_empty_directory(self, tmp_path: Path) -> None:
        """An existing but empty directory yields no skills."""
        empty_dir = tmp_path / "skills"
        empty_dir.mkdir()
        assert list_skills(user_skills_dir=empty_dir, project_skills_dir=None) == []

    def test_list_skills_with_valid_skill(self, tmp_path: Path) -> None:
        """A skill with complete YAML frontmatter is discovered and described."""
        root = tmp_path / "skills"
        root.mkdir()
        skill_home = root / "test-skill"
        skill_home.mkdir()
        manifest = skill_home / "SKILL.md"
        manifest.write_text("""---
name: test-skill
description: A test skill
---
# Test Skill
This is a test skill.
""")
        found = list_skills(user_skills_dir=root, project_skills_dir=None)
        assert len(found) == 1
        entry = found[0]
        assert entry["name"] == "test-skill"
        assert entry["description"] == "A test skill"
        assert entry["source"] == "user"
        assert Path(entry["path"]) == manifest

    def test_list_skills_source_parameter(self, tmp_path: Path) -> None:
        """Skills discovered via project_skills_dir are tagged source='project'."""
        root = tmp_path / "skills"
        root.mkdir()
        skill_home = root / "project-skill"
        skill_home.mkdir()
        (skill_home / "SKILL.md").write_text("""---
name: project-skill
description: A project skill
---
# Project Skill
""")
        found = list_skills(user_skills_dir=None, project_skills_dir=root)
        assert len(found) == 1
        assert found[0]["source"] == "project"

    def test_list_skills_missing_frontmatter(self, tmp_path: Path) -> None:
        """A SKILL.md without YAML frontmatter is silently skipped."""
        root = tmp_path / "skills"
        root.mkdir()
        skill_home = root / "invalid-skill"
        skill_home.mkdir()
        (skill_home / "SKILL.md").write_text("# Invalid Skill\n\nNo frontmatter here.")
        assert list_skills(user_skills_dir=root, project_skills_dir=None) == []

    def test_list_skills_missing_required_fields(self, tmp_path: Path) -> None:
        """Frontmatter lacking either 'name' or 'description' disqualifies a skill."""
        root = tmp_path / "skills"
        root.mkdir()
        # Frontmatter without a description.
        no_desc = root / "incomplete-1"
        no_desc.mkdir()
        (no_desc / "SKILL.md").write_text("""---
name: incomplete-1
---
Content
""")
        # Frontmatter without a name.
        no_name = root / "incomplete-2"
        no_name.mkdir()
        (no_name / "SKILL.md").write_text("""---
description: Missing name
---
Content
""")
        assert list_skills(user_skills_dir=root, project_skills_dir=None) == []

    def test_list_skills_nonexistent_directory(self, tmp_path: Path) -> None:
        """A directory that does not exist yields no skills rather than raising."""
        missing = tmp_path / "nonexistent"
        assert list_skills(user_skills_dir=missing, project_skills_dir=None) == []
class TestListSkillsMultipleDirectories:
    """Test list_skills function for loading from multiple directories."""

    @staticmethod
    def _write_skill(parent: Path, name: str, description: str | None = None) -> None:
        """Create ``parent/name/SKILL.md`` with minimal frontmatter.

        When *description* is None the frontmatter deliberately omits the
        description field, producing an invalid skill.
        """
        skill_dir = parent / name
        skill_dir.mkdir()
        if description is None:
            body = f"---\nname: {name}\n---\nContent\n"
        else:
            body = f"---\nname: {name}\ndescription: {description}\n---\nContent\n"
        (skill_dir / "SKILL.md").write_text(body)

    def test_list_skills_user_only(self, tmp_path: Path) -> None:
        """Skills found only in the user directory carry source='user'."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        self._write_skill(user_dir, "user-skill", "A user skill")
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=None)
        assert len(found) == 1
        assert found[0]["name"] == "user-skill"
        assert found[0]["source"] == "user"

    def test_list_skills_project_only(self, tmp_path: Path) -> None:
        """Skills found only in the project directory carry source='project'."""
        project_dir = tmp_path / "project_skills"
        project_dir.mkdir()
        self._write_skill(project_dir, "project-skill", "A project skill")
        found = list_skills(user_skills_dir=None, project_skills_dir=project_dir)
        assert len(found) == 1
        assert found[0]["name"] == "project-skill"
        assert found[0]["source"] == "project"

    def test_list_skills_both_sources(self, tmp_path: Path) -> None:
        """Distinctly named skills from both directories are merged."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        project_dir = tmp_path / "project_skills"
        project_dir.mkdir()
        self._write_skill(user_dir, "user-skill", "A user skill")
        self._write_skill(project_dir, "project-skill", "A project skill")
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=project_dir)
        assert len(found) == 2
        names = {entry["name"] for entry in found}
        assert "user-skill" in names
        assert "project-skill" in names
        # Each entry keeps the source of the directory it came from.
        by_name = {entry["name"]: entry for entry in found}
        assert by_name["user-skill"]["source"] == "user"
        assert by_name["project-skill"]["source"] == "project"

    def test_list_skills_project_overrides_user(self, tmp_path: Path) -> None:
        """On a name collision, the project-level skill wins."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        project_dir = tmp_path / "project_skills"
        project_dir.mkdir()
        self._write_skill(user_dir, "shared-skill", "User version")
        self._write_skill(project_dir, "shared-skill", "Project version")
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=project_dir)
        assert len(found) == 1  # Only one skill with this name
        winner = found[0]
        assert winner["name"] == "shared-skill"
        assert winner["description"] == "Project version"
        assert winner["source"] == "project"

    def test_list_skills_empty_directories(self, tmp_path: Path) -> None:
        """Two empty directories yield an empty result."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        project_dir = tmp_path / "project_skills"
        project_dir.mkdir()
        assert list_skills(user_skills_dir=user_dir, project_skills_dir=project_dir) == []

    def test_list_skills_no_directories(self):
        """Passing no directories at all yields an empty result."""
        assert list_skills(user_skills_dir=None, project_skills_dir=None) == []

    def test_list_skills_multiple_user_skills(self, tmp_path: Path) -> None:
        """All valid skills in a directory are discovered."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        for i in range(3):
            self._write_skill(user_dir, f"skill-{i}", f"Skill number {i}")
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=None)
        assert len(found) == 3
        assert {entry["name"] for entry in found} == {"skill-0", "skill-1", "skill-2"}

    def test_list_skills_mixed_valid_invalid(self, tmp_path: Path) -> None:
        """Invalid skills are skipped without hiding valid siblings."""
        user_dir = tmp_path / "user_skills"
        user_dir.mkdir()
        self._write_skill(user_dir, "valid-skill", "A valid skill")
        # Missing description makes this one invalid.
        self._write_skill(user_dir, "invalid-skill", None)
        found = list_skills(user_skills_dir=user_dir, project_skills_dir=None)
        assert len(found) == 1
        assert found[0]["name"] == "valid-skill"

View File

@@ -0,0 +1,267 @@
"""Unit tests for agent formatting functions."""
from pathlib import Path
from unittest.mock import Mock
from deepagents_cli.agent import (
_format_edit_file_description,
_format_execute_description,
_format_fetch_url_description,
_format_shell_description,
_format_task_description,
_format_web_search_description,
_format_write_file_description,
)
def test_format_write_file_description_create_new_file(tmp_path: Path) -> None:
    """A write_file call targeting a nonexistent path is described as a creation."""
    target = tmp_path / "new_file.py"
    call = {
        "name": "write_file",
        "args": {"file_path": str(target), "content": "def hello():\n return 'world'\n"},
        "id": "call-1",
    }
    rendered = _format_write_file_description(call, Mock(), Mock())
    assert f"File: {target}" in rendered
    assert "Action: Create file" in rendered
    assert "Lines: 2" in rendered
def test_format_write_file_description_overwrite_existing_file(tmp_path: Path) -> None:
    """A write_file call targeting an existing path is described as an overwrite."""
    target = tmp_path / "existing.py"
    target.write_text("old content")  # pre-existing file triggers the overwrite wording
    call = {
        "name": "write_file",
        "args": {"file_path": str(target), "content": "line1\nline2\nline3\n"},
        "id": "call-2",
    }
    rendered = _format_write_file_description(call, Mock(), Mock())
    assert f"File: {target}" in rendered
    assert "Action: Overwrite file" in rendered
    assert "Lines: 3" in rendered
def test_format_edit_file_description_single_occurrence():
    """replace_all=False is rendered as a single-occurrence replacement."""
    call = {
        "name": "edit_file",
        "args": {
            "file_path": "/path/to/file.py",
            "old_string": "foo",
            "new_string": "bar",
            "replace_all": False,
        },
        "id": "call-3",
    }
    rendered = _format_edit_file_description(call, Mock(), Mock())
    assert "File: /path/to/file.py" in rendered
    assert "Action: Replace text (single occurrence)" in rendered
def test_format_edit_file_description_all_occurrences():
    """replace_all=True is rendered as an all-occurrences replacement."""
    call = {
        "name": "edit_file",
        "args": {
            "file_path": "/path/to/file.py",
            "old_string": "foo",
            "new_string": "bar",
            "replace_all": True,
        },
        "id": "call-4",
    }
    rendered = _format_edit_file_description(call, Mock(), Mock())
    assert "File: /path/to/file.py" in rendered
    assert "Action: Replace text (all occurrences)" in rendered
def test_format_web_search_description():
    """The web_search description echoes query, max_results, and a cost warning."""
    call = {
        "name": "web_search",
        "args": {"query": "python async programming", "max_results": 10},
        "id": "call-5",
    }
    rendered = _format_web_search_description(call, Mock(), Mock())
    assert "Query: python async programming" in rendered
    assert "Max results: 10" in rendered
    assert "⚠️ This will use Tavily API credits" in rendered
def test_format_web_search_description_default_max_results():
    """Omitting max_results falls back to the documented default of 5."""
    call = {
        "name": "web_search",
        "args": {"query": "langchain tutorial"},
        "id": "call-6",
    }
    rendered = _format_web_search_description(call, Mock(), Mock())
    assert "Query: langchain tutorial" in rendered
    assert "Max results: 5" in rendered
def test_format_fetch_url_description():
    """The fetch_url description echoes the URL, timeout, and a fetch warning."""
    call = {
        "name": "fetch_url",
        "args": {"url": "https://example.com/docs", "timeout": 60},
        "id": "call-7",
    }
    rendered = _format_fetch_url_description(call, Mock(), Mock())
    assert "URL: https://example.com/docs" in rendered
    assert "Timeout: 60s" in rendered
    assert "⚠️ Will fetch and convert web content to markdown" in rendered
def test_format_fetch_url_description_default_timeout():
    """Omitting timeout falls back to the documented default of 30 seconds."""
    call = {
        "name": "fetch_url",
        "args": {"url": "https://api.example.com"},
        "id": "call-8",
    }
    rendered = _format_fetch_url_description(call, Mock(), Mock())
    assert "URL: https://api.example.com" in rendered
    assert "Timeout: 30s" in rendered
def test_format_task_description():
    """The task description shows subagent type, instructions, and an access warning."""
    instructions = "Analyze code structure and identify the main components."
    call = {
        "name": "task",
        "args": {"description": instructions, "subagent_type": "general-purpose"},
        "id": "call-9",
    }
    rendered = _format_task_description(call, Mock(), Mock())
    assert "Subagent Type: general-purpose" in rendered
    assert "Task Instructions:" in rendered
    assert instructions in rendered
    assert "⚠️ Subagent will have access to file operations and shell commands" in rendered
def test_format_task_description_truncates_long_description():
    """Overlong task instructions are truncated with an ellipsis."""
    long_description = "x" * 600  # well past the 500-char display budget
    call = {
        "name": "task",
        "args": {"description": long_description, "subagent_type": "general-purpose"},
        "id": "call-10",
    }
    rendered = _format_task_description(call, Mock(), Mock())
    assert "Subagent Type: general-purpose" in rendered
    assert "..." in rendered
    # Truncation to ~500 chars plus boilerplate keeps the total well under
    # the original length plus headroom.
    assert len(rendered) < len(long_description) + 300
def test_format_shell_description():
    """The shell description echoes the command and names the working directory."""
    call = {
        "name": "shell",
        "args": {"command": "ls -la /tmp"},
        "id": "call-11",
    }
    rendered = _format_shell_description(call, Mock(), Mock())
    assert "Shell Command: ls -la /tmp" in rendered
    assert "Working Directory:" in rendered
def test_format_execute_description():
    """The execute description echoes the command and marks the sandbox location."""
    call = {
        "name": "execute",
        "args": {"command": "python script.py"},
        "id": "call-12",
    }
    rendered = _format_execute_description(call, Mock(), Mock())
    assert "Execute Command: python script.py" in rendered
    assert "Location: Remote Sandbox" in rendered

View File

@@ -0,0 +1,109 @@
"""Tests for config module including project discovery utilities."""
from pathlib import Path
from deepagents_cli.config import _find_project_agent_md, _find_project_root
class TestProjectRootDetection:
    """Test project root detection via .git directory."""

    def test_find_project_root_with_git(self, tmp_path: Path) -> None:
        """Walking up from a nested directory finds the repo containing .git."""
        repo = tmp_path / "my-project"
        repo.mkdir()
        (repo / ".git").mkdir()
        # Start the search two levels below the repo root.
        nested = repo / "src" / "components"
        nested.mkdir(parents=True)
        assert _find_project_root(nested) == repo

    def test_find_project_root_no_git(self, tmp_path: Path) -> None:
        """Without any .git ancestor, detection returns None."""
        plain_dir = tmp_path / "no-git"
        plain_dir.mkdir()
        assert _find_project_root(plain_dir) is None

    def test_find_project_root_nested_git(self, tmp_path: Path) -> None:
        """The nearest enclosing repo wins over an outer one."""
        outer = tmp_path / "outer"
        outer.mkdir()
        (outer / ".git").mkdir()
        inner = outer / "inner"
        inner.mkdir()
        (inner / ".git").mkdir()
        # Searching from the inner repo must not skip up to the outer one.
        assert _find_project_root(inner) == inner
class TestProjectAgentMdFinding:
    """Test finding project-specific agent.md files."""

    def test_find_agent_md_in_deepagents_dir(self, tmp_path: Path) -> None:
        """agent.md inside .deepagents/ is discovered."""
        root = tmp_path / "project"
        root.mkdir()
        hidden_dir = root / ".deepagents"
        hidden_dir.mkdir()
        preferred_md = hidden_dir / "agent.md"
        preferred_md.write_text("Project instructions")
        assert _find_project_agent_md(root) == [preferred_md]

    def test_find_agent_md_in_root(self, tmp_path: Path) -> None:
        """With no .deepagents/, a root-level agent.md is the fallback."""
        root = tmp_path / "project"
        root.mkdir()
        fallback_md = root / "agent.md"
        fallback_md.write_text("Project instructions")
        assert _find_project_agent_md(root) == [fallback_md]

    def test_both_agent_md_files_combined(self, tmp_path: Path) -> None:
        """When both exist, both are returned with .deepagents/ first."""
        root = tmp_path / "project"
        root.mkdir()
        hidden_dir = root / ".deepagents"
        hidden_dir.mkdir()
        preferred_md = hidden_dir / "agent.md"
        preferred_md.write_text("In .deepagents/")
        fallback_md = root / "agent.md"
        fallback_md.write_text("In root")
        # Order encodes precedence: the .deepagents/ copy comes first.
        assert _find_project_agent_md(root) == [preferred_md, fallback_md]

    def test_find_agent_md_not_found(self, tmp_path: Path) -> None:
        """A project without any agent.md yields an empty list."""
        root = tmp_path / "project"
        root.mkdir()
        assert _find_project_agent_md(root) == []

View File

@@ -0,0 +1,335 @@
"""End-to-end unit tests for deepagents-cli with fake LLM models."""
import uuid
from collections.abc import Callable, Generator, Sequence
from contextlib import contextmanager
from pathlib import Path
from typing import Any
from unittest.mock import patch
from deepagents.backends import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool, tool
from deepagents_cli.agent import create_cli_agent
# Identity tool: echoes its argument back, so tests can assert that tool
# arguments supplied by the fake model round-trip unchanged.
@tool(description="Sample tool")
def sample_tool(sample_input: str) -> str:
    """A sample tool that returns the input string."""
    return sample_input
class FixedGenericFakeChatModel(GenericFakeChatModel):
    """Fixed version of GenericFakeChatModel that properly handles bind_tools."""

    def bind_tools(
        self,
        tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
        *,
        tool_choice: str | None = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, AIMessage]:
        """Override bind_tools to return self.

        Returning ``self`` lets agent code bind tools without changing the
        scripted message stream; the ``tools`` and ``tool_choice`` arguments
        are intentionally ignored.

        NOTE(review): presumably the stock GenericFakeChatModel does not
        support bind_tools — confirm against the installed langchain-core
        version.
        """
        return self
@contextmanager
def mock_settings(tmp_path: Path, assistant_id: str = "test-agent") -> Generator[Path, None, None]:
    """Patch CLI settings to point at throwaway directories under *tmp_path*.

    Args:
        tmp_path: Temporary directory path (typically pytest's tmp_path fixture).
        assistant_id: Agent identifier used to lay out the directory tree.

    Yields:
        The per-agent directory containing a pre-written agent.md.
    """
    agents_root = tmp_path / "agents"
    agent_dir = agents_root / assistant_id
    agent_dir.mkdir(parents=True)
    (agent_dir / "agent.md").write_text("# Test Agent\nTest agent instructions.")
    skills_dir = tmp_path / "skills"
    skills_dir.mkdir(parents=True)

    # These helpers return real Path objects so nothing MagicMock-shaped ends
    # up in agent state (MagicMocks would break state serialization).
    def _agent_md_for(agent_id: str) -> Path:
        return agents_root / agent_id / "agent.md"

    def _agent_dir_for(agent_id: str) -> Path:
        return agents_root / agent_id

    with patch("deepagents_cli.agent.settings") as patched:
        patched.user_deepagents_dir = agents_root
        patched.ensure_agent_dir.return_value = agent_dir
        patched.ensure_user_skills_dir.return_value = skills_dir
        patched.get_project_skills_dir.return_value = None
        patched.get_user_agent_md_path = _agent_md_for
        patched.get_agent_dir = _agent_dir_for
        patched.get_project_agent_md_path.return_value = None
        patched.project_root = None
        yield agent_dir
class TestDeepAgentsCLIEndToEnd:
    """Test suite for end-to-end deepagents-cli functionality with fake LLM.

    Each test scripts a fixed sequence of AIMessages into a
    FixedGenericFakeChatModel: the agent loop consumes one message per model
    turn, so the iterator order must match the expected tool-call flow.
    """

    def test_cli_agent_with_fake_llm_basic(self, tmp_path: Path) -> None:
        """Test basic CLI agent functionality with a fake LLM model.

        This test verifies that a CLI agent can be created and invoked with
        a fake LLM model that returns predefined responses.
        """
        with mock_settings(tmp_path):
            # Create a fake model that returns predefined messages:
            # turn 1 calls write_todos, turn 2 produces the final answer.
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(
                            content="I'll help you with that.",
                            tool_calls=[
                                {
                                    "name": "write_todos",
                                    "args": {"todos": []},
                                    "id": "call_1",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="Task completed successfully!",
                        ),
                    ]
                )
            )
            # Create a CLI agent with the fake model
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[],
            )
            # Invoke the agent with a simple message; a fresh UUID thread id
            # keeps checkpointer state isolated from other tests.
            result = agent.invoke(
                {"messages": [HumanMessage(content="Hello, agent!")]},
                {"configurable": {"thread_id": str(uuid.uuid4())}},
            )
            # Verify the agent executed correctly
            assert "messages" in result
            assert len(result["messages"]) > 0
            # Verify we got AI responses
            ai_messages = [msg for msg in result["messages"] if msg.type == "ai"]
            assert len(ai_messages) > 0
            # Verify the final AI message contains our expected content
            final_ai_message = ai_messages[-1]
            assert "Task completed successfully!" in final_ai_message.content

    def test_cli_agent_with_fake_llm_with_tools(self, tmp_path: Path) -> None:
        """Test CLI agent with tools using a fake LLM model.

        This test verifies that a CLI agent can handle tool calls correctly
        when using a fake LLM model.
        """
        with mock_settings(tmp_path):
            # Create a fake model that calls sample_tool, then wraps up.
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(
                            content="",
                            tool_calls=[
                                {
                                    "name": "sample_tool",
                                    "args": {"sample_input": "test input"},
                                    "id": "call_1",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="I called the sample_tool with 'test input'.",
                        ),
                    ]
                )
            )
            # Create a CLI agent with the fake model and sample_tool
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[sample_tool],
            )
            # Invoke the agent
            result = agent.invoke(
                {"messages": [HumanMessage(content="Use the sample tool")]},
                {"configurable": {"thread_id": "test-thread-2"}},
            )
            # Verify the agent executed correctly
            assert "messages" in result
            # Verify tool was called (tool results show up as type "tool")
            tool_messages = [msg for msg in result["messages"] if msg.type == "tool"]
            assert len(tool_messages) > 0
            # Verify the tool message contains our expected input
            assert any("test input" in msg.content for msg in tool_messages)

    def test_cli_agent_with_fake_llm_filesystem_tool(self, tmp_path: Path) -> None:
        """Test CLI agent with filesystem tools using a fake LLM model.

        This test verifies that a CLI agent can use the built-in filesystem
        tools (ls, read_file, etc.) with a fake LLM model.
        """
        with mock_settings(tmp_path):
            # Create a test file to list
            test_file = tmp_path / "test.txt"
            test_file.write_text("test content")
            # Create a fake model that uses the built-in `ls` filesystem tool;
            # no extra tools are passed, so `ls` must come from the agent itself.
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(
                            content="",
                            tool_calls=[
                                {
                                    "name": "ls",
                                    "args": {"path": str(tmp_path)},
                                    "id": "call_1",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="I've listed the files in the directory.",
                        ),
                    ]
                )
            )
            # Create a CLI agent with the fake model
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[],
            )
            # Invoke the agent
            result = agent.invoke(
                {"messages": [HumanMessage(content="List files")]},
                {"configurable": {"thread_id": "test-thread-3"}},
            )
            # Verify the agent executed correctly
            assert "messages" in result
            # Verify ls tool was called
            tool_messages = [msg for msg in result["messages"] if msg.type == "tool"]
            assert len(tool_messages) > 0

    def test_cli_agent_with_fake_llm_multiple_tool_calls(self, tmp_path: Path) -> None:
        """Test CLI agent with multiple tool calls using a fake LLM model.

        This test verifies that a CLI agent can handle multiple sequential
        tool calls with a fake LLM model.
        """
        with mock_settings(tmp_path):
            # Create a fake model that makes two sequential tool calls
            # (one per model turn) before producing the final answer.
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(
                            content="",
                            tool_calls=[
                                {
                                    "name": "sample_tool",
                                    "args": {"sample_input": "first call"},
                                    "id": "call_1",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="",
                            tool_calls=[
                                {
                                    "name": "sample_tool",
                                    "args": {"sample_input": "second call"},
                                    "id": "call_2",
                                    "type": "tool_call",
                                }
                            ],
                        ),
                        AIMessage(
                            content="I completed both tool calls successfully.",
                        ),
                    ]
                )
            )
            # Create a CLI agent with the fake model and sample_tool
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[sample_tool],
            )
            # Invoke the agent
            result = agent.invoke(
                {"messages": [HumanMessage(content="Use sample tool twice")]},
                {"configurable": {"thread_id": "test-thread-4"}},
            )
            # Verify the agent executed correctly
            assert "messages" in result
            # Verify multiple tool calls occurred
            tool_messages = [msg for msg in result["messages"] if msg.type == "tool"]
            assert len(tool_messages) >= 2
            # Verify both inputs were used
            tool_contents = [msg.content for msg in tool_messages]
            assert any("first call" in content for content in tool_contents)
            assert any("second call" in content for content in tool_contents)

    def test_cli_agent_backend_setup(self, tmp_path: Path) -> None:
        """Test that CLI agent creates the correct backend setup.

        This test verifies that the backend is properly configured with
        a CompositeBackend containing a FilesystemBackend.
        """
        with mock_settings(tmp_path):
            # Create a simple fake model (one final answer, no tool calls).
            model = FixedGenericFakeChatModel(
                messages=iter(
                    [
                        AIMessage(content="Done."),
                    ]
                )
            )
            # Create a CLI agent
            agent, backend = create_cli_agent(
                model=model,
                assistant_id="test-agent",
                tools=[],
            )
            # The default backend should be a filesystem-backed composite.
            assert isinstance(backend, CompositeBackend)
            assert isinstance(backend.default, FilesystemBackend)

View File

@@ -0,0 +1,120 @@
import textwrap
from pathlib import Path
from langchain_core.messages import ToolMessage
from deepagents_cli.file_ops import FileOpTracker, build_approval_preview
def test_tracker_records_read_lines(tmp_path: Path) -> None:
    """A completed read_file operation records line count and line range."""
    tracker = FileOpTracker(assistant_id=None)
    target = tmp_path / "example.py"
    tracker.start_operation(
        "read_file",
        {"file_path": str(target), "offset": 0, "limit": 100},
        "read-1",
    )
    tool_message = ToolMessage(
        content=" 1\tline one\n 2\tline two\n",
        tool_call_id="read-1",
        name="read_file",
    )
    completed = tracker.complete_with_message(tool_message)
    assert completed is not None
    assert completed.metrics.lines_read == 2
    assert completed.metrics.start_line == 1
    assert completed.metrics.end_line == 2
def test_tracker_records_write_diff(tmp_path: Path) -> None:
    """A completed write_file operation records written/added lines and a diff."""
    tracker = FileOpTracker(assistant_id=None)
    target = tmp_path / "created.txt"
    tracker.start_operation(
        "write_file",
        {"file_path": str(target)},
        "write-1",
    )
    # Write the file after start_operation so the tracker sees the new content.
    target.write_text("hello world\nsecond line\n")
    completed = tracker.complete_with_message(
        ToolMessage(
            content=f"Updated file {target}",
            tool_call_id="write-1",
            name="write_file",
        )
    )
    assert completed is not None
    assert completed.metrics.lines_written == 2
    assert completed.metrics.lines_added == 2
    assert completed.diff is not None
    assert "+hello world" in completed.diff
def test_tracker_records_edit_diff(tmp_path: Path) -> None:
    """A completed edit_file operation records added/removed lines and a diff."""
    tracker = FileOpTracker(assistant_id=None)
    target = tmp_path / "functions.py"
    # Initial file contents, captured when the operation starts.
    target.write_text(
        textwrap.dedent(
            """\
            def greet():
                return "hello"
            """
        )
    )
    tracker.start_operation(
        "edit_file",
        {"file_path": str(target)},
        "edit-1",
    )
    # Simulate the edit: one changed line plus a newly added function.
    target.write_text(
        textwrap.dedent(
            """\
            def greet():
                return "hi"
            def wave():
                return "wave"
            """
        )
    )
    completed = tracker.complete_with_message(
        ToolMessage(
            content=f"Successfully replaced 1 instance(s) of the string in '{target}'",
            tool_call_id="edit-1",
            name="edit_file",
        )
    )
    assert completed is not None
    assert completed.metrics.lines_added >= 1
    assert completed.metrics.lines_removed >= 1
    assert completed.diff is not None
    assert '- return "hello"' in completed.diff
    assert '+ return "hi"' in completed.diff
def test_build_approval_preview_generates_diff(tmp_path: Path) -> None:
    """build_approval_preview produces a diff for a pending edit_file call."""
    target = tmp_path / "notes.txt"
    target.write_text("alpha\nbeta\n")
    edit_args = {
        "file_path": str(target),
        "old_string": "beta",
        "new_string": "gamma",
        "replace_all": False,
    }
    preview = build_approval_preview("edit_file", edit_args, assistant_id=None)
    assert preview is not None
    assert preview.diff is not None
    assert "+gamma" in preview.diff

View File

@@ -0,0 +1,267 @@
"""Tests for image utilities (clipboard detection, base64 encoding, multimodal content)."""
import base64
import io
from unittest.mock import MagicMock, patch
from PIL import Image
from deepagents_cli.image_utils import (
ImageData,
create_multimodal_content,
encode_image_to_base64,
get_clipboard_image,
)
from deepagents_cli.input import ImageTracker
class TestImageData:
    """Tests for the ImageData dataclass."""

    def test_to_message_content_png(self) -> None:
        """PNG data converts to an image_url block with a data: PNG URL."""
        png = ImageData(
            base64_data="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==",
            format="png",
            placeholder="[image 1]",
        )
        content = png.to_message_content()
        assert content["type"] == "image_url"
        assert "image_url" in content
        assert content["image_url"]["url"].startswith("data:image/png;base64,")

    def test_to_message_content_jpeg(self) -> None:
        """JPEG data converts to an image_url block with a data: JPEG URL."""
        jpeg = ImageData(
            base64_data="abc123",
            format="jpeg",
            placeholder="[image 2]",
        )
        content = jpeg.to_message_content()
        assert content["type"] == "image_url"
        assert content["image_url"]["url"].startswith("data:image/jpeg;base64,")
class TestImageTracker:
    """Tests for the ImageTracker class."""

    def test_add_image_increments_counter(self) -> None:
        """Each added image receives the next sequential placeholder."""
        tracker = ImageTracker()
        first = ImageData(base64_data="abc", format="png", placeholder="")
        second = ImageData(base64_data="def", format="png", placeholder="")
        assert tracker.add_image(first) == "[image 1]"
        assert tracker.add_image(second) == "[image 2]"
        # The placeholder is also written back onto the ImageData itself.
        assert first.placeholder == "[image 1]"
        assert second.placeholder == "[image 2]"

    def test_get_images_returns_copy(self) -> None:
        """Mutating the list from get_images leaves the tracker untouched."""
        tracker = ImageTracker()
        tracker.add_image(ImageData(base64_data="abc", format="png", placeholder=""))
        snapshot = tracker.get_images()
        snapshot.clear()  # Must not affect the tracker's own list
        assert len(tracker.get_images()) == 1

    def test_clear_resets_counter(self) -> None:
        """clear() drops the stored images and rewinds the id counter."""
        tracker = ImageTracker()
        image = ImageData(base64_data="abc", format="png", placeholder="")
        tracker.add_image(image)
        tracker.add_image(image)
        assert tracker.next_id == 3
        assert len(tracker.images) == 2
        tracker.clear()
        assert tracker.next_id == 1
        assert len(tracker.images) == 0

    def test_add_after_clear_starts_at_one(self) -> None:
        """The first image added after clear() is numbered [image 1] again."""
        tracker = ImageTracker()
        image = ImageData(base64_data="abc", format="png", placeholder="")
        tracker.add_image(image)
        tracker.add_image(image)
        tracker.clear()
        fresh = ImageData(base64_data="xyz", format="png", placeholder="")
        assert tracker.add_image(fresh) == "[image 1]"

    def test_remove_image_and_reset_counter(self) -> None:
        """Removing an image (backspace delete) allows the counter to shrink."""
        tracker = ImageTracker()
        tracker.add_image(ImageData(base64_data="abc", format="png", placeholder=""))
        tracker.add_image(ImageData(base64_data="def", format="png", placeholder=""))
        # Mirror the backspace-delete code path: drop image 2, recompute next_id.
        tracker.images.pop(1)
        tracker.next_id = len(tracker.images) + 1
        assert tracker.next_id == 2
        assert len(tracker.images) == 1
class TestEncodeImageToBase64:
    """Tests for base64 encoding of image bytes."""

    def test_encode_image_bytes(self) -> None:
        """Arbitrary raw bytes round-trip through the base64 encoder."""
        payload = b"test image data"
        encoded = encode_image_to_base64(payload)
        assert base64.b64decode(encoded) == payload

    def test_encode_png_bytes(self) -> None:
        """Real PNG bytes round-trip through the base64 encoder."""
        # Build a tiny PNG entirely in memory.
        buffer = io.BytesIO()
        Image.new("RGB", (10, 10), color="red").save(buffer, format="PNG")
        payload = buffer.getvalue()
        encoded = encode_image_to_base64(payload)
        assert base64.b64decode(encoded) == payload
class TestCreateMultimodalContent:
    """Tests for assembling multimodal message content."""

    def test_text_only(self) -> None:
        """Text with no images yields a single text block."""
        content = create_multimodal_content("Hello world", [])
        assert len(content) == 1
        assert content[0]["type"] == "text"
        assert content[0]["text"] == "Hello world"

    def test_text_and_one_image(self) -> None:
        """Text plus one image yields a text block followed by an image block."""
        image = ImageData(base64_data="abc123", format="png", placeholder="[image 1]")
        content = create_multimodal_content("Describe this:", [image])
        assert len(content) == 2
        assert content[0]["type"] == "text"
        assert content[0]["text"] == "Describe this:"
        assert content[1]["type"] == "image_url"

    def test_text_and_multiple_images(self) -> None:
        """Every image contributes its own image_url block after the text."""
        images = [
            ImageData(base64_data="abc", format="png", placeholder="[image 1]"),
            ImageData(base64_data="def", format="png", placeholder="[image 2]"),
        ]
        content = create_multimodal_content("Compare these:", images)
        assert len(content) == 3
        assert content[0]["type"] == "text"
        assert content[1]["type"] == "image_url"
        assert content[2]["type"] == "image_url"

    def test_empty_text_with_image(self) -> None:
        """An empty prompt contributes no empty text block."""
        image = ImageData(base64_data="abc", format="png", placeholder="[image 1]")
        content = create_multimodal_content("", [image])
        assert len(content) == 1
        assert content[0]["type"] == "image_url"

    def test_whitespace_only_text(self) -> None:
        """A whitespace-only prompt contributes no text block."""
        image = ImageData(base64_data="abc", format="png", placeholder="[image 1]")
        content = create_multimodal_content(" \n\t ", [image])
        assert len(content) == 1
        assert content[0]["type"] == "image_url"
class TestGetClipboardImage:
    """Tests for clipboard image detection."""

    @patch("deepagents_cli.image_utils.sys.platform", "linux")
    def test_unsupported_platform_returns_none(self) -> None:
        """Platforms other than macOS yield no clipboard image."""
        assert get_clipboard_image() is None

    @patch("deepagents_cli.image_utils.sys.platform", "darwin")
    @patch("deepagents_cli.image_utils._get_macos_clipboard_image")
    def test_macos_calls_macos_function(self, mock_macos_fn: MagicMock) -> None:
        """On macOS the platform-specific helper is dispatched exactly once."""
        mock_macos_fn.return_value = None
        get_clipboard_image()
        mock_macos_fn.assert_called_once()

    @patch("deepagents_cli.image_utils.sys.platform", "darwin")
    @patch("deepagents_cli.image_utils.subprocess.run")
    def test_pngpaste_success(self, mock_run: MagicMock) -> None:
        """A zero exit from pngpaste produces a base64-encoded PNG result."""
        # Build a small valid PNG in memory to stand in for clipboard data.
        buffer = io.BytesIO()
        Image.new("RGB", (10, 10), color="blue").save(buffer, format="PNG")
        mock_run.return_value = MagicMock(
            returncode=0,
            stdout=buffer.getvalue(),
        )
        clipboard = get_clipboard_image()
        assert clipboard is not None
        assert clipboard.format == "png"
        assert len(clipboard.base64_data) > 0

    @patch("deepagents_cli.image_utils.sys.platform", "darwin")
    @patch("deepagents_cli.image_utils.subprocess.run")
    def test_pngpaste_not_installed_falls_back(self, mock_run: MagicMock) -> None:
        """A missing pngpaste binary falls through to the osascript path."""
        # Call 1 (pngpaste) is absent; call 2 (osascript clipboard info)
        # reports text-only contents, i.e. no image class ("pngf") present.
        mock_run.side_effect = [
            FileNotFoundError("pngpaste not found"),
            MagicMock(returncode=0, stdout="text data"),
        ]
        assert get_clipboard_image() is None
        # Both retrieval strategies must have been attempted.
        assert mock_run.call_count == 2

    @patch("deepagents_cli.image_utils.sys.platform", "darwin")
    @patch("deepagents_cli.image_utils._get_clipboard_via_osascript")
    @patch("deepagents_cli.image_utils.subprocess.run")
    def test_no_image_in_clipboard(self, mock_run: MagicMock, mock_osascript: MagicMock) -> None:
        """When neither pngpaste nor osascript finds an image, None comes back."""
        mock_run.return_value = MagicMock(returncode=1, stdout=b"")
        mock_osascript.return_value = None
        assert get_clipboard_image() is None

View File

@@ -0,0 +1,11 @@
"""Test importing files."""


def test_imports() -> None:
    """Smoke test: the top-level deepagents_cli modules are importable.

    The imports themselves are the assertion — any ImportError (missing
    dependency, syntax error, circular import) fails the test.
    """
    from deepagents_cli import (
        agent,  # noqa: F401
        agent_memory,  # noqa: F401
        integrations,  # noqa: F401
    )
    from deepagents_cli.main import cli_main  # noqa: F401

View File

@@ -0,0 +1,24 @@
"""Test version consistency between _version.py and pyproject.toml."""

import tomllib
from pathlib import Path

from deepagents_cli._version import __version__


def test_version_matches_pyproject() -> None:
    """Verify that __version__ in _version.py matches version in pyproject.toml."""
    # pyproject.toml sits three directories above this test file.
    pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
    with pyproject_path.open("rb") as handle:
        declared_version = tomllib.load(handle)["project"]["version"]
    assert __version__ == declared_version, (
        f"Version mismatch: _version.py has '{__version__}' "
        f"but pyproject.toml has '{declared_version}'"
    )

View File

@@ -0,0 +1,72 @@
"""Tests for tools module."""
import requests
import responses
from deepagents_cli.tools import fetch_url
@responses.activate
def test_fetch_url_success() -> None:
    """A 200 HTML response is converted to markdown with request metadata."""
    responses.add(
        responses.GET,
        "http://example.com",
        body="<html><body><h1>Test</h1><p>Content</p></body></html>",
        status=200,
    )
    payload = fetch_url("http://example.com")
    assert payload["status_code"] == 200
    assert "Test" in payload["markdown_content"]
    assert payload["url"].startswith("http://example.com")
    assert payload["content_length"] > 0
@responses.activate
def test_fetch_url_http_error() -> None:
    """A 404 response surfaces as an error payload, not an exception."""
    responses.add(
        responses.GET,
        "http://example.com/notfound",
        status=404,
    )
    payload = fetch_url("http://example.com/notfound")
    assert "error" in payload
    assert "Fetch URL error" in payload["error"]
    assert payload["url"] == "http://example.com/notfound"
@responses.activate
def test_fetch_url_timeout() -> None:
    """A request timeout surfaces as an error payload."""
    # `responses` raises the exception given as body when the request is made.
    responses.add(
        responses.GET,
        "http://example.com/slow",
        body=requests.exceptions.Timeout(),
    )
    payload = fetch_url("http://example.com/slow", timeout=1)
    assert "error" in payload
    assert "Fetch URL error" in payload["error"]
    assert payload["url"] == "http://example.com/slow"
@responses.activate
def test_fetch_url_connection_error() -> None:
    """A connection failure surfaces as an error payload."""
    responses.add(
        responses.GET,
        "http://example.com/error",
        body=requests.exceptions.ConnectionError(),
    )
    payload = fetch_url("http://example.com/error")
    assert "error" in payload
    assert "Fetch URL error" in payload["error"]
    assert payload["url"] == "http://example.com/error"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
# Default scope for lint/format targets: the whole package directory.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache

# Target-specific overrides: full tree, only files changed vs master,
# the package, or just the tests.
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/deepagents --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=.
lint_tests: PYTHON_FILES=tests

# Lint: format check (diff only), then ruff + mypy unless LINT=minimal.
# All steps are skipped when PYTHON_FILES is empty (no changed files).
lint lint_diff lint_package lint_tests:
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES) --diff
	@if [ "$(LINT)" != "minimal" ]; then \
		if [ "$(PYTHON_FILES)" != "" ]; then \
			uv run --all-groups ruff check $(PYTHON_FILES); \
		fi; \
	fi
	@if [ "$(LINT)" != "minimal" ]; then \
		if [ "$(PYTHON_FILES)" != "" ]; then \
			mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); \
		fi; \
	fi

# Format: apply ruff formatting and auto-fixable lint rules in place.
format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check --fix $(PYTHON_FILES)

# Unit tests with coverage.
test:
	uv run pytest tests/unit_tests --cov=deepagents --cov-report=term-missing

# Integration tests with coverage.
integration_test:
	uv run pytest tests/integration_tests --cov=deepagents --cov-report=term-missing

View File

@@ -0,0 +1,495 @@
# 🧠🤖Deep Agents
Using an LLM to call tools in a loop is the simplest form of an agent.
This architecture, however, can yield agents that are “shallow” and fail to plan and act over longer, more complex tasks.
Applications like “Deep Research”, "Manus", and “Claude Code” have gotten around this limitation by implementing a combination of four things:
a **planning tool**, **sub agents**, access to a **file system**, and a **detailed prompt**.
<img src="../../deep_agents.png" alt="deep agent" width="600"/>
`deepagents` is a Python package that implements these in a general purpose way so that you can easily create a Deep Agent for your application. For a full overview and quickstart of `deepagents`, the best resource is our [docs](https://docs.langchain.com/oss/python/deepagents/overview).
**Acknowledgements: This project was primarily inspired by Claude Code, and initially was largely an attempt to see what made Claude Code general purpose, and make it even more so.**
## Installation
```bash
# pip
pip install deepagents
# uv
uv add deepagents
# poetry
poetry add deepagents
```
## Usage
(To run the example below, you will need to `pip install tavily-python`).
Make sure to set `TAVILY_API_KEY` in your environment. You can generate one [here](https://www.tavily.com/).
```python
import os
from typing import Literal
from tavily import TavilyClient
from deepagents import create_deep_agent
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
# Web search tool
def internet_search(
query: str,
max_results: int = 5,
topic: Literal["general", "news", "finance"] = "general",
include_raw_content: bool = False,
):
"""Run a web search"""
return tavily_client.search(
query,
max_results=max_results,
include_raw_content=include_raw_content,
topic=topic,
)
# System prompt to steer the agent to be an expert researcher
research_instructions = """You are an expert researcher. Your job is to conduct thorough research, and then write a polished report.
You have access to an internet search tool as your primary means of gathering information.
## `internet_search`
Use this to run an internet search for a given query. You can specify the max number of results to return, the topic, and whether raw content should be included.
"""
# Create the deep agent
agent = create_deep_agent(
tools=[internet_search],
system_prompt=research_instructions,
)
# Invoke the agent
result = agent.invoke({"messages": [{"role": "user", "content": "What is langgraph?"}]})
```
See [examples/research/research_agent.py](examples/research/research_agent.py) for a more complex example.
The agent created with `create_deep_agent` is just a LangGraph graph - so you can interact with it (streaming, human-in-the-loop, memory, studio)
in the same way you would any LangGraph agent.
## Core Capabilities
**Planning & Task Decomposition**
Deep Agents include a built-in `write_todos` tool that enables agents to break down complex tasks into discrete steps, track progress, and adapt plans as new information emerges.
**Context Management**
File system tools (`ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`) allow agents to offload large context to memory, preventing context window overflow and enabling work with variable-length tool results.
**Subagent Spawning**
A built-in `task` tool enables agents to spawn specialized subagents for context isolation. This keeps the main agent's context clean while still going deep on specific subtasks.
**Long-term Memory**
Extend agents with persistent memory across threads using LangGraph's Store. Agents can save and retrieve information from previous conversations.
## Customizing Deep Agents
There are several parameters you can pass to `create_deep_agent` to create your own custom deep agent.
### `model`
By default, `deepagents` uses `"claude-sonnet-4-5-20250929"`. You can customize this by passing any [LangChain model object](https://python.langchain.com/docs/integrations/chat/).
```python
from langchain.chat_models import init_chat_model
from deepagents import create_deep_agent
model = init_chat_model("openai:gpt-4o")
agent = create_deep_agent(
model=model,
)
```
### `system_prompt`
Deep Agents come with a built-in system prompt. This is a relatively detailed prompt that is heavily based on and inspired by [attempts](https://github.com/kn1026/cc/blob/main/claudecode.md) to [replicate](https://github.com/asgeirtj/system_prompts_leaks/blob/main/Anthropic/claude-code.md)
Claude Code's system prompt. It was made more general purpose than Claude Code's system prompt. The default prompt contains detailed instructions for how to use the built-in planning tool, file system tools, and sub agents.
Each deep agent tailored to a use case should include a custom system prompt specific to that use case as well. The importance of prompting for creating a successful deep agent cannot be overstated.
```python
from deepagents import create_deep_agent
research_instructions = """You are an expert researcher. Your job is to conduct thorough research, and then write a polished report.
"""
agent = create_deep_agent(
system_prompt=research_instructions,
)
```
### `tools`
Just like with tool-calling agents, you can provide a deep agent with a set of tools that it has access to.
```python
import os
from typing import Literal
from tavily import TavilyClient
from deepagents import create_deep_agent
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
def internet_search(
query: str,
max_results: int = 5,
topic: Literal["general", "news", "finance"] = "general",
include_raw_content: bool = False,
):
"""Run a web search"""
return tavily_client.search(
query,
max_results=max_results,
include_raw_content=include_raw_content,
topic=topic,
)
agent = create_deep_agent(
tools=[internet_search]
)
```
### `middleware`
`create_deep_agent` is implemented with middleware that can be customized. You can provide additional middleware to extend functionality, add tools, or implement custom hooks.
```python
from langchain_core.tools import tool
from deepagents import create_deep_agent
from langchain.agents.middleware import AgentMiddleware
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
@tool
def get_temperature(city: str) -> str:
"""Get the temperature in a city."""
return f"The temperature in {city} is 70 degrees Fahrenheit."
class WeatherMiddleware(AgentMiddleware):
tools = [get_weather, get_temperature]
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
middleware=[WeatherMiddleware()]
)
```
### `subagents`
A main feature of Deep Agents is their ability to spawn subagents. You can specify custom subagents that your agent can hand off work to in the subagents parameter. Sub agents are useful for context quarantine (to help not pollute the overall context of the main agent) as well as custom instructions.
`subagents` should be a list of dictionaries, where each dictionary follows this schema:
```python
class SubAgent(TypedDict):
name: str
description: str
system_prompt: str
tools: Sequence[BaseTool | Callable | dict[str, Any]]
model: NotRequired[str | BaseChatModel]
middleware: NotRequired[list[AgentMiddleware]]
interrupt_on: NotRequired[dict[str, bool | InterruptOnConfig]]
class CompiledSubAgent(TypedDict):
name: str
description: str
runnable: Runnable
```
**SubAgent fields:**
- **name**: This is the name of the subagent, and how the main agent will call the subagent
- **description**: This is the description of the subagent that is shown to the main agent
- **system_prompt**: This is the system prompt used for the subagent
- **tools**: This is the list of tools that the subagent has access to.
- **model**: Optional model name or model instance.
- **middleware**: Additional middleware to attach to the subagent. See [here](https://docs.langchain.com/oss/python/langchain/middleware) for an introduction to middleware and how it works with create_agent.
- **interrupt_on**: A custom interrupt config that specifies human-in-the-loop interactions for your tools.
**CompiledSubAgent fields:**
- **name**: This is the name of the subagent, and how the main agent will call the subagent
- **description**: This is the description of the subagent that is shown to the main agent
- **runnable**: A pre-built LangGraph graph/agent that will be used as the subagent
#### Using SubAgent
```python
import os
from typing import Literal
from tavily import TavilyClient
from deepagents import create_deep_agent
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
def internet_search(
query: str,
max_results: int = 5,
topic: Literal["general", "news", "finance"] = "general",
include_raw_content: bool = False,
):
"""Run a web search"""
return tavily_client.search(
query,
max_results=max_results,
include_raw_content=include_raw_content,
topic=topic,
)
research_subagent = {
"name": "research-agent",
"description": "Used to research more in depth questions",
"system_prompt": "You are a great researcher",
"tools": [internet_search],
"model": "openai:gpt-4o", # Optional override, defaults to main agent model
}
subagents = [research_subagent]
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
subagents=subagents
)
```
#### Using CustomSubAgent
For more complex use cases, you can provide your own pre-built LangGraph graph as a subagent:
```python
# Create a custom agent graph
custom_graph = create_agent(
model=your_model,
tools=specialized_tools,
prompt="You are a specialized agent for data analysis..."
)
# Use it as a custom subagent
custom_subagent = CompiledSubAgent(
name="data-analyzer",
description="Specialized agent for complex data analysis tasks",
runnable=custom_graph
)
subagents = [custom_subagent]
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
tools=[internet_search],
system_prompt=research_instructions,
subagents=subagents
)
```
### `interrupt_on`
A common reality for agents is that some tool operations may be sensitive and require human approval before execution. Deep Agents supports human-in-the-loop workflows through LangGraph's interrupt capabilities. You can configure which tools require approval using a checkpointer.
These tool configs are passed to our prebuilt [HITL middleware](https://docs.langchain.com/oss/python/langchain/middleware#human-in-the-loop) so that the agent pauses execution and waits for feedback from the user before executing configured tools.
```python
from langchain_core.tools import tool
from deepagents import create_deep_agent
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
agent = create_deep_agent(
model="anthropic:claude-sonnet-4-20250514",
tools=[get_weather],
interrupt_on={
"get_weather": {
"allowed_decisions": ["approve", "edit", "reject"]
},
}
)
```
## Deep Agents Middleware
Deep Agents are built with a modular middleware architecture. As a reminder, Deep Agents have access to:
- A planning tool
- A filesystem for storing context and long-term memories
- The ability to spawn subagents
Each of these features is implemented as separate middleware. When you create a deep agent with `create_deep_agent`, we automatically attach **TodoListMiddleware**, **FilesystemMiddleware** and **SubAgentMiddleware** to your agent.
Middleware is a composable concept, and you can choose to add as many or as few middleware to an agent depending on your use case. That means that you can also use any of the aforementioned middleware independently!
### TodoListMiddleware
Planning is integral to solving complex problems. If you've used Claude Code recently, you'll notice how it writes out a To-Do list before tackling complex, multi-part tasks. You'll also notice how it can adapt and update this To-Do list on the fly as more information comes in.
**TodoListMiddleware** provides your agent with a tool specifically for updating this To-Do list. Before, and while it executes a multi-part task, the agent is prompted to use the write_todos tool to keep track of what it's doing, and what still needs to be done.
```python
from langchain.agents import create_agent
from langchain.agents.middleware import TodoListMiddleware
# TodoListMiddleware is included by default in create_deep_agent
# You can customize it if building a custom agent
agent = create_agent(
model="anthropic:claude-sonnet-4-20250514",
# Custom planning instructions can be added via middleware
middleware=[
TodoListMiddleware(
system_prompt="Use the write_todos tool to..." # Optional: Custom addition to the system prompt
),
],
)
```
### FilesystemMiddleware
Context engineering is one of the main challenges in building effective agents. This can be particularly hard when using tools that can return variable length results (ex. web_search, rag), as long ToolResults can quickly fill up your context window.
**FilesystemMiddleware** provides four tools to your agent to interact with both short-term and long-term memory.
- **ls**: List the files in your filesystem
- **read_file**: Read an entire file, or a certain number of lines from a file
- **write_file**: Write a new file to your filesystem
- **edit_file**: Edit an existing file in your filesystem
```python
from langchain.agents import create_agent
from deepagents.middleware.filesystem import FilesystemMiddleware
# FilesystemMiddleware is included by default in create_deep_agent
# You can customize it if building a custom agent
agent = create_agent(
model="anthropic:claude-sonnet-4-20250514",
middleware=[
FilesystemMiddleware(
backend=..., # Optional: customize storage backend
system_prompt="Write to the filesystem when...", # Optional custom system prompt override
custom_tool_descriptions={
"ls": "Use the ls tool when...",
"read_file": "Use the read_file tool to..."
} # Optional: Custom descriptions for filesystem tools
),
],
)
```
### SubAgentMiddleware
Handing off tasks to subagents is a great way to isolate context, keeping the context window of the main (supervisor) agent clean while still going deep on a task. The subagents middleware allows you to supply subagents through a task tool.
A subagent is defined with a name, description, system prompt, and tools. You can also provide a subagent with a custom model, or with additional middleware. This can be particularly useful when you want to give the subagent an additional state key to share with the main agent.
```python
from langchain_core.tools import tool
from langchain.agents import create_agent
from deepagents.middleware.subagents import SubAgentMiddleware
@tool
def get_weather(city: str) -> str:
"""Get the weather in a city."""
return f"The weather in {city} is sunny."
agent = create_agent(
model="claude-sonnet-4-20250514",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [get_weather],
"model": "gpt-4.1",
"middleware": [],
}
],
)
],
)
```
For more complex use cases, you can also provide your own pre-built LangGraph graph as a subagent.
```python
# Create a custom LangGraph graph
def create_weather_graph():
workflow = StateGraph(...)
# Build your custom graph
return workflow.compile()
weather_graph = create_weather_graph()
# Wrap it in a CompiledSubAgent
weather_subagent = CompiledSubAgent(
name="weather",
description="This subagent can get weather in cities.",
runnable=weather_graph
)
agent = create_agent(
model="anthropic:claude-sonnet-4-20250514",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[weather_subagent],
)
],
)
```
## Sync vs Async
Prior versions of deepagents separated sync and async agent factories.
`async_create_deep_agent` has been folded in to `create_deep_agent`.
**You should use `create_deep_agent` as the factory for both sync and async agents**
## MCP
The `deepagents` library can be run with MCP tools. This can be achieved by using the [LangChain MCP Adapter library](https://github.com/langchain-ai/langchain-mcp-adapters).
**NOTE:** MCP tools are async, so you'll need to use `agent.ainvoke()` or `agent.astream()` for invocation.
(To run the example below, you will need to `pip install langchain-mcp-adapters`.)
```python
import asyncio
from langchain_mcp_adapters.client import MultiServerMCPClient
from deepagents import create_deep_agent
async def main():
# Collect MCP tools
mcp_client = MultiServerMCPClient(...)
mcp_tools = await mcp_client.get_tools()
# Create agent
agent = create_deep_agent(tools=mcp_tools, ....)
# Stream the agent
async for chunk in agent.astream(
{"messages": [{"role": "user", "content": "what is langgraph?"}]},
stream_mode="values"
):
if "messages" in chunk:
chunk["messages"][-1].pretty_print()
asyncio.run(main())
```

View File

@@ -0,0 +1,7 @@
"""DeepAgents package."""
from deepagents.graph import create_deep_agent
from deepagents.middleware.filesystem import FilesystemMiddleware
from deepagents.middleware.subagents import CompiledSubAgent, SubAgent, SubAgentMiddleware
__all__ = ["CompiledSubAgent", "FilesystemMiddleware", "SubAgent", "SubAgentMiddleware", "create_deep_agent"]

View File

@@ -0,0 +1,15 @@
"""Memory backends for pluggable file storage."""
from deepagents.backends.composite import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import BackendProtocol
from deepagents.backends.state import StateBackend
from deepagents.backends.store import StoreBackend
__all__ = [
"BackendProtocol",
"CompositeBackend",
"FilesystemBackend",
"StateBackend",
"StoreBackend",
]

View File

@@ -0,0 +1,557 @@
"""CompositeBackend: 경로 접두사(prefix)를 기반으로 작업을 다른 백엔드로 라우팅합니다."""
from collections import defaultdict
from deepagents.backends.protocol import (
BackendProtocol,
EditResult,
ExecuteResponse,
FileDownloadResponse,
FileInfo,
FileUploadResponse,
GrepMatch,
SandboxBackendProtocol,
WriteResult,
)
from deepagents.backends.state import StateBackend
class CompositeBackend:
    """Route file operations to different backends based on path prefixes.

    Paths matching a configured route prefix (e.g. ``/memories/``) are
    dispatched to that route's backend with the prefix stripped; all other
    paths go to the default backend. Listing/search results coming back from
    a routed backend get the route prefix re-applied so callers always see
    the composite (virtual) paths.
    """
    def __init__(
        self,
        default: BackendProtocol | StateBackend,
        routes: dict[str, BackendProtocol],
    ) -> None:
        """Initialize the composite backend.

        Args:
            default: Backend used for all paths that match no route prefix.
            routes: Mapping of path prefix (e.g. ``"/memories/"``) to the
                backend that handles keys under that prefix.
        """
        # Default backend
        self.default = default
        # Virtual routes
        self.routes = routes
        # Sort routes by length (longest first) for correct prefix matching
        self.sorted_routes = sorted(routes.items(), key=lambda x: len(x[0]), reverse=True)
    def _get_backend_and_key(self, key: str) -> tuple[BackendProtocol, str]:
        """Determine which backend handles this key and strip the route prefix.

        Args:
            key: Original file path.

        Returns:
            Tuple of (backend, stripped_key). The stripped key has the route
            prefix removed but keeps a leading slash.
        """
        # Check routes in order of length (longest first)
        for prefix, backend in self.sorted_routes:
            if key.startswith(prefix):
                # Strip full prefix and ensure a leading slash remains
                # e.g., "/memories/notes.txt" → "/notes.txt"; "/memories/" → "/"
                suffix = key[len(prefix) :]
                stripped_key = f"/{suffix}" if suffix else "/"
                return backend, stripped_key
        return self.default, key
    def ls_info(self, path: str) -> list[FileInfo]:
        """List files and directories in the given directory (non-recursive).

        Args:
            path: Absolute directory path.

        Returns:
            List of FileInfo-like dicts for entries directly under the
            directory, with route prefixes re-applied. Directories end with
            a trailing ``/`` and have ``is_dir=True``.
        """
        # Check if path matches a specific route
        # NOTE(review): startswith on the rstrip'd prefix also matches sibling
        # paths such as "/memoriesX" for a "/memories/" route — confirm intended.
        for route_prefix, backend in self.sorted_routes:
            if path.startswith(route_prefix.rstrip("/")):
                # Query only the matching routed backend
                suffix = path[len(route_prefix) :]
                search_path = f"/{suffix}" if suffix else "/"
                infos = backend.ls_info(search_path)
                prefixed: list[FileInfo] = []
                for fi in infos:
                    fi = dict(fi)
                    fi["path"] = f"{route_prefix[:-1]}{fi['path']}"
                    prefixed.append(fi)
                return prefixed
        # At root, aggregate default and all routed backends
        if path == "/":
            results: list[FileInfo] = []
            results.extend(self.default.ls_info(path))
            for route_prefix, backend in self.sorted_routes:
                # Add the route itself as a directory (e.g., /memories/)
                results.append({
                    "path": route_prefix,
                    "is_dir": True,
                    "size": 0,
                    "modified_at": "",
                })
            results.sort(key=lambda x: x.get("path", ""))
            return results
        # Path doesn't match a route: query only default backend
        return self.default.ls_info(path)
    async def als_info(self, path: str) -> list[FileInfo]:
        """Async version of ls_info."""
        # Check if path matches a specific route
        for route_prefix, backend in self.sorted_routes:
            if path.startswith(route_prefix.rstrip("/")):
                # Query only the matching routed backend
                suffix = path[len(route_prefix) :]
                search_path = f"/{suffix}" if suffix else "/"
                infos = await backend.als_info(search_path)
                prefixed: list[FileInfo] = []
                for fi in infos:
                    fi = dict(fi)
                    fi["path"] = f"{route_prefix[:-1]}{fi['path']}"
                    prefixed.append(fi)
                return prefixed
        # At root, aggregate default and all routed backends
        if path == "/":
            results: list[FileInfo] = []
            results.extend(await self.default.als_info(path))
            for route_prefix, backend in self.sorted_routes:
                # Add the route itself as a directory (e.g., /memories/)
                results.append({
                    "path": route_prefix,
                    "is_dir": True,
                    "size": 0,
                    "modified_at": "",
                })
            results.sort(key=lambda x: x.get("path", ""))
            return results
        # Path doesn't match a route: query only default backend
        return await self.default.als_info(path)
    def read(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Read file contents, routing to the appropriate backend.

        Args:
            file_path: Absolute file path.
            offset: Line offset to start reading from (0-indexed).
            limit: Maximum number of lines to read.

        Returns:
            Formatted file content with line numbers, or an error message.
        """
        backend, stripped_key = self._get_backend_and_key(file_path)
        return backend.read(stripped_key, offset=offset, limit=limit)
    async def aread(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Async version of read."""
        backend, stripped_key = self._get_backend_and_key(file_path)
        return await backend.aread(stripped_key, offset=offset, limit=limit)
    def grep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list[GrepMatch] | str:
        """Search for a regex pattern, merging matches across backends.

        When *path* targets a specific route, only that backend is searched.
        Otherwise the default backend and every routed backend are searched
        and the matches merged, with route prefixes re-applied.

        Returns:
            List of GrepMatch dicts, or an error string on failure.
        """
        # If path targets a specific route, search only that backend
        for route_prefix, backend in self.sorted_routes:
            if path is not None and path.startswith(route_prefix.rstrip("/")):
                search_path = path[len(route_prefix) - 1 :]
                raw = backend.grep_raw(pattern, search_path if search_path else "/", glob)
                if isinstance(raw, str):
                    return raw
                return [{**m, "path": f"{route_prefix[:-1]}{m['path']}"} for m in raw]
        # Otherwise, search default and all routed backends and merge
        all_matches: list[GrepMatch] = []
        raw_default = self.default.grep_raw(pattern, path, glob)  # type: ignore[attr-defined]
        if isinstance(raw_default, str):
            # This happens if error occurs
            return raw_default
        all_matches.extend(raw_default)
        for route_prefix, backend in self.routes.items():
            raw = backend.grep_raw(pattern, "/", glob)
            if isinstance(raw, str):
                # This happens if error occurs
                return raw
            all_matches.extend({**m, "path": f"{route_prefix[:-1]}{m['path']}"} for m in raw)
        return all_matches
    async def agrep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list[GrepMatch] | str:
        """Async version of grep_raw."""
        # If path targets a specific route, search only that backend
        for route_prefix, backend in self.sorted_routes:
            if path is not None and path.startswith(route_prefix.rstrip("/")):
                search_path = path[len(route_prefix) - 1 :]
                raw = await backend.agrep_raw(pattern, search_path if search_path else "/", glob)
                if isinstance(raw, str):
                    return raw
                return [{**m, "path": f"{route_prefix[:-1]}{m['path']}"} for m in raw]
        # Otherwise, search default and all routed backends and merge
        all_matches: list[GrepMatch] = []
        raw_default = await self.default.agrep_raw(pattern, path, glob)  # type: ignore[attr-defined]
        if isinstance(raw_default, str):
            # This happens if error occurs
            return raw_default
        all_matches.extend(raw_default)
        for route_prefix, backend in self.routes.items():
            raw = await backend.agrep_raw(pattern, "/", glob)
            if isinstance(raw, str):
                # This happens if error occurs
                return raw
            all_matches.extend({**m, "path": f"{route_prefix[:-1]}{m['path']}"} for m in raw)
        return all_matches
    def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
        """Find files matching a glob pattern, merging results across backends.

        Returns:
            FileInfo dicts sorted by path, with route prefixes re-applied.
        """
        results: list[FileInfo] = []
        # Route based on path, not pattern
        for route_prefix, backend in self.sorted_routes:
            if path.startswith(route_prefix.rstrip("/")):
                search_path = path[len(route_prefix) - 1 :]
                infos = backend.glob_info(pattern, search_path if search_path else "/")
                return [{**fi, "path": f"{route_prefix[:-1]}{fi['path']}"} for fi in infos]
        # Path doesn't match any specific route - search default backend AND all routed backends
        results.extend(self.default.glob_info(pattern, path))
        for route_prefix, backend in self.routes.items():
            infos = backend.glob_info(pattern, "/")
            results.extend({**fi, "path": f"{route_prefix[:-1]}{fi['path']}"} for fi in infos)
        # Deterministic ordering
        results.sort(key=lambda x: x.get("path", ""))
        return results
    async def aglob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
        """Async version of glob_info."""
        results: list[FileInfo] = []
        # Route based on path, not pattern
        for route_prefix, backend in self.sorted_routes:
            if path.startswith(route_prefix.rstrip("/")):
                search_path = path[len(route_prefix) - 1 :]
                infos = await backend.aglob_info(pattern, search_path if search_path else "/")
                return [{**fi, "path": f"{route_prefix[:-1]}{fi['path']}"} for fi in infos]
        # Path doesn't match any specific route - search default backend AND all routed backends
        results.extend(await self.default.aglob_info(pattern, path))
        for route_prefix, backend in self.routes.items():
            infos = await backend.aglob_info(pattern, "/")
            results.extend({**fi, "path": f"{route_prefix[:-1]}{fi['path']}"} for fi in infos)
        # Deterministic ordering
        results.sort(key=lambda x: x.get("path", ""))
        return results
    def write(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Create a new file, routing to the appropriate backend.

        Args:
            file_path: Absolute file path.
            content: File content as a string.

        Returns:
            A success message or Command object, or an error if the file
            already exists.
        """
        backend, stripped_key = self._get_backend_and_key(file_path)
        res = backend.write(stripped_key, content)
        # If this is a state-backed update and default has state, merge so listings reflect changes
        if res.files_update:
            try:
                runtime = getattr(self.default, "runtime", None)
                if runtime is not None:
                    state = runtime.state
                    files = state.get("files", {})
                    files.update(res.files_update)
                    state["files"] = files
            except Exception:
                # Best-effort merge: state mirroring failures are deliberately swallowed.
                pass
        return res
    async def awrite(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Async version of write."""
        backend, stripped_key = self._get_backend_and_key(file_path)
        res = await backend.awrite(stripped_key, content)
        # If this is a state-backed update and default has state, merge so listings reflect changes
        if res.files_update:
            try:
                runtime = getattr(self.default, "runtime", None)
                if runtime is not None:
                    state = runtime.state
                    files = state.get("files", {})
                    files.update(res.files_update)
                    state["files"] = files
            except Exception:
                pass
        return res
    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Edit a file, routing to the appropriate backend.

        Args:
            file_path: Absolute file path.
            old_string: String to find and replace.
            new_string: Replacement string.
            replace_all: When True, replace every occurrence.

        Returns:
            A success message or Command object, or an error message on failure.
        """
        backend, stripped_key = self._get_backend_and_key(file_path)
        res = backend.edit(stripped_key, old_string, new_string, replace_all=replace_all)
        if res.files_update:
            try:
                runtime = getattr(self.default, "runtime", None)
                if runtime is not None:
                    state = runtime.state
                    files = state.get("files", {})
                    files.update(res.files_update)
                    state["files"] = files
            except Exception:
                pass
        return res
    async def aedit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Async version of edit."""
        backend, stripped_key = self._get_backend_and_key(file_path)
        res = await backend.aedit(stripped_key, old_string, new_string, replace_all=replace_all)
        if res.files_update:
            try:
                runtime = getattr(self.default, "runtime", None)
                if runtime is not None:
                    state = runtime.state
                    files = state.get("files", {})
                    files.update(res.files_update)
                    state["files"] = files
            except Exception:
                pass
        return res
    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Execute a command via the default backend.

        Execution is not path-specific, so it is always delegated to the
        default backend, which must implement SandboxBackendProtocol for
        this to work.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse containing combined output, exit code, and
            truncation flag.

        Raises:
            NotImplementedError: If the default backend doesn't support execution.
        """
        if isinstance(self.default, SandboxBackendProtocol):
            return self.default.execute(command)
        # This shouldn't be reached if the runtime check in the execute tool works correctly,
        # but we include it as a safety fallback.
        raise NotImplementedError(
            "Default backend doesn't support command execution (SandboxBackendProtocol). "
            "To enable execution, provide a default backend that implements SandboxBackendProtocol."
        )
    async def aexecute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Async version of execute."""
        if isinstance(self.default, SandboxBackendProtocol):
            return await self.default.aexecute(command)
        # This shouldn't be reached if the runtime check in the execute tool works correctly,
        # but we include it as a safety fallback.
        raise NotImplementedError(
            "Default backend doesn't support command execution (SandboxBackendProtocol). "
            "To enable execution, provide a default backend that implements SandboxBackendProtocol."
        )
    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files, batched per backend for efficiency.

        Groups files by target backend, calls each backend's upload_files
        once with all of its files, then merges the results back into the
        original order.

        Args:
            files: List of (path, content) tuples to upload.

        Returns:
            List of FileUploadResponse objects, one per input file, in the
            same order as the input.
        """
        # Pre-allocate result list
        results: list[FileUploadResponse | None] = [None] * len(files)
        # Group files by backend, tracking original indices
        # (local import is redundant with the module-level one; kept as-is)
        from collections import defaultdict
        backend_batches: dict[BackendProtocol, list[tuple[int, str, bytes]]] = defaultdict(list)
        for idx, (path, content) in enumerate(files):
            backend, stripped_path = self._get_backend_and_key(path)
            backend_batches[backend].append((idx, stripped_path, content))
        # Process each backend's batch
        for backend, batch in backend_batches.items():
            # Extract data for backend call
            indices, stripped_paths, contents = zip(*batch, strict=False)
            batch_files = list(zip(stripped_paths, contents, strict=False))
            # Call backend once with all its files
            batch_responses = backend.upload_files(batch_files)
            # Place responses at original indices with original paths
            for i, orig_idx in enumerate(indices):
                results[orig_idx] = FileUploadResponse(
                    path=files[orig_idx][0],  # Original path
                    error=batch_responses[i].error if i < len(batch_responses) else None,
                )
        return results  # type: ignore[return-value]
    async def aupload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Async version of upload_files."""
        # Pre-allocate result list
        results: list[FileUploadResponse | None] = [None] * len(files)
        # Group files by backend, tracking original indices
        backend_batches: dict[BackendProtocol, list[tuple[int, str, bytes]]] = defaultdict(list)
        for idx, (path, content) in enumerate(files):
            backend, stripped_path = self._get_backend_and_key(path)
            backend_batches[backend].append((idx, stripped_path, content))
        # Process each backend's batch
        for backend, batch in backend_batches.items():
            # Extract data for backend call
            indices, stripped_paths, contents = zip(*batch, strict=False)
            batch_files = list(zip(stripped_paths, contents, strict=False))
            # Call backend once with all its files
            batch_responses = await backend.aupload_files(batch_files)
            # Place responses at original indices with original paths
            for i, orig_idx in enumerate(indices):
                results[orig_idx] = FileUploadResponse(
                    path=files[orig_idx][0],  # Original path
                    error=batch_responses[i].error if i < len(batch_responses) else None,
                )
        return results  # type: ignore[return-value]
    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files, batched per backend for efficiency.

        Groups paths by target backend, calls each backend's download_files
        once with all of its paths, then merges the results back into the
        original order.

        Args:
            paths: List of file paths to download.

        Returns:
            List of FileDownloadResponse objects, one per input path, in the
            same order as the input.
        """
        # Pre-allocate result list
        results: list[FileDownloadResponse | None] = [None] * len(paths)
        backend_batches: dict[BackendProtocol, list[tuple[int, str]]] = defaultdict(list)
        for idx, path in enumerate(paths):
            backend, stripped_path = self._get_backend_and_key(path)
            backend_batches[backend].append((idx, stripped_path))
        # Process each backend's batch
        for backend, batch in backend_batches.items():
            # Extract data for backend call
            indices, stripped_paths = zip(*batch, strict=False)
            # Call backend once with all its paths
            batch_responses = backend.download_files(list(stripped_paths))
            # Place responses at original indices with original paths
            for i, orig_idx in enumerate(indices):
                results[orig_idx] = FileDownloadResponse(
                    path=paths[orig_idx],  # Original path
                    content=batch_responses[i].content if i < len(batch_responses) else None,
                    error=batch_responses[i].error if i < len(batch_responses) else None,
                )
        return results  # type: ignore[return-value]
    async def adownload_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Async version of download_files."""
        # Pre-allocate result list
        results: list[FileDownloadResponse | None] = [None] * len(paths)
        backend_batches: dict[BackendProtocol, list[tuple[int, str]]] = defaultdict(list)
        for idx, path in enumerate(paths):
            backend, stripped_path = self._get_backend_and_key(path)
            backend_batches[backend].append((idx, stripped_path))
        # Process each backend's batch
        for backend, batch in backend_batches.items():
            # Extract data for backend call
            indices, stripped_paths = zip(*batch, strict=False)
            # Call backend once with all its paths
            batch_responses = await backend.adownload_files(list(stripped_paths))
            # Place responses at original indices with original paths
            for i, orig_idx in enumerate(indices):
                results[orig_idx] = FileDownloadResponse(
                    path=paths[orig_idx],  # Original path
                    content=batch_responses[i].content if i < len(batch_responses) else None,
                    error=batch_responses[i].error if i < len(batch_responses) else None,
                )
        return results  # type: ignore[return-value]

View File

@@ -0,0 +1,544 @@
"""FilesystemBackend: 파일시스템에서 직접 파일을 읽고 씁니다.
보안 및 검색 업그레이드:
- virtual_mode일 때 루트 포함(root containment)을 통한 보안 경로 확인 (cwd로 샌드박싱됨)
- 가능한 경우 O_NOFOLLOW를 사용하여 파일 I/O 시 심볼릭 링크 따라가기 방지
- JSON 파싱을 포함한 Ripgrep 기반 검색과, 가상 경로 동작을 보존하면서
정규식 및 선택적 glob 포함 필터링을 지원하는 Python 폴백(fallback) 기능
"""
import json
import os
import re
import subprocess
from datetime import datetime
from pathlib import Path
import wcmatch.glob as wcglob
from deepagents.backends.protocol import (
BackendProtocol,
EditResult,
FileDownloadResponse,
FileInfo,
FileUploadResponse,
GrepMatch,
WriteResult,
)
from deepagents.backends.utils import (
check_empty_content,
format_content_with_line_numbers,
perform_string_replacement,
)
class FilesystemBackend(BackendProtocol):
"""파일시스템에서 직접 파일을 읽고 쓰는 백엔드.
파일은 실제 파일시스템 경로를 사용하여 접근합니다. 상대 경로는
현재 작업 디렉토리에 상대적으로 해결(resolve)됩니다. 내용은 일반 텍스트로
읽고 쓰이며, 메타데이터(타임스탬프)는 파일시스템 상태(stat)에서 파생됩니다.
"""
    def __init__(
        self,
        root_dir: str | Path | None = None,
        virtual_mode: bool = False,
        max_file_size_mb: int = 10,
    ) -> None:
        """Initialize the filesystem backend.

        Args:
            root_dir: Optional root directory for file operations. When
                provided, all file paths resolve relative to this directory;
                otherwise the current working directory is used.
            virtual_mode: When True, incoming paths are treated as virtual
                absolute paths sandboxed under the root directory (parent
                traversal is rejected and resolved paths must stay inside it).
            max_file_size_mb: Maximum per-file size, in megabytes, that the
                Python grep fallback will scan (larger files are skipped).
        """
        self.cwd = Path(root_dir).resolve() if root_dir else Path.cwd()
        self.virtual_mode = virtual_mode
        self.max_file_size_bytes = max_file_size_mb * 1024 * 1024
def _resolve_path(self, key: str) -> Path:
"""보안 검사를 포함하여 파일 경로를 해결(resolve)합니다.
virtual_mode=True일 때, 들어오는 경로를 self.cwd 하위의 가상 절대 경로로 취급하며,
상위 경로 탐색(.., ~)을 허용하지 않고 해결된 경로가 루트 내에 머물도록 보장합니다.
virtual_mode=False일 때, 레거시 동작을 유지합니다: 절대 경로는 그대료 허용되고,
상대 경로는 cwd 하위로 해결됩니다.
Args:
key: 파일 경로 (절대, 상대, 또는 virtual_mode=True일 때 가상 경로)
Returns:
해결된 절대 Path 객체
"""
if self.virtual_mode:
vpath = key if key.startswith("/") else "/" + key
if ".." in vpath or vpath.startswith("~"):
raise ValueError("Path traversal not allowed")
full = (self.cwd / vpath.lstrip("/")).resolve()
try:
full.relative_to(self.cwd)
except ValueError:
raise ValueError(f"Path:{full} outside root directory: {self.cwd}") from None
return full
path = Path(key)
if path.is_absolute():
return path
return (self.cwd / path).resolve()
    def ls_info(self, path: str) -> list[FileInfo]:
        """List files and directories in the given directory (non-recursive).

        Args:
            path: Absolute directory path to list.

        Returns:
            List of FileInfo-like dicts for entries directly under the
            directory. Directories get a trailing ``/`` and ``is_dir=True``.
            In virtual mode, paths are reported relative to the root with a
            leading ``/``; otherwise absolute filesystem paths are returned.
        """
        dir_path = self._resolve_path(path)
        if not dir_path.exists() or not dir_path.is_dir():
            return []
        results: list[FileInfo] = []
        # Convert cwd to string for comparison
        cwd_str = str(self.cwd)
        if not cwd_str.endswith("/"):
            cwd_str += "/"
        # List only direct children (non-recursive)
        try:
            for child_path in dir_path.iterdir():
                try:
                    is_file = child_path.is_file()
                    is_dir = child_path.is_dir()
                except OSError:
                    # Entry vanished or is unreadable; skip it.
                    continue
                abs_path = str(child_path)
                if not self.virtual_mode:
                    # Non-virtual mode: use absolute paths
                    if is_file:
                        try:
                            st = child_path.stat()
                            results.append({
                                "path": abs_path,
                                "is_dir": False,
                                "size": int(st.st_size),
                                "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                            })
                        except OSError:
                            # stat failed; report the entry without metadata.
                            results.append({"path": abs_path, "is_dir": False})
                    elif is_dir:
                        try:
                            st = child_path.stat()
                            results.append({
                                "path": abs_path + "/",
                                "is_dir": True,
                                "size": 0,
                                "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                            })
                        except OSError:
                            results.append({"path": abs_path + "/", "is_dir": True})
                else:
                    # Virtual mode: strip cwd prefix
                    if abs_path.startswith(cwd_str):
                        relative_path = abs_path[len(cwd_str) :]
                    elif abs_path.startswith(str(self.cwd)):
                        # Handle case where cwd doesn't end with /
                        relative_path = abs_path[len(str(self.cwd)) :].lstrip("/")
                    else:
                        # Path is outside cwd, return as-is or skip
                        relative_path = abs_path
                    virt_path = "/" + relative_path
                    if is_file:
                        try:
                            st = child_path.stat()
                            results.append({
                                "path": virt_path,
                                "is_dir": False,
                                "size": int(st.st_size),
                                "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                            })
                        except OSError:
                            results.append({"path": virt_path, "is_dir": False})
                    elif is_dir:
                        try:
                            st = child_path.stat()
                            results.append({
                                "path": virt_path + "/",
                                "is_dir": True,
                                "size": 0,
                                "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                            })
                        except OSError:
                            results.append({"path": virt_path + "/", "is_dir": True})
        except (OSError, PermissionError):
            # Directory iteration failed entirely; return whatever we gathered.
            pass
        # Keep deterministic order by path
        results.sort(key=lambda x: x.get("path", ""))
        return results
def read(
self,
file_path: str,
offset: int = 0,
limit: int = 2000,
) -> str:
"""파일 내용을 라인 번호와 함께 읽습니다.
Args:
file_path: 절대 또는 상대 파일 경로.
offset: 읽기 시작할 라인 오프셋 (0부터 시작).
limit: 읽을 최대 라인 수.
Returns:
라인 번호가 포함된 형식화된 파일 내용, 또는 에러 메시지.
"""
resolved_path = self._resolve_path(file_path)
if not resolved_path.exists() or not resolved_path.is_file():
return f"Error: File '{file_path}' not found"
try:
# Open with O_NOFOLLOW where available to avoid symlink traversal
fd = os.open(resolved_path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
with os.fdopen(fd, "r", encoding="utf-8") as f:
content = f.read()
empty_msg = check_empty_content(content)
if empty_msg:
return empty_msg
lines = content.splitlines()
start_idx = offset
end_idx = min(start_idx + limit, len(lines))
if start_idx >= len(lines):
return f"Error: Line offset {offset} exceeds file length ({len(lines)} lines)"
selected_lines = lines[start_idx:end_idx]
return format_content_with_line_numbers(selected_lines, start_line=start_idx + 1)
except (OSError, UnicodeDecodeError) as e:
return f"Error reading file '{file_path}': {e}"
def write(
self,
file_path: str,
content: str,
) -> WriteResult:
"""내용을 포함하는 새 파일을 생성합니다.
WriteResult를 반환합니다. 외부 저장소는 files_update=None을 설정합니다.
"""
resolved_path = self._resolve_path(file_path)
if resolved_path.exists():
return WriteResult(
error=f"Cannot write to {file_path} because it already exists. Read and then make an edit, or write to a new path."
)
try:
# Create parent directories if needed
resolved_path.parent.mkdir(parents=True, exist_ok=True)
# Prefer O_NOFOLLOW to avoid writing through symlinks
flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
fd = os.open(resolved_path, flags, 0o644)
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(content)
return WriteResult(path=file_path, files_update=None)
except (OSError, UnicodeEncodeError) as e:
return WriteResult(error=f"Error writing file '{file_path}': {e}")
def edit(
self,
file_path: str,
old_string: str,
new_string: str,
replace_all: bool = False,
) -> EditResult:
"""문자열 발생(occurrences)을 교체하여 파일을 편집합니다.
EditResult를 반환합니다. 외부 저장소는 files_update=None을 설정합니다.
"""
resolved_path = self._resolve_path(file_path)
if not resolved_path.exists() or not resolved_path.is_file():
return EditResult(error=f"Error: File '{file_path}' not found")
try:
# Read securely
fd = os.open(resolved_path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
with os.fdopen(fd, "r", encoding="utf-8") as f:
content = f.read()
result = perform_string_replacement(content, old_string, new_string, replace_all)
if isinstance(result, str):
return EditResult(error=result)
new_content, occurrences = result
# Write securely
flags = os.O_WRONLY | os.O_TRUNC
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
fd = os.open(resolved_path, flags)
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(new_content)
return EditResult(path=file_path, files_update=None, occurrences=int(occurrences))
except (OSError, UnicodeDecodeError, UnicodeEncodeError) as e:
return EditResult(error=f"Error editing file '{file_path}': {e}")
def grep_raw(
self,
pattern: str,
path: str | None = None,
glob: str | None = None,
) -> list[GrepMatch] | str:
# Validate regex
try:
re.compile(pattern)
except re.error as e:
return f"Invalid regex pattern: {e}"
# Resolve base path
try:
base_full = self._resolve_path(path or ".")
except ValueError:
return []
if not base_full.exists():
return []
# Try ripgrep first
results = self._ripgrep_search(pattern, base_full, glob)
if results is None:
results = self._python_search(pattern, base_full, glob)
matches: list[GrepMatch] = []
for fpath, items in results.items():
for line_num, line_text in items:
matches.append({"path": fpath, "line": int(line_num), "text": line_text})
return matches
    def _ripgrep_search(
        self, pattern: str, base_full: Path, include_glob: str | None
    ) -> dict[str, list[tuple[int, str]]] | None:
        """Search with the external ripgrep binary, parsing its JSON output.

        Args:
            pattern: Regular expression passed to ripgrep.
            base_full: Resolved base path to search under.
            include_glob: Optional glob filter forwarded via ``--glob``.

        Returns:
            Mapping of file path (virtual path in virtual mode, absolute
            otherwise) to (line_number, line_text) matches, or None when
            ripgrep is missing or times out so the caller can fall back to
            the Python implementation.
        """
        cmd = ["rg", "--json"]
        if include_glob:
            cmd.extend(["--glob", include_glob])
        # "--" prevents a pattern starting with "-" being parsed as a flag.
        cmd.extend(["--", pattern, str(base_full)])
        try:
            proc = subprocess.run(  # noqa: S603
                cmd,
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            # ripgrep unavailable or too slow: signal fallback to caller.
            return None
        results: dict[str, list[tuple[int, str]]] = {}
        # ripgrep --json emits one JSON object per line; keep only "match" events.
        for line in proc.stdout.splitlines():
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            if data.get("type") != "match":
                continue
            pdata = data.get("data", {})
            ftext = pdata.get("path", {}).get("text")
            if not ftext:
                continue
            p = Path(ftext)
            if self.virtual_mode:
                try:
                    # Re-express the match path relative to the sandbox root.
                    virt = "/" + str(p.resolve().relative_to(self.cwd))
                except Exception:
                    # Outside the root (e.g. via symlink): drop the match.
                    continue
            else:
                virt = str(p)
            ln = pdata.get("line_number")
            lt = pdata.get("lines", {}).get("text", "").rstrip("\n")
            if ln is None:
                continue
            results.setdefault(virt, []).append((int(ln), lt))
        return results
    def _python_search(
        self, pattern: str, base_full: Path, include_glob: str | None
    ) -> dict[str, list[tuple[int, str]]]:
        """Pure-Python fallback search used when ripgrep is unavailable.

        Args:
            pattern: Regular expression to search for.
            base_full: Resolved base path; a file's parent is used as the
                search root when a file is given.
            include_glob: Optional glob filter.

        Returns:
            Mapping of file path (virtual in virtual mode, absolute
            otherwise) to (line_number, line_text) matches.
        """
        try:
            regex = re.compile(pattern)
        except re.error:
            # Caller validated the pattern already; an empty result is the
            # defensive answer if it slips through invalid.
            return {}
        results: dict[str, list[tuple[int, str]]] = {}
        root = base_full if base_full.is_dir() else base_full.parent
        for fp in root.rglob("*"):
            if not fp.is_file():
                continue
            # NOTE(review): the glob is matched against the file NAME only,
            # while ripgrep's --glob presumably matches paths — confirm the
            # two code paths are meant to agree.
            if include_glob and not wcglob.globmatch(fp.name, include_glob, flags=wcglob.BRACE):
                continue
            try:
                # Skip oversized files to bound scan cost.
                if fp.stat().st_size > self.max_file_size_bytes:
                    continue
            except OSError:
                continue
            try:
                content = fp.read_text()
            except (UnicodeDecodeError, PermissionError, OSError):
                # Binary or unreadable file: skip silently.
                continue
            for line_num, line in enumerate(content.splitlines(), 1):
                if regex.search(line):
                    if self.virtual_mode:
                        try:
                            virt_path = "/" + str(fp.resolve().relative_to(self.cwd))
                        except Exception:
                            continue
                    else:
                        virt_path = str(fp)
                    results.setdefault(virt_path, []).append((line_num, line))
        return results
def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
    """Return FileInfo entries for files matching a glob pattern.

    The search is recursive (rglob) so patterns match in subdirectories.
    In virtual mode, returned paths are rewritten relative to self.cwd
    with a leading "/". Results are sorted by path; stat failures degrade
    to entries without size/modified_at rather than being dropped.
    """
    normalized = pattern.lstrip("/") if pattern.startswith("/") else pattern
    base = self.cwd if path == "/" else self._resolve_path(path)
    if not (base.exists() and base.is_dir()):
        return []
    entries: list[FileInfo] = []
    try:
        # Recursive globbing so files in subdirectories are matched too.
        for hit in base.rglob(normalized):
            try:
                if not hit.is_file():
                    continue
            except OSError:
                continue
            raw = str(hit)
            if self.virtual_mode:
                # Strip the cwd prefix to produce a "/"-rooted virtual path.
                prefix = str(self.cwd)
                if not prefix.endswith("/"):
                    prefix += "/"
                if raw.startswith(prefix):
                    rel = raw[len(prefix):]
                elif raw.startswith(str(self.cwd)):
                    rel = raw[len(str(self.cwd)):].lstrip("/")
                else:
                    rel = raw
                shown = "/" + rel
            else:
                shown = raw
            try:
                st = hit.stat()
            except OSError:
                entries.append({"path": shown, "is_dir": False})
            else:
                entries.append({
                    "path": shown,
                    "is_dir": False,
                    "size": int(st.st_size),
                    "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
                })
    except (OSError, ValueError):
        pass
    entries.sort(key=lambda e: e.get("path", ""))
    return entries
def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
    """Upload multiple files to the filesystem.

    Args:
        files: List of (path, content) tuples where content is bytes.

    Returns:
        A list of FileUploadResponse objects, one per input file, in the
        same order as the input.
    """
    responses: list[FileUploadResponse] = []
    for path, content in files:
        try:
            resolved_path = self._resolve_path(path)
            # Create parent directories if needed
            resolved_path.parent.mkdir(parents=True, exist_ok=True)
            # O_NOFOLLOW (where available) refuses to write through a
            # symlink, preventing symlink-based escapes from the root.
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
            if hasattr(os, "O_NOFOLLOW"):
                flags |= os.O_NOFOLLOW
            fd = os.open(resolved_path, flags, 0o644)
            with os.fdopen(fd, "wb") as f:
                f.write(content)
            responses.append(FileUploadResponse(path=path, error=None))
        except FileNotFoundError:
            responses.append(FileUploadResponse(path=path, error="file_not_found"))
        except PermissionError:
            responses.append(FileUploadResponse(path=path, error="permission_denied"))
        except (ValueError, OSError):
            # ValueError comes from _resolve_path on path traversal; any
            # remaining OSError (e.g. ELOOP from O_NOFOLLOW) is reported the
            # same way. The previous version branched on the error message
            # here, but both arms appended "invalid_path" — dead code removed.
            responses.append(FileUploadResponse(path=path, error="invalid_path"))
    return responses
def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
    """Download multiple files from the filesystem.

    Args:
        paths: List of file paths to download.

    Returns:
        One FileDownloadResponse per input path, in input order.
    """
    results: list[FileDownloadResponse] = []
    # O_NOFOLLOW (when the OS supports it) refuses to read through symlinks.
    nofollow = getattr(os, "O_NOFOLLOW", 0)
    for requested in paths:
        try:
            target = self._resolve_path(requested)
            handle = os.open(target, os.O_RDONLY | nofollow)
            with os.fdopen(handle, "rb") as stream:
                data = stream.read()
        except FileNotFoundError:
            results.append(FileDownloadResponse(path=requested, content=None, error="file_not_found"))
        except PermissionError:
            results.append(FileDownloadResponse(path=requested, content=None, error="permission_denied"))
        except IsADirectoryError:
            results.append(FileDownloadResponse(path=requested, content=None, error="is_directory"))
        except ValueError:
            results.append(FileDownloadResponse(path=requested, content=None, error="invalid_path"))
        else:
            results.append(FileDownloadResponse(path=requested, content=data, error=None))
        # Any other OSError deliberately propagates to the caller.
    return results

View File

@@ -0,0 +1,453 @@
"""플러그형 메모리 백엔드를 위한 프로토콜 정의.
이 모듈은 모든 백엔드 구현이 따라야 하는 BackendProtocol을 정의합니다.
백엔드는 파일들을 서로 다른 위치(state, filesystem, database 등)에 저장할 수 있으며,
파일 작업에 대해 통일된 인터페이스를 제공합니다.
"""
import abc
import asyncio
from collections.abc import Callable
from dataclasses import dataclass
from typing import Any, Literal, NotRequired, TypeAlias
from langchain.tools import ToolRuntime
from typing_extensions import TypedDict
FileOperationError = Literal[
    "file_not_found",  # Download: file doesn't exist
    "permission_denied",  # Both: access denied
    "is_directory",  # Download: tried to download directory as file
    "invalid_path",  # Both: path syntax malformed (parent dir missing, invalid chars)
]
"""Standardized error codes for file upload/download operations.

These represent common, recoverable errors that an LLM can understand and
potentially fix:

- file_not_found: the requested file does not exist (download)
- permission_denied: access to the operation was denied
- is_directory: attempted to download a directory as a file
- invalid_path: the path syntax is malformed or contains invalid characters
  (this also covers upload failures such as a missing parent directory)

The previous docstring listed a ``parent_not_found`` code that is not a
member of the Literal; that entry has been folded into ``invalid_path``,
which is what backends actually report for a missing parent directory.
"""
@dataclass
class FileDownloadResponse:
    """Result of a single file download operation.

    Designed so batch operations can report partial success. Errors are
    standardized via the FileOperationError literal for the specific,
    recoverable conditions that arise when an LLM drives file operations.

    Attributes:
        path: The requested file path. Included so batch results are easy to
            cross-reference, especially in error messages.
        content: File content as bytes on success, None on failure.
        error: Standardized error code on failure, None on success.
            Uses the FileOperationError literal for structured,
            LLM-actionable error reporting.

    Examples:
        >>> # Success
        >>> FileDownloadResponse(path="/app/config.json", content=b"{...}", error=None)
        >>> # Failure
        >>> FileDownloadResponse(path="/wrong/path.txt", content=None, error="file_not_found")
    """

    path: str
    content: bytes | None = None
    error: FileOperationError | None = None
@dataclass
class FileUploadResponse:
    """Result of a single file upload operation.

    Designed so batch operations can report partial success. Errors are
    standardized via the FileOperationError literal for the specific,
    recoverable conditions that arise when an LLM drives file operations.

    Attributes:
        path: The requested file path. Included for easy cross-referencing
            of batch results and clear error messages.
        error: Standardized error code on failure, None on success.
            Uses the FileOperationError literal for structured,
            LLM-actionable error reporting.

    Examples:
        >>> # Success
        >>> FileUploadResponse(path="/app/data.txt", error=None)
        >>> # Failure
        >>> FileUploadResponse(path="/readonly/file.txt", error="permission_denied")
    """

    path: str
    error: FileOperationError | None = None
class FileInfo(TypedDict):
    """Structured file listing information.

    Minimal contract used across backends. Only "path" is required; the
    other fields are best-effort and may be absent depending on the backend.
    """

    path: str
    is_dir: NotRequired[bool]
    size: NotRequired[int]  # bytes (approx)
    modified_at: NotRequired[str]  # ISO timestamp if known
class GrepMatch(TypedDict):
    """A single structured grep match entry."""

    path: str  # absolute file path containing the match
    line: int  # 1-based line number
    text: str  # full text of the matching line
@dataclass
class WriteResult:
    """Result of a backend write operation.

    Attributes:
        error: Error message on failure, None on success.
        path: Absolute path of the written file, None on failure.
        files_update: State-update dict for checkpoint backends, None for
            external storage. Checkpoint backends fill this with
            {file_path: file_data} for LangGraph state; external backends
            set None (data is already persisted to disk/S3/database/etc.).

    Examples:
        >>> # Checkpoint storage
        >>> WriteResult(path="/f.txt", files_update={"/f.txt": {...}})
        >>> # External storage
        >>> WriteResult(path="/f.txt", files_update=None)
        >>> # Error
        >>> WriteResult(error="File exists")
    """

    error: str | None = None
    path: str | None = None
    files_update: dict[str, Any] | None = None
@dataclass
class EditResult:
    """Result of a backend edit operation.

    Attributes:
        error: Error message on failure, None on success.
        path: Absolute path of the edited file, None on failure.
        files_update: State-update dict for checkpoint backends, None for
            external storage. Checkpoint backends fill this with
            {file_path: file_data} for LangGraph state; external backends
            set None (data is already persisted to disk/S3/database/etc.).
        occurrences: Number of replacements made, None on failure.

    Examples:
        >>> # Checkpoint storage
        >>> EditResult(path="/f.txt", files_update={"/f.txt": {...}}, occurrences=1)
        >>> # External storage
        >>> EditResult(path="/f.txt", files_update=None, occurrences=2)
        >>> # Error
        >>> EditResult(error="File not found")
    """

    error: str | None = None
    path: str | None = None
    files_update: dict[str, Any] | None = None
    occurrences: int | None = None
class BackendProtocol(abc.ABC):
    """Protocol for pluggable memory backends (single unified interface).

    Backends may store files in different locations (state, filesystem,
    database, ...) while providing a unified interface for file operations.

    All file data is represented as a dict with the structure:

        {
            "content": list[str],   # lines of text content
            "created_at": str,      # ISO-format timestamp
            "modified_at": str,     # ISO-format timestamp
        }
    """

    # NOTE(review): the sync methods below are docstring-only interface stubs
    # without @abstractmethod; calling one on a backend that does not override
    # it implicitly returns None rather than raising.

    def ls_info(self, path: str) -> list["FileInfo"]:
        """List all files in a directory with their metadata.

        Args:
            path: Absolute path of the directory to list. Must start with '/'.

        Returns:
            List of FileInfo dicts containing file metadata:
                - `path` (required): absolute file path
                - `is_dir` (optional): True for a directory
                - `size` (optional): file size in bytes
                - `modified_at` (optional): ISO 8601 timestamp
        """

    async def als_info(self, path: str) -> list["FileInfo"]:
        """Async version of ls_info."""
        return await asyncio.to_thread(self.ls_info, path)

    def read(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Read file contents with line numbers.

        Args:
            file_path: Absolute path of the file to read. Must start with '/'.
            offset: Line number to start reading from (0-based). Default: 0.
            limit: Maximum number of lines to read. Default: 2000.

        Returns:
            File contents with line numbers (cat -n style), starting at line 1.
            Lines longer than 2000 characters are truncated.
            Returns an error string if the file does not exist or cannot be read.

        !!! note
            - Use pagination (offset/limit) on large files to avoid context overflow.
            - First scan: `read(path, limit=100)` to inspect the file structure.
            - Follow-up reads: `read(path, offset=100, limit=200)` for the next chunk.
            - ALWAYS read a file before editing it.
            - If the file exists but is empty, you will receive a system
              reminder warning instead of content.
        """

    async def aread(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Async version of read."""
        return await asyncio.to_thread(self.read, file_path, offset, limit)

    def grep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list["GrepMatch"] | str:
        """Search files for a literal text pattern.

        Args:
            pattern: Literal string to search for (not a regex).
                Performs exact substring matching within file contents.
                E.g. "TODO" matches every line containing "TODO".
            path: Directory path to search in (optional).
                When None, searches the current working directory.
                E.g. "/workspace/src".
            glob: Optional glob pattern to filter which files are searched.
                Filters by file name/path, not contents.
                Supports standard glob wildcards:
                - `*`: any characters within a file name
                - `**`: any directories, recursively
                - `?`: a single character
                - `[abc]`: one character from the set

                Examples:
                    - "*.py" - search only Python files
                    - "**/*.txt" - search all .txt files recursively
                    - "src/**/*.js" - search JS files under src/
                    - "test[0-9].txt" - matches test0.txt, test1.txt, etc.

        Returns:
            On success: structured results as list[GrepMatch] containing:
                - path: absolute file path
                - line: line number (1-based)
                - text: full content of the matching line
            On failure: an error message string (e.g. bad path, permission denied)
        """

    async def agrep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list["GrepMatch"] | str:
        """Async version of grep_raw."""
        return await asyncio.to_thread(self.grep_raw, pattern, path, glob)

    def glob_info(self, pattern: str, path: str = "/") -> list["FileInfo"]:
        """Find files matching a glob pattern.

        Args:
            pattern: Glob pattern with wildcards to match file paths.
                Supports standard glob syntax:
                - `*` matches any characters within a file/directory name
                - `**` matches any directories, recursively
                - `?` matches a single character
                - `[abc]` matches one character from the set
            path: Base directory to start the search from. Default: "/" (root).
                The pattern is applied relative to this path.

        Returns:
            list of FileInfo
        """

    async def aglob_info(self, pattern: str, path: str = "/") -> list["FileInfo"]:
        """Async version of glob_info."""
        return await asyncio.to_thread(self.glob_info, pattern, path)

    def write(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Write content to a new file in the filesystem. Errors if the file exists.

        Args:
            file_path: Absolute path where the file will be created.
                Must start with '/'.
            content: String content to write to the file.

        Returns:
            WriteResult
        """

    async def awrite(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Async version of write."""
        return await asyncio.to_thread(self.write, file_path, content)

    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Perform an exact string replacement in an existing file.

        Args:
            file_path: Absolute path of the file to edit. Must start with '/'.
            old_string: Exact string to find and replace.
                Must match exactly, including whitespace and indentation.
            new_string: String to replace old_string with.
                Must differ from old_string.
            replace_all: If True, replace all occurrences. If False (default),
                old_string must be unique within the file or the edit fails.

        Returns:
            EditResult
        """

    async def aedit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Async version of edit."""
        return await asyncio.to_thread(self.edit, file_path, old_string, new_string, replace_all)

    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the sandbox.

        This API is designed for direct use by developers, or to be exposed
        to an LLM via custom tools.

        Args:
            files: List of (path, content) tuples to upload.

        Returns:
            A list of FileUploadResponse objects, one per input file.
            Response order matches input order (response[i] for files[i]).
            Check the error field for per-file success/failure.

        Examples:
            ```python
            responses = sandbox.upload_files([
                ("/app/config.json", b"{...}"),
                ("/app/data.txt", b"content"),
            ])
            ```
        """

    async def aupload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Async version of upload_files."""
        return await asyncio.to_thread(self.upload_files, files)

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the sandbox.

        This API is designed for direct use by developers, or to be exposed
        to an LLM via custom tools.

        Args:
            paths: List of file paths to download.

        Returns:
            A list of FileDownloadResponse objects, one per input path.
            Response order matches input order (response[i] for paths[i]).
            Check the error field for per-file success/failure.
        """

    async def adownload_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Async version of download_files."""
        return await asyncio.to_thread(self.download_files, paths)
@dataclass
class ExecuteResponse:
    """Result of code execution.

    Simplified schema optimized for LLM consumption.
    """

    output: str
    """Combined stdout and stderr of the executed command."""

    exit_code: int | None = None
    """Process exit code. 0 means success; non-zero indicates failure."""

    truncated: bool = False
    """Whether the output was truncated due to backend limits."""
class SandboxBackendProtocol(BackendProtocol):
    """Protocol for sandbox backends with an isolated runtime.

    Sandbox backends run in an isolated environment (e.g. a separate process
    or a container) and communicate through a defined interface.
    """

    # NOTE(review): like the base protocol, execute() and id are
    # docstring-only stubs here; concrete sandboxes must override them.

    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Execute a command in a process.

        Simplified interface optimized for LLM consumption.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code, optional signal,
            and truncation flag.
        """

    async def aexecute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Async version of execute."""
        return await asyncio.to_thread(self.execute, command)

    @property
    def id(self) -> str:
        """Unique identifier for the sandbox backend instance."""
# Factory signature: builds a backend instance from the per-invocation ToolRuntime.
BackendFactory: TypeAlias = Callable[[ToolRuntime], BackendProtocol]
# Accepted backend configuration: a ready-made backend instance or a factory.
BACKEND_TYPES = BackendProtocol | BackendFactory

View File

@@ -0,0 +1,360 @@
"""execute()만을 추상 메서드로 가지는 기본 샌드박스 구현.
이 모듈은 execute()를 통해 쉘 명령을 실행하여 모든 SandboxBackendProtocol
메서드를 구현하는 기본 클래스를 제공합니다. 구체적인 구현체는
오직 execute() 메서드만 구현하면 됩니다.
"""
from __future__ import annotations
import base64
import json
import shlex
from abc import ABC, abstractmethod
from deepagents.backends.protocol import (
EditResult,
ExecuteResponse,
FileDownloadResponse,
FileInfo,
FileUploadResponse,
GrepMatch,
SandboxBackendProtocol,
WriteResult,
)
_GLOB_COMMAND_TEMPLATE = """python3 -c "
import glob
import os
import json
import base64
# base64 인코딩된 파라미터 디코딩
path = base64.b64decode('{path_b64}').decode('utf-8')
pattern = base64.b64decode('{pattern_b64}').decode('utf-8')
os.chdir(path)
matches = sorted(glob.glob(pattern, recursive=True))
for m in matches:
stat = os.stat(m)
result = {{
'path': m,
'size': stat.st_size,
'mtime': stat.st_mtime,
'is_dir': os.path.isdir(m)
}}
print(json.dumps(result))
" 2>/dev/null"""
_WRITE_COMMAND_TEMPLATE = """python3 -c "
import os
import sys
import base64
file_path = '{file_path}'
# 파일이 이미 존재하는지 확인 (쓰기와 원자적)
if os.path.exists(file_path):
print(f'Error: File \\'{file_path}\\' already exists', file=sys.stderr)
sys.exit(1)
# 필요시 부모 디렉토리 생성
parent_dir = os.path.dirname(file_path) or '.'
os.makedirs(parent_dir, exist_ok=True)
# 내용 디코딩 및 쓰기
content = base64.b64decode('{content_b64}').decode('utf-8')
with open(file_path, 'w') as f:
f.write(content)
" 2>&1"""
_EDIT_COMMAND_TEMPLATE = """python3 -c "
import sys
import base64
# 파일 내용 읽기
with open('{file_path}', 'r') as f:
text = f.read()
# base64 인코딩된 문자열 디코딩
old = base64.b64decode('{old_b64}').decode('utf-8')
new = base64.b64decode('{new_b64}').decode('utf-8')
# 발생 횟수 계산
count = text.count(old)
# 문제가 발견되면 에러 코드와 함께 종료
if count == 0:
sys.exit(1) # 문자열을 찾을 수 없음
elif count > 1 and not {replace_all}:
sys.exit(2) # replace_all 없이 여러 번 발생
# 교체 수행
if {replace_all}:
result = text.replace(old, new)
else:
result = text.replace(old, new, 1)
# 파일에 다시 쓰기
with open('{file_path}', 'w') as f:
f.write(result)
print(count)
" 2>&1"""
_READ_COMMAND_TEMPLATE = """python3 -c "
import os
import sys
file_path = '{file_path}'
offset = {offset}
limit = {limit}
# 파일이 존재하는지 확인
if not os.path.isfile(file_path):
print('Error: File not found')
sys.exit(1)
# 파일이 비어있는지 확인
if os.path.getsize(file_path) == 0:
print('System reminder: File exists but has empty contents')
sys.exit(0)
# offset과 limit으로 파일 읽기
with open(file_path, 'r') as f:
lines = f.readlines()
# offset과 limit 적용
start_idx = offset
end_idx = offset + limit
selected_lines = lines[start_idx:end_idx]
# 라인 번호로 포맷팅 (1부터 시작, offset + 1부터 시작)
for i, line in enumerate(selected_lines):
line_num = offset + i + 1
# 포맷팅을 위해 끝의 개행 문자 제거 후 다시 추가
line_content = line.rstrip('\\n')
print(f'{{line_num:6d}}\\t{{line_content}}')
" 2>&1"""
class BaseSandbox(SandboxBackendProtocol, ABC):
    """Base sandbox implementation with execute() as the abstract method.

    This class provides default implementations of all protocol methods in
    terms of shell commands, so concrete subclasses only need to implement
    execute() (plus id/upload_files/download_files, which remain abstract).
    """

    @abstractmethod
    def execute(
        self,
        command: str,
    ) -> ExecuteResponse:
        """Run a command in the sandbox and return an ExecuteResponse.

        Args:
            command: Full shell command string to execute.

        Returns:
            ExecuteResponse with combined output, exit code, optional signal,
            and truncation flag.
        """
        ...

    def ls_info(self, path: str) -> list[FileInfo]:
        """Return a structured listing with file metadata using os.scandir."""
        # NOTE(review): `path` is interpolated unquoted into the remote
        # script; a path containing a quote character would break the command.
        cmd = f"""python3 -c "
import os
import json
path = '{path}'
try:
    with os.scandir(path) as it:
        for entry in it:
            result = {{
                'path': entry.name,
                'is_dir': entry.is_dir(follow_symlinks=False)
            }}
            print(json.dumps(result))
except FileNotFoundError:
    pass
except PermissionError:
    pass
" 2>/dev/null"""
        result = self.execute(cmd)
        file_infos: list[FileInfo] = []
        # Each stdout line is one JSON object; malformed lines are skipped.
        for line in result.output.strip().split("\n"):
            if not line:
                continue
            try:
                data = json.loads(line)
                file_infos.append({"path": data["path"], "is_dir": data["is_dir"]})
            except json.JSONDecodeError:
                continue
        return file_infos

    def read(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Read file contents with line numbers using a single shell command."""
        # Use the template to read the file with offset and limit
        cmd = _READ_COMMAND_TEMPLATE.format(file_path=file_path, offset=offset, limit=limit)
        result = self.execute(cmd)
        output = result.output.rstrip()
        exit_code = result.exit_code
        # NOTE(review): a file whose content contains the literal text
        # "Error: File not found" would also trigger this branch.
        if exit_code != 0 or "Error: File not found" in output:
            return f"Error: File '{file_path}' not found"
        return output

    def write(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Create a new file. Returns a WriteResult; error is set on failure."""
        # Base64-encode the content to avoid shell escaping issues
        content_b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
        # Single atomic check-and-write command
        cmd = _WRITE_COMMAND_TEMPLATE.format(file_path=file_path, content_b64=content_b64)
        result = self.execute(cmd)
        # Check for errors (exit code or an error message in the output)
        if result.exit_code != 0 or "Error:" in result.output:
            error_msg = result.output.strip() or f"Failed to write file '{file_path}'"
            return WriteResult(error=error_msg)
        # External storage - no files_update needed
        return WriteResult(path=file_path, files_update=None)

    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Edit a file by replacing string occurrences. Returns an EditResult."""
        # Base64-encode the strings to avoid shell escaping issues
        old_b64 = base64.b64encode(old_string.encode("utf-8")).decode("ascii")
        new_b64 = base64.b64encode(new_string.encode("utf-8")).decode("ascii")
        # Use the template for the string replacement; replace_all renders
        # as the Python literal True/False inside the remote script
        cmd = _EDIT_COMMAND_TEMPLATE.format(
            file_path=file_path, old_b64=old_b64, new_b64=new_b64, replace_all=replace_all
        )
        result = self.execute(cmd)
        exit_code = result.exit_code
        output = result.output.strip()
        # Exit codes 1/2 are the template's sentinel values (see template).
        if exit_code == 1:
            return EditResult(error=f"Error: String not found in file: '{old_string}'")
        if exit_code == 2:
            return EditResult(
                error=f"Error: String '{old_string}' appears multiple times. Use replace_all=True to replace all occurrences."
            )
        if exit_code != 0:
            return EditResult(error=f"Error: File '{file_path}' not found")
        count = int(output)
        # External storage - no files_update needed
        return EditResult(path=file_path, files_update=None, occurrences=count)

    def grep_raw(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> list[GrepMatch] | str:
        """Return structured search results, or an error string for bad input."""
        search_path = shlex.quote(path or ".")
        # Build the grep command for structured output
        grep_opts = "-rHnF"  # recursive, with filename, with line number, fixed strings (literal)
        # Add the glob pattern if provided
        glob_pattern = ""
        if glob:
            glob_pattern = f"--include='{glob}'"
        # Escape the pattern for the shell
        pattern_escaped = shlex.quote(pattern)
        # `|| true` keeps the exit code 0 when grep finds nothing.
        cmd = f"grep {grep_opts} {glob_pattern} -e {pattern_escaped} {search_path} 2>/dev/null || true"
        result = self.execute(cmd)
        output = result.output.rstrip()
        if not output:
            return []
        # Parse grep output into GrepMatch objects
        matches: list[GrepMatch] = []
        for line in output.split("\n"):
            # Format: path:line_number:text
            parts = line.split(":", 2)
            if len(parts) >= 3:
                matches.append({
                    "path": parts[0],
                    "line": int(parts[1]),
                    "text": parts[2],
                })
        return matches

    def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
        """Structured glob matching that returns FileInfo dicts."""
        # Base64-encode the pattern and path to avoid shell escaping issues
        pattern_b64 = base64.b64encode(pattern.encode("utf-8")).decode("ascii")
        path_b64 = base64.b64encode(path.encode("utf-8")).decode("ascii")
        cmd = _GLOB_COMMAND_TEMPLATE.format(path_b64=path_b64, pattern_b64=pattern_b64)
        result = self.execute(cmd)
        output = result.output.strip()
        if not output:
            return []
        # Parse the JSON output into FileInfo dicts
        file_infos: list[FileInfo] = []
        for line in output.split("\n"):
            try:
                data = json.loads(line)
                file_infos.append({
                    "path": data["path"],
                    "is_dir": data["is_dir"],
                })
            except json.JSONDecodeError:
                continue
        return file_infos

    @property
    @abstractmethod
    def id(self) -> str:
        """Unique identifier for the sandbox backend."""

    @abstractmethod
    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Upload multiple files to the sandbox.

        Implementations must support partial success - catch per-file
        exceptions and report errors in the FileUploadResponse objects
        instead of raising.
        """

    @abstractmethod
    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Download multiple files from the sandbox.

        Implementations must support partial success - catch per-file
        exceptions and report errors in the FileDownloadResponse objects
        instead of raising.
        """

View File

@@ -0,0 +1,181 @@
"""StateBackend: 파일을 LangGraph 에이전트 상태(임시)에 저장되도록 합니다."""
from typing import TYPE_CHECKING
from deepagents.backends.protocol import BackendProtocol, EditResult, FileInfo, GrepMatch, WriteResult
from deepagents.backends.utils import (
_glob_search_files,
create_file_data,
file_data_to_string,
format_read_response,
grep_matches_from_files,
perform_string_replacement,
update_file_data,
)
if TYPE_CHECKING:
from langchain.tools import ToolRuntime
class StateBackend(BackendProtocol):
    """Backend that stores files in agent state (ephemeral).

    Uses LangGraph's state management and checkpointing. Files persist only
    within a single conversation thread and are not shared across threads.
    State is automatically checkpointed after each agent step.

    Special handling: because LangGraph state must be updated via Command
    objects (not direct mutation), operations may return Command objects
    instead of None. This is signaled by the uses_state=True flag.
    """

    def __init__(self, runtime: "ToolRuntime"):
        """Initialize the StateBackend with a runtime."""
        self.runtime = runtime

    def ls_info(self, path: str) -> list[FileInfo]:
        """List files and directories in the given directory (non-recursive).

        Args:
            path: Absolute path of the directory.

        Returns:
            FileInfo-like dicts for files and directories directly under the
            directory. Directories get a trailing / and is_dir=True.
        """
        files = self.runtime.state.get("files", {})
        infos: list[FileInfo] = []
        subdirs: set[str] = set()
        # Normalize path to have trailing slash for proper prefix matching
        normalized_path = path if path.endswith("/") else path + "/"
        for k, fd in files.items():
            # Check if file is in the specified directory or a subdirectory
            if not k.startswith(normalized_path):
                continue
            # Get the relative path after the directory
            relative = k[len(normalized_path) :]
            # If relative path contains '/', it's in a subdirectory
            if "/" in relative:
                # Extract the immediate subdirectory name
                subdir_name = relative.split("/")[0]
                subdirs.add(normalized_path + subdir_name + "/")
                continue
            # This is a file directly in the current directory
            size = len("\n".join(fd.get("content", [])))
            infos.append({
                "path": k,
                "is_dir": False,
                "size": int(size),
                "modified_at": fd.get("modified_at", ""),
            })
        # Add directories to the results
        for subdir in sorted(subdirs):
            infos.append({
                "path": subdir,
                "is_dir": True,
                "size": 0,
                "modified_at": "",
            })
        infos.sort(key=lambda x: x.get("path", ""))
        return infos

    def read(
        self,
        file_path: str,
        offset: int = 0,
        limit: int = 2000,
    ) -> str:
        """Read file contents with line numbers.

        Args:
            file_path: Absolute file path.
            offset: Line offset to start reading from (0-based).
            limit: Maximum number of lines to read.

        Returns:
            Formatted file contents with line numbers, or an error message.
        """
        files = self.runtime.state.get("files", {})
        file_data = files.get(file_path)
        if file_data is None:
            return f"Error: File '{file_path}' not found"
        return format_read_response(file_data, offset, limit)

    def write(
        self,
        file_path: str,
        content: str,
    ) -> WriteResult:
        """Create a new file with the given content.

        Returns a WriteResult carrying files_update for the LangGraph state update.
        """
        files = self.runtime.state.get("files", {})
        if file_path in files:
            return WriteResult(
                error=f"Cannot write to {file_path} because it already exists. Read and then make an edit, or write to a new path."
            )
        new_file_data = create_file_data(content)
        return WriteResult(path=file_path, files_update={file_path: new_file_data})

    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Edit a file by replacing string occurrences.

        Returns an EditResult carrying files_update and occurrences.
        """
        files = self.runtime.state.get("files", {})
        file_data = files.get(file_path)
        if file_data is None:
            return EditResult(error=f"Error: File '{file_path}' not found")
        content = file_data_to_string(file_data)
        result = perform_string_replacement(content, old_string, new_string, replace_all)
        # The helper returns an error string, or (new_content, occurrences).
        if isinstance(result, str):
            return EditResult(error=result)
        new_content, occurrences = result
        new_file_data = update_file_data(file_data, new_content)
        return EditResult(path=file_path, files_update={file_path: new_file_data}, occurrences=int(occurrences))

    def grep_raw(
        self,
        pattern: str,
        path: str = "/",
        glob: str | None = None,
    ) -> list[GrepMatch] | str:
        # Delegate to the shared in-memory grep helper.
        files = self.runtime.state.get("files", {})
        return grep_matches_from_files(files, pattern, path, glob)

    def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
        """Get FileInfo for files matching a glob pattern."""
        files = self.runtime.state.get("files", {})
        result = _glob_search_files(files, pattern, path)
        # The helper returns a newline-joined path list, or this sentinel.
        if result == "No files found":
            return []
        paths = result.split("\n")
        infos: list[FileInfo] = []
        for p in paths:
            fd = files.get(p)
            size = len("\n".join(fd.get("content", []))) if fd else 0
            infos.append({
                "path": p,
                "is_dir": False,
                "size": int(size),
                "modified_at": fd.get("modified_at", "") if fd else "",
            })
        return infos

View File

@@ -0,0 +1,438 @@
"""StoreBackend: LangGraph의 BaseStore(영구적, 스레드 간 공유)를 위한 어댑터."""
from typing import Any
from langgraph.config import get_config
from langgraph.store.base import BaseStore, Item
from deepagents.backends.protocol import (
BackendProtocol,
EditResult,
FileDownloadResponse,
FileInfo,
FileUploadResponse,
GrepMatch,
WriteResult,
)
from deepagents.backends.utils import (
_glob_search_files,
create_file_data,
file_data_to_string,
format_read_response,
grep_matches_from_files,
perform_string_replacement,
update_file_data,
)
class StoreBackend(BackendProtocol):
"""파일을 LangGraph의 BaseStore(영구적)에 저장하는 백엔드.
LangGraph의 Store를 사용하여 영구적이고 대화 간 공유되는 저장소를 사용합니다.
파일은 네임스페이스를 통해 조직화되며 모든 스레드에서 지속됩니다.
네임스페이스는 다중 에이전트 격리를 위해 선택적 assistant_id를 포함할 수 있습니다.
"""
def __init__(self, runtime: "ToolRuntime"):
    """Initialize the StoreBackend with a runtime.

    Args:
        runtime: ToolRuntime instance providing store access and configuration.
    """
    self.runtime = runtime
def _get_store(self) -> BaseStore:
    """Return the store instance from the runtime.

    Returns:
        The runtime's BaseStore instance.

    Raises:
        ValueError: If no store is available on the runtime.
    """
    backing = self.runtime.store
    if backing is None:
        raise ValueError("Store is required but not available in runtime")
    return backing
def _get_namespace(self) -> tuple[str, ...]:
"""저장소 작업을 위한 네임스페이스를 가져옵니다.
우선순위:
1) 존재하는 경우 `self.runtime.config` 사용 (테스트에서 명시적으로 전달).
2) 가능한 경우 `langgraph.config.get_config()`로 폴백(fallback).
3) ("filesystem",)으로 기본 설정.
config 메타데이터에 assistant_id가 있는 경우,
에이전트별 격리를 제공하기 위해 (assistant_id, "filesystem")을 반환합니다.
"""
namespace = "filesystem"
# Prefer the runtime-provided config when present
runtime_cfg = getattr(self.runtime, "config", None)
if isinstance(runtime_cfg, dict):
assistant_id = runtime_cfg.get("metadata", {}).get("assistant_id")
if assistant_id:
return (assistant_id, namespace)
return (namespace,)
# Fallback to langgraph's context, but guard against errors when
# called outside of a runnable context
try:
cfg = get_config()
except Exception:
return (namespace,)
try:
assistant_id = cfg.get("metadata", {}).get("assistant_id") # type: ignore[assignment]
except Exception:
assistant_id = None
if assistant_id:
return (assistant_id, namespace)
return (namespace,)
def _convert_store_item_to_file_data(self, store_item: Item) -> dict[str, Any]:
    """Convert a store Item into the FileData dict shape.

    Args:
        store_item: Store Item holding the file data.

    Returns:
        FileData dict with content, created_at, and modified_at fields.

    Raises:
        ValueError: If a required field is missing or has the wrong type.
    """
    value = store_item.value
    # Validate the three required fields (and their types) before copying.
    for field, expected_type in (
        ("content", list),
        ("created_at", str),
        ("modified_at", str),
    ):
        if field not in value or not isinstance(value[field], expected_type):
            msg = f"Store item does not contain valid {field} field. Got: {value.keys()}"
            raise ValueError(msg)
    return {
        "content": value["content"],
        "created_at": value["created_at"],
        "modified_at": value["modified_at"],
    }
def _convert_file_data_to_store_value(self, file_data: dict[str, Any]) -> dict[str, Any]:
"""FileData를 store.put()에 적합한 dict로 변환합니다.
Args:
file_data: 변환할 FileData.
Returns:
content, created_at, modified_at 필드를 포함하는 딕셔너리.
"""
return {
"content": file_data["content"],
"created_at": file_data["created_at"],
"modified_at": file_data["modified_at"],
}
def _search_store_paginated(
    self,
    store: BaseStore,
    namespace: tuple[str, ...],
    *,
    query: str | None = None,
    filter: dict[str, Any] | None = None,
    page_size: int = 100,
) -> list[Item]:
    """Search the store, auto-paginating until every result is collected.

    Args:
        store: The store to search.
        namespace: Hierarchical path prefix to search under.
        query: Optional natural-language search query.
        filter: Key-value pairs for filtering results.
        page_size: Items fetched per page (default: 100).

    Returns:
        All items matching the search criteria.

    Example:
        ```python
        store = _get_store(runtime)
        namespace = _get_namespace()
        all_items = _search_store_paginated(store, namespace)
        ```
    """
    collected: list[Item] = []
    offset = 0
    while True:
        batch = store.search(
            namespace,
            query=query,
            filter=filter,
            limit=page_size,
            offset=offset,
        )
        if not batch:
            break
        collected.extend(batch)
        if len(batch) < page_size:
            # A short page means the store is exhausted.
            break
        offset += page_size
    return collected
def ls_info(self, path: str) -> list[FileInfo]:
    """List files and directories in the given directory (non-recursive).

    Args:
        path: Absolute path of the directory.

    Returns:
        FileInfo-like dicts for files and directories directly under the
        directory. Directories get a trailing / and is_dir=True.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    # Retrieve every item and filter by prefix locally, so we don't depend
    # on store-specific filter semantics.
    items = self._search_store_paginated(store, namespace)
    # Normalize to a trailing slash so prefix matching is unambiguous.
    prefix = path if path.endswith("/") else path + "/"
    listing: list[FileInfo] = []
    seen_dirs: set[str] = set()
    for item in items:
        key = str(item.key)
        if not key.startswith(prefix):
            continue
        remainder = key[len(prefix):]
        if "/" in remainder:
            # Entry lives in a subdirectory; record only its first segment.
            seen_dirs.add(prefix + remainder.split("/")[0] + "/")
            continue
        # A file directly inside the requested directory.
        try:
            data = self._convert_store_item_to_file_data(item)
        except ValueError:
            # Malformed store entries are skipped rather than failing the listing.
            continue
        listing.append({
            "path": item.key,
            "is_dir": False,
            "size": int(len("\n".join(data.get("content", [])))),
            "modified_at": data.get("modified_at", ""),
        })
    listing.extend(
        {"path": d, "is_dir": True, "size": 0, "modified_at": ""}
        for d in sorted(seen_dirs)
    )
    listing.sort(key=lambda info: info.get("path", ""))
    return listing
def read(
    self,
    file_path: str,
    offset: int = 0,
    limit: int = 2000,
) -> str:
    """Read a file's content, formatted with line numbers.

    Args:
        file_path: Absolute path of the file.
        offset: Line offset to start reading from (0-based).
        limit: Maximum number of lines to read.

    Returns:
        The file content formatted with line numbers, or an error message.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    found = store.get(namespace, file_path)
    if found is None:
        return f"Error: File '{file_path}' not found"
    try:
        data = self._convert_store_item_to_file_data(found)
    except ValueError as exc:
        # Malformed store entries are surfaced as user-facing errors.
        return f"Error: {exc}"
    return format_read_response(data, offset, limit)
def write(
    self,
    file_path: str,
    content: str,
) -> WriteResult:
    """Create a new file with the given content.

    Returns a WriteResult; external stores set files_update=None.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    # Refuse to clobber an existing file; callers must edit it instead.
    if store.get(namespace, file_path) is not None:
        return WriteResult(
            error=f"Cannot write to {file_path} because it already exists. Read and then make an edit, or write to a new path."
        )
    new_data = create_file_data(content)
    store.put(namespace, file_path, self._convert_file_data_to_store_value(new_data))
    return WriteResult(path=file_path, files_update=None)
def edit(
    self,
    file_path: str,
    old_string: str,
    new_string: str,
    replace_all: bool = False,
) -> EditResult:
    """Edit a file by replacing occurrences of a string.

    Returns an EditResult; external stores set files_update=None.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    stored = store.get(namespace, file_path)
    if stored is None:
        return EditResult(error=f"Error: File '{file_path}' not found")
    try:
        data = self._convert_store_item_to_file_data(stored)
    except ValueError as exc:
        return EditResult(error=f"Error: {exc}")
    outcome = perform_string_replacement(
        file_data_to_string(data), old_string, new_string, replace_all
    )
    if isinstance(outcome, str):
        # A string result is an error message from the replacement helper.
        return EditResult(error=outcome)
    new_content, occurrences = outcome
    updated = update_file_data(data, new_content)
    # Persist the updated file back to the store.
    store.put(namespace, file_path, self._convert_file_data_to_store_value(updated))
    return EditResult(path=file_path, files_update=None, occurrences=int(occurrences))
# Removed legacy grep() convenience to keep lean surface
def grep_raw(
    self,
    pattern: str,
    path: str = "/",
    glob: str | None = None,
) -> list[GrepMatch] | str:
    """Return structured grep matches for files stored under the namespace.

    Returns a list of GrepMatch dicts, or an error string (e.g. for an
    invalid regex) as produced by grep_matches_from_files.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    file_map: dict[str, Any] = {}
    for entry in self._search_store_paginated(store, namespace):
        try:
            file_map[entry.key] = self._convert_store_item_to_file_data(entry)
        except ValueError:
            # Ignore malformed entries; grep only sees valid files.
            continue
    return grep_matches_from_files(file_map, pattern, path, glob)
def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
    """Return FileInfo entries for files matching a glob pattern."""
    store = self._get_store()
    namespace = self._get_namespace()
    file_map: dict[str, Any] = {}
    for entry in self._search_store_paginated(store, namespace):
        try:
            file_map[entry.key] = self._convert_store_item_to_file_data(entry)
        except ValueError:
            continue
    matched = _glob_search_files(file_map, pattern, path)
    # The helper returns a sentinel string instead of an empty result.
    if matched == "No files found":
        return []
    results: list[FileInfo] = []
    for file_path in matched.split("\n"):
        data = file_map.get(file_path)
        results.append({
            "path": file_path,
            "is_dir": False,
            "size": int(len("\n".join(data.get("content", [])))) if data else 0,
            "modified_at": data.get("modified_at", "") if data else "",
        })
    return results
def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
    """Upload multiple files to the store.

    Args:
        files: List of (path, content) tuples where content is bytes.

    Returns:
        A list of FileUploadResponse objects, one per input file, in the
        same order as the input.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    responses: list[FileUploadResponse] = []
    for path, content in files:
        try:
            content_str = content.decode("utf-8")
        except UnicodeDecodeError:
            # Previously a single non-UTF-8 payload raised and aborted the
            # whole batch with no responses. Report a per-file error instead,
            # mirroring the per-file error style used by download_files.
            responses.append(FileUploadResponse(path=path, error="invalid_utf8"))
            continue
        # Create file data and persist it.
        file_data = create_file_data(content_str)
        store_value = self._convert_file_data_to_store_value(file_data)
        store.put(namespace, path, store_value)
        responses.append(FileUploadResponse(path=path, error=None))
    return responses
def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
    """Download multiple files from the store.

    Args:
        paths: List of file paths to download.

    Returns:
        A list of FileDownloadResponse objects, one per input path, in the
        same order as the input.
    """
    store = self._get_store()
    namespace = self._get_namespace()
    responses: list[FileDownloadResponse] = []
    for path in paths:
        item = store.get(namespace, path)
        if item is None:
            responses.append(FileDownloadResponse(path=path, content=None, error="file_not_found"))
            continue
        try:
            file_data = self._convert_store_item_to_file_data(item)
        except ValueError:
            # Malformed store entries are reported per-file instead of
            # raising, consistent with ls_info/read/edit which also catch
            # ValueError from this conversion.
            responses.append(FileDownloadResponse(path=path, content=None, error="invalid_file_data"))
            continue
        # Serialize the file content back to UTF-8 bytes.
        content_bytes = file_data_to_string(file_data).encode("utf-8")
        responses.append(FileDownloadResponse(path=path, content=content_bytes, error=None))
    return responses

View File

@@ -0,0 +1,436 @@
"""메모리 백엔드 구현을 위한 공유 유틸리티 함수들.
이 모듈은 백엔드와 복합 라우터(composite router)에서 사용하는
사용자 대면 문자열 포맷터와 구조적 헬퍼 함수를 포함합니다.
구조적 헬퍼는 깨지기 쉬운 문자열 파싱 없이 구성을 가능하게 합니다.
"""
import re
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, Literal
import wcmatch.glob as wcglob
from deepagents.backends.protocol import FileInfo as _FileInfo
from deepagents.backends.protocol import GrepMatch as _GrepMatch
# Message returned by check_empty_content when a file has no usable content.
EMPTY_CONTENT_WARNING = "System reminder: File exists but has empty contents"
# Lines longer than this are split into continuation chunks when rendered.
MAX_LINE_LENGTH = 10000
# Column width of the line-number gutter in formatted output.
LINE_NUMBER_WIDTH = 6
TOOL_RESULT_TOKEN_LIMIT = 20000  # Same threshold as eviction
# Appended to truncated tool results to nudge callers toward narrower queries.
TRUNCATION_GUIDANCE = "... [results truncated, try being more specific with your parameters]"
# Re-export protocol types for backwards compatibility
FileInfo = _FileInfo
GrepMatch = _GrepMatch
def sanitize_tool_call_id(tool_call_id: str) -> str:
    r"""Sanitize a tool_call_id to avoid path traversal and separator issues.

    Replaces the risky characters (., /, \) with underscores.
    """
    # str.translate performs all three single-character replacements in one pass.
    return tool_call_id.translate(str.maketrans({".": "_", "/": "_", "\\": "_"}))
def format_content_with_line_numbers(
    content: str | list[str],
    start_line: int = 1,
) -> str:
    """Format file content with line numbers (cat -n style).

    Lines longer than MAX_LINE_LENGTH are emitted in chunks with
    continuation markers (e.g. 5.1, 5.2).

    Args:
        content: File content as a string or a list of lines.
        start_line: Number assigned to the first line (default: 1).

    Returns:
        The formatted content with line numbers and continuation markers.
    """
    if isinstance(content, str):
        lines = content.split("\n")
        # Drop the single trailing empty element produced by a final newline.
        if lines and lines[-1] == "":
            lines = lines[:-1]
    else:
        lines = content
    rendered: list[str] = []
    for line_num, line in enumerate(lines, start=start_line):
        if len(line) <= MAX_LINE_LENGTH:
            rendered.append(f"{line_num:{LINE_NUMBER_WIDTH}d}\t{line}")
            continue
        # Over-long line: emit MAX_LINE_LENGTH-sized chunks. The first chunk
        # keeps the plain number; later chunks get decimal markers (5.1, 5.2, ...).
        for chunk_idx, start in enumerate(range(0, len(line), MAX_LINE_LENGTH)):
            chunk = line[start : start + MAX_LINE_LENGTH]
            if chunk_idx == 0:
                rendered.append(f"{line_num:{LINE_NUMBER_WIDTH}d}\t{chunk}")
            else:
                marker = f"{line_num}.{chunk_idx}"
                rendered.append(f"{marker:>{LINE_NUMBER_WIDTH}}\t{chunk}")
    return "\n".join(rendered)
def check_empty_content(content: str) -> str | None:
"""내용이 비어 있는지 확인하고 경고 메시지를 반환합니다.
Args:
content: 확인할 내용
Returns:
비어 있는 경우 경고 메시지, 그렇지 않으면 None
"""
if not content or content.strip() == "":
return EMPTY_CONTENT_WARNING
return None
def file_data_to_string(file_data: dict[str, Any]) -> str:
    """Convert FileData into its plain string content.

    Args:
        file_data: FileData dict whose 'content' key holds a list of lines.

    Returns:
        The lines joined with newline characters.
    """
    lines = file_data["content"]
    return "\n".join(lines)
def create_file_data(content: str, created_at: str | None = None) -> dict[str, Any]:
"""타임스탬프를 포함하는 FileData 객체를 생성합니다.
Args:
content: 문자열 형태의 파일 내용
created_at: 선택적 생성 타임스탬프 (ISO 형식)
Returns:
내용과 타임스탬프를 포함하는 FileData dict
"""
lines = content.split("\n") if isinstance(content, str) else content
now = datetime.now(UTC).isoformat()
return {
"content": lines,
"created_at": created_at or now,
"modified_at": now,
}
def update_file_data(file_data: dict[str, Any], content: str) -> dict[str, Any]:
"""생성 타임스탬프를 유지하면서 새로운 내용으로 FileData를 업데이트합니다.
Args:
file_data: 기존 FileData dict
content: 문자열 형태의 새로운 내용
Returns:
업데이트된 FileData dict
"""
lines = content.split("\n") if isinstance(content, str) else content
now = datetime.now(UTC).isoformat()
return {
"content": lines,
"created_at": file_data["created_at"],
"modified_at": now,
}
def format_read_response(
    file_data: dict[str, Any],
    offset: int,
    limit: int,
) -> str:
    """Format file data with line numbers for a read response.

    Args:
        file_data: FileData dict.
        offset: Line offset (0-based).
        limit: Maximum number of lines.

    Returns:
        Formatted content or an error message.
    """
    text = file_data_to_string(file_data)
    warning = check_empty_content(text)
    if warning:
        return warning
    all_lines = text.splitlines()
    if offset >= len(all_lines):
        return f"Error: Line offset {offset} exceeds file length ({len(all_lines)} lines)"
    # Clamp the window to the end of the file before numbering it.
    window = all_lines[offset : min(offset + limit, len(all_lines))]
    return format_content_with_line_numbers(window, start_line=offset + 1)
def perform_string_replacement(
content: str,
old_string: str,
new_string: str,
replace_all: bool,
) -> tuple[str, int] | str:
"""발생(occurrence) 검증과 함께 문자열 교체를 수행합니다.
Args:
content: 원본 내용
old_string: 교체할 문자열
new_string: 새로운 문자열
replace_all: 모든 발생을 교체할지 여부
Returns:
성공 시 (new_content, occurrences) 튜플, 또는 에러 메시지 문자열
"""
occurrences = content.count(old_string)
if occurrences == 0:
return f"Error: String not found in file: '{old_string}'"
if occurrences > 1 and not replace_all:
return f"Error: String '{old_string}' appears {occurrences} times in file. Use replace_all=True to replace all instances, or provide a more specific string with surrounding context."
new_content = content.replace(old_string, new_string)
return new_content, occurrences
def truncate_if_too_long(result: list[str] | str) -> list[str] | str:
    """Truncate a list or string result that exceeds the token limit (rough estimate: 4 chars/token)."""
    char_budget = TOOL_RESULT_TOKEN_LIMIT * 4
    if isinstance(result, list):
        total_chars = sum(len(item) for item in result)
        if total_chars <= char_budget:
            return result
        # Keep a proportional prefix of the items, then append the guidance.
        keep = len(result) * char_budget // total_chars
        return result[:keep] + [TRUNCATION_GUIDANCE]
    # String input: truncate to the character budget and append the guidance.
    if len(result) <= char_budget:
        return result
    return result[:char_budget] + "\n" + TRUNCATION_GUIDANCE
def _validate_path(path: str | None) -> str:
"""경로를 검증하고 정규화합니다.
Args:
path: 검증할 경로
Returns:
/로 시작하는 정규화된 경로
Raises:
ValueError: 경로가 유효하지 않은 경우
"""
path = path or "/"
if not path or path.strip() == "":
raise ValueError("Path cannot be empty")
normalized = path if path.startswith("/") else "/" + path
if not normalized.endswith("/"):
normalized += "/"
return normalized
def _glob_search_files(
    files: dict[str, Any],
    pattern: str,
    path: str = "/",
) -> str:
    """Search a files dict for paths matching a glob pattern.

    Args:
        files: Mapping from file path to FileData.
        pattern: Glob pattern (e.g. "*.py", "**/*.ts").
        path: Base path to search from.

    Returns:
        Newline-separated file paths sorted by modification time (newest
        first), or "No files found" when nothing matches.
    """
    try:
        prefix = _validate_path(path)
    except ValueError:
        return "No files found"
    # Standard glob semantics: a pattern without separators (e.g. "*.py")
    # matches only directly under `path`; use "**" explicitly for recursion.
    hits: list[tuple[str, str]] = []
    for file_path, file_data in files.items():
        if not file_path.startswith(prefix):
            continue
        relative = file_path[len(prefix) :].lstrip("/")
        if not relative:
            relative = file_path.split("/")[-1]
        if wcglob.globmatch(relative, pattern, flags=wcglob.BRACE | wcglob.GLOBSTAR):
            hits.append((file_path, file_data["modified_at"]))
    if not hits:
        return "No files found"
    hits.sort(key=lambda entry: entry[1], reverse=True)
    return "\n".join(file_path for file_path, _ in hits)
def _format_grep_results(
results: dict[str, list[tuple[int, str]]],
output_mode: Literal["files_with_matches", "content", "count"],
) -> str:
"""출력 모드에 따라 grep 검색 결과를 포맷팅합니다.
Args:
results: 파일 경로에서 (line_num, line_content) 튜플 리스트로의 딕셔너리
output_mode: 출력 형식 - "files_with_matches", "content", 또는 "count"
Returns:
포맷팅된 문자열 출력
"""
if output_mode == "files_with_matches":
return "\n".join(sorted(results.keys()))
if output_mode == "count":
lines = []
for file_path in sorted(results.keys()):
count = len(results[file_path])
lines.append(f"{file_path}: {count}")
return "\n".join(lines)
lines = []
for file_path in sorted(results.keys()):
lines.append(f"{file_path}:")
for line_num, line in results[file_path]:
lines.append(f" {line_num}: {line}")
return "\n".join(lines)
def _grep_search_files(
    files: dict[str, Any],
    pattern: str,
    path: str | None = None,
    glob: str | None = None,
    output_mode: Literal["files_with_matches", "content", "count"] = "files_with_matches",
) -> str:
    """Search file contents for a regex pattern.

    Args:
        files: Mapping from file path to FileData.
        pattern: Regex pattern to search for.
        path: Base path to search from.
        glob: Optional glob pattern to filter file names (e.g. "*.py").
        output_mode: "files_with_matches", "content", or "count".

    Returns:
        Formatted search results, or "No matches found".
    """
    try:
        regex = re.compile(pattern)
    except re.error as e:
        return f"Invalid regex pattern: {e}"
    try:
        prefix = _validate_path(path)
    except ValueError:
        return "No matches found"
    hits: dict[str, list[tuple[int, str]]] = {}
    for file_path, file_data in files.items():
        if not file_path.startswith(prefix):
            continue
        if glob and not wcglob.globmatch(Path(file_path).name, glob, flags=wcglob.BRACE):
            continue
        matched_lines = [
            (line_num, line)
            for line_num, line in enumerate(file_data["content"], 1)
            if regex.search(line)
        ]
        if matched_lines:
            hits[file_path] = matched_lines
    if not hits:
        return "No matches found"
    return _format_grep_results(hits, output_mode)
# -------- Structured helpers for composition --------
def grep_matches_from_files(
    files: dict[str, Any],
    pattern: str,
    path: str | None = None,
    glob: str | None = None,
) -> list[GrepMatch] | str:
    """Return structured grep matches from an in-memory file mapping.

    Returns a list of GrepMatch on success, or a string for bad input (e.g.
    an invalid regex). Errors are deliberately returned instead of raised so
    backends in tool contexts can preserve user-facing error messages.
    """
    try:
        regex = re.compile(pattern)
    except re.error as e:
        return f"Invalid regex pattern: {e}"
    try:
        prefix = _validate_path(path)
    except ValueError:
        return []
    found: list[GrepMatch] = []
    for file_path, file_data in files.items():
        if not file_path.startswith(prefix):
            continue
        if glob and not wcglob.globmatch(Path(file_path).name, glob, flags=wcglob.BRACE):
            continue
        for line_num, line in enumerate(file_data["content"], 1):
            if regex.search(line):
                found.append({"path": file_path, "line": int(line_num), "text": line})
    return found
def build_grep_results_dict(matches: list[GrepMatch]) -> dict[str, list[tuple[int, str]]]:
    """Group structured matches into the legacy dict shape used by the formatter."""
    grouped: dict[str, list[tuple[int, str]]] = {}
    for match in matches:
        path = match["path"]
        # Preserve first-seen path order and per-path match order.
        if path not in grouped:
            grouped[path] = []
        grouped[path].append((match["line"], match["text"]))
    return grouped
def format_grep_matches(
    matches: list[GrepMatch],
    output_mode: Literal["files_with_matches", "content", "count"],
) -> str:
    """Format structured grep matches using the existing formatting logic."""
    if not matches:
        return "No matches found"
    grouped = build_grep_results_dict(matches)
    return _format_grep_results(grouped, output_mode)

View File

@@ -0,0 +1,158 @@
"""Deepagents는 계획(planning), 파일시스템(filesystem), 하위 에이전트(subagents) 기능을 포함합니다."""
from collections.abc import Callable, Sequence
from typing import Any
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware, InterruptOnConfig, TodoListMiddleware
from langchain.agents.middleware.summarization import SummarizationMiddleware
from langchain.agents.middleware.types import AgentMiddleware
from langchain.agents.structured_output import ResponseFormat
from langchain.chat_models import init_chat_model
from langchain_anthropic import ChatAnthropic
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.cache.base import BaseCache
from langgraph.graph.state import CompiledStateGraph
from langgraph.store.base import BaseStore
from langgraph.types import Checkpointer
from deepagents.backends.protocol import BackendFactory, BackendProtocol
from deepagents.middleware.filesystem import FilesystemMiddleware
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from deepagents.middleware.subagents import CompiledSubAgent, SubAgent, SubAgentMiddleware
# Baseline instructions appended to every deep agent's system prompt.
BASE_AGENT_PROMPT = "In order to complete the objective that the user asks of you, you have access to a number of standard tools."
def get_default_model() -> ChatAnthropic:
    """Return the default model for a Deep Agent.

    Returns:
        A ChatAnthropic instance configured for Claude Sonnet 4.5
        (claude-sonnet-4-5-20250929) with a 20k max output token budget.
    """
    return ChatAnthropic(
        model_name="claude-sonnet-4-5-20250929",
        max_tokens=20000,
    )
def create_deep_agent(
    model: str | BaseChatModel | None = None,
    tools: Sequence[BaseTool | Callable | dict[str, Any]] | None = None,
    *,
    system_prompt: str | None = None,
    middleware: Sequence[AgentMiddleware] = (),
    subagents: list[SubAgent | CompiledSubAgent] | None = None,
    response_format: ResponseFormat | None = None,
    context_schema: type[Any] | None = None,
    checkpointer: Checkpointer | None = None,
    store: BaseStore | None = None,
    backend: BackendProtocol | BackendFactory | None = None,
    interrupt_on: dict[str, bool | InterruptOnConfig] | None = None,
    debug: bool = False,
    name: str | None = None,
    cache: BaseCache | None = None,
) -> CompiledStateGraph:
    """Create a Deep Agent.

    By default the agent has a todo-list tool (write_todos), seven file and
    execution tools (ls, read_file, write_file, edit_file, glob, grep,
    execute), and a subagent-spawning tool. The execute tool can run shell
    commands when the backend implements SandboxBackendProtocol; for
    non-sandbox backends the execute tool returns an error message.

    Args:
        model: The model to use. Defaults to Claude Sonnet 4.5.
        tools: The tools the agent has access to.
        system_prompt: Extra instructions for the agent; included in the system prompt.
        middleware: Additional middleware applied after the standard middleware.
        subagents: Subagents to use. Each subagent should be a dict with keys:
            - `name`
            - `description` (used by the main agent to decide whether to call it)
            - `prompt` (used as the subagent's system prompt)
            - (optional) `tools`
            - (optional) `model` (LanguageModelLike instance or dict config)
            - (optional) `middleware` (List[AgentMiddleware])
        response_format: Structured-output response format for the agent.
        context_schema: Schema of the Deep Agent.
        checkpointer: Optional checkpointer for persisting agent state between runs.
        store: Optional store for persistence (required when the backend uses StoreBackend).
        backend: Optional backend for file storage and execution. Pass a Backend
            instance or a callable factory such as `lambda rt: StateBackend(rt)`.
            For execution support use a backend implementing SandboxBackendProtocol.
        interrupt_on: Optional Dict[str, bool | InterruptOnConfig] mapping tool
            names to interrupt configs.
        debug: Whether to enable debug mode. Forwarded to create_agent.
        name: Name of the agent. Forwarded to create_agent.
        cache: Cache to use for the agent. Forwarded to create_agent.

    Returns:
        The configured Deep Agent.
    """
    if model is None:
        model = get_default_model()
    elif isinstance(model, str):
        model = init_chat_model(model)
    # Size summarization thresholds from the model's declared context window
    # when available; otherwise fall back to fixed token/message counts.
    if (
        model.profile is not None
        and isinstance(model.profile, dict)
        and "max_input_tokens" in model.profile
        and isinstance(model.profile["max_input_tokens"], int)
    ):
        trigger = ("fraction", 0.85)
        keep = ("fraction", 0.10)
    else:
        trigger = ("tokens", 170000)
        keep = ("messages", 6)
    deepagent_middleware = [
        TodoListMiddleware(),
        FilesystemMiddleware(backend=backend),
        SubAgentMiddleware(
            default_model=model,
            default_tools=tools,
            subagents=subagents if subagents is not None else [],
            # Subagents get the same standard stack as the main agent.
            default_middleware=[
                TodoListMiddleware(),
                FilesystemMiddleware(backend=backend),
                SummarizationMiddleware(
                    model=model,
                    trigger=trigger,
                    keep=keep,
                    trim_tokens_to_summarize=None,
                ),
                AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
                PatchToolCallsMiddleware(),
            ],
            default_interrupt_on=interrupt_on,
            general_purpose_agent=True,
        ),
        SummarizationMiddleware(
            model=model,
            trigger=trigger,
            keep=keep,
            trim_tokens_to_summarize=None,
        ),
        AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
        PatchToolCallsMiddleware(),
    ]
    if middleware:
        deepagent_middleware.extend(middleware)
    # Human-in-the-loop interrupts run last so they see the final tool set.
    if interrupt_on is not None:
        deepagent_middleware.append(HumanInTheLoopMiddleware(interrupt_on=interrupt_on))
    return create_agent(
        model,
        system_prompt=system_prompt + "\n\n" + BASE_AGENT_PROMPT if system_prompt else BASE_AGENT_PROMPT,
        tools=tools,
        middleware=deepagent_middleware,
        response_format=response_format,
        context_schema=context_schema,
        checkpointer=checkpointer,
        store=store,
        debug=debug,
        name=name,
        cache=cache,
    ).with_config({"recursion_limit": 1000})

View File

@@ -0,0 +1,11 @@
"""Middleware for the DeepAgent."""
from deepagents.middleware.filesystem import FilesystemMiddleware
from deepagents.middleware.subagents import CompiledSubAgent, SubAgent, SubAgentMiddleware
__all__ = [
"CompiledSubAgent",
"FilesystemMiddleware",
"SubAgent",
"SubAgentMiddleware",
]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
"""Middleware to patch dangling tool calls in the messages history."""
from typing import Any
from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.messages import ToolMessage
from langgraph.runtime import Runtime
from langgraph.types import Overwrite
class PatchToolCallsMiddleware(AgentMiddleware):
    """Middleware that patches dangling tool calls in the message history."""

    def before_agent(self, state: AgentState, runtime: Runtime[Any]) -> dict[str, Any] | None:  # noqa: ARG002
        """Handle dangling tool calls on AIMessages before the agent runs."""
        history = state["messages"]
        if not history:
            return None
        repaired: list[Any] = []
        for index, message in enumerate(history):
            repaired.append(message)
            if message.type != "ai" or not message.tool_calls:
                continue
            # IDs of tool results appearing at or after this AI message.
            answered = {m.tool_call_id for m in history[index:] if m.type == "tool"}
            for call in message.tool_calls:
                if call["id"] in answered:
                    continue
                # Dangling tool call: synthesize a ToolMessage so the
                # history stays well-formed for the model.
                repaired.append(
                    ToolMessage(
                        content=(
                            f"도구 호출 {call['name']} (ID: {call['id']})이 취소되었습니다 - "
                            "완료되기 전에 다른 메시지가 도착했습니다."
                        ),
                        name=call["name"],
                        tool_call_id=call["id"],
                    )
                )
        return {"messages": Overwrite(repaired)}

View File

@@ -0,0 +1,498 @@
"""Middleware for providing subagents to an agent via a `task` tool."""
from collections.abc import Awaitable, Callable, Sequence
from typing import Any, NotRequired, TypedDict, cast
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware, InterruptOnConfig
from langchain.agents.middleware.types import AgentMiddleware, ModelRequest, ModelResponse
from langchain.tools import BaseTool, ToolRuntime
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import StructuredTool
from langgraph.types import Command
class SubAgent(TypedDict):
    """Specification for a subagent.

    When specifying custom agents, the `default_middleware` of
    `SubAgentMiddleware` is applied first, followed by the `middleware`
    declared in this specification. To use only custom middleware without
    the defaults, pass `default_middleware=[]` to `SubAgentMiddleware`.
    """
    name: str
    """Name of the agent."""
    description: str
    """Description of the agent."""
    system_prompt: str
    """System prompt to use for the agent."""
    tools: Sequence[BaseTool | Callable | dict[str, Any]]
    """Tools to use for the agent."""
    model: NotRequired[str | BaseChatModel]
    """Model for the agent. Defaults to `default_model`."""
    middleware: NotRequired[list[AgentMiddleware]]
    """Additional middleware appended after `default_middleware`."""
    interrupt_on: NotRequired[dict[str, bool | InterruptOnConfig]]
    """Tool interrupt configuration to use for the agent."""
class CompiledSubAgent(TypedDict):
    """Specification for a pre-compiled subagent."""
    name: str
    """Name of the agent."""
    description: str
    """Description of the agent."""
    runnable: Runnable
    """Runnable to use for the agent."""
# Fallback system prompt for subagents (Korean; runtime string — do not translate).
DEFAULT_SUBAGENT_PROMPT = "사용자가 요청하는 목표를 완료하기 위해, 당신은 여러 표준 도구에 접근할 수 있습니다."
# State keys that are excluded when passing state to subagents and when returning
# updates from subagents.
# When returning updates:
# 1. The messages key is handled explicitly to ensure only the final message is included
# 2. The todos and structured_response keys are excluded as they do not have a defined reducer
# and no clear meaning for returning them from a subagent to the main agent.
_EXCLUDED_STATE_KEYS = {"messages", "todos", "structured_response"}
TASK_TOOL_DESCRIPTION = """격리된 컨텍스트 창(isolated context windows)을 가진 복잡하고 다단계적인 독립 작업을 처리하기 위해 일회성(ephemeral) 서브 에이전트를 실행합니다.
사용 가능한 에이전트 유형과 그들이 접근할 수 있는 도구:
{available_agents}
Task 도구를 사용할 때는 subagent_type 매개변수를 지정하여 사용할 에이전트 유형을 선택해야 합니다.
## 사용 참고 사항:
1. 성능을 극대화하기 위해 가능한 경우 여러 에이전트를 동시에(concurrently) 실행하십시오. 이를 위해 다중 도구 사용(multiple tool uses)이 포함된 단일 메시지를 사용하십시오.
2. 에이전트가 완료되면 단일 메시지를 반환합니다. 에이전트가 반환한 결과는 사용자에게 보이지 않습니다. 사용자에게 결과를 보여주려면 결과에 대한 간결한 요약이 담긴 텍스트 메시지를 사용자에게 보내야 합니다.
3. 각 에이전트 호출은 상태비저장(stateless)입니다. 서브 에이전트에게 추가 메시지를 보낼 수 없으며, 서브 에이전트도 최종 보고서 이외에는 당신과 통신할 수 없습니다. 따라서 프롬프트에는 에이전트가 자율적으로 수행해야 할 작업에 대한 매우 자세한 설명이 포함되어야 하며, 에이전트가 최종적이고 유일한 메시지로 어떤 정보를 반환해야 하는지 정확히 지정해야 합니다.
4. 에이전트의 출력은 일반적으로 신뢰할 수 있어야 합니다.
5. 에이전트는 사용자의 의도를 알지 못하므로 콘텐츠 생성, 분석 수행, 또는 단순 연구(검색, 파일 읽기, 웹 가져오기 등) 중 무엇을 수행해야 하는지 명확하게 알려주십시오.
6. 에이전트 설명에 선제적으로(proactively) 사용해야 한다고 언급되어 있다면, 사용자가 먼저 요청하지 않아도 최선을 다해 사용해 보십시오. 판단력을 발휘하십시오.
7. 범용(general-purpose) 에이전트만 제공되는 경우 모든 작업에 해당 에이전트를 사용해야 합니다. 메인 에이전트와 동일한 모든 기능을 갖추고 있으므로, 컨텍스트와 토큰 사용을 격리하고 특정하고 복잡한 작업을 완료하는 데 매우 적합합니다.
### 범용 에이전트 사용 예시:
<example_agent_descriptions>
"general-purpose": use this agent for general purpose tasks, it has access to all tools as the main agent.
</example_agent_descriptions>
<example>
User: "I want to conduct research on the accomplishments of Lebron James, Michael Jordan, and Kobe Bryant, and then compare them."
Assistant: *Uses the task tool in parallel to conduct isolated research on each of the three players*
Assistant: *Synthesizes the results of the three isolated research tasks and responds to the User*
<commentary>
연구는 그 자체로 복잡하고 다단계적인 작업입니다.
각 개별 선수의 연구는 다른 선수의 연구에 의존하지 않습니다.
어시스턴트는 task 도구를 사용하여 복잡한 목표를 세 가지 독립적인 작업으로 나눕니다.
각 연구 작업은 한 선수에 대한 컨텍스트와 토큰만 신경 쓰면 되며, 도구 결과로 각 선수에 대한 종합된 정보를 반환합니다.
이는 각 연구 작업이 각 선수를 깊이 있게 연구하는 데 토큰과 컨텍스트를 사용할 수 있음을 의미하며, 최종 결과는 종합된 정보이므로 선수들을 서로 비교할 때 장기적으로 토큰을 절약할 수 있습니다.
</commentary>
</example>
<example>
User: "Analyze a single large code repository for security vulnerabilities and generate a report."
Assistant: *Launches a single `task` subagent for the repository analysis*
Assistant: *Receives report and integrates results into final summary*
<commentary>
서브 에이전트는 단 하나라도 크고 컨텍스트가 많은 작업을 격리하는 데 사용됩니다. 이는 메인 스레드가 세부 사항으로 과부하되는 것을 방지합니다.
사용자가 후속 질문을 하면 분석 및 도구 호출의 전체 기록 대신 참조할 간결한 보고서가 있으므로 시간과 비용을 절약할 수 있습니다.
</commentary>
</example>
<example>
User: "Schedule two meetings for me and prepare agendas for each."
Assistant: *Calls the task tool in parallel to launch two `task` subagents (one per meeting) to prepare agendas*
Assistant: *Returns final schedules and agendas*
<commentary>
작업은 개별적으로는 간단하지만, 서브 에이전트는 의제 준비를 격리하는 데 도움이 됩니다.
각 서브 에이전트는 한 회의의 의제만 신경 쓰면 됩니다.
</commentary>
</example>
<example>
User: "I want to order a pizza from Dominos, order a burger from McDonald's, and order a salad from Subway."
Assistant: *Calls tools directly in parallel to order a pizza from Dominos, a burger from McDonald's, and a salad from Subway*
<commentary>
목표가 매우 간단하고 명확하며 몇 가지 사소한 도구 호출만 필요하므로 어시스턴트는 task 도구를 사용하지 않았습니다.
작업을 직접 완료하고 `task` 도구를 사용하지 않는 것이 더 좋습니다.
</commentary>
</example>
### Example usage with custom agents:
<example_agent_descriptions>
"content-reviewer": use this agent after you are done creating significant content or documents
"greeting-responder": use this agent when to respond to user greetings with a friendly joke
"research-analyst": use this agent to conduct thorough research on complex topics
</example_agent_description>
<example>
user: "Please write a function that checks if a number is prime"
assistant: Sure let me write a function that checks if a number is prime
assistant: First let me use the Write tool to write a function that checks if a number is prime
assistant: I'm going to use the Write tool to write the following code:
<code>
function isPrime(n) {{
if (n <= 1) return false
for (let i = 2; i * i <= n; i++) {{
if (n % i === 0) return false
}}
return true
}}
</code>
<commentary>
상당한 콘텐츠가 생성되었고 작업이 완료되었으므로, 이제 content-reviewer 에이전트를 사용하여 작업을 검토합니다.
</commentary>
assistant: Now let me use the content-reviewer agent to review the code
assistant: Uses the Task tool to launch with the content-reviewer agent
</example>
<example>
user: "Can you help me research the environmental impact of different renewable energy sources and create a comprehensive report?"
<commentary>
이것은 철저한 분석을 수행하기 위해 research-analyst 에이전트를 사용하는 것이 도움이 되는 복잡한 연구 작업입니다.
</commentary>
assistant: I'll help you research the environmental impact of renewable energy sources. Let me use the research-analyst agent to conduct comprehensive research on this topic.
assistant: Uses the Task tool to launch with the research-analyst agent, providing detailed instructions about what research to conduct and what format the report should take
</example>
<example>
user: "Hello"
<commentary>
사용자가 인사를 하고 있으므로, greeting-responder 에이전트를 사용하여 친절한 농담으로 응답하십시오.
</commentary>
assistant: "I'm going to use the Task tool to launch with the greeting-responder agent"
</example>""" # noqa: E501
TASK_SYSTEM_PROMPT = """## `task` (서브 에이전트 스포너(spawner))
당신은 격리된 작업을 처리하는 일회성 서브 에이전트를 실행하기 위한 `task` 도구에 접근할 수 있습니다. 이 에이전트들은 일회적(ephemeral)입니다 — 작업 기간 동안에만 존재하며 단일 결과를 반환합니다.
task 도구를 사용해야 하는 경우:
- 작업이 복잡하고 다단계적이며 완전히 격리하여 위임할 수 있는 경우
- 작업이 다른 작업과 독립적이며 병렬로 실행할 수 있는 경우
- 작업에 집중적인 추론이나 많은 토큰/컨텍스트 사용이 필요하여 오케스트레이터 스레드를 부풀릴(bloat) 수 있는 경우
- 샌드박싱이 신뢰성을 향상시키는 경우 (예: 코드 실행, 구조화된 검색, 데이터 포맷팅)
- 서브 에이전트의 중간 단계가 아니라 출력에만 관심이 있는 경우 (예: 많은 연구를 수행한 후 종합된 보고서를 반환하거나, 간결하고 관련성 있는 답변을 얻기 위해 일련의 계산 또는 조회를 수행하는 경우)
서브 에이전트 생명주기:
1. **생성(Spawn)** → 명확한 역할, 지침 및 예상 출력 제공
2. **실행(Run)** → 서브 에이전트가 자율적으로 작업 완료
3. **반환(Return)** → 서브 에이전트가 단일 구조화된 결과를 제공
4. **조정(Reconcile)** → 결과를 메인 스레드에 통합하거나 합성
task 도구를 사용하지 말아야 하는 경우:
- 서브 에이전트가 완료된 후 중간 추론이나 단계를 확인해야 하는 경우 (task 도구는 이를 숨깁니다)
- 작업이 사소한 경우 (몇 번의 도구 호출 또는 간단한 조회)
- 위임이 토큰 사용량, 복잡성 또는 컨텍스트 전환을 줄이지 않는 경우
- 분할이 이점 없이 지연 시간만 추가하는 경우
## 기억해야 할 중요한 Task 도구 사용 참고 사항
- 가능하면 수행하는 작업을 병렬화하십시오. 이는 도구 호출(tool_calls)과 작업(tasks) 모두에 해당합니다. 완료해야 할 독립적인 단계가 있을 때마다 - 도구 호출을 하거나 작업을 병렬로 시작(kick off)하여 더 빠르게 완료하십시오. 이는 사용자에게 매우 중요한 시간을 절약해 줍니다.
- 다중 파트 목표 내에서 독립적인 작업을 격리(silo)하려면 `task` 도구를 사용하는 것을 기억하십시오.
- 여러 단계가 걸리고 에이전트가 완료해야 하는 다른 작업과 독립적인 복잡한 작업이 있을 때마다 `task` 도구를 사용해야 합니다. 이 에이전트들은 매우 유능하고 효율적입니다.""" # noqa: E501
# Tool-facing description of the built-in general-purpose subagent (Korean; runtime string).
DEFAULT_GENERAL_PURPOSE_DESCRIPTION = "복잡한 질문 연구, 파일 및 콘텐츠 검색, 다중 단계 작업 실행을 위한 범용 에이전트입니다. 키워드나 파일을 검색할 때 처음 몇 번의 시도로 올바른 일치 항목을 찾을 수 있을지 확신이 서지 않는다면, 이 에이전트를 사용하여 검색을 수행하십시오. 이 에이전트는 메인 에이전트와 동일한 모든 도구에 접근할 수 있습니다." # noqa: E501
def _get_subagents(
    *,
    default_model: str | BaseChatModel,
    default_tools: Sequence[BaseTool | Callable | dict[str, Any]],
    default_middleware: list[AgentMiddleware] | None,
    default_interrupt_on: dict[str, bool | InterruptOnConfig] | None,
    subagents: list[SubAgent | CompiledSubAgent],
    general_purpose_agent: bool,
) -> tuple[dict[str, Any], list[str]]:
    """Build subagent instances from their specifications.

    Args:
        default_model: Model used by subagents that do not specify one.
        default_tools: Tools used by subagents that do not specify any.
        default_middleware: Middleware applied to every subagent. When `None`,
            no default middleware is applied.
        default_interrupt_on: Tool interrupt configuration for the built-in
            general-purpose subagent; also the fallback for subagents that do
            not specify their own configuration.
        subagents: Agent specifications or pre-compiled agents.
        general_purpose_agent: Whether to include the general-purpose subagent.

    Returns:
        A ``(agent_dict, description_list)`` tuple mapping agent names to
        runnable instances, plus the formatted description lines.
    """
    # Treat None as "no default middleware at all".
    base_middleware = list(default_middleware or [])
    agents: dict[str, Any] = {}
    descriptions: list[str] = []

    # Built-in general-purpose subagent, if requested.
    if general_purpose_agent:
        gp_middleware = list(base_middleware)
        if default_interrupt_on:
            gp_middleware.append(HumanInTheLoopMiddleware(interrupt_on=default_interrupt_on))
        agents["general-purpose"] = create_agent(
            default_model,
            system_prompt=DEFAULT_SUBAGENT_PROMPT,
            tools=default_tools,
            middleware=gp_middleware,
        )
        descriptions.append(f"- general-purpose: {DEFAULT_GENERAL_PURPOSE_DESCRIPTION}")

    # User-provided subagents: either pre-compiled runnables or specs to build.
    for spec in subagents:
        descriptions.append(f"- {spec['name']}: {spec['description']}")
        if "runnable" in spec:
            # Pre-compiled agent: register as-is.
            compiled = cast("CompiledSubAgent", spec)
            agents[compiled["name"]] = compiled["runnable"]
            continue
        # Fresh middleware stack per subagent so appends never leak across specs.
        middleware_stack = list(base_middleware)
        if "middleware" in spec:
            middleware_stack.extend(spec["middleware"])
        hitl_config = spec.get("interrupt_on", default_interrupt_on)
        if hitl_config:
            middleware_stack.append(HumanInTheLoopMiddleware(interrupt_on=hitl_config))
        agents[spec["name"]] = create_agent(
            spec.get("model", default_model),
            system_prompt=spec["system_prompt"],
            tools=spec.get("tools", list(default_tools)),
            middleware=middleware_stack,
        )
    return agents, descriptions
def _create_task_tool(
    *,
    default_model: str | BaseChatModel,
    default_tools: Sequence[BaseTool | Callable | dict[str, Any]],
    default_middleware: list[AgentMiddleware] | None,
    default_interrupt_on: dict[str, bool | InterruptOnConfig] | None,
    subagents: list[SubAgent | CompiledSubAgent],
    general_purpose_agent: bool,
    task_description: str | None = None,
) -> BaseTool:
    """Create the `task` tool used to invoke subagents.

    Args:
        default_model: Default model for subagents.
        default_tools: Default tools for subagents.
        default_middleware: Middleware applied to every subagent.
        default_interrupt_on: Tool interrupt configuration for the built-in
            general-purpose subagent; also the fallback for subagents that do
            not specify their own configuration.
        subagents: List of subagent specifications.
        general_purpose_agent: Whether to include the general-purpose agent.
        task_description: Custom description for the task tool. Uses the
            default template when `None`. Supports the `{available_agents}`
            placeholder.

    Returns:
        A StructuredTool that can invoke subagents by type.
    """
    subagent_graphs, subagent_descriptions = _get_subagents(
        default_model=default_model,
        default_tools=default_tools,
        default_middleware=default_middleware,
        default_interrupt_on=default_interrupt_on,
        subagents=subagents,
        general_purpose_agent=general_purpose_agent,
    )
    subagent_description_str = "\n".join(subagent_descriptions)
    def _return_command_with_state_update(result: dict, tool_call_id: str) -> Command:
        # Propagate the subagent's final state to the parent graph, minus the
        # keys that must stay isolated per agent (see _EXCLUDED_STATE_KEYS).
        state_update = {k: v for k, v in result.items() if k not in _EXCLUDED_STATE_KEYS}
        # Strip trailing whitespace to prevent API errors with Anthropic
        message_text = result["messages"][-1].text.rstrip() if result["messages"][-1].text else ""
        return Command(
            update={
                **state_update,
                "messages": [ToolMessage(message_text, tool_call_id=tool_call_id)],
            }
        )
    def _validate_and_prepare_state(
        subagent_type: str, description: str, runtime: ToolRuntime
    ) -> tuple[Runnable, dict]:
        """Prepare state for invocation."""
        subagent = subagent_graphs[subagent_type]
        # Create a new state dict to avoid mutating the original
        subagent_state = {k: v for k, v in runtime.state.items() if k not in _EXCLUDED_STATE_KEYS}
        # The subagent starts from a clean conversation seeded with the task text.
        subagent_state["messages"] = [HumanMessage(content=description)]
        return subagent, subagent_state
    # Use custom description if provided, otherwise use default template
    if task_description is None:
        task_description = TASK_TOOL_DESCRIPTION.format(available_agents=subagent_description_str)
    elif "{available_agents}" in task_description:
        # If custom description has placeholder, format with agent descriptions
        task_description = task_description.format(available_agents=subagent_description_str)
    def task(
        description: str,
        subagent_type: str,
        runtime: ToolRuntime,
    ) -> str | Command:
        # Unknown subagent type: return a (Korean) error listing the valid types.
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return f"{subagent_type} 서브 에이전트는 존재하지 않으므로 호출할 수 없습니다. 허용된 유형은 다음과 같습니다: {allowed_types}"
        subagent, subagent_state = _validate_and_prepare_state(subagent_type, description, runtime)
        result = subagent.invoke(subagent_state, runtime.config)
        # A tool_call_id is required to attach the result as a ToolMessage.
        if not runtime.tool_call_id:
            value_error_msg = "서브 에이전트 호출에는 도구 호출 ID가 필요합니다"
            raise ValueError(value_error_msg)
        return _return_command_with_state_update(result, runtime.tool_call_id)
    async def atask(
        description: str,
        subagent_type: str,
        runtime: ToolRuntime,
    ) -> str | Command:
        # Async twin of `task`; keep the two implementations in sync.
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return f"{subagent_type} 서브 에이전트는 존재하지 않으므로 호출할 수 없습니다. 허용된 유형은 다음과 같습니다: {allowed_types}"
        subagent, subagent_state = _validate_and_prepare_state(subagent_type, description, runtime)
        result = await subagent.ainvoke(subagent_state, runtime.config)
        if not runtime.tool_call_id:
            value_error_msg = "서브 에이전트 호출에는 도구 호출 ID가 필요합니다"
            raise ValueError(value_error_msg)
        return _return_command_with_state_update(result, runtime.tool_call_id)
    # Expose both sync and async entry points under the single `task` tool.
    return StructuredTool.from_function(
        name="task",
        func=task,
        coroutine=atask,
        description=task_description,
    )
class SubAgentMiddleware(AgentMiddleware):
    """Middleware that gives an agent subagents via the `task` tool.

    This middleware adds a `task` tool the agent can use to invoke subagents.
    Subagents are useful for handling complex multi-step tasks, or tasks that
    require a lot of context to resolve. Their main benefit is that they can
    carry out a multi-step task and then return a clean, concise response to
    the main agent. Subagents are also a good fit for distinct specialities
    that need a narrow tool set and focus.

    The middleware ships with a default general-purpose subagent that can
    handle the same work as the main agent in an isolated context.

    Args:
        default_model: Model to use for subagents. May be a LanguageModelLike
            or a dict for init_chat_model.
        default_tools: Tools to use for the default general-purpose subagent.
        default_middleware: Default middleware applied to all subagents. When
            `None` (the default), no default middleware is applied. Pass a
            list to specify custom middleware.
        default_interrupt_on: Tool interrupt configuration for the default
            general-purpose subagent. Also the fallback for subagents that do
            not specify their own configuration.
        subagents: Additional subagents to provide to the agent.
        system_prompt: Subagent-usage instructions. When provided, they are
            appended to the agent's existing system prompt in
            `wrap_model_call` (or used as the whole prompt when none exists).
        general_purpose_agent: Whether to include the general-purpose agent.
            Defaults to `True`.
        task_description: Custom description for the task tool. Uses the
            default description template when `None`.

    Example:
        ```python
        from langchain.agents.middleware.subagents import SubAgentMiddleware
        from langchain.agents import create_agent

        # Basic usage with defaults (no default middleware)
        agent = create_agent(
            "openai:gpt-4o",
            middleware=[
                SubAgentMiddleware(
                    default_model="openai:gpt-4o",
                    subagents=[],
                )
            ],
        )

        # Add custom middleware to subagents
        agent = create_agent(
            "openai:gpt-4o",
            middleware=[
                SubAgentMiddleware(
                    default_model="openai:gpt-4o",
                    default_middleware=[TodoListMiddleware()],
                    subagents=[],
                )
            ],
        )
        ```
    """
    def __init__(
        self,
        *,
        default_model: str | BaseChatModel,
        default_tools: Sequence[BaseTool | Callable | dict[str, Any]] | None = None,
        default_middleware: list[AgentMiddleware] | None = None,
        default_interrupt_on: dict[str, bool | InterruptOnConfig] | None = None,
        subagents: list[SubAgent | CompiledSubAgent] | None = None,
        system_prompt: str | None = TASK_SYSTEM_PROMPT,
        general_purpose_agent: bool = True,
        task_description: str | None = None,
    ) -> None:
        """Initialize SubAgentMiddleware."""
        super().__init__()
        self.system_prompt = system_prompt
        # The `task` tool carries all subagent wiring; it is the middleware's
        # only contribution to the agent's tool set.
        task_tool = _create_task_tool(
            default_model=default_model,
            default_tools=default_tools or [],
            default_middleware=default_middleware,
            default_interrupt_on=default_interrupt_on,
            subagents=subagents or [],
            general_purpose_agent=general_purpose_agent,
            task_description=task_description,
        )
        self.tools = [task_tool]
    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Update the system prompt to include subagent usage instructions."""
        if self.system_prompt is not None:
            # Append to the existing prompt rather than replacing it.
            system_prompt = (
                request.system_prompt + "\n\n" + self.system_prompt if request.system_prompt else self.system_prompt
            )
            return handler(request.override(system_prompt=system_prompt))
        return handler(request)
    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelResponse:
        """(async) Update the system prompt to include subagent usage instructions."""
        if self.system_prompt is not None:
            system_prompt = (
                request.system_prompt + "\n\n" + self.system_prompt if request.system_prompt else self.system_prompt
            )
            return await handler(request.override(system_prompt=system_prompt))
        return await handler(request)

View File

@@ -0,0 +1,100 @@
[project]
name = "deepagents"
version = "0.3.1"
description = "General purpose 'deep agent' with sub-agent spawning, todo list capabilities, and mock file system. Built on LangGraph."
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.11,<4.0"
dependencies = [
"langchain-anthropic>=1.2.0,<2.0.0",
"langchain-google-genai",
"langchain>=1.1.0,<2.0.0",
"langchain-core>=1.1.0,<2.0.0",
"wcmatch",
]
[project.urls]
Homepage = "https://docs.langchain.com/oss/python/deepagents/overview"
Documentation = "https://reference.langchain.com/python/deepagents/"
Source = "https://github.com/langchain-ai/deepagents"
Twitter = "https://x.com/LangChainAI"
Slack = "https://www.langchain.com/join-community"
Reddit = "https://www.reddit.com/r/LangChain/"
[dependency-groups]
test = [
"pytest",
"pytest-cov",
"pytest-xdist",
"ruff>=0.12.2,<0.13.0",
"mypy>=1.18.1,<1.19.0",
"pytest-asyncio>=1.3.0",
]
dev = [
"langchain-openai",
"twine",
"build",
]
[build-system]
requires = ["setuptools>=73.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
"*" = ["py.typed", "*.md"]
[tool.ruff]
line-length = 150
# Exclude any files that shouldn't be linted
exclude = []
[tool.ruff.format]
docstring-code-format = true # Formats code blocks in docstrings
[tool.ruff.lint]
select = [
"ALL" # Enable all rules by default
]
ignore = [
"COM812", # Messes with the formatter
"ISC001", # Messes with the formatter
"PERF203", # Rarely useful
"SLF001", # Private member access
"PLC0415", # Imports should be at the top. Not always desirable
"PLR0913", # Too many arguments in function definition
"PLC0414", # Inconsistent with how type checkers expect to be notified of intentional re-exports
"C901", # Too complex
]
unfixable = ["B028"] # Rules that shouldn't be auto-fixed
[tool.ruff.lint.pyupgrade]
keep-runtime-typing = true
[tool.ruff.lint.flake8-annotations]
allow-star-arg-any = true
[tool.ruff.lint.pydocstyle]
convention = "google" # Google-style docstrings
ignore-var-parameters = true
[tool.ruff.lint.per-file-ignores]
"tests/*" = [
"D1", # Skip documentation rules in tests
"S101", # Allow asserts in tests
"S311", # Allow pseudo-random generators in tests
# Add more test-specific ignores
]
[tool.mypy]
strict = true
ignore_missing_imports = true
enable_error_code = ["deprecated"]
# Optional: reduce strictness if needed
disallow_any_generics = false
warn_return_any = false
[tool.pytest.ini_options]
asyncio_mode = "auto"

View File

@@ -0,0 +1 @@
# This file makes the integration_tests directory a Python package for relative imports

View File

@@ -0,0 +1,165 @@
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy
from langchain_core.messages import HumanMessage
from pydantic import BaseModel
from deepagents.graph import create_deep_agent
from ..utils import (
SAMPLE_MODEL,
TOY_BASKETBALL_RESEARCH,
ResearchMiddleware,
ResearchMiddlewareWithTools,
SampleMiddlewareWithTools,
SampleMiddlewareWithToolsAndState,
WeatherToolMiddleware,
assert_all_deepagent_qualities,
get_soccer_scores,
get_weather,
sample_tool,
)
class TestDeepAgents:
    """Integration tests for `create_deep_agent` composition and subagent wiring."""

    def test_base_deep_agent(self):
        """A bare deep agent has all default deep-agent qualities."""
        agent = create_deep_agent()
        assert_all_deepagent_qualities(agent)

    def test_deep_agent_with_tool(self):
        """Tools passed directly are registered on the tools node."""
        agent = create_deep_agent(tools=[sample_tool])
        assert_all_deepagent_qualities(agent)
        # Membership works directly on the mapping; `.keys()` is redundant (SIM118).
        assert "sample_tool" in agent.nodes["tools"].bound._tools_by_name

    def test_deep_agent_with_middleware_with_tool(self):
        """Middleware-contributed tools are registered on the tools node."""
        agent = create_deep_agent(middleware=[SampleMiddlewareWithTools()])
        assert_all_deepagent_qualities(agent)
        assert "sample_tool" in agent.nodes["tools"].bound._tools_by_name

    def test_deep_agent_with_middleware_with_tool_and_state(self):
        """Middleware can contribute both tools and extra state channels."""
        agent = create_deep_agent(middleware=[SampleMiddlewareWithToolsAndState()])
        assert_all_deepagent_qualities(agent)
        assert "sample_tool" in agent.nodes["tools"].bound._tools_by_name
        assert "sample_input" in agent.stream_channels

    def test_deep_agent_with_subagents(self):
        """A declared subagent is reachable through the `task` tool."""
        subagents = [
            {
                "name": "weather_agent",
                "description": "Use this agent to get the weather",
                "system_prompt": "You are a weather agent.",
                "tools": [get_weather],
                "model": SAMPLE_MODEL,
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke({"messages": [HumanMessage(content="What is the weather in Tokyo?")]})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        # Generator form instead of a list inside any() (C419).
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "weather_agent" for tool_call in tool_calls)

    def test_deep_agent_with_subagents_gen_purpose(self):
        """The built-in general-purpose subagent remains available alongside custom ones."""
        subagents = [
            {
                "name": "weather_agent",
                "description": "Use this agent to get the weather",
                "system_prompt": "You are a weather agent.",
                "tools": [get_weather],
                "model": SAMPLE_MODEL,
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke({"messages": [HumanMessage(content="Use the general purpose subagent to call the sample tool")]})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "general-purpose" for tool_call in tool_calls)

    def test_deep_agent_with_subagents_with_middleware(self):
        """Subagent middleware can supply the subagent's tools."""
        subagents = [
            {
                "name": "weather_agent",
                "description": "Use this agent to get the weather",
                "system_prompt": "You are a weather agent.",
                "tools": [],
                "model": SAMPLE_MODEL,
                "middleware": [WeatherToolMiddleware()],
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke({"messages": [HumanMessage(content="What is the weather in Tokyo?")]})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "weather_agent" for tool_call in tool_calls)

    def test_deep_agent_with_custom_subagents(self):
        """Spec-built and pre-compiled (runnable) subagents can be mixed."""
        subagents = [
            {
                "name": "weather_agent",
                "description": "Use this agent to get the weather",
                "system_prompt": "You are a weather agent.",
                "tools": [get_weather],
                "model": SAMPLE_MODEL,
            },
            {
                "name": "soccer_agent",
                "description": "Use this agent to get the latest soccer scores",
                "runnable": create_agent(
                    model=SAMPLE_MODEL,
                    tools=[get_soccer_scores],
                    system_prompt="You are a soccer agent.",
                ),
            },
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke({"messages": [HumanMessage(content="Look up the weather in Tokyo, and the latest scores for Manchester City!")]})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "weather_agent" for tool_call in tool_calls)
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "soccer_agent" for tool_call in tool_calls)

    def test_deep_agent_with_extended_state_and_subagents(self):
        """Subagent state updates flow back into the parent agent's extended state."""
        subagents = [
            {
                "name": "basketball_info_agent",
                "description": "Use this agent to get surface level info on any basketball topic",
                "system_prompt": "You are a basketball info agent.",
                "middleware": [ResearchMiddlewareWithTools()],
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents, middleware=[ResearchMiddleware()])
        assert_all_deepagent_qualities(agent)
        assert "research" in agent.stream_channels
        result = agent.invoke({"messages": [HumanMessage(content="Get surface level info on lebron james")]}, config={"recursion_limit": 100})
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "basketball_info_agent" for tool_call in tool_calls)
        assert TOY_BASKETBALL_RESEARCH in result["research"]

    def test_deep_agent_with_subagents_no_tools(self):
        """A subagent without explicit tools falls back to the parent's default tools."""
        subagents = [
            {
                "name": "basketball_info_agent",
                "description": "Use this agent to get surface level info on any basketball topic",
                "system_prompt": "You are a basketball info agent.",
            }
        ]
        agent = create_deep_agent(tools=[sample_tool], subagents=subagents)
        assert_all_deepagent_qualities(agent)
        result = agent.invoke(
            {"messages": [HumanMessage(content="Use the basketball info subagent to call the sample tool")]}, config={"recursion_limit": 100}
        )
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        assert any(tool_call["name"] == "task" and tool_call["args"].get("subagent_type") == "basketball_info_agent" for tool_call in tool_calls)

    def test_response_format_tool_strategy(self):
        """`response_format=ToolStrategy(...)` yields a parsed structured response."""
        class StructuredOutput(BaseModel):
            pokemon: list[str]

        agent = create_deep_agent(response_format=ToolStrategy(schema=StructuredOutput))
        response = agent.invoke({"messages": [{"role": "user", "content": "Who are all of the Kanto starters?"}]})
        structured_output = response["structured_response"]
        # Kanto has exactly three starter Pokemon.
        assert len(structured_output.pokemon) == 3

View File

@@ -0,0 +1,154 @@
import uuid
from langgraph.checkpoint.memory import MemorySaver
from langgraph.types import Command
from deepagents.graph import create_deep_agent
from ..utils import assert_all_deepagent_qualities, get_soccer_scores, get_weather, sample_tool
# Shared interrupt configuration for the HITL tests:
# - sample_tool: interrupt with the default decision set (approve/edit/reject)
# - get_weather: never interrupt
# - get_soccer_scores: interrupt, but only allow approve/reject
SAMPLE_TOOL_CONFIG = {
    "sample_tool": True,
    "get_weather": False,
    "get_soccer_scores": {"allowed_decisions": ["approve", "reject"]},
}
class TestHITL:
    """Integration tests for human-in-the-loop interrupts on deep agents and subagents."""

    def test_hitl_agent(self):
        """Interrupt-configured tools pause the run; approval resumes all calls."""
        checkpointer = MemorySaver()
        agent = create_deep_agent(tools=[sample_tool, get_weather, get_soccer_scores], interrupt_on=SAMPLE_TOOL_CONFIG, checkpointer=checkpointer)
        config = {"configurable": {"thread_id": uuid.uuid4()}}
        assert_all_deepagent_qualities(agent)
        result = agent.invoke(
            {
                "messages": [
                    {
                        "role": "user",
                        "content": "Call the sample tool, get the weather in New York and get scores for the latest soccer games in parallel",
                    }
                ]
            },
            config=config,
        )
        agent_messages = [msg for msg in result.get("messages", []) if msg.type == "ai"]
        tool_calls = [tool_call for msg in agent_messages for tool_call in msg.tool_calls]
        # Generator form instead of a list inside any() (C419).
        assert any(tool_call["name"] == "sample_tool" for tool_call in tool_calls)
        assert any(tool_call["name"] == "get_weather" for tool_call in tool_calls)
        assert any(tool_call["name"] == "get_soccer_scores" for tool_call in tool_calls)
        assert result["__interrupt__"] is not None
        interrupts = result["__interrupt__"][0].value
        action_requests = interrupts["action_requests"]
        # Only the two interrupt-enabled tools should pause (get_weather is False).
        # Previously asserted len(interrupts) == 2, which merely counted the two
        # dict keys and was vacuous.
        assert len(action_requests) == 2
        assert any(action_request["name"] == "sample_tool" for action_request in action_requests)
        assert any(action_request["name"] == "get_soccer_scores" for action_request in action_requests)
        review_configs = interrupts["review_configs"]
        assert any(
            review_config["action_name"] == "sample_tool" and review_config["allowed_decisions"] == ["approve", "edit", "reject"]
            for review_config in review_configs
        )
        assert any(
            review_config["action_name"] == "get_soccer_scores" and review_config["allowed_decisions"] == ["approve", "reject"]
            for review_config in review_configs
        )
        result2 = agent.invoke(Command(resume={"decisions": [{"type": "approve"}, {"type": "approve"}]}), config=config)
        tool_results = [msg for msg in result2.get("messages", []) if msg.type == "tool"]
        assert any(tool_result.name == "sample_tool" for tool_result in tool_results)
        assert any(tool_result.name == "get_weather" for tool_result in tool_results)
        assert any(tool_result.name == "get_soccer_scores" for tool_result in tool_results)
        assert "__interrupt__" not in result2

    def test_subagent_with_hitl(self):
        """Interrupts raised inside a subagent surface on the parent thread."""
        checkpointer = MemorySaver()
        agent = create_deep_agent(tools=[sample_tool, get_weather, get_soccer_scores], interrupt_on=SAMPLE_TOOL_CONFIG, checkpointer=checkpointer)
        config = {"configurable": {"thread_id": uuid.uuid4()}}
        assert_all_deepagent_qualities(agent)
        result = agent.invoke(
            {
                "messages": [
                    {
                        "role": "user",
                        "content": "Use the task tool to kick off the general-purpose subagent. Tell it to call the sample tool, get the weather in New York and get scores for the latest soccer games in parallel",
                    }
                ]
            },
            config=config,
        )
        assert result["__interrupt__"] is not None
        interrupts = result["__interrupt__"][0].value
        action_requests = interrupts["action_requests"]
        # Two interrupted tool calls expected (see note in test_hitl_agent).
        assert len(action_requests) == 2
        assert any(action_request["name"] == "sample_tool" for action_request in action_requests)
        assert any(action_request["name"] == "get_soccer_scores" for action_request in action_requests)
        review_configs = interrupts["review_configs"]
        assert any(
            review_config["action_name"] == "sample_tool" and review_config["allowed_decisions"] == ["approve", "edit", "reject"]
            for review_config in review_configs
        )
        assert any(
            review_config["action_name"] == "get_soccer_scores" and review_config["allowed_decisions"] == ["approve", "reject"]
            for review_config in review_configs
        )
        result2 = agent.invoke(Command(resume={"decisions": [{"type": "approve"}, {"type": "approve"}]}), config=config)
        assert "__interrupt__" not in result2

    def test_subagent_with_custom_interrupt_on(self):
        """A subagent's own interrupt_on overrides the parent's configuration."""
        checkpointer = MemorySaver()
        agent = create_deep_agent(
            tools=[sample_tool, get_weather, get_soccer_scores],
            interrupt_on=SAMPLE_TOOL_CONFIG,
            checkpointer=checkpointer,
            subagents=[
                {
                    "name": "task_handler",
                    "description": "A subagent that can handle all sorts of tasks",
                    "system_prompt": "You are a task handler. You can handle all sorts of tasks.",
                    "tools": [sample_tool, get_weather, get_soccer_scores],
                    "interrupt_on": {"sample_tool": False, "get_weather": True, "get_soccer_scores": True},
                },
            ],
        )
        config = {"configurable": {"thread_id": uuid.uuid4()}}
        assert_all_deepagent_qualities(agent)
        result = agent.invoke(
            {
                "messages": [
                    {
                        "role": "user",
                        "content": "Use the task tool to kick off the task_handler subagent. Tell it to call the sample tool, get the weather in New York and get scores for the latest soccer games in parallel",
                    }
                ]
            },
            config=config,
        )
        assert result["__interrupt__"] is not None
        interrupts = result["__interrupt__"][0].value
        action_requests = interrupts["action_requests"]
        # The subagent's config interrupts get_weather/get_soccer_scores, not sample_tool.
        assert len(action_requests) == 2
        assert any(action_request["name"] == "get_weather" for action_request in action_requests)
        assert any(action_request["name"] == "get_soccer_scores" for action_request in action_requests)
        review_configs = interrupts["review_configs"]
        assert any(
            review_config["action_name"] == "get_weather" and review_config["allowed_decisions"] == ["approve", "edit", "reject"]
            for review_config in review_configs
        )
        assert any(
            review_config["action_name"] == "get_soccer_scores" and review_config["allowed_decisions"] == ["approve", "edit", "reject"]
            for review_config in review_configs
        )
        result2 = agent.invoke(Command(resume={"decisions": [{"type": "approve"}, {"type": "approve"}]}), config=config)
        assert "__interrupt__" not in result2

View File

@@ -0,0 +1,283 @@
import pytest
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from deepagents.graph import create_agent
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from deepagents.middleware.subagents import (
DEFAULT_GENERAL_PURPOSE_DESCRIPTION,
TASK_SYSTEM_PROMPT,
TASK_TOOL_DESCRIPTION,
SubAgentMiddleware,
)
# NOTE: the docstring below doubles as the tool's description at runtime via
# the @tool decorator, so it must stay accurate.
@tool
def get_weather(city: str) -> str:
    """Get the weather in a city."""
    return f"The weather in {city} is sunny."
class WeatherMiddleware(AgentMiddleware):
    """Middleware that contributes the `get_weather` tool to an agent."""

    # Tools injected into any agent this middleware is attached to.
    tools = [get_weather]
def assert_expected_subgraph_actions(expected_tool_calls, agent, inputs):
    """Stream an agent run and assert the expected tool calls occur in order.

    Args:
        expected_tool_calls: Ordered expectation dicts, each with "name", an
            "args" mapping (a subset of the actual call's args), and optionally
            "model" (expected `response_metadata["model_name"]`).
        agent: Agent whose `.stream(..., subgraphs=True, stream_mode="updates")`
            output is consumed.
        inputs: Inputs passed through to `agent.stream`.

    Raises:
        AssertionError: If an expected call is missing, an arg differs, or the
            model name does not match.
    """
    current_idx = 0
    for update in agent.stream(
        inputs,
        subgraphs=True,
        stream_mode="updates",
    ):
        # Only "model" node updates carry AI messages with tool calls.
        if "model" not in update[1]:
            continue
        ai_message = update[1]["model"]["messages"][-1]
        for tool_call in ai_message.tool_calls:
            if current_idx >= len(expected_tool_calls):
                # All expectations matched: ignore any further tool calls
                # instead of raising IndexError (bug in the original version).
                break
            expected = expected_tool_calls[current_idx]
            if tool_call["name"] != expected["name"]:
                continue
            if "model" in expected:
                assert ai_message.response_metadata["model_name"] == expected["model"]
            # Expected args must be a subset of the actual call args.
            for arg, value in expected["args"].items():
                assert arg in tool_call["args"]
                assert tool_call["args"][arg] == value
            current_idx += 1
    assert current_idx == len(expected_tool_calls)
@pytest.mark.requires("langchain_anthropic", "langchain_openai")
class TestSubagentMiddleware:
"""Integration tests for the SubagentMiddleware class."""
def test_general_purpose_subagent(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the general-purpose subagent to get the weather in a city.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[get_weather],
)
],
)
assert "task" in agent.nodes["tools"].bound._tools_by_name.keys()
response = agent.invoke({"messages": [HumanMessage(content="What is the weather in Tokyo?")]})
assert response["messages"][1].tool_calls[0]["name"] == "task"
assert response["messages"][1].tool_calls[0]["args"]["subagent_type"] == "general-purpose"
def test_defined_subagent(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [get_weather],
}
],
)
],
)
assert "task" in agent.nodes["tools"].bound._tools_by_name.keys()
response = agent.invoke({"messages": [HumanMessage(content="What is the weather in Tokyo?")]})
assert response["messages"][1].tool_calls[0]["name"] == "task"
assert response["messages"][1].tool_calls[0]["args"]["subagent_type"] == "weather"
def test_defined_subagent_tool_calls(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [get_weather],
}
],
)
],
)
expected_tool_calls = [
{"name": "task", "args": {"subagent_type": "weather"}},
{"name": "get_weather", "args": {}},
]
assert_expected_subgraph_actions(
expected_tool_calls,
agent,
{"messages": [HumanMessage(content="What is the weather in Tokyo?")]},
)
def test_defined_subagent_custom_model(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [get_weather],
"model": "gpt-4.1",
}
],
)
],
)
expected_tool_calls = [
{
"name": "task",
"args": {"subagent_type": "weather"},
"model": "claude-sonnet-4-20250514",
},
{"name": "get_weather", "args": {}, "model": "gpt-4.1-2025-04-14"},
]
assert_expected_subgraph_actions(
expected_tool_calls,
agent,
{"messages": [HumanMessage(content="What is the weather in Tokyo?")]},
)
def test_defined_subagent_custom_middleware(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"system_prompt": "Use the get_weather tool to get the weather in a city.",
"tools": [], # No tools, only in middleware
"model": "gpt-4.1",
"middleware": [WeatherMiddleware()],
}
],
)
],
)
expected_tool_calls = [
{
"name": "task",
"args": {"subagent_type": "weather"},
"model": "claude-sonnet-4-20250514",
},
{"name": "get_weather", "args": {}, "model": "gpt-4.1-2025-04-14"},
]
assert_expected_subgraph_actions(
expected_tool_calls,
agent,
{"messages": [HumanMessage(content="What is the weather in Tokyo?")]},
)
def test_defined_subagent_custom_runnable(self):
custom_subagent = create_agent(
model="gpt-4.1-2025-04-14",
system_prompt="Use the get_weather tool to get the weather in a city.",
tools=[get_weather],
)
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call a subagent.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
subagents=[
{
"name": "weather",
"description": "This subagent can get weather in cities.",
"runnable": custom_subagent,
}
],
)
],
)
expected_tool_calls = [
{
"name": "task",
"args": {"subagent_type": "weather"},
"model": "claude-sonnet-4-20250514",
},
{"name": "get_weather", "args": {}, "model": "gpt-4.1-2025-04-14"},
]
assert_expected_subgraph_actions(
expected_tool_calls,
agent,
{"messages": [HumanMessage(content="What is the weather in Tokyo?")]},
)
def test_multiple_subagents_with_interrupt_on_no_middleware_accumulation(self):
agent = create_agent(
model="claude-sonnet-4-20250514",
system_prompt="Use the task tool to call subagents.",
middleware=[
SubAgentMiddleware(
default_model="claude-sonnet-4-20250514",
default_tools=[],
default_middleware=[PatchToolCallsMiddleware()],
subagents=[
{
"name": "subagent1",
"description": "First subagent.",
"system_prompt": "You are subagent 1.",
"tools": [get_weather],
"interrupt_on": {"get_weather": True},
},
{
"name": "subagent2",
"description": "Second subagent.",
"system_prompt": "You are subagent 2.",
"tools": [get_weather],
"interrupt_on": {"get_weather": True},
},
],
)
],
)
# This would error if the default middleware was accumulated
assert True
def test_subagent_middleware_init(self):
middleware = SubAgentMiddleware(
default_model="gpt-4o-mini",
)
assert middleware is not None
assert middleware.system_prompt is TASK_SYSTEM_PROMPT
assert len(middleware.tools) == 1
assert middleware.tools[0].name == "task"
expected_desc = TASK_TOOL_DESCRIPTION.format(available_agents=f"- general-purpose: {DEFAULT_GENERAL_PURPOSE_DESCRIPTION}")
assert middleware.tools[0].description == expected_desc
def test_default_subagent_with_tools(self):
    """An explicit (empty) default tool list keeps the built-in task prompt."""
    mw = SubAgentMiddleware(default_model="gpt-4o-mini", default_tools=[])
    assert mw is not None
    assert mw.system_prompt == TASK_SYSTEM_PROMPT
def test_default_subagent_custom_system_prompt(self):
    """A caller-supplied system prompt overrides the built-in task prompt."""
    custom_prompt = "Use the task tool to call a subagent."
    mw = SubAgentMiddleware(
        default_model="gpt-4o-mini",
        default_tools=[],
        system_prompt=custom_prompt,
    )
    assert mw is not None
    assert mw.system_prompt == custom_prompt

View File

@@ -0,0 +1 @@
# This file makes the tests directory a Python package for relative imports

View File

@@ -0,0 +1,671 @@
from pathlib import Path
import pytest
from langchain.tools import ToolRuntime
from langgraph.store.memory import InMemoryStore
from deepagents.backends.composite import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import (
ExecuteResponse,
SandboxBackendProtocol,
WriteResult,
)
from deepagents.backends.state import StateBackend
from deepagents.backends.store import StoreBackend
def make_runtime(tid: str = "tc"):
    """Build a minimal ToolRuntime backed by an in-memory store for backend tests."""
    empty_state = {"messages": [], "files": {}}
    return ToolRuntime(
        state=empty_state,
        context=None,
        tool_call_id=tid,
        store=InMemoryStore(),
        stream_writer=lambda _: None,
        config={},
    )
def build_composite_state_backend(runtime: ToolRuntime, *, routes):
    """Create a CompositeBackend whose default is a StateBackend.

    Route values may be ready backend instances or factories that accept the
    runtime; factories are invoked here.
    """
    built = {
        prefix: target(runtime) if callable(target) else target
        for prefix, target in routes.items()
    }
    return CompositeBackend(default=StateBackend(runtime), routes=built)
def test_composite_state_backend_routes_and_search(tmp_path: Path):
    """Writes route by prefix; ls/grep/glob aggregate default and routed backends."""
    rt = make_runtime("t3")
    # route /memories/ to store
    be = build_composite_state_backend(rt, routes={"/memories/": (lambda r: StoreBackend(r))})
    # write to default (state)
    res = be.write("/file.txt", "alpha")
    # StateBackend reports written content via a files_update state delta.
    assert isinstance(res, WriteResult) and res.files_update is not None
    # write to routed (store)
    msg = be.write("/memories/readme.md", "beta")
    # StoreBackend persists directly, so no files_update comes back.
    assert isinstance(msg, WriteResult) and msg.error is None and msg.files_update is None
    # ls_info at root returns both
    infos = be.ls_info("/")
    paths = {i["path"] for i in infos}
    assert "/file.txt" in paths and "/memories/" in paths
    # grep across both
    matches = be.grep_raw("alpha", path="/")
    assert any(m["path"] == "/file.txt" for m in matches)
    matches2 = be.grep_raw("beta", path="/")
    assert any(m["path"] == "/memories/readme.md" for m in matches2)
    # glob across both
    g = be.glob_info("**/*.md", path="/")
    assert any(i["path"] == "/memories/readme.md" for i in g)
def test_composite_backend_filesystem_plus_store(tmp_path: Path):
    """Filesystem default plus store route: write/ls/grep/glob all route by prefix."""
    # default filesystem, route to store under /memories/
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    rt = make_runtime("t4")
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    # put files in both
    r1 = comp.write("/hello.txt", "hello")
    # Neither backend here is state-backed, so no files_update on either write.
    assert isinstance(r1, WriteResult) and r1.error is None and r1.files_update is None
    r2 = comp.write("/memories/notes.md", "note")
    assert isinstance(r2, WriteResult) and r2.error is None and r2.files_update is None
    # ls_info path routing
    infos_root = comp.ls_info("/")
    assert any(i["path"] == "/hello.txt" for i in infos_root)
    infos_mem = comp.ls_info("/memories/")
    assert any(i["path"] == "/memories/notes.md" for i in infos_mem)
    # grep_raw merges
    gm = comp.grep_raw("hello", path="/")
    assert any(m["path"] == "/hello.txt" for m in gm)
    gm2 = comp.grep_raw("note", path="/")
    assert any(m["path"] == "/memories/notes.md" for m in gm2)
    # glob_info
    gl = comp.glob_info("*.md", path="/")
    assert any(i["path"] == "/memories/notes.md" for i in gl)
def test_composite_backend_store_to_store():
    """Test composite with default store and routed store (two different stores)."""
    rt = make_runtime("t5")
    # Create two separate store backends (simulating different namespaces/stores)
    default_store = StoreBackend(rt)
    memories_store = StoreBackend(rt)
    comp = CompositeBackend(default=default_store, routes={"/memories/": memories_store})
    # Write to default store
    res1 = comp.write("/notes.txt", "default store content")
    assert isinstance(res1, WriteResult) and res1.error is None and res1.path == "/notes.txt"
    # Write to routed store
    res2 = comp.write("/memories/important.txt", "routed store content")
    # The routed backend reports the path with the route prefix stripped.
    assert isinstance(res2, WriteResult) and res2.error is None and res2.path == "/important.txt"
    # Read from both
    content1 = comp.read("/notes.txt")
    assert "default store content" in content1
    content2 = comp.read("/memories/important.txt")
    assert "routed store content" in content2
    # ls_info at root should show both
    infos = comp.ls_info("/")
    paths = {i["path"] for i in infos}
    assert "/notes.txt" in paths
    assert "/memories/" in paths
    # grep across both stores
    matches = comp.grep_raw("default", path="/")
    assert any(m["path"] == "/notes.txt" for m in matches)
    matches2 = comp.grep_raw("routed", path="/")
    assert any(m["path"] == "/memories/important.txt" for m in matches2)
def test_composite_backend_multiple_routes():
    """Test composite with state default and multiple store routes."""
    rt = make_runtime("t6")
    # State backend as default, multiple stores for different routes
    comp = build_composite_state_backend(
        rt,
        routes={
            "/memories/": (lambda r: StoreBackend(r)),
            "/archive/": (lambda r: StoreBackend(r)),
            "/cache/": (lambda r: StoreBackend(r)),
        },
    )
    # Write to state (default)
    res_state = comp.write("/temp.txt", "ephemeral data")
    assert res_state.files_update is not None  # State backend returns files_update
    assert res_state.path == "/temp.txt"
    # Write to /memories/ route
    res_mem = comp.write("/memories/important.md", "long-term memory")
    assert res_mem.files_update is None  # Store backend doesn't return files_update
    # Routed writes report the path with the route prefix stripped.
    assert res_mem.path == "/important.md"
    # Write to /archive/ route
    res_arch = comp.write("/archive/old.log", "archived log")
    assert res_arch.files_update is None
    assert res_arch.path == "/old.log"
    # Write to /cache/ route
    res_cache = comp.write("/cache/session.json", "cached session")
    assert res_cache.files_update is None
    assert res_cache.path == "/session.json"
    # ls_info at root should aggregate all
    infos = comp.ls_info("/")
    paths = {i["path"] for i in infos}
    assert "/temp.txt" in paths
    assert "/memories/" in paths
    assert "/archive/" in paths
    assert "/cache/" in paths
    # ls_info at specific route
    mem_infos = comp.ls_info("/memories/")
    mem_paths = {i["path"] for i in mem_infos}
    assert "/memories/important.md" in mem_paths
    assert "/temp.txt" not in mem_paths
    assert "/archive/old.log" not in mem_paths
    # grep across all backends
    all_matches = comp.grep_raw(".", path="/")  # Match any character
    paths_with_content = {m["path"] for m in all_matches}
    assert "/temp.txt" in paths_with_content
    assert "/memories/important.md" in paths_with_content
    assert "/archive/old.log" in paths_with_content
    assert "/cache/session.json" in paths_with_content
    # glob across all backends
    glob_results = comp.glob_info("**/*.md", path="/")
    assert any(i["path"] == "/memories/important.md" for i in glob_results)
    # Edit in routed backend
    edit_res = comp.edit("/memories/important.md", "long-term", "persistent", replace_all=False)
    assert edit_res.error is None
    assert edit_res.occurrences == 1
    updated_content = comp.read("/memories/important.md")
    assert "persistent memory" in updated_content
def test_composite_backend_ls_nested_directories(tmp_path: Path):
    """ls_info lists direct children only; nested content appears as directory markers."""
    rt = make_runtime("t7")
    root = tmp_path
    files = {
        root / "local.txt": "local file",
        root / "src" / "main.py": "code",
        root / "src" / "utils" / "helper.py": "utils",
    }
    for path, content in files.items():
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content)
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    comp.write("/memories/note1.txt", "note 1")
    comp.write("/memories/deep/note2.txt", "note 2")
    comp.write("/memories/deep/nested/note3.txt", "note 3")
    # Root: direct children plus the route mount shown as a directory.
    root_listing = comp.ls_info("/")
    root_paths = [fi["path"] for fi in root_listing]
    assert "/local.txt" in root_paths
    assert "/src/" in root_paths
    assert "/memories/" in root_paths
    assert "/src/main.py" not in root_paths
    assert "/memories/note1.txt" not in root_paths
    src_listing = comp.ls_info("/src/")
    src_paths = [fi["path"] for fi in src_listing]
    assert "/src/main.py" in src_paths
    assert "/src/utils/" in src_paths
    assert "/src/utils/helper.py" not in src_paths
    # Same one-level-deep behavior inside the routed store backend.
    mem_listing = comp.ls_info("/memories/")
    mem_paths = [fi["path"] for fi in mem_listing]
    assert "/memories/note1.txt" in mem_paths
    assert "/memories/deep/" in mem_paths
    assert "/memories/deep/note2.txt" not in mem_paths
    deep_listing = comp.ls_info("/memories/deep/")
    deep_paths = [fi["path"] for fi in deep_listing]
    assert "/memories/deep/note2.txt" in deep_paths
    assert "/memories/deep/nested/" in deep_paths
    assert "/memories/deep/nested/note3.txt" not in deep_paths
def test_composite_backend_ls_multiple_routes_nested():
    """ls_info stays one level deep across the default and every routed backend."""
    rt = make_runtime("t8")
    comp = build_composite_state_backend(
        rt,
        routes={
            "/memories/": (lambda r: StoreBackend(r)),
            "/archive/": (lambda r: StoreBackend(r)),
        },
    )
    state_files = {
        "/temp.txt": "temp",
        "/work/file1.txt": "work file 1",
        "/work/projects/proj1.txt": "project 1",
    }
    for path, content in state_files.items():
        res = comp.write(path, content)
        # State writes only return a delta; fold it back into the runtime state
        # so later listings can see the files.
        if res.files_update:
            rt.state["files"].update(res.files_update)
    memory_files = {
        "/memories/important.txt": "important",
        "/memories/diary/entry1.txt": "diary entry",
    }
    for path, content in memory_files.items():
        comp.write(path, content)
    archive_files = {
        "/archive/old.txt": "old",
        "/archive/2023/log.txt": "2023 log",
    }
    for path, content in archive_files.items():
        comp.write(path, content)
    root_listing = comp.ls_info("/")
    root_paths = [fi["path"] for fi in root_listing]
    assert "/temp.txt" in root_paths
    assert "/work/" in root_paths
    assert "/memories/" in root_paths
    assert "/archive/" in root_paths
    assert "/work/file1.txt" not in root_paths
    assert "/memories/important.txt" not in root_paths
    work_listing = comp.ls_info("/work/")
    work_paths = [fi["path"] for fi in work_listing]
    assert "/work/file1.txt" in work_paths
    assert "/work/projects/" in work_paths
    assert "/work/projects/proj1.txt" not in work_paths
    mem_listing = comp.ls_info("/memories/")
    mem_paths = [fi["path"] for fi in mem_listing]
    assert "/memories/important.txt" in mem_paths
    assert "/memories/diary/" in mem_paths
    assert "/memories/diary/entry1.txt" not in mem_paths
    arch_listing = comp.ls_info("/archive/")
    arch_paths = [fi["path"] for fi in arch_listing]
    assert "/archive/old.txt" in arch_paths
    assert "/archive/2023/" in arch_paths
    assert "/archive/2023/log.txt" not in arch_paths
def test_composite_backend_ls_trailing_slash(tmp_path: Path):
    """Listings are sorted, missing dirs list empty, and trailing slash is optional."""
    rt = make_runtime("t9")
    root = tmp_path
    (root / "file.txt").write_text("content")
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/store/": store})
    comp.write("/store/item.txt", "store content")
    listing = comp.ls_info("/")
    paths = [fi["path"] for fi in listing]
    # Aggregated listings come back sorted.
    assert paths == sorted(paths)
    # Nonexistent directories (routed or default) produce empty listings.
    empty_listing = comp.ls_info("/store/nonexistent/")
    assert empty_listing == []
    empty_listing2 = comp.ls_info("/nonexistent/")
    assert empty_listing2 == []
    # "/store/" and "/store" are equivalent.
    listing1 = comp.ls_info("/store/")
    listing2 = comp.ls_info("/store")
    assert [fi["path"] for fi in listing1] == [fi["path"] for fi in listing2]
def test_composite_backend_intercept_large_tool_result():
    """Oversized tool results are evicted to the default (state) backend as files."""
    from langchain_core.messages import ToolMessage
    from langgraph.types import Command
    from deepagents.middleware.filesystem import FilesystemMiddleware
    rt = make_runtime("t10")
    middleware = FilesystemMiddleware(
        backend=lambda r: build_composite_state_backend(r, routes={"/memories/": (lambda x: StoreBackend(x))}), tool_token_limit_before_evict=1000
    )
    # 5000 chars exceeds the 1000-token eviction limit.
    large_content = "z" * 5000
    tool_message = ToolMessage(content=large_content, tool_call_id="test_789")
    result = middleware._intercept_large_tool_result(tool_message, rt)
    # State-backed eviction returns a Command carrying the files update.
    assert isinstance(result, Command)
    assert "/large_tool_results/test_789" in result.update["files"]
    assert result.update["files"]["/large_tool_results/test_789"]["content"] == [large_content]
    assert "Tool result too large" in result.update["messages"][0].content
def test_composite_backend_intercept_large_tool_result_routed_to_store():
    """Test that large tool results can be routed to a specific backend like StoreBackend."""
    from langchain_core.messages import ToolMessage
    from deepagents.middleware.filesystem import FilesystemMiddleware
    rt = make_runtime("t11")
    middleware = FilesystemMiddleware(
        backend=lambda r: build_composite_state_backend(r, routes={"/large_tool_results/": (lambda x: StoreBackend(x))}),
        tool_token_limit_before_evict=1000,
    )
    large_content = "w" * 5000
    tool_message = ToolMessage(content=large_content, tool_call_id="test_routed_123")
    result = middleware._intercept_large_tool_result(tool_message, rt)
    # Store-backed eviction needs no state update, so a plain ToolMessage is returned.
    assert isinstance(result, ToolMessage)
    assert "Tool result too large" in result.content
    assert "/large_tool_results/test_routed_123" in result.content
    # The store key carries the route prefix stripped.
    stored_item = rt.store.get(("filesystem",), "/test_routed_123")
    assert stored_item is not None
    assert stored_item.value["content"] == [large_content]
# Mock sandbox backend for testing execute functionality
class MockSandboxBackend(SandboxBackendProtocol, StateBackend):
    """Mock sandbox backend that implements SandboxBackendProtocol."""

    def execute(self, command: str, *, timeout: int = 30 * 60) -> ExecuteResponse:
        """Echo the command back as a successful, untruncated execution."""
        echoed = f"Executed: {command}"
        return ExecuteResponse(output=echoed, exit_code=0, truncated=False)

    @property
    def id(self) -> str:
        # Stable identifier for this mock backend.
        return "mock_sandbox_backend"
def test_composite_backend_execute_with_sandbox_default():
    """Test that CompositeBackend.execute() delegates to sandbox default backend."""
    rt = make_runtime("t_exec1")
    comp = CompositeBackend(
        default=MockSandboxBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # Execution works because the default backend supports it.
    outcome = comp.execute("ls -la")
    assert isinstance(outcome, ExecuteResponse)
    assert outcome.output == "Executed: ls -la"
    assert outcome.exit_code == 0
    assert outcome.truncated is False
def test_composite_backend_execute_without_sandbox_default():
    """Test that CompositeBackend.execute() fails when default doesn't support execution."""
    rt = make_runtime("t_exec2")
    # StateBackend doesn't implement SandboxBackendProtocol, so the composite
    # must refuse execution.
    comp = CompositeBackend(
        default=StateBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    with pytest.raises(NotImplementedError, match="doesn't support command execution"):
        comp.execute("ls -la")
def test_composite_backend_supports_execution_check():
    """Test the isinstance check works correctly for CompositeBackend."""
    rt = make_runtime("t_exec3")
    # CompositeBackend always exposes execute(); actual support depends on the
    # default backend (a non-sandbox default raises NotImplementedError when
    # execute() is invoked). Both composites therefore pass hasattr().
    for default_backend in (MockSandboxBackend(rt), StateBackend(rt)):
        composite = CompositeBackend(default=default_backend, routes={})
        assert hasattr(composite, "execute")
def test_composite_backend_execute_with_routed_backends():
    """Test that execution doesn't interfere with file routing."""
    rt = make_runtime("t_exec4")
    comp = CompositeBackend(
        default=MockSandboxBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # Seed a file in each backend.
    comp.write("/local.txt", "local content")
    comp.write("/memories/persistent.txt", "persistent content")
    # Execution still delegates to the sandbox default.
    assert comp.execute("echo test").output == "Executed: echo test"
    # And reads route correctly afterwards.
    assert "local content" in comp.read("/local.txt")
    assert "persistent content" in comp.read("/memories/persistent.txt")
def test_composite_upload_routing(tmp_path: Path):
    """Test upload_files routing to different backends."""
    rt = make_runtime("t_upload1")
    root = tmp_path
    # Create composite with filesystem default and store route
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    # Upload files to default path (filesystem)
    default_files = [
        ("/file1.bin", b"Default content 1"),
        ("/file2.bin", b"Default content 2"),
    ]
    responses = comp.upload_files(default_files)
    assert len(responses) == 2
    assert all(r.error is None for r in responses)
    # Default-routed uploads land on disk under the filesystem root.
    assert (root / "file1.bin").exists()
    assert (root / "file2.bin").read_bytes() == b"Default content 2"
    # Upload files to routed path (store)
    routed_files = [
        ("/memories/note1.bin", b"Memory content 1"),
        ("/memories/note2.bin", b"Memory content 2"),
    ]
    responses = comp.upload_files(routed_files)
    assert len(responses) == 2
    assert all(r.error is None for r in responses)
    # Verify files are accessible in store
    content1 = comp.read("/memories/note1.bin")
    assert "Memory content 1" in content1
def test_composite_download_routing(tmp_path: Path):
    """Test download_files routing to different backends."""
    rt = make_runtime("t_download1")
    root = tmp_path
    # Create composite with filesystem default and store route
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    # Pre-populate filesystem backend
    (root / "local.bin").write_bytes(b"Local binary data")
    # Pre-populate store backend
    comp.write("/memories/stored.txt", "Stored text data")
    # Download from default path (filesystem)
    responses = comp.download_files(["/local.bin"])
    assert len(responses) == 1
    assert responses[0].path == "/local.bin"
    assert responses[0].content == b"Local binary data"
    assert responses[0].error is None
    # Download from routed path (store) - Note: store backend doesn't implement download yet
    # So this test focuses on routing logic
    # NOTE(review): the second batch re-downloads the default-path file only;
    # the routed store path is never actually downloaded — extend this once
    # StoreBackend supports download.
    paths_to_download = ["/local.bin"]
    responses = comp.download_files(paths_to_download)
    assert len(responses) == 1
    assert responses[0].path == "/local.bin"
def test_composite_upload_download_roundtrip(tmp_path: Path):
    """Test upload and download roundtrip through composite backend."""
    rt = make_runtime("t_roundtrip1")
    comp = CompositeBackend(
        default=FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True),
        routes={},
    )
    payload = bytes(range(128))  # Binary data
    # Upload, then fetch it back unchanged.
    (up,) = comp.upload_files([("/test.bin", payload)])
    assert up.error is None
    (down,) = comp.download_files(["/test.bin"])
    assert down.error is None
    assert down.content == payload
def test_composite_partial_success_upload(tmp_path: Path):
    """Test partial success in batch upload with mixed valid/invalid paths."""
    rt = make_runtime("t_partial_upload")
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    comp = CompositeBackend(default=fs, routes={})
    files = [
        ("/valid1.bin", b"Valid 1"),
        ("/../invalid.bin", b"Invalid path"),  # Path traversal
        ("/valid2.bin", b"Valid 2"),
    ]
    responses = comp.upload_files(files)
    # One response per requested file, in order, even when some fail.
    assert len(responses) == 3
    # First should succeed
    assert responses[0].error is None
    assert (root / "valid1.bin").exists()
    # Second should fail
    assert responses[1].error == "invalid_path"
    # Third should still succeed (partial success)
    assert responses[2].error is None
    assert (root / "valid2.bin").exists()
def test_composite_partial_success_download(tmp_path: Path):
    """Test partial success in batch download with mixed valid/invalid paths."""
    rt = make_runtime("t_partial_download")
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    comp = CompositeBackend(default=fs, routes={})
    # Create one valid file
    (root / "exists.bin").write_bytes(b"I exist!")
    paths = ["/exists.bin", "/doesnotexist.bin", "/../invalid"]
    responses = comp.download_files(paths)
    # One response per requested path, in order, regardless of failures.
    assert len(responses) == 3
    # First should succeed
    assert responses[0].error is None
    assert responses[0].content == b"I exist!"
    # Second should fail with file_not_found
    assert responses[1].error == "file_not_found"
    assert responses[1].content is None
    # Third should fail with invalid_path
    assert responses[2].error == "invalid_path"
    assert responses[2].content is None
def test_composite_upload_download_multiple_routes(tmp_path: Path):
    """Test upload/download with multiple routed backends."""
    rt = make_runtime("t_multi_route")
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store1 = StoreBackend(rt)
    store2 = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store1, "/archive/": store2})
    # Upload to different backends
    files = [
        ("/default.bin", b"Default backend"),
        ("/memories/mem.bin", b"Memory backend"),
        ("/archive/arch.bin", b"Archive backend"),
    ]
    responses = comp.upload_files(files)
    assert len(responses) == 3
    assert all(r.error is None for r in responses)
    # Verify routing worked (filesystem file should exist)
    assert (root / "default.bin").exists()
    assert (root / "default.bin").read_bytes() == b"Default backend"
def test_composite_download_preserves_original_paths(tmp_path: Path):
    """Test that download responses preserve original composite paths."""
    rt = make_runtime("t_path_preserve")
    fs = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    comp = CompositeBackend(default=fs, routes={})
    # Place a file one directory down.
    nested = tmp_path / "subdir" / "file.bin"
    nested.parent.mkdir()
    nested.write_bytes(b"Nested file")
    (resp,) = comp.download_files(["/subdir/file.bin"])
    # The response echoes the composite path, not a backend-stripped one.
    assert resp.path == "/subdir/file.bin"
    assert resp.content == b"Nested file"

View File

@@ -0,0 +1,582 @@
"""Async tests for CompositeBackend."""
from pathlib import Path
import pytest
from langchain.tools import ToolRuntime
from langgraph.store.memory import InMemoryStore
from deepagents.backends.composite import CompositeBackend
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import (
ExecuteResponse,
SandboxBackendProtocol,
WriteResult,
)
from deepagents.backends.state import StateBackend
from deepagents.backends.store import StoreBackend
def make_runtime(tid: str = "tc"):
    """Return a bare ToolRuntime wired to a fresh InMemoryStore."""

    def _discard(_chunk) -> None:
        # No-op stream writer; these tests never inspect streamed output.
        return None

    initial_state = {"messages": [], "files": {}}
    return ToolRuntime(
        state=initial_state,
        context=None,
        tool_call_id=tid,
        store=InMemoryStore(),
        stream_writer=_discard,
        config={},
    )
def build_composite_state_backend(runtime: ToolRuntime, *, routes):
    """Assemble a CompositeBackend whose default is state-backed.

    Each route value is either a ready backend instance or a factory taking
    the runtime; factories are resolved here.
    """
    resolved = {}
    for prefix, entry in routes.items():
        resolved[prefix] = entry(runtime) if callable(entry) else entry
    return CompositeBackend(default=StateBackend(runtime), routes=resolved)
# Mock sandbox backend for testing execute functionality
class MockSandboxBackend(SandboxBackendProtocol, StateBackend):
    """Mock sandbox backend that implements SandboxBackendProtocol."""

    def execute(self, command: str, *, timeout: int = 30 * 60) -> ExecuteResponse:
        """Mock execute that returns the command as output."""
        return ExecuteResponse(
            output=f"Executed: {command}",
            exit_code=0,
            truncated=False,
        )

    async def aexecute(self, command: str) -> ExecuteResponse:
        """Async mock execute that returns the command as output."""
        # The distinct "Async " prefix lets tests tell the async path from the
        # sync one.
        return ExecuteResponse(
            output=f"Async Executed: {command}",
            exit_code=0,
            truncated=False,
        )

    @property
    def id(self) -> str:
        # Stable identifier for this mock backend.
        return "mock_sandbox_backend"
async def test_composite_state_backend_routes_and_search_async(tmp_path: Path):
    """Test async operations with composite backend routing."""
    rt = make_runtime("t3")
    be = build_composite_state_backend(rt, routes={"/memories/": (lambda r: StoreBackend(r))})
    # write to default (state)
    res = await be.awrite("/file.txt", "alpha")
    # State-backed writes surface content as a files_update delta.
    assert isinstance(res, WriteResult) and res.files_update is not None
    # write to routed (store)
    msg = await be.awrite("/memories/readme.md", "beta")
    # Store-backed writes persist directly, so no files_update.
    assert isinstance(msg, WriteResult) and msg.error is None and msg.files_update is None
    # als_info at root returns both
    infos = await be.als_info("/")
    paths = {i["path"] for i in infos}
    assert "/file.txt" in paths and "/memories/" in paths
    # agrep across both
    matches = await be.agrep_raw("alpha", path="/")
    assert any(m["path"] == "/file.txt" for m in matches)
    matches2 = await be.agrep_raw("beta", path="/")
    assert any(m["path"] == "/memories/readme.md" for m in matches2)
    # aglob across both
    g = await be.aglob_info("**/*.md", path="/")
    assert any(i["path"] == "/memories/readme.md" for i in g)
async def test_composite_backend_filesystem_plus_store_async(tmp_path: Path):
    """Test async operations with filesystem and store backends."""
    root = tmp_path
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    rt = make_runtime("t4")
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    # put files in both
    r1 = await comp.awrite("/hello.txt", "hello")
    # Neither backend here is state-backed, so no files_update on either write.
    assert isinstance(r1, WriteResult) and r1.error is None and r1.files_update is None
    r2 = await comp.awrite("/memories/notes.md", "note")
    assert isinstance(r2, WriteResult) and r2.error is None and r2.files_update is None
    # als_info path routing
    infos_root = await comp.als_info("/")
    assert any(i["path"] == "/hello.txt" for i in infos_root)
    infos_mem = await comp.als_info("/memories/")
    assert any(i["path"] == "/memories/notes.md" for i in infos_mem)
    # agrep_raw merges
    gm = await comp.agrep_raw("hello", path="/")
    assert any(m["path"] == "/hello.txt" for m in gm)
    gm2 = await comp.agrep_raw("note", path="/")
    assert any(m["path"] == "/memories/notes.md" for m in gm2)
    # aglob_info
    gl = await comp.aglob_info("*.md", path="/")
    assert any(i["path"] == "/memories/notes.md" for i in gl)
async def test_composite_backend_store_to_store_async():
    """Test async operations with default store and routed store."""
    rt = make_runtime("t5")
    # Create two separate store backends
    default_store = StoreBackend(rt)
    memories_store = StoreBackend(rt)
    comp = CompositeBackend(default=default_store, routes={"/memories/": memories_store})
    # Write to default store
    res1 = await comp.awrite("/notes.txt", "default store content")
    assert isinstance(res1, WriteResult) and res1.error is None and res1.path == "/notes.txt"
    # Write to routed store
    res2 = await comp.awrite("/memories/important.txt", "routed store content")
    # The routed backend reports the path with the route prefix stripped.
    assert isinstance(res2, WriteResult) and res2.error is None and res2.path == "/important.txt"
    # Read from both
    content1 = await comp.aread("/notes.txt")
    assert "default store content" in content1
    content2 = await comp.aread("/memories/important.txt")
    assert "routed store content" in content2
    # als_info at root should show both
    infos = await comp.als_info("/")
    paths = {i["path"] for i in infos}
    assert "/notes.txt" in paths
    assert "/memories/" in paths
    # agrep across both stores
    matches = await comp.agrep_raw("default", path="/")
    assert any(m["path"] == "/notes.txt" for m in matches)
    matches2 = await comp.agrep_raw("routed", path="/")
    assert any(m["path"] == "/memories/important.txt" for m in matches2)
async def test_composite_backend_multiple_routes_async():
    """Test async operations with state default and multiple store routes."""
    rt = make_runtime("t6")
    comp = build_composite_state_backend(
        rt,
        routes={
            "/memories/": (lambda r: StoreBackend(r)),
            "/archive/": (lambda r: StoreBackend(r)),
            "/cache/": (lambda r: StoreBackend(r)),
        },
    )
    # Write to state (default)
    res_state = await comp.awrite("/temp.txt", "ephemeral data")
    # State backend reports content via files_update.
    assert res_state.files_update is not None
    assert res_state.path == "/temp.txt"
    # Write to /memories/ route
    res_mem = await comp.awrite("/memories/important.md", "long-term memory")
    assert res_mem.files_update is None
    # Routed writes report the path with the route prefix stripped.
    assert res_mem.path == "/important.md"
    # Write to /archive/ route
    res_arch = await comp.awrite("/archive/old.log", "archived log")
    assert res_arch.files_update is None
    assert res_arch.path == "/old.log"
    # Write to /cache/ route
    res_cache = await comp.awrite("/cache/session.json", "cached session")
    assert res_cache.files_update is None
    assert res_cache.path == "/session.json"
    # als_info at root should aggregate all
    infos = await comp.als_info("/")
    paths = {i["path"] for i in infos}
    assert "/temp.txt" in paths
    assert "/memories/" in paths
    assert "/archive/" in paths
    assert "/cache/" in paths
    # als_info at specific route
    mem_infos = await comp.als_info("/memories/")
    mem_paths = {i["path"] for i in mem_infos}
    assert "/memories/important.md" in mem_paths
    assert "/temp.txt" not in mem_paths
    assert "/archive/old.log" not in mem_paths
    # agrep across all backends
    all_matches = await comp.agrep_raw(".", path="/")  # Match any character
    paths_with_content = {m["path"] for m in all_matches}
    assert "/temp.txt" in paths_with_content
    assert "/memories/important.md" in paths_with_content
    assert "/archive/old.log" in paths_with_content
    assert "/cache/session.json" in paths_with_content
    # aglob across all backends
    glob_results = await comp.aglob_info("**/*.md", path="/")
    assert any(i["path"] == "/memories/important.md" for i in glob_results)
    # Edit in routed backend
    edit_res = await comp.aedit("/memories/important.md", "long-term", "persistent", replace_all=False)
    assert edit_res.error is None
    assert edit_res.occurrences == 1
    updated_content = await comp.aread("/memories/important.md")
    assert "persistent memory" in updated_content
async def test_composite_backend_als_nested_directories_async(tmp_path: Path):
    """Test async ls operations with nested directories."""
    rt = make_runtime("t7")
    root = tmp_path
    files = {
        root / "local.txt": "local file",
        root / "src" / "main.py": "code",
        root / "src" / "utils" / "helper.py": "utils",
    }
    for path, content in files.items():
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content)
    fs = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    store = StoreBackend(rt)
    comp = CompositeBackend(default=fs, routes={"/memories/": store})
    await comp.awrite("/memories/note1.txt", "note 1")
    await comp.awrite("/memories/deep/note2.txt", "note 2")
    await comp.awrite("/memories/deep/nested/note3.txt", "note 3")
    # Root: direct children plus the route mount shown as a directory.
    root_listing = await comp.als_info("/")
    root_paths = [fi["path"] for fi in root_listing]
    assert "/local.txt" in root_paths
    assert "/src/" in root_paths
    assert "/memories/" in root_paths
    assert "/src/main.py" not in root_paths
    assert "/memories/note1.txt" not in root_paths
    src_listing = await comp.als_info("/src/")
    src_paths = [fi["path"] for fi in src_listing]
    assert "/src/main.py" in src_paths
    assert "/src/utils/" in src_paths
    assert "/src/utils/helper.py" not in src_paths
    # Same one-level-deep behavior inside the routed store backend.
    mem_listing = await comp.als_info("/memories/")
    mem_paths = [fi["path"] for fi in mem_listing]
    assert "/memories/note1.txt" in mem_paths
    assert "/memories/deep/" in mem_paths
    assert "/memories/deep/note2.txt" not in mem_paths
    deep_listing = await comp.als_info("/memories/deep/")
    deep_paths = [fi["path"] for fi in deep_listing]
    assert "/memories/deep/note2.txt" in deep_paths
    assert "/memories/deep/nested/" in deep_paths
    assert "/memories/deep/nested/note3.txt" not in deep_paths
async def test_composite_backend_als_multiple_routes_nested_async():
    """Async ls merges nested listings across several routed backends."""
    rt = make_runtime("t8")
    composite = build_composite_state_backend(
        rt,
        routes={
            "/memories/": (lambda r: StoreBackend(r)),
            "/archive/": (lambda r: StoreBackend(r)),
        },
    )
    # Default (state) backend: writes surface file updates to fold into state.
    for target, text in {
        "/temp.txt": "temp",
        "/work/file1.txt": "work file 1",
        "/work/projects/proj1.txt": "project 1",
    }.items():
        outcome = await composite.awrite(target, text)
        if outcome.files_update:
            rt.state["files"].update(outcome.files_update)
    # Routed backends persist on their own; no state merge is needed.
    for target, text in {
        "/memories/important.txt": "important",
        "/memories/diary/entry1.txt": "diary entry",
    }.items():
        await composite.awrite(target, text)
    for target, text in {
        "/archive/old.txt": "old",
        "/archive/2023/log.txt": "2023 log",
    }.items():
        await composite.awrite(target, text)
    # Root: direct children plus one directory marker per route.
    top = [entry["path"] for entry in await composite.als_info("/")]
    assert "/temp.txt" in top
    assert "/work/" in top
    assert "/memories/" in top
    assert "/archive/" in top
    assert "/work/file1.txt" not in top
    assert "/memories/important.txt" not in top
    # Each subtree lists only its own immediate children.
    work = [entry["path"] for entry in await composite.als_info("/work/")]
    assert "/work/file1.txt" in work
    assert "/work/projects/" in work
    assert "/work/projects/proj1.txt" not in work
    mem = [entry["path"] for entry in await composite.als_info("/memories/")]
    assert "/memories/important.txt" in mem
    assert "/memories/diary/" in mem
    assert "/memories/diary/entry1.txt" not in mem
    arch = [entry["path"] for entry in await composite.als_info("/archive/")]
    assert "/archive/old.txt" in arch
    assert "/archive/2023/" in arch
    assert "/archive/2023/log.txt" not in arch
async def test_composite_backend_aexecute_with_sandbox_default_async():
    """Async execute succeeds when the default backend supports commands."""
    rt = make_runtime("t_exec1")
    composite = CompositeBackend(
        default=MockSandboxBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # The sandbox default handles execution, so this must not raise.
    outcome = await composite.aexecute("ls -la")
    assert isinstance(outcome, ExecuteResponse)
    assert outcome.output == "Async Executed: ls -la"
    assert outcome.exit_code == 0
    assert outcome.truncated is False
async def test_composite_backend_aexecute_without_sandbox_default_async():
    """Async execute raises when the default backend cannot run commands."""
    rt = make_runtime("t_exec2")
    composite = CompositeBackend(
        default=StateBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # A state backend has no execution support, so aexecute must refuse.
    with pytest.raises(NotImplementedError, match="doesn't support command execution"):
        await composite.aexecute("ls -la")
async def test_composite_backend_aexecute_with_routed_backends_async():
    """File routing and async execution must not interfere with each other."""
    rt = make_runtime("t_exec4")
    composite = CompositeBackend(
        default=MockSandboxBackend(rt),
        routes={"/memories/": StoreBackend(rt)},
    )
    # Populate both the default and the routed backend.
    await composite.awrite("/local.txt", "local content")
    await composite.awrite("/memories/persistent.txt", "persistent content")
    # Execution still reaches the sandbox default.
    outcome = await composite.aexecute("echo test")
    assert outcome.output == "Async Executed: echo test"
    # And reads still route correctly afterwards.
    assert "local content" in await composite.aread("/local.txt")
    assert "persistent content" in await composite.aread("/memories/persistent.txt")
async def test_composite_aupload_routing_async(tmp_path: Path):
    """Async upload_files dispatches each file to the backend owning its path."""
    rt = make_runtime("t_upload1")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={"/memories/": StoreBackend(rt)},
    )
    # Files outside any route land on the filesystem default.
    outcomes = await composite.aupload_files(
        [
            ("/file1.bin", b"Default content 1"),
            ("/file2.bin", b"Default content 2"),
        ]
    )
    assert len(outcomes) == 2
    assert all(o.error is None for o in outcomes)
    assert (base / "file1.bin").exists()
    assert (base / "file2.bin").read_bytes() == b"Default content 2"
    # Files under /memories/ go to the store backend instead.
    outcomes = await composite.aupload_files(
        [
            ("/memories/note1.bin", b"Memory content 1"),
            ("/memories/note2.bin", b"Memory content 2"),
        ]
    )
    assert len(outcomes) == 2
    assert all(o.error is None for o in outcomes)
    # And they are readable back through the composite path.
    assert "Memory content 1" in await composite.aread("/memories/note1.bin")
async def test_composite_adownload_routing_async(tmp_path: Path):
    """Async download_files reads from the backend owning each path."""
    rt = make_runtime("t_download1")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={"/memories/": StoreBackend(rt)},
    )
    # One file on disk (default backend), one in the routed store.
    (base / "local.bin").write_bytes(b"Local binary data")
    await composite.awrite("/memories/stored.txt", "Stored text data")
    # Downloading a default-path file hits the filesystem backend.
    outcomes = await composite.adownload_files(["/local.bin"])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/local.bin"
    assert outcomes[0].content == b"Local binary data"
    assert outcomes[0].error is None
async def test_composite_aupload_download_roundtrip_async(tmp_path: Path):
    """Binary data survives an async upload/download roundtrip intact."""
    rt = make_runtime("t_roundtrip1")
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True),
        routes={},
    )
    payload = bytes(range(128))  # arbitrary binary payload
    uploaded = await composite.aupload_files([("/test.bin", payload)])
    assert uploaded[0].error is None
    # Reading it back must return the identical bytes.
    fetched = await composite.adownload_files(["/test.bin"])
    assert fetched[0].error is None
    assert fetched[0].content == payload
async def test_composite_partial_success_aupload_async(tmp_path: Path):
    """One bad path in an async upload batch must not sink the good ones."""
    rt = make_runtime("t_partial_upload")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={},
    )
    outcomes = await composite.aupload_files(
        [
            ("/valid1.bin", b"Valid 1"),
            ("/../invalid.bin", b"Invalid path"),  # escapes the root via traversal
            ("/valid2.bin", b"Valid 2"),
        ]
    )
    assert len(outcomes) == 3
    # The good path before the failure succeeds.
    assert outcomes[0].error is None
    assert (base / "valid1.bin").exists()
    # The traversal attempt is rejected per-file.
    assert outcomes[1].error == "invalid_path"
    # The good path after the failure still goes through.
    assert outcomes[2].error is None
    assert (base / "valid2.bin").exists()
async def test_composite_partial_success_adownload_async(tmp_path: Path):
    """Async batch download reports per-path errors independently."""
    rt = make_runtime("t_partial_download")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={},
    )
    (base / "exists.bin").write_bytes(b"I exist!")
    outcomes = await composite.adownload_files(
        ["/exists.bin", "/doesnotexist.bin", "/../invalid"]
    )
    assert len(outcomes) == 3
    # The real file downloads fine.
    assert outcomes[0].error is None
    assert outcomes[0].content == b"I exist!"
    # The missing file is reported, not raised.
    assert outcomes[1].error == "file_not_found"
    assert outcomes[1].content is None
    # The traversal attempt is rejected.
    assert outcomes[2].error == "invalid_path"
    assert outcomes[2].content is None
async def test_composite_aupload_download_multiple_routes_async(tmp_path: Path):
    """Async uploads fan out across the default and every routed backend."""
    rt = make_runtime("t_multi_route")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={
            "/memories/": StoreBackend(rt),
            "/archive/": StoreBackend(rt),
        },
    )
    # One file aimed at each backend.
    outcomes = await composite.aupload_files(
        [
            ("/default.bin", b"Default backend"),
            ("/memories/mem.bin", b"Memory backend"),
            ("/archive/arch.bin", b"Archive backend"),
        ]
    )
    assert len(outcomes) == 3
    assert all(o.error is None for o in outcomes)
    # Only the default-path file should appear on disk.
    assert (base / "default.bin").exists()
    assert (base / "default.bin").read_bytes() == b"Default backend"
async def test_composite_adownload_preserves_original_paths_async(tmp_path: Path):
    """Async download responses echo the caller's composite paths verbatim."""
    rt = make_runtime("t_path_preserve")
    base = tmp_path
    composite = CompositeBackend(
        default=FilesystemBackend(root_dir=str(base), virtual_mode=True),
        routes={},
    )
    nested = base / "subdir" / "file.bin"
    nested.parent.mkdir()
    nested.write_bytes(b"Nested file")
    outcomes = await composite.adownload_files(["/subdir/file.bin"])
    # The response path is the composite path, not a backend-local one.
    assert outcomes[0].path == "/subdir/file.bin"
    assert outcomes[0].content == b"Nested file"

View File

@@ -0,0 +1,491 @@
from pathlib import Path
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import EditResult, WriteResult
def write_file(p: Path, content: str) -> None:
    """Write *content* to *p*, creating any missing parent directories first."""
    parent = p.parent
    parent.mkdir(parents=True, exist_ok=True)
    p.write_text(content)
def test_filesystem_backend_normal_mode(tmp_path: Path):
    """Core sync operations against real (non-virtual) absolute paths."""
    base = tmp_path
    file_a = base / "a.txt"
    file_b = base / "dir" / "b.py"
    write_file(file_a, "hello fs")
    write_file(file_b, "print('x')\nhello")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=False)
    # ls_info is shallow: root files and directory markers, no recursion.
    listed = {entry["path"] for entry in backend.ls_info(str(base))}
    assert str(file_a) in listed
    assert str(file_b) not in listed
    assert (str(base) + "/dir/") in listed
    # read / edit / write roundtrip on absolute paths.
    assert "hello fs" in backend.read(str(file_a))
    edited = backend.edit(str(file_a), "fs", "filesystem", replace_all=False)
    assert isinstance(edited, EditResult) and edited.error is None and edited.occurrences == 1
    written = backend.write(str(base / "new.txt"), "new content")
    assert isinstance(written, WriteResult) and written.error is None and written.path.endswith("new.txt")
    # grep_raw searches under the given path.
    hits = backend.grep_raw("hello", path=str(base))
    assert isinstance(hits, list) and any(h["path"].endswith("a.txt") for h in hits)
    # glob_info resolves patterns relative to the given path.
    globbed = backend.glob_info("*.py", path=str(base))
    assert any(entry["path"] == str(file_b) for entry in globbed)
def test_filesystem_backend_virtual_mode(tmp_path: Path):
    """Core sync operations through virtual ("/"-rooted) paths."""
    base = tmp_path
    write_file(base / "a.txt", "hello virtual")
    write_file(base / "dir" / "b.md", "content")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Virtual root listing is shallow: files plus directory markers only.
    listed = {entry["path"] for entry in backend.ls_info("/")}
    assert "/a.txt" in listed
    assert "/dir/b.md" not in listed
    assert "/dir/" in listed
    # read / edit through virtual paths.
    assert "hello virtual" in backend.read("/a.txt")
    edited = backend.edit("/a.txt", "virtual", "virt", replace_all=False)
    assert isinstance(edited, EditResult) and edited.error is None and edited.occurrences == 1
    # Writes via virtual paths land under the real root.
    written = backend.write("/new.txt", "x")
    assert isinstance(written, WriteResult) and written.error is None
    assert (base / "new.txt").exists()
    # grep_raw scoped to the virtual root reports virtual paths.
    hits = backend.grep_raw("virt", path="/")
    assert isinstance(hits, list) and any(h["path"] == "/a.txt" for h in hits)
    # glob_info likewise yields virtual paths.
    globbed = backend.glob_info("**/*.md", path="/")
    assert any(entry["path"] == "/dir/b.md" for entry in globbed)
    # A malformed regex is reported as an error string, not raised.
    bad = backend.grep_raw("[", path="/")
    assert isinstance(bad, str)
    # Escaping the virtual root must raise ValueError.
    try:
        backend.read("/../a.txt")
    except ValueError:
        pass
    else:
        raise AssertionError("expected ValueError for traversal")
def test_filesystem_backend_ls_nested_directories(tmp_path: Path):
    """ls_info lists one directory level at a time in virtual mode."""
    base = tmp_path
    tree = {
        base / "config.json": "config",
        base / "src" / "main.py": "code",
        base / "src" / "utils" / "helper.py": "utils code",
        base / "src" / "utils" / "common.py": "common utils",
        base / "docs" / "readme.md": "documentation",
        base / "docs" / "api" / "reference.md": "api docs",
    }
    for target, text in tree.items():
        write_file(target, text)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Root level: immediate files and directories only.
    top = [entry["path"] for entry in backend.ls_info("/")]
    assert "/config.json" in top
    assert "/src/" in top
    assert "/docs/" in top
    assert "/src/main.py" not in top
    assert "/src/utils/helper.py" not in top
    # One level down.
    src = [entry["path"] for entry in backend.ls_info("/src/")]
    assert "/src/main.py" in src
    assert "/src/utils/" in src
    assert "/src/utils/helper.py" not in src
    # Leaf directory: exactly its two files.
    utils = [entry["path"] for entry in backend.ls_info("/src/utils/")]
    assert "/src/utils/helper.py" in utils
    assert "/src/utils/common.py" in utils
    assert len(utils) == 2
    # A missing directory lists as empty rather than raising.
    assert backend.ls_info("/nonexistent/") == []
def test_filesystem_backend_ls_normal_mode_nested(tmp_path: Path):
    """ls_info stays shallow with nested directories in non-virtual mode."""
    base = tmp_path
    for target, text in {
        base / "file1.txt": "content1",
        base / "subdir" / "file2.txt": "content2",
        base / "subdir" / "nested" / "file3.txt": "content3",
    }.items():
        write_file(target, text)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=False)
    # Root listing: direct file plus the subdirectory marker only.
    top = [entry["path"] for entry in backend.ls_info(str(base))]
    assert str(base / "file1.txt") in top
    assert str(base / "subdir") + "/" in top
    assert str(base / "subdir" / "file2.txt") not in top
    # Listing the subdirectory surfaces its file and its nested dir marker.
    inner = [entry["path"] for entry in backend.ls_info(str(base / "subdir"))]
    assert str(base / "subdir" / "file2.txt") in inner
    assert str(base / "subdir" / "nested") + "/" in inner
    assert str(base / "subdir" / "nested" / "file3.txt") not in inner
def test_filesystem_backend_ls_trailing_slash(tmp_path: Path):
    """ls_info output is non-empty, sorted, and slash-insensitive."""
    base = tmp_path
    write_file(base / "file.txt", "content")
    write_file(base / "dir" / "nested.txt", "nested")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Root listing is non-empty.
    assert len(backend.ls_info("/")) > 0
    # And its paths come back sorted.
    top_paths = [entry["path"] for entry in backend.ls_info("/")]
    assert top_paths == sorted(top_paths)
    # Trailing slash on a directory path makes no difference.
    with_slash = backend.ls_info("/dir/")
    without_slash = backend.ls_info("/dir")
    assert len(with_slash) == len(without_slash)
    assert [e["path"] for e in with_slash] == [e["path"] for e in without_slash]
    # Nonexistent directories list as empty.
    assert backend.ls_info("/nonexistent/") == []
def test_filesystem_backend_intercept_large_tool_result(tmp_path: Path):
    """Oversized tool results are evicted to the filesystem backend."""
    from langchain.tools import ToolRuntime
    from langchain_core.messages import ToolMessage
    from deepagents.middleware.filesystem import FilesystemMiddleware
    base = tmp_path
    rt = ToolRuntime(
        state={"messages": [], "files": {}},
        context=None,
        tool_call_id="test_fs",
        store=None,
        stream_writer=lambda _: None,
        config={},
    )
    middleware = FilesystemMiddleware(
        backend=lambda r: FilesystemBackend(root_dir=str(base), virtual_mode=True),
        tool_token_limit_before_evict=1000,
    )
    # A 5000-char payload exceeds the 1000-token eviction limit.
    oversized = "f" * 5000
    intercepted = middleware._intercept_large_tool_result(
        ToolMessage(content=oversized, tool_call_id="test_fs_123"), rt
    )
    # The reply is a pointer message, and the payload lands on disk.
    assert isinstance(intercepted, ToolMessage)
    assert "Tool result too large" in intercepted.content
    assert "/large_tool_results/test_fs_123" in intercepted.content
    evicted = base / "large_tool_results" / "test_fs_123"
    assert evicted.exists()
    assert evicted.read_text() == oversized
def test_filesystem_upload_single_file(tmp_path: Path):
    """upload_files writes one binary file and reports success."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    payload = b"Hello, Binary World!"
    outcomes = backend.upload_files([("/test_upload.bin", payload)])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/test_upload.bin"
    assert outcomes[0].error is None
    # The bytes must land verbatim under the real root.
    target = tmp_path / "test_upload.bin"
    assert target.exists()
    assert target.read_bytes() == payload
def test_filesystem_upload_multiple_files(tmp_path: Path):
    """upload_files handles a batch, creating subdirectories as needed."""
    root = tmp_path
    be = FilesystemBackend(root_dir=str(root), virtual_mode=True)
    files = [
        ("/file1.bin", b"Content 1"),
        ("/file2.bin", b"Content 2"),
        ("/subdir/file3.bin", b"Content 3"),
    ]
    responses = be.upload_files(files)
    assert len(responses) == 3
    # Each response echoes its request path and reports success.
    # (zip over request/response pairs instead of enumerate+indexing; the
    # original bound an unused `content` loop variable.)
    for response, (path, _content) in zip(responses, files):
        assert response.path == path
        assert response.error is None
    # Verify all files were created with the exact bytes.
    assert (root / "file1.bin").read_bytes() == b"Content 1"
    assert (root / "file2.bin").read_bytes() == b"Content 2"
    assert (root / "subdir" / "file3.bin").read_bytes() == b"Content 3"
def test_filesystem_download_single_file(tmp_path: Path):
    """download_files returns the raw bytes of one existing file."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Seed the file directly on disk.
    payload = b"Download me!"
    (tmp_path / "test_download.bin").write_bytes(payload)
    outcomes = backend.download_files(["/test_download.bin"])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/test_download.bin"
    assert outcomes[0].content == payload
    assert outcomes[0].error is None
def test_filesystem_download_multiple_files(tmp_path: Path):
    """download_files fetches a batch, including nested paths, in order."""
    base = tmp_path
    seed = {
        base / "file1.txt": b"File 1",
        base / "file2.txt": b"File 2",
        base / "subdir" / "file3.txt": b"File 3",
    }
    for target, payload in seed.items():
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(payload)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    outcomes = backend.download_files(["/file1.txt", "/file2.txt", "/subdir/file3.txt"])
    assert len(outcomes) == 3
    # Responses come back in request order with the exact bytes.
    expected = [
        ("/file1.txt", b"File 1"),
        ("/file2.txt", b"File 2"),
        ("/subdir/file3.txt", b"File 3"),
    ]
    for outcome, (path, payload) in zip(outcomes, expected):
        assert outcome.path == path
        assert outcome.content == payload
        assert outcome.error is None
def test_filesystem_upload_download_roundtrip(tmp_path: Path):
    """Every byte value must survive an upload/download roundtrip."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    payload = bytes(range(256))  # covers all 256 possible byte values
    uploaded = backend.upload_files([("/roundtrip.bin", payload)])
    assert uploaded[0].error is None
    # Reading it back must yield the identical bytes.
    fetched = backend.download_files(["/roundtrip.bin"])
    assert fetched[0].error is None
    assert fetched[0].content == payload
def test_filesystem_download_errors(tmp_path: Path):
    """download_files maps each failure mode to a per-file error code."""
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Missing file -> file_not_found.
    outcomes = backend.download_files(["/nonexistent.txt"])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/nonexistent.txt"
    assert outcomes[0].content is None
    assert outcomes[0].error == "file_not_found"
    # Directory target -> is_directory.
    (base / "testdir").mkdir()
    outcomes = backend.download_files(["/testdir"])
    assert outcomes[0].error == "is_directory"
    assert outcomes[0].content is None
    # Path traversal -> invalid_path.
    outcomes = backend.download_files(["/../etc/passwd"])
    assert len(outcomes) == 1
    assert outcomes[0].error == "invalid_path"
    assert outcomes[0].content is None
def test_filesystem_upload_errors(tmp_path: Path):
    """upload_files rejects paths that escape the virtual root."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # A traversal path must be rejected with invalid_path.
    outcomes = backend.upload_files([("/../bad/path.txt", b"content")])
    assert len(outcomes) == 1
    assert outcomes[0].error == "invalid_path"
def test_filesystem_partial_success_upload(tmp_path: Path):
    """A rejected path must not block the rest of an upload batch."""
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    outcomes = backend.upload_files(
        [
            ("/valid1.txt", b"Valid content 1"),
            ("/../invalid.txt", b"Invalid path"),  # traversal attempt
            ("/valid2.txt", b"Valid content 2"),
        ]
    )
    assert len(outcomes) == 3
    # The good path before the failure succeeds.
    assert outcomes[0].error is None
    assert (base / "valid1.txt").exists()
    # The traversal path is rejected.
    assert outcomes[1].error == "invalid_path"
    # The good path after the failure still goes through.
    assert outcomes[2].error is None
    assert (base / "valid2.txt").exists()
def test_filesystem_partial_success_download(tmp_path: Path):
    """A failing path must not block the rest of a download batch."""
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    payload = b"I exist!"
    (base / "exists.txt").write_bytes(payload)
    outcomes = backend.download_files(["/exists.txt", "/doesnotexist.txt", "/../invalid"])
    assert len(outcomes) == 3
    # The existing file downloads.
    assert outcomes[0].error is None
    assert outcomes[0].content == payload
    # The missing file reports file_not_found.
    assert outcomes[1].error == "file_not_found"
    assert outcomes[1].content is None
    # The traversal path reports invalid_path.
    assert outcomes[2].error == "invalid_path"
    assert outcomes[2].content is None
def test_filesystem_upload_to_existing_directory_path(tmp_path: Path):
    """Uploading over an existing directory name yields a response either way.

    With /existing_dir present as a directory, uploading a file to the same
    path may succeed or fail depending on OS/filesystem semantics. This test
    only documents that a response is always produced.
    """
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    (base / "existing_dir").mkdir()
    outcomes = backend.upload_files([("/existing_dir", b"file content")])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/existing_dir"
    # Success or failure here is OS-dependent; we only require a response.
def test_filesystem_upload_parent_is_file(tmp_path: Path):
    """Uploading beneath a path whose parent is a regular file must error.

    With /parent.txt existing as a file, /parent.txt/child.txt cannot be
    created, so the response must carry some error.
    """
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    (base / "parent.txt").write_text("I am a file, not a directory")
    outcomes = backend.upload_files([("/parent.txt/child.txt", b"child content")])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/parent.txt/child.txt"
    assert outcomes[0].error is not None
def test_filesystem_download_directory_as_file(tmp_path: Path):
    """Downloading a directory path yields the is_directory error.

    Partly covered by test_filesystem_download_errors; kept explicit to
    document this as a supported error scenario.
    """
    base = tmp_path
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    (base / "mydir").mkdir()
    outcomes = backend.download_files(["/mydir"])
    assert len(outcomes) == 1
    assert outcomes[0].path == "/mydir"
    assert outcomes[0].content is None
    assert outcomes[0].error == "is_directory"

View File

@@ -0,0 +1,520 @@
"""Async tests for FilesystemBackend."""
from pathlib import Path
import pytest
from deepagents.backends.filesystem import FilesystemBackend
from deepagents.backends.protocol import EditResult, WriteResult
def write_file(p: Path, content: str) -> None:
    """Write *content* to *p*, creating any missing parent directories first."""
    parent = p.parent
    parent.mkdir(parents=True, exist_ok=True)
    p.write_text(content)
async def test_filesystem_backend_async_normal_mode(tmp_path: Path):
    """Core async operations against real (non-virtual) absolute paths."""
    base = tmp_path
    file_a = base / "a.txt"
    file_b = base / "dir" / "b.py"
    write_file(file_a, "hello fs")
    write_file(file_b, "print('x')\nhello")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=False)
    # als_info is shallow: root files and directory markers, no recursion.
    listed = {entry["path"] for entry in await backend.als_info(str(base))}
    assert str(file_a) in listed
    assert str(file_b) not in listed
    assert (str(base) + "/dir/") in listed
    # aread / aedit / awrite roundtrip on absolute paths.
    assert "hello fs" in await backend.aread(str(file_a))
    edited = await backend.aedit(str(file_a), "fs", "filesystem", replace_all=False)
    assert isinstance(edited, EditResult) and edited.error is None and edited.occurrences == 1
    written = await backend.awrite(str(base / "new.txt"), "new content")
    assert isinstance(written, WriteResult) and written.error is None and written.path.endswith("new.txt")
    # agrep_raw searches under the given path.
    hits = await backend.agrep_raw("hello", path=str(base))
    assert isinstance(hits, list) and any(h["path"].endswith("a.txt") for h in hits)
    # aglob_info resolves patterns relative to the given path.
    globbed = await backend.aglob_info("*.py", path=str(base))
    assert any(entry["path"] == str(file_b) for entry in globbed)
async def test_filesystem_backend_async_virtual_mode(tmp_path: Path):
    """Core async operations through virtual ("/"-rooted) paths."""
    base = tmp_path
    write_file(base / "a.txt", "hello virtual")
    write_file(base / "dir" / "b.md", "content")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Virtual root listing is shallow: files plus directory markers only.
    listed = {entry["path"] for entry in await backend.als_info("/")}
    assert "/a.txt" in listed
    assert "/dir/b.md" not in listed
    assert "/dir/" in listed
    # aread / aedit through virtual paths.
    assert "hello virtual" in await backend.aread("/a.txt")
    edited = await backend.aedit("/a.txt", "virtual", "virt", replace_all=False)
    assert isinstance(edited, EditResult) and edited.error is None and edited.occurrences == 1
    # Writes via virtual paths land under the real root.
    written = await backend.awrite("/new.txt", "x")
    assert isinstance(written, WriteResult) and written.error is None
    assert (base / "new.txt").exists()
    # agrep_raw scoped to the virtual root reports virtual paths.
    hits = await backend.agrep_raw("virt", path="/")
    assert isinstance(hits, list) and any(h["path"] == "/a.txt" for h in hits)
    # aglob_info likewise yields virtual paths.
    globbed = await backend.aglob_info("**/*.md", path="/")
    assert any(entry["path"] == "/dir/b.md" for entry in globbed)
    # A malformed regex is reported as an error string, not raised.
    bad = await backend.agrep_raw("[", path="/")
    assert isinstance(bad, str)
    # Escaping the virtual root must raise ValueError.
    with pytest.raises(ValueError):
        await backend.aread("/../a.txt")
async def test_filesystem_backend_als_nested_directories(tmp_path: Path):
    """Async ls lists one directory level at a time in virtual mode."""
    base = tmp_path
    tree = {
        base / "config.json": "config",
        base / "src" / "main.py": "code",
        base / "src" / "utils" / "helper.py": "utils code",
        base / "src" / "utils" / "common.py": "common utils",
        base / "docs" / "readme.md": "documentation",
        base / "docs" / "api" / "reference.md": "api docs",
    }
    for target, text in tree.items():
        write_file(target, text)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Root level: immediate files and directories only.
    top = [entry["path"] for entry in await backend.als_info("/")]
    assert "/config.json" in top
    assert "/src/" in top
    assert "/docs/" in top
    assert "/src/main.py" not in top
    assert "/src/utils/helper.py" not in top
    # One level down.
    src = [entry["path"] for entry in await backend.als_info("/src/")]
    assert "/src/main.py" in src
    assert "/src/utils/" in src
    assert "/src/utils/helper.py" not in src
    # Leaf directory: exactly its two files.
    utils = [entry["path"] for entry in await backend.als_info("/src/utils/")]
    assert "/src/utils/helper.py" in utils
    assert "/src/utils/common.py" in utils
    assert len(utils) == 2
    # A missing directory lists as empty rather than raising.
    assert await backend.als_info("/nonexistent/") == []
async def test_filesystem_backend_als_normal_mode_nested(tmp_path: Path):
    """Async ls_info stays shallow with nested dirs in non-virtual mode."""
    base = tmp_path
    for target, text in {
        base / "file1.txt": "content1",
        base / "subdir" / "file2.txt": "content2",
        base / "subdir" / "nested" / "file3.txt": "content3",
    }.items():
        write_file(target, text)
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=False)
    # Root listing: direct file plus the subdirectory marker only.
    top = [entry["path"] for entry in await backend.als_info(str(base))]
    assert str(base / "file1.txt") in top
    assert str(base / "subdir") + "/" in top
    assert str(base / "subdir" / "file2.txt") not in top
    # Listing the subdirectory surfaces its file and its nested dir marker.
    inner = [entry["path"] for entry in await backend.als_info(str(base / "subdir"))]
    assert str(base / "subdir" / "file2.txt") in inner
    assert str(base / "subdir" / "nested") + "/" in inner
    assert str(base / "subdir" / "nested" / "file3.txt") not in inner
async def test_filesystem_backend_als_trailing_slash(tmp_path: Path):
    """Async ls_info output is non-empty, sorted, and slash-insensitive."""
    base = tmp_path
    write_file(base / "file.txt", "content")
    write_file(base / "dir" / "nested.txt", "nested")
    backend = FilesystemBackend(root_dir=str(base), virtual_mode=True)
    # Root listing is non-empty.
    assert len(await backend.als_info("/")) > 0
    # And its paths come back sorted.
    top_paths = [entry["path"] for entry in await backend.als_info("/")]
    assert top_paths == sorted(top_paths)
    # Trailing slash on a directory path makes no difference.
    with_slash = await backend.als_info("/dir/")
    without_slash = await backend.als_info("/dir")
    assert len(with_slash) == len(without_slash)
    assert [e["path"] for e in with_slash] == [e["path"] for e in without_slash]
    # Nonexistent directories list as empty.
    assert await backend.als_info("/nonexistent/") == []
async def test_filesystem_backend_intercept_large_tool_result_async(tmp_path: Path):
    """Oversized tool results are evicted to disk in an async context too."""
    from langchain.tools import ToolRuntime
    from langchain_core.messages import ToolMessage
    from deepagents.middleware.filesystem import FilesystemMiddleware
    base = tmp_path
    rt = ToolRuntime(
        state={"messages": [], "files": {}},
        context=None,
        tool_call_id="test_fs",
        store=None,
        stream_writer=lambda _: None,
        config={},
    )
    middleware = FilesystemMiddleware(
        backend=lambda r: FilesystemBackend(root_dir=str(base), virtual_mode=True),
        tool_token_limit_before_evict=1000,
    )
    # A 5000-char payload exceeds the 1000-token eviction limit.
    oversized = "f" * 5000
    intercepted = middleware._intercept_large_tool_result(
        ToolMessage(content=oversized, tool_call_id="test_fs_123"), rt
    )
    # The reply is a pointer message, and the payload lands on disk.
    assert isinstance(intercepted, ToolMessage)
    assert "Tool result too large" in intercepted.content
    assert "/large_tool_results/test_fs_123" in intercepted.content
    evicted = base / "large_tool_results" / "test_fs_123"
    assert evicted.exists()
    assert evicted.read_text() == oversized
async def test_filesystem_aupload_single_file(tmp_path: Path):
    """Test async uploading a single binary file."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    upload_path = "/test_upload.bin"
    payload = b"Hello, Binary World!"
    results = await backend.aupload_files([(upload_path, payload)])
    # One request -> one response, echoing the path with no error.
    assert len(results) == 1
    assert results[0].path == upload_path
    assert results[0].error is None
    # The bytes must land on disk unchanged.
    on_disk = tmp_path / "test_upload.bin"
    assert on_disk.exists()
    assert on_disk.read_bytes() == payload
async def test_filesystem_aupload_multiple_files(tmp_path: Path):
    """Test async uploading multiple files in one call."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    batch = [
        ("/file1.bin", b"Content 1"),
        ("/file2.bin", b"Content 2"),
        ("/subdir/file3.bin", b"Content 3"),
    ]
    results = await backend.aupload_files(batch)
    assert len(results) == 3
    # Responses come back in request order, each error-free.
    for resp, (req_path, _req_bytes) in zip(results, batch):
        assert resp.path == req_path
        assert resp.error is None
    # Every file, including the one in a fresh subdirectory, is written.
    assert (tmp_path / "file1.bin").read_bytes() == b"Content 1"
    assert (tmp_path / "file2.bin").read_bytes() == b"Content 2"
    assert (tmp_path / "subdir" / "file3.bin").read_bytes() == b"Content 3"
async def test_filesystem_adownload_single_file(tmp_path: Path):
    """Test async downloading a single file."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Seed the file directly on disk, bypassing the backend.
    payload = b"Download me!"
    (tmp_path / "test_download.bin").write_bytes(payload)
    results = await backend.adownload_files(["/test_download.bin"])
    assert len(results) == 1
    # Response echoes the virtual path and carries the exact bytes.
    first = results[0]
    assert first.path == "/test_download.bin"
    assert first.content == payload
    assert first.error is None
async def test_filesystem_adownload_multiple_files(tmp_path: Path):
    """Test async downloading multiple files in one call."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Seed three files on disk, one inside a nested directory.
    seed = {
        tmp_path / "file1.txt": b"File 1",
        tmp_path / "file2.txt": b"File 2",
        tmp_path / "subdir" / "file3.txt": b"File 3",
    }
    for fpath, data in seed.items():
        fpath.parent.mkdir(parents=True, exist_ok=True)
        fpath.write_bytes(data)
    requested = ["/file1.txt", "/file2.txt", "/subdir/file3.txt"]
    results = await backend.adownload_files(requested)
    assert len(results) == 3
    # Responses arrive in request order with the expected bytes and no errors.
    expected = [
        ("/file1.txt", b"File 1"),
        ("/file2.txt", b"File 2"),
        ("/subdir/file3.txt", b"File 3"),
    ]
    for resp, (want_path, want_bytes) in zip(results, expected):
        assert resp.path == want_path
        assert resp.content == want_bytes
        assert resp.error is None
async def test_filesystem_aupload_download_roundtrip(tmp_path: Path):
    """Test async upload followed by download for data integrity."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    virtual_path = "/roundtrip.bin"
    # Exercise every possible byte value, including NUL and high bytes.
    payload = bytes(range(256))
    up = await backend.aupload_files([(virtual_path, payload)])
    assert up[0].error is None
    down = await backend.adownload_files([virtual_path])
    assert down[0].error is None
    # What comes back must be bit-for-bit what went in.
    assert down[0].content == payload
async def test_filesystem_adownload_errors(tmp_path: Path):
    """Test async download error handling."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Missing file -> file_not_found, content left unset.
    missing = await backend.adownload_files(["/nonexistent.txt"])
    assert len(missing) == 1
    assert missing[0].path == "/nonexistent.txt"
    assert missing[0].content is None
    assert missing[0].error == "file_not_found"
    # Directories cannot be downloaded -> is_directory.
    (tmp_path / "testdir").mkdir()
    as_dir = await backend.adownload_files(["/testdir"])
    assert as_dir[0].error == "is_directory"
    assert as_dir[0].content is None
    # Path traversal outside the root is rejected -> invalid_path.
    traversal = await backend.adownload_files(["/../etc/passwd"])
    assert len(traversal) == 1
    assert traversal[0].error == "invalid_path"
    assert traversal[0].content is None
async def test_filesystem_aupload_errors(tmp_path: Path):
    """Test async upload error handling."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Path traversal outside the root must be rejected, not written.
    results = await backend.aupload_files([("/../bad/path.txt", b"content")])
    assert len(results) == 1
    assert results[0].error == "invalid_path"
async def test_filesystem_partial_success_aupload(tmp_path: Path):
    """Test partial success in async batch upload."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    batch = [
        ("/valid1.txt", b"Valid content 1"),
        ("/../invalid.txt", b"Invalid path"),  # Path traversal
        ("/valid2.txt", b"Valid content 2"),
    ]
    results = await backend.aupload_files(batch)
    assert len(results) == 3
    ok_first, bad_middle, ok_last = results
    # A bad entry in the middle must not abort the batch: entries before
    # and after it still succeed, and only the traversal attempt errors.
    assert ok_first.error is None
    assert (tmp_path / "valid1.txt").exists()
    assert bad_middle.error == "invalid_path"
    assert ok_last.error is None
    assert (tmp_path / "valid2.txt").exists()
async def test_filesystem_partial_success_adownload(tmp_path: Path):
    """Test partial success in async batch download."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    payload = b"I exist!"
    (tmp_path / "exists.txt").write_bytes(payload)
    # One valid path, one missing file, one traversal attempt.
    results = await backend.adownload_files(
        ["/exists.txt", "/doesnotexist.txt", "/../invalid"]
    )
    assert len(results) == 3
    ok, missing, bad = results
    # The valid entry succeeds despite the failures around it.
    assert ok.error is None
    assert ok.content == payload
    # The missing file reports file_not_found with no content.
    assert missing.error == "file_not_found"
    assert missing.content is None
    # The traversal attempt reports invalid_path with no content.
    assert bad.error == "invalid_path"
    assert bad.content is None
async def test_filesystem_aedit_replace_all(tmp_path: Path):
    """Test async edit with replace_all option."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    (tmp_path / "test.txt").write_text("foo bar foo baz")
    # Ambiguous edit: "foo" occurs twice, so replace_all=False must error.
    ambiguous = await backend.aedit("/test.txt", "foo", "qux", replace_all=False)
    assert ambiguous.error is not None
    assert "appears 2 times" in ambiguous.error
    # replace_all=True rewrites every occurrence and reports the count.
    replaced = await backend.aedit("/test.txt", "foo", "qux", replace_all=True)
    assert replaced.error is None
    assert replaced.occurrences == 2
    after_all = await backend.aread("/test.txt")
    assert "qux bar qux baz" in after_all
    # A unique target works fine with replace_all=False.
    unique = await backend.aedit("/test.txt", "bar", "xyz", replace_all=False)
    assert unique.error is None
    assert unique.occurrences == 1
    after_unique = await backend.aread("/test.txt")
    assert "qux xyz qux baz" in after_unique
async def test_filesystem_aread_with_offset_and_limit(tmp_path: Path):
    """Test async read with offset and limit."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Ten numbered lines to slice a window out of.
    (tmp_path / "multi.txt").write_text(
        "\n".join(f"Line {i}" for i in range(1, 11))
    )
    # offset=2 skips the first two lines; limit=3 caps the window at three.
    window = await backend.aread("/multi.txt", offset=2, limit=3)
    for included in ("Line 3", "Line 4", "Line 5"):
        assert included in window
    for excluded in ("Line 1", "Line 6"):
        assert excluded not in window
async def test_filesystem_agrep_with_glob(tmp_path: Path):
    """Test async grep with glob filter."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Two .py files and one .txt file all containing the search term.
    (tmp_path / "test.py").write_text("import os")
    (tmp_path / "test.txt").write_text("import nothing")
    (tmp_path / "main.py").write_text("import sys")
    # The glob filter must restrict matches to Python files only.
    hits = await backend.agrep_raw("import", path="/", glob="*.py")
    assert isinstance(hits, list)
    matched_paths = [hit["path"] for hit in hits]
    assert any("test.py" in mp for mp in matched_paths)
    assert any("main.py" in mp for mp in matched_paths)
    assert not any("test.txt" in mp for mp in matched_paths)
async def test_filesystem_aglob_recursive(tmp_path: Path):
    """Test async glob with recursive patterns."""
    backend = FilesystemBackend(root_dir=str(tmp_path), virtual_mode=True)
    # Python sources at several depths, plus one non-Python file.
    tree = {
        tmp_path / "src" / "main.py": "code",
        tmp_path / "src" / "utils" / "helper.py": "utils",
        tmp_path / "tests" / "test_main.py": "tests",
        tmp_path / "readme.txt": "docs",
    }
    for fpath, text in tree.items():
        write_file(fpath, text)
    # "**/*.py" should match every .py file regardless of nesting depth.
    infos = await backend.aglob_info("**/*.py", path="/")
    matched = [info["path"] for info in infos]
    for expected_name in ("main.py", "helper.py", "test_main.py"):
        assert any(expected_name in m for m in matched)
    assert not any("readme.txt" in m for m in matched)

Some files were not shown because too many files have changed in this diff Show More