Files
deepagent/deepagents_sourcecode/libs/deepagents-cli/deepagents_cli/token_utils.py
HyunjunJeon 9cb01f4abe project init
2025-12-31 11:32:36 +09:00

117 lines
4.4 KiB
Python

"""Utilities for accurate token counting using LangChain models."""
from pathlib import Path
from langchain_core.messages import SystemMessage
from deepagents_cli.config import console, settings
def calculate_baseline_tokens(model, agent_dir: Path, system_prompt: str, assistant_id: str) -> int:
    """Calculate baseline context tokens using the model's official tokenizer.

    This uses the model's get_num_tokens_from_messages() method to get
    accurate token counts for the initial context (system prompt + agent.md).

    Note: Tool definitions cannot be accurately counted before the first API call
    due to LangChain limitations. They will be included in the total after the
    first message is sent (~5,000 tokens).

    The result is a best-effort estimate: an agent.md file that is missing or
    cannot be read is treated as absent, and a tokenizer failure is reported
    as a console warning and yields 0.

    Args:
        model: LangChain model instance (ChatAnthropic or ChatOpenAI)
        agent_dir: Path to agent directory containing agent.md
        system_prompt: The base system prompt string
        assistant_id: The agent identifier for path references

    Returns:
        Token count for system prompt + agent.md (tools not included),
        or 0 if the model's token counting raises.
    """
    # Load user agent.md content. Read directly instead of checking exists()
    # first: a missing file raises FileNotFoundError (an OSError), and an
    # unreadable or undecodable file must not crash a purely informational
    # token estimate.
    try:
        user_memory = (agent_dir / "agent.md").read_text()
    except (OSError, UnicodeError):
        user_memory = ""

    # Load project agent.md content
    from .config import _find_project_agent_md, _find_project_root

    project_memory = ""
    project_root = _find_project_root()
    project_md_paths = _find_project_agent_md(project_root) if project_root else None
    if project_md_paths:
        try:
            # Combine all project agent.md files (if multiple exist).
            # All-or-nothing: if any file fails to load, no project memory
            # is counted.
            project_memory = "\n\n".join(path.read_text() for path in project_md_paths)
        except (OSError, UnicodeError):
            # Only I/O and decoding failures are tolerated here; anything
            # else is a programming error and should surface.
            project_memory = ""

    # Build the complete system prompt as it will be sent
    # This mimics what AgentMemoryMiddleware.wrap_model_call() does
    memory_section = (
        f"<user_memory>\n{user_memory or '(No user agent.md)'}\n</user_memory>\n\n"
        f"<project_memory>\n{project_memory or '(No project agent.md)'}\n</project_memory>"
    )

    # Get the long-term memory system prompt
    memory_system_prompt = get_memory_system_prompt(
        assistant_id, project_root, bool(project_memory)
    )

    # Combine all parts in the same order as the middleware
    full_system_prompt = memory_section + "\n\n" + system_prompt + "\n\n" + memory_system_prompt

    # Count tokens using the model's official method
    messages = [SystemMessage(content=full_system_prompt)]
    try:
        # Note: tools parameter is not supported by LangChain's token counting
        # Tool tokens will be included in the API response after first message
        return model.get_num_tokens_from_messages(messages)
    except Exception as e:
        # Fallback if token counting fails: the tokenizer is provider-specific
        # and may raise arbitrary exception types, so the broad catch is
        # deliberate at this boundary.
        console.print(f"[yellow]Warning: Could not calculate baseline tokens: {e}[/yellow]")
        return 0
def get_memory_system_prompt(
    assistant_id: str, project_root: Path | None = None, has_project_memory: bool = False
) -> str:
    """Render the long-term memory system prompt text.

    Fills the ``LONGTERM_MEMORY_SYSTEM_PROMPT`` template with the agent
    directory (absolute and display forms) and a description of where
    project memory lives.

    Args:
        assistant_id: The agent identifier for path references
        project_root: Path to the detected project root (if any)
        has_project_memory: Whether project memory was loaded

    Returns:
        The formatted long-term memory system prompt.
    """
    # The template is imported at call time from the agent_memory middleware.
    from .agent_memory import LONGTERM_MEMORY_SYSTEM_PROMPT

    substitutions = {
        "agent_dir_absolute": str(settings.get_agent_dir(assistant_id)),
        "agent_dir_display": f"~/.deepagents/{assistant_id}",
    }

    if not project_root:
        # No project detected: use placeholder descriptions.
        substitutions["project_memory_info"] = "None (not in a git project)"
        substitutions["project_deepagents_dir"] = "[project-root]/.deepagents (not in a project)"
    else:
        # Project detected: report whether an agent.md was actually loaded.
        substitutions["project_memory_info"] = (
            f"`{project_root}` (detected)"
            if has_project_memory
            else f"`{project_root}` (no agent.md found)"
        )
        substitutions["project_deepagents_dir"] = f"{project_root}/.deepagents"

    return LONGTERM_MEMORY_SYSTEM_PROMPT.format(**substitutions)