- research_agent/tools.py: 한글 Docstring 및 ASCII 흐름도 추가 - research_agent/researcher/depth.py: ResearchDepth enum 및 DepthConfig 추가 - research_agent/researcher/ralph_loop.py: Ralph Loop 반복 연구 패턴 구현 - research_agent/researcher/runner.py: 연구 실행기 (CLI 지원) - tests/researcher/: 91개 테스트 (실제 API 호출 포함) - scripts/run_ai_trend_research.py: AI 트렌드 연구 스크립트 + 도구 궤적 로깅
237 lines
6.7 KiB
Python
237 lines
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Tool Trajectory verification script with detailed logging.
|
|
|
|
This script verifies the research agent tools work correctly by:
|
|
1. Testing each tool individually with logging
|
|
2. Verifying the tool call sequence (trajectory)
|
|
3. Outputting detailed logs for debugging
|
|
|
|
Usage:
|
|
uv run python scripts/verify_tool_trajectory.py
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import sys
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from rich.console import Console
|
|
from rich.logging import RichHandler
|
|
from rich.panel import Panel
|
|
from rich.table import Table
|
|
|
|
# Configure the root logger once at import time: DEBUG verbosity, message-only
# format, and a Rich handler that renders rich tracebacks and hides the
# per-record source path column.
logging.basicConfig(
    handlers=[RichHandler(show_path=False, rich_tracebacks=True)],
    format="%(message)s",
    level=logging.DEBUG,
)

# Shared Rich console used for all panels and tables printed by this script.
console = Console()

# Script-specific logger; records propagate to the root handler set up above.
log = logging.getLogger("tool_trajectory")
|
|
|
|
|
|
@dataclass
class ToolCall:
    """Record describing a single tool invocation.

    Attributes:
        tool_name: Label identifying which tool was called.
        input_args: Argument mapping that was passed to the tool.
        output: Text captured from the tool for this call.
        duration_ms: Elapsed time of the call, in milliseconds.
        success: Whether the call completed without an error.
        error: Error message for a failed call; ``None`` by default.
    """

    # Required fields, in positional order.
    tool_name: str
    input_args: dict[str, Any]
    output: str
    duration_ms: float
    success: bool

    # Optional field; only populated when a failure message is available.
    error: str | None = None
|
|
|
|
|
|
@dataclass
class ToolTrajectory:
    """Ordered collection of tool calls recorded during a run.

    Attributes:
        calls: Recorded calls, in the order they were added.
        start_time: Timestamp taken when the trajectory object is created.
    """

    calls: list[ToolCall] = field(default_factory=list)
    start_time: datetime = field(default_factory=datetime.now)

    def add_call(self, call: ToolCall) -> None:
        """Append *call* and log a one-line entry for it.

        The log line carries the 1-based position of the call, the tool
        name, an OK/FAIL marker and the duration rounded to whole
        milliseconds.
        """
        self.calls.append(call)
        position = len(self.calls)
        verdict = "OK" if call.success else "FAIL"
        log.info(
            f"[{position}] {call.tool_name} ({verdict}) [{call.duration_ms:.0f}ms]"
        )

    def summary(self) -> str:
        """Return the total, successful and failed call counts as one line."""
        total = len(self.calls)
        succeeded = len([entry for entry in self.calls if entry.success])
        failed = total - succeeded
        return f"Total: {total}, Success: {succeeded}, Failed: {failed}"
|
|
|
|
|
|
def test_tool(
    trajectory: ToolTrajectory,
    tool_name: str,
    tool_func: Any,
    args: dict[str, Any],
) -> bool:
    """Invoke one tool, time it, and record the outcome on *trajectory*.

    Args:
        trajectory: Collector that receives one ``ToolCall`` for this
            invocation, whether it succeeds or fails.
        tool_name: Label used in log lines and in the recorded ``ToolCall``.
        tool_func: Object exposing an ``invoke(args)`` method.
        args: Argument mapping passed unchanged to ``tool_func.invoke``.

    Returns:
        ``True`` if ``invoke`` returned normally, ``False`` if it raised.
        Exceptions raised by the tool are recorded and logged, not re-raised.
    """
    log.debug(f"Testing {tool_name} with args: {args}")
    start = datetime.now()

    # Guard only the tool invocation itself, so that an error in the
    # bookkeeping below is never mis-reported as a tool failure (and never
    # results in two trajectory entries for one call).
    try:
        result = tool_func.invoke(args)
    except Exception as e:  # deliberate catch-all: one bad tool must not abort the run
        duration = (datetime.now() - start).total_seconds() * 1000
        trajectory.add_call(
            ToolCall(
                tool_name=tool_name,
                input_args=args,
                output="",
                duration_ms=duration,
                success=False,
                error=str(e),
            )
        )
        log.error(f"Error in {tool_name}: {e}")
        return False

    duration = (datetime.now() - start).total_seconds() * 1000

    # invoke() is not guaranteed to return a str. Coerce before truncating so
    # that a non-string result is neither stored in the str-typed ``output``
    # field nor turned into a spurious failure by len()/slicing.
    text = result if isinstance(result, str) else str(result)

    trajectory.add_call(
        ToolCall(
            tool_name=tool_name,
            input_args=args,
            output=text[:500],  # keep at most the first 500 characters
            duration_ms=duration,
            success=True,
        )
    )
    return True
|
|
|
|
|
|
def _build_trajectory_table(trajectory: ToolTrajectory) -> Table:
    """Build the Rich table listing every recorded call, one row per call."""
    # Local import keeps this block self-contained; rich is already a
    # dependency of this file.
    from rich.markup import escape

    table = Table(title="Tool Call Trajectory")
    table.add_column("#", style="cyan", width=3)
    table.add_column("Tool", style="green")
    table.add_column("Status", style="yellow")
    table.add_column("Duration", style="blue")
    table.add_column("Output Preview", style="dim", max_width=50)

    for i, call in enumerate(trajectory.calls, 1):
        # Error text and tool output are arbitrary strings; escape them so
        # square brackets are shown literally instead of being parsed as
        # Rich console markup.
        if call.success:
            status = "[green]OK[/green]"
        else:
            status = f"[red]FAIL: {escape(str(call.error))}[/red]"

        output_preview = (
            call.output[:50] + "..." if len(call.output) > 50 else call.output
        )
        output_preview = output_preview.replace("\n", " ")

        table.add_row(
            str(i),
            call.tool_name,
            status,
            f"{call.duration_ms:.0f}ms",
            escape(output_preview),
        )

    return table


def _build_summary_table(trajectory: ToolTrajectory) -> Table:
    """Build the header-less Rich table with call counts and total duration."""
    total_calls = len(trajectory.calls)
    success_calls = sum(1 for c in trajectory.calls if c.success)
    failed_calls = total_calls - success_calls

    summary_table = Table(show_header=False)
    summary_table.add_column("Metric", style="bold")
    summary_table.add_column("Value")

    summary_table.add_row("Total Tool Calls", str(total_calls))
    summary_table.add_row("Successful", f"[green]{success_calls}[/green]")
    summary_table.add_row(
        "Failed",
        f"[red]{failed_calls}[/red]" if failed_calls > 0 else "[green]0[/green]",
    )
    summary_table.add_row(
        "Total Duration",
        f"{sum(c.duration_ms for c in trajectory.calls):.0f}ms",
    )
    return summary_table


def _write_trajectory_log(trajectory: ToolTrajectory, log_path: Path) -> None:
    """Write a plain-text dump of every recorded call to *log_path*."""
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8: tool output may contain non-ASCII text, and the
    # platform default encoding is not guaranteed to be able to encode it.
    with open(log_path, "w", encoding="utf-8") as f:
        f.write(f"Tool Trajectory Log - {datetime.now().isoformat()}\n")
        f.write("=" * 60 + "\n\n")
        for i, call in enumerate(trajectory.calls, 1):
            f.write(f"[{i}] {call.tool_name}\n")
            f.write(f" Args: {call.input_args}\n")
            f.write(f" Success: {call.success}\n")
            f.write(f" Duration: {call.duration_ms:.0f}ms\n")
            if call.error:
                f.write(f" Error: {call.error}\n")
            f.write(f" Output:\n{call.output}\n")
            f.write("-" * 40 + "\n")


def main() -> int:
    """Run every tool test case, report the trajectory, and write a log file.

    The run has three phases: invoke each tool once, print a table of the
    recorded calls, then print a summary. A plain-text log is written to
    ``research_workspace/tool_trajectory.log``.

    Returns:
        ``0`` when every tool call succeeded, ``1`` when at least one failed.
    """
    console.print(
        Panel(
            "[bold cyan]Tool Trajectory Verification[/bold cyan]\n"
            "[dim]Testing research agent tools with detailed logging[/dim]",
            title="Verification Started",
        )
    )

    # Imported inside main() (as in the original), so the banner above is
    # printed before the tools module is loaded.
    # NOTE(review): library_docs_search is imported but has no test case
    # below. Confirm whether an import-only check is intended.
    from research_agent.tools import (
        arxiv_search,
        github_code_search,
        library_docs_search,
        tavily_search,
        think_tool,
    )

    trajectory = ToolTrajectory()

    console.print("\n[bold]Phase 1: Individual Tool Tests[/bold]\n")

    # (label, tool object, arguments passed to tool.invoke)
    test_cases = [
        ("think_tool", think_tool, {"reflection": "Testing reflection capability"}),
        (
            "tavily_search",
            tavily_search,
            {"query": "context engineering", "max_results": 1},
        ),
        (
            "arxiv_search",
            arxiv_search,
            {"query": "large language model", "max_results": 2},
        ),
        (
            "github_code_search",
            github_code_search,
            {"query": "useState(", "max_results": 2},
        ),
    ]

    for tool_name, tool_func, args in test_cases:
        console.print(f" Testing: [cyan]{tool_name}[/cyan]...")
        # The boolean result is not needed here; failures are counted from
        # the trajectory afterwards.
        test_tool(trajectory, tool_name, tool_func, args)

    console.print("\n[bold]Phase 2: Tool Trajectory Analysis[/bold]\n")
    console.print(_build_trajectory_table(trajectory))

    console.print("\n[bold]Phase 3: Verification Summary[/bold]\n")
    console.print(_build_summary_table(trajectory))

    log_path = Path("research_workspace") / "tool_trajectory.log"
    _write_trajectory_log(trajectory, log_path)
    console.print(f"\n[dim]Log saved to: {log_path}[/dim]")

    failed_calls = sum(1 for c in trajectory.calls if not c.success)
    if failed_calls > 0:
        console.print(
            Panel(
                f"[red]Verification FAILED[/red]\n"
                f"{failed_calls} tool(s) failed. Check logs above.",
                border_style="red",
            )
        )
        return 1

    console.print(
        Panel(
            "[green]Verification PASSED[/green]\n"
            "All tools executed successfully with correct trajectory.",
            border_style="green",
        )
    )
    return 0
|
|
|
|
|
|
# Script entry point: run the verification and hand main()'s return value to
# the interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|