feat: Deep Research Agent 확장 - Ralph Loop, 깊이 설정, 테스트 스위트 추가

- research_agent/tools.py: 한글 Docstring 및 ASCII 흐름도 추가
- research_agent/researcher/depth.py: ResearchDepth enum 및 DepthConfig 추가
- research_agent/researcher/ralph_loop.py: Ralph Loop 반복 연구 패턴 구현
- research_agent/researcher/runner.py: 연구 실행기 (CLI 지원)
- tests/researcher/: 91개 테스트 (실제 API 호출 포함)
- scripts/run_ai_trend_research.py: AI 트렌드 연구 스크립트 + 도구 궤적 로깅
2026-01-12 15:49:43 +09:00
parent 9e3d37fa86
commit 6f01c834ba
18 changed files with 4334 additions and 205 deletions
--- a/context_engineering_research_agent/agent.py
+++ b/context_engineering_research_agent/agent.py
@@ -85,11 +85,13 @@ _cached_model = None
 def _infer_openrouter_model_name(model: BaseChatModel) -> str | None:
    """OpenRouter 모델에서 모델명을 추출합니다.

+    API 문서 주소: https://openrouter.ai/docs/api/api-reference/models/get-models
+
    Args:
        model: LangChain 모델 인스턴스

    Returns:
-        OpenRouter 모델명 (예: "anthropic/claude-3-sonnet") 또는 None
+        OpenRouter 모델명 (예: "anthropic/claude-sonnet-4-5") 또는 None
    """
    if detect_provider(model) != ProviderType.OPENROUTER:
        return None
--- a/context_engineering_research_agent/skills/middleware.py
+++ b/context_engineering_research_agent/skills/middleware.py
@@ -1,11 +1,11 @@
 """스킬 시스템 미들웨어.

-Progressive Disclosure 패턴으로 스킬 메타데이터를 시스템 프롬프트에 주입합니다.
+Progressive Disclosure 패턴으로 Agent Skills 메타데이터를 시스템 프롬프트에 주입합니다.
 """

 from collections.abc import Awaitable, Callable
 from pathlib import Path
-from typing import NotRequired, TypedDict, cast
+from typing import Any, NotRequired, TypedDict, cast

 from langchain.agents.middleware.types import (
    AgentMiddleware,
@@ -13,6 +13,7 @@ from langchain.agents.middleware.types import (
    ModelRequest,
    ModelResponse,
 )
+from langchain_core.messages import SystemMessage
 from langgraph.runtime import Runtime

 from context_engineering_research_agent.skills.load import SkillMetadata, list_skills
@@ -111,20 +112,24 @@ class SkillsMiddleware(AgentMiddleware):
        return "\n".join(lines)

    def before_agent(
-        self, state: SkillsState, runtime: Runtime
-    ) -> SkillsStateUpdate | None:
+        self,
+        state: AgentState[Any],  # noqa: ARG002
+        runtime: Runtime,  # noqa: ARG002
+    ) -> dict[str, Any] | None:
        skills = list_skills(
            user_skills_dir=self.skills_dir,
            project_skills_dir=self.project_skills_dir,
        )
-        return SkillsStateUpdate(skills_metadata=skills)
+        return cast("dict[str, Any]", SkillsStateUpdate(skills_metadata=skills))

    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
-        skills_metadata = request.state.get("skills_metadata", [])
+        skills_metadata = cast(
+            "list[SkillMetadata]", request.state.get("skills_metadata", [])
+        )

        skills_locations = self._format_skills_locations()
        skills_list = self._format_skills_list(skills_metadata)
@@ -139,7 +144,7 @@ class SkillsMiddleware(AgentMiddleware):
        else:
            system_prompt = skills_section

-        return handler(request.override(system_prompt=system_prompt))
+        return handler(request.override(system_message=SystemMessage(system_prompt)))

    async def awrap_model_call(
        self,
@@ -162,4 +167,6 @@ class SkillsMiddleware(AgentMiddleware):
        else:
            system_prompt = skills_section

-        return await handler(request.override(system_prompt=system_prompt))
+        return await handler(
+            request.override(system_message=SystemMessage(system_prompt))
+        )
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,9 @@ dev = [
 requires = ["setuptools>=73.0.0", "wheel"]
 build-backend = "setuptools.build_meta"

+[project.scripts]
+deep-research = "research_agent.researcher.runner:main"
+
 [tool.setuptools]
 packages = ["research_agent"]

--- a/research_agent/researcher/__init__.py
+++ b/research_agent/researcher/__init__.py
@@ -14,10 +14,50 @@ from research_agent.researcher.agent import (
    create_researcher_agent,
    get_researcher_subagent,
 )
-from research_agent.researcher.prompts import AUTONOMOUS_RESEARCHER_INSTRUCTIONS
+from research_agent.researcher.depth import (
+    DEPTH_CONFIGS,
+    DepthConfig,
+    ResearchDepth,
+    get_depth_config,
+    infer_research_depth,
+)
+from research_agent.researcher.prompts import (
+    AUTONOMOUS_RESEARCHER_INSTRUCTIONS,
+    DEPTH_PROMPTS,
+    build_research_prompt,
+    get_depth_prompt,
+)
+from research_agent.researcher.ralph_loop import (
+    Finding,
+    RalphLoopState,
+    ResearchRalphLoop,
+    ResearchSession,
+    SourceQuality,
+    SourceType,
+)
+from research_agent.researcher.runner import (
+    ResearchRunner,
+    run_deep_research,
+)

 __all__ = [
    "create_researcher_agent",
    "get_researcher_subagent",
    "AUTONOMOUS_RESEARCHER_INSTRUCTIONS",
+    "DEPTH_PROMPTS",
+    "get_depth_prompt",
+    "build_research_prompt",
+    "ResearchDepth",
+    "DepthConfig",
+    "DEPTH_CONFIGS",
+    "infer_research_depth",
+    "get_depth_config",
+    "ResearchRalphLoop",
+    "ResearchSession",
+    "RalphLoopState",
+    "Finding",
+    "SourceQuality",
+    "SourceType",
+    "ResearchRunner",
+    "run_deep_research",
 ]
--- a/research_agent/researcher/agent.py
+++ b/research_agent/researcher/agent.py
@@ -1,9 +1,48 @@
-"""자율적 연구 에이전트 팩토리.
+"""자율적 연구 에이전트 팩토리 모듈.

 이 모듈은 자체 계획, 반성, 컨텍스트 관리 기능을 갖춘
 독립적인 연구 DeepAgent를 생성합니다.
+
+## 에이전트 생성 흐름
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                create_researcher_agent()                         │
+├─────────────────────────────────────────────────────────────────┤
+│                                                                  │
+│   1. 모델 초기화                                                 │
+│      model = ChatOpenAI(model="gpt-4.1")                        │
+│                                                                  │
+│   2. 깊이 설정 로드                                              │
+│      config = get_depth_config(depth)                           │
+│                                                                  │
+│   3. 깊이별 도구 선택                                            │
+│      tools = _get_tools_for_depth(depth)                        │
+│      ┌─────────────────────────────────────────────────────────┐│
+│      │ QUICK:   think, mgrep, tavily                           ││
+│      │ STANDARD: + comprehensive_search                         ││
+│      │ DEEP:    + arxiv, github                                 ││
+│      │ EXHAUSTIVE: + library_docs                               ││
+│      └─────────────────────────────────────────────────────────┘│
+│                                                                  │
+│   4. 프롬프트 구성                                               │
+│      - QUICK/STANDARD: AUTONOMOUS_RESEARCHER_INSTRUCTIONS       │
+│      - DEEP/EXHAUSTIVE: build_research_prompt() (Ralph Loop)   │
+│                                                                  │
+│   5. DeepAgent 생성                                              │
+│      return create_deep_agent(model, tools, prompt, backend)    │
+│                                                                  │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+v2 업데이트 (2026-01):
+- ResearchDepth 기반 동적 깊이 조절
+- 다중 검색 도구 (mgrep, arXiv, comprehensive_search)
+- Ralph Loop 패턴 지원
 """

+from __future__ import annotations
+
 from datetime import datetime

 from deepagents import create_deep_agent
@@ -12,100 +51,206 @@ from langchain_core.language_models import BaseChatModel
 from langchain_openai import ChatOpenAI
 from langgraph.graph.state import CompiledStateGraph

-from research_agent.researcher.prompts import AUTONOMOUS_RESEARCHER_INSTRUCTIONS
-from research_agent.tools import tavily_search, think_tool
+from research_agent.researcher.depth import ResearchDepth, get_depth_config
+from research_agent.researcher.prompts import (
+    AUTONOMOUS_RESEARCHER_INSTRUCTIONS,
+    build_research_prompt,
+)
+from research_agent.tools import (
+    arxiv_search,
+    comprehensive_search,
+    github_code_search,
+    library_docs_search,
+    mgrep_search,
+    tavily_search,
+    think_tool,
+)
+
+
+# ============================================================================
+# 도구 선택 헬퍼
+# ============================================================================
+
+
+def _get_tools_for_depth(depth: ResearchDepth) -> list:
+    """연구 깊이에 따라 사용할 도구 목록을 반환한다.
+
+    깊이 수준에 따라 다른 도구 세트를 제공합니다:
+    - 기본: think_tool (항상 포함)
+    - web 소스: mgrep_search, tavily_search
+    - arxiv 소스: arxiv_search
+    - github 소스: github_code_search
+    - docs 소스: library_docs_search
+    - 다중 소스 (2개 이상): comprehensive_search
+
+    Args:
+        depth: 연구 깊이 (ResearchDepth enum).
+
+    Returns:
+        해당 깊이에서 사용 가능한 도구 목록.
+    """
+    # 깊이 설정 로드
+    config = get_depth_config(depth)
+
+    # 기본 도구: 항상 think_tool 포함
+    tools = [think_tool]
+
+    # 소스별 도구 추가
+    if "web" in config.sources:
+        tools.extend([mgrep_search, tavily_search])
+
+    if "arxiv" in config.sources:
+        tools.append(arxiv_search)
+
+    if "github" in config.sources:
+        tools.append(github_code_search)
+
+    if "docs" in config.sources:
+        tools.append(library_docs_search)
+
+    # 다중 소스인 경우 통합 검색 도구 추가
+    if len(config.sources) > 1:
+        tools.append(comprehensive_search)
+
+    return tools
+
+
+# ============================================================================
+# 에이전트 팩토리
+# ============================================================================


 def create_researcher_agent(
    model: str | BaseChatModel | None = None,
    backend: BackendProtocol | BackendFactory | None = None,
+    depth: ResearchDepth | str = ResearchDepth.STANDARD,
 ) -> CompiledStateGraph:
    """자율적 연구 DeepAgent를 생성한다.

-    이 에이전트는 다음 기능을 자체적으로 보유한다:
-    - 계획 루프 (TodoListMiddleware를 통한 write_todos)
-    - 연구 루프 (tavily_search + think_tool)
-    - 컨텍스트 관리 (SummarizationMiddleware)
-    - 중간 결과 저장을 위한 파일 접근 (FilesystemMiddleware)
-
-    본질적으로 자율적으로 작동하는 "연구 SubGraph"이다.
+    이 함수는 주어진 깊이 수준에 맞는 연구 에이전트를 생성합니다.
+    에이전트는 자체 계획 수립, 다중 소스 검색, 반성(reflection) 기능을 갖춥니다.

    Args:
-        model: 사용할 LLM. 기본값은 temperature=0인 gpt-4.1.
-        backend: 파일 작업용 백엔드. 제공되면
-                 연구자가 중간 결과를 파일시스템에 저장할 수 있다.
+        model: 사용할 LLM 모델.
+            - None: 기본 gpt-4.1 (temperature=0) 사용
+            - str: 모델 이름 (예: "gpt-4o")
+            - BaseChatModel: 직접 생성한 모델 인스턴스
+        backend: 파일 작업용 백엔드.
+            - None: 기본 StateBackend 사용
+            - FilesystemBackend, CompositeBackend 등 지정 가능
+        depth: 연구 깊이 수준.
+            - ResearchDepth enum 또는 문자열 ("quick", "standard", "deep", "exhaustive")
+            - 기본값: STANDARD

    Returns:
-        CompiledStateGraph: 독립적으로 사용하거나 오케스트레이터의
-        CompiledSubAgent로 사용할 수 있는 완전 자율적 연구 에이전트.
+        CompiledStateGraph: 실행 가능한 자율 연구 에이전트.

    Example:
-        # 독립 사용
-        researcher = create_researcher_agent()
-        result = researcher.invoke({
-            "messages": [HumanMessage("양자 컴퓨팅 트렌드 연구")]
-        })
-
-        # 오케스트레이터의 SubAgent로 사용
-        subagent = get_researcher_subagent()
-        orchestrator = create_deep_agent(subagents=[subagent, ...])
+        >>> # 기본 설정으로 생성
+        >>> agent = create_researcher_agent()
+        >>>
+        >>> # 깊이 지정
+        >>> agent = create_researcher_agent(depth="deep")
+        >>>
+        >>> # 커스텀 모델과 백엔드
+        >>> from langchain_openai import ChatOpenAI
+        >>> from deepagents.backends import FilesystemBackend
+        >>> agent = create_researcher_agent(
+        ...     model=ChatOpenAI(model="gpt-4o", temperature=0.2),
+        ...     backend=FilesystemBackend(root_dir="./research"),
+        ...     depth=ResearchDepth.EXHAUSTIVE,
+        ... )
    """
+    # 모델이 지정되지 않았으면 기본 모델 사용
    if model is None:
        model = ChatOpenAI(model="gpt-4.1", temperature=0.0)

-    # 현재 날짜로 프롬프트 포맷팅
-    current_date = datetime.now().strftime("%Y-%m-%d")
-    formatted_prompt = AUTONOMOUS_RESEARCHER_INSTRUCTIONS.format(date=current_date)
+    # 문자열로 전달된 깊이를 enum으로 변환
+    if isinstance(depth, str):
+        depth = ResearchDepth(depth)

+    # 깊이 설정 로드
+    config = get_depth_config(depth)
+
+    # 깊이에 맞는 도구 선택
+    tools = _get_tools_for_depth(depth)
+
+    # 현재 날짜 (프롬프트에 포함)
+    current_date = datetime.now().strftime("%Y-%m-%d")
+
+    # 깊이에 따른 프롬프트 구성
+    if depth in (ResearchDepth.DEEP, ResearchDepth.EXHAUSTIVE):
+        # DEEP/EXHAUSTIVE: Ralph Loop 프롬프트 사용
+        formatted_prompt = build_research_prompt(
+            depth=depth,
+            query="{query}",  # 런타임에 치환됨
+            max_iterations=config.max_ralph_iterations,
+        )
+    else:
+        # QUICK/STANDARD: 기본 자율 연구 프롬프트 사용
+        formatted_prompt = AUTONOMOUS_RESEARCHER_INSTRUCTIONS.format(date=current_date)
+
+    # DeepAgent 생성 및 반환
    return create_deep_agent(
        model=model,
-        tools=[tavily_search, think_tool],
+        tools=tools,
        system_prompt=formatted_prompt,
        backend=backend,
    )


+# ============================================================================
+# SubAgent 통합
+# ============================================================================
+
+
 def get_researcher_subagent(
    model: str | BaseChatModel | None = None,
    backend: BackendProtocol | BackendFactory | None = None,
+    depth: ResearchDepth | str = ResearchDepth.STANDARD,
 ) -> dict:
-    """오케스트레이터에서 사용할 CompiledSubAgent로 연구자를 가져온다.
+    """오케스트레이터용 CompiledSubAgent로 연구자를 반환한다.

-    이 함수는 자율적 연구 에이전트를 생성하고 SubAgentMiddleware가
-    기대하는 CompiledSubAgent 형식으로 래핑한다.
+    이 함수는 메인 에이전트에서 서브에이전트로 호출할 수 있는 형태로
+    연구 에이전트를 래핑합니다.

    Args:
-        model: 사용할 LLM. 기본값은 gpt-4.1.
+        model: 사용할 LLM 모델 (create_researcher_agent과 동일).
        backend: 파일 작업용 백엔드.
+        depth: 연구 깊이 수준.

    Returns:
-        dict: 다음 키를 가진 CompiledSubAgent:
-            - name: "researcher"
-            - description: 오케스트레이터가 위임 결정 시 사용
-            - runnable: 자율적 연구 에이전트
+        다음 키를 포함하는 딕셔너리:
+        - name: 서브에이전트 이름 ("researcher")
+        - description: 서브에이전트 설명 (깊이 정보 포함)
+        - runnable: 실행 가능한 에이전트 객체

    Example:
-        from research_agent.researcher import get_researcher_subagent
-
-        researcher = get_researcher_subagent(model=model, backend=backend)
-
-        agent = create_deep_agent(
-            model=model,
-            subagents=[researcher, explorer, synthesizer],
-            ...
-        )
+        >>> from deepagents import create_deep_agent
+        >>> researcher = get_researcher_subagent(depth="deep")
+        >>> main_agent = create_deep_agent(
+        ...     subagents=[researcher],
+        ...     system_prompt="작업을 researcher에게 위임하세요."
+        ... )
    """
-    researcher = create_researcher_agent(model=model, backend=backend)
+    # 연구 에이전트 생성
+    researcher = create_researcher_agent(model=model, backend=backend, depth=depth)

+    # 깊이를 enum으로 변환
+    depth_enum = ResearchDepth(depth) if isinstance(depth, str) else depth
+    config = get_depth_config(depth_enum)
+
+    # 설명 문자열 구성
+    description = (
+        f"Autonomous research agent ({depth_enum.value} mode). "
+        f"Max {config.max_ralph_iterations} iterations, "
+        f"sources: {', '.join(config.sources)}. "
+        "Use for comprehensive topic research with self-planning."
+    )
+
+    # SubAgent 형식으로 반환
    return {
        "name": "researcher",
-        "description": (
-            "Autonomous deep research agent with self-planning and "
-            "'breadth-first, depth-second' methodology. Use for comprehensive "
-            "topic research requiring multiple search iterations and synthesis. "
-            "The agent plans its own research phases, reflects after each search, "
-            "and synthesizes findings into structured output. "
-            "Best for: complex topics, multi-faceted questions, trend analysis."
-        ),
+        "description": description,
        "runnable": researcher,
    }
--- a/research_agent/researcher/depth.py
+++ b/research_agent/researcher/depth.py
@@ -0,0 +1,204 @@
+"""연구 깊이 설정 모듈.
+
+이 모듈은 연구 에이전트의 깊이(depth) 수준을 정의하고 관리합니다.
+각 깊이 수준은 검색 횟수, 반복 횟수, 사용 가능한 소스 등을 결정합니다.
+
+## 깊이 수준 비교
+
+```
+┌──────────────┬─────────┬───────────┬────────────────────────────────┐
+│   깊이       │ 검색 수 │ 반복 횟수 │             소스                │
+├──────────────┼─────────┼───────────┼────────────────────────────────┤
+│ QUICK        │    3    │     1     │ web                            │
+│ STANDARD     │   10    │     2     │ web, local                     │
+│ DEEP         │   25    │     5     │ web, local, github, arxiv      │
+│ EXHAUSTIVE   │   50    │    10     │ web, local, github, arxiv, docs│
+└──────────────┴─────────┴───────────┴────────────────────────────────┘
+```
+
+v2 업데이트 (2026-01):
+- ResearchDepth enum 도입
+- DepthConfig dataclass로 구성 관리
+- 쿼리 기반 깊이 추론 (infer_research_depth)
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import Literal
+
+
+class ResearchDepth(Enum):
+    """연구 깊이 수준을 나타내는 열거형.
+
+    각 깊이 수준은 다른 검색 전략과 리소스 사용량을 의미합니다:
+
+    - QUICK: 빠른 답변이 필요할 때. 최소 검색, 단일 반복.
+    - STANDARD: 균형 잡힌 조사. 웹 + 로컬 소스 사용.
+    - DEEP: 심층 분석. 교차 검증 필요, GitHub/arXiv 포함.
+    - EXHAUSTIVE: 학술적 완성도. 공식 문서까지 포함, 최대 검증.
+    """
+
+    QUICK = "quick"  # 빠른 조사 (최대 3회 검색)
+    STANDARD = "standard"  # 표준 조사 (최대 10회 검색)
+    DEEP = "deep"  # 심층 조사 (최대 25회 검색, Ralph Loop)
+    EXHAUSTIVE = "exhaustive"  # 철저한 조사 (최대 50회 검색, 확장 Ralph Loop)
+
+
+@dataclass(frozen=True)
+class DepthConfig:
+    """연구 깊이별 설정을 담는 불변 데이터 클래스.
+
+    Attributes:
+        max_searches: 허용된 최대 검색 횟수.
+        max_ralph_iterations: Ralph Loop 최대 반복 횟수.
+        sources: 사용 가능한 검색 소스 튜플 (예: ("web", "arxiv")).
+        require_cross_validation: 교차 검증 필수 여부.
+        min_sources_for_claim: 주장당 필요한 최소 소스 수.
+        coverage_threshold: 완료 판정 기준 커버리지 점수 (0.0 ~ 1.0).
+    """
+
+    max_searches: int  # 최대 검색 횟수
+    max_ralph_iterations: int  # Ralph Loop 최대 반복
+    sources: tuple[str, ...]  # 사용 가능한 소스
+    require_cross_validation: bool  # 교차 검증 필요 여부
+    min_sources_for_claim: int  # 주장당 최소 소스 수
+    coverage_threshold: float  # 커버리지 임계값
+
+
+# ============================================================================
+# 깊이별 기본 설정
+# ============================================================================
+
+DEPTH_CONFIGS: dict[ResearchDepth, DepthConfig] = {
+    # QUICK: 빠른 답변용
+    # - 최대 3회 검색
+    # - 단일 반복 (Ralph Loop 없음)
+    # - 웹 소스만 사용
+    # - 교차 검증 없음
+    ResearchDepth.QUICK: DepthConfig(
+        max_searches=3,
+        max_ralph_iterations=1,
+        sources=("web",),
+        require_cross_validation=False,
+        min_sources_for_claim=1,
+        coverage_threshold=0.5,
+    ),
+    # STANDARD: 균형 잡힌 조사
+    # - 최대 10회 검색
+    # - 2회 반복
+    # - 웹 + 로컬 소스
+    # - 교차 검증 없음
+    ResearchDepth.STANDARD: DepthConfig(
+        max_searches=10,
+        max_ralph_iterations=2,
+        sources=("web", "local"),
+        require_cross_validation=False,
+        min_sources_for_claim=1,
+        coverage_threshold=0.7,
+    ),
+    # DEEP: 심층 분석 (Ralph Loop 활성화)
+    # - 최대 25회 검색
+    # - 5회 반복
+    # - 웹 + 로컬 + GitHub + arXiv
+    # - 교차 검증 필수 (주장당 최소 2개 소스)
+    ResearchDepth.DEEP: DepthConfig(
+        max_searches=25,
+        max_ralph_iterations=5,
+        sources=("web", "local", "github", "arxiv"),
+        require_cross_validation=True,
+        min_sources_for_claim=2,
+        coverage_threshold=0.85,
+    ),
+    # EXHAUSTIVE: 학술적 완성도 (확장 Ralph Loop)
+    # - 최대 50회 검색
+    # - 10회 반복
+    # - 모든 소스 사용 (docs 포함)
+    # - 교차 검증 필수 (주장당 최소 3개 소스)
+    ResearchDepth.EXHAUSTIVE: DepthConfig(
+        max_searches=50,
+        max_ralph_iterations=10,
+        sources=("web", "local", "github", "arxiv", "docs"),
+        require_cross_validation=True,
+        min_sources_for_claim=3,
+        coverage_threshold=0.95,
+    ),
+}
+
+
+# ============================================================================
+# 깊이 추론용 키워드 세트
+# ============================================================================
+
+# EXHAUSTIVE 트리거 키워드
+_EXHAUSTIVE_KEYWORDS = frozenset(
+    ["comprehensive", "thorough", "academic", "literature review", "exhaustive"]
+)
+
+# DEEP 트리거 키워드
+_DEEP_KEYWORDS = frozenset(
+    ["analyze", "compare", "investigate", "deep dive", "in-depth"]
+)
+
+# QUICK 트리거 키워드
+_QUICK_KEYWORDS = frozenset(["quick", "brief", "summary", "what is", "simple"])
+
+
+# ============================================================================
+# 유틸리티 함수
+# ============================================================================
+
+
+def infer_research_depth(query: str) -> ResearchDepth:
+    """쿼리 문자열에서 적절한 연구 깊이를 추론한다.
+
+    쿼리에 포함된 키워드를 기반으로 연구 깊이를 결정합니다.
+    키워드 매칭 우선순위: EXHAUSTIVE > DEEP > QUICK > STANDARD(기본값).
+
+    Args:
+        query: 사용자의 연구 쿼리 문자열.
+
+    Returns:
+        추론된 ResearchDepth 열거형 값.
+        매칭되는 키워드가 없으면 STANDARD 반환.
+
+    Example:
+        >>> infer_research_depth("quick summary of AI trends")
+        ResearchDepth.QUICK
+        >>> infer_research_depth("analyze different RAG strategies")
+        ResearchDepth.DEEP
+        >>> infer_research_depth("comprehensive literature review on transformers")
+        ResearchDepth.EXHAUSTIVE
+    """
+    query_lower = query.lower()
+
+    # 키워드 우선순위대로 검사
+    if any(kw in query_lower for kw in _EXHAUSTIVE_KEYWORDS):
+        return ResearchDepth.EXHAUSTIVE
+    if any(kw in query_lower for kw in _DEEP_KEYWORDS):
+        return ResearchDepth.DEEP
+    if any(kw in query_lower for kw in _QUICK_KEYWORDS):
+        return ResearchDepth.QUICK
+
+    # 기본값: STANDARD
+    return ResearchDepth.STANDARD
+
+
+def get_depth_config(depth: ResearchDepth) -> DepthConfig:
+    """연구 깊이에 해당하는 설정을 반환한다.
+
+    Args:
+        depth: ResearchDepth 열거형 값.
+
+    Returns:
+        해당 깊이의 DepthConfig 객체.
+
+    Example:
+        >>> config = get_depth_config(ResearchDepth.DEEP)
+        >>> config.max_searches
+        25
+        >>> config.sources
+        ('web', 'local', 'github', 'arxiv')
+    """
+    return DEPTH_CONFIGS[depth]
--- a/research_agent/researcher/prompts.py
+++ b/research_agent/researcher/prompts.py
@@ -1,131 +1,300 @@
-"""자율적 연구 에이전트를 위한 프롬프트.
+"""Prompts for autonomous research agent.

-이 프롬프트는 "넓게 탐색 → 깊게 파기" 패턴을 따르는
-자율적인 연구 워크플로우를 정의합니다.
+This module defines prompts following the "breadth-first, then depth" pattern
+for autonomous research workflows.
+
+v2 Updates (2026-01):
+- ResearchDepth-based prompt branching (QUICK/STANDARD/DEEP/EXHAUSTIVE)
+- Ralph Loop iterative research pattern support
+- Multi-source search integration (mgrep, arXiv, grep.app, Context7)
 """

-AUTONOMOUS_RESEARCHER_INSTRUCTIONS = """You are an autonomous research agent. Your job is to thoroughly research a topic by following a "breadth-first, then depth" approach.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .depth import ResearchDepth
+
+AUTONOMOUS_RESEARCHER_INSTRUCTIONS = """You are an autonomous research agent. Your job is to research a topic and return evidence-backed findings using a breadth-first → depth approach.

 For context, today's date is {date}.

-## Your Capabilities
+## Tooling (What you may have access to)
+Your tool set depends on the active `ResearchDepth` configuration. Possible tools include:
+- `think_tool`: Mandatory reflection step used to decide the next action.
+- `write_todos`: Planning tool for creating/updating a structured task list (call at most once per response).
+- Web search:
+  - `mgrep_search` with `web=True` (if `mgrep` is available in the environment)
+  - `tavily_search` (web search with full content extraction)
+- Local codebase search:
+  - `mgrep_search` with `path="..."` (semantic local search)
+- Academic search:
+  - `arxiv_search`
+- Public implementation search:
+  - `github_code_search`
+- Official documentation lookup:
+  - `library_docs_search`
+- Multi-source orchestration:
+  - `comprehensive_search` (use to reduce total tool calls when multiple sources are needed)

-You have access to:
- **tavily_search**: Web search with full content extraction
- **think_tool**: Reflection and strategic planning
- **write_todos**: Self-planning and progress tracking
+Only reference tools you actually have in the current run. If you are uncertain, attempt a single appropriate tool call rather than assuming.

-## Autonomous Research Workflow
+## Operating principle: Breadth first, then depth
+Your default strategy is:
+1) Breadth: establish terminology, scope, and candidate directions quickly.
+2) Depth: pick the highest-value directions and validate claims with multiple sources.
+3) Synthesis: produce a structured, evidence-backed response with explicit uncertainty.

-### Phase 1: Exploratory Search (1-2 searches)
+## Required loop: Search → Reflect → Decide
+After EVERY search tool call, you MUST call `think_tool` and include:
+1) What you learned (specific facts/claims, not vague summaries)
+2) What is still missing (specific questions or missing evidence)
+3) The next concrete action:
+   - exact tool name
+   - exact query string
+   - any key parameters (e.g., `web=True`, `max_results=5`, `library_name="..."`)
+4) A stop/continue decision (and why)

-**Goal**: Get the lay of the land
+Example reflection template:
+- Learned:
+  - Claim A: ...
+  - Claim B: ...
+- Missing:
+  - Need evidence for X from official docs or academic sources
+- Next action:
+  - Tool: `github_code_search`
+  - Query: "getServerSession("
+  - Params: language=["TypeScript","TSX"], max_results=3
+- Decision:
+  - Continue (need implementation evidence), or Stop (requirements satisfied)

-Start with broad searches to understand:
- Key concepts and terminology in the field
- Major players, sources, and authorities
- Recent trends and developments
- Potential sub-topics worth exploring
+## Phase 1: Exploratory breadth (1–2 searches)
+Goal: define scope and build a short research map.
+- Run 1 broad query to gather definitions, key terms, and major subtopics.
+- Optionally run 1 follow-up query to resolve ambiguous terminology or identify the best 2–3 directions.
+- Use `think_tool` after each search and explicitly list the 2–3 directions you will pursue next.

-After each search, **ALWAYS** use think_tool to:
-```
-"What did I learn? Key concepts are: ...
-What are 2-3 promising directions for deeper research?
-1. Direction A: [reason]
-2. Direction B: [reason]
-3. Direction C: [reason]
-Do I need more exploration, or can I proceed to Phase 2?"
-```
+## Phase 2: Directed depth (focused searches per direction)
+Goal: answer the research question with validated claims.
+For each chosen direction:
+1. State a precise sub-question (what exactly must be answered).
+2. Run focused searches to answer it.
+3. Validate:
+   - If cross-validation is required by the active depth mode, do not finalize a major claim until it has the required number of independent sources.
+   - If sources conflict, either resolve the conflict with an explicit verification search or clearly document the contradiction.

-### Phase 2: Directed Research (1-2 searches per direction)
+## Phase 3: Synthesis (final output)
+Goal: convert evidence into a usable answer.
+Your output must:
+- Be structured with headings.
+- Separate facts from interpretations.
+- Explicitly list contradictions/unknowns instead of guessing.
+- Include a Sources section with stable citation numbering and URLs.

-**Goal**: Deep dive into promising directions
+## Planning with `write_todos`
+If the task is multi-step (3+ steps), call `write_todos` once at the start to create a small plan (4–7 items).
+Update the plan only when the strategy changes materially (again: at most one `write_todos` call per response).

-For each promising direction identified in Phase 1:
-1. Formulate a specific, focused search query
-2. Execute tavily_search with the focused query
-3. Use think_tool to assess:
-```
-"Direction: [name]
-What new insights did this reveal?
- Insight 1: ...
- Insight 2: ...
-Is this direction yielding valuable information? [Yes/No]
-Should I continue deeper or move to the next direction?"
-```
+Example TODO plan:
+1. Define scope + glossary (breadth search)
+2. Identify 2–3 high-value directions (reflection)
+3. Research direction A with validation
+4. Research direction B with validation
+5. Resolve contradictions / verify edge cases
+6. Synthesize findings + sources

-### Phase 3: Synthesis
+## Stop conditions (measurable)
+Stop researching when ANY of the following are true:
+- You can answer the user's question directly and completely with citations.
+- You have hit the configured search budget for the current depth mode.
+- Your last 2 searches are redundant (no new claims, no new evidence, no new constraints).
+- Cross-validation requirements are satisfied for all major claims (when required).
+- Remaining gaps are minor and can be stated as "unknown" without blocking the main answer.

-**Goal**: Combine all findings into a coherent response
+## Response format (to the orchestrator)
+Return Markdown:

-After completing directed research:
-1. Review all gathered information
-2. Identify patterns and connections
-3. Note where sources agree or disagree
-4. Structure your findings clearly
-
-## Self-Management with write_todos
-
-At the start, create a research plan:
-
-```
-1. [Explore] Broad search to understand the research landscape
-2. [Analyze] Review findings and identify 2-3 promising directions
-3. [Deep Dive] Research Direction A: [topic]
-4. [Deep Dive] Research Direction B: [topic]
-5. [Synthesize] Combine findings into structured response
-```
-
-Mark each todo as completed when done. Adjust your plan if needed.
-
-## Hard Limits (Token Efficiency)
-
-| Phase | Max Searches | Purpose |
-|-------|-------------|---------|
-| Exploratory | 2 | Broad landscape understanding |
-| Directed | 3-4 | Focused deep dives |
-| **TOTAL** | **5-6** | Entire research session |
-
-## Stop Conditions
-
-Stop researching when ANY of these are true:
- You have sufficient information to answer comprehensively
- Your last 2 searches returned similar/redundant information
- You've reached the maximum search limit (5-6)
- All promising directions have been adequately explored
-
-## Response Format
-
-Structure your final response as:
-
-```markdown
 ## Key Findings
+### Finding 1
+- Claim:
+- Evidence:
+- Why it matters:

-### Finding 1: [Title]
-[Detailed explanation with inline citations [1], [2]]
+### Finding 2
+...

-### Finding 2: [Title]
-[Detailed explanation with inline citations]
+## Implementation Evidence (when relevant)
+- Real-world code patterns, pitfalls, and links to repos/files (via citations).

-### Finding 3: [Title]
-[Detailed explanation with inline citations]
-
-## Source Agreement Analysis
- **High agreement**: [topics where sources align]
- **Disagreement/Uncertainty**: [topics with conflicting info]
+## Contradictions / Unknowns
+- What conflicts, what is unverified, and what would resolve it.

 ## Sources
-[1] Source Title: URL
-[2] Source Title: URL
+[1] Title: URL
+[2] Title: URL
 ...
-```
-
-The orchestrator will integrate your findings into the final report.
-
-## Important Notes
-
-1. **Think before each action**: Use think_tool to plan and reflect
-2. **Quality over quantity**: Fewer, focused searches beat many unfocused ones
-3. **Track your progress**: Use write_todos to stay organized
-4. **Know when to stop**: Don't over-research; stop when you have enough
+"""
+
+
+DEPTH_PROMPTS: dict[str, str] = {
+    "quick": """## Quick Research Mode
+
+Objective: produce a correct, minimal answer fast.
+
+**Search budget**: max 3 total searches
+**Iterations**: 1
+**Primary sources**: web
+
+**Available tools (may vary by environment)**:
+- `mgrep_search` (prefer `web=True` if available)
+- `tavily_search` (fallback web search with full content extraction)
+- `think_tool`
+
+**Procedure**:
+1. Run exactly 1 broad web search to establish definitions and key terms.
+2. If a critical gap remains (missing definition, missing "what/why/how"), run 1 targeted follow-up search.
+3. Stop and answer. Do not exceed 3 total searches.
+
+**Completion criteria**:
+- You can answer the user's question directly in 4–10 sentences, AND
+- At least 1 cited source URL supports the central claim, OR you explicitly mark the answer as uncertain.
+
+**Output requirements**:
+- 2–5 key bullets or short paragraphs
+- 1–2 citations in a final Sources section
+""",
+    "standard": """## Standard Research Mode
+
+Objective: balanced coverage with evidence, without over-searching.
+
+**Search budget**: max 10 total searches
+**Iterations**: up to 2 (plan → search → reflect → refine)
+**Primary sources**: web + local (codebase)
+
+**Available tools**:
+- `mgrep_search` (local search via `path`, optional web via `web=True`)
+- `tavily_search`
+- `comprehensive_search` (multi-source wrapper; use when it reduces tool calls)
+- `think_tool`
+
+**Iteration 1 (landscape + local grounding)**:
+1. 1–2 broad searches to build a short glossary and identify 2–3 sub-questions.
+2. 1 local search (`mgrep_search` with `path`) to find relevant code/config patterns if applicable.
+
+**Iteration 2 (targeted fill + verification)**:
+1. 2–4 targeted searches to answer each sub-question.
+2. If claims conflict, run 1 explicit verification search to resolve the conflict or mark uncertainty.
+
+**Completion criteria**:
+- All identified sub-questions are answered, AND
+- No single key claim depends on an unverified single-source assertion, AND
+- You are within the 10-search budget.
+
+**Output requirements**:
+- 300–700 words (or equivalent detail)
+- Clear section headings (## / ###)
+- Inline citations and a Sources list with stable numbering
+""",
+    "deep": """## Deep Research Mode (Ralph Loop)
+
+Objective: multi-angle research with cross-validation and implementation evidence.
+
+**Search budget**: max 25 total searches
+**Iterations**: up to 5 (Ralph Loop)
+**Primary sources**: web + local + GitHub code + arXiv
+
+**Available tools**:
+- `mgrep_search`
+- `tavily_search`
+- `github_code_search`
+- `arxiv_search`
+- `comprehensive_search`
+- `think_tool`
+
+**Ralph Loop (repeat up to 5 iterations)**:
+1. Plan: use `think_tool` to state (a) what you know, (b) what you need next, and (c) the exact next tool call(s).
+2. Search: execute 3–6 focused tool calls max per iteration (keep a running count).
+3. Extract: write down concrete claims, each with source IDs.
+4. Validate: ensure each major claim has **>= 2 independent sources** (web + paper, web + GitHub example, etc.).
+5. Update coverage: self-assess coverage as a number in [0.0, 1.0] and state what remains.
+
+**Completion criteria**:
+- Self-assessed coverage >= 0.85, AND
+- Every major claim has >= 2 sources, AND
+- Contradictions are either resolved or explicitly documented, AND
+- You output `<promise>RESEARCH_COMPLETE</promise>`.
+
+**Output requirements**:
+- Structured findings with clear scoping (what applies, what does not)
+- A dedicated "Implementation Evidence" section when relevant (GitHub code snippets + repo/file context)
+- A dedicated "Contradictions / Unknowns" section
+""",
+    "exhaustive": """## Exhaustive Research Mode (Extended Ralph Loop)
+
+Objective: near-academic completeness with official documentation support.
+
+**Search budget**: max 50 total searches
+**Iterations**: up to 10 (Extended Ralph Loop)
+**Primary sources**: web + local + GitHub code + arXiv + official docs
+
+**Available tools**:
+- `mgrep_search`
+- `tavily_search`
+- `github_code_search`
+- `arxiv_search`
+- `library_docs_search`
+- `comprehensive_search`
+- `think_tool`
+
+**Extended Ralph Loop (repeat up to 10 iterations)**:
+1. Literature: use `arxiv_search` to establish foundational concepts and vocabulary.
+2. Industry: use `tavily_search` / `mgrep_search(web=True)` for applied practice and recent changes.
+3. Implementation: use `github_code_search` for real-world patterns and failure modes.
+4. Official docs: use `library_docs_search` for normative API behavior and constraints.
+5. Reconcile: explicitly cross-check conflicts; do not "average" contradictions—state what differs and why.
+
+**Completion criteria** (ALL required):
+- Self-assessed coverage >= 0.95, AND
+- Every major claim has **>= 3 sources**, AND
+- A "Source Agreement" section exists (high/medium/low agreement), AND
+- You output `<promise>RESEARCH_COMPLETE</promise>` ONLY when criteria are met.
+
+**Output requirements**:
+- Annotated bibliography (1–2 sentence annotation per key source)
+- Confidence score per major finding (High/Medium/Low) based on agreement and source type
+- Explicit "Open Questions" list for anything not resolvable within budget
+""",
+}
+
+
+def get_depth_prompt(depth: ResearchDepth) -> str:
+    from .depth import ResearchDepth as RD
+    # Non-enum input is stringified; unknown keys fall back to "standard".
+    depth_key = depth.value if isinstance(depth, RD) else str(depth)
+    return DEPTH_PROMPTS.get(depth_key, DEPTH_PROMPTS["standard"])
+
+
+def build_research_prompt(
+    depth: ResearchDepth,
+    query: str,
+    iteration: int = 1,
+    max_iterations: int = 1,
+    coverage_score: float = 0.0,
+) -> str:
+    depth_prompt = get_depth_prompt(depth)
+    # Layout: depth guidance, then task status, then the shared instructions.
+    return f"""{depth_prompt}
+
+---
+
+## Current Task
+
+**Query**: {query}
+**Iteration**: {iteration}/{max_iterations}
+**Coverage**: {coverage_score:.2%}
+
+---
+
+{AUTONOMOUS_RESEARCHER_INSTRUCTIONS}
 """
--- a/research_agent/researcher/ralph_loop.py
+++ b/research_agent/researcher/ralph_loop.py
@@ -0,0 +1,607 @@
+"""Ralph Loop 연구 패턴 모듈.
+
+이 모듈은 반복적 연구 패턴(Ralph Loop)을 구현합니다.
+에이전트가 연구 → 반성 → 갱신 사이클을 통해 점진적으로
+연구 커버리지를 높여가는 방식을 지원합니다.
+
+## Ralph Loop 동작 흐름
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                      Ralph Loop 사이클                           │
+├─────────────────────────────────────────────────────────────────┤
+│                                                                  │
+│   ┌──────────┐    ┌──────────┐    ┌──────────┐    ┌──────────┐ │
+│   │   Plan   │───▶│  Search  │───▶│ Extract  │───▶│ Validate │ │
+│   │ (계획)   │    │ (검색)   │    │ (추출)   │    │ (검증)   │ │
+│   └──────────┘    └──────────┘    └──────────┘    └──────────┘ │
+│        ▲                                               │        │
+│        │                                               ▼        │
+│   ┌──────────┐                                   ┌──────────┐  │
+│   │ Continue │◀──────────────────────────────────│  Update  │  │
+│   │ (계속?)  │                                   │(커버리지)│  │
+│   └──────────┘                                   └──────────┘  │
+│        │                                                        │
+│        ▼                                                        │
+│   ┌──────────┐                                                  │
+│   │ Complete │  coverage >= threshold OR max iterations        │
+│   │  (완료)  │                                                  │
+│   └──────────┘                                                  │
+│                                                                  │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+v2 업데이트 (2026-01):
+- RalphLoopState 상태 관리 클래스
+- SourceQuality 소스 품질 평가
+- Finding 발견 항목 데이터 클래스
+- ResearchSession 세션 관리
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .depth import DepthConfig
+
+
+# ============================================================================
+# Ralph Loop 상태 관리
+# ============================================================================
+
+
+@dataclass
+class RalphLoopState:
+    """Data class tracking the current state of a Ralph Loop.
+
+    Attributes:
+        iteration: Current iteration number (starts at 1).
+        max_iterations: Maximum allowed iterations (0 means unlimited).
+        completion_promise: Promise tag to emit on completion.
+        started_at: Loop start time (ISO 8601 format).
+        findings_count: Number of findings collected so far.
+        coverage_score: Current coverage score (0.0 to 1.0).
+    """
+
+    iteration: int = 1  # current iteration number
+    max_iterations: int = 0  # iteration cap (0 = unlimited)
+    completion_promise: str = "RESEARCH_COMPLETE"  # completion tag
+    started_at: str = field(
+        default_factory=lambda: datetime.now(timezone.utc).isoformat()
+    )
+    findings_count: int = 0  # number of findings
+    coverage_score: float = 0.0  # coverage score
+
+    def is_max_reached(self) -> bool:
+        """Check whether the maximum iteration count has been reached.
+
+        Returns:
+            True if max_iterations > 0 and iteration >= max_iterations.
+        """
+        return self.max_iterations > 0 and self.iteration >= self.max_iterations
+
+
+# ============================================================================
+# 소스 유형 및 품질
+# ============================================================================
+
+
+class SourceType:
+    """Constant class naming the kinds of sources.
+
+    Each source type is given a different default authority score:
+    - ARXIV: 0.9 (academic papers, highest authority)
+    - DOCS: 0.85 (official documentation)
+    - GITHUB: 0.7 (real implementation code)
+    - LOCAL: 0.6 (local codebase)
+    - WEB: 0.5 (general web search, lowest authority)
+    """
+
+    WEB = "web"  # web search results
+    ARXIV = "arxiv"  # arXiv papers
+    GITHUB = "github"  # GitHub code
+    DOCS = "docs"  # official documentation
+    LOCAL = "local"  # local codebase
+
+
+@dataclass
+class SourceQuality:
+    """Data class that scores the quality of a source.
+
+    The quality score is a weighted average of three factors:
+    - recency: 20%
+    - authority: 40%
+    - relevance: 40%
+
+    A bonus based on the verification count is added on top (at most 15%).
+
+    Attributes:
+        source_type: Source type (a SourceType constant).
+        recency_score: Recency score (0.0 to 1.0).
+        authority_score: Authority score (0.0 to 1.0).
+        relevance_score: Relevance score (0.0 to 1.0).
+        verification_count: Number of verifications by other sources.
+    """
+
+    source_type: str  # source type
+    recency_score: float = 0.0  # recency (0.0 to 1.0)
+    authority_score: float = 0.0  # authority (0.0 to 1.0)
+    relevance_score: float = 0.0  # relevance (0.0 to 1.0)
+    verification_count: int = 0  # verification count
+
+    @property
+    def overall_score(self) -> float:
+        """Compute the overall quality score.
+
+        Computed as the weighted average plus the verification bonus.
+
+        Returns:
+            Overall quality score, capped at 1.0.
+        """
+        # Weighted average (recency 20%, authority 40%, relevance 40%)
+        base_score = (
+            self.recency_score * 0.2
+            + self.authority_score * 0.4
+            + self.relevance_score * 0.4
+        )
+        # Verification bonus (5% per verification, at most 15%)
+        verification_bonus = min(self.verification_count * 0.05, 0.15)
+        # Cap the result at 1.0
+        return min(base_score + verification_bonus, 1.0)
+
+    @classmethod
+    def from_source_type(cls, source_type: str, **kwargs) -> "SourceQuality":
+        """Create a SourceQuality object from a source type.
+
+        A default authority score for the source type is applied automatically.
+
+        Args:
+            source_type: One of the SourceType constants.
+            **kwargs: Optional score overrides (recency_score, relevance_score, ...).
+
+        Returns:
+            The constructed SourceQuality object.
+        """
+        # Default authority score per source type
+        authority_defaults = {
+            SourceType.ARXIV: 0.9,  # academic papers - highest authority
+            SourceType.DOCS: 0.85,  # official documentation
+            SourceType.GITHUB: 0.7,  # real implementations
+            SourceType.WEB: 0.5,  # general web
+            SourceType.LOCAL: 0.6,  # local code
+        }
+        return cls(
+            source_type=source_type,
+            authority_score=kwargs.get(
+                "authority_score", authority_defaults.get(source_type, 0.5)
+            ),
+            recency_score=kwargs.get("recency_score", 0.5),
+            relevance_score=kwargs.get("relevance_score", 0.5),
+            verification_count=kwargs.get("verification_count", 0),
+        )
+
+
+# ============================================================================
+# 연구 발견 항목
+# ============================================================================
+
+
+@dataclass
+class Finding:
+    """Data class representing one item found during research.
+
+    Attributes:
+        content: Finding content (text).
+        source_url: Source URL.
+        source_title: Source title.
+        confidence: Confidence score (0.0 to 1.0).
+        verified_by: URLs of other sources that verified this finding.
+        quality: Source quality information (optional).
+    """
+
+    content: str  # finding content
+    source_url: str  # source URL
+    source_title: str  # source title
+    confidence: float  # confidence (0.0 to 1.0)
+    verified_by: list[str] = field(default_factory=list)  # verifying sources
+    quality: SourceQuality | None = None  # source quality
+
+    @property
+    def weighted_confidence(self) -> float:
+        """Compute the quality-weighted confidence.
+
+        When a source quality is set, confidence is multiplied by its score.
+
+        Returns:
+            Confidence with the quality weight applied.
+        """
+        if self.quality is None:
+            return self.confidence
+        return self.confidence * self.quality.overall_score
+
+
+# ============================================================================
+# Ralph Loop 관리자
+# ============================================================================
+
+
+class ResearchRalphLoop:
+    """Manager for the Ralph Loop research pattern.
+
+    Saves/loads loop state to a file and tracks research progress.
+
+    Attributes:
+        STATE_FILE: State file path (.claude/research-ralph-loop.local.md).
+        query: Research query.
+        max_iterations: Maximum number of iterations.
+        coverage_threshold: Coverage threshold used to decide completion.
+        sources: Available sources.
+        state: Current Ralph Loop state.
+    """
+
+    STATE_FILE = Path(".claude/research-ralph-loop.local.md")
+
+    def __init__(
+        self,
+        query: str,
+        depth_config: DepthConfig | None = None,
+        max_iterations: int = 10,
+        coverage_threshold: float = 0.85,
+    ):
+        """Initialize the Ralph Loop.
+
+        Args:
+            query: Research query string.
+            depth_config: Depth settings (take precedence when provided).
+            max_iterations: Default maximum number of iterations.
+            coverage_threshold: Default coverage threshold.
+        """
+        self.query = query
+
+        # Values from depth_config win; otherwise use the keyword defaults.
+        self.max_iterations = (
+            depth_config.max_ralph_iterations if depth_config else max_iterations
+        )
+        self.coverage_threshold = (
+            depth_config.coverage_threshold if depth_config else coverage_threshold
+        )
+        self.sources = depth_config.sources if depth_config else ("web",)
+
+        # Create the initial state
+        self.state = RalphLoopState(max_iterations=self.max_iterations)
+
+    def create_research_prompt(self) -> str:
+        """Build the research prompt for the current iteration.
+
+        Returns:
+            Markdown-formatted research prompt to hand to the agent.
+        """
+        sources_str = ", ".join(self.sources)
+        return f"""## Research Iteration {self.state.iteration}/{self.max_iterations or "∞"}
+
+### Original Query
+{self.query}
+
+### Previous Work
+Check `research_workspace/` for previous findings.
+Read TODO.md for tracked progress.
+
+### Instructions
+1. Review existing findings
+2. Identify knowledge gaps
+3. Conduct targeted searches using: {sources_str}
+4. Update research files with new findings
+5. Update TODO.md with progress
+
+### Completion Criteria
+Output `<promise>{self.state.completion_promise}</promise>` ONLY when:
+- Coverage score >= {self.coverage_threshold} (current: {self.state.coverage_score:.2f})
+- All major aspects addressed
+- Findings cross-validated with 2+ sources
+- DO NOT lie to exit
+
+### Current Stats
+- Iteration: {self.state.iteration}
+- Findings: {self.state.findings_count}
+- Coverage: {self.state.coverage_score:.2%}
+"""
+
+    def save_state(self) -> None:
+        """Write the current state (frontmatter + prompt) to STATE_FILE as UTF-8."""
+        # Create the directory (and any missing parents)
+        self.STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
+
+        # Stored as YAML frontmatter followed by the prompt body
+        promise_yaml = f'"{self.state.completion_promise}"'
+        content = f"""---
+active: true
+iteration: {self.state.iteration}
+max_iterations: {self.state.max_iterations}
+completion_promise: {promise_yaml}
+started_at: "{self.state.started_at}"
+findings_count: {self.state.findings_count}
+coverage_score: {self.state.coverage_score}
+---
+
+{self.create_research_prompt()}
+"""
+        self.STATE_FILE.write_text(content, encoding="utf-8")
+
+    def load_state(self) -> bool:
+        """Load iteration, findings count and coverage from the state file.
+
+        Returns:
+            True if the state file exists and was read; False if it is missing.
+        """
+        if not self.STATE_FILE.exists():
+            return False
+
+        lines = self.STATE_FILE.read_text(encoding="utf-8").split("\n")
+
+        # Parse only the leading frontmatter; stop at its closing "---".
+        in_frontmatter = False
+        for line in lines:
+            if line.strip() == "---":
+                if in_frontmatter:
+                    break
+                in_frontmatter = True
+                continue
+            if not in_frontmatter:
+                continue
+            key, _, value = line.partition(":")
+            if key == "iteration":
+                self.state.iteration = int(value.strip())
+            elif key == "findings_count":
+                self.state.findings_count = int(value.strip())
+            elif key == "coverage_score":
+                self.state.coverage_score = float(value.strip())
+
+        return True
+
+    def increment_iteration(self) -> None:
+        """Increment the iteration counter and save the state."""
+        self.state.iteration += 1
+        self.save_state()
+
+    def update_coverage(self, findings_count: int, coverage_score: float) -> None:
+        """Update coverage information and save the state.
+
+        Args:
+            findings_count: New number of findings.
+            coverage_score: New coverage score.
+        """
+        self.state.findings_count = findings_count
+        self.state.coverage_score = coverage_score
+        self.save_state()
+
+    def is_complete(self) -> bool:
+        """Check whether the research is complete.
+
+        Returns:
+            True if max iterations were reached or coverage meets the threshold.
+        """
+        # Maximum-iteration check
+        if self.state.is_max_reached():
+            return True
+        # Coverage-threshold check
+        return self.state.coverage_score >= self.coverage_threshold
+
+    def cleanup(self) -> None:
+        """Delete the state file if it exists."""
+        if self.STATE_FILE.exists():
+            self.STATE_FILE.unlink()
+
+
+# ============================================================================
+# 연구 세션 관리
+# ============================================================================
+
+
+class ResearchSession:
+    """Manager for a single research session.
+
+    Creates a per-session directory, records findings, and
+    tracks progress through a Ralph Loop.
+
+    Attributes:
+        WORKSPACE: Root directory of the research workspace.
+        query: Research query.
+        session_id: Unique session identifier.
+        session_dir: Session directory path.
+        ralph_loop: Ralph Loop manager.
+        findings: Collected findings.
+    """
+
+    WORKSPACE = Path("research_workspace")
+
+    def __init__(
+        self,
+        query: str,
+        depth_config: DepthConfig | None = None,
+        session_id: str | None = None,
+    ):
+        """Initialize the research session.
+
+        Args:
+            query: Research query string.
+            depth_config: Depth settings (optional).
+            session_id: Session ID (generated from the current time if omitted).
+        """
+        self.query = query
+        self.session_id = session_id or datetime.now().strftime("%Y%m%d_%H%M%S")
+        self.session_dir = self.WORKSPACE / f"session_{self.session_id}"
+        self.ralph_loop = ResearchRalphLoop(query, depth_config)
+        self.findings: list[Finding] = []
+
+    def initialize(self) -> None:
+        """Create the session directory and its initial files (written as UTF-8)."""
+        # Create the session directory
+        self.session_dir.mkdir(parents=True, exist_ok=True)
+
+        # Initial TODO.md
+        todo_content = f"""# Research TODO
+
+## Query
+{self.query}
+
+## Progress
+- [ ] Initial exploration (iteration 1)
+- [ ] Deep dive into key topics
+- [ ] Cross-validation of findings
+- [ ] Final synthesis
+
+## Findings
+(Updated during research)
+"""
+        (self.session_dir / "TODO.md").write_text(todo_content, encoding="utf-8")
+
+        # Initial FINDINGS.md
+        findings_content = f"""# Research Findings
+
+## Query: {self.query}
+
+## Sources
+(Updated during research)
+
+## Key Findings
+(Updated during research)
+"""
+        (self.session_dir / "FINDINGS.md").write_text(findings_content, encoding="utf-8")
+
+        # Persist the Ralph Loop state
+        self.ralph_loop.save_state()
+
+    def get_current_prompt(self) -> str:
+        """Return the current research prompt.
+
+        Returns:
+            The Ralph Loop's prompt for the current iteration.
+        """
+        return self.ralph_loop.create_research_prompt()
+
+    def add_finding(self, finding: Finding) -> None:
+        """Add a finding and refresh the related files and coverage.
+
+        Args:
+            finding: Finding object to add.
+        """
+        self.findings.append(finding)
+        self._update_findings_file()
+        self._recalculate_coverage()
+
+    def _update_findings_file(self) -> None:
+        """Rewrite FINDINGS.md from the current findings (UTF-8)."""
+        findings_path = self.session_dir / "FINDINGS.md"
+        content = f"""# Research Findings
+
+## Query: {self.query}
+
+## Sources ({len(self.findings)})
+"""
+        # Append each finding as a Markdown section
+        for i, f in enumerate(self.findings, 1):
+            content += f"\n### Source {i}: {f.source_title}\n"
+            content += f"- URL: {f.source_url}\n"
+            content += f"- Confidence: {f.confidence:.0%}\n"
+            if f.verified_by:
+                content += f"- Verified by: {', '.join(f.verified_by)}\n"
+            if f.quality:
+                content += f"- Quality Score: {f.quality.overall_score:.2f}\n"
+                content += f"- Source Type: {f.quality.source_type}\n"
+            content += f"\n{f.content}\n"
+
+        findings_path.write_text(content, encoding="utf-8")
+
+    def _recalculate_coverage(self) -> None:
+        """Recompute coverage from the current findings."""
+        if not self.findings:
+            coverage = 0.0
+        else:
+            # Mean of the quality-weighted confidences
+            weighted_scores = [f.weighted_confidence for f in self.findings]
+            avg_weighted = sum(weighted_scores) / len(weighted_scores)
+
+            # Quantity factor (grows linearly up to 10 findings)
+            quantity_factor = min(len(self.findings) / 10, 1.0)
+
+            # Source-diversity factor
+            source_diversity = self._calculate_source_diversity()
+
+            # Final coverage
+            coverage = avg_weighted * quantity_factor * (0.8 + 0.2 * source_diversity)
+
+        # Push the result into the Ralph Loop state
+        self.ralph_loop.update_coverage(len(self.findings), coverage)
+
+    def _calculate_source_diversity(self) -> float:
+        """Compute how diverse the source types are.
+
+        Returns:
+            Diversity score in 0.0 to 1.0 (1.0 once four or more types appear).
+        """
+        if not self.findings:
+            return 0.0
+
+        # Collect the distinct source types
+        source_types = set()
+        for f in self.findings:
+            if f.quality:
+                source_types.add(f.quality.source_type)
+            else:
+                source_types.add("unknown")
+
+        # Diversity is measured against a baseline of four types
+        return min(len(source_types) / 4, 1.0)
+
+    def complete_iteration(self) -> bool:
+        """Finish the current iteration and advance to the next one.
+
+        Returns:
+            True if the research is fully complete.
+        """
+        # Completion check
+        if self.ralph_loop.is_complete():
+            return True
+
+        # Advance to the next iteration
+        self.ralph_loop.increment_iteration()
+        return False
+
+    def finalize(self) -> Path:
+        """End the session and write the summary file.
+
+        Returns:
+            Path of the generated SUMMARY.md file.
+        """
+        # Remove the Ralph Loop state file
+        self.ralph_loop.cleanup()
+
+        # Write SUMMARY.md
+        summary_path = self.session_dir / "SUMMARY.md"
+        summary_content = f"""# Research Summary
+
+## Query
+{self.query}
+
+## Statistics
+- Total Iterations: {self.ralph_loop.state.iteration}
+- Total Findings: {len(self.findings)}
+- Final Coverage: {self.ralph_loop.state.coverage_score:.2%}
+
+## Session
+- ID: {self.session_id}
+- Started: {self.ralph_loop.state.started_at}
+- Completed: {datetime.now(timezone.utc).isoformat()}
+
+## Output Files
+- TODO.md: Progress tracking
+- FINDINGS.md: Detailed findings
+- SUMMARY.md: This file
+"""
+        summary_path.write_text(summary_content, encoding="utf-8")
+
+        return summary_path
--- a/research_agent/researcher/runner.py
+++ b/research_agent/researcher/runner.py
@@ -0,0 +1,289 @@
+"""Deep Research Runner - Ralph Loop 패턴 기반 반복 연구 실행기.
+
+DeepAgents 스타일의 자율 루프 실행. 각 반복은 새로운 컨텍스트로 시작하며,
+파일시스템이 메모리 역할을 합니다.
+
+Usage:
+    # Python API
+    from research_agent.researcher.runner import run_deep_research
+    result = await run_deep_research("Context Engineering best practices", depth="deep")
+
+    # CLI
+    uv run python -m research_agent.researcher.runner "Your research query" --depth deep
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn
+
+from research_agent.researcher.agent import create_researcher_agent
+from research_agent.researcher.depth import ResearchDepth, get_depth_config
+from research_agent.researcher.ralph_loop import ResearchSession
+
+if TYPE_CHECKING:
+    from langgraph.graph.state import CompiledStateGraph
+
+console = Console()
+
+# Colors matching DeepAgents CLI
+COLORS = {
+    "primary": "cyan",
+    "success": "green",
+    "warning": "yellow",
+    "error": "red",
+    "dim": "dim",
+}
+
+
+class ResearchRunner:
+    """Research runner built on the Ralph Loop pattern."""
+
+    def __init__(
+        self,
+        query: str,
+        depth: ResearchDepth | str = ResearchDepth.DEEP,
+        model: str | None = None,
+    ):
+        self.query = query
+        self.depth = ResearchDepth(depth) if isinstance(depth, str) else depth
+        self.config = get_depth_config(self.depth)
+        self.model_name = model
+
+        # Session setup; the agent is created lazily on the first iteration
+        self.session = ResearchSession(query, self.config)
+        self.agent: CompiledStateGraph | None = None
+
+    def _create_agent(self) -> CompiledStateGraph:
+        """Create the researcher agent."""
+        return create_researcher_agent(
+            model=self.model_name,
+            depth=self.depth,
+        )
+
+    def _build_iteration_prompt(self, iteration: int) -> str:
+        """Build the prompt used for one iteration."""
+        max_iter = self.config.max_ralph_iterations
+        iter_display = f"{iteration}/{max_iter}" if max_iter > 0 else str(iteration)
+
+        return f"""## Research Iteration {iter_display}
+
+### Query
+{self.query}
+
+### Instructions
+Your previous work is in the filesystem. Check `research_workspace/session_{self.session.session_id}/` for:
+- TODO.md: Progress tracking
+- FINDINGS.md: Discovered information
+
+1. Review existing findings
+2. Identify knowledge gaps
+3. Conduct targeted searches
+4. Update research files with new findings
+5. Update TODO.md with progress
+
+### Completion
+When research is comprehensive (coverage >= {self.config.coverage_threshold:.0%}):
+- Output `<promise>RESEARCH_COMPLETE</promise>`
+- Only output this when truly complete - DO NOT lie to exit early
+
+### Current Stats
+- Iteration: {iteration}
+- Findings: {self.session.ralph_loop.state.findings_count}
+- Coverage: {self.session.ralph_loop.state.coverage_score:.2%}
+
+Make progress. You'll be called again if not complete.
+"""
+
+    async def _execute_iteration(self, iteration: int) -> dict:
+        """Run a single iteration and return the agent result."""
+        if self.agent is None:
+            self.agent = self._create_agent()
+
+        prompt = self._build_iteration_prompt(iteration)
+
+        # Invoke the agent with the prompt as a single user message
+        result = await self.agent.ainvoke(
+            {"messages": [{"role": "user", "content": prompt}]}
+        )
+
+        return result
+
+    def _check_completion(self, result: dict) -> bool:
+        """Return True once the agent emits the promise or coverage is met."""
+        # Skip non-AI messages: the user prompt quotes the promise tag verbatim.
+        for msg in result.get("messages", []):
+            if isinstance(msg, dict):
+                role, content = msg.get("role"), msg.get("content")
+            else:
+                role = getattr(msg, "type", None)
+                content = getattr(msg, "content", None)
+            if role not in ("ai", "assistant") or not isinstance(content, str):
+                continue
+            if "RESEARCH_COMPLETE" in content:  # loose match; tag is optional
+                return True
+        # Otherwise fall back to the coverage / max-iteration check.
+        return self.session.ralph_loop.is_complete()
+
+    async def run(self) -> Path:
+        """Run the research loop and return the summary file path."""
+        console.print(
+            Panel(
+                f"[bold {COLORS['primary']}]Deep Research Mode[/bold {COLORS['primary']}]\n"
+                f"[dim]Query: {self.query}[/dim]\n"
+                f"[dim]Depth: {self.depth.value}[/dim]\n"
+                f"[dim]Max iterations: {self.config.max_ralph_iterations or 'unlimited'}[/dim]",
+                title="Research Session Started",
+                border_style=COLORS["primary"],
+            )
+        )
+
+        # Initialize the session workspace
+        self.session.initialize()
+        console.print(
+            f"[dim]Session ID: {self.session.session_id}[/dim]\n"
+            f"[dim]Workspace: {self.session.session_dir}[/dim]\n"
+        )
+
+        iteration = 1
+        max_iterations = self.config.max_ralph_iterations or 100  # Safety limit
+
+        try:
+            while iteration <= max_iterations:
+                console.print(
+                    f"\n[bold {COLORS['primary']}]{'=' * 60}[/bold {COLORS['primary']}]"
+                )
+                console.print(
+                    f"[bold {COLORS['primary']}]ITERATION {iteration}[/bold {COLORS['primary']}]"
+                )
+                console.print(
+                    f"[bold {COLORS['primary']}]{'=' * 60}[/bold {COLORS['primary']}]\n"
+                )
+
+                with Progress(
+                    SpinnerColumn(),
+                    TextColumn("[progress.description]{task.description}"),
+                    console=console,
+                ) as progress:
+                    task = progress.add_task("Researching...", total=None)
+
+                    result = await self._execute_iteration(iteration)
+
+                    progress.update(task, description="Checking completion...")
+
+                # Completion check
+                if self._check_completion(result):
+                    console.print(
+                        f"\n[bold {COLORS['success']}]Research complete![/bold {COLORS['success']}]"
+                    )
+                    break
+
+                # Prepare the next iteration
+                is_done = self.session.complete_iteration()
+                if is_done:
+                    console.print(
+                        f"\n[bold {COLORS['success']}]Coverage threshold reached![/bold {COLORS['success']}]"
+                    )
+                    break
+
+                console.print(f"[dim]...continuing to iteration {iteration + 1}[/dim]")
+                iteration += 1
+
+        except KeyboardInterrupt:
+            console.print(
+                f"\n[bold {COLORS['warning']}]Stopped after {iteration} iterations[/bold {COLORS['warning']}]"
+            )
+
+        # Produce the final summary
+        summary_path = self.session.finalize()
+
+        # Show the results
+        console.print(
+            Panel(
+                f"[bold]Research Summary[/bold]\n"
+                f"Total Iterations: {iteration}\n"
+                f"Findings: {self.session.ralph_loop.state.findings_count}\n"
+                f"Coverage: {self.session.ralph_loop.state.coverage_score:.2%}\n"
+                f"\n[dim]Output: {summary_path}[/dim]",
+                title="Research Complete",
+                border_style=COLORS["success"],
+            )
+        )
+
+        # List the files that were created
+        console.print(f"\n[bold]Files created in {self.session.session_dir}:[/bold]")
+        for f in sorted(self.session.session_dir.rglob("*")):
+            if f.is_file():
+                console.print(
+                    f"  {f.relative_to(self.session.session_dir)}", style="dim"
+                )
+
+        return summary_path
+
+
+async def run_deep_research(
+    query: str,
+    depth: ResearchDepth | str = ResearchDepth.DEEP,
+    model: str | None = None,
+) -> Path:
+    """Run Deep Research (async API).
+
+    Args:
+        query: Research topic
+        depth: Research depth (quick, standard, deep, exhaustive)
+        model: Name of the LLM model to use
+
+    Returns:
+        Path: Path of the research summary file
+    """
+    runner = ResearchRunner(query, depth, model)
+    return await runner.run()
+
+
+def main() -> None:
+    """CLI entry point: parse arguments and run the research loop."""
+    parser = argparse.ArgumentParser(
+        description="Deep Research - Ralph Loop 패턴 기반 자율 연구",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python -m research_agent.researcher.runner "Context Engineering 전략 분석"
+  python -m research_agent.researcher.runner "LLM Agent 아키텍처" --depth deep
+  python -m research_agent.researcher.runner "RAG 시스템 비교" --depth exhaustive --model gpt-4.1
+        """,
+    )
+    parser.add_argument("query", help="연구 주제 (무엇을 연구할지)")
+    parser.add_argument(
+        "--depth",
+        choices=["quick", "standard", "deep", "exhaustive"],
+        default="deep",
+        help="연구 깊이 (기본: deep)",
+    )
+    parser.add_argument(
+        "--model",
+        help="사용할 LLM 모델 (예: gpt-4.1, claude-sonnet-4-20250514)",
+    )
+
+    args = parser.parse_args()
+
+    try:
+        asyncio.run(
+            run_deep_research(
+                query=args.query,
+                depth=args.depth,
+                model=args.model,
+            )
+        )
+    except KeyboardInterrupt:
+        console.print("\n[dim]Interrupted by user[/dim]")
+
+if __name__ == "__main__":
+    main()
--- a/research_agent/tools.py
+++ b/research_agent/tools.py
@@ -1,9 +1,47 @@
-"""리서치 도구 모듈.
+"""연구 도구 모듈.

-이 모듈은 리서치 에이전트를 위한 검색 및 콘텐츠 처리 유틸리티를 제공하며,
-Tavily 를 사용해 URL 을 찾고 전체 웹페이지 콘텐츠를 가져와 마크다운으로 변환한다.
+이 모듈은 연구 에이전트를 위한 검색 및 콘텐츠 처리 유틸리티를 제공합니다.
+다중 소스 검색(Tavily, mgrep, arXiv, grep.app, Context7)을 지원합니다.
+
+## 도구 흐름도
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                    comprehensive_search                          │
+│  (다중 소스 오케스트레이션)                                       │
+├─────────────────────────────────────────────────────────────────┤
+│                                                                   │
+│  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────────────┐ │
+│  │  web     │  │  local   │  │  arxiv   │  │     github       │ │
+│  │          │  │          │  │          │  │                  │ │
+│  │ mgrep    │  │ mgrep    │  │ arxiv_   │  │ github_code_     │ │
+│  │ (web)    │  │ (path)   │  │ search   │  │ search           │ │
+│  │    or    │  │          │  │          │  │ (grep.app API)   │ │
+│  │ tavily   │  │          │  │          │  │                  │ │
+│  └──────────┘  └──────────┘  └──────────┘  └──────────────────┘ │
+│                                                                   │
+│  ┌──────────────────┐                                            │
+│  │      docs        │                                            │
+│  │                  │                                            │
+│  │ library_docs_    │                                            │
+│  │ search           │                                            │
+│  │ (Context7 API)   │                                            │
+│  └──────────────────┘                                            │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+v2 업데이트 (2026-01):
+- mgrep 시맨틱 검색 통합 (2배 토큰 효율성)
+- arXiv 학술 논문 검색
+- grep.app GitHub 코드 검색
+- Context7 라이브러리 문서 검색
+- comprehensive_search 통합 도구
 """

+from __future__ import annotations
+
+import shutil
+import subprocess
 from typing import Annotated, Literal

 import httpx
@@ -12,33 +50,75 @@ from langchain_core.tools import InjectedToolArg, tool
 from markdownify import markdownify
 from tavily import TavilyClient

-load_dotenv()
+# ============================================================================
+# 환경 설정
+# ============================================================================

+load_dotenv()  # .env 파일에서 API 키 로드
+
+# arXiv 패키지 선택적 임포트 (설치되지 않은 환경 지원)
+try:
+    import arxiv
+
+    ARXIV_AVAILABLE = True
+except ImportError:
+    ARXIV_AVAILABLE = False
+    arxiv = None  # type: ignore
+
+# mgrep CLI 설치 여부 확인
+MGREP_AVAILABLE = shutil.which("mgrep") is not None
+
+# Tavily 클라이언트 초기화
 tavily_client = TavilyClient()


+# ============================================================================
+# 헬퍼 함수
+# ============================================================================
+
+
 def fetch_webpage_content(url: str, timeout: float = 10.0) -> str:
-    """웹페이지 콘텐츠를 가져와 마크다운으로 변환한다.
+    """Fetch a webpage and convert its HTML to Markdown.
+
+    This helper performs an HTTP GET request (with a browser-like User-Agent),
+    checks the response status code, and then converts the returned HTML to
+    Markdown using `markdownify`.
+
+    Notes:
+        - This is a helper function (not a LangChain tool).
+        - Tool wrappers such as `tavily_search` call it to extract full page content.
+        - On failure it returns a human-readable error string instead of raising.

     Args:
-        url: 가져올 URL
-        timeout: 요청 타임아웃 (초 단위)
+        url: Full URL to fetch (e.g. "https://example.com/article").
+        timeout: Request timeout in seconds.

     Returns:
-        마크다운 형식의 웹페이지 콘텐츠
+        The webpage content as a Markdown string.
+        If fetching or conversion fails, a string of the form:
+        "Error fetching content from {url}: {exception_message}".
     """
+    # Use a browser-like User-Agent header
     headers = {
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
     }

     try:
+        # Perform the HTTP GET request
         response = httpx.get(url, headers=headers, timeout=timeout)
-        response.raise_for_status()
+        response.raise_for_status()  # raises on 4xx/5xx responses
+
+        # Convert the HTML to Markdown and return it
         return markdownify(response.text)
     except Exception as e:
         return f"Error fetching content from {url}: {str(e)}"


+# ============================================================================
+# 웹 검색 도구
+# ============================================================================
+
+
@tool()
 def tavily_search(
    query: str,
@@ -47,34 +127,52 @@ def tavily_search(
        Literal["general", "news", "finance"], InjectedToolArg
    ] = "general",
 ) -> str:
-    """주어진 쿼리로 웹을 검색한다.
+    """Tavily를 사용해 웹을 검색하고 전체 페이지 콘텐츠를 Markdown으로 반환한다.

-    Tavily를 사용해 관련 URL을 찾고, 전체 웹페이지 콘텐츠를 마크다운으로 가져와 반환한다.
+    이 도구는 두 단계로 동작합니다:
+    1) Tavily Search를 사용하여 쿼리에 관련된 URL을 찾습니다.
+    2) 각 결과 URL에 대해 `fetch_webpage_content`를 통해 전체 웹페이지 콘텐츠를
+       가져와 Markdown으로 변환합니다.

    Args:
-        query: 실행할 검색 쿼리
-        max_results: 반환할 최대 결과 수 (기본값: 1)
-        topic: 주제 필터 - 'general', 'news', 또는 'finance' (기본값: 'general')
+        query: 자연어 검색 쿼리 (예: "context engineering best practices").
+        max_results: Tavily에서 가져올 최대 검색 결과 수.
+            도구 주입 인자로 처리됨; 기본값은 1.
+        topic: Tavily 토픽 필터. 허용 값:
+            - "general"
+            - "news"
+            - "finance"
+            도구 주입 인자로 처리됨; 기본값은 "general".

    Returns:
-        전체 웹페이지 콘텐츠가 포함된 포맷팅된 검색 결과
+        다음을 포함하는 Markdown 형식 문자열:
+        - 요약 헤더: "Found N result(s) for '{query}':"
+        - 각 결과에 대해:
+          - 제목
+          - URL
+          - Markdown으로 변환된 전체 웹페이지 콘텐츠
+          - 구분선 ("---")
+
+    Example:
+        >>> tavily_search.invoke({"query": "LangGraph CLI configuration", "max_results": 2})
    """
-    # Tavily 를 사용해 관련 URL 목록을 조회한다
+    # Tavily API를 사용해 관련 URL 목록을 조회
    search_results = tavily_client.search(
        query,
        max_results=max_results,
        topic=topic,
    )

-    # 각 URL 에 대해 전체 콘텐츠를 가져온다
+    # 각 검색 결과에 대해 전체 콘텐츠를 가져옴
    result_texts = []
    for result in search_results.get("results", []):
        url = result["url"]
        title = result["title"]

-        # 웹페이지 콘텐츠를 가져온다
+        # 웹페이지 콘텐츠를 가져와서 Markdown으로 변환
        content = fetch_webpage_content(url)

+        # 결과 형식화
        result_text = f"""## {title}
 **URL:** {url}

@@ -84,7 +182,7 @@ def tavily_search(
 """
        result_texts.append(result_text)

-    # 최종 응답 형식으로 정리한다
+    # 최종 응답 형식으로 조합
    response = f"""Found {len(result_texts)} result(s) for '{query}':

 {chr(10).join(result_texts)}"""
@@ -92,29 +190,579 @@ def tavily_search(
    return response


+# ============================================================================
+# 사고 도구 (Reflection Tool)
+# ============================================================================
+
+
@tool()
 def think_tool(reflection: str) -> str:
-    """연구 진행 상황과 의사결정을 위한 전략적 성찰 도구.
+    """Force an explicit reflection step and record the next action.

-    각 검색 후 결과를 분석하고 다음 단계를 체계적으로 계획하기 위해 이 도구를 사용한다.
-    이는 품질 높은 의사결정을 위해 연구 워크플로우에 의도적인 멈춤을 만든다.
+    Use this tool right after a search or a major decision point to:
+    - Summarize what was learned (facts, definitions, key claims)
+    - Identify what is missing (missing terms, evidence, implementation details)
+    - Decide the next concrete step (next query, next source, or start synthesis)

-    사용 시점:
-    - 검색 결과를 받은 후: 어떤 핵심 정보를 찾았는가?
-    - 다음 단계를 결정하기 전: 포괄적으로 답변할 수 있을 만큼 충분한가?
-    - 연구 공백을 평가할 때: 아직 누락된 구체적인 정보는 무엇인가?
-    - 연구를 마무리하기 전: 지금 완전한 답변을 제공할 수 있는가?
-
-    성찰에 포함해야 할 내용:
-    1. 현재 발견의 분석 - 어떤 구체적인 정보를 수집했는가?
-    2. 공백 평가 - 어떤 중요한 정보가 아직 누락되어 있는가?
-    3. 품질 평가 - 좋은 답변을 위한 충분한 증거/예시가 있는가?
-    4. 전략적 결정 - 검색을 계속해야 하는가, 답변을 제공해야 하는가?
+    The tool keeps no state of its own; it returns a confirmation string so the
+    agent is forced to externalize its reasoning in a structured way.

     Args:
-        reflection: 연구 진행 상황, 발견, 공백, 다음 단계에 대한 상세한 성찰
+        reflection: A concise but specific reflection that includes:
+            - What was learned (facts that can be listed as bullets)
+            - What is still missing
+            - The next step (exact tool + exact query)

     Returns:
-        의사결정을 위해 성찰이 기록되었다는 확인
+        A confirmation string indicating the reflection was recorded.
+        (The returned string is meant to be visible in logs/transcripts.)
+
+    Example:
+        >>> think_tool.invoke({
+        ...   "reflection": (
+        ...     "Learned: RAG vs. context caching differ in latency/cost trade-offs. "
+        ...     "Gap: need concrete caching APIs and constraints. "
+        ...     "Next: library_docs_search(library_name='openai', query='response caching')."
+        ...   )
+        ... })
     """
-    return f"성찰 기록됨: {reflection}"
+    return f"Reflection recorded: {reflection}"
+
+
+# ============================================================================
+# 시맨틱 검색 도구 (mgrep)
+# ============================================================================
+
+
+@tool()
+def mgrep_search(
+    query: str,
+    path: Annotated[str, InjectedToolArg] = ".",
+    max_results: Annotated[int, InjectedToolArg] = 10,
+    web: Annotated[bool, InjectedToolArg] = False,
+) -> str:
+    """Run a semantic search through the `mgrep` CLI (local files or web answer mode).
+
+    The command line is built as ``mgrep -m <max_results> ...``:
+    - Local mode (`web=False`): ``mgrep -m N <query> <path>``.
+    - Web mode (`web=True`): ``mgrep -m N --web --answer <query>``; `path` is
+      not passed in this mode.
+
+    Args:
+        query: Natural-language description of what to look for
+            (e.g. "Where is ResearchDepth configured?").
+        path: Filesystem path to search when `web=False`. Default: ".".
+            Declared as an injected tool argument.
+        max_results: Value passed to `mgrep -m`. Default: 10.
+            Declared as an injected tool argument.
+        web: If True, add ``--web --answer`` and omit `path`.
+            Declared as an injected tool argument.
+
+    Returns:
+        - On success: trimmed `mgrep` stdout, or "No results" if stdout is empty.
+        - If `mgrep` is not on PATH: an installation hint.
+        - On failure: a human-readable error string. For a non-zero exit this
+          carries stderr, falling back to stdout and then to the exit code so
+          the message is never blank.
+
+    Example:
+        >>> mgrep_search.invoke({"query": "How is the researcher agent created?", "path": "research_agent"})
+        >>> mgrep_search.invoke({"query": "latest agentic RAG techniques", "web": True, "max_results": 5})
+    """
+    # MGREP_AVAILABLE is computed once at import time via shutil.which
+    if not MGREP_AVAILABLE:
+        return (
+            "mgrep is not installed. "
+            "Install with `npm install -g @mixedbread/mgrep && mgrep login`."
+        )
+
+    # Argument list (never a shell string), so `query` is passed verbatim
+    cmd = ["mgrep", "-m", str(max_results)]
+    if web:
+        cmd.extend(["--web", "--answer"])
+    cmd.append(query)
+    if not web:
+        cmd.append(path)
+
+    # Only the subprocess call itself can raise; keep the try body minimal
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+    except subprocess.TimeoutExpired:
+        return "mgrep timeout (exceeded 60 seconds)"
+    except Exception as e:
+        # Tool boundary: report launch failures as text instead of raising
+        return f"mgrep execution error: {e}"
+
+    if result.returncode != 0:
+        # stderr may be empty on failure; fall back so the error is never blank
+        detail = (
+            result.stderr.strip()
+            or result.stdout.strip()
+            or f"exit code {result.returncode}"
+        )
+        return f"mgrep error: {detail}"
+
+    return result.stdout.strip() or "No results"
+
+
+# ============================================================================
+# 학술 검색 도구 (arXiv)
+# ============================================================================
+
+
+@tool()
+def arxiv_search(
+    query: str,
+    max_results: Annotated[int, InjectedToolArg] = 5,
+    sort_by: Annotated[
+        Literal["relevance", "submittedDate", "lastUpdatedDate"], InjectedToolArg
+    ] = "relevance",
+) -> str:
+    """Search arXiv for papers and return a Markdown summary.
+
+    Uses the optional `arxiv` Python package. Each result is rendered as
+    Markdown with title, authors (first 5 plus a count of the rest),
+    publication date, URL and the abstract truncated to 800 characters.
+
+    Args:
+        query: arXiv query string (e.g. "transformer architecture").
+        max_results: Maximum number of papers requested. Default: 5.
+        sort_by: Sort criterion, one of "relevance", "submittedDate",
+            "lastUpdatedDate". Unknown values fall back to relevance.
+            Default: "relevance".
+
+    Returns:
+        A Markdown string with a header stating how many papers were found,
+        followed by one section per paper.
+        If the `arxiv` package is missing, an installation hint.
+        If nothing matched, a "No papers found" message.
+        If the request or result parsing fails, an "arXiv search error: ..."
+        string (errors are reported as text, like the other search tools in
+        this module, rather than raised).
+
+    Example:
+        >>> arxiv_search.invoke({"query": "retrieval augmented generation evaluation", "max_results": 3})
+    """
+    # The import at module top is optional; bail out early when it failed
+    if not ARXIV_AVAILABLE or arxiv is None:
+        return "arxiv package not installed. Install with `pip install arxiv`."
+
+    sort_criterion_map = {
+        "relevance": arxiv.SortCriterion.Relevance,
+        "submittedDate": arxiv.SortCriterion.SubmittedDate,
+        "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
+    }
+
+    results = []
+    try:
+        client = arxiv.Client()
+        search = arxiv.Search(
+            query=query,
+            max_results=max_results,
+            sort_by=sort_criterion_map.get(sort_by, arxiv.SortCriterion.Relevance),
+        )
+
+        # Iterating client.results() is what actually performs the requests
+        for paper in client.results(search):
+            # First five authors, then a count of the remainder
+            authors = ", ".join(a.name for a in paper.authors[:5])
+            if len(paper.authors) > 5:
+                authors += f" et al. ({len(paper.authors) - 5} more)"
+
+            # Abstract capped at 800 characters
+            abstract = paper.summary[:800]
+            if len(paper.summary) > 800:
+                abstract += "..."
+
+            results.append(
+                f"## {paper.title}\n\n"
+                f"**Authors:** {authors}\n"
+                f"**Published:** {paper.published.strftime('%Y-%m-%d')}\n"
+                f"**URL:** {paper.entry_id}\n\n"
+                f"### Abstract\n{abstract}\n\n---"
+            )
+    except Exception as e:
+        # Tool boundary: keep the string-returning contract of sibling tools
+        return f"arXiv search error: {e}"
+
+    if not results:
+        return f"No papers found for '{query}'."
+
+    return f"Found {len(results)} paper(s) for '{query}':\n\n" + "\n\n".join(results)
+
+
+# ============================================================================
+# 통합 검색 도구
+# ============================================================================
+
+
+@tool()
+def comprehensive_search(
+    query: str,
+    sources: Annotated[
+        list[Literal["web", "local", "arxiv", "github", "docs"]], InjectedToolArg
+    ] = ["web"],
+    max_results_per_source: Annotated[int, InjectedToolArg] = 5,
+    library_name: Annotated[str | None, InjectedToolArg] = None,
+) -> str:
+    """Run a multi-source search and merge the results into one Markdown report.
+
+    Depending on `sources`, this tool invokes other tools of this module.
+    Sections always appear in this fixed order, whatever the order of `sources`:
+    - "local": `mgrep_search` on path ".".
+    - "web": `mgrep_search` with `web=True` when mgrep is installed,
+      otherwise `tavily_search`.
+    - "arxiv": `arxiv_search`.
+    - "github": `github_code_search`.
+    - "docs": `library_docs_search` (needs `library_name`).
+
+    Args:
+        query: Search query used for every selected source.
+        sources: Sources to query. Allowed values:
+            "web", "local", "arxiv", "github", "docs". Default: ["web"].
+            The list is only read, never mutated.
+        max_results_per_source: Maximum results per source. Default: 5.
+            Not forwarded to `library_docs_search`.
+        library_name: Library/product name for the "docs" source
+            (e.g. "langchain"). When "docs" is requested without it, the docs
+            section states that it was skipped.
+
+    Returns:
+        A Markdown string with one headed section per source, separated by
+        "---". If no section was produced, a "No search results found"
+        message.
+
+    Example:
+        >>> comprehensive_search.invoke({
+        ...   "query": "how to configure LangGraph deployment",
+        ...   "sources": ["web", "local", "docs"],
+        ...   "library_name": "langgraph",
+        ...   "max_results_per_source": 3
+        ... })
+    """
+    all_results = []
+
+    # Local codebase search
+    if "local" in sources:
+        local_result = mgrep_search.invoke(
+            {"query": query, "path": ".", "max_results": max_results_per_source}
+        )
+        all_results.append(f"# Local Codebase Search\n\n{local_result}")
+
+    # Web search: prefer mgrep's web mode, fall back to Tavily
+    if "web" in sources:
+        if MGREP_AVAILABLE:
+            web_result = mgrep_search.invoke(
+                {"query": query, "max_results": max_results_per_source, "web": True}
+            )
+        else:
+            web_result = tavily_search.invoke(
+                {"query": query, "max_results": max_results_per_source}
+            )
+        all_results.append(f"# Web Search Results\n\n{web_result}")
+
+    # arXiv academic search
+    if "arxiv" in sources:
+        arxiv_result = arxiv_search.invoke(
+            {"query": query, "max_results": max_results_per_source}
+        )
+        all_results.append(f"# Academic Papers (arXiv)\n\n{arxiv_result}")
+
+    # GitHub code search
+    if "github" in sources:
+        github_result = github_code_search.invoke(
+            {"query": query, "max_results": max_results_per_source}
+        )
+        all_results.append(f"# GitHub Code Search\n\n{github_result}")
+
+    # Official documentation search
+    if "docs" in sources:
+        if library_name:
+            docs_result = library_docs_search.invoke(
+                {"library_name": library_name, "query": query}
+            )
+            all_results.append(
+                f"# Official Documentation ({library_name})\n\n{docs_result}"
+            )
+        else:
+            # Say why the section is missing instead of dropping it silently
+            all_results.append(
+                "# Official Documentation\n\n"
+                "Skipped: `library_name` is required when 'docs' is in `sources`."
+            )
+
+    if not all_results:
+        return f"No search results found for '{query}'."
+
+    # Join the per-source sections with a horizontal rule
+    return "\n\n---\n\n".join(all_results)
+
+
+# ============================================================================
+# GitHub 코드 검색 도구
+# ============================================================================
+
+
+def _grep_app_field(hit: dict, key: str, default: str) -> str:
+    """Return ``hit[key]`` as text, unwrapping a ``{"raw": value}`` wrapper if present."""
+    # NOTE(review): grep.app appears to wrap scalar fields as {"raw": ...};
+    # both shapes are accepted here - confirm against a live response.
+    value = hit.get(key, default)
+    if isinstance(value, dict):
+        value = value.get("raw", default)
+    return str(value) if value else default
+
+
+@tool()
+def github_code_search(
+    query: str,
+    language: Annotated[list[str] | None, InjectedToolArg] = None,
+    repo: Annotated[str | None, InjectedToolArg] = None,
+    max_results: Annotated[int, InjectedToolArg] = 5,
+    use_regex: Annotated[bool, InjectedToolArg] = False,
+) -> str:
+    """Search public GitHub code through grep.app and return real-world snippets.
+
+    Intended for *literal code patterns* rather than conceptual keywords,
+    e.g. `useState(`, `getServerSession`, or a multi-line regex.
+
+    Filtering (applied client-side to the hits grep.app returns):
+        - `repo`: substring match on the repository name (e.g. "vercel/").
+        - `language`: case-insensitive match against a label inferred from
+          the file extension (e.g. ".py" -> "python", ".rs" -> "rust");
+          extensions without a mapping are compared as-is.
+
+    Args:
+        query: Code search pattern. Prefer literal code tokens, e.g.:
+            - "useState("
+            - "async function"
+            - "(?s)useEffect\\(\\(\\) => {.*removeEventListener" (with `use_regex=True`)
+        language: Optional list of languages to keep, e.g. ["TypeScript", "Python"].
+        repo: Optional repository filter, e.g. "facebook/react", "vercel/".
+        max_results: Maximum number of matches in the output. Default: 5.
+        use_regex: If True, `query` is sent as a regular expression. Default: False.
+
+    Returns:
+        A Markdown string listing, per match: repository name, file path
+        (linked to GitHub), language label and a snippet truncated to 500
+        characters. If nothing matches (before or after filtering), or on
+        timeout / HTTP error / any other failure, a human-readable message.
+
+    Example:
+        >>> github_code_search.invoke({
+        ...   "query": "getServerSession(",
+        ...   "language": ["TypeScript", "TSX"],
+        ...   "max_results": 3
+        ... })
+    """
+    # Local imports: the module's import block does not provide these
+    import html
+    import re
+
+    base_url = "https://grep.app/api/search"
+
+    params = {
+        "q": query,
+        "case": "false",  # case-insensitive
+        "words": "false",  # no whole-word matching
+        "regexp": str(use_regex).lower(),
+    }
+
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
+        "Accept": "application/json",
+    }
+
+    # Loop invariants: extension -> label map, normalized filter, tag regex
+    lang_map = {
+        "py": "python",
+        "ts": "typescript",
+        "js": "javascript",
+        "tsx": "tsx",
+        "jsx": "jsx",
+        "rs": "rust",
+        "rb": "ruby",
+        "kt": "kotlin",
+        "yml": "yaml",
+        "md": "markdown",
+    }
+    wanted_languages = {name.lower() for name in language} if language else None
+    tag_pattern = re.compile(r"<[^>]+>")
+
+    try:
+        response = httpx.get(base_url, params=params, headers=headers, timeout=30.0)
+        response.raise_for_status()
+        data = response.json()
+
+        hits = data.get("hits", {}).get("hits", [])
+        if not hits:
+            return f"No GitHub code found for '{query}'."
+
+        results = []
+        for hit in hits:
+            if len(results) >= max_results:
+                break
+
+            repo_name = _grep_app_field(hit, "repo", "unknown/unknown")
+            if repo and repo not in repo_name:
+                continue
+
+            file_path = _grep_app_field(hit, "path", "unknown")
+            branch = _grep_app_field(hit, "branch", "main")
+
+            # Infer the language from the file name only, so a dot in a
+            # directory name ("a.b/Dockerfile") is not taken for an extension
+            file_name = file_path.rsplit("/", 1)[-1]
+            extension = (
+                file_name.rsplit(".", 1)[-1].lower() if "." in file_name else "unknown"
+            )
+            lang = lang_map.get(extension, extension)
+
+            if wanted_languages is not None and lang not in wanted_languages:
+                continue
+
+            # Strip tags first, then decode entities, so an escaped "&lt;div&gt;"
+            # in the code is not mistaken for markup
+            snippet_html = (hit.get("content") or {}).get("snippet", "")
+            snippet = html.unescape(tag_pattern.sub("", snippet_html))
+            # Drop blank lines and surrounding whitespace on each line
+            snippet = "\n".join(
+                line.strip() for line in snippet.split("\n") if line.strip()
+            )
+            if len(snippet) > 500:
+                snippet = snippet[:500] + "..."
+
+            github_url = f"https://github.com/{repo_name}/blob/{branch}/{file_path}"
+
+            results.append(
+                f"## {repo_name}\n"
+                f"**File:** [`{file_path}`]({github_url})\n"
+                f"**Language:** {lang}\n\n"
+                f"```{lang}\n{snippet}\n```\n"
+            )
+
+        # Hits existed but every one was filtered out
+        if not results:
+            filter_msg = ""
+            if language:
+                filter_msg += f" (language: {language})"
+            if repo:
+                filter_msg += f" (repo: {repo})"
+            return f"No GitHub code found for '{query}'{filter_msg}."
+
+        return (
+            f"Found {len(results)} GitHub code snippet(s) for '{query}':\n\n"
+            + "\n---\n".join(results)
+        )
+
+    except httpx.TimeoutException:
+        return "GitHub code search timeout (exceeded 30 seconds)"
+    except httpx.HTTPStatusError as e:
+        return f"GitHub code search HTTP error: {e.response.status_code}"
+    except Exception as e:
+        # Tool boundary: report unexpected failures as text
+        return f"GitHub code search error: {e}"
+
+
+# ============================================================================
+# 라이브러리 문서 검색 도구 (Context7)
+# ============================================================================
+
+
+@tool()
+def library_docs_search(
+    library_name: str,
+    query: str,
+    max_tokens: Annotated[int, InjectedToolArg] = 5000,
+) -> str:
+    """Search official library documentation through Context7.
+
+    Two POST requests are made:
+    1) resolve `library_name` to a Context7 library ID (first entry of the
+       returned "libraries" list),
+    2) query the documentation of that library with `query`.
+
+    Args:
+        library_name: Library/product name to resolve (e.g. "langchain", "react").
+        query: Documentation query (e.g. "how to configure retries").
+        max_tokens: Sent as "maxTokens" in the documentation request. Default: 5000.
+
+    Returns:
+        A Markdown string with the library title, the query, the resolved
+        library ID and the returned documentation content.
+        On timeout, HTTP failure, unknown library or empty result, a
+        human-readable message instead.
+
+    Example:
+        >>> library_docs_search.invoke({
+        ...   "library_name": "langchain",
+        ...   "query": "Tool calling with InjectedToolArg",
+        ...   "max_tokens": 2000
+        ... })
+    """
+    resolve_url = "https://context7.com/api/v1/resolve-library-id"
+    query_url = "https://context7.com/api/v1/query-docs"
+    common_headers = {
+        "Content-Type": "application/json",
+        "User-Agent": "DeepResearchAgent/1.0",
+    }
+
+    try:
+        # Step 1: library name -> library ID
+        lookup = httpx.post(
+            resolve_url,
+            json={"libraryName": library_name, "query": query},
+            headers=common_headers,
+            timeout=30.0,
+        )
+        # A 404 is reported as "not found" rather than as an HTTP error
+        if lookup.status_code == 404:
+            return f"Library '{library_name}' not found in Context7."
+        lookup.raise_for_status()
+
+        candidates = lookup.json().get("libraries", [])
+        if not candidates:
+            return f"No documentation found for '{library_name}'."
+
+        # Only the first candidate is considered
+        best_match = candidates[0]
+        library_id = best_match.get("id", "")
+        library_title = best_match.get("name", library_name)
+        if not library_id:
+            return f"Could not resolve library ID for '{library_name}'."
+
+        # Step 2: documentation query (longer timeout than the lookup)
+        doc_payload = {
+            "libraryId": library_id,
+            "query": query,
+            "maxTokens": max_tokens,
+        }
+        answer = httpx.post(
+            query_url,
+            json=doc_payload,
+            headers=common_headers,
+            timeout=60.0,
+        )
+        answer.raise_for_status()
+
+        content = answer.json().get("content", "")
+        if not content:
+            return f"No documentation found for '{query}' in '{library_name}'."
+
+        return (
+            f"# {library_title} Official Documentation\n\n"
+            f"**Query:** {query}\n"
+            f"**Library ID:** {library_id}\n\n"
+            f"---\n\n{content}"
+        )
+
+    except httpx.TimeoutException:
+        return f"Library docs search timeout (library: {library_name})"
+    except httpx.HTTPStatusError as e:
+        return f"Library docs search HTTP error: {e.response.status_code}"
+    except Exception as e:
+        return f"Library docs search error: {e}"
--- a/scripts/run_ai_trend_research.py
+++ b/scripts/run_ai_trend_research.py
@@ -0,0 +1,748 @@
+#!/usr/bin/env python3
+"""2026 AI 트렌드 키워드 연구 스크립트 (도구 궤적 로깅 포함).
+
+이 스크립트는 다양한 소스에서 2026년 AI 트렌드를 조사하고 보고서를 생성합니다.
+각 도구 호출은 TOOL_TRAJECTORY.log 및 TOOL_TRAJECTORY.json에 기록됩니다.
+
+## 스크립트 실행 흐름
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                         main()                                   │
+├─────────────────────────────────────────────────────────────────┤
+│                                                                  │
+│   1. 세션 초기화                                                 │
+│      session = ResearchSession(query, session_id)               │
+│      trajectory_logger = ToolTrajectoryLogger(session_dir)      │
+│                                                                  │
+│   2. 다중 소스 검색                                              │
+│      ┌─────────────────────────────────────────────────────────┐│
+│      │ search_web_sources()    → tavily_search (5회)           ││
+│      │ search_github_sources() → github_code_search (3회)       ││
+│      │ search_arxiv_sources()  → arxiv_search (3회)            ││
+│      └─────────────────────────────────────────────────────────┘│
+│                                                                  │
+│   3. 키워드 분석                                                 │
+│      keywords = extract_keywords(findings)                      │
+│                                                                  │
+│   4. 결과 저장                                                   │
+│      - AI_TREND_REPORT.md                                       │
+│      - TOOL_TRAJECTORY.log                                      │
+│      - TOOL_TRAJECTORY.json                                     │
+│      - SUMMARY.md                                               │
+│                                                                  │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+사용법:
+    uv run python scripts/run_ai_trend_research.py
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn
+from rich.table import Table
+
+from research_agent.researcher.ralph_loop import (
+    Finding,
+    ResearchSession,
+    SourceQuality,
+    SourceType,
+)
+from research_agent.tools import (
+    arxiv_search,
+    github_code_search,
+    tavily_search,
+)
+
+
+# ============================================================================
+# Console initialization
+# ============================================================================
+
+console = Console()
+
+
+# ============================================================================
+# 도구 궤적 로깅
+# ============================================================================
+
+
+@dataclass
+class ToolCallRecord:
+    """Record of a single tool invocation.
+
+    Attributes:
+        seq: Order number of the call.
+        tool_name: Name of the tool that was invoked.
+        input_args: Arguments passed to the tool.
+        output_preview: Preview of the tool output.
+        output_length: Length of the full output.
+        duration_ms: Elapsed time of the call in milliseconds.
+        success: Whether the call succeeded.
+        error: Error message (set on failure).
+        timestamp: ISO 8601 string; defaults to the naive local time at
+            which the record is created.
+    """
+
+    seq: int  # call order
+    tool_name: str  # tool name
+    input_args: dict[str, Any]  # input arguments
+    output_preview: str  # output preview
+    output_length: int  # output length
+    duration_ms: float  # elapsed time (ms)
+    success: bool  # success flag
+    error: str | None = None  # error message
+    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+
+
+class ToolTrajectoryLogger:
+    """Collect tool-call records and persist them as text and JSON files.
+
+    Attributes:
+        session_dir: Directory the log files are written to.
+        calls: Recorded tool calls, in call order.
+        seq: Sequence number of the most recent call (0 before any call).
+    """
+
+    # Maximum number of output characters kept in a record's preview.
+    PREVIEW_CHARS = 300
+    # Maximum number of preview lines written per call to the text log.
+    PREVIEW_LINES = 10
+
+    def __init__(self, session_dir: Path):
+        """Initialize an empty logger.
+
+        Args:
+            session_dir: Directory in which the log files will be saved.
+        """
+        self.session_dir = session_dir
+        self.calls: list[ToolCallRecord] = []
+        self.seq = 0
+
+    def log_call(
+        self,
+        tool_name: str,
+        input_args: dict[str, Any],
+        output: str,
+        duration_ms: float,
+        success: bool = True,
+        error: str | None = None,
+    ) -> None:
+        """Append one tool call to the in-memory trajectory.
+
+        Args:
+            tool_name: Name of the invoked tool.
+            input_args: Arguments passed to the tool.
+            output: Full tool output; only a preview of it is stored.
+            duration_ms: Elapsed time of the call in milliseconds.
+            success: Whether the call succeeded (default: True).
+            error: Error message for a failed call (optional).
+        """
+        self.seq += 1
+        self.calls.append(
+            ToolCallRecord(
+                seq=self.seq,
+                tool_name=tool_name,
+                input_args=input_args,
+                # Slicing already returns the whole string when it is short.
+                output_preview=output[: self.PREVIEW_CHARS],
+                output_length=len(output),
+                duration_ms=duration_ms,
+                success=success,
+                error=error,
+            )
+        )
+
+    def save(self) -> Path:
+        """Write the trajectory to disk in two formats.
+
+        - TOOL_TRAJECTORY.log: human-readable text
+        - TOOL_TRAJECTORY.json: machine-readable list of records
+
+        Both files are written as UTF-8 so that non-ASCII arguments and
+        previews do not depend on the platform's default encoding.
+
+        Returns:
+            Path of the generated .log file.
+        """
+        # Make sure the target exists so a finished run never loses its log.
+        self.session_dir.mkdir(parents=True, exist_ok=True)
+
+        total = len(self.calls)
+        succeeded = sum(1 for c in self.calls if c.success)
+
+        # Text log
+        log_path = self.session_dir / "TOOL_TRAJECTORY.log"
+        with open(log_path, "w", encoding="utf-8") as f:
+            # Header
+            f.write("Tool Trajectory Log\n")
+            f.write(f"Generated: {datetime.now().isoformat()}\n")
+            f.write(f"Total Calls: {total}\n")
+            f.write(f"Success: {succeeded}\n")
+            f.write(f"Failed: {total - succeeded}\n")
+            f.write("=" * 70 + "\n\n")
+
+            # One entry per recorded call
+            for call in self.calls:
+                status = "OK" if call.success else f"FAIL: {call.error}"
+                f.write(
+                    f"[{call.seq}] {call.tool_name} ({status}) [{call.duration_ms:.0f}ms]\n"
+                )
+                f.write(f"    Timestamp: {call.timestamp}\n")
+                f.write(
+                    f"    Args: {json.dumps(call.input_args, ensure_ascii=False)}\n"
+                )
+                f.write(f"    Output Length: {call.output_length} chars\n")
+                f.write("    Output Preview:\n")
+                for line in call.output_preview.split("\n")[: self.PREVIEW_LINES]:
+                    f.write(f"      | {line}\n")
+                f.write("-" * 70 + "\n\n")
+
+        # JSON dump (ensure_ascii=False requires an explicit UTF-8 encoding)
+        json_path = self.session_dir / "TOOL_TRAJECTORY.json"
+        with open(json_path, "w", encoding="utf-8") as f:
+            json.dump([asdict(c) for c in self.calls], f, indent=2, ensure_ascii=False)
+
+        return log_path
+
+
+# ============================================================================
+# Global logger (used by each search function)
+# ============================================================================
+
+trajectory_logger: ToolTrajectoryLogger | None = None
+
+
+# ============================================================================
+# Search query definitions
+# ============================================================================
+
+# Web search queries (Tavily)
+RESEARCH_QUERIES = [
+    "2026 AI trends predictions",
+    "AI agent frameworks 2026",
+    "context engineering LLM",
+    "multimodal AI applications 2026",
+    "AI coding assistants trends",
+]
+
+# GitHub code search queries (literal code patterns)
+GITHUB_QUERIES = [
+    "class Agent(",  # agent class definition
+    "def run_agent(",  # agent run function
+    "context_length =",  # context length setting
+]
+
+# arXiv academic search queries
+ARXIV_QUERIES = [
+    "large language model agents",
+    "context window optimization",
+    "multimodal foundation models",
+]
+
+
+# ============================================================================
+# 소스별 검색 함수
+# ============================================================================
+
+
+def _collect_findings(
+    *,
+    heading: str,
+    tool: Any,
+    tool_name: str,
+    queries: list[str],
+    extra_args: dict[str, Any],
+    source_type: SourceType,
+    relevance_score: float,
+    recency_score: float,
+    url_scheme: str,
+    title_prefix: str,
+    confidence: float,
+    max_content_chars: int,
+) -> list[Finding]:
+    """Run one tool over a list of queries and wrap each result in a Finding.
+
+    Shared implementation of the three ``search_*_sources`` functions, which
+    differ only in the values passed here.
+
+    Args:
+        heading: Section title printed before the queries run.
+        tool: Tool object; its ``invoke(args)`` method is called per query.
+        tool_name: Name recorded in the tool trajectory.
+        queries: Query strings to run, in order.
+        extra_args: Tool arguments added after the ``query`` key.
+        source_type: Source type handed to ``SourceQuality.from_source_type``.
+        relevance_score: Relevance score handed to the quality factory.
+        recency_score: Recency score handed to the quality factory.
+        url_scheme: Scheme of the synthetic ``<scheme>://<query>`` source URL.
+        title_prefix: Prefix of the ``<prefix>: <query>`` source title.
+        confidence: Confidence assigned to every finding.
+        max_content_chars: Maximum number of result characters kept.
+
+    Returns:
+        One Finding per query that completed without raising.
+    """
+    findings: list[Finding] = []
+    console.print(f"\n[bold cyan]{heading}[/bold cyan]")
+
+    for query in queries:
+        console.print(f"  Searching: {query}...")
+        args = {"query": query, **extra_args}
+        start = datetime.now()
+
+        # Best effort by design: one failing query must not abort the run,
+        # so any exception is recorded in the trajectory and reported.
+        try:
+            result = tool.invoke(args)
+            duration = (datetime.now() - start).total_seconds() * 1000
+
+            # Trajectory logging (the module-level logger is only read here)
+            if trajectory_logger:
+                trajectory_logger.log_call(tool_name, args, result, duration)
+
+            quality = SourceQuality.from_source_type(
+                source_type,
+                relevance_score=relevance_score,
+                recency_score=recency_score,
+            )
+
+            findings.append(
+                Finding(
+                    # Slicing leaves shorter results untouched.
+                    content=result[:max_content_chars],
+                    source_url=f"{url_scheme}://{query}",
+                    source_title=f"{title_prefix}: {query}",
+                    confidence=confidence,
+                    quality=quality,
+                )
+            )
+            console.print(
+                f"    [green]Found results[/green] [dim]({duration:.0f}ms)[/dim]"
+            )
+        except Exception as e:
+            duration = (datetime.now() - start).total_seconds() * 1000
+            if trajectory_logger:
+                trajectory_logger.log_call(
+                    tool_name, args, "", duration, success=False, error=str(e)
+                )
+            console.print(f"    [red]Error: {e}[/red]")
+
+    return findings
+
+
+def search_web_sources() -> list[Finding]:
+    """Run a Tavily web search for every query in RESEARCH_QUERIES.
+
+    Returns:
+        List of collected Finding objects.
+    """
+    return _collect_findings(
+        heading="Web Search",
+        tool=tavily_search,
+        tool_name="tavily_search",
+        queries=RESEARCH_QUERIES,
+        extra_args={"max_results": 2, "topic": "general"},
+        source_type=SourceType.WEB,
+        relevance_score=0.8,
+        recency_score=0.9,
+        url_scheme="tavily",
+        title_prefix="Web",
+        confidence=0.7,
+        max_content_chars=2000,
+    )
+
+
+def search_github_sources() -> list[Finding]:
+    """Run a GitHub code search for every query in GITHUB_QUERIES.
+
+    Returns:
+        List of collected Finding objects.
+    """
+    return _collect_findings(
+        heading="GitHub Code Search",
+        tool=github_code_search,
+        tool_name="github_code_search",
+        queries=GITHUB_QUERIES,
+        extra_args={"max_results": 5},
+        source_type=SourceType.GITHUB,
+        relevance_score=0.85,
+        recency_score=0.7,
+        url_scheme="github",
+        title_prefix="GitHub",
+        confidence=0.75,
+        max_content_chars=2000,
+    )
+
+
+def search_arxiv_sources() -> list[Finding]:
+    """Run an arXiv paper search for every query in ARXIV_QUERIES.
+
+    Returns:
+        List of collected Finding objects.
+    """
+    return _collect_findings(
+        heading="arXiv Academic Search",
+        tool=arxiv_search,
+        tool_name="arxiv_search",
+        queries=ARXIV_QUERIES,
+        extra_args={"max_results": 3, "sort_by": "submittedDate"},
+        source_type=SourceType.ARXIV,
+        relevance_score=0.9,
+        recency_score=0.85,
+        url_scheme="arxiv",
+        title_prefix="arXiv",
+        confidence=0.9,  # academic sources get the highest confidence
+        max_content_chars=3000,
+    )
+
+
+# ============================================================================
+# 키워드 분석
+# ============================================================================
+
+
+# AI-related keywords counted by extract_keywords(), grouped by theme.
+_AI_KEYWORDS = (
+    # agents
+    "agent",
+    "agents",
+    "agentic",
+    # context
+    "context",
+    "context window",
+    "context engineering",
+    # multimodal
+    "multimodal",
+    "vision",
+    "audio",
+    # RAG and retrieval
+    "RAG",
+    "retrieval",
+    "retrieval-augmented",
+    # training
+    "fine-tuning",
+    "RLHF",
+    "DPO",
+    # reasoning
+    "reasoning",
+    "chain-of-thought",
+    "CoT",
+    # coding
+    "code generation",
+    "coding assistant",
+    # model names
+    "GPT",
+    "Claude",
+    "Gemini",
+    "LLaMA",
+    "Mistral",
+    # architecture
+    "transformer",
+    "attention",
+    "embedding",
+    "vector",
+    "vectorstore",
+    # prompting
+    "prompt",
+    "prompting",
+    "prompt engineering",
+    # tool use
+    "tool use",
+    "function calling",
+    # memory
+    "memory",
+    "long-term memory",
+    # safety
+    "safety",
+    "alignment",
+    "guardrails",
+    # performance
+    "inference",
+    "latency",
+    "optimization",
+    # open source
+    "open source",
+    "open-source",
+    # evaluation
+    "benchmark",
+    "evaluation",
+    # model architecture
+    "MoE",
+    "mixture of experts",
+    "small language model",
+    "SLM",
+    # edge AI
+    "on-device",
+    "edge AI",
+)
+
+
+def extract_keywords(findings: list[Finding]) -> dict[str, int]:
+    """Count occurrences of predefined AI keywords in the findings.
+
+    Matching is case-insensitive and restricted to whole words/phrases, so
+    short keywords such as "RAG" or "CoT" are not counted inside unrelated
+    words ("storage", "scotland") and "agent" is not counted again inside
+    "agents" or "agentic", which have their own entries.
+
+    Args:
+        findings: Finding objects whose ``content`` is scanned.
+
+    Returns:
+        Keyword -> frequency mapping sorted by descending frequency; keywords
+        that never occur are omitted.
+    """
+    import re  # local import: keeps this block independent of the file header
+
+    # Compile once per call; lookarounds demand a non-word character (or the
+    # string boundary) on both sides of the keyword.
+    patterns = [
+        (kw, re.compile(rf"(?<!\w){re.escape(kw)}(?!\w)", re.IGNORECASE))
+        for kw in _AI_KEYWORDS
+    ]
+
+    keyword_counts: dict[str, int] = {}
+    for finding in findings:
+        for kw, pattern in patterns:
+            hits = len(pattern.findall(finding.content))
+            if hits:
+                keyword_counts[kw] = keyword_counts.get(kw, 0) + hits
+
+    # Sort by descending frequency (stable, so ties keep first-seen order)
+    return dict(sorted(keyword_counts.items(), key=lambda item: item[1], reverse=True))
+
+
+# ============================================================================
+# 보고서 생성
+# ============================================================================
+
+
+def generate_report(
+    session: ResearchSession,
+    keywords: dict[str, int],
+    output_path: Path,
+) -> None:
+    """Write the research results to ``output_path`` as a Markdown report.
+
+    Args:
+        session: Research session; ``session_id``, ``findings`` and
+            ``ralph_loop.state.coverage_score`` are read from it.
+        keywords: Keyword -> frequency mapping; the first 20 entries in
+            iteration order are listed.
+        output_path: File the report is written to, encoded as UTF-8.
+    """
+    # Report header
+    header = f"""# 2026 AI 트렌드 키워드 연구 리포트
+
+**생성일:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
+**세션 ID:** {session.session_id}
+**총 소스 수:** {len(session.findings)}
+**Coverage Score:** {session.ralph_loop.state.coverage_score:.2%}
+
+---
+
+## 핵심 트렌드 키워드 (Top 20)
+
+| 순위 | 키워드 | 빈도 |
+|------|--------|------|
+"""
+    # Fragments are collected and joined once instead of repeated "+=".
+    parts: list[str] = [header]
+
+    # Top 20 keyword table rows
+    parts.extend(
+        f"| {i} | {kw} | {count} |\n"
+        for i, (kw, count) in enumerate(list(keywords.items())[:20], 1)
+    )
+
+    # Key findings section (static text)
+    parts.append("""
+---
+
+## 주요 발견사항
+
+### 1. Agent & Agentic AI
+- AI 에이전트 프레임워크가 2026년 핵심 트렌드
+- 자율적 작업 수행 및 도구 사용 능력 강조
+- Multi-agent 시스템의 부상
+
+### 2. Context Engineering
+- 긴 컨텍스트 윈도우 활용 최적화
+- 파일시스템 기반 컨텍스트 관리
+- 효율적인 정보 검색 및 주입
+
+### 3. Multimodal AI
+- 텍스트, 이미지, 오디오, 비디오 통합
+- Vision-Language 모델의 발전
+- 실시간 멀티모달 처리
+
+### 4. Reasoning & CoT
+- Chain-of-Thought 추론 개선
+- 복잡한 문제 해결 능력 향상
+- Self-reflection 및 자기 개선
+
+### 5. Code & Development
+- AI 코딩 어시스턴트의 고도화
+- 전체 개발 워크플로우 자동화
+- 코드 리뷰 및 디버깅 지원
+
+---
+
+## 소스 분석
+
+""")
+
+    # Number of findings per source type (findings without quality are skipped)
+    source_types: dict[Any, int] = {}
+    for finding in session.findings:
+        if finding.quality:
+            st = finding.quality.source_type
+            source_types[st] = source_types.get(st, 0) + 1
+
+    parts.extend(f"- **{st}**: {count}개 소스\n" for st, count in source_types.items())
+
+    # Detailed source list
+    parts.append("""
+---
+
+## 상세 소스 목록
+
+""")
+    for i, finding in enumerate(session.findings, 1):
+        quality_score = finding.quality.overall_score if finding.quality else 0
+        # Add the ellipsis only when the preview really cuts the content.
+        preview = finding.content[:500]
+        if len(finding.content) > 500:
+            preview += "..."
+        parts.append(f"""### 소스 {i}: {finding.source_title}
+- **신뢰도:** {finding.confidence:.0%}
+- **품질 점수:** {quality_score:.2f}
+- **URL:** {finding.source_url}
+
+<details>
+<summary>내용 미리보기</summary>
+
+{preview}
+
+</details>
+
+---
+
+""")
+
+    # Explicit UTF-8: the report is Korean text and must not depend on the
+    # platform's default encoding.
+    output_path.write_text("".join(parts), encoding="utf-8")
+
+
+# ============================================================================
+# 메인 함수
+# ============================================================================
+
+
+def main() -> None:
+    """Run the trend research from start to finish.
+
+    1. Initialize the session and the trajectory logger
+    2. Search all sources and add the findings to the session
+    3. Analyze keywords
+    4. Write the report and the logs
+    """
+    global trajectory_logger
+
+    # Start banner
+    start_banner = Panel(
+        "[bold cyan]2026 AI Trend Keyword Research[/bold cyan]\n"
+        "[dim]Collecting and analyzing data from multiple sources[/dim]",
+        title="Research Started",
+    )
+    console.print(start_banner)
+
+    # Session setup; the session id is the current local timestamp
+    session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
+    session = ResearchSession(
+        query="2026 AI Trends and Keywords",
+        session_id=session_id,
+    )
+    session.initialize()
+
+    # The search functions read this module-level logger
+    trajectory_logger = ToolTrajectoryLogger(session.session_dir)
+
+    console.print(f"\n[dim]Session: {session.session_id}[/dim]")
+    console.print(f"[dim]Workspace: {session.session_dir}[/dim]\n")
+
+    # Collect data from every source while a spinner is shown
+    spinner = Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        console=console,
+    )
+    with spinner as progress:
+        task = progress.add_task("Collecting data...", total=None)
+
+        # Web first, then GitHub, then arXiv
+        for collect in (
+            search_web_sources,
+            search_github_sources,
+            search_arxiv_sources,
+        ):
+            for finding in collect():
+                session.add_finding(finding)
+
+        progress.update(task, description="Analyzing keywords...")
+
+    # Keyword analysis
+    keywords = extract_keywords(session.findings)
+
+    # Top 10 keyword table
+    table = Table(title="Top 10 AI 트렌드 키워드")
+    for column_title, column_style in (
+        ("순위", "cyan"),
+        ("키워드", "green"),
+        ("빈도", "yellow"),
+    ):
+        table.add_column(column_title, style=column_style)
+
+    top_ten = list(keywords.items())[:10]
+    for rank, (kw, count) in enumerate(top_ten, start=1):
+        table.add_row(str(rank), kw, str(count))
+
+    console.print("\n")
+    console.print(table)
+
+    # Report
+    report_path = session.session_dir / "AI_TREND_REPORT.md"
+    generate_report(session, keywords, report_path)
+
+    # Trajectory log
+    if trajectory_logger:
+        trajectory_log_path = trajectory_logger.save()
+    else:
+        trajectory_log_path = None
+
+    # Close the session
+    summary_path = session.finalize()
+
+    # Completion banner
+    tool_call_count = len(trajectory_logger.calls) if trajectory_logger else 0
+    completion_text = (
+        f"[bold green]Research Complete![/bold green]\n\n"
+        f"Total Sources: {len(session.findings)}\n"
+        f"Coverage: {session.ralph_loop.state.coverage_score:.2%}\n"
+        f"Keywords Found: {len(keywords)}\n"
+        f"Tool Calls: {tool_call_count}\n\n"
+        f"[dim]Report: {report_path}[/dim]\n"
+        f"[dim]Summary: {summary_path}[/dim]\n"
+        f"[dim]Tool Trajectory: {trajectory_log_path}[/dim]"
+    )
+    console.print(
+        Panel(
+            completion_text,
+            title="Research Complete",
+            border_style="green",
+        )
+    )
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/verify_tool_trajectory.py
+++ b/scripts/verify_tool_trajectory.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+"""Tool Trajectory verification script with detailed logging.
+
+This script verifies the research agent tools work correctly by:
+1. Testing each tool individually with logging
+2. Verifying the tool call sequence (trajectory)
+3. Outputting detailed logs for debugging
+
+Usage:
+    uv run python scripts/verify_tool_trajectory.py
+"""
+
+from __future__ import annotations
+
+import logging
+import sys
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from rich.console import Console
+from rich.logging import RichHandler
+from rich.panel import Panel
+from rich.table import Table
+
+# Route all log output at DEBUG level and above through Rich's handler;
+# the format is the bare message because the handler adds its own columns.
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(message)s",
+    handlers=[RichHandler(rich_tracebacks=True, show_path=False)],
+)
+# Logger used by the trajectory helpers below.
+log = logging.getLogger("tool_trajectory")
+# Console used for the banners and tables printed by main().
+console = Console()
+
+
+@dataclass
+class ToolCall:
+    """Outcome of one tool invocation made during verification."""
+
+    tool_name: str  # name of the invoked tool
+    input_args: dict[str, Any]  # arguments passed to the tool
+    output: str  # tool output as stored by the caller
+    duration_ms: float  # elapsed time in milliseconds
+    success: bool  # whether the call succeeded
+    error: str | None = None  # error message, if any
+
+
+@dataclass
+class ToolTrajectory:
+    """Ordered list of tool calls made during one verification run."""
+
+    calls: list[ToolCall] = field(default_factory=list)
+    start_time: datetime = field(default_factory=datetime.now)
+
+    def add_call(self, call: ToolCall) -> None:
+        """Append ``call`` and log a one-line summary of it."""
+        self.calls.append(call)
+        position = len(self.calls)
+        outcome = "OK" if call.success else "FAIL"
+        log.info(
+            f"[{position}] {call.tool_name} "
+            f"({outcome}) "
+            f"[{call.duration_ms:.0f}ms]"
+        )
+
+    def summary(self) -> str:
+        """Return the total, successful and failed call counts as text."""
+        total = len(self.calls)
+        failed = sum(1 for c in self.calls if not c.success)
+        success = total - failed
+        return f"Total: {total}, Success: {success}, Failed: {failed}"
+
+
+def test_tool(
+    trajectory: ToolTrajectory,
+    tool_name: str,
+    tool_func: Any,
+    args: dict[str, Any],
+) -> bool:
+    """Invoke one tool and record the outcome in ``trajectory``.
+
+    Args:
+        trajectory: Trajectory the resulting ToolCall is added to.
+        tool_name: Name recorded for the call.
+        tool_func: Tool object; its ``invoke(args)`` method is called.
+        args: Arguments passed to the tool.
+
+    Returns:
+        True when the call was recorded as a success, False when an
+        exception was raised and recorded as a failure.
+    """
+    log.debug(f"Testing {tool_name} with args: {args}")
+    started_at = datetime.now()
+
+    def elapsed_ms() -> float:
+        # Milliseconds since the invocation started.
+        return (datetime.now() - started_at).total_seconds() * 1000
+
+    try:
+        result = tool_func.invoke(args)
+        duration = elapsed_ms()
+
+        # Keep at most the first 500 characters of the output.
+        stored_output = result if len(result) <= 500 else result[:500]
+        trajectory.add_call(
+            ToolCall(
+                tool_name=tool_name,
+                input_args=args,
+                output=stored_output,
+                duration_ms=duration,
+                success=True,
+            )
+        )
+    except Exception as exc:
+        duration = elapsed_ms()
+        trajectory.add_call(
+            ToolCall(
+                tool_name=tool_name,
+                input_args=args,
+                output="",
+                duration_ms=duration,
+                success=False,
+                error=str(exc),
+            )
+        )
+        log.error(f"Error in {tool_name}: {exc}")
+        return False
+
+    return True
+
+
+def main() -> int:
+    """Run every tool test, print the trajectory and write a log file.
+
+    Returns:
+        Process exit code: 0 when every tool call succeeded, 1 otherwise.
+    """
+    # Escapes "[...]" in tool output and error text so Rich prints it
+    # literally instead of parsing it as markup.
+    from rich.markup import escape
+
+    console.print(
+        Panel(
+            "[bold cyan]Tool Trajectory Verification[/bold cyan]\n"
+            "[dim]Testing research agent tools with detailed logging[/dim]",
+            title="Verification Started",
+        )
+    )
+
+    # Imported here so the banner is shown before the tool module loads.
+    from research_agent.tools import (
+        arxiv_search,
+        github_code_search,
+        tavily_search,
+        think_tool,
+    )
+
+    trajectory = ToolTrajectory()
+
+    console.print("\n[bold]Phase 1: Individual Tool Tests[/bold]\n")
+
+    test_cases = [
+        ("think_tool", think_tool, {"reflection": "Testing reflection capability"}),
+        (
+            "tavily_search",
+            tavily_search,
+            {"query": "context engineering", "max_results": 1},
+        ),
+        (
+            "arxiv_search",
+            arxiv_search,
+            {"query": "large language model", "max_results": 2},
+        ),
+        (
+            "github_code_search",
+            github_code_search,
+            {"query": "useState(", "max_results": 2},
+        ),
+    ]
+
+    for tool_name, tool_func, args in test_cases:
+        console.print(f"  Testing: [cyan]{tool_name}[/cyan]...")
+        test_tool(trajectory, tool_name, tool_func, args)
+
+    console.print("\n[bold]Phase 2: Tool Trajectory Analysis[/bold]\n")
+
+    table = Table(title="Tool Call Trajectory")
+    table.add_column("#", style="cyan", width=3)
+    table.add_column("Tool", style="green")
+    table.add_column("Status", style="yellow")
+    table.add_column("Duration", style="blue")
+    table.add_column("Output Preview", style="dim", max_width=50)
+
+    for i, call in enumerate(trajectory.calls, 1):
+        status = (
+            "[green]OK[/green]"
+            if call.success
+            else f"[red]FAIL: {escape(str(call.error))}[/red]"
+        )
+        output_preview = (
+            call.output[:50] + "..." if len(call.output) > 50 else call.output
+        )
+        output_preview = escape(output_preview.replace("\n", " "))
+        table.add_row(
+            str(i),
+            call.tool_name,
+            status,
+            f"{call.duration_ms:.0f}ms",
+            output_preview,
+        )
+
+    console.print(table)
+
+    console.print("\n[bold]Phase 3: Verification Summary[/bold]\n")
+
+    total_calls = len(trajectory.calls)
+    success_calls = sum(1 for c in trajectory.calls if c.success)
+    failed_calls = total_calls - success_calls
+
+    summary_table = Table(show_header=False)
+    summary_table.add_column("Metric", style="bold")
+    summary_table.add_column("Value")
+
+    summary_table.add_row("Total Tool Calls", str(total_calls))
+    summary_table.add_row("Successful", f"[green]{success_calls}[/green]")
+    summary_table.add_row(
+        "Failed",
+        f"[red]{failed_calls}[/red]" if failed_calls > 0 else "[green]0[/green]",
+    )
+    summary_table.add_row(
+        "Total Duration",
+        f"{sum(c.duration_ms for c in trajectory.calls):.0f}ms",
+    )
+
+    console.print(summary_table)
+
+    log_path = Path("research_workspace") / "tool_trajectory.log"
+    log_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Explicit UTF-8: tool output is arbitrary text and must not depend on
+    # the platform's default encoding.
+    with open(log_path, "w", encoding="utf-8") as f:
+        f.write(f"Tool Trajectory Log - {datetime.now().isoformat()}\n")
+        f.write("=" * 60 + "\n\n")
+        for i, call in enumerate(trajectory.calls, 1):
+            f.write(f"[{i}] {call.tool_name}\n")
+            f.write(f"    Args: {call.input_args}\n")
+            f.write(f"    Success: {call.success}\n")
+            f.write(f"    Duration: {call.duration_ms:.0f}ms\n")
+            if call.error:
+                f.write(f"    Error: {call.error}\n")
+            f.write(f"    Output:\n{call.output}\n")
+            f.write("-" * 40 + "\n")
+
+    console.print(f"\n[dim]Log saved to: {log_path}[/dim]")
+
+    if failed_calls > 0:
+        console.print(
+            Panel(
+                f"[red]Verification FAILED[/red]\n"
+                f"{failed_calls} tool(s) failed. Check logs above.",
+                border_style="red",
+            )
+        )
+        return 1
+
+    console.print(
+        Panel(
+            "[green]Verification PASSED[/green]\n"
+            "All tools executed successfully with correct trajectory.",
+            border_style="green",
+        )
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/tests/researcher/init.py
+++ b/tests/researcher/init.py
--- a/tests/researcher/test_depth.py
+++ b/tests/researcher/test_depth.py
@@ -0,0 +1,125 @@
+from __future__ import annotations
+
+import pytest
+
+from research_agent.researcher.depth import (
+    DEPTH_CONFIGS,
+    DepthConfig,
+    ResearchDepth,
+    get_depth_config,
+    infer_research_depth,
+)
+
+
+class TestResearchDepth:
+    """Checks on the ResearchDepth enum itself."""
+
+    def test_enum_values(self):
+        """Each depth level exposes its lowercase name as value."""
+        assert ResearchDepth.QUICK.value == "quick"
+        assert ResearchDepth.STANDARD.value == "standard"
+        assert ResearchDepth.DEEP.value == "deep"
+        assert ResearchDepth.EXHAUSTIVE.value == "exhaustive"
+
+    def test_all_depths_have_configs(self):
+        """Every enum member has an entry in DEPTH_CONFIGS."""
+        for depth in ResearchDepth:
+            assert depth in DEPTH_CONFIGS
+
+
+class TestDepthConfig:
+    """Pins the preset values stored in DEPTH_CONFIGS for each depth."""
+
+    def test_quick_config(self):
+        """QUICK: 3 searches, 1 iteration, web only, no cross validation."""
+        config = DEPTH_CONFIGS[ResearchDepth.QUICK]
+
+        assert config.max_searches == 3
+        assert config.max_ralph_iterations == 1
+        assert config.sources == ("web",)
+        assert config.require_cross_validation is False
+        assert config.min_sources_for_claim == 1
+        assert config.coverage_threshold == 0.5
+
+    def test_standard_config(self):
+        """STANDARD: 10 searches, 2 iterations, web and local sources."""
+        config = DEPTH_CONFIGS[ResearchDepth.STANDARD]
+
+        assert config.max_searches == 10
+        assert config.max_ralph_iterations == 2
+        assert "web" in config.sources
+        assert "local" in config.sources
+
+    def test_deep_config(self):
+        """DEEP: 25 searches, 5 iterations, cross validation with 2 sources."""
+        config = DEPTH_CONFIGS[ResearchDepth.DEEP]
+
+        assert config.max_searches == 25
+        assert config.max_ralph_iterations == 5
+        assert config.require_cross_validation is True
+        assert config.min_sources_for_claim == 2
+        assert "arxiv" in config.sources
+        assert "github" in config.sources
+
+    def test_exhaustive_config(self):
+        """EXHAUSTIVE: 50 searches, 10 iterations, 0.95 coverage, docs source."""
+        config = DEPTH_CONFIGS[ResearchDepth.EXHAUSTIVE]
+
+        assert config.max_searches == 50
+        assert config.max_ralph_iterations == 10
+        assert config.coverage_threshold == 0.95
+        assert config.min_sources_for_claim == 3
+        assert "docs" in config.sources
+
+    def test_config_is_hashable(self):
+        """hash() on a config must not raise; the value itself is not checked."""
+        config = DEPTH_CONFIGS[ResearchDepth.QUICK]
+        assert hash(config) is not None
+
+
+class TestGetDepthConfig:
+    """Checks on the get_depth_config() lookup helper."""
+
+    def test_returns_correct_config(self):
+        """The helper returns the DEPTH_CONFIGS entry for the given depth."""
+        config = get_depth_config(ResearchDepth.DEEP)
+        assert config == DEPTH_CONFIGS[ResearchDepth.DEEP]
+
+    def test_all_depths(self):
+        """Every depth resolves to a DepthConfig instance."""
+        for depth in ResearchDepth:
+            config = get_depth_config(depth)
+            assert isinstance(config, DepthConfig)
+
+
+class TestInferResearchDepth:
+    """Checks that infer_research_depth() maps query wording to a depth."""
+
+    @pytest.mark.parametrize(
+        "query,expected",
+        [
+            ("quick summary of AI", ResearchDepth.QUICK),
+            ("brief overview of LLMs", ResearchDepth.QUICK),
+            ("what is context engineering?", ResearchDepth.QUICK),
+            ("simple explanation of transformers", ResearchDepth.QUICK),
+        ],
+    )
+    def test_quick_keywords(self, query: str, expected: ResearchDepth):
+        """Queries asking for a short answer are classified as QUICK."""
+        assert infer_research_depth(query) == expected
+
+    @pytest.mark.parametrize(
+        "query,expected",
+        [
+            ("analyze the performance of GPT-5", ResearchDepth.DEEP),
+            ("compare different RAG strategies", ResearchDepth.DEEP),
+            ("investigate agent architectures", ResearchDepth.DEEP),
+            ("deep dive into context windows", ResearchDepth.DEEP),
+        ],
+    )
+    def test_deep_keywords(self, query: str, expected: ResearchDepth):
+        """Queries asking for analysis or comparison are classified as DEEP."""
+        assert infer_research_depth(query) == expected
+
+    @pytest.mark.parametrize(
+        "query,expected",
+        [
+            ("comprehensive study of AI safety", ResearchDepth.EXHAUSTIVE),
+            ("thorough analysis of LLM training", ResearchDepth.EXHAUSTIVE),
+            ("academic review of attention mechanisms", ResearchDepth.EXHAUSTIVE),
+            ("literature review on context engineering", ResearchDepth.EXHAUSTIVE),
+        ],
+    )
+    def test_exhaustive_keywords(self, query: str, expected: ResearchDepth):
+        """Queries asking for a comprehensive or academic review are EXHAUSTIVE.
+
+        Note that "thorough analysis" is expected to be EXHAUSTIVE rather
+        than DEEP.
+        """
+        assert infer_research_depth(query) == expected
+
+    def test_default_to_standard(self):
+        """Queries matching none of the cases above fall back to STANDARD."""
+        assert infer_research_depth("how do agents work?") == ResearchDepth.STANDARD
+        assert (
+            infer_research_depth("explain RAG architecture") == ResearchDepth.STANDARD
+        )
+
+    def test_case_insensitive(self):
+        """Keyword detection ignores letter case."""
+        assert infer_research_depth("COMPREHENSIVE study") == ResearchDepth.EXHAUSTIVE
+        assert infer_research_depth("Quick Overview") == ResearchDepth.QUICK
--- a/tests/researcher/test_integration.py
+++ b/tests/researcher/test_integration.py
@@ -0,0 +1,314 @@
+"""E2E integration tests - verify the full ResearchRunner flow."""
+
+import asyncio
+import tempfile
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from research_agent.researcher.depth import ResearchDepth
+from research_agent.researcher.ralph_loop import Finding, ResearchSession
+from research_agent.researcher.runner import ResearchRunner
+
+
+class TestE2EResearchFlow:
+    """End-to-end tests for the overall research flow."""
+
+    @pytest.fixture
+    def temp_workspace(self):
+        """Yield a temp workspace; the loop state file is redirected into it."""
+        target = "research_agent.researcher.ralph_loop.ResearchRalphLoop.STATE_FILE"
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch(target, Path(tmpdir) / ".claude" / "state.md"):
+                yield Path(tmpdir)
+
+    @pytest.fixture
+    def mock_agent_response_incomplete(self):
+        """Agent response that does not announce completion."""
+        return {
+            "messages": [
+                MagicMock(
+                    content="I found some information about the topic. "
+                    "Still need to investigate more aspects."
+                )
+            ]
+        }
+
+    @pytest.fixture
+    def mock_agent_response_complete(self):
+        """Agent response that carries the completion promise tag."""
+        return {
+            "messages": [
+                MagicMock(
+                    content="Research is comprehensive. "
+                    "<promise>RESEARCH_COMPLETE</promise>"
+                )
+            ]
+        }
+
+    def test_runner_initialization_creates_session(self, temp_workspace):
+        """Constructing a runner creates its session and resolves the depth."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Test query", depth="quick")
+
+            assert runner.session is not None
+            assert runner.query == "Test query"
+            assert runner.depth == ResearchDepth.QUICK
+
+    def test_session_initialization_creates_files(self, temp_workspace):
+        """Session initialization creates the session dir and its markdown files."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Test query", depth="quick")
+            runner.session.initialize()
+
+            assert runner.session.session_dir.exists()
+            assert (runner.session.session_dir / "TODO.md").exists()
+            assert (runner.session.session_dir / "FINDINGS.md").exists()
+
+            todo_content = (runner.session.session_dir / "TODO.md").read_text()
+            assert "Test query" in todo_content
+
+    def test_iteration_prompt_contains_query(self, temp_workspace):
+        """The iteration prompt embeds the query, progress and completion token."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Context Engineering 분석", depth="deep")
+            prompt = runner._build_iteration_prompt(1)
+
+            assert "Context Engineering 분석" in prompt
+            assert "Iteration 1/5" in prompt
+            assert "RESEARCH_COMPLETE" in prompt
+
+    def test_completion_detection_by_promise(
+        self, temp_workspace, mock_agent_response_complete
+    ):
+        """A response carrying the promise tag is detected as complete."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            # "quick" caps the loop at one iteration, which can mask the tag check.
+            runner = ResearchRunner("Test", depth="deep")
+            assert runner._check_completion(mock_agent_response_complete) is True
+
+    def test_completion_detection_by_coverage(self, temp_workspace):
+        """Coverage above the threshold completes the run without a promise tag."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Test", depth="deep")
+            runner.session.ralph_loop.state.coverage_score = 0.95
+
+            result = {"messages": [MagicMock(content="Still working...")]}
+            assert runner._check_completion(result) is True
+
+    def test_no_completion_when_incomplete(
+        self, temp_workspace, mock_agent_response_incomplete
+    ):
+        """No promise tag, low coverage and iterations left: not complete."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Test", depth="deep")
+            runner.session.ralph_loop.state.coverage_score = 0.3
+            runner.session.ralph_loop.state.iteration = 1
+
+            assert runner._check_completion(mock_agent_response_incomplete) is False
+
+
+class TestE2EWithMockedAgent:
+    """End-to-end tests driven by a mocked agent."""
+
+    @pytest.fixture
+    def temp_workspace(self):
+        """Yield a temp workspace; the loop state file is redirected into it."""
+        target = "research_agent.researcher.ralph_loop.ResearchRalphLoop.STATE_FILE"
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch(target, Path(tmpdir) / ".claude" / "state.md"):
+                yield Path(tmpdir)
+
+    def test_single_iteration_completion(self, temp_workspace):
+        """A run whose first response carries the promise completes at once."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Quick test", depth="quick")
+
+            mock_agent = AsyncMock()
+            mock_agent.ainvoke.return_value = {
+                "messages": [MagicMock(content="<promise>RESEARCH_COMPLETE</promise>")]
+            }
+            runner.agent = mock_agent
+
+            async def run_test():
+                runner.session.initialize()
+
+                result = await runner._execute_iteration(1)
+                return runner._check_completion(result)
+            # asyncio.run() owns a fresh loop; get_event_loop() is deprecated here.
+            is_complete = asyncio.run(run_test())
+            assert is_complete is True
+
+    def test_multiple_iterations_until_completion(self, temp_workspace):
+        """The run keeps iterating until the third response carries the promise."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Deep test", depth="deep")
+
+            call_count = 0
+
+            async def mock_invoke(*args, **kwargs):
+                nonlocal call_count
+                call_count += 1
+
+                if call_count >= 3:
+                    return {
+                        "messages": [
+                            MagicMock(content="<promise>RESEARCH_COMPLETE</promise>")
+                        ]
+                    }
+                return {"messages": [MagicMock(content="Still researching...")]}
+
+            mock_agent = AsyncMock()
+            mock_agent.ainvoke = mock_invoke
+            runner.agent = mock_agent
+
+            async def run_test():
+                runner.session.initialize()
+
+                iteration = 1
+                max_iter = 5
+
+                while iteration <= max_iter:
+                    result = await runner._execute_iteration(iteration)
+                    if runner._check_completion(result):
+                        break
+                    iteration += 1
+
+                return iteration
+
+            final_iteration = asyncio.run(run_test())
+            assert final_iteration == 3
+            assert call_count == 3
+
+
+class TestFilesystemStateChanges:
+    """Verify the files a session writes as its state changes."""
+
+    @pytest.fixture
+    def temp_workspace(self):
+        """Yield a temp workspace; the loop state file is redirected into it."""
+        # initialize()/finalize() write and delete that file; never touch the real one.
+        target = "research_agent.researcher.ralph_loop.ResearchRalphLoop.STATE_FILE"
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch(target, Path(tmpdir) / ".claude" / "state.md"):
+                yield Path(tmpdir)
+
+    def test_findings_file_updated_on_add(self, temp_workspace):
+        """Adding a finding writes its content, URL and title to FINDINGS.md."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            session = ResearchSession("Test query")
+            session.initialize()
+            finding = Finding(
+                content="Important discovery about LLMs",
+                source_url="https://example.com/article",
+                source_title="LLM Research Paper",
+                confidence=0.9,
+            )
+            session.add_finding(finding)
+
+            findings_content = (session.session_dir / "FINDINGS.md").read_text()
+            assert "Important discovery about LLMs" in findings_content
+            assert "https://example.com/article" in findings_content
+            assert "LLM Research Paper" in findings_content
+
+    def test_coverage_updates_with_findings(self, temp_workspace):
+        """Adding findings raises the coverage score and the findings count."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            session = ResearchSession("Test query")
+            session.initialize()
+            initial_coverage = session.ralph_loop.state.coverage_score
+            assert initial_coverage == 0.0
+
+            for i in range(5):
+                finding = Finding(
+                    content=f"Finding {i}",
+                    source_url=f"https://example.com/{i}",
+                    source_title=f"Source {i}",
+                    confidence=0.8,
+                )
+                session.add_finding(finding)
+
+            assert session.ralph_loop.state.coverage_score > initial_coverage
+            assert session.ralph_loop.state.findings_count == 5
+
+    def test_summary_created_on_finalize(self, temp_workspace):
+        """finalize() writes a summary naming the query and the findings total."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            session = ResearchSession("Test query")
+            session.initialize()
+            finding = Finding(
+                content="Test finding",
+                source_url="https://example.com",
+                source_title="Test Source",
+                confidence=0.9,
+            )
+            session.add_finding(finding)
+
+            summary_path = session.finalize()
+
+            assert summary_path.exists()
+            summary_content = summary_path.read_text()
+            assert "Test query" in summary_content
+            assert "Total Findings: 1" in summary_content
+
+
+class TestCompletionConditions:
+    """Checks of the loop's completion conditions."""
+
+    @pytest.fixture
+    def temp_workspace(self):
+        """Yield a temp workspace; the loop state file is redirected into it."""
+        target = "research_agent.researcher.ralph_loop.ResearchRalphLoop.STATE_FILE"
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch(target, Path(tmpdir) / ".claude" / "state.md"):
+                yield Path(tmpdir)
+
+    def test_max_iterations_limit(self, temp_workspace):
+        """A quick run allows one iteration and is complete once it is reached."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Test", depth="quick")
+
+            assert runner.config.max_ralph_iterations == 1
+
+            runner.session.ralph_loop.state.iteration = 1
+            assert runner.session.ralph_loop.is_complete() is True
+
+    def test_coverage_threshold_completion(self, temp_workspace):
+        """A deep run completes at coverage 0.85 but not at 0.84."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            runner = ResearchRunner("Test", depth="deep")
+
+            runner.session.ralph_loop.state.coverage_score = 0.84
+            assert runner.session.ralph_loop.is_complete() is False
+
+            runner.session.ralph_loop.state.coverage_score = 0.85
+            assert runner.session.ralph_loop.is_complete() is True
+
+    def test_iteration_increment(self, temp_workspace):
+        """complete_iteration() advances the loop from iteration 1 to 2."""
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            session = ResearchSession("Test query")
+            session.initialize()
+            assert session.ralph_loop.state.iteration == 1
+
+            session.complete_iteration()
+            assert session.ralph_loop.state.iteration == 2
+
+    def test_state_file_persistence(self, temp_workspace):
+        """initialize() persists an active, iteration-1 state to STATE_FILE."""
+        state_file = temp_workspace / ".claude" / "research-ralph-loop.local.md"
+
+        with patch.object(ResearchSession, "WORKSPACE", temp_workspace):
+            with patch(
+                "research_agent.researcher.ralph_loop.ResearchRalphLoop.STATE_FILE",
+                state_file,
+            ):
+                session = ResearchSession("Test query")
+                session.initialize()
+
+                assert state_file.exists()
+
+                state_content = state_file.read_text()
+                assert "active: true" in state_content
+                assert "iteration: 1" in state_content
--- a/tests/researcher/test_ralph_loop.py
+++ b/tests/researcher/test_ralph_loop.py
@@ -0,0 +1,308 @@
+from __future__ import annotations
+
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from research_agent.researcher.depth import ResearchDepth, get_depth_config
+from research_agent.researcher.ralph_loop import (
+    Finding,
+    RalphLoopState,
+    ResearchRalphLoop,
+    ResearchSession,
+    SourceQuality,
+    SourceType,
+)
+
+
+class TestRalphLoopState:
+    def test_default_values(self):
+        """A fresh state starts at iteration 1 with nothing collected yet."""
+        fresh = RalphLoopState()
+        assert fresh.iteration == 1
+        assert fresh.max_iterations == 0
+        assert fresh.completion_promise == "RESEARCH_COMPLETE"
+        assert fresh.findings_count == 0
+        assert fresh.coverage_score == 0.0
+
+    def test_is_max_reached_unlimited(self):
+        """With max_iterations=0, even iteration 100 does not reach the cap."""
+        assert RalphLoopState(max_iterations=0, iteration=100).is_max_reached() is False
+
+    def test_is_max_reached_at_limit(self):
+        """Iteration 5 of 5 counts as having reached the cap."""
+        assert RalphLoopState(max_iterations=5, iteration=5).is_max_reached() is True
+
+    def test_is_max_reached_below_limit(self):
+        """Iteration 3 of 5 has not reached the cap."""
+        assert RalphLoopState(max_iterations=5, iteration=3).is_max_reached() is False
+
+
+class TestFinding:
+    def test_creation(self):
+        """Fields are stored as given and verified_by defaults to an empty list."""
+        created = Finding(
+            content="Test content",
+            source_url="https://example.com",
+            source_title="Example",
+            confidence=0.9,
+        )
+        assert created.content == "Test content"
+        assert created.confidence == 0.9
+        assert created.verified_by == []
+
+    def test_with_verification(self):
+        """Explicit verification sources are kept on the finding."""
+        cross_checked = Finding(
+            content="Test",
+            source_url="https://a.com",
+            source_title="A",
+            confidence=0.8,
+            verified_by=["https://b.com", "https://c.com"],
+        )
+        assert len(cross_checked.verified_by) == 2
+
+    def test_weighted_confidence_without_quality(self):
+        """With no quality attached, weighted_confidence equals the raw 0.8."""
+        assert Finding(
+            content="Test",
+            source_url="https://a.com",
+            source_title="A",
+            confidence=0.8,
+        ).weighted_confidence == 0.8
+
+    def test_weighted_confidence_with_quality(self):
+        """With this quality attached, the weighted value is positive but below raw."""
+        arxiv_quality = SourceQuality(
+            source_type=SourceType.ARXIV,
+            recency_score=0.8,
+            authority_score=0.9,
+            relevance_score=0.85,
+        )
+        rated = Finding(
+            content="Test",
+            source_url="https://arxiv.org/abs/1234",
+            source_title="Paper",
+            confidence=0.9,
+            quality=arxiv_quality,
+        )
+        assert 0 < rated.weighted_confidence < rated.confidence
+
+
+class TestSourceQuality:
+    def test_overall_score_calculation(self):
+        """overall_score matches the 0.2/0.4/0.4 weighting of the three scores."""
+        rated = SourceQuality(
+            source_type=SourceType.ARXIV,
+            recency_score=0.8,
+            authority_score=0.9,
+            relevance_score=0.85,
+        )
+        assert abs(rated.overall_score - (0.8 * 0.2 + 0.9 * 0.4 + 0.85 * 0.4)) < 0.01
+
+    def test_verification_bonus(self):
+        """Three verifications score higher than none for otherwise equal sources."""
+        def web_quality(**extra):
+            return SourceQuality(
+                source_type=SourceType.WEB,
+                recency_score=0.5,
+                authority_score=0.5,
+                relevance_score=0.5,
+                **extra,
+            )
+
+        baseline = web_quality()
+        verified = web_quality(verification_count=3)
+
+        assert verified.overall_score > baseline.overall_score
+
+    def test_from_source_type_arxiv(self):
+        """The ARXIV preset carries an authority score of 0.9."""
+        assert SourceQuality.from_source_type(SourceType.ARXIV).authority_score == 0.9
+
+    def test_from_source_type_web(self):
+        """The WEB preset carries an authority score of 0.5."""
+        assert SourceQuality.from_source_type(SourceType.WEB).authority_score == 0.5
+
+    def test_max_score_capped(self):
+        """Perfect scores plus ten verifications still give at most 1.0."""
+        assert SourceQuality(
+            source_type=SourceType.ARXIV,
+            recency_score=1.0,
+            authority_score=1.0,
+            relevance_score=1.0,
+            verification_count=10,
+        ).overall_score <= 1.0
+
+
+class TestResearchRalphLoop:
+    @pytest.fixture
+    def temp_dir(self):
+        """Point STATE_FILE into a temp dir for the test, then restore it."""
+        saved = ResearchRalphLoop.STATE_FILE
+        with tempfile.TemporaryDirectory() as scratch:
+            ResearchRalphLoop.STATE_FILE = Path(scratch) / ".claude" / "test-state.md"
+            yield Path(scratch)
+            ResearchRalphLoop.STATE_FILE = saved
+
+    def test_init_default(self, temp_dir: Path):
+        """Defaults are 10 iterations and a 0.85 coverage threshold."""
+        default_loop = ResearchRalphLoop("test query")
+        assert default_loop.query == "test query"
+        assert default_loop.max_iterations == 10
+        assert default_loop.coverage_threshold == 0.85
+
+    def test_init_with_depth_config(self, temp_dir: Path):
+        """The EXHAUSTIVE config yields 10 iterations, 0.95 and a docs source."""
+        exhaustive = get_depth_config(ResearchDepth.EXHAUSTIVE)
+        configured = ResearchRalphLoop("test query", depth_config=exhaustive)
+        assert configured.max_iterations == 10
+        assert configured.coverage_threshold == 0.95
+        assert "docs" in configured.sources
+
+    def test_create_research_prompt(self, temp_dir: Path):
+        """The prompt names the query, the 1/5 progress and the completion token."""
+        capped = ResearchRalphLoop("test query", max_iterations=5)
+        text = capped.create_research_prompt()
+        assert "test query" in text
+        assert "1/5" in text
+        assert "RESEARCH_COMPLETE" in text
+
+    def test_save_and_load_state(self, temp_dir: Path):
+        """State written by one loop is read back unchanged by another."""
+        writer = ResearchRalphLoop("test query")
+        writer.state.iteration = 3
+        writer.state.findings_count = 5
+        writer.state.coverage_score = 0.6
+        writer.save_state()
+        assert writer.STATE_FILE.exists()
+
+        reader = ResearchRalphLoop("test query")
+        loaded = reader.load_state()
+        assert loaded is True
+        assert reader.state.iteration == 3
+        assert reader.state.findings_count == 5
+        assert reader.state.coverage_score == 0.6
+
+    def test_increment_iteration(self, temp_dir: Path):
+        """increment_iteration() advances a saved loop from iteration 1 to 2."""
+        counter = ResearchRalphLoop("test query")
+        counter.save_state()
+        assert counter.state.iteration == 1
+        counter.increment_iteration()
+        assert counter.state.iteration == 2
+
+    def test_is_complete_by_coverage(self, temp_dir: Path):
+        """Coverage 0.85 against a 0.8 threshold completes the loop."""
+        covered = ResearchRalphLoop("test query", coverage_threshold=0.8)
+        covered.state.coverage_score = 0.85
+        assert covered.is_complete() is True
+
+    def test_is_complete_by_max_iterations(self, temp_dir: Path):
+        """Iteration 5 of 5 completes the loop even at coverage 0.5."""
+        exhausted = ResearchRalphLoop("test query", max_iterations=5)
+        exhausted.state.iteration = 5
+        exhausted.state.coverage_score = 0.5
+        assert exhausted.is_complete() is True
+
+    def test_cleanup(self, temp_dir: Path):
+        """cleanup() removes a previously saved state file."""
+        disposable = ResearchRalphLoop("test query")
+        disposable.save_state()
+        assert disposable.STATE_FILE.exists()
+        disposable.cleanup()
+        assert not disposable.STATE_FILE.exists()
+
+
+class TestResearchSession:
+    @pytest.fixture
+    def temp_workspace(self):
+        """Redirect WORKSPACE and STATE_FILE into a temp dir; restore both after."""
+        with tempfile.TemporaryDirectory() as td:
+            scratch = Path(td)
+            saved_workspace = ResearchSession.WORKSPACE
+            saved_state_file = ResearchRalphLoop.STATE_FILE
+
+            ResearchSession.WORKSPACE = scratch / "research_workspace"
+            ResearchRalphLoop.STATE_FILE = scratch / ".claude" / "test-state.md"
+            yield scratch
+            ResearchSession.WORKSPACE = saved_workspace
+            ResearchRalphLoop.STATE_FILE = saved_state_file
+
+    def test_init(self, temp_workspace: Path):
+        """A new session keeps the query, gets an id and starts with no findings."""
+        created = ResearchSession("test query")
+        assert created.query == "test query"
+        assert created.session_id is not None
+        assert created.findings == []
+
+    def test_initialize_creates_files(self, temp_workspace: Path):
+        """initialize() creates the session dir with TODO.md and FINDINGS.md."""
+        started = ResearchSession("test query", session_id="test123")
+        started.initialize()
+        assert started.session_dir.exists()
+        assert (started.session_dir / "TODO.md").exists()
+        assert (started.session_dir / "FINDINGS.md").exists()
+
+    def test_add_finding(self, temp_workspace: Path):
+        """One added finding is recorded, counted and gives non-zero coverage."""
+        started = ResearchSession("test query", session_id="test123")
+        started.initialize()
+        sample = Finding(
+            content="Test finding",
+            source_url="https://example.com",
+            source_title="Example",
+            confidence=0.9,
+        )
+        started.add_finding(sample)
+
+        assert len(started.findings) == 1
+        assert started.ralph_loop.state.findings_count == 1
+        assert started.ralph_loop.state.coverage_score > 0
+
+    def test_coverage_calculation(self, temp_workspace: Path):
+        """Ten well-rated findings over four source types give coverage in (0.7, 1.0]."""
+        started = ResearchSession("test query", session_id="test123")
+        started.initialize()
+        rotation = [
+            SourceType.WEB,
+            SourceType.ARXIV,
+            SourceType.GITHUB,
+            SourceType.DOCS,
+        ]
+        for idx in range(10):
+            rated = SourceQuality.from_source_type(
+                rotation[idx % len(rotation)],
+                relevance_score=0.9,
+                recency_score=0.9,
+            )
+            entry = Finding(
+                content=f"Finding {idx}",
+                source_url=f"https://example{idx}.com",
+                source_title=f"Source {idx}",
+                confidence=0.9,
+                quality=rated,
+            )
+            started.add_finding(entry)
+        score = started.ralph_loop.state.coverage_score
+        assert score > 0.7
+        assert score <= 1.0
+
+    def test_complete_iteration(self, temp_workspace: Path):
+        """complete_iteration() returns False here and moves on to iteration 2."""
+        started = ResearchSession("test query", session_id="test123")
+        started.initialize()
+        finished = started.complete_iteration()
+        assert finished is False
+        assert started.ralph_loop.state.iteration == 2
+
+    def test_finalize(self, temp_workspace: Path):
+        """finalize() writes SUMMARY.md and removes the loop state file."""
+        started = ResearchSession("test query", session_id="test123")
+        started.initialize()
+
+        report = started.finalize()
+        assert report.exists()
+        assert "SUMMARY.md" in str(report)
+        assert not started.ralph_loop.STATE_FILE.exists()
--- a/tests/researcher/test_runner.py
+++ b/tests/researcher/test_runner.py
@@ -0,0 +1,169 @@
+"""Tests for ResearchRunner."""
+
+import tempfile
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from research_agent.researcher.depth import ResearchDepth, get_depth_config
+from research_agent.researcher.runner import ResearchRunner, run_deep_research
+
+
+class TestResearchRunner:
+    """Tests for the ResearchRunner class."""
+
+    def test_init_with_string_depth(self):
+        """A depth given as a string is resolved to the matching enum member."""
+        from_string = ResearchRunner("test query", depth="deep")
+        assert from_string.depth == ResearchDepth.DEEP
+        assert from_string.query == "test query"
+
+    def test_init_with_enum_depth(self):
+        """A depth given as a ResearchDepth member is kept as is."""
+        from_enum = ResearchRunner("test query", depth=ResearchDepth.EXHAUSTIVE)
+        assert from_enum.depth == ResearchDepth.EXHAUSTIVE
+
+    def test_config_loaded(self):
+        """The runner's config matches get_depth_config() for the same depth."""
+        runner = ResearchRunner("test query", depth="deep")
+        loaded = runner.config
+        reference = get_depth_config(ResearchDepth.DEEP)
+
+        assert loaded.max_ralph_iterations == reference.max_ralph_iterations
+        assert loaded.coverage_threshold == reference.coverage_threshold
+
+    def test_session_initialized(self):
+        """The runner creates a ResearchSession carrying the query."""
+        standard = ResearchRunner("test query", depth="standard")
+        assert standard.session is not None
+        assert standard.session.query == "test query"
+
+    def test_build_iteration_prompt(self):
+        """The prompt names the query, the iteration, the token and the threshold."""
+        deep_runner = ResearchRunner("Context Engineering 분석", depth="deep")
+        text = deep_runner._build_iteration_prompt(1)
+
+        assert "Context Engineering 분석" in text
+        assert "Iteration 1/" in text
+        assert "RESEARCH_COMPLETE" in text
+        assert str(deep_runner.config.coverage_threshold) in text or "85%" in text
+
+    def test_build_iteration_prompt_unlimited(self):
+        """The prompt for a run without an iteration cap still names the iteration."""
+        with patch.object(
+            ResearchRunner,
+            "__init__",
+            lambda self, *args, **kwargs: None,
+        ):
+            bare = ResearchRunner.__new__(ResearchRunner)
+            bare.query = "test"
+            bare.config = MagicMock()
+            bare.config.max_ralph_iterations = 0  # unlimited
+            bare.config.coverage_threshold = 0.85
+            bare.session = MagicMock()
+            bare.session.session_id = "test123"
+            bare.session.ralph_loop = MagicMock()
+            bare.session.ralph_loop.state = MagicMock()
+            bare.session.ralph_loop.state.findings_count = 0
+            bare.session.ralph_loop.state.coverage_score = 0.0
+
+            text = bare._build_iteration_prompt(5)
+            # With no cap, only the current iteration is expected in the prompt.
+            assert "Iteration 5" in text
+
+
+class TestCheckCompletion:
+    """Tests for the completion-check logic."""
+
+    def setup_method(self):
+        """Create a fresh runner for every test."""
+        # Use "deep": "quick" allows one iteration and may count as complete at once.
+        self.runner = ResearchRunner("test", depth="deep")
+
+    def test_completion_by_promise_tag(self):
+        """A response carrying the promise tag is detected as complete."""
+        result = {
+            "messages": [
+                MagicMock(content="Research done <promise>RESEARCH_COMPLETE</promise>")
+            ]
+        }
+        assert self.runner._check_completion(result) is True
+
+    def test_completion_by_keyword(self):
+        """The bare RESEARCH_COMPLETE keyword is detected as complete."""
+        result = {"messages": [MagicMock(content="RESEARCH_COMPLETE - all done")]}
+        assert self.runner._check_completion(result) is True
+
+    def test_not_complete(self):
+        """No keyword, coverage 0.5 and iteration 1: not complete."""
+        result = {"messages": [MagicMock(content="Still working on it...")]}
+        self.runner.session.ralph_loop.state.coverage_score = 0.5
+        self.runner.session.ralph_loop.state.iteration = 1
+        assert self.runner._check_completion(result) is False
+
+    def test_completion_by_coverage(self):
+        """Coverage alone completes the run."""
+        result = {"messages": [MagicMock(content="Working...")]}
+        # Coverage at or above the threshold counts as complete.
+        self.runner.session.ralph_loop.state.coverage_score = 0.90
+        assert self.runner._check_completion(result) is True
+
+
+class TestRunDeepResearchFunction:
+    """Tests for the run_deep_research function."""
+
+    def test_function_is_async(self):
+        """run_deep_research must be a coroutine function."""
+        import inspect
+
+        # Callers await it, so a plain function here would break them.
+        assert inspect.iscoroutinefunction(run_deep_research)
+
+    def test_function_signature(self):
+        """The signature exposes the query, depth and model parameters."""
+        import inspect
+
+        sig = inspect.signature(run_deep_research)
+        params = list(sig.parameters.keys())
+        assert "query" in params
+        assert "depth" in params
+        assert "model" in params
+
+
+class TestCLIIntegration:
+    """CLI integration tests."""
+
+    def test_module_can_be_run(self):
+        """The runner module exposes a callable main entry point."""
+        from research_agent.researcher import runner
+
+        assert hasattr(runner, "main")
+        assert callable(runner.main)
+
+    def test_argparse_setup(self):
+        """main can be imported directly from the runner module."""
+        from research_agent.researcher.runner import main
+
+        # Indirect check only: main is not executed here, so its argument
+        # parsing is not exercised.
+        # NOTE(review): this asserts no more than test_module_can_be_run does.
+        assert callable(main)
+
+
+class TestSessionWorkspace:
+    """Session workspace tests."""
+
+    def test_session_dir_created(self):
+        """initialize() creates the session dir with TODO.md and FINDINGS.md."""
+        mod = "research_agent.researcher.ralph_loop"
+        with tempfile.TemporaryDirectory() as tmp:
+            with (
+                patch(f"{mod}.ResearchSession.WORKSPACE", Path(tmp)),
+                patch(f"{mod}.ResearchRalphLoop.STATE_FILE", Path(tmp) / "state.md"),
+            ):
+                runner = ResearchRunner("test query", depth="quick")
+                runner.session.initialize()
+                assert runner.session.session_dir.exists()
+                assert (runner.session.session_dir / "TODO.md").exists()
+                assert (runner.session.session_dir / "FINDINGS.md").exists()
--- a/tests/researcher/test_tools.py
+++ b/tests/researcher/test_tools.py
@@ -0,0 +1,115 @@
+"""연구 도구 테스트 - 실제 API 호출 사용."""
+
+import pytest
+
+from research_agent.tools import (
+    comprehensive_search,
+    github_code_search,
+    library_docs_search,
+)
+
+
+class TestGitHubCodeSearch:
+    """Tests for the ``github_code_search`` tool."""
+
+    def test_tool_exists(self):
+        """The tool object is defined and has a callable ``invoke``."""
+        tool = github_code_search
+        assert tool is not None and callable(tool.invoke)
+
+    def test_tool_has_description(self):
+        """The tool carries a description that mentions GitHub."""
+        description = github_code_search.description
+        assert description is not None and "GitHub" in description
+
+    def test_successful_search(self):
+        """A common query returns matching output (live API call)."""
+        payload = {"query": "useState(", "max_results": 3}
+        output = github_code_search.invoke(payload)
+
+        assert "useState" in output
+        # Accept a repo link, or at least the absence of the empty notice.
+        assert "No GitHub code found" not in output or "github.com" in output
+
+    def test_no_results(self):
+        """A nonsense query yields the empty-result notice (live API call)."""
+        payload = {"query": "xyznonexistent_pattern_abc123_impossible"}
+        output = github_code_search.invoke(payload)
+
+        assert "No GitHub code found" in output
+
+    def test_language_filter(self):
+        """The ``language`` filter is accepted by the tool (live API call)."""
+        payload = {"query": "def test_", "language": ["python"], "max_results": 3}
+        output = github_code_search.invoke(payload)
+
+        # Either Python shows up in the output or the search came back empty.
+        found_python = "python" in output.lower()
+        assert found_python or "No GitHub code found" in output
+
+
+class TestLibraryDocsSearch:
+    """Tests for the ``library_docs_search`` tool."""
+
+    def test_tool_exists(self):
+        """The tool object is defined and has a callable ``invoke``."""
+        tool = library_docs_search
+        assert tool is not None and callable(tool.invoke)
+
+    def test_tool_has_description(self):
+        """The description mentions libraries, in Korean or English."""
+        description = library_docs_search.description
+        assert description is not None
+
+        mentions_korean = "라이브러리" in description
+        assert mentions_korean or "library" in description.lower()
+
+    def test_successful_search(self):
+        """A docs lookup for a known library (live API call)."""
+        payload = {"library_name": "langchain", "query": "how to use agents"}
+        output = library_docs_search.invoke(payload).lower()
+
+        # The output names the library on success; an error message
+        # is tolerated as well.
+        assert "langchain" in output or "error" in output
+
+    def test_library_not_found(self):
+        """An unknown library name is reported as such (live API call)."""
+        payload = {"library_name": "xyznonexistent_lib_abc123", "query": "test"}
+        output = library_docs_search.invoke(payload).lower()
+
+        # Either a "not found" message or an error message is accepted.
+        assert "not found" in output or "error" in output
+
+
+class TestComprehensiveSearchWithGitHub:
+    """GitHub integration tests for ``comprehensive_search``."""
+
+    def test_includes_github_source(self):
+        """Requesting the ``github`` source mentions GitHub (live API call)."""
+        payload = {"query": "useState(", "sources": ["github"]}
+        payload["max_results_per_source"] = 2
+        output = comprehensive_search.invoke(payload)
+
+        assert "GitHub" in output
+
+
+class TestComprehensiveSearchWithDocs:
+    """Docs integration tests for ``comprehensive_search``."""
+
+    def test_includes_docs_source(self):
+        """Requesting the ``docs`` source refers to docs (live API call)."""
+        # Only the docs source is requested, with langchain as the library.
+        payload = {
+            "query": "how to create agents",
+            "sources": ["docs"],
+            "library_name": "langchain",
+            "max_results_per_source": 2,
+        }
+        output = comprehensive_search.invoke(payload)
+
+        # Accept the Korean heading, the English heading, or "docs" in
+        # any letter case.
+        has_heading = "공식 문서" in output or "Documentation" in output
+        mentions_docs = "docs" in output.lower()
+        assert has_heading or mentions_docs