diff --git a/.claude/research-ralph-loop.local.md b/.claude/research-ralph-loop.local.md
deleted file mode 100644
index 8505620..0000000
--- a/.claude/research-ralph-loop.local.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-active: true
-iteration: 1
-max_iterations: 1
-completion_promise: "RESEARCH_COMPLETE"
-started_at: "2026-01-12T06:46:24.266812+00:00"
-findings_count: 0
-coverage_score: 0.0
----
-
-## Research Iteration 1/1
-
-### Original Query
-test query
-
-### Previous Work
-Check `research_workspace/` for previous findings.
-Read TODO.md for tracked progress.
-
-### Instructions
-1. Review existing findings
-2. Identify knowledge gaps
-3. Conduct targeted searches using: web
-4. Update research files with new findings
-5. Update TODO.md with progress
-
-### Completion Criteria
-Output `<promise>RESEARCH_COMPLETE</promise>` ONLY when:
-- Coverage score >= 0.5 (current: 0.00)
-- All major aspects addressed
-- Findings cross-validated with 2+ sources
-- DO NOT lie to exit
-
-### Current Stats
-- Iteration: 1
-- Findings: 0
-- Coverage: 0.00%
-
diff --git a/Context_Engineering_Research.ipynb b/Context_Engineering_Research.ipynb
index 8951e73..1c39068 100644
--- a/Context_Engineering_Research.ipynb
+++ b/Context_Engineering_Research.ipynb
@@ -107,10 +107,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "offloading_demo",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "토큰 임계값: 20,000\n",
+      "축출 경로: /large_tool_results\n",
+      "미리보기 줄 수: 10\n"
+     ]
+    }
+   ],
    "source": [
     "from context_engineering_research_agent.context_strategies.offloading import (\n",
     "    ContextOffloadingStrategy,\n",
@@ -130,10 +140,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "offloading_test",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "짧은 콘텐츠: 600 자 → 축출 대상: False\n",
+      "대용량 콘텐츠: 210,000 자 → 축출 대상: True\n"
+     ]
+    }
+   ],
    "source": [
     "strategy = ContextOffloadingStrategy(config=config)\n",
     "\n",
@@ -167,10 +186,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "reduction_demo",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "임계값: 85.0%\n",
+      "컨텍스트 윈도우: 200,000 토큰\n",
+      "Compaction 대상 나이: 10 메시지\n",
+      "최소 유지 메시지: 5\n"
+     ]
+    }
+   ],
    "source": [
     "from context_engineering_research_agent.context_strategies.reduction import (\n",
     "    ContextReductionStrategy,\n",
@@ -192,10 +222,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "reduction_test",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "컨텍스트 사용률: 25.0%\n",
+      "축소 필요: False\n"
+     ]
+    }
+   ],
    "source": [
     "from langchain_core.messages import AIMessage, HumanMessage\n",
     "\n",
@@ -234,10 +273,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "retrieval_demo",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "기본 읽기 제한: 500 줄\n",
+      "grep 최대 결과: 100\n",
+      "glob 최대 결과: 100\n",
+      "줄 길이 제한: 2000 자\n"
+     ]
+    }
+   ],
    "source": [
     "from context_engineering_research_agent.context_strategies.retrieval import (\n",
     "    ContextRetrievalStrategy,\n",
@@ -283,10 +333,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "isolation_demo",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "기본 모델: gpt-4.1\n",
+      "범용 에이전트 포함: True\n",
+      "제외 상태 키: ('messages', 'todos', 'structured_response')\n"
+     ]
+    }
+   ],
    "source": [
     "from context_engineering_research_agent.context_strategies.isolation import (\n",
     "    ContextIsolationStrategy,\n",
@@ -327,10 +387,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "caching_demo",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "최소 캐싱 토큰: 1,024\n",
+      "캐시 컨트롤 타입: ephemeral\n",
+      "시스템 프롬프트 캐싱: True\n",
+      "도구 캐싱: True\n"
+     ]
+    }
+   ],
    "source": [
     "from context_engineering_research_agent.context_strategies.caching import (\n",
     "    ContextCachingStrategy,\n",
@@ -352,10 +423,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "caching_test",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "짧은 콘텐츠: 11 자 → 캐싱 대상: False\n",
+      "긴 콘텐츠: 5,500 자 → 캐싱 대상: True\n"
+     ]
+    }
+   ],
    "source": [
     "strategy = ContextCachingStrategy(config=config)\n",
     "\n",
@@ -380,15 +460,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "id": "agent_create",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "에이전트 타입: CompiledStateGraph\n"
+     ]
+    }
+   ],
    "source": [
     "from context_engineering_research_agent import create_context_aware_agent\n",
     "\n",
     "agent = create_context_aware_agent(\n",
-    "    model_name=\"gpt-4.1\",\n",
+    "    model=\"gpt-4.1\",\n",
     "    enable_offloading=True,\n",
     "    enable_reduction=True,\n",
     "    enable_caching=True,\n",
@@ -431,26 +519,6 @@
     "| 8 | Poisoning | 검증되지 않은 사실이 메모리를 오염 | 출처 태깅 / 검증 게이트 / 격리 |\n"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "comparison_setup",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from context_engineering_research_agent.context_strategies.offloading import (\n",
-    "    ContextOffloadingStrategy, OffloadingConfig\n",
-    ")\n",
-    "from context_engineering_research_agent.context_strategies.reduction import (\n",
-    "    ContextReductionStrategy, ReductionConfig\n",
-    ")\n",
-    "from langchain_core.messages import AIMessage, HumanMessage, ToolMessage\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"전략 비교를 위한 테스트 데이터 생성\")\n",
-    "print(\"=\" * 60)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "exp1_offloading",
@@ -463,10 +531,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "id": "exp1_code",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "작은 결과 크기: 23 자\n",
+      "대용량 결과 크기: 305,889 자\n",
+      "\n",
+      "[Offloading 비활성화 시]\n",
+      "  작은 결과 축출: False\n",
+      "  대용량 결과 축출: False\n",
+      "  → 대용량 결과가 컨텍스트에 그대로 포함됨\n",
+      "\n",
+      "[Offloading 활성화 시]\n",
+      "  작은 결과 축출: False\n",
+      "  대용량 결과 축출: True\n",
+      "  → 대용량 결과는 파일로 저장, 미리보기만 컨텍스트에 포함\n",
+      "\n",
+      "미리보기 크기: 6,159 자 (원본의 2.0%)\n"
+     ]
+    }
+   ],
    "source": [
     "small_result = \"검색 결과: 항목 1, 항목 2, 항목 3\"\n",
     "large_result = \"\\n\".join([f\"검색 결과 {i}: \" + \"상세 내용 \" * 100 for i in range(500)])\n",
@@ -510,10 +599,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "id": "exp2_code",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[Reduction 비활성화 시]\n",
+      "  메시지 수: 85\n",
+      "  추정 토큰: 2,972\n",
+      "  → 모든 도구 호출/결과가 컨텍스트에 유지됨\n",
+      "\n",
+      "[Reduction 활성화 시 - Compaction]\n",
+      "  메시지 수: 85 → 60\n",
+      "  추정 토큰: 2,972 → 2,350\n",
+      "  절약된 토큰: 622 (20.9%)\n",
+      "  → 오래된 도구 호출/결과가 제거되어 컨텍스트 효율화\n"
+     ]
+    }
+   ],
    "source": [
     "messages_with_tools = []\n",
     "for i in range(30):\n",
@@ -559,10 +665,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "id": "exp3_code",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "============================================================\n",
+      "시나리오: 복잡한 연구 작업 수행\n",
+      "============================================================\n",
+      "\n",
+      "[시나리오 설정]\n",
+      "  대화 턴 수: 50\n",
+      "  도구 호출 수: 40\n",
+      "  대용량 결과 수: 5\n",
+      "  평균 결과 크기: 100k 자\n",
+      "\n",
+      "[모든 전략 비활성화 시]\n",
+      "  예상 컨텍스트 크기: 537,000 자 (~134,250 토큰)\n",
+      "  문제: 컨텍스트 윈도우 초과 가능성 높음\n",
+      "\n",
+      "[Offloading만 활성화 시]\n",
+      "  예상 컨텍스트 크기: 42,000 자 (~10,500 토큰)\n",
+      "  절약: 495,000 자 (92.2%)\n",
+      "\n",
+      "[Offloading + Reduction 활성화 시]\n",
+      "  예상 컨텍스트 크기: 25,200 자 (~6,300 토큰)\n",
+      "  총 절약: 511,800 자 (95.3%)\n",
+      "\n",
+      "[+ Caching 활성화 시 추가 효과]\n",
+      "  시스템 프롬프트 캐싱으로 반복 호출 비용 90% 절감\n",
+      "  응답 속도 향상\n"
+     ]
+    }
+   ],
    "source": [
     "print(\"=\" * 60)\n",
     "print(\"시나리오: 복잡한 연구 작업 수행\")\n",
@@ -616,10 +754,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "id": "exp4_code",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "에이전트 생성 비교\n",
+      "============================================================\n",
+      "\n",
+      "[기본 (모두 비활성화)]\n",
+      "  Offloading: ❌\n",
+      "  Reduction:  ❌\n",
+      "  Caching:    ❌\n",
+      "  에이전트 타입: CompiledStateGraph\n",
+      "\n",
+      "[Offloading만]\n",
+      "  Offloading: ✅\n",
+      "  Reduction:  ❌\n",
+      "  Caching:    ❌\n",
+      "  에이전트 타입: CompiledStateGraph\n",
+      "\n",
+      "[Reduction만]\n",
+      "  Offloading: ❌\n",
+      "  Reduction:  ✅\n",
+      "  Caching:    ❌\n",
+      "  에이전트 타입: CompiledStateGraph\n",
+      "\n",
+      "[모두 활성화]\n",
+      "  Offloading: ✅\n",
+      "  Reduction:  ✅\n",
+      "  Caching:    ✅\n",
+      "  에이전트 타입: CompiledStateGraph\n",
+      "\n",
+      "============================================================\n",
+      "모든 에이전트가 성공적으로 생성되었습니다.\n"
+     ]
+    }
+   ],
    "source": [
     "from context_engineering_research_agent import create_context_aware_agent\n",
     "\n",
@@ -635,7 +809,7 @@
     "\n",
     "for cfg in configs:\n",
     "    agent = create_context_aware_agent(\n",
-    "        model_name=\"gpt-4.1\",\n",
+    "        model=\"gpt-4.1\",\n",
     "        enable_offloading=cfg[\"offloading\"],\n",
     "        enable_reduction=cfg[\"reduction\"],\n",
     "        enable_caching=cfg[\"caching\"],\n",
@@ -650,751 +824,6 @@
     "print(\"모든 에이전트가 성공적으로 생성되었습니다.\")"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "exp5_8_real_intro",
-   "metadata": {},
-   "source": [
-    "---\n",
-    "\n",
-    "## 실험 5~8: 실패 모드 실험 (실제 실행 + 로그 기반)\n",
-    "\n",
-    "이 섹션은 앞선 “순수 파이썬 시뮬레이션”을 넘어서,\n",
-    "실제로 `langchain.agents.create_agent` + **Middleware**를 조합해 실행하면서\n",
-    "메시지/툴콜/툴결과 로그를 확인합니다.\n",
-    "\n",
-    "참고(공식 built-in middleware): https://docs.langchain.com/oss/python/langchain/middleware/built-in\n",
-    "\n",
-    "- Tool selection: `LLMToolSelectorMiddleware`\n",
-    "- Tool call limiting: `ToolCallLimitMiddleware`\n",
-    "\n",
-    "또한 deepagents에서 제공하는 **FilesystemMiddleware**(파일 툴 스택)를 함께 사용합니다.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "id": "mw_real_helpers",
-   "metadata": {},
-   "execution_count": null,
-   "outputs": [],
-   "source": [
-    "from __future__ import annotations\n",
-    "\n",
-    "import json\n",
-    "import uuid\n",
-    "from collections.abc import Callable\n",
-    "from dataclasses import dataclass\n",
-    "from typing import Any\n",
-    "\n",
-    "from deepagents.backends import StateBackend\n",
-    "from deepagents.backends.utils import create_file_data\n",
-    "from deepagents.middleware.filesystem import FilesystemMiddleware\n",
-    "from langchain.agents import create_agent\n",
-    "from langchain.agents.middleware import LLMToolSelectorMiddleware, ToolCallLimitMiddleware\n",
-    "from langchain.agents.middleware.types import AgentMiddleware\n",
-    "from langchain_core.language_models import BaseChatModel\n",
-    "from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage\n",
-    "from langchain_core.outputs import ChatGeneration, ChatResult\n",
-    "from langchain_core.runnables import RunnableLambda\n",
-    "from langchain_core.tools import tool\n",
-    "from langgraph.types import Overwrite\n",
-    "\n",
-    "\n",
-    "def _extract_valid_tool_names_from_schema(schema: dict[str, Any]) -> list[str]:\n",
-    "    \"\"\"Extract tool-name enum from the JSON schema used by LLMToolSelectorMiddleware.\"\"\"\n",
-    "    # The schema is produced from a Literal enum of tool names.\n",
-    "    # We look for any 'enum' list nested in the schema.\n",
-    "    enums: list[str] = []\n",
-    "\n",
-    "    def walk(node: Any) -> None:\n",
-    "        if isinstance(node, dict):\n",
-    "            if 'enum' in node and isinstance(node['enum'], list):\n",
-    "                for v in node['enum']:\n",
-    "                    if isinstance(v, str):\n",
-    "                        enums.append(v)\n",
-    "            for v in node.values():\n",
-    "                walk(v)\n",
-    "        elif isinstance(node, list):\n",
-    "            for v in node:\n",
-    "                walk(v)\n",
-    "\n",
-    "    walk(schema)\n",
-    "    # Deduplicate while preserving order\n",
-    "    seen: set[str] = set()\n",
-    "    out: list[str] = []\n",
-    "    for name in enums:\n",
-    "        if name not in seen:\n",
-    "            seen.add(name)\n",
-    "            out.append(name)\n",
-    "    return out\n",
-    "\n",
-    "\n",
-    "class DeterministicStructuredSelectorModel(BaseChatModel):\n",
-    "    \"\"\"Offline tool-selection model compatible with `with_structured_output(schema)`.\n",
-    "\n",
-    "    This is used to drive LangChain's `LLMToolSelectorMiddleware` without API keys.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, selector: Callable[[str, list[str]], list[str]]):\n",
-    "        super().__init__()\n",
-    "        self._selector = selector\n",
-    "\n",
-    "    @property\n",
-    "    def _llm_type(self) -> str:\n",
-    "        return 'deterministic-structured-selector'\n",
-    "\n",
-    "    @property\n",
-    "    def _identifying_params(self) -> dict[str, Any]:\n",
-    "        return {}\n",
-    "\n",
-    "    def _generate(\n",
-    "        self,\n",
-    "        messages: list[BaseMessage],\n",
-    "        stop: list[str] | None = None,\n",
-    "        run_manager=None,\n",
-    "        **kwargs: Any,\n",
-    "    ) -> ChatResult:\n",
-    "        # Not used by the tool selector middleware (it calls with_structured_output).\n",
-    "        _ = (messages, stop, run_manager, kwargs)\n",
-    "        return ChatResult(generations=[ChatGeneration(message=AIMessage(content='{}'))])\n",
-    "\n",
-    "    def with_structured_output(self, schema: dict[str, Any], **kwargs: Any):  # type: ignore[override]\n",
-    "        _ = kwargs\n",
-    "        valid = _extract_valid_tool_names_from_schema(schema)\n",
-    "\n",
-    "        def _invoke(msgs: list[Any]) -> dict[str, Any]:\n",
-    "            # msgs contains a system dict + last HumanMessage.\n",
-    "            last_user = ''\n",
-    "            for m in reversed(msgs):\n",
-    "                if isinstance(m, HumanMessage):\n",
-    "                    last_user = m.content\n",
-    "                    break\n",
-    "            selected = self._selector(last_user, valid)\n",
-    "            return {'tools': selected}\n",
-    "\n",
-    "        return RunnableLambda(_invoke)\n",
-    "\n",
-    "\n",
-    "class HeuristicToolCallingModel(BaseChatModel):\n",
-    "    \"\"\"Offline tool-calling model that reacts to the *currently available tools*.\n",
-    "\n",
-    "    This makes the effect of tool-selection middleware observable without external LLM calls.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, *, confusion_threshold: int = 10):\n",
-    "        super().__init__()\n",
-    "        self._bound_tool_names: list[str] = []\n",
-    "        self._confusion_threshold = confusion_threshold\n",
-    "\n",
-    "    @property\n",
-    "    def _llm_type(self) -> str:\n",
-    "        return 'heuristic-tool-calling'\n",
-    "\n",
-    "    @property\n",
-    "    def _identifying_params(self) -> dict[str, Any]:\n",
-    "        return {'confusion_threshold': self._confusion_threshold}\n",
-    "\n",
-    "    def bind_tools(self, tools: list[Any], **kwargs: Any):  # noqa: ANN401\n",
-    "        _ = kwargs\n",
-    "        # Tools may include dict tool specs; filter those out.\n",
-    "        self._bound_tool_names = [t.name for t in tools if hasattr(t, 'name')]\n",
-    "        return self\n",
-    "\n",
-    "    def _generate(\n",
-    "        self,\n",
-    "        messages: list[BaseMessage],\n",
-    "        stop: list[str] | None = None,\n",
-    "        run_manager=None,\n",
-    "        **kwargs: Any,\n",
-    "    ) -> ChatResult:\n",
-    "        _ = (stop, run_manager, kwargs)\n",
-    "\n",
-    "        # If the last message is a tool output, produce a final response.\n",
-    "        if messages and isinstance(messages[-1], ToolMessage):\n",
-    "            tool_msg = messages[-1]\n",
-    "            return ChatResult(\n",
-    "                generations=[\n",
-    "                    ChatGeneration(\n",
-    "                        message=AIMessage(\n",
-    "                            content=(\n",
-    "                                f\"[final] saw tool={tool_msg.name} status={tool_msg.status}\\n\"\n",
-    "                                f\"{tool_msg.content}\".strip()\n",
-    "                            )\n",
-    "                        )\n",
-    "                    )\n",
-    "                ]\n",
-    "            )\n",
-    "\n",
-    "        # Find the last user message.\n",
-    "        user_text = ''\n",
-    "        for m in reversed(messages):\n",
-    "            if isinstance(m, HumanMessage):\n",
-    "                user_text = m.content\n",
-    "                break\n",
-    "\n",
-    "        tool_count = len(self._bound_tool_names)\n",
-    "\n",
-    "        # Confusion heuristic: if too many tools, prefer (wrong) web_search.\n",
-    "        if tool_count >= self._confusion_threshold and 'web_search' in self._bound_tool_names:\n",
-    "            chosen = 'web_search'\n",
-    "            args = {'query': user_text}\n",
-    "        else:\n",
-    "            # Prefer filesystem listing if available.\n",
-    "            chosen = 'ls' if 'ls' in self._bound_tool_names else (self._bound_tool_names[0] if self._bound_tool_names else 'ls')\n",
-    "            args = {'path': '/project'}\n",
-    "\n",
-    "        tool_call_id = f\"call_{uuid.uuid4().hex[:8]}\"\n",
-    "        msg = AIMessage(\n",
-    "            content=f\"[debug] tool_count={tool_count} chosen={chosen}\",\n",
-    "            tool_calls=[{'id': tool_call_id, 'name': chosen, 'args': args, 'type': 'tool_call'}],\n",
-    "        )\n",
-    "        return ChatResult(generations=[ChatGeneration(message=msg)])\n",
-    "\n",
-    "\n",
-    "def _print_messages(messages: list[BaseMessage]) -> None:\n",
-    "    for i, m in enumerate(messages):\n",
-    "        if isinstance(m, HumanMessage):\n",
-    "            print(f\"{i:02d} HUMAN: {m.content}\")\n",
-    "        elif isinstance(m, SystemMessage):\n",
-    "            print(f\"{i:02d} SYSTEM: {m.content}\")\n",
-    "        elif isinstance(m, AIMessage):\n",
-    "            tool_calls = getattr(m, 'tool_calls', None) or []\n",
-    "            print(f\"{i:02d} AI: {m.content}\")\n",
-    "            for tc in tool_calls:\n",
-    "                print(f\"     tool_call: name={tc.get('name')} id={tc.get('id')} args={tc.get('args')}\")\n",
-    "        elif isinstance(m, ToolMessage):\n",
-    "            print(f\"{i:02d} TOOL: name={m.name} status={m.status} id={m.tool_call_id}\")\n",
-    "            print(\"     content:\", str(m.content)[:200])\n",
-    "        else:\n",
-    "            print(f\"{i:02d} {type(m).__name__}: {getattr(m, 'content', '')}\")\n",
-    "\n",
-    "\n",
-    "def _sample_files() -> dict[str, dict[str, Any]]:\n",
-    "    return {\n",
-    "        '/project/README.md': create_file_data(\"\"\"# Demo\n",
-    "This is a demo file.\"\"\"),\n",
-    "        '/project/src/main.py': create_file_data(\"\"\"print(\\\"hello\\\")\n",
-    "\"\"\"),\n",
-    "        '/project/src/utils.py': create_file_data(\"\"\"def add(a, b):\n",
-    "    return a + b\n",
-    "\"\"\"),\n",
-    "    }\n",
-    "\n",
-    "\n",
-    "\n",
-    "def _make_agent(*, model: BaseChatModel, tools: list[Any], middleware: list[AgentMiddleware]):\n",
-    "    # Use StateBackend so FilesystemMiddleware can operate on in-memory `files`.\n",
-    "    backend = lambda rt: StateBackend(rt)  # noqa: E731\n",
-    "    mw = [FilesystemMiddleware(backend=backend), *middleware]\n",
-    "    return create_agent(model=model, tools=tools, middleware=mw)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "exp5_confusion",
-   "metadata": {},
-   "source": [
-    "### 실험 5: Context Confusion (도구 과다/유사 도구)\n",
-    "\n",
-    "도구가 많고 설명이 유사해질수록(특히 파일/검색류처럼 겹치는 기능이 많을수록) “올바른 도구 선택”이 흔들릴 수 있습니다.\n",
-    "\n",
-    "이 실험은 **도구 설명 기반의 단순 스코어링(lexical overlap)**으로 도구를 고르는 상황을 가정해:\n",
-    "\n",
-    "- 도구가 적을 때 vs 많을 때\n",
-    "- 유사한 도구가 많은 경우\n",
-    "\n",
-    "선택이 얼마나 불안정해지는지(상위 후보 점수 차이가 거의 없어지는지) 보여주고,\n",
-    "완화책으로 **도구 로딩 제한**과 **계층적 액션 스페이스(카테고리→도구)**를 시뮬레이션합니다.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "id": "exp5_confusion_code",
-   "metadata": {},
-   "execution_count": null,
-   "outputs": [],
-   "source": [
-    "from __future__ import annotations\n",
-    "\n",
-    "from dataclasses import dataclass\n",
-    "\n",
-    "\n",
-    "@dataclass(frozen=True)\n",
-    "class ToyTool:\n",
-    "    name: str\n",
-    "    description: str\n",
-    "    category: str\n",
-    "\n",
-    "\n",
-    "def score_tool(query: str, tool: ToyTool) -> int:\n",
-    "    q = set(query.lower().split())\n",
-    "    d = set(tool.description.lower().split())\n",
-    "    # 아주 단순한 overlap 점수(결정론적)\n",
-    "    return len(q & d)\n",
-    "\n",
-    "\n",
-    "def rank_tools(query: str, tools: list[ToyTool]) -> list[tuple[int, ToyTool]]:\n",
-    "    ranked = [(score_tool(query, t), t) for t in tools]\n",
-    "    ranked.sort(key=lambda x: (x[0], x[1].name), reverse=True)\n",
-    "    return ranked\n",
-    "\n",
-    "\n",
-    "def show_top(query: str, tools: list[ToyTool], top_k: int = 8) -> None:\n",
-    "    ranked = rank_tools(query, tools)\n",
-    "    print(f\"Query: {query}\")\n",
-    "    print(\"Top candidates:\")\n",
-    "    for score, tool in ranked[:top_k]:\n",
-    "        print(f\"  - {tool.name:18} score={score:2}  ({tool.category})\")\n",
-    "    top_scores = [s for s, _ in ranked[:top_k]]\n",
-    "    gap = (top_scores[0] - top_scores[1]) if len(top_scores) > 1 else 0\n",
-    "    print(f\"Top-1 vs Top-2 score gap: {gap}\")\n",
-    "    print()\n",
-    "\n",
-    "\n",
-    "query = \"list files in directory and show file names\"\n",
-    "\n",
-    "small_toolset = [\n",
-    "    ToyTool(\"ls\", \"list files in a directory\", \"filesystem\"),\n",
-    "    ToyTool(\"read_file\", \"read a file from the filesystem\", \"filesystem\"),\n",
-    "    ToyTool(\"web_search\", \"search the web for information\", \"web\"),\n",
-    "]\n",
-    "\n",
-    "large_similar_toolset = [\n",
-    "    ToyTool(\"ls\", \"list files in a directory\", \"filesystem\"),\n",
-    "    ToyTool(\"list_files\", \"list files in a directory and show file names\", \"filesystem\"),\n",
-    "    ToyTool(\"list_dir\", \"list directory files and file names\", \"filesystem\"),\n",
-    "    ToyTool(\"dir\", \"show directory listing and files\", \"filesystem\"),\n",
-    "    ToyTool(\"glob\", \"find files matching a pattern\", \"filesystem\"),\n",
-    "    ToyTool(\"grep\", \"search for a pattern in files\", \"filesystem\"),\n",
-    "    ToyTool(\"read_file\", \"read a file from the filesystem\", \"filesystem\"),\n",
-    "    ToyTool(\"cat\", \"print file content\", \"filesystem\"),\n",
-    "    ToyTool(\"web_search\", \"search the web for information\", \"web\"),\n",
-    "    ToyTool(\"fetch_url\", \"fetch a url and convert html to markdown\", \"web\"),\n",
-    "]\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[A] 도구가 적을 때\")\n",
-    "print(\"=\" * 60)\n",
-    "show_top(query, small_toolset)\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[B] 유사 도구가 많을 때(Confusion 유발)\")\n",
-    "print(\"=\" * 60)\n",
-    "show_top(query, large_similar_toolset)\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[C] 완화책 1: 도구 로딩 제한(카테고리 필터링)\")\n",
-    "print(\"=\" * 60)\n",
-    "filesystem_only = [t for t in large_similar_toolset if t.category == \"filesystem\"]\n",
-    "show_top(query, filesystem_only)\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[D] 완화책 2: 계층적 액션 스페이스(카테고리→도구)\")\n",
-    "print(\"=\" * 60)\n",
-    "chosen_category = \"filesystem\" if (\"file\" in query.lower() or \"directory\" in query.lower()) else \"web\"\n",
-    "print(f\"Chosen category: {chosen_category}\")\n",
-    "ranked = rank_tools(query, [t for t in large_similar_toolset if t.category == chosen_category])\n",
-    "print(f\"Chosen tool: {ranked[0][1].name}\")\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "exp5_confusion_real_md",
-   "metadata": {},
-   "source": [
-    "#### (실행) LLMToolSelectorMiddleware로 도구 선택 제한 적용\n",
-    "\n",
-    "- Baseline: 도구가 너무 많아 `web_search`로 잘못 빠짐(Confusion)\n",
-    "- With tool selection: `LLMToolSelectorMiddleware(max_tools=5)`가 tool set을 줄여 `ls`로 유도\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "id": "exp5_confusion_real_code",
-   "metadata": {},
-   "execution_count": null,
-   "outputs": [],
-   "source": [
-    "# 많은 더미 도구(유사/잡다한 도구)를 추가해 Confusion 상황을 만든다.\n",
-    "\n",
-    "@tool\n",
-    "def web_search(query: str) -> str:\n",
-    "    \"\"\"Dummy web_search tool for the experiment.\"\"\"\n",
-    "    return f\"(dummy) web_search results for query={query!r}\"\n",
-    "\n",
-    "\n",
-    "def _dummy_tool_factory(n: int):\n",
-    "    @tool(f\"dummy_tool_{n}\", description=\"dummy tool\")\n",
-    "    def _t(x: str = \"\") -> str:\n",
-    "        return f\"dummy {n} {x}\".strip()\n",
-    "\n",
-    "    return _t\n",
-    "\n",
-    "\n",
-    "dummy_tools = [_dummy_tool_factory(i) for i in range(25)]\n",
-    "all_tools = [web_search, *dummy_tools]\n",
-    "\n",
-    "# Selection model: choose filesystem tools when user asks about files/directories.\n",
-    "selector_model = DeterministicStructuredSelectorModel(\n",
-    "    selector=lambda q, valid: [\n",
-    "        name\n",
-    "        for name in [\"ls\", \"read_file\", \"glob\", \"grep\"]\n",
-    "        if name in valid\n",
-    "    ]\n",
-    ")\n",
-    "\n",
-    "user = HumanMessage(content=\"/project 아래 파일 목록을 보여줘\")\n",
-    "state = {\"messages\": [user], \"files\": _sample_files()}\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[Baseline] tool selection 없음\")\n",
-    "print(\"=\" * 60)\n",
-    "agent_baseline = _make_agent(model=HeuristicToolCallingModel(confusion_threshold=10), tools=all_tools, middleware=[])\n",
-    "result_baseline = agent_baseline.invoke(state, {\"configurable\": {\"thread_id\": \"exp5_baseline\"}})\n",
-    "_print_messages(result_baseline[\"messages\"])\n",
-    "\n",
-    "print(\"\\n\" + \"=\" * 60)\n",
-    "print(\"[With LLMToolSelectorMiddleware] max_tools=5\")\n",
-    "print(\"=\" * 60)\n",
-    "agent_selected = _make_agent(\n",
-    "    model=HeuristicToolCallingModel(confusion_threshold=10),\n",
-    "    tools=all_tools,\n",
-    "    middleware=[LLMToolSelectorMiddleware(model=selector_model, max_tools=5)],\n",
-    ")\n",
-    "result_selected = agent_selected.invoke(state, {\"configurable\": {\"thread_id\": \"exp5_selected\"}})\n",
-    "_print_messages(result_selected[\"messages\"])\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "exp6_clash",
-   "metadata": {},
-   "source": [
-    "### 실험 6: Context Clash (모순되는 연속 관찰)\n",
-    "\n",
-    "연속된 도구 결과가 서로 모순될 때(예: 같은 키에 대해 다른 값), 모델은 어떤 값을 믿어야 할지 혼란스러워지고\n",
-    "이후 행동이 꼬일 수 있습니다.\n",
-    "\n",
-    "이 실험은:\n",
-    "\n",
-    "- 관찰을 상태(state)에 병합할 때 충돌을 감지\n",
-    "- “최신값 우선” 같은 임시 규칙 대신, **재검증/불확실성 표기**를 남기는 완화책\n",
-    "\n",
-    "을 시뮬레이션합니다.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "id": "exp6_clash_code",
-   "metadata": {},
-   "execution_count": null,
-   "outputs": [],
-   "source": [
-    "from __future__ import annotations\n",
-    "\n",
-    "\n",
-    "def merge_observation(state: dict[str, object], observation: dict[str, object], *, source: str):\n",
-    "    # 관찰 병합 + 충돌 감지\n",
-    "    conflicts: list[str] = []\n",
-    "    new_state = dict(state)\n",
-    "    for k, v in observation.items():\n",
-    "        if k in new_state and new_state[k] != v:\n",
-    "            conflicts.append(f\"{k}: '{new_state[k]}' vs '{v}' (source={source})\")\n",
-    "        new_state[k] = v\n",
-    "    return new_state, conflicts\n",
-    "\n",
-    "\n",
-    "state: dict[str, object] = {}\n",
-    "\n",
-    "obs1 = {\"latest_version\": \"1.2.0\", \"release_date\": \"2025-01-01\"}\n",
-    "obs2 = {\"latest_version\": \"1.3.0\", \"release_date\": \"2025-01-01\"}  # version만 충돌\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[A] 충돌 없이 병합\")\n",
-    "print(\"=\" * 60)\n",
-    "state, c1 = merge_observation(state, obs1, source=\"tool_call_1\")\n",
-    "print(\"state:\", state)\n",
-    "print(\"conflicts:\", c1)\n",
-    "print()\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[B] 모순 관찰 입력(Clash)\")\n",
-    "print(\"=\" * 60)\n",
-    "state2, c2 = merge_observation(state, obs2, source=\"tool_call_2\")\n",
-    "print(\"state:\", state2)\n",
-    "print(\"conflicts:\")\n",
-    "for c in c2:\n",
-    "    print(\"  -\", c)\n",
-    "print()\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[C] 완화책: 충돌을 로그/검증 큐로 분리\")\n",
-    "print(\"=\" * 60)\n",
-    "final_state = dict(state)\n",
-    "conflict_log: list[str] = []\n",
-    "verification_queue: list[dict[str, object]] = []\n",
-    "\n",
-    "_, conflicts = merge_observation(final_state, obs2, source=\"tool_call_2\")\n",
-    "if conflicts:\n",
-    "    conflict_log.extend(conflicts)\n",
-    "    verification_queue.append({\"key\": \"latest_version\", \"candidates\": [\"1.2.0\", \"1.3.0\"]})\n",
-    "\n",
-    "print(\"conflict_log:\", conflict_log)\n",
-    "print(\"verification_queue:\", verification_queue)\n",
-    "\n",
-    "# (가정) 추가 검증 결과(tool_call_3)\n",
-    "verified = {\"latest_version\": \"1.3.0\"}\n",
-    "final_state, _ = merge_observation(final_state, verified, source=\"tool_call_3\")\n",
-    "print(\"verified final_state:\", final_state)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "exp6_clash_real_md",
-   "metadata": {},
-   "source": [
-    "#### (실행) 충돌 감지 미들웨어로 모순 관찰(Clash) 처리\n",
-    "\n",
-    "- Baseline: 두 소스가 서로 다른 값을 주면 “마지막 값”으로 덮어써 버림\n",
-    "- With clash detection: 충돌을 감지해 **verify tool 호출**을 유도\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "id": "exp6_clash_real_code",
-   "metadata": {},
-   "execution_count": null,
-   "outputs": [],
-   "source": [
-    "from __future__ import annotations\n",
-    "\n",
-    "from langchain.agents.middleware.types import AgentState\n",
-    "from langgraph.runtime import Runtime\n",
-    "\n",
-    "\n",
-    "@tool(description=\"Get latest version from source A\")\n",
-    "def get_version_source_a() -> str:\n",
-    "    return json.dumps({\"latest_version\": \"1.2.0\", \"source\": \"a\"})\n",
-    "\n",
-    "\n",
-    "@tool(description=\"Get latest version from source B\")\n",
-    "def get_version_source_b() -> str:\n",
-    "    return json.dumps({\"latest_version\": \"1.3.0\", \"source\": \"b\"})\n",
-    "\n",
-    "\n",
-    "@tool(description=\"Verify latest version from an authoritative source\")\n",
-    "def verify_latest_version() -> str:\n",
-    "    return json.dumps({\"latest_version\": \"1.3.0\", \"source\": \"verified\"})\n",
-    "\n",
-    "\n",
-    "class ClashDetectionMiddleware(AgentMiddleware):\n",
-    "    \"\"\"Detect conflicting JSON facts in the last tool messages and request verification.\"\"\"\n",
-    "\n",
-    "    def before_model(self, state: AgentState, runtime: Runtime[Any]) -> dict[str, Any] | None:  # noqa: ARG002\n",
-    "        messages = state.get(\"messages\", [])\n",
-    "        # Do not trigger if already verified\n",
-    "        for m in reversed(messages):\n",
-    "            if isinstance(m, ToolMessage) and m.name == \"verify_latest_version\":\n",
-    "                return None\n",
-    "\n",
-    "        # Collect last two version tool messages\n",
-    "        version_msgs: list[ToolMessage] = []\n",
-    "        for m in reversed(messages):\n",
-    "            if isinstance(m, ToolMessage) and m.name in {\"get_version_source_a\", \"get_version_source_b\"}:\n",
-    "                version_msgs.append(m)\n",
-    "            if len(version_msgs) >= 2:\n",
-    "                break\n",
-    "\n",
-    "        if len(version_msgs) < 2:\n",
-    "            return None\n",
-    "\n",
-    "        try:\n",
-    "            a = json.loads(str(version_msgs[0].content))\n",
-    "            b = json.loads(str(version_msgs[1].content))\n",
-    "        except json.JSONDecodeError:\n",
-    "            return None\n",
-    "\n",
-    "        va = a.get(\"latest_version\")\n",
-    "        vb = b.get(\"latest_version\")\n",
-    "        if va and vb and va != vb:\n",
-    "            patched = list(messages)\n",
-    "            patched.append(\n",
-    "                SystemMessage(\n",
-    "                    content=(\n",
-    "                        \"CONFLICT_DETECTED: latest_version has conflicting values. \"\n",
-    "                        \"Call verify_latest_version and use its result.\"\n",
-    "                    )\n",
-    "                )\n",
-    "            )\n",
-    "            return {\"messages\": Overwrite(patched)}\n",
-    "\n",
-    "        return None\n",
-    "\n",
-    "\n",
-    "class VersionResearchModel(BaseChatModel):\n",
-    "    def bind_tools(self, tools: list[Any], **kwargs: Any):  # noqa: ANN401\n",
-    "        _ = kwargs\n",
-    "        self._tool_names = [t.name for t in tools if hasattr(t, 'name')]\n",
-    "        return self\n",
-    "\n",
-    "    @property\n",
-    "    def _llm_type(self) -> str:\n",
-    "        return 'version-research'\n",
-    "\n",
-    "    @property\n",
-    "    def _identifying_params(self) -> dict[str, Any]:\n",
-    "        return {}\n",
-    "\n",
-    "    def _generate(self, messages: list[BaseMessage], stop=None, run_manager=None, **kwargs: Any) -> ChatResult:\n",
-    "        _ = (stop, run_manager, kwargs)\n",
-    "\n",
-    "        # Count tool results\n",
-    "        have_a = any(isinstance(m, ToolMessage) and m.name == 'get_version_source_a' for m in messages)\n",
-    "        have_b = any(isinstance(m, ToolMessage) and m.name == 'get_version_source_b' for m in messages)\n",
-    "        have_v = any(isinstance(m, ToolMessage) and m.name == 'verify_latest_version' for m in messages)\n",
-    "        conflict = any(isinstance(m, SystemMessage) and 'CONFLICT_DETECTED' in m.content for m in messages)\n",
-    "\n",
-    "        if not have_a:\n",
-    "            tcid = f\"call_{uuid.uuid4().hex[:8]}\"\n",
-    "            msg = AIMessage(content='call source a', tool_calls=[{'id': tcid, 'name': 'get_version_source_a', 'args': {}, 'type': 'tool_call'}])\n",
-    "            return ChatResult(generations=[ChatGeneration(message=msg)])\n",
-    "\n",
-    "        if not have_b:\n",
-    "            tcid = f\"call_{uuid.uuid4().hex[:8]}\"\n",
-    "            msg = AIMessage(content='call source b', tool_calls=[{'id': tcid, 'name': 'get_version_source_b', 'args': {}, 'type': 'tool_call'}])\n",
-    "            return ChatResult(generations=[ChatGeneration(message=msg)])\n",
-    "\n",
-    "        if conflict and not have_v:\n",
-    "            tcid = f\"call_{uuid.uuid4().hex[:8]}\"\n",
-    "            msg = AIMessage(content='verify', tool_calls=[{'id': tcid, 'name': 'verify_latest_version', 'args': {}, 'type': 'tool_call'}])\n",
-    "            return ChatResult(generations=[ChatGeneration(message=msg)])\n",
-    "\n",
-    "        # Finalize: choose last seen latest_version\n",
-    "        latest = None\n",
-    "        for m in reversed(messages):\n",
-    "            if isinstance(m, ToolMessage):\n",
-    "                try:\n",
-    "                    data = json.loads(str(m.content))\n",
-    "                except json.JSONDecodeError:\n",
-    "                    continue\n",
-    "                if 'latest_version' in data:\n",
-    "                    latest = data['latest_version']\n",
-    "                    break\n",
-    "        return ChatResult(generations=[ChatGeneration(message=AIMessage(content=f\"FINAL latest_version={latest}\"))])\n",
-    "\n",
-    "\n",
-    "user = HumanMessage(content=\"패키지 X의 최신 버전을 확인해줘\")\n",
-    "state = {\"messages\": [user]}\n",
-    "\n",
-    "tools = [get_version_source_a, get_version_source_b, verify_latest_version]\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[Baseline] clash detection 없음\")\n",
-    "print(\"=\" * 60)\n",
-    "agent_baseline = create_agent(model=VersionResearchModel(), tools=tools, middleware=[])\n",
-    "res1 = agent_baseline.invoke(state, {\"configurable\": {\"thread_id\": \"exp6_baseline\"}})\n",
-    "_print_messages(res1[\"messages\"])\n",
-    "\n",
-    "print(\"\\n\" + \"=\" * 60)\n",
-    "print(\"[With ClashDetectionMiddleware]\")\n",
-    "print(\"=\" * 60)\n",
-    "agent_clash = create_agent(model=VersionResearchModel(), tools=tools, middleware=[ClashDetectionMiddleware()])\n",
-    "res2 = agent_clash.invoke(state, {\"configurable\": {\"thread_id\": \"exp6_clash\"}})\n",
-    "_print_messages(res2[\"messages\"])\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "exp7_distraction",
-   "metadata": {},
-   "source": [
-    "### 실험 7: Context Distraction (장기 로그에서 반복 행동 쏠림)\n",
-    "\n",
-    "긴 실행 기록이 쌓일수록, 모델이 “새 계획”보다 “이미 했던 행동”을 반복하는 쪽으로 쏠릴 수 있습니다.\n",
-    "\n",
-    "이 실험은 LLM을 직접 호출하지 않고, 단순화된 정책으로:\n",
-    "\n",
-    "- 로그가 길수록 과거 빈도 높은 행동을 더 강하게 재선택\n",
-    "\n",
-    "되는 현상을 시뮬레이션하고,\n",
-    "완화책으로 **명시적 계획(todo/next step)**를 “강제 입력”했을 때 분포가 다시 목표 중심으로 돌아오는 모습을 보여줍니다.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "id": "exp7_distraction_code",
-   "metadata": {},
-   "execution_count": null,
-   "outputs": [],
-   "source": [
-    "from __future__ import annotations\n",
-    "\n",
-    "import math\n",
-    "from collections import Counter\n",
-    "\n",
-    "\n",
-    "def softmax(xs: list[float]) -> list[float]:\n",
-    "    m = max(xs)\n",
-    "    exps = [math.exp(x - m) for x in xs]\n",
-    "    s = sum(exps)\n",
-    "    return [e / s for e in exps]\n",
-    "\n",
-    "\n",
-    "def entropy(ps: list[float]) -> float:\n",
-    "    return -sum(p * math.log(p + 1e-12) for p in ps)\n",
-    "\n",
-    "\n",
-    "def action_distribution(actions: list[str], *, sharpness: float) -> dict[str, float]:\n",
-    "    counts = Counter(actions)\n",
-    "    keys = sorted(counts)\n",
-    "    logits = [sharpness * math.log(counts[k]) for k in keys]\n",
-    "    probs = softmax(logits)\n",
-    "    return dict(zip(keys, probs, strict=True))\n",
-    "\n",
-    "\n",
-    "def show_dist(title: str, dist: dict[str, float]) -> None:\n",
-    "    keys = sorted(dist, key=lambda k: dist[k], reverse=True)\n",
-    "    ps = [dist[k] for k in keys]\n",
-    "    print(title)\n",
-    "    for k in keys[:6]:\n",
-    "        print(f\"  - {k:14} p={dist[k]:.3f}\")\n",
-    "    print(f\"  entropy={entropy(ps):.3f}\")\n",
-    "    print()\n",
-    "\n",
-    "\n",
-    "# 과거 로그(반복 행동이 많은 상황)\n",
-    "actions = (\n",
-    "    [\"web_search\"] * 40\n",
-    "    + [\"read_file\"] * 20\n",
-    "    + [\"ls\"] * 15\n",
-    "    + [\"edit_file\"] * 5\n",
-    "    + [\"write_todos\"] * 2\n",
-    ")\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[A] 짧은 컨텍스트(덜 쏠림)\")\n",
-    "print(\"=\" * 60)\n",
-    "short_ctx = actions[:20]\n",
-    "show_dist(\"short_ctx\", action_distribution(short_ctx, sharpness=1.0))\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[B] 긴 컨텍스트(더 쏠림 / 반복 행동 강화)\")\n",
-    "print(\"=\" * 60)\n",
-    "long_ctx = actions\n",
-    "show_dist(\"long_ctx\", action_distribution(long_ctx, sharpness=2.5))\n",
-    "\n",
-    "print(\"=\" * 60)\n",
-    "print(\"[C] 완화책: '다음 행동'을 계획으로 고정(강제 next step)\")\n",
-    "print(\"=\" * 60)\n",
-    "next_step = \"write_todos\"  # 예: 계획 갱신을 강제\n",
-    "base = action_distribution(long_ctx, sharpness=2.5)\n",
-    "boost = 0.35\n",
-    "base[next_step] = base.get(next_step, 0.0) + boost\n",
-    "s = sum(base.values())\n",
-    "fixed = {k: v / s for k, v in base.items()}\n",
-    "show_dist(\"long_ctx + forced_next_step\", fixed)\n"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "exp7_distraction_real_md",
@@ -1408,10 +837,56 @@
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "id": "exp7_distraction_real_code",
    "metadata": {},
-   "execution_count": null,
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "============================================================\n",
+      "[Baseline] 제한 없음\n",
+      "============================================================\n",
+      "00 HUMAN: Context engineering을 조사해줘\n",
+      "01 AI: search loop 1\n",
+      "     tool_call: name=web_search id=call_76fe1482 args={'query': 'context engineering'}\n",
+      "02 TOOL: name=web_search status=success id=call_76fe1482\n",
+      "     content: (dummy) result for 'context engineering'\n",
+      "03 AI: search loop 2\n",
+      "     tool_call: name=web_search id=call_9e523f0a args={'query': 'context engineering'}\n",
+      "04 TOOL: name=web_search status=success id=call_9e523f0a\n",
+      "     content: (dummy) result for 'context engineering'\n",
+      "05 AI: search loop 3\n",
+      "     tool_call: name=web_search id=call_ea484b66 args={'query': 'context engineering'}\n",
+      "06 TOOL: name=web_search status=success id=call_ea484b66\n",
+      "     content: (dummy) result for 'context engineering'\n",
+      "07 AI: switch to todos\n",
+      "     tool_call: name=write_todos id=call_61a16615 args={'todos': ['summarize findings']}\n",
+      "08 TOOL: name=write_todos status=success id=call_61a16615\n",
+      "     content: {\"todos\": [\"summarize findings\"]}\n",
+      "09 AI: FINAL todo list written\n",
+      "\n",
+      "============================================================\n",
+      "[With ToolCallLimitMiddleware] web_search run_limit=1\n",
+      "============================================================\n",
+      "00 HUMAN: Context engineering을 조사해줘\n",
+      "01 AI: search loop 1\n",
+      "     tool_call: name=web_search id=call_281f3c81 args={'query': 'context engineering'}\n",
+      "02 TOOL: name=web_search status=success id=call_281f3c81\n",
+      "     content: (dummy) result for 'context engineering'\n",
+      "03 AI: search loop 2\n",
+      "     tool_call: name=web_search id=call_9b876f09 args={'query': 'context engineering'}\n",
+      "04 TOOL: name=web_search status=error id=call_9b876f09\n",
+      "     content: Tool call limit exceeded. Do not call 'web_search' again.\n",
+      "05 AI: switch to todos\n",
+      "     tool_call: name=write_todos id=call_5a2def37 args={'todos': ['summarize findings']}\n",
+      "06 TOOL: name=write_todos status=success id=call_5a2def37\n",
+      "     content: {\"todos\": [\"summarize findings\"]}\n",
+      "07 AI: FINAL todo list written\n"
+     ]
+    }
+   ],
    "source": [
     "from __future__ import annotations\n",
     "\n",
@@ -1426,7 +901,7 @@
     "    return json.dumps({\"todos\": todos})\n",
     "\n",
     "\n",
-   "class LoopingSearchModel(BaseChatModel):\n",
+    "class LoopingSearchModel(BaseChatModel):\n",
     "    def bind_tools(self, tools: list[Any], **kwargs: Any):  # noqa: ANN401\n",
     "        _ = kwargs\n",
     "        self._tool_names = [t.name for t in tools if hasattr(t, 'name')]\n",
@@ -1515,10 +990,36 @@
   },
   {
    "cell_type": "code",
+   "execution_count": 20,
    "id": "exp8_poisoning_code",
    "metadata": {},
-   "execution_count": null,
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "============================================================\n",
+      "[A] 정상 메모리\n",
+      "============================================================\n",
+      "blind plan: Install the package.\n",
+      "verified-only plan: Install the package.\n",
+      "\n",
+      "============================================================\n",
+      "[B] 오염된 메모리(Poisoning)\n",
+      "============================================================\n",
+      "blind plan: Install the package.\n",
+      "verified-only plan: Install the package.\n",
+      "\n",
+      "============================================================\n",
+      "[C] 완화책: 출처 없는 사실은 검증 요청으로 라우팅\n",
+      "============================================================\n",
+      "needs_verification:\n",
+      "  - package_installed='yes' source=None verified=False\n",
+      "\n",
+      "→ 정책: tool로 재확인 후에만 state/memory에 반영\n"
+     ]
+    }
+   ],
    "source": [
     "from __future__ import annotations\n",
     "\n",
@@ -1595,10 +1096,41 @@
   },
   {
    "cell_type": "code",
+   "execution_count": 21,
    "id": "exp8_poisoning_real_code",
    "metadata": {},
-   "execution_count": null,
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "============================================================\n",
+      "[Baseline] verification gate 없음\n",
+      "============================================================\n",
+      "00 HUMAN: 패키지 X 설치가 필요한지 판단해줘\n",
+      "01 AI: guess\n",
+      "     tool_call: name=guess_install_status id=call_ccef8e23 args={}\n",
+      "02 TOOL: name=guess_install_status status=success id=call_ccef8e23\n",
+      "     content: {\"package_installed\": \"yes\", \"verified\": false, \"source\": \"guess\"}\n",
+      "03 AI: FINAL decision=SKIP (source=guess)\n",
+      "\n",
+      "============================================================\n",
+      "[With VerificationGateMiddleware]\n",
+      "============================================================\n",
+      "00 HUMAN: 패키지 X 설치가 필요한지 판단해줘\n",
+      "01 AI: guess\n",
+      "     tool_call: name=guess_install_status id=call_70cc8071 args={}\n",
+      "02 TOOL: name=guess_install_status status=success id=call_70cc8071\n",
+      "     content: {\"package_installed\": \"yes\", \"verified\": false, \"source\": \"guess\"}\n",
+      "03 SYSTEM: UNVERIFIED_FACT_BLOCKED: Do not trust guess_install_status. Call scan_install_status and decide based on verified=true only.\n",
+      "04 AI: scan\n",
+      "     tool_call: name=scan_install_status id=call_9e23662c args={}\n",
+      "05 TOOL: name=scan_install_status status=success id=call_9e23662c\n",
+      "     content: {\"package_installed\": \"no\", \"verified\": true, \"source\": \"scan\"}\n",
+      "06 AI: FINAL decision=INSTALL (source=scan)\n"
+     ]
+    }
+   ],
    "source": [
     "from __future__ import annotations\n",
     "\n",
@@ -1718,21 +1250,6 @@
     "_print_messages(res2['messages'])\n"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "exp5_recommendation",
-   "metadata": {},
-   "source": [
-    "### 권장 설정\n",
-    "\n",
-    "| 사용 사례 | Offloading | Reduction | Caching | 이유 |\n",
-    "|----------|------------|-----------|---------|------|\n",
-    "| **짧은 대화** | ❌ | ❌ | ✅ | 오버헤드 최소화 |\n",
-    "| **일반 작업** | ✅ | ❌ | ✅ | 대용량 결과 대비 |\n",
-    "| **장시간 연구** | ✅ | ✅ | ✅ | 모든 최적화 활용 |\n",
-    "| **디버깅** | ❌ | ❌ | ❌ | 전체 컨텍스트 확인 |"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "summary",
@@ -1779,13 +1296,21 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "deepagent-context-engineering (3.13.9)",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
    "name": "python",
-   "version": "3.12.1"
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.9"
   }
  },
  "nbformat": 4,