"""Task execution and streaming logic for the CLI."""

import asyncio
import io
import json
import sys
import termios
import tty

from langchain.agents.middleware.human_in_the_loop import (
    ActionRequest,
    ApproveDecision,
    Decision,
    HITLRequest,
    HITLResponse,
    RejectDecision,
)
from langchain_core.messages import HumanMessage, ToolMessage
from langgraph.types import Command, Interrupt
from pydantic import TypeAdapter, ValidationError
from rich import box
from rich.markdown import Markdown
from rich.panel import Panel

from deepagents_cli.config import COLORS, console
from deepagents_cli.file_ops import FileOpTracker, build_approval_preview
from deepagents_cli.image_utils import create_multimodal_content
from deepagents_cli.input import ImageTracker, parse_file_mentions
from deepagents_cli.ui import (
    TokenTracker,
    format_tool_display,
    format_tool_message_content,
    render_diff_block,
    render_file_operation,
    render_todo_list,
)

_HITL_REQUEST_ADAPTER = TypeAdapter(HITLRequest)

# Approval menu rows as (ANSI colour code used when selected, label), in display order.
# Index 0 = approve, 1 = reject, 2 = auto-approve everything from now on.
_APPROVAL_MENU: tuple[tuple[str, str], ...] = (
    ("32", "승인 (Approve)"),
    ("31", "거부 (Reject)"),
    ("34", "이후 모두 자동 승인 (Auto-accept all)"),
)


def prompt_for_tool_approval(
    action_request: ActionRequest,
    assistant_id: str | None,
) -> Decision | dict:
    """Ask the user to approve or reject a tool action using an arrow-key menu.

    A panel describing the action (and a diff, when the preview provides one) is
    printed first. The terminal is then switched to raw mode and a three-row menu
    is drawn; Up/Down move the selection, Enter confirms it, ``a``/``r`` pick
    approve/reject directly. If raw mode cannot be set up, a plain ``input()``
    prompt is used instead.

    Args:
        action_request: The pending tool action; ``name`` and ``args`` are
            required keys, ``description`` is optional.
        assistant_id: Forwarded to ``build_approval_preview``.

    Returns:
        An ``ApproveDecision`` or ``RejectDecision``, or the marker dict
        ``{"type": "auto_approve_all"}`` when the user asks to auto-approve all
        further actions.

    Raises:
        KeyboardInterrupt: If the user presses Ctrl+C in the menu, or stdin
            reaches end-of-file while the menu is waiting for a key.
    """
    description = action_request.get("description", "No description available")
    name = action_request["name"]
    args = action_request["args"]
    preview = build_approval_preview(name, args, assistant_id) if name else None

    body_lines = []
    if preview:
        body_lines.append(f"[bold]{preview.title}[/bold]")
        body_lines.extend(preview.details)
        if preview.error:
            body_lines.append(f"[red]{preview.error}[/red]")
    else:
        body_lines.append(description)

    # Display action info first
    console.print(
        Panel(
            "[bold yellow]⚠️ 도구 작업 승인 필요[/bold yellow]\n\n" + "\n".join(body_lines),
            border_style="yellow",
            box=box.ROUNDED,
            padding=(0, 1),
        )
    )
    if preview and preview.diff and not preview.error:
        console.print()
        render_diff_block(preview.diff, preview.diff_title or preview.title)

    menu_size = len(_APPROVAL_MENU)
    selected = 0  # Start with approve selected

    try:
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)

        try:
            tty.setraw(fd)
            # Hide cursor during menu interaction
            sys.stdout.write("\033[?25l")
            sys.stdout.flush()

            first_render = True

            while True:
                if not first_render:
                    # Move cursor back to the first menu row, then to start of line
                    sys.stdout.write(f"\033[{menu_size}A\r")

                first_render = False

                # Display options vertically with ANSI color codes
                for index, (color, label) in enumerate(_APPROVAL_MENU):
                    sys.stdout.write("\r\033[K")  # Clear line from cursor to end
                    if index == selected:
                        # Bold, coloured, filled checkbox
                        sys.stdout.write(f"\033[1;{color}m☑ {label}\033[0m\n")
                    else:
                        # Dim with empty checkbox
                        sys.stdout.write(f"\033[2m☐ {label}\033[0m\n")

                sys.stdout.flush()

                # Read key
                char = sys.stdin.read(1)

                if not char:
                    # EOF: no key will ever arrive. Without this guard the loop
                    # would spin forever, so treat it like Ctrl+C.
                    sys.stdout.write("\r\n")
                    raise KeyboardInterrupt

                if char == "\x1b":  # ESC sequence (arrow keys)
                    next1 = sys.stdin.read(1)
                    next2 = sys.stdin.read(1)
                    if next1 == "[":
                        if next2 == "B":  # Down arrow
                            selected = (selected + 1) % menu_size
                        elif next2 == "A":  # Up arrow
                            selected = (selected - 1) % menu_size
                elif char in {"\r", "\n"}:  # Enter
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break
                elif char == "\x03":  # Ctrl+C
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    raise KeyboardInterrupt
                elif char.lower() == "a":
                    selected = 0
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break
                elif char.lower() == "r":
                    selected = 1
                    sys.stdout.write("\r\n")  # Move to start of line and add newline
                    break

        finally:
            # Show cursor again and restore the original terminal mode
            sys.stdout.write("\033[?25h")
            sys.stdout.flush()
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)

    except (termios.error, AttributeError, io.UnsupportedOperation):
        # Fallback when stdin cannot be put into raw mode: not a terminal,
        # missing (None), or an object without a file descriptor.
        console.print(" ☐ (A)승인 (기본값)")
        console.print(" ☐ (R)거부")
        console.print(" ☐ (Auto)이후 모두 자동 승인")
        choice = input("\n선택 (A/R/Auto, 기본값=Approve): ").strip().lower()
        if choice in {"r", "reject"}:
            selected = 1
        elif choice in {"auto", "auto-accept"}:
            selected = 2
        else:
            selected = 0

    # Return decision based on selection
    if selected == 0:
        return ApproveDecision(type="approve")
    if selected == 1:
        return RejectDecision(type="reject", message="User rejected the command")
    # Return special marker for auto-approve mode
    return {"type": "auto_approve_all"}


async def _report_interruption(agent, config: dict, note: str) -> None:
    """Print the "interrupted" notice and pass *note* to the agent state as a HumanMessage.

    A failure of ``agent.aupdate_state`` is reported on the console and not re-raised.
    """
    console.print("\n[yellow]사용자에 의해 중단됨[/yellow]")
    console.print("에이전트 상태 업데이트 중...", style="dim")
    try:
        await agent.aupdate_state(
            config=config,
            values={"messages": [HumanMessage(content=note)]},
        )
        console.print("다음 명령 준비 완료.\n", style="dim")
    except Exception as e:
        console.print(f"[red]경고: 에이전트 상태 업데이트 실패: {e}[/red]\n")


async def execute_task(
    user_input: str,
    agent,
    assistant_id: str | None,
    session_state,
    token_tracker: TokenTracker | None = None,
    backend=None,
    image_tracker: ImageTracker | None = None,
) -> None:
    """Send the user's input to the agent and stream its response to the console.

    The input is expanded with the contents of any mentioned files (each capped
    at 50000 characters) and with images from ``image_tracker``. The agent is
    then streamed in ``messages`` + ``updates`` mode: assistant text is buffered
    and rendered as markdown, tool calls are announced as they become parseable,
    todo-list updates are rendered, and interrupts are collected. After each
    stream, collected interrupts are answered (automatically when
    ``session_state.auto_approve`` is set, otherwise via
    ``prompt_for_tool_approval``) and the agent is resumed. If any action is
    rejected, the function prints a notice and returns without resuming.

    Args:
        user_input: Raw text typed by the user.
        agent: Object providing ``astream`` and ``aupdate_state``.
        assistant_id: Added to the run metadata when truthy; also passed to
            ``FileOpTracker`` and ``prompt_for_tool_approval``.
        session_state: Provides ``thread_id`` and the mutable ``auto_approve`` flag.
        token_tracker: When given, receives the largest input/output token
            counts seen in the streamed usage metadata.
        backend: Forwarded to ``FileOpTracker``.
        image_tracker: Source of images to attach; cleared once they have been
            encoded into the message.

    Raises:
        pydantic.ValidationError: If an interrupt payload is not a valid ``HITLRequest``.
        Exception: Any other error from the agent stream is propagated after the
            spinner has been stopped. ``asyncio.CancelledError`` and
            ``KeyboardInterrupt`` are handled here and not re-raised.
    """
    # Parse file mentions and inject content if any
    prompt_text, mentioned_files = parse_file_mentions(user_input)

    if mentioned_files:
        context_parts = [prompt_text, "\n\n## 참조된 파일 (Referenced Files)\n"]
        for file_path in mentioned_files:
            try:
                content = file_path.read_text()
                # Limit file content to reasonable size
                if len(content) > 50000:
                    content = content[:50000] + "\n... (파일 잘림)"
                context_parts.append(f"\n### {file_path.name}\nPath: `{file_path}`\n```\n{content}\n```")
            except Exception as e:
                # Best effort: an unreadable file is reported to the agent inline
                context_parts.append(f"\n### {file_path.name}\n[파일 읽기 오류: {e}]")

        final_input = "\n".join(context_parts)
    else:
        final_input = prompt_text

    # Include images in the message content
    images_to_send = []
    if image_tracker:
        images_to_send = image_tracker.get_images()
    if images_to_send:
        message_content = create_multimodal_content(final_input, images_to_send)
    else:
        message_content = final_input

    config = {
        "configurable": {"thread_id": session_state.thread_id},
        "metadata": {"assistant_id": assistant_id} if assistant_id else {},
    }

    has_responded = False
    captured_input_tokens = 0
    captured_output_tokens = 0
    current_todos = None  # Track current todo list state

    status = console.status(f"[bold {COLORS['thinking']}]에이전트가 생각 중...", spinner="dots")
    status.start()
    spinner_active = True

    tool_icons = {
        "read_file": "📖",
        "write_file": "✏️",
        "edit_file": "✂️",
        "ls": "📁",
        "glob": "🔍",
        "grep": "🔎",
        "shell": "⚡",
        "execute": "🔧",
        "web_search": "🌐",
        "http_request": "🌍",
        "task": "🤖",
        "write_todos": "📋",
    }

    file_op_tracker = FileOpTracker(assistant_id=assistant_id, backend=backend)

    # Track which tool calls we've displayed to avoid duplicates
    displayed_tool_ids = set()
    # Buffer partial tool-call chunks keyed by streaming index
    tool_call_buffers: dict[str | int, dict] = {}
    # Buffer assistant text so we can render complete markdown segments
    pending_text = ""

    def flush_text_buffer(*, final: bool = False) -> None:
        """Flush accumulated assistant text as rendered markdown when appropriate."""
        nonlocal pending_text, spinner_active, has_responded
        if not final or not pending_text.strip():
            return
        if spinner_active:
            status.stop()
            spinner_active = False
        if not has_responded:
            console.print("●", style=COLORS["agent"], markup=False, end=" ")
            has_responded = True
        markdown = Markdown(pending_text.rstrip())
        console.print(markdown, style=COLORS["agent"])
        pending_text = ""

    # Clear images from tracker after creating the message
    # (they've been encoded into the message content)
    if image_tracker:
        image_tracker.clear()

    # Stream input - may need to loop if there are interrupts
    stream_input = {"messages": [{"role": "user", "content": message_content}]}

    try:
        while True:
            interrupt_occurred = False
            hitl_response: dict[str, HITLResponse] = {}
            suppress_resumed_output = False
            # Track all pending interrupts: {interrupt_id: request_data}
            pending_interrupts: dict[str, HITLRequest] = {}

            async for chunk in agent.astream(
                stream_input,
                stream_mode=["messages", "updates"],  # Dual-mode for HITL support
                subgraphs=True,
                config=config,
                durability="exit",
            ):
                # Unpack chunk - with subgraphs=True and dual-mode, it's (namespace, stream_mode, data)
                if not isinstance(chunk, tuple) or len(chunk) != 3:
                    continue

                _namespace, current_stream_mode, data = chunk

                # Handle UPDATES stream - for interrupts and todos
                if current_stream_mode == "updates":
                    if not isinstance(data, dict):
                        continue

                    # Check for interrupts - collect ALL pending interrupts
                    if "__interrupt__" in data:
                        interrupts: list[Interrupt] = data["__interrupt__"]
                        if interrupts:
                            for interrupt_obj in interrupts:
                                # Interrupt has required fields: value (HITLRequest) and id (str)
                                # Validate the HITLRequest using TypeAdapter
                                try:
                                    validated_request = _HITL_REQUEST_ADAPTER.validate_python(interrupt_obj.value)
                                    pending_interrupts[interrupt_obj.id] = validated_request
                                    interrupt_occurred = True
                                except ValidationError as e:
                                    console.print(
                                        f"[yellow]경고: 유효하지 않은 HITL 요청 데이터: {e}[/yellow]",
                                        style="dim",
                                    )
                                    raise

                    # Extract chunk_data from updates for todo checking
                    chunk_data = next(iter(data.values())) if data else None
                    if chunk_data and isinstance(chunk_data, dict):
                        # Check for todo updates
                        if "todos" in chunk_data:
                            new_todos = chunk_data["todos"]
                            if new_todos != current_todos:
                                current_todos = new_todos
                                # Stop spinner before rendering todos
                                if spinner_active:
                                    status.stop()
                                    spinner_active = False
                                console.print()
                                render_todo_list(new_todos)
                                console.print()

                # Handle MESSAGES stream - for content and tool calls
                elif current_stream_mode == "messages":
                    # Messages stream returns (message, metadata) tuples
                    if not isinstance(data, tuple) or len(data) != 2:
                        continue

                    message, _metadata = data

                    if isinstance(message, HumanMessage):
                        content = message.text
                        if content:
                            flush_text_buffer(final=True)
                            if spinner_active:
                                status.stop()
                                spinner_active = False
                            if not has_responded:
                                console.print("●", style=COLORS["agent"], markup=False, end=" ")
                                has_responded = True
                            markdown = Markdown(content)
                            console.print(markdown, style=COLORS["agent"])
                            console.print()
                        continue

                    if isinstance(message, ToolMessage):
                        # Tool results are sent to the agent, not displayed to users
                        # Exception: show shell command errors to help with debugging
                        tool_name = getattr(message, "name", "")
                        tool_status = getattr(message, "status", "success")
                        tool_content = format_tool_message_content(message.content)
                        record = file_op_tracker.complete_with_message(message)

                        # Reset spinner message after tool completes
                        if spinner_active:
                            status.update(f"[bold {COLORS['thinking']}]에이전트가 생각 중...")

                        if tool_name == "shell" and tool_status != "success":
                            flush_text_buffer(final=True)
                            if tool_content:
                                if spinner_active:
                                    status.stop()
                                    spinner_active = False
                                console.print()
                                console.print(tool_content, style="red", markup=False)
                                console.print()
                        elif tool_content and isinstance(tool_content, str):
                            stripped = tool_content.lstrip()
                            if stripped.lower().startswith("error"):
                                flush_text_buffer(final=True)
                                if spinner_active:
                                    status.stop()
                                    spinner_active = False
                                console.print()
                                console.print(tool_content, style="red", markup=False)
                                console.print()

                        if record:
                            flush_text_buffer(final=True)
                            if spinner_active:
                                status.stop()
                                spinner_active = False
                            console.print()
                            render_file_operation(record)
                            console.print()
                            if not spinner_active:
                                status.start()
                                spinner_active = True

                        # For all other tools (web_search, http_request, etc.),
                        # results are hidden from user - agent will process and respond
                        continue

                    # Check if this is an AIMessageChunk
                    if not hasattr(message, "content_blocks"):
                        # Fallback for messages without content_blocks
                        continue

                    # Extract token usage if available
                    if token_tracker and hasattr(message, "usage_metadata"):
                        usage = message.usage_metadata
                        if usage:
                            input_toks = usage.get("input_tokens", 0)
                            output_toks = usage.get("output_tokens", 0)
                            if input_toks or output_toks:
                                captured_input_tokens = max(captured_input_tokens, input_toks)
                                captured_output_tokens = max(captured_output_tokens, output_toks)

                    # Process content blocks
                    for block in message.content_blocks:
                        block_type = block.get("type")

                        # Handle text blocks
                        if block_type == "text":
                            text = block.get("text", "")
                            if text:
                                pending_text += text

                        # Handle reasoning blocks
                        elif block_type == "reasoning":
                            flush_text_buffer(final=True)
                            reasoning = block.get("reasoning", "")
                            if reasoning and spinner_active:
                                status.stop()
                                spinner_active = False
                                # Could display reasoning differently if desired
                                # For now, skip it or handle minimally

                        # Handle tool call chunks
                        # Some models (OpenAI, Anthropic) stream tool_call_chunks
                        # Others (Gemini) don't stream them and just return the full tool_call
                        elif block_type in ("tool_call_chunk", "tool_call"):
                            chunk_name = block.get("name")
                            chunk_args = block.get("args")
                            chunk_id = block.get("id")
                            chunk_index = block.get("index")

                            # Use index as stable buffer key; fall back to id if needed
                            buffer_key: str | int
                            if chunk_index is not None:
                                buffer_key = chunk_index
                            elif chunk_id is not None:
                                buffer_key = chunk_id
                            else:
                                buffer_key = f"unknown-{len(tool_call_buffers)}"

                            buffer = tool_call_buffers.setdefault(
                                buffer_key,
                                {"name": None, "id": None, "args": None, "args_parts": []},
                            )

                            if chunk_name:
                                buffer["name"] = chunk_name
                            if chunk_id:
                                buffer["id"] = chunk_id

                            if isinstance(chunk_args, dict):
                                buffer["args"] = chunk_args
                                buffer["args_parts"] = []
                            elif isinstance(chunk_args, str):
                                if chunk_args:
                                    parts: list[str] = buffer.setdefault("args_parts", [])
                                    # Skip a fragment identical to the previous one
                                    if not parts or chunk_args != parts[-1]:
                                        parts.append(chunk_args)
                                    buffer["args"] = "".join(parts)
                            elif chunk_args is not None:
                                buffer["args"] = chunk_args

                            buffer_name = buffer.get("name")
                            buffer_id = buffer.get("id")
                            if buffer_name is None:
                                continue

                            parsed_args = buffer.get("args")
                            if isinstance(parsed_args, str):
                                if not parsed_args:
                                    continue
                                try:
                                    parsed_args = json.loads(parsed_args)
                                except json.JSONDecodeError:
                                    # Wait for more chunks to form valid JSON
                                    continue
                            elif parsed_args is None:
                                continue

                            # Ensure args are in dict form for formatter
                            if not isinstance(parsed_args, dict):
                                parsed_args = {"value": parsed_args}

                            flush_text_buffer(final=True)
                            if buffer_id is not None:
                                if buffer_id not in displayed_tool_ids:
                                    displayed_tool_ids.add(buffer_id)
                                    file_op_tracker.start_operation(buffer_name, parsed_args, buffer_id)
                                else:
                                    file_op_tracker.update_args(buffer_id, parsed_args)
                            tool_call_buffers.pop(buffer_key, None)
                            icon = tool_icons.get(buffer_name, "🔧")

                            if spinner_active:
                                status.stop()

                            if has_responded:
                                console.print()

                            display_str = format_tool_display(buffer_name, parsed_args)
                            console.print(
                                f" {icon} {display_str}",
                                style=f"dim {COLORS['tool']}",
                                markup=False,
                            )

                            # Restart spinner with context about which tool is executing
                            status.update(f"[bold {COLORS['thinking']}]{display_str} 실행 중...")
                            status.start()
                            spinner_active = True

                    if getattr(message, "chunk_position", None) == "last":
                        flush_text_buffer(final=True)

            # After streaming loop - handle interrupt if it occurred
            flush_text_buffer(final=True)

            # Handle human-in-the-loop after stream completes
            if interrupt_occurred:
                any_rejected = False

                for interrupt_id, hitl_request in pending_interrupts.items():
                    # Check if auto-approve is enabled
                    if session_state.auto_approve:
                        # Auto-approve all commands without prompting
                        decisions = []
                        for action_request in hitl_request["action_requests"]:
                            # Show what's being auto-approved (brief, dim message)
                            if spinner_active:
                                status.stop()
                                spinner_active = False

                            description = action_request.get("description", "tool action")
                            console.print()
                            console.print(f" [dim]⚡ {description}[/dim]")

                            decisions.append({"type": "approve"})

                        hitl_response[interrupt_id] = {"decisions": decisions}

                        # Restart spinner for continuation
                        if not spinner_active:
                            status.start()
                            spinner_active = True
                    else:
                        # Normal HITL flow - stop spinner and prompt user
                        if spinner_active:
                            status.stop()
                            spinner_active = False

                        # Handle human-in-the-loop approval
                        decisions = []
                        for action_index, action_request in enumerate(hitl_request["action_requests"]):
                            decision = prompt_for_tool_approval(
                                action_request,
                                assistant_id,
                            )

                            # Check if user wants to switch to auto-approve mode
                            if isinstance(decision, dict) and decision.get("type") == "auto_approve_all":
                                # Switch to auto-approve mode
                                session_state.auto_approve = True
                                console.print()
                                console.print("[bold blue]✓ 자동 승인 모드 활성화됨[/bold blue]")
                                console.print("[dim]향후 모든 도구 작업이 자동으로 승인됩니다.[/dim]")
                                console.print()

                                # Approve this action and all remaining actions in the batch
                                decisions.append({"type": "approve"})
                                for _remaining_action in hitl_request["action_requests"][action_index + 1 :]:
                                    decisions.append({"type": "approve"})
                                break

                            decisions.append(decision)

                            # Mark file operations as HIL-approved if user approved
                            if decision.get("type") == "approve":
                                tool_name = action_request.get("name")
                                if tool_name in {"write_file", "edit_file"}:
                                    file_op_tracker.mark_hitl_approved(tool_name, action_request.get("args", {}))

                        if any(decision.get("type") == "reject" for decision in decisions):
                            any_rejected = True

                        hitl_response[interrupt_id] = {"decisions": decisions}

                suppress_resumed_output = any_rejected

            if interrupt_occurred and hitl_response:
                if suppress_resumed_output:
                    if spinner_active:
                        status.stop()
                        spinner_active = False

                    console.print("[yellow]명령이 거부되었습니다.[/yellow]", style="bold")
                    console.print("에이전트에게 다르게 수행할 작업을 알려주세요.")
                    console.print()
                    return

                # Resume the agent with the human decision
                stream_input = Command(resume=hitl_response)
                # Continue the while loop to restream
            else:
                # No interrupt, break out of while loop
                break

    except asyncio.CancelledError:
        # Event loop cancelled the task (e.g. Ctrl+C during streaming) - clean up and return
        if spinner_active:
            status.stop()
        await _report_interruption(agent, config, "[이전 요청이 시스템에 의해 취소되었습니다]")
        return

    except KeyboardInterrupt:
        # User pressed Ctrl+C - clean up and exit gracefully
        if spinner_active:
            status.stop()
        await _report_interruption(agent, config, "[사용자가 Ctrl+C로 이전 요청을 중단했습니다]")
        return

    except Exception:
        # Any other failure (including the re-raised ValidationError): stop the
        # spinner so it does not keep running after this function exits, then
        # let the caller handle the error.
        if spinner_active:
            status.stop()
        raise

    if spinner_active:
        status.stop()

    if has_responded:
        console.print()

    # Track token usage (display only via /tokens command)
    if token_tracker and (captured_input_tokens or captured_output_tokens):
        token_tracker.add(captured_input_tokens, captured_output_tokens)