Files
deepagent/deepagents_sourcecode/libs/deepagents-cli/deepagents_cli/image_utils.py
HyunjunJeon af5fbfabec 문서 추가: Context Engineering 문서 추가 및 deepagents_sourcecode 한국어 번역
- Context_Engineering.md: 에이전트 컨텍스트 엔지니어링 개념 정리 문서 추가
- Context_Engineering_Research.ipynb: 연구 노트북 업데이트
- deepagents_sourcecode/: docstring과 주석을 한국어로 번역
2026-01-11 17:55:52 +09:00

218 lines
6.5 KiB
Python

"""클립보드에서 이미지 붙여넣기(paste)를 처리하는 유틸리티입니다.
Utilities for handling image paste from clipboard.
"""
import base64
import contextlib
import io
import shutil
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path
from PIL import Image, UnidentifiedImageError
@dataclass
class ImageData:
    """A pasted image held as base64 text, ready to embed in a message.

    Instances are plain value containers; `to_message_content` renders them
    as an `image_url` content block whose URL is a `data:` URI.
    """

    # Base64 text of the image bytes.
    base64_data: str
    # Image subtype used in the data URI: "png", "jpeg", etc.
    format: str
    # Display text like "[image 1]"
    placeholder: str

    def to_message_content(self) -> dict:
        """Render this image as a LangChain-style multimodal content block.

        Returns:
            Dict with `type` set to "image_url" and an `image_url` entry whose
            `url` is a base64 data URI built from `format` and `base64_data`.
        """
        data_uri = f"data:image/{self.format};base64,{self.base64_data}"
        image_url = {"url": data_uri}
        return {"type": "image_url", "image_url": image_url}
def get_clipboard_image() -> ImageData | None:
    """Try to fetch an image from the system clipboard.

    Only macOS is handled (through `pngpaste` or `osascript`); on every other
    platform this returns None without touching the clipboard.

    Returns:
        ImageData if an image is found, None otherwise
    """
    if sys.platform != "darwin":
        # Linux/Windows support could be added here
        return None
    return _get_macos_clipboard_image()
def _get_macos_clipboard_image() -> ImageData | None:
    """Get clipboard image on macOS using pngpaste or osascript.

    First tries pngpaste (faster if installed), then falls back to osascript.
    Any failure of the pngpaste path (not installed, timeout, cannot be
    executed, non-zero exit, empty or unrecognised output) leads to the
    osascript fallback rather than an exception.

    Returns:
        ImageData if an image is found, None otherwise
    """
    # Try pngpaste first (fast if installed)
    pngpaste_path = shutil.which("pngpaste")
    if pngpaste_path:
        try:
            result = subprocess.run(  # noqa: S603
                [pngpaste_path, "-"],
                capture_output=True,
                check=False,
                timeout=2,
            )
        except (subprocess.TimeoutExpired, OSError):
            # pngpaste timed out or could not be executed at all (e.g. it was
            # removed or is not runnable); treat both as "no result" so the
            # osascript fallback still gets a chance.
            result = None

        if result is not None and result.returncode == 0 and result.stdout:
            try:
                # Opening is enough to confirm Pillow recognises the bytes as
                # an image; the context manager releases the image object.
                with Image.open(io.BytesIO(result.stdout)):
                    pass
            except (UnidentifiedImageError, OSError):
                pass  # Invalid image data
            else:
                base64_data = base64.b64encode(result.stdout).decode("utf-8")
                return ImageData(
                    base64_data=base64_data,
                    format="png",  # 'pngpaste -' always outputs PNG
                    placeholder="[image]",
                )

    # Fallback to osascript with temp file (built-in but slower)
    return _get_clipboard_via_osascript()
def _get_clipboard_via_osascript() -> ImageData | None:  # noqa: PLR0911
    """Get clipboard image via osascript using a temp file.

    osascript outputs data in a special format that can't be captured as raw binary,
    so we write to a temp file instead. PNG clipboard data is preferred; TIFF is
    used when no PNG is present. Whatever was written is re-encoded to PNG
    before being returned, and the temp file is always removed afterwards.

    Returns:
        ImageData if an image is found, None otherwise (osascript missing,
        no PNG/TIFF on the clipboard, script failure, timeout, or unreadable
        image data)
    """
    osascript_path = shutil.which("osascript")
    if not osascript_path:
        return None

    # Reserve a temp file for AppleScript to write into. delete=False keeps it
    # on disk after this `with` closes our handle; the `finally` removes it.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        temp_file = Path(tmp.name)

    try:
        # First check what the clipboard currently holds
        check_result = subprocess.run(  # noqa: S603
            [osascript_path, "-e", "clipboard info"],
            capture_output=True,
            check=False,
            timeout=2,
            text=True,
        )
        if check_result.returncode != 0:
            return None

        # Prefer PNG, fall back to TIFF, give up if neither is present
        clipboard_info = check_result.stdout.lower()
        if "pngf" in clipboard_info:
            data_var, clipboard_class = "pngData", "«class PNGf»"
        elif "tiff" in clipboard_info:
            data_var, clipboard_class = "tiffData", "TIFF picture"
        else:
            return None

        # The path is embedded in an AppleScript string literal, so backslashes
        # and double quotes must be escaped to keep the script well-formed.
        script_path = temp_file.as_posix().replace("\\", "\\\\").replace('"', '\\"')
        get_script = "\n".join(
            [
                "",
                f"set {data_var} to the clipboard as {clipboard_class}",
                f'set theFile to open for access POSIX file "{script_path}" with write permission',
                f"write {data_var} to theFile",
                "close access theFile",
                'return "success"',
                "",
            ]
        )

        result = subprocess.run(  # noqa: S603
            [osascript_path, "-e", get_script],
            capture_output=True,
            check=False,
            timeout=3,
            text=True,
        )
        if result.returncode != 0 or "success" not in result.stdout:
            return None

        # Check if file was created and has content
        if not temp_file.exists() or temp_file.stat().st_size == 0:
            return None

        # Read and validate the image
        image_data = temp_file.read_bytes()
        try:
            with Image.open(io.BytesIO(image_data)) as image:
                # Convert to PNG if it's not already (e.g., if we got TIFF)
                buffer = io.BytesIO()
                image.save(buffer, format="PNG")
        except (UnidentifiedImageError, OSError):
            return None

        return ImageData(
            base64_data=base64.b64encode(buffer.getvalue()).decode("utf-8"),
            format="png",
            placeholder="[image]",
        )
    except (subprocess.TimeoutExpired, OSError):
        return None
    finally:
        # Clean up temp file
        with contextlib.suppress(OSError):
            temp_file.unlink()
def encode_image_to_base64(image_bytes: bytes) -> str:
    """Return the standard base64 encoding of raw image bytes as text.

    Args:
        image_bytes: Raw image bytes

    Returns:
        Base64-encoded string
    """
    encoded = base64.standard_b64encode(image_bytes)
    return str(encoded, "utf-8")
def create_multimodal_content(text: str, images: list[ImageData]) -> list[dict]:
    """Assemble the content blocks for a message with text and images.

    The text block comes first and is omitted when `text` is empty or only
    whitespace; the text itself is passed through unstripped. One image block
    follows per entry in `images`, in order.

    Args:
        text: Text content of the message
        images: List of ImageData objects

    Returns:
        List of content blocks in LangChain format
    """
    text_blocks = [{"type": "text", "text": text}] if text.strip() else []
    image_blocks = [img.to_message_content() for img in images]
    return text_blocks + image_blocks