- Context_Engineering.md: 에이전트 컨텍스트 엔지니어링 개념 정리 문서 추가 - Context_Engineering_Research.ipynb: 연구 노트북 업데이트 - deepagents_sourcecode/: docstring과 주석을 한국어로 번역
218 lines
6.5 KiB
Python
218 lines
6.5 KiB
Python
"""클립보드에서 이미지 붙여넣기(paste)를 처리하는 유틸리티입니다.
|
|
|
|
Utilities for handling image paste from clipboard.
|
|
"""
|
|
|
|
import base64
|
|
import contextlib
|
|
import io
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
from PIL import Image, UnidentifiedImageError
|
|
|
|
|
|
@dataclass
class ImageData:
    """A pasted image held as base64 text together with display metadata."""

    base64_data: str  # Base64-encoded image payload
    format: str  # Image subtype placed in the data URL, e.g. "png", "jpeg"
    placeholder: str  # Display text like "[image 1]"

    def to_message_content(self) -> dict:
        """Build the LangChain multimodal content block for this image.

        Returns:
            Dict whose ``type`` is ``"image_url"`` and whose ``image_url``
            entry carries the image as a base64 ``data:`` URL.
        """
        data_url = f"data:image/{self.format};base64,{self.base64_data}"
        return {"type": "image_url", "image_url": {"url": data_url}}
|
|
|
|
|
|
def get_clipboard_image() -> ImageData | None:
    """Try to fetch an image from the system clipboard.

    Only macOS is handled (through `pngpaste` or `osascript`); every other
    platform yields None.

    Returns:
        ImageData if an image is found, None otherwise
    """
    if sys.platform != "darwin":
        # Linux/Windows support could be added here
        return None

    return _get_macos_clipboard_image()
|
|
|
|
|
|
def _get_macos_clipboard_image() -> ImageData | None:
    """Get clipboard image on macOS using pngpaste or osascript.

    Tries ``pngpaste`` first (faster if installed). Falls back to
    ``_get_clipboard_via_osascript()`` whenever pngpaste is not on PATH,
    cannot be launched, times out, exits non-zero, prints nothing, or prints
    data that cannot be opened as an image.

    Returns:
        ImageData if an image is found, None otherwise
    """
    pngpaste_path = shutil.which("pngpaste")
    if pngpaste_path:
        png_bytes = b""
        try:
            result = subprocess.run(  # noqa: S603
                [pngpaste_path, "-"],
                capture_output=True,
                check=False,
                timeout=2,
            )
        except (subprocess.TimeoutExpired, OSError):
            # pngpaste hung or could not be executed (e.g. permissions or a
            # binary removed after the PATH lookup). Treat both the same way
            # the osascript path does and fall through to the fallback.
            pass
        else:
            if result.returncode == 0:
                png_bytes = result.stdout

        if png_bytes:
            try:
                # Opening is the validity check; the context manager makes
                # sure the image handle is released again right away.
                with Image.open(io.BytesIO(png_bytes)):
                    pass
            except (UnidentifiedImageError, OSError):
                pass  # Invalid image data
            else:
                return ImageData(
                    base64_data=base64.b64encode(png_bytes).decode("utf-8"),
                    format="png",  # 'pngpaste -' always outputs PNG
                    placeholder="[image]",
                )

    # Fallback to osascript with temp file (built-in but slower)
    return _get_clipboard_via_osascript()
|
|
|
|
|
|
def _get_clipboard_via_osascript() -> ImageData | None:  # noqa: PLR0911
    """Get clipboard image via osascript using a temp file.

    osascript outputs data in a special format that can't be captured as raw
    binary, so the AppleScript writes the clipboard contents to a temp file,
    which is then read back, re-encoded as PNG and removed again.

    Returns:
        ImageData if an image is found, None otherwise (osascript missing,
        no PNG/TIFF data on the clipboard, script failure or timeout, or
        unreadable image data)
    """
    osascript_path = shutil.which("osascript")
    if not osascript_path:
        return None

    # Reserve a file path for the script to write into; it is deleted in the
    # ``finally`` clause below on every exit path.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        temp_file = Path(tmp.name)

    try:
        # First check which data flavours the clipboard currently offers
        check_result = subprocess.run(  # noqa: S603
            [osascript_path, "-e", "clipboard info"],
            capture_output=True,
            check=False,
            timeout=2,
            text=True,
        )
        if check_result.returncode != 0:
            return None

        # Prefer PNG, fall back to TIFF, give up on anything else
        clipboard_info = check_result.stdout.lower()
        if "pngf" in clipboard_info:
            var_name, clipboard_class = "pngData", "«class PNGf»"
        elif "tiff" in clipboard_info:
            var_name, clipboard_class = "tiffData", "TIFF picture"
        else:
            return None

        # The path is embedded in an AppleScript string literal, so escape
        # backslashes and double quotes to keep the literal well-formed.
        script_path = temp_file.as_posix().replace("\\", "\\\\").replace('"', '\\"')
        get_script = f"""
        set {var_name} to the clipboard as {clipboard_class}
        set theFile to open for access POSIX file "{script_path}" with write permission
        write {var_name} to theFile
        close access theFile
        return "success"
        """

        result = subprocess.run(  # noqa: S603
            [osascript_path, "-e", get_script],
            capture_output=True,
            check=False,
            timeout=3,
            text=True,
        )
        if result.returncode != 0 or "success" not in result.stdout:
            return None

        # Check if file was created and has content
        if not temp_file.exists() or temp_file.stat().st_size == 0:
            return None

        image_data = temp_file.read_bytes()

        try:
            # Re-encode through Pillow so the result is PNG even when the
            # clipboard only offered TIFF; the image is closed afterwards.
            buffer = io.BytesIO()
            with Image.open(io.BytesIO(image_data)) as image:
                image.save(buffer, format="PNG")
        except (UnidentifiedImageError, OSError):
            return None

        return ImageData(
            base64_data=base64.b64encode(buffer.getvalue()).decode("utf-8"),
            format="png",
            placeholder="[image]",
        )

    except (subprocess.TimeoutExpired, OSError):
        return None
    finally:
        # Clean up temp file
        with contextlib.suppress(OSError):
            temp_file.unlink()
|
|
|
|
|
|
def encode_image_to_base64(image_bytes: bytes) -> str:
    """Return the base64 text form of raw image bytes.

    Args:
        image_bytes: Raw image bytes

    Returns:
        Base64-encoded string
    """
    encoded = base64.b64encode(image_bytes)
    return str(encoded, "utf-8")
|
|
|
|
|
|
def create_multimodal_content(text: str, images: list[ImageData]) -> list[dict]:
    """Assemble LangChain-style content blocks from text and images.

    Args:
        text: Text content of the message
        images: List of ImageData objects

    Returns:
        List of content blocks in LangChain format: an optional text block
        followed by one block per image, in the order given
    """
    # Empty or whitespace-only text gets no block; otherwise the text is
    # passed through exactly as received (not stripped).
    text_blocks = [{"type": "text", "text": text}] if text.strip() else []
    image_blocks = [image.to_message_content() for image in images]
    return text_blocks + image_blocks
|