|
|
import io
|
|
|
import base64
|
|
|
from dataclasses import dataclass, field
|
|
|
from typing import Any, List, Tuple, Dict
|
|
|
from PIL import Image
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
class LLMMessage:
|
|
|
content: str | List[Dict[str, Any]]
|
|
|
source: str = "user"
|
|
|
|
|
|
|
|
|
@dataclass
class SystemMessage(LLMMessage):
    """Message whose ``source`` defaults to ``"system"``.

    Args:
        content: Text of the message.
        source: Originator label; defaults to ``"system"``.
    """

    # An explicitly defined __init__ is never overwritten by @dataclass, so
    # this constructor is the one that runs.
    def __init__(self, content: str, source: str = "system"):
        # Let the parent's constructor store both attributes.
        super().__init__(content=content, source=source)
|
|
|
|
|
|
|
|
|
@dataclass
class UserMessage(LLMMessage):
    """User-originated message carrying an extra ``is_original`` flag.

    ``is_original`` is declared as a real dataclass field so that the
    generated ``__repr__`` and ``__eq__`` include it and helpers such as
    ``dataclasses.replace`` and ``dataclasses.asdict`` preserve it.
    Previously it was only assigned inside a hand-written ``__init__`` and
    was therefore invisible to the dataclass machinery.

    The generated constructor keeps the same signature as before:
    ``UserMessage(content, source="user", is_original=False)``.

    Attributes:
        content: Inherited; a string or a list of content-part dictionaries.
        source: Inherited; originator label, defaults to ``"user"``.
        is_original: Boolean flag stored on the message; defaults to
            ``False``. NOTE(review): presumably marks the user's initial
            request -- confirm against callers.
    """

    # Declared after the inherited fields, so positional order is unchanged.
    is_original: bool = False
|
|
|
|
|
|
|
|
|
@dataclass
class AssistantMessage(LLMMessage):
    """Message whose ``source`` defaults to ``"assistant"``.

    Args:
        content: Text of the message.
        source: Originator label; defaults to ``"assistant"``.
    """

    # An explicitly defined __init__ is never overwritten by @dataclass, so
    # this constructor is the one that runs.
    def __init__(self, content: str, source: str = "assistant"):
        # Let the parent's constructor store both attributes.
        super().__init__(content=content, source=source)
|
|
|
|
|
|
|
|
|
@dataclass
class ImageObj:
    """Thin wrapper around a PIL image (screenshots and other images)."""

    # The wrapped PIL image.
    image: Image.Image

    @classmethod
    def from_pil(cls, image: Image.Image) -> "ImageObj":
        """Build an ``ImageObj`` around an existing PIL image."""
        wrapped = cls(image=image)
        return wrapped

    def to_base64(self) -> str:
        """Encode the image as PNG and return the bytes as a base64 string."""
        with io.BytesIO() as png_stream:
            # Serialise into memory; nothing is written to disk.
            self.image.save(png_stream, format="PNG")
            png_bytes = png_stream.getvalue()
        encoded = base64.b64encode(png_bytes)
        return encoded.decode("utf-8")

    def resize(self, size: Tuple[int, int]) -> Image.Image:
        """Return the result of calling ``resize(size)`` on the wrapped image.

        The return value is whatever the underlying image's ``resize``
        produces, not a new ``ImageObj``.
        """
        resized = self.image.resize(size)
        return resized
|
|
|
|
|
|
|
|
|
@dataclass
class ModelResponse:
    """Result returned from a model call.

    Attributes:
        content: Text produced by the model.
        usage: Dictionary of usage information; a fresh empty dict is
            created for every instance when none is supplied.
    """

    # Text of the response.
    content: str
    # default_factory gives each instance its own dict (no shared default).
    usage: Dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
|
|
|
@dataclass
class FunctionCall:
    """A single function invocation: identifier, function name and arguments.

    Attributes:
        id: Identifier string for this call.
        name: Name of the function being called.
        arguments: Mapping of argument names to values.
    """

    # Identifier of the call.
    id: str
    # Name of the target function.
    name: str
    # Argument values keyed by name.
    arguments: Dict[str, Any]
|
|
|
|
|
|
|
|
|
def message_to_openai_format(message: LLMMessage) -> Dict[str, Any]:
    """Translate an ``LLMMessage`` into an OpenAI API message dictionary.

    Args:
        message: The message to convert.

    Returns:
        A dict with ``"role"`` and ``"content"`` keys. The role is
        ``"system"`` for ``SystemMessage``, ``"assistant"`` for
        ``AssistantMessage`` and ``"user"`` for anything else. List content
        is converted part by part; any other content is passed through
        unchanged.
    """
    # The role is decided by the concrete message class alone.
    if isinstance(message, SystemMessage):
        role = "system"
    elif isinstance(message, AssistantMessage):
        role = "assistant"
    else:
        role = "user"

    # Non-list content needs no conversion.
    if not isinstance(message.content, list):
        return {"role": role, "content": message.content}

    parts = []
    for entry in message.content:
        if isinstance(entry, ImageObj):
            # Images are embedded inline as a base64 PNG data URL.
            encoded = entry.to_base64()
            image_part = {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{encoded}"},
            }
            parts.append(image_part)
        elif isinstance(entry, str):
            parts.append({"type": "text", "text": entry})
        elif isinstance(entry, dict):
            # Dict entries are forwarded as-is (same object, not a copy).
            parts.append(entry)
        # Entries of any other type are skipped.
    return {"role": role, "content": parts}
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
class WebSurferEvent:
|
|
|
source: str
|
|
|
message: str
|
|
|
url: str
|
|
|
action: str | None = None
|
|
|
arguments: Dict[str, Any] | None = None
|
|
|
|