File size: 24,393 Bytes
7fcdb70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
import ast
import asyncio
import io
import json
import logging
import os
from typing import Any, Dict, List, Tuple
from urllib.parse import quote_plus

from openai import AsyncOpenAI
from PIL import Image
from playwright.async_api import BrowserContext, Download, Page
from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential

from ._prompts import get_computer_use_system_prompt
from .browser.playwright_controller import PlaywrightController
from .types import (
    AssistantMessage,
    FunctionCall,
    ImageObj,
    LLMMessage,
    ModelResponse,
    SystemMessage,
    UserMessage,
    WebSurferEvent,
    message_to_openai_format,
)
from .utils import get_trimmed_url


class FaraAgent:
    DEFAULT_START_PAGE = "https://www.bing.com/"

    MLM_PROCESSOR_IM_CFG = {
        "min_pixels": 3136,
        "max_pixels": 12845056,
        "patch_size": 14,
        "merge_size": 2,
    }

    SCREENSHOT_TOKENS = 1105
    USER_MESSAGE = "Here is the next screenshot. Think about what to do next."
    MAX_URL_LENGTH = 100

    def __init__(

        self,

        browser_manager: Any,

        client_config: dict,

        downloads_folder: str | None = None,

        start_page: str | None = "about:blank",

        animate_actions: bool = False,

        single_tab_mode: bool = True,

        max_n_images: int = 3,

        fn_call_template: str = "default",

        model_call_timeout: int = 20,

        max_rounds: int = 10,

        save_screenshots: bool = False,

        logger: logging.Logger | None = None,

    ):
        self.downloads_folder = downloads_folder
        if not os.path.exists(self.downloads_folder or "") and self.downloads_folder:
            os.makedirs(self.downloads_folder)
        self.single_tab_mode = single_tab_mode
        self.start_page = start_page or self.DEFAULT_START_PAGE
        self.animate_actions = animate_actions
        self.browser_manager = browser_manager
        self.client_config = client_config
        self.max_n_images = max_n_images
        self.fn_call_template = fn_call_template
        self.model_call_timeout = model_call_timeout
        self.max_rounds = max_rounds
        self.max_url_chars = self.MAX_URL_LENGTH
        if save_screenshots and self.downloads_folder is None:
            assert False, "downloads_folder must be set if save_screenshots is True"
        self.save_screenshots = save_screenshots
        self._facts = []
        self._task_summary = None
        self._num_actions = 0
        self.logger = logger or logging.getLogger(__name__)
        self._mlm_width = 1440
        self._mlm_height = 900
        self.viewport_height = 900
        self.viewport_width = 1440
        self.include_input_text_key_args = True

        def _download_handler(download: Download) -> None:
            self._last_download = download

        self._download_handler = _download_handler
        self.did_initialize = False

        # OpenAI client will be initialized in initialize()
        self._openai_client: AsyncOpenAI | None = None
        self._chat_history: List[LLMMessage] = []

    async def initialize(self) -> None:
        if self.did_initialize:
            return
        self._last_download = None
        self._prior_metadata_hash = None

        # Initialize OpenAI client
        self._openai_client = AsyncOpenAI(
            api_key=self.client_config.get("api_key"),
            base_url=self.client_config.get("base_url"),
            default_headers=self.client_config.get("default_headers"),
        )

        # Set up download handler
        self.browser_manager.set_download_handler(self._download_handler)

        # Initialize browser
        await self.browser_manager.init(self.start_page)
        self.did_initialize = True

    @property
    def _page(self) -> Page | None:
        """Get the current page from browser manager."""
        return self.browser_manager.page if self.browser_manager else None

    @_page.setter
    def _page(self, value):
        if self.browser_manager:
            self.browser_manager.page = value
        else:
            raise ValueError("Browser manager is not initialized. Cannot set page.")

    @property
    def context(self) -> BrowserContext | None:
        """Get the browser context from browser manager."""
        return self.browser_manager.context if self.browser_manager else None

    @property
    def _playwright_controller(self) -> PlaywrightController | None:
        """Get the playwright controller from browser manager."""
        return (
            self.browser_manager.playwright_controller if self.browser_manager else None
        )

    async def wait_for_captcha_with_timeout(

        self, timeout_seconds=300

    ):  # 5 minutes default
        """Wait for captcha to be solved with timeout"""
        try:
            await asyncio.wait_for(
                self.browser_manager.wait_for_captcha_resolution(),
                timeout=timeout_seconds,
            )
            return True  # Captcha solved in time
        except asyncio.TimeoutError:
            self.logger.warning(f"Captcha timeout after {timeout_seconds} seconds!")
            # Force resume execution
            self.browser_manager._captcha_event.set()
            return False  # Captcha timed out

    @retry(

        stop=stop_after_attempt(5),

        wait=wait_exponential(multiplier=5.0, min=5.0, max=60),

        before_sleep=before_sleep_log(logging.getLogger(__name__), logging.WARNING),

        reraise=True,

    )
    async def _make_model_call(

        self,

        history: List[LLMMessage],

        extra_create_args: Dict[str, Any] | None = None,

    ) -> ModelResponse:
        """Make a model call using OpenAI client"""
        openai_messages = [message_to_openai_format(msg) for msg in history]
        request_params = {
            "model": self.client_config.get("model", "gpt-4o"),
            "messages": openai_messages,
        }
        if extra_create_args:
            request_params.update(extra_create_args)

        response = await self._openai_client.chat.completions.create(**request_params)
        content = response.choices[0].message.content
        usage = {}
        if response.usage:
            usage = {
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens,
                "total_tokens": response.usage.total_tokens,
            }
        return ModelResponse(content=content, usage=usage)

    def remove_screenshot_from_message(self, msg: List[Dict[str, Any]] | Any) -> Any:
        """Remove the screenshot from the message content."""
        if isinstance(msg.content, list):
            new_content = []
            for c in msg.content:
                if not isinstance(c, ImageObj):
                    new_content.append(c)
            msg.content = new_content
        elif isinstance(msg.content, ImageObj):
            msg = None
        return msg

    def maybe_remove_old_screenshots(

        self, history: List[LLMMessage], includes_current: bool = False

    ) -> List[LLMMessage]:
        """Remove old screenshots from the chat history. Assuming we have not yet added the current screenshot message.



        Note: Original user messages (marked with is_original=True) have their TEXT preserved,

        but their images may be removed if we exceed max_n_images. Boilerplate messages can be

        completely removed.

        """
        if self.max_n_images <= 0:
            return history

        max_n_images = self.max_n_images if includes_current else self.max_n_images - 1
        new_history: List[LLMMessage] = []
        n_images = 0
        for i in range(len(history) - 1, -1, -1):
            msg = history[i]

            is_original_user_message = isinstance(msg, UserMessage) and getattr(
                msg, "is_original", False
            )

            if i == 0 and n_images >= max_n_images:
                # First message is always the task so we keep it and remove the screenshot if necessary
                msg = self.remove_screenshot_from_message(msg)
                if msg is None:
                    continue

            if isinstance(msg.content, list):
                # Check if the message contains an image. Assumes 1 image per message.
                has_image = False
                for c in msg.content:
                    if isinstance(c, ImageObj):
                        has_image = True
                        break
                if has_image:
                    if n_images < max_n_images:
                        new_history.append(msg)
                    elif is_original_user_message:
                        # Original user message but over limit: keep text, remove image
                        msg = self.remove_screenshot_from_message(msg)
                        if msg is not None:
                            new_history.append(msg)
                    n_images += 1
                else:
                    new_history.append(msg)
            elif isinstance(msg.content, ImageObj):
                if n_images < max_n_images:
                    new_history.append(msg)
                n_images += 1
            else:
                new_history.append(msg)

        new_history = new_history[::-1]

        return new_history

    async def _get_scaled_screenshot(self) -> Image.Image:
        """Get current screenshot and scale it for the model."""
        screenshot = await self._playwright_controller.get_screenshot(self._page)
        screenshot = Image.open(io.BytesIO(screenshot))
        _, scaled_screenshot = self._get_system_message(screenshot)
        return scaled_screenshot

    def _get_system_message(

        self, screenshot: ImageObj | Image.Image

    ) -> Tuple[List[SystemMessage], Image.Image]:
        system_prompt_info = get_computer_use_system_prompt(
            screenshot,
            self.MLM_PROCESSOR_IM_CFG,
            include_input_text_key_args=self.include_input_text_key_args,
            fn_call_template=self.fn_call_template,
        )
        self._mlm_width, self._mlm_height = system_prompt_info["im_size"]
        scaled_screenshot = screenshot.resize((self._mlm_width, self._mlm_height))

        system_message = []
        for msg in system_prompt_info["conversation"]:
            tmp_content = ""
            for content in msg["content"]:
                tmp_content += content["text"]

            system_message.append(SystemMessage(content=tmp_content))

        return system_message, scaled_screenshot

    def _parse_thoughts_and_action(self, message: str) -> Tuple[str, Dict[str, Any]]:
        try:
            tmp = message.split("<tool_call>\n")
            thoughts = tmp[0].strip()
            action_text = tmp[1].split("\n</tool_call>")[0]
            try:
                action = json.loads(action_text)
            except json.decoder.JSONDecodeError:
                self.logger.error(f"Invalid action text: {action_text}")
                action = ast.literal_eval(action_text)

            return thoughts, action
        except Exception as e:
            self.logger.error(
                f"Error parsing thoughts and action: {message}", exc_info=True
            )
            raise e

    def convert_resized_coords_to_original(

        self, coords: List[float], rsz_w: int, rsz_h: int, og_w: int, og_h: int

    ) -> List[float]:
        scale_x = og_w / rsz_w
        scale_y = og_h / rsz_h
        return [coords[0] * scale_x, coords[1] * scale_y]

    def proc_coords(

        self,

        coords: List[float] | None,

        im_w: int,

        im_h: int,

        og_im_w: int | None = None,

        og_im_h: int | None = None,

    ) -> List[float] | None:
        if not coords:
            return coords

        if og_im_w is None:
            og_im_w = im_w
        if og_im_h is None:
            og_im_h = im_h

        tgt_x, tgt_y = coords
        return self.convert_resized_coords_to_original(
            [tgt_x, tgt_y], im_w, im_h, og_im_w, og_im_h
        )

    async def run(self, user_message: str) -> Tuple:
        """Run the agent with a user message."""
        # Initialize if not already done
        await self.initialize()

        # Ensure page is ready after initialization
        assert self._page is not None, "Page should be initialized"

        # Get initial screenshot and add user message with image to chat history
        scaled_screenshot = await self._get_scaled_screenshot()

        if self.save_screenshots:
            await self._playwright_controller.get_screenshot(
                self._page,
                path=os.path.join(
                    self.downloads_folder, f"screenshot{self._num_actions}.png"
                ),
            )

        self._chat_history.append(
            UserMessage(
                content=[ImageObj.from_pil(scaled_screenshot), user_message],
                is_original=True,
            )
        )

        all_actions = []
        all_observations = []
        final_answer = "<no_answer>"
        is_stop_action = False
        for i in range(self.max_rounds):
            is_first_round = i == 0
            if not self.browser_manager._captcha_event.is_set():
                self.logger.info("Waiting 60s for captcha to finish...")
                captcha_solved = await self.wait_for_captcha_with_timeout(60)
                if (
                    not captcha_solved
                    and not self.browser_manager._captcha_event.is_set()
                ):
                    raise RuntimeError(
                        "Captcha timed out, unable to proceed with web surfing."
                    )

            function_call, raw_response = await self.generate_model_call(
                is_first_round, scaled_screenshot if is_first_round else None
            )
            assert isinstance(raw_response, str)
            all_actions.append(raw_response)
            thoughts, action_dict = self._parse_thoughts_and_action(raw_response)
            action_args = action_dict.get("arguments", {})
            action = action_args["action"]
            self.logger.info(
                f"\nThought #{i + 1}: {thoughts}\nAction #{i + 1}: executing tool '{action}' with arguments {json.dumps(action_args)}"
            )

            (
                is_stop_action,
                new_screenshot,
                action_description,
            ) = await self.execute_action(function_call)
            all_observations.append(action_description)
            self.logger.info(f"Observation#{i + 1}: {action_description}")
            if is_stop_action:
                final_answer = thoughts
                break
        return final_answer, all_actions, all_observations

    async def generate_model_call(

        self, is_first_round: bool, first_screenshot: Image.Image | None = None

    ) -> Tuple[List[FunctionCall], str]:
        history = self.maybe_remove_old_screenshots(self._chat_history)

        screenshot_for_system = first_screenshot
        if not is_first_round:
            # Get screenshot and add new user message for subsequent rounds
            scaled_screenshot = await self._get_scaled_screenshot()
            screenshot_for_system = scaled_screenshot

            text_prompt = self.USER_MESSAGE
            curr_url = await self._playwright_controller.get_page_url(self._page)
            trimmed_url = get_trimmed_url(curr_url, max_len=self.max_url_chars)
            text_prompt = f"Current URL: {trimmed_url}\n" + text_prompt

            curr_message = UserMessage(
                content=[ImageObj.from_pil(scaled_screenshot), text_prompt]
            )
            self._chat_history.append(curr_message)
            history.append(curr_message)

        # Generate system message using the screenshot
        system_message, _ = self._get_system_message(screenshot_for_system)
        history = system_message + history
        response = await self._make_model_call(
            history, extra_create_args={"temperature": 0}
        )
        message = response.content

        self._chat_history.append(AssistantMessage(content=message))
        thoughts, action = self._parse_thoughts_and_action(message)
        action["arguments"]["thoughts"] = thoughts

        function_call = [FunctionCall(id="dummy", **action)]
        return function_call, message

    async def execute_action(

        self,

        function_call: List[FunctionCall],

    ) -> Tuple[bool, bytes, str]:
        name = function_call[0].name
        args = function_call[0].arguments
        action_description = ""
        assert self._page is not None
        self.logger.debug(
            WebSurferEvent(
                source="FaraAgent",
                url=await self._playwright_controller.get_page_url(self._page),
                action=name,
                arguments=args,
                message=f"{name}( {json.dumps(args)} )",
            )
        )
        if "coordinate" in args:
            args["coordinate"] = self.proc_coords(
                args["coordinate"],
                self._mlm_width,
                self._mlm_height,
                self.viewport_width,
                self.viewport_height,
            )

        is_stop_action = False

        if args["action"] == "visit_url":
            url = str(args["url"])
            action_description = f"I typed '{url}' into the browser address bar."
            # Check if the argument starts with a known protocol
            if url.startswith(("https://", "http://", "file://", "about:")):
                (
                    reset_prior_metadata,
                    reset_last_download,
                ) = await self._playwright_controller.visit_page(self._page, url)
            # If the argument contains a space, treat it as a search query
            elif " " in url:
                (
                    reset_prior_metadata,
                    reset_last_download,
                ) = await self._playwright_controller.visit_page(
                    self._page,
                    f"https://www.bing.com/search?q={quote_plus(url)}&FORM=QBLH",
                )
            # Otherwise, prefix with https://
            else:
                (
                    reset_prior_metadata,
                    reset_last_download,
                ) = await self._playwright_controller.visit_page(
                    self._page, "https://" + url
                )
            if reset_last_download and self._last_download is not None:
                self._last_download = None
            if reset_prior_metadata and self._prior_metadata_hash is not None:
                self._prior_metadata_hash = None
        elif args["action"] == "history_back":
            action_description = "I clicked the browser back button."
            await self._playwright_controller.back(self._page)
        elif args["action"] == "web_search":
            query = args.get("query")
            action_description = f"I typed '{query}' into the browser search bar."
            encoded_query = quote_plus(query)
            (
                reset_prior_metadata,
                reset_last_download,
            ) = await self._playwright_controller.visit_page(
                self._page, f"https://www.bing.com/search?q={encoded_query}&FORM=QBLH"
            )
            if reset_last_download and self._last_download is not None:
                self._last_download = None
            if reset_prior_metadata and self._prior_metadata_hash is not None:
                self._prior_metadata_hash = None
        elif args["action"] == "scroll":
            pixels = int(args.get("pixels", 0))
            if pixels > 0:
                action_description = "I scrolled up one page in the browser."
                await self._playwright_controller.page_up(self._page)
            elif pixels < 0:
                action_description = "I scrolled down one page in the browser."
                await self._playwright_controller.page_down(self._page)
        elif args["action"] == "keypress" or args["action"] == "key":
            keys = args.get("keys", [])
            action_description = f"I pressed the following keys: {keys}"
            await self._playwright_controller.keypress(self._page, keys)
        elif args["action"] == "hover" or args["action"] == "mouse_move":
            if "coordinate" in args:
                tgt_x, tgt_y = args["coordinate"]
                await self._playwright_controller.hover_coords(self._page, tgt_x, tgt_y)

        elif args["action"] == "sleep" or args["action"] == "wait":
            duration = args.get("duration", 3.0)
            duration = args.get("time", duration)
            action_description = (
                "I am waiting a short period of time before taking further action."
            )
            await self._playwright_controller.sleep(self._page, duration)
        elif args["action"] == "click" or args["action"] == "left_click":
            if "coordinate" in args:
                tgt_x, tgt_y = args["coordinate"]
                action_description = f"I clicked at coordinates ({tgt_x}, {tgt_y})."
                new_page_tentative = await self._playwright_controller.click_coords(
                    self._page, tgt_x, tgt_y
                )

            if new_page_tentative is not None:
                self._page = new_page_tentative
                self._prior_metadata_hash = None

        elif args["action"] == "input_text" or args["action"] == "type":
            text_value = str(args.get("text", args.get("text_value")))
            action_description = f"I typed '{text_value}'."
            press_enter = args.get("press_enter", True)
            delete_existing_text = args.get("delete_existing_text", False)

            if "coordinate" in args:
                tgt_x, tgt_y = args["coordinate"]
                new_page_tentative = await self._playwright_controller.fill_coords(
                    self._page,
                    tgt_x,
                    tgt_y,
                    text_value,
                    press_enter=press_enter,
                    delete_existing_text=delete_existing_text,
                )
                if new_page_tentative is not None:
                    self._page = new_page_tentative
                    self._prior_metadata_hash = None

        elif args["action"] == "pause_and_memorize_fact":
            fact = str(args.get("fact"))
            self._facts.append(fact)
            action_description = f"I memorized the following fact: {fact}"
        elif args["action"] == "stop" or args["action"] == "terminate":
            action_description = args.get("thoughts")
            is_stop_action = True

        else:
            raise ValueError(f"Unknown tool: {args['action']}")

        await self._playwright_controller.wait_for_load_state(self._page)
        await self._playwright_controller.sleep(self._page, 3)

        # Get new screenshot after action
        self._num_actions += 1
        if self.save_screenshots:
            new_screenshot = await self._playwright_controller.get_screenshot(
                self._page,
                path=os.path.join(
                    self.downloads_folder, f"screenshot{self._num_actions}.png"
                ),
            )
        else:
            new_screenshot = await self._playwright_controller.get_screenshot(
                self._page
            )
        return is_stop_action, new_screenshot, action_description

    async def close(self) -> None:
        """

        Close the browser and the page.

        Should be called when the agent is no longer needed.

        """
        if self._page is not None:
            self._page = None
        await self.browser_manager.close()