File size: 5,792 Bytes
7fcdb70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import argparse
import asyncio
import json
import logging
import os
from pathlib import Path
from typing import Dict, Optional, Sequence

from fara import FaraAgent
from fara.browser.browser_bb import BrowserBB
# Logging policy for the CLI: the root logger only lets CRITICAL records
# through, while the "fara.fara_agent" logger gets its own INFO-level stream
# handler so that its messages are the only routine output.
logging.basicConfig(format="%(message)s", level=logging.CRITICAL)

# Stream handler that prints bare messages at INFO and above.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(message)s"))
handler.setLevel(logging.INFO)

# Attach the handler to the agent logger and detach that logger from the
# root logger, so its records are emitted by this handler only.
fara_agent_logger = logging.getLogger("fara.fara_agent")
fara_agent_logger.propagate = False
fara_agent_logger.setLevel(logging.INFO)
fara_agent_logger.addHandler(handler)

# Logger for this module; it is handed to the browser manager below.
logger = logging.getLogger(__name__)
# Endpoint settings used when no --endpoint_config file is given on the
# command line: a server on localhost port 5000 (the CLI help describes this
# as a local vllm instance).
DEFAULT_ENDPOINT_CONFIG = dict(
    model="microsoft/Fara-7B",
    base_url="http://localhost:5000/v1",
    api_key="not-needed",
)
async def run_fara_agent(
    initial_task: Optional[str] = None,
    endpoint_config: Optional[Dict[str, str]] = None,
    start_page: str = "https://www.bing.com/",
    headless: bool = True,
    downloads_folder: Optional[str] = None,
    save_screenshots: bool = True,
    max_rounds: int = 100,
    use_browser_base: bool = False,
) -> None:
    """Run the Fara agent as an interactive, multi-task terminal session.

    A ``BrowserBB`` browser manager and a ``FaraAgent`` are created, the
    agent is initialized, and tasks are then run one after another.  The
    first task is ``initial_task`` when given; otherwise it is read from
    stdin.  After every task the user is prompted for the next one.  An
    empty line, or end-of-input on stdin, ends the session.  The agent is
    closed on the way out, even if the session ends with an exception.

    Args:
        initial_task: Task to run first.  When ``None`` the user is
            prompted for it.
        endpoint_config: Forwarded unchanged to ``FaraAgent`` as
            ``client_config``.
        start_page: Forwarded to ``FaraAgent`` as ``start_page``.
        headless: Forwarded to the browser manager.
        downloads_folder: Forwarded to both the browser manager and the
            agent.
        save_screenshots: Forwarded to ``FaraAgent``.
        max_rounds: Forwarded to ``FaraAgent``.
        use_browser_base: Forwarded to the browser manager.
    """
    # Initialize browser manager
    print("Initializing Browser...")
    browser_manager = BrowserBB(
        headless=headless,
        viewport_height=900,
        viewport_width=1440,
        page_script_path=None,
        browser_channel="firefox",
        browser_data_dir=None,
        downloads_folder=downloads_folder,
        to_resize_viewport=True,
        single_tab_mode=True,
        animate_actions=False,
        use_browser_base=use_browser_base,
        logger=logger,
    )
    print("Browser Running... Starting Fara Agent...")

    agent = FaraAgent(
        browser_manager=browser_manager,
        client_config=endpoint_config,
        start_page=start_page,
        downloads_folder=downloads_folder,
        save_screenshots=save_screenshots,
        max_rounds=max_rounds,
    )

    try:
        await agent.initialize()

        # Interactive loop
        task = initial_task
        first_round = True
        while True:
            if task is None:
                prompt = (
                    "Enter task: "
                    if first_round
                    else "\nEnter another task (or press Enter to exit): "
                )
                try:
                    task = input(prompt).strip()
                except EOFError:
                    # stdin is closed or exhausted (e.g. piped input):
                    # treat it like an empty line instead of crashing.
                    task = ""

                if not task:
                    print("Exiting...")
                    break

            print("##########################################")
            print(f"Task: {task}")
            print("##########################################")

            try:
                print("Running Fara...\n")
                # Only the final answer is reported; the action and
                # observation histories are not used here.
                final_answer, _actions, _observations = await agent.run(task)
                print(f"\nFinal Answer: {final_answer}")
            except Exception as e:
                # A failed task must not end the session; report it and
                # go back to prompting.
                print(f"Error occurred: {e}")

            # Force a prompt on the next iteration.
            task = None
            first_round = False
    finally:
        # Close the agent and browser
        await agent.close()
def main(argv: Optional[Sequence[str]] = None) -> None:
    """CLI entry point for fara command.

    Parses the command line, validates the Browserbase environment when
    ``--browserbase`` is given, loads the endpoint configuration, and runs
    ``run_fara_agent`` to completion with ``asyncio.run``.

    Invalid invocations (missing Browserbase environment variables, or an
    endpoint configuration file that cannot be read or parsed) are reported
    through ``parser.error``, which prints a usage message and exits with
    status 2.

    Args:
        argv: Argument list to parse.  ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Run FARA agent interactively")
    parser.add_argument(
        "--task",
        type=str,
        required=False,
        help="Initial task for the FARA agent (optional)",
    )
    parser.add_argument(
        "--start_page",
        type=str,
        default="https://www.bing.com/",
        help="The starting page",
    )
    parser.add_argument(
        "--headful",
        action="store_true",
        help="Run the browser in headful mode (show GUI, default is headless)",
    )
    parser.add_argument(
        "--downloads_folder",
        type=str,
        default=None,
        help="Folder to save screenshots and downloads",
    )
    parser.add_argument(
        "--save_screenshots",
        action="store_true",
        help="Whether to save screenshots during the agent's operation",
    )
    parser.add_argument(
        "--max_rounds",
        type=int,
        default=100,
        help="Maximum number of rounds for the agent to run",
    )
    parser.add_argument(
        "--browserbase",
        action="store_true",
        help="Whether to use BrowserBase for browser management",
    )
    parser.add_argument(
        "--endpoint_config",
        type=Path,
        default=None,
        help=(
            "Path to the endpoint configuration JSON file. "
            "By default, tries local vllm on 5000 port"
        ),
    )
    args = parser.parse_args(argv)

    if args.browserbase:
        # Explicit check instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        required_vars = ("BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID")
        missing = [name for name in required_vars if not os.environ.get(name)]
        if missing:
            parser.error(
                "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID environment "
                "variables must be set to use browserbase "
                f"(missing: {', '.join(missing)})"
            )

    if args.endpoint_config is not None:
        try:
            with open(args.endpoint_config, "r", encoding="utf-8") as f:
                endpoint_config = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError.
            parser.error(
                f"could not load endpoint config {args.endpoint_config}: {exc}"
            )
    else:
        endpoint_config = DEFAULT_ENDPOINT_CONFIG

    asyncio.run(
        run_fara_agent(
            initial_task=args.task,
            endpoint_config=endpoint_config,
            start_page=args.start_page,
            headless=not args.headful,
            downloads_folder=args.downloads_folder,
            save_screenshots=args.save_screenshots,
            max_rounds=args.max_rounds,
            use_browser_base=args.browserbase,
        )
    )
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|