# Source: autogen/python/packages/agbench/benchmarks/WebArena/Templates/MagenticOne/scenario.py

import asyncio
import logging
import json
import os
import re
import nltk
from typing import Any, Dict, List, Tuple, Union
from autogen_core import AgentId, AgentProxy, TopicId
from autogen_core.application import SingleThreadedAgentRuntime
from autogen_core.application.logging import EVENT_LOGGER_NAME
from autogen_core import DefaultSubscription, DefaultTopicId
from autogen_core.components.code_executor import LocalCommandLineCodeExecutor
from autogen_core.components.models import (
ChatCompletionClient,
UserMessage,
SystemMessage,
LLMMessage,
)
from autogen_magentic_one.markdown_browser import MarkdownConverter, UnsupportedFormatException
from autogen_magentic_one.agents.coder import Coder, Executor
from autogen_magentic_one.agents.orchestrator import RoundRobinOrchestrator, LedgerOrchestrator
from autogen_magentic_one.messages import BroadcastMessage, OrchestrationEvent, RequestReplyMessage, ResetMessage, DeactivateMessage
from autogen_magentic_one.agents.multimodal_web_surfer import MultimodalWebSurfer
from autogen_magentic_one.agents.file_surfer import FileSurfer
from autogen_magentic_one.utils import LogHandler, message_content_to_str, create_completion_client_from_env
import evaluation_harness
from evaluation_harness.env_config import (
ACCOUNTS,
GITLAB,
MAP,
REDDIT,
SHOPPING,
SHOPPING_ADMIN,
WIKIPEDIA,
HOMEPAGE,
SITE_URLS,
LOGIN_PROMPTS,
SITE_DESCRIPTIONS,
url_to_sitename,
)
# Placeholder tokens used in the on-disk task templates ("task_prompt.json.txt",
# "full_task.json.txt"); each is replaced with the concrete site URL taken from
# the evaluation harness environment configuration before the task is parsed.
REPLACEMENTS = {
    "__REDDIT__": REDDIT,
    "__SHOPPING__": SHOPPING,
    "__SHOPPING_ADMIN__": SHOPPING_ADMIN,
    "__GITLAB__": GITLAB,
    "__WIKIPEDIA__": WIKIPEDIA,
    "__MAP__": MAP,
    "__HOMEPAGE__": HOMEPAGE,
}

# Fetch the NLTK "punkt" sentence-tokenizer data at import time -- presumably
# needed by the evaluation harness's answer scoring (not used directly in this
# file); TODO confirm against evaluation_harness.
nltk.download("punkt")
async def response_preparer(task: str, source: str, client: ChatCompletionClient, transcript: List[LLMMessage]) -> str:
    """Distill a single FINAL ANSWER for *task* from a team conversation.

    Builds a fresh LLM context containing the original task, a replay of the
    full *transcript* as user messages, and a closing instruction asking for a
    "FINAL ANSWER: ..." response, then returns the model's reply text.
    """
    preamble = UserMessage(
        content=f"Earlier you were asked the following:\n\n{task}\n\nYour team then worked diligently to address that request. Here is a transcript of that conversation:",
        source=source,
    )

    # Replay every transcript entry into this fresh context as a user message.
    # TODO fix this -> remove type ignore
    replayed = [
        UserMessage(
            content=message_content_to_str(entry.content),
            source=entry.source,  # type: ignore
        )
        for entry in transcript
    ]

    # Closing instruction: request the final answer in a fixed template.
    closing = UserMessage(
        content=f"""
Read the above conversation and output a FINAL ANSWER to the original request. The original request is repeated here for convenience:
{task}
To output the final answer, use the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
Your FINAL ANSWER should be as few words as possible.
If the original request was not a question, or you did not find a definitive answer, simply summarize the final state of the page or task as your FINAL ANSWER.""",
        source=source,
    )

    response = await client.create([preamble, *replayed, closing])
    assert isinstance(response.content, str)
    return response.content
async def main() -> None:
    """Run one WebArena task end-to-end.

    Phases: expand the task templates with concrete site URLs, log in to the
    task's sites with a small login team, solve the task with the Magentic-One
    team, extract a FINAL ANSWER, then score it with the evaluation harness.
    """
    # Expand the prompt and the full task: substitute the __SITE__ placeholder
    # tokens with real URLs, persist the expanded JSON, and parse the task.
    task_prompt = ""
    TASK = None
    with open("task_prompt.json.txt", "rt") as fh:
        task_prompt = fh.read()
    with open("task_prompt.json", "wt") as fh:
        for k in REPLACEMENTS:
            task_prompt = task_prompt.replace(k, REPLACEMENTS[k])
        fh.write(task_prompt)
        TASK = json.loads(task_prompt)
        if TASK["start_url"] == REDDIT:
            # Redirect to the forum listing -- presumably a more useful start
            # page than the Reddit clone's root; TODO confirm.
            TASK["start_url"] = TASK["start_url"] + "/forums/all"

    # Same expansion for the full task config consumed by the evaluator below.
    full_task = ""
    with open("full_task.json.txt", "rt") as fh:
        full_task = fh.read()
    with open("full_task.json", "wt") as fh:
        for k in REPLACEMENTS:
            full_task = full_task.replace(k, REPLACEMENTS[k])
        fh.write(full_task)

    # Create the runtime.
    runtime = SingleThreadedAgentRuntime()

    # Create the AzureOpenAI client, with AAD auth
    client = create_completion_client_from_env()

    # Login assistant: a browserless coder that instructs the web surfer
    # through the login flow, terminating with the word TERMINATE.
    await runtime.register(
        "LoginAssistant",
        lambda: Coder(
            model_client=client,
            system_messages=[
                SystemMessage("""You are a general-purpose AI assistant and can handle many questions -- but you don't have access to a web browser. However, the user you are talking to does have a browser, and you can see the screen. Provide short direct instructions to them to take you where you need to go to answer the initial question posed to you.
Once the user has taken the final necessary action to complete the task, and you have fully addressed the initial request, reply with the word TERMINATE.""",
                )
            ],
        ),
        subscriptions=lambda: [DefaultSubscription()],
    )
    login_assistant = AgentProxy(AgentId("LoginAssistant", "default"), runtime)

    # Web surfer
    await runtime.register(
        "WebSurfer",
        lambda: MultimodalWebSurfer(),  # Configuration is set later by init()
        subscriptions=lambda: [DefaultSubscription()],
    )
    web_surfer = AgentProxy(AgentId("WebSurfer", "default"), runtime)
    actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer.id, type=MultimodalWebSurfer)
    await actual_surfer.init(model_client=client, downloads_folder=os.getcwd(), browser_channel="chromium")

    # Round-robin orchestrator: alternates surfer and login assistant during
    # the login phase only.
    await runtime.register(
        "round_robin_orc",
        lambda: RoundRobinOrchestrator(agents=[web_surfer, login_assistant],),
        subscriptions=lambda: [DefaultSubscription()],
    )
    round_robin_orc = AgentProxy(AgentId("round_robin_orc", "default"), runtime)

    # Login to the necessary websites. Each site gets its own start/reset/
    # prompt/idle cycle so login sessions do not bleed into one another.
    for site in TASK["sites"]:
        if site in ["reddit", "gitlab", "shopping", "shopping_admin"]:
            actual_surfer.start_page = SITE_URLS[site]
            runtime.start()
            await runtime.publish_message(
                ResetMessage(),
                topic_id=DefaultTopicId(),
            )
            await runtime.publish_message(
                BroadcastMessage(content=UserMessage(content=LOGIN_PROMPTS[site], source="human")),
                topic_id=DefaultTopicId(),
            )
            await runtime.stop_when_idle()

    # Deactivate the login-related agents so they do not participate in the
    # main task below.
    runtime.start()
    await runtime.send_message(DeactivateMessage(), login_assistant.id)
    await runtime.send_message(DeactivateMessage(), round_robin_orc.id)
    await runtime.stop_when_idle()

    # By this point, we should be logged in. Prepare for the main event:
    # register the full Magentic-One team (coder, executor, file surfer) plus
    # the ledger orchestrator that coordinates them.
    await runtime.register(
        "Assistant",
        lambda: Coder(model_client=client),
        subscriptions=lambda: [DefaultSubscription()],
    )
    coder = AgentProxy(AgentId("Assistant", "default"), runtime)

    await runtime.register(
        "ComputerTerminal",
        lambda: Executor(executor=LocalCommandLineCodeExecutor(), confirm_execution="ACCEPT_ALL"),
        subscriptions=lambda: [DefaultSubscription()],
    )
    executor = AgentProxy(AgentId("ComputerTerminal", "default"), runtime)

    await runtime.register(
        "FileSurfer",
        lambda: FileSurfer(model_client=client),
        subscriptions=lambda: [DefaultSubscription()],
    )
    file_surfer = AgentProxy(AgentId("FileSurfer", "default"), runtime)

    await runtime.register(
        "orchestrator",
        lambda: LedgerOrchestrator(
            agents=[coder, executor, file_surfer, web_surfer],
            model_client=client,
            max_rounds=30,
            max_time=25*60,  # hard wall-clock budget of 25 minutes
        ),
        subscriptions=lambda: [DefaultSubscription()],
    )
    orchestrator = AgentProxy(AgentId("orchestrator", "default"), runtime)

    # The main event
    actual_surfer.start_page = TASK["start_url"]
    runtime.start()
    await runtime.send_message(ResetMessage(), web_surfer.id)

    # Provide some background about the pages
    site_description_prompt = ""
    sitename = url_to_sitename(TASK["start_url"])
    if sitename:
        site_description_prompt = ", " + SITE_DESCRIPTIONS[sitename]
    task = f"Your web browser is currently open to the website {TASK['start_url']}{site_description_prompt}. On this website, please complete the following task:\n\n{TASK['intent']}"
    await runtime.publish_message(
        BroadcastMessage(content=UserMessage(content=task.strip(), source="human")),
        topic_id=DefaultTopicId(),
    )
    await runtime.stop_when_idle()

    # Output the final answer: replay the orchestrator's transcript through
    # the model and extract the "FINAL ANSWER:" tail.
    actual_orchestrator = await runtime.try_get_underlying_agent_instance(orchestrator.id, type=LedgerOrchestrator)
    transcript: List[LLMMessage] = actual_orchestrator._chat_history  # type: ignore  # reaches into a private attribute
    orc_metadata = await orchestrator.metadata
    source = orc_metadata["type"]
    final_answer = await response_preparer(task=TASK["intent"], source=source, client=client, transcript=transcript)

    m = re.search("FINAL ANSWER:(.*)$", final_answer, re.DOTALL)
    if m:
        final_answer = m.group(1).strip()

    # These exact strings are parsed by the benchmark harness -- do not edit.
    print('page.stop("' + final_answer + '")')
    print("MAIN TASK COMPLETE !#!#")

    ########## EVALUATION ##########
    # Score against the expanded full_task.json using the harness's evaluator,
    # driving the same browser page through a CDP session.
    context = actual_surfer._context
    page = actual_surfer._page
    cdp_session = await context.new_cdp_session(page)
    config_file = "full_task.json"
    evaluator = evaluation_harness.evaluator_router(config_file)
    # NOTE(review): "trajecotry" (sic) -- presumably matches the harness's own
    # spelling of this API; verify before correcting.
    score = await evaluator(
        trajectory=evaluation_harness.make_answer_trajecotry(final_answer),
        config_file=config_file,
        page=page,
        client=cdp_session,
        # azure_config=llm_config,
    )
    print("FINAL SCORE: " + str(score))
if __name__ == "__main__":
    # Route autogen event records through the benchmark's structured log
    # handler (replacing, not appending to, any existing handlers).
    event_logger = logging.getLogger(EVENT_LOGGER_NAME)
    event_logger.setLevel(logging.INFO)
    event_logger.handlers = [LogHandler()]
    asyncio.run(main())