# Source: autogen/python/packages/agbench/benchmarks/WebArena/Templates/MagenticOne/scenario.py

import asyncio
import logging
import json
import os
import re
import nltk
from typing import Any, Dict, List, Tuple, Union
from autogen_core import AgentId, AgentProxy, TopicId
from autogen_core.application import SingleThreadedAgentRuntime
from autogen_core.application.logging import EVENT_LOGGER_NAME
from autogen_core import DefaultSubscription, DefaultTopicId
from autogen_core.components.code_executor import LocalCommandLineCodeExecutor
from autogen_core.components.models import (
ChatCompletionClient,
UserMessage,
SystemMessage,
LLMMessage,
)
from autogen_magentic_one.markdown_browser import MarkdownConverter, UnsupportedFormatException
from autogen_magentic_one.agents.coder import Coder, Executor
from autogen_magentic_one.agents.orchestrator import RoundRobinOrchestrator, LedgerOrchestrator
from autogen_magentic_one.messages import BroadcastMessage, OrchestrationEvent, RequestReplyMessage, ResetMessage, DeactivateMessage
from autogen_magentic_one.agents.multimodal_web_surfer import MultimodalWebSurfer
from autogen_magentic_one.agents.file_surfer import FileSurfer
from autogen_magentic_one.utils import LogHandler, message_content_to_str, create_completion_client_from_env
import evaluation_harness
from evaluation_harness.env_config import (
ACCOUNTS,
GITLAB,
MAP,
REDDIT,
SHOPPING,
SHOPPING_ADMIN,
WIKIPEDIA,
HOMEPAGE,
SITE_URLS,
LOGIN_PROMPTS,
SITE_DESCRIPTIONS,
url_to_sitename,
)
# Placeholder tokens used in the on-disk task templates ("task_prompt.json.txt",
# "full_task.json.txt"); each is replaced with the concrete site URL taken from
# the evaluation harness environment configuration before the task is parsed.
REPLACEMENTS = {
    "__REDDIT__": REDDIT,
    "__SHOPPING__": SHOPPING,
    "__SHOPPING_ADMIN__": SHOPPING_ADMIN,
    "__GITLAB__": GITLAB,
    "__WIKIPEDIA__": WIKIPEDIA,
    "__MAP__": MAP,
    "__HOMEPAGE__": HOMEPAGE,
}

# Fetch the NLTK "punkt" sentence-tokenizer data at import time -- presumably
# needed by the evaluation harness's answer scoring (not used directly in this
# file); TODO confirm against evaluation_harness.
nltk.download("punkt")
async def response_preparer(task: str, source: str, client: ChatCompletionClient, transcript: List[LLMMessage]) -> str:
    """Distill a single FINAL ANSWER for *task* from a team conversation.

    Builds a fresh LLM context containing the original task, a replay of the
    full *transcript* as user messages, and a closing instruction asking for a
    "FINAL ANSWER: ..." response, then returns the model's reply text.
    """
    preamble = UserMessage(
        content=f"Earlier you were asked the following:\n\n{task}\n\nYour team then worked diligently to address that request. Here is a transcript of that conversation:",
        source=source,
    )

    # Replay every transcript entry into this fresh context as a user message.
    # TODO fix this -> remove type ignore
    replayed = [
        UserMessage(
            content=message_content_to_str(entry.content),
            source=entry.source,  # type: ignore
        )
        for entry in transcript
    ]

    # Closing instruction: request the final answer in a fixed template.
    closing = UserMessage(
        content=f"""
Read the above conversation and output a FINAL ANSWER to the original request. The original request is repeated here for convenience:
{task}
To output the final answer, use the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
Your FINAL ANSWER should be as few words as possible.
If the original request was not a question, or you did not find a definitive answer, simply summarize the final state of the page or task as your FINAL ANSWER.""",
        source=source,
    )

    response = await client.create([preamble, *replayed, closing])
    assert isinstance(response.content, str)
    return response.content
async def main() -> None:
    """Run one WebArena task end-to-end.

    Phases: expand the task templates with concrete site URLs, log in to the
    task's sites with a small login team, solve the task with the Magentic-One
    team, extract a FINAL ANSWER, then score it with the evaluation harness.
    """
    # Expand the prompt and the full task: substitute the __SITE__ placeholder
    # tokens with real URLs, persist the expanded JSON, and parse the task.
    task_prompt = ""
    TASK = None
    with open("task_prompt.json.txt", "rt") as fh:
        task_prompt = fh.read()
    with open("task_prompt.json", "wt") as fh:
        for k in REPLACEMENTS:
            task_prompt = task_prompt.replace(k, REPLACEMENTS[k])
        fh.write(task_prompt)
        TASK = json.loads(task_prompt)
        if TASK["start_url"] == REDDIT:
            # Redirect to the forum listing -- presumably a more useful start
            # page than the Reddit clone's root; TODO confirm.
            TASK["start_url"] = TASK["start_url"] + "/forums/all"

    # Same expansion for the full task config consumed by the evaluator below.
    full_task = ""
    with open("full_task.json.txt", "rt") as fh:
        full_task = fh.read()
    with open("full_task.json", "wt") as fh:
        for k in REPLACEMENTS:
            full_task = full_task.replace(k, REPLACEMENTS[k])
        fh.write(full_task)

    # Create the runtime.
    runtime = SingleThreadedAgentRuntime()

    # Create the AzureOpenAI client, with AAD auth
    client = create_completion_client_from_env()

    # Login assistant: a browserless coder that instructs the web surfer
    # through the login flow, terminating with the word TERMINATE.
    await runtime.register(
        "LoginAssistant",
        lambda: Coder(
            model_client=client,
            system_messages=[
                SystemMessage("""You are a general-purpose AI assistant and can handle many questions -- but you don't have access to a web browser. However, the user you are talking to does have a browser, and you can see the screen. Provide short direct instructions to them to take you where you need to go to answer the initial question posed to you.
Once the user has taken the final necessary action to complete the task, and you have fully addressed the initial request, reply with the word TERMINATE.""",
                )
            ],
        ),
        subscriptions=lambda: [DefaultSubscription()],
    )
    login_assistant = AgentProxy(AgentId("LoginAssistant", "default"), runtime)

    # Web surfer
    await runtime.register(
        "WebSurfer",
        lambda: MultimodalWebSurfer(),  # Configuration is set later by init()
        subscriptions=lambda: [DefaultSubscription()],
    )
    web_surfer = AgentProxy(AgentId("WebSurfer", "default"), runtime)
    actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer.id, type=MultimodalWebSurfer)
    await actual_surfer.init(model_client=client, downloads_folder=os.getcwd(), browser_channel="chromium")

    # Round-robin orchestrator: alternates surfer and login assistant during
    # the login phase only.
    await runtime.register(
        "round_robin_orc",
        lambda: RoundRobinOrchestrator(agents=[web_surfer, login_assistant],),
        subscriptions=lambda: [DefaultSubscription()],
    )
    round_robin_orc = AgentProxy(AgentId("round_robin_orc", "default"), runtime)

    # Login to the necessary websites. Each site gets its own start/reset/
    # prompt/idle cycle so login sessions do not bleed into one another.
    for site in TASK["sites"]:
        if site in ["reddit", "gitlab", "shopping", "shopping_admin"]:
            actual_surfer.start_page = SITE_URLS[site]
            runtime.start()
            await runtime.publish_message(
                ResetMessage(),
                topic_id=DefaultTopicId(),
            )
            await runtime.publish_message(
                BroadcastMessage(content=UserMessage(content=LOGIN_PROMPTS[site], source="human")),
                topic_id=DefaultTopicId(),
            )
            await runtime.stop_when_idle()

    # Deactivate the login-related agents so they do not participate in the
    # main task below.
    runtime.start()
    await runtime.send_message(DeactivateMessage(), login_assistant.id)
    await runtime.send_message(DeactivateMessage(), round_robin_orc.id)
    await runtime.stop_when_idle()

    # By this point, we should be logged in. Prepare for the main event:
    # register the full Magentic-One team (coder, executor, file surfer) plus
    # the ledger orchestrator that coordinates them.
    await runtime.register(
        "Assistant",
        lambda: Coder(model_client=client),
        subscriptions=lambda: [DefaultSubscription()],
    )
    coder = AgentProxy(AgentId("Assistant", "default"), runtime)

    await runtime.register(
        "ComputerTerminal",
        lambda: Executor(executor=LocalCommandLineCodeExecutor(), confirm_execution="ACCEPT_ALL"),
        subscriptions=lambda: [DefaultSubscription()],
    )
    executor = AgentProxy(AgentId("ComputerTerminal", "default"), runtime)

    await runtime.register(
        "FileSurfer",
        lambda: FileSurfer(model_client=client),
        subscriptions=lambda: [DefaultSubscription()],
    )
    file_surfer = AgentProxy(AgentId("FileSurfer", "default"), runtime)

    await runtime.register(
        "orchestrator",
        lambda: LedgerOrchestrator(
            agents=[coder, executor, file_surfer, web_surfer],
            model_client=client,
            max_rounds=30,
            max_time=25*60,  # hard wall-clock budget of 25 minutes
        ),
        subscriptions=lambda: [DefaultSubscription()],
    )
    orchestrator = AgentProxy(AgentId("orchestrator", "default"), runtime)

    # The main event
    actual_surfer.start_page = TASK["start_url"]
    runtime.start()
    await runtime.send_message(ResetMessage(), web_surfer.id)

    # Provide some background about the pages
    site_description_prompt = ""
    sitename = url_to_sitename(TASK["start_url"])
    if sitename:
        site_description_prompt = ", " + SITE_DESCRIPTIONS[sitename]
    task = f"Your web browser is currently open to the website {TASK['start_url']}{site_description_prompt}. On this website, please complete the following task:\n\n{TASK['intent']}"
    await runtime.publish_message(
        BroadcastMessage(content=UserMessage(content=task.strip(), source="human")),
        topic_id=DefaultTopicId(),
    )
    await runtime.stop_when_idle()

    # Output the final answer: replay the orchestrator's transcript through
    # the model and extract the "FINAL ANSWER:" tail.
    actual_orchestrator = await runtime.try_get_underlying_agent_instance(orchestrator.id, type=LedgerOrchestrator)
    transcript: List[LLMMessage] = actual_orchestrator._chat_history  # type: ignore  # reaches into a private attribute
    orc_metadata = await orchestrator.metadata
    source = orc_metadata["type"]
    final_answer = await response_preparer(task=TASK["intent"], source=source, client=client, transcript=transcript)

    m = re.search("FINAL ANSWER:(.*)$", final_answer, re.DOTALL)
    if m:
        final_answer = m.group(1).strip()

    # These exact strings are parsed by the benchmark harness -- do not edit.
    print('page.stop("' + final_answer + '")')
    print("MAIN TASK COMPLETE !#!#")

    ########## EVALUATION ##########
    # Score against the expanded full_task.json using the harness's evaluator,
    # driving the same browser page through a CDP session.
    context = actual_surfer._context
    page = actual_surfer._page
    cdp_session = await context.new_cdp_session(page)
    config_file = "full_task.json"
    evaluator = evaluation_harness.evaluator_router(config_file)
    # NOTE(review): "trajecotry" (sic) -- presumably matches the harness's own
    # spelling of this API; verify before correcting.
    score = await evaluator(
        trajectory=evaluation_harness.make_answer_trajecotry(final_answer),
        config_file=config_file,
        page=page,
        client=cdp_session,
        # azure_config=llm_config,
    )
    print("FINAL SCORE: " + str(score))
if __name__ == "__main__":
    # Route autogen event records through the benchmark's structured log
    # handler (replacing, not appending to, any existing handlers).
    event_logger = logging.getLogger(EVENT_LOGGER_NAME)
    event_logger.setLevel(logging.INFO)
    event_logger.handlers = [LogHandler()]
    asyncio.run(main())