# mirror of https://github.com/microsoft/autogen.git
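"""Run a single WebArena-style web task end to end with a Magentic-One agent team.

The script expands a task template, signs the browser into the required sites,
lets an orchestrated team of agents carry out the task, distills a FINAL ANSWER
from the transcript, and scores the result with the bundled evaluation harness.
"""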

import asyncio
import logging
import json
import os
import re
import nltk

from typing import Any, Dict, List, Tuple, Union

from autogen_core import AgentId, AgentProxy, TopicId, DefaultSubscription, DefaultTopicId
from autogen_core.application import SingleThreadedAgentRuntime
from autogen_core.application.logging import EVENT_LOGGER_NAME
from autogen_core.components.code_executor import LocalCommandLineCodeExecutor
from autogen_core.components.models import (
    ChatCompletionClient,
    UserMessage,
    SystemMessage,
    LLMMessage,
)

from autogen_magentic_one.markdown_browser import MarkdownConverter, UnsupportedFormatException
from autogen_magentic_one.agents.coder import Coder, Executor
from autogen_magentic_one.agents.orchestrator import RoundRobinOrchestrator, LedgerOrchestrator
from autogen_magentic_one.messages import (
    BroadcastMessage,
    OrchestrationEvent,
    RequestReplyMessage,
    ResetMessage,
    DeactivateMessage,
)
from autogen_magentic_one.agents.multimodal_web_surfer import MultimodalWebSurfer
from autogen_magentic_one.agents.file_surfer import FileSurfer
from autogen_magentic_one.utils import LogHandler, message_content_to_str, create_completion_client_from_env
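
# The evaluation harness (assumed to sit alongside this script) provides the
# WebArena-style site constants, login prompts, and task evaluators used below.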
import evaluation_harness
from evaluation_harness.env_config import (
    ACCOUNTS,
    GITLAB,
    MAP,
    REDDIT,
    SHOPPING,
    SHOPPING_ADMIN,
    WIKIPEDIA,
    HOMEPAGE,
    SITE_URLS,
    LOGIN_PROMPTS,
    SITE_DESCRIPTIONS,
    url_to_sitename,
)
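
# Placeholders used in the task template files; each maps to the concrete site
# URL supplied by the evaluation harness environment.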
REPLACEMENTS = {
    "__REDDIT__": REDDIT,
    "__SHOPPING__": SHOPPING,
    "__SHOPPING_ADMIN__": SHOPPING_ADMIN,
    "__GITLAB__": GITLAB,
    "__WIKIPEDIA__": WIKIPEDIA,
    "__MAP__": MAP,
    "__HOMEPAGE__": HOMEPAGE,
}
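
# Download nltk's punkt tokenizer data up front so it is available when answers are scored.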
nltk.download("punkt")


async def response_preparer(task: str, source: str, client: ChatCompletionClient, transcript: List[LLMMessage]) -> str:
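    """Replay the team's transcript to the model and ask it to produce a single FINAL ANSWER for `task`."""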
    messages: List[LLMMessage] = [
        UserMessage(
            content=f"Earlier you were asked the following:\n\n{task}\n\nYour team then worked diligently to address that request. Here is a transcript of that conversation:",
            source=source,
        )
    ]

    # Copy the transcript messages into this context.
    for message in transcript:
        messages.append(
            UserMessage(
                content=message_content_to_str(message.content),
                # TODO fix this -> remove type ignore
                source=message.source,  # type: ignore
            )
        )

    # Ask for the final answer.
    messages.append(
        UserMessage(
            content=f"""
Read the above conversation and output a FINAL ANSWER to the original request. The original request is repeated here for convenience:

{task}

To output the final answer, use the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
Your FINAL ANSWER should be as few words as possible.
If the original request was not a question, or you did not find a definitive answer, simply summarize the final state of the page or task as your FINAL ANSWER.""",
            source=source,
        )
    )

    response = await client.create(messages)
    assert isinstance(response.content, str)
    return response.content


async def main() -> None:
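    """Log into the required sites, run the task with an orchestrated agent team, and score the result."""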
    # Expand the prompt and the full task
    task_prompt = ""
    TASK = None
    with open("task_prompt.json.txt", "rt") as fh:
        task_prompt = fh.read()
    with open("task_prompt.json", "wt") as fh:
        for k in REPLACEMENTS:
            task_prompt = task_prompt.replace(k, REPLACEMENTS[k])
        fh.write(task_prompt)
    TASK = json.loads(task_prompt)
    if TASK["start_url"] == REDDIT:
        # Start Reddit tasks from the /forums/all listing rather than the bare homepage.
        TASK["start_url"] = TASK["start_url"] + "/forums/all"

    full_task = ""
    with open("full_task.json.txt", "rt") as fh:
        full_task = fh.read()
    with open("full_task.json", "wt") as fh:
        for k in REPLACEMENTS:
            full_task = full_task.replace(k, REPLACEMENTS[k])
        fh.write(full_task)
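
    # The run has two phases: a RoundRobinOrchestrator first pairs the WebSurfer with a
    # LoginAssistant to sign into the required sites, after which a LedgerOrchestrator
    # drives the full agent team on the actual task.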

    # Create the runtime.
    runtime = SingleThreadedAgentRuntime()

    # Create the chat completion client (e.g., Azure OpenAI with AAD auth) from environment variables.
    client = create_completion_client_from_env()

    # Login assistant: a text-only agent that tells the WebSurfer how to sign into each site.
    await runtime.register(
        "LoginAssistant",
        lambda: Coder(
            model_client=client,
            system_messages=[
                SystemMessage("""You are a general-purpose AI assistant and can handle many questions -- but you don't have access to a web browser. However, the user you are talking to does have a browser, and you can see the screen. Provide short direct instructions to them to take you where you need to go to answer the initial question posed to you.

Once the user has taken the final necessary action to complete the task, and you have fully addressed the initial request, reply with the word TERMINATE.""")
            ],
        ),
        subscriptions=lambda: [DefaultSubscription()],
    )
    login_assistant = AgentProxy(AgentId("LoginAssistant", "default"), runtime)

    # Web surfer
    await runtime.register(
        "WebSurfer",
        lambda: MultimodalWebSurfer(),  # Configuration is set later by init()
        subscriptions=lambda: [DefaultSubscription()],
    )
    web_surfer = AgentProxy(AgentId("WebSurfer", "default"), runtime)
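
    # Grab the underlying WebSurfer instance so it can be configured directly and, later,
    # so the live Playwright page can be handed to the evaluator.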
    actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer.id, type=MultimodalWebSurfer)
    await actual_surfer.init(model_client=client, downloads_folder=os.getcwd(), browser_channel="chromium")

    # Round-robin orchestrator
    await runtime.register(
        "round_robin_orc",
        lambda: RoundRobinOrchestrator(agents=[web_surfer, login_assistant]),
        subscriptions=lambda: [DefaultSubscription()],
    )
    round_robin_orc = AgentProxy(AgentId("round_robin_orc", "default"), runtime)

    # Login to the necessary websites: broadcast each site's login prompt and let the
    # LoginAssistant/WebSurfer pair run until the runtime is idle.
    for site in TASK["sites"]:
        if site in ["reddit", "gitlab", "shopping", "shopping_admin"]:
            actual_surfer.start_page = SITE_URLS[site]

            runtime.start()
            await runtime.publish_message(
                ResetMessage(),
                topic_id=DefaultTopicId(),
            )
            await runtime.publish_message(
                BroadcastMessage(content=UserMessage(content=LOGIN_PROMPTS[site], source="human")),
                topic_id=DefaultTopicId(),
            )
            await runtime.stop_when_idle()

    # Deactivate the login-related agents so they do not respond during the main task.
    runtime.start()
    await runtime.send_message(DeactivateMessage(), login_assistant.id)
    await runtime.send_message(DeactivateMessage(), round_robin_orc.id)
    await runtime.stop_when_idle()

    # By this point, we should be logged in. Prepare for the main event.
    await runtime.register(
        "Assistant",
        lambda: Coder(model_client=client),
        subscriptions=lambda: [DefaultSubscription()],
    )
    coder = AgentProxy(AgentId("Assistant", "default"), runtime)
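
    # A local command-line executor that runs code proposed by the Coder, auto-accepting execution.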
    await runtime.register(
        "ComputerTerminal",
        lambda: Executor(executor=LocalCommandLineCodeExecutor(), confirm_execution="ACCEPT_ALL"),
        subscriptions=lambda: [DefaultSubscription()],
    )
    executor = AgentProxy(AgentId("ComputerTerminal", "default"), runtime)
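
    # A FileSurfer agent that can read local files (e.g., anything the browser downloads).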
    await runtime.register(
        "FileSurfer",
        lambda: FileSurfer(model_client=client),
        subscriptions=lambda: [DefaultSubscription()],
    )
    file_surfer = AgentProxy(AgentId("FileSurfer", "default"), runtime)
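
    # The LedgerOrchestrator coordinates the full team, bounded by a round limit and a wall-clock budget.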
    await runtime.register(
        "orchestrator",
        lambda: LedgerOrchestrator(
            agents=[coder, executor, file_surfer, web_surfer],
            model_client=client,
            max_rounds=30,
            max_time=25 * 60,  # seconds
        ),
        subscriptions=lambda: [DefaultSubscription()],
    )
    orchestrator = AgentProxy(AgentId("orchestrator", "default"), runtime)

    # The main event
    actual_surfer.start_page = TASK["start_url"]
    runtime.start()
    await runtime.send_message(ResetMessage(), web_surfer.id)

    # Provide some background about the pages
    site_description_prompt = ""
    sitename = url_to_sitename(TASK["start_url"])
    if sitename:
        site_description_prompt = ", " + SITE_DESCRIPTIONS[sitename]
    task = f"Your web browser is currently open to the website {TASK['start_url']}{site_description_prompt}. On this website, please complete the following task:\n\n{TASK['intent']}"

    await runtime.publish_message(
        BroadcastMessage(content=UserMessage(content=task.strip(), source="human")),
        topic_id=DefaultTopicId(),
    )

    await runtime.stop_when_idle()

    # Output the final answer
    actual_orchestrator = await runtime.try_get_underlying_agent_instance(orchestrator.id, type=LedgerOrchestrator)
    transcript: List[LLMMessage] = actual_orchestrator._chat_history  # type: ignore

    orc_metadata = await orchestrator.metadata
    source = orc_metadata["type"]
    final_answer = await response_preparer(task=TASK["intent"], source=source, client=client, transcript=transcript)

    m = re.search("FINAL ANSWER:(.*)$", final_answer, re.DOTALL)
    if m:
        final_answer = m.group(1).strip()
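
    # Echo the answer as a WebArena-style stop action, followed by a sentinel line for log processing.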
    print('page.stop("' + final_answer + '")')
    print("MAIN TASK COMPLETE !#!#")

    ########## EVALUATION ##########
    # Reach into the WebSurfer for its Playwright context and page, and open a CDP session
    # that the evaluator can use to inspect browser state.
    context = actual_surfer._context
    page = actual_surfer._page
    cdp_session = await context.new_cdp_session(page)
    config_file = "full_task.json"
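
    # Route to the evaluator defined by the task config and score the final answer against it.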
    evaluator = evaluation_harness.evaluator_router(config_file)
    score = await evaluator(
        trajectory=evaluation_harness.make_answer_trajecotry(final_answer),  # note: spelling matches the harness helper
        config_file=config_file,
        page=page,
        client=cdp_session,
        # azure_config=llm_config,
    )

    print("FINAL SCORE: " + str(score))


if __name__ == "__main__":
    # Route agent events through the Magentic-One LogHandler before running the scenario.
    logger = logging.getLogger(EVENT_LOGGER_NAME)
    logger.setLevel(logging.INFO)
    log_handler = LogHandler()
    logger.handlers = [log_handler]
    asyncio.run(main())