settings -> configs, and other fixes

Author: Ricky Loynd
Date:   2025-02-07 10:06:16 -08:00
Parent: 69fd121139
Commit: bcc148a796
9 changed files with 70 additions and 66 deletions

View File

@@ -5,8 +5,8 @@ from ._client_wrapper import ClientWrapper
 class ClientCreator:
-    def __init__(self, settings, logger):
-        self.settings = settings
+    def __init__(self, config, logger):
+        self.config = config
         self.logger = logger
 
     def create_client(self):
@@ -14,19 +14,19 @@ class ClientCreator:
         # A few args are shared by all clients.
         args = {}
-        args["model"] = self.settings["model"]
-        args["max_completion_tokens"] = self.settings["max_completion_tokens"]
-        args["max_retries"] = self.settings["max_retries"]
+        args["model"] = self.config["model"]
+        args["max_completion_tokens"] = self.config["max_completion_tokens"]
+        args["max_retries"] = self.config["max_retries"]
 
         # The following args don't apply to the 'o1' family of models.
         if not args["model"].startswith("o1"):
-            args["temperature"] = self.settings["temperature"]
-            args["presence_penalty"] = self.settings["presence_penalty"]
-            args["frequency_penalty"] = self.settings["frequency_penalty"]
-            args["top_p"] = self.settings["top_p"]
+            args["temperature"] = self.config["temperature"]
+            args["presence_penalty"] = self.config["presence_penalty"]
+            args["frequency_penalty"] = self.config["frequency_penalty"]
+            args["top_p"] = self.config["top_p"]
 
         client = None
-        provider = self.settings["provider"]
+        provider = self.config["provider"]
         if provider == "openai":
             client, source = self.create_oai_client(args)
         elif provider == "azure_openai":
@@ -41,18 +41,19 @@ class ClientCreator:
         self.logger.info(source)
 
         # Check if the client should be wrapped.
-        if "ClientWrapper" in self.settings:
-            wrapper_settings = self.settings["ClientWrapper"]
-            if wrapper_settings["enabled"]:
+        if "ClientWrapper" in self.config:
+            wrapper_config = self.config["ClientWrapper"]
+            if wrapper_config["enabled"]:
                 # Wrap the client.
-                client = ClientWrapper(client, wrapper_settings["mode"], wrapper_settings["session_name"], self.logger)
+                client = ClientWrapper(client, wrapper_config["mode"], wrapper_config["session_name"], self.logger)
 
         self.logger.leave_function()
         return client
 
     def create_oai_client(self, args):
         # Create an OpenAI client
-        args["api_key"] = self.settings["api_key"]
+        if "api_key" in self.config:
+            args["api_key"] = self.config["api_key"]
         client = OpenAIChatCompletionClient(**args)
         return client, " created through OpenAI"
@@ -61,18 +62,13 @@ class ClientCreator:
         token_provider = get_bearer_token_provider(
             DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
         )
-        model = self.settings["model"]
-        if model == "gpt-4o-2024-08-06":
-            azure_deployment = (
-                "gpt-4o-2024-08-06-eval"  # This is DeploymentName in the table at https://aka.ms/trapi/models
-            )
-            azure_endpoint = "https://agentic2.openai.azure.com/"
-        elif model == "gpt-4o-2024-05-13":
-            azure_deployment = "gpt-4o-2024-05-13-eval"
-            azure_endpoint = "https://agentic1.openai.azure.com/"
-        elif model == "o1-preview":
-            azure_deployment = "o1-preview-2024-09-12-eval"
-            azure_endpoint = "https://agentic1.openai.azure.com/"
+        model = self.config["model"]
+        if model == "gpt-4o-2024-11-20":
+            azure_deployment = "gpt-4o"
+            azure_endpoint = "https://agentic1.openai.azure.com/"  # Also on agentic2
+        # elif model == "o1-preview":
+        #     azure_deployment = "o1-preview-2024-09-12-eval"
+        #     azure_endpoint = "https://agentic2.openai.azure.com/"
         else:
             assert False, "Unsupported model"
         api_version = "2024-12-01-preview"  # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
@@ -102,24 +98,31 @@ class ClientCreator:
             ),
             "api://trapi/.default",
         )
-        model = self.settings["model"]
+        model = self.config["model"]
         if model == "gpt-4o-2024-08-06":
             azure_deployment = "gpt-4o_2024-08-06"  # This is DeploymentName in the table at https://aka.ms/trapi/models
         elif model == "gpt-4o-2024-05-13":
             azure_deployment = "gpt-4o_2024-05-13"
+        elif model == "gpt-4o-2024-11-20":
+            azure_deployment = "gpt-4o_2024-11-20"
         elif model == "o1-preview":
             azure_deployment = "o1-preview_2024-09-12"
         elif model == "o1":
             azure_deployment = "o1_2024-12-17"
+        elif model == "o3-mini":
+            azure_deployment = "o3-mini_2025-01-31"
+            model_version = "2025-01-31"
         else:
             assert False, "Unsupported model"
         trapi_suffix = (
             "msraif/shared"  # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models
         )
         endpoint = f"https://trapi.research.microsoft.com/{trapi_suffix}"
-        api_version = "2024-12-01-preview"  # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
+        api_version = "2025-01-01-preview"  # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
         args["azure_ad_token_provider"] = token_provider
         args["azure_deployment"] = azure_deployment
+        if model == "o3-mini":
+            args["model_version"] = model_version
         args["azure_endpoint"] = endpoint
         args["api_version"] = api_version
         client = AzureOpenAIChatCompletionClient(**args)
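
For orientation, here is a minimal sketch (not part of the commit) of the renamed config dict that ClientCreator now consumes. The key names are taken from the reads in the diff above; the values, the logger object, and the ClientWrapper block are illustrative assumptions.

# Hypothetical config for ClientCreator; key names mirror the reads above,
# values are placeholders.
config = {
    "model": "gpt-4o-2024-11-20",
    "provider": "azure_openai",
    "max_completion_tokens": 4096,
    "max_retries": 3,
    "temperature": 0.8,
    "presence_penalty": 0.0,
    "frequency_penalty": 0.0,
    "top_p": 1.0,
    "ClientWrapper": {"enabled": False, "mode": "record", "session_name": "demo"},
}
client_creator = ClientCreator(config=config, logger=logger)  # logger assumed to be a PageLogger
client = client_creator.create_client()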

View File

@@ -47,7 +47,10 @@ class ClientWrapper:
         self.base_client = base_client
         self.mode = mode
         self.next_item_index = 0
-        self.model_info = {"family": self.base_client.model_info["family"]}
+        self.model_info = {
+            "family": self.base_client.model_info["family"],
+            "vision": self.base_client.model_info.get("vision", False),
+        }
         self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), session_name + ".yaml")
         self.logger.info("Wrapping the base client in a ClientWrapper.")
         if self.mode == "record":
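
The wrapper's model_info now also forwards the base client's vision flag (defaulting to False). A hedged sketch of why this matters: downstream code that checks vision support through the wrapper would otherwise hit a missing key.

# Illustrative only (not from the commit): a caller probing vision support
# through the wrapped client.
if client.model_info.get("vision", False):
    pass  # multimodal path: image content may be included in messages
else:
    pass  # text-only path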

View File

@@ -1,12 +1,12 @@
 PageLogger:
   level: DEBUG  # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE.
-  path: ~/pagelogs/base2
+  path: ~/pagelogs/base3
 
 client:
-  model: gpt-4o-2024-08-06  # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc.
+  model: gpt-4o-2024-08-06  # gpt-4o-2024-05-13, gpt-4o-2024-08-06, gpt-4o-2024-11-20, o1-preview, o1, etc.
   provider: trapi  # openai, azure_openai, or trapi
-  api_key: sk-  # only for openai
+  # api_key: sk-  # only for openai
   temperature: 0.8
   max_completion_tokens: 4096
   presence_penalty: 0.0
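
With api_key commented out here, the guarded read added to create_oai_client above simply skips the argument, which presumably lets the OpenAI SDK fall back to its usual environment-variable resolution (an assumption about SDK behavior, not stated in the commit). A sketch of that assumed workflow:

# Assumption: supply the key via the environment instead of the YAML file.
import os
os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")  # placeholder value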

View File

@@ -1,12 +1,12 @@
 PageLogger:
   level: DEBUG  # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE.
-  path: ~/pagelogs/temp20
+  path: ~/pagelogs/temp22
 
 client:
   model: gpt-4o-2024-08-06  # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc.
   provider: trapi  # openai, azure_openai, or trapi
-  api_key: sk-  # only for openai
+  # api_key: sk-  # only for openai
   temperature: 0.8
   max_completion_tokens: 4096
   presence_penalty: 0.0

View File

@@ -7,31 +7,34 @@ from autogen_ext.agentic_memory import PageLogger, Apprentice
 from ame.clients._client_creator import ClientCreator
 
 
-async def perform_evaluations(settings, logger) -> None:
+async def perform_evaluations(config, logger) -> None:
     """
-    Perform the evaluations as specified in the settings file.
+    Perform the evaluations as specified in the config file.
     """
     logger.enter_function()
 
     # Create the client.
-    client_creator = ClientCreator(settings=settings["client"], logger=logger)
+    client_creator = ClientCreator(config=config["client"], logger=logger)
     client = client_creator.create_client()
 
     # Create the apprentice.
-    apprentice_settings = settings["Apprentice"]
-    apprentice = Apprentice(apprentice_settings, client, logger)
+    apprentice_config = config["Apprentice"]
+    apprentice = Apprentice(
+        client=client,
+        config=apprentice_config,
+        logger=logger)
 
     # Execute each evaluation.
-    for evaluation_settings in settings["evaluations"]:
+    for evaluation_config in config["evaluations"]:
         # Import the function.
-        function_settings = evaluation_settings["eval_function"]
-        module_path = function_settings["module_path"]
+        function_config = evaluation_config["eval_function"]
+        module_path = function_config["module_path"]
         try:
             module = importlib.import_module(module_path)
         except ModuleNotFoundError:
             print("Failed to import {}".format(module_path))
             raise
 
-        function_name = function_settings["function_name"]
+        function_name = function_config["function_name"]
         try:
             eval_function = getattr(module, function_name)
         except AttributeError:
@@ -39,8 +42,8 @@ async def perform_evaluations(settings, logger) -> None:
             raise
 
         # Call the eval function for each listed run.
-        for run_dict in evaluation_settings["runs"]:
-            results = await eval_function(apprentice, client, logger, function_settings, run_dict)
+        for run_dict in evaluation_config["runs"]:
+            results = await eval_function(apprentice, client, logger, function_config, run_dict)
             print(results)
 
     if hasattr(client, "finalize"):
@@ -51,14 +54,14 @@ async def perform_evaluations(settings, logger) -> None:
     logger.leave_function()
 
 
-async def run(settings_filepath):
-    # Load the settings from yaml.
-    with open(settings_filepath, "r") as file:
-        settings = yaml.load(file, Loader=yaml.FullLoader)
-    logger = PageLogger(settings["PageLogger"])
+async def run(config_filepath):
+    # Load the config from yaml.
+    with open(config_filepath, "r") as file:
+        config = yaml.load(file, Loader=yaml.FullLoader)
+    logger = PageLogger(config["PageLogger"])
 
     # Perform the evaluations.
-    await perform_evaluations(settings, logger)
+    await perform_evaluations(config, logger)
 
 
 if __name__ == "__main__":
@@ -66,4 +69,4 @@ if __name__ == "__main__":
     if len(args) != 1:
         print("Usage: amt.py <path to *.yaml file>")
     else:
-        asyncio.run(run(settings_filepath=args[0]))
+        asyncio.run(run(config_filepath=args[0]))
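
For reference, a sketch of the dict shape that run() and perform_evaluations() expect after yaml.load, reconstructed from the keys read above. Only the key names come from the code; all values and the module path are illustrative.

# Hypothetical config structure; values are placeholders.
config = {
    "PageLogger": {"level": "DEBUG", "path": "~/pagelogs/base3"},
    "client": {"model": "gpt-4o-2024-08-06", "provider": "trapi"},  # see the YAML sections above
    "Apprentice": {},  # contents passed through to Apprentice; not shown in this diff
    "evaluations": [
        {
            "eval_function": {
                "module_path": "ame.eval_functions.eval_teachability",  # illustrative path
                "function_name": "eval_teachability",
            },
            "runs": [{}],  # each dict is passed to the eval function as run_dict
        }
    ],
}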

View File

@@ -8,13 +8,13 @@ from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger
 
 
 async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatCompletionClient,
-                                           logger: PageLogger, settings: Dict, run_dict: Dict) -> str:
+                                           logger: PageLogger, config: Dict, run_dict: Dict) -> str:
     """
     Evaluates the ability to learn quickly from demonstrations.
     """
     logger.enter_function()
-    num_trials = settings["num_trials"]
+    num_trials = config["num_trials"]
     grader = Grader(client, logger)
 
     # Load the specified data.
@@ -40,7 +40,6 @@ async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatCompletionClient,
         num_trials=num_trials,
         use_memory=True,
         client=client,
-        logger=logger,
     )
     success_rate = round((num_successes / num_trials) * 100)
     results_str_1 = "Success rate before demonstration: {}%".format(success_rate)
@@ -59,7 +58,6 @@ async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatCompletionClient,
         num_trials=num_trials,
         use_memory=True,
         client=client,
-        logger=logger,
    )
     success_rate = round((num_successes / num_trials) * 100)
     results_str_2 = "Success rate after demonstration: {}%".format(success_rate)

View File

@@ -8,14 +8,14 @@ from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger
 
 
 async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClient,
-                             logger: PageLogger, settings: Dict, run_dict: Dict) -> str:
+                             logger: PageLogger, config: Dict, run_dict: Dict) -> str:
     """
     Evaluates the ability of an agent to learn quickly from its own trial and error.
     """
     logger.enter_function()
-    num_loops = settings["num_loops"]
-    num_final_test_trials = settings["num_final_test_trials"]
+    num_loops = config["num_loops"]
+    num_final_test_trials = config["num_final_test_trials"]
     grader = Grader(client, logger)
 
     # Load the specified data.
@@ -48,7 +48,6 @@ async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClient,
             num_trials=num_final_test_trials,
             use_memory=True,
             client=client,
-            logger=logger,
         )
         logger.info("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100)))
         total_num_successes_1 += num_successes
@@ -61,7 +60,6 @@ async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClient,
             num_trials=num_final_test_trials,
             use_memory=True,
             client=client,
-            logger=logger,
         )
         logger.info("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100)))
         total_num_successes_2 += num_successes

View File

@@ -8,7 +8,7 @@ from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger
 async def eval_teachability(apprentice: Apprentice, client: ChatCompletionClient,
-                            logger: PageLogger, settings: Dict, run_dict: Dict) -> str:
+                            logger: PageLogger, config: Dict, run_dict: Dict) -> str:
     """
     Evaluates the ability to learn quickly from user teachings, hints, and advice.
     """

View File

@@ -8,13 +8,13 @@ from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger
 async def eval_without_learning(apprentice: Apprentice, client: ChatCompletionClient,
-                                logger: PageLogger, settings: Dict, run_dict: Dict) -> str:
+                                logger: PageLogger, config: Dict, run_dict: Dict) -> str:
     """
     Performs an evaluation without the benefit of memory.
     """
     logger.enter_function()
-    num_trials = settings["num_trials"]
+    num_trials = config["num_trials"]
     grader = Grader(client, logger)
 
     # Load the specified data.
@@ -34,7 +34,6 @@ async def eval_without_learning(apprentice: Apprentice, client: ChatCompletionClient,
         num_trials=num_trials,
         use_memory=True,
         client=client,
-        logger=logger,
     )
     success_rate = round((num_successes / num_trials) * 100)
     results_str = "Success rate: {}%".format(success_rate)