diff --git a/python/packages/autogen-core/docs/src/reference/index.md b/python/packages/autogen-core/docs/src/reference/index.md
index 869ffc234..fdaf598c0 100644
--- a/python/packages/autogen-core/docs/src/reference/index.md
+++ b/python/packages/autogen-core/docs/src/reference/index.md
@@ -51,6 +51,7 @@ python/autogen_ext.teams.magentic_one
 python/autogen_ext.models.cache
 python/autogen_ext.models.openai
 python/autogen_ext.models.replay
+python/autogen_ext.models.azure
 python/autogen_ext.models.semantic_kernel
 python/autogen_ext.tools.langchain
 python/autogen_ext.tools.graphrag
diff --git a/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.azure.rst b/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.azure.rst
new file mode 100644
index 000000000..64c16a5a5
--- /dev/null
+++ b/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.azure.rst
@@ -0,0 +1,8 @@
+autogen\_ext.models.azure
+==========================
+
+
+.. automodule:: autogen_ext.models.azure
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml
index 2ecb0afb7..40acd71af 100644
--- a/python/packages/autogen-ext/pyproject.toml
+++ b/python/packages/autogen-ext/pyproject.toml
@@ -20,7 +20,11 @@ dependencies = [
 
 [project.optional-dependencies]
 langchain = ["langchain_core~= 0.3.3"]
-azure = ["azure-core", "azure-identity"]
+azure = [
+    "azure-ai-inference>=1.0.0b7",
+    "azure-core",
+    "azure-identity",
+]
 docker = ["docker~=7.0"]
 openai = ["openai>=1.52.2", "tiktoken>=0.8.0", "aiofiles"]
 file-surfer = [
@@ -56,6 +60,7 @@ redis = [
 grpc = [
     "grpcio~=1.62.0", # TODO: update this once we have a stable version.
 ]
+
 jupyter-executor = [
     "ipykernel>=6.29.5",
     "nbclient>=0.10.2",
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/azure/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/azure/__init__.py
new file mode 100644
index 000000000..2dc7b9c70
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/models/azure/__init__.py
@@ -0,0 +1,4 @@
+from ._azure_ai_client import AzureAIChatCompletionClient
+from .config import AzureAIChatCompletionClientConfig
+
+__all__ = ["AzureAIChatCompletionClient", "AzureAIChatCompletionClientConfig"]
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/azure/_azure_ai_client.py b/python/packages/autogen-ext/src/autogen_ext/models/azure/_azure_ai_client.py
new file mode 100644
index 000000000..d10f678dc
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/models/azure/_azure_ai_client.py
@@ -0,0 +1,503 @@
+import asyncio
+import re
+from asyncio import Task
+from inspect import getfullargspec
+from typing import Any, Dict, List, Mapping, Optional, Sequence, cast
+
+from autogen_core import CancellationToken, FunctionCall, Image
+from autogen_core.models import (
+    AssistantMessage,
+    ChatCompletionClient,
+    CreateResult,
+    FinishReasons,
+    FunctionExecutionResultMessage,
+    LLMMessage,
+    ModelInfo,
+    RequestUsage,
+    SystemMessage,
+    UserMessage,
+)
+from autogen_core.tools import Tool, ToolSchema
+from azure.ai.inference.aio import ChatCompletionsClient
+from azure.ai.inference.models import (
+    AssistantMessage as AzureAssistantMessage,
+)
+from azure.ai.inference.models import (
+    ChatCompletions,
+    ChatCompletionsToolCall,
+    ChatCompletionsToolDefinition,
+    CompletionsFinishReason,
+    ContentItem,
+    FunctionDefinition,
+    ImageContentItem,
+    ImageDetailLevel,
+    ImageUrl,
+    StreamingChatChoiceUpdate,
+    StreamingChatCompletionsUpdate,
+    TextContentItem,
+)
+from azure.ai.inference.models import (
+    FunctionCall as AzureFunctionCall,
+)
+from azure.ai.inference.models import (
+    SystemMessage as AzureSystemMessage,
+)
+from azure.ai.inference.models import (
+    ToolMessage as AzureToolMessage,
+)
+from azure.ai.inference.models import (
+    UserMessage as AzureUserMessage,
+)
+from typing_extensions import AsyncGenerator, Union, Unpack
+
+from autogen_ext.models.azure.config import (
+    GITHUB_MODELS_ENDPOINT,
+    AzureAIChatCompletionClientConfig,
+)
+
+create_kwargs = set(getfullargspec(ChatCompletionsClient.complete).kwonlyargs)
+AzureMessage = Union[AzureSystemMessage, AzureUserMessage, AzureAssistantMessage, AzureToolMessage]
+
+
+def _is_github_model(endpoint: str) -> bool:
+    return endpoint == GITHUB_MODELS_ENDPOINT
+
+
+def convert_tools(tools: Sequence[Tool | ToolSchema]) -> List[ChatCompletionsToolDefinition]:
+    result: List[ChatCompletionsToolDefinition] = []
+    for tool in tools:
+        if isinstance(tool, Tool):
+            tool_schema = tool.schema.copy()
+        else:
+            assert isinstance(tool, dict)
+            tool_schema = tool.copy()
+
+        if "parameters" in tool_schema:
+            for value in tool_schema["parameters"]["properties"].values():
+                if "title" in value.keys():
+                    del value["title"]
+
+        function_def: Dict[str, Any] = dict(name=tool_schema["name"])
+        if "description" in tool_schema:
+            function_def["description"] = tool_schema["description"]
+        if "parameters" in tool_schema:
+            function_def["parameters"] = tool_schema["parameters"]
+
+        result.append(
+            ChatCompletionsToolDefinition(
+                function=FunctionDefinition(**function_def),
+            ),
+        )
+    return result
+
+
+def _func_call_to_azure(message: FunctionCall) -> ChatCompletionsToolCall:
+    return ChatCompletionsToolCall(
+        id=message.id,
+        function=AzureFunctionCall(arguments=message.arguments, name=message.name),
+    )
+
+
+def _system_message_to_azure(message: SystemMessage) -> AzureSystemMessage:
+    return AzureSystemMessage(content=message.content)
+
+
+def _user_message_to_azure(message: UserMessage) -> AzureUserMessage:
+    assert_valid_name(message.source)
+    if isinstance(message.content, str):
+        return AzureUserMessage(content=message.content)
+    else:
+        parts: List[ContentItem] = []
+        for part in message.content:
+            if isinstance(part, str):
+                parts.append(TextContentItem(text=part))
+            elif isinstance(part, Image):
+                # TODO: support url based images
+                # TODO: support specifying details
+                parts.append(ImageContentItem(image_url=ImageUrl(url=part.data_uri, detail=ImageDetailLevel.AUTO)))
+            else:
+                raise ValueError(f"Unknown content type: {message.content}")
+        return AzureUserMessage(content=parts)
+
+
+def _assistant_message_to_azure(message: AssistantMessage) -> AzureAssistantMessage:
+    assert_valid_name(message.source)
+    if isinstance(message.content, list):
+        return AzureAssistantMessage(
+            tool_calls=[_func_call_to_azure(x) for x in message.content],
+        )
+    else:
+        return AzureAssistantMessage(content=message.content)
+
+
+def _tool_message_to_azure(message: FunctionExecutionResultMessage) -> Sequence[AzureToolMessage]:
+    return [AzureToolMessage(content=x.content, tool_call_id=x.call_id) for x in message.content]
+
+
+def to_azure_message(message: LLMMessage) -> Sequence[AzureMessage]:
+    if isinstance(message, SystemMessage):
+        return [_system_message_to_azure(message)]
+    elif isinstance(message, UserMessage):
+        return [_user_message_to_azure(message)]
+    elif isinstance(message, AssistantMessage):
+        return [_assistant_message_to_azure(message)]
+    else:
+        return _tool_message_to_azure(message)
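+
+
+# Note on the Sequence return type above: a single LLMMessage can expand to
+# several Azure messages (e.g. a FunctionExecutionResultMessage with two
+# results becomes two AzureToolMessage objects). Callers therefore flatten
+# the nested result; an illustrative sketch:
+#
+#   nested = [to_azure_message(m) for m in messages]
+#   flat = [azure_msg for sublist in nested for azure_msg in sublist]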
+
+
+def normalize_name(name: str) -> str:
+    """
+    LLMs sometimes emit function names that ignore the API's format requirements; this function replaces invalid characters with "_" and truncates to 64 characters.
+
+    Prefer assert_valid_name for validating user configuration or input.
+    """
+    return re.sub(r"[^a-zA-Z0-9_-]", "_", name)[:64]
+
+
+def assert_valid_name(name: str) -> str:
+    """
+    Ensure that configured names are valid; raises ValueError if not.
+
+    For munging LLM responses use normalize_name to ensure LLM-specified names don't break the API.
+    """
+    if not re.match(r"^[a-zA-Z0-9_-]+$", name):
+        raise ValueError(f"Invalid name: {name}. Only letters, numbers, '_' and '-' are allowed.")
+    if len(name) > 64:
+        raise ValueError(f"Invalid name: {name}. Name must be at most 64 characters.")
+    return name
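+
+
+# Illustrative behaviour of the two helpers above (the inputs are examples,
+# not part of the API):
+#
+#   normalize_name("get weather!")   -> "get_weather_"
+#   assert_valid_name("get_weather") -> "get_weather"
+#   assert_valid_name("get weather") -> raises ValueError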
+
+
+class AzureAIChatCompletionClient(ChatCompletionClient):
+    """
+    Chat completion client for models hosted on Azure AI Foundry or GitHub Models.
+    See `here `_ for more info.
+
+    Args:
+        endpoint (str): The endpoint to use. **Required.**
+        credential (AzureKeyCredential | AsyncTokenCredential): The credential to use. **Required.**
+        model_info (ModelInfo): The model family and capabilities of the model. **Required.**
+        model (str): The name of the model. **Required if the model is hosted on GitHub Models.**
+        frequency_penalty: (optional, float)
+        presence_penalty: (optional, float)
+        temperature: (optional, float)
+        top_p: (optional, float)
+        max_tokens: (optional, int)
+        response_format: (optional, ChatCompletionsResponseFormat)
+        stop: (optional, List[str])
+        tools: (optional, List[ChatCompletionsToolDefinition])
+        tool_choice: (optional, Union[str, ChatCompletionsToolChoicePreset, ChatCompletionsNamedToolChoice])
+        seed: (optional, int)
+        model_extras: (optional, Dict[str, Any])
+
+    To use this client, you must install the `azure` extra:
+
+    .. code-block:: bash
+
+        pip install "autogen-ext[azure]"
+
+    The following code snippet shows how to use the client:
+
+    .. code-block:: python
+
+        import asyncio
+        from azure.core.credentials import AzureKeyCredential
+        from autogen_ext.models.azure import AzureAIChatCompletionClient
+        from autogen_core.models import UserMessage
+
+
+        async def main():
+            client = AzureAIChatCompletionClient(
+                endpoint="endpoint",
+                credential=AzureKeyCredential("api_key"),
+                model_info={
+                    "json_output": False,
+                    "function_calling": False,
+                    "vision": False,
+                    "family": "unknown",
+                },
+            )
+
+            result = await client.create([UserMessage(content="What is the capital of France?", source="user")])
+            print(result)
+
+
+        if __name__ == "__main__":
+            asyncio.run(main())
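+
+    To use the same client with a model hosted on GitHub Models, point it at the
+    GitHub Models endpoint. A minimal sketch (assumes a ``GITHUB_TOKEN``
+    environment variable holding a valid token; the model name shown is only an
+    example, and ``model`` is required for this endpoint):
+
+    .. code-block:: python
+
+        import asyncio
+        import os
+        from azure.core.credentials import AzureKeyCredential
+        from autogen_ext.models.azure import AzureAIChatCompletionClient
+        from autogen_core.models import UserMessage
+
+
+        async def main():
+            client = AzureAIChatCompletionClient(
+                endpoint="https://models.inference.ai.azure.com",
+                credential=AzureKeyCredential(os.environ["GITHUB_TOKEN"]),
+                model="gpt-4o-mini",  # example model name; required for GitHub Models
+                model_info={
+                    "json_output": False,
+                    "function_calling": False,
+                    "vision": False,
+                    "family": "unknown",
+                },
+            )
+            result = await client.create([UserMessage(content="Hello!", source="user")])
+            print(result)
+
+
+        if __name__ == "__main__":
+            asyncio.run(main())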
+
+    """
+
+    def __init__(self, **kwargs: Unpack[AzureAIChatCompletionClientConfig]):
+        config = self._validate_config(kwargs)  # type: ignore
+        self._model_info = config["model_info"]  # type: ignore
+        self._client = self._create_client(config)
+        self._create_args = self._prepare_create_args(config)
+
+        self._actual_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
+        self._total_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
+
+    @staticmethod
+    def _validate_config(config: Dict[str, Any]) -> AzureAIChatCompletionClientConfig:
+        if "endpoint" not in config:
+            raise ValueError("endpoint is required for AzureAIChatCompletionClient")
+        if "credential" not in config:
+            raise ValueError("credential is required for AzureAIChatCompletionClient")
+        if "model_info" not in config:
+            raise ValueError("model_info is required for AzureAIChatCompletionClient")
+        if _is_github_model(config["endpoint"]) and "model" not in config:
+            raise ValueError("model is required when using a GitHub model with AzureAIChatCompletionClient")
+        return cast(AzureAIChatCompletionClientConfig, config)
+
+    @staticmethod
+    def _create_client(config: AzureAIChatCompletionClientConfig) -> ChatCompletionsClient:
+        return ChatCompletionsClient(**config)
+
+    @staticmethod
+    def _prepare_create_args(config: Mapping[str, Any]) -> Dict[str, Any]:
+        create_args = {k: v for k, v in config.items() if k in create_kwargs}
+        return create_args
+
+    def add_usage(self, usage: RequestUsage) -> None:
+        self._total_usage = RequestUsage(
+            self._total_usage.prompt_tokens + usage.prompt_tokens,
+            self._total_usage.completion_tokens + usage.completion_tokens,
+        )
+
+    def _validate_model_info(
+        self,
+        messages: Sequence[LLMMessage],
+        tools: Sequence[Tool | ToolSchema],
+        json_output: Optional[bool],
+        create_args: Dict[str, Any],
+    ) -> None:
+        if self.model_info["vision"] is False:
+            for message in messages:
+                if isinstance(message, UserMessage):
+                    if isinstance(message.content, list) and any(isinstance(x, Image) for x in message.content):
+                        raise ValueError("Model does not support vision and image was provided")
+
+        if json_output is not None:
+            if self.model_info["json_output"] is False and json_output is True:
+                raise ValueError("Model does not support JSON output")
+
+            if json_output is True and "response_format" not in create_args:
+                create_args["response_format"] = "json_object"
+
+        if self.model_info["function_calling"] is False and len(tools) > 0:
+            raise ValueError("Model does not support function calling")
+
+    async def create(
+        self,
+        messages: Sequence[LLMMessage],
+        *,
+        tools: Sequence[Tool | ToolSchema] = [],
+        json_output: Optional[bool] = None,
+        extra_create_args: Mapping[str, Any] = {},
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> CreateResult:
+        extra_create_args_keys = set(extra_create_args.keys())
+        if not create_kwargs.issuperset(extra_create_args_keys):
+            raise ValueError(f"Extra create args are invalid: {extra_create_args_keys - create_kwargs}")
+
+        # Copy the create args and overwrite anything in extra_create_args
+        create_args = self._create_args.copy()
+        create_args.update(extra_create_args)
+
+        self._validate_model_info(messages, tools, json_output, create_args)
+
+        # A single LLMMessage may map to several Azure messages, so flatten the nested lists.
+        azure_messages_nested = [to_azure_message(msg) for msg in messages]
+        azure_messages = [item for sublist in azure_messages_nested for item in sublist]
+
+        task: Task[ChatCompletions]
+
+        if len(tools) > 0:
+            converted_tools = convert_tools(tools)
+            task = asyncio.create_task(  # type: ignore
+                self._client.complete(messages=azure_messages, tools=converted_tools, **create_args)  # type: ignore
+            )
+        else:
+            task = asyncio.create_task(  # type: ignore
+                self._client.complete(  # type: ignore
+                    messages=azure_messages,
+                    **create_args,
+                )
+            )
+
+        if cancellation_token is not None:
+            cancellation_token.link_future(task)
+
+        result: ChatCompletions = await task
+
+        usage = RequestUsage(
+            prompt_tokens=result.usage.prompt_tokens if result.usage else 0,
+            completion_tokens=result.usage.completion_tokens if result.usage else 0,
+        )
+
+        choice = result.choices[0]
+        if choice.finish_reason == CompletionsFinishReason.TOOL_CALLS:
+            assert choice.message.tool_calls is not None
+
+            content: Union[str, List[FunctionCall]] = [
+                FunctionCall(
+                    id=x.id,
+                    arguments=x.function.arguments,
+                    name=normalize_name(x.function.name),
+                )
+                for x in choice.message.tool_calls
+            ]
+            finish_reason = "function_calls"
+        else:
+            if isinstance(choice.finish_reason, CompletionsFinishReason):
+                finish_reason = choice.finish_reason.value
+            else:
+                finish_reason = choice.finish_reason  # type: ignore
+            content = choice.message.content or ""
+
+        response = CreateResult(
+            finish_reason=finish_reason,  # type: ignore
+            content=content,
+            usage=usage,
+            cached=False,
+        )
+
+        self.add_usage(usage)
+
+        return response
+
+    async def create_stream(
+        self,
+        messages: Sequence[LLMMessage],
+        *,
+        tools: Sequence[Tool | ToolSchema] = [],
+        json_output: Optional[bool] = None,
+        extra_create_args: Mapping[str, Any] = {},
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> AsyncGenerator[Union[str, CreateResult], None]:
+        extra_create_args_keys = set(extra_create_args.keys())
+        if not create_kwargs.issuperset(extra_create_args_keys):
+            raise ValueError(f"Extra create args are invalid: {extra_create_args_keys - create_kwargs}")
+
+        create_args: Dict[str, Any] = self._create_args.copy()
+        create_args.update(extra_create_args)
+
+        self._validate_model_info(messages, tools, json_output, create_args)
+
+        # A single LLMMessage may map to several Azure messages, so flatten the nested lists.
+        azure_messages_nested = [to_azure_message(msg) for msg in messages]
+        azure_messages = [item for sublist in azure_messages_nested for item in sublist]
+
+        if len(tools) > 0:
+            converted_tools = convert_tools(tools)
+            task = asyncio.create_task(
+                self._client.complete(messages=azure_messages, tools=converted_tools, stream=True, **create_args)
+            )
+        else:
+            task = asyncio.create_task(
+                self._client.complete(messages=azure_messages, stream=True, **create_args)
+            )
+
+        if cancellation_token is not None:
+            cancellation_token.link_future(task)
+
+        # Consume the stream chunk by chunk, accumulating text deltas and tool calls.
+        finish_reason: Optional[FinishReasons] = None
+        content_deltas: List[str] = []
+        full_tool_calls: Dict[str, FunctionCall] = {}
+        prompt_tokens = 0
+        completion_tokens = 0
+        chunk: Optional[StreamingChatCompletionsUpdate] = None
+        choice: Optional[StreamingChatChoiceUpdate] = None
+        async for chunk in await task:  # type: ignore
+            assert isinstance(chunk, StreamingChatCompletionsUpdate)
+            choice = chunk.choices[0] if len(chunk.choices) > 0 else None
+            if choice and choice.finish_reason is not None:
+                if isinstance(choice.finish_reason, CompletionsFinishReason):
+                    finish_reason = cast(FinishReasons, choice.finish_reason.value)
+                else:
+                    if choice.finish_reason in ["stop", "length", "function_calls", "content_filter", "unknown"]:
+                        finish_reason = choice.finish_reason  # type: ignore
+                    else:
+                        raise ValueError(f"Unexpected finish reason: {choice.finish_reason}")
+
+            # We first try to load the content
+            if choice and choice.delta.content is not None:
+                content_deltas.append(choice.delta.content)
+                yield choice.delta.content
+            # Otherwise, we try to load the tool calls
+            if choice and choice.delta.tool_calls is not None:
+                for tool_call_chunk in choice.delta.tool_calls:
+                    # Merge partial tool-call chunks, keyed by index when present, else by id.
+                    if "index" in tool_call_chunk:
+                        idx = tool_call_chunk["index"]
+                    else:
+                        idx = tool_call_chunk.id
+                    if idx not in full_tool_calls:
+                        full_tool_calls[idx] = FunctionCall(id="", arguments="", name="")
+
+                    full_tool_calls[idx].id += tool_call_chunk.id
+                    full_tool_calls[idx].name += tool_call_chunk.function.name
+                    full_tool_calls[idx].arguments += tool_call_chunk.function.arguments
+
+        if chunk and chunk.usage:
+            prompt_tokens = chunk.usage.prompt_tokens
+
+        if finish_reason is None:
+            raise ValueError("No stop reason found")
+
+        if choice and choice.finish_reason == CompletionsFinishReason.TOOL_CALLS:
+            finish_reason = "function_calls"
+
+        content: Union[str, List[FunctionCall]]
+
+        if len(content_deltas) > 0:
+            content = "".join(content_deltas)
+            if chunk and chunk.usage:
+                completion_tokens = chunk.usage.completion_tokens
+            else:
+                completion_tokens = 0
+        else:
+            content = list(full_tool_calls.values())
+
+        usage = RequestUsage(
+            completion_tokens=completion_tokens,
+            prompt_tokens=prompt_tokens,
+        )
+
+        result = CreateResult(
+            finish_reason=finish_reason,
+            content=content,
+            usage=usage,
+            cached=False,
+        )
+
+        self.add_usage(usage)
+
+        yield result
+
+    def actual_usage(self) -> RequestUsage:
+        return self._actual_usage
+
+    def total_usage(self) -> RequestUsage:
+        return self._total_usage
+
+    def count_tokens(self, messages: Sequence[LLMMessage], *, tools: Sequence[Tool | ToolSchema] = []) -> int:
+        return 0
+
+    def remaining_tokens(self, messages: Sequence[LLMMessage], *, tools: Sequence[Tool | ToolSchema] = []) -> int:
+        return 0
+
+    @property
+    def model_info(self) -> ModelInfo:
+        return self._model_info
+
+    @property
+    def capabilities(self) -> ModelInfo:
+        return self.model_info
+
+    def __del__(self) -> None:
+        # TODO: This is a hack to close the open client
+        try:
+            asyncio.get_running_loop().create_task(self._client.close())
+        except RuntimeError:
+            asyncio.run(self._client.close())
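+
+
+# Illustrative streaming usage (assumes a reachable endpoint, a valid
+# credential, and that `client` is an AzureAIChatCompletionClient):
+#
+#   async for item in client.create_stream([UserMessage(content="Hi", source="user")]):
+#       if isinstance(item, str):
+#           print(item, end="")  # text delta
+#       else:
+#           print(item.usage)  # final CreateResult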
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/azure/config/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/azure/config/__init__.py
new file mode 100644
index 000000000..c90afe897
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/models/azure/config/__init__.py
@@ -0,0 +1,46 @@
+from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
+
+from autogen_core.models import ModelInfo
+from azure.ai.inference.models import (
+    ChatCompletionsNamedToolChoice,
+    ChatCompletionsToolChoicePreset,
+    ChatCompletionsToolDefinition,
+)
+from azure.core.credentials import AzureKeyCredential
+from azure.core.credentials_async import AsyncTokenCredential
+
+GITHUB_MODELS_ENDPOINT = "https://models.inference.ai.azure.com"
+
+
+class JsonSchemaFormat(TypedDict, total=False):
+    """Represents the same fields as azure.ai.inference.models.JsonSchemaFormat."""
+
+    name: str
+    schema: Dict[str, Any]
+    description: Optional[str]
+    strict: Optional[bool]
+
+
+class AzureAIClientArguments(TypedDict, total=False):
+    endpoint: str
+    credential: Union[AzureKeyCredential, AsyncTokenCredential]
+    model_info: ModelInfo
+
+
+class AzureAICreateArguments(TypedDict, total=False):
+    frequency_penalty: Optional[float]
+    presence_penalty: Optional[float]
+    temperature: Optional[float]
+    top_p: Optional[float]
+    max_tokens: Optional[int]
+    response_format: Optional[Literal["text", "json_object"]]
+    stop: Optional[List[str]]
+    tools: Optional[List[ChatCompletionsToolDefinition]]
+    tool_choice: Optional[Union[str, ChatCompletionsToolChoicePreset, ChatCompletionsNamedToolChoice]]
+    seed: Optional[int]
+    model: Optional[str]
+    model_extras: Optional[Dict[str, Any]]
+
+
+class AzureAIChatCompletionClientConfig(AzureAIClientArguments, AzureAICreateArguments):
+    pass
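+
+
+# Illustrative config dict accepted by AzureAIChatCompletionClient (values are
+# examples only; "model" is required when targeting GITHUB_MODELS_ENDPOINT):
+#
+#   config: AzureAIChatCompletionClientConfig = {
+#       "endpoint": GITHUB_MODELS_ENDPOINT,
+#       "credential": AzureKeyCredential("<token>"),
+#       "model": "gpt-4o-mini",
+#       "model_info": {"json_output": False, "function_calling": False, "vision": False, "family": "unknown"},
+#       "temperature": 0.7,
+#   }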
diff --git a/python/packages/autogen-ext/tests/models/test_azure_ai_model_client.py b/python/packages/autogen-ext/tests/models/test_azure_ai_model_client.py
new file mode 100644
index 000000000..0e510f93e
--- /dev/null
+++ b/python/packages/autogen-ext/tests/models/test_azure_ai_model_client.py
@@ -0,0 +1,297 @@
+import asyncio
+import os
+from datetime import datetime
+from typing import Any, AsyncGenerator, List
+
+import pytest
+from autogen_core import CancellationToken, FunctionCall, Image
+from autogen_core.models import CreateResult, UserMessage
+from autogen_ext.models.azure import AzureAIChatCompletionClient
+from azure.ai.inference.aio import (
+    ChatCompletionsClient,
+)
+from azure.ai.inference.models import (
+    ChatChoice,
+    ChatCompletions,
+    ChatCompletionsToolCall,
+    ChatResponseMessage,
+    CompletionsFinishReason,
+    CompletionsUsage,
+    StreamingChatChoiceUpdate,
+    StreamingChatCompletionsUpdate,
+    StreamingChatResponseMessageUpdate,
+)
+from azure.ai.inference.models import (
+    FunctionCall as AzureFunctionCall,
+)
+from azure.core.credentials import AzureKeyCredential
+
+
+async def _mock_create_stream(*args: Any, **kwargs: Any) -> AsyncGenerator[StreamingChatCompletionsUpdate, None]:
+    mock_chunks_content = ["Hello", " Another Hello", " Yet Another Hello"]
+
+    mock_chunks = [
+        StreamingChatChoiceUpdate(
+            index=0,
+            finish_reason="stop",
+            delta=StreamingChatResponseMessageUpdate(role="assistant", content=chunk_content),
+        )
+        for chunk_content in mock_chunks_content
+    ]
+
+    for mock_chunk in mock_chunks:
+        await asyncio.sleep(0.1)
+        yield StreamingChatCompletionsUpdate(
+            id="id",
+            choices=[mock_chunk],
+            created=datetime.now(),
+            model="model",
+            usage=CompletionsUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0),
+        )
+
+
+async def _mock_create(
+    *args: Any, **kwargs: Any
+) -> ChatCompletions | AsyncGenerator[StreamingChatCompletionsUpdate, None]:
+    stream = kwargs.get("stream", False)
+
+    if not stream:
+        await asyncio.sleep(0.1)
+        return ChatCompletions(
+            id="id",
+            created=datetime.now(),
+            model="model",
+            choices=[
+                ChatChoice(
+                    index=0, finish_reason="stop", message=ChatResponseMessage(content="Hello", role="assistant")
+                )
+            ],
+            usage=CompletionsUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0),
+        )
+    else:
+        return _mock_create_stream(*args, **kwargs)
+
+
+@pytest.fixture
+def azure_client(monkeypatch: pytest.MonkeyPatch) -> AzureAIChatCompletionClient:
+    endpoint = os.getenv("AZURE_AI_INFERENCE_ENDPOINT")
+    api_key = os.getenv("AZURE_AI_INFERENCE_API_KEY")
+
+    if endpoint and api_key:
+        return AzureAIChatCompletionClient(
+            endpoint=endpoint,
+            credential=AzureKeyCredential(api_key),
+            model_info={
+                "json_output": False,
+                "function_calling": False,
+                "vision": False,
+                "family": "unknown",
+            },
+            model="model",
+        )
+
+    monkeypatch.setattr(ChatCompletionsClient, "complete", _mock_create)
+    return AzureAIChatCompletionClient(
+        endpoint="endpoint",
+        credential=AzureKeyCredential("api_key"),
+        model_info={
+            "json_output": False,
+            "function_calling": False,
+            "vision": False,
+            "family": "unknown",
+        },
+        model="model",
+    )
+
+
+@pytest.mark.asyncio
+async def test_azure_ai_chat_completion_client(azure_client: AzureAIChatCompletionClient) -> None:
+    assert azure_client
+
+
+@pytest.mark.asyncio
+async def test_azure_ai_chat_completion_client_create(azure_client: AzureAIChatCompletionClient) -> None:
+    result = await azure_client.create(messages=[UserMessage(content="Hello", source="user")])
+    assert result.content == "Hello"
+
+
+@pytest.mark.asyncio
+async def test_azure_ai_chat_completion_client_create_stream(azure_client: AzureAIChatCompletionClient) -> None:
+    chunks: List[str | CreateResult] = []
+    async for chunk in azure_client.create_stream(messages=[UserMessage(content="Hello", source="user")]):
+        chunks.append(chunk)
+
+    assert chunks[0] == "Hello"
+    assert chunks[1] == " Another Hello"
+    assert chunks[2] == " Yet Another Hello"
+
+
+@pytest.mark.asyncio
+async def test_azure_ai_chat_completion_client_create_cancel(azure_client: AzureAIChatCompletionClient) -> None:
+    cancellation_token = CancellationToken()
+    task = asyncio.create_task(
+        azure_client.create(
+            messages=[UserMessage(content="Hello", source="user")], cancellation_token=cancellation_token
+        )
+    )
+    cancellation_token.cancel()
+    with pytest.raises(asyncio.CancelledError):
+        await task
+
+
+@pytest.mark.asyncio
+async def test_azure_ai_chat_completion_client_create_stream_cancel(azure_client: AzureAIChatCompletionClient) -> None:
+    cancellation_token = CancellationToken()
+    stream = azure_client.create_stream(
+        messages=[UserMessage(content="Hello", source="user")], cancellation_token=cancellation_token
+    )
+    cancellation_token.cancel()
+    with pytest.raises(asyncio.CancelledError):
+        async for _ in stream:
+            pass
+ """ + + async def _mock_function_call_create(*args: Any, **kwargs: Any) -> ChatCompletions: + await asyncio.sleep(0.01) + return ChatCompletions( + id="id", + created=datetime.now(), + model="model", + choices=[ + ChatChoice( + index=0, + finish_reason=CompletionsFinishReason.TOOL_CALLS, + message=ChatResponseMessage( + role="assistant", + content="", + tool_calls=[ + ChatCompletionsToolCall( + id="tool_call_id", + function=AzureFunctionCall(name="some_function", arguments='{"foo": "bar"}'), + ) + ], + ), + ) + ], + usage=CompletionsUsage(prompt_tokens=5, completion_tokens=2, total_tokens=7), + ) + + monkeypatch.setattr(ChatCompletionsClient, "complete", _mock_function_call_create) + return AzureAIChatCompletionClient( + endpoint="endpoint", + credential=AzureKeyCredential("api_key"), + model_info={ + "json_output": False, + "function_calling": True, + "vision": False, + "family": "function_calling_model", + }, + model="model", + ) + + +@pytest.mark.asyncio +async def test_function_calling_not_supported(azure_client: AzureAIChatCompletionClient) -> None: + """ + Ensures error is raised if we pass tools but the model_info doesn't support function calling. + """ + with pytest.raises(ValueError) as exc: + await azure_client.create( + messages=[UserMessage(content="Hello", source="user")], + tools=[{"name": "dummy_tool"}], + ) + assert "Model does not support function calling" in str(exc.value) + + +@pytest.mark.asyncio +async def test_function_calling_success(function_calling_client: AzureAIChatCompletionClient) -> None: + """ + Ensures function calling works and returns FunctionCall content. + """ + result = await function_calling_client.create( + messages=[UserMessage(content="Please call a function", source="user")], + tools=[{"name": "test_tool"}], + ) + assert result.finish_reason == "function_calls" + assert isinstance(result.content, list) + assert isinstance(result.content[0], FunctionCall) + assert result.content[0].name == "some_function" + assert result.content[0].arguments == '{"foo": "bar"}' + + +@pytest.mark.asyncio +async def test_multimodal_unsupported_raises_error(azure_client: AzureAIChatCompletionClient) -> None: + """ + If model does not support vision, providing an image should raise ValueError. + """ + with pytest.raises(ValueError) as exc: + await azure_client.create( + messages=[ + UserMessage( + content=[ # type: ignore + Image.from_base64( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgAAIAAAUAAen6L8YAAAAASUVORK5CYII=" + ) + ], + source="user", + ) + ] + ) + assert "does not support vision and image was provided" in str(exc.value) + + +@pytest.mark.asyncio +async def test_multimodal_supported(monkeypatch: pytest.MonkeyPatch) -> None: + """ + If model supports vision, providing an image should not raise. 
+ """ + + async def _mock_create_noop(*args: Any, **kwargs: Any) -> ChatCompletions: + await asyncio.sleep(0.01) + return ChatCompletions( + id="id", + created=datetime.now(), + model="model", + choices=[ + ChatChoice( + index=0, + finish_reason="stop", + message=ChatResponseMessage(content="Handled image", role="assistant"), + ) + ], + usage=CompletionsUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0), + ) + + monkeypatch.setattr(ChatCompletionsClient, "complete", _mock_create_noop) + + client = AzureAIChatCompletionClient( + endpoint="endpoint", + credential=AzureKeyCredential("api_key"), + model_info={ + "json_output": False, + "function_calling": False, + "vision": True, + "family": "vision_model", + }, + model="model", + ) + + result = await client.create( + messages=[ + UserMessage( + content=[ + Image.from_base64( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgAAIAAAUAAen6L8YAAAAASUVORK5CYII=" + ) + ], + source="user", + ) + ] + ) + assert result.content == "Handled image" diff --git a/python/packages/autogen-ext/tests/tools/conftest.py b/python/packages/autogen-ext/tests/tools/conftest.py index a341ca842..e4bda0d69 100644 --- a/python/packages/autogen-ext/tests/tools/conftest.py +++ b/python/packages/autogen-ext/tests/tools/conftest.py @@ -100,8 +100,8 @@ def report_df_fixture() -> pd.DataFrame: "The community centers around Baker Street, the iconic residence of Sherlock Holmes, and its connection to the London Underground. Baker Street serves as a significant landmark associated with the famous detective, while the Underground facilitates access to this notable location.", ], "full_content": [ - "# Peterson and the Missing Billycock\\n\\nThe community centers around Peterson, a commissionaire involved in a mystery concerning a lost hat and a goose. His actions are pivotal in the investigation led by Sherlock Holmes, connecting various entities such as the row, the billycock hat, and multiple newspapers where advertisements were placed.\\n\\n## Peterson's central role in the investigation\\n\\nPeterson is a key figure in the mystery involving the missing blue carbuncle, acting as a commissionaire who aids Sherlock Holmes. His involvement is crucial as he not only discovers the lost billycock hat but also plays a significant role in disseminating information related to the case. This highlights his importance in the narrative and the potential impact of his actions on the investigation's outcome. [Data: Entities (333); Relationships (521, 522)]\\n\\n## The significance of the row incident\\n\\nThe row refers to the altercation that prompted Peterson's intervention, leading to the discovery of the hat. This incident is pivotal as it sets the stage for the entire investigation, illustrating how a seemingly minor event can have far-reaching consequences. The altercation not only affects Peterson but also ties into the larger mystery that Holmes is trying to solve. [Data: Entities (339); Relationships (521)]\\n\\n## The billycock hat as a central object\\n\\nThe billycock hat is not just an accessory but a crucial piece of evidence in the investigation. Its discovery by Peterson links him directly to the case and raises questions about its owner, Henry Baker. The hat's significance is underscored by its role in the narrative, as it is the object around which the mystery revolves. 
[Data: Entities (340); Relationships (522)]\\n\\n## Media involvement through advertisements\\n\\nPeterson's task of placing advertisements in various evening papers, including the Globe, Star, Pall Mall, and others, indicates the media's role in the investigation. This outreach is essential for gathering information about the hat's owner and demonstrates how public engagement can influence the resolution of the case. The involvement of multiple newspapers suggests a broad interest in the mystery, which could amplify its impact on the community. [Data: Entities (355, 356, 357, 358, 359, 360, 361); Relationships (545, 546, 547, 548, 549, 550, 551)]", - "# Baker Street and Sherlock Holmes Community\\n\\nThe community centers around Baker Street, the iconic residence of Sherlock Holmes, and its connection to the London Underground. Baker Street serves as a significant landmark associated with the famous detective, while the Underground facilitates access to this notable location.\\n\\n## Baker Street as a cultural landmark\\n\\nBaker Street is not only the residence of Sherlock Holmes but also a symbol of his adventures and detective work. Its association with the fictional detective has made it a notable landmark in London, drawing interest from fans and tourists alike. The street's historical and cultural significance contributes to its status as a must-visit location, enhancing its impact on the community. [Data: Entities (4), Relationships (178)]\\n\\n## Sherlock Holmes's connection to Baker Street\\n\\nSherlock Holmes is intrinsically linked to Baker Street, as it serves as his residence and the hub for his investigations. This relationship is central to the narrative of his character, making Baker Street a vital part of the Sherlock Holmes lore. The detective's activities at this location have become iconic, further solidifying the street's importance in popular culture. [Data: Entities (4), Relationships (178)]\\n\\n## The role of the Underground in accessing Baker Street\\n\\nThe London Underground plays a crucial role in facilitating access to Baker Street, making it easier for visitors to reach this iconic location. The connection between the Underground and Baker Street enhances the street's accessibility, contributing to its popularity as a tourist destination. This relationship underscores the importance of public transportation in connecting significant cultural landmarks. [Data: Entities (548), Relationships (862)]\\n\\n## Baker Street's association with detective work\\n\\nBaker Street is synonymous with detective work, primarily due to its association with Sherlock Holmes. The street is where many of Holmes's investigations take place, making it a focal point for fans of detective fiction. This connection to crime-solving and mystery adds to the allure of Baker Street, attracting those interested in the genre and its history. [Data: Entities (4), Relationships (178)]\\n\\n## Tourism and public interest in Baker Street\\n\\nThe combination of Baker Street's historical significance and its association with Sherlock Holmes has made it a popular tourist destination. Visitors often seek to explore the street and its surroundings, contributing to the local economy and cultural heritage. The public interest in this location highlights the impact of literary figures on real-world places and their ability to draw crowds. 
[Data: Entities (4), Relationships (178)]", + "# Peterson and the Missing Billycock\\n\\nThe community centers around Peterson, a commissionaire involved in a mystery concerning a lost hat and a goose. ...", + "# Baker Street and Sherlock Holmes Community\\n\\nThe community centers around Baker Street, the iconic residence of Sherlock Holmes, and its connection to the London Underground. Baker...", ], "rank": [6.5, 6.5], "rank_explanation": [ @@ -168,7 +168,7 @@ def entity_embedding_fixture() -> pd.DataFrame: "title": ["PROJECT GUTENBERG", "ARTHUR CONAN DOYLE"], "type": ["ORGANIZATION", "PERSON"], "description": [ - 'Project Gutenberg is a non-profit digital library that offers free access to a vast collection of eBooks, primarily focusing on works that are in the public domain. It provides a wide range of electronic works, including classic literature such as "The Adventures of Sherlock Holmes." The organization is supported by volunteers and donations, enabling it to maintain and expand its offerings of free eBooks to the public.', + "Project Gutenberg is a non-profit digital library that offers free access to a vast collection of eBooks, primarily focusing on works that are in the public domain...", "Arthur Conan Doyle is the author of The Adventures of Sherlock Holmes, a famous detective fiction series.", ], "text_unit_ids": [ @@ -219,7 +219,7 @@ def text_unit_df_fixture() -> pd.DataFrame: ], "human_readable_id": [1, 2], "text": [ - "The Project Gutenberg eBook of The Adventures of Sherlock Holmes\n \nThis ebook is for the use of anyone anywhere in the United States and\\nmost other parts of the world at no cost and with almost no restrictions\\nwhatsoever. You may copy it, give it away or re-use it under the terms\\nof the Project Gutenberg License included with this ebook or online\\nat www.gutenberg.org. If you are not located in the United States,\\nyou will have to check the laws of the country where you are located\\nbefore using this eBook.\\n\\nTitle: The Adventures of Sherlock Holmes\\n\\nAuthor: Arthur Conan Doyle\\n\\nRelease date: March 1, 1999 [eBook #1661]\\n Most recently updated: October 10, 2023\\n\\nLanguage: English\\n\\nCredits: an anonymous Project Gutenberg volunteer and Jose Menendez\\n\\n\\n*** START OF THE PROJECT GUTENBERG EBOOK THE ADVENTURES OF SHERLOCK HOLMES ***\\n\\n\\n\\n\\nThe Adventures of Sherlock Holmes\\n\\nby Arthur Conan Doyle\\n\\n\\nContents\\n\\n I. A Scandal in Bohemia\\n II. The Red-Headed League\\n III. A Case of Identity\\n IV. The Boscombe Valley Mystery\\n V. The Five Orange Pips\\n VI. The Man with the Twisted Lip\\n VII. The Adventure of the Blue Carbuncle\\n VIII. The Adventure of the Speckled Band\\n IX. The Adventure of the Engineer's Thumb\\n X. The Adventure of the Noble Bachelor\\n XI. The Adventure of the Beryl Coronet\\n XII. The Adventure of the Copper Beeches\\n\\n\\n\\n\\nI. A SCANDAL IN BOHEMIA\\n\\n\\nI.\\n\\nTo Sherlock Holmes she is always _the_ woman. I have seldom heard him\\nmention her under any other name. In his eyes she eclipses and\\npredominates the whole of her sex. It was not that he felt any emotion\\nakin to love for Irene Adler. All emotions, and that one particularly,\\nwere abhorrent to his cold, precise but admirably balanced mind. He\\nwas, I take it, the most perfect reasoning and observing machine that\\nthe world has seen, but as a lover he would have placed himself in a\\nfalse position. He never spoke of the softer passions, save with a gibe\\nand a sneer. 
They were admirable things for the observer—excellent for\\ndrawing the veil from men's motives and actions. But for the trained\\nreasoner to admit such intrusions into his own delicate and finely\\nadjusted temperament was to introduce a distracting factor which might\\nthrow a doubt upon all his mental results. Grit in a sensitive\\ninstrument, or a crack in one of his own high-power lenses, would not\\nbe more disturbing than a strong emotion in a nature such as his. And\\nyet there was but one woman to him, and that woman was the late Irene\\nAdler, of dubious and questionable memory.\\n\\nI had seen little of Holmes lately. My marriage had drifted us away\\nfrom each other. My own complete happiness, and the home-centred\\ninterests which rise up around the man who first finds himself master\\nof his own establishment, were sufficient to absorb all my attention,\\nwhile Holmes, who loathed every form of society with his whole Bohemian\\nsoul, remained in our lodgings in Baker Street, buried among his old\\nbooks, and alternating from week to week between cocaine and ambition,\\nthe drowsiness of the drug, and the fierce energy of his own keen\\nnature. He was still, as ever, deeply attracted by the study of crime,\\nand occupied his immense faculties and extraordinary powers of\\nobservation in following out those clues, and clearing up those\\nmysteries which had been abandoned as hopeless by the official police.\\nFrom time to time I heard some vague account of his doings: of his\\nsummons to Odessa in the case of the Trepoff murder, of his clearing up\\nof the singular tragedy of the Atkinson brothers at Trincomalee, and\\nfinally of the mission which he had accomplished so delicately and\\nsuccessfully for the reigning family of Holland. Beyond these signs of\\nhis activity, however, which I merely shared with all the readers of\\nthe daily press, I knew little of my former friend and companion.\\n\\nOne night—it was on the twentieth of March, 1888—I was returning from a\\njourney to a patient (for I had now returned to civil practice), when\\nmy way led me through Baker Street. As I passed the well-remembered\\ndoor, which must always be associated in my mind with my wooing, and\\nwith the dark incidents of the Study in Scarlet, I was seized with a\\nkeen desire to see Holmes again, and to know how he was employing his\\nextraordinary powers. His rooms were brilliantly lit, and, even as I\\nlooked up, I saw his tall, spare figure pass twice in a dark silhouette\\nagainst the blind. He was pacing the room swiftly, eagerly, with his\\nhead sunk upon his chest and his hands clasped behind him. To me, who\\nknew his every mood and habit, his attitude and manner told their own\\nstory. He was at work again. He had risen out of his drug-created\\ndreams and was hot upon the scent of some new problem. I rang the bell\\nand was shown up to the chamber which had formerly been in part my own.\\n\\nHis manner was not effusive. It seldom was; but he was glad, I think,\\nto see me. 
With hardly a word spoken, but with a kindly eye, he waved\\nme to an armchair, threw across his case of cigars, and indicated a\\nspirit case and a gas", + "The Project Gutenberg eBook of The Adventures of Sherlock Holmes\n \nThis ebook is for the use of anyone anywhere in the United States and...", "Some other text", ], "n_tokens": [1200, 1200], @@ -280,4 +280,3 @@ def text_unit_df_fixture() -> pd.DataFrame: ], } return pd.DataFrame(data) - return pd.DataFrame(data) diff --git a/python/uv.lock b/python/uv.lock index 21adda9f9..d01ba4f8d 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -561,6 +561,7 @@ dependencies = [ [package.optional-dependencies] azure = [ + { name = "azure-ai-inference" }, { name = "azure-core" }, { name = "azure-identity" }, ] @@ -665,6 +666,7 @@ requires-dist = [ { name = "autogen-agentchat", marker = "extra == 'video-surfer'", editable = "packages/autogen-agentchat" }, { name = "autogen-agentchat", marker = "extra == 'web-surfer'", editable = "packages/autogen-agentchat" }, { name = "autogen-core", editable = "packages/autogen-core" }, + { name = "azure-ai-inference", marker = "extra == 'azure'", specifier = ">=1.0.0b7" }, { name = "azure-core", marker = "extra == 'azure'" }, { name = "azure-identity", marker = "extra == 'azure'" }, { name = "diskcache", marker = "extra == 'diskcache'", specifier = ">=5.6.3" }, @@ -878,6 +880,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/43/53afb8ba17218f19b77c7834128566c5bbb100a0ad9ba2e8e89d089d7079/autopep8-2.3.2-py2.py3-none-any.whl", hash = "sha256:ce8ad498672c845a0c3de2629c15b635ec2b05ef8177a6e7c91c74f3e9b51128", size = 45807 }, ] +[[package]] +name = "azure-ai-inference" +version = "1.0.0b7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "isodate" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/37/233eee0bebbf631d2f911a9f1ebbc3784b100d9bfb84efc275e71c1ea636/azure_ai_inference-1.0.0b7.tar.gz", hash = "sha256:bd912f71f7f855036ca46c9a21439f290eed5e61da418fd26bbb32e3c68bcce3", size = 175883 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/b6/5ba830eddc59f820c654694d476c14a3dd9c1f828ff9b48eb8b21dfd5f01/azure_ai_inference-1.0.0b7-py3-none-any.whl", hash = "sha256:59bb6a9ee62bd7654a69ca2bf12fe9335d7045df95b491cb8b5f9e3791c86175", size = 123030 }, +] + [[package]] name = "azure-common" version = "1.1.28"