From ff294a2417cbfee93699e14c3a7f221a97e1fd56 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Sat, 14 Dec 2024 18:04:13 +0900 Subject: [PATCH 1/7] Yield FunctionResultContent in streaming chat completion path. Update tests. --- .../chat_gpt_api_function_calling.py | 2 ++ .../ai/chat_completion_client_base.py | 25 +++++++++++++-- .../connectors/ai/function_calling_utils.py | 28 +++++++++++++++++ .../contents/function_call_content.py | 3 +- .../contents/function_result_content.py | 7 +++++ .../functions/kernel_function_from_prompt.py | 2 +- python/semantic_kernel/kernel.py | 8 ++++- .../ai/azure_ai_inference/conftest.py | 4 +-- ...test_azure_ai_inference_chat_completion.py | 31 +++++++++++++------ .../ai/google/google_ai/conftest.py | 4 +-- .../test_google_ai_chat_completion.py | 29 +++++++++++------ .../ai/google/vertex_ai/conftest.py | 4 +-- .../test_vertex_ai_chat_completion.py | 29 +++++++++++------ python/tests/unit/kernel/test_kernel.py | 2 ++ 14 files changed, 140 insertions(+), 38 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py index f0381c1048ac..c17ac2b698cd 100644 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py @@ -149,6 +149,8 @@ async def handle_streaming( print("\n") if result_content: + # this line is new to view content types + streaming_chat_message = reduce(lambda first, second: first + second, result_content) return "".join([str(content) for content in result_content]) return None diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index 80455a451842..2a478e4c80a0 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -12,7 +12,10 @@ from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration -from semantic_kernel.connectors.ai.function_calling_utils import merge_function_results +from semantic_kernel.connectors.ai.function_calling_utils import ( + merge_function_results, + merge_streaming_function_results, +) from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType from semantic_kernel.const import AUTO_FUNCTION_INVOCATION_SPAN_NAME from semantic_kernel.contents.annotation_content import AnnotationContent @@ -303,8 +306,18 @@ async def get_streaming_chat_message_contents( ], ) + # Merge and yield the function results, regardless of the termination status + # Include the ai_model_id so we can later add two streaming messages together + # Some settings may not have an ai_model_id, so we need to check for it + ai_model_id = self._get_ai_model_id(settings) + function_result_messages = merge_streaming_function_results( + messages=chat_history.messages[-len(results) :], + ai_model_id=ai_model_id, # type: ignore + ) + if self._yield_function_result_messages(function_result_messages): + yield function_result_messages + if any(result.terminate for result in results if result is not None): - yield merge_function_results(chat_history.messages[-len(results) :]) # type: ignore break async def get_streaming_chat_message_content( @@ -415,4 +428,12 
@@ def _start_auto_function_invocation_activity(self, kernel: "Kernel", settings: " return span + def _get_ai_model_id(self, settings: "PromptExecutionSettings") -> str: + """Retrieve the AI model ID from settings if available.""" + return getattr(settings, "ai_model_id", "") + + def _yield_function_result_messages(self, function_result_messages: list) -> bool: + """Determine if the function result messages should be yielded.""" + return len(function_result_messages) > 0 and len(function_result_messages[0].items) > 0 + # endregion diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index f5d29665068e..181ead8c635a 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -5,6 +5,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError @@ -95,3 +96,30 @@ def merge_function_results( items=items, ) ] + + +def merge_streaming_function_results( + messages: list[ChatMessageContent | StreamingChatMessageContent], + ai_model_id: str, +) -> list[StreamingChatMessageContent]: + """Combine multiple streaming function result content types to one streaming chat message content type. + + This method combines the FunctionResultContent items from separate StreamingChatMessageContent messages, + and is used in the event that the `context.terminate = True` condition is met. + + Args: + messages: The list of streaming chat message content types. + ai_model_id: The AI model ID. + + Returns: + The combined streaming chat message content type. + """ + items: list[Any] = [] + for message in messages: + items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) + + # If we want to be able to support adding the streaming message chunks together, then the author role needs to be + # `Assistant```, as the `Tool` role will cause the add method to break. 
+ return [ + StreamingChatMessageContent(role=AuthorRole.ASSISTANT, items=items, choice_index=0, ai_model_id=ai_model_id) + ] diff --git a/python/semantic_kernel/contents/function_call_content.py b/python/semantic_kernel/contents/function_call_content.py index 1700208522b1..3a197695153a 100644 --- a/python/semantic_kernel/contents/function_call_content.py +++ b/python/semantic_kernel/contents/function_call_content.py @@ -149,7 +149,8 @@ def parse_arguments(self) -> Mapping[str, Any] | None: if isinstance(self.arguments, Mapping): return self.arguments try: - return json.loads(self.arguments) + sanitized_arguments = self.arguments.replace("'", '"') + return json.loads(sanitized_arguments) except json.JSONDecodeError as exc: raise FunctionCallInvalidArgumentsException("Function Call arguments are not valid JSON.") from exc diff --git a/python/semantic_kernel/contents/function_result_content.py b/python/semantic_kernel/contents/function_result_content.py index 821cc46615d1..af84d1fa11e7 100644 --- a/python/semantic_kernel/contents/function_result_content.py +++ b/python/semantic_kernel/contents/function_result_content.py @@ -17,6 +17,7 @@ if TYPE_CHECKING: from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent + from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.functions.function_result import FunctionResult TAG_CONTENT_MAP = { @@ -157,6 +158,12 @@ def to_chat_message_content(self) -> "ChatMessageContent": return ChatMessageContent(role=AuthorRole.TOOL, items=[self]) + def to_streaming_chat_message_content(self) -> "StreamingChatMessageContent": + """Convert the instance to a StreamingChatMessageContent.""" + from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent + + return StreamingChatMessageContent(role=AuthorRole.TOOL, choice_index=0, items=[self]) + def to_dict(self) -> dict[str, str]: """Convert the instance to a dictionary.""" return { diff --git a/python/semantic_kernel/functions/kernel_function_from_prompt.py b/python/semantic_kernel/functions/kernel_function_from_prompt.py index ecba3e9aa96c..1e301da4fa17 100644 --- a/python/semantic_kernel/functions/kernel_function_from_prompt.py +++ b/python/semantic_kernel/functions/kernel_function_from_prompt.py @@ -6,7 +6,7 @@ from html import unescape from typing import TYPE_CHECKING, Any -import yaml +import yaml # type: ignore from pydantic import Field, ValidationError, model_validator from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase diff --git a/python/semantic_kernel/kernel.py b/python/semantic_kernel/kernel.py index 5ddef255f355..a827fb8dbf1c 100644 --- a/python/semantic_kernel/kernel.py +++ b/python/semantic_kernel/kernel.py @@ -10,6 +10,7 @@ from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.streaming_content_mixin import StreamingContentMixin from semantic_kernel.exceptions import ( FunctionCallInvalidArgumentsException, @@ -398,7 +399,12 @@ async def invoke_function_call( frc = FunctionResultContent.from_function_call_content_and_result( function_call_content=function_call, 
result=invocation_context.function_result ) - chat_history.add_message(message=frc.to_chat_message_content()) + + is_streaming = any(isinstance(message, StreamingChatMessageContent) for message in chat_history.messages) + + message = frc.to_streaming_chat_message_content() if is_streaming else frc.to_chat_message_content() + + chat_history.add_message(message=message) return invocation_context if invocation_context.terminate else None diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/conftest.py b/python/tests/unit/connectors/ai/azure_ai_inference/conftest.py index 690f5706bf3c..7919f705857a 100644 --- a/python/tests/unit/connectors/ai/azure_ai_inference/conftest.py +++ b/python/tests/unit/connectors/ai/azure_ai_inference/conftest.py @@ -252,8 +252,8 @@ def mock_azure_ai_inference_streaming_chat_completion_response_with_tool_call(mo ChatCompletionsToolCall( id="test_id", function=FunctionCall( - name="test_function", - arguments={"test_arg": "test_value"}, + name="getLightStatus", + arguments={"arg1": "test_value"}, ), ), ], diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py index 2cccebda1f52..fe12b507eb35 100644 --- a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py +++ b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py @@ -20,6 +20,7 @@ ServiceInvalidExecutionSettingsError, ) from semantic_kernel.functions.kernel_arguments import KernelArguments +from semantic_kernel.kernel import Kernel from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT @@ -492,11 +493,12 @@ async def test_azure_ai_inference_streaming_chat_completion_with_function_choice async def test_azure_ai_inference_streaming_chat_completion_with_function_choice_behavior( mock_complete, azure_ai_inference_service, - kernel, + kernel: Kernel, chat_history: ChatHistory, mock_azure_ai_inference_streaming_chat_completion_response_with_tool_call, + decorated_native_function, ) -> None: - """Test streaming completion of AzureAIInferenceChatCompletion with function choice behavior""" + """Test streaming completion of AzureAIInferenceChatCompletion with function choice behavior.""" user_message_content: str = "Hello" chat_history.add_user_message(user_message_content) @@ -507,20 +509,31 @@ async def test_azure_ai_inference_streaming_chat_completion_with_function_choice mock_complete.return_value = mock_azure_ai_inference_streaming_chat_completion_response_with_tool_call + kernel.add_function(plugin_name="TestPlugin", function=decorated_native_function) + + all_messages = [] async for messages in azure_ai_inference_service.get_streaming_chat_message_contents( chat_history, settings, kernel=kernel, arguments=KernelArguments(), ): - assert len(messages) == 1 - assert messages[0].role == "assistant" - assert messages[0].content == "" - assert messages[0].finish_reason == FinishReason.TOOL_CALLS + all_messages.extend(messages) + + # Assert the number of total messages + assert len(all_messages) == 2, f"Expected 2 messages, got {len(all_messages)}" + + # Validate the first message + assert all_messages[0].role == "assistant", f"Unexpected role for first message: {all_messages[0].role}" + assert all_messages[0].content == "", f"Unexpected content for first message: {all_messages[0].content}" + assert 
all_messages[0].finish_reason == FinishReason.TOOL_CALLS, ( + f"Unexpected finish reason for first message: {all_messages[0].finish_reason}" + ) - # Streaming completion with tool call does not invoke the model - # after maximum_auto_invoke_attempts is reached - assert mock_complete.call_count == 1 + # Validate the second message + assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" + assert all_messages[1].finish_reason is None @pytest.mark.parametrize( diff --git a/python/tests/unit/connectors/ai/google/google_ai/conftest.py b/python/tests/unit/connectors/ai/google/google_ai/conftest.py index 636344a97a02..77898412cb1d 100644 --- a/python/tests/unit/connectors/ai/google/google_ai/conftest.py +++ b/python/tests/unit/connectors/ai/google/google_ai/conftest.py @@ -128,8 +128,8 @@ async def mock_google_ai_streaming_chat_completion_response_with_tool_call() -> parts=[ protos.Part( function_call=protos.FunctionCall( - name="test_function", - args={"test_arg": "test_value"}, + name="getLightStatus", + args={"arg1": "test_value"}, ) ) ], diff --git a/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py b/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py index 4fedc18d5386..74ee7299b31f 100644 --- a/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py +++ b/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py @@ -20,6 +20,7 @@ ServiceInitializationError, ServiceInvalidExecutionSettingsError, ) +from semantic_kernel.kernel import Kernel # region init @@ -259,9 +260,10 @@ async def test_google_ai_streaming_chat_completion_with_function_choice_behavior async def test_google_ai_streaming_chat_completion_with_function_choice_behavior( mock_google_ai_model_generate_content_async, google_ai_unit_test_env, - kernel, + kernel: Kernel, chat_history: ChatHistory, mock_google_ai_streaming_chat_completion_response_with_tool_call, + decorated_native_function, ) -> None: """Test streaming chat completion of GoogleAIChatCompletion with function choice behavior""" mock_google_ai_model_generate_content_async.return_value = ( @@ -275,20 +277,29 @@ async def test_google_ai_streaming_chat_completion_with_function_choice_behavior google_ai_chat_completion = GoogleAIChatCompletion() + kernel.add_function(plugin_name="TestPlugin", function=decorated_native_function) + + all_messages = [] async for messages in google_ai_chat_completion.get_streaming_chat_message_contents( chat_history, settings, kernel=kernel, ): - assert len(messages) == 1 - assert messages[0].role == "assistant" - assert messages[0].content == "" - # Google doesn't return STOP as the finish reason for tool calls - assert messages[0].finish_reason == FinishReason.STOP + all_messages.extend(messages) + + assert len(all_messages) == 2, f"Expected 2 messages, got {len(all_messages)}" + + # Validate the first message + assert all_messages[0].role == "assistant", f"Unexpected role for first message: {all_messages[0].role}" + assert all_messages[0].content == "", f"Unexpected content for first message: {all_messages[0].content}" + assert all_messages[0].finish_reason == FinishReason.STOP, ( + f"Unexpected finish reason for first message: {all_messages[0].finish_reason}" + ) - # Streaming completion with tool call does not invoke the model - # 
after maximum_auto_invoke_attempts is reached - assert mock_google_ai_model_generate_content_async.call_count == 1 + # Validate the second message + assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" + assert all_messages[1].finish_reason is None @patch.object(GenerativeModel, "generate_content_async", new_callable=AsyncMock) diff --git a/python/tests/unit/connectors/ai/google/vertex_ai/conftest.py b/python/tests/unit/connectors/ai/google/vertex_ai/conftest.py index d1efbd80b19a..8892e6f0b29a 100644 --- a/python/tests/unit/connectors/ai/google/vertex_ai/conftest.py +++ b/python/tests/unit/connectors/ai/google/vertex_ai/conftest.py @@ -117,8 +117,8 @@ def mock_vertex_ai_streaming_chat_completion_response_with_tool_call() -> AsyncI parts=[ Part( function_call=FunctionCall( - name="test_function", - args={"test_arg": "test_value"}, + name="getLightStatus", + args={"arg1": "test_value"}, ) ) ], diff --git a/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py b/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py index 6b2f1f1082b7..a523d66cc7ff 100644 --- a/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py +++ b/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py @@ -20,6 +20,7 @@ ServiceInitializationError, ServiceInvalidExecutionSettingsError, ) +from semantic_kernel.kernel import Kernel # region init @@ -259,9 +260,10 @@ async def test_vertex_ai_streaming_chat_completion_with_function_choice_behavior async def test_vertex_ai_streaming_chat_completion_with_function_choice_behavior( mock_vertex_ai_model_generate_content_async, vertex_ai_unit_test_env, - kernel, + kernel: Kernel, chat_history: ChatHistory, mock_vertex_ai_streaming_chat_completion_response_with_tool_call, + decorated_native_function, ) -> None: """Test streaming chat completion of VertexAIChatCompletion with function choice behavior""" mock_vertex_ai_model_generate_content_async.return_value = ( @@ -275,20 +277,29 @@ async def test_vertex_ai_streaming_chat_completion_with_function_choice_behavior vertex_ai_chat_completion = VertexAIChatCompletion() + kernel.add_function(plugin_name="TestPlugin", function=decorated_native_function) + + all_messages = [] async for messages in vertex_ai_chat_completion.get_streaming_chat_message_contents( chat_history, settings, kernel=kernel, ): - assert len(messages) == 1 - assert messages[0].role == "assistant" - assert messages[0].content == "" - # Google doesn't return STOP as the finish reason for tool calls - assert messages[0].finish_reason == FinishReason.STOP + all_messages.extend(messages) + + assert len(all_messages) == 2, f"Expected 2 messages, got {len(all_messages)}" + + # Validate the first message + assert all_messages[0].role == "assistant", f"Unexpected role for first message: {all_messages[0].role}" + assert all_messages[0].content == "", f"Unexpected content for first message: {all_messages[0].content}" + assert all_messages[0].finish_reason == FinishReason.STOP, ( + f"Unexpected finish reason for first message: {all_messages[0].finish_reason}" + ) - # Streaming completion with tool call does not invoke the model - # after maximum_auto_invoke_attempts is reached - assert mock_vertex_ai_model_generate_content_async.call_count == 1 + # Validate the second 
message + assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" + assert all_messages[1].finish_reason is None @patch.object(GenerativeModel, "generate_content_async", new_callable=AsyncMock) diff --git a/python/tests/unit/kernel/test_kernel.py b/python/tests/unit/kernel/test_kernel.py index 38b1608d150f..808c69d4fc6e 100644 --- a/python/tests/unit/kernel/test_kernel.py +++ b/python/tests/unit/kernel/test_kernel.py @@ -18,6 +18,7 @@ from semantic_kernel.contents import ChatMessageContent from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.exceptions import KernelFunctionAlreadyExistsError, KernelServiceNotFoundError from semantic_kernel.exceptions.content_exceptions import FunctionCallInvalidArgumentsException from semantic_kernel.exceptions.kernel_exceptions import ( @@ -299,6 +300,7 @@ async def test_invoke_function_call_throws_during_invoke(kernel: Kernel, get_too result_mock = MagicMock(spec=ChatMessageContent) result_mock.items = [tool_call_mock] chat_history_mock = MagicMock(spec=ChatHistory) + chat_history_mock.messages = [MagicMock(spec=StreamingChatMessageContent)] func_mock = AsyncMock(spec=KernelFunction) func_meta = KernelFunctionMetadata(name="function", is_prompt=False) From d726301e07e0d1776a8b0212551fad2a5d6ab366 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Sat, 14 Dec 2024 18:11:20 +0900 Subject: [PATCH 2/7] Revert sample test change. --- .../auto_function_calling/chat_gpt_api_function_calling.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py index c17ac2b698cd..f0381c1048ac 100644 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py @@ -149,8 +149,6 @@ async def handle_streaming( print("\n") if result_content: - # this line is new to view content types - streaming_chat_message = reduce(lambda first, second: first + second, result_content) return "".join([str(content) for content in result_content]) return None From 399b00ae8aceafaa3784aa1420675ec23b0358ad Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 07:04:32 +0900 Subject: [PATCH 3/7] Simplify sanitized args --- python/semantic_kernel/contents/function_call_content.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/semantic_kernel/contents/function_call_content.py b/python/semantic_kernel/contents/function_call_content.py index 3a197695153a..ec861b8547d0 100644 --- a/python/semantic_kernel/contents/function_call_content.py +++ b/python/semantic_kernel/contents/function_call_content.py @@ -149,8 +149,7 @@ def parse_arguments(self) -> Mapping[str, Any] | None: if isinstance(self.arguments, Mapping): return self.arguments try: - sanitized_arguments = self.arguments.replace("'", '"') - return json.loads(sanitized_arguments) + return json.loads(self.arguments.replace("'", '"')) except json.JSONDecodeError as exc: raise FunctionCallInvalidArgumentsException("Function Call arguments are not valid JSON.") from exc From 
f064acf54ecc730327009cfb6906fa24d87bc509 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 07:40:15 +0900 Subject: [PATCH 4/7] Remove type ignore for yaml --- python/semantic_kernel/functions/kernel_function_from_prompt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/semantic_kernel/functions/kernel_function_from_prompt.py b/python/semantic_kernel/functions/kernel_function_from_prompt.py index 1e301da4fa17..ecba3e9aa96c 100644 --- a/python/semantic_kernel/functions/kernel_function_from_prompt.py +++ b/python/semantic_kernel/functions/kernel_function_from_prompt.py @@ -6,7 +6,7 @@ from html import unescape from typing import TYPE_CHECKING, Any -import yaml # type: ignore +import yaml from pydantic import Field, ValidationError, model_validator from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase From 0d64754501907eb6e26c733c4a05ad2c4bae087c Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 08:21:08 +0900 Subject: [PATCH 5/7] check base class ai_model_id as a fallback --- .../connectors/ai/chat_completion_client_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index 2a478e4c80a0..6a673dccd5eb 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -429,8 +429,12 @@ def _start_auto_function_invocation_activity(self, kernel: "Kernel", settings: " return span def _get_ai_model_id(self, settings: "PromptExecutionSettings") -> str: - """Retrieve the AI model ID from settings if available.""" - return getattr(settings, "ai_model_id", "") + """Retrieve the AI model ID from settings if available. + + Attempt to get ai_model_id from the settings object. If it doesn't exist or + is blank, fallback to self.ai_model_id (from AIServiceClientBase). 
+ """ + return getattr(settings, "ai_model_id", self.ai_model_id) or self.ai_model_id def _yield_function_result_messages(self, function_result_messages: list) -> bool: """Determine if the function result messages should be yielded.""" From af0b4816e3e88c4e165843137554cc439cd36189 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 09:26:31 +0900 Subject: [PATCH 6/7] Return FunctionResultContent as tool author --- .../chat_gpt_api_function_calling.py | 11 +++++++---- .../connectors/ai/function_calling_utils.py | 4 +--- .../test_azure_ai_inference_chat_completion.py | 2 +- .../services/test_google_ai_chat_completion.py | 2 +- .../services/test_vertex_ai_chat_completion.py | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py index f0381c1048ac..2ced79d2f8be 100644 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py @@ -12,6 +12,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins.math_plugin import MathPlugin from semantic_kernel.core_plugins.time_plugin import TimePlugin from semantic_kernel.functions import KernelArguments @@ -130,13 +131,15 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] - result_content = [] + result_content: list[StreamingChatMessageContent] = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: result_content.append(message[0]) print(str(message[0]), end="") diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index 181ead8c635a..924c0076f7c3 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -120,6 +120,4 @@ def merge_streaming_function_results( # If we want to be able to support adding the streaming message chunks together, then the author role needs to be # `Assistant```, as the `Tool` role will cause the add method to break. 
- return [ - StreamingChatMessageContent(role=AuthorRole.ASSISTANT, items=items, choice_index=0, ai_model_id=ai_model_id) - ] + return [StreamingChatMessageContent(role=AuthorRole.TOOL, items=items, choice_index=0, ai_model_id=ai_model_id)] diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py index fe12b507eb35..942322bf5153 100644 --- a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py +++ b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py @@ -531,7 +531,7 @@ async def test_azure_ai_inference_streaming_chat_completion_with_function_choice ) # Validate the second message - assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].role == "tool", f"Unexpected role for second message: {all_messages[1].role}" assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" assert all_messages[1].finish_reason is None diff --git a/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py b/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py index 74ee7299b31f..3f1d6e030a0d 100644 --- a/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py +++ b/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py @@ -297,7 +297,7 @@ async def test_google_ai_streaming_chat_completion_with_function_choice_behavior ) # Validate the second message - assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].role == "tool", f"Unexpected role for second message: {all_messages[1].role}" assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" assert all_messages[1].finish_reason is None diff --git a/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py b/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py index a523d66cc7ff..9ea53c68910e 100644 --- a/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py +++ b/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py @@ -297,7 +297,7 @@ async def test_vertex_ai_streaming_chat_completion_with_function_choice_behavior ) # Validate the second message - assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].role == "tool", f"Unexpected role for second message: {all_messages[1].role}" assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" assert all_messages[1].finish_reason is None From 418cff990ba273864fa85f644be2e5a863012fcf Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 16:29:49 +0900 Subject: [PATCH 7/7] Adjust samples based on latest streaming call updates. 
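The streaming function-calling path now also yields tool-role StreamingChatMessageContent chunks carrying FunctionResultContent, in addition to the assistant chunks, so the samples below filter on the author role before printing or accumulating text. A minimal sketch of that consumption pattern (the helper name and the kernel/function/arguments setup are illustrative; the streaming loop mirrors the updated samples):

from semantic_kernel import Kernel
from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.functions import KernelArguments, KernelFunction

async def stream_reply(kernel: Kernel, chat_function: KernelFunction, arguments: KernelArguments) -> str | None:
    response = kernel.invoke_stream(chat_function, return_function_results=False, arguments=arguments)
    result_content: list[StreamingChatMessageContent] = []
    async for message in response:
        # Tool-role chunks carry the function results; only assistant chunks
        # contribute printable text.
        if isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
            result_content.append(message[0])
            print(str(message[0]), end="")
    return "".join(str(chunk) for chunk in result_content) if result_content else None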
--- .../anthropic_api_function_calling.py | 9 ++++--- .../chat_mistral_ai_api_function_calling.py | 9 ++++--- .../function_calling_with_required_type.py | 26 +++++++++++++++---- .../functions_defined_in_json_prompt.py | 23 ++++++++++++---- .../functions_defined_in_yaml_prompt.py | 24 ++++++++++++----- .../function_invocation_filters_stream.py | 3 ++- .../plugins/openai_plugin_azure_key_vault.py | 9 ++++--- python/samples/learn_resources/templates.py | 6 +++-- .../connectors/ai/function_calling_utils.py | 2 -- 9 files changed, 81 insertions(+), 30 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py b/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py index a38ba2187ab8..5769943157db 100644 --- a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py @@ -12,6 +12,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins.math_plugin import MathPlugin from semantic_kernel.core_plugins.time_plugin import TimePlugin from semantic_kernel.functions import KernelArguments @@ -131,11 +132,13 @@ async def handle_streaming( streamed_chunks: list[StreamingChatMessageContent] = [] result_content = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: result_content.append(message[0]) print(str(message[0]), end="") diff --git a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py index 489f527852aa..5ee05a835e2a 100644 --- a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py @@ -12,6 +12,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins.math_plugin import MathPlugin from semantic_kernel.core_plugins.time_plugin import TimePlugin from semantic_kernel.functions import KernelArguments @@ -140,11 +141,13 @@ async def handle_streaming( streamed_chunks: list[StreamingChatMessageContent] = [] result_content = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and 
message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: result_content.append(message[0]) print(str(message[0]), end="") diff --git a/python/samples/concepts/auto_function_calling/function_calling_with_required_type.py b/python/samples/concepts/auto_function_calling/function_calling_with_required_type.py index cd0821e5e6a3..26697141ab17 100644 --- a/python/samples/concepts/auto_function_calling/function_calling_with_required_type.py +++ b/python/samples/concepts/auto_function_calling/function_calling_with_required_type.py @@ -6,12 +6,13 @@ from typing import TYPE_CHECKING from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings from semantic_kernel.contents import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins import MathPlugin, TimePlugin from semantic_kernel.functions import KernelArguments @@ -131,20 +132,32 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] + result_content = [] async for message in response: - if isinstance(message[0], StreamingChatMessageContent): + if ( + ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + or execution_settings.function_choice_behavior.type_ == FunctionChoiceType.REQUIRED + ) + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT + ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: + result_content.append(message[0]) print(str(message[0]), end="") if streamed_chunks: streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): + if hasattr(streaming_chat_message, "content") and streaming_chat_message.content: print(streaming_chat_message.content) print("Printing returned tool calls...") print_tool_calls(streaming_chat_message) print("\n") + if result_content: + return "".join([str(content) for content in result_content]) + return None async def chat() -> bool: @@ -164,7 +177,7 @@ async def chat() -> bool: arguments["chat_history"] = history if stream: - await handle_streaming(kernel, chat_function, arguments=arguments) + result = await handle_streaming(kernel, chat_function, arguments=arguments) else: result = await kernel.invoke(chat_function, arguments=arguments) @@ -177,6 +190,9 @@ async def chat() -> bool: return True print(f"Mosscap:> {result}") + + history.add_user_message(user_input) + history.add_assistant_message(str(result)) return True diff --git a/python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py b/python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py index 73157390b2f6..c8042eb4ad6c 100644 --- 
a/python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py +++ b/python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py @@ -11,6 +11,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins import MathPlugin, TimePlugin from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import ( AutoFunctionInvocationContext, @@ -144,7 +145,7 @@ async def handle_streaming( kernel: Kernel, chat_function: "KernelFunction", arguments: KernelArguments, -) -> None: +) -> str | None: response = kernel.invoke_stream( chat_function, return_function_results=False, @@ -153,20 +154,29 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] + result_content: list[StreamingChatMessageContent] = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: + result_content.append(message[0]) print(str(message[0]), end="") if streamed_chunks: streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) + if hasattr(streaming_chat_message, "content"): + print(streaming_chat_message.content) print("Auto tool calls is disabled, printing returned tool calls...") print_tool_calls(streaming_chat_message) print("\n") + if result_content: + return "".join([str(content) for content in result_content]) + return None async def chat() -> bool: @@ -187,7 +197,7 @@ async def chat() -> bool: stream = False if stream: - await handle_streaming(kernel, chat_function, arguments=arguments) + result = await handle_streaming(kernel, chat_function, arguments=arguments) else: result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments) @@ -200,6 +210,9 @@ async def chat() -> bool: return True print(f"Mosscap:> {result}") + + history.add_user_message(user_input) + history.add_assistant_message(str(result)) return True diff --git a/python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py b/python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py index b10718089a96..724822850e33 100644 --- a/python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py +++ b/python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py @@ -12,6 +12,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins import MathPlugin, TimePlugin from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import ( AutoFunctionInvocationContext, 
@@ -141,7 +142,7 @@ async def handle_streaming( kernel: Kernel, chat_function: "KernelFunction", arguments: KernelArguments, -) -> None: +) -> str | None: response = kernel.invoke_stream( chat_function, return_function_results=False, @@ -150,20 +151,29 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] + result_content: list[StreamingChatMessageContent] = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: + result_content.append(message[0]) print(str(message[0]), end="") if streamed_chunks: streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) + if hasattr(streaming_chat_message, "content"): + print(streaming_chat_message.content) print("Auto tool calls is disabled, printing returned tool calls...") print_tool_calls(streaming_chat_message) print("\n") + if result_content: + return "".join([str(content) for content in result_content]) + return None async def chat() -> bool: @@ -184,8 +194,7 @@ async def chat() -> bool: stream = False if stream: - pass - # await handle_streaming(kernel, chat_function, arguments=arguments) + result = await handle_streaming(kernel, chat_function, arguments=arguments) else: result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments) @@ -198,6 +207,9 @@ async def chat() -> bool: return True print(f"Mosscap:> {result}") + + history.add_user_message(user_input) + history.add_assistant_message(str(result)) return True diff --git a/python/samples/concepts/filtering/function_invocation_filters_stream.py b/python/samples/concepts/filtering/function_invocation_filters_stream.py index f1dbb85b1601..74948472ac49 100644 --- a/python/samples/concepts/filtering/function_invocation_filters_stream.py +++ b/python/samples/concepts/filtering/function_invocation_filters_stream.py @@ -71,7 +71,8 @@ async def chat(chat_history: ChatHistory) -> bool: function_name="chat", plugin_name="chat", user_input=user_input, chat_history=chat_history ) async for message in responses: - streamed_chunks.append(message[0]) + if isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: + streamed_chunks.append(message[0]) print(str(message[0]), end="") print("") chat_history.add_user_message(user_input) diff --git a/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py b/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py index f206d7794f78..9685591dc73f 100644 --- a/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py +++ b/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py @@ -17,6 +17,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.functions import KernelArguments, KernelFunction, KernelPlugin # region Helper functions @@ -209,11 +210,13 @@ async def 
handle_streaming( print("Security Agent:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: print(str(message[0]), end="") if streamed_chunks: diff --git a/python/samples/learn_resources/templates.py b/python/samples/learn_resources/templates.py index 90a6b27e38c2..d0a10d6977c2 100644 --- a/python/samples/learn_resources/templates.py +++ b/python/samples/learn_resources/templates.py @@ -7,6 +7,7 @@ from semantic_kernel import Kernel from semantic_kernel.contents import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig @@ -144,8 +145,9 @@ async def main(): all_chunks = [] print("Assistant:> ", end="") async for chunk in result: - all_chunks.append(chunk[0]) - print(str(chunk[0]), end="") + if isinstance(chunk[0], StreamingChatMessageContent) and chunk[0].role == AuthorRole.ASSISTANT: + all_chunks.append(chunk[0]) + print(str(chunk[0]), end="") print() history.add_user_message(request) diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index 924c0076f7c3..365d43565ed9 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -118,6 +118,4 @@ def merge_streaming_function_results( for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) - # If we want to be able to support adding the streaming message chunks together, then the author role needs to be - # `Assistant```, as the `Tool` role will cause the add method to break. return [StreamingChatMessageContent(role=AuthorRole.TOOL, items=items, choice_index=0, ai_model_id=ai_model_id)]
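Taken together, the new pieces compose as in this minimal sketch: a FunctionResultContent can be converted to a streaming chunk, and merge_streaming_function_results folds those chunks into the single tool-role StreamingChatMessageContent that the streaming path yields per invocation round. The call ids, function name, and model id below are illustrative, and the FunctionResultContent constructor arguments are assumed to follow its existing fields.

from semantic_kernel.connectors.ai.function_calling_utils import merge_streaming_function_results
from semantic_kernel.contents.function_result_content import FunctionResultContent
from semantic_kernel.contents.utils.author_role import AuthorRole

# Illustrative function results (ids, names, and values are placeholders).
results = [
    FunctionResultContent(id="call_1", name="TestPlugin-getLightStatus", result="on"),
    FunctionResultContent(id="call_2", name="TestPlugin-getLightStatus", result="off"),
]

# Each result becomes a tool-role streaming chunk (added in this series)...
chunks = [frc.to_streaming_chat_message_content() for frc in results]

# ...and the chunks are merged into one tool-role streaming message.
merged = merge_streaming_function_results(messages=chunks, ai_model_id="gpt-4o")
assert len(merged) == 1
assert merged[0].role == AuthorRole.TOOL
assert all(isinstance(item, FunctionResultContent) for item in merged[0].items)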