From ff294a2417cbfee93699e14c3a7f221a97e1fd56 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Sat, 14 Dec 2024 18:04:13 +0900 Subject: [PATCH 1/7] Yield FunctionResultContent in streaming chat completion path. Update tests. --- .../chat_gpt_api_function_calling.py | 2 ++ .../ai/chat_completion_client_base.py | 25 +++++++++++++-- .../connectors/ai/function_calling_utils.py | 28 +++++++++++++++++ .../contents/function_call_content.py | 3 +- .../contents/function_result_content.py | 7 +++++ .../functions/kernel_function_from_prompt.py | 2 +- python/semantic_kernel/kernel.py | 8 ++++- .../ai/azure_ai_inference/conftest.py | 4 +-- ...test_azure_ai_inference_chat_completion.py | 31 +++++++++++++------ .../ai/google/google_ai/conftest.py | 4 +-- .../test_google_ai_chat_completion.py | 29 +++++++++++------ .../ai/google/vertex_ai/conftest.py | 4 +-- .../test_vertex_ai_chat_completion.py | 29 +++++++++++------ python/tests/unit/kernel/test_kernel.py | 2 ++ 14 files changed, 140 insertions(+), 38 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py index f0381c1048ac..c17ac2b698cd 100644 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py @@ -149,6 +149,8 @@ async def handle_streaming( print("\n") if result_content: + # this line is new to view content types + streaming_chat_message = reduce(lambda first, second: first + second, result_content) return "".join([str(content) for content in result_content]) return None diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index 80455a451842..2a478e4c80a0 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -12,7 +12,10 @@ from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration -from semantic_kernel.connectors.ai.function_calling_utils import merge_function_results +from semantic_kernel.connectors.ai.function_calling_utils import ( + merge_function_results, + merge_streaming_function_results, +) from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType from semantic_kernel.const import AUTO_FUNCTION_INVOCATION_SPAN_NAME from semantic_kernel.contents.annotation_content import AnnotationContent @@ -303,8 +306,18 @@ async def get_streaming_chat_message_contents( ], ) + # Merge and yield the function results, regardless of the termination status + # Include the ai_model_id so we can later add two streaming messages together + # Some settings may not have an ai_model_id, so we need to check for it + ai_model_id = self._get_ai_model_id(settings) + function_result_messages = merge_streaming_function_results( + messages=chat_history.messages[-len(results) :], + ai_model_id=ai_model_id, # type: ignore + ) + if self._yield_function_result_messages(function_result_messages): + yield function_result_messages + if any(result.terminate for result in results if result is not None): - yield merge_function_results(chat_history.messages[-len(results) :]) # type: ignore break async def get_streaming_chat_message_content( @@ -415,4 +428,12 
@@ def _start_auto_function_invocation_activity(self, kernel: "Kernel", settings: " return span + def _get_ai_model_id(self, settings: "PromptExecutionSettings") -> str: + """Retrieve the AI model ID from settings if available.""" + return getattr(settings, "ai_model_id", "") + + def _yield_function_result_messages(self, function_result_messages: list) -> bool: + """Determine if the function result messages should be yielded.""" + return len(function_result_messages) > 0 and len(function_result_messages[0].items) > 0 + # endregion diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index f5d29665068e..181ead8c635a 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -5,6 +5,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError @@ -95,3 +96,30 @@ def merge_function_results( items=items, ) ] + + +def merge_streaming_function_results( + messages: list[ChatMessageContent | StreamingChatMessageContent], + ai_model_id: str, +) -> list[StreamingChatMessageContent]: + """Combine multiple streaming function result content types to one streaming chat message content type. + + This method combines the FunctionResultContent items from separate StreamingChatMessageContent messages, + and is used in the event that the `context.terminate = True` condition is met. + + Args: + messages: The list of streaming chat message content types. + ai_model_id: The AI model ID. + + Returns: + The combined streaming chat message content type. + """ + items: list[Any] = [] + for message in messages: + items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) + + # If we want to be able to support adding the streaming message chunks together, then the author role needs to be + # `Assistant```, as the `Tool` role will cause the add method to break. 
+ return [ + StreamingChatMessageContent(role=AuthorRole.ASSISTANT, items=items, choice_index=0, ai_model_id=ai_model_id) + ] diff --git a/python/semantic_kernel/contents/function_call_content.py b/python/semantic_kernel/contents/function_call_content.py index 1700208522b1..3a197695153a 100644 --- a/python/semantic_kernel/contents/function_call_content.py +++ b/python/semantic_kernel/contents/function_call_content.py @@ -149,7 +149,8 @@ def parse_arguments(self) -> Mapping[str, Any] | None: if isinstance(self.arguments, Mapping): return self.arguments try: - return json.loads(self.arguments) + sanitized_arguments = self.arguments.replace("'", '"') + return json.loads(sanitized_arguments) except json.JSONDecodeError as exc: raise FunctionCallInvalidArgumentsException("Function Call arguments are not valid JSON.") from exc diff --git a/python/semantic_kernel/contents/function_result_content.py b/python/semantic_kernel/contents/function_result_content.py index 821cc46615d1..af84d1fa11e7 100644 --- a/python/semantic_kernel/contents/function_result_content.py +++ b/python/semantic_kernel/contents/function_result_content.py @@ -17,6 +17,7 @@ if TYPE_CHECKING: from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent + from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.functions.function_result import FunctionResult TAG_CONTENT_MAP = { @@ -157,6 +158,12 @@ def to_chat_message_content(self) -> "ChatMessageContent": return ChatMessageContent(role=AuthorRole.TOOL, items=[self]) + def to_streaming_chat_message_content(self) -> "StreamingChatMessageContent": + """Convert the instance to a StreamingChatMessageContent.""" + from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent + + return StreamingChatMessageContent(role=AuthorRole.TOOL, choice_index=0, items=[self]) + def to_dict(self) -> dict[str, str]: """Convert the instance to a dictionary.""" return { diff --git a/python/semantic_kernel/functions/kernel_function_from_prompt.py b/python/semantic_kernel/functions/kernel_function_from_prompt.py index ecba3e9aa96c..1e301da4fa17 100644 --- a/python/semantic_kernel/functions/kernel_function_from_prompt.py +++ b/python/semantic_kernel/functions/kernel_function_from_prompt.py @@ -6,7 +6,7 @@ from html import unescape from typing import TYPE_CHECKING, Any -import yaml +import yaml # type: ignore from pydantic import Field, ValidationError, model_validator from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase diff --git a/python/semantic_kernel/kernel.py b/python/semantic_kernel/kernel.py index 5ddef255f355..a827fb8dbf1c 100644 --- a/python/semantic_kernel/kernel.py +++ b/python/semantic_kernel/kernel.py @@ -10,6 +10,7 @@ from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.streaming_content_mixin import StreamingContentMixin from semantic_kernel.exceptions import ( FunctionCallInvalidArgumentsException, @@ -398,7 +399,12 @@ async def invoke_function_call( frc = FunctionResultContent.from_function_call_content_and_result( function_call_content=function_call, 
result=invocation_context.function_result ) - chat_history.add_message(message=frc.to_chat_message_content()) + + is_streaming = any(isinstance(message, StreamingChatMessageContent) for message in chat_history.messages) + + message = frc.to_streaming_chat_message_content() if is_streaming else frc.to_chat_message_content() + + chat_history.add_message(message=message) return invocation_context if invocation_context.terminate else None diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/conftest.py b/python/tests/unit/connectors/ai/azure_ai_inference/conftest.py index 690f5706bf3c..7919f705857a 100644 --- a/python/tests/unit/connectors/ai/azure_ai_inference/conftest.py +++ b/python/tests/unit/connectors/ai/azure_ai_inference/conftest.py @@ -252,8 +252,8 @@ def mock_azure_ai_inference_streaming_chat_completion_response_with_tool_call(mo ChatCompletionsToolCall( id="test_id", function=FunctionCall( - name="test_function", - arguments={"test_arg": "test_value"}, + name="getLightStatus", + arguments={"arg1": "test_value"}, ), ), ], diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py index 2cccebda1f52..fe12b507eb35 100644 --- a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py +++ b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py @@ -20,6 +20,7 @@ ServiceInvalidExecutionSettingsError, ) from semantic_kernel.functions.kernel_arguments import KernelArguments +from semantic_kernel.kernel import Kernel from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT @@ -492,11 +493,12 @@ async def test_azure_ai_inference_streaming_chat_completion_with_function_choice async def test_azure_ai_inference_streaming_chat_completion_with_function_choice_behavior( mock_complete, azure_ai_inference_service, - kernel, + kernel: Kernel, chat_history: ChatHistory, mock_azure_ai_inference_streaming_chat_completion_response_with_tool_call, + decorated_native_function, ) -> None: - """Test streaming completion of AzureAIInferenceChatCompletion with function choice behavior""" + """Test streaming completion of AzureAIInferenceChatCompletion with function choice behavior.""" user_message_content: str = "Hello" chat_history.add_user_message(user_message_content) @@ -507,20 +509,31 @@ async def test_azure_ai_inference_streaming_chat_completion_with_function_choice mock_complete.return_value = mock_azure_ai_inference_streaming_chat_completion_response_with_tool_call + kernel.add_function(plugin_name="TestPlugin", function=decorated_native_function) + + all_messages = [] async for messages in azure_ai_inference_service.get_streaming_chat_message_contents( chat_history, settings, kernel=kernel, arguments=KernelArguments(), ): - assert len(messages) == 1 - assert messages[0].role == "assistant" - assert messages[0].content == "" - assert messages[0].finish_reason == FinishReason.TOOL_CALLS + all_messages.extend(messages) + + # Assert the number of total messages + assert len(all_messages) == 2, f"Expected 2 messages, got {len(all_messages)}" + + # Validate the first message + assert all_messages[0].role == "assistant", f"Unexpected role for first message: {all_messages[0].role}" + assert all_messages[0].content == "", f"Unexpected content for first message: {all_messages[0].content}" + assert 
all_messages[0].finish_reason == FinishReason.TOOL_CALLS, ( + f"Unexpected finish reason for first message: {all_messages[0].finish_reason}" + ) - # Streaming completion with tool call does not invoke the model - # after maximum_auto_invoke_attempts is reached - assert mock_complete.call_count == 1 + # Validate the second message + assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" + assert all_messages[1].finish_reason is None @pytest.mark.parametrize( diff --git a/python/tests/unit/connectors/ai/google/google_ai/conftest.py b/python/tests/unit/connectors/ai/google/google_ai/conftest.py index 636344a97a02..77898412cb1d 100644 --- a/python/tests/unit/connectors/ai/google/google_ai/conftest.py +++ b/python/tests/unit/connectors/ai/google/google_ai/conftest.py @@ -128,8 +128,8 @@ async def mock_google_ai_streaming_chat_completion_response_with_tool_call() -> parts=[ protos.Part( function_call=protos.FunctionCall( - name="test_function", - args={"test_arg": "test_value"}, + name="getLightStatus", + args={"arg1": "test_value"}, ) ) ], diff --git a/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py b/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py index 4fedc18d5386..74ee7299b31f 100644 --- a/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py +++ b/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py @@ -20,6 +20,7 @@ ServiceInitializationError, ServiceInvalidExecutionSettingsError, ) +from semantic_kernel.kernel import Kernel # region init @@ -259,9 +260,10 @@ async def test_google_ai_streaming_chat_completion_with_function_choice_behavior async def test_google_ai_streaming_chat_completion_with_function_choice_behavior( mock_google_ai_model_generate_content_async, google_ai_unit_test_env, - kernel, + kernel: Kernel, chat_history: ChatHistory, mock_google_ai_streaming_chat_completion_response_with_tool_call, + decorated_native_function, ) -> None: """Test streaming chat completion of GoogleAIChatCompletion with function choice behavior""" mock_google_ai_model_generate_content_async.return_value = ( @@ -275,20 +277,29 @@ async def test_google_ai_streaming_chat_completion_with_function_choice_behavior google_ai_chat_completion = GoogleAIChatCompletion() + kernel.add_function(plugin_name="TestPlugin", function=decorated_native_function) + + all_messages = [] async for messages in google_ai_chat_completion.get_streaming_chat_message_contents( chat_history, settings, kernel=kernel, ): - assert len(messages) == 1 - assert messages[0].role == "assistant" - assert messages[0].content == "" - # Google doesn't return STOP as the finish reason for tool calls - assert messages[0].finish_reason == FinishReason.STOP + all_messages.extend(messages) + + assert len(all_messages) == 2, f"Expected 2 messages, got {len(all_messages)}" + + # Validate the first message + assert all_messages[0].role == "assistant", f"Unexpected role for first message: {all_messages[0].role}" + assert all_messages[0].content == "", f"Unexpected content for first message: {all_messages[0].content}" + assert all_messages[0].finish_reason == FinishReason.STOP, ( + f"Unexpected finish reason for first message: {all_messages[0].finish_reason}" + ) - # Streaming completion with tool call does not invoke the model - # 
after maximum_auto_invoke_attempts is reached - assert mock_google_ai_model_generate_content_async.call_count == 1 + # Validate the second message + assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" + assert all_messages[1].finish_reason is None @patch.object(GenerativeModel, "generate_content_async", new_callable=AsyncMock) diff --git a/python/tests/unit/connectors/ai/google/vertex_ai/conftest.py b/python/tests/unit/connectors/ai/google/vertex_ai/conftest.py index d1efbd80b19a..8892e6f0b29a 100644 --- a/python/tests/unit/connectors/ai/google/vertex_ai/conftest.py +++ b/python/tests/unit/connectors/ai/google/vertex_ai/conftest.py @@ -117,8 +117,8 @@ def mock_vertex_ai_streaming_chat_completion_response_with_tool_call() -> AsyncI parts=[ Part( function_call=FunctionCall( - name="test_function", - args={"test_arg": "test_value"}, + name="getLightStatus", + args={"arg1": "test_value"}, ) ) ], diff --git a/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py b/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py index 6b2f1f1082b7..a523d66cc7ff 100644 --- a/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py +++ b/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py @@ -20,6 +20,7 @@ ServiceInitializationError, ServiceInvalidExecutionSettingsError, ) +from semantic_kernel.kernel import Kernel # region init @@ -259,9 +260,10 @@ async def test_vertex_ai_streaming_chat_completion_with_function_choice_behavior async def test_vertex_ai_streaming_chat_completion_with_function_choice_behavior( mock_vertex_ai_model_generate_content_async, vertex_ai_unit_test_env, - kernel, + kernel: Kernel, chat_history: ChatHistory, mock_vertex_ai_streaming_chat_completion_response_with_tool_call, + decorated_native_function, ) -> None: """Test streaming chat completion of VertexAIChatCompletion with function choice behavior""" mock_vertex_ai_model_generate_content_async.return_value = ( @@ -275,20 +277,29 @@ async def test_vertex_ai_streaming_chat_completion_with_function_choice_behavior vertex_ai_chat_completion = VertexAIChatCompletion() + kernel.add_function(plugin_name="TestPlugin", function=decorated_native_function) + + all_messages = [] async for messages in vertex_ai_chat_completion.get_streaming_chat_message_contents( chat_history, settings, kernel=kernel, ): - assert len(messages) == 1 - assert messages[0].role == "assistant" - assert messages[0].content == "" - # Google doesn't return STOP as the finish reason for tool calls - assert messages[0].finish_reason == FinishReason.STOP + all_messages.extend(messages) + + assert len(all_messages) == 2, f"Expected 2 messages, got {len(all_messages)}" + + # Validate the first message + assert all_messages[0].role == "assistant", f"Unexpected role for first message: {all_messages[0].role}" + assert all_messages[0].content == "", f"Unexpected content for first message: {all_messages[0].content}" + assert all_messages[0].finish_reason == FinishReason.STOP, ( + f"Unexpected finish reason for first message: {all_messages[0].finish_reason}" + ) - # Streaming completion with tool call does not invoke the model - # after maximum_auto_invoke_attempts is reached - assert mock_vertex_ai_model_generate_content_async.call_count == 1 + # Validate the second 
message + assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" + assert all_messages[1].finish_reason is None @patch.object(GenerativeModel, "generate_content_async", new_callable=AsyncMock) diff --git a/python/tests/unit/kernel/test_kernel.py b/python/tests/unit/kernel/test_kernel.py index 38b1608d150f..808c69d4fc6e 100644 --- a/python/tests/unit/kernel/test_kernel.py +++ b/python/tests/unit/kernel/test_kernel.py @@ -18,6 +18,7 @@ from semantic_kernel.contents import ChatMessageContent from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.exceptions import KernelFunctionAlreadyExistsError, KernelServiceNotFoundError from semantic_kernel.exceptions.content_exceptions import FunctionCallInvalidArgumentsException from semantic_kernel.exceptions.kernel_exceptions import ( @@ -299,6 +300,7 @@ async def test_invoke_function_call_throws_during_invoke(kernel: Kernel, get_too result_mock = MagicMock(spec=ChatMessageContent) result_mock.items = [tool_call_mock] chat_history_mock = MagicMock(spec=ChatHistory) + chat_history_mock.messages = [MagicMock(spec=StreamingChatMessageContent)] func_mock = AsyncMock(spec=KernelFunction) func_meta = KernelFunctionMetadata(name="function", is_prompt=False) From d726301e07e0d1776a8b0212551fad2a5d6ab366 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Sat, 14 Dec 2024 18:11:20 +0900 Subject: [PATCH 2/7] Revert sample test change. --- .../auto_function_calling/chat_gpt_api_function_calling.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py index c17ac2b698cd..f0381c1048ac 100644 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py @@ -149,8 +149,6 @@ async def handle_streaming( print("\n") if result_content: - # this line is new to view content types - streaming_chat_message = reduce(lambda first, second: first + second, result_content) return "".join([str(content) for content in result_content]) return None From 399b00ae8aceafaa3784aa1420675ec23b0358ad Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 07:04:32 +0900 Subject: [PATCH 3/7] Simplify sanitized args --- python/semantic_kernel/contents/function_call_content.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/semantic_kernel/contents/function_call_content.py b/python/semantic_kernel/contents/function_call_content.py index 3a197695153a..ec861b8547d0 100644 --- a/python/semantic_kernel/contents/function_call_content.py +++ b/python/semantic_kernel/contents/function_call_content.py @@ -149,8 +149,7 @@ def parse_arguments(self) -> Mapping[str, Any] | None: if isinstance(self.arguments, Mapping): return self.arguments try: - sanitized_arguments = self.arguments.replace("'", '"') - return json.loads(sanitized_arguments) + return json.loads(self.arguments.replace("'", '"')) except json.JSONDecodeError as exc: raise FunctionCallInvalidArgumentsException("Function Call arguments are not valid JSON.") from exc From 
f064acf54ecc730327009cfb6906fa24d87bc509 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 07:40:15 +0900 Subject: [PATCH 4/7] Remove type ignore for yaml --- python/semantic_kernel/functions/kernel_function_from_prompt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/semantic_kernel/functions/kernel_function_from_prompt.py b/python/semantic_kernel/functions/kernel_function_from_prompt.py index 1e301da4fa17..ecba3e9aa96c 100644 --- a/python/semantic_kernel/functions/kernel_function_from_prompt.py +++ b/python/semantic_kernel/functions/kernel_function_from_prompt.py @@ -6,7 +6,7 @@ from html import unescape from typing import TYPE_CHECKING, Any -import yaml # type: ignore +import yaml from pydantic import Field, ValidationError, model_validator from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase From 0d64754501907eb6e26c733c4a05ad2c4bae087c Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 08:21:08 +0900 Subject: [PATCH 5/7] check base class ai_model_id as a fallback --- .../connectors/ai/chat_completion_client_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index 2a478e4c80a0..6a673dccd5eb 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -429,8 +429,12 @@ def _start_auto_function_invocation_activity(self, kernel: "Kernel", settings: " return span def _get_ai_model_id(self, settings: "PromptExecutionSettings") -> str: - """Retrieve the AI model ID from settings if available.""" - return getattr(settings, "ai_model_id", "") + """Retrieve the AI model ID from settings if available. + + Attempt to get ai_model_id from the settings object. If it doesn't exist or + is blank, fallback to self.ai_model_id (from AIServiceClientBase). 
+ """ + return getattr(settings, "ai_model_id", self.ai_model_id) or self.ai_model_id def _yield_function_result_messages(self, function_result_messages: list) -> bool: """Determine if the function result messages should be yielded.""" From af0b4816e3e88c4e165843137554cc439cd36189 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 09:26:31 +0900 Subject: [PATCH 6/7] Return FunctionResultContent as tool author --- .../chat_gpt_api_function_calling.py | 11 +++++++---- .../connectors/ai/function_calling_utils.py | 4 +--- .../test_azure_ai_inference_chat_completion.py | 2 +- .../services/test_google_ai_chat_completion.py | 2 +- .../services/test_vertex_ai_chat_completion.py | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py index f0381c1048ac..2ced79d2f8be 100644 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py @@ -12,6 +12,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins.math_plugin import MathPlugin from semantic_kernel.core_plugins.time_plugin import TimePlugin from semantic_kernel.functions import KernelArguments @@ -130,13 +131,15 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] - result_content = [] + result_content: list[StreamingChatMessageContent] = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: result_content.append(message[0]) print(str(message[0]), end="") diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index 181ead8c635a..924c0076f7c3 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -120,6 +120,4 @@ def merge_streaming_function_results( # If we want to be able to support adding the streaming message chunks together, then the author role needs to be # `Assistant```, as the `Tool` role will cause the add method to break. 
- return [ - StreamingChatMessageContent(role=AuthorRole.ASSISTANT, items=items, choice_index=0, ai_model_id=ai_model_id) - ] + return [StreamingChatMessageContent(role=AuthorRole.TOOL, items=items, choice_index=0, ai_model_id=ai_model_id)] diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py index fe12b507eb35..942322bf5153 100644 --- a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py +++ b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py @@ -531,7 +531,7 @@ async def test_azure_ai_inference_streaming_chat_completion_with_function_choice ) # Validate the second message - assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].role == "tool", f"Unexpected role for second message: {all_messages[1].role}" assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" assert all_messages[1].finish_reason is None diff --git a/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py b/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py index 74ee7299b31f..3f1d6e030a0d 100644 --- a/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py +++ b/python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_chat_completion.py @@ -297,7 +297,7 @@ async def test_google_ai_streaming_chat_completion_with_function_choice_behavior ) # Validate the second message - assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].role == "tool", f"Unexpected role for second message: {all_messages[1].role}" assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" assert all_messages[1].finish_reason is None diff --git a/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py b/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py index a523d66cc7ff..9ea53c68910e 100644 --- a/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py +++ b/python/tests/unit/connectors/ai/google/vertex_ai/services/test_vertex_ai_chat_completion.py @@ -297,7 +297,7 @@ async def test_vertex_ai_streaming_chat_completion_with_function_choice_behavior ) # Validate the second message - assert all_messages[1].role == "assistant", f"Unexpected role for second message: {all_messages[1].role}" + assert all_messages[1].role == "tool", f"Unexpected role for second message: {all_messages[1].role}" assert all_messages[1].content == "", f"Unexpected content for second message: {all_messages[1].content}" assert all_messages[1].finish_reason is None From 418cff990ba273864fa85f644be2e5a863012fcf Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Tue, 17 Dec 2024 16:29:49 +0900 Subject: [PATCH 7/7] Adjust samples based on latest streaming call updates. 
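The streaming function-calling path now also yields tool-role StreamingChatMessageContent chunks carrying FunctionResultContent, in addition to the assistant chunks, so the samples below filter on the author role before printing or accumulating text. A minimal sketch of that consumption pattern (the helper name and the kernel/function/arguments setup are illustrative; the streaming loop mirrors the updated samples):

from semantic_kernel import Kernel
from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.functions import KernelArguments, KernelFunction

async def stream_reply(kernel: Kernel, chat_function: KernelFunction, arguments: KernelArguments) -> str | None:
    response = kernel.invoke_stream(chat_function, return_function_results=False, arguments=arguments)
    result_content: list[StreamingChatMessageContent] = []
    async for message in response:
        # Tool-role chunks carry the function results; only assistant chunks
        # contribute printable text.
        if isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
            result_content.append(message[0])
            print(str(message[0]), end="")
    return "".join(str(chunk) for chunk in result_content) if result_content else None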
--- .../anthropic_api_function_calling.py | 9 ++++--- .../chat_mistral_ai_api_function_calling.py | 9 ++++--- .../function_calling_with_required_type.py | 26 +++++++++++++++---- .../functions_defined_in_json_prompt.py | 23 ++++++++++++---- .../functions_defined_in_yaml_prompt.py | 24 ++++++++++++----- .../function_invocation_filters_stream.py | 3 ++- .../plugins/openai_plugin_azure_key_vault.py | 9 ++++--- python/samples/learn_resources/templates.py | 6 +++-- .../connectors/ai/function_calling_utils.py | 2 -- 9 files changed, 81 insertions(+), 30 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py b/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py index a38ba2187ab8..5769943157db 100644 --- a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py @@ -12,6 +12,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins.math_plugin import MathPlugin from semantic_kernel.core_plugins.time_plugin import TimePlugin from semantic_kernel.functions import KernelArguments @@ -131,11 +132,13 @@ async def handle_streaming( streamed_chunks: list[StreamingChatMessageContent] = [] result_content = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: result_content.append(message[0]) print(str(message[0]), end="") diff --git a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py index 489f527852aa..5ee05a835e2a 100644 --- a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py @@ -12,6 +12,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins.math_plugin import MathPlugin from semantic_kernel.core_plugins.time_plugin import TimePlugin from semantic_kernel.functions import KernelArguments @@ -140,11 +141,13 @@ async def handle_streaming( streamed_chunks: list[StreamingChatMessageContent] = [] result_content = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and 
message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: result_content.append(message[0]) print(str(message[0]), end="") diff --git a/python/samples/concepts/auto_function_calling/function_calling_with_required_type.py b/python/samples/concepts/auto_function_calling/function_calling_with_required_type.py index cd0821e5e6a3..26697141ab17 100644 --- a/python/samples/concepts/auto_function_calling/function_calling_with_required_type.py +++ b/python/samples/concepts/auto_function_calling/function_calling_with_required_type.py @@ -6,12 +6,13 @@ from typing import TYPE_CHECKING from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings from semantic_kernel.contents import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins import MathPlugin, TimePlugin from semantic_kernel.functions import KernelArguments @@ -131,20 +132,32 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] + result_content = [] async for message in response: - if isinstance(message[0], StreamingChatMessageContent): + if ( + ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + or execution_settings.function_choice_behavior.type_ == FunctionChoiceType.REQUIRED + ) + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT + ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: + result_content.append(message[0]) print(str(message[0]), end="") if streamed_chunks: streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): + if hasattr(streaming_chat_message, "content") and streaming_chat_message.content: print(streaming_chat_message.content) print("Printing returned tool calls...") print_tool_calls(streaming_chat_message) print("\n") + if result_content: + return "".join([str(content) for content in result_content]) + return None async def chat() -> bool: @@ -164,7 +177,7 @@ async def chat() -> bool: arguments["chat_history"] = history if stream: - await handle_streaming(kernel, chat_function, arguments=arguments) + result = await handle_streaming(kernel, chat_function, arguments=arguments) else: result = await kernel.invoke(chat_function, arguments=arguments) @@ -177,6 +190,9 @@ async def chat() -> bool: return True print(f"Mosscap:> {result}") + + history.add_user_message(user_input) + history.add_assistant_message(str(result)) return True diff --git a/python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py b/python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py index 73157390b2f6..c8042eb4ad6c 100644 --- 
a/python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py +++ b/python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py @@ -11,6 +11,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins import MathPlugin, TimePlugin from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import ( AutoFunctionInvocationContext, @@ -144,7 +145,7 @@ async def handle_streaming( kernel: Kernel, chat_function: "KernelFunction", arguments: KernelArguments, -) -> None: +) -> str | None: response = kernel.invoke_stream( chat_function, return_function_results=False, @@ -153,20 +154,29 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] + result_content: list[StreamingChatMessageContent] = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: + result_content.append(message[0]) print(str(message[0]), end="") if streamed_chunks: streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) + if hasattr(streaming_chat_message, "content"): + print(streaming_chat_message.content) print("Auto tool calls is disabled, printing returned tool calls...") print_tool_calls(streaming_chat_message) print("\n") + if result_content: + return "".join([str(content) for content in result_content]) + return None async def chat() -> bool: @@ -187,7 +197,7 @@ async def chat() -> bool: stream = False if stream: - await handle_streaming(kernel, chat_function, arguments=arguments) + result = await handle_streaming(kernel, chat_function, arguments=arguments) else: result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments) @@ -200,6 +210,9 @@ async def chat() -> bool: return True print(f"Mosscap:> {result}") + + history.add_user_message(user_input) + history.add_assistant_message(str(result)) return True diff --git a/python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py b/python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py index b10718089a96..724822850e33 100644 --- a/python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py +++ b/python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py @@ -12,6 +12,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins import MathPlugin, TimePlugin from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import ( AutoFunctionInvocationContext, 
@@ -141,7 +142,7 @@ async def handle_streaming( kernel: Kernel, chat_function: "KernelFunction", arguments: KernelArguments, -) -> None: +) -> str | None: response = kernel.invoke_stream( chat_function, return_function_results=False, @@ -150,20 +151,29 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] + result_content: list[StreamingChatMessageContent] = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: + result_content.append(message[0]) print(str(message[0]), end="") if streamed_chunks: streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) + if hasattr(streaming_chat_message, "content"): + print(streaming_chat_message.content) print("Auto tool calls is disabled, printing returned tool calls...") print_tool_calls(streaming_chat_message) print("\n") + if result_content: + return "".join([str(content) for content in result_content]) + return None async def chat() -> bool: @@ -184,8 +194,7 @@ async def chat() -> bool: stream = False if stream: - pass - # await handle_streaming(kernel, chat_function, arguments=arguments) + result = await handle_streaming(kernel, chat_function, arguments=arguments) else: result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments) @@ -198,6 +207,9 @@ async def chat() -> bool: return True print(f"Mosscap:> {result}") + + history.add_user_message(user_input) + history.add_assistant_message(str(result)) return True diff --git a/python/samples/concepts/filtering/function_invocation_filters_stream.py b/python/samples/concepts/filtering/function_invocation_filters_stream.py index f1dbb85b1601..74948472ac49 100644 --- a/python/samples/concepts/filtering/function_invocation_filters_stream.py +++ b/python/samples/concepts/filtering/function_invocation_filters_stream.py @@ -71,7 +71,8 @@ async def chat(chat_history: ChatHistory) -> bool: function_name="chat", plugin_name="chat", user_input=user_input, chat_history=chat_history ) async for message in responses: - streamed_chunks.append(message[0]) + if isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: + streamed_chunks.append(message[0]) print(str(message[0]), end="") print("") chat_history.add_user_message(user_input) diff --git a/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py b/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py index f206d7794f78..9685591dc73f 100644 --- a/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py +++ b/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py @@ -17,6 +17,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.functions import KernelArguments, KernelFunction, KernelPlugin # region Helper functions @@ -209,11 +210,13 @@ async def 
handle_streaming( print("Security Agent:> ", end="") streamed_chunks: list[StreamingChatMessageContent] = [] async for message in response: - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance( - message[0], StreamingChatMessageContent + if ( + not execution_settings.function_choice_behavior.auto_invoke_kernel_functions + and isinstance(message[0], StreamingChatMessageContent) + and message[0].role == AuthorRole.ASSISTANT ): streamed_chunks.append(message[0]) - else: + elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: print(str(message[0]), end="") if streamed_chunks: diff --git a/python/samples/learn_resources/templates.py b/python/samples/learn_resources/templates.py index 90a6b27e38c2..d0a10d6977c2 100644 --- a/python/samples/learn_resources/templates.py +++ b/python/samples/learn_resources/templates.py @@ -7,6 +7,7 @@ from semantic_kernel import Kernel from semantic_kernel.contents import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig @@ -144,8 +145,9 @@ async def main(): all_chunks = [] print("Assistant:> ", end="") async for chunk in result: - all_chunks.append(chunk[0]) - print(str(chunk[0]), end="") + if isinstance(chunk[0], StreamingChatMessageContent) and chunk[0].role == AuthorRole.ASSISTANT: + all_chunks.append(chunk[0]) + print(str(chunk[0]), end="") print() history.add_user_message(request) diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index 924c0076f7c3..365d43565ed9 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -118,6 +118,4 @@ def merge_streaming_function_results( for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) - # If we want to be able to support adding the streaming message chunks together, then the author role needs to be - # `Assistant```, as the `Tool` role will cause the add method to break. return [StreamingChatMessageContent(role=AuthorRole.TOOL, items=items, choice_index=0, ai_model_id=ai_model_id)]
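Taken together, the new pieces compose as in this minimal sketch: a FunctionResultContent can be converted to a streaming chunk, and merge_streaming_function_results folds those chunks into the single tool-role StreamingChatMessageContent that the streaming path yields per invocation round. The call ids, function name, and model id below are illustrative, and the FunctionResultContent constructor arguments are assumed to follow its existing fields.

from semantic_kernel.connectors.ai.function_calling_utils import merge_streaming_function_results
from semantic_kernel.contents.function_result_content import FunctionResultContent
from semantic_kernel.contents.utils.author_role import AuthorRole

# Illustrative function results (ids, names, and values are placeholders).
results = [
    FunctionResultContent(id="call_1", name="TestPlugin-getLightStatus", result="on"),
    FunctionResultContent(id="call_2", name="TestPlugin-getLightStatus", result="off"),
]

# Each result becomes a tool-role streaming chunk (added in this series)...
chunks = [frc.to_streaming_chat_message_content() for frc in results]

# ...and the chunks are merged into one tool-role streaming message.
merged = merge_streaming_function_results(messages=chunks, ai_model_id="gpt-4o")
assert len(merged) == 1
assert merged[0].role == AuthorRole.TOOL
assert all(isinstance(item, FunctionResultContent) for item in merged[0].items)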