Upgraded langchain dependency versions and thereby fixed a lot of issues caused by dependency changes.
royerlab · Aug 30, 2024 · 1157a50 · 1157a50
1 parent 7b9d7c5
commit 1157a50
Show file tree

Hide file tree

Showing 28 changed files with 378 additions and 99 deletions.
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = napari-chatgpt
-version = v2024.5.15
+version = v2024.8.27
 description = A napari plugin to process and analyse images with chatGPT.
 long_description = file: README.md
 long_description_content_type = text/markdown
@@ -36,12 +36,13 @@ install_requires =
     scikit-image
     qtpy
     QtAwesome
-    langchain==0.2.0rc2
-    langchain-community==0.2.0rc1
-    langchain-openai==0.1.6
-    langchain-anthropic==0.1.11
-    openai==1.29.0
-    anthropic
+    langchain==0.2.15
+    langchain-community==0.2.14
+    langchain-openai==0.1.23
+    langchain-anthropic==0.1.23
+#    langchain-google-genai==1.0.10
+    openai==1.42.0
+    anthropic==0.34.1
     fastapi
     uvicorn
     websockets

diff --git a/src/napari_chatgpt/chat_server/callbacks/callbacks_handle_chat.py b/src/napari_chatgpt/chat_server/callbacks/callbacks_handle_chat.py
@@ -121,7 +121,19 @@ async def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
         if self.verbose:
             aprint(f"CHAT on_agent_action: {action}")
         tool = camel_case_to_lower_case(action.tool)
-        message = f"I am using the {tool} to tackle your request: '{action.tool_input}'"
+
+        # extract value for args key after checking if action.tool_input is a dict:
+        if isinstance(action.tool_input, dict):
+            argument = action.tool_input.get('args', '')
+
+            # if argument is a singleton list, unpop that single element:
+            if isinstance(argument, list):
+                argument = argument[0]
+
+        else:
+            argument = action.tool_input
+
+        message = f"I am using the {tool} to tackle your request: '{argument}'"
 
         self.last_tool_used = tool
         self.last_tool_input = action.tool_input

diff --git a/src/napari_chatgpt/omega/memory/memory.py b/src/napari_chatgpt/omega/memory/memory.py
@@ -2,6 +2,7 @@
 from typing import Type
 
 from langchain.chains import LLMChain
+from langchain.memory import ConversationSummaryMemory
 from langchain.memory.chat_memory import BaseChatMemory
 from langchain.memory.prompt import SUMMARY_PROMPT
 from langchain_core.language_models import BaseLanguageModel
@@ -16,6 +17,8 @@
 ### LangChain's license is the MIT License
 ###
 
+ConversationSummaryMemory
+
 class SummarizerMixin(BaseModel):
     human_prefix: str = "Human"
     ai_prefix: str = "AI"

diff --git a/src/napari_chatgpt/omega/napari_bridge.py b/src/napari_chatgpt/omega/napari_bridge.py
@@ -71,7 +71,18 @@ def get_viewer_info(self) -> str:
         # Setting up delegated function:
         delegated_function = lambda v: get_viewer_info(v)
 
-        return self._execute_in_napari_context(delegated_function)
+        try:
+            # execute delegated function in napari context:
+            info = self._execute_in_napari_context(delegated_function)
+
+            return info
+
+        except Exception as e:
+            # print exception stack trace:
+            import traceback
+            traceback.print_exc()
+
+            return 'Could not get information about the viewer because of an error.'
 
 
     def take_snapshot(self):

diff --git a/src/napari_chatgpt/omega/omega_agent/OmegaOpenAIFunctionsAgentOutputParser.py b/src/napari_chatgpt/omega/omega_agent/OmegaOpenAIFunctionsAgentOutputParser.py
@@ -0,0 +1,92 @@
+import json
+from json import JSONDecodeError
+from typing import List, Union
+
+from langchain_core.agents import AgentAction, AgentActionMessageLog, AgentFinish
+from langchain_core.exceptions import OutputParserException
+from langchain_core.messages import (
+    AIMessage,
+    BaseMessage,
+)
+from langchain_core.outputs import ChatGeneration, Generation
+
+from langchain.agents.agent import AgentOutputParser
+
+
+class OpenAIFunctionsAgentOutputParser(AgentOutputParser):
+    """Parses a message into agent action/finish.
+
+    Is meant to be used with OpenAI models, as it relies on the specific
+    function_call parameter from OpenAI to convey what tools to use.
+
+    If a function_call parameter is passed, then that is used to get
+    the tool and tool input. An `arguments` payload that is not valid JSON
+    is not rejected: it is passed on to the tool as the raw string.
+
+    If one is not passed, then the AIMessage is assumed to be the final output.
+    """
+
+    @property
+    def _type(self) -> str:
+        return "openai-functions-agent"
+
+    @staticmethod
+    def _parse_ai_message(message: BaseMessage) -> Union[AgentAction, AgentFinish]:
+        """Parse an AI message; raises TypeError if it is not an AIMessage."""
+        if not isinstance(message, AIMessage):
+            raise TypeError(f"Expected an AI message got {type(message)}")
+
+        function_call = message.additional_kwargs.get("function_call", {})
+
+        if function_call:
+            function_name = function_call["name"]
+            try:
+                if len(function_call["arguments"].strip()) == 0:
+                    # OpenAI returns an empty string for functions containing no args
+                    _tool_input = {}
+                else:
+                    # otherwise it returns a json object
+                    _tool_input = json.loads(function_call["arguments"], strict=False)
+            except JSONDecodeError:
+                # Deliberately lenient: rather than raising OutputParserException,
+                # hand the raw `arguments` string to the tool unchanged (per the
+                # original author's note, the tools are fine with this).
+                _tool_input = function_call["arguments"]
+
+            # HACK HACK HACK:
+            # The code that encodes tool input into Open AI uses a special variable
+            # name called `__arg1` to handle old style tools that do not expose a
+            # schema and expect a single string argument as an input.
+            # We unpack the argument here if it exists.
+            # Open AI does not support passing in a JSON array as an argument.
+            # Only a dict can carry the `__arg1` key: the raw-string fallback above
+            # (or a JSON scalar/array) must never be indexed by that name.
+            if isinstance(_tool_input, dict) and "__arg1" in _tool_input:
+                tool_input = _tool_input["__arg1"]
+            else:
+                tool_input = _tool_input
+
+            content_msg = f"responded: {message.content}\n" if message.content else "\n"
+            log = f"\nInvoking: `{function_name}` with `{tool_input}`\n{content_msg}\n"
+            return AgentActionMessageLog(
+                tool=function_name,
+                tool_input=tool_input,
+                log=log,
+                message_log=[message],
+            )
+
+        return AgentFinish(
+            return_values={"output": message.content}, log=str(message.content)
+        )
+
+    def parse_result(
+        self, result: List[Generation], *, partial: bool = False
+    ) -> Union[AgentAction, AgentFinish]:
+        """Parse the first generation; raises ValueError unless it is a ChatGeneration."""
+        if not isinstance(result[0], ChatGeneration):
+            raise ValueError("This output parser only works on ChatGeneration output")
+        message = result[0].message
+        return self._parse_ai_message(message)
+
+    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
+        raise ValueError("Can only parse messages")
diff --git a/src/napari_chatgpt/omega/omega_agent/OpenAIFunctionsOmegaAgent.py b/src/napari_chatgpt/omega/omega_agent/OpenAIFunctionsOmegaAgent.py
@@ -5,16 +5,15 @@
 from langchain.agents.format_scratchpad.openai_functions import (
     format_to_openai_function_messages,
 )
-from langchain.agents.output_parsers.openai_functions import (
-    OpenAIFunctionsAgentOutputParser,
-)
 from langchain_core.agents import AgentAction, AgentFinish
 from langchain_core.callbacks import Callbacks
 from langchain_core.messages import (
     SystemMessage,
 )
 
 from napari_chatgpt.omega.napari_bridge import _get_viewer_info
+from napari_chatgpt.omega.omega_agent.OmegaOpenAIFunctionsAgentOutputParser import \
+    OpenAIFunctionsAgentOutputParser
 from napari_chatgpt.omega.omega_agent.prompts import DIDACTICS
 
 
@@ -25,26 +24,27 @@ class OpenAIFunctionsOmegaAgent(OpenAIFunctionsAgent):
     be_didactic: bool = False
 
     async def aplan(
-            self,
-            intermediate_steps: List[Tuple[AgentAction, str]],
-            callbacks: Callbacks = None,
-            **kwargs: Any,
+        self,
+        intermediate_steps: List[Tuple[AgentAction, str]],
+        callbacks: Callbacks = None,
+        **kwargs: Any,
     ) -> Union[AgentAction, AgentFinish]:
-        """Given input, decided what to do.
+        """Async given input, decided what to do.
 
         Args:
             intermediate_steps: Steps the LLM has taken to date,
-                along with observations
+                along with observations.
+            callbacks: Callbacks to use. Defaults to None.
             **kwargs: User inputs.
 
         Returns:
             Action specifying what tool to use.
+            If the agent is finished, returns an AgentFinish.
+            If the agent is not finished, returns an AgentAction.
         """
-        agent_scratchpad = format_to_openai_function_messages(
-            intermediate_steps)
+        agent_scratchpad = format_to_openai_function_messages(intermediate_steps)
         selected_inputs = {
-            k: kwargs[k] for k in self.prompt.input_variables if
-            k != "agent_scratchpad"
+            k: kwargs[k] for k in self.prompt.input_variables if k != "agent_scratchpad"
         }
         full_inputs = dict(**selected_inputs, agent_scratchpad=agent_scratchpad)
         prompt = self.prompt.format_prompt(**full_inputs)
@@ -60,6 +60,7 @@ async def aplan(
                 )
             ))
 
+        # Add didactics to the messages:
         if self.be_didactic:
             messages.insert(-1, SystemMessage(
                 content=DIDACTICS,
@@ -68,10 +69,11 @@ async def aplan(
                 )
             ))
 
+        # predict the message:
         predicted_message = await self.llm.apredict_messages(
             messages, functions=self.functions, callbacks=callbacks
         )
-        agent_decision = OpenAIFunctionsAgentOutputParser._parse_ai_message(
-            predicted_message
-        )
+
+        # parse the AI message:
+        agent_decision = OpenAIFunctionsAgentOutputParser._parse_ai_message(predicted_message)
         return agent_decision
diff --git a/src/napari_chatgpt/omega/omega_agent/prompts.py b/src/napari_chatgpt/omega/omega_agent/prompts.py
@@ -7,6 +7,8 @@
 You can use all the tools and functions at your disposal (see below) to assist the user with image processing and image analysis. 
 Since you are an helpful expert, you are polite and answer in the same language as the user's question.
 You have been created by Loic A. Royer, a Senior Group Leader and Director of Imaging AI at the Chan Zuckerberg Biohub San Francisco.
+
+You are provided with a series of tools/functions that give you the possibility to execute code in the context of an existing napari viewer instance.
 """
 
 PERSONALITY = {}

diff --git a/src/napari_chatgpt/omega/omega_init.py b/src/napari_chatgpt/omega/omega_init.py
@@ -2,7 +2,7 @@
 
 import langchain
 from arbol import aprint
-from langchain.agents import AgentExecutor
+from langchain.agents import AgentExecutor, create_openai_functions_agent
 from langchain.agents.conversational_chat.prompt import SUFFIX
 from langchain.base_language import BaseLanguageModel
 from langchain.callbacks.base import BaseCallbackHandler
@@ -39,6 +39,8 @@
 from napari_chatgpt.omega.tools.special.functions_info_tool import \
     PythonFunctionsInfoTool
 from napari_chatgpt.omega.tools.special.human_input_tool import HumanInputTool
+from napari_chatgpt.omega.tools.special.package_info_tool import \
+    PythonPackageInfoTool
 from napari_chatgpt.omega.tools.special.pip_install_tool import PipInstallTool
 from napari_chatgpt.omega.tools.special.python_repl import \
     PythonCodeExecutionTool
@@ -90,6 +92,7 @@ def initialize_omega_agent(to_napari_queue: Queue = None,
              ExceptionCatcherTool(callbacks=tool_callbacks),
              # FileDownloadTool(),
              PythonCodeExecutionTool(callbacks=tool_callbacks),
+             PythonPackageInfoTool(callbacks=tool_callbacks),
              PipInstallTool(callbacks=tool_callbacks)]
 
     # Adding the human input tool if required:

diff --git a/src/napari_chatgpt/omega/tools/async_base_tool.py b/src/napari_chatgpt/omega/tools/async_base_tool.py
@@ -14,11 +14,18 @@ class AsyncBaseTool(BaseTool):
 
     notebook: JupyterNotebookFile = None
 
-    async def _arun(self, query: str) -> str:
-        """Use the tool asynchronously."""
-        aprint(f"Starting async call to {type(self).__name__}({query}) ")
-        result = await asyncio.get_running_loop().run_in_executor(
-            _aysync_tool_thread_pool,
-            self._run,
-            query)
-        return result
+    def normalise_to_string(self, kwargs):
+        """Reduce a tool's keyword arguments to a single query string.
+
+        A dict contributes its 'args' entry (empty string when absent), any
+        other value is used as-is; a one-element list is replaced by its
+        sole element, and the result is passed through str().
+        """
+        # Pull the raw payload out of the keyword-argument mapping, if any:
+        payload = kwargs.get('args', '') if isinstance(kwargs, dict) else kwargs
+
+        # Unwrap single-element lists:
+        if isinstance(payload, list) and len(payload) == 1:
+            payload = payload[0]
+
+        return str(payload)
diff --git a/src/napari_chatgpt/omega/tools/instructions.py b/src/napari_chatgpt/omega/tools/instructions.py
@@ -20,7 +20,8 @@
 - When and if you use PyTorch functions make sure to pass tensors with the right dtype and number of dimensions in order to match PyTorch's functions parameter requirements. For instance, add and remove batch dimensions and convert to a compatible dtype before and after a series of calls to PyTorch functions.
 - The only data types supported by PyTorch are: float32, float64, float16, bfloat16, uint8, int8, int16, int32, int64, and bool. Make sure to convert the input to one of these types before passing it to a PyTorch function.
 - When using Numba to write image processing code make sure to avoid high-level numpy functions and instead implement the algorithms with loops and low-level numpy functions. Also, make sure to use the right data types for the input and output arrays.
-- If you need to get the selected layer in the napari viewer, use the following code: `viewer.layers.selection.active` .
+- If you need to get the selected layer in the napari viewer, use the following code: `viewer.layers.selection.active`.
+- napari layers do not have a 'type' field, if you need to check the type of a layer, use for example the following code: `isinstance(layer, napari.layers.Shapes)`.
 - If you need to rotate the viewer camera to a specific set of angles, use the following code: `viewer.camera.angles = (angle_z, angle_y, angle_x)`  .
 """
 
diff --git a/src/napari_chatgpt/omega/tools/napari/delegated_code/test/classic_test.py b/src/napari_chatgpt/omega/tools/napari/delegated_code/test/classic_test.py
@@ -47,7 +47,7 @@ def test_classsic_3d(show_viewer: bool = False):
     aprint('')
 
     # Load the 'cells' example dataset
-    cells = skimage.data.cells3d()[:, 1]
+    cells = skimage.data.cells3d()[0:100, 0:100, 1].copy()
 
     # Segment the cells:
     labels = classic_segmentation(cells)
@@ -58,7 +58,7 @@ def test_classsic_3d(show_viewer: bool = False):
     aprint(nb_unique_labels)
 
     # Check that the number of unique labels is correct:
-    assert nb_unique_labels == 25
+    assert nb_unique_labels == 6
 
     # If the viewer is not requested, return:
     if not show_viewer:

diff --git a/src/napari_chatgpt/omega/tools/napari/napari_base_tool.py b/src/napari_chatgpt/omega/tools/napari/napari_base_tool.py
@@ -3,7 +3,7 @@
 import traceback
 from pathlib import Path
 from queue import Queue
-from typing import Union, Optional
+from typing import Union, Optional, Any
 
 from arbol import aprint, asection
 from langchain.chains import LLMChain
@@ -63,9 +63,12 @@ class NapariBaseTool(AsyncBaseTool):
 
     last_generated_code: Optional[str] = None
 
-    def _run(self, query: str) -> str:
+    def _run(self, *args: Any, **kwargs: Any) -> Any:
         """Use the tool."""
 
+        # Get query:
+        query = self.normalise_to_string(kwargs)
+
         if self.prompt:
             # Instantiate chain:
             chain = LLMChain(
@@ -137,6 +140,7 @@ def _run(self, query: str) -> str:
 
         return response
 
+
     def _run_code(self, query: str, code: str, viewer: Viewer) -> str:
         """
         This is the code that is executed, see implementations for details,