remove non-number values from stats return (#267)

biocypher · Jan 27, 2025 · d744044 · d744044
1 parent b3cb3f0
commit d744044
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 1 deletion.
diff --git a/biochatter/llm_connect.py b/biochatter/llm_connect.py
@@ -1595,9 +1595,15 @@ def _update_usage_stats(self, model: str, token_usage: dict) -> None:
 
         """
         if self.user == "community":
+            # Only keep numeric (int or float) values; other types are skipped
+            stats_dict = {
+                f"{k}:{model}": v
+                for k, v in token_usage.items()
+                if isinstance(v, int | float)
+            }
             self.usage_stats.increment(
                 "usage:[date]:[user]",
-                {f"{k}:{model}": v for k, v in token_usage.items()},
+                stats_dict,
             )
 
         if self._update_token_usage is not None:

diff --git a/test/test_llm_connect.py b/test/test_llm_connect.py
@@ -684,3 +684,57 @@ def __init__(__pydantic_self__, **data: Any) -> None:
     # Verify both chat attributes are accessible
     assert convo.chat is not None
     assert convo.ca_chat is not None
+
+
+def test_gpt_update_usage_stats():
+    """Test the _update_usage_stats method in GptConversation."""
+    # Arrange
+    convo = GptConversation(
+        model_name="gpt-3.5-turbo",
+        prompts={},
+        correct=False,
+    )
+
+    # Mock the usage_stats object
+    mock_usage_stats = Mock()
+    convo.usage_stats = mock_usage_stats
+    convo.user = "community"  # Set user to enable stats tracking
+
+    # Mock the update_token_usage callback
+    mock_update_callback = Mock()
+    convo._update_token_usage = mock_update_callback
+
+    model = "gpt-3.5-turbo"
+    token_usage = {
+        "prompt_tokens": 50,
+        "completion_tokens": 30,
+        "total_tokens": 80,
+        "non_numeric_field": "should be ignored",
+        "nested_dict": {  # Should be ignored as it's a dictionary
+            "sub_field": 100,
+            "another_field": 200,
+        },
+        "another_field": "also ignored",
+    }
+
+    # Act
+    convo._update_usage_stats(model, token_usage)
+
+    # Assert
+    # Verify increment was called with correct arguments for community stats
+    # Only numeric values at the top level should be included
+    mock_usage_stats.increment.assert_called_once_with(
+        "usage:[date]:[user]",
+        {
+            "prompt_tokens:gpt-3.5-turbo": 50,
+            "completion_tokens:gpt-3.5-turbo": 30,
+            "total_tokens:gpt-3.5-turbo": 80,
+        },
+    )
+
+    # Verify callback was called with complete token_usage including nested dict
+    mock_update_callback.assert_called_once_with(
+        "community",
+        "gpt-3.5-turbo",
+        token_usage,  # Full dictionary including nested values
+    )