From 8172038e060a2138244c6f52864400ba1a18f672 Mon Sep 17 00:00:00 2001
From: baozongbo
Date: Tue, 7 Nov 2023 07:42:13 +0000
Subject: [PATCH 1/3] Code execution class and tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/tool_inference.py           | 45 +++++++++++++++
 scripts/run_tool.sh                  |  3 +
 src/lmflow/pipeline/inferencer.py    | 86 ++++++++++++++++++++++++++++
 tests/models/test_tool_inferencer.py | 35 +++++++++++
 4 files changed, 169 insertions(+)
 create mode 100644 examples/tool_inference.py
 create mode 100644 scripts/run_tool.sh
 create mode 100644 tests/models/test_tool_inferencer.py

diff --git a/examples/tool_inference.py b/examples/tool_inference.py
new file mode 100644
index 000000000..34b08af5c
--- /dev/null
+++ b/examples/tool_inference.py
@@ -0,0 +1,45 @@
+import os
+import argparse
+from lmflow.args import InferencerArguments
+from lmflow.args import ModelArguments
+from lmflow.args import DatasetArguments
+from lmflow.models import hf_decoder_model
+from lmflow.pipeline.inferencer import ToolInferencer
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', type=str, default='0',
+                        help='gpu id; tool inference currently supports only a single gpu')
+    parser.add_argument('--model', type=str, default='codellama/CodeLlama-7b-instruct-hf',
+                        help='target code generation model name or path; currently only '
+                             'huggingface decoder-only models are supported')
+    params = parser.parse_args()
+    os.environ["CUDA_VISIBLE_DEVICES"] = params.gpu
+
+    model_args = ModelArguments(model_name_or_path=params.model)
+    model = hf_decoder_model.HFDecoderModel(model_args)
+    inferencer_args = InferencerArguments()
+    data_args = DatasetArguments()
+
+    toolinf = ToolInferencer(model_args, data_args, inferencer_args)
+
+    while True:
+        try:
+            text = input("Tool Inference: ")
+            toolinf_res = toolinf.inference(model, text)
+            toolinf_res = toolinf_res.replace("<s>", "")
+            toolinf_res = toolinf_res.replace("</s>", "")
+            print('\n\nResult:')
+            print(toolinf_res)
+            print('\n\n')
+            run_code = input("Run code? (y/n): ")
+            if run_code == 'y':
+                toolinf.code_exec(toolinf_res)
+            if run_code == 'n':
+                continue
+
+
+        except EOFError:
+            break
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/scripts/run_tool.sh b/scripts/run_tool.sh
new file mode 100644
index 000000000..0160577ed
--- /dev/null
+++ b/scripts/run_tool.sh
@@ -0,0 +1,3 @@
+model="/home/baozongbo/model/gorilla-hf-final-v1"
+python examples/tool_inference.py \
+    --model ${model} \
\ No newline at end of file
diff --git a/src/lmflow/pipeline/inferencer.py b/src/lmflow/pipeline/inferencer.py
index b9d66f486..0414bdcb5 100644
--- a/src/lmflow/pipeline/inferencer.py
+++ b/src/lmflow/pipeline/inferencer.py
@@ -15,6 +15,7 @@
 import logging
 from typing import Dict, List
 from concurrent.futures import ThreadPoolExecutor
+import subprocess
 
 from transformers import AutoConfig
 import torch.distributed as dist
@@ -553,3 +554,88 @@ def speculative_sampling(input_ids: torch.Tensor,
 
     def stream_inference(self):
         raise NotImplementedError("Streaming output for SpeculativeInferencer is not supported yet")
+
+class ToolInferencer(Inferencer):
+    """
+    Initializes the `ToolInferencer` class with given arguments.
+
+    Parameters
+    ------------
+    model_args : ModelArguments object.
+        Contains the arguments required to load the model.
+
+    data_args : DatasetArguments object.
+        Contains the arguments required to load the dataset.
+
+    inferencer_args : InferencerArguments object.
+        Contains the arguments required to perform inference.
+
+
+    """
+    def __init__(self, model_args, data_args, inferencer_args):
+        super().__init__(model_args, data_args, inferencer_args)
+
+        self.model = HFDecoderModel(self.model_args)
+
+    def inference(
+        self,
+        model: HFDecoderModel,
+        input: str,
+        max_new_tokens: int=1024,
+    ):
+        """
+        Perform inference with a model on a given prompt.
+
+        Parameters
+        ------------
+        model : HFDecoderModel object.
+            The model to perform inference with.
+
+        input : str.
+            The input text (i.e., the prompt) for the model.
+
+        max_new_tokens : int.
+            The maximum number of new tokens to be generated by the model.
+
+        Returns
+        ------------
+        output : str.
+            The output text generated by the model.
+        """
+        if self.inferencer_args.device == "gpu":
+            input_id = model.encode(input, return_tensors="pt").to(device=self.local_rank)
+        elif self.inferencer_args.device == "cpu":
+            input_id = model.encode(input, return_tensors="pt").to(device='cpu')
+        logger.debug(f"input_id: {input_id}")
+        input_length = input_id.shape[1]
+        output_id = model.inference(
+            input_id,
+            use_accelerator=True,
+            max_new_tokens=max_new_tokens,
+            # pad_token_id=model.tokenizer.eos_token_id,
+        )
+        # logger.debug(f"output: {output_id}")
+        output = model.decode(output_id[0])
+        output = output.replace(input, "")
+        return output
+
+    def code_exec(self, code):
+        # 使用 Python 解释器执行代码字符串
+        result = subprocess.run(["python", "-c", code], capture_output=True, text=True)
+
+        # 检查执行结果
+        if result.returncode == 0:
+            print("执行成功,输出如下:")
+            print(result.stdout)
+            return result.stdout
+        else:
+            print("执行失败,错误信息如下:")
+            print(result.stderr)
+            return result
+
+
+
+
+
+
+
diff --git a/tests/models/test_tool_inferencer.py b/tests/models/test_tool_inferencer.py
new file mode 100644
index 000000000..4eb45c848
--- /dev/null
+++ b/tests/models/test_tool_inferencer.py
@@ -0,0 +1,35 @@
+from lmflow.pipeline.inferencer import ToolInferencer
+import unittest
+from lmflow.args import InferencerArguments
+from lmflow.args import ModelArguments
+from lmflow.args import DatasetArguments
+from lmflow.models import hf_decoder_model
+
+CODE_1 = "print(\"hello world\")"
+RES_1 = "hello world\n"
+CODE_2 = "b=a+1\nprint(b)"
+RES_2 = """Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+NameError: name 'a' is not defined
+"""
+
+class ToolInferencerTest(unittest.TestCase):
+    def setUp(self):
+        model_args = ModelArguments(model_name_or_path="codellama/CodeLlama-7b-instruct-hf")
+        model = hf_decoder_model.HFDecoderModel(model_args)
+        inferencer_args = InferencerArguments()
+        data_args = DatasetArguments()
+        self.toolinf = ToolInferencer(model_args, data_args, inferencer_args)
+
+    def test_code_exec_1(self, code=CODE_1, expected_output=RES_1):
+        toolinf_res = self.toolinf.code_exec(code)
+        self.assertEqual(toolinf_res, expected_output)
+
+    def test_code_exec_2(self, code=CODE_2):
+        toolinf_res = self.toolinf.code_exec(code)
+        self.assertNotEqual(toolinf_res.returncode, 0)
+
+unittest.main()
+
+
+
\ No newline at end of file

From 948278657a8d16e6f01ac2a2dbc33db2a5403d18 Mon Sep 17 00:00:00 2001
From: baozongbo
Date: Sat, 2 Dec 2023 08:58:48 +0000
Subject: [PATCH 2/3] code exec feature fixed

---
 scripts/run_tool.sh                  | 2 +-
 src/lmflow/pipeline/inferencer.py    | 4 ++--
 tests/models/test_tool_inferencer.py | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/scripts/run_tool.sh b/scripts/run_tool.sh
index 0160577ed..45904f9ea 100644
--- a/scripts/run_tool.sh
+++ b/scripts/run_tool.sh
@@ -1,3 +1,3 @@
-model="/home/baozongbo/model/gorilla-hf-final-v1"
+model="gorilla-llm/gorilla-7b-hf-delta-v1"
 python examples/tool_inference.py \
     --model ${model} \
\ No newline at end of file
diff --git a/src/lmflow/pipeline/inferencer.py b/src/lmflow/pipeline/inferencer.py
index 0414bdcb5..fd36b6a8f 100644
--- a/src/lmflow/pipeline/inferencer.py
+++ b/src/lmflow/pipeline/inferencer.py
@@ -625,11 +625,11 @@ def code_exec(self, code):
 
         # 检查执行结果
         if result.returncode == 0:
-            print("执行成功,输出如下:")
+            print("Successfully executed, the result is:")
             print(result.stdout)
             return result.stdout
         else:
-            print("执行失败,错误信息如下:")
+            print("Error:")
             print(result.stderr)
             return result
 
diff --git a/tests/models/test_tool_inferencer.py b/tests/models/test_tool_inferencer.py
index 4eb45c848..fd856bf57 100644
--- a/tests/models/test_tool_inferencer.py
+++ b/tests/models/test_tool_inferencer.py
@@ -14,7 +14,7 @@
 """
 
 class ToolInferencerTest(unittest.TestCase):
-    def setUp(self):
+    def set_up(self):
         model_args = ModelArguments(model_name_or_path="codellama/CodeLlama-7b-instruct-hf")
         model = hf_decoder_model.HFDecoderModel(model_args)
         inferencer_args = InferencerArguments()
@@ -22,6 +22,7 @@ def set_up(self):
         self.toolinf = ToolInferencer(model_args, data_args, inferencer_args)
 
     def test_code_exec_1(self, code=CODE_1, expected_output=RES_1):
+
         toolinf_res = self.toolinf.code_exec(code)
         self.assertEqual(toolinf_res, expected_output)
 

From b2aa8fa9d89cb41b0b6373196c2ecce72142ebb6 Mon Sep 17 00:00:00 2001
From: baozongbo
Date: Sat, 2 Dec 2023 10:11:06 +0000
Subject: [PATCH 3/3] comments translated into English

---
 src/lmflow/pipeline/inferencer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lmflow/pipeline/inferencer.py b/src/lmflow/pipeline/inferencer.py
index fd36b6a8f..fb11ab564 100644
--- a/src/lmflow/pipeline/inferencer.py
+++ b/src/lmflow/pipeline/inferencer.py
@@ -620,10 +620,10 @@ def inference(
         return output
 
     def code_exec(self, code):
-        # 使用 Python 解释器执行代码字符串
+        # Execute the code
         result = subprocess.run(["python", "-c", code], capture_output=True, text=True)
 
-        # 检查执行结果
+        # Print the result
         if result.returncode == 0:
             print("Successfully executed, the result is:")
             print(result.stdout)
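
For reviewers, a minimal usage sketch of the API this series adds, condensed from examples/tool_inference.py above; the model name and prompt string are illustrative only:

    from lmflow.args import DatasetArguments, InferencerArguments, ModelArguments
    from lmflow.models import hf_decoder_model
    from lmflow.pipeline.inferencer import ToolInferencer

    # Load the code-generation model and build the tool inferencer.
    model_args = ModelArguments(model_name_or_path="gorilla-llm/gorilla-7b-hf-delta-v1")
    model = hf_decoder_model.HFDecoderModel(model_args)
    toolinf = ToolInferencer(model_args, DatasetArguments(), InferencerArguments())

    # Generate a code snippet for a natural-language request, strip special tokens,
    # then optionally run it through code_exec (which shells out to `python -c`).
    generated = toolinf.inference(model, "Use a huggingface model to translate 'hello' to French")
    generated = generated.replace("<s>", "").replace("</s>", "")
    print(generated)
    toolinf.code_exec(generated)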