From 8172038e060a2138244c6f52864400ba1a18f672 Mon Sep 17 00:00:00 2001
From: baozongbo
Date: Tue, 7 Nov 2023 07:42:13 +0000
Subject: [PATCH 1/3] Code execution class and tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/tool_inference.py           | 45 +++++++++++++++
 scripts/run_tool.sh                  |  3 +
 src/lmflow/pipeline/inferencer.py    | 86 ++++++++++++++++++++++++++++
 tests/models/test_tool_inferencer.py | 35 +++++++++++
 4 files changed, 169 insertions(+)
 create mode 100644 examples/tool_inference.py
 create mode 100644 scripts/run_tool.sh
 create mode 100644 tests/models/test_tool_inferencer.py

diff --git a/examples/tool_inference.py b/examples/tool_inference.py
new file mode 100644
index 000000000..34b08af5c
--- /dev/null
+++ b/examples/tool_inference.py
@@ -0,0 +1,45 @@
+import os
+import argparse
+from lmflow.args import InferencerArguments
+from lmflow.args import ModelArguments
+from lmflow.args import DatasetArguments
+from lmflow.models import hf_decoder_model
+from lmflow.pipeline.inferencer import ToolInferencer
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', type=str, default='0',
+                        help='gpu id; tool inference currently supports only a single gpu')
+    parser.add_argument('--model', type=str, default='codellama/CodeLlama-7b-instruct-hf',
+                        help='target code generation model name or path; currently only '
+                             'huggingface decoder-only models are supported')
+    params = parser.parse_args()
+    os.environ["CUDA_VISIBLE_DEVICES"] = params.gpu
+
+    model_args = ModelArguments(model_name_or_path=params.model)
+    model = hf_decoder_model.HFDecoderModel(model_args)
+    inferencer_args = InferencerArguments()
+    data_args = DatasetArguments()
+
+    toolinf = ToolInferencer(model_args, data_args, inferencer_args)
+
+    while True:
+        try:
+            text = input("Tool Inference: ")
+            toolinf_res = toolinf.inference(model, text)
+            toolinf_res = toolinf_res.replace("<s>", "")
+            toolinf_res = toolinf_res.replace("</s>", "")
+            print('\n\nResult:')
+            print(toolinf_res)
+            print('\n\n')
+            run_code = input("Run code? (y/n): ")
+            if run_code == 'y':
+                toolinf.code_exec(toolinf_res)
+            if run_code == 'n':
+                continue
+
+
+        except EOFError:
+            break
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/scripts/run_tool.sh b/scripts/run_tool.sh
new file mode 100644
index 000000000..0160577ed
--- /dev/null
+++ b/scripts/run_tool.sh
@@ -0,0 +1,3 @@
+model="/home/baozongbo/model/gorilla-hf-final-v1"
+python examples/tool_inference.py \
+    --model ${model} \
\ No newline at end of file
diff --git a/src/lmflow/pipeline/inferencer.py b/src/lmflow/pipeline/inferencer.py
index b9d66f486..0414bdcb5 100644
--- a/src/lmflow/pipeline/inferencer.py
+++ b/src/lmflow/pipeline/inferencer.py
@@ -15,6 +15,7 @@
 import logging
 from typing import Dict, List
 from concurrent.futures import ThreadPoolExecutor
+import subprocess
 
 from transformers import AutoConfig
 import torch.distributed as dist
@@ -553,3 +554,88 @@ def speculative_sampling(input_ids: torch.Tensor,
 
     def stream_inference(self):
         raise NotImplementedError("Streaming output for SpeculativeInferencer is not supported yet")
+
+class ToolInferencer(Inferencer):
+    """
+    Initializes the `ToolInferencer` class with given arguments.
+
+    Parameters
+    ------------
+    model_args : ModelArguments object.
+        Contains the arguments required to load the model.
+
+    data_args : DatasetArguments object.
+        Contains the arguments required to load the dataset.
+
+    inferencer_args : InferencerArguments object.
+        Contains the arguments required to perform inference.
+
+
+    """
+    def __init__(self, model_args, data_args, inferencer_args):
+        super().__init__(model_args, data_args, inferencer_args)
+
+        self.model = HFDecoderModel(self.model_args)
+
+    def inference(
+        self,
+        model: HFDecoderModel,
+        input: str,
+        max_new_tokens: int=1024,
+    ):
+        """
+        Perform inference with a model on a given prompt.
+
+        Parameters
+        ------------
+        model : HFDecoderModel object.
+            The model to perform inference with.
+
+        input : str.
+            The input text (i.e., the prompt) for the model.
+
+        max_new_tokens : int.
+            The maximum number of new tokens to be generated by the model.
+
+        Returns
+        ------------
+        output : str.
+            The output text generated by the model.
+        """
+        if self.inferencer_args.device == "gpu":
+            input_id = model.encode(input, return_tensors="pt").to(device=self.local_rank)
+        elif self.inferencer_args.device == "cpu":
+            input_id = model.encode(input, return_tensors="pt").to(device='cpu')
+        logger.debug(f"input_id: {input_id}")
+        input_length = input_id.shape[1]
+        output_id = model.inference(
+            input_id,
+            use_accelerator=True,
+            max_new_tokens=max_new_tokens,
+            # pad_token_id=model.tokenizer.eos_token_id,
+        )
+        # logger.debug(f"output: {output_id}")
+        output = model.decode(output_id[0])
+        output = output.replace(input, "")
+        return output
+
+    def code_exec(self, code):
+        # 使用 Python 解释器执行代码字符串
+        result = subprocess.run(["python", "-c", code], capture_output=True, text=True)
+
+        # 检查执行结果
+        if result.returncode == 0:
+            print("执行成功,输出如下:")
+            print(result.stdout)
+            return result.stdout
+        else:
+            print("执行失败,错误信息如下:")
+            print(result.stderr)
+            return result
+
+
+
+
+
+
+
diff --git a/tests/models/test_tool_inferencer.py b/tests/models/test_tool_inferencer.py
new file mode 100644
index 000000000..4eb45c848
--- /dev/null
+++ b/tests/models/test_tool_inferencer.py
@@ -0,0 +1,35 @@
+from lmflow.pipeline.inferencer import ToolInferencer
+import unittest
+from lmflow.args import InferencerArguments
+from lmflow.args import ModelArguments
+from lmflow.args import DatasetArguments
+from lmflow.models import hf_decoder_model
+
+CODE_1 = "print(\"hello world\")"
+RES_1 = "hello world\n"
+CODE_2 = "b=a+1\nprint(b)"
+RES_2 = """Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+NameError: name 'a' is not defined
+"""
+
+class ToolInferencerTest(unittest.TestCase):
+    def setUp(self):
+        model_args = ModelArguments(model_name_or_path="codellama/CodeLlama-7b-instruct-hf")
+        model = hf_decoder_model.HFDecoderModel(model_args)
+        inferencer_args = InferencerArguments()
+        data_args = DatasetArguments()
+        self.toolinf = ToolInferencer(model_args, data_args, inferencer_args)
+
+    def test_code_exec_1(self, code=CODE_1, expected_output=RES_1):
+        toolinf_res = self.toolinf.code_exec(code)
+        self.assertEqual(toolinf_res, expected_output)
+
+    def test_code_exec_2(self, code=CODE_2):
+        toolinf_res = self.toolinf.code_exec(code)
+        self.assertNotEqual(toolinf_res.returncode, 0)
+
+unittest.main()
+
+
+
\ No newline at end of file

From 948278657a8d16e6f01ac2a2dbc33db2a5403d18 Mon Sep 17 00:00:00 2001
From: baozongbo
Date: Sat, 2 Dec 2023 08:58:48 +0000
Subject: [PATCH 2/3] code exec feature fixed

---
 scripts/run_tool.sh                  | 2 +-
 src/lmflow/pipeline/inferencer.py    | 4 ++--
 tests/models/test_tool_inferencer.py | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/scripts/run_tool.sh b/scripts/run_tool.sh
index 0160577ed..45904f9ea 100644
--- a/scripts/run_tool.sh
+++ b/scripts/run_tool.sh
@@ -1,3 +1,3 @@
-model="/home/baozongbo/model/gorilla-hf-final-v1"
+model="gorilla-llm/gorilla-7b-hf-delta-v1"
 python examples/tool_inference.py \
     --model ${model} \
\ No newline at end of file
diff --git a/src/lmflow/pipeline/inferencer.py b/src/lmflow/pipeline/inferencer.py
index 0414bdcb5..fd36b6a8f 100644
--- a/src/lmflow/pipeline/inferencer.py
+++ b/src/lmflow/pipeline/inferencer.py
@@ -625,11 +625,11 @@ def code_exec(self, code):
 
         # 检查执行结果
         if result.returncode == 0:
-            print("执行成功,输出如下:")
+            print("Successfully executed, the result is:")
             print(result.stdout)
             return result.stdout
         else:
-            print("执行失败,错误信息如下:")
+            print("Error:")
             print(result.stderr)
             return result
 
diff --git a/tests/models/test_tool_inferencer.py b/tests/models/test_tool_inferencer.py
index 4eb45c848..fd856bf57 100644
--- a/tests/models/test_tool_inferencer.py
+++ b/tests/models/test_tool_inferencer.py
@@ -14,7 +14,7 @@
 """
 
 class ToolInferencerTest(unittest.TestCase):
-    def setUp(self):
+    def set_up(self):
         model_args = ModelArguments(model_name_or_path="codellama/CodeLlama-7b-instruct-hf")
         model = hf_decoder_model.HFDecoderModel(model_args)
         inferencer_args = InferencerArguments()
@@ -22,6 +22,7 @@ def set_up(self):
         self.toolinf = ToolInferencer(model_args, data_args, inferencer_args)
 
     def test_code_exec_1(self, code=CODE_1, expected_output=RES_1):
+
         toolinf_res = self.toolinf.code_exec(code)
         self.assertEqual(toolinf_res, expected_output)
 

From b2aa8fa9d89cb41b0b6373196c2ecce72142ebb6 Mon Sep 17 00:00:00 2001
From: baozongbo
Date: Sat, 2 Dec 2023 10:11:06 +0000
Subject: [PATCH 3/3] comments translated into English

---
 src/lmflow/pipeline/inferencer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lmflow/pipeline/inferencer.py b/src/lmflow/pipeline/inferencer.py
index fd36b6a8f..fb11ab564 100644
--- a/src/lmflow/pipeline/inferencer.py
+++ b/src/lmflow/pipeline/inferencer.py
@@ -620,10 +620,10 @@ def inference(
         return output
 
     def code_exec(self, code):
-        # 使用 Python 解释器执行代码字符串
+        # Execute the code
         result = subprocess.run(["python", "-c", code], capture_output=True, text=True)
 
-        # 检查执行结果
+        # Print the result
         if result.returncode == 0:
             print("Successfully executed, the result is:")
             print(result.stdout)
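
For reviewers, a minimal usage sketch of the API this series adds, condensed from examples/tool_inference.py above; the model name and prompt string are illustrative only:

    from lmflow.args import DatasetArguments, InferencerArguments, ModelArguments
    from lmflow.models import hf_decoder_model
    from lmflow.pipeline.inferencer import ToolInferencer

    # Load the code-generation model and build the tool inferencer.
    model_args = ModelArguments(model_name_or_path="gorilla-llm/gorilla-7b-hf-delta-v1")
    model = hf_decoder_model.HFDecoderModel(model_args)
    toolinf = ToolInferencer(model_args, DatasetArguments(), InferencerArguments())

    # Generate a code snippet for a natural-language request, strip special tokens,
    # then optionally run it through code_exec (which shells out to `python -c`).
    generated = toolinf.inference(model, "Use a huggingface model to translate 'hello' to French")
    generated = generated.replace("<s>", "").replace("</s>", "")
    print(generated)
    toolinf.code_exec(generated)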