diff --git a/lab/test.py b/lab/test.py index 422bd81..498f72d 100644 --- a/lab/test.py +++ b/lab/test.py @@ -8,6 +8,7 @@ import time from typing import List +from llm_kira.client.llms import ChatGptParam from llm_kira.radio.anchor import SearchCraw from loguru import logger from llm_kira import radio @@ -68,19 +69,6 @@ async def completion(): conversation_id=12094, # random.randint(1, 10000000), ) -llm = llm_kira.client.llms.OpenAi( - profile=conversation, - api_key=openaiApiKey, - token_limit=4000, - auto_penalty=False, - call_func=None, -) - -mem = receiver.MemoryManager(profile=conversation) -chat_client = receiver.ChatBot(profile=conversation, - llm_model=llm - ) - async def mood_hook(): _think = ThinkEngine(profile=conversation) @@ -90,8 +78,84 @@ async def mood_hook(): print(_think.build_status(rank=5)) +async def chatGpt(): + llm = llm_kira.client.llms.ChatGpt( + profile=conversation, + api_key=openaiApiKey, + token_limit=4000, + auto_penalty=False, + call_func=None, + ) + + mem = llm_kira.client.MemoryManager(profile=conversation) + chat_client = llm_kira.client.ChatBot( + profile=conversation, + llm_model=llm + ) + promptManager = llm_kira.creator.engine.PromptEngine( + reverse_prompt_buffer=False, + profile=conversation, + connect_words="\n", + memory_manger=mem, + llm_model=llm, + description="晚上了,这里是河边", + reference_ratio=0.5, + forget_words=["忘掉对话"], + optimizer=Optimizer.SinglePoint, + ) + # 大型数据对抗测试 + # promptManager.insert_prompt(prompt=PromptItem(start="Neko", text=random_string(8000))) + # promptManager.insert_prompt(prompt=PromptItem(start="Neko", text=random_string(500))) + + # 多 prompt 对抗测试 + testPrompt = input("TestPrompt:") + promptManager.insert_prompt(prompt=PromptItem(start="Neko", text="喵喵喵")) + promptManager.insert_interaction(Interaction(single=True, ask=PromptItem(start="alice", text="MewMewMewMew"))) + _result = await promptManager.build_skeleton(query=testPrompt, + llm_task="Summary Text" if len( + testPrompt) > 20 else None, + skeleton=random.choice([SearchCraw( + deacon=["https://www.bing.com/search?q={}&form=QBLH"])]) + ) + _index = 1 + for item in _result: + logger.trace(item.content) + item.ask.start = f"[{_index}]" + promptManager.insert_knowledge(knowledge=item) + _index += 1 + promptManager.insert_knowledge(Interaction(single=True, ask=PromptItem(start="alice", text="MewMewMewMew"))) + # 测试 + promptManager.insert_prompt(prompt=PromptItem(start=conversation.start_name, text=testPrompt)) + response = await chat_client.predict( + prompt=promptManager, + llm_param=ChatGptParam(model_name="gpt-3.5-turbo", temperature=0.8, presence_penalty=0.1, n=1, best_of=1), + predict_tokens=1000, + ) + print(f"id {response.conversation_id}") + print(f"ask {response.ask}") + print(f"reply {response.reply}") + print(f"usage:{response.llm.usage}") + print(f"raw:{response.llm.raw}") + print(f"---{response.llm.time}---") + promptManager.clean(clean_prompt=True, clean_knowledge=False, clean_memory=False) + return "End" + + async def chat(): - promptManager = llm_kira.creator.PromptEngine( + llm = llm_kira.client.llms.OpenAi( + profile=conversation, + api_key=openaiApiKey, + token_limit=4000, + auto_penalty=False, + call_func=None, + ) + + mem = llm_kira.client.MemoryManager(profile=conversation) + chat_client = llm_kira.client.ChatBot( + profile=conversation, + llm_model=llm + ) + promptManager = llm_kira.creator.engine.PromptEngine( reverse_prompt_buffer=False, profile=conversation, connect_words="\n", @@ -181,7 +245,13 @@ async def Sentiment(): async def Sim(): # response 
= llm_kira.utils.chat.Utils.edit_similarity(pre="4552", aft="1224") # print(response) - response = llm_kira.utils.chat.Sim.cosion_similarity(pre="", aft="你是不是啊") + test1 = """ + 早苗(さなえ) +耕种水稻时刚刚种植的幼苗 +原型是守矢早苗(もりやさなえ,生于1945年),守矢家第七十八代当主,是实际存在的人物。 +守矢家是洩矢神的子孙,现任诹访神社下社神长官。洩矢神的祭祀司守矢家代代口传的祭神秘法。那个秘传是一脉相承的,在半夜没有火光的祈祷殿之中秘密传授。但是随着时代的变迁,世袭神官制度在明治五年被取消了。到明治六年,家传之宝(包括:印(印文「卖神祝印」)与镜、太刀等)从诹访大社上社被移走家里只残留下用佐奈伎铃(在大御立座祭神中所使用的)祭祀御左口神的方法。在明治时代,守矢实久(第七十六代当主)被取消了神长官一职,可惜当时口传秘法已失,实久只告诉了守矢真幸(第七十七代当主,实久之弟,诹访大社的祢宜宫司)剩下的部分。到守矢早苗(第七十八代当主,真幸之孙,平成18年(注)3月末从校长(注)[6]一职退下之后,一直致力于环境保护的演讲)这一代,已经不再继承代代相传的已消失的秘法了。到现在,再也没有人知道守矢祭神秘法了。 + """ + response = llm_kira.utils.chat.Sim.cosion_similarity(pre=test1, aft="守矢家第七十八代当主") print(response) @@ -230,7 +300,8 @@ async def Web(): # asyncio.run(completion()) # asyncio.run(mood_hook()) # asyncio.run(Web()) -asyncio.run(chat()) +# asyncio.run(chat()) +asyncio.run(chatGpt()) # asyncio.run(Moderation()) # asyncio.run(Sentiment()) # asyncio.run(KeyParse()) diff --git a/pyproject.toml b/pyproject.toml index 137a7bc..9b2549d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "llm_kira" -version = "0.6.50" +version = "0.7.0" description = "chatbot client for llm" authors = ["sudoskys "] maintainers = [ diff --git a/src/llm_kira/client/Optimizer.py b/src/llm_kira/client/Optimizer.py index 38ba94b..db01f3a 100644 --- a/src/llm_kira/client/Optimizer.py +++ b/src/llm_kira/client/Optimizer.py @@ -179,7 +179,10 @@ def run(self) -> List[Interaction]: knowledge = Scorer.build_weight(self.knowledge) _knowledge_token_limit = int(self.token_limit * self.reference_ratio) _interaction_token_limit = self.token_limit - _knowledge_token_limit - _returner = [Interaction(single=True, ask=PromptItem(start="*", text=self.desc))] + + # Desc + _returner = [Interaction(single=True, ask=PromptItem(start="system", text=self.desc))] + _old_prompt = interaction[:1] # Desc if self.tokenizer(self.desc) > self.token_limit: diff --git a/src/llm_kira/client/__init__.py b/src/llm_kira/client/__init__.py index b019ae4..464c144 100644 --- a/src/llm_kira/client/__init__.py +++ b/src/llm_kira/client/__init__.py @@ -6,5 +6,5 @@ from .llms import openai from .enhance import Support -from .agent import Conversation,MemoryManager +from .agent import Conversation, MemoryManager from .anchor import ChatBot diff --git a/src/llm_kira/client/agent.py b/src/llm_kira/client/agent.py index 8829385..7b8716a 100644 --- a/src/llm_kira/client/agent.py +++ b/src/llm_kira/client/agent.py @@ -76,3 +76,4 @@ def read_context(self) -> List[Interaction]: def save_context(self, message: List[Interaction], override: bool = True): self._DataManager.save(interaction_flow=message, override=override) return message + diff --git a/src/llm_kira/client/anchor.py b/src/llm_kira/client/anchor.py index 67020e0..be0da90 100644 --- a/src/llm_kira/client/anchor.py +++ b/src/llm_kira/client/anchor.py @@ -10,12 +10,10 @@ # from loguru import logger from .llms.base import LlmBaseParam from .llms.openai import LlmBase -from .types import LlmReturn, Interaction, PromptItem +from .types import LlmReturn, PromptItem from ..creator.engine import PromptEngine -from ..error import LLMException -# Utils -from ..utils.chat import Sim +from ..error import LLMException # Completion from .types import ChatBotReturn @@ -37,40 +35,17 @@ def __init__(self, if llm_model is None: raise LLMException("Whats your llm model?") - def __person(self, prompt, prompt_list): - _person_list = [f"{self.profile.start_name}:", - f"{self.profile.restart_name}:", - 
f"{self.profile.start_name}:", - f"{self.profile.restart_name}:", - ] - for item in prompt_list: - if item.ask.connect_words.strip() in [":", ":"]: - _person_list.append(f"{item.ask.start}{item.ask.connect_words}") - _person_list = self.__rank_name(prompt=prompt.prompt, users=_person_list) - return _person_list - - @staticmethod - def __rank_name(prompt: str, users: List[str]): - __temp = {} - for item in users: - __temp[item] = 0 - users = list(__temp.keys()) - _ranked = list(sorted(users, key=lambda i: Sim.cosion_similarity(pre=str(prompt), aft=str(i)), reverse=True)) - return _ranked - async def predict(self, prompt: PromptEngine, predict_tokens: Union[int] = 100, llm_param: LlmBaseParam = None, parse_reply: Callable[[list], str] = None, - rank_name: bool = True, ) -> ChatBotReturn: """ :param prompt: PromptEngine :param predict_tokens: 预测 Token 位 :param llm_param: 大语言模型参数 :param parse_reply: Callable[[list], str] 覆写解析方法 - :param rank_name: 自动排序停止词减少第三人称的冲突出现 """ self.prompt = prompt # ReWrite @@ -79,28 +54,17 @@ async def predict(self, if predict_tokens > self.llm.get_token_limit(): # Or Auto Cut? raise LLMException("Why your predict token > set token limit?") - _llm_result_limit = self.llm.get_token_limit() - predict_tokens - _llm_result_limit = _llm_result_limit if _llm_result_limit > 0 else 1 - # Get - _prompt_index, _prompt = self.prompt.build_prompt(predict_tokens=predict_tokens) - _prompt_list = [] - _person_list = None if not rank_name else self.__person(prompt=_prompt_index, prompt_list=_prompt) - - # Prompt 构建 - for item in _prompt: - _prompt_list.extend(item.content) - prompt_build = "\n".join(_prompt_list) + f"\n{self.profile.restart_name}:" - prompt_build = self.llm.resize_sentence(prompt_build, token=_llm_result_limit) - # ODO - # logger.trace(prompt_build) + # Get Question Index + _prompt_index = self.prompt.prompt # Get llm_result: LlmReturn = await self.llm.run( - prompt=prompt_build, + prompt=prompt, predict_tokens=predict_tokens, - llm_param=llm_param, - stop_words=_person_list + llm_param=llm_param ) + prompt.clean(clean_prompt=True) + self.prompt.build_interaction( ask=_prompt_index, response=PromptItem( diff --git a/src/llm_kira/client/llms/base.py b/src/llm_kira/client/llms/base.py index 0a86883..dc237ce 100644 --- a/src/llm_kira/client/llms/base.py +++ b/src/llm_kira/client/llms/base.py @@ -8,10 +8,8 @@ import os from abc import abstractmethod, ABC from typing import Union, Optional, Callable, Any, Dict, Tuple, Mapping, List - from loguru import logger from pydantic import BaseModel - from ..types import LlmReturn @@ -87,7 +85,7 @@ def parse_usage(response) -> Optional[int]: @abstractmethod async def run(self, - prompt: str, + prompt: Any, validate: Union[List[str], None] = None, predict_tokens: int = 500, llm_param: LlmBaseParam = None, diff --git a/src/llm_kira/client/llms/chatgpt.py b/src/llm_kira/client/llms/chatgpt.py index 3e991ef..8ab2669 100644 --- a/src/llm_kira/client/llms/chatgpt.py +++ b/src/llm_kira/client/llms/chatgpt.py @@ -5,42 +5,69 @@ # @Github :sudoskys import math import time +import random import tiktoken -from typing import Union, Optional, Callable, Any, Dict, Tuple, Mapping, List +from typing import Union, Optional, Callable, Any, Dict, Tuple, Mapping, List, Literal +from loguru import logger + +from ...creator.engine import PromptEngine # from loguru import logger from ...error import RateLimitError, ServiceUnavailableError +from ...tool import openai as openai_api from pydantic import BaseModel, Field from tenacity import 
retry_if_exception_type, retry, stop_after_attempt, wait_exponential from ..agent import Conversation from ..llms.base import LlmBase, LlmBaseParam -from ..types import LlmReturn +from ..types import LlmReturn, Interaction, LlmException +from ...tool.openai import ChatPrompt +from ...utils.chat import Sim from ...utils.data import DataUtils from ...utils.setting import llmRetryAttempt, llmRetryTime, llmRetryTimeMax, llmRetryTimeMin -from ...utils import network class ChatGptParam(LlmBaseParam, BaseModel): - api: str - """Mew Mew API""" - model_name: str = "text-davinci-003" + model_name: str = "gpt-3.5-turbo" """Model name to use.""" + temperature: float = 0.8 + """What sampling temperature to use.""" max_tokens: int = 256 """The maximum number of tokens to generate in the completion. -1 returns as many tokens as possible given the prompt and the models maximal context size.""" - request_timeout: Optional[Union[float, Tuple[float, float]]] = None - """Timeout for requests to OpenAI completion API. Default is 600 seconds.""" + top_p: float = 1 + """Total probability mass of tokens to consider at each step.""" + frequency_penalty: float = 0 + """Penalizes repeated tokens according to frequency.""" + presence_penalty: float = 0 + """Penalizes repeated tokens.""" + n: int = 1 + """How many completions to generate for each prompt.""" + best_of: int = 1 + """Generates best_of completions server-side and returns the "best".""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) """Holds any model parameters valid for create call not explicitly specified.""" + batch_size: int = 20 + """Batch size to use when passing multiple documents to generate.""" + request_timeout: Optional[Union[float, Tuple[float, float]]] = None + """Timeout for requests to OpenAI completion API. 
Default is 600 seconds.""" + logit_bias: Optional[Dict[str, float]] = Field(default_factory=dict) + """Adjust the probability of specific tokens being generated.""" @property def _default_params(self) -> Dict[str, Any]: """Get the default parameters for calling OpenAI API.""" normal_params = { + "temperature": self.temperature, "max_tokens": self.max_tokens, + "top_p": self.top_p, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + "n": self.n, + "best_of": self.best_of, "request_timeout": self.request_timeout, + "logit_bias": self.logit_bias, } return {**normal_params, **self.model_kwargs} @@ -56,10 +83,9 @@ def identifying_params(self) -> Mapping[str, Any]: class ChatGpt(LlmBase): - """CatGpt""" - def __init__(self, - profile: Conversation, + def __init__(self, profile: Conversation, + api_key: Union[str, list] = None, token_limit: int = 3700, auto_penalty: bool = False, call_func: Callable[[dict, str], Any] = None, @@ -74,6 +100,17 @@ def __init__(self, """ self.auto_penalty = auto_penalty self.profile = profile + # if api_key is None: + # api_key = setting.openaiApiKey + if isinstance(api_key, list): + api_key: list + if not api_key: + raise RuntimeError("NO KEY") + api_key = random.choice(api_key) + api_key: str + self.__api_key = api_key + if not api_key: + raise RuntimeError("NO KEY") self.__start_sequence = self.profile.start_name self.__restart_sequence = self.profile.restart_name self.__call_func = call_func @@ -92,16 +129,22 @@ def tokenizer(self, text, raw: bool = False) -> Union[int, list]: @staticmethod def parse_response(response) -> list: REPLY = [] - Choice = response.get("response") + Choice = response.get("choices") if Choice: - REPLY.append(Choice) + for item in Choice: + _text = item["message"]["content"] + REPLY.append(_text) if not REPLY: REPLY = [""] return REPLY @staticmethod def parse_usage(response) -> Optional[int]: - return 0 + usage = None + usage_dict = response.get("usage") + if usage_dict: + usage = usage_dict["total_tokens"] + return usage @property def _llm_type(self) -> str: @@ -141,7 +184,7 @@ async def task_context(self, task: str, prompt: str, predict_tokens: int = 500) _prompt = f"Text:{prompt}\n{task}: " llm_result = await self.run(prompt=_prompt, predict_tokens=predict_tokens, - llm_param=ChatGptParam(model_name="text-davinci-003"), + llm_param=ChatGptParam(model_name="gpt-3.5-turbo"), stop_words=["Text:", "\n\n"] ) return llm_result @@ -190,14 +233,16 @@ def model_context_size(model_name: str) -> int: ServiceUnavailableError)), stop=stop_after_attempt(llmRetryAttempt), wait=wait_exponential(multiplier=llmRetryTime, min=llmRetryTimeMin, max=llmRetryTimeMax), - reraise=True) + reraise=True, + ) async def run(self, - prompt: str, + prompt: Union[PromptEngine, str], validate: Union[List[str], None] = None, predict_tokens: int = 500, llm_param: ChatGptParam = None, stop_words: list = None, - anonymous_user: bool = True, + anonymous_user: bool = False, + rank_name: bool = False, **kwargs ) -> LlmReturn: """ @@ -208,6 +253,7 @@ async def run(self, :param llm_param: 参数表 :param anonymous_user: :param stop_words: + :param rank_name: :return: """ _request_arg = { @@ -215,44 +261,92 @@ async def run(self, "n": 1 } _request_arg: dict - if stop_words is None: - stop_words = [f"{self.profile.start_name}:", - f"{self.profile.restart_name}:", - f"{self.profile.start_name}:", - f"{self.profile.restart_name}:"] - # Kwargs + _llm_result_limit = self.get_token_limit() - predict_tokens + _llm_result_limit = _llm_result_limit if 
_llm_result_limit > 0 else 1
+        _prompt_input, _prompt = prompt.build_prompt(predict_tokens=_llm_result_limit)
+        _prompt: List[Interaction]
+
+        # Get
+        if not _prompt_input:
+            raise LlmException("Input Is Empty")
+        _prompt_input = _prompt_input.prompt
+
+        # Temp
+        _message = []
+        for item in _prompt:
+            item: Interaction
+            _message.extend(item.message)
+        # Prompt
+        _message_list = [ChatPrompt(role="system", content=prompt.description)]
+        for item in _message:
+            item: List[str]
+            # Map the speaker name onto an OpenAI chat role
+            _role = self.__role_edit(item[0])
+            _content = item[1]
+            if _content != prompt.description:
+                _message_list.append(ChatPrompt(role=_role, content=_content))
+        logger.trace(_message_list)
+        # Merge the llm_param defaults
         if llm_param:
             _request_arg.update(llm_param.invocation_params)
         if validate is None:
             validate = []
+
+        # Build the overriding request fields
+        _request_arg.update(model=str(llm_param.model_name),
+                            max_tokens=int(predict_tokens),
+                            user=str(self.profile.get_conversation_hash()),
+                            )
         # Anonymous
         if anonymous_user:
             _request_arg.pop("user", None)
-        headers = {"User-Agent": "Mozilla/5.0", "Content-Type": "application/json"}
-        _message_arg = {
-            "message": prompt
-        }
+        if _request_arg.get("frequency_penalty") == 0:
+            _request_arg.pop("frequency_penalty", None)
+        if _request_arg.get("presence_penalty") == 0:
+            _request_arg.pop("presence_penalty", None)
+        # Drop an empty logit_bias parameter
+        if not _request_arg.get("logit_bias"):
+            _request_arg["logit_bias"] = {}
+            _request_arg.pop("logit_bias", None)
+        # Clamp temperature and penalty parameters to valid ranges
+        if _request_arg.get("frequency_penalty"):
+            _frequency_penalty = _request_arg["frequency_penalty"]
+            _frequency_penalty = _frequency_penalty if -2.0 < _frequency_penalty else -1.9
+            _frequency_penalty = _frequency_penalty if _frequency_penalty < 2.0 else 1.9
+            _request_arg["frequency_penalty"] = _frequency_penalty
+        if _request_arg.get("presence_penalty"):
+            _presence_penalty = _request_arg["presence_penalty"]
+            _presence_penalty = _presence_penalty if -2.0 < _presence_penalty else -1.9
+            _presence_penalty = _presence_penalty if _presence_penalty < 2.0 else 1.9
+            _request_arg["presence_penalty"] = _presence_penalty
+        if _request_arg.get("temperature"):
+            _temperature = _request_arg["temperature"]
+            _request_arg["temperature"] = _temperature if 0 < _temperature < 1 else 0.9
+
+        # Self-maintained OpenAI API client
-        try:
-            response = await network.request(
-                method="POST",
-                url=llm_param.api + "/message",
-                data=_message_arg,
-                headers=headers,
-                json_body=True,
-            )
-            _ = response.json()
-        except Exception as e:
-            raise ServiceUnavailableError(f"Server:{e}")
-        if response.status_code != 200:
-            raise ServiceUnavailableError(f"Server:{response.json().get('error')}")
+        response = await openai_api.ChatCompletion(api_key=self.__api_key, call_func=self.__call_func).create(
+            prompt=_message_list,
+            **_request_arg
+        )
+        # Reply
         reply = self.parse_response(response)
         self.profile.update_usage(usage=self.parse_usage(response))
         return LlmReturn(model_flag=llm_param.model_name,
-                         raw=response.json(),
-                         prompt=prompt,
+                         raw=response,
+                         prompt=_prompt_input,
                          usage=self.profile.get_round_usage(),
                          time=int(time.time()),
-                         reply=reply,
+                         reply=reply
                          )
+
+    def __role_edit(self, _role) -> Literal["system", "user", "assistant"]:
+        if _role in ["system", "user", "assistant"]:
+            return _role
+        if Sim.cosion_similarity(pre=str(_role), aft=self.profile.start_name) > 0.85:
+            return "user"
+        if Sim.cosion_similarity(pre=str(_role), aft=self.profile.restart_name) > 0.85:
+            return "assistant"
+        return "user"
diff --git a/src/llm_kira/client/llms/openai.py 
b/src/llm_kira/client/llms/openai.py index ac33440..ecac374 100644 --- a/src/llm_kira/client/llms/openai.py +++ b/src/llm_kira/client/llms/openai.py @@ -8,6 +8,8 @@ import random import tiktoken from typing import Union, Optional, Callable, Any, Dict, Tuple, Mapping, List + +from loguru import logger # from loguru import logger from ...error import RateLimitError, ServiceUnavailableError @@ -15,8 +17,10 @@ from pydantic import BaseModel, Field from tenacity import retry_if_exception_type, retry, stop_after_attempt, wait_exponential from ..agent import Conversation +from ...creator.engine import PromptEngine from ..llms.base import LlmBase, LlmBaseParam -from ..types import LlmReturn +from ..types import LlmReturn, LlmException +from ...utils.chat import Sim from ...utils.data import DataUtils from ...utils.setting import llmRetryAttempt, llmRetryTime, llmRetryTimeMax, llmRetryTimeMin @@ -229,12 +233,13 @@ def model_context_size(model_name: str) -> int: wait=wait_exponential(multiplier=llmRetryTime, min=llmRetryTimeMin, max=llmRetryTimeMax), reraise=True) async def run(self, - prompt: str, + prompt: Union[PromptEngine, str], validate: Union[List[str], None] = None, predict_tokens: int = 500, llm_param: OpenAiParam = None, stop_words: list = None, - anonymous_user: bool = True, + anonymous_user: bool = False, + rank_name: bool = False, **kwargs ) -> LlmReturn: """ @@ -245,6 +250,7 @@ async def run(self, :param llm_param: 参数表 :param anonymous_user: :param stop_words: + :param rank_name: :return: """ _request_arg = { @@ -252,25 +258,48 @@ async def run(self, "n": 1 } _request_arg: dict + if not isinstance(prompt, str): + # Prompt + _prompt_list = [] + # Get + _llm_result_limit = self.get_token_limit() - predict_tokens + _llm_result_limit = _llm_result_limit if _llm_result_limit > 0 else 1 + _prompt_input, _prompt = prompt.build_prompt(predict_tokens=_llm_result_limit) + if not _prompt_input: + raise LlmException("Input Is Empty") + for item in _prompt: + _prompt_list.extend(item.content) + prompt_build = "\n".join(_prompt_list) + f"\n{self.profile.restart_name}:" + prompt_build = self.resize_sentence(prompt_build, token=self.get_token_limit()) + stop_words = None if not rank_name else self.__person(prompt=_prompt_input, prompt_list=_prompt) + else: + prompt_build = self.resize_sentence(prompt, token=predict_tokens) + + # 停止符号 if stop_words is None: stop_words = [f"{self.profile.start_name}:", f"{self.profile.restart_name}:", f"{self.profile.start_name}:", f"{self.profile.restart_name}:"] - # Kwargs + + # 补全参数 if llm_param: _request_arg.update(llm_param.invocation_params) if validate is None: validate = [] + + # 构造覆盖信息 _request_arg.update(model=str(llm_param.model_name), - prompt=str(prompt), + prompt=prompt_build, # IMPORTANT max_tokens=int(predict_tokens), user=str(self.profile.get_conversation_hash()), stop=stop_words[:4], ) + # Anonymous if anonymous_user: _request_arg.pop("user", None) + # Adjust Penalty """ if self.auto_penalty and validate: @@ -313,18 +342,6 @@ async def run(self, _temperature = _request_arg["temperature"] _request_arg["temperature"] = _temperature if 0 < _temperature < 1 else 0.9 - # 继承重写 - # openai_client.api_key = self.__api_key - # try: - # response = await openai_client.Completion.acreate(**_request_arg) - # except openai_client.error.OpenAIError as e: - # if self.__call_func: - # self.__call_func(e.json_body, self.__api_key) - # openai_error_handler(e.code, e.error) - # raise - # except Exception as e: - # raise LLMException(e) - # 自维护 Api 库 response = await 
openai_api.Completion(api_key=self.__api_key, call_func=self.__call_func).create( **_request_arg @@ -335,8 +352,29 @@ async def run(self, self.profile.update_usage(usage=self.parse_usage(response)) return LlmReturn(model_flag=llm_param.model_name, raw=response, - prompt=prompt, + prompt=prompt_build, usage=self.profile.get_round_usage(), time=int(time.time()), - reply=reply, + reply=reply ) + + @staticmethod + def __rank_name(prompt: str, users: List[str]): + __temp = {} + for item in users: + __temp[item] = 0 + users = list(__temp.keys()) + _ranked = list(sorted(users, key=lambda i: Sim.cosion_similarity(pre=str(prompt), aft=str(i)), reverse=True)) + return _ranked + + def __person(self, prompt, prompt_list: list): + _person_list = [f"{self.profile.start_name}:", + f"{self.profile.restart_name}:", + f"{self.profile.start_name}:", + f"{self.profile.restart_name}:", + ] + for item in prompt_list: + if item.ask.connect_words.strip() in [":", ":"]: + _person_list.append(f"{item.ask.start}{item.ask.connect_words}") + _person_list = self.__rank_name(prompt=prompt, users=_person_list) + return _person_list diff --git a/src/llm_kira/client/types.py b/src/llm_kira/client/types.py index 2c1d33d..df84880 100644 --- a/src/llm_kira/client/types.py +++ b/src/llm_kira/client/types.py @@ -55,6 +55,13 @@ def content(self): def raw(self): return "\n".join(self.content) + @property + def message(self): + if self.single: + return [[self.ask.start, self.ask.text]] + else: + return [[self.ask.start, self.ask.text], [self.reply.start, self.reply.text]] + class InteractionWeight(BaseModel): interaction: Interaction @@ -83,3 +90,7 @@ class ChatBotReturn(BaseModel): llm: LlmReturn ask: str reply: str + + +class LlmException(Exception): + pass diff --git a/src/llm_kira/creator/__init__.py b/src/llm_kira/creator/__init__.py index 309d3dd..4532cea 100644 --- a/src/llm_kira/creator/__init__.py +++ b/src/llm_kira/creator/__init__.py @@ -3,5 +3,5 @@ # @FileName: __init__.py # @Software: PyCharm # @Github :sudoskys -from .engine import PromptEngine, Preset -from .think import Hook, ThinkEngine, HookPool + +__all__ = ["base", "engine", "think"] diff --git a/src/llm_kira/creator/base.py b/src/llm_kira/creator/base.py new file mode 100644 index 0000000..ddb92cc --- /dev/null +++ b/src/llm_kira/creator/base.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# @Time : 3/2/23 4:13 PM +# @FileName: base.py +# @Software: PyCharm +# @Github :sudoskys +from abc import ABC, abstractmethod + +class BaseEngine(ABC): + + @abstractmethod + def build_prompt(self, predict_tokens: int = 500): + pass + + diff --git a/src/llm_kira/creator/engine.py b/src/llm_kira/creator/engine.py index 8763c94..d83c3c3 100644 --- a/src/llm_kira/creator/engine.py +++ b/src/llm_kira/creator/engine.py @@ -4,12 +4,12 @@ # @Software: PyCharm # @Github :sudoskys import time -from typing import List, Union, Tuple +from typing import List, Union, Tuple, Optional from loguru import logger -from llm_kira.client import Optimizer - +from .base import BaseEngine +from ..client import Optimizer from ..client.agent import Conversation, MemoryManager from ..client.llms.base import LlmBase from ..client.types import Interaction, PromptItem @@ -17,7 +17,7 @@ from ..utils.data import MsgFlow -class PromptEngine(object): +class PromptEngine(BaseEngine): """ 设计用于维护提示系统和接入外骨骼 """ @@ -95,10 +95,11 @@ def _build_prompt_buffer(self): _buffer = self.prompt_buffer.copy() if self.reverse_prompt_buffer: _buffer = list(reversed(_buffer)) + if not _buffer: + return None _index = 
_buffer.pop(-1) for item in _buffer: self.build_interaction(ask=item, single=True) - self.clean(clean_prompt=True) return _index def read_interaction(self): @@ -193,11 +194,14 @@ def build_context(self, prompt: PromptItem, predict_tokens) -> List[Interaction] _optimized_prompt.append(Interaction(single=True, ask=prompt)) return _optimized_prompt - def build_prompt(self, predict_tokens: int = 500) -> Tuple[PromptItem, List[Interaction]]: + def build_prompt(self, predict_tokens: int = 500) -> Tuple[Optional[PromptItem], List[Interaction]]: """ Optimising context and re-cutting """ user_input = self._build_prompt_buffer() + if not user_input: + logger.warning("No Buffer") + return None, [] prompt = self.build_context(user_input, predict_tokens=predict_tokens) return user_input, prompt @@ -285,3 +289,9 @@ def head(self, head = f"{start_name}{restart_name}の会話," head = self.add_tail(prompt_iscode, sentence=head, tail="プログラミング指導を提供する,") return f"{head}" + + +class MiddlePrompt(object): + def __init__(self, prompt: PromptEngine = None, limit_token: int = 2000): + self.prompt = prompt + self.limit_token: int = limit_token diff --git a/src/llm_kira/radio/__init__.py b/src/llm_kira/radio/__init__.py index 725f158..6b4b176 100644 --- a/src/llm_kira/radio/__init__.py +++ b/src/llm_kira/radio/__init__.py @@ -4,4 +4,4 @@ # @Software: PyCharm # @Github :sudoskys -__all__ = ["anchor", "crawer", "decomposer", "setting"] +# __all__ = ["anchor", "crawer", "decomposer", "setting"] diff --git a/src/llm_kira/tool/openai/__init__.py b/src/llm_kira/tool/openai/__init__.py index 3311bbc..13db58c 100644 --- a/src/llm_kira/tool/openai/__init__.py +++ b/src/llm_kira/tool/openai/__init__.py @@ -5,3 +5,4 @@ # @Github :sudoskys from .resouce import Completion from .resouce import Moderations +from .resouce import ChatCompletion, ChatPrompt diff --git a/src/llm_kira/tool/openai/api/api_url.json b/src/llm_kira/tool/openai/api/api_url.json index 169344d..77a6aee 100644 --- a/src/llm_kira/tool/openai/api/api_url.json +++ b/src/llm_kira/tool/openai/api/api_url.json @@ -12,6 +12,86 @@ } } }, + "chat": { + "completions": { + "desc": "write", + "url": "https://api.openai.com/v1/chat/completions", + "params": { + "model": { + "type": "string", + "intro": "ID of the model to use.", + "must": true, + "Defaults": "" + }, + "messages": { + "type": "string,array", + "intro": "The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.", + "must": true, + "Defaults": "" + }, + "temperature": { + "type": "number", + "intro": "What sampling temperature to use. ", + "must": false, + "Defaults": 1 + }, + "top_p": { + "type": "number", + "intro": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "must": false, + "Defaults": 1 + }, + "n": { + "type": "integer", + "intro": "How many completions to generate for each prompt.", + "must": false, + "Defaults": 1 + }, + "stream": { + "type": "string", + "intro": "Whether to stream back partial progress.", + "must": false, + "Defaults": false + }, + "max_tokens": { + "type": "integer", + "intro": "The maximum number of tokens to generate in the completion.", + "must": false, + "Defaults": 16 + }, + "logprobs": { + "type": "boolean", + "intro": "Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. 
", + "must": false, + "Defaults": false + }, + "presence_penalty": { + "type": "number", + "intro": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "must": false, + "Defaults": 0 + }, + "frequency_penalty": { + "type": "number", + "intro": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "must": false, + "Defaults": 0 + }, + "logit_bias": { + "type": "map", + "intro": "Modify the likelihood of specified tokens appearing in the completion.", + "must": false, + "Defaults": {} + }, + "user": { + "type": "string", + "intro": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.", + "must": false, + "Defaults": "" + } + } + } + }, "completions": { "desc": "write", "url": "https://api.openai.com/v1/completions", diff --git a/src/llm_kira/tool/openai/resouce/__init__.py b/src/llm_kira/tool/openai/resouce/__init__.py index 5d0a70a..32426a8 100644 --- a/src/llm_kira/tool/openai/resouce/__init__.py +++ b/src/llm_kira/tool/openai/resouce/__init__.py @@ -3,7 +3,7 @@ # @FileName: __init__.py # @Software: PyCharm # @Github :sudoskys - +from .chat import ChatCompletion, ChatPrompt from .completion import Completion from .moderations import Moderations diff --git a/src/llm_kira/tool/openai/resouce/chat.py b/src/llm_kira/tool/openai/resouce/chat.py new file mode 100644 index 0000000..dec864a --- /dev/null +++ b/src/llm_kira/tool/openai/resouce/chat.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# @Time : 3/2/23 4:44 PM +# @FileName: chat.py +# @Software: PyCharm +# @Github :sudoskys +import random +from typing import Union, Optional, Literal, List + +from pydantic import BaseModel + +from ...openai.api.api_utils import load_api +from ...openai.api.network import request +from ....utils import setting + +API = load_api() + + +class ChatPrompt(BaseModel): + role: Literal["system", "user", "assistant"] = "user" + content: str = "" + + +class ChatCompletion(object): + def __init__(self, api_key: Union[str, list] = None, proxy_url: str = "", call_func=None): + # if api_key is None: + # api_key = setting.openaiApiKey + if isinstance(api_key, list): + api_key: list + if not api_key: + raise RuntimeError("Use Out") + random.shuffle(api_key) + api_key = random.choice(api_key) + api_key: str + if not api_key: + raise RuntimeError("NO KEY") + self.__api_key = api_key + if not proxy_url: + proxy_url = setting.proxyUrl + self.__proxy = proxy_url + self.__call_func = call_func + + def get_api_key(self): + return self.__api_key + + async def create(self, + model: str = "gpt-3.5-turbo", + prompt: Union[List[ChatPrompt], dict] = None, + temperature: float = 0, + max_tokens: int = 7, + **kwargs + ): + """ + 得到一个对话,预设了一些参数,其实还有很多参数,如果你有api文档 + :param model: 模型 + :param prompt: 提示 + :param temperature: unknown + :param max_tokens: 返回数量 + :return: + """ + """ + curl https://api.openai.com/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": "Hello!"}] + }' + """ + api = API["v1"]["chat"]["completions"] + if not isinstance(prompt, dict): + prompt = [item.dict() for item in prompt] + # 参数决定 + params = { + "model": model, + "messages": prompt, + "temperature": temperature, + 
"max_tokens": max_tokens + } + _api_config = { + param: api["params"][param]["Defaults"] + for param in api["params"].keys() + if (param in kwargs) or (param in params) + } + _api_config.update(params) + _api_config.update(kwargs) + _api_config = {key: item + for key, item in _api_config.items() + if key in api["params"].keys() + } + # 返回请求 + return await request( + method="POST", + url=api["url"], + data=_api_config, + auth=self.__api_key, + proxy=self.__proxy, + json_body=True, + call_func=self.__call_func + ) diff --git a/src/llm_kira/tool/openai/resouce/completion.py b/src/llm_kira/tool/openai/resouce/completion.py index 80dfaf8..7b6795e 100644 --- a/src/llm_kira/tool/openai/resouce/completion.py +++ b/src/llm_kira/tool/openai/resouce/completion.py @@ -7,7 +7,7 @@ from typing import Union from ...openai.api.api_utils import load_api from ...openai.api.network import request -from llm_kira.utils import setting +from ....utils import setting API = load_api() diff --git a/src/llm_kira/tool/openai/resouce/moderations.py b/src/llm_kira/tool/openai/resouce/moderations.py index 1025dd3..d7cafb0 100644 --- a/src/llm_kira/tool/openai/resouce/moderations.py +++ b/src/llm_kira/tool/openai/resouce/moderations.py @@ -8,7 +8,7 @@ from ..api.api_utils import load_api from ..api.network import request -from llm_kira.utils import setting +from ....utils import setting API = load_api()