Skip to content
This repository has been archived by the owner on Oct 14, 2023. It is now read-only.

Commit

Permalink
Merge pull request #1 from sudoskys/fix_
Browse files Browse the repository at this point in the history
0.2.0 修复依赖库,修复 header
  • Loading branch information
sudoskys authored Jan 27, 2023
2 parents 7027323 + cb39661 commit 4a86efc
Show file tree
Hide file tree
Showing 9 changed files with 376 additions and 226 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "llm_kira"
version = "0.1.9"
version = "0.2.0"
description = "chatbot client for llm"
authors = ["sudoskys <[email protected]>"]
maintainers = [
Expand All @@ -26,7 +26,7 @@ openai-async = "^0.0.2"
numpy = "^1.24.1"
tiktoken = "^0.1.2"
scikit-learn = "^1.2.0"
pycurl = "^7.43.0.6"
# pycurl = "^7.43.0.6"


[build-system]
Expand Down
114 changes: 53 additions & 61 deletions src/llm_kira/client/Optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class MatrixPoint(Point):
def __init__(self,
tokenizer,
prompt: str = "",
memory: List[Memory_Flow] = None,
memory: List[dict] = None,
attention: int = 3,
# start_token: int = 0,
extra_token: int = 0,
Expand Down Expand Up @@ -91,66 +91,67 @@ def run(self) -> list:
if self.memory is None:
return []
_create_token = self.token_limit - self.extra_token

# 入口检查
if len(memory) - attention < 0:
return convert_msgflow_to_list(memory)

# 转换为
for item in range(len(memory)):
if isinstance(memory[item], Memory_Flow):
memory[item] = memory[item].dict()
memory: list

def forgetting_curve(x):
_weight = numpy.exp(-x / 5) * 100 + 10
# 低谷值
_weight = _weight if _weight > 0 else 0
# 高度线
_weight = _weight if _weight < 100 else 100
# 推底值,防止无法唤起
_weight = _weight if _weight > 15 else 5
return _weight

# 转换为
for item in range(len(memory)):
memory[item] = memory[item].dict()
memory: list

# 计算初始保留比并初始化
memory = list(reversed(memory))
for i in range(0, len(memory)):
_forget = forgetting_curve(i)
if _forget > 10:
if _forget > 5:
memory[i]["content"]["weight"] = [_forget]
else:
memory[i]["content"]["weight"] = []
memory = list(reversed(memory))

# 筛选标准发言
_index = []
for i in range(0, len(memory) - attention):
ask, reply = MsgFlow.get_content(memory[i], sign=False)
if len(ask) < 1 or len(reply) < 1:
memory[i]["content"]["weight"].append(-1000)

# 相似度检索
for i in range(0, len(memory)):
ask, reply = MsgFlow.get_content(memory[i], sign=False)
_diff1 = Utils.cosion_sismilarity(pre=prompt, aft=f"{ask}{reply}")
_diff = _diff1
score = _diff * 100
score = score if score < 95 else 1
_ask_diff = Utils.cosion_sismilarity(pre=prompt, aft=ask)
_ask_diff = _ask_diff * 100
score = _ask_diff if _ask_diff < 90 else 0
if score != 0:
memory[i]["content"]["weight"].append(score)

# 主题检索
_key = Utils.tfidf_keywords(prompt, topK=5)
# print(_key)
for i in range(0, len(memory)):
score = 0
full_score = len(_key) if len(_key) != 0 else 1
ask, reply = MsgFlow.get_content(memory[i], sign=False)
for ir in _key:
if ir in f"{ask}{reply}":
score += 1
_get = (score / full_score) * 100
if _get:
memory[i]["content"]["weight"].append(_get) # 基准数据,置信为 0.5 百分比

# 预处理
for i in range(0, len(memory) - attention):
ask, reply = MsgFlow.get_content(memory[i], sign=False)
if self.tokenizer(f"{ask}{reply}") > 240:
if Detect.get_text_language(sentence=f"{ask}{reply}") == "CN":
_sum = Utils.tfidf_summarization(sentence=f"{ask}{reply}", ratio=0.5)
if len(_sum) > 7:
memory[i]["content"]["ask"] = "info"
memory[i]["content"]["reply"] = _sum
full_score = len(_key)
if full_score > 5:
for i in range(0, len(memory)):
score = 0
ask, reply = MsgFlow.get_content(memory[i], sign=False)
for ir in _key:
if ir in f"{ask}{reply}":
score += 1
_get = (score / full_score) * 100
_get = _get if _get < 95 else 50
if _get != 0:
memory[i]["content"]["weight"].append(_get) # 基准数据,置信为 0.5 百分比

# 进行筛选,计算限制
_msg_flow = []
Expand All @@ -163,7 +164,8 @@ def forgetting_curve(x):
score = sum(memory[i]["content"]["weight"])
level = (score / full_score) * 100
ask, reply = MsgFlow.get_content(memory[i], sign=True)
if level > 30:
# print(ask, reply, level)
if level > 50:
_now_token += self.tokenizer(f"{ask}{reply}")
if _now_token > _create_token:
break
Expand All @@ -179,7 +181,7 @@ class SinglePoint(Point):
def __init__(self,
tokenizer,
prompt: str = "",
memory: List[Memory_Flow] = None,
memory: List[dict] = None,
attention: int = 3,
# start_token: int = 0,
extra_token: int = 0,
Expand All @@ -205,7 +207,6 @@ def __init__(self,
self.tokenizer = tokenizer

def run(self) -> list:

# 单条消息的内容 {"ask": self._restart_sequence+prompt, "reply": self._start_sequence+REPLY[0]}
memory = self.memory
attention = self.attention
Expand All @@ -214,6 +215,7 @@ def run(self) -> list:
if self.memory is None:
memory = []
_create_token = self.token_limit - self.extra_token

# 入口检查
if len(memory) - attention < 0:
return convert_msgflow_to_list(memory)
Expand Down Expand Up @@ -254,38 +256,27 @@ def forgetting_curve(x):
# 相似度检索
for i in range(0, len(memory)):
ask, reply = MsgFlow.get_content(memory[i], sign=False)
_diff1 = Utils.cosion_sismilarity(pre=prompt, aft=ask)
_diff2 = Utils.cosion_sismilarity(pre=prompt, aft=reply)
_diff = _diff1 if _diff1 > _diff2 else _diff2
score = _diff * 100
score = score if score < 90 else 1
_ask_diff = Utils.cosion_sismilarity(pre=prompt, aft=ask)
_ask_diff = _ask_diff * 100
score = _ask_diff if _ask_diff < 90 else 0
if score != 0:
memory[i]["content"]["weight"].append(score)

# 主题检索
_key = Utils.tfidf_keywords(prompt, topK=5)
# print(_key)
for i in range(0, len(memory)):
score = 0
full_score = len(_key) if len(_key) != 0 else 1
ask, reply = MsgFlow.get_content(memory[i], sign=False)
for ir in _key:
if ir in f"{ask}{reply}":
score += 1
_get = (score / full_score) * 100
_get = _get if _get < 95 else 50
if _get != 0:
memory[i]["content"]["weight"].append(_get) # 基准数据,置信为 0.5 百分比
full_score = len(_key)
if full_score > 5:
for i in range(0, len(memory)):
score = 0
ask, reply = MsgFlow.get_content(memory[i], sign=False)
for ir in _key:
if ir in f"{ask}{reply}":
score += 1
_get = (score / full_score) * 100
_get = _get if _get < 95 else 50
if _get != 0:
memory[i]["content"]["weight"].append(_get) # 基准数据,置信为 0.5 百分比

# 预处理
for i in range(0, len(memory) - attention):
ask, reply = MsgFlow.get_content(memory[i], sign=False)
if self.tokenizer(f"{ask}{reply}") > 240:
if Detect.get_text_language(sentence=f"{ask}{reply}") == "CN":
_sum = Utils.tfidf_summarization(sentence=f"{ask}{reply}", ratio=0.5)
if len(_sum) > 7:
memory[i]["content"]["ask"] = "info"
memory[i]["content"]["reply"] = _sum
# 进行筛选,计算限制
_msg_flow = []
_msg_return = []
Expand All @@ -297,6 +288,7 @@ def forgetting_curve(x):
score = sum(memory[i]["content"]["weight"])
level = (score / full_score) * 100
ask, reply = MsgFlow.get_content(memory[i], sign=True)
# print(ask, reply, level)
if level > 50:
_now_token += self.tokenizer(f"{ask}{reply}")
if _now_token > _create_token:
Expand Down
Loading

0 comments on commit 4a86efc

Please sign in to comment.