misc: Remove unnecessary metadata lookups (#1448)
Special tokens are already mapped from metadata by llama.cpp
CISC authored May 14, 2024
1 parent 4b54f79 commit 389e09c
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions llama_cpp/llama.py
@@ -410,8 +410,8 @@ def __init__(
         if self.verbose:
             print(f"Model metadata: {self.metadata}", file=sys.stderr)
 
-        eos_token_id = int(self.metadata.get("tokenizer.ggml.eos_token_id", self.token_eos()))
-        bos_token_id = int(self.metadata.get("tokenizer.ggml.bos_token_id", self.token_bos()))
+        eos_token_id = self.token_eos()
+        bos_token_id = self.token_bos()
 
         eos_token = self._model.token_get_text(eos_token_id)
         bos_token = self._model.token_get_text(bos_token_id)
@@ -961,9 +961,9 @@ def _create_completion(
 
         completion_id: str = f"cmpl-{str(uuid.uuid4())}"
         created: int = int(time.time())
-        prefix_token_id: int = int(self.metadata.get("tokenizer.ggml.prefix_token_id", self._model.token_prefix()))
-        middle_token_id: int = int(self.metadata.get("tokenizer.ggml.middle_token_id", self._model.token_middle()))
-        suffix_token_id: int = int(self.metadata.get("tokenizer.ggml.suffix_token_id", self._model.token_suffix()))
+        prefix_token_id: int = self._model.token_prefix()
+        middle_token_id: int = self._model.token_middle()
+        suffix_token_id: int = self._model.token_suffix()
         # If prompt is empty, initialize completion with BOS token to avoid
         # detokenization including a space at the beginning of the completion
         completion_tokens: List[int] = [] if len(prompt) > 0 else [self.token_bos()]

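For context, a minimal sketch (not part of the commit) that checks the claim in the commit message using the public API: llama.cpp maps the special tokens from GGUF metadata at model load time, so the ids returned by token_eos()/token_bos() should already agree with the tokenizer.ggml.* metadata entries that the removed lookups consulted. The model path below is a hypothetical placeholder.

# Hypothetical local GGUF model path; substitute any model file you have.
from llama_cpp import Llama

llm = Llama(model_path="models/example.gguf", verbose=False)

# llama.cpp populates the special token ids from GGUF metadata at load time,
# so the accessors and the raw metadata entries (when present) should agree.
print("token_eos():", llm.token_eos(),
      "metadata:", llm.metadata.get("tokenizer.ggml.eos_token_id"))
print("token_bos():", llm.token_bos(),
      "metadata:", llm.metadata.get("tokenizer.ggml.bos_token_id"))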