Skip to content

Commit

Permalink
Update convert_mistral_weights_to_hf.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
Cyrilvallez committed Jan 29, 2025
1 parent 1eadcd0 commit 593cca3
Showing 1 changed file with 3 additions and 3 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,7 +22,7 @@
from tokenizers import Regex, Tokenizer, decoders, pre_tokenizers, processors
from tokenizers.models import BPE

from transformers import LlamaTokenizer, MistralConfig, MistralForCausalLM, PreTrainedTokenizerFast, AutoTokenizer
from transformers import AutoTokenizer, LlamaTokenizer, MistralConfig, MistralForCausalLM, PreTrainedTokenizerFast
from transformers.convert_slow_tokenizer import bytes_to_unicode


Expand Down Expand Up @@ -302,7 +302,7 @@ def converted(self) -> Tokenizer:

return tokenizer


def convert_and_write_tokenizer(input_dir: str, output_dir: str):
"""Convert the tokenizer and save it."""
# Tekken format -- need to use the Converter
Expand All @@ -329,7 +329,7 @@ def convert_and_write_tokenizer(input_dir: str, output_dir: str):
unk_token="<unk>",
eos_token="</s>",
)

# Post-process
tokenizer.add_special_tokens({"additional_special_tokens": all_special})
tokenizer.model_input_names = ["input_ids", "attention_mask"]
Expand Down

0 comments on commit 593cca3

Please sign in to comment.