Apply isort and black reformatting
Signed-off-by: akoumpa <[email protected]>
akoumpa committed Jan 6, 2025
1 parent da653c0 commit b9af29f
Showing 2 changed files with 12 additions and 2 deletions.
1 change: 1 addition & 0 deletions nemo/collections/nlp/modules/common/tokenizer_utils.py
@@ -171,6 +171,7 @@ def get_nmt_tokenizer(
     """
     import omegaconf
     from omegaconf import OmegaConf
+
     if isinstance(special_tokens, omegaconf.listconfig.ListConfig):
         special_tokens = OmegaConf.to_container(special_tokens)
     if special_tokens is None:
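Note: for context, here is a minimal sketch (not part of this commit) of what the guard above handles. When special_tokens comes out of a Hydra/OmegaConf config it arrives as a ListConfig rather than a plain Python list, and OmegaConf.to_container converts it before the tokenizer code consumes it. The token values below are illustrative only.

import omegaconf
from omegaconf import OmegaConf

# A list built from an OmegaConf/Hydra config is a ListConfig, not a plain list.
special_tokens = OmegaConf.create(['[INST]', '[/INST]'])
assert isinstance(special_tokens, omegaconf.listconfig.ListConfig)

# Same conversion as in tokenizer_utils.py: turn it back into a plain Python list.
if isinstance(special_tokens, omegaconf.listconfig.ListConfig):
    special_tokens = OmegaConf.to_container(special_tokens)
assert special_tokens == ['[INST]', '[/INST]']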
13 changes: 11 additions & 2 deletions tests/collections/nlp/test_tokenizer_with_special_tokens.py
@@ -24,8 +24,17 @@
 
 TOKENIZER_SPM_FILE = '/home/TestData/nlp/tokenizer_with_special_tokens/tokenizer.model'
 
+
 def test_spm_with_special_tokens() -> None:
-    special_tokens = ['[INST]', '[/INST]', '[TOOL_CALLS]', '[AVAILABLE_TOOLS]', '[/AVAILABLE_TOOLS]', '[TOOL_RESULTS]', '[/TOOL_RESULTS]']
+    special_tokens = [
+        '[INST]',
+        '[/INST]',
+        '[TOOL_CALLS]',
+        '[AVAILABLE_TOOLS]',
+        '[/AVAILABLE_TOOLS]',
+        '[TOOL_RESULTS]',
+        '[/TOOL_RESULTS]',
+    ]
     tokenizer_cfg = {
         "library": "sentencepiece",
         "type": None,
@@ -48,4 +57,4 @@ def test_spm_with_special_tokens() -> None:
 
     assert tokenizer.text_to_ids('[INST]') == [3]
     for special_token in special_tokens:
-        assert special_token in tokenizer.special_token_to_id, f'Expected {special_token} to be a special token'
\ No newline at end of file
+        assert special_token in tokenizer.special_token_to_id, f'Expected {special_token} to be a special token'
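As a rough illustration (not part of this commit), the exploded list above is the layout black produces once the single-line assignment exceeds its default 88-character limit. The single quotes surviving in the diff suggest the project runs black with string normalization disabled, which is assumed in the sketch below.

import black

# Hypothetical reproduction of the reformatting: the one-line assignment is longer
# than black's 88-character default, so black splits it into one element per line
# and adds a trailing comma.
src = (
    "special_tokens = ['[INST]', '[/INST]', '[TOOL_CALLS]', '[AVAILABLE_TOOLS]', "
    "'[/AVAILABLE_TOOLS]', '[TOOL_RESULTS]', '[/TOOL_RESULTS]']\n"
)
formatted = black.format_str(src, mode=black.Mode(string_normalization=False))
print(formatted)  # one token per line, trailing comma, quotes left untouched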

