Commit
Enhance Qwen2 Model Finetune Script and Make Qwen2Tokenizer callable (#273)

Co-authored-by: 同润 <[email protected]>
jerryli1981 and 同润 authored Jun 20, 2024
1 parent e31375f commit b9a9e7b
Showing 3 changed files with 58 additions and 2 deletions.
2 changes: 1 addition & 1 deletion examples/deepseek_v2/run_finetune_deepseek.sh
@@ -239,7 +239,7 @@ megatron_options=" \
--no-load-rng \
--num-workers 8 \
--extra-vocab-size ${EXTRA_VOCAB_SIZE} \
- --patch-tokenizer-type LLamaTokenizer \
+ --patch-tokenizer-type DeepSeekV2Tokenizer \
--dataset LLama-Pretrain-Raw \
--swiglu \
--normalization RMSNorm \
28 changes: 27 additions & 1 deletion examples/qwen2/run_finetune_qwen.sh
@@ -118,6 +118,32 @@ EXTRA_VOCAB_SIZE=421
moe_options=" \
"

elif [ $MODEL_SIZE = A14B ]; then

HIDDEN_SIZE=3584
INTERMEDIATE_SIZE=18944
MAX_POSITION_EMBEDDINGS=131072
MAX_WINDOW_LAYERS=28
MOE_INTERMEDIATE_SIZE=2560
NUM_ATTENTION_HEADS=28
NUM_EXPERTS=64
NUM_EXPERTS_PER_TOPK=8
NUM_HIDDEN_LAYERS=28
NUM_KEY_VALUE_HEADS=4
RMS_NORM_EPS=1e-6
ROPE_THETA=1000000
SHARED_EXPERT_INTERMEDIATE_SIZE=20480
SLIDING_WINDOW=131072
EXTRA_VOCAB_SIZE=293

moe_options=" \
--moe-router-topk ${NUM_EXPERTS_PER_TOPK} \
--num-experts ${NUM_EXPERTS} \
--expert-model-parallel-size ${EP} \
--moe-ffn-hidden-size ${MOE_INTERMEDIATE_SIZE} \
--shared-moe-ffn-hidden-size ${SHARED_EXPERT_INTERMEDIATE_SIZE} \
--enable-shared-expert"

fi

if [ $AC = full ]; then
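The new A14B branch corresponds to Qwen2-57B-A14B: grouped-query attention with 4 KV heads, 64 routed experts with top-8 routing, and a wide shared expert enabled via --enable-shared-expert. As a quick sanity check of the numbers above (a standalone sketch, not part of the script), the per-head dimension and the shared-expert width follow directly from the listed values:

    # Standalone sanity check for the A14B values added above (illustrative only).
    HIDDEN_SIZE = 3584
    NUM_ATTENTION_HEADS = 28
    MOE_INTERMEDIATE_SIZE = 2560
    SHARED_EXPERT_INTERMEDIATE_SIZE = 20480

    assert HIDDEN_SIZE % NUM_ATTENTION_HEADS == 0
    print(HIDDEN_SIZE // NUM_ATTENTION_HEADS)                         # 128: per-head dimension
    print(SHARED_EXPERT_INTERMEDIATE_SIZE // MOE_INTERMEDIATE_SIZE)   # 8: shared-expert FFN is 8x one routed expert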
@@ -246,7 +272,7 @@ megatron_options=" \
--no-load-rng \
--num-workers 8 \
--extra-vocab-size ${EXTRA_VOCAB_SIZE} \
- --patch-tokenizer-type LLamaTokenizer \
+ --patch-tokenizer-type Qwen2Tokenizer \
--dataset LLama-Pretrain-Raw \
--swiglu \
--normalization RMSNorm \
30 changes: 30 additions & 0 deletions megatron_patch/tokenizer/__init__.py
@@ -181,6 +181,12 @@ def __init__(self, tokenizer_path, extra_vocab_size):
        self.extra_vocab_size = extra_vocab_size
        self.tokenizer.add_special_tokens(special_tokens_dict=dict(pad_token="<|extra_0|>"))

    def __call__(self, text, return_tensors=None,
                 padding=None, max_length=None, truncation=None, add_special_tokens=None):

        return self.tokenizer(text, return_tensors=return_tensors, padding=padding,
                              max_length=max_length, truncation=truncation, add_special_tokens=add_special_tokens)

    @property
    def vocab_size(self):
        return len(self.tokenizer.encoder) + self.extra_vocab_size
@@ -211,6 +217,10 @@ def eos_token(self):
    def pad_token_id(self):
        return self.tokenizer.pad_token_id

    @property
    def eos_token_id(self):
        return self.tokenizer.eos_token_id

tokenizer = _Qwen2Tokenizer(args.load, args.extra_vocab_size)
args.padded_vocab_size = tokenizer.vocab_size
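With __call__ and eos_token_id in place, the patched Qwen2 tokenizer behaves like a regular Hugging Face tokenizer in the finetuning data path. A minimal usage sketch follows; the checkpoint path and lengths are illustrative, and in the actual scripts the wrapper is constructed through the tokenizer-building code above (driven by --patch-tokenizer-type Qwen2Tokenizer) rather than instantiated directly:

    # Usage sketch (not part of this commit); the checkpoint path below is a placeholder.
    wrapper = _Qwen2Tokenizer("/path/to/Qwen2-7B-Instruct", extra_vocab_size=421)

    batch = wrapper(
        ["Write a haiku about distributed training."],
        return_tensors="pt",
        padding="max_length",
        max_length=32,
        truncation=True,
        add_special_tokens=False,
    )
    print(batch["input_ids"].shape)   # torch.Size([1, 32])
    print(wrapper.eos_token_id)       # integer id, usable for label masking and stopping criteria
    print(wrapper.vocab_size)         # len(encoder) + extra_vocab_size, assigned to args.padded_vocab_size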

@@ -226,6 +236,12 @@ def __init__(self, tokenizer_path, extra_vocab_size):
        )
        self.extra_vocab_size = extra_vocab_size

    def __call__(self, text, return_tensors=None,
                 padding=None, max_length=None, truncation=None, add_special_tokens=None):

        return self.tokenizer(text, return_tensors=return_tensors, padding=padding,
                              max_length=max_length, truncation=truncation, add_special_tokens=add_special_tokens)

    @property
    def vocab_size(self):
        return len(self.tokenizer) + self.extra_vocab_size - 2
@@ -256,6 +272,10 @@ def eos_token(self):
    def pad_token_id(self):
        return self.tokenizer.pad_token_id

    @property
    def eos_token_id(self):
        return self.tokenizer.eos_token_id

tokenizer = _DeepSeekV2Tokenizer(args.load, args.extra_vocab_size)
args.padded_vocab_size = tokenizer.vocab_size

@@ -332,6 +352,12 @@ def __init__(self, tokenizer_path, extra_vocab_size):
        )
        self.extra_vocab_size = extra_vocab_size

    def __call__(self, text, return_tensors=None,
                 padding=None, max_length=None, truncation=None, add_special_tokens=None):

        return self.tokenizer(text, return_tensors=return_tensors, padding=padding,
                              max_length=max_length, truncation=truncation, add_special_tokens=add_special_tokens)

    @property
    def vocab_size(self):
        return self.tokenizer.vocab_size + self.extra_vocab_size
@@ -362,6 +388,10 @@ def eos_token(self):
    def pad_token_id(self):
        return self.tokenizer.pad_token_id

    @property
    def eos_token_id(self):
        return self.tokenizer.eos_token_id

tokenizer = _LLama3Tokenizer(args.load, args.extra_vocab_size)
args.padded_vocab_size = tokenizer.vocab_size
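The same __call__ and eos_token_id additions are made to the DeepSeekV2 and LLama3 wrappers above, so downstream finetuning code can drive any of the patched tokenizers through one small surface: __call__, pad_token_id, and eos_token_id. A duck-typed sketch of what that uniformity enables (the helper name and the -100 ignore-index convention are illustrative, not taken from this commit, and it assumes the underlying tokenizer has a pad token configured):

    # Sketch (not from this commit): build padded input ids and loss-masked labels
    # using only the interface shared by the three patched wrappers.
    def encode_example(tok, text, max_length=512):
        # Tokenize, reserving the final position for EOS so every example ends the same way.
        ids = tok(text, max_length=max_length - 1, truncation=True,
                  add_special_tokens=False)["input_ids"]
        ids = ids + [tok.eos_token_id]
        ids = ids + [tok.pad_token_id] * (max_length - len(ids))
        # Mask padding positions so they do not contribute to the loss (-100 ignore index).
        labels = [i if i != tok.pad_token_id else -100 for i in ids]
        return {"input_ids": ids, "labels": labels}

    sample = encode_example(tokenizer, "Question: what does MoE stand for?\nAnswer: mixture of experts.")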
