From 34a1c7b6872a4c4842548d7fefc7a1adffe07a17 Mon Sep 17 00:00:00 2001
From: yvonwin
Date: Wed, 17 Apr 2024 17:19:20 +0800
Subject: [PATCH] Add CodeQwenConverter class for converting CodeQwen1.5 models

---
 qwen_cpp/convert.py | 74 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 66 insertions(+), 8 deletions(-)

diff --git a/qwen_cpp/convert.py b/qwen_cpp/convert.py
index b773b2f..18e9f04 100644
--- a/qwen_cpp/convert.py
+++ b/qwen_cpp/convert.py
@@ -41,6 +41,7 @@ class ModelType(Enum):
     QWEN1 = 1
     QWEN2 = 2
     QWEN2MOE = 3
+    CODEQWEN = 4  # not used yet
 
 
 def quantize_q8_0(tensor: torch.Tensor) -> torch.CharTensor:
@@ -345,18 +346,70 @@ def dump_model(f, model, ggml_type):
     print(len(weight_names)) # 4659
     dump_state_dict(f, weight_names, model.state_dict(), ggml_type)
 
 
+class CodeQwenConverter:
+    # This model has the same architecture as the base model 'qwen2', but the tokenizer has been changed from 'tiktoken' to 'sentencepiece'.
+    MODEL_TYPE = ModelType.QWEN2
+    @classmethod
+    def convert(cls, f, model, tokenizer, ggml_type):
+        f.write(b"ggml")  # magic
+        f.write(struct.pack("ii", cls.MODEL_TYPE.value, 1))  # model type & version
+        cls.dump_config(f, model.config, model.generation_config, tokenizer, ggml_type)  # generation_config is not used yet
+        cls.dump_model(f, model, ggml_type)
+
+    @staticmethod
+    def dump_config(f, config, generation_config, tokenizer, ggml_type):
+        config_values = [
+            ggml_type.value,
+            config.vocab_size,
+            config.hidden_size,
+            config.num_attention_heads,
+            config.num_key_value_heads,
+            config.num_hidden_layers,
+            config.intermediate_size,
+            config.seq_length,
+            config.bos_token_id if config.bos_token_id is not None else -1,
+            config.eos_token_id if config.eos_token_id is not None else -1,
+            config.pad_token_id if config.pad_token_id is not None else -1,
+            config.sep_token_id if config.sep_token_id is not None else -1,
+        ]
+        f.write(struct.pack("i" * len(config_values), *config_values))
+
+    @staticmethod
+    def dump_model(f, model, ggml_type):
+        weight_names = ["model.embed_tokens.weight"]
+        for i in range(model.config.num_hidden_layers):
+            weight_names += [
+                f"model.layers.{i}.input_layernorm.weight",
+                f"model.layers.{i}.self_attn.q_proj.weight",
+                f"model.layers.{i}.self_attn.q_proj.bias",
+                f"model.layers.{i}.self_attn.k_proj.weight",
+                f"model.layers.{i}.self_attn.k_proj.bias",
+                f"model.layers.{i}.self_attn.v_proj.weight",
+                f"model.layers.{i}.self_attn.v_proj.bias",
+                f"model.layers.{i}.self_attn.o_proj.weight",
+                f"model.layers.{i}.post_attention_layernorm.weight",
+                f"model.layers.{i}.mlp.gate_proj.weight",
+                f"model.layers.{i}.mlp.up_proj.weight",
+                f"model.layers.{i}.mlp.down_proj.weight",
+            ]
+
+        weight_names += [
+            "model.norm.weight",
+            "lm_head.weight",
+        ]
+        print(len(weight_names))
+        dump_state_dict(f, weight_names, model.state_dict(), ggml_type)
+
 
 def convert(f: BinaryIO, model_name_or_path: str, dtype: str = "q4_0"):
     ggml_type = GGMLType[dtype.upper()]
 
-    # tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
-    # model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
     model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
     # print(model)
-    state_dict = model.state_dict()
-    keys = state_dict.keys()
-    print(keys)
+    # state_dict = model.state_dict()
+    # keys = state_dict.keys()
+    # print(keys)
     # print(state_dict)
     # print(len(state_dict))
@@ -366,13 +419,18 @@ def convert(f: BinaryIO, model_name_or_path: str, dtype: str = "q4_0"):
dtype: str = "q4_0"): # print(list(tokenizer.added_tokens_decoder.keys())) # 1.5 only if model.config.architectures[0]=="Qwen2ForCausalLM": - print('Convert Qwen1.5') - Qwen2Converter.convert(f, model, tokenizer, ggml_type) + if "Code" not in model_name_or_path: + print('Convert Qwen1.5') + Qwen2Converter.convert(f, model, tokenizer, ggml_type) + else: + print("Convert CodeQwen1.5") + CodeQwenConverter.convert(f, model, tokenizer, ggml_type) + elif model.config.architectures[0]=="Qwen2MoeForCausalLM": print('Convert Qwen1.5-Moe') Qwen2MOEConverter.convert(f, model, tokenizer, ggml_type) else: - print('Warning: Qwen1 ist not supported now') + print('Warning: Qwen1 is not supported now') # QwenConverter.convert(f, model, tokenizer, ggml_type)