From cc2f0ee5d3bac0195e07dec058382b6f8dd4c67d Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Mon, 25 Nov 2024 12:18:04 -0800 Subject: [PATCH 01/17] SQFT Finetuning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. Pablo Muñoz Signed-off-by: J. Pablo Muñoz --- comps/finetuning/README.md | 43 +- comps/finetuning/finetune_config.py | 29 +- comps/finetuning/finetuning_service.py | 13 +- comps/finetuning/handlers.py | 100 ++- .../llm_on_ray/finetune/finetune.py | 63 +- .../utils/create_sqft_nncf_config.py | 142 ++++ comps/finetuning/utils/extract_sub_adapter.py | 97 +++ comps/finetuning/utils/merge_adapter.py | 16 + comps/finetuning_sqft/Dockerfile | 50 ++ comps/finetuning_sqft/README.md | 240 +++++++ .../example_nncf_config/nncf_config.json | 630 ++++++++++++++++++ comps/finetuning_sqft/finetune_runner.py | 38 ++ comps/finetuning_sqft/finetune_sqft_config.py | 215 ++++++ .../finetuning_sqft_service.py | 76 +++ comps/finetuning_sqft/handlers.py | 338 ++++++++++ comps/finetuning_sqft/launch.sh | 12 + .../llm_on_ray/common/__init__.py | 6 + .../llm_on_ray/common/common.py | 29 + .../llm_on_ray/common/torch_config.py | 72 ++ .../llm_on_ray/finetune/__init__.py | 4 + .../llm_on_ray/finetune/data_process.py | 352 ++++++++++ .../llm_on_ray/finetune/finetune.py | 602 +++++++++++++++++ .../llm_on_ray/finetune/modeling.py | 211 ++++++ .../patches/nncf-v2.12.0.patch | 72 ++ .../patches/peft-v0.10.0.patch | 220 ++++++ .../patches/transformers-v4.44.2.patch | 171 +++++ comps/finetuning_sqft/requirements.txt | 17 + .../utils/extract_sub_adapter.py | 101 +++ comps/finetuning_sqft/utils/merge.py | 27 + .../utils/nncf_config_process.py | 156 +++++ 30 files changed, 4116 insertions(+), 26 deletions(-) create mode 100644 comps/finetuning/utils/create_sqft_nncf_config.py create mode 100644 comps/finetuning/utils/extract_sub_adapter.py create mode 100644 comps/finetuning/utils/merge_adapter.py create mode 100644 comps/finetuning_sqft/Dockerfile create mode 100644 comps/finetuning_sqft/README.md create mode 100644 comps/finetuning_sqft/example_nncf_config/nncf_config.json create mode 100644 comps/finetuning_sqft/finetune_runner.py create mode 100644 comps/finetuning_sqft/finetune_sqft_config.py create mode 100644 comps/finetuning_sqft/finetuning_sqft_service.py create mode 100644 comps/finetuning_sqft/handlers.py create mode 100644 comps/finetuning_sqft/launch.sh create mode 100644 comps/finetuning_sqft/llm_on_ray/common/__init__.py create mode 100644 comps/finetuning_sqft/llm_on_ray/common/common.py create mode 100644 comps/finetuning_sqft/llm_on_ray/common/torch_config.py create mode 100644 comps/finetuning_sqft/llm_on_ray/finetune/__init__.py create mode 100644 comps/finetuning_sqft/llm_on_ray/finetune/data_process.py create mode 100644 comps/finetuning_sqft/llm_on_ray/finetune/finetune.py create mode 100644 comps/finetuning_sqft/llm_on_ray/finetune/modeling.py create mode 100644 comps/finetuning_sqft/patches/nncf-v2.12.0.patch create mode 100644 comps/finetuning_sqft/patches/peft-v0.10.0.patch create mode 100644 comps/finetuning_sqft/patches/transformers-v4.44.2.patch create mode 100644 comps/finetuning_sqft/requirements.txt create mode 100644 comps/finetuning_sqft/utils/extract_sub_adapter.py create mode 100644 comps/finetuning_sqft/utils/merge.py create mode 100644 comps/finetuning_sqft/utils/nncf_config_process.py diff --git a/comps/finetuning/README.md b/comps/finetuning/README.md index 6f554ca221..d2e26582f4 100644 --- a/comps/finetuning/README.md 
+++ b/comps/finetuning/README.md @@ -114,7 +114,42 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ }' ``` -#### 3.2.2 Reranking Model Training +#### 3.2.2 Instruction Tuning with SQFT's Neural Low-Rank Adapter Search (NLS) + +In addition to traditional fine-tuning, you can use SQFT's NLS to fine-tune your model. +More details about SQFT can be found in [this paper](https://aclanthology.org/2024.findings-emnlp.749.pdf). +Please follow the additional installation requirements [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#-start-nls-microservice-with-python). +Use the following command to launch a finetuning job with the NLS algorithm: + +```bash +# create a fine-tuning job with NLS +# Max LoRA rank: 16 +# LoRA target modules -> Low-rank search space +# ["q_proj", "k_proj", "v_proj"] -> [16,12,8] +# ["up_proj"] -> [16,12,8] +# ["down_proj"] -> [16,12,8] +curl http://${your_ip}:8015/v1/fine_tuning/jobs \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "training_file": "alpaca_data.json", + "model": "meta-llama/Llama-2-7b-chat-hf", + "General": { + "lora_config": { + "r": 16, + "neural_lora_search": true, + "target_module_groups": [["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]], + "search_space": ["16,12,8", "16,12,8", "16,12,8"] + } + } + }' +``` + +Detailed explanations for the parameters can be found [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#create-nls-fine-tuning-job). +Additional use-cases and benefits of SQFT are available [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea). +Instructions for extracting the desired sub-adapter and merging it with the base model can be found [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#leverage-fine-tuned-super-adapter).
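+
+The same NLS fine-tuning job can also be created programmatically. The snippet below is a minimal sketch using the `requests` library (it is not part of this microservice); it assumes the service is reachable at `${your_ip}:8015` as in the curl example above and that the response follows the OpenAI-style fine-tuning job schema:
+
+```python
+import requests
+
+YOUR_IP = "localhost"  # assumption: adjust to where the finetuning microservice runs
+
+payload = {
+    "training_file": "alpaca_data.json",
+    "model": "meta-llama/Llama-2-7b-chat-hf",
+    "General": {
+        "lora_config": {
+            "r": 16,
+            "neural_lora_search": True,
+            "target_module_groups": [["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]],
+            "search_space": ["16,12,8", "16,12,8", "16,12,8"],
+        }
+    },
+}
+
+# Create the NLS fine-tuning job (same request body as the curl command above).
+resp = requests.post(f"http://{YOUR_IP}:8015/v1/fine_tuning/jobs", json=payload, timeout=30)
+resp.raise_for_status()
+print(resp.json())  # the returned job object should include the fine-tuning job id
+```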
+ +#### 3.2.3 Reranking Model Training Use the following command to launch a finetuning job for reranking model finetuning, such as `BAAI/bge-reranker-large`: @@ -133,7 +168,7 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ }' ``` -#### 3.2.3 Embedding Model Training +#### 3.2.4 Embedding Model Training Use the following command to launch a finetuning job for embedding model finetuning, such as `BAAI/bge-base-en-v1.5`: @@ -173,7 +208,7 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ ``` -#### 3.2.4 LLM Pretraining +#### 3.2.5 LLM Pretraining Use the following command to launch a job for LLM pretraining, such as `meta-llama/Llama-2-7b-hf`: @@ -199,7 +234,7 @@ Below is an example for the format of the pretraining dataset: {"text": "A boy with a blue tank top sitting watching three dogs."} ``` -#### 3.2.5 Direct Preference Optimization (DPO) +#### 3.2.6 Direct Preference Optimization (DPO) Use the following command to launch a job for LLM Direct Preference Optimization, such as `meta-llama/Llama-2-7b-hf`: diff --git a/comps/finetuning/finetune_config.py b/comps/finetuning/finetune_config.py index 0b2faf53db..3f297c80f1 100644 --- a/comps/finetuning/finetune_config.py +++ b/comps/finetuning/finetune_config.py @@ -5,9 +5,9 @@ from typing import List, Optional, Union -from pydantic import BaseModel, Field, validator +from pydantic import BaseModel, Field, validator, root_validator -from comps.cores.proto.api_protocol import FineTuningJobsRequest +from comps.cores.proto.api_protocol import FineTuningJobIDRequest, FineTuningJobsRequest PRECISION_BF16 = "bf16" PRECISION_FP16 = "fp16" @@ -37,6 +37,22 @@ class LoraConfig(BaseModel): target_modules: Optional[List[str]] = None +class SQFTNLSConfig(LoraConfig): + neural_lora_search: bool = False + target_module_groups: Optional[List[List[str]]] = None + search_space: Optional[List[str]] = None + + @root_validator(pre=True) + def set_target_modules(cls, values): + target_module_groups = values.get('target_module_groups') + if target_module_groups is not None: + values['target_modules'] = [item for sublist in target_module_groups for item in sublist] + search_space = values.get('search_space') + if search_space is not None: + assert len(search_space) == len(target_module_groups) + return values + + class GeneralConfig(BaseModel): base_model: str = None tokenizer_name: Optional[str] = None @@ -47,7 +63,7 @@ class GeneralConfig(BaseModel): resume_from_checkpoint: Optional[str] = None save_strategy: str = "no" config: LoadConfig = LoadConfig() - lora_config: Optional[LoraConfig] = LoraConfig() + lora_config: Optional[Union[LoraConfig, SQFTNLSConfig]] = LoraConfig() enable_gradient_checkpointing: bool = False task: str = "instruction_tuning" @@ -200,3 +216,10 @@ class FineTuningParams(FineTuningJobsRequest): General: GeneralConfig = GeneralConfig() Dataset: DatasetConfig = DatasetConfig() Training: TrainingConfig = TrainingConfig() + +class ExtractSubAdapterParams(FineTuningJobIDRequest): + adapter_version: str = "heuristic" + custom_config: Optional[List[int]] = None + +class MergeAdapterParams(FineTuningJobIDRequest): + adapter_version: Optional[str] = None diff --git a/comps/finetuning/finetuning_service.py b/comps/finetuning/finetuning_service.py index 64097c720c..4a925ff837 100644 --- a/comps/finetuning/finetuning_service.py +++ b/comps/finetuning/finetuning_service.py @@ -4,12 +4,14 @@ from comps import opea_microservices, register_microservice from comps.cores.proto.api_protocol import FineTuningJobIDRequest, UploadFileRequest -from 
comps.finetuning.finetune_config import FineTuningParams +from comps.finetuning.finetune_config import FineTuningParams, ExtractSubAdapterParams, MergeAdapterParams from comps.finetuning.handlers import ( handle_cancel_finetuning_job, handle_create_finetuning_jobs, + handle_extract_sub_adapter, handle_list_finetuning_checkpoints, handle_list_finetuning_jobs, + handle_merge_adapter, handle_retrieve_finetuning_job, handle_upload_training_files, upload_file, @@ -20,7 +22,6 @@ def create_finetuning_jobs(request: FineTuningParams, background_tasks: BackgroundTasks): return handle_create_finetuning_jobs(request, background_tasks) - @register_microservice( name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015, methods=["GET"] ) @@ -62,6 +63,14 @@ def list_checkpoints(request: FineTuningJobIDRequest): checkpoints = handle_list_finetuning_checkpoints(request) return checkpoints +@register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/extract_sub_adapter", host="0.0.0.0", port=8015) +def extract_sub_adapter(request: ExtractSubAdapterParams): + return handle_extract_sub_adapter(request) + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/merge_adapter", host="0.0.0.0", port=8015) +def merge_adapter(request: MergeAdapterParams): + return handle_merge_adapter(request) + if __name__ == "__main__": opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index a47b9f980a..dde6424420 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -11,7 +11,7 @@ from typing import Dict from fastapi import BackgroundTasks, File, Form, HTTPException, UploadFile -from pydantic_yaml import to_yaml_file +from pydantic_yaml import parse_yaml_file_as, to_yaml_file from ray.job_submission import JobSubmissionClient from comps import CustomLogger @@ -23,7 +23,12 @@ FineTuningJobList, UploadFileRequest, ) -from comps.finetuning.finetune_config import FinetuneConfig, FineTuningParams +from comps.finetuning.finetune_config import ( + ExtractSubAdapterParams, + FinetuneConfig, + FineTuningParams, + MergeAdapterParams, +) logger = CustomLogger("finetuning_handlers") @@ -134,6 +139,97 @@ def handle_create_finetuning_jobs(request: FineTuningParams, background_tasks: B return job +def handle_extract_sub_adapter(request: ExtractSubAdapterParams): + fine_tuning_job_id = request.fine_tuning_job_id + finetune_config_file = f"{JOBS_PATH}/{fine_tuning_job_id}.yaml" + finetune_config = parse_yaml_file_as(FinetuneConfig, finetune_config_file) + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + finetuned_model_path = os.path.join(OUTPUT_DIR, fine_tuning_job_id) + assert finetuned_model_path == finetune_config.General.output_dir + if not os.path.exists(finetuned_model_path): + raise HTTPException( + status_code=404, + detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!" + ) + if job.status != "succeeded": + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") + + if finetune_config.General.lora_config is None: + raise HTTPException( + status_code=404, + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!" 
+ ) + if not finetune_config.General.lora_config.neural_lora_search: + raise HTTPException( + status_code=404, + detail=f"The fine-tuning job '{fine_tuning_job_id}' did not enable the NLS algorithm, " + "so there is no need to extract sub-adapters!" + ) + nncf_config_path = os.path.join(finetune_config.General.output_dir, "nncf_config.json") + if not os.path.exists(nncf_config_path): + raise HTTPException( + status_code=404, + detail=f"The NNCF config file does not exist in the fine-tuning job '{fine_tuning_job_id}'!" + ) + + from comps.finetuning.utils.extract_sub_adapter import main as extract_sub_adapter_main + extract_sub_adapter_main( + adapter_model_path=finetuned_model_path, + nncf_config=nncf_config_path, + adapter_version=request.adapter_version, + custom_config=request.custom_config + ) + + return fine_tuning_job_id + + +def handle_merge_adapter(request: MergeAdapterParams): + fine_tuning_job_id = request.fine_tuning_job_id + finetune_config_file = f"{JOBS_PATH}/{fine_tuning_job_id}.yaml" + finetune_config = parse_yaml_file_as(FinetuneConfig, finetune_config_file) + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + finetuned_model_path = os.path.join(OUTPUT_DIR, fine_tuning_job_id) + assert finetuned_model_path == finetune_config.General.output_dir + if not os.path.exists(finetuned_model_path): + raise HTTPException( + status_code=404, + detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!" + ) + if job.status != "succeeded": + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") + + if finetune_config.General.lora_config is None: + raise HTTPException( + status_code=404, + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!" + ) + + adapter_path = finetuned_model_path + adapter_version = request.adapter_version + if adapter_version is not None: + adapter_path = os.path.join(adapter_path, adapter_version) + if not os.path.exists(adapter_path): + raise HTTPException( + status_code=404, + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not have a '{adapter_version}' adapter!"
+ ) + + from comps.finetuning.utils.merge_adapter import main as merge_adapter_main + merge_adapter_main( + base_model_path=finetune_config.General.base_model, + adapter_model_path=adapter_path, + output_path=os.path.join(adapter_path, "merged_model") + ) + + return fine_tuning_job_id + + def handle_list_finetuning_jobs(): finetuning_jobs_list = FineTuningJobList(data=list(running_finetuning_jobs.values()), has_more=False) diff --git a/comps/finetuning/llm_on_ray/finetune/finetune.py b/comps/finetuning/llm_on_ray/finetune/finetune.py index d105269a40..97a6257b33 100644 --- a/comps/finetuning/llm_on_ray/finetune/finetune.py +++ b/comps/finetuning/llm_on_ray/finetune/finetune.py @@ -39,6 +39,16 @@ logger = CustomLogger("llm_on_ray/finetune") +try: + from comps.finetuning.utils.create_sqft_nncf_config import create_sqft_nncf_config + from nncf.experimental.torch.nas.bootstrapNAS.training.model_creator_helpers import ( + create_compressed_model_from_algo_names, + ) + from nncf.torch.model_creation import create_nncf_network + is_nncf_available = True +except ImportError: + is_nncf_available = False + def adapt_transformers_to_device(config: Dict): device = config["Training"]["device"] @@ -338,6 +348,7 @@ def load_model(config: Dict): model_config = config["General"].get("config", {}) task = config["General"].get("task", "instruction_tuning") ref_model = None + nls_controller = None if task in ["instruction_tuning", "pretraining", "dpo"]: model = transformers.AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=model_dtype, **model_config) if task == "dpo": @@ -346,8 +357,26 @@ def load_model(config: Dict): ) lora_config = config["General"].get("lora_config", None) if lora_config and task == "instruction_tuning": + neural_lora_search = lora_config.pop("neural_lora_search", False) + target_module_groups = lora_config.pop("target_module_groups", None) + search_space = lora_config.pop("search_space", None) peft_config = LoraConfig(**lora_config) model = get_peft_model(model, peft_config) + + # Neural LoRA Search (NLS) + if neural_lora_search: + if not is_nncf_available: + raise NotImplementedError("NNCF is not installed. 
Please install it for enabling NLS algorithm.") + nncf_config = create_sqft_nncf_config( + config=config, + model=model, + target_module_groups=target_module_groups, + search_space=search_space + ) + model = create_nncf_network(model, nncf_config) + nls_controller, model = create_compressed_model_from_algo_names( + model, nncf_config, algo_names=["nls"] + ) elif task == "rerank": model = CrossEncoder.from_pretrained( config["Dataset"].get("train_group_size", 8), @@ -383,10 +412,9 @@ def load_model(config: Dict): model.to(dtype=model_dtype, device=torch.device(config["Training"]["device"])) - return model, ref_model - + return model, ref_model, nls_controller -def get_trainer(config: Dict, model, ref_model, tokenizer, tokenized_dataset, data_collator): +def get_trainer(config: Dict, model, ref_model, tokenizer, tokenized_dataset, data_collator, nls_controller=None): device = config["Training"]["device"] task = config["General"].get("task", "instruction_tuning") if device in ["cpu", "gpu", "cuda"]: @@ -411,18 +439,21 @@ def get_trainer(config: Dict, model, ref_model, tokenizer, tokenized_dataset, da max_length=config["Dataset"].get("max_length", 1024), ) else: - trainer = Trainer( - model=model, - args=training_args, - train_dataset=tokenized_dataset["train"], - eval_dataset=( - tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None - ), - tokenizer=tokenizer, - data_collator=data_collator, - ) + trainer_args = { + "model": model, + "args": training_args, + "train_dataset": tokenized_dataset["train"], + "eval_dataset": tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None, + "tokenizer": tokenizer, + "data_collator": data_collator, + } + if nls_controller is not None: + trainer_args["compression_ctrl"] = nls_controller + trainer = Trainer(**trainer_args) return training_args, trainer elif device in ["hpu"]: + if nls_controller is not None: + raise NotImplementedError(f"NLS algorithm is not supported on HPU now.") from optimum.habana import GaudiConfig from optimum.habana.transformers import GaudiTrainer, GaudiTrainingArguments @@ -495,9 +526,11 @@ def train_func(config: Dict[str, Any]): data_collator = prepare_data_collator(config, tokenizer) - model, ref_model = load_model(config) + model, ref_model, nls_controller = load_model(config) - training_args, trainer = get_trainer(config, model, ref_model, tokenizer, tokenized_dataset, data_collator) + training_args, trainer = get_trainer( + config, model, ref_model, tokenizer, tokenized_dataset, data_collator, nls_controller=nls_controller + ) logger.info("train start") trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) diff --git a/comps/finetuning/utils/create_sqft_nncf_config.py b/comps/finetuning/utils/create_sqft_nncf_config.py new file mode 100644 index 0000000000..eb76fcc310 --- /dev/null +++ b/comps/finetuning/utils/create_sqft_nncf_config.py @@ -0,0 +1,142 @@ +import os +import json + +try: + from nncf import NNCFConfig + from nncf.experimental.torch import sqft + is_nncf_available = True +except ImportError: + is_nncf_available = False + + +NNCF_CONFIG_TEMPLATE = { + "input_info": [ + { + "sample_size": [1, 256], + "type": "long", + "keyword": "input_ids" + }, + { + "sample_size": [1, 256], + "type": "long", + "keyword": "attention_mask" + } + ], + "SQFT": { + "training": { + "algorithm": "nls", + "elasticity": { + "available_elasticity_dims": ["width"], + "width": { + "overwrite_groups": [], + "overwrite_groups_widths": [] + } + } + } + 
} +} + + +def add_lr_epochs(nncf_config, learning_rate=3e-4, num_train_epochs=3): + """Add learning rate and epochs to the NNCF configuration. + + Args: + nncf_config (dict): The NNCF configuration dictionary. + learning_rate (float): The initial learning rate to set. + num_train_epochs (int): The number of epochs to set. + + Returns: + dict: The updated NNCF configuration. + """ + overwrite_groups_widths = nncf_config["SQFT"]["training"]["elasticity"]["width"]["overwrite_groups_widths"] + # Add learning rate and epochs to the configuration + nncf_config["SQFT"]["training"]["schedule"] = { + "list_stage_descriptions": [ + { + "train_dims": ["width"], + "width_indicator": max([len(widths) for widths in overwrite_groups_widths]), + "init_lr": learning_rate, + "epochs": num_train_epochs, + "epochs_lr": num_train_epochs, + } + ] + } + return nncf_config + + +def get_model_paths(model, target_module_name): + """ + Find all paths to the target layer in the model. + + Args: + model (torch.nn.Module): The model to search. + target_module_name (str): The name of the target layer. + + Returns: + list: A list of paths to the target layer. + """ + def find_layers(module, target_module_name, path, paths): + for name, sub_module in module.named_children(): + new_path = f"{path}/{sub_module.__class__.__name__}[{name}]" + if target_module_name in name: + # Check if 'lora_A' is in the sub_module's children + for sub_name, _ in sub_module.named_children(): + if "lora_A" in sub_name: + paths.append(f"{new_path}/ModuleDict[lora_A]/NNCFLinear[default]/linear_0") + find_layers(sub_module, target_module_name, new_path, paths) + + base_path = model.__class__.__name__ + paths = [] + find_layers(model, target_module_name, base_path, paths) + return paths + + +def create_sqft_nncf_config( + config, + model, + target_module_groups=None, + search_space=None +): + """Create and preprocess the NNCF configuration for Neural LoRA Search. + + Returns: + NNCFConfig: The preprocessed NNCF configuration object. + """ + if not is_nncf_available: + raise NotImplementedError("NNCF is not installed. Please install it to enable the NLS algorithm.") + if target_module_groups is None or search_space is None: + raise ValueError("Neural LoRA Search is enabled, so both `target_module_groups` and `search_space` must be provided.") + # The NNCF Config will be automatically generated based on `target_module_groups` and `search_space`.
+ num_hidden_layers = model.config.num_hidden_layers + nncf_config_dict = NNCF_CONFIG_TEMPLATE + overwrite_groups = [] + for group in target_module_groups: + group_paths = [] + for module in group: + target_layer_name = module + paths = get_model_paths(model, target_layer_name) + assert paths, f"No paths found for module {module}" + group_paths.append(paths) + # Transpose the list of lists to combine paths by their positions + transposed_paths = list(zip(*group_paths)) + overwrite_groups.extend([list(path_group) for path_group in transposed_paths]) + nncf_config_dict["SQFT"]["training"]["elasticity"]["width"]["overwrite_groups"] = overwrite_groups + + overwrite_groups_widths = [] + for space in search_space: + space = [int(width) for width in space.split(",")] + overwrite_groups_widths.extend([space] * num_hidden_layers) + nncf_config_dict["SQFT"]["training"]["elasticity"]["width"]["overwrite_groups_widths"] = overwrite_groups_widths + assert len(overwrite_groups) == len(overwrite_groups_widths) + nncf_config_dict = add_lr_epochs( + nncf_config_dict, + learning_rate=config["Training"]["learning_rate"], + num_train_epochs=config["Training"]["epochs"] + ) + nncf_config = NNCFConfig.from_dict(nncf_config_dict) + + nncf_config["log_dir"] = config["General"]["output_dir"] + os.makedirs(nncf_config["log_dir"], exist_ok=True) + with open(os.path.join(nncf_config["log_dir"], "nncf_config.json"), "w") as f: + json.dump(nncf_config, f, indent=4) + return nncf_config diff --git a/comps/finetuning/utils/extract_sub_adapter.py b/comps/finetuning/utils/extract_sub_adapter.py new file mode 100644 index 0000000000..f7b0bf6ff1 --- /dev/null +++ b/comps/finetuning/utils/extract_sub_adapter.py @@ -0,0 +1,97 @@ +import os +import re + +import torch + +from peft.utils import CONFIG_NAME, WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME + +try: + from nncf import NNCFConfig + is_nncf_available = True +except ImportError: + is_nncf_available = False + + +PATTERN = re.compile(r"[[](.*?)[]]", re.S) + + +def get_width_for_query_prefix(torch_module_to_width, query_module, length=5): + """ + Get the width for a given query module prefix. + + Args: + torch_module_to_width (dict): Mapping from torch module to width. + query_module (str): The query module name. + length (int, optional): The length of the prefix to match. Default is 5. + + Returns: + int: The width for the query module prefix. + """ + query_module_list = query_module.split(".") + width = next( + ( + value + for torch_module, value in torch_module_to_width.items() + if torch_module.split(".")[:length] == query_module_list[:length] + ), + None, + ) + return width + + +def main(adapter_model_path, nncf_config, adapter_version, custom_config=None): + if not is_nncf_available: + raise NotImplementedError("NNCF is not installed. 
Please install it.") + output_dir = os.path.join(adapter_model_path, adapter_version) + os.makedirs(output_dir, exist_ok=True) + nncf_config = NNCFConfig.from_json(nncf_config) + try: + groups = nncf_config["SQFT"]["training"]["elasticity"]["width"]["overwrite_groups"] + groups_widths = nncf_config["SQFT"]["training"]["elasticity"]["width"]["overwrite_groups_widths"] + assert len(groups) == len(groups_widths) + except Exception: + raise ValueError("Cannot get the search space in NNCF config.") + + if adapter_version == "maximal": + subnetwork_config = {idx: space[0] for idx, space in enumerate(groups_widths)} + elif adapter_version == "heuristic": + subnetwork_config = {idx: space[(len(space) - 1) // 2] for idx, space in enumerate(groups_widths)} + elif adapter_version == "minimal": + subnetwork_config = {idx: space[-1] for idx, space in enumerate(groups_widths)} + else: + assert custom_config is not None, "Missing custom subnetwork config." + assert isinstance(custom_config, list), "Custom config must be a list." + subnetwork_config = {i: value for i, value in enumerate(custom_config)} + + # Mapping: nncf node -> width + nncf_node_to_width = {} + for idx, value in subnetwork_config.items(): + space = groups_widths[idx] + assert min(space) <= value <= max(space) + cur_dict = {node: value for node in groups[idx]} + nncf_node_to_width.update(cur_dict) + + # Prune adapter model (LoRA low-rank) + lora_torch_module_to_width = { + ".".join(re.findall(PATTERN, k)): v for k, v in nncf_node_to_width.items() if "lora_A" in k + } + num_module_name_item = list(lora_torch_module_to_width.keys())[0].split(".").index("lora_A") + # Load adapter weights + try: + super_adapter_weights = torch.load(os.path.join(adapter_model_path, WEIGHTS_NAME)) + except: + from safetensors.torch import load_file + super_adapter_weights = load_file(os.path.join(adapter_model_path, SAFETENSORS_WEIGHTS_NAME)) + sub_adapter_weights = {} + for weight_key, weight_tensor in super_adapter_weights.items(): + width = get_width_for_query_prefix(lora_torch_module_to_width, weight_key, length=num_module_name_item) + if width is not None: + is_loraA = "lora_A" in weight_key + new_weight_tensor = weight_tensor[:width].clone() if is_loraA else weight_tensor[:, :width].clone() + else: + new_weight_tensor = weight_tensor.clone() + sub_adapter_weights[weight_key] = new_weight_tensor + os.makedirs(output_dir, exist_ok=True) + torch.save(sub_adapter_weights, os.path.join(output_dir, WEIGHTS_NAME)) + config_path = os.path.join(adapter_model_path, CONFIG_NAME) + os.system(f"cp {config_path} {output_dir}") diff --git a/comps/finetuning/utils/merge_adapter.py b/comps/finetuning/utils/merge_adapter.py new file mode 100644 index 0000000000..a127061ef6 --- /dev/null +++ b/comps/finetuning/utils/merge_adapter.py @@ -0,0 +1,16 @@ +from peft import PeftModel +from transformers import AutoModelForCausalLM, AutoTokenizer + + +def main(base_model_path, adapter_model_path, output_path): + base_model = AutoModelForCausalLM.from_pretrained(base_model_path, trust_remote_code=True) + model = PeftModel.from_pretrained(base_model, adapter_model_path) + model.eval() + for name, param in model.named_parameters(): + param.requires_grad = False + merged_model = model.merge_and_unload() + merged_model.train(False) + base_model.save_pretrained(output_path, state_dict=merged_model.state_dict()) + + tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True) + tokenizer.save_pretrained(output_path) diff --git a/comps/finetuning_sqft/Dockerfile 
b/comps/finetuning_sqft/Dockerfile new file mode 100644 index 0000000000..4715470aec --- /dev/null +++ b/comps/finetuning_sqft/Dockerfile @@ -0,0 +1,50 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Use the same Python version as Ray +FROM python:3.10.14 + +ARG HF_TOKEN + +ENV HF_TOKEN=$HF_TOKEN + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN chown -R user /home/user/comps/finetuning_sqft + +USER user + +ENV PATH=$PATH:/home/user/.local/bin + +RUN python -m pip install --no-cache-dir --upgrade pip && \ + python -m pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \ + python -m pip install --no-cache-dir intel-extension-for-pytorch && \ + python -m pip install --no-cache-dir oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ && \ + python -m pip install --no-cache-dir -r /home/user/comps/finetuning_sqft/requirements.txt + +# Set up third-party dependencies (SQFT) +ENV PATH_TO_FINETUNE=/home/user/comps/finetuning_sqft +RUN mkdir -p $PATH_TO_FINETUNE/third_party && cd $PATH_TO_FINETUNE/third_party && \ + git clone https://github.com/huggingface/peft.git && \ + cd peft && git checkout v0.10.0 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/peft-v0.10.0.patch && pip install -e . && cd .. && \ + git clone https://github.com/huggingface/transformers.git && \ + cd transformers && git checkout v4.44.2 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/transformers-v4.44.2.patch && pip install -e . && cd .. && \ + git clone https://github.com/openvinotoolkit/nncf.git && \ + cd nncf && git checkout v2.12.0 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/nncf-v2.12.0.patch && pip install -e . && cd .. + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/finetuning_sqft + +RUN echo PKGPATH=$(python3 -c "import pkg_resources; print(pkg_resources.get_distribution('oneccl-bind-pt').location)") >> run.sh && \ + echo 'export LD_LIBRARY_PATH=$PKGPATH/oneccl_bindings_for_pytorch/opt/mpi/lib/:$LD_LIBRARY_PATH' >> run.sh && \ + echo 'source $PKGPATH/oneccl_bindings_for_pytorch/env/setvars.sh' >> run.sh && \ + echo ray start --head --dashboard-host=0.0.0.0 >> run.sh && \ + echo export RAY_ADDRESS=http://localhost:8265 >> run.sh && \ + echo python finetuning_sqft_service.py >> run.sh + +CMD bash run.sh diff --git a/comps/finetuning_sqft/README.md new file mode 100644 index 0000000000..a5748caf76 --- /dev/null +++ b/comps/finetuning_sqft/README.md @@ -0,0 +1,240 @@ +# SQFT Fine-tuning Microservice + +Fine-tuning microservice with SQFT involves adapting a model to a specific task or dataset to improve its performance on that task. We currently support instruction tuning for LLMs. + +## 🚀1. 
Start Microservice with Python (Option 1) + +### 1.1 Install Requirements + +```bash +python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +python -m pip install intel-extension-for-pytorch +python -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ +pip install -r requirements.txt +``` +To enable elastic adapter fine-tuning (Neural Low-Rank Adapter Search) or SparsePEFT from [SQFT](https://arxiv.org/abs/2410.03750), please perform this additional installation: + +```bash +PATH_TO_FINETUNE=$PWD +mkdir third_party && cd third_party + +# transformers (for Neural Lora Search) +git clone https://github.com/huggingface/transformers.git +cd transformers && git checkout v4.44.2 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/transformers-v4.44.2.patch && pip install -e . && cd .. + +# peft (for Neural Low-Rank Adapter Search and SparsePEFT) +git clone https://github.com/huggingface/peft.git +cd peft && git checkout v0.10.0 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/peft-v0.10.0.patch && pip install -e . && cd .. + +# nncf (for Neural Lora Search) +git clone https://github.com/openvinotoolkit/nncf.git +cd nncf && git checkout v2.12.0 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/nncf-v2.12.0.patch && pip install -e . && cd .. +``` + +### 1.2 Start Fine-tuning Service with Python Script + +#### 1.2.1 Start Ray Cluster + +OneCCL and Intel MPI libraries should be dynamically linked in every node before Ray starts: + +```bash +source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh +``` + +Start Ray locally using the following command. + +```bash +ray start --head +``` + +For a multi-node cluster, start additional Ray worker nodes with below command. + +```bash +ray start --address='${head_node_ip}:6379' +``` + +#### 1.2.2 Start Finetuning Service + +```bash +export HF_TOKEN= +export PYTHONPATH= +python finetuning_sqft_service.py +``` + +## 🚀2. Start Microservice with Docker (Option 2) + +### 2.1 Setup on CPU + +#### 2.1.1 Build Docker Image + +Build docker image with below command: + +```bash +export HF_TOKEN=${your_huggingface_token} +cd ../../ +docker build -t opea/finetuning:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg HF_TOKEN=$HF_TOKEN -f comps/finetuning_sqft/Dockerfile . +``` + +#### 2.1.2 Run Docker with CLI + +Start docker container with below command: + +```bash +docker run -d --name="finetuning-server" -p 8015:8015 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/finetuning:latest +``` + +## 🚀3. 
Consume Fine-tuning Service + +### 3.1 Upload a training file + +Download a training file such as `alpaca_data.json` for instruction tuning (it can be downloaded from [here](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json)) and upload it to the server with the following command: + +```bash +# upload a training file +curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./alpaca_data.json" -F purpose="fine-tune" +``` + + +### 3.2 Create fine-tuning job + +#### 3.2.1 Instruction Tuning + +After a training file like `alpaca_data.json` is uploaded, use the following command to launch a fine-tuning job using `meta-llama/Llama-2-7b-chat-hf` as the base model: + +```bash +# create a finetuning job +curl http://${your_ip}:8015/v1/fine_tuning/jobs \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "training_file": "alpaca_data.json", + "model": "meta-llama/Llama-2-7b-chat-hf" + }' + +# create a finetuning job (with SparsePEFT) +curl http://${your_ip}:8015/v1/fine_tuning/jobs \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "training_file": "alpaca_data.json", + "model": "<base model name or path>", + "General": { + "lora_config": { + "sparse_adapter": true + } + } + }' + +# create a fine-tuning job (with Neural Low-Rank Adapter Search) +# Max LoRA rank: 16 +# LoRA target modules -> Low-rank search space +# ["q_proj", "k_proj", "v_proj"] -> [16,12,8] +# ["up_proj"] -> [16,12,8] +# ["down_proj"] -> [16,12,8] +curl http://${your_ip}:8015/v1/fine_tuning/jobs \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "training_file": "alpaca_data.json", + "model": "meta-llama/Llama-2-7b-chat-hf", + "General": { + "lora_config": { + "r": 16, + "neural_lora_search": true, + "target_module_groups": [["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]], + "search_space": ["16,12,8", "16,12,8", "16,12,8"] + } + } + }' +``` + +Below are some explanations for the parameters related to the Neural Low-Rank Adapter Search algorithm: + +- `target_module_groups` specifies the target module groups, which means that the adapters within the same group will share the same activated low-rank value. +- `search_space` specifies the search space for each target module (adapter) group. +Here, it is `["16,12,8", "16,12,8", "16,12,8"]`, meaning that the search space for each group is [16, 12, 8]. + +Note that the number of groups should be equal to the number of search spaces (one-to-one correspondence). +Feel free to try your favorite group design and search spaces. + +### 3.3 Manage fine-tuning job + +The commands below show how to list fine-tuning jobs, retrieve a fine-tuning job, cancel a fine-tuning job, and list the checkpoints of a fine-tuning job. 
+ +```bash +# list fine-tuning jobs +curl http://${your_ip}:8015/v1/fine_tuning/jobs -X GET + +# retrieve one fine-tuning job +curl http://localhost:8015/v1/fine_tuning/jobs/retrieve -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": ${fine_tuning_job_id}}' + +# cancel one fine-tuning job +curl http://localhost:8015/v1/fine_tuning/jobs/cancel -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": ${fine_tuning_job_id}}' + +# list checkpoints of a fine-tuning job +curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": ${fine_tuning_job_id}}' +``` + +### 3.4 Leverage fine-tuned model + +#### 3.4.1 Extract the sub-adapter + +After completing the super-adapter fine-tuning (the super-adapter is the checkpoint saved by the fine-tuning job), +the following command demonstrates how to extract the heuristic sub-adapter. +Additionally, more powerful sub-adapters can be obtained through other advanced search algorithms. + +```bash +curl http://${your_ip}:8015/v1/finetune/extract_adapter \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "fine_tuning_job_id": ${fine_tuning_job_id}, + "sub_adapter_version": "heuristic" + }' +``` + +`sub_adapter_version` can be `heuristic`, `minimal`, or a custom name. +When `sub_adapter_version` is set to a custom name, you need to provide a specific configuration in `custom_config`. +The extracted adapter will be saved in `<output directory>/<sub_adapter_version>`. + +
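+To make the named versions concrete: each adapter group has an ordered search space of ranks (listed from largest to smallest, e.g. `[16, 12, 8]`), and a named version simply picks one rank per group. The sketch below mirrors the selection logic of the `extract_sub_adapter` utility added in this patch and is only an illustration, not part of the service API:
+
+```python
+# Rank selection per adapter group, mirroring utils/extract_sub_adapter.py:
+# "maximal" takes the largest rank, "heuristic" the middle one, "minimal" the smallest.
+groups_widths = [[16, 12, 8], [16, 12, 8], [16, 12, 8]]  # example: one ordered rank list per group
+
+def select_ranks(groups_widths, version):
+    if version == "maximal":
+        return [space[0] for space in groups_widths]
+    if version == "heuristic":
+        return [space[(len(space) - 1) // 2] for space in groups_widths]
+    if version == "minimal":
+        return [space[-1] for space in groups_widths]
+    raise ValueError("a custom version requires an explicit custom_config list")
+
+print(select_ranks(groups_widths, "heuristic"))  # [12, 12, 12]
+```
+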
+An example of a custom configuration: + +```bash +curl http://${your_ip}:8015/v1/finetune/extract_adapter \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "fine_tuning_job_id": ${fine_tuning_job_id}, + "sub_adapter_version": "custom", + "custom_config": [8, 8, 16, 8, 8, 12, 8, 12, 12, 12, 8, 16, 12, 16, 16, 12, 12, 8, 8, 16, 8, 8, 12, 8, 16, 12, 8, 16, 8, 16, 12, 8, 8, 16, 16, 16, 16, 16, 8, 12, 12, 16, 12, 16, 12, 16, 16, 12, 8, 12, 12, 8, 8, 12, 8, 12, 12, 8, 16, 8, 8, 8, 8, 12, 16, 16] + }' +``` + +In a fine-tuning job with the Neural Low-Rank Adapter Search algorithm, the `nncf_config.json` file (which includes the elastic adapter information) will be saved in the output directory. +The `custom_config` must correspond to the `overwrite_groups` (adapter modules) or `overwrite_groups_widths` +(search space for the rank of adapter modules) in `nncf_config.json`. +The above command corresponds to the example in [example_nncf_config/nncf_config.json](./example_nncf_config/nncf_config.json), +and it will save the sub-adapter to `<output directory>/custom`. + +
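+Before submitting a custom configuration, it can help to sanity-check it against the saved `nncf_config.json`. The snippet below is a minimal sketch (not part of the service); it assumes the layout of the example config in this directory, where the group search spaces are stored under the elasticity `width` section, and checks that `custom_config` has one rank per adapter group and that each rank lies inside that group's search space:
+
+```python
+import json
+
+# Assumption: path to the NNCF config saved in the fine-tuning job's output directory.
+with open("example_nncf_config/nncf_config.json") as f:
+    nncf_config = json.load(f)
+
+# The example config in this patch keeps the NAS settings under "bootstrapNAS";
+# configs generated by the SQFT NNCF utilities may use an "SQFT" key instead.
+training = (nncf_config.get("bootstrapNAS") or nncf_config["SQFT"])["training"]
+groups_widths = training["elasticity"]["width"]["overwrite_groups_widths"]
+
+# Placeholder custom config: the smallest rank for every group; replace with your own choices.
+custom_config = [space[-1] for space in groups_widths]
+
+assert len(custom_config) == len(groups_widths), "one rank per adapter group is required"
+for rank, space in zip(custom_config, groups_widths):
+    assert rank in space, f"rank {rank} is not in the group's search space {space}"
+print(f"custom_config with {len(custom_config)} entries is consistent with nncf_config.json")
+```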
+ +#### 3.4.2 Merge + +The following command demonstrates how to merge the sub-adapter into the base pretrained model: + +```bash +curl http://${your_ip}:8015/v1/finetune/merge_adapter \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "fine_tuning_job_id": ${fine_tuning_job_id}, + "sub_adapter_version": "heuristic" + }' +``` + +The merged model will be saved in `<output directory>/<sub_adapter_version>/merged_model`. + +## 🚀4. Descriptions for Finetuning parameters + +We utilize [OpenAI finetuning parameters](https://platform.openai.com/docs/api-reference/fine-tuning) and extend them with more customizable parameters; see the definitions in [finetune_sqft_config](./finetune_sqft_config.py). diff --git a/comps/finetuning_sqft/example_nncf_config/nncf_config.json new file mode 100644 index 0000000000..ead7ffe4c6 --- /dev/null +++ b/comps/finetuning_sqft/example_nncf_config/nncf_config.json @@ -0,0 +1,630 @@ +{ + "input_info": [ + { + "sample_size": [ + 1, + 256 + ], + "type": "long", + "keyword": "input_ids" + }, + { + "sample_size": [ + 1, + 256 + ], + "type": "long", + "keyword": "attention_mask" + } + ], + "bootstrapNAS": { + "training": { + "algorithm": "progressive_shrinking", + "frozen_layers_allowed": true, + "progressivity_of_elasticity": [ + "width" + ], + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 0 + }, + "schedule": { + "list_stage_descriptions": [ + { + "train_dims": [ + "width" + ], + "epochs": 3, + "depth_indicator": 1, + "width_indicator": 8, + "init_lr": 0.0003, + "epochs_lr": 3, + "sample_rate": 1 + } + ] + }, + "elasticity": { + "available_elasticity_dims": [ + "width" + ], + "width": { + "overwrite_groups": [ + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ] + ], + "overwrite_groups_widths": [ + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, 
+ 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ], + [ + 16, + 12, + 8 + ] + ] + } + } + } + } +} \ No newline at end of file diff --git a/comps/finetuning_sqft/finetune_runner.py b/comps/finetuning_sqft/finetune_runner.py new file mode 100644 index 0000000000..45cad43d56 --- /dev/null +++ b/comps/finetuning_sqft/finetune_runner.py @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +from pydantic_yaml import parse_yaml_raw_as +from transformers import TrainerCallback, TrainerControl, TrainerState, TrainingArguments + +from comps.finetuning_sqft.finetune_sqft_config import FinetuneConfig + + +class FineTuneCallback(TrainerCallback): + def __init__(self) -> None: + super().__init__() + + def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): + print("FineTuneCallback:", args, state) + + +def main(): + parser = argparse.ArgumentParser(description="Runner for llm_on_ray-finetune") + parser.add_argument("--config_file", type=str, required=True, default=None) + args = parser.parse_args() + model_config_file = args.config_file + + with open(model_config_file) as f: + finetune_config = parse_yaml_raw_as(FinetuneConfig, f).model_dump() + + callback = FineTuneCallback() + finetune_config["Training"]["callbacks"] = [callback] + + from comps.finetuning_sqft.llm_on_ray.finetune.finetune import main as llm_on_ray_finetune_main + + llm_on_ray_finetune_main(finetune_config) + + +if __name__ == "__main__": + main() diff --git a/comps/finetuning_sqft/finetune_sqft_config.py b/comps/finetuning_sqft/finetune_sqft_config.py new file mode 100644 index 0000000000..a34a9e7c3b --- /dev/null +++ b/comps/finetuning_sqft/finetune_sqft_config.py @@ -0,0 +1,215 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. 
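The runner above consumes a YAML file that matches the `FinetuneConfig` schema defined later in this file. As a hedged illustration only, the sketch below builds such a config with SQFT's NLS options enabled and serializes it with `pydantic_yaml.to_yaml_file` (the same helper the handlers use); the model name, dataset path, and output paths are placeholders, not values mandated by this patch.

```python
# Hypothetical sketch: build a FinetuneConfig with SQFT's NLS enabled and
# write it to the YAML file that finetune_runner.py expects.
# Field names follow the schema in this file; model/data values are placeholders.
from pydantic_yaml import to_yaml_file

from comps.finetuning_sqft.finetune_sqft_config import (
    DatasetConfig,
    FinetuneConfig,
    GeneralConfig,
    SQFTLoRAConfig,
)

config = FinetuneConfig(
    General=GeneralConfig(
        base_model="meta-llama/Llama-2-7b-chat-hf",  # placeholder model
        output_dir="./tmp/sqft-nls",                 # placeholder output dir
        lora_config=SQFTLoRAConfig(
            r=16,
            neural_lora_search=True,
            target_module_groups=[["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]],
            search_space=["16,12,8", "16,12,8", "16,12,8"],
        ),
    ),
    Dataset=DatasetConfig(train_file="datasets/alpaca_data.json"),  # placeholder path
)

# Serialize to YAML; the file could then be passed to the runner, e.g.:
#   python finetune_runner.py --config_file jobs/example-nls-job.yaml
to_yaml_file("jobs/example-nls-job.yaml", config)
```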
+ +from typing import List, Optional, Union + +from pydantic import BaseModel, Field, validator, root_validator + +from comps.cores.proto.api_protocol import FineTuningJobIDRequest, FineTuningJobsRequest + +PRECISION_BF16 = "bf16" +PRECISION_FP16 = "fp16" +PRECISION_NO = "no" + +DEVICE_CPU = "cpu" +DEVICE_HPU = "hpu" +DEVICE_GPU = "gpu" +DEVICE_CUDA = "cuda" + +ACCELERATE_STRATEGY_DDP = "DDP" +ACCELERATE_STRATEGY_FSDP = "FSDP" +ACCELERATE_STRATEGY_DEEPSPEED = "DEEPSPEED" + + +class LoadConfig(BaseModel): + trust_remote_code: bool = False + # set Huggingface token to access dataset/model + token: Optional[str] = None + + +class LoraConfig(BaseModel): + task_type: str = "CAUSAL_LM" + r: int = 8 + lora_alpha: int = 16 + lora_dropout: float = 0.1 + target_modules: Optional[List[str]] = None + + +class SQFTLoRAConfig(LoraConfig): + neural_lora_search: bool = False + target_module_groups: Optional[List[List[str]]] = None + search_space: Optional[List[str]] = None + sparse_adapter: bool = False + nncf_config: Optional[str] = None + + @root_validator(pre=True) + def set_target_modules(cls, values): + target_module_groups = values.get('target_module_groups') + if target_module_groups is not None: + values['target_modules'] = [item for sublist in target_module_groups for item in sublist] + search_space = values.get('search_space') + if search_space is not None: + assert len(search_space) == len(target_module_groups) + return values + + +class GeneralConfig(BaseModel): + base_model: str = None + tokenizer_name: Optional[str] = None + gaudi_config_name: Optional[str] = None + gpt_base_model: bool = False + output_dir: str = "./tmp" + report_to: str = "none" + resume_from_checkpoint: Optional[str] = None + save_strategy: str = "no" + config: LoadConfig = LoadConfig() + lora_config: Optional[Union[LoraConfig, SQFTLoRAConfig]] = LoraConfig() + enable_gradient_checkpointing: bool = False + task: str = "instruction_tuning" + + @validator("report_to") + def check_report_to(cls, v: str): + assert v in ["none", "tensorboard"] + return v + + @validator("task") + def check_task(cls, v: str): + assert v in ["instruction_tuning"] + return v + + +class DatasetConfig(BaseModel): + train_file: str = None + validation_file: Optional[str] = None + validation_split_percentage: int = 5 + max_length: int = 512 + group: bool = True + block_size: int = 512 + shuffle: bool = False + max_source_length: int = 384 + padding_side: str = "right" + truncation_side: str = "right" + max_seq_length: int = 512 + truncation: bool = True + padding: Union[bool, str] = True + mask_input: bool = True + mask_response: bool = True + data_preprocess_type: str = "neural_chat" + max_train_samples: int = 0 + max_eval_samples: int = 0 + train_group_size: int = 8 + query_max_len: int = Field( + default=128, + description=( + "The maximum total input sequence length after tokenization for passage. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), + ) + passage_max_len: int = Field( + default=128, + description=( + "The maximum total input sequence length after tokenization for passage. Sequences longer " + "than this will be truncated, sequences shorter will be padded." 
+ ), + ) + query_instruction_for_retrieval: Optional[str] = Field(default=None, description="instruction for query") + passage_instruction_for_retrieval: Optional[str] = Field(default=None, description="instruction for passage") + + +class RayResourceConfig(BaseModel): + CPU: int = 32 + GPU: int = 0 + HPU: int = 0 + + +class TrainingConfig(BaseModel): + optimizer: str = "adamw_torch" + batch_size: int = 2 + epochs: int = 1 + max_train_steps: Optional[int] = None + learning_rate: float = 5.0e-5 + lr_scheduler: str = "linear" + weight_decay: float = 0.0 + device: str = DEVICE_CPU + hpu_execution_mode: str = "lazy" + num_training_workers: int = 1 + resources_per_worker: RayResourceConfig = RayResourceConfig() + accelerate_mode: str = ACCELERATE_STRATEGY_DDP + mixed_precision: str = PRECISION_NO + gradient_accumulation_steps: int = 1 + logging_steps: int = 10 + deepspeed_config_file: str = "" + + @validator("device") + def check_device(cls, v: str): + # will convert to lower case + if v: + assert v.lower() in [DEVICE_CPU, DEVICE_GPU, DEVICE_HPU, DEVICE_CUDA] + return v.lower() + + @validator("hpu_execution_mode") + def check_hpu_execution_mode(cls, v: str): + if v: + assert v in ["lazy", "eager", "eager.compile"] + return v + + @validator("accelerate_mode") + def check_accelerate_mode(cls, v: str): + if v: + assert v in [ + ACCELERATE_STRATEGY_DDP, + ACCELERATE_STRATEGY_FSDP, + ACCELERATE_STRATEGY_DEEPSPEED, + ] + return v + + @validator("mixed_precision") + def check_mixed_precision(cls, v: str): + if v: + assert v in [PRECISION_BF16, PRECISION_FP16, PRECISION_NO] + return v + + @validator("logging_steps") + def check_logging_steps(cls, v: int): + assert v > 0 + return v + + # @model_validator(mode='after') + # def check_device_and_accelerate_mode(self) -> "Training": + # dev = self.device + # res = self.resources_per_worker + # mode = self.accelerate_mode + # if dev == "CPU": + # if res.GPU is not None and res.GPU > 0: + # raise ValueError("Please not specified GPU resource when use CPU only in Ray.") + # if mode != "CPU_DDP": + # raise ValueError("Please specified CPU related accelerate mode when use CPU only in Ray.") + # elif dev == "GPU": + # if res.GPU is None or res.GPU == 0: + # raise ValueError("Please specified GPU resource when use GPU to fine tune in Ray.") + # if mode not in ["GPU_DDP", "GPU_FSDP"]: + # raise ValueError("Please speicifed GPU related accelerate mode when use GPU to fine tune in Ray.") + + # return self + + +class FinetuneConfig(BaseModel): + General: GeneralConfig = GeneralConfig() + Dataset: DatasetConfig = DatasetConfig() + Training: TrainingConfig = TrainingConfig() + + +class FineTuningParams(FineTuningJobsRequest): + # priority use FineTuningJobsRequest params + General: GeneralConfig = GeneralConfig() + Dataset: DatasetConfig = DatasetConfig() + Training: TrainingConfig = TrainingConfig() + +class ExtractAdapterParams(FineTuningJobIDRequest): + sub_adapter_version: str = "heuristic" + custom_config: Optional[List[int]] = None + +class MergeAdapterParams(FineTuningJobIDRequest): + adapter_version: Optional[str] = None diff --git a/comps/finetuning_sqft/finetuning_sqft_service.py b/comps/finetuning_sqft/finetuning_sqft_service.py new file mode 100644 index 0000000000..bc11a6cd23 --- /dev/null +++ b/comps/finetuning_sqft/finetuning_sqft_service.py @@ -0,0 +1,76 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +from fastapi import BackgroundTasks, Depends + +from comps import opea_microservices, register_microservice +from 
comps.cores.proto.api_protocol import FineTuningJobIDRequest, UploadFileRequest +from comps.finetuning_sqft.finetune_sqft_config import FineTuningParams, ExtractAdapterParams, MergeAdapterParams +from comps.finetuning_sqft.handlers import ( + handle_cancel_finetuning_job, + handle_create_finetuning_jobs, + handle_extract_sub_adapter, + handle_list_finetuning_checkpoints, + handle_list_finetuning_jobs, + handle_merge_adapter, + handle_retrieve_finetuning_job, + handle_upload_training_files, + upload_file, +) + + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015) +def create_finetuning_jobs(request: FineTuningParams, background_tasks: BackgroundTasks): + return handle_create_finetuning_jobs(request, background_tasks) + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015, methods=["GET"] +) +def list_finetuning_jobs(): + return handle_list_finetuning_jobs() + + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/retrieve", host="0.0.0.0", port=8015 +) +def retrieve_finetuning_job(request: FineTuningJobIDRequest): + job = handle_retrieve_finetuning_job(request) + return job + + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/cancel", host="0.0.0.0", port=8015 +) +def cancel_finetuning_job(request: FineTuningJobIDRequest): + job = handle_cancel_finetuning_job(request) + return job + + +@register_microservice( + name="opea_service@finetuning", + endpoint="/v1/files", + host="0.0.0.0", + port=8015, +) +async def upload_training_files(request: UploadFileRequest = Depends(upload_file)): + uploadFileInfo = await handle_upload_training_files(request) + return uploadFileInfo + + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/finetune/list_checkpoints", host="0.0.0.0", port=8015 +) +def list_checkpoints(request: FineTuningJobIDRequest): + checkpoints = handle_list_finetuning_checkpoints(request) + return checkpoints + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/extract_adapter", host="0.0.0.0", port=8015) +def extract_sub_adapter(request: ExtractAdapterParams): + return handle_extract_sub_adapter(request) + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/merge_adapter", host="0.0.0.0", port=8015) +def merge_adapter(request: MergeAdapterParams): + return handle_merge_adapter(request) + + +if __name__ == "__main__": + opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning_sqft/handlers.py b/comps/finetuning_sqft/handlers.py new file mode 100644 index 0000000000..03e5745981 --- /dev/null +++ b/comps/finetuning_sqft/handlers.py @@ -0,0 +1,338 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import random +import re +import time +import urllib.parse +import uuid +from pathlib import Path +from typing import Dict + +from fastapi import BackgroundTasks, File, Form, HTTPException, UploadFile +from pydantic_yaml import parse_yaml_file_as, to_yaml_file +from ray.job_submission import JobSubmissionClient + +from comps import CustomLogger +from comps.cores.proto.api_protocol import ( + FileObject, + FineTuningJob, + FineTuningJobCheckpoint, + FineTuningJobIDRequest, + FineTuningJobList, + UploadFileRequest, +) +from comps.finetuning_sqft.finetune_sqft_config import ( + ExtractAdapterParams, + FinetuneConfig, + FineTuningParams, + 
MergeAdapterParams, +) + +logger = CustomLogger("finetuning_handlers") + +DATASET_BASE_PATH = "datasets" +JOBS_PATH = "jobs" +OUTPUT_DIR = "output" + +if not os.path.exists(DATASET_BASE_PATH): + os.mkdir(DATASET_BASE_PATH) +if not os.path.exists(JOBS_PATH): + os.mkdir(JOBS_PATH) +if not os.path.exists(OUTPUT_DIR): + os.mkdir(OUTPUT_DIR) + +FineTuningJobID = str +CheckpointID = str +CheckpointPath = str + +CHECK_JOB_STATUS_INTERVAL = 5 # Check every 5 secs + +global ray_client +ray_client: JobSubmissionClient = None + +running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} +finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} +checkpoint_id_to_checkpoint_path: Dict[CheckpointID, CheckpointPath] = {} + + +# Add a background task to periodicly update job status +def update_job_status(job_id: FineTuningJobID): + while True: + job_status = ray_client.get_job_status(finetuning_job_to_ray_job[job_id]) + status = str(job_status).lower() + # Ray status "stopped" is OpenAI status "cancelled" + status = "cancelled" if status == "stopped" else status + logger.info(f"Status of job {job_id} is '{status}'") + running_finetuning_jobs[job_id].status = status + if status == "succeeded" or status == "cancelled" or status == "failed": + break + time.sleep(CHECK_JOB_STATUS_INTERVAL) + + +def handle_create_finetuning_jobs(request: FineTuningParams, background_tasks: BackgroundTasks): + base_model = request.model + train_file = request.training_file + train_file_path = os.path.join(DATASET_BASE_PATH, train_file) + + if not os.path.exists(train_file_path): + raise HTTPException(status_code=404, detail=f"Training file '{train_file}' not found!") + + finetune_config = FinetuneConfig(General=request.General, Dataset=request.Dataset, Training=request.Training) + finetune_config.General.base_model = base_model + finetune_config.Dataset.train_file = train_file_path + if request.hyperparameters is not None: + if request.hyperparameters.epochs != "auto": + finetune_config.Training.epochs = request.hyperparameters.epochs + + if request.hyperparameters.batch_size != "auto": + finetune_config.Training.batch_size = request.hyperparameters.batch_size + + if request.hyperparameters.learning_rate_multiplier != "auto": + finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier + + if os.getenv("HF_TOKEN", None): + finetune_config.General.config.token = os.getenv("HF_TOKEN", None) + + job = FineTuningJob( + id=f"ft-job-{uuid.uuid4()}", + model=base_model, + created_at=int(time.time()), + training_file=train_file, + hyperparameters={ + "n_epochs": finetune_config.Training.epochs, + "batch_size": finetune_config.Training.batch_size, + "learning_rate_multiplier": finetune_config.Training.learning_rate, + }, + status="running", + seed=random.randint(0, 1000) if request.seed is None else request.seed, + ) + finetune_config.General.output_dir = os.path.join(OUTPUT_DIR, job.id) + if os.getenv("DEVICE", ""): + logger.info(f"specific device: {os.getenv('DEVICE')}") + + finetune_config.Training.device = os.getenv("DEVICE") + if finetune_config.Training.device == "hpu": + if finetune_config.Training.resources_per_worker.HPU == 0: + # set 1 + finetune_config.Training.resources_per_worker.HPU = 1 + + finetune_config_file = f"{JOBS_PATH}/{job.id}.yaml" + to_yaml_file(finetune_config_file, finetune_config) + + global ray_client + ray_client = JobSubmissionClient() if ray_client is None else ray_client + + ray_job_id = ray_client.submit_job( + # Entrypoint shell command to execute + 
entrypoint=f"python finetune_runner.py --config_file {finetune_config_file}", + ) + + logger.info(f"Submitted Ray job: {ray_job_id} ...") + + running_finetuning_jobs[job.id] = job + finetuning_job_to_ray_job[job.id] = ray_job_id + + background_tasks.add_task(update_job_status, job.id) + + return job + + +def handle_extract_sub_adapter(request: ExtractAdapterParams): + fine_tuning_job_id = request.fine_tuning_job_id + finetune_config_file = f"{JOBS_PATH}/{fine_tuning_job_id}.yaml" + finetune_config = parse_yaml_file_as(FinetuneConfig, finetune_config_file) + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + finetuned_model_path = os.path.join(OUTPUT_DIR, fine_tuning_job_id) + assert finetuned_model_path == finetune_config.General.output_dir + if not os.path.exists(finetuned_model_path): + raise HTTPException( + status_code=404, + detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!" + ) + if job.status != "succeeded": + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") + + if finetune_config.General.lora_config is None: + raise HTTPException( + status_code=404, + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!" + ) + if not finetune_config.General.lora_config.neural_lora_search: + raise HTTPException( + status_code=404, + detail=f"The fine-tuning job '{fine_tuning_job_id}' did not enable NLS algorithm, " + f"there is no need to extract sub-adapters!" + ) + nncf_config_path = os.path.join(finetune_config.General.output_dir, "nncf_config.json") + if not os.path.exists(nncf_config_path): + raise HTTPException( + status_code=404, + detail=f"The NNCF config file does not exist in the fine-tuning job '{fine_tuning_job_id}!" + ) + + from comps.finetuning_sqft.utils.extract_sub_adapter import main as extract_sub_adapter_main + extract_sub_adapter_main( + adapter_model_path=finetuned_model_path, + nncf_config=nncf_config_path, + sub_adapter_version=request.sub_adapter_version, + custom_config=request.custom_config + ) + + return fine_tuning_job_id + + +def handle_merge_adapter(request: MergeAdapterParams): + fine_tuning_job_id = request.fine_tuning_job_id + finetune_config_file = f"{JOBS_PATH}/{fine_tuning_job_id}.yaml" + finetune_config = parse_yaml_file_as(FinetuneConfig, finetune_config_file) + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + finetuned_model_path = os.path.join(OUTPUT_DIR, fine_tuning_job_id) + assert finetuned_model_path == finetune_config.General.output_dir + if not os.path.exists(finetuned_model_path): + raise HTTPException( + status_code=404, + detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!" + ) + if job.status != "succeeded": + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") + + if finetune_config.General.lora_config is None: + raise HTTPException( + status_code=404, + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!" 
+ ) + + adapter_path = finetuned_model_path + adapter_version = request.adapter_version + if adapter_version is not None: + adapter_path = os.path.join(adapter_path, adapter_version) + if not os.path.exists(adapter_path): + raise HTTPException( + status_code=404, + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not have a '{adapter_version}' adapter!" + ) + + from comps.finetuning_sqft.utils.merge import main as merge_adapter_main + merge_adapter_main( + base_model_path=finetune_config.General.base_model, + adapter_model_path=adapter_path, + output_path=os.path.join(adapter_path, "merged_model") + ) + + return fine_tuning_job_id + + +def handle_list_finetuning_jobs(): + finetuning_jobs_list = FineTuningJobList(data=list(running_finetuning_jobs.values()), has_more=False) + + return finetuning_jobs_list + + +def handle_retrieve_finetuning_job(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + return job + + +def handle_cancel_finetuning_job(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + ray_job_id = finetuning_job_to_ray_job.get(fine_tuning_job_id) + if ray_job_id is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + + global ray_client + ray_client = JobSubmissionClient() if ray_client is None else ray_client + ray_client.stop_job(ray_job_id) + + job = running_finetuning_jobs.get(fine_tuning_job_id) + job.status = "cancelled" + return job + + +async def save_content_to_local_disk(save_path: str, content): + save_path = Path(save_path) + try: + if isinstance(content, str): + with open(save_path, "w", encoding="utf-8") as file: + file.write(content) + else: + with save_path.open("wb") as fout: + content = await content.read() + fout.write(content) + except Exception as e: + logger.info(f"Write file failed. Exception: {e}") + raise Exception(status_code=500, detail=f"Write file {save_path} failed. 
Exception: {e}") + + +def handle_list_finetuning_checkpoints(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + output_dir = os.path.join(OUTPUT_DIR, job.id) + checkpoints = [] + if os.path.exists(output_dir): + # Iterate over the contents of the directory and add an entry for each + files = os.listdir(output_dir) + for file in files: # Loop over directory contents + file_path = os.path.join(output_dir, file) + if os.path.isdir(file_path) and file.startswith("checkpoint"): + steps = re.findall("\d+", file)[0] + checkpointsResponse = FineTuningJobCheckpoint( + id=f"ftckpt-{uuid.uuid4()}", # Generate a unique ID + created_at=int(time.time()), # Use the current timestamp + fine_tuned_model_checkpoint=file_path, # Directory path itself + fine_tuning_job_id=fine_tuning_job_id, + object="fine_tuning.job.checkpoint", + step_number=steps, + ) + checkpoints.append(checkpointsResponse) + if job.status == "succeeded": + checkpointsResponse = FineTuningJobCheckpoint( + id=f"ftckpt-{uuid.uuid4()}", # Generate a unique ID + created_at=int(time.time()), # Use the current timestamp + fine_tuned_model_checkpoint=output_dir, # Directory path itself + fine_tuning_job_id=fine_tuning_job_id, + object="fine_tuning.job.checkpoint", + ) + checkpoints.append(checkpointsResponse) + + return checkpoints + + +async def upload_file(purpose: str = Form(...), file: UploadFile = File(...)): + return UploadFileRequest(purpose=purpose, file=file) + + +async def handle_upload_training_files(request: UploadFileRequest): + file = request.file + if file is None: + raise HTTPException(status_code=404, detail="upload file failed!") + filename = urllib.parse.quote(file.filename, safe="") + save_path = os.path.join(DATASET_BASE_PATH, filename) + await save_content_to_local_disk(save_path, file) + + fileBytes = os.path.getsize(save_path) + fileInfo = FileObject( + id=f"file-{uuid.uuid4()}", + object="file", + bytes=fileBytes, + created_at=int(time.time()), + filename=filename, + purpose="fine-tune", + ) + + return fileInfo diff --git a/comps/finetuning_sqft/launch.sh b/comps/finetuning_sqft/launch.sh new file mode 100644 index 0000000000..034c82f3d2 --- /dev/null +++ b/comps/finetuning_sqft/launch.sh @@ -0,0 +1,12 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +if [[ -n "$RAY_PORT" ]];then + ray start --head --port $RAY_PORT --dashboard-host=0.0.0.0 +else + ray start --head --dashboard-host=0.0.0.0 + export RAY_PORT=8265 +fi + +export RAY_ADDRESS=http://localhost:$RAY_PORT +python finetuning_sqft_service.py diff --git a/comps/finetuning_sqft/llm_on_ray/common/__init__.py b/comps/finetuning_sqft/llm_on_ray/common/__init__.py new file mode 100644 index 0000000000..954b7baa4b --- /dev/null +++ b/comps/finetuning_sqft/llm_on_ray/common/__init__.py @@ -0,0 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. 
+ +from .torch_config import TorchConfig diff --git a/comps/finetuning_sqft/llm_on_ray/common/common.py b/comps/finetuning_sqft/llm_on_ray/common/common.py new file mode 100644 index 0000000000..ac01ae12e1 --- /dev/null +++ b/comps/finetuning_sqft/llm_on_ray/common/common.py @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +import glob +import importlib +import os + +from comps import CustomLogger + +logger = CustomLogger("llm_on_ray") + + +def import_all_modules(basedir, prefix=None): + all_py_files = glob.glob(basedir + "/*.py") + modules = [os.path.basename(f) for f in all_py_files] + + for module in modules: + if not module.startswith("_"): + module = module.rstrip(".py") + if prefix is None: + module_name = module + else: + module_name = f"{prefix}.{module}" + try: + importlib.import_module(module_name) + except Exception: + logger.warning(f"import {module_name} error", exc_info=True) diff --git a/comps/finetuning_sqft/llm_on_ray/common/torch_config.py b/comps/finetuning_sqft/llm_on_ray/common/torch_config.py new file mode 100644 index 0000000000..9e3f48a7c3 --- /dev/null +++ b/comps/finetuning_sqft/llm_on_ray/common/torch_config.py @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +import os +import sys +from dataclasses import dataclass +from typing import Optional + +from ray.train._internal.worker_group import WorkerGroup +from ray.train.torch.config import TorchConfig as RayTorchConfig +from ray.train.torch.config import _TorchBackend + +# The package importlib_metadata is in a different place, depending on the Python version. +if sys.version_info < (3, 8): + import importlib_metadata +else: + import importlib.metadata as importlib_metadata + + +@dataclass +class TorchConfig(RayTorchConfig): + device: Optional[str] = None + + @property + def backend_cls(self): + EnableCCLBackend.device = self.device + return EnableCCLBackend + + +def xpu_libs_import(): + """Try to import IPEX and oneCCL.""" + try: + import intel_extension_for_pytorch + except ImportError: + raise ImportError("Please install intel_extension_for_pytorch") + try: + ccl_version = importlib_metadata.version("oneccl_bind_pt") + if ccl_version >= "1.12": + import oneccl_bindings_for_pytorch + else: + import torch_ccl + except ImportError as ccl_not_exist: + raise ImportError("Please install torch-ccl") from ccl_not_exist + + +def hpu_libs_import(): + """Try to import habana frameworkfs for torch.""" + try: + import habana_frameworks.torch # noqa: F401 + except ImportError as habana_not_exist: + raise ImportError("Please install habana_frameworks") from habana_not_exist + + +def _set_torch_distributed_env_vars(device): + if device is not None: + os.environ["ACCELERATE_TORCH_DEVICE"] = device + + +class EnableCCLBackend(_TorchBackend): + device: Optional[str] = None + + def on_start(self, worker_group: WorkerGroup, backend_config: RayTorchConfig): + libs_import = hpu_libs_import if self.device is not None and self.device.startswith("hpu") else xpu_libs_import + for i in range(len(worker_group)): + worker_group.execute_single_async(i, libs_import) + super().on_start(worker_group, backend_config) + + def on_training_start(self, worker_group: WorkerGroup, backend_config: RayTorchConfig): + super().on_training_start(worker_group, backend_config) + worker_group.execute(_set_torch_distributed_env_vars, self.device) diff --git 
a/comps/finetuning_sqft/llm_on_ray/finetune/__init__.py b/comps/finetuning_sqft/llm_on_ray/finetune/__init__.py new file mode 100644 index 0000000000..0262e494a9 --- /dev/null +++ b/comps/finetuning_sqft/llm_on_ray/finetune/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. diff --git a/comps/finetuning_sqft/llm_on_ray/finetune/data_process.py b/comps/finetuning_sqft/llm_on_ray/finetune/data_process.py new file mode 100644 index 0000000000..07b12d71e1 --- /dev/null +++ b/comps/finetuning_sqft/llm_on_ray/finetune/data_process.py @@ -0,0 +1,352 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +import copy +import math +import random +import re +from dataclasses import dataclass +from itertools import chain +from typing import Dict, List, Tuple + +import torch +from torch.utils.data import Dataset +from transformers import BatchEncoding, DataCollatorWithPadding + +IGNORE_INDEX = -100 + + +class InstructionDataProcessor: + # We used the following prompts for fine-tuning the Alpaca model. You can find reference doc form this URL(https://github.com/tatsu-lab/stanford_alpaca/blob/main/README.md#data-release) + def __init__(self, config, tokenizer): + self.tokenizer = tokenizer + self.end = tokenizer.eos_token + self.intro = ( + "Below is an instruction that describes a task. Write a response that appropriately completes the request." + ) + self.instruction = "### Instruction:\n" + self.input = "### Input:\n" + self.response = "### Response:\n" + self.padding_side = config["Dataset"].get("padding_side", "right") + self.truncation_side = config["Dataset"].get("truncation_side", "right") + self.max_length = self.max_seq_length = config["Dataset"].get("max_length", 512) + self.max_source_length = config["Dataset"].get("max_source_length", 384) + self.truncation = config["Dataset"].get("truncation", True) + self.padding = config["Dataset"].get("padding", True) + self.mask_input = config["Dataset"].get("mask_input", True) + self.mask_response = config["Dataset"].get("mask_response", True) + + def make_prompt(self, examples): + prompts = {} + prompts["prompt_sources"] = [] + prompts["prompt_targets"] = [] + for rec in examples: + instruction = rec["instruction"] + response = rec["input"] + context = rec.get("output") + if not instruction: + raise ValueError(f"Expected an instruction in: {rec}") + # if not response: + # raise ValueError(f"Expected a response in: {rec}") + if context: + prompt = ( + self.intro + + self.end + + "\n" + + self.instruction + + instruction + + self.input + + context + + self.end + + "\n" + + self.response + ) + prompts["prompt_sources"].append(prompt) + else: + prompt = self.intro + self.end + "\n" + self.instruction + instruction + self.end + "\n" + self.response + prompts["prompt_sources"].append(prompt) + prompt_response = response + self.end + prompts["prompt_targets"].append(prompt_response) + return prompts + + def __truncate_sequences(self, sequences, max_length): + """ + Copied from https://github.com/intel/intel-extension-for-transformers/blob/ae54f698b73a66e5729427cb19f69c33e1a5c34d/intel_extension_for_transformers/transformers/llm/finetuning/data_utils.py#L40 + """ + words_to_cut = sum(list(map(len, sequences))) - max_length + if words_to_cut <= 0: + return sequences + + while words_to_cut > 0 and len(sequences) > 0: + words_to_cut -= len(sequences[0]) + sequences = sequences[1:] + return 
sequences + + def tokenize_by_neural_chat(self, examples): + """ + Copied from https://github.com/intel/intel-extension-for-transformers/blob/ae54f698b73a66e5729427cb19f69c33e1a5c34d/intel_extension_for_transformers/transformers/llm/finetuning/data_utils.py#L225 + The only differences are: + - using our own prompt style + - add left or right padding and truncation + - add mask_input and mask_response + """ + keys = list(examples.data.keys()) + if len(keys) != 2: + raise ValueError("Unsupported dataset format") + assistant_tokens = self.tokenizer.tokenize(self.response) + header = self.intro + self.end + "\n" + + examples["input_ids"] = [] + examples["labels"] = [] + examples["attention_mask"] = [] + for instruction, response in zip(examples[keys[0]], examples[keys[1]]): + convs = re.findall( + r"{0}.*?{2}|{1}.*?{2}".format(self.instruction, self.response, self.end), + instruction, + re.DOTALL, + ) + convs_tokens = [self.tokenizer.tokenize(conv) + self.tokenizer.tokenize("\n") for conv in convs] + header_tokens = self.tokenizer.tokenize(header) + self.tokenizer.tokenize("\n") + max_input = self.max_source_length - len(header_tokens) - len(assistant_tokens) + truncated_convs = self.__truncate_sequences(convs_tokens, max_input) + if len(truncated_convs) == 0: + truncated_convs = [convs_tokens[-1][: max_input - 3] + convs_tokens[-1][-3:]] + + prompt_tokens = [header_tokens] + truncated_convs + [assistant_tokens] + prompt_ids = [self.tokenizer.convert_tokens_to_ids(prompt_token) for prompt_token in prompt_tokens] + prompt_ids = list(chain(*prompt_ids)) + + resp_ids = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(response.strip())) + # keep last and eos_id + max_resp = self.max_seq_length - len(prompt_ids) - 1 + + # truncating response + if len(resp_ids) > max_resp: + if self.truncation_side == "right": + resp_ids = resp_ids[: max_resp - 1] + resp_ids[-1:] + else: + resp_ids = resp_ids[-max_resp:] + + # masking + input_ids = prompt_ids + resp_ids + [self.tokenizer.eos_token_id] + if self.mask_input: + labels = [IGNORE_INDEX] * len(prompt_ids) + resp_ids + [self.tokenizer.eos_token_id] + elif self.mask_response: + labels = prompt_ids + [IGNORE_INDEX] * len(resp_ids) + [self.tokenizer.eos_token_id] + else: + labels = input_ids + + # padding + input_len = len(input_ids) + pad_len = self.max_seq_length - input_len + if self.padding_side == "right": + input_ids = input_ids + [self.tokenizer.eos_token_id] * pad_len + labels = labels + [IGNORE_INDEX] * pad_len + attention_mask = [1] * input_len + [0] * pad_len + else: + input_ids = [self.tokenizer.eos_token_id] * pad_len + input_ids + labels = [IGNORE_INDEX] * pad_len + labels + attention_mask = [0] * pad_len + [1] * input_len + + assert len(input_ids) == self.max_seq_length + assert len(prompt_ids) <= self.max_source_length + assert len(labels) == len(input_ids) == len(attention_mask) + + examples["input_ids"].append(torch.tensor(input_ids)) + examples["labels"].append(labels) + examples["attention_mask"].append(attention_mask) + + return examples + + def tokenize(self, examples): + keys = list(examples.data.keys()) + if len(keys) != 2: + raise ValueError("Unsupported dataset format") + + examples["input_ids"] = [] + examples["labels"] = [] + examples["attention_mask"] = [] + for s, t in zip(examples[keys[0]], examples[keys[1]]): + results = self.tokenizer( + s + t, + padding=self.padding, + truncation=self.truncation, + return_tensors=None, + max_length=self.max_length, + ) + + input_ids = results["input_ids"] + input_len = 
len(input_ids) + labels = copy.deepcopy(input_ids) + if self.mask_input or self.mask_response: + sources_tokenized = self.tokenizer( + s, + padding=False, + truncation=True, + return_tensors=None, + max_length=self.max_length, + ) + input_id_len = len(sources_tokenized["input_ids"]) + # mask input + if self.mask_input: + labels[:input_id_len] = [IGNORE_INDEX] * input_id_len + # mask response + if self.mask_response: + labels[input_id_len:input_len] = [IGNORE_INDEX] * (input_len - input_id_len) + + examples["input_ids"].append(results["input_ids"]) + examples["labels"].append(labels) + examples["attention_mask"].append(results["attention_mask"]) + return examples + + +class PretrainingDataProcessor: + def __init__(self, config, tokenizer): + self.tokenizer = tokenizer + self.max_length = self.max_seq_length = config["Dataset"].get("max_length", 512) + self.truncation = config["Dataset"].get("truncation", True) + self.padding = config["Dataset"].get("padding", True) + + def tokenize(self, examples): + keys = list(examples.data.keys()) + if len(keys) != 1 and "text" not in keys: + raise ValueError("Unsupported dataset format") + + key = keys[0] if len(keys) == 1 else "text" + examples["input_ids"] = [] + examples["labels"] = [] + examples["attention_mask"] = [] + for exp in examples[key]: + results = self.tokenizer( + exp, + padding=self.padding, + truncation=self.truncation, + return_tensors=None, + max_length=self.max_length, + ) + + input_ids = results["input_ids"] + labels = copy.deepcopy(input_ids) + examples["input_ids"].append(results["input_ids"]) + examples["labels"].append(labels) + examples["attention_mask"].append(results["attention_mask"]) + return examples + + +class TrainDatasetForCE(Dataset): + def __init__(self, dataset, args, tokenizer): + self.dataset = dataset + self.tokenizer = tokenizer + self.args = args + self.total_len = len(self.dataset) + + def create_one_example(self, qry_encoding: str, doc_encoding: str): + item = self.tokenizer.encode_plus( + qry_encoding, + doc_encoding, + truncation=True, + max_length=self.args.get("max_length", 512), + padding=False, + ) + return item + + def __len__(self): + return self.total_len + + def __getitem__(self, item) -> List[BatchEncoding]: + query = self.dataset[item]["query"] + pos = random.choice(self.dataset[item]["pos"]) + train_group_size = self.args.get("train_group_size", 8) + if len(self.dataset[item]["neg"]) < train_group_size - 1: + num = math.ceil((train_group_size - 1) / len(self.dataset[item]["neg"])) + negs = random.sample(self.dataset[item]["neg"] * num, train_group_size - 1) + else: + negs = random.sample(self.dataset[item]["neg"], train_group_size - 1) + + batch_data = [] + batch_data.append(self.create_one_example(query, pos)) + for neg in negs: + batch_data.append(self.create_one_example(query, neg)) + + return batch_data + + +@dataclass +class GroupCollator(DataCollatorWithPadding): + def __call__(self, features) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: + if isinstance(features[0], list): + features = sum(features, []) + return super().__call__(features) + + +class TrainDatasetForEmbedding(Dataset): + def __init__(self, dataset, args, tokenizer): + self.dataset = dataset + self.tokenizer = tokenizer + self.args = args + self.total_len = len(self.dataset) + + def __len__(self): + return self.total_len + + def __getitem__(self, item) -> Tuple[str, List[str]]: + query = self.dataset[item]["query"] + if self.args["query_instruction_for_retrieval"] is not None: + query = 
self.args["query_instruction_for_retrieval"] + query + + passages = [] + + assert isinstance(self.dataset[item]["pos"], list) + pos = random.choice(self.dataset[item]["pos"]) + passages.append(pos) + + train_group_size = self.args.get("train_group_size", 8) + if len(self.dataset[item]["neg"]) < train_group_size - 1: + num = math.ceil((train_group_size - 1) / len(self.dataset[item]["neg"])) + negs = random.sample(self.dataset[item]["neg"] * num, train_group_size - 1) + else: + negs = random.sample(self.dataset[item]["neg"], train_group_size - 1) + passages.extend(negs) + + if self.args["passage_instruction_for_retrieval"] is not None: + passages = [self.args["passage_instruction_for_retrieval"] + p for p in passages] + return query, passages + + +@dataclass +class EmbedCollator(DataCollatorWithPadding): + """Wrapper that does conversion from List[Tuple[encode_qry, encode_psg]] to List[qry], List[psg] + and pass batch separately to the actual collator. + + Abstract out data detail for the model. + """ + + query_max_len: int = 32 + passage_max_len: int = 128 + + def __call__(self, features): + query = [f[0] for f in features] + passage = [f[1] for f in features] + + if isinstance(query[0], list): + query = sum(query, []) + if isinstance(passage[0], list): + passage = sum(passage, []) + + q_collated = self.tokenizer( + query, + padding=self.padding, + truncation=True, + max_length=self.query_max_len, + return_tensors="pt", + ) + d_collated = self.tokenizer( + passage, + padding=self.padding, + truncation=True, + max_length=self.passage_max_len, + return_tensors="pt", + ) + return {"query": q_collated, "passage": d_collated} diff --git a/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py b/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py new file mode 100644 index 0000000000..82f2e65c1d --- /dev/null +++ b/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py @@ -0,0 +1,602 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +#!/usr/bin/env python + +import argparse +import os +import sys +from itertools import chain +from typing import Any, Dict, Optional + +import datasets +import ray +import torch +import transformers +from peft import LoraConfig, get_peft_model +from pydantic_yaml import parse_yaml_raw_as +from ray.air import FailureConfig, RunConfig +from ray.air.config import ScalingConfig +from ray.train.torch import TorchTrainer +from transformers import Trainer, TrainingArguments + +from comps import CustomLogger +from comps.finetuning_sqft.finetune_sqft_config import FinetuneConfig +from comps.finetuning_sqft.llm_on_ray import common +from comps.finetuning_sqft.llm_on_ray.finetune.data_process import ( + EmbedCollator, + GroupCollator, + InstructionDataProcessor, + PretrainingDataProcessor, + TrainDatasetForCE, + TrainDatasetForEmbedding, +) +from comps.finetuning_sqft.llm_on_ray.finetune.modeling import BiEncoderModel, CrossEncoder + +logger = CustomLogger("llm_on_ray/finetune") + +try: + from comps.finetuning_sqft.utils.nncf_config_process import load_nncf_config + from nncf.experimental.torch.nas.bootstrapNAS.training.model_creator_helpers import ( + create_compressed_model_from_algo_names, + ) + from nncf.torch.model_creation import create_nncf_network + is_nncf_available = True +except ImportError: + is_nncf_available = False + logger.info("NNCF is not installed. 
Please install it if necessary.") + + +def adapt_transformers_to_device(config: Dict): + device = config["Training"]["device"] + if device in ["hpu"]: + from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + + # adapt transformers to gaudi + adapt_transformers_to_gaudi() + + +def set_seed(config: Dict): + seed = config["Training"].get("seed", None) + if seed is None: + return + device = config["Training"]["device"] + if device in ["cpu", "gpu"]: + from accelerate.utils import set_seed as _set_seed + + _set_seed(seed) + elif device in ["hpu"]: + from optimum.habana.utils import set_seed as _set_seed + + _set_seed(seed) + + +def convert_to_training_args(cls, config: Dict): + device = config["Training"]["device"] + accelerate_mode = config["Training"]["accelerate_mode"] + save_strategy = config["General"]["save_strategy"] + + args = { + "output_dir": config["General"]["output_dir"], + "report_to": config["General"]["report_to"], + "resume_from_checkpoint": config["General"]["resume_from_checkpoint"], + "gradient_checkpointing": config["General"]["enable_gradient_checkpointing"], + "save_strategy": save_strategy if save_strategy != "False" else "no", + "bf16": config["Training"]["mixed_precision"] == "bf16", + "num_train_epochs": config["Training"]["epochs"], + "per_device_train_batch_size": config["Training"]["batch_size"], + "per_device_eval_batch_size": config["Training"]["batch_size"], + "optim": config["Training"]["optimizer"], + "learning_rate": config["Training"]["learning_rate"], + "logging_steps": config["Training"]["logging_steps"], + "lr_scheduler_type": config["Training"]["lr_scheduler"], + "weight_decay": config["Training"]["weight_decay"], + "gradient_accumulation_steps": config["Training"]["gradient_accumulation_steps"], + "do_train": True, + } + + # set attr do_eval + vf = config["Dataset"].get("validation_file", None) + vsp = config["Dataset"].get("validation_split_percentage", 0) + if vf is not None or (vsp / 100 > 0.0 and vsp / 100 < 1.0): + args.update({"do_eval": True}) + + # set attr max_steps + if config["Training"]["max_train_steps"] is not None: + args.update({"max_steps": config["Training"]["max_train_steps"]}) + + # set attr for device cpu + if device == "cpu": + if hasattr(cls, "use_cpu"): + args.update({"use_cpu": True}) + if hasattr(cls, "no_cuda"): + args.update({"no_cuda": True}) + # To be tested: whether it works when enabling Neural Lora Search (using NNCF) + args.update({"use_ipex": True}) + + # set attr 'deepspeed' + if accelerate_mode == "DEEPSPEED": + args.update({"deepspeed": config["Training"]["deepspeed_config_file"]}) + + # set attr for FSDP + # if accelerate_mode == "FSDP": + # args.updatwe({}) + + # set attr for Intel Gaudi + if device == "hpu": + args.update({"use_habana": True}) + args.update({"use_lazy_mode": config["Training"]["hpu_execution_mode"] == "lazy"}) + args.update({"pipelining_fwd_bwd": True}) + + return cls(**args) + + +def convert_dtype(dtype: str) -> Optional[torch.dtype]: + supported_dtypes = { + "fp16": torch.float16, + "bf16": torch.bfloat16, + "no": None, + } + return supported_dtypes[dtype] + + +def load_tokenizer(config: Dict): + if config["General"].get("tokenizer_name") is not None: + tokenizer_name = config["General"].get("tokenizer_name") + else: + tokenizer_name = config["General"]["base_model"] + load_config = config["General"].get("config", {}) + # default padding side is right + padding_side = config["Dataset"].get("padding_side", "right") + # default truncation side is right + 
truncation_side = config["Dataset"].get("truncation_side", "right") + tokenizer = transformers.AutoTokenizer.from_pretrained( + tokenizer_name, padding_side=padding_side, truncation_side=truncation_side, **load_config + ) + return tokenizer + + +def load_dataset(config: Dict): + dataset_file = config["Dataset"].get("train_file", None) + if dataset_file is None: + return + + if os.path.exists(dataset_file): + # load from local file + def local_load(name, **load_config): + if os.path.isfile(name): + file = os.path.basename(os.path.abspath(name)) + path = os.path.dirname(os.path.abspath(name)) + dataset = datasets.load_dataset(path, data_files=file, **load_config) + else: + dataset = datasets.load_dataset(name, **load_config) + return dataset["train"] + + train_dataset = local_load(dataset_file) + validation_file = config["Dataset"].get("validation_file", None) + if validation_file is not None: + validation_dataset = local_load(validation_file) + return datasets.DatasetDict({"train": train_dataset, "validation": validation_dataset}) + + validation_split_percentage = config["Dataset"].get("validation_split_percentage", 0) + if validation_split_percentage / 100 > 0.0 and validation_split_percentage / 100 < 1.0: + dataset_dict = train_dataset.train_test_split(test_size=validation_split_percentage / 100) + dataset_dict["validation"] = dataset_dict["test"] + return dataset_dict + + return datasets.DatasetDict({"train": train_dataset}) + else: + # try to download and load dataset from huggingface.co + load_config = config["General"].get("config", {}) + use_auth_token = load_config.get("token", None) + raw_dataset = datasets.load_dataset(dataset_file, token=use_auth_token) + + validation_split_percentage = config["Dataset"].get("validation_split_percentage", 0) + if "validation" not in raw_dataset.keys() and ( + validation_split_percentage / 100 > 0.0 and validation_split_percentage / 100 < 1.0 + ): + dataset_dict = raw_dataset["train"].train_test_split(test_size=validation_split_percentage / 100) + dataset_dict["validation"] = dataset_dict["test"] + return dataset_dict + + return raw_dataset + + +def tokenize_dataset(config: Dict, tokenizer, dataset): + task = config["General"].get("task", "instruction_tuning") + if task == "instruction_tuning": + group = config["Dataset"].get("group", True) + block_size = config["Dataset"].get("block_size", 512) + tokenizer.pad_token = tokenizer.eos_token if not tokenizer.pad_token else tokenizer.pad_token + + processor = InstructionDataProcessor(config, tokenizer) + + for key in dataset: + prompts = processor.make_prompt(dataset[key]) + dataset[key] = datasets.Dataset.from_dict(prompts) + + column_names = list(dataset["train"].features) + tokenize_fn = ( + processor.tokenize_by_neural_chat + if config["Dataset"].get("data_preprocess_type", "") == "neural_chat" + else processor.tokenize + ) + + tokenized_dataset = dataset.map( + tokenize_fn, + remove_columns=column_names, + batched=True, + load_from_cache_file=False, + desc="Tokenize dataset", + ) + + if group: + + def group_texts(examples): + # Concatenate all texts. + concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + total_length = len(concatenated_examples[list(examples.keys())[0]]) + # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can + # customize this part to your needs. + if total_length >= block_size: + total_length = (total_length // block_size) * block_size + # Split by chunks of max_len. 
+ result = { + k: [t[i : i + block_size] for i in range(0, total_length, block_size)] + for k, t in concatenated_examples.items() + } + return result + + tokenized_dataset = tokenized_dataset.map( + group_texts, + batched=True, + load_from_cache_file=False, + desc=f"Grouping texts in chunks of {block_size}", + ) + + return tokenized_dataset + elif task == "pretraining": + group = True + block_size = config["Dataset"].get("block_size", 512) + tokenizer.pad_token = tokenizer.eos_token if not tokenizer.pad_token else tokenizer.pad_token + + processor = PretrainingDataProcessor(config, tokenizer) + + column_names = list(dataset["train"].features) + + tokenized_dataset = dataset.map( + processor.tokenize, + remove_columns=column_names, + batched=True, + load_from_cache_file=False, + desc="Tokenize dataset", + ) + + if group: + + def group_texts(examples): + # Concatenate all texts. + concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + total_length = len(concatenated_examples[list(examples.keys())[0]]) + # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can + # customize this part to your needs. + if total_length >= block_size: + total_length = (total_length // block_size) * block_size + # Split by chunks of max_len. + result = { + k: [t[i : i + block_size] for i in range(0, total_length, block_size)] + for k, t in concatenated_examples.items() + } + return result + + tokenized_dataset = tokenized_dataset.map( + group_texts, + batched=True, + load_from_cache_file=False, + desc=f"Grouping texts in chunks of {block_size}", + ) + + return tokenized_dataset + elif task == "rerank": + dataset["train"] = TrainDatasetForCE(dataset["train"], config["Dataset"], tokenizer) + return dataset + elif task == "embedding": + dataset["train"] = TrainDatasetForEmbedding(dataset["train"], config["Dataset"], tokenizer) + return dataset + else: + raise NotImplementedError(f"Unsupported task {task}, only support instruction_tuning, rerank, embedding now.") + + +def prepare_data_collator(config: Dict, tokenizer): + task = config["General"].get("task", "instruction_tuning") + if task == "instruction_tuning" or task == "pretraining": + return transformers.DataCollatorForLanguageModeling( + tokenizer=tokenizer, mlm=False, return_tensors="pt", pad_to_multiple_of=8 + ) + elif task == "rerank": + return GroupCollator(tokenizer) + elif task == "embedding": + return EmbedCollator( + tokenizer=tokenizer, + padding=config["Dataset"]["padding"], + query_max_len=config["Dataset"]["query_max_len"], + passage_max_len=config["Dataset"]["passage_max_len"], + ) + else: + raise NotImplementedError(f"Unsupported task {task}, only support instruction_tuning, rerank, embedding now.") + + +def load_model(config: Dict): + model_name = config["General"]["base_model"] + model_dtype = convert_dtype(config["Training"].get("mixed_precision", "no")) + model_config = config["General"].get("config", {}) + task = config["General"].get("task", "instruction_tuning") + compression_ctrl = None + if task == "instruction_tuning" or task == "pretraining": + model = transformers.AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=model_dtype, **model_config) + lora_config = config["General"].get("lora_config", None) + if lora_config and task != "pretraining": + neural_lora_search = lora_config.pop("neural_lora_search", False) + target_module_groups = lora_config.pop("target_module_groups", None) + search_space = lora_config.pop("search_space", None) + nncf_config = 
lora_config.pop("nncf_config", None) + if not lora_config.get("sparse_adapter", False): + # To avoid the error in the following case: + # not using SparsePEFT and not having the peft library that supports SparsePEFT installed. + lora_config.pop("sparse_adapter", False) + peft_config = LoraConfig(**lora_config) + model = get_peft_model(model, peft_config) + + # Neural LoRA Search (NLS) + if neural_lora_search: + if not is_nncf_available: + raise ImportError("NNCF is not installed. Please install it.") + nncf_config = load_nncf_config( + config=config, + model=model, + target_module_groups=target_module_groups, + search_space=search_space, + nncf_config=nncf_config + ) + model = create_nncf_network(model, nncf_config) + compression_ctrl, model = create_compressed_model_from_algo_names( + model, nncf_config, algo_names=["progressive_shrinking"] + ) + elif task == "rerank": + model = CrossEncoder.from_pretrained( + config["Dataset"].get("train_group_size", 8), + config["Training"]["batch_size"], + model_name, + from_tf=bool(".ckpt" in model_name), + config=model_config, + ) + elif task == "embedding": + should_concat = False + if ( + config["Dataset"]["query_max_len"] == config["Dataset"]["passage_max_len"] + and config["Dataset"]["padding"] == "max_length" + ): + should_concat = True + if config["Training"]["device"] == "hpu" and not should_concat: + raise ValueError("please set query_max_len==passage_max_len and padding='max_length' for hpu.") + + if config["Training"].get("embedding_training_config", None) is not None: + model = BiEncoderModel( + model_name=model_name, should_concat=should_concat, **config["Training"]["embedding_training_config"] + ) + else: + model = BiEncoderModel(model_name=model_name, should_concat=should_concat) + else: + raise NotImplementedError(f"Unsupported task {task}, only support instruction_tuning, rerank, embedding now.") + + egc = config["General"].get("enable_gradient_checkpointing", False) + if egc: + model.enable_input_require_grads() + model.gradient_checkpointing_enable() + model.config.use_cache = False + + model.to(dtype=model_dtype, device=torch.device(config["Training"]["device"])) + + return model, compression_ctrl + +def get_trainer(config: Dict, model, tokenizer, tokenized_dataset, data_collator, compression_ctrl=None): + device = config["Training"]["device"] + if device in ["cpu", "gpu", "cuda"]: + training_args = convert_to_training_args(TrainingArguments, config) + trainer_args = { + "model": model, + "args": training_args, + "train_dataset": tokenized_dataset["train"], + "eval_dataset": tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None, + "tokenizer": tokenizer, + "data_collator": data_collator, + } + if compression_ctrl is not None: + trainer_args["compression_ctrl"] = compression_ctrl + + trainer = Trainer(**trainer_args) + return training_args, trainer + elif device in ["hpu"]: + assert compression_ctrl is None + from optimum.habana import GaudiConfig + from optimum.habana.transformers import GaudiTrainer, GaudiTrainingArguments + + # If gaudi_config_name is provided, load gaudi_config from huggingface model hub(https://huggingface.co/Habana), otherwise use default gaudi_config + gaudi_config_name = config["General"].get("gaudi_config_name", None) + if gaudi_config_name is not None: + gaudi_config = GaudiConfig.from_pretrained(gaudi_config_name) + else: + gaudi_config = GaudiConfig() + gaudi_config.use_fused_adam = True + gaudi_config.use_fused_clip_norm = True + + training_args = 
convert_to_training_args(GaudiTrainingArguments, config) + trainer = GaudiTrainer( + model=model, + args=training_args, + gaudi_config=gaudi_config, + train_dataset=tokenized_dataset["train"], + eval_dataset=tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None, + tokenizer=tokenizer, + data_collator=data_collator, + ) + return training_args, trainer + return None + + +def train_func(config: Dict[str, Any]): + os.chdir(config["cwd"]) + + adapt_transformers_to_device(config) + + set_seed(config) + + tokenizer = load_tokenizer(config) + + dataset = load_dataset(config) + + max_train_samples = config["Dataset"].get("max_train_samples", 0) + if 0 < max_train_samples < len(dataset["train"]): + dataset["train"] = dataset["train"].select(range(max_train_samples)) + + max_eval_samples = config["Dataset"].get("max_eval_samples", 0) + if "validation" in dataset and 0 < max_eval_samples < len(dataset["validation"]): + dataset["validation"] = dataset["validation"].select(range(max_eval_samples)) + + tokenized_dataset = tokenize_dataset(config, tokenizer, dataset) + + data_collator = prepare_data_collator(config, tokenizer) + + model, compression_ctrl = load_model(config) + + training_args, trainer = get_trainer(config, model, tokenizer, tokenized_dataset, data_collator, compression_ctrl=compression_ctrl) + + logger.info("train start") + trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) + trainer.save_model() + logger.info("train finish") + + +def get_finetune_config(): + parser = argparse.ArgumentParser(description="Finetune a transformers model on a causal language modeling task") + parser.add_argument( + "--config_file", + type=str, + required=True, + default=None, + help="The name of the dataset to use (via the datasets library).", + ) + + # Print help if no arguments were provided + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + args = parser.parse_args() + config_file = args.config_file + + with open(config_file) as f: + finetune_config = parse_yaml_raw_as(FinetuneConfig, f) + return finetune_config.dict() + + +def main(external_config=None): + if not external_config: + config = get_finetune_config() + else: + config = external_config + + config["cwd"] = os.getcwd() + + num_training_workers = config["Training"].get("num_training_workers") + resources_per_worker = config["Training"].get("resources_per_worker") + + if num_training_workers > 1 and config["Training"].get("accelerate_mode", None) is None: + config["Training"]["accelerate_mode"] = "DDP" # will use DDP to accelerate if no method specified + + ccl_worker_count = 1 + device = config["Training"]["device"] + if device != "cpu": + ccl_worker_count = num_training_workers + + if not ray.is_initialized(): + runtime_env = { + "env_vars": { + "OMP_NUM_THREADS": str(resources_per_worker["CPU"]), + "CCL_ZE_IPC_EXCHANGE": "sockets", + "CCL_WORKER_COUNT": str(ccl_worker_count), + "CCL_LOG_LEVEL": "info", + "FI_TCP_IFACE": "lo", + "FI_PROVIDER": "tcp", + } + } + + if config["General"]["gpt_base_model"] is True: + runtime_env["pip"] = ["transformers==4.26.0"] + + if device == "gpu": + num_cpus = resources_per_worker["CPU"] * num_training_workers + 1 # additional 1 for head worker + ray.init(num_cpus=num_cpus, runtime_env=runtime_env) + else: + ray.init(runtime_env=runtime_env) + + logger.info(f"ray available resources = {ray.available_resources()}") + + use_gpu = True if device == "gpu" else False + scaling_config = ScalingConfig( + 
num_workers=num_training_workers, + use_gpu=use_gpu, + resources_per_worker=resources_per_worker, + placement_strategy="SPREAD", + ) + + # if try to use Intel GPU, convert device to 'xpu' + # due to accelerate internal use 'xpu' represent Intel GPU + if device == "gpu": + from accelerate.utils import is_xpu_available + + if is_xpu_available(): + device = "xpu" + + # Jinjie: commented out the code from line 572 to 581 to temporarily disable CCL for debugging purposes. + # if config.get("torch_config", None) is None: + # backend = None + # if device == "cpu" or device == "xpu" or device == "gpu": + # backend = "ccl" + # elif device == "hpu": + # backend = "hccl" + # torch_config = common.TorchConfig(backend=backend, device=device) + # else: + # customer_torch_config = config.get("torch_config") + # torch_config = common.TorchConfig(**customer_torch_config, device=device) + + if config.get("failure_config", None) is None: + failure_config = FailureConfig() + else: + customer_failure_config = config.get("failure_config") + failure_config = FailureConfig(**customer_failure_config) + + if config.get("run_config", None) is None: + run_config = RunConfig(failure_config=failure_config) + else: + customer_run_config = config.get("run_config") + if customer_run_config.get("failure_config", None) is None: + customer_run_config["failure_config"] = failure_config + run_config = RunConfig(**customer_run_config) + + trainer = TorchTrainer( + train_func, + train_loop_config=config, + scaling_config=scaling_config, + # torch_config=torch_config, # Jinjie: check line 571. + run_config=run_config, + ) + results = trainer.fit() + if external_config is not None: + return results + + +if __name__ == "__main__": + main() diff --git a/comps/finetuning_sqft/llm_on_ray/finetune/modeling.py b/comps/finetuning_sqft/llm_on_ray/finetune/modeling.py new file mode 100644 index 0000000000..7a2884f3bc --- /dev/null +++ b/comps/finetuning_sqft/llm_on_ray/finetune/modeling.py @@ -0,0 +1,211 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Dict, Optional + +import torch +import torch.distributed as dist +from torch import nn +from transformers import AutoModel, AutoModelForSequenceClassification, PreTrainedModel +from transformers.modeling_outputs import MaskedLMOutput, SequenceClassifierOutput + +from comps import CustomLogger + +logger = CustomLogger("llm_on_ray/finetune/modeling") + + +class CrossEncoder(PreTrainedModel): + def __init__(self, hf_model: PreTrainedModel, train_group_size: int, batch_size: int): + super().__init__(hf_model.config) + self.hf_model = hf_model + self.train_group_size = train_group_size + self.batch_size = batch_size + + self.cross_entropy = nn.CrossEntropyLoss(reduction="mean") + + self.register_buffer("target_label", torch.zeros(self.batch_size, dtype=torch.long)) + + def gradient_checkpointing_enable(self, **kwargs): + self.hf_model.gradient_checkpointing_enable(**kwargs) + + def forward(self, **batch): + ranker_out: SequenceClassifierOutput = self.hf_model(**batch, return_dict=True) + logits = ranker_out.logits + + if self.training: + scores = logits.view(-1, self.train_group_size) + loss = self.cross_entropy(scores, self.target_label[: scores.shape[0]]) + + return SequenceClassifierOutput( + loss=loss, + **ranker_out, + ) + else: + return ranker_out + + @classmethod + def from_pretrained(cls, train_group_size: int, batch_size: int, *args, **kwargs): + hf_model = AutoModelForSequenceClassification.from_pretrained(*args, **kwargs) + 
reranker = cls(hf_model, train_group_size, batch_size) + return reranker + + def save_pretrained(self, output_dir: str, **kwargs): + state_dict = self.hf_model.state_dict() + state_dict = type(state_dict)({k: v.clone().cpu() for k, v in state_dict.items()}) + kwargs.pop("state_dict") + self.hf_model.save_pretrained(output_dir, state_dict=state_dict, **kwargs) + + +class BiEncoderModel(nn.Module): + TRANSFORMER_CLS = AutoModel + + def __init__( + self, + model_name: str = None, + should_concat: bool = False, + normalized: bool = False, + sentence_pooling_method: str = "cls", + negatives_cross_device: bool = False, + temperature: float = 1.0, + use_inbatch_neg: bool = True, + ): + super().__init__() + self.model = AutoModel.from_pretrained(model_name, add_pooling_layer=False) + self.cross_entropy = nn.CrossEntropyLoss(reduction="mean") + + self.should_concat = should_concat + self.normalized = normalized + self.sentence_pooling_method = sentence_pooling_method + self.temperature = temperature + self.use_inbatch_neg = use_inbatch_neg + self.config = self.model.config + + if not normalized: + self.temperature = 1.0 + logger.info("reset temperature = 1.0 due to using inner product to compute similarity") + if normalized: + if self.temperature > 0.5: + raise ValueError( + "Temperature should be smaller than 1.0 when use cosine similarity (i.e., normalized=True). Recommend to set it 0.01-0.1" + ) + + self.negatives_cross_device = negatives_cross_device + if self.negatives_cross_device: + if not dist.is_initialized(): + raise ValueError("Distributed training has not been initialized for representation all gather.") + # logger.info("Run in a single GPU, set negatives_cross_device=False") + # self.negatives_cross_device = False + # else: + self.process_rank = dist.get_rank() + self.world_size = dist.get_world_size() + + def gradient_checkpointing_enable(self, **kwargs): + self.model.gradient_checkpointing_enable(**kwargs) + + def sentence_embedding(self, hidden_state, mask): + if self.sentence_pooling_method == "mean": + s = torch.sum(hidden_state * mask.unsqueeze(-1).float(), dim=1) + d = mask.sum(axis=1, keepdim=True).float() + return s / d + elif self.sentence_pooling_method == "cls": + return hidden_state[:, 0] + + def encode(self, features): + if features is None: + return None + psg_out = self.model(**features, return_dict=True) + p_reps = self.sentence_embedding(psg_out.last_hidden_state, features["attention_mask"]) + if self.normalized: + p_reps = torch.nn.functional.normalize(p_reps, dim=-1) + return p_reps.contiguous() + + def encode_concat(self, query, passage): + if query is None or passage is None: + return None + + batch_size = query["input_ids"].size()[0] + + psg_out = self.model( + input_ids=torch.cat([query["input_ids"], passage["input_ids"]]), + attention_mask=torch.cat([query["attention_mask"], passage["attention_mask"]]), + return_dict=True, + ) + reps = self.sentence_embedding( + psg_out.last_hidden_state, torch.cat([query["attention_mask"], passage["attention_mask"]]) + ) + if self.normalized: + reps = torch.nn.functional.normalize(reps, dim=-1) + + q_reps = reps[:batch_size] + p_reps = reps[batch_size:] + + return q_reps.contiguous(), p_reps.contiguous() + + def compute_similarity(self, q_reps, p_reps): + if len(p_reps.size()) == 2: + return torch.matmul(q_reps, p_reps.transpose(0, 1)) + return torch.matmul(q_reps, p_reps.transpose(-2, -1)) + + def forward(self, query: Dict[str, torch.Tensor] = None, passage: Dict[str, torch.Tensor] = None): + if self.should_concat: + q_reps, 
p_reps = self.encode_concat(query, passage) + else: + q_reps = self.encode(query) + p_reps = self.encode(passage) + + if self.training: + if self.negatives_cross_device and self.use_inbatch_neg: + q_reps = self._dist_gather_tensor(q_reps) + p_reps = self._dist_gather_tensor(p_reps) + + group_size = p_reps.size(0) // q_reps.size(0) + if self.use_inbatch_neg: + scores = self.compute_similarity(q_reps, p_reps) / self.temperature # B B*G + scores = scores.view(q_reps.size(0), -1) + + target = torch.arange(scores.size(0), device=scores.device, dtype=torch.long) + target = target * group_size + loss = self.compute_loss(scores, target) + else: + scores = ( + self.compute_similarity( + q_reps[ + :, + None, + :, + ], + p_reps.view(q_reps.size(0), group_size, -1), + ).squeeze(1) + / self.temperature + ) # B G + + scores = scores.view(q_reps.size(0), -1) + target = torch.zeros(scores.size(0), device=scores.device, dtype=torch.long) + loss = self.compute_loss(scores, target) + + else: + scores = self.compute_similarity(q_reps, p_reps) + loss = None + + return MaskedLMOutput(loss=loss, logits=None, hidden_states=None, attentions=None) + + def compute_loss(self, scores, target): + return self.cross_entropy(scores, target) + + def _dist_gather_tensor(self, t: Optional[torch.Tensor]): + if t is None: + return None + t = t.contiguous() + + all_tensors = [torch.empty_like(t) for _ in range(self.world_size)] + dist.all_gather(all_tensors, t) + + all_tensors[self.process_rank] = t + all_tensors = torch.cat(all_tensors, dim=0) + + return all_tensors + + def save(self, output_dir: str): + state_dict = self.model.state_dict() + state_dict = type(state_dict)({k: v.clone().cpu() for k, v in state_dict.items()}) + self.model.save_pretrained(output_dir, state_dict=state_dict) diff --git a/comps/finetuning_sqft/patches/nncf-v2.12.0.patch b/comps/finetuning_sqft/patches/nncf-v2.12.0.patch new file mode 100644 index 0000000000..f4cbfe0401 --- /dev/null +++ b/comps/finetuning_sqft/patches/nncf-v2.12.0.patch @@ -0,0 +1,72 @@ +diff --git a/nncf/experimental/torch/nas/bootstrapNAS/elasticity/elasticity_builder.py b/nncf/experimental/torch/nas/bootstrapNAS/elasticity/elasticity_builder.py +index bc6464b24..ca2666626 100644 +--- a/nncf/experimental/torch/nas/bootstrapNAS/elasticity/elasticity_builder.py ++++ b/nncf/experimental/torch/nas/bootstrapNAS/elasticity/elasticity_builder.py +@@ -152,3 +152,16 @@ class ElasticityBuilder(PTCompressionAlgorithmBuilder): + + # No conflict resolving with the related config options, parameters are overridden by compression state + self._available_elasticity_dims = list(map(ElasticityDim, available_elasticity_dims_state)) ++ ++ def _are_frozen_layers_allowed(self): ++ """ ++ Check if frozen layers are allowed based on NNCF configuration. ++ If specified in NNCF configuration, frozen layers will be allowed. ++ ++ :return: A tuple where the first element is a boolean indicating if frozen layers are allowed, ++ and the second element is a string message explaining the reason. 
++ """ ++ frozen_layers_allowed = self.config.get("bootstrapNAS", {}).get("training", {}).get("frozen_layers_allowed", False) ++ if frozen_layers_allowed: ++ return True, "Frozen layers are allowed (`frozen_layers_allowed` is set to True in NNCF config)" ++ return super()._are_frozen_layers_allowed() +diff --git a/nncf/experimental/torch/nas/bootstrapNAS/training/progressive_shrinking_builder.py b/nncf/experimental/torch/nas/bootstrapNAS/training/progressive_shrinking_builder.py +index 92609327f..7a0555e3e 100644 +--- a/nncf/experimental/torch/nas/bootstrapNAS/training/progressive_shrinking_builder.py ++++ b/nncf/experimental/torch/nas/bootstrapNAS/training/progressive_shrinking_builder.py +@@ -152,3 +152,16 @@ class ProgressiveShrinkingBuilder(PTCompressionAlgorithmBuilder): + self._bn_adapt_params = state_without_name[self._state_names.BN_ADAPTATION_PARAMS] + bn_adapt_algo_kwargs = get_bn_adapt_algo_kwargs(self.config, self._bn_adapt_params) + self._bn_adaptation = BatchnormAdaptationAlgorithm(**bn_adapt_algo_kwargs) if bn_adapt_algo_kwargs else None ++ ++ def _are_frozen_layers_allowed(self): ++ """ ++ Check if frozen layers are allowed based on the algorithm configuration. ++ If specified in the algorithm configuration, frozen layers will be allowed. ++ ++ :return: A tuple where the first element is a boolean indicating if frozen layers are allowed, ++ and the second element is a string message explaining the reason. ++ """ ++ frozen_layers_allowed = self._algo_config.get("frozen_layers_allowed", False) ++ if frozen_layers_allowed: ++ return True, "Frozen layers are allowed (`frozen_layers_allowed` is set to True in the algorithm config)" ++ return super()._are_frozen_layers_allowed() +diff --git a/nncf/torch/layer_utils.py b/nncf/torch/layer_utils.py +index fb7d7bed7..3b8fda98e 100644 +--- a/nncf/torch/layer_utils.py ++++ b/nncf/torch/layer_utils.py +@@ -127,6 +127,25 @@ class _NNCFModuleMixin: + results = op_results + return results + ++ def get_proxy_module(self, *args): ++ """ ++ Gets a proxy module with pre-operations applied. ++ ++ Args: ++ *args: Arguments for the pre-operations. ++ ++ Returns: ++ ProxyModule: The proxy module with pre-operations applied. ++ """ ++ proxy_module = ProxyModule(self) ++ for op in self.pre_ops.values(): ++ op_args = op(proxy_module, args) ++ if op_args is not None: ++ if not isinstance(op_args, tuple): ++ op_args = tuple([op_args]) ++ args = op_args ++ return proxy_module ++ + + class CompressionParameter(nn.Parameter): + """ diff --git a/comps/finetuning_sqft/patches/peft-v0.10.0.patch b/comps/finetuning_sqft/patches/peft-v0.10.0.patch new file mode 100644 index 0000000000..caefc3e735 --- /dev/null +++ b/comps/finetuning_sqft/patches/peft-v0.10.0.patch @@ -0,0 +1,220 @@ +diff --git a/src/peft/tuners/lora/config.py b/src/peft/tuners/lora/config.py +index cc5c60a..fa1422e 100644 +--- a/src/peft/tuners/lora/config.py ++++ b/src/peft/tuners/lora/config.py +@@ -268,6 +268,31 @@ class LoraConfig(PeftConfig): + ) + }, + ) ++ sparse_adapter: bool = field( ++ default=False, ++ metadata={ ++ "help": ( ++ "Enable 'SparsePEFT'. This strategy is designed for fine-tuning sparse models using adapters. " ++ "It sparsifies the adapter's parameter matrix (BA) such that the sparsity pattern of BA aligns " ++ "with that of the base model's weights (W). This alignment allows for the merging of the adapter " ++ "with the base model without disrupting its sparsity. It is derived from SQFT() and is used in the " ++ "pipelines SQFT + SparsePEFT and SQFT + QA-SparsePEFT." 
++ ) ++ } ++ ) ++ quantization_aware: bool = field( ++ default=False, ++ metadata={ ++ "help": ( ++ "Enable quantization-aware training. This strategy is designed for fine-tuning GPTQ quantized models " ++ "using adapters. It activates the `SQFTQuantAwareLinear` from SQFT in place of `QuantLinear`, enabling " ++ "quantization-aware training for adapters. This helps optimize model accuracy and allows the adapter " ++ "to be merged with the base quantized model, improving performance and deployment efficiency during " ++ "inference. This strategy, when used in conjunction with `sparse_adapter`, corresponds to the " ++ "SQFT + QA-SparsePEFT method described in the SQFT paper." ++ ) ++ } ++ ) + + def __post_init__(self): + self.peft_type = PeftType.LORA +diff --git a/src/peft/tuners/lora/gptq.py b/src/peft/tuners/lora/gptq.py +index 333dfa6..7272824 100644 +--- a/src/peft/tuners/lora/gptq.py ++++ b/src/peft/tuners/lora/gptq.py +@@ -108,7 +108,17 @@ def dispatch_gptq( + AutoGPTQQuantLinear = get_auto_gptq_quant_linear(gptq_quantization_config) + + if AutoGPTQQuantLinear is not None and isinstance(target_base_layer, AutoGPTQQuantLinear): +- new_module = QuantLinear(target, adapter_name, **kwargs) ++ quantization_aware = kwargs.get("quantization_aware", False) ++ if quantization_aware: ++ # Attempt to import the `SQFTQuantAwareLinear` module ++ # from https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/blob/main/SQFT/modules/sqft_linear.py ++ try: ++ from modules.sqft_linear import SQFTQuantAwareLinear ++ except ImportError: ++ raise ImportError("The module 'SQFTQuantAwareLinear' could not be imported.") ++ new_module = SQFTQuantAwareLinear(target, adapter_name, **kwargs) ++ else: ++ new_module = QuantLinear(target, adapter_name, **kwargs) + target.qweight = target_base_layer.qweight + + return new_module +diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py +index 829b7bd..9d83967 100644 +--- a/src/peft/tuners/lora/layer.py ++++ b/src/peft/tuners/lora/layer.py +@@ -28,6 +28,10 @@ from peft.utils.other import transpose + + from .config import LoraConfig + ++try: ++ from nncf.torch.layers import NNCFLinear ++except ImportError: ++ NNCFLinear = None + + class LoraLayer(BaseTunerLayer): + # All names of layers that may contain (trainable) adapter weights +@@ -346,6 +350,7 @@ class Linear(nn.Module, LoraLayer): + init_lora_weights: Union[bool, str] = True, + use_rslora: bool = False, + use_dora: bool = False, ++ sparse_adapter: bool = False, # Set this to True if enabling 'SparsePEFT' for fine-tuning sparse models + **kwargs, + ) -> None: + super().__init__() +@@ -363,6 +368,7 @@ class Linear(nn.Module, LoraLayer): + use_dora=use_dora, + ) + self.is_target_conv_1d_layer = is_target_conv_1d_layer ++ self.sparse_adapter = sparse_adapter + + def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None: + """ +@@ -471,6 +477,10 @@ class Linear(nn.Module, LoraLayer): + weight_B = weight_B.float() + + output_tensor = transpose(weight_B @ weight_A, self.fan_in_fan_out) * self.scaling[adapter] ++ if self.sparse_adapter: ++ # Apply the sparse mask to BA (`output_tensor`). 
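++            # (The mask keeps only the positions where the base layer's weight is non-zero, so merging this
++            # delta into the sparse base model preserves its sparsity pattern.)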
++ mask = (self.base_layer.weight != 0) ++ output_tensor = output_tensor * mask + + if cast_to_fp32: + output_tensor = output_tensor.to(dtype=dtype) +@@ -506,7 +516,26 @@ class Linear(nn.Module, LoraLayer): + x = x.to(lora_A.weight.dtype) + + if not self.use_dora[active_adapter]: +- result = result + lora_B(lora_A(dropout(x))) * scaling ++ if not self.sparse_adapter: ++ result = result + lora_B(lora_A(dropout(x))) * scaling ++ else: ++ # Since 'sparse_adapter' is enabled, we need to multiply the parameter matrices of `lora_B` and ++ # `lora_A` here instead of calling the forward methods of `lora_B` and `lora_A`. This results ++ # in the NNCF graph not recognizing lora A and lora B nodes when using NLS strategy. Therefore, ++ # we execute `lora_B(lora_A(x))` solely to include these two NNCFLinear nodes in the NNCF graph. ++ if NNCFLinear is not None and not self.training: ++ lora_B(lora_A(x)) ++ if NNCFLinear is not None and isinstance(lora_A, NNCFLinear): ++ adapter_weight = torch.matmul( ++ lora_B.get_proxy_module(x).weight, ++ lora_A.get_proxy_module(x).weight ++ ) * scaling ++ else: ++ adapter_weight = torch.matmul(lora_B.weight, lora_A.weight) * scaling ++ # Apply the sparse mask to BA (`adapter_weight`). ++ mask = (self.base_layer.weight != 0).detach() ++ adapter_weight = adapter_weight * mask ++ result = result + nn.functional.linear(dropout(x), adapter_weight) + else: + x = dropout(x) + result = result + self._apply_dora(x, lora_A, lora_B, scaling, active_adapter) +diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py +index 3f381ef..3e696ca 100644 +--- a/src/peft/tuners/lora/model.py ++++ b/src/peft/tuners/lora/model.py +@@ -193,6 +193,8 @@ class LoraModel(BaseTuner): + "init_lora_weights": lora_config.init_lora_weights, + "use_rslora": lora_config.use_rslora, + "use_dora": lora_config.use_dora, ++ "quantization_aware": lora_config.quantization_aware, ++ "sparse_adapter": lora_config.sparse_adapter, + "loaded_in_8bit": getattr(self.model, "is_loaded_in_8bit", False), + "loaded_in_4bit": getattr(self.model, "is_loaded_in_4bit", False), + } +@@ -233,7 +235,10 @@ class LoraModel(BaseTuner): + child = child.base_layer + + if not hasattr(new_module, "base_layer"): +- new_module.weight = child.weight ++ if hasattr(child, "qweight"): ++ new_module.qweight = child.qweight ++ else: ++ new_module.weight = child.weight + if hasattr(child, "bias"): + new_module.bias = child.bias + +@@ -401,7 +406,11 @@ class LoraModel(BaseTuner): + Currently gptq quantization and replicated layers do not support merging. 
+ """ + if getattr(self.model, "quantization_method", None) == "gptq": +- raise ValueError("Cannot merge LORA layers when the model is gptq quantized") ++ peft_config = self.get_peft_config_as_dict() ++ # Check if the 'quantization_aware' flag is set to False in the PEFT configuration ++ # Raise an error if the model is GPTQ quantized and 'quantization_aware' is not enabled ++ if not peft_config.get("quantization_aware", False): ++ raise ValueError("Cannot merge LORA layers when the model is gptq quantized") + if self.peft_config.get("layer_replication"): + raise ValueError("Cannot merge LORA layers when base model layers are replicated") + +diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py +index 5ac1264..acb5d27 100644 +--- a/src/peft/utils/save_and_load.py ++++ b/src/peft/utils/save_and_load.py +@@ -246,6 +246,48 @@ def set_peft_model_state_dict(model, peft_model_state_dict, adapter_name="defaul + else: + raise NotImplementedError + ++ def module_reshape(state_dict): ++ """Reshape the linear module to match the state dict. ++ ++ Args: ++ state_dict (dict): The state dict containing the parameters. ++ """ ++ for param_name, param in state_dict.items(): ++ tensor_name = param_name ++ splits = tensor_name.split(".") ++ ++ # If the parameter name has multiple parts, navigate through the module hierarchy ++ if len(splits) > 1: ++ module = model ++ parent = None ++ ++ # Traverse the module hierarchy to find the target module ++ for split in splits[:-1]: ++ new_module = getattr(module, split, None) ++ if new_module is None: ++ raise ValueError(f"{module} has no attribute {split}.") ++ parent = module ++ module = new_module ++ ++ tensor_name = splits[-1] ++ old_value = getattr(module, tensor_name) ++ ++ # Check if the shape of the original module differs from the shape of the loaded parameter ++ if old_value.shape != param.shape and isinstance(module, torch.nn.Linear): ++ # Create a new Linear module with the new shape ++ new_module = torch.nn.Linear( ++ param.shape[1], ++ param.shape[0], ++ bias=module.bias is not None, ++ dtype=module.weight.dtype, ++ device=module.weight.device ++ ) ++ # Replace the old module with the new one in the parent module ++ setattr(parent, splits[-2], new_module) ++ ++ # Reshape the modules in the peft model to match the state dict ++ module_reshape(peft_model_state_dict) ++ + load_result = model.load_state_dict(peft_model_state_dict, strict=False) + if config.is_prompt_learning: + model.prompt_encoder[adapter_name].embedding.load_state_dict( diff --git a/comps/finetuning_sqft/patches/transformers-v4.44.2.patch b/comps/finetuning_sqft/patches/transformers-v4.44.2.patch new file mode 100644 index 0000000000..a35e96297a --- /dev/null +++ b/comps/finetuning_sqft/patches/transformers-v4.44.2.patch @@ -0,0 +1,171 @@ +diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py +index 68ba7babf..6b54a3987 100755 +--- a/src/transformers/trainer.py ++++ b/src/transformers/trainer.py +@@ -155,6 +155,7 @@ from .utils import ( + is_in_notebook, + is_ipex_available, + is_lomo_available, ++ is_nncf_available, + is_peft_available, + is_safetensors_available, + is_sagemaker_dp_enabled, +@@ -245,6 +246,11 @@ if is_accelerate_available(): + if is_accelerate_available("0.28.0"): + from accelerate.utils import DataLoaderConfiguration + ++if is_nncf_available(): ++ from nncf.torch.compression_method_api import PTCompressionAlgorithmController ++else: ++ PTCompressionAlgorithmController = None ++ + + def _is_peft_model(model): + if 
is_peft_available(): +@@ -352,6 +358,8 @@ class Trainer: + by this function will be reflected in the predictions received by `compute_metrics`. + + Note that the labels (second parameter) will be `None` if the dataset does not have them. ++ compression_ctrl ([`PTCompressionAlgorithmController`], *optional*): A compression controller to use. Note that ++ this script only supports `ProgressiveShrinkingController` of NNCF (https://github.com/openvinotoolkit/nncf). + + Important attributes: + +@@ -387,6 +395,7 @@ class Trainer: + callbacks: Optional[List[TrainerCallback]] = None, + optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, ++ compression_ctrl: PTCompressionAlgorithmController = None + ): + if args is None: + output_dir = "tmp_trainer" +@@ -400,6 +409,7 @@ class Trainer: + " summary statistics should be returned by the function." + ) + self.args = args ++ self.compression_ctrl = compression_ctrl + # Seed must be set before instantiating the model when using model + enable_full_determinism(self.args.seed) if self.args.full_determinism else set_seed(self.args.seed) + self.hp_name = None +@@ -1040,7 +1050,10 @@ class Trainer: + optimizer = self.optimizer.optimizer + else: + optimizer = self.optimizer +- self.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer) ++ # If compression_ctrl (`ProgressiveShrinkingController`) is not used, create a scheduler. ++ # If compression_ctrl is used (not None), it will use its own learning rate scheduler. ++ if self.compression_ctrl is None: ++ self.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer) + + def get_decay_parameter_names(self, model) -> List[str]: + """ +@@ -1569,7 +1582,9 @@ class Trainer: + self.state.stateful_callbacks["TrainerControl"] = self.control.state() + self.state.save_to_json(os.path.join(output_dir, TRAINER_STATE_NAME)) + torch.save(self.optimizer.state_dict(), os.path.join(output_dir, OPTIMIZER_NAME)) +- torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) ++ # Save the learning rate scheduler state if compression_ctrl is not used. ++ if self.compression_ctrl is None: ++ torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) + + def call_model_init(self, trial=None): + model_init_argcount = number_of_arguments(self.model_init) +@@ -2204,8 +2219,16 @@ class Trainer: + if args.eval_on_start: + self._evaluate(trial, ignore_keys_for_eval, skip_scheduler=True) + ++ # Initialize the learning rate scheduler if compression_ctrl is used. ++ if self.compression_ctrl is not None: ++ train_iters = len(train_dataloader) ++ self.compression_ctrl.set_training_lr_scheduler_args(self.optimizer, train_iters) ++ + total_batched_samples = 0 + for epoch in range(epochs_trained, num_train_epochs): ++ # Perform an epoch step for the compression controller's scheduler if it is used. ++ if self.compression_ctrl is not None: ++ self.compression_ctrl.scheduler.epoch_step() + epoch_iterator = train_dataloader + if hasattr(epoch_iterator, "set_epoch"): + epoch_iterator.set_epoch(epoch) +@@ -2234,6 +2257,10 @@ class Trainer: + + step = -1 + for step, inputs in enumerate(epoch_iterator): ++ # Perform a step for the compression controller's scheduler if it is used. ++ # Include actions such as activating the subnetwork or updating the learning rate. 
++ if self.compression_ctrl is not None: ++ self.compression_ctrl.scheduler.step() + total_batched_samples += 1 + + if self.args.include_num_input_tokens_seen: +@@ -2345,7 +2372,10 @@ class Trainer: + optimizer_was_run = not self.accelerator.optimizer_step_was_skipped + if optimizer_was_run: + # Delay optimizer scheduling until metrics are generated +- if not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): ++ if ( ++ not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau) ++ and self.compression_ctrl is None ++ ): + self.lr_scheduler.step() + + model.zero_grad() +@@ -2791,7 +2821,11 @@ class Trainer: + logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4) + if grad_norm is not None: + logs["grad_norm"] = grad_norm.detach().item() if isinstance(grad_norm, torch.Tensor) else grad_norm +- logs["learning_rate"] = self._get_learning_rate() ++ # Retrieve the current learning rate from the compression controller if available, otherwise use the default method ++ if self.compression_ctrl is not None: ++ logs["learning_rate"] = self.compression_ctrl.scheduler.lr_scheduler.get_last_lr()[0] ++ else: ++ logs["learning_rate"] = self._get_learning_rate() + + self._total_loss_scalar += tr_loss_scalar + self._globalstep_last_logged = self.state.global_step +@@ -3015,7 +3049,9 @@ class Trainer: + and not is_torch_xla_available() + ): + with warnings.catch_warnings(record=True) as caught_warnings: +- torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) ++ # Save the learning rate scheduler state if compression_ctrl is not used. ++ if self.compression_ctrl is None: ++ torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) + reissue_pt_warnings(caught_warnings) + + def _load_optimizer_and_scheduler(self, checkpoint): +diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py +index efe473a6c..1040a75f4 100755 +--- a/src/transformers/utils/__init__.py ++++ b/src/transformers/utils/__init__.py +@@ -152,6 +152,7 @@ from .import_utils import ( + is_natten_available, + is_ninja_available, + is_nltk_available, ++ is_nncf_available, + is_onnx_available, + is_openai_available, + is_optimum_available, +diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py +index 3b0abd334..823e8919f 100755 +--- a/src/transformers/utils/import_utils.py ++++ b/src/transformers/utils/import_utils.py +@@ -131,6 +131,7 @@ _levenshtein_available = _is_package_available("Levenshtein") + _librosa_available = _is_package_available("librosa") + _natten_available = _is_package_available("natten") + _nltk_available = _is_package_available("nltk") ++_nncf_available = _is_package_available("nncf") + _onnx_available = _is_package_available("onnx") + _openai_available = _is_package_available("openai") + _optimum_available = _is_package_available("optimum") +@@ -1056,6 +1057,10 @@ def is_nltk_available(): + return _nltk_available + + ++def is_nncf_available(): ++ return _nncf_available ++ ++ + def is_torchaudio_available(): + return _torchaudio_available + diff --git a/comps/finetuning_sqft/requirements.txt b/comps/finetuning_sqft/requirements.txt new file mode 100644 index 0000000000..6eff6b62ac --- /dev/null +++ b/comps/finetuning_sqft/requirements.txt @@ -0,0 +1,17 @@ +aiohttp +datasets +docarray +fastapi +httpx +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +pydantic==2.8.2 
+pydantic_yaml +python-multipart +pyyaml +ray[all] +requests +shortuuid +uvicorn diff --git a/comps/finetuning_sqft/utils/extract_sub_adapter.py b/comps/finetuning_sqft/utils/extract_sub_adapter.py new file mode 100644 index 0000000000..ae2a3b7faf --- /dev/null +++ b/comps/finetuning_sqft/utils/extract_sub_adapter.py @@ -0,0 +1,101 @@ +import argparse +import os +import re + +import torch +from nncf import NNCFConfig +from peft.utils import CONFIG_NAME, WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME + +PATTERN = re.compile(r"[[](.*?)[]]", re.S) + + +def get_width_for_query_prefix(torch_module_to_width, query_module, length=5): + """ + Get the width for a given query module prefix. + + Args: + torch_module_to_width (dict): Mapping from torch module to width. + query_module (str): The query module name. + length (int, optional): The length of the prefix to match. Default is 5. + + Returns: + int: The width for the query module prefix. + """ + query_module_list = query_module.split(".") + width = next( + ( + value + for torch_module, value in torch_module_to_width.items() + if torch_module.split(".")[:length] == query_module_list[:length] + ), + None, + ) + return width + + +def main(adapter_model_path, nncf_config, sub_adapter_version, custom_config=None): + output_dir = os.path.join(adapter_model_path, sub_adapter_version) + os.makedirs(output_dir, exist_ok=True) + nncf_config = NNCFConfig.from_json(nncf_config) + try: + overwrite_groups = nncf_config["bootstrapNAS"]["training"]["elasticity"]["width"]["overwrite_groups"] + overwrite_groups_widths = nncf_config["bootstrapNAS"]["training"]["elasticity"]["width"][ + "overwrite_groups_widths" + ] + assert len(overwrite_groups) == len(overwrite_groups_widths) + except Exception: + raise ValueError("Cannot get the search space in NNCF config.") + + if sub_adapter_version == "maximal": + subnetwork_config = {idx: space[0] for idx, space in enumerate(overwrite_groups_widths)} + elif sub_adapter_version == "heuristic": + subnetwork_config = {idx: space[(len(space) - 1) // 2] for idx, space in enumerate(overwrite_groups_widths)} + elif sub_adapter_version == "minimal": + subnetwork_config = {idx: space[-1] for idx, space in enumerate(overwrite_groups_widths)} + else: + assert custom_config is not None, "Missing custom subnetwork config." + assert isinstance(custom_config, list), "Custom config must be a list." 
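+        # Illustrative example (hypothetical values): if the NNCF config defines three elastic groups,
+        # custom_config=[16, 12, 8] assigns one width per group, in the same order as `overwrite_groups_widths`.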
+        subnetwork_config = {i: value for i, value in enumerate(custom_config)}
+
+    # Mapping: nncf node -> width
+    nncf_node_to_width = {}
+    for idx, value in subnetwork_config.items():
+        space = overwrite_groups_widths[idx]
+        assert min(space) <= value <= max(space)
+        cur_dict = {node: value for node in overwrite_groups[idx]}
+        nncf_node_to_width.update(cur_dict)
+
+    # Prune adapter model (LoRA low-rank)
+    lora_torch_module_to_width = {
+        ".".join(re.findall(PATTERN, k)): v for k, v in nncf_node_to_width.items() if "lora_A" in k
+    }
+    num_module_name_item = list(lora_torch_module_to_width.keys())[0].split(".").index("lora_A")
+    # Load adapter weights
+    try:
+        super_adapter_weights = torch.load(os.path.join(adapter_model_path, WEIGHTS_NAME))
+    except Exception:
+        from safetensors.torch import load_file
+        super_adapter_weights = load_file(os.path.join(adapter_model_path, SAFETENSORS_WEIGHTS_NAME))
+    sub_adapter_weights = {}
+    for weight_key, weight_tensor in super_adapter_weights.items():
+        width = get_width_for_query_prefix(lora_torch_module_to_width, weight_key, length=num_module_name_item)
+        if width is not None:
+            is_loraA = "lora_A" in weight_key
+            new_weight_tensor = weight_tensor[:width].clone() if is_loraA else weight_tensor[:, :width].clone()
+        else:
+            new_weight_tensor = weight_tensor.clone()
+        sub_adapter_weights[weight_key] = new_weight_tensor
+    os.makedirs(output_dir, exist_ok=True)
+    torch.save(sub_adapter_weights, os.path.join(output_dir, WEIGHTS_NAME))
+    config_path = os.path.join(adapter_model_path, CONFIG_NAME)
+    os.system(f"cp {config_path} {output_dir}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Extract a sub-adapter from an NLS-trained super-adapter")
+    parser.add_argument('--adapter_model_path', type=str, required=True, help="Path to the adapter model")
+    parser.add_argument('--nncf_config', type=str, required=True, help="Path to the NNCF configuration")
+    parser.add_argument('--sub_adapter_version', type=str, required=True, help="Sub-adapter version: maximal, heuristic, minimal, or a custom name (requires --custom_config)")
+    parser.add_argument('--custom_config', type=str, default=None, help="Custom sub-adapter configuration: one width per elastic group (optional)")
+    args = parser.parse_args()
+    main(args.adapter_model_path, args.nncf_config, args.sub_adapter_version, args.custom_config)
diff --git a/comps/finetuning_sqft/utils/merge.py b/comps/finetuning_sqft/utils/merge.py
new file mode 100644
index 0000000000..51b8381235
--- /dev/null
+++ b/comps/finetuning_sqft/utils/merge.py
@@ -0,0 +1,27 @@
+import argparse
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+def main(base_model_path, adapter_model_path, output_path):
+    base_model = AutoModelForCausalLM.from_pretrained(base_model_path, trust_remote_code=True)
+    model = PeftModel.from_pretrained(base_model, adapter_model_path)
+    model.eval()
+    for name, param in model.named_parameters():
+        param.requires_grad = False
+    merged_model = model.merge_and_unload()
+    merged_model.train(False)
+    base_model.save_pretrained(output_path, state_dict=merged_model.state_dict())
+
+    tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
+    tokenizer.save_pretrained(output_path)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Merge base model and adapter model")
+    parser.add_argument('--base_model_path', type=str, required=True, help="Path to the base model")
+    parser.add_argument('--adapter_model_path', type=str, required=True, help="Path to the adapter model")
+    parser.add_argument('--output_path',
type=str, required=True, help="Path to save the merged model") + + args = parser.parse_args() + main(args.base_model_path, args.adapter_model_path, args.output_path) diff --git a/comps/finetuning_sqft/utils/nncf_config_process.py b/comps/finetuning_sqft/utils/nncf_config_process.py new file mode 100644 index 0000000000..521e6fefa7 --- /dev/null +++ b/comps/finetuning_sqft/utils/nncf_config_process.py @@ -0,0 +1,156 @@ +import os +import json +from nncf import NNCFConfig + + +NNCF_CONFIG_TEMPLATE = { + "input_info": [ + { + "sample_size": [1, 256], + "type": "long", + "keyword": "input_ids" + }, + { + "sample_size": [1, 256], + "type": "long", + "keyword": "attention_mask" + } + ], + "bootstrapNAS": { + "training": { + "algorithm": "progressive_shrinking", + "frozen_layers_allowed": True, + "progressivity_of_elasticity": ["width"], + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 0 + }, + "schedule": { + "list_stage_descriptions": [ + {"train_dims": ["width"], "epochs": -1, "depth_indicator": 1, "width_indicator": 8, "init_lr": -1, "epochs_lr": -1, "sample_rate": 1} + ] + }, + "elasticity": { + "available_elasticity_dims": ["width"], + "width": { + "overwrite_groups": [], + "overwrite_groups_widths": [] + } + } + } + } +} + + +def add_lr_epochs(nncf_config, learning_rate=3e-4, num_epochs=3): + """Add learning rate and epochs to the NNCF configuration. + + Args: + nncf_config (dict): The NNCF configuration dictionary. + learning_rate (float): The initial learning rate to set. + num_epochs (int): The number of epochs to set. + + Returns: + dict: The updated NNCF configuration. + """ + stage_description = nncf_config["bootstrapNAS"]["training"]["schedule"]["list_stage_descriptions"][0] + if stage_description["init_lr"] == -1: + stage_description["init_lr"] = learning_rate + if stage_description["epochs"] == -1: + stage_description["epochs"] = num_epochs + stage_description["epochs_lr"] = num_epochs + + return nncf_config + + +def get_model_paths(model, target_module_name): + """ + Find all paths to the target layer in the model. + + Args: + model (torch.nn.Module): The model to search. + target_module_name (str): The name of the target layer. + + Returns: + list: A list of paths to the target layer. + """ + def find_layers(module, target_module_name, path, paths): + for name, sub_module in module.named_children(): + new_path = f"{path}/{sub_module.__class__.__name__}[{name}]" + if target_module_name in name: + # Check if 'lora_A' is in the sub_module's children + for sub_name, _ in sub_module.named_children(): + if "lora_A" in sub_name: + paths.append(f"{new_path}/ModuleDict[lora_A]/NNCFLinear[default]/linear_0") + find_layers(sub_module, target_module_name, new_path, paths) + + base_path = model.__class__.__name__ + paths = [] + find_layers(model, target_module_name, base_path, paths) + return paths + +def load_nncf_config( + config, + model, + target_module_groups=None, + search_space=None, + nncf_config=None +): + """Load and preprocess the NNCF configuration file. + + Returns: + NNCFConfig: The preprocessed NNCF configuration object. + """ + + if nncf_config is not None: + nncf_config = NNCFConfig.from_json(nncf_config) + else: + if search_space is None and target_module_groups: + raise ValueError("Neural LoRA search is enabled, `search_space` and `target_module_groups` must be provided.") + # The NNCF Config will be automatically generated based on `target_module_groups` and `search_space`. 
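+        # Illustrative example (hypothetical values): target_module_groups=[["q_proj", "k_proj", "v_proj"]] with
+        # search_space=["16,12,8"] creates one elastic width group per decoder layer, tying that layer's q/k/v
+        # LoRA-A modules to a shared low-rank choice from [16, 12, 8].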
num_hidden_layers = model.config.num_hidden_layers
+        nncf_config_dict = NNCF_CONFIG_TEMPLATE
+        overwrite_groups = []
+        for group in target_module_groups:
+            group_paths = []
+            for module in group:
+                target_layer_name = module
+                paths = get_model_paths(model, target_layer_name)
+                assert paths, f"No paths found for module {module}"
+                group_paths.append(paths)
+            # Transpose the list of lists to combine paths by their positions
+            transposed_paths = list(zip(*group_paths))
+            overwrite_groups.extend([list(path_group) for path_group in transposed_paths])
+        nncf_config_dict["bootstrapNAS"]["training"]["elasticity"]["width"]["overwrite_groups"] = overwrite_groups
+
+        overwrite_groups_widths = []
+        for space in search_space:
+            space = [int(width) for width in space.split(",")]
+            overwrite_groups_widths.extend([space] * num_hidden_layers)
+        nncf_config_dict["bootstrapNAS"]["training"]["elasticity"]["width"]["overwrite_groups_widths"] = overwrite_groups_widths
+        assert len(overwrite_groups) == len(overwrite_groups_widths)
+        nncf_config_dict = add_lr_epochs(
+            nncf_config_dict,
+            learning_rate=config["Training"]["learning_rate"],
+            num_epochs=config["Training"]["epochs"]
+        )
+        nncf_config = NNCFConfig.from_dict(nncf_config_dict)
+
+    nncf_config["log_dir"] = config["General"]["output_dir"]
+    os.makedirs(nncf_config["log_dir"], exist_ok=True)
+    with open(os.path.join(nncf_config["log_dir"], "nncf_config.json"), "w") as f:
+        json.dump(nncf_config, f, indent=4)
+    return nncf_config
+
+
+if __name__ == '__main__':
+    import transformers
+    from peft import LoraConfig, get_peft_model
+    model = transformers.AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+    lora_config = {
+        "task_type": "CAUSAL_LM",
+        "r": 16,
+        "target_modules": ["q_proj", "k_proj", "v_proj", "up_proj", "down_proj"]
+    }
+    peft_config = LoraConfig(**lora_config)
+    model = get_peft_model(model, peft_config)
+    # `load_nncf_config` reads the learning rate, epochs, and output directory from the finetune config,
+    # so this smoke test passes a minimal stand-in config (placeholder values).
+    demo_config = {
+        "Training": {"learning_rate": 3e-4, "epochs": 3},
+        "General": {"output_dir": "nncf_config_demo"},
+    }
+    load_nncf_config(demo_config, model, [["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]], ["16,12,8", "16", "16,12"])

From 6cbab5998ef2b7e3c89acdb829a8dcc0f565fb80 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 25 Nov 2024 20:26:49 +0000
Subject: [PATCH 02/17] [pre-commit.ci] auto fixes from pre-commit.com hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

for more information, see https://pre-commit.ci

Signed-off-by: J.
Pablo Muñoz --- comps/finetuning/README.md | 4 +- comps/finetuning/finetune_config.py | 10 +- comps/finetuning/finetuning_service.py | 9 +- comps/finetuning/handlers.py | 21 +- .../llm_on_ray/finetune/finetune.py | 20 +- .../utils/create_sqft_nncf_config.py | 40 +- comps/finetuning/utils/extract_sub_adapter.py | 11 +- comps/finetuning/utils/merge_adapter.py | 3 + comps/finetuning_sqft/README.md | 18 +- .../example_nncf_config/nncf_config.json | 978 +++++++----------- comps/finetuning_sqft/finetune_sqft_config.py | 10 +- .../finetuning_sqft_service.py | 9 +- comps/finetuning_sqft/handlers.py | 21 +- .../llm_on_ray/finetune/finetune.py | 15 +- .../patches/peft-v0.10.0.patch | 2 +- .../utils/extract_sub_adapter.py | 17 +- comps/finetuning_sqft/utils/merge.py | 10 +- .../utils/nncf_config_process.py | 76 +- 18 files changed, 510 insertions(+), 764 deletions(-) diff --git a/comps/finetuning/README.md b/comps/finetuning/README.md index d2e26582f4..d6ad323670 100644 --- a/comps/finetuning/README.md +++ b/comps/finetuning/README.md @@ -114,9 +114,9 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ }' ``` -#### 3.2.2 Instruction Tuning with SQFT's Neural Low-Rank Adapter Search (NLS) +#### 3.2.2 Instruction Tuning with SQFT's Neural Low-Rank Adapter Search (NLS) -In addition to traditional fine-tuning, you can use SQFT's NLS to fine-tune your model. +In addition to traditional fine-tuning, you can use SQFT's NLS to fine-tune your model. More details about SQFT can be found in [this paper](https://aclanthology.org/2024.findings-emnlp.749.pdf). Please follow the additional installation requirements [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#-start-nls-microservice-with-python). Use the following command to launch a finetuning job with the NLS algorithm: diff --git a/comps/finetuning/finetune_config.py b/comps/finetuning/finetune_config.py index 3f297c80f1..df9a6be9bb 100644 --- a/comps/finetuning/finetune_config.py +++ b/comps/finetuning/finetune_config.py @@ -5,7 +5,7 @@ from typing import List, Optional, Union -from pydantic import BaseModel, Field, validator, root_validator +from pydantic import BaseModel, Field, root_validator, validator from comps.cores.proto.api_protocol import FineTuningJobIDRequest, FineTuningJobsRequest @@ -44,10 +44,10 @@ class SQFTNLSConfig(LoraConfig): @root_validator(pre=True) def set_target_modules(cls, values): - target_module_groups = values.get('target_module_groups') + target_module_groups = values.get("target_module_groups") if target_module_groups is not None: - values['target_modules'] = [item for sublist in target_module_groups for item in sublist] - search_space = values.get('search_space') + values["target_modules"] = [item for sublist in target_module_groups for item in sublist] + search_space = values.get("search_space") if search_space is not None: assert len(search_space) == len(target_module_groups) return values @@ -217,9 +217,11 @@ class FineTuningParams(FineTuningJobsRequest): Dataset: DatasetConfig = DatasetConfig() Training: TrainingConfig = TrainingConfig() + class ExtractSubAdapterParams(FineTuningJobIDRequest): adapter_version: str = "heuristic" custom_config: Optional[List[int]] = None + class MergeAdapterParams(FineTuningJobIDRequest): adapter_version: Optional[str] = None diff --git a/comps/finetuning/finetuning_service.py b/comps/finetuning/finetuning_service.py index 4a925ff837..1d76eab0ae 100644 --- a/comps/finetuning/finetuning_service.py +++ b/comps/finetuning/finetuning_service.py 
@@ -4,7 +4,7 @@ from comps import opea_microservices, register_microservice from comps.cores.proto.api_protocol import FineTuningJobIDRequest, UploadFileRequest -from comps.finetuning.finetune_config import FineTuningParams, ExtractSubAdapterParams, MergeAdapterParams +from comps.finetuning.finetune_config import ExtractSubAdapterParams, FineTuningParams, MergeAdapterParams from comps.finetuning.handlers import ( handle_cancel_finetuning_job, handle_create_finetuning_jobs, @@ -22,6 +22,7 @@ def create_finetuning_jobs(request: FineTuningParams, background_tasks: BackgroundTasks): return handle_create_finetuning_jobs(request, background_tasks) + @register_microservice( name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015, methods=["GET"] ) @@ -63,10 +64,14 @@ def list_checkpoints(request: FineTuningJobIDRequest): checkpoints = handle_list_finetuning_checkpoints(request) return checkpoints -@register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/extract_sub_adapter", host="0.0.0.0", port=8015) + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/finetune/extract_sub_adapter", host="0.0.0.0", port=8015 +) def extract_sub_adapter(request: ExtractSubAdapterParams): return handle_extract_sub_adapter(request) + @register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/merge_adapter", host="0.0.0.0", port=8015) def merge_adapter(request: MergeAdapterParams): return handle_merge_adapter(request) diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index dde6424420..fb4fe9afdc 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -152,7 +152,7 @@ def handle_extract_sub_adapter(request: ExtractSubAdapterParams): if not os.path.exists(finetuned_model_path): raise HTTPException( status_code=404, - detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!" + detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!", ) if job.status != "succeeded": raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") @@ -160,27 +160,27 @@ def handle_extract_sub_adapter(request: ExtractSubAdapterParams): if finetune_config.General.lora_config is None: raise HTTPException( status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!" + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!", ) if not finetune_config.General.lora_config.neural_lora_search: raise HTTPException( status_code=404, detail=f"The fine-tuning job '{fine_tuning_job_id}' did not enable NLS algorithm, " - f"there is no need to extract sub-adapters!" + f"there is no need to extract sub-adapters!", ) nncf_config_path = os.path.join(finetune_config.General.output_dir, "nncf_config.json") if not os.path.exists(nncf_config_path): raise HTTPException( - status_code=404, - detail=f"The NNCF config file does not exist in the fine-tuning job '{fine_tuning_job_id}!" + status_code=404, detail=f"The NNCF config file does not exist in the fine-tuning job '{fine_tuning_job_id}!" 
) from comps.finetuning.utils.extract_sub_adapter import main as extract_sub_adapter_main + extract_sub_adapter_main( adapter_model_path=finetuned_model_path, nncf_config=nncf_config_path, adapter_version=request.adapter_version, - custom_config=request.custom_config + custom_config=request.custom_config, ) return fine_tuning_job_id @@ -199,7 +199,7 @@ def handle_merge_adapter(request: MergeAdapterParams): if not os.path.exists(finetuned_model_path): raise HTTPException( status_code=404, - detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!" + detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!", ) if job.status != "succeeded": raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") @@ -207,7 +207,7 @@ def handle_merge_adapter(request: MergeAdapterParams): if finetune_config.General.lora_config is None: raise HTTPException( status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!" + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!", ) adapter_path = finetuned_model_path @@ -217,14 +217,15 @@ def handle_merge_adapter(request: MergeAdapterParams): if not os.path.exists(adapter_path): raise HTTPException( status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not have a '{adapter_version}' adapter!" + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not have a '{adapter_version}' adapter!", ) from comps.finetuning.utils.merge_adapter import main as merge_adapter_main + merge_adapter_main( base_model_path=finetune_config.General.base_model, adapter_model_path=adapter_path, - output_path=os.path.join(adapter_path, "merged_model") + output_path=os.path.join(adapter_path, "merged_model"), ) return fine_tuning_job_id diff --git a/comps/finetuning/llm_on_ray/finetune/finetune.py b/comps/finetuning/llm_on_ray/finetune/finetune.py index 97a6257b33..5216cc660b 100644 --- a/comps/finetuning/llm_on_ray/finetune/finetune.py +++ b/comps/finetuning/llm_on_ray/finetune/finetune.py @@ -40,11 +40,13 @@ logger = CustomLogger("llm_on_ray/finetune") try: - from comps.finetuning.utils.create_sqft_nncf_config import create_sqft_nncf_config from nncf.experimental.torch.nas.bootstrapNAS.training.model_creator_helpers import ( create_compressed_model_from_algo_names, ) from nncf.torch.model_creation import create_nncf_network + + from comps.finetuning.utils.create_sqft_nncf_config import create_sqft_nncf_config + is_nncf_available = True except ImportError: is_nncf_available = False @@ -368,15 +370,10 @@ def load_model(config: Dict): if not is_nncf_available: raise NotImplementedError("NNCF is not installed. 
Please install it for enabling NLS algorithm.") nncf_config = create_sqft_nncf_config( - config=config, - model=model, - target_module_groups=target_module_groups, - search_space=search_space + config=config, model=model, target_module_groups=target_module_groups, search_space=search_space ) model = create_nncf_network(model, nncf_config) - nls_controller, model = create_compressed_model_from_algo_names( - model, nncf_config, algo_names=["nls"] - ) + nls_controller, model = create_compressed_model_from_algo_names(model, nncf_config, algo_names=["nls"]) elif task == "rerank": model = CrossEncoder.from_pretrained( config["Dataset"].get("train_group_size", 8), @@ -414,6 +411,7 @@ def load_model(config: Dict): return model, ref_model, nls_controller + def get_trainer(config: Dict, model, ref_model, tokenizer, tokenized_dataset, data_collator, nls_controller=None): device = config["Training"]["device"] task = config["General"].get("task", "instruction_tuning") @@ -443,7 +441,9 @@ def get_trainer(config: Dict, model, ref_model, tokenizer, tokenized_dataset, da "model": model, "args": training_args, "train_dataset": tokenized_dataset["train"], - "eval_dataset": tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None, + "eval_dataset": ( + tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None + ), "tokenizer": tokenizer, "data_collator": data_collator, } @@ -453,7 +453,7 @@ def get_trainer(config: Dict, model, ref_model, tokenizer, tokenized_dataset, da return training_args, trainer elif device in ["hpu"]: if nls_controller is not None: - raise NotImplementedError(f"NLS algorithm is not supported on HPU now.") + raise NotImplementedError("NLS algorithm is not supported on HPU now.") from optimum.habana import GaudiConfig from optimum.habana.transformers import GaudiTrainer, GaudiTrainingArguments diff --git a/comps/finetuning/utils/create_sqft_nncf_config.py b/comps/finetuning/utils/create_sqft_nncf_config.py index eb76fcc310..731791da41 100644 --- a/comps/finetuning/utils/create_sqft_nncf_config.py +++ b/comps/finetuning/utils/create_sqft_nncf_config.py @@ -1,9 +1,13 @@ -import os +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import json +import os try: from nncf import NNCFConfig from nncf.experimental.torch import sqft + is_nncf_available = True except ImportError: is_nncf_available = False @@ -11,29 +15,18 @@ NNCF_CONFIG_TEMPLATE = { "input_info": [ - { - "sample_size": [1, 256], - "type": "long", - "keyword": "input_ids" - }, - { - "sample_size": [1, 256], - "type": "long", - "keyword": "attention_mask" - } + {"sample_size": [1, 256], "type": "long", "keyword": "input_ids"}, + {"sample_size": [1, 256], "type": "long", "keyword": "attention_mask"}, ], "SQFT": { "training": { "algorithm": "nls", "elasticity": { "available_elasticity_dims": ["width"], - "width": { - "overwrite_groups": [], - "overwrite_groups_widths": [] - } - } + "width": {"overwrite_groups": [], "overwrite_groups_widths": []}, + }, } - } + }, } @@ -65,8 +58,7 @@ def add_lr_epochs(nncf_config, learning_rate=3e-4, num_train_epochs=3): def get_model_paths(model, target_module_name): - """ - Find all paths to the target layer in the model. + """Find all paths to the target layer in the model. Args: model (torch.nn.Module): The model to search. @@ -75,6 +67,7 @@ def get_model_paths(model, target_module_name): Returns: list: A list of paths to the target layer. 
""" + def find_layers(module, target_module_name, path, paths): for name, sub_module in module.named_children(): new_path = f"{path}/{sub_module.__class__.__name__}[{name}]" @@ -91,12 +84,7 @@ def find_layers(module, target_module_name, path, paths): return paths -def create_sqft_nncf_config( - config, - model, - target_module_groups=None, - search_space=None -): +def create_sqft_nncf_config(config, model, target_module_groups=None, search_space=None): """Load and preprocess the NNCF configuration file. Returns: @@ -131,7 +119,7 @@ def create_sqft_nncf_config( nncf_config_dict = add_lr_epochs( nncf_config_dict, learning_rate=config["Training"]["learning_rate"], - num_train_epochs=config["Training"]["epochs"] + num_train_epochs=config["Training"]["epochs"], ) nncf_config = NNCFConfig.from_dict(nncf_config_dict) diff --git a/comps/finetuning/utils/extract_sub_adapter.py b/comps/finetuning/utils/extract_sub_adapter.py index f7b0bf6ff1..00f477f684 100644 --- a/comps/finetuning/utils/extract_sub_adapter.py +++ b/comps/finetuning/utils/extract_sub_adapter.py @@ -1,12 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import os import re import torch - -from peft.utils import CONFIG_NAME, WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME +from peft.utils import CONFIG_NAME, SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME try: from nncf import NNCFConfig + is_nncf_available = True except ImportError: is_nncf_available = False @@ -16,8 +19,7 @@ def get_width_for_query_prefix(torch_module_to_width, query_module, length=5): - """ - Get the width for a given query module prefix. + """Get the width for a given query module prefix. Args: torch_module_to_width (dict): Mapping from torch module to width. @@ -81,6 +83,7 @@ def main(adapter_model_path, nncf_config, adapter_version, custom_config=None): super_adapter_weights = torch.load(os.path.join(adapter_model_path, WEIGHTS_NAME)) except: from safetensors.torch import load_file + super_adapter_weights = load_file(os.path.join(adapter_model_path, SAFETENSORS_WEIGHTS_NAME)) sub_adapter_weights = {} for weight_key, weight_tensor in super_adapter_weights.items(): diff --git a/comps/finetuning/utils/merge_adapter.py b/comps/finetuning/utils/merge_adapter.py index a127061ef6..f1bca2ab51 100644 --- a/comps/finetuning/utils/merge_adapter.py +++ b/comps/finetuning/utils/merge_adapter.py @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + from peft import PeftModel from transformers import AutoModelForCausalLM, AutoTokenizer diff --git a/comps/finetuning_sqft/README.md b/comps/finetuning_sqft/README.md index a5748caf76..e9d2c3e596 100644 --- a/comps/finetuning_sqft/README.md +++ b/comps/finetuning_sqft/README.md @@ -12,6 +12,7 @@ python -m pip install intel-extension-for-pytorch python -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ pip install -r requirements.txt ``` + To enable elastic adapter fine-tuning (Neural Low-Rank Adapter Search) or SparsePEFT from [SQFT](https://arxiv.org/abs/2410.03750), please perform this additional installation: ```bash @@ -94,7 +95,6 @@ Download a training file, such as `alpaca_data.json` for instruction tuning and curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./alpaca_data.json" -F purpose="fine-tune" ``` - ### 3.2 Create fine-tuning job #### 3.2.1 Instruction Tuning @@ -110,7 +110,7 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ "training_file": 
"alpaca_data.json", "model": "meta-llama/Llama-2-7b-chat-hf" }' - + # create a finetuning job (with SparsePEFT) curl http://${your_ip}:8015/v1/fine_tuning/jobs \ -X POST \ @@ -124,7 +124,7 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ } } }' - + # create a fine-tuning job (with Neural Low-rank adapter Search) # Max LoRA rank: 16 # LoRA target modules -> Low-rank search space @@ -151,8 +151,8 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ Below are some explanations for the parameters related to the Neural Low-rank adapter Search algorithm: - `target_module_groups` specifies the target module groups, which means that the adapters within the same group will share the same activated low-rank value. -- `search_space` specifies the search space for each target module (adapter) group. -Here, it is `["16,12,8", "16,12,8", "16,12,8"]`, meaning that the search space for each group is [16, 12, 8]. +- `search_space` specifies the search space for each target module (adapter) group. + Here, it is `["16,12,8", "16,12,8", "16,12,8"]`, meaning that the search space for each group is [16, 12, 8]. Note that the number of groups should be equal to the number of search spaces (one-to-one correspondence). Feel free to try your favorite group design and search spaces. @@ -179,7 +179,7 @@ curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Typ #### 3.4.1 Extract the sub-adapter -After completing the super-adapter fine-tuning (the checkpoints of the fine-tuning job), +After completing the super-adapter fine-tuning (the checkpoints of the fine-tuning job), the following command demonstrates how to extract the heuristic sub-adapter. Additionally, more powerful sub-adapters can be obtained through other advanced search algorithms. @@ -211,10 +211,10 @@ curl http://${your_ip}:8015/v1/finetune/extract_adapter \ }' ``` -In the fine-tuning job with Neural Low-rank adapter Search algorithm, the `nncf_config.json` file (which includes the elastic adapter information) will be saved in the output directory. +In the fine-tuning job with Neural Low-rank adapter Search algorithm, the `nncf_config.json` file (which includes the elastic adapter information) will be saved in the output directory. The `custom_config` must correspond with the `overwrite_groups` (adapter modules) or `overwrite_groups_widths` -(search space for the rank of adapter modules) in `nncf_config.json`. -The above command corresponds to the example in [example_nncf_config/nncf_config.json](./example_nncf_config/nncf_config.json), +(search space for the rank of adapter modules) in `nncf_config.json`. +The above command corresponds to the example in [example_nncf_config/nncf_config.json](./example_nncf_config/nncf_config.json), and it will save the sub-adapter to ` / custom`. 
diff --git a/comps/finetuning_sqft/example_nncf_config/nncf_config.json b/comps/finetuning_sqft/example_nncf_config/nncf_config.json index ead7ffe4c6..7ec9b3a578 100644 --- a/comps/finetuning_sqft/example_nncf_config/nncf_config.json +++ b/comps/finetuning_sqft/example_nncf_config/nncf_config.json @@ -1,630 +1,354 @@ { - "input_info": [ - { - "sample_size": [ - 1, - 256 - ], - "type": "long", - "keyword": "input_ids" - }, - { - "sample_size": [ - 1, - 256 - ], - "type": "long", - "keyword": "attention_mask" - } - ], - "bootstrapNAS": { - "training": { - "algorithm": "progressive_shrinking", - "frozen_layers_allowed": true, - "progressivity_of_elasticity": [ - "width" - ], - "batchnorm_adaptation": { - "num_bn_adaptation_samples": 0 - }, - "schedule": { - "list_stage_descriptions": [ - { - "train_dims": [ - "width" - ], - "epochs": 3, - "depth_indicator": 1, - "width_indicator": 8, - "init_lr": 0.0003, - "epochs_lr": 3, - "sample_rate": 1 - } - ] - }, - "elasticity": { - "available_elasticity_dims": [ - "width" - ], - "width": { - "overwrite_groups": [ - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ] - ], - "overwrite_groups_widths": [ - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ], - [ - 16, - 12, - 8 - ] - ] - } - } + "input_info": [ + { + "sample_size": [1, 256], + "type": "long", + "keyword": "input_ids" + }, + { + "sample_size": [1, 256], + "type": "long", + "keyword": "attention_mask" + } + ], + "bootstrapNAS": { + "training": { + "algorithm": "progressive_shrinking", + "frozen_layers_allowed": true, + "progressivity_of_elasticity": 
["width"], + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 0 + }, + "schedule": { + "list_stage_descriptions": [ + { + "train_dims": ["width"], + "epochs": 3, + "depth_indicator": 1, + "width_indicator": 8, + "init_lr": 0.0003, + "epochs_lr": 3, + "sample_rate": 1 + } + ] + }, + "elasticity": { + "available_elasticity_dims": ["width"], + "width": { + "overwrite_groups": [ + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ], + [ + "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" + ] + ], + "overwrite_groups_widths": [ + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8], + [16, 12, 8] + ] } + } } -} \ No newline at end of file + } +} diff --git a/comps/finetuning_sqft/finetune_sqft_config.py b/comps/finetuning_sqft/finetune_sqft_config.py index a34a9e7c3b..e5b35cef91 100644 --- a/comps/finetuning_sqft/finetune_sqft_config.py +++ b/comps/finetuning_sqft/finetune_sqft_config.py @@ -5,7 +5,7 @@ from typing import List, Optional, Union -from pydantic import BaseModel, Field, validator, root_validator +from pydantic import BaseModel, Field, root_validator, validator from comps.cores.proto.api_protocol import FineTuningJobIDRequest, FineTuningJobsRequest @@ -46,10 +46,10 @@ class SQFTLoRAConfig(LoraConfig): @root_validator(pre=True) def set_target_modules(cls, values): - target_module_groups = values.get('target_module_groups') + target_module_groups = values.get("target_module_groups") if target_module_groups is not None: - values['target_modules'] = [item for sublist in target_module_groups for item in sublist] - search_space = values.get('search_space') + values["target_modules"] = [item for sublist in target_module_groups for item in sublist] + search_space = values.get("search_space") if search_space is not None: assert len(search_space) == len(target_module_groups) return values @@ -207,9 +207,11 @@ class FineTuningParams(FineTuningJobsRequest): Dataset: DatasetConfig = DatasetConfig() Training: TrainingConfig = TrainingConfig() + class ExtractAdapterParams(FineTuningJobIDRequest): sub_adapter_version: str = "heuristic" custom_config: Optional[List[int]] = None + class MergeAdapterParams(FineTuningJobIDRequest): adapter_version: Optional[str] = None diff --git a/comps/finetuning_sqft/finetuning_sqft_service.py b/comps/finetuning_sqft/finetuning_sqft_service.py index bc11a6cd23..af9f237399 100644 --- a/comps/finetuning_sqft/finetuning_sqft_service.py +++ b/comps/finetuning_sqft/finetuning_sqft_service.py @@ -4,7 +4,7 @@ from 
comps import opea_microservices, register_microservice from comps.cores.proto.api_protocol import FineTuningJobIDRequest, UploadFileRequest -from comps.finetuning_sqft.finetune_sqft_config import FineTuningParams, ExtractAdapterParams, MergeAdapterParams +from comps.finetuning_sqft.finetune_sqft_config import ExtractAdapterParams, FineTuningParams, MergeAdapterParams from comps.finetuning_sqft.handlers import ( handle_cancel_finetuning_job, handle_create_finetuning_jobs, @@ -22,6 +22,7 @@ def create_finetuning_jobs(request: FineTuningParams, background_tasks: BackgroundTasks): return handle_create_finetuning_jobs(request, background_tasks) + @register_microservice( name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015, methods=["GET"] ) @@ -63,10 +64,14 @@ def list_checkpoints(request: FineTuningJobIDRequest): checkpoints = handle_list_finetuning_checkpoints(request) return checkpoints -@register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/extract_adapter", host="0.0.0.0", port=8015) + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/finetune/extract_adapter", host="0.0.0.0", port=8015 +) def extract_sub_adapter(request: ExtractAdapterParams): return handle_extract_sub_adapter(request) + @register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/merge_adapter", host="0.0.0.0", port=8015) def merge_adapter(request: MergeAdapterParams): return handle_merge_adapter(request) diff --git a/comps/finetuning_sqft/handlers.py b/comps/finetuning_sqft/handlers.py index 03e5745981..cdb6b224aa 100644 --- a/comps/finetuning_sqft/handlers.py +++ b/comps/finetuning_sqft/handlers.py @@ -152,7 +152,7 @@ def handle_extract_sub_adapter(request: ExtractAdapterParams): if not os.path.exists(finetuned_model_path): raise HTTPException( status_code=404, - detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!" + detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!", ) if job.status != "succeeded": raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") @@ -160,27 +160,27 @@ def handle_extract_sub_adapter(request: ExtractAdapterParams): if finetune_config.General.lora_config is None: raise HTTPException( status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!" + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!", ) if not finetune_config.General.lora_config.neural_lora_search: raise HTTPException( status_code=404, detail=f"The fine-tuning job '{fine_tuning_job_id}' did not enable NLS algorithm, " - f"there is no need to extract sub-adapters!" + f"there is no need to extract sub-adapters!", ) nncf_config_path = os.path.join(finetune_config.General.output_dir, "nncf_config.json") if not os.path.exists(nncf_config_path): raise HTTPException( - status_code=404, - detail=f"The NNCF config file does not exist in the fine-tuning job '{fine_tuning_job_id}!" + status_code=404, detail=f"The NNCF config file does not exist in the fine-tuning job '{fine_tuning_job_id}!" 
) from comps.finetuning_sqft.utils.extract_sub_adapter import main as extract_sub_adapter_main + extract_sub_adapter_main( adapter_model_path=finetuned_model_path, nncf_config=nncf_config_path, sub_adapter_version=request.sub_adapter_version, - custom_config=request.custom_config + custom_config=request.custom_config, ) return fine_tuning_job_id @@ -199,7 +199,7 @@ def handle_merge_adapter(request: MergeAdapterParams): if not os.path.exists(finetuned_model_path): raise HTTPException( status_code=404, - detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!" + detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!", ) if job.status != "succeeded": raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") @@ -207,7 +207,7 @@ def handle_merge_adapter(request: MergeAdapterParams): if finetune_config.General.lora_config is None: raise HTTPException( status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!" + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!", ) adapter_path = finetuned_model_path @@ -217,14 +217,15 @@ def handle_merge_adapter(request: MergeAdapterParams): if not os.path.exists(adapter_path): raise HTTPException( status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not have a '{adapter_version}' adapter!" + detail=f"The fine-tuning job '{fine_tuning_job_id}' does not have a '{adapter_version}' adapter!", ) from comps.finetuning_sqft.utils.merge import main as merge_adapter_main + merge_adapter_main( base_model_path=finetune_config.General.base_model, adapter_model_path=adapter_path, - output_path=os.path.join(adapter_path, "merged_model") + output_path=os.path.join(adapter_path, "merged_model"), ) return fine_tuning_job_id diff --git a/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py b/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py index 82f2e65c1d..8433cbacb8 100644 --- a/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py +++ b/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py @@ -38,11 +38,13 @@ logger = CustomLogger("llm_on_ray/finetune") try: - from comps.finetuning_sqft.utils.nncf_config_process import load_nncf_config from nncf.experimental.torch.nas.bootstrapNAS.training.model_creator_helpers import ( create_compressed_model_from_algo_names, ) from nncf.torch.model_creation import create_nncf_network + + from comps.finetuning_sqft.utils.nncf_config_process import load_nncf_config + is_nncf_available = True except ImportError: is_nncf_available = False @@ -358,7 +360,7 @@ def load_model(config: Dict): model=model, target_module_groups=target_module_groups, search_space=search_space, - nncf_config=nncf_config + nncf_config=nncf_config, ) model = create_nncf_network(model, nncf_config) compression_ctrl, model = create_compressed_model_from_algo_names( @@ -401,6 +403,7 @@ def load_model(config: Dict): return model, compression_ctrl + def get_trainer(config: Dict, model, tokenizer, tokenized_dataset, data_collator, compression_ctrl=None): device = config["Training"]["device"] if device in ["cpu", "gpu", "cuda"]: @@ -409,7 +412,9 @@ def get_trainer(config: Dict, model, tokenizer, tokenized_dataset, data_collator "model": model, "args": training_args, "train_dataset": tokenized_dataset["train"], - "eval_dataset": tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None, + 
"eval_dataset": ( + tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None + ), "tokenizer": tokenizer, "data_collator": data_collator, } @@ -471,7 +476,9 @@ def train_func(config: Dict[str, Any]): model, compression_ctrl = load_model(config) - training_args, trainer = get_trainer(config, model, tokenizer, tokenized_dataset, data_collator, compression_ctrl=compression_ctrl) + training_args, trainer = get_trainer( + config, model, tokenizer, tokenized_dataset, data_collator, compression_ctrl=compression_ctrl + ) logger.info("train start") trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) diff --git a/comps/finetuning_sqft/patches/peft-v0.10.0.patch b/comps/finetuning_sqft/patches/peft-v0.10.0.patch index caefc3e735..9606bd24ef 100644 --- a/comps/finetuning_sqft/patches/peft-v0.10.0.patch +++ b/comps/finetuning_sqft/patches/peft-v0.10.0.patch @@ -169,7 +169,7 @@ diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py index 5ac1264..acb5d27 100644 --- a/src/peft/utils/save_and_load.py +++ b/src/peft/utils/save_and_load.py -@@ -246,6 +246,48 @@ def set_peft_model_state_dict(model, peft_model_state_dict, adapter_name="defaul +@@ -246,6 +246,48 @@ def set_peft_model_state_dict(model, peft_model_state_dict, adapter_name="default else: raise NotImplementedError diff --git a/comps/finetuning_sqft/utils/extract_sub_adapter.py b/comps/finetuning_sqft/utils/extract_sub_adapter.py index ae2a3b7faf..82e4471719 100644 --- a/comps/finetuning_sqft/utils/extract_sub_adapter.py +++ b/comps/finetuning_sqft/utils/extract_sub_adapter.py @@ -1,17 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import argparse import os import re import torch from nncf import NNCFConfig -from peft.utils import CONFIG_NAME, WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME +from peft.utils import CONFIG_NAME, SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME PATTERN = re.compile(r"[[](.*?)[]]", re.S) def get_width_for_query_prefix(torch_module_to_width, query_module, length=5): - """ - Get the width for a given query module prefix. + """Get the width for a given query module prefix. Args: torch_module_to_width (dict): Mapping from torch module to width. 
@@ -75,6 +77,7 @@ def main(adapter_model_path, nncf_config, sub_adapter_version, custom_config=Non super_adapter_weights = torch.load(os.path.join(adapter_model_path, WEIGHTS_NAME)) except: from safetensors.torch import load_file + super_adapter_weights = load_file(os.path.join(adapter_model_path, SAFETENSORS_WEIGHTS_NAME)) sub_adapter_weights = {} for weight_key, weight_tensor in super_adapter_weights.items(): @@ -93,9 +96,9 @@ def main(adapter_model_path, nncf_config, sub_adapter_version, custom_config=Non if __name__ == "__main__": parser = argparse.ArgumentParser(description="Merge base model and adapter model with additional configurations") - parser.add_argument('--adapter_model_path', type=str, required=True, help="Path to the adapter model") - parser.add_argument('--nncf_config', type=str, required=True, help="Path to the NNCF configuration") - parser.add_argument('--sub_adapter_version', type=str, required=True, help="Sub adapter version") - parser.add_argument('--custom_config', type=str, default=None, help="Path to custom configuration (optional)") + parser.add_argument("--adapter_model_path", type=str, required=True, help="Path to the adapter model") + parser.add_argument("--nncf_config", type=str, required=True, help="Path to the NNCF configuration") + parser.add_argument("--sub_adapter_version", type=str, required=True, help="Sub adapter version") + parser.add_argument("--custom_config", type=str, default=None, help="Path to custom configuration (optional)") args = parser.parse_args() main(args.adapter_model_path, args.nncf_config, args.sub_adapter_version, args.custom_config) diff --git a/comps/finetuning_sqft/utils/merge.py b/comps/finetuning_sqft/utils/merge.py index 51b8381235..266ee0eac4 100644 --- a/comps/finetuning_sqft/utils/merge.py +++ b/comps/finetuning_sqft/utils/merge.py @@ -1,4 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import argparse + from peft import PeftModel from transformers import AutoModelForCausalLM, AutoTokenizer @@ -19,9 +23,9 @@ def main(base_model_path, adapter_model_path, output_path): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Merge base model and adapter model") - parser.add_argument('--base_model_path', type=str, required=True, help="Path to the base model") - parser.add_argument('--adapter_model_path', type=str, required=True, help="Path to the adapter model") - parser.add_argument('--output_path', type=str, required=True, help="Path to save the merged model") + parser.add_argument("--base_model_path", type=str, required=True, help="Path to the base model") + parser.add_argument("--adapter_model_path", type=str, required=True, help="Path to the adapter model") + parser.add_argument("--output_path", type=str, required=True, help="Path to save the merged model") args = parser.parse_args() main(args.base_model_path, args.adapter_model_path, args.output_path) diff --git a/comps/finetuning_sqft/utils/nncf_config_process.py b/comps/finetuning_sqft/utils/nncf_config_process.py index 521e6fefa7..5f6abb7c8f 100644 --- a/comps/finetuning_sqft/utils/nncf_config_process.py +++ b/comps/finetuning_sqft/utils/nncf_config_process.py @@ -1,43 +1,41 @@ -import os +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import json -from nncf import NNCFConfig +import os +from nncf import NNCFConfig NNCF_CONFIG_TEMPLATE = { "input_info": [ - { - "sample_size": [1, 256], - "type": "long", - "keyword": "input_ids" - }, - { - "sample_size": [1, 256], - "type": 
"long", - "keyword": "attention_mask" - } + {"sample_size": [1, 256], "type": "long", "keyword": "input_ids"}, + {"sample_size": [1, 256], "type": "long", "keyword": "attention_mask"}, ], "bootstrapNAS": { "training": { "algorithm": "progressive_shrinking", "frozen_layers_allowed": True, "progressivity_of_elasticity": ["width"], - "batchnorm_adaptation": { - "num_bn_adaptation_samples": 0 - }, + "batchnorm_adaptation": {"num_bn_adaptation_samples": 0}, "schedule": { "list_stage_descriptions": [ - {"train_dims": ["width"], "epochs": -1, "depth_indicator": 1, "width_indicator": 8, "init_lr": -1, "epochs_lr": -1, "sample_rate": 1} + { + "train_dims": ["width"], + "epochs": -1, + "depth_indicator": 1, + "width_indicator": 8, + "init_lr": -1, + "epochs_lr": -1, + "sample_rate": 1, + } ] }, "elasticity": { "available_elasticity_dims": ["width"], - "width": { - "overwrite_groups": [], - "overwrite_groups_widths": [] - } - } + "width": {"overwrite_groups": [], "overwrite_groups_widths": []}, + }, } - } + }, } @@ -63,8 +61,7 @@ def add_lr_epochs(nncf_config, learning_rate=3e-4, num_epochs=3): def get_model_paths(model, target_module_name): - """ - Find all paths to the target layer in the model. + """Find all paths to the target layer in the model. Args: model (torch.nn.Module): The model to search. @@ -73,6 +70,7 @@ def get_model_paths(model, target_module_name): Returns: list: A list of paths to the target layer. """ + def find_layers(module, target_module_name, path, paths): for name, sub_module in module.named_children(): new_path = f"{path}/{sub_module.__class__.__name__}[{name}]" @@ -88,13 +86,8 @@ def find_layers(module, target_module_name, path, paths): find_layers(model, target_module_name, base_path, paths) return paths -def load_nncf_config( - config, - model, - target_module_groups=None, - search_space=None, - nncf_config=None -): + +def load_nncf_config(config, model, target_module_groups=None, search_space=None, nncf_config=None): """Load and preprocess the NNCF configuration file. Returns: @@ -105,7 +98,9 @@ def load_nncf_config( nncf_config = NNCFConfig.from_json(nncf_config) else: if search_space is None and target_module_groups: - raise ValueError("Neural LoRA search is enabled, `search_space` and `target_module_groups` must be provided.") + raise ValueError( + "Neural LoRA search is enabled, `search_space` and `target_module_groups` must be provided." + ) # The NNCF Config will be automatically generated based on `target_module_groups` and `search_space`. 
num_hidden_layers = model.config.num_hidden_layers nncf_config_dict = NNCF_CONFIG_TEMPLATE @@ -126,12 +121,12 @@ def load_nncf_config( for space in search_space: space = [int(width) for width in space.split(",")] overwrite_groups_widths.extend([space] * num_hidden_layers) - nncf_config_dict["bootstrapNAS"]["training"]["elasticity"]["width"]["overwrite_groups_widths"] = overwrite_groups_widths + nncf_config_dict["bootstrapNAS"]["training"]["elasticity"]["width"][ + "overwrite_groups_widths" + ] = overwrite_groups_widths assert len(overwrite_groups) == len(overwrite_groups_widths) nncf_config_dict = add_lr_epochs( - nncf_config_dict, - learning_rate=config["Training"]["learning_rate"], - num_epochs=config["Training"]["epochs"] + nncf_config_dict, learning_rate=config["Training"]["learning_rate"], num_epochs=config["Training"]["epochs"] ) nncf_config = NNCFConfig.from_dict(nncf_config_dict) @@ -142,15 +137,18 @@ def load_nncf_config( return nncf_config -if __name__ == '__main__': +if __name__ == "__main__": import transformers from peft import LoraConfig, get_peft_model + model = transformers.AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") lora_config = { "task_type": "CAUSAL_LM", "r": 16, - "target_modules": ["q_proj", "k_proj", "v_proj", "up_proj", "down_proj"] + "target_modules": ["q_proj", "k_proj", "v_proj", "up_proj", "down_proj"], } peft_config = LoraConfig(**lora_config) model = get_peft_model(model, peft_config) - load_nncf_config(None, model, [["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]], ["16,12,8", "16", "16,12"]) + load_nncf_config( + None, model, [["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]], ["16,12,8", "16", "16,12"] + ) From 156c2255cf7295ec51467cb889e2da43e908cace Mon Sep 17 00:00:00 2001 From: Yuan Jinjie Date: Tue, 26 Nov 2024 08:57:52 +0800 Subject: [PATCH 03/17] Delete old finetuning_sqft directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. Pablo Muñoz Signed-off-by: J. 
Pablo Muñoz --- comps/finetuning_sqft/Dockerfile | 50 -- comps/finetuning_sqft/README.md | 240 ------- .../example_nncf_config/nncf_config.json | 354 ---------- comps/finetuning_sqft/finetune_runner.py | 38 -- comps/finetuning_sqft/finetune_sqft_config.py | 217 ------- .../finetuning_sqft_service.py | 81 --- comps/finetuning_sqft/handlers.py | 339 ---------- comps/finetuning_sqft/launch.sh | 12 - .../llm_on_ray/common/__init__.py | 6 - .../llm_on_ray/common/common.py | 29 - .../llm_on_ray/common/torch_config.py | 72 --- .../llm_on_ray/finetune/__init__.py | 4 - .../llm_on_ray/finetune/data_process.py | 352 ---------- .../llm_on_ray/finetune/finetune.py | 609 ------------------ .../llm_on_ray/finetune/modeling.py | 211 ------ .../patches/nncf-v2.12.0.patch | 72 --- .../patches/peft-v0.10.0.patch | 220 ------- .../patches/transformers-v4.44.2.patch | 171 ----- comps/finetuning_sqft/requirements.txt | 17 - .../utils/extract_sub_adapter.py | 104 --- comps/finetuning_sqft/utils/merge.py | 31 - .../utils/nncf_config_process.py | 154 ----- 22 files changed, 3383 deletions(-) delete mode 100644 comps/finetuning_sqft/Dockerfile delete mode 100644 comps/finetuning_sqft/README.md delete mode 100644 comps/finetuning_sqft/example_nncf_config/nncf_config.json delete mode 100644 comps/finetuning_sqft/finetune_runner.py delete mode 100644 comps/finetuning_sqft/finetune_sqft_config.py delete mode 100644 comps/finetuning_sqft/finetuning_sqft_service.py delete mode 100644 comps/finetuning_sqft/handlers.py delete mode 100644 comps/finetuning_sqft/launch.sh delete mode 100644 comps/finetuning_sqft/llm_on_ray/common/__init__.py delete mode 100644 comps/finetuning_sqft/llm_on_ray/common/common.py delete mode 100644 comps/finetuning_sqft/llm_on_ray/common/torch_config.py delete mode 100644 comps/finetuning_sqft/llm_on_ray/finetune/__init__.py delete mode 100644 comps/finetuning_sqft/llm_on_ray/finetune/data_process.py delete mode 100644 comps/finetuning_sqft/llm_on_ray/finetune/finetune.py delete mode 100644 comps/finetuning_sqft/llm_on_ray/finetune/modeling.py delete mode 100644 comps/finetuning_sqft/patches/nncf-v2.12.0.patch delete mode 100644 comps/finetuning_sqft/patches/peft-v0.10.0.patch delete mode 100644 comps/finetuning_sqft/patches/transformers-v4.44.2.patch delete mode 100644 comps/finetuning_sqft/requirements.txt delete mode 100644 comps/finetuning_sqft/utils/extract_sub_adapter.py delete mode 100644 comps/finetuning_sqft/utils/merge.py delete mode 100644 comps/finetuning_sqft/utils/nncf_config_process.py diff --git a/comps/finetuning_sqft/Dockerfile b/comps/finetuning_sqft/Dockerfile deleted file mode 100644 index 4715470aec..0000000000 --- a/comps/finetuning_sqft/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Use the same python version with ray -FROM python:3.10.14 - -ARG HF_TOKEN - -ENV HF_TOKEN=$HF_TOKEN - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -RUN chown -R user /home/user/comps/finetuning_sqft - -USER user - -ENV PATH=$PATH:/home/user/.local/bin - -RUN python -m pip install --no-cache-dir --upgrade pip && \ - python -m pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \ - python -m pip install --no-cache-dir intel-extension-for-pytorch && \ - python -m pip install --no-cache-dir oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ 
&& \ - python -m pip install --no-cache-dir -r /home/user/comps/finetuning/requirements.txt - -# Set up third-party dependencies (SQFT) -ENV PATH_TO_FINETUNE=/home/user/comps/finetuning_sqft -RUN mkdir -p $PATH_TO_FINETUNE/third_party && cd $PATH_TO_FINETUNE/third_party && \ - git clone https://github.com/huggingface/peft.git && \ - cd peft && git checkout v0.10.0 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/peft-v0.10.0.patch && pip install -e . && cd .. && \ - git clone https://github.com/huggingface/transformers.git && \ - cd transformers && git checkout v4.44.2 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/transformers-v4.44.2.patch && pip install -e . && cd .. && \ - git clone https://github.com/openvinotoolkit/nncf.git && \ - cd nncf && git checkout v2.12.0 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/nncf-v2.12.0.patch && pip install -e . && cd .. - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/finetuning_sqft - -RUN echo PKGPATH=$(python3 -c "import pkg_resources; print(pkg_resources.get_distribution('oneccl-bind-pt').location)") >> run.sh && \ - echo 'export LD_LIBRARY_PATH=$PKGPATH/oneccl_bindings_for_pytorch/opt/mpi/lib/:$LD_LIBRARY_PATH' >> run.sh && \ - echo 'source $PKGPATH/oneccl_bindings_for_pytorch/env/setvars.sh' >> run.sh && \ - echo ray start --head --dashboard-host=0.0.0.0 >> run.sh && \ - echo export RAY_ADDRESS=http://localhost:8265 >> run.sh && \ - echo python finetuning_sqft_service_service.py >> run.sh - -CMD bash run.sh diff --git a/comps/finetuning_sqft/README.md b/comps/finetuning_sqft/README.md deleted file mode 100644 index e9d2c3e596..0000000000 --- a/comps/finetuning_sqft/README.md +++ /dev/null @@ -1,240 +0,0 @@ -# SQFT Fine-tuning Microservice - -Fine-tuning microservice with SQFT involves adapting a model to a specific task or dataset to improve its performance on that task, we currently support instruction tuning for LLMs. - -## 🚀1. Start Microservice with Python (Option 1) - -### 1.1 Install Requirements - -```bash -python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu -python -m pip install intel-extension-for-pytorch -python -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ -pip install -r requirements.txt -``` - -To enable elastic adapter fine-tuning (Neural Low-Rank Adapter Search) or SparsePEFT from [SQFT](https://arxiv.org/abs/2410.03750), please perform this additional installation: - -```bash -PATH_TO_FINETUNE=$PWD -mkdir third_party && cd third_party - -# transformers (for Neural Lora Search) -git clone https://github.com/huggingface/transformers.git -cd transformers && git checkout v4.44.2 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/transformers-v4.44.2.patch && pip install -e . && cd .. - -# peft (for Neural Low-Rank Adapter Search and SparsePEFT) -git clone https://github.com/huggingface/peft.git -cd peft && git checkout v0.10.0 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/peft-v0.10.0.patch && pip install -e . && cd .. - -# nncf (for Neural Lora Search) -git clone https://github.com/openvinotoolkit/nncf.git -cd nncf && git checkout v2.12.0 && git apply --ignore-space-change --ignore-whitespace ${PATH_TO_FINETUNE}/patches/nncf-v2.12.0.patch && pip install -e . && cd .. 
-``` - -### 1.2 Start Fine-tuning Service with Python Script - -#### 1.2.1 Start Ray Cluster - -OneCCL and Intel MPI libraries should be dynamically linked in every node before Ray starts: - -```bash -source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh -``` - -Start Ray locally using the following command. - -```bash -ray start --head -``` - -For a multi-node cluster, start additional Ray worker nodes with below command. - -```bash -ray start --address='${head_node_ip}:6379' -``` - -#### 1.2.2 Start Finetuning Service - -```bash -export HF_TOKEN= -export PYTHONPATH= -python finetuning_sqft_service.py -``` - -## 🚀2. Start Microservice with Docker (Option 2) - -### 2.1 Setup on CPU - -#### 2.1.1 Build Docker Image - -Build docker image with below command: - -```bash -export HF_TOKEN=${your_huggingface_token} -cd ../../ -docker build -t opea/finetuning:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg HF_TOKEN=$HF_TOKEN -f comps/finetuning_sqft/Dockerfile . -``` - -#### 2.1.2 Run Docker with CLI - -Start docker container with below command: - -```bash -docker run -d --name="finetuning-server" -p 8015:8015 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/finetuning:latest -``` - -## 🚀3. Consume Fine-tuning Service - -### 3.1 Upload a training file - -Download a training file, such as `alpaca_data.json` for instruction tuning and upload it to the server with below command, this file can be downloaded in [here](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json): - -```bash -# upload a training file -curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./alpaca_data.json" -F purpose="fine-tune" -``` - -### 3.2 Create fine-tuning job - -#### 3.2.1 Instruction Tuning - -After a training file like `alpaca_data.json` is uploaded, use the following command to launch a fine-tuning job using `meta-llama/Llama-2-7b-chat-hf` as base model: - -```bash -# create a finetuning job -curl http://${your_ip}:8015/v1/fine_tuning/jobs \ - -X POST \ - -H "Content-Type: application/json" \ - -d '{ - "training_file": "alpaca_data.json", - "model": "meta-llama/Llama-2-7b-chat-hf" - }' - -# create a finetuning job (with SparsePEFT) -curl http://${your_ip}:8015/v1/fine_tuning/jobs \ - -X POST \ - -H "Content-Type: application/json" \ - -d '{ - "training_file": "alpaca_data.json", - "model": , - "General": { - "lora_config": { - "sparse_adapter": true - } - } - }' - -# create a fine-tuning job (with Neural Low-rank adapter Search) -# Max LoRA rank: 16 -# LoRA target modules -> Low-rank search space -# ["q_proj", "k_proj", "v_proj"] -> [16,12,8] -# ["up_proj"] -> [16,12,8] -# ["down_proj"] -> [16,12,8] -curl http://${your_ip}:8015/v1/fine_tuning/jobs \ - -X POST \ - -H "Content-Type: application/json" \ - -d '{ - "training_file": "alpaca_data.json", - "model": "meta-llama/Llama-2-7b-chat-hf", - "General": { - "lora_config": { - "r": 16, - "neural_lora_search": true, - "target_module_groups": [["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]], - "search_space": ["16,12,8", "16,12,8", "16,12,8"] - } - } - }' -``` - -Below are some explanations for the parameters related to the Neural Low-rank adapter Search algorithm: - -- `target_module_groups` specifies the target module groups, which means that the adapters within the same group will share the same activated low-rank value. 
-- `search_space` specifies the search space for each target module (adapter) group. - Here, it is `["16,12,8", "16,12,8", "16,12,8"]`, meaning that the search space for each group is [16, 12, 8]. - -Note that the number of groups should be equal to the number of search spaces (one-to-one correspondence). -Feel free to try your favorite group design and search spaces. - -### 3.3 Manage fine-tuning job - -Below commands show how to list fine-tuning jobs, retrieve a fine-tuning job, cancel a fine-tuning job and list checkpoints of a fine-tuning job. - -```bash -# list fine-tuning jobs -curl http://${your_ip}:8015/v1/fine_tuning/jobs -X GET - -# retrieve one fine-tuning job -curl http://localhost:8015/v1/fine_tuning/jobs/retrieve -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": ${fine_tuning_job_id}}' - -# cancel one fine-tuning job -curl http://localhost:8015/v1/fine_tuning/jobs/cancel -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": ${fine_tuning_job_id}}' - -# list checkpoints of a fine-tuning job -curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": ${fine_tuning_job_id}}' -``` - -### 3.4 Leverage fine-tuned model - -#### 3.4.1 Extract the sub-adapter - -After completing the super-adapter fine-tuning (the checkpoints of the fine-tuning job), -the following command demonstrates how to extract the heuristic sub-adapter. -Additionally, more powerful sub-adapters can be obtained through other advanced search algorithms. - -```bash -curl http://${your_ip}:8015/v1/finetune/extract_adapter \ - -X POST \ - -H "Content-Type: application/json" \ - -d '{ - "fine_tuning_job_id": ${fine_tuning_job_id}, - "sub_adapter_version": "heuristic" - }' -``` - -`sub_adapter_version` can be heuristic, minimal, or a custom name. -When `sub_adapter_version` is set to a custom name, we need to provide a specific configuration in `custom_config`. -The extracted adapter will be saved in ` / `. - -
-An example of a custom configuration
-
-```bash
-curl http://${your_ip}:8015/v1/finetune/extract_adapter \
-  -X POST \
-  -H "Content-Type: application/json" \
-  -d '{
-    "fine_tuning_job_id": ${fine_tuning_job_id},
-    "sub_adapter_version": "custom",
-    "custom_config": [8, 8, 16, 8, 8, 12, 8, 12, 12, 12, 8, 16, 12, 16, 16, 12, 12, 8, 8, 16, 8, 8, 12, 8, 16, 12, 8, 16, 8, 16, 12, 8, 8, 16, 16, 16, 16, 16, 8, 12, 12, 16, 12, 16, 12, 16, 16, 12, 8, 12, 12, 8, 8, 12, 8, 12, 12, 8, 16, 8, 8, 8, 8, 12, 16, 16]
-  }'
-```
-
-In a fine-tuning job with the Neural Low-rank adapter Search algorithm, the `nncf_config.json` file (which includes the elastic adapter information) will be saved in the output directory.
-The `custom_config` must correspond to the `overwrite_groups` (adapter modules) or `overwrite_groups_widths`
-(search space for the rank of adapter modules) in `nncf_config.json`.
-The above command corresponds to the example in [example_nncf_config/nncf_config.json](./example_nncf_config/nncf_config.json),
-and it will save the sub-adapter to ` / custom`.
-
-
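Before submitting the request above, it can help to sanity-check a hand-written `custom_config` against the generated `nncf_config.json`. The snippet below is a minimal, illustrative sketch and not part of the microservice API: `check_custom_config` is a hypothetical helper, and the only structure it assumes is the `bootstrapNAS.training.elasticity.width.overwrite_groups` / `overwrite_groups_widths` layout shown in the example config in this patch. It verifies that the list supplies one rank per adapter group and that every rank lies inside that group's search space.

```python
import json


def check_custom_config(nncf_config_path, custom_config):
    """Sanity-check a hand-written ``custom_config`` against the generated NNCF config."""
    with open(nncf_config_path) as f:
        width_cfg = json.load(f)["bootstrapNAS"]["training"]["elasticity"]["width"]
    groups = width_cfg["overwrite_groups"]         # adapter-module groups that share one activated rank
    widths = width_cfg["overwrite_groups_widths"]  # allowed ranks per group, e.g. [16, 12, 8]
    if len(custom_config) != len(widths):
        raise ValueError(f"custom_config has {len(custom_config)} entries, expected {len(widths)}")
    for idx, (rank, allowed) in enumerate(zip(custom_config, widths)):
        if rank not in allowed:
            raise ValueError(f"group {idx} ({groups[idx][0]}): rank {rank} is not in the search space {allowed}")


# Usage sketch against the example config shipped with the microservice:
# check_custom_config("example_nncf_config/nncf_config.json",
#                     [8, 8, 16, ...])  # the 66-element list from the request above
```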
- -#### 3.4.2 Merge - -The following command demonstrates how to merge the sub-adapter to the base pretrained model: - -```bash -curl http://${your_ip}:8015/v1/ffinetune/merge_adapter \ - -X POST \ - -H "Content-Type: application/json" \ - -d '{ - "fine_tuning_job_id": ${fine_tuning_job_id}, - "sub_adapter_version": "heuristic" - }' -``` - -The merged model will be saved in ` / / merged_model`. - -## 🚀4. Descriptions for Finetuning parameters - -We utilize [OpenAI finetuning parameters](https://platform.openai.com/docs/api-reference/fine-tuning) and extend it with more customizable parameters, see the definitions at [finetune_sqft_config](./finetune_sqft_config.py). diff --git a/comps/finetuning_sqft/example_nncf_config/nncf_config.json b/comps/finetuning_sqft/example_nncf_config/nncf_config.json deleted file mode 100644 index 7ec9b3a578..0000000000 --- a/comps/finetuning_sqft/example_nncf_config/nncf_config.json +++ /dev/null @@ -1,354 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 256], - "type": "long", - "keyword": "input_ids" - }, - { - "sample_size": [1, 256], - "type": "long", - "keyword": "attention_mask" - } - ], - "bootstrapNAS": { - "training": { - "algorithm": "progressive_shrinking", - "frozen_layers_allowed": true, - "progressivity_of_elasticity": ["width"], - "batchnorm_adaptation": { - "num_bn_adaptation_samples": 0 - }, - "schedule": { - "list_stage_descriptions": [ - { - "train_dims": ["width"], - "epochs": 3, - "depth_indicator": 1, - "width_indicator": 8, - "init_lr": 0.0003, - "epochs_lr": 3, - "sample_rate": 1 - } - ] - }, - "elasticity": { - "available_elasticity_dims": ["width"], - "width": { - "overwrite_groups": [ - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[q_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[k_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0", - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaSdpaAttention[self_attn]/Linear[v_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[up_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[2]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[3]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[4]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[5]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[6]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[7]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[8]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[9]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[10]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[11]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - 
"PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[12]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[13]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[14]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[15]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[16]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[17]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[18]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[19]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[20]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ], - [ - "PeftModelForCausalLM/LoraModel[base_model]/LlamaForCausalLM[model]/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[21]/LlamaMLP[mlp]/Linear[down_proj]/ModuleDict[lora_A]/NNCFLinear[default]/linear_0" - ] - ], - "overwrite_groups_widths": [ - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8], - [16, 12, 8] - ] - } - } - } - } -} diff --git a/comps/finetuning_sqft/finetune_runner.py b/comps/finetuning_sqft/finetune_runner.py deleted file mode 100644 index 45cad43d56..0000000000 --- a/comps/finetuning_sqft/finetune_runner.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import 
argparse - -from pydantic_yaml import parse_yaml_raw_as -from transformers import TrainerCallback, TrainerControl, TrainerState, TrainingArguments - -from comps.finetuning_sqft.finetune_sqft_config import FinetuneConfig - - -class FineTuneCallback(TrainerCallback): - def __init__(self) -> None: - super().__init__() - - def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): - print("FineTuneCallback:", args, state) - - -def main(): - parser = argparse.ArgumentParser(description="Runner for llm_on_ray-finetune") - parser.add_argument("--config_file", type=str, required=True, default=None) - args = parser.parse_args() - model_config_file = args.config_file - - with open(model_config_file) as f: - finetune_config = parse_yaml_raw_as(FinetuneConfig, f).model_dump() - - callback = FineTuneCallback() - finetune_config["Training"]["callbacks"] = [callback] - - from comps.finetuning_sqft.llm_on_ray.finetune.finetune import main as llm_on_ray_finetune_main - - llm_on_ray_finetune_main(finetune_config) - - -if __name__ == "__main__": - main() diff --git a/comps/finetuning_sqft/finetune_sqft_config.py b/comps/finetuning_sqft/finetune_sqft_config.py deleted file mode 100644 index e5b35cef91..0000000000 --- a/comps/finetuning_sqft/finetune_sqft_config.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -# Copyright 2023 The LLM-on-Ray Authors. - -from typing import List, Optional, Union - -from pydantic import BaseModel, Field, root_validator, validator - -from comps.cores.proto.api_protocol import FineTuningJobIDRequest, FineTuningJobsRequest - -PRECISION_BF16 = "bf16" -PRECISION_FP16 = "fp16" -PRECISION_NO = "no" - -DEVICE_CPU = "cpu" -DEVICE_HPU = "hpu" -DEVICE_GPU = "gpu" -DEVICE_CUDA = "cuda" - -ACCELERATE_STRATEGY_DDP = "DDP" -ACCELERATE_STRATEGY_FSDP = "FSDP" -ACCELERATE_STRATEGY_DEEPSPEED = "DEEPSPEED" - - -class LoadConfig(BaseModel): - trust_remote_code: bool = False - # set Huggingface token to access dataset/model - token: Optional[str] = None - - -class LoraConfig(BaseModel): - task_type: str = "CAUSAL_LM" - r: int = 8 - lora_alpha: int = 16 - lora_dropout: float = 0.1 - target_modules: Optional[List[str]] = None - - -class SQFTLoRAConfig(LoraConfig): - neural_lora_search: bool = False - target_module_groups: Optional[List[List[str]]] = None - search_space: Optional[List[str]] = None - sparse_adapter: bool = False - nncf_config: Optional[str] = None - - @root_validator(pre=True) - def set_target_modules(cls, values): - target_module_groups = values.get("target_module_groups") - if target_module_groups is not None: - values["target_modules"] = [item for sublist in target_module_groups for item in sublist] - search_space = values.get("search_space") - if search_space is not None: - assert len(search_space) == len(target_module_groups) - return values - - -class GeneralConfig(BaseModel): - base_model: str = None - tokenizer_name: Optional[str] = None - gaudi_config_name: Optional[str] = None - gpt_base_model: bool = False - output_dir: str = "./tmp" - report_to: str = "none" - resume_from_checkpoint: Optional[str] = None - save_strategy: str = "no" - config: LoadConfig = LoadConfig() - lora_config: Optional[Union[LoraConfig, SQFTLoRAConfig]] = LoraConfig() - enable_gradient_checkpointing: bool = False - task: str = "instruction_tuning" - - @validator("report_to") - def check_report_to(cls, v: str): - assert v in ["none", "tensorboard"] - return v - - @validator("task") - def 
check_task(cls, v: str): - assert v in ["instruction_tuning"] - return v - - -class DatasetConfig(BaseModel): - train_file: str = None - validation_file: Optional[str] = None - validation_split_percentage: int = 5 - max_length: int = 512 - group: bool = True - block_size: int = 512 - shuffle: bool = False - max_source_length: int = 384 - padding_side: str = "right" - truncation_side: str = "right" - max_seq_length: int = 512 - truncation: bool = True - padding: Union[bool, str] = True - mask_input: bool = True - mask_response: bool = True - data_preprocess_type: str = "neural_chat" - max_train_samples: int = 0 - max_eval_samples: int = 0 - train_group_size: int = 8 - query_max_len: int = Field( - default=128, - description=( - "The maximum total input sequence length after tokenization for passage. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - ), - ) - passage_max_len: int = Field( - default=128, - description=( - "The maximum total input sequence length after tokenization for passage. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - ), - ) - query_instruction_for_retrieval: Optional[str] = Field(default=None, description="instruction for query") - passage_instruction_for_retrieval: Optional[str] = Field(default=None, description="instruction for passage") - - -class RayResourceConfig(BaseModel): - CPU: int = 32 - GPU: int = 0 - HPU: int = 0 - - -class TrainingConfig(BaseModel): - optimizer: str = "adamw_torch" - batch_size: int = 2 - epochs: int = 1 - max_train_steps: Optional[int] = None - learning_rate: float = 5.0e-5 - lr_scheduler: str = "linear" - weight_decay: float = 0.0 - device: str = DEVICE_CPU - hpu_execution_mode: str = "lazy" - num_training_workers: int = 1 - resources_per_worker: RayResourceConfig = RayResourceConfig() - accelerate_mode: str = ACCELERATE_STRATEGY_DDP - mixed_precision: str = PRECISION_NO - gradient_accumulation_steps: int = 1 - logging_steps: int = 10 - deepspeed_config_file: str = "" - - @validator("device") - def check_device(cls, v: str): - # will convert to lower case - if v: - assert v.lower() in [DEVICE_CPU, DEVICE_GPU, DEVICE_HPU, DEVICE_CUDA] - return v.lower() - - @validator("hpu_execution_mode") - def check_hpu_execution_mode(cls, v: str): - if v: - assert v in ["lazy", "eager", "eager.compile"] - return v - - @validator("accelerate_mode") - def check_accelerate_mode(cls, v: str): - if v: - assert v in [ - ACCELERATE_STRATEGY_DDP, - ACCELERATE_STRATEGY_FSDP, - ACCELERATE_STRATEGY_DEEPSPEED, - ] - return v - - @validator("mixed_precision") - def check_mixed_precision(cls, v: str): - if v: - assert v in [PRECISION_BF16, PRECISION_FP16, PRECISION_NO] - return v - - @validator("logging_steps") - def check_logging_steps(cls, v: int): - assert v > 0 - return v - - # @model_validator(mode='after') - # def check_device_and_accelerate_mode(self) -> "Training": - # dev = self.device - # res = self.resources_per_worker - # mode = self.accelerate_mode - # if dev == "CPU": - # if res.GPU is not None and res.GPU > 0: - # raise ValueError("Please not specified GPU resource when use CPU only in Ray.") - # if mode != "CPU_DDP": - # raise ValueError("Please specified CPU related accelerate mode when use CPU only in Ray.") - # elif dev == "GPU": - # if res.GPU is None or res.GPU == 0: - # raise ValueError("Please specified GPU resource when use GPU to fine tune in Ray.") - # if mode not in ["GPU_DDP", "GPU_FSDP"]: - # raise ValueError("Please speicifed GPU related accelerate mode when 
use GPU to fine tune in Ray.") - - # return self - - -class FinetuneConfig(BaseModel): - General: GeneralConfig = GeneralConfig() - Dataset: DatasetConfig = DatasetConfig() - Training: TrainingConfig = TrainingConfig() - - -class FineTuningParams(FineTuningJobsRequest): - # priority use FineTuningJobsRequest params - General: GeneralConfig = GeneralConfig() - Dataset: DatasetConfig = DatasetConfig() - Training: TrainingConfig = TrainingConfig() - - -class ExtractAdapterParams(FineTuningJobIDRequest): - sub_adapter_version: str = "heuristic" - custom_config: Optional[List[int]] = None - - -class MergeAdapterParams(FineTuningJobIDRequest): - adapter_version: Optional[str] = None diff --git a/comps/finetuning_sqft/finetuning_sqft_service.py b/comps/finetuning_sqft/finetuning_sqft_service.py deleted file mode 100644 index af9f237399..0000000000 --- a/comps/finetuning_sqft/finetuning_sqft_service.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -from fastapi import BackgroundTasks, Depends - -from comps import opea_microservices, register_microservice -from comps.cores.proto.api_protocol import FineTuningJobIDRequest, UploadFileRequest -from comps.finetuning_sqft.finetune_sqft_config import ExtractAdapterParams, FineTuningParams, MergeAdapterParams -from comps.finetuning_sqft.handlers import ( - handle_cancel_finetuning_job, - handle_create_finetuning_jobs, - handle_extract_sub_adapter, - handle_list_finetuning_checkpoints, - handle_list_finetuning_jobs, - handle_merge_adapter, - handle_retrieve_finetuning_job, - handle_upload_training_files, - upload_file, -) - - -@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015) -def create_finetuning_jobs(request: FineTuningParams, background_tasks: BackgroundTasks): - return handle_create_finetuning_jobs(request, background_tasks) - - -@register_microservice( - name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015, methods=["GET"] -) -def list_finetuning_jobs(): - return handle_list_finetuning_jobs() - - -@register_microservice( - name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/retrieve", host="0.0.0.0", port=8015 -) -def retrieve_finetuning_job(request: FineTuningJobIDRequest): - job = handle_retrieve_finetuning_job(request) - return job - - -@register_microservice( - name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/cancel", host="0.0.0.0", port=8015 -) -def cancel_finetuning_job(request: FineTuningJobIDRequest): - job = handle_cancel_finetuning_job(request) - return job - - -@register_microservice( - name="opea_service@finetuning", - endpoint="/v1/files", - host="0.0.0.0", - port=8015, -) -async def upload_training_files(request: UploadFileRequest = Depends(upload_file)): - uploadFileInfo = await handle_upload_training_files(request) - return uploadFileInfo - - -@register_microservice( - name="opea_service@finetuning", endpoint="/v1/finetune/list_checkpoints", host="0.0.0.0", port=8015 -) -def list_checkpoints(request: FineTuningJobIDRequest): - checkpoints = handle_list_finetuning_checkpoints(request) - return checkpoints - - -@register_microservice( - name="opea_service@finetuning", endpoint="/v1/finetune/extract_adapter", host="0.0.0.0", port=8015 -) -def extract_sub_adapter(request: ExtractAdapterParams): - return handle_extract_sub_adapter(request) - - -@register_microservice(name="opea_service@finetuning", endpoint="/v1/finetune/merge_adapter", 
host="0.0.0.0", port=8015) -def merge_adapter(request: MergeAdapterParams): - return handle_merge_adapter(request) - - -if __name__ == "__main__": - opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning_sqft/handlers.py b/comps/finetuning_sqft/handlers.py deleted file mode 100644 index cdb6b224aa..0000000000 --- a/comps/finetuning_sqft/handlers.py +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import random -import re -import time -import urllib.parse -import uuid -from pathlib import Path -from typing import Dict - -from fastapi import BackgroundTasks, File, Form, HTTPException, UploadFile -from pydantic_yaml import parse_yaml_file_as, to_yaml_file -from ray.job_submission import JobSubmissionClient - -from comps import CustomLogger -from comps.cores.proto.api_protocol import ( - FileObject, - FineTuningJob, - FineTuningJobCheckpoint, - FineTuningJobIDRequest, - FineTuningJobList, - UploadFileRequest, -) -from comps.finetuning_sqft.finetune_sqft_config import ( - ExtractAdapterParams, - FinetuneConfig, - FineTuningParams, - MergeAdapterParams, -) - -logger = CustomLogger("finetuning_handlers") - -DATASET_BASE_PATH = "datasets" -JOBS_PATH = "jobs" -OUTPUT_DIR = "output" - -if not os.path.exists(DATASET_BASE_PATH): - os.mkdir(DATASET_BASE_PATH) -if not os.path.exists(JOBS_PATH): - os.mkdir(JOBS_PATH) -if not os.path.exists(OUTPUT_DIR): - os.mkdir(OUTPUT_DIR) - -FineTuningJobID = str -CheckpointID = str -CheckpointPath = str - -CHECK_JOB_STATUS_INTERVAL = 5 # Check every 5 secs - -global ray_client -ray_client: JobSubmissionClient = None - -running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} -finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} -checkpoint_id_to_checkpoint_path: Dict[CheckpointID, CheckpointPath] = {} - - -# Add a background task to periodicly update job status -def update_job_status(job_id: FineTuningJobID): - while True: - job_status = ray_client.get_job_status(finetuning_job_to_ray_job[job_id]) - status = str(job_status).lower() - # Ray status "stopped" is OpenAI status "cancelled" - status = "cancelled" if status == "stopped" else status - logger.info(f"Status of job {job_id} is '{status}'") - running_finetuning_jobs[job_id].status = status - if status == "succeeded" or status == "cancelled" or status == "failed": - break - time.sleep(CHECK_JOB_STATUS_INTERVAL) - - -def handle_create_finetuning_jobs(request: FineTuningParams, background_tasks: BackgroundTasks): - base_model = request.model - train_file = request.training_file - train_file_path = os.path.join(DATASET_BASE_PATH, train_file) - - if not os.path.exists(train_file_path): - raise HTTPException(status_code=404, detail=f"Training file '{train_file}' not found!") - - finetune_config = FinetuneConfig(General=request.General, Dataset=request.Dataset, Training=request.Training) - finetune_config.General.base_model = base_model - finetune_config.Dataset.train_file = train_file_path - if request.hyperparameters is not None: - if request.hyperparameters.epochs != "auto": - finetune_config.Training.epochs = request.hyperparameters.epochs - - if request.hyperparameters.batch_size != "auto": - finetune_config.Training.batch_size = request.hyperparameters.batch_size - - if request.hyperparameters.learning_rate_multiplier != "auto": - finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier - - if os.getenv("HF_TOKEN", None): - finetune_config.General.config.token = 
os.getenv("HF_TOKEN", None) - - job = FineTuningJob( - id=f"ft-job-{uuid.uuid4()}", - model=base_model, - created_at=int(time.time()), - training_file=train_file, - hyperparameters={ - "n_epochs": finetune_config.Training.epochs, - "batch_size": finetune_config.Training.batch_size, - "learning_rate_multiplier": finetune_config.Training.learning_rate, - }, - status="running", - seed=random.randint(0, 1000) if request.seed is None else request.seed, - ) - finetune_config.General.output_dir = os.path.join(OUTPUT_DIR, job.id) - if os.getenv("DEVICE", ""): - logger.info(f"specific device: {os.getenv('DEVICE')}") - - finetune_config.Training.device = os.getenv("DEVICE") - if finetune_config.Training.device == "hpu": - if finetune_config.Training.resources_per_worker.HPU == 0: - # set 1 - finetune_config.Training.resources_per_worker.HPU = 1 - - finetune_config_file = f"{JOBS_PATH}/{job.id}.yaml" - to_yaml_file(finetune_config_file, finetune_config) - - global ray_client - ray_client = JobSubmissionClient() if ray_client is None else ray_client - - ray_job_id = ray_client.submit_job( - # Entrypoint shell command to execute - entrypoint=f"python finetune_runner.py --config_file {finetune_config_file}", - ) - - logger.info(f"Submitted Ray job: {ray_job_id} ...") - - running_finetuning_jobs[job.id] = job - finetuning_job_to_ray_job[job.id] = ray_job_id - - background_tasks.add_task(update_job_status, job.id) - - return job - - -def handle_extract_sub_adapter(request: ExtractAdapterParams): - fine_tuning_job_id = request.fine_tuning_job_id - finetune_config_file = f"{JOBS_PATH}/{fine_tuning_job_id}.yaml" - finetune_config = parse_yaml_file_as(FinetuneConfig, finetune_config_file) - - job = running_finetuning_jobs.get(fine_tuning_job_id) - if job is None: - raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") - finetuned_model_path = os.path.join(OUTPUT_DIR, fine_tuning_job_id) - assert finetuned_model_path == finetune_config.General.output_dir - if not os.path.exists(finetuned_model_path): - raise HTTPException( - status_code=404, - detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!", - ) - if job.status != "succeeded": - raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") - - if finetune_config.General.lora_config is None: - raise HTTPException( - status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!", - ) - if not finetune_config.General.lora_config.neural_lora_search: - raise HTTPException( - status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' did not enable NLS algorithm, " - f"there is no need to extract sub-adapters!", - ) - nncf_config_path = os.path.join(finetune_config.General.output_dir, "nncf_config.json") - if not os.path.exists(nncf_config_path): - raise HTTPException( - status_code=404, detail=f"The NNCF config file does not exist in the fine-tuning job '{fine_tuning_job_id}!" 
- ) - - from comps.finetuning_sqft.utils.extract_sub_adapter import main as extract_sub_adapter_main - - extract_sub_adapter_main( - adapter_model_path=finetuned_model_path, - nncf_config=nncf_config_path, - sub_adapter_version=request.sub_adapter_version, - custom_config=request.custom_config, - ) - - return fine_tuning_job_id - - -def handle_merge_adapter(request: MergeAdapterParams): - fine_tuning_job_id = request.fine_tuning_job_id - finetune_config_file = f"{JOBS_PATH}/{fine_tuning_job_id}.yaml" - finetune_config = parse_yaml_file_as(FinetuneConfig, finetune_config_file) - - job = running_finetuning_jobs.get(fine_tuning_job_id) - if job is None: - raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") - finetuned_model_path = os.path.join(OUTPUT_DIR, fine_tuning_job_id) - assert finetuned_model_path == finetune_config.General.output_dir - if not os.path.exists(finetuned_model_path): - raise HTTPException( - status_code=404, - detail=f"The fine-tuned model saved by the fine-tuning job '{fine_tuning_job_id}' was not found!", - ) - if job.status != "succeeded": - raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' has not completed!") - - if finetune_config.General.lora_config is None: - raise HTTPException( - status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not enable LoRA adapter fine-tuning!", - ) - - adapter_path = finetuned_model_path - adapter_version = request.adapter_version - if adapter_version is not None: - adapter_path = os.path.join(adapter_path, adapter_version) - if not os.path.exists(adapter_path): - raise HTTPException( - status_code=404, - detail=f"The fine-tuning job '{fine_tuning_job_id}' does not have a '{adapter_version}' adapter!", - ) - - from comps.finetuning_sqft.utils.merge import main as merge_adapter_main - - merge_adapter_main( - base_model_path=finetune_config.General.base_model, - adapter_model_path=adapter_path, - output_path=os.path.join(adapter_path, "merged_model"), - ) - - return fine_tuning_job_id - - -def handle_list_finetuning_jobs(): - finetuning_jobs_list = FineTuningJobList(data=list(running_finetuning_jobs.values()), has_more=False) - - return finetuning_jobs_list - - -def handle_retrieve_finetuning_job(request: FineTuningJobIDRequest): - fine_tuning_job_id = request.fine_tuning_job_id - - job = running_finetuning_jobs.get(fine_tuning_job_id) - if job is None: - raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") - return job - - -def handle_cancel_finetuning_job(request: FineTuningJobIDRequest): - fine_tuning_job_id = request.fine_tuning_job_id - - ray_job_id = finetuning_job_to_ray_job.get(fine_tuning_job_id) - if ray_job_id is None: - raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") - - global ray_client - ray_client = JobSubmissionClient() if ray_client is None else ray_client - ray_client.stop_job(ray_job_id) - - job = running_finetuning_jobs.get(fine_tuning_job_id) - job.status = "cancelled" - return job - - -async def save_content_to_local_disk(save_path: str, content): - save_path = Path(save_path) - try: - if isinstance(content, str): - with open(save_path, "w", encoding="utf-8") as file: - file.write(content) - else: - with save_path.open("wb") as fout: - content = await content.read() - fout.write(content) - except Exception as e: - logger.info(f"Write file failed. 
Exception: {e}") - raise Exception(status_code=500, detail=f"Write file {save_path} failed. Exception: {e}") - - -def handle_list_finetuning_checkpoints(request: FineTuningJobIDRequest): - fine_tuning_job_id = request.fine_tuning_job_id - - job = running_finetuning_jobs.get(fine_tuning_job_id) - if job is None: - raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") - output_dir = os.path.join(OUTPUT_DIR, job.id) - checkpoints = [] - if os.path.exists(output_dir): - # Iterate over the contents of the directory and add an entry for each - files = os.listdir(output_dir) - for file in files: # Loop over directory contents - file_path = os.path.join(output_dir, file) - if os.path.isdir(file_path) and file.startswith("checkpoint"): - steps = re.findall("\d+", file)[0] - checkpointsResponse = FineTuningJobCheckpoint( - id=f"ftckpt-{uuid.uuid4()}", # Generate a unique ID - created_at=int(time.time()), # Use the current timestamp - fine_tuned_model_checkpoint=file_path, # Directory path itself - fine_tuning_job_id=fine_tuning_job_id, - object="fine_tuning.job.checkpoint", - step_number=steps, - ) - checkpoints.append(checkpointsResponse) - if job.status == "succeeded": - checkpointsResponse = FineTuningJobCheckpoint( - id=f"ftckpt-{uuid.uuid4()}", # Generate a unique ID - created_at=int(time.time()), # Use the current timestamp - fine_tuned_model_checkpoint=output_dir, # Directory path itself - fine_tuning_job_id=fine_tuning_job_id, - object="fine_tuning.job.checkpoint", - ) - checkpoints.append(checkpointsResponse) - - return checkpoints - - -async def upload_file(purpose: str = Form(...), file: UploadFile = File(...)): - return UploadFileRequest(purpose=purpose, file=file) - - -async def handle_upload_training_files(request: UploadFileRequest): - file = request.file - if file is None: - raise HTTPException(status_code=404, detail="upload file failed!") - filename = urllib.parse.quote(file.filename, safe="") - save_path = os.path.join(DATASET_BASE_PATH, filename) - await save_content_to_local_disk(save_path, file) - - fileBytes = os.path.getsize(save_path) - fileInfo = FileObject( - id=f"file-{uuid.uuid4()}", - object="file", - bytes=fileBytes, - created_at=int(time.time()), - filename=filename, - purpose="fine-tune", - ) - - return fileInfo diff --git a/comps/finetuning_sqft/launch.sh b/comps/finetuning_sqft/launch.sh deleted file mode 100644 index 034c82f3d2..0000000000 --- a/comps/finetuning_sqft/launch.sh +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -if [[ -n "$RAY_PORT" ]];then - ray start --head --port $RAY_PORT --dashboard-host=0.0.0.0 -else - ray start --head --dashboard-host=0.0.0.0 - export RAY_PORT=8265 -fi - -export RAY_ADDRESS=http://localhost:$RAY_PORT -python finetuning_sqft_service.py diff --git a/comps/finetuning_sqft/llm_on_ray/common/__init__.py b/comps/finetuning_sqft/llm_on_ray/common/__init__.py deleted file mode 100644 index 954b7baa4b..0000000000 --- a/comps/finetuning_sqft/llm_on_ray/common/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -# Copyright 2023 The LLM-on-Ray Authors. 
- -from .torch_config import TorchConfig diff --git a/comps/finetuning_sqft/llm_on_ray/common/common.py b/comps/finetuning_sqft/llm_on_ray/common/common.py deleted file mode 100644 index ac01ae12e1..0000000000 --- a/comps/finetuning_sqft/llm_on_ray/common/common.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -# Copyright 2023 The LLM-on-Ray Authors. - -import glob -import importlib -import os - -from comps import CustomLogger - -logger = CustomLogger("llm_on_ray") - - -def import_all_modules(basedir, prefix=None): - all_py_files = glob.glob(basedir + "/*.py") - modules = [os.path.basename(f) for f in all_py_files] - - for module in modules: - if not module.startswith("_"): - module = module.rstrip(".py") - if prefix is None: - module_name = module - else: - module_name = f"{prefix}.{module}" - try: - importlib.import_module(module_name) - except Exception: - logger.warning(f"import {module_name} error", exc_info=True) diff --git a/comps/finetuning_sqft/llm_on_ray/common/torch_config.py b/comps/finetuning_sqft/llm_on_ray/common/torch_config.py deleted file mode 100644 index 9e3f48a7c3..0000000000 --- a/comps/finetuning_sqft/llm_on_ray/common/torch_config.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -# Copyright 2023 The LLM-on-Ray Authors. - -import os -import sys -from dataclasses import dataclass -from typing import Optional - -from ray.train._internal.worker_group import WorkerGroup -from ray.train.torch.config import TorchConfig as RayTorchConfig -from ray.train.torch.config import _TorchBackend - -# The package importlib_metadata is in a different place, depending on the Python version. -if sys.version_info < (3, 8): - import importlib_metadata -else: - import importlib.metadata as importlib_metadata - - -@dataclass -class TorchConfig(RayTorchConfig): - device: Optional[str] = None - - @property - def backend_cls(self): - EnableCCLBackend.device = self.device - return EnableCCLBackend - - -def xpu_libs_import(): - """Try to import IPEX and oneCCL.""" - try: - import intel_extension_for_pytorch - except ImportError: - raise ImportError("Please install intel_extension_for_pytorch") - try: - ccl_version = importlib_metadata.version("oneccl_bind_pt") - if ccl_version >= "1.12": - import oneccl_bindings_for_pytorch - else: - import torch_ccl - except ImportError as ccl_not_exist: - raise ImportError("Please install torch-ccl") from ccl_not_exist - - -def hpu_libs_import(): - """Try to import habana frameworkfs for torch.""" - try: - import habana_frameworks.torch # noqa: F401 - except ImportError as habana_not_exist: - raise ImportError("Please install habana_frameworks") from habana_not_exist - - -def _set_torch_distributed_env_vars(device): - if device is not None: - os.environ["ACCELERATE_TORCH_DEVICE"] = device - - -class EnableCCLBackend(_TorchBackend): - device: Optional[str] = None - - def on_start(self, worker_group: WorkerGroup, backend_config: RayTorchConfig): - libs_import = hpu_libs_import if self.device is not None and self.device.startswith("hpu") else xpu_libs_import - for i in range(len(worker_group)): - worker_group.execute_single_async(i, libs_import) - super().on_start(worker_group, backend_config) - - def on_training_start(self, worker_group: WorkerGroup, backend_config: RayTorchConfig): - super().on_training_start(worker_group, backend_config) - worker_group.execute(_set_torch_distributed_env_vars, self.device) diff --git 
a/comps/finetuning_sqft/llm_on_ray/finetune/__init__.py b/comps/finetuning_sqft/llm_on_ray/finetune/__init__.py deleted file mode 100644 index 0262e494a9..0000000000 --- a/comps/finetuning_sqft/llm_on_ray/finetune/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -# Copyright 2023 The LLM-on-Ray Authors. diff --git a/comps/finetuning_sqft/llm_on_ray/finetune/data_process.py b/comps/finetuning_sqft/llm_on_ray/finetune/data_process.py deleted file mode 100644 index 07b12d71e1..0000000000 --- a/comps/finetuning_sqft/llm_on_ray/finetune/data_process.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -# Copyright 2023 The LLM-on-Ray Authors. - -import copy -import math -import random -import re -from dataclasses import dataclass -from itertools import chain -from typing import Dict, List, Tuple - -import torch -from torch.utils.data import Dataset -from transformers import BatchEncoding, DataCollatorWithPadding - -IGNORE_INDEX = -100 - - -class InstructionDataProcessor: - # We used the following prompts for fine-tuning the Alpaca model. You can find reference doc form this URL(https://github.com/tatsu-lab/stanford_alpaca/blob/main/README.md#data-release) - def __init__(self, config, tokenizer): - self.tokenizer = tokenizer - self.end = tokenizer.eos_token - self.intro = ( - "Below is an instruction that describes a task. Write a response that appropriately completes the request." - ) - self.instruction = "### Instruction:\n" - self.input = "### Input:\n" - self.response = "### Response:\n" - self.padding_side = config["Dataset"].get("padding_side", "right") - self.truncation_side = config["Dataset"].get("truncation_side", "right") - self.max_length = self.max_seq_length = config["Dataset"].get("max_length", 512) - self.max_source_length = config["Dataset"].get("max_source_length", 384) - self.truncation = config["Dataset"].get("truncation", True) - self.padding = config["Dataset"].get("padding", True) - self.mask_input = config["Dataset"].get("mask_input", True) - self.mask_response = config["Dataset"].get("mask_response", True) - - def make_prompt(self, examples): - prompts = {} - prompts["prompt_sources"] = [] - prompts["prompt_targets"] = [] - for rec in examples: - instruction = rec["instruction"] - response = rec["input"] - context = rec.get("output") - if not instruction: - raise ValueError(f"Expected an instruction in: {rec}") - # if not response: - # raise ValueError(f"Expected a response in: {rec}") - if context: - prompt = ( - self.intro - + self.end - + "\n" - + self.instruction - + instruction - + self.input - + context - + self.end - + "\n" - + self.response - ) - prompts["prompt_sources"].append(prompt) - else: - prompt = self.intro + self.end + "\n" + self.instruction + instruction + self.end + "\n" + self.response - prompts["prompt_sources"].append(prompt) - prompt_response = response + self.end - prompts["prompt_targets"].append(prompt_response) - return prompts - - def __truncate_sequences(self, sequences, max_length): - """ - Copied from https://github.com/intel/intel-extension-for-transformers/blob/ae54f698b73a66e5729427cb19f69c33e1a5c34d/intel_extension_for_transformers/transformers/llm/finetuning/data_utils.py#L40 - """ - words_to_cut = sum(list(map(len, sequences))) - max_length - if words_to_cut <= 0: - return sequences - - while words_to_cut > 0 and len(sequences) > 0: - words_to_cut -= len(sequences[0]) - sequences = sequences[1:] 
- return sequences - - def tokenize_by_neural_chat(self, examples): - """ - Copied from https://github.com/intel/intel-extension-for-transformers/blob/ae54f698b73a66e5729427cb19f69c33e1a5c34d/intel_extension_for_transformers/transformers/llm/finetuning/data_utils.py#L225 - The only differences are: - - using our own prompt style - - add left or right padding and truncation - - add mask_input and mask_response - """ - keys = list(examples.data.keys()) - if len(keys) != 2: - raise ValueError("Unsupported dataset format") - assistant_tokens = self.tokenizer.tokenize(self.response) - header = self.intro + self.end + "\n" - - examples["input_ids"] = [] - examples["labels"] = [] - examples["attention_mask"] = [] - for instruction, response in zip(examples[keys[0]], examples[keys[1]]): - convs = re.findall( - r"{0}.*?{2}|{1}.*?{2}".format(self.instruction, self.response, self.end), - instruction, - re.DOTALL, - ) - convs_tokens = [self.tokenizer.tokenize(conv) + self.tokenizer.tokenize("\n") for conv in convs] - header_tokens = self.tokenizer.tokenize(header) + self.tokenizer.tokenize("\n") - max_input = self.max_source_length - len(header_tokens) - len(assistant_tokens) - truncated_convs = self.__truncate_sequences(convs_tokens, max_input) - if len(truncated_convs) == 0: - truncated_convs = [convs_tokens[-1][: max_input - 3] + convs_tokens[-1][-3:]] - - prompt_tokens = [header_tokens] + truncated_convs + [assistant_tokens] - prompt_ids = [self.tokenizer.convert_tokens_to_ids(prompt_token) for prompt_token in prompt_tokens] - prompt_ids = list(chain(*prompt_ids)) - - resp_ids = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(response.strip())) - # keep last and eos_id - max_resp = self.max_seq_length - len(prompt_ids) - 1 - - # truncating response - if len(resp_ids) > max_resp: - if self.truncation_side == "right": - resp_ids = resp_ids[: max_resp - 1] + resp_ids[-1:] - else: - resp_ids = resp_ids[-max_resp:] - - # masking - input_ids = prompt_ids + resp_ids + [self.tokenizer.eos_token_id] - if self.mask_input: - labels = [IGNORE_INDEX] * len(prompt_ids) + resp_ids + [self.tokenizer.eos_token_id] - elif self.mask_response: - labels = prompt_ids + [IGNORE_INDEX] * len(resp_ids) + [self.tokenizer.eos_token_id] - else: - labels = input_ids - - # padding - input_len = len(input_ids) - pad_len = self.max_seq_length - input_len - if self.padding_side == "right": - input_ids = input_ids + [self.tokenizer.eos_token_id] * pad_len - labels = labels + [IGNORE_INDEX] * pad_len - attention_mask = [1] * input_len + [0] * pad_len - else: - input_ids = [self.tokenizer.eos_token_id] * pad_len + input_ids - labels = [IGNORE_INDEX] * pad_len + labels - attention_mask = [0] * pad_len + [1] * input_len - - assert len(input_ids) == self.max_seq_length - assert len(prompt_ids) <= self.max_source_length - assert len(labels) == len(input_ids) == len(attention_mask) - - examples["input_ids"].append(torch.tensor(input_ids)) - examples["labels"].append(labels) - examples["attention_mask"].append(attention_mask) - - return examples - - def tokenize(self, examples): - keys = list(examples.data.keys()) - if len(keys) != 2: - raise ValueError("Unsupported dataset format") - - examples["input_ids"] = [] - examples["labels"] = [] - examples["attention_mask"] = [] - for s, t in zip(examples[keys[0]], examples[keys[1]]): - results = self.tokenizer( - s + t, - padding=self.padding, - truncation=self.truncation, - return_tensors=None, - max_length=self.max_length, - ) - - input_ids = results["input_ids"] - input_len = 
len(input_ids) - labels = copy.deepcopy(input_ids) - if self.mask_input or self.mask_response: - sources_tokenized = self.tokenizer( - s, - padding=False, - truncation=True, - return_tensors=None, - max_length=self.max_length, - ) - input_id_len = len(sources_tokenized["input_ids"]) - # mask input - if self.mask_input: - labels[:input_id_len] = [IGNORE_INDEX] * input_id_len - # mask response - if self.mask_response: - labels[input_id_len:input_len] = [IGNORE_INDEX] * (input_len - input_id_len) - - examples["input_ids"].append(results["input_ids"]) - examples["labels"].append(labels) - examples["attention_mask"].append(results["attention_mask"]) - return examples - - -class PretrainingDataProcessor: - def __init__(self, config, tokenizer): - self.tokenizer = tokenizer - self.max_length = self.max_seq_length = config["Dataset"].get("max_length", 512) - self.truncation = config["Dataset"].get("truncation", True) - self.padding = config["Dataset"].get("padding", True) - - def tokenize(self, examples): - keys = list(examples.data.keys()) - if len(keys) != 1 and "text" not in keys: - raise ValueError("Unsupported dataset format") - - key = keys[0] if len(keys) == 1 else "text" - examples["input_ids"] = [] - examples["labels"] = [] - examples["attention_mask"] = [] - for exp in examples[key]: - results = self.tokenizer( - exp, - padding=self.padding, - truncation=self.truncation, - return_tensors=None, - max_length=self.max_length, - ) - - input_ids = results["input_ids"] - labels = copy.deepcopy(input_ids) - examples["input_ids"].append(results["input_ids"]) - examples["labels"].append(labels) - examples["attention_mask"].append(results["attention_mask"]) - return examples - - -class TrainDatasetForCE(Dataset): - def __init__(self, dataset, args, tokenizer): - self.dataset = dataset - self.tokenizer = tokenizer - self.args = args - self.total_len = len(self.dataset) - - def create_one_example(self, qry_encoding: str, doc_encoding: str): - item = self.tokenizer.encode_plus( - qry_encoding, - doc_encoding, - truncation=True, - max_length=self.args.get("max_length", 512), - padding=False, - ) - return item - - def __len__(self): - return self.total_len - - def __getitem__(self, item) -> List[BatchEncoding]: - query = self.dataset[item]["query"] - pos = random.choice(self.dataset[item]["pos"]) - train_group_size = self.args.get("train_group_size", 8) - if len(self.dataset[item]["neg"]) < train_group_size - 1: - num = math.ceil((train_group_size - 1) / len(self.dataset[item]["neg"])) - negs = random.sample(self.dataset[item]["neg"] * num, train_group_size - 1) - else: - negs = random.sample(self.dataset[item]["neg"], train_group_size - 1) - - batch_data = [] - batch_data.append(self.create_one_example(query, pos)) - for neg in negs: - batch_data.append(self.create_one_example(query, neg)) - - return batch_data - - -@dataclass -class GroupCollator(DataCollatorWithPadding): - def __call__(self, features) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: - if isinstance(features[0], list): - features = sum(features, []) - return super().__call__(features) - - -class TrainDatasetForEmbedding(Dataset): - def __init__(self, dataset, args, tokenizer): - self.dataset = dataset - self.tokenizer = tokenizer - self.args = args - self.total_len = len(self.dataset) - - def __len__(self): - return self.total_len - - def __getitem__(self, item) -> Tuple[str, List[str]]: - query = self.dataset[item]["query"] - if self.args["query_instruction_for_retrieval"] is not None: - query = 
self.args["query_instruction_for_retrieval"] + query - - passages = [] - - assert isinstance(self.dataset[item]["pos"], list) - pos = random.choice(self.dataset[item]["pos"]) - passages.append(pos) - - train_group_size = self.args.get("train_group_size", 8) - if len(self.dataset[item]["neg"]) < train_group_size - 1: - num = math.ceil((train_group_size - 1) / len(self.dataset[item]["neg"])) - negs = random.sample(self.dataset[item]["neg"] * num, train_group_size - 1) - else: - negs = random.sample(self.dataset[item]["neg"], train_group_size - 1) - passages.extend(negs) - - if self.args["passage_instruction_for_retrieval"] is not None: - passages = [self.args["passage_instruction_for_retrieval"] + p for p in passages] - return query, passages - - -@dataclass -class EmbedCollator(DataCollatorWithPadding): - """Wrapper that does conversion from List[Tuple[encode_qry, encode_psg]] to List[qry], List[psg] - and pass batch separately to the actual collator. - - Abstract out data detail for the model. - """ - - query_max_len: int = 32 - passage_max_len: int = 128 - - def __call__(self, features): - query = [f[0] for f in features] - passage = [f[1] for f in features] - - if isinstance(query[0], list): - query = sum(query, []) - if isinstance(passage[0], list): - passage = sum(passage, []) - - q_collated = self.tokenizer( - query, - padding=self.padding, - truncation=True, - max_length=self.query_max_len, - return_tensors="pt", - ) - d_collated = self.tokenizer( - passage, - padding=self.padding, - truncation=True, - max_length=self.passage_max_len, - return_tensors="pt", - ) - return {"query": q_collated, "passage": d_collated} diff --git a/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py b/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py deleted file mode 100644 index 8433cbacb8..0000000000 --- a/comps/finetuning_sqft/llm_on_ray/finetune/finetune.py +++ /dev/null @@ -1,609 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -# Copyright 2023 The LLM-on-Ray Authors. - -#!/usr/bin/env python - -import argparse -import os -import sys -from itertools import chain -from typing import Any, Dict, Optional - -import datasets -import ray -import torch -import transformers -from peft import LoraConfig, get_peft_model -from pydantic_yaml import parse_yaml_raw_as -from ray.air import FailureConfig, RunConfig -from ray.air.config import ScalingConfig -from ray.train.torch import TorchTrainer -from transformers import Trainer, TrainingArguments - -from comps import CustomLogger -from comps.finetuning_sqft.finetune_sqft_config import FinetuneConfig -from comps.finetuning_sqft.llm_on_ray import common -from comps.finetuning_sqft.llm_on_ray.finetune.data_process import ( - EmbedCollator, - GroupCollator, - InstructionDataProcessor, - PretrainingDataProcessor, - TrainDatasetForCE, - TrainDatasetForEmbedding, -) -from comps.finetuning_sqft.llm_on_ray.finetune.modeling import BiEncoderModel, CrossEncoder - -logger = CustomLogger("llm_on_ray/finetune") - -try: - from nncf.experimental.torch.nas.bootstrapNAS.training.model_creator_helpers import ( - create_compressed_model_from_algo_names, - ) - from nncf.torch.model_creation import create_nncf_network - - from comps.finetuning_sqft.utils.nncf_config_process import load_nncf_config - - is_nncf_available = True -except ImportError: - is_nncf_available = False - logger.info("NNCF is not installed. 
Please install it if necessary.") - - -def adapt_transformers_to_device(config: Dict): - device = config["Training"]["device"] - if device in ["hpu"]: - from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - - # adapt transformers to gaudi - adapt_transformers_to_gaudi() - - -def set_seed(config: Dict): - seed = config["Training"].get("seed", None) - if seed is None: - return - device = config["Training"]["device"] - if device in ["cpu", "gpu"]: - from accelerate.utils import set_seed as _set_seed - - _set_seed(seed) - elif device in ["hpu"]: - from optimum.habana.utils import set_seed as _set_seed - - _set_seed(seed) - - -def convert_to_training_args(cls, config: Dict): - device = config["Training"]["device"] - accelerate_mode = config["Training"]["accelerate_mode"] - save_strategy = config["General"]["save_strategy"] - - args = { - "output_dir": config["General"]["output_dir"], - "report_to": config["General"]["report_to"], - "resume_from_checkpoint": config["General"]["resume_from_checkpoint"], - "gradient_checkpointing": config["General"]["enable_gradient_checkpointing"], - "save_strategy": save_strategy if save_strategy != "False" else "no", - "bf16": config["Training"]["mixed_precision"] == "bf16", - "num_train_epochs": config["Training"]["epochs"], - "per_device_train_batch_size": config["Training"]["batch_size"], - "per_device_eval_batch_size": config["Training"]["batch_size"], - "optim": config["Training"]["optimizer"], - "learning_rate": config["Training"]["learning_rate"], - "logging_steps": config["Training"]["logging_steps"], - "lr_scheduler_type": config["Training"]["lr_scheduler"], - "weight_decay": config["Training"]["weight_decay"], - "gradient_accumulation_steps": config["Training"]["gradient_accumulation_steps"], - "do_train": True, - } - - # set attr do_eval - vf = config["Dataset"].get("validation_file", None) - vsp = config["Dataset"].get("validation_split_percentage", 0) - if vf is not None or (vsp / 100 > 0.0 and vsp / 100 < 1.0): - args.update({"do_eval": True}) - - # set attr max_steps - if config["Training"]["max_train_steps"] is not None: - args.update({"max_steps": config["Training"]["max_train_steps"]}) - - # set attr for device cpu - if device == "cpu": - if hasattr(cls, "use_cpu"): - args.update({"use_cpu": True}) - if hasattr(cls, "no_cuda"): - args.update({"no_cuda": True}) - # To be tested: whether it works when enabling Neural Lora Search (using NNCF) - args.update({"use_ipex": True}) - - # set attr 'deepspeed' - if accelerate_mode == "DEEPSPEED": - args.update({"deepspeed": config["Training"]["deepspeed_config_file"]}) - - # set attr for FSDP - # if accelerate_mode == "FSDP": - # args.updatwe({}) - - # set attr for Intel Gaudi - if device == "hpu": - args.update({"use_habana": True}) - args.update({"use_lazy_mode": config["Training"]["hpu_execution_mode"] == "lazy"}) - args.update({"pipelining_fwd_bwd": True}) - - return cls(**args) - - -def convert_dtype(dtype: str) -> Optional[torch.dtype]: - supported_dtypes = { - "fp16": torch.float16, - "bf16": torch.bfloat16, - "no": None, - } - return supported_dtypes[dtype] - - -def load_tokenizer(config: Dict): - if config["General"].get("tokenizer_name") is not None: - tokenizer_name = config["General"].get("tokenizer_name") - else: - tokenizer_name = config["General"]["base_model"] - load_config = config["General"].get("config", {}) - # default padding side is right - padding_side = config["Dataset"].get("padding_side", "right") - # default truncation side is right - 
truncation_side = config["Dataset"].get("truncation_side", "right") - tokenizer = transformers.AutoTokenizer.from_pretrained( - tokenizer_name, padding_side=padding_side, truncation_side=truncation_side, **load_config - ) - return tokenizer - - -def load_dataset(config: Dict): - dataset_file = config["Dataset"].get("train_file", None) - if dataset_file is None: - return - - if os.path.exists(dataset_file): - # load from local file - def local_load(name, **load_config): - if os.path.isfile(name): - file = os.path.basename(os.path.abspath(name)) - path = os.path.dirname(os.path.abspath(name)) - dataset = datasets.load_dataset(path, data_files=file, **load_config) - else: - dataset = datasets.load_dataset(name, **load_config) - return dataset["train"] - - train_dataset = local_load(dataset_file) - validation_file = config["Dataset"].get("validation_file", None) - if validation_file is not None: - validation_dataset = local_load(validation_file) - return datasets.DatasetDict({"train": train_dataset, "validation": validation_dataset}) - - validation_split_percentage = config["Dataset"].get("validation_split_percentage", 0) - if validation_split_percentage / 100 > 0.0 and validation_split_percentage / 100 < 1.0: - dataset_dict = train_dataset.train_test_split(test_size=validation_split_percentage / 100) - dataset_dict["validation"] = dataset_dict["test"] - return dataset_dict - - return datasets.DatasetDict({"train": train_dataset}) - else: - # try to download and load dataset from huggingface.co - load_config = config["General"].get("config", {}) - use_auth_token = load_config.get("token", None) - raw_dataset = datasets.load_dataset(dataset_file, token=use_auth_token) - - validation_split_percentage = config["Dataset"].get("validation_split_percentage", 0) - if "validation" not in raw_dataset.keys() and ( - validation_split_percentage / 100 > 0.0 and validation_split_percentage / 100 < 1.0 - ): - dataset_dict = raw_dataset["train"].train_test_split(test_size=validation_split_percentage / 100) - dataset_dict["validation"] = dataset_dict["test"] - return dataset_dict - - return raw_dataset - - -def tokenize_dataset(config: Dict, tokenizer, dataset): - task = config["General"].get("task", "instruction_tuning") - if task == "instruction_tuning": - group = config["Dataset"].get("group", True) - block_size = config["Dataset"].get("block_size", 512) - tokenizer.pad_token = tokenizer.eos_token if not tokenizer.pad_token else tokenizer.pad_token - - processor = InstructionDataProcessor(config, tokenizer) - - for key in dataset: - prompts = processor.make_prompt(dataset[key]) - dataset[key] = datasets.Dataset.from_dict(prompts) - - column_names = list(dataset["train"].features) - tokenize_fn = ( - processor.tokenize_by_neural_chat - if config["Dataset"].get("data_preprocess_type", "") == "neural_chat" - else processor.tokenize - ) - - tokenized_dataset = dataset.map( - tokenize_fn, - remove_columns=column_names, - batched=True, - load_from_cache_file=False, - desc="Tokenize dataset", - ) - - if group: - - def group_texts(examples): - # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} - total_length = len(concatenated_examples[list(examples.keys())[0]]) - # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can - # customize this part to your needs. - if total_length >= block_size: - total_length = (total_length // block_size) * block_size - # Split by chunks of max_len. 
- result = { - k: [t[i : i + block_size] for i in range(0, total_length, block_size)] - for k, t in concatenated_examples.items() - } - return result - - tokenized_dataset = tokenized_dataset.map( - group_texts, - batched=True, - load_from_cache_file=False, - desc=f"Grouping texts in chunks of {block_size}", - ) - - return tokenized_dataset - elif task == "pretraining": - group = True - block_size = config["Dataset"].get("block_size", 512) - tokenizer.pad_token = tokenizer.eos_token if not tokenizer.pad_token else tokenizer.pad_token - - processor = PretrainingDataProcessor(config, tokenizer) - - column_names = list(dataset["train"].features) - - tokenized_dataset = dataset.map( - processor.tokenize, - remove_columns=column_names, - batched=True, - load_from_cache_file=False, - desc="Tokenize dataset", - ) - - if group: - - def group_texts(examples): - # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} - total_length = len(concatenated_examples[list(examples.keys())[0]]) - # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can - # customize this part to your needs. - if total_length >= block_size: - total_length = (total_length // block_size) * block_size - # Split by chunks of max_len. - result = { - k: [t[i : i + block_size] for i in range(0, total_length, block_size)] - for k, t in concatenated_examples.items() - } - return result - - tokenized_dataset = tokenized_dataset.map( - group_texts, - batched=True, - load_from_cache_file=False, - desc=f"Grouping texts in chunks of {block_size}", - ) - - return tokenized_dataset - elif task == "rerank": - dataset["train"] = TrainDatasetForCE(dataset["train"], config["Dataset"], tokenizer) - return dataset - elif task == "embedding": - dataset["train"] = TrainDatasetForEmbedding(dataset["train"], config["Dataset"], tokenizer) - return dataset - else: - raise NotImplementedError(f"Unsupported task {task}, only support instruction_tuning, rerank, embedding now.") - - -def prepare_data_collator(config: Dict, tokenizer): - task = config["General"].get("task", "instruction_tuning") - if task == "instruction_tuning" or task == "pretraining": - return transformers.DataCollatorForLanguageModeling( - tokenizer=tokenizer, mlm=False, return_tensors="pt", pad_to_multiple_of=8 - ) - elif task == "rerank": - return GroupCollator(tokenizer) - elif task == "embedding": - return EmbedCollator( - tokenizer=tokenizer, - padding=config["Dataset"]["padding"], - query_max_len=config["Dataset"]["query_max_len"], - passage_max_len=config["Dataset"]["passage_max_len"], - ) - else: - raise NotImplementedError(f"Unsupported task {task}, only support instruction_tuning, rerank, embedding now.") - - -def load_model(config: Dict): - model_name = config["General"]["base_model"] - model_dtype = convert_dtype(config["Training"].get("mixed_precision", "no")) - model_config = config["General"].get("config", {}) - task = config["General"].get("task", "instruction_tuning") - compression_ctrl = None - if task == "instruction_tuning" or task == "pretraining": - model = transformers.AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=model_dtype, **model_config) - lora_config = config["General"].get("lora_config", None) - if lora_config and task != "pretraining": - neural_lora_search = lora_config.pop("neural_lora_search", False) - target_module_groups = lora_config.pop("target_module_groups", None) - search_space = lora_config.pop("search_space", None) - nncf_config = 
lora_config.pop("nncf_config", None) - if not lora_config.get("sparse_adapter", False): - # To avoid the error in the following case: - # not using SparsePEFT and not having the peft library that supports SparsePEFT installed. - lora_config.pop("sparse_adapter", False) - peft_config = LoraConfig(**lora_config) - model = get_peft_model(model, peft_config) - - # Neural LoRA Search (NLS) - if neural_lora_search: - if not is_nncf_available: - raise ImportError("NNCF is not installed. Please install it.") - nncf_config = load_nncf_config( - config=config, - model=model, - target_module_groups=target_module_groups, - search_space=search_space, - nncf_config=nncf_config, - ) - model = create_nncf_network(model, nncf_config) - compression_ctrl, model = create_compressed_model_from_algo_names( - model, nncf_config, algo_names=["progressive_shrinking"] - ) - elif task == "rerank": - model = CrossEncoder.from_pretrained( - config["Dataset"].get("train_group_size", 8), - config["Training"]["batch_size"], - model_name, - from_tf=bool(".ckpt" in model_name), - config=model_config, - ) - elif task == "embedding": - should_concat = False - if ( - config["Dataset"]["query_max_len"] == config["Dataset"]["passage_max_len"] - and config["Dataset"]["padding"] == "max_length" - ): - should_concat = True - if config["Training"]["device"] == "hpu" and not should_concat: - raise ValueError("please set query_max_len==passage_max_len and padding='max_length' for hpu.") - - if config["Training"].get("embedding_training_config", None) is not None: - model = BiEncoderModel( - model_name=model_name, should_concat=should_concat, **config["Training"]["embedding_training_config"] - ) - else: - model = BiEncoderModel(model_name=model_name, should_concat=should_concat) - else: - raise NotImplementedError(f"Unsupported task {task}, only support instruction_tuning, rerank, embedding now.") - - egc = config["General"].get("enable_gradient_checkpointing", False) - if egc: - model.enable_input_require_grads() - model.gradient_checkpointing_enable() - model.config.use_cache = False - - model.to(dtype=model_dtype, device=torch.device(config["Training"]["device"])) - - return model, compression_ctrl - - -def get_trainer(config: Dict, model, tokenizer, tokenized_dataset, data_collator, compression_ctrl=None): - device = config["Training"]["device"] - if device in ["cpu", "gpu", "cuda"]: - training_args = convert_to_training_args(TrainingArguments, config) - trainer_args = { - "model": model, - "args": training_args, - "train_dataset": tokenized_dataset["train"], - "eval_dataset": ( - tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None - ), - "tokenizer": tokenizer, - "data_collator": data_collator, - } - if compression_ctrl is not None: - trainer_args["compression_ctrl"] = compression_ctrl - - trainer = Trainer(**trainer_args) - return training_args, trainer - elif device in ["hpu"]: - assert compression_ctrl is None - from optimum.habana import GaudiConfig - from optimum.habana.transformers import GaudiTrainer, GaudiTrainingArguments - - # If gaudi_config_name is provided, load gaudi_config from huggingface model hub(https://huggingface.co/Habana), otherwise use default gaudi_config - gaudi_config_name = config["General"].get("gaudi_config_name", None) - if gaudi_config_name is not None: - gaudi_config = GaudiConfig.from_pretrained(gaudi_config_name) - else: - gaudi_config = GaudiConfig() - gaudi_config.use_fused_adam = True - gaudi_config.use_fused_clip_norm = True - - training_args = 
convert_to_training_args(GaudiTrainingArguments, config) - trainer = GaudiTrainer( - model=model, - args=training_args, - gaudi_config=gaudi_config, - train_dataset=tokenized_dataset["train"], - eval_dataset=tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None, - tokenizer=tokenizer, - data_collator=data_collator, - ) - return training_args, trainer - return None - - -def train_func(config: Dict[str, Any]): - os.chdir(config["cwd"]) - - adapt_transformers_to_device(config) - - set_seed(config) - - tokenizer = load_tokenizer(config) - - dataset = load_dataset(config) - - max_train_samples = config["Dataset"].get("max_train_samples", 0) - if 0 < max_train_samples < len(dataset["train"]): - dataset["train"] = dataset["train"].select(range(max_train_samples)) - - max_eval_samples = config["Dataset"].get("max_eval_samples", 0) - if "validation" in dataset and 0 < max_eval_samples < len(dataset["validation"]): - dataset["validation"] = dataset["validation"].select(range(max_eval_samples)) - - tokenized_dataset = tokenize_dataset(config, tokenizer, dataset) - - data_collator = prepare_data_collator(config, tokenizer) - - model, compression_ctrl = load_model(config) - - training_args, trainer = get_trainer( - config, model, tokenizer, tokenized_dataset, data_collator, compression_ctrl=compression_ctrl - ) - - logger.info("train start") - trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) - trainer.save_model() - logger.info("train finish") - - -def get_finetune_config(): - parser = argparse.ArgumentParser(description="Finetune a transformers model on a causal language modeling task") - parser.add_argument( - "--config_file", - type=str, - required=True, - default=None, - help="The name of the dataset to use (via the datasets library).", - ) - - # Print help if no arguments were provided - if len(sys.argv) == 1: - parser.print_help(sys.stderr) - sys.exit(1) - - args = parser.parse_args() - config_file = args.config_file - - with open(config_file) as f: - finetune_config = parse_yaml_raw_as(FinetuneConfig, f) - return finetune_config.dict() - - -def main(external_config=None): - if not external_config: - config = get_finetune_config() - else: - config = external_config - - config["cwd"] = os.getcwd() - - num_training_workers = config["Training"].get("num_training_workers") - resources_per_worker = config["Training"].get("resources_per_worker") - - if num_training_workers > 1 and config["Training"].get("accelerate_mode", None) is None: - config["Training"]["accelerate_mode"] = "DDP" # will use DDP to accelerate if no method specified - - ccl_worker_count = 1 - device = config["Training"]["device"] - if device != "cpu": - ccl_worker_count = num_training_workers - - if not ray.is_initialized(): - runtime_env = { - "env_vars": { - "OMP_NUM_THREADS": str(resources_per_worker["CPU"]), - "CCL_ZE_IPC_EXCHANGE": "sockets", - "CCL_WORKER_COUNT": str(ccl_worker_count), - "CCL_LOG_LEVEL": "info", - "FI_TCP_IFACE": "lo", - "FI_PROVIDER": "tcp", - } - } - - if config["General"]["gpt_base_model"] is True: - runtime_env["pip"] = ["transformers==4.26.0"] - - if device == "gpu": - num_cpus = resources_per_worker["CPU"] * num_training_workers + 1 # additional 1 for head worker - ray.init(num_cpus=num_cpus, runtime_env=runtime_env) - else: - ray.init(runtime_env=runtime_env) - - logger.info(f"ray available resources = {ray.available_resources()}") - - use_gpu = True if device == "gpu" else False - scaling_config = ScalingConfig( - 
num_workers=num_training_workers, - use_gpu=use_gpu, - resources_per_worker=resources_per_worker, - placement_strategy="SPREAD", - ) - - # if try to use Intel GPU, convert device to 'xpu' - # due to accelerate internal use 'xpu' represent Intel GPU - if device == "gpu": - from accelerate.utils import is_xpu_available - - if is_xpu_available(): - device = "xpu" - - # Jinjie: commented out the code from line 572 to 581 to temporarily disable CCL for debugging purposes. - # if config.get("torch_config", None) is None: - # backend = None - # if device == "cpu" or device == "xpu" or device == "gpu": - # backend = "ccl" - # elif device == "hpu": - # backend = "hccl" - # torch_config = common.TorchConfig(backend=backend, device=device) - # else: - # customer_torch_config = config.get("torch_config") - # torch_config = common.TorchConfig(**customer_torch_config, device=device) - - if config.get("failure_config", None) is None: - failure_config = FailureConfig() - else: - customer_failure_config = config.get("failure_config") - failure_config = FailureConfig(**customer_failure_config) - - if config.get("run_config", None) is None: - run_config = RunConfig(failure_config=failure_config) - else: - customer_run_config = config.get("run_config") - if customer_run_config.get("failure_config", None) is None: - customer_run_config["failure_config"] = failure_config - run_config = RunConfig(**customer_run_config) - - trainer = TorchTrainer( - train_func, - train_loop_config=config, - scaling_config=scaling_config, - # torch_config=torch_config, # Jinjie: check line 571. - run_config=run_config, - ) - results = trainer.fit() - if external_config is not None: - return results - - -if __name__ == "__main__": - main() diff --git a/comps/finetuning_sqft/llm_on_ray/finetune/modeling.py b/comps/finetuning_sqft/llm_on_ray/finetune/modeling.py deleted file mode 100644 index 7a2884f3bc..0000000000 --- a/comps/finetuning_sqft/llm_on_ray/finetune/modeling.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from typing import Dict, Optional - -import torch -import torch.distributed as dist -from torch import nn -from transformers import AutoModel, AutoModelForSequenceClassification, PreTrainedModel -from transformers.modeling_outputs import MaskedLMOutput, SequenceClassifierOutput - -from comps import CustomLogger - -logger = CustomLogger("llm_on_ray/finetune/modeling") - - -class CrossEncoder(PreTrainedModel): - def __init__(self, hf_model: PreTrainedModel, train_group_size: int, batch_size: int): - super().__init__(hf_model.config) - self.hf_model = hf_model - self.train_group_size = train_group_size - self.batch_size = batch_size - - self.cross_entropy = nn.CrossEntropyLoss(reduction="mean") - - self.register_buffer("target_label", torch.zeros(self.batch_size, dtype=torch.long)) - - def gradient_checkpointing_enable(self, **kwargs): - self.hf_model.gradient_checkpointing_enable(**kwargs) - - def forward(self, **batch): - ranker_out: SequenceClassifierOutput = self.hf_model(**batch, return_dict=True) - logits = ranker_out.logits - - if self.training: - scores = logits.view(-1, self.train_group_size) - loss = self.cross_entropy(scores, self.target_label[: scores.shape[0]]) - - return SequenceClassifierOutput( - loss=loss, - **ranker_out, - ) - else: - return ranker_out - - @classmethod - def from_pretrained(cls, train_group_size: int, batch_size: int, *args, **kwargs): - hf_model = AutoModelForSequenceClassification.from_pretrained(*args, **kwargs) - 
reranker = cls(hf_model, train_group_size, batch_size) - return reranker - - def save_pretrained(self, output_dir: str, **kwargs): - state_dict = self.hf_model.state_dict() - state_dict = type(state_dict)({k: v.clone().cpu() for k, v in state_dict.items()}) - kwargs.pop("state_dict") - self.hf_model.save_pretrained(output_dir, state_dict=state_dict, **kwargs) - - -class BiEncoderModel(nn.Module): - TRANSFORMER_CLS = AutoModel - - def __init__( - self, - model_name: str = None, - should_concat: bool = False, - normalized: bool = False, - sentence_pooling_method: str = "cls", - negatives_cross_device: bool = False, - temperature: float = 1.0, - use_inbatch_neg: bool = True, - ): - super().__init__() - self.model = AutoModel.from_pretrained(model_name, add_pooling_layer=False) - self.cross_entropy = nn.CrossEntropyLoss(reduction="mean") - - self.should_concat = should_concat - self.normalized = normalized - self.sentence_pooling_method = sentence_pooling_method - self.temperature = temperature - self.use_inbatch_neg = use_inbatch_neg - self.config = self.model.config - - if not normalized: - self.temperature = 1.0 - logger.info("reset temperature = 1.0 due to using inner product to compute similarity") - if normalized: - if self.temperature > 0.5: - raise ValueError( - "Temperature should be smaller than 1.0 when use cosine similarity (i.e., normalized=True). Recommend to set it 0.01-0.1" - ) - - self.negatives_cross_device = negatives_cross_device - if self.negatives_cross_device: - if not dist.is_initialized(): - raise ValueError("Distributed training has not been initialized for representation all gather.") - # logger.info("Run in a single GPU, set negatives_cross_device=False") - # self.negatives_cross_device = False - # else: - self.process_rank = dist.get_rank() - self.world_size = dist.get_world_size() - - def gradient_checkpointing_enable(self, **kwargs): - self.model.gradient_checkpointing_enable(**kwargs) - - def sentence_embedding(self, hidden_state, mask): - if self.sentence_pooling_method == "mean": - s = torch.sum(hidden_state * mask.unsqueeze(-1).float(), dim=1) - d = mask.sum(axis=1, keepdim=True).float() - return s / d - elif self.sentence_pooling_method == "cls": - return hidden_state[:, 0] - - def encode(self, features): - if features is None: - return None - psg_out = self.model(**features, return_dict=True) - p_reps = self.sentence_embedding(psg_out.last_hidden_state, features["attention_mask"]) - if self.normalized: - p_reps = torch.nn.functional.normalize(p_reps, dim=-1) - return p_reps.contiguous() - - def encode_concat(self, query, passage): - if query is None or passage is None: - return None - - batch_size = query["input_ids"].size()[0] - - psg_out = self.model( - input_ids=torch.cat([query["input_ids"], passage["input_ids"]]), - attention_mask=torch.cat([query["attention_mask"], passage["attention_mask"]]), - return_dict=True, - ) - reps = self.sentence_embedding( - psg_out.last_hidden_state, torch.cat([query["attention_mask"], passage["attention_mask"]]) - ) - if self.normalized: - reps = torch.nn.functional.normalize(reps, dim=-1) - - q_reps = reps[:batch_size] - p_reps = reps[batch_size:] - - return q_reps.contiguous(), p_reps.contiguous() - - def compute_similarity(self, q_reps, p_reps): - if len(p_reps.size()) == 2: - return torch.matmul(q_reps, p_reps.transpose(0, 1)) - return torch.matmul(q_reps, p_reps.transpose(-2, -1)) - - def forward(self, query: Dict[str, torch.Tensor] = None, passage: Dict[str, torch.Tensor] = None): - if self.should_concat: - q_reps, 
p_reps = self.encode_concat(query, passage) - else: - q_reps = self.encode(query) - p_reps = self.encode(passage) - - if self.training: - if self.negatives_cross_device and self.use_inbatch_neg: - q_reps = self._dist_gather_tensor(q_reps) - p_reps = self._dist_gather_tensor(p_reps) - - group_size = p_reps.size(0) // q_reps.size(0) - if self.use_inbatch_neg: - scores = self.compute_similarity(q_reps, p_reps) / self.temperature # B B*G - scores = scores.view(q_reps.size(0), -1) - - target = torch.arange(scores.size(0), device=scores.device, dtype=torch.long) - target = target * group_size - loss = self.compute_loss(scores, target) - else: - scores = ( - self.compute_similarity( - q_reps[ - :, - None, - :, - ], - p_reps.view(q_reps.size(0), group_size, -1), - ).squeeze(1) - / self.temperature - ) # B G - - scores = scores.view(q_reps.size(0), -1) - target = torch.zeros(scores.size(0), device=scores.device, dtype=torch.long) - loss = self.compute_loss(scores, target) - - else: - scores = self.compute_similarity(q_reps, p_reps) - loss = None - - return MaskedLMOutput(loss=loss, logits=None, hidden_states=None, attentions=None) - - def compute_loss(self, scores, target): - return self.cross_entropy(scores, target) - - def _dist_gather_tensor(self, t: Optional[torch.Tensor]): - if t is None: - return None - t = t.contiguous() - - all_tensors = [torch.empty_like(t) for _ in range(self.world_size)] - dist.all_gather(all_tensors, t) - - all_tensors[self.process_rank] = t - all_tensors = torch.cat(all_tensors, dim=0) - - return all_tensors - - def save(self, output_dir: str): - state_dict = self.model.state_dict() - state_dict = type(state_dict)({k: v.clone().cpu() for k, v in state_dict.items()}) - self.model.save_pretrained(output_dir, state_dict=state_dict) diff --git a/comps/finetuning_sqft/patches/nncf-v2.12.0.patch b/comps/finetuning_sqft/patches/nncf-v2.12.0.patch deleted file mode 100644 index f4cbfe0401..0000000000 --- a/comps/finetuning_sqft/patches/nncf-v2.12.0.patch +++ /dev/null @@ -1,72 +0,0 @@ -diff --git a/nncf/experimental/torch/nas/bootstrapNAS/elasticity/elasticity_builder.py b/nncf/experimental/torch/nas/bootstrapNAS/elasticity/elasticity_builder.py -index bc6464b24..ca2666626 100644 ---- a/nncf/experimental/torch/nas/bootstrapNAS/elasticity/elasticity_builder.py -+++ b/nncf/experimental/torch/nas/bootstrapNAS/elasticity/elasticity_builder.py -@@ -152,3 +152,16 @@ class ElasticityBuilder(PTCompressionAlgorithmBuilder): - - # No conflict resolving with the related config options, parameters are overridden by compression state - self._available_elasticity_dims = list(map(ElasticityDim, available_elasticity_dims_state)) -+ -+ def _are_frozen_layers_allowed(self): -+ """ -+ Check if frozen layers are allowed based on NNCF configuration. -+ If specified in NNCF configuration, frozen layers will be allowed. -+ -+ :return: A tuple where the first element is a boolean indicating if frozen layers are allowed, -+ and the second element is a string message explaining the reason. 
-+ """ -+ frozen_layers_allowed = self.config.get("bootstrapNAS", {}).get("training", {}).get("frozen_layers_allowed", False) -+ if frozen_layers_allowed: -+ return True, "Frozen layers are allowed (`frozen_layers_allowed` is set to True in NNCF config)" -+ return super()._are_frozen_layers_allowed() -diff --git a/nncf/experimental/torch/nas/bootstrapNAS/training/progressive_shrinking_builder.py b/nncf/experimental/torch/nas/bootstrapNAS/training/progressive_shrinking_builder.py -index 92609327f..7a0555e3e 100644 ---- a/nncf/experimental/torch/nas/bootstrapNAS/training/progressive_shrinking_builder.py -+++ b/nncf/experimental/torch/nas/bootstrapNAS/training/progressive_shrinking_builder.py -@@ -152,3 +152,16 @@ class ProgressiveShrinkingBuilder(PTCompressionAlgorithmBuilder): - self._bn_adapt_params = state_without_name[self._state_names.BN_ADAPTATION_PARAMS] - bn_adapt_algo_kwargs = get_bn_adapt_algo_kwargs(self.config, self._bn_adapt_params) - self._bn_adaptation = BatchnormAdaptationAlgorithm(**bn_adapt_algo_kwargs) if bn_adapt_algo_kwargs else None -+ -+ def _are_frozen_layers_allowed(self): -+ """ -+ Check if frozen layers are allowed based on the algorithm configuration. -+ If specified in the algorithm configuration, frozen layers will be allowed. -+ -+ :return: A tuple where the first element is a boolean indicating if frozen layers are allowed, -+ and the second element is a string message explaining the reason. -+ """ -+ frozen_layers_allowed = self._algo_config.get("frozen_layers_allowed", False) -+ if frozen_layers_allowed: -+ return True, "Frozen layers are allowed (`frozen_layers_allowed` is set to True in the algorithm config)" -+ return super()._are_frozen_layers_allowed() -diff --git a/nncf/torch/layer_utils.py b/nncf/torch/layer_utils.py -index fb7d7bed7..3b8fda98e 100644 ---- a/nncf/torch/layer_utils.py -+++ b/nncf/torch/layer_utils.py -@@ -127,6 +127,25 @@ class _NNCFModuleMixin: - results = op_results - return results - -+ def get_proxy_module(self, *args): -+ """ -+ Gets a proxy module with pre-operations applied. -+ -+ Args: -+ *args: Arguments for the pre-operations. -+ -+ Returns: -+ ProxyModule: The proxy module with pre-operations applied. -+ """ -+ proxy_module = ProxyModule(self) -+ for op in self.pre_ops.values(): -+ op_args = op(proxy_module, args) -+ if op_args is not None: -+ if not isinstance(op_args, tuple): -+ op_args = tuple([op_args]) -+ args = op_args -+ return proxy_module -+ - - class CompressionParameter(nn.Parameter): - """ diff --git a/comps/finetuning_sqft/patches/peft-v0.10.0.patch b/comps/finetuning_sqft/patches/peft-v0.10.0.patch deleted file mode 100644 index 9606bd24ef..0000000000 --- a/comps/finetuning_sqft/patches/peft-v0.10.0.patch +++ /dev/null @@ -1,220 +0,0 @@ -diff --git a/src/peft/tuners/lora/config.py b/src/peft/tuners/lora/config.py -index cc5c60a..fa1422e 100644 ---- a/src/peft/tuners/lora/config.py -+++ b/src/peft/tuners/lora/config.py -@@ -268,6 +268,31 @@ class LoraConfig(PeftConfig): - ) - }, - ) -+ sparse_adapter: bool = field( -+ default=False, -+ metadata={ -+ "help": ( -+ "Enable 'SparsePEFT'. This strategy is designed for fine-tuning sparse models using adapters. " -+ "It sparsifies the adapter's parameter matrix (BA) such that the sparsity pattern of BA aligns " -+ "with that of the base model's weights (W). This alignment allows for the merging of the adapter " -+ "with the base model without disrupting its sparsity. 
It is derived from SQFT() and is used in the " -+ "pipelines SQFT + SparsePEFT and SQFT + QA-SparsePEFT." -+ ) -+ } -+ ) -+ quantization_aware: bool = field( -+ default=False, -+ metadata={ -+ "help": ( -+ "Enable quantization-aware training. This strategy is designed for fine-tuning GPTQ quantized models " -+ "using adapters. It activates the `SQFTQuantAwareLinear` from SQFT in place of `QuantLinear`, enabling " -+ "quantization-aware training for adapters. This helps optimize model accuracy and allows the adapter " -+ "to be merged with the base quantized model, improving performance and deployment efficiency during " -+ "inference. This strategy, when used in conjunction with `sparse_adapter`, corresponds to the " -+ "SQFT + QA-SparsePEFT method described in the SQFT paper." -+ ) -+ } -+ ) - - def __post_init__(self): - self.peft_type = PeftType.LORA -diff --git a/src/peft/tuners/lora/gptq.py b/src/peft/tuners/lora/gptq.py -index 333dfa6..7272824 100644 ---- a/src/peft/tuners/lora/gptq.py -+++ b/src/peft/tuners/lora/gptq.py -@@ -108,7 +108,17 @@ def dispatch_gptq( - AutoGPTQQuantLinear = get_auto_gptq_quant_linear(gptq_quantization_config) - - if AutoGPTQQuantLinear is not None and isinstance(target_base_layer, AutoGPTQQuantLinear): -- new_module = QuantLinear(target, adapter_name, **kwargs) -+ quantization_aware = kwargs.get("quantization_aware", False) -+ if quantization_aware: -+ # Attempt to import the `SQFTQuantAwareLinear` module -+ # from https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/blob/main/SQFT/modules/sqft_linear.py -+ try: -+ from modules.sqft_linear import SQFTQuantAwareLinear -+ except ImportError: -+ raise ImportError("The module 'SQFTQuantAwareLinear' could not be imported.") -+ new_module = SQFTQuantAwareLinear(target, adapter_name, **kwargs) -+ else: -+ new_module = QuantLinear(target, adapter_name, **kwargs) - target.qweight = target_base_layer.qweight - - return new_module -diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py -index 829b7bd..9d83967 100644 ---- a/src/peft/tuners/lora/layer.py -+++ b/src/peft/tuners/lora/layer.py -@@ -28,6 +28,10 @@ from peft.utils.other import transpose - - from .config import LoraConfig - -+try: -+ from nncf.torch.layers import NNCFLinear -+except ImportError: -+ NNCFLinear = None - - class LoraLayer(BaseTunerLayer): - # All names of layers that may contain (trainable) adapter weights -@@ -346,6 +350,7 @@ class Linear(nn.Module, LoraLayer): - init_lora_weights: Union[bool, str] = True, - use_rslora: bool = False, - use_dora: bool = False, -+ sparse_adapter: bool = False, # Set this to True if enabling 'SparsePEFT' for fine-tuning sparse models - **kwargs, - ) -> None: - super().__init__() -@@ -363,6 +368,7 @@ class Linear(nn.Module, LoraLayer): - use_dora=use_dora, - ) - self.is_target_conv_1d_layer = is_target_conv_1d_layer -+ self.sparse_adapter = sparse_adapter - - def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None: - """ -@@ -471,6 +477,10 @@ class Linear(nn.Module, LoraLayer): - weight_B = weight_B.float() - - output_tensor = transpose(weight_B @ weight_A, self.fan_in_fan_out) * self.scaling[adapter] -+ if self.sparse_adapter: -+ # Apply the sparse mask to BA (`output_tensor`). 
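# (illustrative note) Wherever the base weight W is exactly zero, the corresponding mask
# entry is False and that entry of BA * scaling is zeroed out, so the merged weight
# W + BA * scaling keeps the sparsity pattern of W; this is what lets a SparsePEFT adapter
# be merged without disrupting the base model's sparsity.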
-+ mask = (self.base_layer.weight != 0) -+ output_tensor = output_tensor * mask - - if cast_to_fp32: - output_tensor = output_tensor.to(dtype=dtype) -@@ -506,7 +516,26 @@ class Linear(nn.Module, LoraLayer): - x = x.to(lora_A.weight.dtype) - - if not self.use_dora[active_adapter]: -- result = result + lora_B(lora_A(dropout(x))) * scaling -+ if not self.sparse_adapter: -+ result = result + lora_B(lora_A(dropout(x))) * scaling -+ else: -+ # Since 'sparse_adapter' is enabled, we need to multiply the parameter matrices of `lora_B` and -+ # `lora_A` here instead of calling the forward methods of `lora_B` and `lora_A`. This results -+ # in the NNCF graph not recognizing lora A and lora B nodes when using NLS strategy. Therefore, -+ # we execute `lora_B(lora_A(x))` solely to include these two NNCFLinear nodes in the NNCF graph. -+ if NNCFLinear is not None and not self.training: -+ lora_B(lora_A(x)) -+ if NNCFLinear is not None and isinstance(lora_A, NNCFLinear): -+ adapter_weight = torch.matmul( -+ lora_B.get_proxy_module(x).weight, -+ lora_A.get_proxy_module(x).weight -+ ) * scaling -+ else: -+ adapter_weight = torch.matmul(lora_B.weight, lora_A.weight) * scaling -+ # Apply the sparse mask to BA (`adapter_weight`). -+ mask = (self.base_layer.weight != 0).detach() -+ adapter_weight = adapter_weight * mask -+ result = result + nn.functional.linear(dropout(x), adapter_weight) - else: - x = dropout(x) - result = result + self._apply_dora(x, lora_A, lora_B, scaling, active_adapter) -diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py -index 3f381ef..3e696ca 100644 ---- a/src/peft/tuners/lora/model.py -+++ b/src/peft/tuners/lora/model.py -@@ -193,6 +193,8 @@ class LoraModel(BaseTuner): - "init_lora_weights": lora_config.init_lora_weights, - "use_rslora": lora_config.use_rslora, - "use_dora": lora_config.use_dora, -+ "quantization_aware": lora_config.quantization_aware, -+ "sparse_adapter": lora_config.sparse_adapter, - "loaded_in_8bit": getattr(self.model, "is_loaded_in_8bit", False), - "loaded_in_4bit": getattr(self.model, "is_loaded_in_4bit", False), - } -@@ -233,7 +235,10 @@ class LoraModel(BaseTuner): - child = child.base_layer - - if not hasattr(new_module, "base_layer"): -- new_module.weight = child.weight -+ if hasattr(child, "qweight"): -+ new_module.qweight = child.qweight -+ else: -+ new_module.weight = child.weight - if hasattr(child, "bias"): - new_module.bias = child.bias - -@@ -401,7 +406,11 @@ class LoraModel(BaseTuner): - Currently gptq quantization and replicated layers do not support merging. 
- """ - if getattr(self.model, "quantization_method", None) == "gptq": -- raise ValueError("Cannot merge LORA layers when the model is gptq quantized") -+ peft_config = self.get_peft_config_as_dict() -+ # Check if the 'quantization_aware' flag is set to False in the PEFT configuration -+ # Raise an error if the model is GPTQ quantized and 'quantization_aware' is not enabled -+ if not peft_config.get("quantization_aware", False): -+ raise ValueError("Cannot merge LORA layers when the model is gptq quantized") - if self.peft_config.get("layer_replication"): - raise ValueError("Cannot merge LORA layers when base model layers are replicated") - -diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py -index 5ac1264..acb5d27 100644 ---- a/src/peft/utils/save_and_load.py -+++ b/src/peft/utils/save_and_load.py -@@ -246,6 +246,48 @@ def set_peft_model_state_dict(model, peft_model_state_dict, adapter_name="default - else: - raise NotImplementedError - -+ def module_reshape(state_dict): -+ """Reshape the linear module to match the state dict. -+ -+ Args: -+ state_dict (dict): The state dict containing the parameters. -+ """ -+ for param_name, param in state_dict.items(): -+ tensor_name = param_name -+ splits = tensor_name.split(".") -+ -+ # If the parameter name has multiple parts, navigate through the module hierarchy -+ if len(splits) > 1: -+ module = model -+ parent = None -+ -+ # Traverse the module hierarchy to find the target module -+ for split in splits[:-1]: -+ new_module = getattr(module, split, None) -+ if new_module is None: -+ raise ValueError(f"{module} has no attribute {split}.") -+ parent = module -+ module = new_module -+ -+ tensor_name = splits[-1] -+ old_value = getattr(module, tensor_name) -+ -+ # Check if the shape of the original module differs from the shape of the loaded parameter -+ if old_value.shape != param.shape and isinstance(module, torch.nn.Linear): -+ # Create a new Linear module with the new shape -+ new_module = torch.nn.Linear( -+ param.shape[1], -+ param.shape[0], -+ bias=module.bias is not None, -+ dtype=module.weight.dtype, -+ device=module.weight.device -+ ) -+ # Replace the old module with the new one in the parent module -+ setattr(parent, splits[-2], new_module) -+ -+ # Reshape the modules in the peft model to match the state dict -+ module_reshape(peft_model_state_dict) -+ - load_result = model.load_state_dict(peft_model_state_dict, strict=False) - if config.is_prompt_learning: - model.prompt_encoder[adapter_name].embedding.load_state_dict( diff --git a/comps/finetuning_sqft/patches/transformers-v4.44.2.patch b/comps/finetuning_sqft/patches/transformers-v4.44.2.patch deleted file mode 100644 index a35e96297a..0000000000 --- a/comps/finetuning_sqft/patches/transformers-v4.44.2.patch +++ /dev/null @@ -1,171 +0,0 @@ -diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py -index 68ba7babf..6b54a3987 100755 ---- a/src/transformers/trainer.py -+++ b/src/transformers/trainer.py -@@ -155,6 +155,7 @@ from .utils import ( - is_in_notebook, - is_ipex_available, - is_lomo_available, -+ is_nncf_available, - is_peft_available, - is_safetensors_available, - is_sagemaker_dp_enabled, -@@ -245,6 +246,11 @@ if is_accelerate_available(): - if is_accelerate_available("0.28.0"): - from accelerate.utils import DataLoaderConfiguration - -+if is_nncf_available(): -+ from nncf.torch.compression_method_api import PTCompressionAlgorithmController -+else: -+ PTCompressionAlgorithmController = None -+ - - def _is_peft_model(model): - if 
is_peft_available(): -@@ -352,6 +358,8 @@ class Trainer: - by this function will be reflected in the predictions received by `compute_metrics`. - - Note that the labels (second parameter) will be `None` if the dataset does not have them. -+ compression_ctrl ([`PTCompressionAlgorithmController`], *optional*): A compression controller to use. Note that -+ this script only supports `ProgressiveShrinkingController` of NNCF (https://github.com/openvinotoolkit/nncf). - - Important attributes: - -@@ -387,6 +395,7 @@ class Trainer: - callbacks: Optional[List[TrainerCallback]] = None, - optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), - preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, -+ compression_ctrl: PTCompressionAlgorithmController = None - ): - if args is None: - output_dir = "tmp_trainer" -@@ -400,6 +409,7 @@ class Trainer: - " summary statistics should be returned by the function." - ) - self.args = args -+ self.compression_ctrl = compression_ctrl - # Seed must be set before instantiating the model when using model - enable_full_determinism(self.args.seed) if self.args.full_determinism else set_seed(self.args.seed) - self.hp_name = None -@@ -1040,7 +1050,10 @@ class Trainer: - optimizer = self.optimizer.optimizer - else: - optimizer = self.optimizer -- self.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer) -+ # If compression_ctrl (`ProgressiveShrinkingController`) is not used, create a scheduler. -+ # If compression_ctrl is used (not None), it will use its own learning rate scheduler. -+ if self.compression_ctrl is None: -+ self.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer) - - def get_decay_parameter_names(self, model) -> List[str]: - """ -@@ -1569,7 +1582,9 @@ class Trainer: - self.state.stateful_callbacks["TrainerControl"] = self.control.state() - self.state.save_to_json(os.path.join(output_dir, TRAINER_STATE_NAME)) - torch.save(self.optimizer.state_dict(), os.path.join(output_dir, OPTIMIZER_NAME)) -- torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) -+ # Save the learning rate scheduler state if compression_ctrl is not used. -+ if self.compression_ctrl is None: -+ torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) - - def call_model_init(self, trial=None): - model_init_argcount = number_of_arguments(self.model_init) -@@ -2204,8 +2219,16 @@ class Trainer: - if args.eval_on_start: - self._evaluate(trial, ignore_keys_for_eval, skip_scheduler=True) - -+ # Initialize the learning rate scheduler if compression_ctrl is used. -+ if self.compression_ctrl is not None: -+ train_iters = len(train_dataloader) -+ self.compression_ctrl.set_training_lr_scheduler_args(self.optimizer, train_iters) -+ - total_batched_samples = 0 - for epoch in range(epochs_trained, num_train_epochs): -+ # Perform an epoch step for the compression controller's scheduler if it is used. -+ if self.compression_ctrl is not None: -+ self.compression_ctrl.scheduler.epoch_step() - epoch_iterator = train_dataloader - if hasattr(epoch_iterator, "set_epoch"): - epoch_iterator.set_epoch(epoch) -@@ -2234,6 +2257,10 @@ class Trainer: - - step = -1 - for step, inputs in enumerate(epoch_iterator): -+ # Perform a step for the compression controller's scheduler if it is used. -+ # Include actions such as activating the subnetwork or updating the learning rate. 
-+ if self.compression_ctrl is not None: -+ self.compression_ctrl.scheduler.step() - total_batched_samples += 1 - - if self.args.include_num_input_tokens_seen: -@@ -2345,7 +2372,10 @@ class Trainer: - optimizer_was_run = not self.accelerator.optimizer_step_was_skipped - if optimizer_was_run: - # Delay optimizer scheduling until metrics are generated -- if not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): -+ if ( -+ not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau) -+ and self.compression_ctrl is None -+ ): - self.lr_scheduler.step() - - model.zero_grad() -@@ -2791,7 +2821,11 @@ class Trainer: - logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4) - if grad_norm is not None: - logs["grad_norm"] = grad_norm.detach().item() if isinstance(grad_norm, torch.Tensor) else grad_norm -- logs["learning_rate"] = self._get_learning_rate() -+ # Retrieve the current learning rate from the compression controller if available, otherwise use the default method -+ if self.compression_ctrl is not None: -+ logs["learning_rate"] = self.compression_ctrl.scheduler.lr_scheduler.get_last_lr()[0] -+ else: -+ logs["learning_rate"] = self._get_learning_rate() - - self._total_loss_scalar += tr_loss_scalar - self._globalstep_last_logged = self.state.global_step -@@ -3015,7 +3049,9 @@ class Trainer: - and not is_torch_xla_available() - ): - with warnings.catch_warnings(record=True) as caught_warnings: -- torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) -+ # Save the learning rate scheduler state if compression_ctrl is not used. -+ if self.compression_ctrl is None: -+ torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) - reissue_pt_warnings(caught_warnings) - - def _load_optimizer_and_scheduler(self, checkpoint): -diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py -index efe473a6c..1040a75f4 100755 ---- a/src/transformers/utils/__init__.py -+++ b/src/transformers/utils/__init__.py -@@ -152,6 +152,7 @@ from .import_utils import ( - is_natten_available, - is_ninja_available, - is_nltk_available, -+ is_nncf_available, - is_onnx_available, - is_openai_available, - is_optimum_available, -diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py -index 3b0abd334..823e8919f 100755 ---- a/src/transformers/utils/import_utils.py -+++ b/src/transformers/utils/import_utils.py -@@ -131,6 +131,7 @@ _levenshtein_available = _is_package_available("Levenshtein") - _librosa_available = _is_package_available("librosa") - _natten_available = _is_package_available("natten") - _nltk_available = _is_package_available("nltk") -+_nncf_available = _is_package_available("nncf") - _onnx_available = _is_package_available("onnx") - _openai_available = _is_package_available("openai") - _optimum_available = _is_package_available("optimum") -@@ -1056,6 +1057,10 @@ def is_nltk_available(): - return _nltk_available - - -+def is_nncf_available(): -+ return _nncf_available -+ -+ - def is_torchaudio_available(): - return _torchaudio_available - diff --git a/comps/finetuning_sqft/requirements.txt b/comps/finetuning_sqft/requirements.txt deleted file mode 100644 index 6eff6b62ac..0000000000 --- a/comps/finetuning_sqft/requirements.txt +++ /dev/null @@ -1,17 +0,0 @@ -aiohttp -datasets -docarray -fastapi -httpx -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -pydantic==2.8.2 
-pydantic_yaml -python-multipart -pyyaml -ray[all] -requests -shortuuid -uvicorn diff --git a/comps/finetuning_sqft/utils/extract_sub_adapter.py b/comps/finetuning_sqft/utils/extract_sub_adapter.py deleted file mode 100644 index 82e4471719..0000000000 --- a/comps/finetuning_sqft/utils/extract_sub_adapter.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import argparse -import os -import re - -import torch -from nncf import NNCFConfig -from peft.utils import CONFIG_NAME, SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME - -PATTERN = re.compile(r"[[](.*?)[]]", re.S) - - -def get_width_for_query_prefix(torch_module_to_width, query_module, length=5): - """Get the width for a given query module prefix. - - Args: - torch_module_to_width (dict): Mapping from torch module to width. - query_module (str): The query module name. - length (int, optional): The length of the prefix to match. Default is 5. - - Returns: - int: The width for the query module prefix. - """ - query_module_list = query_module.split(".") - width = next( - ( - value - for torch_module, value in torch_module_to_width.items() - if torch_module.split(".")[:length] == query_module_list[:length] - ), - None, - ) - return width - - -def main(adapter_model_path, nncf_config, sub_adapter_version, custom_config=None): - output_dir = os.path.join(adapter_model_path, sub_adapter_version) - os.makedirs(output_dir, exist_ok=True) - nncf_config = NNCFConfig.from_json(nncf_config) - try: - overwrite_groups = nncf_config["bootstrapNAS"]["training"]["elasticity"]["width"]["overwrite_groups"] - overwrite_groups_widths = nncf_config["bootstrapNAS"]["training"]["elasticity"]["width"][ - "overwrite_groups_widths" - ] - assert len(overwrite_groups) == len(overwrite_groups_widths) - except Exception: - raise ValueError("Cannot get the search space in NNCF config.") - - if sub_adapter_version == "maximal": - subnetwork_config = {idx: space[0] for idx, space in enumerate(overwrite_groups_widths)} - elif sub_adapter_version == "heuristic": - subnetwork_config = {idx: space[(len(space) - 1) // 2] for idx, space in enumerate(overwrite_groups_widths)} - elif sub_adapter_version == "minimal": - subnetwork_config = {idx: space[-1] for idx, space in enumerate(overwrite_groups_widths)} - else: - assert custom_config is not None, "Missing custom subnetwork config." - assert isinstance(custom_config, list), "Custom config must be a list." 
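# (illustrative note) custom_config is expected to hold one width per overwrite group,
# e.g. a hypothetical custom_config = [16, 12, 8] selects rank 16 for group 0, 12 for
# group 1 and 8 for group 2; each value is checked against that group's search space below.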
- subnetwork_config = {i: value for i, value in enumerate(custom_config)} - - # Mapping: nncf node -> width - nncf_node_to_width = {} - for idx, value in subnetwork_config.items(): - space = overwrite_groups_widths[idx] - assert min(space) <= value <= max(space) - cur_dict = {node: value for node in overwrite_groups[idx]} - nncf_node_to_width.update(cur_dict) - - # Prune adapter model (LoRA low-rank) - lora_torch_module_to_width = { - ".".join(re.findall(PATTERN, k)): v for k, v in nncf_node_to_width.items() if "lora_A" in k - } - num_module_name_item = list(lora_torch_module_to_width.keys())[0].split(".").index("lora_A") - # Load adapter weights - try: - super_adapter_weights = torch.load(os.path.join(adapter_model_path, WEIGHTS_NAME)) - except: - from safetensors.torch import load_file - - super_adapter_weights = load_file(os.path.join(adapter_model_path, SAFETENSORS_WEIGHTS_NAME)) - sub_adapter_weights = {} - for weight_key, weight_tensor in super_adapter_weights.items(): - width = get_width_for_query_prefix(lora_torch_module_to_width, weight_key, length=num_module_name_item) - if width is not None: - is_loraA = "lora_A" in weight_key - new_weight_tensor = weight_tensor[:width].clone() if is_loraA else weight_tensor[:, :width].clone() - else: - new_weight_tensor = weight_tensor.clone() - sub_adapter_weights[weight_key] = new_weight_tensor - os.makedirs(output_dir, exist_ok=True) - torch.save(sub_adapter_weights, os.path.join(output_dir, WEIGHTS_NAME)) - config_path = os.path.join(adapter_model_path, CONFIG_NAME) - os.system(f"cp {config_path} {output_dir}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Merge base model and adapter model with additional configurations") - parser.add_argument("--adapter_model_path", type=str, required=True, help="Path to the adapter model") - parser.add_argument("--nncf_config", type=str, required=True, help="Path to the NNCF configuration") - parser.add_argument("--sub_adapter_version", type=str, required=True, help="Sub adapter version") - parser.add_argument("--custom_config", type=str, default=None, help="Path to custom configuration (optional)") - args = parser.parse_args() - main(args.adapter_model_path, args.nncf_config, args.sub_adapter_version, args.custom_config) diff --git a/comps/finetuning_sqft/utils/merge.py b/comps/finetuning_sqft/utils/merge.py deleted file mode 100644 index 266ee0eac4..0000000000 --- a/comps/finetuning_sqft/utils/merge.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import argparse - -from peft import PeftModel -from transformers import AutoModelForCausalLM, AutoTokenizer - - -def main(base_model_path, adapter_model_path, output_path): - base_model = AutoModelForCausalLM.from_pretrained(base_model_path, trust_remote_code=True) - model = PeftModel.from_pretrained(base_model, adapter_model_path) - model.eval() - for name, param in model.named_parameters(): - param.requires_grad = False - merged_model = model.merge_and_unload() - merged_model.train(False) - base_model.save_pretrained(output_path, state_dict=merged_model.state_dict()) - - tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True) - tokenizer.save_pretrained(output_path) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Merge base model and adapter model") - parser.add_argument("--base_model_path", type=str, required=True, help="Path to the base model") - parser.add_argument("--adapter_model_path", 
type=str, required=True, help="Path to the adapter model") - parser.add_argument("--output_path", type=str, required=True, help="Path to save the merged model") - - args = parser.parse_args() - main(args.base_model_path, args.adapter_model_path, args.output_path) diff --git a/comps/finetuning_sqft/utils/nncf_config_process.py b/comps/finetuning_sqft/utils/nncf_config_process.py deleted file mode 100644 index 5f6abb7c8f..0000000000 --- a/comps/finetuning_sqft/utils/nncf_config_process.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os - -from nncf import NNCFConfig - -NNCF_CONFIG_TEMPLATE = { - "input_info": [ - {"sample_size": [1, 256], "type": "long", "keyword": "input_ids"}, - {"sample_size": [1, 256], "type": "long", "keyword": "attention_mask"}, - ], - "bootstrapNAS": { - "training": { - "algorithm": "progressive_shrinking", - "frozen_layers_allowed": True, - "progressivity_of_elasticity": ["width"], - "batchnorm_adaptation": {"num_bn_adaptation_samples": 0}, - "schedule": { - "list_stage_descriptions": [ - { - "train_dims": ["width"], - "epochs": -1, - "depth_indicator": 1, - "width_indicator": 8, - "init_lr": -1, - "epochs_lr": -1, - "sample_rate": 1, - } - ] - }, - "elasticity": { - "available_elasticity_dims": ["width"], - "width": {"overwrite_groups": [], "overwrite_groups_widths": []}, - }, - } - }, -} - - -def add_lr_epochs(nncf_config, learning_rate=3e-4, num_epochs=3): - """Add learning rate and epochs to the NNCF configuration. - - Args: - nncf_config (dict): The NNCF configuration dictionary. - learning_rate (float): The initial learning rate to set. - num_epochs (int): The number of epochs to set. - - Returns: - dict: The updated NNCF configuration. - """ - stage_description = nncf_config["bootstrapNAS"]["training"]["schedule"]["list_stage_descriptions"][0] - if stage_description["init_lr"] == -1: - stage_description["init_lr"] = learning_rate - if stage_description["epochs"] == -1: - stage_description["epochs"] = num_epochs - stage_description["epochs_lr"] = num_epochs - - return nncf_config - - -def get_model_paths(model, target_module_name): - """Find all paths to the target layer in the model. - - Args: - model (torch.nn.Module): The model to search. - target_module_name (str): The name of the target layer. - - Returns: - list: A list of paths to the target layer. - """ - - def find_layers(module, target_module_name, path, paths): - for name, sub_module in module.named_children(): - new_path = f"{path}/{sub_module.__class__.__name__}[{name}]" - if target_module_name in name: - # Check if 'lora_A' is in the sub_module's children - for sub_name, _ in sub_module.named_children(): - if "lora_A" in sub_name: - paths.append(f"{new_path}/ModuleDict[lora_A]/NNCFLinear[default]/linear_0") - find_layers(sub_module, target_module_name, new_path, paths) - - base_path = model.__class__.__name__ - paths = [] - find_layers(model, target_module_name, base_path, paths) - return paths - - -def load_nncf_config(config, model, target_module_groups=None, search_space=None, nncf_config=None): - """Load and preprocess the NNCF configuration file. - - Returns: - NNCFConfig: The preprocessed NNCF configuration object. - """ - - if nncf_config is not None: - nncf_config = NNCFConfig.from_json(nncf_config) - else: - if search_space is None and target_module_groups: - raise ValueError( - "Neural LoRA search is enabled, `search_space` and `target_module_groups` must be provided." 
- ) - # The NNCF Config will be automatically generated based on `target_module_groups` and `search_space`. - num_hidden_layers = model.config.num_hidden_layers - nncf_config_dict = NNCF_CONFIG_TEMPLATE - overwrite_groups = [] - for group in target_module_groups: - group_paths = [] - for module in group: - target_layer_name = module - paths = get_model_paths(model, target_layer_name) - assert paths, f"No paths found for module {module}" - group_paths.append(paths) - # Transpose the list of lists to combine paths by their positions - transposed_paths = list(zip(*group_paths)) - overwrite_groups.extend([list(path_group) for path_group in transposed_paths]) - nncf_config_dict["bootstrapNAS"]["training"]["elasticity"]["width"]["overwrite_groups"] = overwrite_groups - - overwrite_groups_widths = [] - for space in search_space: - space = [int(width) for width in space.split(",")] - overwrite_groups_widths.extend([space] * num_hidden_layers) - nncf_config_dict["bootstrapNAS"]["training"]["elasticity"]["width"][ - "overwrite_groups_widths" - ] = overwrite_groups_widths - assert len(overwrite_groups) == len(overwrite_groups_widths) - nncf_config_dict = add_lr_epochs( - nncf_config_dict, learning_rate=config["Training"]["learning_rate"], num_epochs=config["Training"]["epochs"] - ) - nncf_config = NNCFConfig.from_dict(nncf_config_dict) - - nncf_config["log_dir"] = config["General"]["output_dir"] - os.makedirs(nncf_config["log_dir"], exist_ok=True) - with open(os.path.join(nncf_config["log_dir"], "nncf_config.json"), "w") as f: - json.dump(nncf_config, f, indent=4) - return nncf_config - - -if __name__ == "__main__": - import transformers - from peft import LoraConfig, get_peft_model - - model = transformers.AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") - lora_config = { - "task_type": "CAUSAL_LM", - "r": 16, - "target_modules": ["q_proj", "k_proj", "v_proj", "up_proj", "down_proj"], - } - peft_config = LoraConfig(**lora_config) - model = get_peft_model(model, peft_config) - load_nncf_config( - None, model, [["q_proj", "k_proj", "v_proj"], ["up_proj"], ["down_proj"]], ["16,12,8", "16", "16,12"] - ) From ed9dc5fc606615f7c43aa08b220bd7766687cfa4 Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 26 Nov 2024 11:15:03 +0800 Subject: [PATCH 04/17] Add test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Yuan0320 Signed-off-by: J. 
Pablo Muñoz --- tests/finetuning/test_finetuning.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index 11a544dfda..c333a2f228 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -144,6 +144,18 @@ function validate_microservice() { '{"training_file": "test_data.json","model": "facebook/opt-125m"}' + ########################## + # sqft test # + ########################## + # test /v1/fine_tuning/jobs + validate_finetune \ + "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ + "sqft - finetuning" \ + "test-comps-finetuning-server" \ + '{"id":"ft-job' \ + '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "neural_lora_search": true, "target_module_groups": [["q_proj"]], "search_space": ["8,6,4"]}}}' + + ########################## # rerank test # ########################## From d5e559c2b6457d36a9b6aa4b2a8a409dcd42462c Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 26 Nov 2024 11:46:18 +0800 Subject: [PATCH 05/17] Update links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Yuan0320 Signed-off-by: J. Pablo Muñoz --- comps/finetuning/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/comps/finetuning/README.md b/comps/finetuning/README.md index d6ad323670..83b2adddc5 100644 --- a/comps/finetuning/README.md +++ b/comps/finetuning/README.md @@ -118,7 +118,7 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ In addition to traditional fine-tuning, you can use SQFT's NLS to fine-tune your model. More details about SQFT can be found in [this paper](https://aclanthology.org/2024.findings-emnlp.749.pdf). -Please follow the additional installation requirements [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#-start-nls-microservice-with-python). +Please follow the additional installation requirements [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#-start-the-nls-microservice-with-python). Use the following command to launch a finetuning job with the NLS algorithm: ```bash @@ -145,9 +145,9 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \ }' ``` -Detailed explanations for the parameters can be found [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#create-nls-fine-tuning-job). +Detailed explanations for the parameters can be found [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#create-an-nls-fine-tuning-job). Additional use-cases and benefits of SQFT are available [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea). -Instructions to extracting the desired sub-adapter and merging it with the base model can be found [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#leverage-fine-tuned-super-adapter). +Instructions to extracting the desired sub-adapter and merging it with the base model can be found [here](https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SQFT/opea#leverage-the-fine-tuned-super-adapter). 
#### 3.2.3 Reranking Model Training From 0e275d2bcd1617b78b67d4fdead8b6e1f7ef43ee Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 26 Nov 2024 12:13:41 +0800 Subject: [PATCH 06/17] refactor(SQFTNLSConfig): enhance set_target_modules logic for better validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Yuan0320 Signed-off-by: J. Pablo Muñoz --- comps/finetuning/finetune_config.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/comps/finetuning/finetune_config.py b/comps/finetuning/finetune_config.py index df9a6be9bb..1cf3b08794 100644 --- a/comps/finetuning/finetune_config.py +++ b/comps/finetuning/finetune_config.py @@ -44,12 +44,14 @@ class SQFTNLSConfig(LoraConfig): @root_validator(pre=True) def set_target_modules(cls, values): - target_module_groups = values.get("target_module_groups") - if target_module_groups is not None: - values["target_modules"] = [item for sublist in target_module_groups for item in sublist] - search_space = values.get("search_space") - if search_space is not None: - assert len(search_space) == len(target_module_groups) + if values.get("neural_lora_search"): + target_module_groups = values.get("target_module_groups") + search_space = values.get("search_space") + if target_module_groups is None or search_space is None: + raise ValueError("Please specified `target_module_groups` and `search_space` when using NLS strategy.") + if len(search_space) != len(target_module_groups): + raise ValueError("The length of `search_space` must be equal to the length of `target_module_groups`.") + values["target_modules"] = [module for groups in target_module_groups for module in groups] return values From a304263d777a18ca213dfddad3b7df24c53aceb8 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Tue, 26 Nov 2024 19:15:07 +0800 Subject: [PATCH 07/17] Fix build issue (#946) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: ZePan110 Signed-off-by: J. Pablo Muñoz --- tests/retrievers/test_retrievers_pathway_langchain.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/retrievers/test_retrievers_pathway_langchain.sh b/tests/retrievers/test_retrievers_pathway_langchain.sh index a1e4e773a7..33d60b025f 100644 --- a/tests/retrievers/test_retrievers_pathway_langchain.sh +++ b/tests/retrievers/test_retrievers_pathway_langchain.sh @@ -10,9 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - cd comps/vectorstores/pathway - - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:comps . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:comps -f comps/vectorstores/pathway/Dockerfile . cd $WORKPATH From 98bc3f8293f6cb755a6784837db07c0fed2f0a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Pablo=20Mu=C3=B1oz?= Date: Wed, 27 Nov 2024 10:24:24 -0800 Subject: [PATCH 08/17] Fix issue with copying folders in sub-adapter extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Yuan0320 Signed-off-by: J. 
Pablo Muñoz --- comps/finetuning/utils/extract_sub_adapter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/comps/finetuning/utils/extract_sub_adapter.py b/comps/finetuning/utils/extract_sub_adapter.py index 00f477f684..2f5eccde32 100644 --- a/comps/finetuning/utils/extract_sub_adapter.py +++ b/comps/finetuning/utils/extract_sub_adapter.py @@ -3,6 +3,7 @@ import os import re +import shutil import torch from peft.utils import CONFIG_NAME, SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME @@ -97,4 +98,4 @@ def main(adapter_model_path, nncf_config, adapter_version, custom_config=None): os.makedirs(output_dir, exist_ok=True) torch.save(sub_adapter_weights, os.path.join(output_dir, WEIGHTS_NAME)) config_path = os.path.join(adapter_model_path, CONFIG_NAME) - os.system(f"cp {config_path} {output_dir}") + shutil.copy(config_path, output_dir) From 606ef11003b3054e5ea66f74a65b47408d13fc60 Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Mon, 2 Dec 2024 14:36:44 +0800 Subject: [PATCH 09/17] Temporarily remove test due to sqft environment (additional installation) Signed-off-by: Yuan0320 --- tests/finetuning/test_finetuning.sh | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index c333a2f228..11a544dfda 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -144,18 +144,6 @@ function validate_microservice() { '{"training_file": "test_data.json","model": "facebook/opt-125m"}' - ########################## - # sqft test # - ########################## - # test /v1/fine_tuning/jobs - validate_finetune \ - "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ - "sqft - finetuning" \ - "test-comps-finetuning-server" \ - '{"id":"ft-job' \ - '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "neural_lora_search": true, "target_module_groups": [["q_proj"]], "search_space": ["8,6,4"]}}}' - - ########################## # rerank test # ########################## From c83f86aea6927cfd277c9a6d875c727683d72b3e Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Mon, 2 Dec 2024 20:01:51 +0800 Subject: [PATCH 10/17] Add sqft test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. 
Pablo Muñoz Signed-off-by: Yuan0320 --- comps/finetuning/Dockerfile.sqft | 66 +++++++++++++++++++++++++++++ tests/finetuning/test_finetuning.sh | 59 +++++++++++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 comps/finetuning/Dockerfile.sqft diff --git a/comps/finetuning/Dockerfile.sqft b/comps/finetuning/Dockerfile.sqft new file mode 100644 index 0000000000..ee47310fb2 --- /dev/null +++ b/comps/finetuning/Dockerfile.sqft @@ -0,0 +1,66 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Use the same python version with ray +FROM python:3.10.14 + +ARG HF_TOKEN + +ENV HF_TOKEN=$HF_TOKEN + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN chown -R user /home/user/comps/finetuning + +USER user + +ENV PATH=$PATH:/home/user/.local/bin + +RUN python -m pip install --no-cache-dir --upgrade pip && \ + python -m pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \ + python -m pip install --no-cache-dir intel-extension-for-pytorch && \ + python -m pip install --no-cache-dir oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ && \ + python -m pip install --no-cache-dir -r /home/user/comps/finetuning/requirements.txt + +WORKDIR /home/user/comps/finetuning + +RUN git clone https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning.git && \ + cp -r Hardware-Aware-Automated-Machine-Learning/SQFT/patches /home/user/comps/finetuning/patches && \ + rm -rf Hardware-Aware-Automated-Machine-Learning && \ + mkdir third_party + +# Clone and set up transformers +RUN git clone https://github.com/huggingface/transformers.git third_party/transformers && \ + cd third_party/transformers && \ + git checkout v4.44.2 && \ + git apply --ignore-space-change --ignore-whitespace /home/user/comps/finetuning/patches/transformers-v4.44.2.patch && \ + pip install -e . + +# Clone and set up peft +RUN git clone https://github.com/huggingface/peft.git third_party/peft && \ + cd third_party/peft && \ + git checkout v0.10.0 && \ + git apply --ignore-space-change --ignore-whitespace /home/user/comps/finetuning/patches/peft-v0.10.0.patch && \ + pip install -e . + +# Clone and set up nncf +RUN git clone https://github.com/openvinotoolkit/nncf.git third_party/nncf && \ + cd third_party/nncf && \ + git checkout f143e1c && \ + git apply --ignore-space-change --ignore-whitespace /home/user/comps/finetuning/patches/nncf-f143e1c.patch && \ + pip install -e . 
+ +ENV PYTHONPATH=$PYTHONPATH:/home/user + +RUN echo PKGPATH=$(python3 -c "import pkg_resources; print(pkg_resources.get_distribution('oneccl-bind-pt').location)") >> run.sh && \ + echo 'export LD_LIBRARY_PATH=$PKGPATH/oneccl_bindings_for_pytorch/opt/mpi/lib/:$LD_LIBRARY_PATH' >> run.sh && \ + echo 'source $PKGPATH/oneccl_bindings_for_pytorch/env/setvars.sh' >> run.sh && \ + echo ray start --head --dashboard-host=0.0.0.0 >> run.sh && \ + echo export RAY_ADDRESS=http://localhost:8265 >> run.sh && \ + echo python finetuning_service.py >> run.sh + +CMD bash run.sh diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index 11a544dfda..6314bad81b 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -22,6 +22,19 @@ function build_docker_images() { fi } +function build_sqft_docker_images() { + cd $WORKPATH + echo $(pwd) + # TODO: get the Dockerfile from the SQFT source repository instead of comps/finetuning/Dockerfile.sqft. + docker build -t opea/finetuning:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg HF_TOKEN=$HF_TOKEN -f comps/finetuning/Dockerfile.sqft . + if [ $? -ne 0 ]; then + echo "opea/finetuning (sqft) built fail" + exit 1 + else + echo "opea/finetuning (sqft) built successful" + fi +} + function start_service() { export no_proxy="localhost,127.0.0.1,"${ip_address} docker run -d --name="test-comps-finetuning-server" -p $finetuning_service_port:$finetuning_service_port -p $ray_port:$ray_port --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy opea/finetuning:comps @@ -225,6 +238,44 @@ EOF } +function validate_sqft_microservice() { + cd $LOG_PATH + export no_proxy="localhost,127.0.0.1,"${ip_address} + + ########################## + # general test # + ########################## + # test /v1/dataprep upload file + echo '[{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. 
Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. 
Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."}]' > $LOG_PATH/test_data.json + validate_upload \ + "http://${ip_address}:$finetuning_service_port/v1/files" \ + "general - upload" \ + "test-comps-finetuning-server" \ + "fine-tune" \ + "test_data.json" + + # test /v1/fine_tuning/jobs + validate_finetune \ + "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ + "general - finetuning" \ + "test-comps-finetuning-server" \ + '{"id":"ft-job' \ + '{"training_file": "test_data.json","model": "facebook/opt-125m"}' + + + ########################## + # sqft test # + ########################## + # test /v1/fine_tuning/jobs + validate_finetune \ + "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ + "sqft - finetuning" \ + "test-comps-finetuning-server" \ + '{"id":"ft-job' \ + '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "neural_lora_search": true, "target_module_groups": [["q_proj"]], "search_space": ["8,6,4"]}}}' + +} + function stop_docker() { cid=$(docker ps -aq --filter "name=test-comps-finetuning-server*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi @@ -233,12 +284,16 @@ function stop_docker() { function main() { stop_docker - build_docker_images start_service - validate_microservice + # test sqft + stop_docker + build_sqft_docker_images + start_service + validate_sqft_microservice + stop_docker echo y | docker system prune From 95e1f26de58ba5d1dfe68c310aceb41ec8bf2d62 Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 3 Dec 2024 08:55:38 +0800 Subject: [PATCH 11/17] Get the Dockerfile from SQFT repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. 
Pablo Muñoz Signed-off-by: Yuan0320 --- comps/finetuning/Dockerfile.sqft | 66 ----------------------------- tests/finetuning/test_finetuning.sh | 2 +- 2 files changed, 1 insertion(+), 67 deletions(-) delete mode 100644 comps/finetuning/Dockerfile.sqft diff --git a/comps/finetuning/Dockerfile.sqft b/comps/finetuning/Dockerfile.sqft deleted file mode 100644 index ee47310fb2..0000000000 --- a/comps/finetuning/Dockerfile.sqft +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Use the same python version with ray -FROM python:3.10.14 - -ARG HF_TOKEN - -ENV HF_TOKEN=$HF_TOKEN - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY comps /home/user/comps - -RUN chown -R user /home/user/comps/finetuning - -USER user - -ENV PATH=$PATH:/home/user/.local/bin - -RUN python -m pip install --no-cache-dir --upgrade pip && \ - python -m pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \ - python -m pip install --no-cache-dir intel-extension-for-pytorch && \ - python -m pip install --no-cache-dir oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ && \ - python -m pip install --no-cache-dir -r /home/user/comps/finetuning/requirements.txt - -WORKDIR /home/user/comps/finetuning - -RUN git clone https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning.git && \ - cp -r Hardware-Aware-Automated-Machine-Learning/SQFT/patches /home/user/comps/finetuning/patches && \ - rm -rf Hardware-Aware-Automated-Machine-Learning && \ - mkdir third_party - -# Clone and set up transformers -RUN git clone https://github.com/huggingface/transformers.git third_party/transformers && \ - cd third_party/transformers && \ - git checkout v4.44.2 && \ - git apply --ignore-space-change --ignore-whitespace /home/user/comps/finetuning/patches/transformers-v4.44.2.patch && \ - pip install -e . - -# Clone and set up peft -RUN git clone https://github.com/huggingface/peft.git third_party/peft && \ - cd third_party/peft && \ - git checkout v0.10.0 && \ - git apply --ignore-space-change --ignore-whitespace /home/user/comps/finetuning/patches/peft-v0.10.0.patch && \ - pip install -e . - -# Clone and set up nncf -RUN git clone https://github.com/openvinotoolkit/nncf.git third_party/nncf && \ - cd third_party/nncf && \ - git checkout f143e1c && \ - git apply --ignore-space-change --ignore-whitespace /home/user/comps/finetuning/patches/nncf-f143e1c.patch && \ - pip install -e . 
- -ENV PYTHONPATH=$PYTHONPATH:/home/user - -RUN echo PKGPATH=$(python3 -c "import pkg_resources; print(pkg_resources.get_distribution('oneccl-bind-pt').location)") >> run.sh && \ - echo 'export LD_LIBRARY_PATH=$PKGPATH/oneccl_bindings_for_pytorch/opt/mpi/lib/:$LD_LIBRARY_PATH' >> run.sh && \ - echo 'source $PKGPATH/oneccl_bindings_for_pytorch/env/setvars.sh' >> run.sh && \ - echo ray start --head --dashboard-host=0.0.0.0 >> run.sh && \ - echo export RAY_ADDRESS=http://localhost:8265 >> run.sh && \ - echo python finetuning_service.py >> run.sh - -CMD bash run.sh diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index 6314bad81b..0c11b866e5 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -25,7 +25,7 @@ function build_docker_images() { function build_sqft_docker_images() { cd $WORKPATH echo $(pwd) - # TODO: get the Dockerfile from the SQFT source repository instead of comps/finetuning/Dockerfile.sqft. + curl -o comps/finetuning/Dockerfile.sqft https://raw.githubusercontent.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/main/SQFT/opea/Dockerfile docker build -t opea/finetuning:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg HF_TOKEN=$HF_TOKEN -f comps/finetuning/Dockerfile.sqft . if [ $? -ne 0 ]; then echo "opea/finetuning (sqft) built fail" From 24ec2a7400440fe6fbea3e0a1e51593c89a2b5e1 Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 3 Dec 2024 12:00:25 +0800 Subject: [PATCH 12/17] Add tests for adapter merging and extract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. Pablo Muñoz Signed-off-by: Yuan0320 --- tests/finetuning/test_finetuning.sh | 69 +++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index 0c11b866e5..a1357ed83f 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -130,6 +130,35 @@ function validate_finetune() { fi sleep 1m done + + echo "$FINTUNING_ID" +} + +function validate_merge_or_extract_adapter() { + local URL="$1" + local SERVICE_NAME="$2" + local DOCKER_NAME="$3" + local EXPECTED_DATA="$4" + local INPUT_DATA="$5" + + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' -d "$INPUT_DATA" "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + + # Check if the parsed values match the expected values + if [[ "$RESPONSE_BODY" != *"$EXPECTED_DATA"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi } function validate_microservice() { @@ -243,7 +272,7 @@ function validate_sqft_microservice() { export no_proxy="localhost,127.0.0.1,"${ip_address} ########################## - # general test # + # general test # ########################## # test /v1/dataprep upload file echo '[{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. 
Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. 
Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. 
Get enough sleep and maintain a consistent sleep schedule."}]' > $LOG_PATH/test_data.json @@ -254,25 +283,49 @@ function validate_sqft_microservice() { "fine-tune" \ "test_data.json" - # test /v1/fine_tuning/jobs - validate_finetune \ + # test /v1/fine_tuning/jobs (LoRA) + FINTUNING_ID=$(validate_finetune \ "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ "general - finetuning" \ "test-comps-finetuning-server" \ '{"id":"ft-job' \ - '{"training_file": "test_data.json","model": "facebook/opt-125m"}' + '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "target_modules": ["q_proj"]}}}') + + # test merging the LoRA adapter into the base model + validate_merge_or_extract_adapter \ + "http://${ip_address}:$finetuning_service_port/v1/finetune/merge_adapter" \ + "adapter merge" \ + "test-comps-finetuning-server" \ + "${FINTUNING_ID}" \ + "{\"fine_tuning_job_id\": \"${FINTUNING_ID}\"}" ########################## - # sqft test # + # sqft test # ########################## - # test /v1/fine_tuning/jobs - validate_finetune \ + # test /v1/fine_tuning/jobs (SQFT-NLS) + FINTUNING_ID=$(validate_finetune \ "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ "sqft - finetuning" \ "test-comps-finetuning-server" \ '{"id":"ft-job' \ - '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "neural_lora_search": true, "target_module_groups": [["q_proj"]], "search_space": ["8,6,4"]}}}' + '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "neural_lora_search": true, "target_module_groups": [["q_proj"]], "search_space": ["8,6,4"]}}}') + + # test extracting heuristic sub-adapter + validate_merge_or_extract_adapter \ + "http://${ip_address}:$finetuning_service_port/v1/finetune/extract_sub_adapter" \ + "extract sub-adapter" \ + "test-comps-finetuning-server" \ + "${FINTUNING_ID}" \ + "{\"fine_tuning_job_id\": \"${FINTUNING_ID}\", \"adapter_version\": \"heuristic\"}" + + # test merging the heuristic sub-adapter into the base model + validate_merge_or_extract_adapter \ + "http://${ip_address}:$finetuning_service_port/v1/finetune/merge_adapter" \ + "adapter merge" \ + "test-comps-finetuning-server" \ + "${FINTUNING_ID}" \ + "{\"fine_tuning_job_id\": \"${FINTUNING_ID}\", \"adapter_version\": \"heuristic\"}" } From 812aa4a73c67d05279a94a302264c384ad6f790e Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 3 Dec 2024 12:43:28 +0800 Subject: [PATCH 13/17] Add --no-cache option to sqft Docker build process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. Pablo Muñoz Signed-off-by: Yuan0320 --- tests/finetuning/test_finetuning.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index a1357ed83f..363f26886b 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -26,7 +26,7 @@ function build_sqft_docker_images() { cd $WORKPATH echo $(pwd) curl -o comps/finetuning/Dockerfile.sqft https://raw.githubusercontent.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/main/SQFT/opea/Dockerfile - docker build -t opea/finetuning:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg HF_TOKEN=$HF_TOKEN -f comps/finetuning/Dockerfile.sqft . 
+ docker build --no-cache -t opea/finetuning:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg HF_TOKEN=$HF_TOKEN -f comps/finetuning/Dockerfile.sqft . if [ $? -ne 0 ]; then echo "opea/finetuning (sqft) built fail" exit 1 From fcd5f3d644ea9fcb8b64bc51c3271255c2ab89c3 Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 3 Dec 2024 13:30:29 +0800 Subject: [PATCH 14/17] fix: resolve issue with FINTUNING_ID handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. Pablo Muñoz Signed-off-by: Yuan0320 --- tests/finetuning/test_finetuning.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index 363f26886b..165c2ce3fc 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -130,8 +130,6 @@ function validate_finetune() { fi sleep 1m done - - echo "$FINTUNING_ID" } function validate_merge_or_extract_adapter() { @@ -284,12 +282,12 @@ function validate_sqft_microservice() { "test_data.json" # test /v1/fine_tuning/jobs (LoRA) - FINTUNING_ID=$(validate_finetune \ + validate_finetune \ "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ "general - finetuning" \ "test-comps-finetuning-server" \ '{"id":"ft-job' \ - '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "target_modules": ["q_proj"]}}}') + '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "target_modules": ["q_proj"]}}}' # test merging the LoRA adapter into the base model validate_merge_or_extract_adapter \ @@ -304,12 +302,12 @@ function validate_sqft_microservice() { # sqft test # ########################## # test /v1/fine_tuning/jobs (SQFT-NLS) - FINTUNING_ID=$(validate_finetune \ + validate_finetune \ "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs" \ "sqft - finetuning" \ "test-comps-finetuning-server" \ '{"id":"ft-job' \ - '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "neural_lora_search": true, "target_module_groups": [["q_proj"]], "search_space": ["8,6,4"]}}}') + '{"training_file": "test_data.json","model": "facebook/opt-125m", "General": {"lora_config": {"r": 8, "neural_lora_search": true, "target_module_groups": [["q_proj"]], "search_space": ["8,6,4"]}}}' # test extracting heuristic sub-adapter validate_merge_or_extract_adapter \ From 44781f02c849d84fe8687435e13a37f623e80eda Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 3 Dec 2024 17:48:31 +0800 Subject: [PATCH 15/17] Add logging of Docker container output on HTTP status or content mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. Pablo Muñoz Signed-off-by: Yuan0320 --- tests/finetuning/test_finetuning.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index 165c2ce3fc..554525a0cc 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -145,6 +145,7 @@ function validate_merge_or_extract_adapter() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs $DOCKER_NAME >> ${LOG_PATH}/finetuning-server_merge_or_extract_adapter.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. 
Checking content..." @@ -153,6 +154,7 @@ function validate_merge_or_extract_adapter() { # Check if the parsed values match the expected values if [[ "$RESPONSE_BODY" != *"$EXPECTED_DATA"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs $DOCKER_NAME >> ${LOG_PATH}/finetuning-server_merge_or_extract_adapter.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." From a0446eb0dffbe857f6483106009b59643545af3c Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 3 Dec 2024 19:53:47 +0800 Subject: [PATCH 16/17] fix: resolve ipex issue and remove useless code in merge_adapter.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. Pablo Muñoz Signed-off-by: Yuan0320 --- comps/finetuning/utils/merge_adapter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/comps/finetuning/utils/merge_adapter.py b/comps/finetuning/utils/merge_adapter.py index f1bca2ab51..44fd01e8ad 100644 --- a/comps/finetuning/utils/merge_adapter.py +++ b/comps/finetuning/utils/merge_adapter.py @@ -1,6 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import intel_extension_for_pytorch from peft import PeftModel from transformers import AutoModelForCausalLM, AutoTokenizer @@ -9,8 +10,6 @@ def main(base_model_path, adapter_model_path, output_path): base_model = AutoModelForCausalLM.from_pretrained(base_model_path, trust_remote_code=True) model = PeftModel.from_pretrained(base_model, adapter_model_path) model.eval() - for name, param in model.named_parameters(): - param.requires_grad = False merged_model = model.merge_and_unload() merged_model.train(False) base_model.save_pretrained(output_path, state_dict=merged_model.state_dict()) From 68f2bb0292eb383727e767b37c94b998a9db64c1 Mon Sep 17 00:00:00 2001 From: Yuan0320 Date: Tue, 3 Dec 2024 20:59:04 +0800 Subject: [PATCH 17/17] Add tests for custom sub-adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: J. 
Pablo Muñoz Signed-off-by: Yuan0320 --- tests/finetuning/test_finetuning.sh | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/finetuning/test_finetuning.sh b/tests/finetuning/test_finetuning.sh index 554525a0cc..1dcb6d7b60 100644 --- a/tests/finetuning/test_finetuning.sh +++ b/tests/finetuning/test_finetuning.sh @@ -314,7 +314,7 @@ function validate_sqft_microservice() { # test extracting heuristic sub-adapter validate_merge_or_extract_adapter \ "http://${ip_address}:$finetuning_service_port/v1/finetune/extract_sub_adapter" \ - "extract sub-adapter" \ + "extract heuristic sub-adapter" \ "test-comps-finetuning-server" \ "${FINTUNING_ID}" \ "{\"fine_tuning_job_id\": \"${FINTUNING_ID}\", \"adapter_version\": \"heuristic\"}" @@ -322,11 +322,27 @@ function validate_sqft_microservice() { # test merging the heuristic sub-adapter into the base model validate_merge_or_extract_adapter \ "http://${ip_address}:$finetuning_service_port/v1/finetune/merge_adapter" \ - "adapter merge" \ + "merge heuristic sub-adapter" \ "test-comps-finetuning-server" \ "${FINTUNING_ID}" \ "{\"fine_tuning_job_id\": \"${FINTUNING_ID}\", \"adapter_version\": \"heuristic\"}" + # test extracting sub-adapter with custom configuration + validate_merge_or_extract_adapter \ + "http://${ip_address}:$finetuning_service_port/v1/finetune/extract_sub_adapter" \ + "extract custom sub-adapter" \ + "test-comps-finetuning-server" \ + "${FINTUNING_ID}" \ + "{\"fine_tuning_job_id\": \"${FINTUNING_ID}\", \"adapter_version\": \"custom\", \"custom_config\": [8, 6, 4, 4, 8, 6, 8, 8, 8, 8, 4, 8]}" + + # test merging the custom sub-adapter into the base model + validate_merge_or_extract_adapter \ + "http://${ip_address}:$finetuning_service_port/v1/finetune/merge_adapter" \ + "merge custom sub-adapter" \ + "test-comps-finetuning-server" \ + "${FINTUNING_ID}" \ + "{\"fine_tuning_job_id\": \"${FINTUNING_ID}\", \"adapter_version\": \"custom\"}" + } function stop_docker() {