From 90fe1bb0dbf05dcbad5cc2de2ca33e7361b20755 Mon Sep 17 00:00:00 2001 From: Wonderplex <50866817+Jasonqi146@users.noreply.github.com> Date: Wed, 8 Nov 2023 01:13:05 -0500 Subject: [PATCH] Feature/llama factory llama2 pipeline (#89) * added llama-factory under llm_rl * added sft training bash * added datasets from llama-factory; will delete later * finished llama-2-13b train and inference * fixed minor errors * changed config * added deepspeed config * added more training config to train bash * adding fix for wandb tags and distributed ranks * added fastchat data to replicate training for 2k --- .gitignore | 7 ++- llm_rl/cli_inference-llama-2-13b.sh | 6 +++ llm_rl/deepspeed_config_s2.json | 24 ++++++++++ llm_rl/finetune-llama-2-13b.sh | 46 +++++++++++++++++++ llm_rl/preprocess/create_dummy.py | 15 ++++++ llm_rl/preprocess/create_sft_data.py | 27 +++++++++++ .../preprocess/create_sft_data_from_chat.py | 17 +++++++ llm_rl/requirements.txt | 6 +++ llm_rl/reward_model.sh | 14 +++--- llm_rl/src/llmtuner/extras/template.py | 13 ++++++ .../src/llmtuner/hparams/finetuning_args.py | 13 ++++++ llm_rl/src/llmtuner/tuner/core/utils.py | 8 +++- llm_rl/src/llmtuner/tuner/rm/trainer.py | 2 +- llm_rl/src/llmtuner/tuner/sft/workflow.py | 6 +++ llm_rl/src/llmtuner/tuner/tune.py | 8 +++- 15 files changed, 202 insertions(+), 10 deletions(-) create mode 100644 llm_rl/cli_inference-llama-2-13b.sh create mode 100644 llm_rl/deepspeed_config_s2.json create mode 100644 llm_rl/finetune-llama-2-13b.sh create mode 100644 llm_rl/preprocess/create_dummy.py create mode 100644 llm_rl/preprocess/create_sft_data.py create mode 100644 llm_rl/preprocess/create_sft_data_from_chat.py diff --git a/.gitignore b/.gitignore index 83339037..256458e7 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ llm_ft/checkpoints/* llm_ft/*_checkpoints/* !**/dummy_conversation.json !llm_ft/deepspeed_config_s2.json +!llm_rl/data/*.json # Editor .idea @@ -193,4 +194,8 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ \ No newline at end of file +#.idea/ + +./llm_rl/preprocess/GPT4-4_Redis_Easy_No_Slide + +llm_rl/*cache/ diff --git a/llm_rl/cli_inference-llama-2-13b.sh b/llm_rl/cli_inference-llama-2-13b.sh new file mode 100644 index 00000000..6230bb6c --- /dev/null +++ b/llm_rl/cli_inference-llama-2-13b.sh @@ -0,0 +1,6 @@ +python src/cli_demo.py \ + --model_name_or_path meta-llama/Llama-2-13b-hf \ + --cache_dir ./model_cache \ + --template llama2-sotopia \ + --finetuning_type lora \ + --checkpoint_dir /workspace/sotopia-llm/llm_rl/llama2-13b-sft_cache/checkpoint-35 \ No newline at end of file diff --git a/llm_rl/deepspeed_config_s2.json b/llm_rl/deepspeed_config_s2.json new file mode 100644 index 00000000..b79c37a6 --- /dev/null +++ b/llm_rl/deepspeed_config_s2.json @@ -0,0 +1,24 @@ +{ + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "zero_allow_untested_optimizer": true, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "initial_scale_power": 16, + "loss_scale_window": 1000, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 2, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "reduce_scatter": true, + "reduce_bucket_size": 5e8, + "overlap_comm": false, + "contiguous_gradients": true + } + } \ No newline at end of file diff --git a/llm_rl/finetune-llama-2-13b.sh b/llm_rl/finetune-llama-2-13b.sh new file mode 100644 index 00000000..3fd321bb --- /dev/null +++ b/llm_rl/finetune-llama-2-13b.sh @@ -0,0 +1,46 @@ +deepspeed src/train_bash.py \ + --stage sft \ + --model_name_or_path meta-llama/Llama-2-13b-hf \ + --dataset sotopia_easy_sft \ + --dataset_dir ./data/ \ + --val_size 0.1 \ + --cutoff_len 4096 \ + --template llama2-sotopia \ + --wandb_project "llama-factory-sft" \ + --wandb_tags "['llama-2-13b-hf']" \ + --use_fast_tokenizer False \ + --do_train \ + --num_train_epochs 15.0 \ + --per_device_train_batch_size 8 \ + --gradient_accumulation_steps 8 \ + --finetuning_type lora \ + --lora_target q_proj,v_proj \ + --lora_rank 8 \ + --lora_alpha 16 \ + --lora_dropout 0.05 \ + --learning_rate 5e-5 \ + --lr_scheduler_type cosine \ + --weight_decay 0. \ + --warmup_ratio 0.03 \ + --quantization_bit 4 \ + --quantization_type nf4 \ + --double_quantization \ + --flash_attn True \ + --gradient_checkpointing True \ + --bf16 \ + --cache_dir ./model_cache \ + --overwrite_cache \ + --output_dir ./llama2-13b-sft_cache \ + --overwrite_output_dir \ + --logging_steps 1 \ + --evaluation_strategy "steps" \ + --per_device_eval_batch_size 32 \ + --eval_accumulation_steps 32 \ + --save_strategy "epoch" \ + --save_total_limit 5 \ + --use_auth_token True \ + --wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \ + --hf_auth_token "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" \ + --deepspeed ./deepspeed_config_s2.json + + # --dataset alpaca_gpt4_en \ \ No newline at end of file diff --git a/llm_rl/preprocess/create_dummy.py b/llm_rl/preprocess/create_dummy.py new file mode 100644 index 00000000..1f23689d --- /dev/null +++ b/llm_rl/preprocess/create_dummy.py @@ -0,0 +1,15 @@ +import json + +dummy_qa = { + "instruction": "How old is Haofei? ", + "input": "", + "output": "Haofei is one year old. 
" + } + +res = [] +for i in range(1000): + new_qa = dict(dummy_qa) + res.append(new_qa) + +with open("../data/dummy_convs.json", "w") as f: + json.dump(res, f, indent=4) \ No newline at end of file diff --git a/llm_rl/preprocess/create_sft_data.py b/llm_rl/preprocess/create_sft_data.py new file mode 100644 index 00000000..b50ca7c5 --- /dev/null +++ b/llm_rl/preprocess/create_sft_data.py @@ -0,0 +1,27 @@ +import os +import json + +def join_json_files(directory_path): + # List to hold all the joined data + joined_data = [] + + # Iterating through each file in the directory + for filename in os.listdir(directory_path): + # Constructing full file path + file_path = os.path.join(directory_path, filename) + + # Ensuring it's a file and has a .json extension + if os.path.isfile(file_path) and file_path.endswith('.json'): + with open(file_path, 'r') as file: + # Load the content of the file + data = json.load(file) + new_data = {"instruction": data["prompt"], + "input": "", + "output": data["result"] + } + joined_data.append(new_data) + return joined_data + +joined_data = join_json_files("./GPT4-4_Redis_Easy_No_Slide/") +with open("../data/GPT4-4_Redis_Easy_No_Slide.json", "w") as f: + json.dump(joined_data, f) \ No newline at end of file diff --git a/llm_rl/preprocess/create_sft_data_from_chat.py b/llm_rl/preprocess/create_sft_data_from_chat.py new file mode 100644 index 00000000..3698217b --- /dev/null +++ b/llm_rl/preprocess/create_sft_data_from_chat.py @@ -0,0 +1,17 @@ +import json + +with open("./fastchat-ft-gp4-gpt4-easy-truncated.json", 'r') as f: + data = json.load(f) + +result = [] +for dp in data: + new_dp = {} + convs = dp['conversations'] + new_dp['instruction'] = convs[0]['value'] + new_dp['input'] = "" + new_dp['output'] = convs[1]['value'] + + result.append(new_dp) + +with open("../data/fastchat-ft-gp4-gpt4-easy-truncated.json", 'w') as f: + json.dump(result, f, indent=4) \ No newline at end of file diff --git a/llm_rl/requirements.txt b/llm_rl/requirements.txt index 840d2f2d..03ac2855 100644 --- a/llm_rl/requirements.txt +++ b/llm_rl/requirements.txt @@ -17,4 +17,10 @@ uvicorn pydantic fastapi sse-starlette +packaging matplotlib +py-cpuinfo +deepspeed +bitsandbytes>=0.39.0 +flash-attn +wandb \ No newline at end of file diff --git a/llm_rl/reward_model.sh b/llm_rl/reward_model.sh index 3068fb43..fa5424df 100644 --- a/llm_rl/reward_model.sh +++ b/llm_rl/reward_model.sh @@ -1,16 +1,15 @@ -python src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --stage rm \ - --model_name_or_path meta-llama/Llama-2-13b \ + --model_name_or_path meta-llama/Llama-2-13b-hf \ --do_train \ --dataset comparison_gpt4_en \ --template default \ --finetuning_type lora \ --lora_target q_proj,v_proj \ --resume_lora_training False \ - --checkpoint_dir ./llama-2-13b-rm \ --output_dir ./llama-2-13b-rm \ - --per_device_train_batch_size 2 \ - --gradient_accumulation_steps 4 \ + --per_device_train_batch_size 8 \ + --gradient_accumulation_steps 8 \ --lr_scheduler_type cosine \ --logging_steps 10 \ --save_steps 1000 \ @@ -18,4 +17,7 @@ python src/train_bash.py \ --num_train_epochs 1.0 \ --plot_loss \ --fp16 \ - --hf_auth_token "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" \ No newline at end of file + --use_auth_token True \ + --wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \ + --hf_auth_token "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" \ + --deepspeed ./deepspeed_config_s2.json diff --git a/llm_rl/src/llmtuner/extras/template.py b/llm_rl/src/llmtuner/extras/template.py index 401750ce..508c2ce0 100644 
--- a/llm_rl/src/llmtuner/extras/template.py +++ b/llm_rl/src/llmtuner/extras/template.py @@ -499,6 +499,19 @@ def get_template_and_fix_tokenizer( sep=[] ) +register_template( + name="llama2-sotopia", + prefix=[ + "{{system}}" + ], + prompt=[ + "[INST] {{query}} [/INST]" + ], + system=( + "" + ), + sep=[] +) r""" Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b diff --git a/llm_rl/src/llmtuner/hparams/finetuning_args.py b/llm_rl/src/llmtuner/hparams/finetuning_args.py index d5ef323d..d8f2d299 100644 --- a/llm_rl/src/llmtuner/hparams/finetuning_args.py +++ b/llm_rl/src/llmtuner/hparams/finetuning_args.py @@ -1,3 +1,4 @@ +from typing import List import json from typing import Literal, Optional from dataclasses import asdict, dataclass, field @@ -83,6 +84,18 @@ class FinetuningArguments: default=0, metadata={"help": "The alpha parameter to control the noise magnitude in NEFTune."} ) + wandb_token: Optional[str] = field( + default=None, + metadata={"help": "The login api token for wandb."} + ) + wandb_project: Optional[str] = field( + default=None, + metadata={"help": "The project name for the current wandb log."} + ) + wandb_tags: Optional[List[str]] = field( + default=None, + metadata={"help": "The tag for the current wandb run."} + ) def __post_init__(self): if isinstance(self.lora_target, str): # support custom target modules/layers of LoRA diff --git a/llm_rl/src/llmtuner/tuner/core/utils.py b/llm_rl/src/llmtuner/tuner/core/utils.py index d9a1aac9..03043e20 100644 --- a/llm_rl/src/llmtuner/tuner/core/utils.py +++ b/llm_rl/src/llmtuner/tuner/core/utils.py @@ -9,10 +9,16 @@ from transformers.modeling_utils import PreTrainedModel from llmtuner.hparams import FinetuningArguments +import torch.distributed as dist +import os logger = get_logger(__name__) - +def is_first_node(): + world_rank = dist.get_rank() if torch.distributed.is_initialized() else 0 + local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0 + return world_rank == local_rank == 0 + def find_all_linear_modules( model: "PreTrainedModel", quantization_bit: Optional[int] = None, diff --git a/llm_rl/src/llmtuner/tuner/rm/trainer.py b/llm_rl/src/llmtuner/tuner/rm/trainer.py index 80502937..94549f18 100644 --- a/llm_rl/src/llmtuner/tuner/rm/trainer.py +++ b/llm_rl/src/llmtuner/tuner/rm/trainer.py @@ -38,7 +38,7 @@ def compute_loss( See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509 """ # Compute rewards - _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) + _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) # (lm_logits, loss, value) if values.size(0) != inputs["input_ids"].size(0): # adapt to chatglm2 values = torch.transpose(values, 0, 1) diff --git a/llm_rl/src/llmtuner/tuner/sft/workflow.py b/llm_rl/src/llmtuner/tuner/sft/workflow.py index 8d53605d..171fc5da 100644 --- a/llm_rl/src/llmtuner/tuner/sft/workflow.py +++ b/llm_rl/src/llmtuner/tuner/sft/workflow.py @@ -10,6 +10,7 @@ from llmtuner.tuner.core import load_model_and_tokenizer from llmtuner.tuner.sft.metric import ComputeMetrics from llmtuner.tuner.sft.trainer import CustomSeq2SeqTrainer +from llmtuner.tuner.core.utils import is_first_node if TYPE_CHECKING: from transformers import TrainerCallback @@ -27,6 +28,9 @@ def run_sft( dataset = get_dataset(model_args, data_args) model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="sft") dataset = preprocess_dataset(dataset, tokenizer, data_args, 
training_args, stage="sft") + + if training_args.gradient_checkpointing: + model.enable_input_require_grads() if training_args.predict_with_generate: tokenizer.padding_side = "left" # use left-padding in generation @@ -44,6 +48,8 @@ def run_sft( generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams )) training_args = Seq2SeqTrainingArguments(**training_args_dict) + if is_first_node(): + training_args.report_to = ["wandb"] # Initialize our Trainer trainer = CustomSeq2SeqTrainer( diff --git a/llm_rl/src/llmtuner/tuner/tune.py b/llm_rl/src/llmtuner/tuner/tune.py index 4eb7f78f..054a6b1c 100644 --- a/llm_rl/src/llmtuner/tuner/tune.py +++ b/llm_rl/src/llmtuner/tuner/tune.py @@ -9,6 +9,9 @@ from llmtuner.tuner.ppo import run_ppo from llmtuner.tuner.dpo import run_dpo +from llmtuner.tuner.core.utils import is_first_node +import wandb + if TYPE_CHECKING: from transformers import TrainerCallback @@ -19,7 +22,10 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None): model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args) callbacks = [LogCallback()] if callbacks is None else callbacks - + if is_first_node(): + wandb.login(key=finetuning_args.wandb_token) + wandb.init(project=finetuning_args.wandb_project, tags=[*finetuning_args.wandb_tags] if finetuning_args.wandb_tags else None) + if finetuning_args.stage == "pt": run_pt(model_args, data_args, training_args, finetuning_args, callbacks) elif finetuning_args.stage == "sft":
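
Note on the distributed W&B wiring in this patch: it routes wandb_token, wandb_project, and wandb_tags through FinetuningArguments and gates wandb.login()/wandb.init() behind is_first_node(), so a multi-GPU deepspeed launch produces a single W&B run instead of one per worker. Below is a minimal standalone sketch of that gating pattern (not part of the patch), assuming a standard torch.distributed launch where the launcher sets LOCAL_RANK; the maybe_init_wandb helper name is illustrative only.

    import os
    from typing import List, Optional

    import torch.distributed as dist
    import wandb


    def is_first_node() -> bool:
        # Only the process with global rank 0 and local rank 0 sets up W&B;
        # every other worker in the distributed job skips logging entirely.
        world_rank = dist.get_rank() if dist.is_initialized() else 0
        local_rank = int(os.environ.get("LOCAL_RANK", 0))
        return world_rank == 0 and local_rank == 0


    def maybe_init_wandb(
        token: Optional[str],
        project: Optional[str],
        tags: Optional[List[str]],
    ) -> None:
        # Illustrative helper: the patch inlines this logic at the top of run_exp().
        if not is_first_node():
            return
        if token:
            wandb.login(key=token)
        wandb.init(project=project, tags=list(tags) if tags else None)

The same idea shows up in workflow.py, where training_args.report_to is set to ["wandb"] only on the first node so the HF Trainer's own reporting stays confined to a single process.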