From 90fe1bb0dbf05dcbad5cc2de2ca33e7361b20755 Mon Sep 17 00:00:00 2001 From: Wonderplex <50866817+Jasonqi146@users.noreply.github.com> Date: Wed, 8 Nov 2023 01:13:05 -0500 Subject: [PATCH] Feature/llama factory llama2 pipeline (#89) * added llama-factory under llm_rl * added sft training bash * added datasets from llama-factory; will delete later * finished llama-2-13b train and inference * fixed minor errors * changed config * added deepspeed config * added more training config to train bash * adding fix for wandb tags and distributed ranks * added fastchat data to replicate training for 2k --- .gitignore | 7 ++- llm_rl/cli_inference-llama-2-13b.sh | 6 +++ llm_rl/deepspeed_config_s2.json | 24 ++++++++++ llm_rl/finetune-llama-2-13b.sh | 46 +++++++++++++++++++ llm_rl/preprocess/create_dummy.py | 15 ++++++ llm_rl/preprocess/create_sft_data.py | 27 +++++++++++ .../preprocess/create_sft_data_from_chat.py | 17 +++++++ llm_rl/requirements.txt | 6 +++ llm_rl/reward_model.sh | 14 +++--- llm_rl/src/llmtuner/extras/template.py | 13 ++++++ .../src/llmtuner/hparams/finetuning_args.py | 13 ++++++ llm_rl/src/llmtuner/tuner/core/utils.py | 8 +++- llm_rl/src/llmtuner/tuner/rm/trainer.py | 2 +- llm_rl/src/llmtuner/tuner/sft/workflow.py | 6 +++ llm_rl/src/llmtuner/tuner/tune.py | 8 +++- 15 files changed, 202 insertions(+), 10 deletions(-) create mode 100644 llm_rl/cli_inference-llama-2-13b.sh create mode 100644 llm_rl/deepspeed_config_s2.json create mode 100644 llm_rl/finetune-llama-2-13b.sh create mode 100644 llm_rl/preprocess/create_dummy.py create mode 100644 llm_rl/preprocess/create_sft_data.py create mode 100644 llm_rl/preprocess/create_sft_data_from_chat.py diff --git a/.gitignore b/.gitignore index 83339037..256458e7 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ llm_ft/checkpoints/* llm_ft/*_checkpoints/* !**/dummy_conversation.json !llm_ft/deepspeed_config_s2.json +!llm_rl/data/*.json # Editor .idea @@ -193,4 +194,8 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ \ No newline at end of file +#.idea/ + +./llm_rl/preprocess/GPT4-4_Redis_Easy_No_Slide + +llm_rl/*cache/ diff --git a/llm_rl/cli_inference-llama-2-13b.sh b/llm_rl/cli_inference-llama-2-13b.sh new file mode 100644 index 00000000..6230bb6c --- /dev/null +++ b/llm_rl/cli_inference-llama-2-13b.sh @@ -0,0 +1,6 @@ +python src/cli_demo.py \ + --model_name_or_path meta-llama/Llama-2-13b-hf \ + --cache_dir ./model_cache \ + --template llama2-sotopia \ + --finetuning_type lora \ + --checkpoint_dir /workspace/sotopia-llm/llm_rl/llama2-13b-sft_cache/checkpoint-35 \ No newline at end of file diff --git a/llm_rl/deepspeed_config_s2.json b/llm_rl/deepspeed_config_s2.json new file mode 100644 index 00000000..b79c37a6 --- /dev/null +++ b/llm_rl/deepspeed_config_s2.json @@ -0,0 +1,24 @@ +{ + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "zero_allow_untested_optimizer": true, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "initial_scale_power": 16, + "loss_scale_window": 1000, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 2, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "reduce_scatter": true, + "reduce_bucket_size": 5e8, + "overlap_comm": false, + "contiguous_gradients": true + } + } \ No newline at end of file diff --git a/llm_rl/finetune-llama-2-13b.sh b/llm_rl/finetune-llama-2-13b.sh new file mode 100644 index 00000000..3fd321bb --- /dev/null +++ b/llm_rl/finetune-llama-2-13b.sh @@ -0,0 +1,46 @@ +deepspeed src/train_bash.py \ + --stage sft \ + --model_name_or_path meta-llama/Llama-2-13b-hf \ + --dataset sotopia_easy_sft \ + --dataset_dir ./data/ \ + --val_size 0.1 \ + --cutoff_len 4096 \ + --template llama2-sotopia \ + --wandb_project "llama-factory-sft" \ + --wandb_tags "['llama-2-13b-hf']" \ + --use_fast_tokenizer False \ + --do_train \ + --num_train_epochs 15.0 \ + --per_device_train_batch_size 8 \ + --gradient_accumulation_steps 8 \ + --finetuning_type lora \ + --lora_target q_proj,v_proj \ + --lora_rank 8 \ + --lora_alpha 16 \ + --lora_dropout 0.05 \ + --learning_rate 5e-5 \ + --lr_scheduler_type cosine \ + --weight_decay 0. \ + --warmup_ratio 0.03 \ + --quantization_bit 4 \ + --quantization_type nf4 \ + --double_quantization \ + --flash_attn True \ + --gradient_checkpointing True \ + --bf16 \ + --cache_dir ./model_cache \ + --overwrite_cache \ + --output_dir ./llama2-13b-sft_cache \ + --overwrite_output_dir \ + --logging_steps 1 \ + --evaluation_strategy "steps" \ + --per_device_eval_batch_size 32 \ + --eval_accumulation_steps 32 \ + --save_strategy "epoch" \ + --save_total_limit 5 \ + --use_auth_token True \ + --wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \ + --hf_auth_token "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" \ + --deepspeed ./deepspeed_config_s2.json + + # --dataset alpaca_gpt4_en \ \ No newline at end of file diff --git a/llm_rl/preprocess/create_dummy.py b/llm_rl/preprocess/create_dummy.py new file mode 100644 index 00000000..1f23689d --- /dev/null +++ b/llm_rl/preprocess/create_dummy.py @@ -0,0 +1,15 @@ +import json + +dummy_qa = { + "instruction": "How old is Haofei? ", + "input": "", + "output": "Haofei is one year old. 
" + } + +res = [] +for i in range(1000): + new_qa = dict(dummy_qa) + res.append(new_qa) + +with open("../data/dummy_convs.json", "w") as f: + json.dump(res, f, indent=4) \ No newline at end of file diff --git a/llm_rl/preprocess/create_sft_data.py b/llm_rl/preprocess/create_sft_data.py new file mode 100644 index 00000000..b50ca7c5 --- /dev/null +++ b/llm_rl/preprocess/create_sft_data.py @@ -0,0 +1,27 @@ +import os +import json + +def join_json_files(directory_path): + # List to hold all the joined data + joined_data = [] + + # Iterating through each file in the directory + for filename in os.listdir(directory_path): + # Constructing full file path + file_path = os.path.join(directory_path, filename) + + # Ensuring it's a file and has a .json extension + if os.path.isfile(file_path) and file_path.endswith('.json'): + with open(file_path, 'r') as file: + # Load the content of the file + data = json.load(file) + new_data = {"instruction": data["prompt"], + "input": "", + "output": data["result"] + } + joined_data.append(new_data) + return joined_data + +joined_data = join_json_files("./GPT4-4_Redis_Easy_No_Slide/") +with open("../data/GPT4-4_Redis_Easy_No_Slide.json", "w") as f: + json.dump(joined_data, f) \ No newline at end of file diff --git a/llm_rl/preprocess/create_sft_data_from_chat.py b/llm_rl/preprocess/create_sft_data_from_chat.py new file mode 100644 index 00000000..3698217b --- /dev/null +++ b/llm_rl/preprocess/create_sft_data_from_chat.py @@ -0,0 +1,17 @@ +import json + +with open("./fastchat-ft-gp4-gpt4-easy-truncated.json", 'r') as f: + data = json.load(f) + +result = [] +for dp in data: + new_dp = {} + convs = dp['conversations'] + new_dp['instruction'] = convs[0]['value'] + new_dp['input'] = "" + new_dp['output'] = convs[1]['value'] + + result.append(new_dp) + +with open("../data/fastchat-ft-gp4-gpt4-easy-truncated.json", 'w') as f: + json.dump(result, f, indent=4) \ No newline at end of file diff --git a/llm_rl/requirements.txt b/llm_rl/requirements.txt index 840d2f2d..03ac2855 100644 --- a/llm_rl/requirements.txt +++ b/llm_rl/requirements.txt @@ -17,4 +17,10 @@ uvicorn pydantic fastapi sse-starlette +packaging matplotlib +py-cpuinfo +deepspeed +bitsandbytes>=0.39.0 +flash-attn +wandb \ No newline at end of file diff --git a/llm_rl/reward_model.sh b/llm_rl/reward_model.sh index 3068fb43..fa5424df 100644 --- a/llm_rl/reward_model.sh +++ b/llm_rl/reward_model.sh @@ -1,16 +1,15 @@ -python src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --stage rm \ - --model_name_or_path meta-llama/Llama-2-13b \ + --model_name_or_path meta-llama/Llama-2-13b-hf \ --do_train \ --dataset comparison_gpt4_en \ --template default \ --finetuning_type lora \ --lora_target q_proj,v_proj \ --resume_lora_training False \ - --checkpoint_dir ./llama-2-13b-rm \ --output_dir ./llama-2-13b-rm \ - --per_device_train_batch_size 2 \ - --gradient_accumulation_steps 4 \ + --per_device_train_batch_size 8 \ + --gradient_accumulation_steps 8 \ --lr_scheduler_type cosine \ --logging_steps 10 \ --save_steps 1000 \ @@ -18,4 +17,7 @@ python src/train_bash.py \ --num_train_epochs 1.0 \ --plot_loss \ --fp16 \ - --hf_auth_token "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" \ No newline at end of file + --use_auth_token True \ + --wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \ + --hf_auth_token "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" \ + --deepspeed ./deepspeed_config_s2.json diff --git a/llm_rl/src/llmtuner/extras/template.py b/llm_rl/src/llmtuner/extras/template.py index 401750ce..508c2ce0 100644 
--- a/llm_rl/src/llmtuner/extras/template.py +++ b/llm_rl/src/llmtuner/extras/template.py @@ -499,6 +499,19 @@ def get_template_and_fix_tokenizer( sep=[] ) +register_template( + name="llama2-sotopia", + prefix=[ + "{{system}}" + ], + prompt=[ + "[INST] {{query}} [/INST]" + ], + system=( + "" + ), + sep=[] +) r""" Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b diff --git a/llm_rl/src/llmtuner/hparams/finetuning_args.py b/llm_rl/src/llmtuner/hparams/finetuning_args.py index d5ef323d..d8f2d299 100644 --- a/llm_rl/src/llmtuner/hparams/finetuning_args.py +++ b/llm_rl/src/llmtuner/hparams/finetuning_args.py @@ -1,3 +1,4 @@ +from typing import List import json from typing import Literal, Optional from dataclasses import asdict, dataclass, field @@ -83,6 +84,18 @@ class FinetuningArguments: default=0, metadata={"help": "The alpha parameter to control the noise magnitude in NEFTune."} ) + wandb_token: Optional[str] = field( + default=None, + metadata={"help": "The login api token for wandb."} + ) + wandb_project: Optional[str] = field( + default=None, + metadata={"help": "The project name for the current wandb log."} + ) + wandb_tags: Optional[List[str]] = field( + default=None, + metadata={"help": "The tag for the current wandb run."} + ) def __post_init__(self): if isinstance(self.lora_target, str): # support custom target modules/layers of LoRA diff --git a/llm_rl/src/llmtuner/tuner/core/utils.py b/llm_rl/src/llmtuner/tuner/core/utils.py index d9a1aac9..03043e20 100644 --- a/llm_rl/src/llmtuner/tuner/core/utils.py +++ b/llm_rl/src/llmtuner/tuner/core/utils.py @@ -9,10 +9,16 @@ from transformers.modeling_utils import PreTrainedModel from llmtuner.hparams import FinetuningArguments +import torch.distributed as dist +import os logger = get_logger(__name__) - +def is_first_node(): + world_rank = dist.get_rank() if torch.distributed.is_initialized() else 0 + local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0 + return world_rank == local_rank == 0 + def find_all_linear_modules( model: "PreTrainedModel", quantization_bit: Optional[int] = None, diff --git a/llm_rl/src/llmtuner/tuner/rm/trainer.py b/llm_rl/src/llmtuner/tuner/rm/trainer.py index 80502937..94549f18 100644 --- a/llm_rl/src/llmtuner/tuner/rm/trainer.py +++ b/llm_rl/src/llmtuner/tuner/rm/trainer.py @@ -38,7 +38,7 @@ def compute_loss( See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509 """ # Compute rewards - _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) + _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) # (lm_logits, loss, value) if values.size(0) != inputs["input_ids"].size(0): # adapt to chatglm2 values = torch.transpose(values, 0, 1) diff --git a/llm_rl/src/llmtuner/tuner/sft/workflow.py b/llm_rl/src/llmtuner/tuner/sft/workflow.py index 8d53605d..171fc5da 100644 --- a/llm_rl/src/llmtuner/tuner/sft/workflow.py +++ b/llm_rl/src/llmtuner/tuner/sft/workflow.py @@ -10,6 +10,7 @@ from llmtuner.tuner.core import load_model_and_tokenizer from llmtuner.tuner.sft.metric import ComputeMetrics from llmtuner.tuner.sft.trainer import CustomSeq2SeqTrainer +from llmtuner.tuner.core.utils import is_first_node if TYPE_CHECKING: from transformers import TrainerCallback @@ -27,6 +28,9 @@ def run_sft( dataset = get_dataset(model_args, data_args) model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="sft") dataset = preprocess_dataset(dataset, tokenizer, data_args, 
training_args, stage="sft") + + if training_args.gradient_checkpointing: + model.enable_input_require_grads() if training_args.predict_with_generate: tokenizer.padding_side = "left" # use left-padding in generation @@ -44,6 +48,8 @@ def run_sft( generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams )) training_args = Seq2SeqTrainingArguments(**training_args_dict) + if is_first_node(): + training_args.report_to = ["wandb"] # Initialize our Trainer trainer = CustomSeq2SeqTrainer( diff --git a/llm_rl/src/llmtuner/tuner/tune.py b/llm_rl/src/llmtuner/tuner/tune.py index 4eb7f78f..054a6b1c 100644 --- a/llm_rl/src/llmtuner/tuner/tune.py +++ b/llm_rl/src/llmtuner/tuner/tune.py @@ -9,6 +9,9 @@ from llmtuner.tuner.ppo import run_ppo from llmtuner.tuner.dpo import run_dpo +from llmtuner.tuner.core.utils import is_first_node +import wandb + if TYPE_CHECKING: from transformers import TrainerCallback @@ -19,7 +22,10 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None): model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args) callbacks = [LogCallback()] if callbacks is None else callbacks - + if is_first_node(): + wandb.login(key=finetuning_args.wandb_token) + wandb.init(project=finetuning_args.wandb_project, tags=[*finetuning_args.wandb_tags] if finetuning_args.wandb_tags else None) + if finetuning_args.stage == "pt": run_pt(model_args, data_args, training_args, finetuning_args, callbacks) elif finetuning_args.stage == "sft":
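
Note on the distributed W&B wiring in this patch: it routes wandb_token, wandb_project, and wandb_tags through FinetuningArguments and gates wandb.login()/wandb.init() behind is_first_node(), so a multi-GPU deepspeed launch produces a single W&B run instead of one per worker. Below is a minimal standalone sketch of that gating pattern (not part of the patch), assuming a standard torch.distributed launch where the launcher sets LOCAL_RANK; the maybe_init_wandb helper name is illustrative only.

    import os
    from typing import List, Optional

    import torch.distributed as dist
    import wandb


    def is_first_node() -> bool:
        # Only the process with global rank 0 and local rank 0 sets up W&B;
        # every other worker in the distributed job skips logging entirely.
        world_rank = dist.get_rank() if dist.is_initialized() else 0
        local_rank = int(os.environ.get("LOCAL_RANK", 0))
        return world_rank == 0 and local_rank == 0


    def maybe_init_wandb(
        token: Optional[str],
        project: Optional[str],
        tags: Optional[List[str]],
    ) -> None:
        # Illustrative helper: the patch inlines this logic at the top of run_exp().
        if not is_first_node():
            return
        if token:
            wandb.login(key=token)
        wandb.init(project=project, tags=list(tags) if tags else None)

The same idea shows up in workflow.py, where training_args.report_to is set to ["wandb"] only on the first node so the HF Trainer's own reporting stays confined to a single process.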