Feature/llama factory llama2 pipeline (#89)
* added llama-factory under llm_rl

* added SFT training bash script

* added datasets from llama-factory; will delete later

* finished llama-2-13b training and inference

* fixed minor errors

* changed config

* added deepspeed config

* added more training config to train bash

* added fix for wandb tags and distributed ranks

* added fastchat data to replicate training for 2k

(cherry picked from commit c769638)
Jasonqi146 authored and lwaekfjlk committed Nov 8, 2023
1 parent a616e52 commit 95bc7ba
Showing 15 changed files with 202 additions and 10 deletions.
7 changes: 6 additions & 1 deletion .gitignore
@@ -20,6 +20,7 @@ llm_ft/checkpoints/*
llm_ft/*_checkpoints/*
!**/dummy_conversation.json
!llm_ft/deepspeed_config_s2.json
!llm_rl/data/*.json

# Editor
.idea
@@ -193,4 +194,8 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

./llm_rl/preprocess/GPT4-4_Redis_Easy_No_Slide

llm_rl/*cache/
6 changes: 6 additions & 0 deletions llm_rl/cli_inference-llama-2-13b.sh
@@ -0,0 +1,6 @@
python src/cli_demo.py \
--model_name_or_path meta-llama/Llama-2-13b-hf \
--cache_dir ./model_cache \
--template llama2-sotopia \
--finetuning_type lora \
--checkpoint_dir /workspace/sotopia-llm/llm_rl/llama2-13b-sft_cache/checkpoint-35
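
Note: the script above runs LLaMA-Factory's own CLI demo. As a rough cross-check, here is a minimal sketch of loading the same base model and LoRA checkpoint with plain transformers + peft, assuming the checkpoint directory is a standard PEFT adapter; paths and generation settings are illustrative, not taken from this commit:

# Hypothetical sketch, not part of the commit: load the SFT LoRA adapter directly.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = "meta-llama/Llama-2-13b-hf"
adapter = "./llama2-13b-sft_cache/checkpoint-35"  # assumed to be a PEFT-format checkpoint

tokenizer = AutoTokenizer.from_pretrained(base, cache_dir="./model_cache")
model = AutoModelForCausalLM.from_pretrained(base, cache_dir="./model_cache", device_map="auto")
model = PeftModel.from_pretrained(model, adapter)  # attach the LoRA weights

prompt = "[INST] Hi, how are you? [/INST]"  # llama2-sotopia template: empty system, bare [INST] wrapper
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=64)[0], skip_special_tokens=True))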
24 changes: 24 additions & 0 deletions llm_rl/deepspeed_config_s2.json
@@ -0,0 +1,24 @@
{
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"zero_allow_untested_optimizer": true,
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"initial_scale_power": 16,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"zero_optimization": {
"stage": 2,
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"reduce_scatter": true,
"reduce_bucket_size": 5e8,
"overlap_comm": false,
"contiguous_gradients": true
}
}
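
Note: this is a ZeRO stage-2 config, which shards optimizer states and gradients across GPUs; the "auto" fields are filled in by the HuggingFace trainer from its own arguments when the file is passed via --deepspeed. A minimal sketch of the equivalent programmatic hookup (argument names from transformers; the values mirror the training script below):

# Sketch only: the same JSON can be attached through TrainingArguments.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./llama2-13b-sft_cache",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=8,
    bf16=True,
    deepspeed="./deepspeed_config_s2.json",  # "auto" entries resolve against these arguments
)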
46 changes: 46 additions & 0 deletions llm_rl/finetune-llama-2-13b.sh
@@ -0,0 +1,46 @@
deepspeed src/train_bash.py \
--stage sft \
--model_name_or_path meta-llama/Llama-2-13b-hf \
--dataset sotopia_easy_sft \
--dataset_dir ./data/ \
--val_size 0.1 \
--cutoff_len 4096 \
--template llama2-sotopia \
--wandb_project "llama-factory-sft" \
--wandb_tags "['llama-2-13b-hf']" \
--use_fast_tokenizer False \
--do_train \
--num_train_epochs 15.0 \
--per_device_train_batch_size 8 \
--gradient_accumulation_steps 8 \
--finetuning_type lora \
--lora_target q_proj,v_proj \
--lora_rank 8 \
--lora_alpha 16 \
--lora_dropout 0.05 \
--learning_rate 5e-5 \
--lr_scheduler_type cosine \
--weight_decay 0. \
--warmup_ratio 0.03 \
--quantization_bit 4 \
--quantization_type nf4 \
--double_quantization \
--flash_attn True \
--gradient_checkpointing True \
--bf16 \
--cache_dir ./model_cache \
--overwrite_cache \
--output_dir ./llama2-13b-sft_cache \
--overwrite_output_dir \
--logging_steps 1 \
--evaluation_strategy "steps" \
--per_device_eval_batch_size 32 \
--eval_accumulation_steps 32 \
--save_strategy "epoch" \
--save_total_limit 5 \
--use_auth_token True \
--wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \
--hf_auth_token "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" \
--deepspeed ./deepspeed_config_s2.json

# --dataset alpaca_gpt4_en \
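
Note: the flags above describe a QLoRA-style setup: the base model is loaded in 4-bit NF4 with double quantization, and rank-8 LoRA adapters are trained on q_proj/v_proj. A minimal sketch of that configuration with peft + bitsandbytes (LLaMA-Factory assembles this internally; this is an illustration, not the project's code path):

# Sketch of the quantization + LoRA setup implied by the flags above.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

bnb = BitsAndBytesConfig(
    load_in_4bit=True,                      # --quantization_bit 4
    bnb_4bit_quant_type="nf4",              # --quantization_type nf4
    bnb_4bit_use_double_quant=True,         # --double_quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # matches --bf16
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-13b-hf", quantization_config=bnb, device_map="auto"
)
lora = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.05,
                  target_modules=["q_proj", "v_proj"], task_type="CAUSAL_LM")
model = get_peft_model(model, lora)
model.print_trainable_parameters()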
15 changes: 15 additions & 0 deletions llm_rl/preprocess/create_dummy.py
@@ -0,0 +1,15 @@
import json

dummy_qa = {
"instruction": "How old is Haofei? ",
"input": "",
"output": "Haofei is one year old. "
}

res = []
for i in range(1000):
new_qa = dict(dummy_qa)
res.append(new_qa)

with open("../data/dummy_convs.json", "w") as f:
json.dump(res, f, indent=4)
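
Note: the dummy records follow the alpaca-style instruction/input/output schema that the other preprocessing scripts in this commit also produce. A quick sanity check of the generated file (hypothetical, not part of the commit):

# Verify the dummy dataset has the expected shape and keys.
import json

with open("../data/dummy_convs.json") as f:
    rows = json.load(f)
assert len(rows) == 1000
assert set(rows[0]) == {"instruction", "input", "output"}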
27 changes: 27 additions & 0 deletions llm_rl/preprocess/create_sft_data.py
@@ -0,0 +1,27 @@
import os
import json

def join_json_files(directory_path):
# List to hold all the joined data
joined_data = []

# Iterating through each file in the directory
for filename in os.listdir(directory_path):
# Constructing full file path
file_path = os.path.join(directory_path, filename)

# Ensuring it's a file and has a .json extension
if os.path.isfile(file_path) and file_path.endswith('.json'):
with open(file_path, 'r') as file:
# Load the content of the file
data = json.load(file)
new_data = {"instruction": data["prompt"],
"input": "",
"output": data["result"]
}
joined_data.append(new_data)
return joined_data

joined_data = join_json_files("./GPT4-4_Redis_Easy_No_Slide/")
with open("../data/GPT4-4_Redis_Easy_No_Slide.json", "w") as f:
json.dump(joined_data, f)
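
Note: this assumes every .json file in the directory carries top-level "prompt" and "result" keys. A slightly more defensive variant (hypothetical, not part of the commit) would iterate in a stable order and skip files that don't match:

# Hypothetical defensive variant of join_json_files.
import os
import json

def join_json_files_safe(directory_path):
    joined = []
    for filename in sorted(os.listdir(directory_path)):  # sorted for a reproducible order
        path = os.path.join(directory_path, filename)
        if not (os.path.isfile(path) and path.endswith(".json")):
            continue
        with open(path, "r") as f:
            data = json.load(f)
        if "prompt" not in data or "result" not in data:
            continue  # skip malformed dumps instead of raising KeyError
        joined.append({"instruction": data["prompt"], "input": "", "output": data["result"]})
    return joined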
17 changes: 17 additions & 0 deletions llm_rl/preprocess/create_sft_data_from_chat.py
@@ -0,0 +1,17 @@
import json

with open("./fastchat-ft-gp4-gpt4-easy-truncated.json", 'r') as f:
data = json.load(f)

result = []
for dp in data:
new_dp = {}
convs = dp['conversations']
new_dp['instruction'] = convs[0]['value']
new_dp['input'] = ""
new_dp['output'] = convs[1]['value']

result.append(new_dp)

with open("../data/fastchat-ft-gp4-gpt4-easy-truncated.json", 'w') as f:
json.dump(result, f, indent=4)
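
Note: this script keeps only the first human/assistant exchange of each FastChat conversation (convs[0] and convs[1]); any later turns are dropped. If multi-turn data were needed, a hypothetical extension (assuming strictly alternating human/gpt turns) could pair every exchange:

# Hypothetical multi-turn variant, not part of the commit.
import json

with open("./fastchat-ft-gp4-gpt4-easy-truncated.json", "r") as f:
    data = json.load(f)

result = []
for dp in data:
    convs = dp["conversations"]
    for i in range(0, len(convs) - 1, 2):  # assumes human turn at i, assistant reply at i + 1
        result.append({"instruction": convs[i]["value"], "input": "", "output": convs[i + 1]["value"]})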
6 changes: 6 additions & 0 deletions llm_rl/requirements.txt
@@ -17,4 +17,10 @@ uvicorn
pydantic
fastapi
sse-starlette
packaging
matplotlib
py-cpuinfo
deepspeed
bitsandbytes>=0.39.0
flash-attn
wandb
14 changes: 8 additions & 6 deletions llm_rl/reward_model.sh
@@ -1,21 +1,23 @@
- python src/train_bash.py \
+ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--stage rm \
- --model_name_or_path meta-llama/Llama-2-13b \
+ --model_name_or_path meta-llama/Llama-2-13b-hf \
--do_train \
--dataset comparison_gpt4_en \
--template default \
--finetuning_type lora \
--lora_target q_proj,v_proj \
--resume_lora_training False \
- --checkpoint_dir ./llama-2-13b-rm \
--output_dir ./llama-2-13b-rm \
- --per_device_train_batch_size 2 \
- --gradient_accumulation_steps 4 \
+ --per_device_train_batch_size 8 \
+ --gradient_accumulation_steps 8 \
--lr_scheduler_type cosine \
--logging_steps 10 \
--save_steps 1000 \
--learning_rate 1e-6 \
--num_train_epochs 1.0 \
--plot_loss \
--fp16 \
- --hf_auth_token "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"
+ --use_auth_token True \
+ --wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \
+ --hf_auth_token "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" \
+ --deepspeed ./deepspeed_config_s2.json
13 changes: 13 additions & 0 deletions llm_rl/src/llmtuner/extras/template.py
@@ -499,6 +499,19 @@ def get_template_and_fix_tokenizer(
sep=[]
)

register_template(
name="llama2-sotopia",
prefix=[
"{{system}}"
],
prompt=[
"[INST] {{query}} [/INST]"
],
system=(
""
),
sep=[]
)
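
Note: with an empty system string and no separator, a single-turn prompt rendered by this template reduces to the bare Llama-2 instruction wrapper, e.g. (illustrative rendering only; the actual formatting is done by the Template machinery):

# Roughly what the llama2-sotopia template produces for one turn.
query = "Hi, how are you?"
prompt = f"[INST] {query} [/INST]"  # system prefix is empty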

r"""
Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
13 changes: 13 additions & 0 deletions llm_rl/src/llmtuner/hparams/finetuning_args.py
@@ -1,3 +1,4 @@
from typing import List
import json
from typing import Literal, Optional
from dataclasses import asdict, dataclass, field
@@ -83,6 +84,18 @@ class FinetuningArguments:
default=0,
metadata={"help": "The alpha parameter to control the noise magnitude in NEFTune."}
)
wandb_token: Optional[str] = field(
default=None,
metadata={"help": "The login api token for wandb."}
)
wandb_project: Optional[str] = field(
default=None,
metadata={"help": "The project name for the current wandb log."}
)
wandb_tags: Optional[List[str]] = field(
default=None,
metadata={"help": "The tag for the current wandb run."}
)

def __post_init__(self):
if isinstance(self.lora_target, str): # support custom target modules/layers of LoRA
8 changes: 7 additions & 1 deletion llm_rl/src/llmtuner/tuner/core/utils.py
@@ -9,10 +9,16 @@
from transformers.modeling_utils import PreTrainedModel
from llmtuner.hparams import FinetuningArguments

import torch.distributed as dist
import os

logger = get_logger(__name__)


def is_first_node():
    # True only on the global rank-0 process (first process of the first node).
    world_rank = dist.get_rank() if dist.is_initialized() else 0
    local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0
    return world_rank == local_rank == 0

def find_all_linear_modules(
model: "PreTrainedModel",
quantization_bit: Optional[int] = None,
2 changes: 1 addition & 1 deletion llm_rl/src/llmtuner/tuner/rm/trainer.py
@@ -38,7 +38,7 @@ def compute_loss(
See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509
"""
# Compute rewards
- _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
+ _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)  # (lm_logits, loss, value)
if values.size(0) != inputs["input_ids"].size(0): # adapt to chatglm2
values = torch.transpose(values, 0, 1)

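
Note: for context on the rm-stage change above, the pairwise reward trainer scores a chosen and a rejected response and optimizes a Bradley-Terry style objective over their reward difference; a minimal sketch of that loss (illustrative, not the exact trainer code):

# Minimal sketch of a pairwise reward-model loss.
import torch
import torch.nn.functional as F

def pairwise_rm_loss(chosen_rewards: torch.Tensor, rejected_rewards: torch.Tensor) -> torch.Tensor:
    # -log(sigmoid(r_chosen - r_rejected)), averaged over the batch
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()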
6 changes: 6 additions & 0 deletions llm_rl/src/llmtuner/tuner/sft/workflow.py
@@ -10,6 +10,7 @@
from llmtuner.tuner.core import load_model_and_tokenizer
from llmtuner.tuner.sft.metric import ComputeMetrics
from llmtuner.tuner.sft.trainer import CustomSeq2SeqTrainer
from llmtuner.tuner.core.utils import is_first_node

if TYPE_CHECKING:
from transformers import TrainerCallback
@@ -27,6 +28,9 @@ def run_sft(
dataset = get_dataset(model_args, data_args)
model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="sft")
dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="sft")

if training_args.gradient_checkpointing:
model.enable_input_require_grads()

if training_args.predict_with_generate:
tokenizer.padding_side = "left" # use left-padding in generation
@@ -44,6 +48,8 @@
generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams
))
training_args = Seq2SeqTrainingArguments(**training_args_dict)
if is_first_node():
training_args.report_to = ["wandb"]

# Initialize our Trainer
trainer = CustomSeq2SeqTrainer(
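
Note: gating report_to on is_first_node() keeps the non-zero ranks from opening their own wandb runs; they keep whatever report_to default they were launched with. A hypothetical, more explicit alternative would be to disable wandb outright on those ranks:

# Hypothetical alternative: silence wandb on every rank except the first node.
import os
from llmtuner.tuner.core.utils import is_first_node

if not is_first_node():
    os.environ["WANDB_MODE"] = "disabled"  # wandb reads this env var at init time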
8 changes: 7 additions & 1 deletion llm_rl/src/llmtuner/tuner/tune.py
@@ -9,6 +9,9 @@
from llmtuner.tuner.ppo import run_ppo
from llmtuner.tuner.dpo import run_dpo

from llmtuner.tuner.core.utils import is_first_node
import wandb

if TYPE_CHECKING:
from transformers import TrainerCallback

@@ -19,7 +22,10 @@
def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None):
model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args)
callbacks = [LogCallback()] if callbacks is None else callbacks

if is_first_node():
wandb.login(key=finetuning_args.wandb_token)
wandb.init(project=finetuning_args.wandb_project, tags=[*finetuning_args.wandb_tags] if finetuning_args.wandb_tags else None)

if finetuning_args.stage == "pt":
run_pt(model_args, data_args, training_args, finetuning_args, callbacks)
elif finetuning_args.stage == "sft":
