add hymba scripts and add loss curve

OptimalScale · Dec 2, 2024 · 04fc127 · 04fc127
1 parent 7c4d19d
commit 04fc127
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 0 deletions.
diff --git a/assets/Hymba_loss.png b/assets/Hymba_loss.png
diff --git a/experimental/Hymba/README.md b/experimental/Hymba/README.md
@@ -33,6 +33,11 @@ For training the Hymba model, please add below arguments to the `run_finetune.sh
 --bf16
 ```
 
+Demo script: [run_finetune_hymba.sh](./run_finetune_hymba.sh)
+
 Recommended GPUs: A100, H100, or A40.
 
 
+## Training Loss
+The training loss curve for `nvidia/Hymba-1.5B-Instruct`, fine-tuned on the `MedMCQA/train` dataset with a learning rate of $5 \times 10^{-5}$ over 100 steps using SFT, LoRA, LISA, and DoRA, is shown below:
+![Training Loss](../../assets/Hymba_loss.png)
diff --git a/experimental/Hymba/run_finetune_hymba.sh b/experimental/Hymba/run_finetune_hymba.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# Please run this script under ${project_id} in project directory of
+#   https://github.com/shizhediao/llm-ft
+#     COMMIT: d5fecf30ba8011067b10cf51fede53a5ab6574e4
+
+# Parses arguments
+model_name_or_path=nvidia/Hymba-1.5B-Instruct
+dataset_path=MedMCQA/train
+output_dir=output_models/finetune
+deepspeed_args="--master_port=11000"
+conversation_template=llama2
+
+# Safety related arguments
+trust_remote_code=0
+
+while [[ $# -ge 1 ]]; do
+  key="$1"
+  case ${key} in
+    -m|--model_name_or_path)
+      model_name_or_path="$2"
+      shift
+      ;;
+    -d|--dataset_path)
+      dataset_path="$2"
+      shift
+      ;;
+    -o|--output_model_path)
+      output_dir="$2"
+      shift
+      ;;
+    --conversation_template)
+      conversation_template="$2"
+      shift
+      ;;
+    --deepspeed_args)
+      deepspeed_args="$2"
+      shift
+      ;;
+    --trust_remote_code)
+      trust_remote_code="$2"
+      shift
+      ;;
+    *)
+      echo "error: unknown option \"${key}\"" 1>&2
+      exit 1
+  esac
+  shift
+done
+
+# Finetune
+exp_id=finetune
+project_dir=$(cd "$(dirname $0)"/..; pwd)
+log_dir=${project_dir}/log/${exp_id}
+mkdir -p ${output_dir} ${log_dir}
+
+deepspeed ${deepspeed_args} \
+  examples/finetune.py \
+    --model_name_or_path ${model_name_or_path} \
+    --trust_remote_code ${trust_remote_code} \
+    --dataset_path ${dataset_path} \
+    --output_dir ${output_dir} --overwrite_output_dir \
+    --conversation_template ${conversation_template} \
+    --num_train_epochs 0.01 \
+    --learning_rate 5e-5 \
+    --disable_group_texts 1 \
+    --block_size 256 \
+    --trust_remote_code True \
+    --per_device_train_batch_size 1 \
+    --deepspeed configs/ds_config_zero2_no_offload.json \
+    --bf16 \
+    --run_name hymba_finetune \
+    --validation_split_percentage 0 \
+    --logging_steps 1 \
+    --do_train \
+    --gradient_checkpointing 1 \
+    --use_flash_attention 1 \
+    --ddp_timeout 72000 \
+    --save_steps 5000 \
+    --dataloader_num_workers 1 \
+    > >(tee ${log_dir}/train.log) \
+    2> >(tee ${log_dir}/train.err >&2)
+
+