diff --git a/assets/Hymba_loss.png b/assets/Hymba_loss.png
new file mode 100644
index 000000000..173ea2546
Binary files /dev/null and b/assets/Hymba_loss.png differ
diff --git a/experimental/Hymba/README.md b/experimental/Hymba/README.md
index 12650cfc7..3d11018c8 100644
--- a/experimental/Hymba/README.md
+++ b/experimental/Hymba/README.md
@@ -33,6 +33,12 @@ For training the Hymba model, please add below arguments to the `run_finetune.sh`
 --bf16
 ```
 
+Demo script: [run_finetune_hymba.sh](./run_finetune_hymba.sh)
+Running on A100, H100, or A40 GPUs is recommended.
+
+## Training Loss
+The training loss curve for `nvidia/Hymba-1.5B-Instruct`, fine-tuned on the `MedMCQA/train` dataset with a learning rate of $5 \times 10^{-5}$ over 100 steps using SFT, LoRA, LISA, and DoRA, is shown below:
+![Training Loss](../../assets/Hymba_loss.png)
diff --git a/experimental/Hymba/run_finetune_hymba.sh b/experimental/Hymba/run_finetune_hymba.sh
new file mode 100644
index 000000000..37b6b05b5
--- /dev/null
+++ b/experimental/Hymba/run_finetune_hymba.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Please run this script under ${project_id} in project directory of
+# https://github.com/shizhediao/llm-ft
+# COMMIT: d5fecf30ba8011067b10cf51fede53a5ab6574e4
+
+# Parses arguments
+model_name_or_path=nvidia/Hymba-1.5B-Instruct
+dataset_path=MedMCQA/train
+output_dir=output_models/finetune
+deepspeed_args="--master_port=11000"
+conversation_template=llama2
+
+# Safety related arguments
+trust_remote_code=0
+
+while [[ $# -ge 1 ]]; do
+  key="$1"
+  case ${key} in
+    -m|--model_name_or_path)
+      model_name_or_path="$2"
+      shift
+      ;;
+    -d|--dataset_path)
+      dataset_path="$2"
+      shift
+      ;;
+    -o|--output_model_path)
+      output_dir="$2"
+      shift
+      ;;
+    --conversation_template)
+      conversation_template="$2"
+      shift
+      ;;
+    --deepspeed_args)
+      deepspeed_args="$2"
+      shift
+      ;;
+    --trust_remote_code)
+      trust_remote_code="$2"
+      shift
+      ;;
+    *)
+      echo "error: unknown option \"${key}\"" 1>&2
+      exit 1
+  esac
+  shift
+done
+
+# Finetune
+exp_id=finetune
+# The script lives two levels below the project root (experimental/Hymba/),
+# so resolve the project directory accordingly.
+project_dir=$(cd "$(dirname "$0")"/../..; pwd)
+log_dir=${project_dir}/log/${exp_id}
+mkdir -p ${output_dir} ${log_dir}
+
+deepspeed ${deepspeed_args} \
+  examples/finetune.py \
+    --model_name_or_path ${model_name_or_path} \
+    --trust_remote_code ${trust_remote_code} \
+    --dataset_path ${dataset_path} \
+    --output_dir ${output_dir} --overwrite_output_dir \
+    --conversation_template ${conversation_template} \
+    --num_train_epochs 0.01 \
+    --learning_rate 5e-5 \
+    --disable_group_texts 1 \
+    --block_size 256 \
+    --per_device_train_batch_size 1 \
+    --deepspeed configs/ds_config_zero2_no_offload.json \
+    --bf16 \
+    --run_name hymba_finetune \
+    --validation_split_percentage 0 \
+    --logging_steps 1 \
+    --do_train \
+    --gradient_checkpointing 1 \
+    --use_flash_attention 1 \
+    --ddp_timeout 72000 \
+    --save_steps 5000 \
+    --dataloader_num_workers 1 \
+    > >(tee ${log_dir}/train.log) \
+    2> >(tee ${log_dir}/train.err >&2)
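+
+# A usage sketch (the values below are illustrative): any option handled by
+# the argument parser above can be overridden on the command line, and options
+# left unspecified keep the defaults set at the top of this script, e.g.:
+#
+#   bash experimental/Hymba/run_finetune_hymba.sh \
+#     --model_name_or_path nvidia/Hymba-1.5B-Instruct \
+#     --dataset_path MedMCQA/train \
+#     --output_model_path output_models/hymba_finetune \
+#     --deepspeed_args "--master_port=11001" \
+#     --trust_remote_code 1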