Add LISA support for GPT2
- add preliminary support for Mixtral
- add --lisa_layers_attribute for models with custom layer access paths
research4pan committed Apr 4, 2024
1 parent 9bdd4f6 commit 3b3b658
Showing 2 changed files with 11 additions and 1 deletion.
6 changes: 6 additions & 0 deletions src/lmflow/args.py
@@ -591,6 +591,12 @@ class FinetunerArguments(TrainingArguments):
"help": "the number of steps in each freezing interval of LISA, i.e. the selected unfreezed layers are randomly switched every {lisa_interval_steps} steps."
}
)
lisa_layers_attribute: int = field(
default="model.model.layers",
metadata={
"help": "where the layer attribute stores, e.g. model.model.layers"
}
)


@dataclass
Expand Down
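The new field makes the layer access path configurable from the command line. A minimal sketch of how it surfaces through transformers' HfArgumentParser, assuming the module layout shown in this diff; the --output_dir value is only a placeholder required by TrainingArguments:

from transformers import HfArgumentParser
from lmflow.args import FinetunerArguments

# Parse the new flag the way a training script would.
parser = HfArgumentParser(FinetunerArguments)
(finetuner_args,) = parser.parse_args_into_dataclasses(
    args=["--output_dir", "out", "--lisa_layers_attribute", "model.transformer.h"]
)
print(finetuner_args.lisa_layers_attribute)  # -> model.transformer.h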
6 changes: 5 additions & 1 deletion src/lmflow/pipeline/finetuner.py
@@ -311,10 +311,14 @@ def __init__(self, n_layers, interval_steps, model):
             self.layers_attribute = 'model.model.layers'  # Layer access path for Qwen model
         elif self.model.__class__.__name__ == 'MistralForCausalLM':
             self.layers_attribute = 'model.model.layers'
+        elif self.model.__class__.__name__ == 'MixtralForCausalLM':
+            self.layers_attribute = 'model.model.layers'
         elif self.model.__class__.__name__ == 'GemmaForCausalLM':
             self.layers_attribute = 'model.model.layers'
+        elif self.model.__class__.__name__ == 'GPT2LMHeadModel':
+            self.layers_attribute = 'model.transformer.h'
         else:
-            self.layers_attribute = 'model.transformer.h'  # General access path
+            self.layers_attribute = training_args.lisa_layers_attribute
         self.total_layers = len(eval('self.' + self.layers_attribute))  # Dynamically execute to get the number of layers
 
         self.active_layers_indices = []
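The else branch now honors --lisa_layers_attribute instead of assuming a GPT-2-style 'model.transformer.h' layout, and the dotted path is resolved with eval as shown above. For reference, operator.attrgetter from the standard library resolves the same kind of path without eval; this is a hypothetical alternative, not the repository's code:

from operator import attrgetter

def resolve_layers(holder, path):
    # Walk a dotted attribute path such as "model.transformer.h" against
    # `holder`, mirroring what eval('self.' + path) does in the diff.
    return attrgetter(path)(holder)

# Inside the callback's __init__ this would read:
#   self.total_layers = len(resolve_layers(self, self.layers_attribute))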
