
Commit 7e203db: updates
sayakpaul committed on Nov 20, 2024 (1 parent: 9eea656)
Showing 3 changed files with 9 additions and 5 deletions.
training/mochi-1/args.py (1 addition, 1 deletion)

@@ -369,7 +369,7 @@ def _get_optimizer_args(parser: argparse.ArgumentParser) -> None:
     parser.add_argument(
         "--beta2",
         type=float,
-        default=0.95,
+        default=0.999,
         help="The beta2 parameter for the Adam and Prodigy optimizers.",
     )
     parser.add_argument(
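For context, beta2 controls the exponential moving average of Adam's second-moment estimate; the new default of 0.999 matches torch.optim.AdamW's own default, while train.sh below still overrides it to 0.95 explicitly. A minimal, hypothetical sketch of how the parsed values typically reach the optimizer (the wiring shown here is an assumption for illustration, not this repo's code):

# Hedged sketch: assumes a plain torch.optim.AdamW is built from the parsed args.
import torch

params = [torch.nn.Parameter(torch.zeros(4))]
# betas[1] (beta2) smooths the squared-gradient estimate used in the update.
optimizer = torch.optim.AdamW(params, lr=1e-5, betas=(0.9, 0.999), weight_decay=1e-2)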
training/mochi-1/text_to_video_lora.py (6 additions, 2 deletions)

@@ -483,6 +483,7 @@ def load_model_hook(models, input_dir):
         use_cpu_offload_optimizer=args.use_cpu_offload_optimizer,
         offload_gradients=args.offload_gradients,
     )
+    accelerator.print(f"Using {optimizer.__class__.__name__} optimizer.")

     # Dataset and DataLoader
     dataset_init_kwargs = {
@@ -635,9 +636,12 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
         schedule_timesteps = noise_scheduler_copy.timesteps.to(accelerator.device)
         timesteps = timesteps.to(accelerator.device)
-        # notice the reverse.
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps][::-1]
+        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]

         sigma = sigmas[step_indices].flatten()
+        if "invert_sigmas" in noise_scheduler_copy.config and noise_scheduler_copy.config.invert_sigmas:
+            # https://github.com/huggingface/diffusers/blob/99c0483b67427de467f11aa35d54678fd36a7ea2/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py#L209
+            sigma = 1.0 - sigma
         while len(sigma.shape) < n_dim:
             sigma = sigma.unsqueeze(-1)
         return sigma
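As a standalone illustration of what the modified lookup does, here is a hedged sketch of an equivalent helper, assuming a diffusers FlowMatchEulerDiscreteScheduler-style object whose config may or may not define invert_sigmas (the function name is invented for the example):

import torch

def lookup_sigmas(scheduler, timesteps, n_dim=4, dtype=torch.float32):
    # Map each sampled timestep to its index in the scheduler's timestep table.
    sigmas = scheduler.sigmas.to(dtype=dtype)
    schedule_timesteps = scheduler.timesteps
    step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
    sigma = sigmas[step_indices].flatten()
    # Schedulers configured with invert_sigmas (as for Mochi) store flipped sigmas,
    # so undo the inversion before using sigma for loss weighting.
    if getattr(scheduler.config, "invert_sigmas", False):
        sigma = 1.0 - sigma
    # Broadcast to the latent's dimensionality.
    while len(sigma.shape) < n_dim:
        sigma = sigma.unsqueeze(-1)
    return sigma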
@@ -938,7 +942,7 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
             repo_id=repo_id,
             folder_path=args.output_dir,
             commit_message="End of training",
-            ignore_patterns=["step_*", "epoch_*"],
+            ignore_patterns=["step_*", "epoch_*", "*.bin"],
         )

     accelerator.end_training()
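The extra "*.bin" pattern keeps raw binary weight files (for example, intermediate optimizer or model state saved as .bin) out of the final Hub push. A hedged, standalone example of the same huggingface_hub call; the repo id and folder path are placeholders:

from huggingface_hub import upload_folder

upload_folder(
    repo_id="user/mochi-1-lora",        # placeholder repo id
    folder_path="mochi-lora-output",    # placeholder output directory
    commit_message="End of training",
    ignore_patterns=["step_*", "epoch_*", "*.bin"],  # skip intermediate checkpoints and .bin weights
)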
training/mochi-1/train.sh (2 additions, 2 deletions)

@@ -34,13 +34,13 @@ cmd="accelerate launch --config_file deepspeed.yaml --gpu_ids $GPU_IDS text_to_v
   --checkpointing_steps 50 \
   --gradient_accumulation_steps 4 \
   --gradient_checkpointing \
-  --learning_rate 0.0001 \
+  --learning_rate 1e-5 \
   --lr_scheduler constant \
   --lr_warmup_steps 0 \
   --lr_num_cycles 1 \
   --enable_slicing \
   --enable_tiling \
-  --optimizer adamw \
+  --optimizer adamw --use_8bit \
   --beta1 0.9 \
   --beta2 0.95 \
   --beta3 0.99 \
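The launch script now pairs a lower learning rate (1e-5 instead of 1e-4) with an 8-bit optimizer. Assuming --use_8bit maps to bitsandbytes' AdamW8bit (a common choice in such training scripts, not verified here), the resulting optimizer would be built roughly like this:

# Hedged sketch: 8-bit AdamW via bitsandbytes; the parameter list and hyperparameters are illustrative.
import torch
import bitsandbytes as bnb

params = [torch.nn.Parameter(torch.zeros(4))]
optimizer = bnb.optim.AdamW8bit(params, lr=1e-5, betas=(0.9, 0.95), weight_decay=1e-2)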
