From 776758b597e1e6bbf2f7943a60179b47aa283dc3 Mon Sep 17 00:00:00 2001
From: "[[ -z $EMAIL ]] && read -e -p \"Enter your email (for git configuration): \" EMAIL"
Date: Mon, 13 Jan 2025 11:18:41 -0500
Subject: [PATCH 1/3] Add more rigorous non-slow grad accum tests

---
 tests/trainer/test_trainer.py | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 6e90b3d7e4059d..3696d485de1fe9 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -151,6 +151,17 @@
 from accelerate import Accelerator
 from accelerate.state import AcceleratorState
 
+import contextlib
+import io
+import sys
+
+@contextlib.contextmanager
+def nostdout():
+    save_stdout = sys.stdout
+    sys.stdout = io.BytesIO()
+    yield
+    sys.stdout = save_stdout
+
 
 PATH_SAMPLE_TEXT = f"{get_tests_dir()}/fixtures/sample_text.txt"
 
@@ -762,35 +773,35 @@ def test_model_init(self):
         trainer.train()
         self.check_trained_model(trainer.model, alternate_seed=True)
 
-    @slow
     def test_gradient_accumulation_loss_alignment_with_model_loss(self):
         set_seed(42)
         import datasets
 
-        model_name = "nickypro/tinyllama-110M"
+        model_name = "nickypro/tinyllama-15M"
         dataset_name = "wikitext"
         dataset_config = "wikitext-2-raw-v1"
-        dataset = datasets.load_dataset(dataset_name, dataset_config, split="train[:500]")
+        dataset = datasets.load_dataset(dataset_name, dataset_config, split="train[:40]")
         dataset = dataset.train_test_split(test_size=0.2)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
         tokenizer.pad_token = tokenizer.eos_token
 
         def tokenize_function(examples):
-            return tokenizer(examples["text"], max_length=128, padding="max_length", truncation=True)
+            return tokenizer(examples["text"], max_length=16, padding="max_length", truncation=True)
 
         tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)
 
         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
 
         model = AutoModelForCausalLM.from_pretrained(model_name)
+        state_dict = model.state_dict()
 
         base_loss_callback = StoreLossCallback()
 
         args_kwargs = {
             "report_to": "none",
             "logging_steps": 1,
-            "max_steps": 20,
+            "max_steps": 5,
             "learning_rate": 3e-4,
             "disable_tqdm": True,
         }
@@ -830,7 +841,7 @@ def tokenize_function(examples):
         trainer.train()
         set_seed(42)
-        model = AutoModelForCausalLM.from_pretrained(model_name)
+        model.load_state_dict(state_dict)
         broken_loss_callback = StoreLossCallback()
         trainer = Trainer(
             model,
             args,
@@ -855,22 +866,23 @@ def tokenize_function(examples):
         self.assertLess(max(diff_truth), 0.01, f"Difference {max(diff_truth)} is not within 0.01")
 
         # max diff broken should be very off
-        self.assertGreater(max(diff_broken), 3, f"Difference {max(diff_broken)} is not greater than 3")
+        self.assertGreater(max(diff_broken), 2, f"Difference {max(diff_broken)} is not greater than 2")
 
-    @slow
     def test_gradient_accumulation_loss_alignment_with_loss_func(self):
         set_seed(42)
         import datasets
 
-        model_name = "roneneldan/TinyStories-33M"
+        model_name = "nickypro/tinyllama-15M"
        dataset_name = "wikitext"
         dataset_config = "wikitext-2-raw-v1"
-        dataset = datasets.load_dataset(dataset_name, dataset_config, split="train[:500]")
+        dataset = datasets.load_dataset(dataset_name, dataset_config, split="train[:40]")
         dataset = dataset.train_test_split(test_size=0.2)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+        tokenizer.pad_token = tokenizer.eos_token
+
         def tokenize_function(examples):
-            return tokenizer(examples["text"])
+            return tokenizer(examples["text"], max_length=16, padding="max_length", truncation=True)
 
         tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)
 
@@ -891,7 +903,7 @@ def compute_loss(logits, labels, vocab_size, num_items_in_batch, disable_num_ite
         args_kwargs = {
             "report_to": "none",
             "logging_steps": 1,
-            "max_steps": 20,
+            "max_steps": 5,
             "learning_rate": 3e-4,
             "disable_tqdm": True,
         }

From 7306624f4500b160c96c5f9e71b5b5b9e5c25160 Mon Sep 17 00:00:00 2001
From: "[[ -z $EMAIL ]] && read -e -p \"Enter your email (for git configuration): \" EMAIL"
Date: Mon, 13 Jan 2025 11:24:12 -0500
Subject: [PATCH 2/3] Further nits

---
 tests/trainer/test_trainer.py | 30 ++++++++----------------------
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 3696d485de1fe9..3b23fcc3214300 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -151,18 +151,6 @@
 from accelerate import Accelerator
 from accelerate.state import AcceleratorState
 
-import contextlib
-import io
-import sys
-
-@contextlib.contextmanager
-def nostdout():
-    save_stdout = sys.stdout
-    sys.stdout = io.BytesIO()
-    yield
-    sys.stdout = save_stdout
-
-
 PATH_SAMPLE_TEXT = f"{get_tests_dir()}/fixtures/sample_text.txt"
 
 
@@ -781,7 +769,6 @@ def test_gradient_accumulation_loss_alignment_with_model_loss(self):
         dataset_name = "wikitext"
         dataset_config = "wikitext-2-raw-v1"
         dataset = datasets.load_dataset(dataset_name, dataset_config, split="train[:40]")
-        dataset = dataset.train_test_split(test_size=0.2)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
         tokenizer.pad_token = tokenizer.eos_token
@@ -789,7 +776,7 @@ def tokenize_function(examples):
         def tokenize_function(examples):
             return tokenizer(examples["text"], max_length=16, padding="max_length", truncation=True)
 
-        tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)
+        tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
 
         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
 
@@ -814,7 +801,7 @@ def tokenize_function(examples):
         trainer = Trainer(
             model,
             args,
-            train_dataset=tokenized_dataset["train"],
+            train_dataset=tokenized_dataset,
             callbacks=[base_loss_callback],
             data_collator=data_collator,
         )
@@ -834,7 +821,7 @@ def tokenize_function(examples):
         trainer = Trainer(
             model,
             args,
-            train_dataset=tokenized_dataset["train"],
+            train_dataset=tokenized_dataset,
             callbacks=[grad_accum_loss_callback],
             data_collator=data_collator,
         )
@@ -846,7 +833,7 @@ def tokenize_function(examples):
         trainer = Trainer(
             model,
             args,
-            train_dataset=tokenized_dataset["train"],
+            train_dataset=tokenized_dataset,
             callbacks=[broken_loss_callback],
             data_collator=data_collator,
         )
@@ -876,7 +863,6 @@ def test_gradient_accumulation_loss_alignment_with_loss_func(self):
         dataset_name = "wikitext"
         dataset_config = "wikitext-2-raw-v1"
         dataset = datasets.load_dataset(dataset_name, dataset_config, split="train[:40]")
-        dataset = dataset.train_test_split(test_size=0.2)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
         tokenizer.pad_token = tokenizer.eos_token
@@ -884,7 +870,7 @@ def tokenize_function(examples):
         def tokenize_function(examples):
             return tokenizer(examples["text"], max_length=16, padding="max_length", truncation=True)
 
-        tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)
+        tokenized_dataset = dataset.map(tokenize_function, batched=True)
         tokenizer.pad_token = tokenizer.eos_token
 
         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
@@ -916,7 +902,7 @@ def compute_loss(logits, labels, vocab_size, num_items_in_batch, disable_num_ite
         trainer = Trainer(
             model,
             args,
-            train_dataset=tokenized_dataset["train"],
+            train_dataset=tokenized_dataset,
             callbacks=[base_loss_callback],
             compute_loss_func=loss_fn,
             data_collator=data_collator,
@@ -936,7 +922,7 @@ def compute_loss(logits, labels, vocab_size, num_items_in_batch, disable_num_ite
         trainer = Trainer(
             model,
             args,
-            train_dataset=tokenized_dataset["train"],
+            train_dataset=tokenized_dataset,
             callbacks=[grad_accum_loss_callback],
             compute_loss_func=loss_fn,
             data_collator=data_collator,
@@ -950,7 +936,7 @@ def compute_loss(logits, labels, vocab_size, num_items_in_batch, disable_num_ite
         trainer = Trainer(
             model,
             args,
-            train_dataset=tokenized_dataset["train"],
+            train_dataset=tokenized_dataset,
             callbacks=[broken_loss_callback],
             compute_loss_func=loss_fn,
             data_collator=data_collator,

From b0be2eda9bde222935263bee2025878fbcde784d Mon Sep 17 00:00:00 2001
From: "[[ -z $EMAIL ]] && read -e -p \"Enter your email (for git configuration): \" EMAIL"
Date: Mon, 13 Jan 2025 11:27:52 -0500
Subject: [PATCH 3/3] Re-add space

---
 tests/trainer/test_trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 3b23fcc3214300..aba6cfbe151ec1 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -151,6 +151,7 @@
 from accelerate import Accelerator
 from accelerate.state import AcceleratorState
 
+
 PATH_SAMPLE_TEXT = f"{get_tests_dir()}/fixtures/sample_text.txt"