From 70f85903f4f386d414498090e5e0892752b1e67d Mon Sep 17 00:00:00 2001 From: Chinthaka Gamanayakege Date: Thu, 13 Jun 2024 10:16:29 +0000 Subject: [PATCH] fix out of bound issue when n_proc is larger --- llmc/dataloader.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/llmc/dataloader.h b/llmc/dataloader.h index 73073872e..30e80f603 100644 --- a/llmc/dataloader.h +++ b/llmc/dataloader.h @@ -204,8 +204,9 @@ Copy pasting the section on the eval datafile format, from data_common.py: // for now, could relax later #define ASSUMED_NUM_COMPLETIONS 4 -// helper macro for ceildiv +// helper macro for ceildiv and floordiv #define CEIL_DIV(M, N) (((M) + (N)-1) / (N)) +#define FLOOR_DIV(M, N) ((M) / (N)) typedef struct { // variables related to distributed training @@ -236,16 +237,17 @@ void evalloader_reset(EvalLoader *loader) { // For example if there are N examples in the file and 4 processes, // then process 0 should start at 0, process 1 at N/4, process 2 at N/2, etc. 
// determine how much work there is for all processes - int examples_per_process = CEIL_DIV(loader->num_examples, loader->num_processes); - int can_fit_examples = loader->B / ASSUMED_NUM_COMPLETIONS; - loader->num_batches = CEIL_DIV(examples_per_process, can_fit_examples); + int examples_per_process = FLOOR_DIV(loader->num_examples, loader->num_processes); // determine the start and end example indices for this process loader->start_example_index = examples_per_process * loader->process_rank; loader->end_example_index = examples_per_process * (loader->process_rank + 1); - // crop the end example index to the total number of examples - if (loader->end_example_index > loader->num_examples) { + // extend the end example index to the total number of examples + if (loader->process_rank == loader->num_processes - 1) { loader->end_example_index = loader->num_examples; + examples_per_process = loader->end_example_index - loader->start_example_index; } + int can_fit_examples = loader->B / ASSUMED_NUM_COMPLETIONS; + loader->num_batches = CEIL_DIV(examples_per_process, can_fit_examples); // now seek through the file to the start of that example // utilize <EXAMPLE_BYTES> for efficiency int64_t header_bytes = HEADER_SIZE * sizeof(int);