Skip to content

Commit

Permalink
Fix out-of-bounds issue when n_proc is large
Browse files Browse the repository at this point in the history
  • Loading branch information
Chinthaka Gamanayakege committed Jun 13, 2024
1 parent 95cef79 commit 70f8590
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions llmc/dataloader.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,9 @@ Copy pasting the section on the eval datafile format, from data_common.py:

// for now, could relax later
#define ASSUMED_NUM_COMPLETIONS 4
// helper macro for ceildiv
// helper macros for ceildiv and floordiv
// CEIL_DIV(M, N): integer quotient of M by N, rounded up when M and N are
// positive. N is expanded twice, so do not pass expressions with side effects.
#define CEIL_DIV(M, N) (((M) + (N) - 1) / (N))
// FLOOR_DIV(M, N): plain integer division M / N. C truncates toward zero,
// which matches the mathematical floor only when the quotient is non-negative.
#define FLOOR_DIV(M, N) ((M) / (N))

typedef struct {
// variables related to distributed training
Expand Down Expand Up @@ -236,16 +237,17 @@ void evalloader_reset(EvalLoader *loader) {
// For example if there are N examples in the file and 4 processes,
// then process 0 should start at 0, process 1 at N/4, process 2 at N/2, etc.
// determine how much work there is for all processes
int examples_per_process = CEIL_DIV(loader->num_examples, loader->num_processes);
int can_fit_examples = loader->B / ASSUMED_NUM_COMPLETIONS;
loader->num_batches = CEIL_DIV(examples_per_process, can_fit_examples);
int examples_per_process = FLOOR_DIV(loader->num_examples, loader->num_processes);
// determine the start and end example indices for this process
loader->start_example_index = examples_per_process * loader->process_rank;
loader->end_example_index = examples_per_process * (loader->process_rank + 1);
// crop the end example index to the total number of examples
if (loader->end_example_index > loader->num_examples) {
// extend the end example index to the total number of examples
if (loader->process_rank == loader->num_processes - 1) {
loader->end_example_index = loader->num_examples;
examples_per_process = loader->end_example_index - loader->start_example_index;
}
int can_fit_examples = loader->B / ASSUMED_NUM_COMPLETIONS;
loader->num_batches = CEIL_DIV(examples_per_process, can_fit_examples);
// now seek through the file to the start of that example
// utilize <EXAMPLE_BYTES> for efficiency
int64_t header_bytes = HEADER_SIZE * sizeof(int);
Expand Down

0 comments on commit 70f8590

Please sign in to comment.