Skip to content

Commit

Permalink
integration data pr
Browse files: browse the repository at this point in the history
  • Loading branch information
Larspennig committed Dec 22, 2024
1 parent 25873df commit 4de41af
Show file tree
Hide file tree
Showing 9 changed files with 9 additions and 40,008 deletions.
1 change: 0 additions & 1 deletion config/dataset_args/expression.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion config/run_specific_config/debug_config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset_args:
dataset_name: expression_dataset
dataset_name: arithmetic

training_args:
trial: debug
Expand Down
File renamed without changes.
2,000 changes: 0 additions & 2,000 deletions data/expression-dataset/test.jsonl

This file was deleted.

20,000 changes: 0 additions & 20,000 deletions data/expression-dataset/test_no_negative_data.jsonl

This file was deleted.

16,000 changes: 0 additions & 16,000 deletions data/expression-dataset/train.jsonl

This file was deleted.

2,000 changes: 0 additions & 2,000 deletions data/expression-dataset/val.jsonl

This file was deleted.

2 changes: 1 addition & 1 deletion src/ntl/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,6 @@ class DatasetArguments:
dataset_name: str = field(
default="mathematics_dataset",
metadata={
"help": "Name of the dataset. Allowed: mathematics_dataset, gsm8k, multiplication, expression"
"help": "Name of the dataset. Allowed: mathematics_dataset, gsm8k, multiplication, arithmetic"
},
)
12 changes: 7 additions & 5 deletions src/ntl/run_language_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,15 +337,17 @@ def run_language_modeling(model_args: ModelArguments, training_args: TrainingArg
train_dataset = load_json_dataset(train_data_path)
eval_dataset = load_json_dataset(eval_data_path)
test_dataset = load_json_dataset(test_data_path)
elif dataset_args.dataset_name == "expression_dataset":
train_data_path = "data/expression-dataset/train.jsonl"
eval_data_path = "data/expression-dataset/val.jsonl"
test_data_path = "data/expression-dataset/test.jsonl"
elif dataset_args.dataset_name == "arithmetic":
train_data_path = "data/arithmetics_dataset/data/train.jsonl"
eval_data_path = "data/arithmetics_dataset/data/val.jsonl"
test_data_path = "data/arithmetics_dataset/data/test.jsonl"
train_dataset = load_json_dataset(train_data_path)
eval_dataset = load_json_dataset(eval_data_path)
test_dataset = load_json_dataset(test_data_path)
else:
raise ValueError(f"Unknown dataset: {dataset_args.dataset_name}. Allowed: gsm8k, mathematics_dataset, multiplication")
raise ValueError(
f"Unknown dataset: {dataset_args.dataset_name}. Allowed: gsm8k, mathematics_dataset, multiplication, arithmetic"
)

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
logger.info(f"Number of parameters {num_params} of type {type(model)}")
Expand Down

0 comments on commit 4de41af

Please sign in to comment.