From 6e1eabee372b2a3a005cdefcb03c4b4ef712f7dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Tue, 23 Jul 2024 17:37:37 +0200 Subject: [PATCH 1/8] Added model info to models, edited mains to homogenenize them, added a pipeline for neater launches --- src/lighteval/main_accelerate.py | 122 +++-------- src/lighteval/main_nanotron.py | 194 +++++------------- src/lighteval/models/abstract_model.py | 9 + src/lighteval/models/base_model.py | 16 +- src/lighteval/models/dummy_model.py | 3 +- src/lighteval/models/endpoint_model.py | 9 +- src/lighteval/models/model_config.py | 15 +- src/lighteval/models/model_loader.py | 59 +----- src/lighteval/models/nanotron_model.py | 23 ++- src/lighteval/models/tgi_model.py | 12 +- src/lighteval/pipeline.py | 267 +++++++++++++++++++++++++ 11 files changed, 408 insertions(+), 321 deletions(-) create mode 100644 src/lighteval/pipeline.py diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index 12122c527..025fe7c80 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -21,22 +21,13 @@ # SOFTWARE. import os -import random -import shutil -from contextlib import nullcontext from datetime import timedelta -import numpy as np - -from lighteval.evaluator import evaluate, make_results_table from lighteval.logging.evaluation_tracker import EvaluationTracker -from lighteval.logging.hierarchical_logger import hlog, hlog_warn, htrack, htrack_block -from lighteval.models.model_config import EnvConfig, create_model_config -from lighteval.models.model_loader import load_model -from lighteval.tasks.lighteval_task import LightevalTask, create_requests_from_tasks -from lighteval.tasks.registry import Registry, taskinfo_selector +from lighteval.logging.hierarchical_logger import hlog_warn, htrack +from lighteval.models.model_config import create_model_config +from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters from lighteval.utils import is_accelerate_available, is_tgi_available -from lighteval.utils_parallelism import test_all_gather if not is_accelerate_available() and not is_tgi_available(): @@ -65,87 +56,30 @@ def main(args): public=args.public_run, token=TOKEN, ) - evaluation_tracker.general_config_logger.log_args_info( - args.num_fewshot_seeds, args.override_batch_size, args.max_samples, args.job_id + pipeline_params = PipelineParameters( + launcher_type=ParallelismManager.ACCELERATE, + envconfig=env_config, + job_id=args.job_id, + dataset_loading_processes=args.dataset_loading_processes, + custom_tasks_directory=args.custom_tasks, + override_batch_size=args.override_batch_size, + num_fewshot_seeds=args.num_fewshot_seeds, + max_samples=args.max_samples, + use_chat_template=args.use_chat_template, + system_prompt=args.system_prompt, + ) + + model_config = create_model_config(args=args, accelerator=accelerator) + + pipeline = Pipeline( + tasks=args.tasks, + pipeline_parameters=pipeline_params, + evaluation_tracker=evaluation_tracker, + model_config=model_config, ) - if args.max_samples: - hlog( - "WARNING: --max_samples WAS SET. THESE NUMBERS ARE ONLY PARTIAL AND SHOULD NOT BE USED FOR COMPARISON UNLESS YOU KNOW WHAT YOU ARE DOING." 
- ) - - with htrack_block("Test all gather"): - test_all_gather(accelerator) - - with htrack_block("Creating model configuration"): - model_config = create_model_config(args=args, accelerator=accelerator) - - with htrack_block("Model loading"): - with accelerator.main_process_first() if accelerator is not None else nullcontext(): - model, model_info = load_model(config=model_config, env_config=env_config) - evaluation_tracker.general_config_logger.log_model_info(model_info) - - with htrack_block("Tasks loading"): - with accelerator.main_process_first() if accelerator is not None else nullcontext(): - task_names_list, few_shots_dict = taskinfo_selector(args.tasks) - task_dict = Registry(cache_dir=env_config.cache_dir).get_task_dict( - task_names_list, custom_tasks=args.custom_tasks - ) - LightevalTask.load_datasets(task_dict.values(), args.dataset_loading_processes) - - evaluation_tracker.task_config_logger.log(task_dict) - - hlog("Loading documents, and requests") - requests, docs = create_requests_from_tasks( - task_dict=task_dict, - fewshot_dict=few_shots_dict, - num_fewshot_seeds=args.num_fewshot_seeds, - lm=model, - max_samples=args.max_samples, - evaluation_tracker=evaluation_tracker, - use_chat_template=args.use_chat_template, - system_prompt=args.system_prompt, - ) - - with htrack_block("Setting seeds and waiting for all processes"): - hlog(f"setting seed to {1234} for random and numpy") - random.seed(1234) - np.random.seed(1234) - if accelerator is not None: - accelerator.wait_for_everyone() - - with htrack_block("Evaluation"): - hlog(f"Evaluate on {len(task_names_list)} tasks.") - evaluation_tracker = evaluate( - lm=model, - requests_dict=requests, - docs=docs, - task_dict=task_dict, - override_bs=args.override_batch_size, - evaluation_tracker=evaluation_tracker, - ) - - if accelerator.is_main_process if accelerator is not None else nullcontext(): - with htrack_block("Compiling and saving results"): - evaluation_tracker.general_config_logger.log_end_time() - evaluation_tracker.metrics_logger.aggregate(task_dict=task_dict, bootstrap_iters=1000) - evaluation_tracker.details_logger.aggregate() - - if args.output_dir: - evaluation_tracker.save() - - final_dict = evaluation_tracker.generate_final_dict() - - with htrack_block("Cleaninp up"): - for weights in ["delta", "adapter"]: - try: - tmp_weights_dir = f"{evaluation_tracker.general_config_logger.model_name}-{weights}-applied" - hlog(f"Removing {tmp_weights_dir}") - shutil.rmtree(tmp_weights_dir) - except OSError: - pass - - print(make_results_table(final_dict)) - - model.cleanup() - return final_dict + pipeline.evaluate() + + pipeline.show_results() + + pipeline.save_and_push_results() diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index f479c5d7a..53349325a 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -22,179 +22,81 @@ # flake8: noqa: C901 import os -import random -from typing import Optional, Type +from typing import Optional -import numpy as np - -from lighteval.evaluator import evaluate, make_results_table from lighteval.logging.evaluation_tracker import EvaluationTracker -from lighteval.logging.hierarchical_logger import hlog, htrack, htrack_block +from lighteval.logging.hierarchical_logger import htrack, htrack_block from lighteval.models.model_config import EnvConfig -from lighteval.models.model_loader import ModelInfo -from lighteval.models.nanotron_model import NanotronLightevalModel -from lighteval.tasks.lighteval_task import LightevalTask, 
create_requests_from_tasks -from lighteval.tasks.registry import Registry, get_custom_tasks, taskinfo_selector +from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters from lighteval.utils import NO_NANOTRON_ERROR_MSG, is_nanotron_available -from lighteval.utils_parallelism import test_all_gather if not is_nanotron_available(): raise ImportError(NO_NANOTRON_ERROR_MSG) -from nanotron import distributed as dist from nanotron.config import Config, LightEvalConfig, get_config_from_file -from nanotron.logging import get_logger -from nanotron.parallel.context import ParallelContext -from nanotron.utils import local_ranks_zero_first - -logger = get_logger(__name__) SEED = 1234 -TOKEN = os.getenv("HF_TOKEN") -CACHE_DIR = os.getenv("HF_HOME", "/scratch") @htrack() def main( checkpoint_config_path: str, lighteval_config_path: Optional[str] = None, - cache_dir: Optional[str] = None, - config_cls: Type = Config, - model_config_cls: Optional[Type] = None, - model_cls: Optional[Type] = None, + cache_dir: Optional[str] = os.getenv("HF_HOME", "/scratch"), ): - if cache_dir is None: - cache_dir = CACHE_DIR - - env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) + env_config = EnvConfig(token=os.getenv("HF_TOKEN"), cache_dir=cache_dir) - dist.initialize_torch_distributed() - - with htrack_block("get config"): + with htrack_block("Load nanotron config"): + # Create nanotron config if not checkpoint_config_path.endswith(".yaml"): raise ValueError("The checkpoint path should point to a YAML file") - nanotron_config: config_cls = get_config_from_file( + model_config = get_config_from_file( checkpoint_config_path, - config_class=config_cls, - model_config_class=model_config_cls, + config_class=Config, + model_config_class=None, skip_unused_config_keys=True, skip_null_keys=True, ) - if lighteval_config_path: - lighteval_config: config_cls = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) - nanotron_config.lighteval = lighteval_config + lighteval_config = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) + model_config.lighteval = lighteval_config else: - lighteval_config = nanotron_config.lighteval - - parallel_context = ParallelContext( - tensor_parallel_size=lighteval_config.parallelism.tp, - pipeline_parallel_size=lighteval_config.parallelism.pp, - data_parallel_size=lighteval_config.parallelism.dp, - ) - - evaluation_tracker = EvaluationTracker( - token=TOKEN, - output_dir=lighteval_config.logging.local_output_path, - hub_results_org=lighteval_config.logging.hub_repo_tensorboard, - tensorboard_metric_prefix=lighteval_config.logging.tensorboard_metric_prefix, - nanotron_run_info=nanotron_config.general, - ) - evaluation_tracker.general_config_logger.log_args_info( - num_fewshot_seeds=1, - override_batch_size=None, - max_samples=lighteval_config.tasks.max_samples, - job_id=os.environ.get("SLURM_JOB_ID", None), - config=nanotron_config, - ) - - with htrack_block("Test all gather"): - test_all_gather(parallel_context=parallel_context) - - with htrack_block("Model loading"): - # We need to load the model in the main process first to avoid downloading the model multiple times - model = NanotronLightevalModel( - checkpoint_path=os.path.dirname(checkpoint_config_path), - model_args=nanotron_config.model, - tokenizer=nanotron_config.tokenizer, - parallel_context=parallel_context, - parallel_config=lighteval_config.parallelism, - lighteval_config=lighteval_config, - batch_size=lighteval_config.batch_size, - debug_one_layer_model=False, - 
model_class=model_cls, - env_config=env_config, - ) - model_info = ModelInfo(model_name=f"{nanotron_config.general.run}/{nanotron_config.general.step}") - evaluation_tracker.general_config_logger.log_model_info(model_info) - - with htrack_block("Tasks loading"): - with local_ranks_zero_first(): - tasks_selection = lighteval_config.tasks.tasks - if lighteval_config.tasks.custom_tasks: - _, tasks_groups_dict = get_custom_tasks(lighteval_config.tasks.custom_tasks) - if tasks_groups_dict and lighteval_config.tasks.tasks in tasks_groups_dict: - tasks_selection = tasks_groups_dict[lighteval_config.tasks.tasks] - - task_names_list, few_shots_dict = taskinfo_selector(tasks_selection) - task_dict = Registry(cache_dir=cache_dir).get_task_dict( - task_names_list, - custom_tasks=lighteval_config.tasks.custom_tasks, - ) - # Loading all the dataset in a distributed manner - LightevalTask.load_datasets(task_dict.values(), lighteval_config.tasks.dataset_loading_processes) - - evaluation_tracker.task_config_logger.log(task_dict) - - hlog("Loading documents, and requests") - requests, docs = create_requests_from_tasks( - task_dict=task_dict, - fewshot_dict=few_shots_dict, - num_fewshot_seeds=lighteval_config.tasks.num_fewshot_seeds or 1, - lm=model, - max_samples=lighteval_config.tasks.max_samples, - evaluation_tracker=evaluation_tracker, - use_chat_template=False, - system_prompt=None, - ) - - with htrack_block("Setting seeds and waiting for all processes"): - hlog(f"setting seed to {SEED} for random and numpy") - random.seed(SEED) - np.random.seed(SEED) - dist.barrier() - - with htrack_block("Evaluation"): - hlog(f"Evaluate on {len(task_names_list)} tasks.") - evaluation_tracker = evaluate( - lm=model, - requests_dict=requests, - docs=docs, - task_dict=task_dict, - override_bs=lighteval_config.batch_size, - evaluation_tracker=evaluation_tracker, - ) - - if dist.get_rank(parallel_context.world_pg) == 0: - with htrack_block("Compiling and saving results"): - evaluation_tracker.general_config_logger.log_end_time() - evaluation_tracker.metrics_logger.aggregate(task_dict=task_dict, bootstrap_iters=1000) - evaluation_tracker.details_logger.aggregate() - - if lighteval_config.logging.local_output_path: - evaluation_tracker.save( - output_dir=lighteval_config.logging.local_output_path, - push_results_to_hub=lighteval_config.logging.push_results_to_hub, - push_details_to_hub=lighteval_config.logging.push_details_to_hub, - public=False, - push_results_to_tensorboard=lighteval_config.logging.push_results_to_tensorboard, - ) - - final_dict = evaluation_tracker.generate_final_dict() - - hlog(make_results_table(final_dict)) - - return final_dict + lighteval_config = model_config.lighteval + + evaluation_tracker = EvaluationTracker( + token=os.getenv("HF_TOKEN"), + output_dir=lighteval_config.logging.local_output_path, + hub_results_org=lighteval_config.logging.hub_repo_tensorboard, + tensorboard_metric_prefix=lighteval_config.logging.tensorboard_metric_prefix, + nanotron_run_info=model_config.general, + ) + + pipeline_parameters = PipelineParameters( + launcher_type=ParallelismManager.NANOTRON, + env_config=env_config, + job_id=os.environ.get("SLURM_JOB_ID", None), + nanotron_checkpoint_path=checkpoint_config_path, + dataset_loading_processes=lighteval_config.tasks.dataset_loading_processes, + custom_tasks_directory=lighteval_config.tasks.custom_tasks, + override_batch_size=None, + num_fewshot_seeds=1, + max_samples=lighteval_config.tasks.max_samples, + use_chat_template=False, + system_prompt=None, + ) + + pipeline = 
Pipeline( + tasks=lighteval_config.tasks.tasks, + pipeline_parameters=pipeline_parameters, + evaluation_tracker=evaluation_tracker, + model_config=model_config, + ) + + pipeline.evaluate() + + pipeline.show_results() + + pipeline.save_and_push_results() diff --git a/src/lighteval/models/abstract_model.py b/src/lighteval/models/abstract_model.py index b9111c311..b7c8a8e47 100644 --- a/src/lighteval/models/abstract_model.py +++ b/src/lighteval/models/abstract_model.py @@ -21,6 +21,7 @@ # SOFTWARE. from abc import ABC, abstractmethod +from dataclasses import dataclass from typing import Optional, Union import torch @@ -45,6 +46,14 @@ TokenSequence = Union[list[int], torch.LongTensor, torch.Tensor, BatchEncoding] +@dataclass +class ModelInfo: + model_name: str + model_sha: Optional[str] = None + model_dtype: Optional[str] = None + model_size: Optional[str] = None + + class LightevalModel(ABC): DATASET_SPLITS = 4 diff --git a/src/lighteval/models/base_model.py b/src/lighteval/models/base_model.py index f1ba61517..0ef8c538c 100644 --- a/src/lighteval/models/base_model.py +++ b/src/lighteval/models/base_model.py @@ -33,7 +33,7 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset, LoglikelihoodSingleTokenDataset from lighteval.logging.hierarchical_logger import hlog, hlog_err, hlog_warn -from lighteval.models.abstract_model import LightevalModel +from lighteval.models.abstract_model import LightevalModel, ModelInfo from lighteval.models.model_config import BaseModelConfig, EnvConfig from lighteval.models.model_output import ( Batch, @@ -56,7 +56,7 @@ if is_accelerate_available(): - from accelerate.utils import get_max_memory + from accelerate.utils import calculate_maximum_sizes, convert_bytes, get_max_memory os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -97,6 +97,18 @@ def __init__( self.precision = _get_dtype(config.dtype, config=self._config) + if is_accelerate_available(): + model_size, _ = calculate_maximum_sizes(self.model) + model_size = convert_bytes(model_size) + else: + model_size = -1 + self.model_info = ModelInfo( + model_name=self.model_name, + model_sha=self.model_sha, + model_dtype=self.model_precision, + model_size=model_size, + ) + @property def tokenizer(self): return self._tokenizer diff --git a/src/lighteval/models/dummy_model.py b/src/lighteval/models/dummy_model.py index 08335db5f..ed93b403d 100644 --- a/src/lighteval/models/dummy_model.py +++ b/src/lighteval/models/dummy_model.py @@ -27,7 +27,7 @@ from transformers import AutoTokenizer -from lighteval.models.abstract_model import LightevalModel +from lighteval.models.abstract_model import LightevalModel, ModelInfo from lighteval.models.model_config import DummyModelConfig, EnvConfig from lighteval.models.model_output import GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn from lighteval.tasks.requests import ( @@ -50,6 +50,7 @@ def __init__( self.env_config = env_config self._random = random.Random(self.config.seed) self._tokenizer = None + self.model_info = ModelInfo(model_name="dummy", model_sha=str(config.seed)) @property def tokenizer(self): diff --git a/src/lighteval/models/endpoint_model.py b/src/lighteval/models/endpoint_model.py index 87959ef61..1e95e2685 100644 --- a/src/lighteval/models/endpoint_model.py +++ b/src/lighteval/models/endpoint_model.py @@ -39,7 +39,7 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset from lighteval.logging.hierarchical_logger import hlog, hlog_err, hlog_warn -from lighteval.models.abstract_model import LightevalModel 
+from lighteval.models.abstract_model import LightevalModel, ModelInfo from lighteval.models.model_config import EnvConfig, InferenceEndpointModelConfig, InferenceModelConfig from lighteval.models.model_output import GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn from lighteval.tasks.requests import ( @@ -122,6 +122,13 @@ def __init__( self._tokenizer = AutoTokenizer.from_pretrained(self.name) self._add_special_tokens = config.add_special_tokens if config.add_special_tokens is not None else False + self.model_info = ModelInfo( + model_name=self.name, + model_sha=self.revision, + model_dtype=config.model_dtype or "default", + model_size=-1, + ) + @property def tokenizer(self): return self._tokenizer diff --git a/src/lighteval/models/model_config.py b/src/lighteval/models/model_config.py index 75a29d02c..6244f1a7f 100644 --- a/src/lighteval/models/model_config.py +++ b/src/lighteval/models/model_config.py @@ -30,6 +30,7 @@ from lighteval.logging.hierarchical_logger import hlog from lighteval.models.utils import _get_model_sha +from lighteval.pipeline import EnvConfig from lighteval.utils import ( NO_AUTOGPTQ_ERROR_MSG, NO_BNB_ERROR_MSG, @@ -45,20 +46,6 @@ from accelerate import Accelerator -@dataclass -class EnvConfig: - """ - Configuration class for environment settings. - - Attributes: - cache_dir (str): directory for caching data. - token (str): authentication token used for accessing the HuggingFace Hub. - """ - - cache_dir: str = None - token: str = None - - @dataclass class BaseModelConfig: """ diff --git a/src/lighteval/models/model_loader.py b/src/lighteval/models/model_loader.py index c72d64038..e856dab93 100644 --- a/src/lighteval/models/model_loader.py +++ b/src/lighteval/models/model_loader.py @@ -20,8 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from dataclasses import dataclass -from typing import Optional, Tuple, Union +from typing import Union from lighteval.logging.hierarchical_logger import hlog from lighteval.models.adapter_model import AdapterModel @@ -40,19 +39,7 @@ TGIModelConfig, ) from lighteval.models.tgi_model import ModelClient -from lighteval.utils import NO_TGI_ERROR_MSG, is_accelerate_available, is_tgi_available - - -if is_accelerate_available(): - from accelerate.utils import calculate_maximum_sizes, convert_bytes - - -@dataclass -class ModelInfo: - model_name: str - model_sha: Optional[str] = None - model_dtype: Optional[str] = None - model_size: Optional[str] = None +from lighteval.utils import NO_TGI_ERROR_MSG, is_tgi_available def load_model( # noqa: C901 @@ -65,7 +52,7 @@ def load_model( # noqa: C901 DummyModelConfig, ], env_config: EnvConfig, -) -> Tuple[Union[BaseModel, AdapterModel, DeltaModel, ModelClient, DummyModel], ModelInfo]: +) -> Union[BaseModel, AdapterModel, DeltaModel, ModelClient, DummyModel]: """Will load either a model from an inference server or a model from a checkpoint, depending on the config type. 
@@ -103,29 +90,13 @@ def load_model_with_tgi(config: TGIModelConfig): model = ModelClient( address=config.inference_server_address, auth_token=config.inference_server_auth, model_id=config.model_id ) - model_name = str(model.model_info["model_id"]) - model_sha = model.model_info["model_sha"] - model_precision = model.model_info["model_dtype"] - model_size = -1 - model_info = ModelInfo( - model_name=model_name, - model_sha=model_sha, - model_dtype=model_precision, - model_size=model_size, - ) - return model, model_info + return model def load_model_with_inference_endpoints(config: InferenceEndpointModelConfig, env_config: EnvConfig): hlog("Spin up model using inference endpoint.") model = InferenceEndpointModel(config=config, env_config=env_config) - model_info = ModelInfo( - model_name=model.name, - model_sha=model.revision, - model_dtype=config.model_dtype or "default", - model_size=-1, - ) - return model, model_info + return model def load_model_with_accelerate_or_default( @@ -138,24 +109,8 @@ def load_model_with_accelerate_or_default( else: model = BaseModel(config=config, env_config=env_config) - model_name = model.model_name - model_sha = model.model_sha - model_precision = str(model.precision) - if is_accelerate_available(): - model_size, _ = calculate_maximum_sizes(model.model) - model_size = convert_bytes(model_size) - else: - model_size = -1 - model_info = ModelInfo( - model_name=model_name, - model_sha=model_sha, - model_dtype=model_precision, - model_size=model_size, - ) - hlog(f"Model info: {model_info}") - - return model, model_info + return model def load_dummy_model(config: DummyModelConfig, env_config: EnvConfig): - return DummyModel(config=config, env_config=env_config), ModelInfo(model_name="dummy", model_sha=str(config.seed)) + return DummyModel(config=config, env_config=env_config) diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py index 560b29b35..feb1ac7b3 100644 --- a/src/lighteval/models/nanotron_model.py +++ b/src/lighteval/models/nanotron_model.py @@ -31,7 +31,7 @@ from datasets.download.streaming_download_manager import xPath from nanotron import distributed as dist from nanotron import logging -from nanotron.config import LightEvalConfig, ModelArgs, ParallelismArgs, TokenizerArgs +from nanotron.config import LightEvalConfig, ModelArgs, TokenizerArgs from nanotron.generation.decode import decode_tokenized from nanotron.logging import human_format, log_rank from nanotron.models import build_model @@ -55,7 +55,7 @@ LoglikelihoodSingleTokenDataset, ) from lighteval.logging.hierarchical_logger import hlog_err, hlog_warn -from lighteval.models.base_model import LightevalModel +from lighteval.models.base_model import LightevalModel, ModelInfo from lighteval.models.model_config import EnvConfig from lighteval.models.model_output import Batch, GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn from lighteval.tasks.requests import ( @@ -63,7 +63,7 @@ LoglikelihoodRequest, LoglikelihoodRollingRequest, ) -from lighteval.utils import as_list +from lighteval.utils import as_list, is_nanotron_available from lighteval.utils_parallelism import find_executable_batch_size @@ -73,7 +73,8 @@ TokenSequence = Union[List[int], torch.LongTensor, torch.Tensor, BatchEncoding] -# _DeviceMapping = NewType("DeviceMapping", Mapping[str, Union[int, str, torch.device]]) +if is_nanotron_available(): + import nanotron class NanotronLightevalModel(LightevalModel): @@ -84,12 +85,8 @@ class NanotronLightevalModel(LightevalModel): def 
__init__( self, checkpoint_path: str, - model_args: ModelArgs, - tokenizer: TokenizerArgs, + nanotron_config: nanotron.config.Config, parallel_context: ParallelContext, - parallel_config: ParallelismArgs, - lighteval_config: LightEvalConfig, - batch_size: Optional[int] = -1, max_gen_toks: Optional[int] = 256, max_length: Optional[int] = None, add_special_tokens: Optional[bool] = True, @@ -102,8 +99,12 @@ def __init__( """Initializes a nanotron model for evaluation. Args: """ + model_args: ModelArgs = nanotron_config.model + tokenizer: TokenizerArgs = nanotron_config.tokenizer + lighteval_config: LightEvalConfig = nanotron_config.lighteval + parallel_config: ParallelContext = nanotron_config.lighteval.parallelism - self._batch_size = batch_size + self._batch_size = lighteval_config.batch_size self._max_gen_toks = max_gen_toks self._max_length = max_length self.parallel_config = parallel_config @@ -221,6 +222,8 @@ def __init__( self.multichoice_continuations_start_space = multichoice_continuations_start_space + self.model_info = ModelInfo(model_name=f"{nanotron_config.general.run}/{nanotron_config.general.step}") + @property def tokenizer(self): return self._tokenizer diff --git a/src/lighteval/models/tgi_model.py b/src/lighteval/models/tgi_model.py index 754152587..960a17cf3 100644 --- a/src/lighteval/models/tgi_model.py +++ b/src/lighteval/models/tgi_model.py @@ -27,7 +27,7 @@ from huggingface_hub import TextGenerationOutput from transformers import AutoTokenizer -from lighteval.models.endpoint_model import InferenceEndpointModel +from lighteval.models.endpoint_model import InferenceEndpointModel, ModelInfo from lighteval.utils import NO_TGI_ERROR_MSG, is_tgi_available @@ -65,6 +65,16 @@ def __init__(self, address, auth_token=None, model_id=None) -> None: self._add_special_tokens = True self.use_async = True + model_name = str(self.model_info["model_id"]) + model_sha = self.model_info["model_sha"] + model_precision = self.model_info["model_dtype"] + self.model_info = ModelInfo( + model_name=model_name, + model_sha=model_sha, + model_dtype=model_precision, + model_size=-1, + ) + def _async_process_request( self, context: str, stop_tokens: list[str], max_tokens: int ) -> Coroutine[None, list[TextGenerationOutput], str]: diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py new file mode 100644 index 000000000..4f7d0388c --- /dev/null +++ b/src/lighteval/pipeline.py @@ -0,0 +1,267 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import os +import random +import shutil +from contextlib import nullcontext +from dataclasses import dataclass +from datetime import timedelta +from enum import Enum, auto + +import numpy as np + +from lighteval.evaluator import evaluate, make_results_table +from lighteval.logging.evaluation_tracker import EvaluationTracker +from lighteval.logging.hierarchical_logger import hlog, htrack_block +from lighteval.models.model_loader import load_model +from lighteval.models.nanotron_model import NanotronLightevalModel +from lighteval.tasks.lighteval_task import LightevalTask, create_requests_from_tasks +from lighteval.tasks.registry import Registry, get_custom_tasks, taskinfo_selector +from lighteval.utils import ( + NO_ACCELERATE_ERROR_MSG, + NO_NANOTRON_ERROR_MSG, + NO_TGI_ERROR_MSG, + is_accelerate_available, + is_nanotron_available, + is_tgi_available, +) +from lighteval.utils_parallelism import test_all_gather + + +if is_accelerate_available(): + from accelerate import Accelerator, InitProcessGroupKwargs +if is_nanotron_available(): + from nanotron import distributed as dist + from nanotron.parallel.context import ParallelContext + from nanotron.utils import local_ranks_zero_first + + +@dataclass +class EnvConfig: + """ + Configuration class for environment settings. + + Attributes: + cache_dir (str): directory for caching data. + token (str): authentication token used for accessing the HuggingFace Hub. + """ + + cache_dir: str = os.getenv("HF_HOME", "/scratch") + token: str = os.getenv("HF_TOKEN") + + +class ParallelismManager(Enum): + ACCELERATE = auto() + NANOTRON = auto() + TGI = auto() + NONE = auto() + + +@dataclass +class PipelineParameters: + launcher_type: ParallelismManager + # Env parameters + env_config: EnvConfig = EnvConfig() + job_id: int = 0 + dataset_loading_processes: int = 1 + nanotron_checkpoint_path: str = None # only for nanotron models + # Dataset + custom_tasks_directory: str = None + # Generation parameters + override_batch_size: int = None + num_fewshot_seeds: int = 1 + max_samples: int = None + use_chat_template: bool = False + system_prompt: str = None + + def __post_init__(self): + if self.launcher_type == ParallelismManager.ACCELERATE: + if not is_accelerate_available(): + raise ImportError(NO_ACCELERATE_ERROR_MSG) + elif self.launcher_type == ParallelismManager.TGI: + if not is_tgi_available(): + raise ImportError(NO_TGI_ERROR_MSG) + elif self.launcher_type == ParallelismManager.NANOTRON: + if not is_nanotron_available(): + raise ImportError(NO_NANOTRON_ERROR_MSG) + + +class Pipeline: + def __init__( + self, + tasks: str, + pipeline_parameters: PipelineParameters, + evaluation_tracker: EvaluationTracker, + model=None, + model_config=None, + ): + if not (model or model_config): + raise ValueError("Must provide either a model or model config when creating a pipeline.") + + self.pipeline_parameters = pipeline_parameters + self.launcher_type = self.pipeline_parameters.launcher_type + if self.pipeline_parameters.max_samples: + hlog( + "WARNING: --max_samples WAS SET. THESE NUMBERS ARE ONLY PARTIAL AND SHOULD NOT BE USED FOR COMPARISON UNLESS YOU KNOW WHAT YOU ARE DOING." 
+ ) + + self.accelerator, self.parallel_context = self._init_parallelism_manager() + + self.evaluation_tracker = evaluation_tracker + self.model_config = model_config + self.model = self._init_model(model_config, model, pipeline_parameters) + + self.evaluation_tracker.general_config_logger.log_model_info(model.model_info) + self._init_tasks_and_requests(tasks=tasks) + self._init_random_seeds() + + def _init_parallelism_manager(self): + accelerator, parallel_context = None, None + with htrack_block("Test all gather"): + if self.launcher_type == ParallelismManager.ACCELERATE: + accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))]) + test_all_gather(accelerator=accelerator) + elif self.launcher_type == ParallelismManager.NANOTRON: + dist.initialize_torch_distributed() + parallel_context = ParallelContext( + tensor_parallel_size=self.model_config.parallelism.tp, + pipeline_parallel_size=self.model_config.parallelism.pp, + data_parallel_size=self.model_config.parallelism.dp, + ) + test_all_gather(parallel_context=parallel_context) + + return accelerator, parallel_context + + def _init_model(self, model_config, model): + with htrack_block("Model loading"): + if model_config is not None: + if self.parallel_context: + return NanotronLightevalModel( + checkpoint_path=os.path.dirname(self.pipeline_parameters.nanotron_checkpoint_path), + nanotron_config=self.model_config, + parallel_context=self.accelerator, + debug_one_layer_model=False, + model_class=None, + env_config=self.pipeline_parameters.env_config, + ) + else: + with self.accelerator.main_process_first() if self.accelerator is not None else nullcontext(): + return load_model(config=model_config, env_config=self.pipeline_parameters.env_config) + return model + + def _init_tasks_and_requests(self, tasks): + with htrack_block("Tasks loading"): + with local_ranks_zero_first() if self.launcher_type == ParallelismManager.NANOTRON else nullcontext(): + # If some tasks are provided as task groups, we load them separately + custom_tasks = self.pipeline_parameters.custom_tasks_directory + if custom_tasks: + _, tasks_groups_dict = get_custom_tasks(custom_tasks) + if tasks_groups_dict and tasks in tasks_groups_dict: + tasks = tasks_groups_dict[tasks] + + # Loading all tasks + task_names_list, fewshots_dict = taskinfo_selector(tasks) + task_dict = Registry(cache_dir=self.pipeline_parameters.env_config.cache_dir).get_task_dict( + task_names_list, custom_tasks=custom_tasks + ) + LightevalTask.load_datasets(task_dict.values(), self.pipeline_parameters.dataset_loading_processes) + + self.evaluation_tracker.task_config_logger.log(task_dict) + + hlog("Loading documents, and requests") + requests, docs = create_requests_from_tasks( + task_dict=task_dict, + fewshot_dict=fewshots_dict, + num_fewshot_seeds=self.pipeline_parameters.num_fewshot_seeds, + lm=self.model, + max_samples=self.pipeline_parameters.max_samples, + evaluation_tracker=self.evaluation_tracker, + use_chat_template=self.pipeline_parameters.use_chat_template, + system_prompt=self.pipeline_parameters.system_prompt, + ) + + self.task_names_list = task_names_list + self.task_dict = task_dict + self.fewshot_dict = fewshots_dict + self.requests = requests + self.docs = docs + + def _init_random_seeds(self): + with htrack_block("Setting seeds and waiting for all processes"): + hlog(f"setting seed to {1234} for random and numpy") + random.seed(1234) + np.random.seed(1234) + if self.accelerator is not None: + self.accelerator.wait_for_everyone() + if 
self.parallel_process is not None: + dist.barrier() + + def evaluate(self): + with htrack_block("Evaluation"): + self.evaluation_tracker.general_config_logger.log_args_info( + num_fewshot_seeds=self.pipeline_parameters.num_fewshot_seeds, + override_batch_size=self.pipeline_parameters.override_batch_size, + max_samples=self.pipeline_parameters.max_samples, + job_id=self.pipeline_parameters.job_id, + config=self.model_config, + ) + + hlog(f"Evaluate on {len(self.task_names_list)} tasks.") + self.evaluation_tracker = evaluate( + lm=self.model, + requests_dict=self.requests, + docs=self.docs, + task_dict=self.task_dict, + override_bs=self.pipeline_parameters.override_batch_size, + evaluation_tracker=self.evaluation_tracker, + ) + + if self.accelerator: + context = self.accelerator.is_main_process + elif self.parallel_context: + context = dist.get_rank(self.parallel_context.world_pg) == 0 + else: + context = nullcontext() + + if context: + with htrack_block("Compiling results"): + self.evaluation_tracker.general_config_logger.log_end_time() + self.evaluation_tracker.metrics_logger.aggregate(task_dict=self.task_dict, bootstrap_iters=1000) + self.evaluation_tracker.details_logger.aggregate() + + with htrack_block("Cleaninp up"): # For non nanotron models + for weights in ["delta", "adapter"]: + try: + tmp_weights_dir = f"{self.evaluation_tracker.general_config_logger.model_name}-{weights}-applied" + shutil.rmtree(tmp_weights_dir) + hlog(f"Removed {tmp_weights_dir}") + except OSError: + pass + self.model.cleanup() + + def save_and_push_results(self): + self.evaluation_tracker.save() + + def show_results(self): + final_dict = self.evaluation_tracker.generate_final_dict() + print(make_results_table(final_dict)) From e0bd17beeb19811402828c5101acd248b17a9512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Tue, 23 Jul 2024 18:04:10 +0200 Subject: [PATCH 2/8] fix circular import --- src/lighteval/main_nanotron.py | 3 +-- src/lighteval/models/abstract_model.py | 2 +- src/lighteval/models/adapter_model.py | 4 ++-- src/lighteval/models/base_model.py | 4 ++-- src/lighteval/models/delta_model.py | 3 ++- src/lighteval/models/dummy_model.py | 3 ++- src/lighteval/models/endpoint_model.py | 3 +-- src/lighteval/models/nanotron_model.py | 3 +-- src/lighteval/pipeline.py | 15 +-------------- src/lighteval/utils.py | 17 ++++++++++++++++- 10 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index 53349325a..eb04c7753 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -26,9 +26,8 @@ from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.logging.hierarchical_logger import htrack, htrack_block -from lighteval.models.model_config import EnvConfig from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters -from lighteval.utils import NO_NANOTRON_ERROR_MSG, is_nanotron_available +from lighteval.utils import NO_NANOTRON_ERROR_MSG, EnvConfig, is_nanotron_available if not is_nanotron_available(): diff --git a/src/lighteval/models/abstract_model.py b/src/lighteval/models/abstract_model.py index b7c8a8e47..d2f52f3e9 100644 --- a/src/lighteval/models/abstract_model.py +++ b/src/lighteval/models/abstract_model.py @@ -27,7 +27,6 @@ import torch from transformers import BatchEncoding -from lighteval.models.model_config import EnvConfig from lighteval.models.model_output import ( GenerateMultiTurnReturn, GenerateReturn, @@ -41,6 +40,7 @@ 
LoglikelihoodRollingRequest, LoglikelihoodSingleTokenRequest, ) +from lighteval.utils import EnvConfig TokenSequence = Union[list[int], torch.LongTensor, torch.Tensor, BatchEncoding] diff --git a/src/lighteval/models/adapter_model.py b/src/lighteval/models/adapter_model.py index 54a64178b..74f8e5643 100644 --- a/src/lighteval/models/adapter_model.py +++ b/src/lighteval/models/adapter_model.py @@ -27,9 +27,9 @@ from lighteval.logging.hierarchical_logger import hlog from lighteval.models.base_model import BaseModel -from lighteval.models.model_config import AdapterModelConfig, EnvConfig +from lighteval.models.model_config import AdapterModelConfig from lighteval.models.utils import _get_dtype -from lighteval.utils import is_peft_available +from lighteval.utils import EnvConfig, is_peft_available if is_peft_available(): diff --git a/src/lighteval/models/base_model.py b/src/lighteval/models/base_model.py index 0ef8c538c..be2073d4a 100644 --- a/src/lighteval/models/base_model.py +++ b/src/lighteval/models/base_model.py @@ -34,7 +34,7 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset, LoglikelihoodSingleTokenDataset from lighteval.logging.hierarchical_logger import hlog, hlog_err, hlog_warn from lighteval.models.abstract_model import LightevalModel, ModelInfo -from lighteval.models.model_config import BaseModelConfig, EnvConfig +from lighteval.models.model_config import BaseModelConfig from lighteval.models.model_output import ( Batch, GenerateMultiTurnReturn, @@ -51,7 +51,7 @@ LoglikelihoodSingleTokenRequest, Request, ) -from lighteval.utils import as_list, is_accelerate_available +from lighteval.utils import EnvConfig, as_list, is_accelerate_available from lighteval.utils_parallelism import find_executable_batch_size diff --git a/src/lighteval/models/delta_model.py b/src/lighteval/models/delta_model.py index fafc3e28a..7afe06d75 100644 --- a/src/lighteval/models/delta_model.py +++ b/src/lighteval/models/delta_model.py @@ -28,8 +28,9 @@ from lighteval.logging.hierarchical_logger import hlog from lighteval.models.base_model import BaseModel -from lighteval.models.model_config import DeltaModelConfig, EnvConfig +from lighteval.models.model_config import DeltaModelConfig from lighteval.models.utils import _get_dtype +from lighteval.utils import EnvConfig class DeltaModel(BaseModel): diff --git a/src/lighteval/models/dummy_model.py b/src/lighteval/models/dummy_model.py index ed93b403d..54990a62f 100644 --- a/src/lighteval/models/dummy_model.py +++ b/src/lighteval/models/dummy_model.py @@ -28,7 +28,7 @@ from transformers import AutoTokenizer from lighteval.models.abstract_model import LightevalModel, ModelInfo -from lighteval.models.model_config import DummyModelConfig, EnvConfig +from lighteval.models.model_config import DummyModelConfig from lighteval.models.model_output import GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn from lighteval.tasks.requests import ( GreedyUntilRequest, @@ -36,6 +36,7 @@ LoglikelihoodRollingRequest, LoglikelihoodSingleTokenRequest, ) +from lighteval.utils import EnvConfig class DummyModel(LightevalModel): diff --git a/src/lighteval/models/endpoint_model.py b/src/lighteval/models/endpoint_model.py index 1e95e2685..f5384e9da 100644 --- a/src/lighteval/models/endpoint_model.py +++ b/src/lighteval/models/endpoint_model.py @@ -40,7 +40,6 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset from lighteval.logging.hierarchical_logger import hlog, hlog_err, hlog_warn from lighteval.models.abstract_model import 
LightevalModel, ModelInfo -from lighteval.models.model_config import EnvConfig, InferenceEndpointModelConfig, InferenceModelConfig from lighteval.models.model_output import GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn from lighteval.tasks.requests import ( GreedyUntilRequest, @@ -48,7 +47,7 @@ LoglikelihoodRollingRequest, LoglikelihoodSingleTokenRequest, ) -from lighteval.utils import as_list +from lighteval.utils import EnvConfig, InferenceEndpointModelConfig, InferenceModelConfig, as_list BATCH_SIZE = 50 diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py index feb1ac7b3..3ea81e0b9 100644 --- a/src/lighteval/models/nanotron_model.py +++ b/src/lighteval/models/nanotron_model.py @@ -56,14 +56,13 @@ ) from lighteval.logging.hierarchical_logger import hlog_err, hlog_warn from lighteval.models.base_model import LightevalModel, ModelInfo -from lighteval.models.model_config import EnvConfig from lighteval.models.model_output import Batch, GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn from lighteval.tasks.requests import ( GreedyUntilRequest, LoglikelihoodRequest, LoglikelihoodRollingRequest, ) -from lighteval.utils import as_list, is_nanotron_available +from lighteval.utils import EnvConfig, as_list, is_nanotron_available from lighteval.utils_parallelism import find_executable_batch_size diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 4f7d0388c..2cae65747 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -41,6 +41,7 @@ NO_ACCELERATE_ERROR_MSG, NO_NANOTRON_ERROR_MSG, NO_TGI_ERROR_MSG, + EnvConfig, is_accelerate_available, is_nanotron_available, is_tgi_available, @@ -56,20 +57,6 @@ from nanotron.utils import local_ranks_zero_first -@dataclass -class EnvConfig: - """ - Configuration class for environment settings. - - Attributes: - cache_dir (str): directory for caching data. - token (str): authentication token used for accessing the HuggingFace Hub. - """ - - cache_dir: str = os.getenv("HF_HOME", "/scratch") - token: str = os.getenv("HF_TOKEN") - - class ParallelismManager(Enum): ACCELERATE = auto() NANOTRON = auto() diff --git a/src/lighteval/utils.py b/src/lighteval/utils.py index 3e032d1f4..f3fc279ac 100644 --- a/src/lighteval/utils.py +++ b/src/lighteval/utils.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import importlib -from dataclasses import asdict, is_dataclass +import os +from dataclasses import asdict, dataclass, is_dataclass from typing import Any, Union import numpy as np @@ -145,6 +146,20 @@ def flatten(item: list[Union[list, str]]) -> list[str]: return flat_item +@dataclass +class EnvConfig: + """ + Configuration class for environment settings. + + Attributes: + cache_dir (str): directory for caching data. + token (str): authentication token used for accessing the HuggingFace Hub. 
+ """ + + cache_dir: str = os.getenv("HF_HOME", "/scratch") + token: str = os.getenv("HF_TOKEN") + + def is_accelerate_available() -> bool: return importlib.util.find_spec("accelerate") is not None From 60cfef18af9496900991cdf1fd6383a2dd8532db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Tue, 23 Jul 2024 18:13:16 +0200 Subject: [PATCH 3/8] forgot one import --- src/lighteval/models/endpoint_model.py | 3 ++- src/lighteval/models/model_config.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lighteval/models/endpoint_model.py b/src/lighteval/models/endpoint_model.py index f5384e9da..05a6e87c8 100644 --- a/src/lighteval/models/endpoint_model.py +++ b/src/lighteval/models/endpoint_model.py @@ -40,6 +40,7 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset from lighteval.logging.hierarchical_logger import hlog, hlog_err, hlog_warn from lighteval.models.abstract_model import LightevalModel, ModelInfo +from lighteval.models.model_config import InferenceEndpointModelConfig, InferenceModelConfig from lighteval.models.model_output import GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn from lighteval.tasks.requests import ( GreedyUntilRequest, @@ -47,7 +48,7 @@ LoglikelihoodRollingRequest, LoglikelihoodSingleTokenRequest, ) -from lighteval.utils import EnvConfig, InferenceEndpointModelConfig, InferenceModelConfig, as_list +from lighteval.utils import EnvConfig, as_list BATCH_SIZE = 50 diff --git a/src/lighteval/models/model_config.py b/src/lighteval/models/model_config.py index 6244f1a7f..d21de42ab 100644 --- a/src/lighteval/models/model_config.py +++ b/src/lighteval/models/model_config.py @@ -30,11 +30,11 @@ from lighteval.logging.hierarchical_logger import hlog from lighteval.models.utils import _get_model_sha -from lighteval.pipeline import EnvConfig from lighteval.utils import ( NO_AUTOGPTQ_ERROR_MSG, NO_BNB_ERROR_MSG, NO_PEFT_ERROR_MSG, + EnvConfig, is_accelerate_available, is_autogptq_available, is_bnb_available, From 796bc4df87a048233a30eb8c4479f7fccec0776c Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Tue, 30 Jul 2024 10:11:16 +0000 Subject: [PATCH 4/8] mini fixes --- src/lighteval/logging/info_loggers.py | 2 +- src/lighteval/main_accelerate.py | 2 +- src/lighteval/models/base_model.py | 2 +- src/lighteval/models/nanotron_model.py | 32 +++++++++++++------------- src/lighteval/pipeline.py | 14 +++++++---- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/lighteval/logging/info_loggers.py b/src/lighteval/logging/info_loggers.py index c211d2e4f..27ec2cf9b 100644 --- a/src/lighteval/logging/info_loggers.py +++ b/src/lighteval/logging/info_loggers.py @@ -33,7 +33,7 @@ from lighteval.logging.hierarchical_logger import hlog_warn from lighteval.metrics import MetricCategory from lighteval.metrics.stderr import get_stderr_function -from lighteval.models.model_loader import ModelInfo +from lighteval.models.abstract_model import ModelInfo from lighteval.models.model_output import ModelReturn from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig from lighteval.tasks.requests import Doc diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index 025fe7c80..ebcd886f1 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -58,7 +58,7 @@ def main(args): ) pipeline_params = PipelineParameters( launcher_type=ParallelismManager.ACCELERATE, - envconfig=env_config, + env_config=env_config, 
job_id=args.job_id, dataset_loading_processes=args.dataset_loading_processes, custom_tasks_directory=args.custom_tasks, diff --git a/src/lighteval/models/base_model.py b/src/lighteval/models/base_model.py index be2073d4a..484f450d9 100644 --- a/src/lighteval/models/base_model.py +++ b/src/lighteval/models/base_model.py @@ -105,7 +105,7 @@ def __init__( self.model_info = ModelInfo( model_name=self.model_name, model_sha=self.model_sha, - model_dtype=self.model_precision, + model_dtype=self.precision, model_size=model_size, ) diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py index 3ea81e0b9..da315bc9a 100644 --- a/src/lighteval/models/nanotron_model.py +++ b/src/lighteval/models/nanotron_model.py @@ -29,20 +29,6 @@ import torch.nn.functional as F import transformers from datasets.download.streaming_download_manager import xPath -from nanotron import distributed as dist -from nanotron import logging -from nanotron.config import LightEvalConfig, ModelArgs, TokenizerArgs -from nanotron.generation.decode import decode_tokenized -from nanotron.logging import human_format, log_rank -from nanotron.models import build_model -from nanotron.parallel.context import ParallelContext -from nanotron.parallel.parameters import sanity_check -from nanotron.parallel.pipeline_parallel.block import get_min_max_rank -from nanotron.parallel.pipeline_parallel.tensor_pointer import TensorPointer -from nanotron.parallel.tensor_parallel.enum import TensorParallelLinearMode -from nanotron.random import RandomStates, get_current_random_state, get_synced_random_state, set_random_seed -from nanotron.serialize import load_weights -from nanotron.trainer import CONFIG_TO_MODEL_CLASS, mark_tied_parameters from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm @@ -68,12 +54,26 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false" -logger = logging.get_logger(__name__) - TokenSequence = Union[List[int], torch.LongTensor, torch.Tensor, BatchEncoding] if is_nanotron_available(): import nanotron + from nanotron import distributed as dist + from nanotron import logging + from nanotron.config import LightEvalConfig, ModelArgs, TokenizerArgs + from nanotron.generation.decode import decode_tokenized + from nanotron.logging import human_format, log_rank + from nanotron.models import build_model + from nanotron.parallel.context import ParallelContext + from nanotron.parallel.parameters import sanity_check + from nanotron.parallel.pipeline_parallel.block import get_min_max_rank + from nanotron.parallel.pipeline_parallel.tensor_pointer import TensorPointer + from nanotron.parallel.tensor_parallel.enum import TensorParallelLinearMode + from nanotron.random import RandomStates, get_current_random_state, get_synced_random_state, set_random_seed + from nanotron.serialize import load_weights + from nanotron.trainer import CONFIG_TO_MODEL_CLASS, mark_tied_parameters + +logger = logging.get_logger(__name__) class NanotronLightevalModel(LightevalModel): diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 2cae65747..553aaac77 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -34,7 +34,6 @@ from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.logging.hierarchical_logger import hlog, htrack_block from lighteval.models.model_loader import load_model -from lighteval.models.nanotron_model import NanotronLightevalModel from lighteval.tasks.lighteval_task import LightevalTask, 
create_requests_from_tasks from lighteval.tasks.registry import Registry, get_custom_tasks, taskinfo_selector from lighteval.utils import ( @@ -56,6 +55,8 @@ from nanotron.parallel.context import ParallelContext from nanotron.utils import local_ranks_zero_first + from lighteval.models.nanotron_model import NanotronLightevalModel + class ParallelismManager(Enum): ACCELERATE = auto() @@ -116,9 +117,9 @@ def __init__( self.evaluation_tracker = evaluation_tracker self.model_config = model_config - self.model = self._init_model(model_config, model, pipeline_parameters) + self.model = self._init_model(model_config, model) - self.evaluation_tracker.general_config_logger.log_model_info(model.model_info) + self.evaluation_tracker.general_config_logger.log_model_info(self.model.model_info) self._init_tasks_and_requests(tasks=tasks) self._init_random_seeds() @@ -126,9 +127,13 @@ def _init_parallelism_manager(self): accelerator, parallel_context = None, None with htrack_block("Test all gather"): if self.launcher_type == ParallelismManager.ACCELERATE: + if not is_accelerate_available(): + raise ValueError("You are trying to launch an accelerate model, but accelerate is not installed") accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))]) test_all_gather(accelerator=accelerator) elif self.launcher_type == ParallelismManager.NANOTRON: + if not is_nanotron_available(): + raise ValueError("You are trying to launch a nanotron model, but nanotron is not installed") dist.initialize_torch_distributed() parallel_context = ParallelContext( tensor_parallel_size=self.model_config.parallelism.tp, @@ -161,6 +166,7 @@ def _init_tasks_and_requests(self, tasks): with local_ranks_zero_first() if self.launcher_type == ParallelismManager.NANOTRON else nullcontext(): # If some tasks are provided as task groups, we load them separately custom_tasks = self.pipeline_parameters.custom_tasks_directory + tasks_groups_dict = None if custom_tasks: _, tasks_groups_dict = get_custom_tasks(custom_tasks) if tasks_groups_dict and tasks in tasks_groups_dict: @@ -200,7 +206,7 @@ def _init_random_seeds(self): np.random.seed(1234) if self.accelerator is not None: self.accelerator.wait_for_everyone() - if self.parallel_process is not None: + if self.parallel_context is not None: dist.barrier() def evaluate(self): From 9e5fb9036c4cb3a6d080a61b0ca1f2db1df86a17 Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Tue, 30 Jul 2024 10:37:35 +0000 Subject: [PATCH 5/8] fix for config logging --- src/lighteval/logging/evaluation_tracker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lighteval/logging/evaluation_tracker.py b/src/lighteval/logging/evaluation_tracker.py index 370758d05..4ce69d305 100644 --- a/src/lighteval/logging/evaluation_tracker.py +++ b/src/lighteval/logging/evaluation_tracker.py @@ -164,6 +164,8 @@ def save(self) -> None: config_general = copy.deepcopy(self.general_config_logger) config_general = asdict(config_general) + # We remove the config from logging, which contains context/accelerator objects + config_general.pop("config") to_dump = { "config_general": config_general, From 2f20a17e0b9831e090022f9844b13ce05bd460b8 Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Tue, 30 Jul 2024 14:35:56 +0000 Subject: [PATCH 6/8] fix tests --- src/lighteval/main_accelerate.py | 4 +++- src/lighteval/pipeline.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index 
ebcd886f1..7eb684e60 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -80,6 +80,8 @@ def main(args): pipeline.evaluate() - pipeline.show_results() + results = pipeline.show_results() pipeline.save_and_push_results() + + return results diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 553aaac77..08bd4683e 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -258,3 +258,4 @@ def save_and_push_results(self): def show_results(self): final_dict = self.evaluation_tracker.generate_final_dict() print(make_results_table(final_dict)) + return final_dict From 5f1939dcac607a8f5f0c9011e5b60fadf22c8b18 Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Mon, 12 Aug 2024 13:03:41 +0000 Subject: [PATCH 7/8] style --- src/lighteval/models/nanotron_model.py | 2 +- src/lighteval/utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py index 64feec2ef..c4368907a 100644 --- a/src/lighteval/models/nanotron_model.py +++ b/src/lighteval/models/nanotron_model.py @@ -48,7 +48,7 @@ LoglikelihoodRequest, LoglikelihoodRollingRequest, ) -from lighteval.utils import EnvConfig, as_list, is_nanotron_available, boolstring_to_bool +from lighteval.utils import EnvConfig, as_list, boolstring_to_bool, is_nanotron_available from lighteval.utils_parallelism import find_executable_batch_size diff --git a/src/lighteval/utils.py b/src/lighteval/utils.py index 8ba0a1dfb..8013804e8 100644 --- a/src/lighteval/utils.py +++ b/src/lighteval/utils.py @@ -159,6 +159,7 @@ class EnvConfig: cache_dir: str = os.getenv("HF_HOME", "/scratch") token: str = os.getenv("HF_TOKEN") + def boolstring_to_bool(x: Union[str, bool, int]) -> Union[bool, None]: """Allows to manage string or bool to bool conversion, in case a configuration input is badly formatted. 
From 62a3d738daa8c4012ece624f728a39c0b53296a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Wed, 14 Aug 2024 17:45:49 +0200 Subject: [PATCH 8/8] code review --- src/lighteval/pipeline.py | 57 +++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 08bd4683e..191fa3dac 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -81,6 +81,8 @@ class PipelineParameters: max_samples: int = None use_chat_template: bool = False system_prompt: str = None + # Final results + final_dict: dict = None def __post_init__(self): if self.launcher_type == ParallelismManager.ACCELERATE: @@ -157,8 +159,7 @@ def _init_model(self, model_config, model): env_config=self.pipeline_parameters.env_config, ) else: - with self.accelerator.main_process_first() if self.accelerator is not None else nullcontext(): - return load_model(config=model_config, env_config=self.pipeline_parameters.env_config) + return load_model(config=model_config, env_config=self.pipeline_parameters.env_config) return model def _init_tasks_and_requests(self, tasks): @@ -209,6 +210,15 @@ def _init_random_seeds(self): if self.parallel_context is not None: dist.barrier() + def is_main_process(self): + if self.accelerator: + context = self.accelerator.is_main_process + elif self.parallel_context: + context = dist.get_rank(self.parallel_context.world_pg) == 0 + else: + context = nullcontext() + return context + def evaluate(self): with htrack_block("Evaluation"): self.evaluation_tracker.general_config_logger.log_args_info( @@ -229,33 +239,34 @@ def evaluate(self): evaluation_tracker=self.evaluation_tracker, ) - if self.accelerator: - context = self.accelerator.is_main_process - elif self.parallel_context: - context = dist.get_rank(self.parallel_context.world_pg) == 0 - else: - context = nullcontext() - - if context: + with self.get_context(): with htrack_block("Compiling results"): self.evaluation_tracker.general_config_logger.log_end_time() self.evaluation_tracker.metrics_logger.aggregate(task_dict=self.task_dict, bootstrap_iters=1000) self.evaluation_tracker.details_logger.aggregate() - with htrack_block("Cleaninp up"): # For non nanotron models - for weights in ["delta", "adapter"]: - try: - tmp_weights_dir = f"{self.evaluation_tracker.general_config_logger.model_name}-{weights}-applied" - shutil.rmtree(tmp_weights_dir) - hlog(f"Removed {tmp_weights_dir}") - except OSError: - pass - self.model.cleanup() + with htrack_block("Cleaning up"): # For non nanotron models + for weights in ["delta", "adapter"]: + try: + tmp_weights_dir = ( + f"{self.evaluation_tracker.general_config_logger.model_name}-{weights}-applied" + ) + shutil.rmtree(tmp_weights_dir) + hlog(f"Removed {tmp_weights_dir}") + except OSError: + pass + self.model.cleanup() def save_and_push_results(self): - self.evaluation_tracker.save() + with self.get_context(): + self.evaluation_tracker.save() def show_results(self): - final_dict = self.evaluation_tracker.generate_final_dict() - print(make_results_table(final_dict)) - return final_dict + if self.final_dict is None: + self.final_dict = self.evaluation_tracker.generate_final_dict() + print(make_results_table(self.final_dict)) + + def get_results(self): + if self.final_dict is None: + self.final_dict = self.evaluation_tracker.generate_final_dict() + return self.final_dict
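
A minimal usage sketch of the Pipeline API that this series introduces, written against the final state of the patches (EnvConfig lives in lighteval.utils from PATCH 2/8 onward, and the reworked main_accelerate.py is the reference for the call order). The task string, the output directory, and the BaseModelConfig(pretrained=...) call are illustrative assumptions rather than values fixed by these patches, and EvaluationTracker may take further arguments that the hunks above do not show.

    import os

    from lighteval.logging.evaluation_tracker import EvaluationTracker
    from lighteval.models.model_config import BaseModelConfig
    from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
    from lighteval.utils import EnvConfig

    # Cache directory and HF token, matching the EnvConfig defaults added in lighteval/utils.py.
    env_config = EnvConfig(token=os.getenv("HF_TOKEN"), cache_dir=os.getenv("HF_HOME", "/scratch"))

    # Tracks and saves results; output_dir and token mirror the kwargs used in the launch scripts.
    evaluation_tracker = EvaluationTracker(
        output_dir="./results",  # placeholder path
        token=os.getenv("HF_TOKEN"),
    )

    # Launch-level knobs that main_accelerate.py previously threaded through as separate arguments.
    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,  # or NANOTRON / TGI / NONE
        env_config=env_config,
        override_batch_size=1,
        max_samples=10,  # partial run: results are not comparable, as the pipeline warning states
        use_chat_template=False,
    )

    # Any config accepted by load_model() works here; a plain Hub checkpoint through
    # BaseModelConfig is assumed for illustration.
    model_config = BaseModelConfig(pretrained="gpt2")

    pipeline = Pipeline(
        tasks="lighteval|truthfulqa:mc|0|0",  # illustrative task string
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    pipeline.evaluate()
    pipeline.show_results()
    pipeline.save_and_push_results()

As in the reworked launch scripts, model loading, task and request creation, seeding, evaluation, result compilation and cleanup all happen inside Pipeline, so callers only assemble the three config objects and call the methods above.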