From 64a9f7ad6e580d1c726962a50bd2933076c7b1a8 Mon Sep 17 00:00:00 2001 From: Maxime N Date: Thu, 26 Sep 2024 14:31:16 +0200 Subject: [PATCH 1/9] #7: Add JSON data file validator --- scripts/LBDatafile_schema.py | 101 ++++++ scripts/json_datafile_validator.py | 488 +++++++++++++++++++++++++++++ src/vt-tv/utility/json_reader.cc | 22 ++ src/vt-tv/utility/json_reader.h | 6 + src/vt-tv/utility/parse_render.cc | 14 +- tests/test_bindings.py | 30 +- tests/unit/generator.h | 13 +- tests/unit/render/test_render.cc | 15 +- 8 files changed, 678 insertions(+), 11 deletions(-) create mode 100644 scripts/LBDatafile_schema.py create mode 100644 scripts/json_datafile_validator.py diff --git a/scripts/LBDatafile_schema.py b/scripts/LBDatafile_schema.py new file mode 100644 index 000000000..b352ccfb7 --- /dev/null +++ b/scripts/LBDatafile_schema.py @@ -0,0 +1,101 @@ +"""LB data JSON file schema""" +from schema import And, Optional, Schema + +def validate_ids(field): + """ + Ensure that 1) either seq_id or id is provided, + and 2) if an object is migratable, collection_id has been set. + """ + if 'seq_id' not in field and 'id' not in field: + raise ValueError('Either id (bit-encoded) or seq_id must be provided.') + + if field['migratable'] and 'seq_id' in field and 'collection_id' not in field: + raise ValueError('If an entity is migratable, it must have a collection_id') + + return field + +LBDatafile_schema = Schema( + { + Optional('type'): And(str, "LBDatafile", error="'LBDatafile' must be chosen."), + Optional('metadata'): { + Optional('type'): And(str, "LBDatafile", error="'LBDatafile' must be chosen."), + Optional('rank'): int, + Optional('shared_node'): { + 'id': int, + 'size': int, + 'rank': int, + 'num_nodes': int, + }, + Optional('phases'): { + Optional('count'): int, + 'skipped': { + 'list': [int], + 'range': [[int]], + }, + 'identical_to_previous': { + 'list': [int], + 'range': [[int]], + }, + }, + Optional('attributes'): dict + }, + 'phases': [ + { + 'id': int, + 'tasks': [ + { + 'entity': And({ + Optional('collection_id'): int, + 'home': int, + Optional('id'): int, + Optional('seq_id'): int, + Optional('index'): [int], + 'type': str, + 'migratable': bool, + Optional('objgroup_id'): int + }, validate_ids), + 'node': int, + 'resource': str, + Optional('subphases'): [ + { + 'id': int, + 'time': float, + } + ], + 'time': float, + Optional('user_defined'): dict, + Optional('attributes'): dict + }, + ], + Optional('communications'): [ + { + 'type': str, + 'to': And({ + 'type': str, + Optional('id'): int, + Optional('seq_id'): int, + Optional('home'): int, + Optional('collection_id'): int, + Optional('migratable'): bool, + Optional('index'): [int], + Optional('objgroup_id'): int, + }, validate_ids), + 'messages': int, + 'from': And({ + 'type': str, + Optional('id'): int, + Optional('seq_id'): int, + Optional('home'): int, + Optional('collection_id'): int, + Optional('migratable'): bool, + Optional('index'): [int], + Optional('objgroup_id'): int, + }, validate_ids), + 'bytes': float + } + ], + Optional('user_defined'): dict + }, + ] + } +) diff --git a/scripts/json_datafile_validator.py b/scripts/json_datafile_validator.py new file mode 100644 index 000000000..4a749c6d4 --- /dev/null +++ b/scripts/json_datafile_validator.py @@ -0,0 +1,488 @@ +""" JSON datafile validator +""" +import sys +import os +import re +import argparse +from collections import Counter +from collections.abc import Iterable +import json +import logging +import brotli +import LBDatafile_schema + +try: + project_path = 
f"{os.sep}".join(os.path.abspath(__file__).split(os.sep)[:-1])
    sys.path.append(project_path)
except Exception as e:
    print(f"Cannot add project path to system path! Exiting!\nERROR: {e}")
    raise SystemExit(1)


from schema import And, Optional, Schema
# Import VT related schemas

# Statistics block shared by every per-phase statistics entry in the
# LBStatsfile schema below
STAT_METRICS = {
    "avg": float,
    "car": float,
    "imb": float,
    "kur": float,
    "max": float,
    "min": float,
    "npr": float,
    "skw": float,
    "std": float,
    "sum": float,
    "var": float
}

def exc_handler(exception_type, exception, traceback):
    """ Exception handler for hiding traceback.
    """
    module_name = f"[{os.path.splitext(os.path.split(__file__)[-1])[0]}]"
    print(f"{module_name} {exception_type.__name__} {exception}")


class SchemaValidator:
    """ Validates schema of VT Object Map files (json)
    """
    def __init__(self, schema_type: str):
        self.schema_type = schema_type
        self.valid_schema = self._get_valid_schema()

    @staticmethod
    def get_error_message(iterable_collection: Iterable) -> str:
        """ Return a readable list of the allowed values. """
        if isinstance(iterable_collection, str):
            return iterable_collection
        return " or ".join(iterable_collection)

    def _get_valid_schema(self) -> Schema:
        """ Returns representation of a valid schema
        """
        valid_schema_data = LBDatafile_schema.LBDatafile_schema
        allowed_types_stats = "LBStatsfile"
        valid_schema_stats = Schema(
            {
                Optional('type'): And(
                    str,
                    lambda a: a == allowed_types_stats,
                    error = f"{self.get_error_message(allowed_types_stats)} must be chosen"
                ),
                Optional('metadata'): {
                    Optional('type'): And(
                        str,
                        lambda a: a == allowed_types_stats,
                        error = f"{self.get_error_message(allowed_types_stats)} must be chosen"
                    ),
                },
                'phases': [
                    {
                        "id": int,
                        Optional("migration count"): int,
                        Optional("post-LB"): {
                            "Object_comm": STAT_METRICS,
                            "Object_load_modeled": STAT_METRICS,
                            "Object_load_raw": STAT_METRICS,
                            Optional("Object_strategy_specific_load_modeled"): STAT_METRICS,
                            "Rank_comm": STAT_METRICS,
                            "Rank_load_modeled": STAT_METRICS,
                            "Rank_load_raw": STAT_METRICS,
                            Optional("Rank_strategy_specific_load_modeled"): STAT_METRICS
                        },
                        "pre-LB": {
                            "Object_comm": STAT_METRICS,
                            "Object_load_modeled": STAT_METRICS,
                            "Object_load_raw": STAT_METRICS,
                            Optional("Object_strategy_specific_load_modeled"): STAT_METRICS,
                            "Rank_comm": STAT_METRICS,
                            "Rank_load_modeled": STAT_METRICS,
                            "Rank_load_raw": STAT_METRICS,
                            Optional("Rank_strategy_specific_load_modeled"): STAT_METRICS
                        }
                    },
                ]
            }
        )

        if self.schema_type == "LBDatafile":
            return valid_schema_data
        elif self.schema_type == "LBStatsfile":
            return valid_schema_stats

        sys.excepthook = exc_handler
        raise TypeError(f"Unsupported schema type: {self.schema_type} was given")

    def is_valid(self, schema_to_validate: dict) -> bool:
        """ Returns True if schema_to_validate is valid against self.valid_schema, else False. """
        is_valid = self.valid_schema.is_valid(schema_to_validate)
        return is_valid

    def validate(self, schema_to_validate: dict):
        """ Return validated schema. """
        sys.excepthook = exc_handler
        return self.valid_schema.validate(schema_to_validate)


def get_json(file_path):
    """ Always try to decompress in case '.br' extension is missing. """
    with open(file_path, "rb") as json_file:
        content = json_file.read()
        try:
            content = brotli.decompress(content)
        except brotli.error as e:
            logging.debug(f"No decompression applied for {file_path}: {e}")
    return json.loads(content.decode("utf-8"))

class JSONDataFilesValidator:
    """ Class validating VT data files according to a defined schema. """
    def __init__(self, file_path: str = None, dir_path: str = None,
                 file_prefix: str = None, file_suffix: str = None,
                 validate_comm_links: bool = False):
        self.__file_path = file_path
        self.__dir_path = dir_path
        self.__file_prefix = file_prefix
        self.__file_suffix = file_suffix
        self.__validate_comm_links = validate_comm_links
        self.__cli()

    def __cli(self):
        """ Support for command-line arguments. """
        parser = argparse.ArgumentParser()
        group = parser.add_mutually_exclusive_group()
        group.add_argument("--dir_path", help="Path to directory where files for validation are located.")
        group.add_argument("--file_path", help="Path to the file to validate. Pass only when validating a single file.")
        parser.add_argument("--file_prefix", help="File prefix. Optional. Pass only when --dir_path is provided.")
        parser.add_argument("--file_suffix", help="File suffix. Optional. 
Pass only when --dir_path is provided.") + parser.add_argument("--validate_comm_links", help='Verify that comm links reference tasks.', action='store_true') + parser.add_argument("--debug", help="Enable debug logging", action="store_true") + args = parser.parse_args() + if args.file_path: + self.__file_path = os.path.abspath(args.file_path) + if args.dir_path: + self.__dir_path = os.path.abspath(args.dir_path) + if args.file_prefix: + self.__file_prefix = args.file_prefix + if args.file_suffix: + self.__file_suffix = args.file_suffix + self.__validate_comm_links = args.validate_comm_links + logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO, format='%(levelname)s - %(filename)s:%(lineno)d - %(message)s') + + @staticmethod + def __get_files_for_validation(dir_path: str, file_prefix: str, file_suffix: str) -> list: + """ Get a sorted list of files from directory. """ + list_of_files = os.listdir(dir_path) + + if not list_of_files: + sys.excepthook = exc_handler + raise FileNotFoundError(f"Directory: {dir_path} is EMPTY") + + if file_prefix is None and file_suffix is None: + logging.info("File prefix and file suffix not given") + file_prefix = Counter([file.split('.')[0] for file in list_of_files]).most_common()[0][0] + logging.info(f"Found most common prefix: {file_prefix}") + file_suffix = Counter([file.split('.')[-1] for file in list_of_files]).most_common()[0][0] + logging.info(f"Found most common suffix: {file_suffix}") + + if file_prefix is not None: + list_of_files = [file for file in list_of_files if file.split('.')[0] == file_prefix] + + if file_suffix is not None: + list_of_files = [file for file in list_of_files if file.split('.')[-1] == file_suffix] + + return sorted([os.path.join(dir_path, file) for file in list_of_files], + key=lambda x: int(x.split(os.sep)[-1].split('.')[-2])) + + @staticmethod + def get_complete_dataset(file_path): + """ Returns all json files that share the same basename. """ + dirname = os.path.dirname(file_path) + basename = os.path.basename(file_path) + index = basename.rfind('0') + base = basename[0:index] + files = [os.path.join(dirname, f) for f in os.listdir(dirname) + if f.startswith(base) and (f.endswith(".json") or f.endswith(".json.br"))] + logging.debug(f"Dataset: {files}") + + return files + + @staticmethod + def get_nodes_info(file_path): + """ Returns node information from file name. """ + basename = os.path.basename(file_path) + nodes_info = re.findall(r'\d+', basename) + if not nodes_info: + return '-1', '-1' + elif len(nodes_info) == 1: + return '-1', nodes_info[0] + else: + return nodes_info[0], nodes_info[1] + + def __validate_file(self, file_path): + """ Validates the file against the schema. """ + logging.info(f"Validating file: {file_path}") + json_data = get_json(file_path) + + # Extracting type from JSON data + schema_type = None + if json_data.get("metadata") is not None: + schema_type = json_data.get("metadata").get("type") + else: + if json_data.get("type") is not None: + schema_type = json_data.get("type") + + if schema_type is not None: + # Validate schema + if SchemaValidator(schema_type=schema_type).is_valid(schema_to_validate=json_data): + logging.info(f"Valid JSON schema in {file_path}") + else: + logging.error(f"Invalid JSON schema in {file_path}") + SchemaValidator(schema_type=schema_type).validate(schema_to_validate=json_data) + else: + logging.warning(f"Schema type not found in file: {file_path}. 
\n" + "Passing by default when schema type not found.") + + if self.__validate_comm_links and schema_type == "LBDatafile": + num_nodes, current_node = self.get_nodes_info(file_path) + if num_nodes == '-1' and current_node == '-1': + # validate single file + all_jsons = [json_data] + elif current_node == '0': + # validate complete dataset + dataset_files = self.get_complete_dataset(file_path) + all_jsons = [get_json(file) for file in dataset_files] + else: + # this dataset is already validated + return + + if not self.validate_comm_links(all_jsons): + logging.error(f" Invalid dataset for file: {file_path}!") + + + @staticmethod + def validate_comm_links(all_jsons): + for n in range(len(all_jsons[0]["phases"])): + comm_ids = set() + task_ids = set() + + for data in all_jsons: + tasks = data["phases"][n]["tasks"] + task_ids.update( + {int(task["entity"].get("id", task["entity"].get("seq_id"))) for task in tasks} + ) + + if data["phases"][n].get("communications") is not None: + comms = data["phases"][n]["communications"] + comm_ids.update({int(comm["from"].get("id", comm["from"].get("seq_id"))) for comm in comms}) + comm_ids.update({int(comm["to"].get("id", comm["to"].get("seq_id"))) for comm in comms}) + + if not comm_ids.issubset(task_ids): + logging.error( + f" Phase {n}: Task ids: {comm_ids - task_ids}. Tasks are " + "referenced in communication, but are not present in the " + "dataset." + ) + return False + return True + + def main(self): + if self.__file_path is not None: + if os.path.isfile(self.__file_path): + self.__validate_file(file_path=self.__file_path) + else: + sys.excepthook = exc_handler + raise FileNotFoundError(f"File: {self.__file_path} NOT found") + elif self.__dir_path is not None: + if os.path.isdir(self.__dir_path): + list_of_files_for_validation = self.__get_files_for_validation(dir_path=self.__dir_path, + file_prefix=self.__file_prefix, + file_suffix=self.__file_suffix) + for file in list_of_files_for_validation: + self.__validate_file(file_path=file) + else: + sys.excepthook = exc_handler + raise FileNotFoundError(f"Directory: {self.__dir_path} does NOT exist") + else: + sys.excepthook = exc_handler + raise Exception("FILE path or DIRECTORY path has to be given") + + +if __name__ == "__main__": + JSONDataFilesValidator().main() \ No newline at end of file diff --git a/src/vt-tv/utility/json_reader.cc b/src/vt-tv/utility/json_reader.cc index ec4b85b5a..0e859c715 100644 --- a/src/vt-tv/utility/json_reader.cc +++ b/src/vt-tv/utility/json_reader.cc @@ -52,6 +52,9 @@ #include #include +#include +#include +#include namespace vt::tv::utility { @@ -280,4 +283,23 @@ std::unique_ptr JSONReader::parse() { return std::make_unique(std::move(object_info), std::move(rank_info)); } +bool JSONReader::validate_datafile(std::string file_path) +{ + bool is_valid = true; + char cmd[256]; + strcpy(cmd, "python "); + strcpy(cmd, " "); + strcat(cmd, SRC_DIR); + strcat(cmd, "/scripts/json_datafile_validator.py"); + strcat(cmd, " "); + strcat(cmd, " --file_path="); + strcat(cmd, file_path.data()); + + if (!system(cmd)) { + is_valid = false; + } + + return is_valid; +} + } /* end namespace vt::tv::utility */ diff --git a/src/vt-tv/utility/json_reader.h b/src/vt-tv/utility/json_reader.h index 0c274a441..a7a8fce45 100644 --- a/src/vt-tv/utility/json_reader.h +++ b/src/vt-tv/utility/json_reader.h @@ -94,6 +94,12 @@ struct JSONReader { */ std::unique_ptr parse(); + /** + * \brief Check if a JSON data file is well formatted + * \param[in] file_path the data file path to validate + */ + bool 
validate_datafile(std::string file_path); + private: NodeType rank_ = 0; std::unique_ptr json_ = nullptr; diff --git a/src/vt-tv/utility/parse_render.cc b/src/vt-tv/utility/parse_render.cc index ce10a071d..325138dc9 100644 --- a/src/vt-tv/utility/parse_render.cc +++ b/src/vt-tv/utility/parse_render.cc @@ -95,14 +95,24 @@ void ParseRender::parseAndRender( for (int64_t rank = 0; rank < n_ranks; rank++) { fmt::print("Reading file for rank {}\n", rank); utility::JSONReader reader{static_cast(rank)}; - reader.readFile(input_dir + "data." + std::to_string(rank) + ".json"); - auto tmpInfo = reader.parse(); + + + // Validate the JSON data file + std::string data_file_path = input_dir + "data." + std::to_string(rank) + ".json"; + if (reader.validate_datafile(data_file_path)) { + reader.readFile(data_file_path); + auto tmpInfo = reader.parse(); + #ifdef VT_TV_OPENMP_ENABLED #if VT_TV_OPENMP_ENABLED #pragma omp critical #endif #endif { info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank)); } + + } else { + throw std::runtime_error("JSON data file is invalid: " + data_file_path); + } } } diff --git a/tests/test_bindings.py b/tests/test_bindings.py index 45fbc9ce4..6c0ed1af1 100644 --- a/tests/test_bindings.py +++ b/tests/test_bindings.py @@ -1,11 +1,11 @@ """This module calls vttv module to test that vttv bindings work as expected""" import json import os - +import sys +import subprocess import yaml import vttv - # source dir is the directory a level above this file source_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -28,11 +28,33 @@ rank_data = [] for rank in range(n_ranks): - with open(f'{source_dir}/data/lb_test_data/data.{rank}.json', 'r', encoding='utf-8') as f: - data = json.load(f) + # JSON data file for rank + datafile = f'{source_dir}/data/lb_test_data/data.{rank}.json' + # Check JSON schema validity + IS_VALID: bool + try: + p = subprocess.run([ + 'python', + os.path.join(source_dir, 'scripts/json_datafile_validator.py'), + "--file_path=" + datafile + ], check=True, capture_output=True) + p.check_returncode() + IS_VALID = True + except subprocess.CalledProcessError as e: + IS_VALID = False + print(e.output.decode() + f"[JSON data file invalid] {datafile}") + + # If validation failed + if IS_VALID is False: + sys.exit(1) + + # Read JSON data file + with open(datafile, 'r', encoding='utf-8') as f: + data = json.load(f) data_serialized = json.dumps(data) + # Add the rank data line rank_data.append((data_serialized)) vttv.tvFromJson(rank_data, params_serialized, n_ranks) diff --git a/tests/unit/generator.h b/tests/unit/generator.h index 09bea5e70..c702edbdf 100644 --- a/tests/unit/generator.h +++ b/tests/unit/generator.h @@ -184,14 +184,23 @@ struct Generator { for (int64_t rank = 0; rank < n_ranks; rank++) { fmt::print("Reading file for rank {}\n", rank); JSONReader reader{static_cast(rank)}; - reader.readFile(input_dir + "data." + std::to_string(rank) + ".json"); - auto tmpInfo = reader.parse(); + + // Validate the JSON data file + std::string data_file_path = input_dir + "data." 
+ std::to_string(rank) + ".json";
+      if (reader.validate_datafile(data_file_path)) {
+        reader.readFile(data_file_path);
+        auto tmpInfo = reader.parse();
+
 #ifdef VT_TV_OPENMP_ENABLED
 #if VT_TV_OPENMP_ENABLED
 #pragma omp critical
 #endif
 #endif
       { info.addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank)); }
+
+      } else {
+        throw std::runtime_error("JSON data file is invalid: " + data_file_path);
+      }
     }
     return info;
   }
diff --git a/tests/unit/render/test_render.cc b/tests/unit/render/test_render.cc
index 6aa2cd8ed..6b35bc3ec 100644
--- a/tests/unit/render/test_render.cc
+++ b/tests/unit/render/test_render.cc
@@ -365,9 +365,18 @@ TEST_F(RenderTest, test_render_construct_from_info) {

   for (NodeType rank = 0; rank < n_ranks; rank++) {
     utility::JSONReader reader{rank};
-    reader.readFile(path + "/data." + std::to_string(rank) + ".json");
-    auto tmpInfo = reader.parse();
-    info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank));
+
+    // Validate the JSON data file
+    std::string data_file_path = path + "/data." + std::to_string(rank) + ".json";
+    if (reader.validate_datafile(data_file_path)) {
+      reader.readFile(data_file_path);
+      auto tmpInfo = reader.parse();
+
+      info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank));
+    } else {
+      ADD_FAILURE() << "JSON data file is invalid: " + data_file_path;
+    }
+
   }

   fmt::print("Num ranks={}\n", info->getNumRanks());

From 8ae16ff9c0b57f71414b8237322078848dec6f3d Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Thu, 26 Sep 2024 14:31:23 +0200
Subject: [PATCH 2/9] #7: Change command line creation and fix some naming issues

---
 scripts/json_datafile_validator.py            |  6 ++---
 ...tafile_schema.py => lb_datafile_schema.py} |  8 +++---
 src/vt-tv/utility/json_reader.cc              | 27 ++++++++++++-------
 3 files changed, 24 insertions(+), 17 deletions(-)
 rename scripts/{LBDatafile_schema.py => lb_datafile_schema.py} (91%)

diff --git a/scripts/json_datafile_validator.py b/scripts/json_datafile_validator.py
index 4a749c6d4..ee74a733d 100644
--- a/scripts/json_datafile_validator.py
+++ b/scripts/json_datafile_validator.py
@@ -9,14 +9,14 @@
 import json
 import logging
 import brotli
-import LBDatafile_schema
+from lb_datafile_schema import LBDatafile_schema

 try:
     project_path = f"{os.sep}".join(os.path.abspath(__file__).split(os.sep)[:-1])
     sys.path.append(project_path)
 except Exception as e:
     print(f"Cannot add project path to system path! Exiting!\nERROR: {e}")
-    raise SystemExit(1)
+    sys.exit(1)


 from schema import And, Optional, Schema
@@ -45,7 +45,7 @@ def get_error_message(iterable_collection: Iterable) -> str:
     def _get_valid_schema(self) -> Schema:
         """ Returns representation of a valid schema
         """
-        valid_schema_data = LBDatafile_schema.LBDatafile_schema
+        valid_schema_data = LBDatafile_schema
         allowed_types_stats = "LBStatsfile"
         valid_schema_stats = Schema(
             {
diff --git a/scripts/LBDatafile_schema.py b/scripts/lb_datafile_schema.py
similarity index 91%
rename from scripts/LBDatafile_schema.py
rename to scripts/lb_datafile_schema.py
index b352ccfb7..d56b757e7 100644
--- a/scripts/LBDatafile_schema.py
+++ b/scripts/lb_datafile_schema.py
@@ -6,11 +6,11 @@ def validate_ids(field):
     Ensure that 1) either seq_id or id is provided,
     and 2) if an object is migratable, collection_id has been set.
     """
-    if 'seq_id' not in field and 'id' not in field:
-        raise ValueError('Either id (bit-encoded) or seq_id must be provided.')
+    if "seq_id" not in field and "id" not in field:
+        raise ValueError("Either id (bit-encoded) or seq_id must be provided.")

-    if field['migratable'] and 'seq_id' in field and 'collection_id' not in field:
-        raise ValueError('If an entity is migratable, it must have a collection_id')
+    if field.get("migratable") is True and "seq_id" in field and "collection_id" not in field:
+        raise ValueError("If an entity is migratable, it must have a collection_id")

     return field

diff --git a/src/vt-tv/utility/json_reader.cc b/src/vt-tv/utility/json_reader.cc
index 0e859c715..1bead4cfc 100644
--- a/src/vt-tv/utility/json_reader.cc
+++ b/src/vt-tv/utility/json_reader.cc
@@ -285,17 +285,24 @@ std::unique_ptr JSONReader::parse() {

 bool JSONReader::validate_datafile(std::string file_path)
 {
+  // Assume the file is valid until the validator reports otherwise
   bool is_valid = true;
-  char cmd[256];
-  strcpy(cmd, "python ");
-  strcpy(cmd, " ");
-  strcat(cmd, SRC_DIR);
-  strcat(cmd, "/scripts/json_datafile_validator.py");
-  strcat(cmd, " ");
-  strcat(cmd, " --file_path=");
-  strcat(cmd, file_path.data());
-
-  if (!system(cmd)) {
+
+  // Prepare command line
+  std::string cmd;
+  cmd +="python";
+  cmd +=" ";
+  cmd +=SRC_DIR;
+  cmd +="/scripts/json_datafile_validator.py";
+  cmd +=" ";
+  cmd +=" --file_path=";
+  cmd +=file_path.data();
+
+  // Run the validator and capture its exit code
+  int exit_code = std::system(cmd.c_str());
+
+  // A non-zero exit code means validation failed
+  if (exit_code > 0) {
     is_valid = false;
   }

From 6e5a55853eeb981b552264ebd0b1e51654b41da3 Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Thu, 26 Sep 2024 14:31:23 +0200
Subject: [PATCH 3/9] #7: Fix whitespace

---
 scripts/json_datafile_validator.py | 2 +-
 src/vt-tv/utility/json_reader.cc   | 2 +-
 src/vt-tv/utility/json_reader.h    | 2 +-
 tests/unit/render/test_render.cc   | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/json_datafile_validator.py b/scripts/json_datafile_validator.py
index ee74a733d..fe1dc111f 100644
--- a/scripts/json_datafile_validator.py
+++ b/scripts/json_datafile_validator.py
@@ -1,4 +1,4 @@
-""" JSON datafile validator 
+""" JSON datafile validator
 """
 import sys
 import os
diff --git a/src/vt-tv/utility/json_reader.cc b/src/vt-tv/utility/json_reader.cc
index 1bead4cfc..7829aa20c 100644
--- a/src/vt-tv/utility/json_reader.cc
+++ b/src/vt-tv/utility/json_reader.cc
@@ -300,7 +300,7 @@ bool JSONReader::validate_datafile(std::string file_path)

   // Run the validator and capture its exit code
   int exit_code = std::system(cmd.c_str());
-  
+
   // A non-zero exit code means validation failed
   if (exit_code > 0) {
     is_valid = false;
diff --git a/src/vt-tv/utility/json_reader.h b/src/vt-tv/utility/json_reader.h
index a7a8fce45..7c12139da 100644
--- a/src/vt-tv/utility/json_reader.h
+++ b/src/vt-tv/utility/json_reader.h
@@ -96,7 +96,7 @@ struct JSONReader {

   /**
    * \brief Check if a JSON data file is well formatted
-   * \param[in] file_path the data file path to validate 
+   * \param[in] file_path the data file path to validate
    */
   bool validate_datafile(std::string file_path);

diff --git a/tests/unit/render/test_render.cc b/tests/unit/render/test_render.cc
index 6b35bc3ec..ce10a4c21 100644
--- a/tests/unit/render/test_render.cc
+++ b/tests/unit/render/test_render.cc
@@ -365,13 +365,13 @@ TEST_F(RenderTest, test_render_construct_from_info) {

   for (NodeType rank = 0; rank < n_ranks; rank++) {
     utility::JSONReader reader{rank};
-    
+
     // Validate the JSON data file
     std::string data_file_path = path + "/data." 
+ std::to_string(rank) + ".json";
     if (reader.validate_datafile(data_file_path)) {
       reader.readFile(data_file_path);
       auto tmpInfo = reader.parse();
-      
+
       info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank));
     } else {
       ADD_FAILURE() << "JSON data file is invalid: " + data_file_path;

From b336f99b37fa3993eefa43c5d90a59ddfefbbb9b Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Thu, 26 Sep 2024 14:33:03 +0200
Subject: [PATCH 4/9] #7: Add missing Python packages

---
 ci/docker/build-and-test-ubuntu.dockerfile | 6 ++++++
 ci/python_build.sh                         | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/ci/docker/build-and-test-ubuntu.dockerfile b/ci/docker/build-and-test-ubuntu.dockerfile
index 9a0e008a9..039bef8e5 100644
--- a/ci/docker/build-and-test-ubuntu.dockerfile
+++ b/ci/docker/build-and-test-ubuntu.dockerfile
@@ -8,6 +8,12 @@ FROM ${BASE_IMAGE} AS base
 ENV CONDA_PATH=/opt/conda
 ENV PATH=$PATH:$CONDA_PATH/bin

+# Setup python requirements for JSON datafile validation
+RUN pip install PyYAML
+RUN pip install Brotli
+RUN pip install schema
+RUN pip install nanobind
+
 COPY . /opt/src/vt-tv

 RUN mkdir -p /opt/build/vt-tv
diff --git a/ci/python_build.sh b/ci/python_build.sh
index 6c3fddb50..71b8022de 100644
--- a/ci/python_build.sh
+++ b/ci/python_build.sh
@@ -19,6 +19,9 @@ for env in $(conda env list | grep ^py | perl -lane 'print $F[-1]' | xargs ls -l

   # Build VT-TV python package
   pip install PyYAML
+  pip install Brotli
+  pip install schema
+  pip install nanobind
   pip install $VT_TV_SRC_DIR

   # Deactivate conda environment

From 013fccd07623547f8c80cfca702b31ca69586dda Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Thu, 26 Sep 2024 15:16:51 +0200
Subject: [PATCH 5/9] #7: CI: install missing Python packages

---
 ci/setup_conda.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ci/setup_conda.sh b/ci/setup_conda.sh
index 9d89c5c01..abb528457 100644
--- a/ci/setup_conda.sh
+++ b/ci/setup_conda.sh
@@ -41,6 +41,9 @@ do
   . 
$CONDA_PATH/etc/profile.d/conda.sh && conda activate py${python_version} echo "Python version: $(python --version)" + pip install PyYAML + pip install Brotli + pip install schema pip install nanobind conda deactivate echo "::endgroup::" From 6ae5691211b07d7ab9234d101bbac7635231e034 Mon Sep 17 00:00:00 2001 From: Maxime N Date: Fri, 27 Sep 2024 15:43:31 +0200 Subject: [PATCH 6/9] Merge commit '6617f3a3aebf41bcd790182ff8ae0e73f4d600f4' into 7-add-json-validation --- bindings/python/config_validator.cc | 43 +++++++++++++++++++ bindings/python/config_validator.h | 64 +++++++++++++++++++++++++++++ bindings/python/tv.cc | 59 ++++++++++++++++++-------- tests/test_bindings.py | 34 ++++++++++----- 4 files changed, 171 insertions(+), 29 deletions(-) create mode 100644 bindings/python/config_validator.cc create mode 100644 bindings/python/config_validator.h diff --git a/bindings/python/config_validator.cc b/bindings/python/config_validator.cc new file mode 100644 index 000000000..c9066b4eb --- /dev/null +++ b/bindings/python/config_validator.cc @@ -0,0 +1,43 @@ +#include "config_validator.h" + +namespace vt::tv::bindings::python { + + /** + * Check if the configuration file is valid + * + * @return true if the configuration is valid + */ + bool ConfigValidator::isValid() + { + bool is_valid = true; + for (std::string parameter: required_parameters) { + if (!config[parameter]) { + is_valid = false; + break; + } + } + return is_valid; + } + + + /** + * Get the list of missing parameters + * + * @return A string containing the list of the missing parameters + */ + std::string ConfigValidator::getMissingRequiredParameters() + { + std::string parameters; + for (std::string parameter: required_parameters) { + if (!config[parameter]) { + if (parameters.empty()) { + parameters = parameter; + } else { + parameters = parameters + ", " + parameter; + } + } + } + return parameters; + } + +} /* end namespace vt::tv::bindings::python */ diff --git a/bindings/python/config_validator.h b/bindings/python/config_validator.h new file mode 100644 index 000000000..8354180bb --- /dev/null +++ b/bindings/python/config_validator.h @@ -0,0 +1,64 @@ +/* +//@HEADER +// ***************************************************************************** +// +// config_validator.h +// DARMA/vt-tv => Virtual Transport -- Task Visualizer +// +// Copyright 2019-2024 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact darma@sandia.gov
//
// *****************************************************************************
//@HEADER
*/
#ifndef vt_tv_config_validator_h
#define vt_tv_config_validator_h

#include <string>
#include <array>

#include <yaml-cpp/yaml.h>

namespace vt::tv::bindings::python {
  /**
   * ConfigValidator: checks that the required visualization parameters are
   * present in the YAML configuration
   */
  class ConfigValidator
  {
    public:
      std::array<std::string, 2> required_parameters = {"output_visualization_dir", "output_visualization_file_stem"};
      YAML::Node config;
      bool isValid();
      std::string getMissingRequiredParameters();
      ConfigValidator(YAML::Node in_config)
      : config(in_config) {}
  };
}

#endif
diff --git a/bindings/python/tv.cc b/bindings/python/tv.cc
index 2d0580d56..2276e1f0a 100644
--- a/bindings/python/tv.cc
+++ b/bindings/python/tv.cc
@@ -1,22 +1,42 @@
 #include "tv.h"
+#include "config_validator.h"

 namespace vt::tv::bindings::python {

 void tvFromJson(const std::vector<std::string>& input_json_per_rank_list, const std::string& input_yaml_params_str, uint64_t num_ranks) {
-  std::string startup_logo = std::string(" __ __\n")
-    + std::string(" _ __/ /_ / /__ __\n")
-    + std::string("| | / / __/ _____ / __/ | / /\n")
-    + std::string("| |/ / / /____/ / /_ | |/ /\n")
-    + std::string("|___/\\__/ \\__/ |___/\n");
-  fmt::print("==============================\n");
-  fmt::print(startup_logo);
-  fmt::print("==============================\n");
+  std::string startup_logo = std::string(" __ __\n")
+    + std::string(" _ __/ /_ / /__ __\n")
+    + std::string("| | / / __/ _____ / __/ | / /\n")
+    + std::string("| |/ / / /____/ / /_ | |/ /\n")
+    + std::string("|___/\\__/ \\__/ |___/\n");
+  fmt::print("==============================\n");
+  fmt::print(startup_logo);
+  fmt::print("==============================\n");
+
+  YAML::Node viz_config;
+  try {
+    // Load the configuration from serialized YAML
+    viz_config = YAML::Load(input_yaml_params_str);
+  } catch (std::exception const& e) {
+    throw std::runtime_error(fmt::format(
+      "vt-tv: Error reading the configuration file: {}",
+      e.what()
+    ));
+  }
+
+  // Config Validator
+  ConfigValidator config_validator(viz_config);

-  // parse the input yaml parameters
-  try {
-    // Load the configuration from serialized YAML
-    YAML::Node viz_config = YAML::Load(input_yaml_params_str);
+  // Check configuration
+  bool is_config_valid = config_validator.isValid();

+  // Throw error if configuration is invalid
+  if (!is_config_valid) {
+    throw std::runtime_error(fmt::format(
+      "vt-tv: Error validating the configuration file: {}",
+      config_validator.getMissingRequiredParameters()
+    ));
+  }

   std::array qoi_request = {
     viz_config["rank_qoi"].as<std::string>(),
@@ -41,10 +61,16 @@ void tvFromJson(const std::vector<std::string>& input_json_per_rank_list, const

   // Throw an error if the output directory does not exist or is not absolute
   if (!std::filesystem::exists(output_path)) {
-    throw std::runtime_error(fmt::format("Visualization output directory does not exist at {}", output_dir));
+    throw std::runtime_error(fmt::format(
+      "vt-tv: Visualization output directory does not exist at {}",
+      
output_dir
+    ));
   }
   if (!output_path.is_absolute()) {
-    throw std::runtime_error("Visualization output directory must be absolute.");
+    throw std::runtime_error(fmt::format(
+      "vt-tv: Visualization output directory must be absolute: {}",
+      output_dir
+    ));
   }

   // append / to avoid problems with file stems
@@ -123,11 +149,8 @@ void tvFromJson(const std::vector<std::string>& input_json_per_rank_list, const
     output_dir, output_file_stem, 1.0, save_meshes, save_pngs, std::numeric_limits::max()
   );
   render.generate(font_size, win_size);
-  } catch (std::exception const& e) {
-    throw std::runtime_error(fmt::format("vt-tv: Error reading the configuration file: {}", e.what()));
-  }

-  fmt::print("vt-tv: Done.\n");
+  fmt::print("vt-tv: Done.\n");
 }

 namespace nb = nanobind;
diff --git a/tests/test_bindings.py b/tests/test_bindings.py
index 6c0ed1af1..85f093b2b 100644
--- a/tests/test_bindings.py
+++ b/tests/test_bindings.py
@@ -1,32 +1,43 @@
 """This module calls vttv module to test that vttv bindings work as expected"""
-import json
 import os
-import sys
 import subprocess
+import json
+import sys
 import yaml
 import vttv

 # source dir is the directory a level above this file
 source_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

-with open(f'{source_dir}/tests/test_bindings_conf.yaml', 'r', encoding='utf-8') as stream:
+# Read the YAML config file
+with open(f"{source_dir}/tests/test_bindings_conf.yaml", 'r', encoding="utf-8") as stream:
     try:
         params = yaml.safe_load(stream)
     except yaml.YAMLError as exc:
         print(exc)
-        exit(1)
+        sys.exit(1)
+
+# Check main key is "visualization"
+if "visualization" not in params:
+    print(
+        "The YAML configuration file is not valid: "
+        "missing required parameter \"visualization\""
+    )
+    sys.exit(1)

 # make output_visualization_dir directory parameter absolute
-if not os.path.isabs(params["visualization"]["output_visualization_dir"]):
-    params["visualization"]["output_visualization_dir"] = source_dir + \
-        "/" + params["visualization"]["output_visualization_dir"]
+if "output_visualization_dir" in params["visualization"]:
+    if not os.path.isabs(params["visualization"]["output_visualization_dir"]):
+        params["visualization"]["output_visualization_dir"] = source_dir + \
+            "/" + params["visualization"]["output_visualization_dir"]

+# Serialize visualization parameters
 params_serialized = yaml.dump(params["visualization"])

+# Calculate n_ranks
 n_ranks = params["visualization"]["x_ranks"] * \
     params["visualization"]["y_ranks"] * params["visualization"]["z_ranks"]

-rank_data = []
+rank_data = []
 for rank in range(n_ranks):
     # JSON data file for rank
     datafile = f'{source_dir}/data/lb_test_data/data.{rank}.json'
@@ -50,11 +61,12 @@
     sys.exit(1)

     # Read JSON data file
-    with open(datafile, 'r', encoding='utf-8') as f:
+    with open(datafile, 'r', encoding="utf-8") as f:
         data = json.load(f)
     data_serialized = json.dumps(data)

-    # Add the rank data line
-    rank_data.append((data_serialized))
+    # Add the serialized data for this rank
+    rank_data.append(data_serialized)

+# Launch vt-tv from JSON data
 vttv.tvFromJson(rank_data, params_serialized, n_ranks)

From 445e6800597350ec40f0a42546aa28ebd55897c1 Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Sat, 28 Sep 2024 18:40:22 +0200
Subject: [PATCH 7/9] #7: Apply TD reviews

---
 ci/docker/build-and-test-ubuntu.dockerfile |  6 ------
 ci/setup_conda.sh                          |  4 ----
 src/vt-tv/utility/json_reader.cc           | 14 +++++++-------
 3 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/ci/docker/build-and-test-ubuntu.dockerfile b/ci/docker/build-and-test-ubuntu.dockerfile
index 
039bef8e5..9a0e008a9 100644
--- a/ci/docker/build-and-test-ubuntu.dockerfile
+++ b/ci/docker/build-and-test-ubuntu.dockerfile
@@ -8,12 +8,6 @@ FROM ${BASE_IMAGE} AS base
 ENV CONDA_PATH=/opt/conda
 ENV PATH=$PATH:$CONDA_PATH/bin

-# Setup python requirements for JSON datafile validation
-RUN pip install PyYAML
-RUN pip install Brotli
-RUN pip install schema
-RUN pip install nanobind
-
 COPY . /opt/src/vt-tv

 RUN mkdir -p /opt/build/vt-tv
diff --git a/ci/setup_conda.sh b/ci/setup_conda.sh
index abb528457..467af5bc5 100644
--- a/ci/setup_conda.sh
+++ b/ci/setup_conda.sh
@@ -41,10 +41,6 @@ do
   . $CONDA_PATH/etc/profile.d/conda.sh && conda activate py${python_version}

   echo "Python version: $(python --version)"
-  pip install PyYAML
-  pip install Brotli
-  pip install schema
-  pip install nanobind
   conda deactivate
   echo "::endgroup::"
 done
diff --git a/src/vt-tv/utility/json_reader.cc b/src/vt-tv/utility/json_reader.cc
index 7829aa20c..ca9b6a24f 100644
--- a/src/vt-tv/utility/json_reader.cc
+++ b/src/vt-tv/utility/json_reader.cc
@@ -290,13 +290,13 @@ bool JSONReader::validate_datafile(std::string file_path)

   // Prepare command line
   std::string cmd;
-  cmd +="python";
-  cmd +=" ";
-  cmd +=SRC_DIR;
-  cmd +="/scripts/json_datafile_validator.py";
-  cmd +=" ";
-  cmd +=" --file_path=";
-  cmd +=file_path.data();
+  cmd += "python";
+  cmd += " ";
+  cmd += SRC_DIR;
+  cmd += "/scripts/json_datafile_validator.py";
+  cmd += " ";
+  cmd += " --file_path=";
+  cmd += file_path.data();

   // Run the validator and capture its exit code
   int exit_code = std::system(cmd.c_str());

From fdcc944310207596b2a7ad9c2452fe0070e5049c Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Sat, 28 Sep 2024 19:18:38 +0200
Subject: [PATCH 8/9] #7: Restore conda pip package installs

---
 ci/setup_conda.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ci/setup_conda.sh b/ci/setup_conda.sh
index 467af5bc5..abb528457 100644
--- a/ci/setup_conda.sh
+++ b/ci/setup_conda.sh
@@ -41,6 +41,10 @@ do
   . $CONDA_PATH/etc/profile.d/conda.sh && conda activate py${python_version}

   echo "Python version: $(python --version)"
+  pip install PyYAML
+  pip install Brotli
+  pip install schema
+  pip install nanobind
   conda deactivate
   echo "::endgroup::"
 done

From 3a392860aa952e9230ec95233d24f26770005fe6 Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Sun, 29 Sep 2024 08:53:39 +0200
Subject: [PATCH 9/9] #7: Restore Ubuntu pip package installs

---
 ci/docker/build-and-test-ubuntu.dockerfile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ci/docker/build-and-test-ubuntu.dockerfile b/ci/docker/build-and-test-ubuntu.dockerfile
index 9a0e008a9..039bef8e5 100644
--- a/ci/docker/build-and-test-ubuntu.dockerfile
+++ b/ci/docker/build-and-test-ubuntu.dockerfile
@@ -8,6 +8,12 @@ FROM ${BASE_IMAGE} AS base
 ENV CONDA_PATH=/opt/conda
 ENV PATH=$PATH:$CONDA_PATH/bin

+# Setup python requirements for JSON datafile validation
+RUN pip install PyYAML
+RUN pip install Brotli
+RUN pip install schema
+RUN pip install nanobind
+
 COPY . /opt/src/vt-tv

 RUN mkdir -p /opt/build/vt-tv
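
For anyone exercising this series locally, the validator can also be run on its own, outside of the C++ readers and the Python bindings. The following minimal sketch assumes it is run from the vt-tv repository root with the dependencies installed by the CI patches (PyYAML, Brotli, schema); the data file path is the one used by the test suite and is illustrative only.

    # Command-line equivalent:
    #   python scripts/json_datafile_validator.py --file_path=data/lb_test_data/data.0.json
    import json
    import sys

    sys.path.append("scripts")  # make json_datafile_validator and its schema module importable

    from json_datafile_validator import SchemaValidator

    # Load one rank's (uncompressed) JSON data file
    with open("data/lb_test_data/data.0.json", encoding="utf-8") as f:
        data = json.load(f)

    # Check the file against the LBDatafile schema introduced in this series;
    # validate() would raise a detailed error on failure instead of returning a bool
    validator = SchemaValidator(schema_type="LBDatafile")
    print("valid:", validator.is_valid(schema_to_validate=data))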