From 64a9f7ad6e580d1c726962a50bd2933076c7b1a8 Mon Sep 17 00:00:00 2001 From: Maxime N Date: Thu, 26 Sep 2024 14:31:16 +0200 Subject: [PATCH 1/9] #7: Add JSON data file validator --- scripts/LBDatafile_schema.py | 101 ++++++ scripts/json_datafile_validator.py | 488 +++++++++++++++++++++++++++++ src/vt-tv/utility/json_reader.cc | 22 ++ src/vt-tv/utility/json_reader.h | 6 + src/vt-tv/utility/parse_render.cc | 14 +- tests/test_bindings.py | 30 +- tests/unit/generator.h | 13 +- tests/unit/render/test_render.cc | 15 +- 8 files changed, 678 insertions(+), 11 deletions(-) create mode 100644 scripts/LBDatafile_schema.py create mode 100644 scripts/json_datafile_validator.py diff --git a/scripts/LBDatafile_schema.py b/scripts/LBDatafile_schema.py new file mode 100644 index 000000000..b352ccfb7 --- /dev/null +++ b/scripts/LBDatafile_schema.py @@ -0,0 +1,101 @@ +"""LB data JSON file schema""" +from schema import And, Optional, Schema + +def validate_ids(field): + """ + Ensure that 1) either seq_id or id is provided, + and 2) if an object is migratable, collection_id has been set. + """ + if 'seq_id' not in field and 'id' not in field: + raise ValueError('Either id (bit-encoded) or seq_id must be provided.') + + if field['migratable'] and 'seq_id' in field and 'collection_id' not in field: + raise ValueError('If an entity is migratable, it must have a collection_id') + + return field + +LBDatafile_schema = Schema( + { + Optional('type'): And(str, "LBDatafile", error="'LBDatafile' must be chosen."), + Optional('metadata'): { + Optional('type'): And(str, "LBDatafile", error="'LBDatafile' must be chosen."), + Optional('rank'): int, + Optional('shared_node'): { + 'id': int, + 'size': int, + 'rank': int, + 'num_nodes': int, + }, + Optional('phases'): { + Optional('count'): int, + 'skipped': { + 'list': [int], + 'range': [[int]], + }, + 'identical_to_previous': { + 'list': [int], + 'range': [[int]], + }, + }, + Optional('attributes'): dict + }, + 'phases': [ + { + 'id': int, + 'tasks': [ + { + 'entity': And({ + Optional('collection_id'): int, + 'home': int, + Optional('id'): int, + Optional('seq_id'): int, + Optional('index'): [int], + 'type': str, + 'migratable': bool, + Optional('objgroup_id'): int + }, validate_ids), + 'node': int, + 'resource': str, + Optional('subphases'): [ + { + 'id': int, + 'time': float, + } + ], + 'time': float, + Optional('user_defined'): dict, + Optional('attributes'): dict + }, + ], + Optional('communications'): [ + { + 'type': str, + 'to': And({ + 'type': str, + Optional('id'): int, + Optional('seq_id'): int, + Optional('home'): int, + Optional('collection_id'): int, + Optional('migratable'): bool, + Optional('index'): [int], + Optional('objgroup_id'): int, + }, validate_ids), + 'messages': int, + 'from': And({ + 'type': str, + Optional('id'): int, + Optional('seq_id'): int, + Optional('home'): int, + Optional('collection_id'): int, + Optional('migratable'): bool, + Optional('index'): [int], + Optional('objgroup_id'): int, + }, validate_ids), + 'bytes': float + } + ], + Optional('user_defined'): dict + }, + ] + } +) diff --git a/scripts/json_datafile_validator.py b/scripts/json_datafile_validator.py new file mode 100644 index 000000000..4a749c6d4 --- /dev/null +++ b/scripts/json_datafile_validator.py @@ -0,0 +1,488 @@ +""" JSON datafile validator +""" +import sys +import os +import re +import argparse +from collections import Counter +from collections.abc import Iterable +import json +import logging +import brotli +import LBDatafile_schema + +try: + project_path = 
f"{os.sep}".join(os.path.abspath(__file__).split(os.sep)[:-1])
    sys.path.append(project_path)
except Exception as e:
    print(f"Cannot add project path to system path! Exiting!\nERROR: {e}")
    raise SystemExit(1)


from schema import And, Optional, Schema
# Import VT related schemas

# Statistics block shared by every per-phase statistics entry in the
# LBStatsfile schema below
STAT_METRICS = {
    "avg": float,
    "car": float,
    "imb": float,
    "kur": float,
    "max": float,
    "min": float,
    "npr": float,
    "skw": float,
    "std": float,
    "sum": float,
    "var": float
}

def exc_handler(exception_type, exception, traceback):
    """ Exception handler for hiding traceback.
    """
    module_name = f"[{os.path.splitext(os.path.split(__file__)[-1])[0]}]"
    print(f"{module_name} {exception_type.__name__} {exception}")


class SchemaValidator:
    """ Validates schema of VT Object Map files (json)
    """
    def __init__(self, schema_type: str):
        self.schema_type = schema_type
        self.valid_schema = self._get_valid_schema()

    @staticmethod
    def get_error_message(iterable_collection: Iterable) -> str:
        """ Return a readable list of the allowed values. """
        if isinstance(iterable_collection, str):
            return iterable_collection
        return " or ".join(iterable_collection)

    def _get_valid_schema(self) -> Schema:
        """ Returns representation of a valid schema
        """
        valid_schema_data = LBDatafile_schema.LBDatafile_schema
        allowed_types_stats = "LBStatsfile"
        valid_schema_stats = Schema(
            {
                Optional('type'): And(
                    str,
                    lambda a: a == allowed_types_stats,
                    error = f"{self.get_error_message(allowed_types_stats)} must be chosen"
                ),
                Optional('metadata'): {
                    Optional('type'): And(
                        str,
                        lambda a: a == allowed_types_stats,
                        error = f"{self.get_error_message(allowed_types_stats)} must be chosen"
                    ),
                },
                'phases': [
                    {
                        "id": int,
                        Optional("migration count"): int,
                        Optional("post-LB"): {
                            "Object_comm": STAT_METRICS,
                            "Object_load_modeled": STAT_METRICS,
                            "Object_load_raw": STAT_METRICS,
                            Optional("Object_strategy_specific_load_modeled"): STAT_METRICS,
                            "Rank_comm": STAT_METRICS,
                            "Rank_load_modeled": STAT_METRICS,
                            "Rank_load_raw": STAT_METRICS,
                            Optional("Rank_strategy_specific_load_modeled"): STAT_METRICS
                        },
                        "pre-LB": {
                            "Object_comm": STAT_METRICS,
                            "Object_load_modeled": STAT_METRICS,
                            "Object_load_raw": STAT_METRICS,
                            Optional("Object_strategy_specific_load_modeled"): STAT_METRICS,
                            "Rank_comm": STAT_METRICS,
                            "Rank_load_modeled": STAT_METRICS,
                            "Rank_load_raw": STAT_METRICS,
                            Optional("Rank_strategy_specific_load_modeled"): STAT_METRICS
                        }
                    },
                ]
            }
        )

        if self.schema_type == "LBDatafile":
            return valid_schema_data
        elif self.schema_type == "LBStatsfile":
            return valid_schema_stats

        sys.excepthook = exc_handler
        raise TypeError(f"Unsupported schema type: {self.schema_type} was given")

    def is_valid(self, schema_to_validate: dict) -> bool:
        """ Returns True if schema_to_validate is valid against self.valid_schema, else False. """
        is_valid = self.valid_schema.is_valid(schema_to_validate)
        return is_valid

    def validate(self, schema_to_validate: dict):
        """ Return validated schema. """
        sys.excepthook = exc_handler
        return self.valid_schema.validate(schema_to_validate)


def get_json(file_path):
    """ Always try to decompress in case '.br' extension is missing. """
    with open(file_path, "rb") as json_file:
        content = json_file.read()
        try:
            content = brotli.decompress(content)
        except brotli.error as e:
            logging.debug(f"No decompression applied for {file_path}: {e}")
    return json.loads(content.decode("utf-8"))

class JSONDataFilesValidator:
    """ Class validating VT data files according to a defined schema. """
    def __init__(self, file_path: str = None, dir_path: str = None,
                 file_prefix: str = None, file_suffix: str = None,
                 validate_comm_links: bool = False):
        self.__file_path = file_path
        self.__dir_path = dir_path
        self.__file_prefix = file_prefix
        self.__file_suffix = file_suffix
        self.__validate_comm_links = validate_comm_links
        self.__cli()

    def __cli(self):
        """ Support for command-line arguments. """
        parser = argparse.ArgumentParser()
        group = parser.add_mutually_exclusive_group()
        group.add_argument("--dir_path", help="Path to directory where files for validation are located.")
        group.add_argument("--file_path", help="Path to the file to validate. Pass only when validating a single file.")
        parser.add_argument("--file_prefix", help="File prefix. Optional. Pass only when --dir_path is provided.")
        parser.add_argument("--file_suffix", help="File suffix. Optional. 
Pass only when --dir_path is provided.") + parser.add_argument("--validate_comm_links", help='Verify that comm links reference tasks.', action='store_true') + parser.add_argument("--debug", help="Enable debug logging", action="store_true") + args = parser.parse_args() + if args.file_path: + self.__file_path = os.path.abspath(args.file_path) + if args.dir_path: + self.__dir_path = os.path.abspath(args.dir_path) + if args.file_prefix: + self.__file_prefix = args.file_prefix + if args.file_suffix: + self.__file_suffix = args.file_suffix + self.__validate_comm_links = args.validate_comm_links + logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO, format='%(levelname)s - %(filename)s:%(lineno)d - %(message)s') + + @staticmethod + def __get_files_for_validation(dir_path: str, file_prefix: str, file_suffix: str) -> list: + """ Get a sorted list of files from directory. """ + list_of_files = os.listdir(dir_path) + + if not list_of_files: + sys.excepthook = exc_handler + raise FileNotFoundError(f"Directory: {dir_path} is EMPTY") + + if file_prefix is None and file_suffix is None: + logging.info("File prefix and file suffix not given") + file_prefix = Counter([file.split('.')[0] for file in list_of_files]).most_common()[0][0] + logging.info(f"Found most common prefix: {file_prefix}") + file_suffix = Counter([file.split('.')[-1] for file in list_of_files]).most_common()[0][0] + logging.info(f"Found most common suffix: {file_suffix}") + + if file_prefix is not None: + list_of_files = [file for file in list_of_files if file.split('.')[0] == file_prefix] + + if file_suffix is not None: + list_of_files = [file for file in list_of_files if file.split('.')[-1] == file_suffix] + + return sorted([os.path.join(dir_path, file) for file in list_of_files], + key=lambda x: int(x.split(os.sep)[-1].split('.')[-2])) + + @staticmethod + def get_complete_dataset(file_path): + """ Returns all json files that share the same basename. """ + dirname = os.path.dirname(file_path) + basename = os.path.basename(file_path) + index = basename.rfind('0') + base = basename[0:index] + files = [os.path.join(dirname, f) for f in os.listdir(dirname) + if f.startswith(base) and (f.endswith(".json") or f.endswith(".json.br"))] + logging.debug(f"Dataset: {files}") + + return files + + @staticmethod + def get_nodes_info(file_path): + """ Returns node information from file name. """ + basename = os.path.basename(file_path) + nodes_info = re.findall(r'\d+', basename) + if not nodes_info: + return '-1', '-1' + elif len(nodes_info) == 1: + return '-1', nodes_info[0] + else: + return nodes_info[0], nodes_info[1] + + def __validate_file(self, file_path): + """ Validates the file against the schema. """ + logging.info(f"Validating file: {file_path}") + json_data = get_json(file_path) + + # Extracting type from JSON data + schema_type = None + if json_data.get("metadata") is not None: + schema_type = json_data.get("metadata").get("type") + else: + if json_data.get("type") is not None: + schema_type = json_data.get("type") + + if schema_type is not None: + # Validate schema + if SchemaValidator(schema_type=schema_type).is_valid(schema_to_validate=json_data): + logging.info(f"Valid JSON schema in {file_path}") + else: + logging.error(f"Invalid JSON schema in {file_path}") + SchemaValidator(schema_type=schema_type).validate(schema_to_validate=json_data) + else: + logging.warning(f"Schema type not found in file: {file_path}. 
\n" + "Passing by default when schema type not found.") + + if self.__validate_comm_links and schema_type == "LBDatafile": + num_nodes, current_node = self.get_nodes_info(file_path) + if num_nodes == '-1' and current_node == '-1': + # validate single file + all_jsons = [json_data] + elif current_node == '0': + # validate complete dataset + dataset_files = self.get_complete_dataset(file_path) + all_jsons = [get_json(file) for file in dataset_files] + else: + # this dataset is already validated + return + + if not self.validate_comm_links(all_jsons): + logging.error(f" Invalid dataset for file: {file_path}!") + + + @staticmethod + def validate_comm_links(all_jsons): + for n in range(len(all_jsons[0]["phases"])): + comm_ids = set() + task_ids = set() + + for data in all_jsons: + tasks = data["phases"][n]["tasks"] + task_ids.update( + {int(task["entity"].get("id", task["entity"].get("seq_id"))) for task in tasks} + ) + + if data["phases"][n].get("communications") is not None: + comms = data["phases"][n]["communications"] + comm_ids.update({int(comm["from"].get("id", comm["from"].get("seq_id"))) for comm in comms}) + comm_ids.update({int(comm["to"].get("id", comm["to"].get("seq_id"))) for comm in comms}) + + if not comm_ids.issubset(task_ids): + logging.error( + f" Phase {n}: Task ids: {comm_ids - task_ids}. Tasks are " + "referenced in communication, but are not present in the " + "dataset." + ) + return False + return True + + def main(self): + if self.__file_path is not None: + if os.path.isfile(self.__file_path): + self.__validate_file(file_path=self.__file_path) + else: + sys.excepthook = exc_handler + raise FileNotFoundError(f"File: {self.__file_path} NOT found") + elif self.__dir_path is not None: + if os.path.isdir(self.__dir_path): + list_of_files_for_validation = self.__get_files_for_validation(dir_path=self.__dir_path, + file_prefix=self.__file_prefix, + file_suffix=self.__file_suffix) + for file in list_of_files_for_validation: + self.__validate_file(file_path=file) + else: + sys.excepthook = exc_handler + raise FileNotFoundError(f"Directory: {self.__dir_path} does NOT exist") + else: + sys.excepthook = exc_handler + raise Exception("FILE path or DIRECTORY path has to be given") + + +if __name__ == "__main__": + JSONDataFilesValidator().main() \ No newline at end of file diff --git a/src/vt-tv/utility/json_reader.cc b/src/vt-tv/utility/json_reader.cc index ec4b85b5a..0e859c715 100644 --- a/src/vt-tv/utility/json_reader.cc +++ b/src/vt-tv/utility/json_reader.cc @@ -52,6 +52,9 @@ #include #include +#include +#include +#include namespace vt::tv::utility { @@ -280,4 +283,23 @@ std::unique_ptr JSONReader::parse() { return std::make_unique(std::move(object_info), std::move(rank_info)); } +bool JSONReader::validate_datafile(std::string file_path) +{ + bool is_valid = true; + char cmd[256]; + strcpy(cmd, "python "); + strcpy(cmd, " "); + strcat(cmd, SRC_DIR); + strcat(cmd, "/scripts/json_datafile_validator.py"); + strcat(cmd, " "); + strcat(cmd, " --file_path="); + strcat(cmd, file_path.data()); + + if (!system(cmd)) { + is_valid = false; + } + + return is_valid; +} + } /* end namespace vt::tv::utility */ diff --git a/src/vt-tv/utility/json_reader.h b/src/vt-tv/utility/json_reader.h index 0c274a441..a7a8fce45 100644 --- a/src/vt-tv/utility/json_reader.h +++ b/src/vt-tv/utility/json_reader.h @@ -94,6 +94,12 @@ struct JSONReader { */ std::unique_ptr parse(); + /** + * \brief Check if a JSON data file is well formatted + * \param[in] file_path the data file path to validate + */ + bool 
validate_datafile(std::string file_path); + private: NodeType rank_ = 0; std::unique_ptr json_ = nullptr; diff --git a/src/vt-tv/utility/parse_render.cc b/src/vt-tv/utility/parse_render.cc index ce10a071d..325138dc9 100644 --- a/src/vt-tv/utility/parse_render.cc +++ b/src/vt-tv/utility/parse_render.cc @@ -95,14 +95,24 @@ void ParseRender::parseAndRender( for (int64_t rank = 0; rank < n_ranks; rank++) { fmt::print("Reading file for rank {}\n", rank); utility::JSONReader reader{static_cast(rank)}; - reader.readFile(input_dir + "data." + std::to_string(rank) + ".json"); - auto tmpInfo = reader.parse(); + + + // Validate the JSON data file + std::string data_file_path = input_dir + "data." + std::to_string(rank) + ".json"; + if (reader.validate_datafile(data_file_path)) { + reader.readFile(data_file_path); + auto tmpInfo = reader.parse(); + #ifdef VT_TV_OPENMP_ENABLED #if VT_TV_OPENMP_ENABLED #pragma omp critical #endif #endif { info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank)); } + + } else { + throw std::runtime_error("JSON data file is invalid: " + data_file_path); + } } } diff --git a/tests/test_bindings.py b/tests/test_bindings.py index 45fbc9ce4..6c0ed1af1 100644 --- a/tests/test_bindings.py +++ b/tests/test_bindings.py @@ -1,11 +1,11 @@ """This module calls vttv module to test that vttv bindings work as expected""" import json import os - +import sys +import subprocess import yaml import vttv - # source dir is the directory a level above this file source_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -28,11 +28,33 @@ rank_data = [] for rank in range(n_ranks): - with open(f'{source_dir}/data/lb_test_data/data.{rank}.json', 'r', encoding='utf-8') as f: - data = json.load(f) + # JSON data file for rank + datafile = f'{source_dir}/data/lb_test_data/data.{rank}.json' + # Check JSON schema validity + IS_VALID: bool + try: + p = subprocess.run([ + 'python', + os.path.join(source_dir, 'scripts/json_datafile_validator.py'), + "--file_path=" + datafile + ], check=True, capture_output=True) + p.check_returncode() + IS_VALID = True + except subprocess.CalledProcessError as e: + IS_VALID = False + print(e.output.decode() + f"[JSON data file invalid] {datafile}") + + # If validation failed + if IS_VALID is False: + sys.exit(1) + + # Read JSON data file + with open(datafile, 'r', encoding='utf-8') as f: + data = json.load(f) data_serialized = json.dumps(data) + # Add the rank data line rank_data.append((data_serialized)) vttv.tvFromJson(rank_data, params_serialized, n_ranks) diff --git a/tests/unit/generator.h b/tests/unit/generator.h index 09bea5e70..c702edbdf 100644 --- a/tests/unit/generator.h +++ b/tests/unit/generator.h @@ -184,14 +184,23 @@ struct Generator { for (int64_t rank = 0; rank < n_ranks; rank++) { fmt::print("Reading file for rank {}\n", rank); JSONReader reader{static_cast(rank)}; - reader.readFile(input_dir + "data." + std::to_string(rank) + ".json"); - auto tmpInfo = reader.parse(); + + // Validate the JSON data file + std::string data_file_path = input_dir + "data." 
+ std::to_string(rank) + ".json";
+      if (reader.validate_datafile(data_file_path)) {
+        reader.readFile(data_file_path);
+        auto tmpInfo = reader.parse();
+
 #ifdef VT_TV_OPENMP_ENABLED
 #if VT_TV_OPENMP_ENABLED
 #pragma omp critical
 #endif
 #endif
       { info.addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank)); }
+
+      } else {
+        throw std::runtime_error("JSON data file is invalid: " + data_file_path);
+      }
     }
     return info;
   }
diff --git a/tests/unit/render/test_render.cc b/tests/unit/render/test_render.cc
index 6aa2cd8ed..6b35bc3ec 100644
--- a/tests/unit/render/test_render.cc
+++ b/tests/unit/render/test_render.cc
@@ -365,9 +365,18 @@ TEST_F(RenderTest, test_render_construct_from_info) {

   for (NodeType rank = 0; rank < n_ranks; rank++) {
     utility::JSONReader reader{rank};
-    reader.readFile(path + "/data." + std::to_string(rank) + ".json");
-    auto tmpInfo = reader.parse();
-    info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank));
+
+    // Validate the JSON data file
+    std::string data_file_path = path + "/data." + std::to_string(rank) + ".json";
+    if (reader.validate_datafile(data_file_path)) {
+      reader.readFile(data_file_path);
+      auto tmpInfo = reader.parse();
+
+      info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank));
+    } else {
+      ADD_FAILURE() << "JSON data file is invalid: " + data_file_path;
+    }
+
   }

   fmt::print("Num ranks={}\n", info->getNumRanks());

From 8ae16ff9c0b57f71414b8237322078848dec6f3d Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Thu, 26 Sep 2024 14:31:23 +0200
Subject: [PATCH 2/9] #7: Change command line creation and fix some naming issues

---
 scripts/json_datafile_validator.py            |  6 ++---
 ...tafile_schema.py => lb_datafile_schema.py} |  8 +++---
 src/vt-tv/utility/json_reader.cc              | 27 ++++++++++++-------
 3 files changed, 24 insertions(+), 17 deletions(-)
 rename scripts/{LBDatafile_schema.py => lb_datafile_schema.py} (91%)

diff --git a/scripts/json_datafile_validator.py b/scripts/json_datafile_validator.py
index 4a749c6d4..ee74a733d 100644
--- a/scripts/json_datafile_validator.py
+++ b/scripts/json_datafile_validator.py
@@ -9,14 +9,14 @@
 import json
 import logging
 import brotli
-import LBDatafile_schema
+from lb_datafile_schema import LBDatafile_schema

 try:
     project_path = f"{os.sep}".join(os.path.abspath(__file__).split(os.sep)[:-1])
     sys.path.append(project_path)
 except Exception as e:
     print(f"Cannot add project path to system path! Exiting!\nERROR: {e}")
-    raise SystemExit(1)
+    sys.exit(1)


 from schema import And, Optional, Schema
@@ -45,7 +45,7 @@ def get_error_message(iterable_collection: Iterable) -> str:
     def _get_valid_schema(self) -> Schema:
         """ Returns representation of a valid schema
         """
-        valid_schema_data = LBDatafile_schema.LBDatafile_schema
+        valid_schema_data = LBDatafile_schema
         allowed_types_stats = "LBStatsfile"
         valid_schema_stats = Schema(
             {
diff --git a/scripts/LBDatafile_schema.py b/scripts/lb_datafile_schema.py
similarity index 91%
rename from scripts/LBDatafile_schema.py
rename to scripts/lb_datafile_schema.py
index b352ccfb7..d56b757e7 100644
--- a/scripts/LBDatafile_schema.py
+++ b/scripts/lb_datafile_schema.py
@@ -6,11 +6,11 @@ def validate_ids(field):
     Ensure that 1) either seq_id or id is provided,
     and 2) if an object is migratable, collection_id has been set.
     """
-    if 'seq_id' not in field and 'id' not in field:
-        raise ValueError('Either id (bit-encoded) or seq_id must be provided.')
+    if "seq_id" not in field and "id" not in field:
+        raise ValueError("Either id (bit-encoded) or seq_id must be provided.")

-    if field['migratable'] and 'seq_id' in field and 'collection_id' not in field:
-        raise ValueError('If an entity is migratable, it must have a collection_id')
+    if field.get("migratable") is True and "seq_id" in field and "collection_id" not in field:
+        raise ValueError("If an entity is migratable, it must have a collection_id")

     return field

diff --git a/src/vt-tv/utility/json_reader.cc b/src/vt-tv/utility/json_reader.cc
index 0e859c715..1bead4cfc 100644
--- a/src/vt-tv/utility/json_reader.cc
+++ b/src/vt-tv/utility/json_reader.cc
@@ -285,17 +285,24 @@ std::unique_ptr JSONReader::parse() {

 bool JSONReader::validate_datafile(std::string file_path)
 {
+  // Assume the file is valid until the validator reports otherwise
   bool is_valid = true;
-  char cmd[256];
-  strcpy(cmd, "python ");
-  strcpy(cmd, " ");
-  strcat(cmd, SRC_DIR);
-  strcat(cmd, "/scripts/json_datafile_validator.py");
-  strcat(cmd, " ");
-  strcat(cmd, " --file_path=");
-  strcat(cmd, file_path.data());
-
-  if (!system(cmd)) {
+
+  // Prepare command line
+  std::string cmd;
+  cmd +="python";
+  cmd +=" ";
+  cmd +=SRC_DIR;
+  cmd +="/scripts/json_datafile_validator.py";
+  cmd +=" ";
+  cmd +=" --file_path=";
+  cmd +=file_path.data();
+
+  // Run the validator and capture its exit code
+  int exit_code = std::system(cmd.c_str());
+
+  // A non-zero exit code means validation failed
+  if (exit_code > 0) {
     is_valid = false;
   }

From 6e5a55853eeb981b552264ebd0b1e51654b41da3 Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Thu, 26 Sep 2024 14:31:23 +0200
Subject: [PATCH 3/9] #7: Fix whitespace

---
 scripts/json_datafile_validator.py | 2 +-
 src/vt-tv/utility/json_reader.cc   | 2 +-
 src/vt-tv/utility/json_reader.h    | 2 +-
 tests/unit/render/test_render.cc   | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/json_datafile_validator.py b/scripts/json_datafile_validator.py
index ee74a733d..fe1dc111f 100644
--- a/scripts/json_datafile_validator.py
+++ b/scripts/json_datafile_validator.py
@@ -1,4 +1,4 @@
-""" JSON datafile validator 
+""" JSON datafile validator
 """
 import sys
 import os
diff --git a/src/vt-tv/utility/json_reader.cc b/src/vt-tv/utility/json_reader.cc
index 1bead4cfc..7829aa20c 100644
--- a/src/vt-tv/utility/json_reader.cc
+++ b/src/vt-tv/utility/json_reader.cc
@@ -300,7 +300,7 @@ bool JSONReader::validate_datafile(std::string file_path)

   // Run the validator and capture its exit code
   int exit_code = std::system(cmd.c_str());
-  
+
   // A non-zero exit code means validation failed
   if (exit_code > 0) {
     is_valid = false;
diff --git a/src/vt-tv/utility/json_reader.h b/src/vt-tv/utility/json_reader.h
index a7a8fce45..7c12139da 100644
--- a/src/vt-tv/utility/json_reader.h
+++ b/src/vt-tv/utility/json_reader.h
@@ -96,7 +96,7 @@ struct JSONReader {

   /**
    * \brief Check if a JSON data file is well formatted
-   * \param[in] file_path the data file path to validate 
+   * \param[in] file_path the data file path to validate
    */
   bool validate_datafile(std::string file_path);

diff --git a/tests/unit/render/test_render.cc b/tests/unit/render/test_render.cc
index 6b35bc3ec..ce10a4c21 100644
--- a/tests/unit/render/test_render.cc
+++ b/tests/unit/render/test_render.cc
@@ -365,13 +365,13 @@ TEST_F(RenderTest, test_render_construct_from_info) {

   for (NodeType rank = 0; rank < n_ranks; rank++) {
     utility::JSONReader reader{rank};
-    
+
     // Validate the JSON data file
     std::string data_file_path = path + "/data." 
+ std::to_string(rank) + ".json";
     if (reader.validate_datafile(data_file_path)) {
       reader.readFile(data_file_path);
       auto tmpInfo = reader.parse();
-      
+
       info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank));
     } else {
       ADD_FAILURE() << "JSON data file is invalid: " + data_file_path;

From b336f99b37fa3993eefa43c5d90a59ddfefbbb9b Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Thu, 26 Sep 2024 14:33:03 +0200
Subject: [PATCH 4/9] #7: Add missing Python packages

---
 ci/docker/build-and-test-ubuntu.dockerfile | 6 ++++++
 ci/python_build.sh                         | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/ci/docker/build-and-test-ubuntu.dockerfile b/ci/docker/build-and-test-ubuntu.dockerfile
index 9a0e008a9..039bef8e5 100644
--- a/ci/docker/build-and-test-ubuntu.dockerfile
+++ b/ci/docker/build-and-test-ubuntu.dockerfile
@@ -8,6 +8,12 @@ FROM ${BASE_IMAGE} AS base
 ENV CONDA_PATH=/opt/conda
 ENV PATH=$PATH:$CONDA_PATH/bin

+# Setup python requirements for JSON datafile validation
+RUN pip install PyYAML
+RUN pip install Brotli
+RUN pip install schema
+RUN pip install nanobind
+
 COPY . /opt/src/vt-tv

 RUN mkdir -p /opt/build/vt-tv
diff --git a/ci/python_build.sh b/ci/python_build.sh
index 6c3fddb50..71b8022de 100644
--- a/ci/python_build.sh
+++ b/ci/python_build.sh
@@ -19,6 +19,9 @@ for env in $(conda env list | grep ^py | perl -lane 'print $F[-1]' | xargs ls -l

   # Build VT-TV python package
   pip install PyYAML
+  pip install Brotli
+  pip install schema
+  pip install nanobind
   pip install $VT_TV_SRC_DIR

   # Deactivate conda environment

From 013fccd07623547f8c80cfca702b31ca69586dda Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Thu, 26 Sep 2024 15:16:51 +0200
Subject: [PATCH 5/9] #7: CI: install missing Python packages

---
 ci/setup_conda.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ci/setup_conda.sh b/ci/setup_conda.sh
index 9d89c5c01..abb528457 100644
--- a/ci/setup_conda.sh
+++ b/ci/setup_conda.sh
@@ -41,6 +41,9 @@ do
   . 
$CONDA_PATH/etc/profile.d/conda.sh && conda activate py${python_version} echo "Python version: $(python --version)" + pip install PyYAML + pip install Brotli + pip install schema pip install nanobind conda deactivate echo "::endgroup::" From 6ae5691211b07d7ab9234d101bbac7635231e034 Mon Sep 17 00:00:00 2001 From: Maxime N Date: Fri, 27 Sep 2024 15:43:31 +0200 Subject: [PATCH 6/9] Merge commit '6617f3a3aebf41bcd790182ff8ae0e73f4d600f4' into 7-add-json-validation --- bindings/python/config_validator.cc | 43 +++++++++++++++++++ bindings/python/config_validator.h | 64 +++++++++++++++++++++++++++++ bindings/python/tv.cc | 59 ++++++++++++++++++-------- tests/test_bindings.py | 34 ++++++++++----- 4 files changed, 171 insertions(+), 29 deletions(-) create mode 100644 bindings/python/config_validator.cc create mode 100644 bindings/python/config_validator.h diff --git a/bindings/python/config_validator.cc b/bindings/python/config_validator.cc new file mode 100644 index 000000000..c9066b4eb --- /dev/null +++ b/bindings/python/config_validator.cc @@ -0,0 +1,43 @@ +#include "config_validator.h" + +namespace vt::tv::bindings::python { + + /** + * Check if the configuration file is valid + * + * @return true if the configuration is valid + */ + bool ConfigValidator::isValid() + { + bool is_valid = true; + for (std::string parameter: required_parameters) { + if (!config[parameter]) { + is_valid = false; + break; + } + } + return is_valid; + } + + + /** + * Get the list of missing parameters + * + * @return A string containing the list of the missing parameters + */ + std::string ConfigValidator::getMissingRequiredParameters() + { + std::string parameters; + for (std::string parameter: required_parameters) { + if (!config[parameter]) { + if (parameters.empty()) { + parameters = parameter; + } else { + parameters = parameters + ", " + parameter; + } + } + } + return parameters; + } + +} /* end namespace vt::tv::bindings::python */ diff --git a/bindings/python/config_validator.h b/bindings/python/config_validator.h new file mode 100644 index 000000000..8354180bb --- /dev/null +++ b/bindings/python/config_validator.h @@ -0,0 +1,64 @@ +/* +//@HEADER +// ***************************************************************************** +// +// config_validator.h +// DARMA/vt-tv => Virtual Transport -- Task Visualizer +// +// Copyright 2019-2024 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact darma@sandia.gov
//
// *****************************************************************************
//@HEADER
*/
#ifndef vt_tv_config_validator_h
#define vt_tv_config_validator_h

#include <string>
#include <array>

#include <yaml-cpp/yaml.h>

namespace vt::tv::bindings::python {
  /**
   * ConfigValidator: checks that the required visualization parameters are
   * present in the YAML configuration
   */
  class ConfigValidator
  {
    public:
      std::array<std::string, 2> required_parameters = {"output_visualization_dir", "output_visualization_file_stem"};
      YAML::Node config;
      bool isValid();
      std::string getMissingRequiredParameters();
      ConfigValidator(YAML::Node in_config)
      : config(in_config) {}
  };
}

#endif
diff --git a/bindings/python/tv.cc b/bindings/python/tv.cc
index 2d0580d56..2276e1f0a 100644
--- a/bindings/python/tv.cc
+++ b/bindings/python/tv.cc
@@ -1,22 +1,42 @@
 #include "tv.h"
+#include "config_validator.h"

 namespace vt::tv::bindings::python {

 void tvFromJson(const std::vector<std::string>& input_json_per_rank_list, const std::string& input_yaml_params_str, uint64_t num_ranks) {
-  std::string startup_logo = std::string(" __ __\n")
-    + std::string(" _ __/ /_ / /__ __\n")
-    + std::string("| | / / __/ _____ / __/ | / /\n")
-    + std::string("| |/ / / /____/ / /_ | |/ /\n")
-    + std::string("|___/\\__/ \\__/ |___/\n");
-  fmt::print("==============================\n");
-  fmt::print(startup_logo);
-  fmt::print("==============================\n");
+  std::string startup_logo = std::string(" __ __\n")
+    + std::string(" _ __/ /_ / /__ __\n")
+    + std::string("| | / / __/ _____ / __/ | / /\n")
+    + std::string("| |/ / / /____/ / /_ | |/ /\n")
+    + std::string("|___/\\__/ \\__/ |___/\n");
+  fmt::print("==============================\n");
+  fmt::print(startup_logo);
+  fmt::print("==============================\n");
+
+  YAML::Node viz_config;
+  try {
+    // Load the configuration from serialized YAML
+    viz_config = YAML::Load(input_yaml_params_str);
+  } catch (std::exception const& e) {
+    throw std::runtime_error(fmt::format(
+      "vt-tv: Error reading the configuration file: {}",
+      e.what()
+    ));
+  }
+
+  // Config Validator
+  ConfigValidator config_validator(viz_config);

-  // parse the input yaml parameters
-  try {
-    // Load the configuration from serialized YAML
-    YAML::Node viz_config = YAML::Load(input_yaml_params_str);
+  // Check configuration
+  bool is_config_valid = config_validator.isValid();

+  // Throw error if configuration is invalid
+  if (!is_config_valid) {
+    throw std::runtime_error(fmt::format(
+      "vt-tv: Error validating the configuration file: {}",
+      config_validator.getMissingRequiredParameters()
+    ));
+  }

   std::array qoi_request = {
     viz_config["rank_qoi"].as<std::string>(),
@@ -41,10 +61,16 @@ void tvFromJson(const std::vector<std::string>& input_json_per_rank_list, const

   // Throw an error if the output directory does not exist or is not absolute
   if (!std::filesystem::exists(output_path)) {
-    throw std::runtime_error(fmt::format("Visualization output directory does not exist at {}", output_dir));
+    throw std::runtime_error(fmt::format(
+      "vt-tv: Visualization output directory does not exist at {}",
+      
output_dir
+    ));
   }
   if (!output_path.is_absolute()) {
-    throw std::runtime_error("Visualization output directory must be absolute.");
+    throw std::runtime_error(fmt::format(
+      "vt-tv: Visualization output directory must be absolute: {}",
+      output_dir
+    ));
   }

   // append / to avoid problems with file stems
@@ -123,11 +149,8 @@ void tvFromJson(const std::vector<std::string>& input_json_per_rank_list, const
     output_dir, output_file_stem, 1.0, save_meshes, save_pngs, std::numeric_limits::max()
   );
   render.generate(font_size, win_size);
-  } catch (std::exception const& e) {
-    throw std::runtime_error(fmt::format("vt-tv: Error reading the configuration file: {}", e.what()));
-  }

-  fmt::print("vt-tv: Done.\n");
+  fmt::print("vt-tv: Done.\n");
 }

 namespace nb = nanobind;
diff --git a/tests/test_bindings.py b/tests/test_bindings.py
index 6c0ed1af1..85f093b2b 100644
--- a/tests/test_bindings.py
+++ b/tests/test_bindings.py
@@ -1,32 +1,43 @@
 """This module calls vttv module to test that vttv bindings work as expected"""
-import json
 import os
-import sys
 import subprocess
+import json
+import sys
 import yaml
 import vttv

 # source dir is the directory a level above this file
 source_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

-with open(f'{source_dir}/tests/test_bindings_conf.yaml', 'r', encoding='utf-8') as stream:
+# Read the YAML config file
+with open(f"{source_dir}/tests/test_bindings_conf.yaml", 'r', encoding="utf-8") as stream:
     try:
         params = yaml.safe_load(stream)
     except yaml.YAMLError as exc:
         print(exc)
-        exit(1)
+        sys.exit(1)
+
+# Check main key is "visualization"
+if "visualization" not in params:
+    print(
+        "The YAML configuration file is not valid: "
+        "missing required parameter \"visualization\""
+    )
+    sys.exit(1)

 # make output_visualization_dir directory parameter absolute
-if not os.path.isabs(params["visualization"]["output_visualization_dir"]):
-    params["visualization"]["output_visualization_dir"] = source_dir + \
-        "/" + params["visualization"]["output_visualization_dir"]
+if "output_visualization_dir" in params["visualization"]:
+    if not os.path.isabs(params["visualization"]["output_visualization_dir"]):
+        params["visualization"]["output_visualization_dir"] = source_dir + \
+            "/" + params["visualization"]["output_visualization_dir"]

+# Serialize visualization parameters
 params_serialized = yaml.dump(params["visualization"])

+# Calculate n_ranks
 n_ranks = params["visualization"]["x_ranks"] * \
     params["visualization"]["y_ranks"] * params["visualization"]["z_ranks"]

-rank_data = []
+rank_data = []
 for rank in range(n_ranks):
     # JSON data file for rank
     datafile = f'{source_dir}/data/lb_test_data/data.{rank}.json'
@@ -50,11 +61,12 @@
     sys.exit(1)

     # Read JSON data file
-    with open(datafile, 'r', encoding='utf-8') as f:
+    with open(datafile, 'r', encoding="utf-8") as f:
         data = json.load(f)
     data_serialized = json.dumps(data)

-    # Add the rank data line
-    rank_data.append((data_serialized))
+    # Add the serialized data for this rank
+    rank_data.append(data_serialized)

+# Launch vt-tv from JSON data
 vttv.tvFromJson(rank_data, params_serialized, n_ranks)

From 445e6800597350ec40f0a42546aa28ebd55897c1 Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Sat, 28 Sep 2024 18:40:22 +0200
Subject: [PATCH 7/9] #7: Apply TD reviews

---
 ci/docker/build-and-test-ubuntu.dockerfile |  6 ------
 ci/setup_conda.sh                          |  4 ----
 src/vt-tv/utility/json_reader.cc           | 14 +++++++-------
 3 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/ci/docker/build-and-test-ubuntu.dockerfile b/ci/docker/build-and-test-ubuntu.dockerfile
index 
039bef8e5..9a0e008a9 100644
--- a/ci/docker/build-and-test-ubuntu.dockerfile
+++ b/ci/docker/build-and-test-ubuntu.dockerfile
@@ -8,12 +8,6 @@ FROM ${BASE_IMAGE} AS base
 ENV CONDA_PATH=/opt/conda
 ENV PATH=$PATH:$CONDA_PATH/bin

-# Setup python requirements for JSON datafile validation
-RUN pip install PyYAML
-RUN pip install Brotli
-RUN pip install schema
-RUN pip install nanobind
-
 COPY . /opt/src/vt-tv

 RUN mkdir -p /opt/build/vt-tv
diff --git a/ci/setup_conda.sh b/ci/setup_conda.sh
index abb528457..467af5bc5 100644
--- a/ci/setup_conda.sh
+++ b/ci/setup_conda.sh
@@ -41,10 +41,6 @@ do
   . $CONDA_PATH/etc/profile.d/conda.sh && conda activate py${python_version}

   echo "Python version: $(python --version)"
-  pip install PyYAML
-  pip install Brotli
-  pip install schema
-  pip install nanobind
   conda deactivate
   echo "::endgroup::"
 done
diff --git a/src/vt-tv/utility/json_reader.cc b/src/vt-tv/utility/json_reader.cc
index 7829aa20c..ca9b6a24f 100644
--- a/src/vt-tv/utility/json_reader.cc
+++ b/src/vt-tv/utility/json_reader.cc
@@ -290,13 +290,13 @@ bool JSONReader::validate_datafile(std::string file_path)

   // Prepare command line
   std::string cmd;
-  cmd +="python";
-  cmd +=" ";
-  cmd +=SRC_DIR;
-  cmd +="/scripts/json_datafile_validator.py";
-  cmd +=" ";
-  cmd +=" --file_path=";
-  cmd +=file_path.data();
+  cmd += "python";
+  cmd += " ";
+  cmd += SRC_DIR;
+  cmd += "/scripts/json_datafile_validator.py";
+  cmd += " ";
+  cmd += " --file_path=";
+  cmd += file_path.data();

   // Run the validator and capture its exit code
   int exit_code = std::system(cmd.c_str());

From fdcc944310207596b2a7ad9c2452fe0070e5049c Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Sat, 28 Sep 2024 19:18:38 +0200
Subject: [PATCH 8/9] #7: Restore conda pip package installs

---
 ci/setup_conda.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ci/setup_conda.sh b/ci/setup_conda.sh
index 467af5bc5..abb528457 100644
--- a/ci/setup_conda.sh
+++ b/ci/setup_conda.sh
@@ -41,6 +41,10 @@ do
   . $CONDA_PATH/etc/profile.d/conda.sh && conda activate py${python_version}

   echo "Python version: $(python --version)"
+  pip install PyYAML
+  pip install Brotli
+  pip install schema
+  pip install nanobind
   conda deactivate
   echo "::endgroup::"
 done

From 3a392860aa952e9230ec95233d24f26770005fe6 Mon Sep 17 00:00:00 2001
From: Maxime N 
Date: Sun, 29 Sep 2024 08:53:39 +0200
Subject: [PATCH 9/9] #7: Restore Ubuntu pip package installs

---
 ci/docker/build-and-test-ubuntu.dockerfile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ci/docker/build-and-test-ubuntu.dockerfile b/ci/docker/build-and-test-ubuntu.dockerfile
index 9a0e008a9..039bef8e5 100644
--- a/ci/docker/build-and-test-ubuntu.dockerfile
+++ b/ci/docker/build-and-test-ubuntu.dockerfile
@@ -8,6 +8,12 @@ FROM ${BASE_IMAGE} AS base
 ENV CONDA_PATH=/opt/conda
 ENV PATH=$PATH:$CONDA_PATH/bin

+# Setup python requirements for JSON datafile validation
+RUN pip install PyYAML
+RUN pip install Brotli
+RUN pip install schema
+RUN pip install nanobind
+
 COPY . /opt/src/vt-tv

 RUN mkdir -p /opt/build/vt-tv
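
For anyone exercising this series locally, the validator can also be run on its own, outside of the C++ readers and the Python bindings. The following minimal sketch assumes it is run from the vt-tv repository root with the dependencies installed by the CI patches (PyYAML, Brotli, schema); the data file path is the one used by the test suite and is illustrative only.

    # Command-line equivalent:
    #   python scripts/json_datafile_validator.py --file_path=data/lb_test_data/data.0.json
    import json
    import sys

    sys.path.append("scripts")  # make json_datafile_validator and its schema module importable

    from json_datafile_validator import SchemaValidator

    # Load one rank's (uncompressed) JSON data file
    with open("data/lb_test_data/data.0.json", encoding="utf-8") as f:
        data = json.load(f)

    # Check the file against the LBDatafile schema introduced in this series;
    # validate() would raise a detailed error on failure instead of returning a bool
    validator = SchemaValidator(schema_type="LBDatafile")
    print("valid:", validator.is_valid(schema_to_validate=data))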