Skip to content

Commit

Permalink
Merge pull request #122 from DARMA-tasking/7-add-json-validation
Browse files Browse the repository at this point in the history
#7: Add JSON validation
  • Loading branch information
maxime-bfsquall authored Sep 29, 2024
2 parents 0814dd6 + 3a39286 commit 0030760
Show file tree
Hide file tree
Showing 11 changed files with 696 additions and 9 deletions.
6 changes: 6 additions & 0 deletions ci/docker/build-and-test-ubuntu.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ FROM ${BASE_IMAGE} AS base
ENV CONDA_PATH=/opt/conda
ENV PATH=$PATH:$CONDA_PATH/bin

# Setup python requirements for JSON datafile validation
RUN pip install PyYAML
RUN pip install Brotli
RUN pip install schema
RUN pip install nanobind

COPY . /opt/src/vt-tv
RUN mkdir -p /opt/build/vt-tv

Expand Down
3 changes: 3 additions & 0 deletions ci/python_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ for env in $(conda env list | grep ^py | perl -lane 'print $F[-1]' | xargs ls -l

# Build VT-TV python package
pip install PyYAML
pip install Brotli
pip install schema
pip install nanobind
pip install $VT_TV_SRC_DIR

# Deactivate conda environment
Expand Down
3 changes: 3 additions & 0 deletions ci/setup_conda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ do

. $CONDA_PATH/etc/profile.d/conda.sh && conda activate py${python_version}
echo "Python version: $(python --version)"
pip install PyYAML
pip install Brotli
pip install schema
pip install nanobind
conda deactivate
echo "::endgroup::"
Expand Down
488 changes: 488 additions & 0 deletions scripts/json_datafile_validator.py

Large diffs are not rendered by default.

101 changes: 101 additions & 0 deletions scripts/lb_datafile_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""LB data JSON file schema"""
from schema import And, Optional, Schema

def validate_ids(field):
    """
    Validate entity identification fields.

    Ensures that 1) at least one of ``seq_id`` or ``id`` is present, and
    2) a migratable entity identified via ``seq_id`` also carries a
    ``collection_id`` (a bit-encoded ``id`` already embeds it).

    :param field: the entity dict under validation
    :return: the unchanged ``field`` dict when valid
    :raises ValueError: when neither id form is present, or a migratable
        seq_id-identified entity lacks a collection_id
    """
    has_id = "id" in field
    has_seq_id = "seq_id" in field

    if not (has_id or has_seq_id):
        raise ValueError("Either id (bit-encoded) or seq_id must be provided.")

    needs_collection_id = field.get("migratable") is True and has_seq_id
    if needs_collection_id and "collection_id" not in field:
        raise ValueError("If an entity is migratable, it must have a collection_id")

    return field

# Declarative schema for a VT load-balancing JSON data file.
# Literal values inside And(...) are equality checks; Optional keys may be
# absent; plain string keys are required.
LBDatafile_schema = Schema(
  {
    # File type tag; when present it must be exactly "LBDatafile".
    Optional('type'): And(str, "LBDatafile", error="'LBDatafile' must be chosen."),
    # Per-file metadata emitted by VT.
    Optional('metadata'): {
      Optional('type'): And(str, "LBDatafile", error="'LBDatafile' must be chosen."),
      Optional('rank'): int,
      # Node-sharing information for this rank.
      Optional('shared_node'): {
        'id': int,
        'size': int,
        'rank': int,
        'num_nodes': int,
      },
      # Which phases were skipped or duplicated, as explicit lists and
      # [start, end] ranges.
      Optional('phases'): {
        Optional('count'): int,
        'skipped': {
          'list': [int],
          'range': [[int]],
        },
        'identical_to_previous': {
          'list': [int],
          'range': [[int]],
        },
      },
      Optional('attributes'): dict
    },
    # One entry per recorded phase.
    'phases': [
      {
        'id': int,
        # Work units executed in this phase.
        'tasks': [
          {
            # Entity identity; validate_ids enforces the id/seq_id and
            # migratable/collection_id invariants.
            'entity': And({
              Optional('collection_id'): int,
              'home': int,
              Optional('id'): int,
              Optional('seq_id'): int,
              Optional('index'): [int],
              'type': str,
              'migratable': bool,
              Optional('objgroup_id'): int
            }, validate_ids),
            'node': int,
            'resource': str,
            # Optional per-subphase timing breakdown.
            Optional('subphases'): [
              {
                'id': int,
                'time': float,
              }
            ],
            'time': float,
            Optional('user_defined'): dict,
            Optional('attributes'): dict
          },
        ],
        # Communication edges between entities during this phase.
        Optional('communications'): [
          {
            'type': str,
            # Receiving endpoint; same id invariants as task entities.
            'to': And({
              'type': str,
              Optional('id'): int,
              Optional('seq_id'): int,
              Optional('home'): int,
              Optional('collection_id'): int,
              Optional('migratable'): bool,
              Optional('index'): [int],
              Optional('objgroup_id'): int,
            }, validate_ids),
            'messages': int,
            # Sending endpoint; same id invariants as task entities.
            'from': And({
              'type': str,
              Optional('id'): int,
              Optional('seq_id'): int,
              Optional('home'): int,
              Optional('collection_id'): int,
              Optional('migratable'): bool,
              Optional('index'): [int],
              Optional('objgroup_id'): int,
            }, validate_ids),
            'bytes': float
          }
        ],
        Optional('user_defined'): dict
      },
    ]
  }
)
29 changes: 29 additions & 0 deletions src/vt-tv/utility/json_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@
#include <fmt-vt/format.h>

#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <string.h>

namespace vt::tv::utility {

Expand Down Expand Up @@ -280,4 +283,30 @@ std::unique_ptr<Info> JSONReader::parse() {
return std::make_unique<Info>(std::move(object_info), std::move(rank_info));
}

bool JSONReader::validate_datafile(std::string file_path)
{
  // Build the validator invocation. The path is quoted so file paths
  // containing spaces survive the shell.
  std::string cmd;
  cmd += "python ";
  cmd += SRC_DIR;
  cmd += "/scripts/json_datafile_validator.py";
  cmd += " --file_path=\"";
  cmd += file_path;
  cmd += "\"";

  // std::system returns 0 when the validator ran and accepted the file.
  // A negative value (e.g. -1) means the command processor itself could
  // not be launched, so any non-zero result must be treated as invalid —
  // the original `> 0` check wrongly reported such launch failures as valid.
  int exit_code = std::system(cmd.c_str());

  return exit_code == 0;
}

} /* end namespace vt::tv::utility */
6 changes: 6 additions & 0 deletions src/vt-tv/utility/json_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ struct JSONReader {
*/
std::unique_ptr<Info> parse();

/**
 * \brief Check if a JSON data file is well formatted
 *
 * Validation is delegated to an external python script
 * (scripts/json_datafile_validator.py) invoked on \c file_path.
 *
 * \param[in] file_path the data file path to validate
 *
 * \return whether the data file passed validation
 */
bool validate_datafile(std::string file_path);

private:
NodeType rank_ = 0;
std::unique_ptr<nlohmann::json> json_ = nullptr;
Expand Down
13 changes: 11 additions & 2 deletions src/vt-tv/utility/parse_render.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,21 @@ void ParseRender::parseAndRender(

fmt::print("Reading file for rank {}\n", rank);
utility::JSONReader reader{static_cast<NodeType>(rank)};
reader.readFile(filepath);
auto tmpInfo = reader.parse();

// Validate the JSON data file
std::string data_file_path = input_dir + "data." + std::to_string(rank) + ".json";
if (reader.validate_datafile(data_file_path)) {
reader.readFile(data_file_path);
auto tmpInfo = reader.parse();

#if VT_TV_OPENMP_ENABLED
#pragma omp critical
#endif
{ info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank)); }

} else {
throw std::runtime_error("JSON data file is invalid: " + data_file_path);
}
}
std::size_t n_ranks = config["input"]["n_ranks"].as<std::size_t>();
if (info->getNumRanks() != n_ranks) {
Expand Down
28 changes: 26 additions & 2 deletions tests/test_bindings.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""This module calls vttv module to test that vttv bindings work as expected"""
import os
import subprocess
import json
import sys
import yaml
Expand All @@ -9,7 +10,7 @@
source_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Read the YAML config file
with open(f"{source_dir}/tests/test_bindings_conf.yaml", "r", encoding="utf-8") as stream:
with open(f"{source_dir}/tests/test_bindings_conf.yaml", 'r', encoding="utf-8") as stream:
try:
params = yaml.safe_load(stream)
except yaml.YAMLError as exc:
Expand Down Expand Up @@ -38,8 +39,31 @@

rank_data = []
for rank in range(n_ranks):
with open(f"{source_dir}/data/lb_test_data/data.{rank}.json", "r", encoding="utf-8") as f:
# JSON data file for rank
datafile = f'{source_dir}/data/lb_test_data/data.{rank}.json'

# Check JSON schema validity
IS_VALID: bool
try:
p = subprocess.run([
'python',
os.path.join(source_dir, 'scripts/json_datafile_validator.py'),
"--file_path=" + datafile
], check=True, capture_output=True)
p.check_returncode()
IS_VALID = True
except subprocess.CalledProcessError as e:
IS_VALID = False
print(e.output.decode() + f"[JSON data file invalid] {datafile}")

# If validation failed
if IS_VALID is False:
sys.exit(1)

# Read JSON data file
with open(datafile, 'r', encoding="utf-8") as f:
data = json.load(f)
data_serialized = json.dumps(data)

# Add serialized data into the rank
rank_data.append((json.dumps(data)))
Expand Down
13 changes: 11 additions & 2 deletions tests/unit/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,23 @@ struct Generator {
for (int64_t rank = 0; rank < n_ranks; rank++) {
fmt::print("Reading file for rank {}\n", rank);
JSONReader reader{static_cast<NodeType>(rank)};
reader.readFile(input_dir + "data." + std::to_string(rank) + ".json");
auto tmpInfo = reader.parse();

// Validate the JSON data file
std::string data_file_path = input_dir + "data." + std::to_string(rank) + ".json";
if (reader.validate_datafile(data_file_path)) {
reader.readFile(data_file_path);
auto tmpInfo = reader.parse();

#ifdef VT_TV_OPENMP_ENABLED
#if VT_TV_OPENMP_ENABLED
#pragma omp critical
#endif
#endif
{ info.addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank)); }

} else {
throw std::runtime_error("JSON data file is invalid: " + data_file_path);
}
}
return info;
}
Expand Down
15 changes: 12 additions & 3 deletions tests/unit/render/test_render.cc
Original file line number Diff line number Diff line change
Expand Up @@ -365,9 +365,18 @@ TEST_F(RenderTest, test_render_construct_from_info) {

for (NodeType rank = 0; rank < n_ranks; rank++) {
utility::JSONReader reader{rank};
reader.readFile(path + "/data." + std::to_string(rank) + ".json");
auto tmpInfo = reader.parse();
info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank));

// Validate the JSON data file
std::string data_file_path = path + "/data." + std::to_string(rank) + ".json";
if (reader.validate_datafile(data_file_path)) {
reader.readFile(data_file_path);
auto tmpInfo = reader.parse();

info->addInfo(tmpInfo->getObjectInfo(), tmpInfo->getRank(rank));
} else {
ADD_FAILURE() << "JSON data file is invalid: " + data_file_path;
}

}

fmt::print("Num ranks={}\n", info->getNumRanks());
Expand Down

0 comments on commit 0030760

Please sign in to comment.