
Base RunId comparison and hash on all details (#271)
smarr authored Nov 7, 2024
2 parents 22ed497 + a64c9b3 commit 1d8b18a
Showing 32 changed files with 1,488 additions and 240 deletions.
2 changes: 1 addition & 1 deletion docs/config.md
@@ -104,7 +104,7 @@ So, in the case of the `input_sizes` example, the setting for `benchmark`
overrides the settings in a suite or executor.

These priorities and the ability to define different benchmarks, suites, VMs, etc,
hopefully provides sufficient flexibility to encode all desired experiments.
hopefully provide sufficient flexibility to encode all desired experiments.

## Root Elements

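The config.md hunk above documents the override order for a setting such as `input_sizes`: the benchmark-level definition wins over the suite, which wins over the executor. As a rough illustration only (hypothetical names, not ReBench's actual resolver), that lookup amounts to walking the layers from most to least specific:

def resolve(setting, benchmark, suite, executor, default=None):
    """Return the value from the most specific layer that defines `setting`."""
    for layer in (benchmark, suite, executor):
        if setting in layer:
            return layer[setting]
    return default

executor_cfg = {"input_sizes": [1, 10, 100]}
suite_cfg = {"input_sizes": [1, 10]}
benchmark_cfg = {"input_sizes": [2]}

# the benchmark definition overrides the suite and executor definitions
print(resolve("input_sizes", benchmark_cfg, suite_cfg, executor_cfg))  # -> [2]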
2 changes: 1 addition & 1 deletion rebench/__init__.py
@@ -1 +1 @@
__version__ = "1.3.0.dev1"
__version__ = "1.3.0.dev2"
87 changes: 50 additions & 37 deletions rebench/configurator.py
Expand Up @@ -19,22 +19,26 @@
# IN THE SOFTWARE.
import logging
from os.path import dirname, abspath
from typing import Mapping
from typing import Mapping, TYPE_CHECKING

from pykwalify.core import Core
from pykwalify.errors import SchemaError
import yaml

from .configuration_error import ConfigurationError
from .model.build_cmd import BuildCommand
from .model.experiment import Experiment
from .model.exp_run_details import ExpRunDetails
from .model.exp_variables import ExpVariables
from .model.reporting import Reporting
from .model.executor import Executor
from .model.run_id import RunId
from .output import UIError
from .rebenchdb import ReBenchDB
from .ui import escape_braces

if TYPE_CHECKING:
from .model.build_cmd import BuildCommand

# Disable most logging for pykwalify
logging.getLogger("pykwalify").setLevel(logging.CRITICAL)
logging.getLogger("pykwalify").addHandler(logging.NullHandler())
@@ -121,31 +125,24 @@ def _match(filters, bench):
return False


def validate_config(data, validator_list = None):
validator = Core(
source_data=data,
schema_files=[dirname(__file__) + "/rebench-schema.yml"])
if validator_list is not None:
validator_list.append(validator)
validator.validate(raise_exception=True)


def load_config(file_name):
"""
Load the file, verify that it conforms to the schema,
and return the configuration.
"""
config_data = None
try:
with open(file_name, "r") as conf_file: # pylint: disable=unspecified-encoding
data = yaml.safe_load(conf_file)
validator = Core(
source_data=data,
schema_files=[dirname(__file__) + "/rebench-schema.yml"])
try:
validator.validate(raise_exception=True)
validate_gauge_adapters(data)

# add file name and directory to config to be able to use it when loading
# for instance gauge adapters
data["__file__"] = file_name
data["__dir__"] = dirname(abspath(file_name))
except SchemaError as err:
errors = [escape_braces(val_err) for val_err in validator.validation_errors]
raise UIError(
"Validation of " + file_name + " failed.\n{ind}" +
"\n{ind}".join(errors) + "\n", err)
return data
with open(file_name, 'r') as conf_file: # pylint: disable=unspecified-encoding
config_data = yaml.safe_load(conf_file)
except IOError as err:
if err.errno == 2:
assert err.strerror == "No such file or directory"
@@ -156,6 +153,22 @@ def load_config(file_name):
raise UIError("Parsing of the config file "
+ file_name + " failed.\nError " + str(err) + "\n", err)

try:
validators = []
validate_config(config_data, validators)
validate_gauge_adapters(config_data)

# add file name and directory to config to be able to use it when loading
# for instance gauge adapters
config_data['__file__'] = file_name
config_data['__dir__'] = dirname(abspath(file_name))
except SchemaError as err:
errors = [escape_braces(val_err) for val_err in validators[0].validation_errors]
raise UIError(
"Validation of " + file_name + " failed.\n{ind}" +
"\n{ind}".join(errors) + "\n", err)
return config_data


def validate_gauge_adapters(raw_config):
benchmark_suites = raw_config.get("benchmark_suites", {})
Expand All @@ -178,12 +191,12 @@ def __init__(self, raw_config: Mapping, data_store, ui, cli_options=None, cli_re
exp_name=None, data_file=None, build_log=None, run_filter=None):
self._raw_config_for_debugging = raw_config # kept around for debugging only

self.build_log = build_log or raw_config.get("build_log", "build.log")
self.data_file = data_file or raw_config.get("default_data_file", "rebench.data")
self._exp_name = exp_name or raw_config.get("default_experiment", "all")
self.artifact_review = raw_config.get("artifact_review", False)
self.config_dir = raw_config.get("__dir__", None)
self.config_file = raw_config.get("__file__", None)
self.build_log = build_log or raw_config.get('build_log', 'build.log')
self.data_file = data_file or raw_config.get('default_data_file', 'rebench.data')
self._exp_name = exp_name or raw_config.get('default_experiment', 'all')
self.artifact_review = raw_config.get('artifact_review', False)
self.config_dir = raw_config.get('__dir__', None)
self.config_file = raw_config.get('__file__', None)

self._rebench_db_connector = None

@@ -195,9 +208,10 @@ def __init__(self, raw_config: Mapping, data_store, ui, cli_options=None, cli_re
invocations = 1
iterations = 1

self._root_run_details = ExpRunDetails.compile(
raw_config.get('runs', {}), ExpRunDetails.default(
invocations, iterations))
self.base_run_details = self._assemble_base_run_details(
raw_config.get('runs', {}), invocations, iterations)
self.base_variables = ExpVariables.empty()

self._root_reporting = Reporting.compile(
raw_config.get('reporting', {}), Reporting.empty(cli_reporter), cli_options, ui)

@@ -219,7 +233,7 @@ def __init__(self, raw_config: Mapping, data_store, ui, cli_options=None, cli_re
self.data_store = data_store
self._process_cli_options()

self.deduplicated_build_commands: Mapping[BuildCommand, BuildCommand] = {}
self.deduplicated_build_commands: dict[BuildCommand, BuildCommand] = {}

self.run_filter = _RunFilter(run_filter)

@@ -229,6 +243,9 @@ def __init__(self, raw_config: Mapping, data_store, ui, cli_options=None, cli_re
experiments = raw_config.get("experiments", {})
self._experiments = self._compile_experiments(experiments)

def _assemble_base_run_details(self, run_config, invocations, iterations):
return ExpRunDetails.compile(run_config, ExpRunDetails.default(invocations, iterations))

@property
def use_rebench_db(self):
report_results = self.options is None or self.options.use_data_reporting
@@ -284,10 +301,6 @@ def experiment_name(self):
def reporting(self):
return self._root_reporting

@property
def run_details(self):
return self._root_run_details

def has_executor(self, executor_name):
return executor_name in self._executors

@@ -315,15 +328,15 @@ def get_experiments(self):
def get_experiment(self, name):
return self._experiments[name]

def get_runs(self):
def get_runs(self) -> set[RunId]:
runs = set()
for exp in list(self._experiments.values()):
runs |= exp.runs

if self.options and self.options.setup_only:
# filter out runs we don't need to trigger a build
runs_with_builds = set()
build_commands = set()
build_commands: set[BuildCommand] = set()

for run in runs:
commands = run.build_commands()
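For context on the refactoring above: schema validation moved into a separate validate_config helper that optionally appends the pykwalify validator to a caller-supplied list, so load_config can still report validator.validation_errors once a SchemaError is caught. A minimal, self-contained sketch of that pattern follows; the schema and config data are invented for illustration and are not ReBench's:

import tempfile

import yaml
from pykwalify.core import Core
from pykwalify.errors import SchemaError

SCHEMA = """
type: map
mapping:
  benchmark_suites:
    type: any
    required: true
"""

def validate_config(data, schema_file, validator_list=None):
    validator = Core(source_data=data, schema_files=[schema_file])
    if validator_list is not None:
        validator_list.append(validator)  # keep a handle for error reporting
    validator.validate(raise_exception=True)

with tempfile.NamedTemporaryFile("w", suffix=".yml", delete=False) as schema:
    schema.write(SCHEMA)

config = yaml.safe_load("experiments: {}")  # missing the required benchmark_suites key
validators = []
try:
    validate_config(config, schema.name, validators)
except SchemaError:
    # same shape as load_config above: report what the collected validator saw
    for msg in validators[0].validation_errors:
        print(msg)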
31 changes: 22 additions & 9 deletions rebench/executor.py
@@ -26,15 +26,22 @@
import subprocess
from threading import Thread, RLock
from time import time
from typing import TYPE_CHECKING, Optional

from . import subprocess_with_timeout as subprocess_timeout
from .denoise import paths as denoise_paths
from .denoise_client import add_denoise_python_path_to_env, get_number_of_cores
from .interop.adapter import ExecutionDeliveredNoResults, instantiate_adapter, OutputNotParseable, \
ResultsIndicatedAsInvalid
from .model.build_cmd import BuildCommand
from .ui import escape_braces



if TYPE_CHECKING:
from .model.run_id import RunId


class FailedBuilding(Exception):
"""The exception to be raised when building of the executor or suite failed."""
def __init__(self, name, build_command):
@@ -109,9 +116,10 @@ def _indicate_progress(self, completed_task, run):
self.ui.step_spinner(self._runs_completed, label)

def indicate_build(self, run_id):
run_id_names = run_id.as_str_list()
exe_name = run_id.benchmark.suite.executor.name
suite_name = run_id.benchmark.suite.name
self.ui.step_spinner(
self._runs_completed, "Run build for %s %s" % (run_id_names[1], run_id_names[2]))
self._runs_completed, f"Run build for {exe_name} {suite_name}")

def execute(self):
self._total_num_runs = len(self._executor.runs)
@@ -361,26 +369,31 @@ def _construct_cmdline(self, run_id, gauge_adapter):

return cmdline

def _build_executor_and_suite(self, run_id):
def _build_executor_and_suite(self, run_id: "RunId"):
name = "E:" + run_id.benchmark.suite.executor.name
build = run_id.benchmark.suite.executor.build
self._process_builds(build, name, run_id)
self._process_builds(build, run_id.benchmark.suite.executor.path, name, run_id)

name = "S:" + run_id.benchmark.suite.name
build = run_id.benchmark.suite.build
self._process_builds(build, name, run_id)
self._process_builds(build, run_id.benchmark.suite.location, name, run_id)

def _process_builds(self, build, name, run_id):
def _process_builds(self, build: Optional[BuildCommand], location, name, run_id):
if not build or build.is_built:
return

if build.build_failed:
run_id.fail_immediately()
raise FailedBuilding(name, build)
self._execute_build_cmd(build, name, run_id)
self._execute_build_cmd(build, location, name, run_id)

def _execute_build_cmd(self, build_command: BuildCommand, location: Optional[str],
name: str, run_id: "RunId"):
assert build_command.location == location,\
"The location of the BuildCommand is only used for equality. "\
"And should always be equal to the one coming from the suite or executor"

def _execute_build_cmd(self, build_command, name, run_id):
path = build_command.location
path = location
if not path or path == ".":
path = os.getcwd()

82 changes: 60 additions & 22 deletions rebench/model/benchmark.py
@@ -17,23 +17,21 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Mapping, Any, Optional

from . import value_with_optional_details
from .benchmark_suite import BenchmarkSuite
from .exp_run_details import ExpRunDetails
from .exp_variables import ExpVariables

if TYPE_CHECKING:
from .benchmark_suite import BenchmarkSuite
from .run_id import RunId
from ..interop.adapter import GaugeAdapter
from ..persistence import DataStore


class Benchmark(object):

@classmethod
def compile(cls, bench, suite, data_store):
def compile(cls, bench, suite):
"""Specialization of the configurations which get executed by using the
suite definitions.
"""
@@ -51,12 +49,11 @@ def compile(cls, bench, suite, data_store):
variables = ExpVariables.compile(details, suite.variables)

return Benchmark(name, command, gauge_adapter, suite,
variables, extra_args, run_details, codespeed_name,
data_store)
variables, extra_args, run_details, codespeed_name)

def __init__(self, name: str, command: str, gauge_adapter: "GaugeAdapter",
suite: "BenchmarkSuite", variables: str, extra_args: str,
run_details: "ExpRunDetails", codespeed_name: str, data_store: "DataStore"):
def __init__(self, name: str, command: str, gauge_adapter: Optional["GaugeAdapter"],
suite: "BenchmarkSuite", variables: Optional[ExpVariables], extra_args: str,
run_details: "ExpRunDetails", codespeed_name: Optional[str]):
assert run_details is None or isinstance(run_details, ExpRunDetails)
self.name = name

@@ -77,19 +74,49 @@ def __init__(self, name: str, command: str, gauge_adapter: "GaugeAdapter",
self.suite = suite

self.variables = variables

# the compiled runs, these might be shared with other benchmarks/suites
self._runs: set[RunId] = set()

data_store.register_config(self)

def add_run(self, run):
self._runs.add(run)
self._hash = None

@property
def execute_exclusively(self):
return self.run_details.execute_exclusively

def __eq__(self, other) -> bool:
return self is other or (
self.name == other.name and
self.command == other.command and
self.extra_args == other.extra_args and
self.run_details == other.run_details and
self.variables == other.variables and
self.suite == other.suite)

# pylint: disable-next=too-many-return-statements
def __lt__(self, other) -> bool:
if self is other:
return False

if self.name != other.name:
return self.name < other.name

if self.command != other.command:
return self.command < other.command

if self.suite != other.suite:
return self.suite < other.suite

if self.extra_args != other.extra_args:
return self.extra_args < other.extra_args

if self.run_details != other.run_details:
return self.run_details < other.run_details

return self.variables < other.variables

def __hash__(self):
if self._hash is None:
self._hash = hash((self.name, self.command, self.extra_args, self.run_details,
self.variables, self.suite))
return self._hash

def __str__(self):
return "%s, executor:%s, suite:%s, args:'%s'" % (
self.name, self.suite.executor.name, self.suite.name, self.extra_args or '')
@@ -106,16 +133,27 @@ def as_str_list(self):
'' if self.extra_args is None else str(self.extra_args)]

def as_dict(self):
return {
result = {
"name": self.name,
"command": self.command,
"runDetails": self.run_details.as_dict(),
"suite": self.suite.as_dict(),
"variables": self.variables.as_dict()
}

if self.extra_args is not None:
result["extra_args"] = self.extra_args
return result

@classmethod
def from_str_list(cls, data_store, str_list):
return data_store.get_config(str_list[0], str_list[1], str_list[2],
None if str_list[3] == '' else str_list[3])
def from_dict(cls, data: Mapping[str, Any]) -> "Benchmark":
run_details = ExpRunDetails.from_dict(data["runDetails"])
suite = BenchmarkSuite.from_dict(data["suite"])
variables = ExpVariables.from_dict(data.get("variables", None))

return Benchmark(data["name"], data["command"], None, suite, variables,
data.get("extra_args", None), run_details, None)

@classmethod
def get_column_headers(cls):
return ["benchmark", "executor", "suite", "extraArgs"]