Feature/sqlite bench #354

Open · wants to merge 29 commits into base: develop

Changes from all commits (29 commits)
ae7f63b
Merge pull request #352 from reagento/develop
zhPavel Dec 14, 2024
fc1c1aa
migration for sqlite schema
close2code-palm Dec 24, 2024
625ed99
write input typing
close2code-palm Dec 25, 2024
e17b437
database writes on each bench
close2code-palm Dec 25, 2024
a3f40d4
pysqlite for sqlite version pin
close2code-palm Dec 30, 2024
44035cc
writing to db through the interface
close2code-palm Dec 30, 2024
60fa1d5
abstraction over write and read, write implementation for sqlite3
close2code-palm Dec 31, 2024
2b0dadb
reads and writes implementation via abstraction
close2code-palm Dec 31, 2024
ab04ba4
rollback of zip into db, pull review corrections, interface cleanup
close2code-palm Jan 3, 2025
7cd3409
no pysqlite3 for tests, zip writing
close2code-palm Jan 3, 2025
b01eeeb
pysqlite3 deps rm
close2code-palm Jan 3, 2025
b3f47ef
sql formatting, naming, dead code rm
close2code-palm Jan 5, 2025
24bc231
rm dead, operator factory by args for executors
close2code-palm Jan 5, 2025
13bd90d
checker with factory
close2code-palm Jan 5, 2025
1219199
fix of impossible multiple statements in one execution
close2code-palm Jan 5, 2025
48da7d0
connection close, no temp file, extra variable rm
close2code-palm Jan 5, 2025
22e3b7b
release rework, protocol composition, linters fix, namings
close2code-palm Jan 6, 2025
04321db
docs env key (3.8 support until releaser fix)
close2code-palm Jan 6, 2025
a05290e
bench_nexus functionality returned back to module
close2code-palm Jan 8, 2025
3846131
imports fix
close2code-palm Jan 8, 2025
9cda529
execute script for database initialization
close2code-palm Jan 16, 2025
73aa5d8
env to files removal, sqlite no index
close2code-palm Jan 18, 2025
e72a207
no return in abstract methods
close2code-palm Jan 20, 2025
eb63cec
ellipsis in abstract only
close2code-palm Jan 21, 2025
f100ed3
bench_result_file moved to filesys_operator
close2code-palm Jan 22, 2025
9168368
proper naming for method-struct processor
close2code-palm Jan 25, 2025
61a06b4
database automatic initialization in factory
close2code-palm Jan 26, 2025
81646f8
pr commentaries application
close2code-palm Jan 28, 2025
fabf207
release encapsulation fix
close2code-palm Jan 28, 2025
275 changes: 165 additions & 110 deletions benchmarks/benchmarks/bench_nexus.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions benchmarks/benchmarks/gh_issues/hub_dumping.py
@@ -13,6 +13,8 @@
bench_schematics,
)
from benchmarks.pybench.director_api import BenchmarkDirector, BenchSchema, CheckParams, PlotParams
from benchmarks.pybench.persistence.common import BenchMeta
from benchmarks.pybench.persistence.database import SQLite3BenchOperator, sqlite_operator_factory

director = BenchmarkDirector(
data_dir=Path(__file__).parent.parent.parent / "data" / "gh_issues" / "dumping",
@@ -29,6 +31,7 @@
check_params=lambda env_spec: CheckParams(
stdev_rel_threshold=0.07 if env_spec["py_impl"] == "pypy" else 0.04,
),
meta=BenchMeta(benchmark_name="gh_issues", benchmark_subname="dumping"),
)

director.add(
3 changes: 3 additions & 0 deletions benchmarks/benchmarks/gh_issues/hub_loading.py
@@ -12,6 +12,8 @@
bench_schematics,
)
from benchmarks.pybench.director_api import BenchmarkDirector, BenchSchema, CheckParams, PlotParams
from benchmarks.pybench.persistence.common import BenchMeta
from benchmarks.pybench.persistence.database import SQLite3BenchOperator, sqlite_operator_factory

director = BenchmarkDirector(
data_dir=Path(__file__).parent.parent.parent / "data" / "gh_issues" / "loading",
@@ -28,6 +30,7 @@
check_params=lambda env_spec: CheckParams(
stdev_rel_threshold=0.07 if env_spec["py_impl"] == "pypy" else 0.04,
),
meta=BenchMeta(benchmark_name="gh_issues", benchmark_subname="loading"),
)

director.add(
101 changes: 64 additions & 37 deletions benchmarks/benchmarks/pybench/director_api.py
@@ -1,5 +1,6 @@
# pylint: disable=import-error,no-name-in-module
# ruff: noqa: T201, S603
import datetime
import importlib.metadata
import inspect
import json
@@ -18,6 +19,9 @@
import pyperf
from pyperf._cli import format_checks

from benchmarks.pybench.persistence.common import BenchAccessProto, BenchMeta, BenchOperator, BenchSchemaProto
from benchmarks.pybench.persistence.database import BenchRecord, sqlite_operator_factory
from benchmarks.pybench.persistence.filesystem import filesystem_operator_factory
from benchmarks.pybench.utils import get_function_object_ref, load_by_object_ref

__all__ = (
@@ -32,14 +36,20 @@
EnvSpec = Mapping[str, str]


def operator_factory(accessor: BenchAccessProto, *, sqlite: bool) -> BenchOperator:
if sqlite:
return sqlite_operator_factory(accessor)
return filesystem_operator_factory(accessor)


@dataclass(frozen=True)
class CheckParams:
stdev_rel_threshold: Optional[float] = None
ignore_pyperf_warnings: Optional[bool] = None


@dataclass(frozen=True)
class BenchSchema:
class BenchSchema(BenchSchemaProto):
entry_point: Union[Callable, str]
base: str
tags: Iterable[str]
@@ -63,14 +73,16 @@ class PlotParams:
)


class BenchAccessor:
class BenchAccessor(BenchAccessProto):
def __init__(
self,
data_dir: Path,
env_spec: EnvSpec,
check_params: Callable[[EnvSpec], CheckParams],
schemas: Sequence[BenchSchema],
meta: BenchMeta,
):
self.meta = meta
self.data_dir = data_dir
self.env_spec = env_spec
self.all_schemas = schemas
@@ -97,9 +109,6 @@ def override_state(self, env_spec: Optional[Iterable[str]], data_dir: Optional[P
def apply_state(self) -> None:
self.data_dir.mkdir(parents=True, exist_ok=True)

def bench_result_file(self, bench_id: str) -> Path:
return self.data_dir / f"{bench_id}.json"

def env_spec_str(self) -> str:
return "[" + "-".join(f"{k}={v}" for k, v in self.env_spec.items()) + "]"

@@ -172,12 +181,11 @@ def _process_pyperf_warnings(
if not line.startswith("Use")
]

def get_warnings(self, schema: BenchSchema) -> Optional[Sequence[str]]:
result_file_path = self.accessor.bench_result_file(self.accessor.get_id(schema))
if not result_file_path.exists():
def get_warnings(self, schema: BenchSchema, bench_operator: BenchOperator) -> Optional[Sequence[str]]:
data = bench_operator.get_bench_result(schema)
if data is None:
return None

bench = pyperf.Benchmark.load(str(result_file_path))
bench = pyperf.Benchmark.loads(data)
check_params = self.accessor.resolve_check_params(schema)
warnings = self._process_pyperf_warnings(schema, bench, check_params, format_checks(bench))
self_warnings = self._check_yourself(schema, bench, check_params)
@@ -195,11 +203,12 @@ def _check_yourself(self, schema: BenchSchema, bench: pyperf.Benchmark, check_pa
)
return lines

def check_results(self, *, local_id_list: bool = False):
def check_results(self, *, local_id_list: bool = False, sqlite: bool = False):
lines = []
schemas_with_warnings = []
reader = operator_factory(self.accessor, sqlite=sqlite)
for schema in self.accessor.schemas:
warnings = self.get_warnings(schema)
warnings = self.get_warnings(schema, reader)
if warnings is None:
lines.append(f"Result file of {self.accessor.get_id(schema)!r}")
lines.append("")
@@ -216,7 +225,8 @@ def check_results(self, *, local_id_list: bool = False):


class BenchRunner:
def __init__(self, accessor: BenchAccessor, checker: BenchChecker):
def __init__(self, accessor: BenchAccessor, checker: BenchChecker, meta: BenchMeta):
self.meta = meta
self.accessor = accessor
self.checker = checker

@@ -234,6 +244,7 @@ def add_arguments(self, parser: ArgumentParser) -> None:
"--unstable", action="store_true", required=False, default=False,
help="run only unstable or missing benchmarks",
)
parser.add_argument("--sqlite", action="store_true", required=False, default=False)

def run_benchmarks(
self,
@@ -242,17 +253,19 @@ def run_benchmarks(
exclude: Optional[Sequence[str]] = None,
missing: bool = False,
unstable: bool = False,
sqlite: bool = False,
) -> None:
operator = operator_factory(self.accessor, sqlite=sqlite)
schemas: Sequence[BenchSchema]
if missing:
schemas = [
schema for schema in self.accessor.schemas
if not self.accessor.bench_result_file(self.accessor.get_id(schema)).exists()
if not operator.get_bench_result(schema)
]
elif unstable:
schemas = [
schema for schema, warnings in (
(schema, self.checker.get_warnings(schema))
(schema, self.checker.get_warnings(schema, operator))
for schema in self.accessor.schemas
)
if warnings is None or warnings
@@ -282,16 +295,19 @@ def run_benchmarks(

print("Benchmarks to run: " + " ".join(benchmarks_to_run))
for tag in benchmarks_to_run:
self.run_one_benchmark(local_id_to_schema[tag])
self.run_one_benchmark(local_id_to_schema[tag], operator)

def run_one_benchmark(self, schema: BenchSchema) -> None:
def run_one_benchmark(self, schema: BenchSchema, bench_operator: BenchOperator) -> None:
distributions = {
dist: importlib.metadata.version(dist)
for dist in schema.used_distributions
}
bench_id = self.accessor.get_id(schema)
sig = inspect.signature(
load_by_object_ref(schema.entry_point)
if isinstance(schema.entry_point, str) else
schema.entry_point,
)
result_file = self.accessor.bench_result_file(bench_id)
with TemporaryDirectory() as dir_name:
temp_file = Path(dir_name) / f"{bench_id}.json"
print(f"start: {bench_id}")
@@ -310,23 +326,31 @@ def run_one_benchmark(self, schema: BenchSchema) -> None:
"base": schema.base,
"tags": schema.tags,
"kwargs": schema.kwargs,
"distributions": {
dist: importlib.metadata.version(dist)
for dist in schema.used_distributions
},
"distributions": distributions,
}
result_file.write_text(
json.dumps(
bench_data_text = json.dumps(
result_data,
ensure_ascii=False,
check_circular=False,
),
)
bench = pyperf.Benchmark.load(str(result_file))
bench = pyperf.Benchmark.loads(bench_data_text)
check_params = self.accessor.resolve_check_params(schema)
rel_stddev = bench.stdev() / bench.mean()
print(f"Relative stdev is {rel_stddev:.1%} (max allowed is {check_params.stdev_rel_threshold:.1%})")
print()
bench_data: BenchRecord = {
"base": schema.base,
"kwargs": schema.kwargs,
"distributions": distributions,
"is_actual": True,
"tags": schema.tags,
"data": bench_data_text,
"local_id": self.accessor.get_local_id(schema),
"global_id": self.accessor.get_id(schema),
"benchmark_subname": self.meta.benchmark_subname,
"benchmark_name": self.meta.benchmark_name,
}
bench_operator.write_bench_record(bench_data)

def launch_benchmark(
self,
@@ -360,22 +384,16 @@ def add_arguments(self, parser: ArgumentParser) -> None:
parser.add_argument("--output", "-o", action="store", required=False, type=Path)
parser.add_argument("--dpi", action="store", required=False, type=float, default=100)

def _load_benchmarks(self) -> Iterable[pyperf.Benchmark]:
return [
pyperf.Benchmark.load(
str(self.accessor.bench_result_file(self.accessor.get_id(schema))),
)
for schema in self.accessor.schemas
]

def draw_plot(self, output: Optional[Path], dpi: float):
def draw_plot(self, output: Optional[Path], dpi: float, *, sqlite: bool = False):
operator = operator_factory(self.accessor, sqlite=sqlite)
if output is None:
output = self.accessor.data_dir / f"plot{self.accessor.env_spec_str()}.png"

benches_data = operator.get_all_bench_results()
self._render_plot(
output=output,
dpi=dpi,
benchmarks=self._load_benchmarks(),
benchmarks=[pyperf.Benchmark.loads(data) for data in benches_data],
)

def _render_plot(self, output: Path, dpi: float, benchmarks: Iterable[pyperf.Benchmark]) -> None:
@@ -449,7 +467,9 @@ def __init__(
env_spec: EnvSpec,
check_params: Callable[[EnvSpec], CheckParams],
schemas: Iterable[BenchSchema] = (),
meta: BenchMeta,
):
self.meta = meta
self.data_dir = data_dir
self.env_spec = env_spec
self.plot_params = plot_params
@@ -495,6 +515,12 @@ def _make_parser(
checker: BenchChecker,
) -> ArgumentParser:
parser = ArgumentParser()
parser.add_argument(
"--sqlite",
action="store_true",
default=False,
required=False,
)

subparsers = parser.add_subparsers(required=True)

@@ -527,10 +553,11 @@ def make_accessor(self) -> BenchAccessor:
env_spec=self.env_spec,
check_params=self.check_params,
schemas=self.schemas,
meta=self.meta,
)

def make_bench_runner(self, accessor: BenchAccessor, checker: BenchChecker) -> BenchRunner:
return BenchRunner(accessor, checker)
return BenchRunner(accessor, checker, self.meta)

def make_bench_plotter(self, accessor: BenchAccessor) -> BenchPlotter:
return BenchPlotter(self.plot_params, accessor)
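Note on the read path introduced above: check_results, run_benchmarks, and draw_plot now obtain their storage backend through operator_factory and decode stored results with pyperf.Benchmark.loads instead of reading per-benchmark JSON files. The following is a minimal sketch of that read path, not code from this PR; it assumes an accessor built elsewhere, e.g. by BenchmarkDirector.make_accessor().

import pyperf

from benchmarks.pybench.director_api import BenchAccessor, operator_factory


def load_stored_benchmarks(accessor: BenchAccessor, *, sqlite: bool) -> list[pyperf.Benchmark]:
    # Pick the SQLite or filesystem backend, mirroring check_results/draw_plot.
    operator = operator_factory(accessor, sqlite=sqlite)
    # Results are stored as serialized pyperf JSON strings, so they are decoded
    # with Benchmark.loads rather than the old Benchmark.load(path).
    return [pyperf.Benchmark.loads(data) for data in operator.get_all_bench_results()]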
Empty file.
60 changes: 60 additions & 0 deletions benchmarks/benchmarks/pybench/persistence/common.py
@@ -0,0 +1,60 @@
import abc
import datetime
from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import Any, Mapping, Optional, Protocol, TypedDict


@dataclass(frozen=True)
class BenchMeta:
benchmark_name: str
benchmark_subname: str


class BenchSchemaProto(Protocol):
tags: Iterable[str]
base: str


class BenchAccessProto(Protocol):
meta: BenchMeta
data_dir: Path

@abc.abstractmethod
@cached_property
def schemas(self) -> Sequence[BenchSchemaProto]:
...

@abc.abstractmethod
def get_id(self, schema) -> str:
...


class BenchRecord(TypedDict):
is_actual: bool
benchmark_name: str
benchmark_subname: str
base: str
local_id: str
global_id: str
tags: Iterable[str]
kwargs: Mapping[str, Any]
distributions: dict[str, str]
data: str


class BenchOperator(Protocol):

@abc.abstractmethod
def write_bench_record(self, record: BenchRecord) -> None:
...

@abc.abstractmethod
def get_all_bench_results(self) -> Sequence[str]:
...

@abc.abstractmethod
def get_bench_result(self, schema: Any) -> Optional[str]:
...
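To make the BenchOperator protocol concrete, here is an illustrative in-memory implementation that satisfies it, keyed by the accessor's global id. It reuses BenchAccessProto and BenchRecord defined above and is only a sketch for the reader; the PR itself ships SQLite and filesystem operators.

from typing import Any, Optional, Sequence


class InMemoryBenchOperator:
    """Illustration only: stores records in a dict instead of SQLite or files."""

    def __init__(self, accessor: BenchAccessProto) -> None:
        self._accessor = accessor
        self._records: dict[str, BenchRecord] = {}

    def write_bench_record(self, record: BenchRecord) -> None:
        # Later writes for the same benchmark overwrite earlier ones.
        self._records[record["global_id"]] = record

    def get_all_bench_results(self) -> Sequence[str]:
        # Return the serialized pyperf JSON payloads of every stored record.
        return [record["data"] for record in self._records.values()]

    def get_bench_result(self, schema: Any) -> Optional[str]:
        record = self._records.get(self._accessor.get_id(schema))
        return None if record is None else record["data"]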