From d3941bb3d6947c042f2f5f6b0f14d15f84306f93 Mon Sep 17 00:00:00 2001 From: yakovypg Date: Wed, 27 Mar 2024 21:12:26 +0300 Subject: [PATCH] Add bindings and cli support for Fastod algorithm --- cli/cli.py | 29 ++++++++++++-- src/python_bindings/bindings.cpp | 5 ++- src/python_bindings/od/bind_od.cpp | 61 ++++++++++++++++++++++++++++++ src/python_bindings/od/bind_od.h | 9 +++++ 4 files changed, 99 insertions(+), 5 deletions(-) create mode 100644 src/python_bindings/od/bind_od.cpp create mode 100644 src/python_bindings/od/bind_od.h diff --git a/cli/cli.py b/cli/cli.py index 193ba44747..e4ba9dc358 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -13,6 +13,7 @@ class Task(StrEnum): fd = auto() afd = auto() + od = auto() pfd = auto() fd_verification = auto() afd_verification = auto() @@ -31,6 +32,7 @@ class Algorithm(StrEnum): fun = auto() fastfds = auto() aid = auto() + fastod = auto() naive_fd_verifier = auto() naive_afd_verifier = auto() icde09_mfd_verifier = auto() @@ -98,9 +100,10 @@ class Algorithm(StrEnum): 1) Discovery of exact functional dependencies 2) Discovery of approximate functional dependencies 3) Discovery of probabilistic functional dependencies -4) Verification of exact functional dependencies -5) Verification of approximate functional dependencies -6) Verification of metric dependencies +4) Discovery of exact canonical order dependencies +5) Verification of exact functional dependencies +6) Verification of approximate functional dependencies +7) Verification of metric dependencies If you need other types, you should look into the C++ code, the Python bindings or the Web version. @@ -146,6 +149,14 @@ class Algorithm(StrEnum): Algorithms: PYRO, TANE Default: PYRO ''' +OD_HELP = '''Discover order dependencies. For more information about the +primitive and algorithms, refer to the “Effective and complete discovery +of order dependencies via set-based axiomatization” paper by J. Szlichta +et al. 
+ +Algorithms: FASTOD +Default: FASTOD +''' PFD_HELP = '''Discover minimal non-trivial probabilistic functional dependencies. Probabilitistic functional dependencies are defined in the “Functional Dependency Generation and Applications in pay-as-you-go @@ -232,6 +243,11 @@ class Algorithm(StrEnum): “Approximate Discovery of Functional Dependencies for Large Datasets” paper by T.Bleifus et al. ''' +FASTOD_HELP = '''A modern algorithm for discovery of canonical order +dependencies. For more information, refer to the “Effective and complete +discovery of order dependencies via set-based axiomatization” paper by +J. Szlichta et al. +''' NAIVE_FD_VERIFIER_HELP = '''A straightforward partition-based algorithm for verifying whether a given exact functional dependency holds on the specified dataset. For more information, refer to Lemma 2.2 from “TANE: An Efficient @@ -260,6 +276,7 @@ class Algorithm(StrEnum): TASK_HELP_PAGES = { Task.fd: FD_HELP, Task.afd: AFD_HELP, + Task.od: OD_HELP, Task.pfd: PFD_HELP, Task.fd_verification: FD_VERIFICATION_HELP, Task.afd_verification: AFD_VERIFICATION_HELP, @@ -278,6 +295,7 @@ class Algorithm(StrEnum): Algorithm.fun: FUN_HELP, Algorithm.fastfds: FASTFDS_HELP, Algorithm.aid: AID_HELP, + Algorithm.fastod: FASTOD_HELP, Algorithm.naive_fd_verifier: NAIVE_FD_VERIFIER_HELP, Algorithm.naive_afd_verifier: NAIVE_AFD_VERIFIER_HELP, Algorithm.icde09_mfd_verifier: ICDE09_MFD_VERIFIER_HELP @@ -293,6 +311,8 @@ class Algorithm(StrEnum): Algorithm.hyfd), Task.afd: TaskInfo([Algorithm.pyro, Algorithm.tane], Algorithm.pyro), + Task.od: TaskInfo([Algorithm.fastod], + Algorithm.fastod), Task.pfd: TaskInfo([Algorithm.pfdtane], Algorithm.pfdtane), Task.fd_verification: TaskInfo([Algorithm.naive_fd_verifier], Algorithm.naive_fd_verifier), @@ -314,6 +334,7 @@ class Algorithm(StrEnum): Algorithm.fun: desbordante.fd.algorithms.FUN, Algorithm.fastfds: desbordante.fd.algorithms.FastFDs, Algorithm.aid: desbordante.fd.algorithms.Aid, + Algorithm.fastod: 
desbordante.od.algorithms.Fastod, Algorithm.naive_fd_verifier: desbordante.fd_verification.algorithms.FDVerifier, Algorithm.naive_afd_verifier: desbordante.afd_verification.algorithms.FDVerifier, Algorithm.icde09_mfd_verifier: desbordante.mfd_verification.algorithms.MetricVerifier @@ -407,6 +428,8 @@ def get_algo_result(algo: desbordante.Algorithm, algo_name: str) -> Any: result = algo.mfd_holds() case algo_name if algo_name in TASK_INFO[Task.fd].algos: result = algo.get_fds() + case Algorithm.fastod: + result = algo.get_asc_ods() + algo.get_desc_ods() + algo.get_simple_ods() case _: assert False, 'No matching get_result function.' return result diff --git a/src/python_bindings/bindings.cpp b/src/python_bindings/bindings.cpp index c9267f4696..633eb2cb9f 100644 --- a/src/python_bindings/bindings.cpp +++ b/src/python_bindings/bindings.cpp @@ -11,6 +11,7 @@ #include "fd/bind_fd_verification.h" #include "ind/bind_ind.h" #include "mfd/bind_mfd_verification.h" +#include "od/bind_od.h" #include "statistics/bind_statistics.h" #include "ucc/bind_ucc.h" #include "ucc/bind_ucc_verification.h" @@ -31,8 +32,8 @@ PYBIND11_MODULE(desbordante, module) { } for (auto bind_func : - {BindMainClasses, BindDataTypes, BindFd, BindAr, BindUcc, BindAc, BindFdVerification, - BindMfdVerification, BindUccVerification, BindStatistics, BindInd}) { + {BindMainClasses, BindDataTypes, BindFd, BindAr, BindUcc, BindAc, BindOd, + BindFdVerification, BindMfdVerification, BindUccVerification, BindStatistics, BindInd}) { bind_func(module); } } diff --git a/src/python_bindings/od/bind_od.cpp b/src/python_bindings/od/bind_od.cpp new file mode 100644 index 0000000000..b4e8e08373 --- /dev/null +++ b/src/python_bindings/od/bind_od.cpp @@ -0,0 +1,61 @@ +#include "bind_od.h" + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> + +#include "algorithms/od/fastod/fastod.h" +#include "algorithms/od/fastod/model/canonical_od.h" +#include "algorithms/od/mining_algorithms.h" +#include "config/indices/type.h" +#include "py_util/bind_primitive.h" 
+#include "util/bitset_utils.h"
+
+namespace {
+namespace py = pybind11;
+}  // namespace
+
+namespace python_bindings {
+
+// Registers the `od` submodule: the canonical OD classes and the Fastod algorithm.
+void BindOd(py::module_& main_module) {
+    using namespace algos::fastod;
+    auto od_module = main_module.def_submodule("od");
+
+    // Every OD class below compares and hashes through its ToString() text.
+    py::class_<AscCanonicalOD>(od_module, "AscCanonicalOD")
+            .def("__str__", &AscCanonicalOD::ToString)
+            .def("__eq__",
+                 [](AscCanonicalOD const& od1, AscCanonicalOD const& od2) {
+                     return od1.ToString() == od2.ToString();
+                 })
+            .def("__hash__",
+                 [](AscCanonicalOD const& od) { return py::hash(py::str(od.ToString())); });
+
+    py::class_<DescCanonicalOD>(od_module, "DescCanonicalOD")
+            .def("__str__", &DescCanonicalOD::ToString)
+            .def("__eq__",
+                 [](DescCanonicalOD const& od1, DescCanonicalOD const& od2) {
+                     return od1.ToString() == od2.ToString();
+                 })
+            .def("__hash__",
+                 [](DescCanonicalOD const& od) { return py::hash(py::str(od.ToString())); });
+
+    py::class_<SimpleCanonicalOD>(od_module, "SimpleCanonicalOD")
+            .def("__str__", &SimpleCanonicalOD::ToString)
+            .def("__eq__",
+                 [](SimpleCanonicalOD const& od1, SimpleCanonicalOD const& od2) {
+                     return od1.ToString() == od2.ToString();
+                 })
+            .def("__hash__",
+                 [](SimpleCanonicalOD const& od) { return py::hash(py::str(od.ToString())); });
+
+    static constexpr auto kFastodName = "Fastod";
+    auto od_algos_module = BindPrimitiveNoBase<algos::Fastod>(od_module, kFastodName)
+            .def("get_asc_ods", &algos::Fastod::GetAscendingDependencies)
+            .def("get_desc_ods", &algos::Fastod::GetDescendingDependencies)
+            .def("get_simple_ods", &algos::Fastod::GetSimpleDependencies);
+    // Also publish the submodule under the "od_module" attribute name.
+    main_module.attr("od_module") = od_module;
+}
+
+}  // namespace python_bindings
diff --git a/src/python_bindings/od/bind_od.h b/src/python_bindings/od/bind_od.h
new file mode 100644
index 0000000000..4ee1671074
--- /dev/null
+++ b/src/python_bindings/od/bind_od.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace python_bindings {
+// Adds the "od" submodule (OD classes and the Fastod algorithm) to main_module.
+void BindOd(pybind11::module_& main_module);
+
+}  // namespace python_bindings