diff --git a/examples/algebraic_constraints.py b/examples/algebraic_constraints.py index b2481bf517..0b7c6e080f 100644 --- a/examples/algebraic_constraints.py +++ b/examples/algebraic_constraints.py @@ -1,17 +1,19 @@ import desbordante import pandas import operator +from datetime import datetime -TABLE = 'examples/datasets/cargo_march.csv' +# Note: dates in the dataset must be ISO formatted (YYYY-MM-DD); they are parsed with '%Y-%m-%d' below +TABLE = 'examples/datasets/ACShippingDates.csv' HEADER = 0 SEPARATOR = ',' -P_FUZZ = 0.85 -FUZZINESS = 0.2 +P_FUZZ = 0.7 +FUZZINESS = 0.3 BUMPS_LIMIT = 0 WEIGHT = 0.1 BIN_OPERATION = '-' AC_SEED = 11 -ITERATIONS_LIMIT = 4 +ITERATIONS_LIMIT = 10 OPERATIONS = { '+': (operator.add, 'Sum'), '-': (operator.sub, 'Difference'), @@ -23,7 +25,7 @@ algo = desbordante.ACAlgorithm() df = pandas.read_csv(TABLE, sep=SEPARATOR, header=HEADER) -df_without_id = df[['Delivery date', 'Dispatch date']] +df_without_id = df[['deliveryDate', 'shipDate']] algo.load_data(df=df_without_id) @@ -33,7 +35,7 @@ ac_ranges = algo.get_ac_ranges() for ac_range in ac_ranges: l_col = df_without_id.columns[ac_range.column_indices[0]] - r_col = df_without_id.columns[ac_range.column_indices[1]] + r_col = df_without_id.columns[ac_range.column_indices[1]] print(f'Discovered ranges for ({l_col} {BIN_OPERATION} {r_col}) are:') print(ac_range.ranges) @@ -41,10 +43,12 @@ print() print(f'Rows in which the result of the chosen operation ({BIN_OPERATION}) is outside of discovered ranges:') for ac_exception in ac_exceptions: - id, delivery_date, dispatch_date = df.iloc[ac_exception.row_index] + order_id, delivery_date, ship_date = df.iloc[ac_exception.row_index][['orderID', 'deliveryDate', 'shipDate']] - print(f'id: {id}') + print(f'id: {order_id}') - print(f'Dispatch date: {dispatch_date}') - print(f'Delivery date: {delivery_date}') - print(f'{operation_name}: {operation(delivery_date, dispatch_date)}') + print(f'Shipping date : {ship_date}') + print(f'Delivery date: {delivery_date}') + # Cells are read as strings; parse them so the difference is a timedelta in days + ship_day = datetime.strptime(ship_date, '%Y-%m-%d').date() + delivery_day = 
datetime.strptime(delivery_date, '%Y-%m-%d').date() + print(f'{operation_name}: {operation(delivery_day, ship_day).days}') print() - diff --git a/examples/datasets/ACShippingDates.csv b/examples/datasets/ACShippingDates.csv new file mode 100644 index 0000000000..58de1093d9 --- /dev/null +++ b/examples/datasets/ACShippingDates.csv @@ -0,0 +1,100 @@ +orderID,deliveryDate,shipDate +8E3,2008-08-02,2008-02-08 +2Z8,2008-05-22,2008-02-25 +0J0,2008-06-20,2008-06-12 +2U0,2008-07-22,2008-05-13 +4H8,2008-07-01,2008-05-26 +0A4,2008-05-03,2008-04-28 +6J2,2008-06-19,2008-04-11 +0W5,2008-06-30,2008-05-25 +7G9,2008-06-11,2008-01-23 +0H6,2008-08-10,2008-07-23 +9Y3,2008-07-30,2008-03-26 +5N6,2008-04-17,2008-02-10 +9N2,2008-05-07,2008-02-12 +5Y8,2008-03-09,2008-02-21 +1V2,2008-07-28,2008-06-14 +7Q4,2008-07-24,2008-07-11 +6H1,2008-07-24,2008-06-24 +5J7,2008-06-26,2008-03-16 +0H4,2008-07-22,2008-02-23 +5Z8,2008-07-03,2008-02-01 +7P2,2008-06-23,2008-06-21 +1Q0,2008-07-18,2008-07-10 +4W1,2008-07-21,2008-05-02 +3T4,2008-07-19,2008-05-12 +7C5,2008-08-03,2008-07-10 +5Z6,2008-05-08,2008-04-25 +9A6,2008-08-15,2008-06-25 +2Q7,2008-08-08,2008-03-29 +2V5,2008-04-15,2008-02-28 +5A4,2008-08-12,2008-07-28 +1Y3,2008-08-04,2008-07-17 +4H1,2008-07-01,2008-06-14 +1P9,2008-08-10,2008-03-29 +9E0,2008-05-24,2008-05-08 +6B4,2008-08-10,2008-03-31 +9R1,2008-06-14,2008-04-12 +0T9,2008-06-11,2008-06-09 +8M1,2008-07-24,2008-04-05 +1R7,2008-06-27,2008-05-29 +0V7,2008-05-10,2008-05-09 +2T1,2008-07-18,2008-04-30 +5G4,2008-07-09,2008-04-24 +2K8,2008-08-06,2008-08-04 +4O6,2008-06-30,2008-05-06 +0T4,2008-08-14,2008-07-26 +3Y9,2008-05-25,2008-03-23 +5S3,2008-05-28,2008-04-28 +0E0,2008-08-12,2008-08-03 +4H7,2008-08-09,2008-01-30 +4Q7,2008-04-04,2008-01-11 +4E2,2008-07-22,2008-05-10 +2I6,2008-07-13,2008-04-23 +8K2,2008-08-14,2008-02-05 +4U2,2008-04-03,2008-01-17 +6C9,2008-08-04,2008-03-17 +0J5,2008-05-04,2008-02-27 +0S3,2008-07-30,2008-07-22 +3B0,2008-05-31,2008-03-01 +4V4,2008-07-31,2008-03-09 +3C9,2008-01-24,2008-01-23 
+2F1,2008-03-20,2008-01-14 +4L3,2008-06-23,2008-05-13 +1E4,2008-08-13,2008-08-12 +5H1,2008-08-08,2008-07-20 +2S5,2008-07-04,2008-04-09 +9A0,2008-03-19,2008-03-14 +0P0,2008-07-13,2008-01-15 +6V5,2008-07-09,2008-02-12 +0Z9,2008-07-10,2008-05-22 +5T6,2008-07-24,2008-06-09 +1E9,2008-08-02,2008-03-12 +0R7,2008-04-22,2008-01-26 +5G1,2008-05-04,2008-01-06 +2U2,2008-05-26,2008-03-25 +4H0,2008-06-24,2008-01-25 +9P6,2008-08-14,2008-08-04 +0V0,2008-08-01,2008-07-12 +6Q0,2008-08-12,2008-07-21 +8T9,2008-04-08,2008-01-17 +6O4,2008-05-28,2008-05-11 +8G4,2008-08-14,2008-07-29 +4E5,2008-07-20,2008-07-14 +2K6,2008-07-19,2008-06-16 +8O5,2008-04-12,2008-01-24 +8Q8,2008-08-10,2008-06-24 +9R6,2008-03-15,2008-03-08 +2C3,2008-04-30,2008-01-10 +8F9,2008-07-21,2008-07-13 +7E7,2008-07-26,2008-02-03 +2C1,2008-07-19,2008-03-22 +5F3,2008-08-15,2008-08-03 +1H0,2008-07-20,2008-05-12 +1R5,2008-08-11,2008-07-14 +5C8,2008-06-13,2008-05-24 +6E3,2008-06-05,2008-01-15 +9E7,2008-02-02,2008-01-24 +4C1,2008-03-31,2008-01-12 +4N8,2008-03-28,2008-01-30 +1S8,2008-08-07,2008-06-30 \ No newline at end of file diff --git a/src/core/algorithms/algebraic_constraints/ac_algorithm.cpp b/src/core/algorithms/algebraic_constraints/ac_algorithm.cpp index 8e36822c6b..8ba7c3f990 100644 --- a/src/core/algorithms/algebraic_constraints/ac_algorithm.cpp +++ b/src/core/algorithms/algebraic_constraints/ac_algorithm.cpp @@ -28,16 +28,20 @@ void ACAlgorithm::RegisterOptions() { auto check_and_set_binop = [this](Binop bin_operation) { switch (bin_operation) { case +Binop::Addition: - binop_pointer_ = &model::INumericType::Add; + num_binop_pointer_ = &model::INumericType::Add; + // date_binop_pointer remains nullptr because dates do not support addition break; case +Binop::Subtraction: - binop_pointer_ = &model::INumericType::Sub; + num_binop_pointer_ = &model::INumericType::Sub; + date_binop_pointer_ = &model::DateType::SubDate; break; case +Binop::Multiplication: - binop_pointer_ = &model::INumericType::Mul; + num_binop_pointer_ 
= &model::INumericType::Mul; + // date_binop_pointer remains nullptr because dates do not support multiplication break; case +Binop::Division: - binop_pointer_ = &model::INumericType::Div; + num_binop_pointer_ = &model::INumericType::Div; + // date_binop_pointer remains nullptr because dates do not support division break; default: throw config::ConfigurationError( @@ -147,9 +151,8 @@ std::vector ACAlgorithm::Sampling(std::vector(data.at(lhs_i).GetTypeId(), true), - std::move(ac_pairs), lhs_i, rhs_i)); + ac_pairs_.emplace_back( + ACPairsCollection(data.at(lhs_i).GetTypeId(), std::move(ac_pairs), lhs_i, rhs_i)); return ranges; } @@ -160,7 +163,6 @@ std::vector ACAlgorithm::SamplingIteration( std::vector const& rhs = data.at(rhs_i).GetData(); ac_pairs.clear(); std::mt19937 gen(seed_); - std::bernoulli_distribution d(probability); for (size_t i = 0; i < lhs.size(); ++i) { if (d(gen)) { @@ -169,10 +171,12 @@ std::vector ACAlgorithm::SamplingIteration( if (data[lhs_i].IsNullOrEmpty(i) || data[rhs_i].IsNullOrEmpty(i)) { continue; } - auto res = std::unique_ptr(num_type_->Allocate()); - num_type_->ValueFromStr(res.get(), "0"); + std::unique_ptr res = + std::unique_ptr(type_wrapper_.NumericAllocate()); + type_wrapper_.NumericFromStr(res.get(), "0"); + if (bin_operation_ == +Binop::Division && - num_type_->Compare(r, res.get()) == model::CompareResult::kEqual) { + type_wrapper_.NumericCompare(r, res.get()) == model::CompareResult::kEqual) { continue; } InvokeBinop(l, r, res.get()); @@ -186,7 +190,7 @@ std::vector ACAlgorithm::SamplingIteration( std::sort(ac_pairs.begin(), ac_pairs.end(), [this](std::unique_ptr const& a, std::unique_ptr const& b) { return model::CompareResult::kLess == - this->num_type_->Compare(a->GetRes(), b->GetRes()); + this->type_wrapper_.NumericCompare(a->GetRes(), b->GetRes()); }); return ConstructDisjunctiveRanges(ac_pairs); @@ -207,7 +211,7 @@ void ACAlgorithm::RestrictRangesAmount(std::vector& ranges) co double min_dist = -1; size_t min_index = 1; 
for (size_t i = min_index; i < bumps * 2 - 1; i += 2) { - double dist = num_type_->Dist(ranges.at(i), ranges.at(i + 1)); + double dist = type_wrapper_.Dist(ranges.at(i), ranges.at(i + 1)); if (min_dist == -1 || dist < min_dist) { min_dist = dist; min_index = i; @@ -241,7 +245,7 @@ ACPairsCollection const& ACAlgorithm::GetACPairsByColumns(size_t lhs_i, size_t r return *res; } -void ACAlgorithm::PrintRanges(std::vector const& data) const { +void ACAlgorithm::PrintRanges(std::vector const& data) { for (size_t i = 0; i < ranges_.size(); ++i) { LOG(DEBUG) << "lhs: " << data.at(ranges_[i].col_pair.col_i.first).ToString() << std::endl; LOG(DEBUG) << "rhs: " << data.at(ranges_[i].col_pair.col_i.second).ToString() << std::endl; @@ -249,10 +253,13 @@ void ACAlgorithm::PrintRanges(std::vector const& data) c LOG(DEBUG) << "No intervals were found." << std::endl; continue; } + for (size_t k = 0; k < ranges_[i].ranges.size() - 1; k += 2) { - auto* num_type = ranges_[i].col_pair.num_type.get(); - LOG(DEBUG) << "[" << num_type->ValueToString(ranges_[i].ranges[k]) << ", " - << num_type->ValueToString(ranges_[i].ranges[k + 1]) << "]"; + LOG(DEBUG) << "[" + << ranges_[i].col_pair.type_wrapper.NumericToString(ranges_[i].ranges[k]) + << ", " + << ranges_[i].col_pair.type_wrapper.NumericToString(ranges_[i].ranges[k + 1]) + << "]"; if (k != ranges_[i].ranges.size() - 2) { LOG(DEBUG) << ", "; } @@ -273,11 +280,11 @@ std::vector ACAlgorithm::ConstructDisjunctiveRanges( ACPair const* r_border = nullptr; if (weight_ < 1) { - double delta = num_type_->Dist(ac_pairs.front()->GetRes(), ac_pairs.back()->GetRes()) * + double delta = type_wrapper_.Dist(ac_pairs.front()->GetRes(), ac_pairs.back()->GetRes()) * (weight_ / (1 - weight_)); for (size_t i = 0; i < ac_pairs.size() - 1; ++i) { - if (num_type_->Dist(ac_pairs[i]->GetRes(), ac_pairs[i + 1]->GetRes()) <= delta) { + if (type_wrapper_.Dist(ac_pairs[i]->GetRes(), ac_pairs[i + 1]->GetRes()) <= delta) { r_border = ac_pairs[i + 1].get(); } else { 
ranges.emplace_back(l_border->GetRes()); @@ -305,9 +312,8 @@ RangesCollection ACAlgorithm::ReconstructRangesByColumns(size_t lhs_i, size_t rh ACPairsCollection const& constraints_collection = GetACPairsByColumns(lhs_i, rhs_i); ACPairs const& ac_pairs = constraints_collection.ac_pairs; std::vector ranges = ConstructDisjunctiveRanges(ac_pairs); - model::TypeId type_id = constraints_collection.col_pair.num_type->GetTypeId(); - return RangesCollection{model::CreateSpecificType(type_id, true), - std::move(ranges), lhs_i, rhs_i}; + model::TypeId type_id = constraints_collection.col_pair.type_wrapper.GetTypeId(); + return RangesCollection{type_id, std::move(ranges), lhs_i, rhs_i}; } unsigned long long ACAlgorithm::ExecuteInternal() { @@ -318,23 +324,23 @@ unsigned long long ACAlgorithm::ExecuteInternal() { auto start_time = std::chrono::system_clock::now(); for (size_t col_i = 0; col_i < data.size() - 1; ++col_i) { - if (!data.at(col_i).GetType().IsNumeric()) continue; - num_type_ = - model::CreateSpecificType(data.at(col_i).GetTypeId(), true); + if (!(data.at(col_i).GetType().IsNumeric() || data.at(col_i).GetType().IsDate())) continue; + type_wrapper_.Set(data.at(col_i).GetTypeId()); for (size_t col_k = col_i + 1; col_k < data.size(); ++col_k) { if (data.at(col_i).GetTypeId() == data.at(col_k).GetTypeId()) { - ranges_.emplace_back( - RangesCollection{model::CreateSpecificType( - data.at(col_i).GetTypeId(), true), - Sampling(data, col_i, col_k), col_i, col_k}); + if (data.at(col_i).GetTypeId() == +model::TypeId::kDate && + bin_operation_ != +Binop::Subtraction) { + continue; + } + + ranges_.emplace_back(data.at(col_i).GetTypeId(), Sampling(data, col_i, col_k), + col_i, col_k); /* Because of asymmetry and division by 0, we need to rediscover ranges. 
* We don't need to do that for minus: (column1 - column2) lies in *some ranges* * there we can express one column through another without possible problems */ if (bin_operation_ == +Binop::Division) { - ranges_.emplace_back( - RangesCollection{model::CreateSpecificType( - data.at(col_i).GetTypeId(), true), - Sampling(data, col_k, col_i), col_k, col_i}); + ranges_.emplace_back(data.at(col_i).GetTypeId(), Sampling(data, col_k, col_i), + col_k, col_i); } } } diff --git a/src/core/algorithms/algebraic_constraints/ac_algorithm.h b/src/core/algorithms/algebraic_constraints/ac_algorithm.h index a76688c4bc..1965f03b97 100644 --- a/src/core/algorithms/algebraic_constraints/ac_algorithm.h +++ b/src/core/algorithms/algebraic_constraints/ac_algorithm.h @@ -16,6 +16,7 @@ #include "model/table/column_layout_typed_relation_data.h" #include "model/types/types.h" #include "ranges_collection.h" +#include "type_wrapper.h" #include "typed_column_pair.h" namespace algos { @@ -57,8 +58,9 @@ class ACAlgorithm : public Algorithm { double seed_; std::vector ac_pairs_; std::vector ranges_; - model::INumericType::NumericBinop binop_pointer_ = nullptr; - std::unique_ptr num_type_; + model::INumericType::NumericBinop num_binop_pointer_ = nullptr; + model::DateType::DateBinop date_binop_pointer_ = nullptr; + TypeWrapper type_wrapper_; /* Returns vector with ranges boundaries constructed for columns with lhs_i and rhs_i indices. * Value pairs (by which ranges constructed) fall into sample selection with chosen probability. 
@@ -84,7 +86,11 @@ class ACAlgorithm : public Algorithm { public: void InvokeBinop(std::byte const* l, std::byte const* r, std::byte* res) const { - std::invoke(binop_pointer_, num_type_, l, r, res); + if (type_wrapper_.IsNumeric()) { + std::invoke(num_binop_pointer_, type_wrapper_.num_type, l, r, res); + } else { + std::invoke(date_binop_pointer_, type_wrapper_.date_type, l, r, res); + } } size_t CalculateSampleSize(size_t k_bumps) const; @@ -110,7 +116,7 @@ class ACAlgorithm : public Algorithm { return bin_operation_; } - void PrintRanges(std::vector const& data) const; + void PrintRanges(std::vector const& data); void CollectACExceptions() const { ac_exception_finder_->CollectExceptions(this); diff --git a/src/core/algorithms/algebraic_constraints/ac_exception_finder.cpp b/src/core/algorithms/algebraic_constraints/ac_exception_finder.cpp index 248a5dcff2..ce2d9d6195 100644 --- a/src/core/algorithms/algebraic_constraints/ac_exception_finder.cpp +++ b/src/core/algorithms/algebraic_constraints/ac_exception_finder.cpp @@ -2,21 +2,25 @@ #include "ac_algorithm.h" #include "bin_operation_enum.h" +#include "type_wrapper.h" namespace algos::algebraic_constraints { bool ACExceptionFinder::ValueBelongsToRanges(RangesCollection const& ranges_collection, std::byte const* val) { - model::INumericType* num_type = ranges_collection.col_pair.num_type.get(); for (size_t i = 0; i < ranges_collection.ranges.size() - 1; i += 2) { std::byte const* l_border = ranges_collection.ranges[i]; std::byte const* r_border = ranges_collection.ranges[i + 1]; - if (num_type->Compare(l_border, val) == model::CompareResult::kEqual || - num_type->Compare(val, r_border) == model::CompareResult::kEqual) { + if (ranges_collection.col_pair.type_wrapper.NumericCompare(l_border, val) == + model::CompareResult::kEqual || + ranges_collection.col_pair.type_wrapper.NumericCompare(val, r_border) == + model::CompareResult::kEqual) { return true; } - if (num_type->Compare(l_border, val) == 
model::CompareResult::kLess && - num_type->Compare(val, r_border) == model::CompareResult::kLess) { + if (ranges_collection.col_pair.type_wrapper.NumericCompare(l_border, val) == + model::CompareResult::kLess && + ranges_collection.col_pair.type_wrapper.NumericCompare(val, r_border) == + model::CompareResult::kLess) { return true; } } @@ -39,18 +43,19 @@ void ACExceptionFinder::CollectColumnPairExceptions(std::vector const& lhs = data.at(lhs_i).GetData(); std::vector const& rhs = data.at(rhs_i).GetData(); - std::unique_ptr num_type = - model::CreateSpecificType(data.at(lhs_i).GetTypeId(), true); + TypeWrapper type_wrapper(data.at(lhs_i).GetTypeId()); for (size_t i = 0; i < lhs.size(); ++i) { std::byte const* l = lhs.at(i); std::byte const* r = rhs.at(i); if (data[lhs_i].IsNullOrEmpty(i) || data[rhs_i].IsNullOrEmpty(i)) { continue; } - auto res = std::unique_ptr(num_type->Allocate()); - num_type->ValueFromStr(res.get(), "0"); + std::unique_ptr res = + std::unique_ptr(type_wrapper.NumericAllocate()); + type_wrapper.NumericFromStr(res.get(), "0"); + if (ac_alg_->GetBinOperation() == +Binop::Division && - num_type->Compare(r, res.get()) == model::CompareResult::kEqual) { + type_wrapper.NumericCompare(r, res.get()) == model::CompareResult::kEqual) { continue; } ac_alg_->InvokeBinop(l, r, res.get()); diff --git a/src/core/algorithms/algebraic_constraints/ac_pairs_collection.h b/src/core/algorithms/algebraic_constraints/ac_pairs_collection.h index c72a7e5f03..ef941f7267 100644 --- a/src/core/algorithms/algebraic_constraints/ac_pairs_collection.h +++ b/src/core/algorithms/algebraic_constraints/ac_pairs_collection.h @@ -4,6 +4,7 @@ #include #include "ac.h" +#include "type_wrapper.h" #include "typed_column_pair.h" namespace algos { @@ -12,9 +13,8 @@ using ACPairs = std::vector>; /* Contains value pairs for a specific pair of columns */ struct ACPairsCollection { - ACPairsCollection(std::unique_ptr num_type, ACPairs&& ac_pairs, - size_t lhs_i, size_t rhs_i) - : 
col_pair{{lhs_i, rhs_i}, std::move(num_type)}, ac_pairs(std::move(ac_pairs)) {} + ACPairsCollection(model::TypeId id, ACPairs&& ac_pairs, size_t lhs_i, size_t rhs_i) + : col_pair{{lhs_i, rhs_i}, TypeWrapper(id)}, ac_pairs(std::move(ac_pairs)) {} /* Column pair indices and pointer to their type */ TypedColumnPair col_pair; diff --git a/src/core/algorithms/algebraic_constraints/ranges_collection.h b/src/core/algorithms/algebraic_constraints/ranges_collection.h index 50f1a339e9..c5a5516e01 100644 --- a/src/core/algorithms/algebraic_constraints/ranges_collection.h +++ b/src/core/algorithms/algebraic_constraints/ranges_collection.h @@ -3,15 +3,16 @@ #include #include "numeric_type.h" +#include "type_wrapper.h" #include "typed_column_pair.h" namespace algos { /* A set of ranges for a specific pair of columns */ struct RangesCollection { - RangesCollection(std::unique_ptr num_type, - std::vector&& ranges, size_t lhs_i, size_t rhs_i) - : col_pair{{lhs_i, rhs_i}, std::move(num_type)}, ranges(std::move(ranges)) {} + RangesCollection(model::TypeId id, std::vector&& ranges, size_t lhs_i, + size_t rhs_i) + : col_pair{{lhs_i, rhs_i}, TypeWrapper(id)}, ranges(std::move(ranges)) {} TypedColumnPair col_pair; /* Border values of the intervals. 
Even element -- diff --git a/src/core/algorithms/algebraic_constraints/type_wrapper.h b/src/core/algorithms/algebraic_constraints/type_wrapper.h new file mode 100644 index 0000000000..c1da00a2be --- /dev/null +++ b/src/core/algorithms/algebraic_constraints/type_wrapper.h @@ -0,0 +1,73 @@ +#pragma once +#include +#include + +#include "builtin.h" +#include "create_type.h" +#include "date_type.h" +#include "numeric_type.h" + +namespace algos { +class TypeWrapper { +public: + std::unique_ptr num_type; + std::unique_ptr date_type; + + TypeWrapper() : num_type(nullptr), date_type(nullptr) {} + + bool IsNumeric() const { + return date_type == nullptr; + } + + void Set(model::TypeId id) { + switch (id) { + case model::TypeId::kDate: + date_type = model::CreateSpecificType(id, true); + num_type = + model::CreateSpecificType(+model::TypeId::kInt, true); + break; + default: + num_type = model::CreateSpecificType(id, true); + date_type = nullptr; + } + } + + explicit TypeWrapper(model::TypeId id) { + Set(id); + }; + + model::CompareResult NumericCompare(std::byte const* l, std::byte const* r) const { + return num_type->Compare(l, r); + } + + [[nodiscard]] std::byte* NumericAllocate(size_t count = 1) const { + return num_type->Allocate(count); + } + + void NumericFromStr(std::byte* buf, std::string const& s) const { + return num_type->ValueFromStr(buf, s); + } + + double Dist(std::byte const* l, std::byte const* r) const { + if (IsNumeric()) { + return num_type->Dist(l, r); + } + return date_type->Dist(l, r); + } + + [[nodiscard]] model::TypeId GetTypeId() const { + if (IsNumeric()) { + return num_type->GetTypeId(); + } + return date_type->GetTypeId(); + } + + [[nodiscard]] model::TypeId GetNumericId() const { + return num_type->GetTypeId(); + } + + std::string NumericToString(std::byte const* value) const { + return num_type->ValueToString(value); + } +}; +} // namespace algos \ No newline at end of file diff --git 
a/src/core/algorithms/algebraic_constraints/typed_column_pair.h b/src/core/algorithms/algebraic_constraints/typed_column_pair.h index 87b13518f1..0d5b45e8b2 100644 --- a/src/core/algorithms/algebraic_constraints/typed_column_pair.h +++ b/src/core/algorithms/algebraic_constraints/typed_column_pair.h @@ -3,6 +3,7 @@ #include #include "numeric_type.h" +#include "type_wrapper.h" namespace algos { @@ -11,7 +12,7 @@ struct TypedColumnPair { * for binop_, the second for the right */ std::pair col_i; /* Columns type */ - std::unique_ptr num_type; + TypeWrapper type_wrapper; }; } // namespace algos diff --git a/src/core/model/types/date_type.h b/src/core/model/types/date_type.h index 9f667da9d4..f5738e4dc6 100644 --- a/src/core/model/types/date_type.h +++ b/src/core/model/types/date_type.h @@ -14,6 +14,8 @@ namespace model { class DateType : public IMetrizableType { public: using Delta = boost::gregorian::date_duration; + using DateBinop = std::byte* (DateType::*)(std::byte const*, std::byte const*, + std::byte*) const; DateType() noexcept : IMetrizableType(TypeId::kDate) {} diff --git a/src/python_bindings/py_ac_ranges_collection.h b/src/python_bindings/py_ac_ranges_collection.h index 3c2a2b0ca4..2dfec8e32d 100644 --- a/src/python_bindings/py_ac_ranges_collection.h +++ b/src/python_bindings/py_ac_ranges_collection.h @@ -26,8 +26,9 @@ class PyRangesCollection { for (size_t i = 0; i < r_coll.ranges.size(); i += 2) { // TODO: change this once a proper conversion mechanism from `model::INumericType` is // implemented - std::string l_endpoint = r_coll.col_pair.num_type->ValueToString(r_coll.ranges[i]); - std::string r_endpoint = r_coll.col_pair.num_type->ValueToString(r_coll.ranges[i + 1]); + std::string l_endpoint = r_coll.col_pair.type_wrapper.NumericToString(r_coll.ranges[i]); + std::string r_endpoint = + r_coll.col_pair.type_wrapper.NumericToString(r_coll.ranges[i + 1]); res.emplace_back(pybind11::float_(pybind11::str(l_endpoint)), 
pybind11::float_(pybind11::str(r_endpoint))); } diff --git a/src/tests/test_ac_algorithm.cpp b/src/tests/test_ac_algorithm.cpp index fe0e0252ef..138e1e9f2a 100644 --- a/src/tests/test_ac_algorithm.cpp +++ b/src/tests/test_ac_algorithm.cpp @@ -16,10 +16,20 @@ void AssertRanges(std::vector& expected_ranges, ASSERT_EQ(expected_ranges.size(), byte_ranges.ranges.size()); model::DoubleType double_type; + model::IntType int_type; for (size_t i = 0; i < expected_ranges.size(); ++i) { - auto expected = std::unique_ptr(double_type.MakeValue(expected_ranges[i])); - double_type.CastTo(expected.get(), byte_ranges.col_pair.num_type->GetTypeId()); - EXPECT_EQ(byte_ranges.col_pair.num_type->Compare(expected.get(), byte_ranges.ranges[i]), + std::unique_ptr + expected; // = + // std::unique_ptr(double_type.MakeValue(expected_ranges[i])); + if (byte_ranges.col_pair.type_wrapper.IsNumeric()) { + expected = std::unique_ptr(double_type.MakeValue(expected_ranges[i])); + double_type.CastTo(expected.get(), byte_ranges.col_pair.type_wrapper.GetNumericId()); + } else { + expected = std::unique_ptr(int_type.MakeValue((expected_ranges[i]))); + int_type.CastTo(expected.get(), byte_ranges.col_pair.type_wrapper.GetNumericId()); + } + EXPECT_EQ(byte_ranges.col_pair.type_wrapper.NumericCompare(expected.get(), + byte_ranges.ranges[i]), model::CompareResult::kEqual); } } @@ -215,4 +225,26 @@ TEST_F(ACAlgorithmTest, RangesReconstruction) { AssertRanges(expected_ranges, ranges_collection); } + +TEST_F(ACAlgorithmTest, DatesIntegrationTest) { + auto a = CreateACAlgorithmInstance("ACShippingDates.csv", ',', true, algos::Binop::Subtraction, + 0.2, 0.85, 0.1, 0, 4); + a->Execute(); + a->CollectACExceptions(); + auto& ranges_collection = a->GetRangesByColumns(1, 2); + std::vector expected_ranges = {-213, -5, 0, 0}; + + algos::ACException e0(3, {{1, 2}}); + algos::ACException e1(10, {{1, 2}}); + algos::ACException e2(22, {{1, 2}}); + algos::ACException e3(26, {{1, 2}}); + algos::ACException e4(61, {{1, 2}}); 
+ algos::ACException e5(71, {{1, 2}}); + algos::ACException e6(75, {{1, 2}}); + algos::ACException e7(79, {{1, 2}}); + algos::ACException e8(87, {{1, 2}}); + ACAlgorithmTest::ACExceptions expected = {e0, e1, e2, e3, e4, e5, e6, e7, e8}; + AssertRanges(expected_ranges, ranges_collection); + AssertACExceptions(expected, a->GetACExceptions()); +} } // namespace tests diff --git a/test_input_data/ACShippingDates.csv b/test_input_data/ACShippingDates.csv index b0ad4f17e6..72372ce78b 100644 --- a/test_input_data/ACShippingDates.csv +++ b/test_input_data/ACShippingDates.csv @@ -1,6 +1,98 @@ orderID,shipDate,deliveryDate -2A5,2001-01-03,2001-01-06 -3C2,2001-04-15,2001-04-27 -3B8,2002-11-25,2002-12-10 -2E1,2002-10-31,2002-12-02 -3D6,2002-07-25,2002-07-29 +7M1,2008-06-24,2008-09-23 +4E8,2008-11-03,2008-11-30 +3N5,2008-03-09,2008-08-22 +8J0,2008-01-13,2008-09-03 +5A1,2008-10-05,2008-10-21 +1S5,2008-04-22,2008-06-30 +5R1,2008-12-20,2008-12-29 +5Q4,2008-09-25,2008-11-06 +9B6,2008-12-31,2008-12-31 +6V2,2008-11-26,2008-12-09 +0S7,2008-12-24,2008-12-26 +4O4,2008-06-12,2008-06-25 +9H0,2008-02-18,2008-06-25 +1F2,2008-09-16,2008-12-19 +1P7,2008-06-28,2008-12-28 +7I8,2008-12-02,2008-12-14 +4C8,2008-09-05,2008-11-25 +8F1,2008-08-14,2008-12-17 +8R0,2008-12-21,2008-12-27 +5Q7,2008-12-20,2008-12-28 +0Q0,2008-03-10,2008-10-04 +7V6,2008-11-25,2008-12-19 +7T9,2008-01-24,2008-11-03 +7P1,2008-03-25,2008-07-10 +8C4,2008-11-14,2008-12-29 +5M0,2008-10-15,2008-10-27 +8B7,2008-12-06,2008-12-07 +9L6,2008-04-09,2008-05-20 +0Q4,2008-12-16,2008-12-21 +9O2,2008-06-17,2008-08-12 +0Y9,2008-05-07,2008-12-06 +0R7,2008-03-30,2008-10-21 +1D7,2008-06-08,2008-07-20 +4U2,2008-04-06,2008-10-29 +8A3,2008-12-16,2008-12-23 +0T7,2008-02-19,2008-05-13 +9S4,2008-05-05,2008-05-25 +0F4,2008-09-30,2008-10-29 +3R4,2008-07-04,2008-08-28 +2B7,2008-10-13,2008-11-23 +7N1,2008-07-26,2008-11-14 +6J5,2008-01-11,2008-01-20 +7W5,2008-06-16,2008-10-30 +8O7,2008-09-22,2008-12-29 +9J7,2008-10-02,2008-10-26 +9E9,2008-08-11,2008-10-25 
+3D1,2008-06-10,2008-11-28 +1C4,2008-11-13,2008-12-22 +9T0,2008-02-08,2008-08-20 +8X9,2008-05-20,2008-08-20 +7D6,2008-08-17,2008-09-23 +4B5,2008-10-23,2008-12-24 +5A5,2008-06-26,2008-08-19 +4A5,2008-09-24,2008-10-30 +3U2,2008-06-03,2008-11-14 +8X7,2008-02-18,2008-08-05 +7Z4,2008-08-18,2008-09-26 +6R2,2008-07-06,2008-11-03 +4D3,2008-08-05,2008-12-06 +8P0,2008-10-29,2008-11-09 +2A4,2008-08-12,2008-11-13 +7R9,2008-12-22,2008-12-26 +8G0,2008-07-22,2008-07-31 +7L8,2008-10-29,2008-11-05 +5M6,2008-07-18,2008-10-07 +9Y9,2008-05-15,2008-10-05 +3P3,2008-10-30,2008-11-05 +3A3,2008-11-17,2008-12-15 +7C7,2008-01-28,2008-02-13 +8M8,2008-12-14,2008-12-30 +6E1,2008-05-22,2008-06-30 +7V8,2008-11-15,2008-11-17 +8J9,2008-10-01,2008-10-29 +3M6,2008-07-14,2008-11-16 +5F3,2008-06-13,2008-08-08 +0O5,2008-09-06,2008-09-07 +8T9,2008-10-18,2008-12-03 +1Y5,2008-03-23,2008-08-14 +9H8,2008-11-19,2008-12-26 +0G2,2008-03-27,2008-11-26 +3R0,2008-08-23,2008-11-21 +6M3,2008-06-28,2008-11-30 +7M4,2008-07-07,2008-07-25 +4B2,2008-03-24,2008-09-14 +5T9,2008-06-25,2008-10-16 +3X7,2008-03-06,2008-03-11 +4Z4,2008-01-10,2008-07-07 +0I2,2008-03-01,2008-12-23 +8H5,2008-12-09,2008-12-28 +0H0,2008-06-11,2008-06-19 +0Y2,2008-09-19,2008-11-30 +7Q9,2008-12-27,2009-01-01 +5X3,2008-05-02,2008-11-06 +0V9,2008-03-14,2008-03-21 +2D2,2008-09-01,2008-11-18 +3T5,2008-04-09,2008-09-29 +1F8,2008-11-06,2008-11-12 \ No newline at end of file