From 3dc088d882d9f44f21858aad04e100ece700df16 Mon Sep 17 00:00:00 2001 From: vdaleke Date: Mon, 7 Oct 2024 15:14:46 +0300 Subject: [PATCH] DynFD algorithm --- src/core/algorithms/fd/dynfd/dynfd.cpp | 237 ++++++++++++ src/core/algorithms/fd/dynfd/dynfd.h | 39 ++ .../fd/dynfd/model/compressed_column_data.h | 31 ++ .../model/dynamic_position_list_index.cpp | 166 +++++++++ .../dynfd/model/dynamic_position_list_index.h | 112 ++++++ .../fd/dynfd/model/dynamic_relation_data.cpp | 218 +++++++++++ .../fd/dynfd/model/dynamic_relation_data.h | 49 +++ .../algorithms/fd/dynfd/model/non_fd_tree.cpp | 150 ++++++++ .../algorithms/fd/dynfd/model/non_fd_tree.h | 101 ++++++ .../fd/dynfd/model/non_fd_tree_vertex.cpp | 337 ++++++++++++++++++ .../fd/dynfd/model/non_fd_tree_vertex.h | 197 ++++++++++ src/core/algorithms/fd/dynfd/validator.cpp | 200 +++++++++++ src/core/algorithms/fd/dynfd/validator.h | 43 +++ .../fd/hycommon/validator_helpers.cpp | 6 +- src/core/algorithms/fd/hyfd/hyfd.cpp | 3 +- src/core/algorithms/fd/hyfd/inductor.cpp | 2 +- src/core/algorithms/fd/hyfd/inductor.h | 6 +- src/core/algorithms/fd/hyfd/model/fd_tree.cpp | 68 ---- src/core/algorithms/fd/hyfd/model/fd_tree.h | 85 ----- .../fd/hyfd/model/fd_tree_vertex.cpp | 139 -------- src/core/algorithms/fd/hyfd/validator.cpp | 8 +- src/core/algorithms/fd/hyfd/validator.h | 9 +- src/core/algorithms/fd/mining_algorithms.h | 1 + src/core/model/FDTrees/fd_tree.cpp | 8 + src/core/model/FDTrees/fd_tree.h | 200 +++++++++++ src/core/model/FDTrees/fd_tree_vertex.cpp | 306 ++++++++++++++++ .../model => model/FDTrees}/fd_tree_vertex.h | 37 +- src/core/model/table/vertical_map.cpp | 1 + src/python_bindings/fd/bind_fd.cpp | 12 +- src/tests/all_csv_configs.cpp | 12 + src/tests/all_csv_configs.h | 6 + src/tests/test_dynamic_fd_algorithm.cpp | 86 +++++ src/tests/test_fd_algorithm.cpp | 25 +- src/tests/test_fd_util.h | 20 ++ .../dynamic_fd/TestDynamicAfterAll.csv | 13 + .../dynamic_fd/TestDynamicAfterDelete.csv | 10 + 
.../dynamic_fd/TestDynamicAfterInsert.csv | 16 + .../TestDynamicAfterInsertAndDelete.csv | 13 + .../TestDynamicAfterInsertAndUpdate.csv | 16 + .../TestDynamicAfterUpdateAndDelete.csv | 10 + 40 files changed, 2655 insertions(+), 343 deletions(-) create mode 100644 src/core/algorithms/fd/dynfd/dynfd.cpp create mode 100644 src/core/algorithms/fd/dynfd/dynfd.h create mode 100644 src/core/algorithms/fd/dynfd/model/compressed_column_data.h create mode 100644 src/core/algorithms/fd/dynfd/model/dynamic_position_list_index.cpp create mode 100644 src/core/algorithms/fd/dynfd/model/dynamic_position_list_index.h create mode 100644 src/core/algorithms/fd/dynfd/model/dynamic_relation_data.cpp create mode 100644 src/core/algorithms/fd/dynfd/model/dynamic_relation_data.h create mode 100644 src/core/algorithms/fd/dynfd/model/non_fd_tree.cpp create mode 100644 src/core/algorithms/fd/dynfd/model/non_fd_tree.h create mode 100644 src/core/algorithms/fd/dynfd/model/non_fd_tree_vertex.cpp create mode 100644 src/core/algorithms/fd/dynfd/model/non_fd_tree_vertex.h create mode 100644 src/core/algorithms/fd/dynfd/validator.cpp create mode 100644 src/core/algorithms/fd/dynfd/validator.h delete mode 100644 src/core/algorithms/fd/hyfd/model/fd_tree.cpp delete mode 100644 src/core/algorithms/fd/hyfd/model/fd_tree.h delete mode 100644 src/core/algorithms/fd/hyfd/model/fd_tree_vertex.cpp create mode 100644 src/core/model/FDTrees/fd_tree.cpp create mode 100644 src/core/model/FDTrees/fd_tree.h create mode 100644 src/core/model/FDTrees/fd_tree_vertex.cpp rename src/core/{algorithms/fd/hyfd/model => model/FDTrees}/fd_tree_vertex.h (71%) create mode 100644 src/tests/test_dynamic_fd_algorithm.cpp create mode 100644 test_input_data/dynamic_fd/TestDynamicAfterAll.csv create mode 100644 test_input_data/dynamic_fd/TestDynamicAfterDelete.csv create mode 100644 test_input_data/dynamic_fd/TestDynamicAfterInsert.csv create mode 100644 test_input_data/dynamic_fd/TestDynamicAfterInsertAndDelete.csv create mode 
100644 test_input_data/dynamic_fd/TestDynamicAfterInsertAndUpdate.csv create mode 100644 test_input_data/dynamic_fd/TestDynamicAfterUpdateAndDelete.csv diff --git a/src/core/algorithms/fd/dynfd/dynfd.cpp b/src/core/algorithms/fd/dynfd/dynfd.cpp new file mode 100644 index 0000000000..914bddcee5 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/dynfd.cpp @@ -0,0 +1,237 @@ +#include "dynfd.h" + +#include + +#include "algo_factory.h" +#include "algorithms/fd/hycommon/all_column_combinations.h" +#include "algorithms/fd/hycommon/preprocessor.h" +#include "algorithms/fd/hycommon/util/pli_util.h" +#include "algorithms/fd/hyfd/inductor.h" +#include "algorithms/fd/hyfd/sampler.h" +#include "algorithms/fd/hyfd/validator.h" +#include "algorithms/fd/raw_fd.h" +#include "indices/option.h" +#include "option_using.h" +#include "tabular_data/crud_operations/delete/option.h" +#include "tabular_data/crud_operations/insert/option.h" +#include "tabular_data/crud_operations/operations.h" +#include "tabular_data/crud_operations/update/option.h" +#include "tabular_data/input_table/option.h" + +namespace algos::dynfd { + +void DynFD::ExecuteHyFD() { + std::shared_ptr hy_fd_relation = ColumnLayoutRelationData::CreateFrom(*input_table_, true); + + auto [plis, pli_records, og_mapping] = hy::Preprocess(hy_fd_relation.get()); + auto plis_shared = std::make_shared(std::move(plis)); + auto const pli_records_shared = std::make_shared(std::move(pli_records)); + + hyfd::Sampler sampler(plis_shared, pli_records_shared); + + auto positive_cover_tree = std::make_shared(hy_fd_relation->GetNumColumns()); + hyfd::Inductor inductor(positive_cover_tree); + hyfd::Validator validator(positive_cover_tree, plis_shared, pli_records_shared); + + hy::IdPairs comparison_suggestions; + + while (true) { + auto non_fds = sampler.GetNonFDs(comparison_suggestions); + + inductor.UpdateFdTree(std::move(non_fds)); + + comparison_suggestions = validator.ValidateAndExtendCandidates(); + + if (comparison_suggestions.empty()) { 
+ break; + } + + LOG(TRACE) << "Cycle done"; + } + + for (size_t rhs = 0; rhs < hy_fd_relation->GetNumColumns(); ++rhs) { + positive_cover_tree_->Remove(boost::dynamic_bitset(hy_fd_relation->GetNumColumns()), rhs); + } + for (auto fd : positive_cover_tree->FillFDs()) { + fd.lhs_ = hy::RestoreAgreeSet(fd.lhs_, og_mapping, hy_fd_relation->GetNumColumns()); + fd.rhs_ = og_mapping[fd.rhs_]; + positive_cover_tree_->AddFD(fd.lhs_, fd.rhs_); + } +} + +unsigned long long DynFD::ExecuteInternal() { + auto const start_time = std::chrono::system_clock::now(); + + bool const is_non_fd_validation_needed = + (!delete_statement_indices_.empty()) || (update_statements_table_ != nullptr); + bool const is_fd_validation_needed = + (update_statements_table_ != nullptr) || (insert_statements_table_ != nullptr); + + if (!delete_statement_indices_.empty()) { + relation_->DeleteBatch(delete_statement_indices_); + } + if (update_statements_table_ != nullptr) { + relation_->DeleteRecordsFromUpdateBatch(update_statements_table_); + } + + if (is_non_fd_validation_needed) { + validator_->ValidateNonFds(); + } + + size_t const first_insert_batch_id = relation_->GetNextRecordId(); + if (update_statements_table_ != nullptr) { + relation_->InsertRecordsFromUpdateBatch(update_statements_table_); + } + if (insert_statements_table_ != nullptr) { + relation_->InsertBatch(insert_statements_table_); + } + + if (is_fd_validation_needed) { + validator_->ValidateFds(first_insert_batch_id); + } + + SetProgress(kTotalProgressPercent); + RegisterFDs(positive_cover_tree_->FillFDs()); + auto const elapsed_milliseconds = std::chrono::duration_cast( + std::chrono::system_clock::now() - start_time); + return elapsed_milliseconds.count(); +} + +void DynFD::LoadDataInternal() { + relation_ = DynamicRelationData::CreateFrom(input_table_); + if (relation_->GetColumnData().empty()) { + throw std::runtime_error( + "Got an empty dataset: FD mining is meaningless. 
If you want to specify columns, " + "insert their names"); + } + positive_cover_tree_ = std::make_shared(GetRelation().GetNumColumns()); + + if (!relation_->Empty()) { + ExecuteHyFD(); + } + + negative_cover_tree_ = std::make_shared(GetRelation().GetNumColumns()); + + // Cover inversion + for (size_t i = 0; i < relation_->GetNumColumns(); i++) { + boost::dynamic_bitset<> lhs(relation_->GetNumColumns()); + lhs.set(); + lhs.reset(i); + negative_cover_tree_->AddNonFD(lhs, i, std::nullopt); + } + + for (auto&& [lhs, rhs] : positive_cover_tree_->FillFDs()) { + std::vector> violated = negative_cover_tree_->GetSpecials(lhs, rhs); + for (auto&& non_fd : violated) { + negative_cover_tree_->Remove(non_fd, rhs); + for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; + bit = lhs.find_next(bit)) { + boost::dynamic_bitset<> new_lhs = non_fd; + new_lhs.reset(bit); + if (!negative_cover_tree_->ContainsNonFdOrSpecial(new_lhs, rhs)) { + negative_cover_tree_->AddNonFD(new_lhs, rhs, std::nullopt); + } + } + } + } + + validator_ = std::make_shared(positive_cover_tree_, negative_cover_tree_, relation_); +} + +void DynFD::MakeExecuteOptsAvailableFDInternal() { + using namespace config::names; + MakeOptionsAvailable(kCrudOptions); +} + +void DynFD::RegisterOptions() { + DESBORDANTE_OPTION_USING; + + auto check_inserts = [this](config::InputTable const& insert_batch) { + if (insert_batch == nullptr || !insert_batch->HasNextRow()) { + return; + } + if (insert_batch->GetNumberOfColumns() != input_table_->GetNumberOfColumns()) { + throw config::ConfigurationError( + "Schema mismatch: insert statements must have the same number of columns as " + "the input table"); + } + for (size_t i = 0; i < input_table_->GetNumberOfColumns(); ++i) { + if (insert_batch->GetColumnName(i) != input_table_->GetColumnName(i)) { + throw config::ConfigurationError( + "Schema mismatch: insert statements' column names must match the input " + "table"); + } + } + }; + + auto check_deletes = 
[this](std::unordered_set const& delete_batch) { + if (delete_batch.empty()) { + return; + } + for (size_t const id : delete_batch) { + if (!relation_->IsRowIndexValid(id)) { + throw config::ConfigurationError("Attempt to delete a non-existing row"); + } + } + }; + + auto check_updates = [this](config::InputTable const& update_batch) { + if (update_batch == nullptr || !update_batch->HasNextRow()) { + return; + } + if (update_batch->GetNumberOfColumns() != input_table_->GetNumberOfColumns() + 1) { + throw config::ConfigurationError( + "Schema mismatch: update statements must have the number of columns one more " + "than the input table"); + } + for (size_t i = 0; i < input_table_->GetNumberOfColumns(); ++i) { + if (update_batch->GetColumnName(i + 1) != input_table_->GetColumnName(i)) { + throw config::ConfigurationError( + "Schema mismatch: update statements column names, except of first one, " + "must match the input table"); + } + } + std::unordered_set rows_to_update; + while (update_batch->HasNextRow()) { + auto row = update_batch->GetNextRow(); + size_t id = std::stoull(row.front()); + if (!relation_->IsRowIndexValid(id)) { + throw config::ConfigurationError("Attempt to update a non-existing row"); + } + if (rows_to_update.contains(id)) { + throw config::ConfigurationError("Update statements have duplicates"); + } + rows_to_update.emplace(id); + } + update_batch->Reset(); + }; + + RegisterOption(config::kTableOpt(&input_table_)); + RegisterOption( + config::kInsertStatementsOpt(&insert_statements_table_).SetValueCheck(check_inserts)); + RegisterOption( + config::kDeleteStatementsOpt(&delete_statement_indices_).SetValueCheck(check_deletes)); + RegisterOption( + config::kUpdateStatementsOpt(&update_statements_table_).SetValueCheck(check_updates)); +} + +void DynFD::RegisterFDs(std::vector&& fds) { + auto const* const schema = GetRelation().GetSchema(); + for (auto&& [lhs, rhs] : fds) { + Vertical lhs_v(schema, lhs); + Column rhs_c(schema, 
schema->GetColumn(rhs)->GetName(), rhs); + RegisterFd(std::move(lhs_v), std::move(rhs_c)); + } +} + +DynFD::DynFD() : FDAlgorithm({kDefaultPhaseName}) { + RegisterOptions(); + MakeOptionsAvailable({config::kTableOpt.GetName()}); +} + +DynamicRelationData const& DynFD::GetRelation() const { + assert(relation_ != nullptr); + return *relation_; +} + +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/dynfd.h b/src/core/algorithms/fd/dynfd/dynfd.h new file mode 100644 index 0000000000..3a84c53b54 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/dynfd.h @@ -0,0 +1,39 @@ +#pragma once +#include +#include +#include +#include +#include + +#include "fd/hycommon/types.h" +#include "model/dynamic_relation_data.h" +#include "model/non_fd_tree.h" +#include "validator.h" + +namespace algos::dynfd { +class DynFD final : public FDAlgorithm { + config::InputTable input_table_; + config::InputTable insert_statements_table_ = nullptr; + config::InputTable update_statements_table_ = nullptr; + std::unordered_set delete_statement_indices_; + std::shared_ptr relation_ = nullptr; + std::shared_ptr positive_cover_tree_ = nullptr; + std::shared_ptr negative_cover_tree_ = nullptr; + std::shared_ptr validator_ = nullptr; + +public: + DynFD(); + [[nodiscard]] DynamicRelationData const& GetRelation() const; + +private: + void RegisterOptions(); + void LoadDataInternal() override; + void MakeExecuteOptsAvailableFDInternal() override; + unsigned long long ExecuteInternal() override; + void RegisterFDs(std::vector&& fds); + void ExecuteHyFD(); + + void ResetStateFd() override {} +}; + +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/model/compressed_column_data.h b/src/core/algorithms/fd/dynfd/model/compressed_column_data.h new file mode 100644 index 0000000000..3e23c9f555 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/compressed_column_data.h @@ -0,0 +1,31 @@ +#pragma once + +#include + +#include "dynamic_position_list_index.h" +#include 
"model/table/abstract_column_data.h" + +namespace algos::dynfd { + +class CompressedColumnData : model::AbstractColumnData { + std::shared_ptr position_list_index_; + +public: + CompressedColumnData(Column const* column, + std::unique_ptr position_list_index) + : AbstractColumnData(column), position_list_index_(std::move(position_list_index)) {} + + [[nodiscard]] size_t GetNumRows() const { + return position_list_index_->GetSize(); + } + + [[nodiscard]] std::string ToString() const final { + return "Data for " + column_->ToString(); + } + + [[nodiscard]] std::shared_ptr GetPositionListIndex() const { + return position_list_index_; + } +}; + +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/model/dynamic_position_list_index.cpp b/src/core/algorithms/fd/dynfd/model/dynamic_position_list_index.cpp new file mode 100644 index 0000000000..cf077147ac --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/dynamic_position_list_index.cpp @@ -0,0 +1,166 @@ +#include "dynamic_position_list_index.h" + +#include + +#include + +namespace algos::dynfd { +DynamicPositionListIndex::Cluster::Cluster(std::vector unsorted_records) { + std::ranges::sort(unsorted_records); + + for (std::vector sorted_records = std::move(unsorted_records); + size_t record_id : sorted_records) { + records_.push_back(record_id); + position_by_record_id_[record_id] = std::prev(records_.end()); + } +} + +void DynamicPositionListIndex::Cluster::PushBack(size_t const record_id) { + records_.push_back(record_id); + position_by_record_id_[record_id] = std::prev(records_.end()); +} + +void DynamicPositionListIndex::Cluster::Erase(size_t const record_id) { + records_.erase(position_by_record_id_[record_id]); +} + +size_t DynamicPositionListIndex::Cluster::Back() const { + assert(!Empty()); + return records_.back(); +} + +bool DynamicPositionListIndex::Cluster::Empty() const { + return records_.empty(); +} + +DynamicPositionListIndex::DynamicPositionListIndex( + std::list clusters, + 
std::unordered_map::iterator> inverted_index, + std::unordered_map hash_index, int const next_record_id, + unsigned int const size, model::ColumnIndex columnIndex) + : clusters_(std::move(clusters)), + inverted_index_(std::move(inverted_index)), + hash_index_(std::move(hash_index)), + next_record_id_(next_record_id), + size_(size), + columnIndex_(columnIndex) {} + +std::unique_ptr DynamicPositionListIndex::CreateFor( + std::vector const &data, model::ColumnIndex columnIndex) { + std::list clusters; + std::unordered_map::iterator> inverted_index; + std::unordered_map hash_index; + unsigned int size = data.size(); + + int next_record_id = 0; + for (int const value_id : data) { + hash_index[next_record_id] = value_id; + auto inverted_index_it = inverted_index.find(value_id); + if (inverted_index_it == inverted_index.end()) { + clusters.emplace_back(); + inverted_index_it = inverted_index.emplace(value_id, std::prev(clusters.end())).first; + } + inverted_index_it->second->PushBack(next_record_id++); + } + + return std::make_unique( + std::move(clusters), std::move(inverted_index), std::move(hash_index), next_record_id, + size, columnIndex); +} + +void DynamicPositionListIndex::Erase(size_t const record_id) { + int const value_id = hash_index_[record_id]; + auto const iterator = inverted_index_.find(value_id); + iterator->second->Erase(record_id); + hash_index_.erase(record_id); + size_--; + if (iterator->second->Empty()) { + clusters_.erase(iterator->second); + inverted_index_.erase(iterator); + } +} + +size_t DynamicPositionListIndex::Insert(int const value_id) { + int const record_id = next_record_id_++; + hash_index_[record_id] = value_id; + if (!inverted_index_.contains(value_id)) { + clusters_.emplace_back(); + inverted_index_[value_id] = std::prev(clusters_.end()); + } + inverted_index_[value_id]->PushBack(record_id); + size_++; + return record_id; +} + +unsigned int DynamicPositionListIndex::GetSize() const { + return size_; +} + 
+// NOTE(review): the template arguments in this span were lost when the patch text was extracted.
+// They are restored here from how the members are used in this file (record ids as size_t,
+// value ids as int) -- confirm against dynamic_position_list_index.h before applying.
+
+/**
+ * Returns the cluster at position `cluster_id` in list order.
+ *
+ * Clusters are kept in a std::list, so the lookup is linear in `cluster_id`.
+ * `cluster_id` must lie in [0, GetClustersNum()); this is asserted because stepping an
+ * iterator past end() is undefined behaviour.
+ */
+DynamicPositionListIndex::Cluster const &DynamicPositionListIndex::GetCluster(int cluster_id) {
+    assert(cluster_id >= 0 && static_cast<size_t>(cluster_id) < clusters_.size());
+    // ReSharper disable once CppDFALocalValueEscapesFunction
+    return *std::next(clusters_.begin(), cluster_id);
+}
+
+/** Returns the number of clusters currently stored. */
+unsigned int DynamicPositionListIndex::GetClustersNum() const {
+    return clusters_.size();
+}
+
+/**
+ * Returns the value id recorded for `record_id`.
+ *
+ * @throws std::out_of_range if `record_id` is not present in the index.
+ */
+int DynamicPositionListIndex::GetRecordValue(size_t record_id) const {
+    return hash_index_.at(record_id);
+}
+
+/**
+ * Builds a new index over the records of this index, clustered by the value each record has
+ * in `that`.
+ *
+ * Records of this index that `that` does not know are skipped with a warning. The result's
+ * record -> value map is filled alongside the clusters, so its size, clusters and hash index
+ * agree with each other (previously the map was left empty).
+ *
+ * NOTE(review): records are grouped by `that`'s value id alone; the clustering of this index
+ * is not consulted. Confirm that callers expect this rather than a pairwise intersection.
+ */
+std::unique_ptr<DynamicPositionListIndex> DynamicPositionListIndex::FullIntersect(
+        DynamicPositionListIndex const &that) const {
+    std::unordered_map<int, std::vector<size_t>> partial_index;
+    std::list<Cluster> new_clusters;
+    std::unordered_map<int, std::list<Cluster>::iterator> new_inverted_index;
+    std::unordered_map<size_t, int> new_hash_index;
+    unsigned int new_size = 0;
+
+    for (size_t const record_id : hash_index_ | std::views::keys) {
+        // Single lookup instead of contains() followed by at().
+        auto const that_it = that.hash_index_.find(record_id);
+        if (that_it == that.hash_index_.end()) {
+            LOG(WARNING) << "Record id " << record_id << " not found in that index";
+            continue;
+        }
+        int const that_value_id = that_it->second;
+        partial_index[that_value_id].push_back(record_id);
+        new_hash_index.emplace(record_id, that_value_id);
+    }
+
+    for (auto &[value_id, records] : partial_index) {
+        // Take the size first: the vector is moved into the cluster on the next line.
+        new_size += records.size();
+        new_clusters.emplace_back(std::move(records));
+        new_inverted_index[value_id] = std::prev(new_clusters.end());
+    }
+
+    return std::make_unique<DynamicPositionListIndex>(
+            std::move(new_clusters), std::move(new_inverted_index), std::move(new_hash_index),
+            next_record_id_, new_size, columnIndex_);
+}
+
+/**
+ * Renders the index as "[[r0, r1], [r2], ...]", one inner list per cluster.
+ *
+ * An index without clusters yields "[]" and an empty cluster yields "[]" as well; the
+ * separators are written between elements, so nothing has to be trimmed afterwards.
+ */
+std::string DynamicPositionListIndex::ToString() const {
+    std::string res = "[";
+    bool first_cluster = true;
+    for (auto const &cluster : clusters_) {
+        if (!first_cluster) {
+            res += ", ";
+        }
+        first_cluster = false;
+
+        res.push_back('[');
+        bool first_record = true;
+        for (auto const record_id : cluster) {
+            if (!first_record) {
+                res += ", ";
+            }
+            first_record = false;
+            res += std::to_string(record_id);
+        }
+        res.push_back(']');
+    }
+    res.push_back(']');
+    return res;
+}
+
+/** Returns the index of the column this position list index was built for. */
+model::ColumnIndex DynamicPositionListIndex::GetColumnIndex() const {
+    return columnIndex_;
+}
+
+/** Returns the record id -> value id map backing this index. */
+std::unordered_map<size_t, int> const &DynamicPositionListIndex::GetHashIndex() const {
+    return hash_index_;
+}
+
+}  // namespace algos::dynfd
diff --git a/src/core/algorithms/fd/dynfd/model/dynamic_position_list_index.h b/src/core/algorithms/fd/dynfd/model/dynamic_position_list_index.h new file mode 100644 index 0000000000..403674b907 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/dynamic_position_list_index.h @@ -0,0 +1,112 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace algos::dynfd { +class DynamicPositionListIndex { +public: + class Cluster { + std::list records_; // contains record ids in sorted order + std::unordered_map::iterator> + position_by_record_id_; // record_id -> iterator + + public: + Cluster() = default; + explicit Cluster(std::vector unsorted_records); + + void PushBack(size_t record_id); + + void Erase(size_t record_id); + + size_t Back() const; + + bool Empty() const; + + // Iterator support + // NOLINTBEGIN(*-identifier-naming) + auto begin() { + return records_.begin(); + } + + auto end() { + return records_.end(); + } + + auto begin() const { + return records_.begin(); + } + + auto end() const { + return records_.end(); + } + + // NOLINTEND(*-identifier-naming) + }; + +private: + std::list clusters_; + std::unordered_map::iterator> + inverted_index_; // value -> cluster iterator + std::unordered_map hash_index_; // record_id -> value + int next_record_id_; + unsigned int size_; + model::ColumnIndex columnIndex_; + +public: + DynamicPositionListIndex(std::list clusters, + std::unordered_map::iterator> inverted_index, + std::unordered_map hash_index, int next_record_id, + unsigned int size, model::ColumnIndex columnIndex); + + static std::unique_ptr CreateFor(std::vector const& data, + model::ColumnIndex columnIndex); + + unsigned int GetSize() const; + + Cluster const& GetCluster(int cluster_id); + + unsigned int GetClustersNum() const; + + model::ColumnIndex GetColumnIndex() const; + + int GetRecordValue(size_t record_id) const; + + size_t Insert(int value_id); + + void Erase(size_t record_id); + + std::unique_ptr FullIntersect( 
+ DynamicPositionListIndex const& that) const; + + std::string ToString() const; + + std::unordered_map const& GetHashIndex() const; + + // Iterator support + // NOLINTBEGIN(*-identifier-naming) + auto begin() { + return clusters_.begin(); + } + + auto end() { + return clusters_.end(); + } + + auto begin() const { + return clusters_.begin(); + } + + auto end() const { + return clusters_.end(); + } + + // NOLINTEND(*-identifier-naming) +}; + +using DPLI = DynamicPositionListIndex; +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/model/dynamic_relation_data.cpp b/src/core/algorithms/fd/dynfd/model/dynamic_relation_data.cpp new file mode 100644 index 0000000000..115339dcae --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/dynamic_relation_data.cpp @@ -0,0 +1,218 @@ +#include "dynamic_relation_data.h" + +#include +#include + +#include + +namespace algos::dynfd { + +size_t DynamicRelationData::GetNumRows() const { + if (column_data_.empty()) { + return 0; + } + return column_data_[0].GetNumRows(); +} + +DynamicRelationData::DynamicRelationData(std::unique_ptr schema, + std::vector column_data, + std::unordered_set stored_row_ids, + std::unordered_map value_dictionary, + int const next_value_id, int const next_record_id) + : AbstractRelationData(std::move(schema), std::move(column_data)), + stored_row_ids_(std::move(stored_row_ids)), + value_dictionary_(std::move(value_dictionary)), + next_value_id_(next_value_id), + next_record_id_(next_record_id) {} + +size_t DynamicRelationData::GetNextRecordId() const { + return next_record_id_; +} + +std::unique_ptr DynamicRelationData::CreateFrom( + config::InputTable const& input_table) { + auto schema = std::make_unique(input_table->GetRelationName()); + int next_value_id = 1; + std::unordered_map value_dictionary; + size_t const num_columns = input_table->GetNumberOfColumns(); + std::vector> column_dictionary_encoded_data = + std::vector>(num_columns); + + while (input_table->HasNextRow()) { + 
std::vector row = input_table->GetNextRow(); + + if (row.size() != num_columns) { + LOG(WARNING) << "Unexpected number of columns for a row, skipping (expected " + << num_columns << ", got " << row.size() << ")"; + continue; + } + + for (size_t index = 0; index < row.size(); ++index) { + std::string const& field = row[index]; + + auto location = value_dictionary.find(field); + int value_id; + if (location == value_dictionary.end()) { + value_id = next_value_id; + value_dictionary[field] = value_id; + next_value_id++; + } else { + value_id = location->second; + } + + column_dictionary_encoded_data[index].push_back(value_id); + } + } + + std::vector column_data; + for (size_t i = 0; i < num_columns; ++i) { + auto column = Column(schema.get(), input_table->GetColumnName(i), i); + schema->AppendColumn(std::move(column)); + auto pli = DynamicPositionListIndex::CreateFor(column_dictionary_encoded_data[i], i); + column_data.emplace_back(schema->GetColumn(i), std::move(pli)); + } + + schema->Init(); + + size_t next_record_id = (!column_data.empty() ? 
column_data[0].GetNumRows() : 0); + std::vector all_ids(next_record_id); + std::iota(all_ids.begin(), all_ids.end(), 0); + + input_table->Reset(); + + return std::make_unique(std::move(schema), std::move(column_data), + std::unordered_set(all_ids.begin(), all_ids.end()), + std::move(value_dictionary), next_value_id, + next_record_id); +} + +// ReSharper disable once CppParameterMayBeConstPtrOrRef +void DynamicRelationData::InsertBatch(config::InputTable& insert_statements_table) { + if (insert_statements_table == nullptr) { + LOG(WARNING) << "Insert statements table is null, skipping insert batch"; + return; + } + + while (insert_statements_table->HasNextRow()) { + std::vector row = insert_statements_table->GetNextRow(); + + if (row.size() != GetNumColumns()) { + LOG(WARNING) << "Unexpected number of columns for a row, skipping (expected " + << GetNumColumns() << ", got " << row.size() << ")"; + continue; + } + + for (size_t index = 0; index < row.size(); ++index) { + std::string const& field = row[index]; + + int value_id; + if (auto location = value_dictionary_.find(field); + location == value_dictionary_.end()) { + value_id = next_value_id_++; + value_dictionary_[field] = value_id; + } else { + value_id = location->second; + } + + size_t const new_record_id = + column_data_[index].GetPositionListIndex()->Insert(value_id); + assert(new_record_id == next_record_id_); + } + + stored_row_ids_.insert(next_record_id_++); + } + + insert_statements_table->Reset(); +} + +void DynamicRelationData::DeleteBatch(std::unordered_set const& delete_statement_indices) { + for (size_t row_id : delete_statement_indices) { + if (!IsRowIndexValid(row_id)) { + LOG(WARNING) << "Row ID " << row_id << " is not valid, skipping update"; + continue; + } + + for (size_t i = 0; i < GetNumColumns(); ++i) { + column_data_[i].GetPositionListIndex()->Erase(row_id); + } + stored_row_ids_.erase(row_id); + } +} + +void DynamicRelationData::DeleteRecordsFromUpdateBatch( + config::InputTable& 
update_statements_table) { + if (update_statements_table == nullptr) { + LOG(WARNING) << "Update statements table is null, skipping update batch"; + return; + } + + while (update_statements_table->HasNextRow()) { + std::vector row = update_statements_table->GetNextRow(); + + if (row.size() != GetNumColumns() + 1) { + LOG(WARNING) << "Unexpected number of columns for a row, skipping (expected " + << GetNumColumns() + 1 << ", got " << row.size() << ")"; + continue; + } + + size_t row_id = std::stoull(row.front()); + if (!IsRowIndexValid(row_id)) { + LOG(WARNING) << "Row ID " << row_id << " is not valid, skipping update"; + continue; + } + + for (size_t i = 0; i < GetNumColumns(); ++i) { + column_data_[i].GetPositionListIndex()->Erase(row_id); + } + } + + update_statements_table->Reset(); +} + +void DynamicRelationData::InsertRecordsFromUpdateBatch( + config::InputTable& update_statements_table) { + if (update_statements_table == nullptr) { + LOG(WARNING) << "Update statements table is null, skipping update batch"; + return; + } + + while (update_statements_table->HasNextRow()) { + std::vector row = update_statements_table->GetNextRow(); + + if (row.size() != GetNumColumns() + 1) { + LOG(WARNING) << "Unexpected number of columns for a row, skipping (expected " + << GetNumColumns() + 1 << ", got " << row.size() << ")"; + continue; + } + + for (size_t index = 0; index < GetNumColumns(); ++index) { + std::string const& field = row[index + 1]; + + int value_id; + if (auto location = value_dictionary_.find(field); + location == value_dictionary_.end()) { + value_id = next_value_id_++; + value_dictionary_[field] = value_id; + } else { + value_id = location->second; + } + + size_t const new_record_id = + column_data_[index].GetPositionListIndex()->Insert(value_id); + assert(new_record_id == next_record_id_); + } + stored_row_ids_.insert(next_record_id_++); + } + + update_statements_table->Reset(); +} + +bool DynamicRelationData::IsRowIndexValid(size_t const row_id) const { + 
return stored_row_ids_.contains(row_id); +} + +bool DynamicRelationData::Empty() const { + return stored_row_ids_.empty(); +} + +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/model/dynamic_relation_data.h b/src/core/algorithms/fd/dynfd/model/dynamic_relation_data.h new file mode 100644 index 0000000000..3ae8157290 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/dynamic_relation_data.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include + +#include "compressed_column_data.h" +#include "fd/hycommon/preprocessor.h" +#include "fd/raw_fd.h" +#include "model/table/idataset_stream.h" +#include "model/table/relation_data.h" +#include "table/vertical.h" +#include "tabular_data/input_table_type.h" + +namespace algos::dynfd { + +class DynamicRelationData : public AbstractRelationData { + std::unordered_set stored_row_ids_; + std::unordered_map value_dictionary_; + int next_value_id_; + size_t next_record_id_; + +private: + [[nodiscard]] size_t GetNumRows() const final; + +public: + explicit DynamicRelationData(std::unique_ptr schema, + std::vector column_data, + std::unordered_set stored_row_ids, + std::unordered_map value_dictionary, + int next_value_id, int next_record_id); + + size_t GetNextRecordId() const; + + static std::unique_ptr CreateFrom(config::InputTable const& input_table); + + void InsertBatch(config::InputTable& insert_statements_table); + + void DeleteBatch(std::unordered_set const& delete_statement_indices); + + void DeleteRecordsFromUpdateBatch(config::InputTable& update_statements_table); + + void InsertRecordsFromUpdateBatch(config::InputTable& update_statements_table); + + [[nodiscard]] bool IsRowIndexValid(size_t row_id) const; + + bool Empty() const; +}; + +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/model/non_fd_tree.cpp b/src/core/algorithms/fd/dynfd/model/non_fd_tree.cpp new file mode 100644 index 0000000000..e7e765392f --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/non_fd_tree.cpp @@ 
-0,0 +1,150 @@ +#include "non_fd_tree.h" + +#include +#include + +#include + +namespace algos::dynfd { + /** Walks the set bits of lhs down from the root, creating missing children, and calls SetAttribute(rhs) on every vertex along the path; the vertex for the last set bit gets SetNonFd(rhs, violationPair). Returns that vertex only when this call created it; returns nullptr when it already existed or when lhs has no set bits (the non-FD is then recorded on the root). */ +std::shared_ptr NonFDTree::AddNonFD( + boost::dynamic_bitset<> const& lhs, size_t rhs, + ViolatingRecordPair violationPair = std::nullopt) { + NonFDTreeVertex* cur_node = root_.get(); + cur_node->SetAttribute(rhs); + + for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; + bit = lhs.find_next(bit)) { + bool is_new = cur_node->AddChild(bit); + + if (is_new && lhs.find_next(bit) == boost::dynamic_bitset<>::npos) { /* bit is the last set bit of lhs and its vertex was just created */ + auto added_node = cur_node->GetChildPtr(bit); + added_node->SetAttribute(rhs); + added_node->SetNonFd(rhs, violationPair); + return added_node; + } + + cur_node = cur_node->GetChild(bit); + cur_node->SetAttribute(rhs); + } + cur_node->SetNonFd(rhs, violationPair); /* reached when the final vertex already existed, or lhs has no set bits */ + return nullptr; +} + /** Follows the set bits of lhs down from the root. Returns false as soon as a child on the path is missing; otherwise returns IsNonFd(rhs) of the vertex reached. */ +bool NonFDTree::ContainsNonFD(boost::dynamic_bitset<>& lhs, size_t rhs) { + NonFDTreeVertex const* cur_node = root_.get(); + + for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; + bit = lhs.find_next(bit)) { + if (!cur_node->HasChildren() || !cur_node->ContainsChildAt(bit)) { + return false; + } + + cur_node = cur_node->GetChild(bit); + } + + return cur_node->IsNonFd(rhs); +} + /** Returns the vertex reached by following the set bits of lhs from the root, or nullptr if a child on that path is missing. For an lhs without set bits the root itself is returned. */ +std::shared_ptr NonFDTree::FindNonFdVertex(boost::dynamic_bitset<> const& lhs) { + auto cur_node = root_; + + for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; + bit = lhs.find_next(bit)) { + if (!cur_node->HasChildren() || !cur_node->ContainsChildAt(bit)) { + return nullptr; + } + + cur_node = cur_node->GetChildShared(bit); + } + + return cur_node; +} + /** Collects LHS bitsets by delegating to the root's GetNonFdAndGeneralsRecursive, starting at the first set bit of lhs with an all-zero accumulator of GetNumAttributes() bits. lhs must have at least one set bit (asserted). NOTE(review): presumably the result is lhs itself plus its generalizations that are non-FDs for rhs; confirm in NonFDTreeVertex. */ +std::vector> NonFDTree::GetNonFdAndGenerals(boost::dynamic_bitset<>& lhs, + size_t rhs) const { + assert(lhs.count() != 0); + + std::vector> result; + boost::dynamic_bitset const empty_lhs(GetNumAttributes()); + size_t const starting_bit = lhs.find_first(); + + root_->GetNonFdAndGeneralsRecursive(lhs, empty_lhs, rhs, starting_bit, result); + + return result; +} + 
+std::vector> NonFDTree::GetGenerals(boost::dynamic_bitset<>& lhs, + size_t rhs) { + assert(lhs.count() != 0); + + std::vector> result; + boost::dynamic_bitset empty_lhs(GetNumAttributes()); + size_t const starting_bit = lhs.find_first(); + + root_->GetGeneralsRecursive(lhs, empty_lhs, rhs, starting_bit, result); + + return result; +} + +std::vector> NonFDTree::GetNonFdAndSpecials(boost::dynamic_bitset<>& lhs, + size_t rhs) { + std::vector> result; + boost::dynamic_bitset empty_lhs(GetNumAttributes()); + + root_->GetNonFdAndSpecialsRecursive(lhs, empty_lhs, rhs, 0, result); + + return result; +} + +void NonFDTree::RemoveGenerals(boost::dynamic_bitset<> const& lhs, size_t rhs) { + assert(lhs.count() != 0); + + boost::dynamic_bitset<> empty_lhs(GetNumAttributes()); + root_->RemoveGeneralsRecursive(lhs, empty_lhs, rhs, lhs.find_first()); +} + +std::vector> NonFDTree::GetSpecials(boost::dynamic_bitset<>& lhs, + size_t rhs) { + std::vector> result; + boost::dynamic_bitset empty_lhs(GetNumAttributes()); + + root_->GetSpecialsRecursive(lhs, empty_lhs, rhs, 0, result); + + return result; +} + +void NonFDTree::RemoveSpecials(boost::dynamic_bitset<>& lhs, size_t rhs) { + boost::dynamic_bitset empty_lhs(GetNumAttributes()); + root_->RemoveSpecialsRecursive(lhs, empty_lhs, rhs, 0); +} + +bool NonFDTree::ContainsNonFdOrSpecial(boost::dynamic_bitset<>& lhs, size_t rhs) const { + size_t next_after_last_lhs_set_bit = 0; + if (lhs.find_first() != boost::dynamic_bitset<>::npos) { + next_after_last_lhs_set_bit = lhs.find_first(); + while (lhs.find_next(next_after_last_lhs_set_bit) != boost::dynamic_bitset<>::npos) { + next_after_last_lhs_set_bit = lhs.find_next(next_after_last_lhs_set_bit); + } + ++next_after_last_lhs_set_bit; + } + + return root_->ContainsNonFdOrSpecialRecursive(lhs, rhs, next_after_last_lhs_set_bit, 0); +} + +std::vector NonFDTree::GetLevel(unsigned int target_level) { + boost::dynamic_bitset const empty_lhs(GetNumAttributes()); + + std::vector vertices; + 
root_->GetLevelRecursive(target_level, 0, empty_lhs, vertices); + return vertices; +} + +std::vector NonFDTree::FillNonFDs() const { + std::vector result; + boost::dynamic_bitset<> lhs_for_traverse(GetRoot().GetNumAttributes()); + GetRoot().FillNonFDs(result, lhs_for_traverse); + return result; +} + +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/model/non_fd_tree.h b/src/core/algorithms/fd/dynfd/model/non_fd_tree.h new file mode 100644 index 0000000000..23b06aae23 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/non_fd_tree.h @@ -0,0 +1,101 @@ +#pragma once + +#include +#include + +#include + +#include "algorithms/fd/raw_fd.h" +#include "non_fd_tree_vertex.h" + +namespace algos::dynfd { + +/** + * NonFD prefix tree. + * + * Provides global tree manipulation and traversing methods. + * + * @see NonFDTreeVertex + */ + +class NonFDTree { +private: + std::shared_ptr root_; + +public: + explicit NonFDTree(size_t num_attributes) + : root_(std::make_shared(num_attributes)) {} + + [[nodiscard]] size_t GetNumAttributes() const noexcept { + return root_->GetNumAttributes(); + } + + std::shared_ptr GetRootPtr() noexcept { + return root_; + } + + [[nodiscard]] NonFDTreeVertex const& GetRoot() const noexcept { + return *root_; + } + + std::shared_ptr AddNonFD(boost::dynamic_bitset<> const& lhs, size_t rhs, + ViolatingRecordPair violationPair); + + bool ContainsNonFD(boost::dynamic_bitset<>& lhs, size_t rhs); + + std::shared_ptr FindNonFdVertex(boost::dynamic_bitset<> const& lhs); + + /** + * Recursively finds node representing given lhs and removes given rhs bit from it. + * Destroys vertices whose children became empty. + */ + void Remove(boost::dynamic_bitset<> const& lhs, size_t rhs) { + root_->RemoveRecursive(lhs, rhs, lhs.find_first()); + } + + /** + * Gets LHSs of all NonFDs having at least given lhs and rhs. 
+ + */ + [[nodiscard]] std::vector> GetNonFdAndGenerals( + boost::dynamic_bitset<>& lhs, size_t rhs) const; + + /** + * Gets LHSs of all NonFDs having a proper subset of giving lhs and rhs. + */ + std::vector> GetGenerals(boost::dynamic_bitset<>& lhs, size_t rhs); + + void RemoveGenerals(boost::dynamic_bitset<> const& lhs, size_t rhs); + + std::vector> GetNonFdAndSpecials(boost::dynamic_bitset<>& lhs, + size_t rhs); + + /** + * Gets LHSs of all NonFDs having given lhs as a proper subset and rhs. + */ + std::vector> GetSpecials(boost::dynamic_bitset<>& lhs, size_t rhs); + + void RemoveSpecials(boost::dynamic_bitset<>& lhs, size_t rhs); + + /** + * Checks if any NonFD has at least given lhs and rhs. + */ + [[nodiscard]] bool ContainsNonFdOrGeneral(boost::dynamic_bitset<> const& lhs, + size_t rhs) const { + return root_->ContainsNonFdOrGeneralRecursive(lhs, rhs, lhs.find_first()); + } + + [[nodiscard]] bool ContainsNonFdOrSpecial(boost::dynamic_bitset<>& lhs, size_t rhs) const; + + /** + * Gets nodes representing NonFDs with LHS of given arity. 
+ * @param target_level arity of returned NonFDs LHSs + */ + std::vector GetLevel(unsigned target_level); + + /** + * @return vector of all NonFDs + */ + [[nodiscard]] std::vector FillNonFDs() const; +}; +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/model/non_fd_tree_vertex.cpp b/src/core/algorithms/fd/dynfd/model/non_fd_tree_vertex.cpp new file mode 100644 index 0000000000..858a446ce8 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/non_fd_tree_vertex.cpp @@ -0,0 +1,337 @@ +#include "non_fd_tree_vertex.h" + +#include +#include + +#include + +namespace algos::dynfd { + +bool NonFDTreeVertex::AddChild(size_t const pos) { + contains_children_ = true; + if (children_.empty()) { + children_.resize(num_attributes_); + } + + if (!ContainsChildAt(pos)) { + children_[pos] = std::make_shared(num_attributes_); + return true; + } + + return false; +} + +void NonFDTreeVertex::GetLevelRecursive(unsigned const target_level, unsigned const cur_level, + boost::dynamic_bitset<> lhs, + std::vector& vertices) { + if (cur_level == target_level) { + vertices.emplace_back(shared_from_this(), lhs); + return; + } + + if (!HasChildren()) { + return; + } + + for (size_t i = 0; i < num_attributes_; ++i) { + if (ContainsChildAt(i)) { + lhs.set(i); + + children_[i]->GetLevelRecursive(target_level, cur_level + 1, lhs, vertices); + + lhs.reset(i); + } + } +} + +void NonFDTreeVertex::GetNonFdAndGeneralsRecursive( + boost::dynamic_bitset<> const& lhs, boost::dynamic_bitset<> cur_lhs, size_t const rhs, + size_t cur_bit, std::vector>& result) const { + if (IsNonFd(rhs)) { + result.push_back(cur_lhs); + } + + if (!HasChildren()) { + return; + } + + for (; cur_bit != boost::dynamic_bitset<>::npos; cur_bit = lhs.find_next(cur_bit)) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->GetNonFdAndGeneralsRecursive(lhs, cur_lhs, rhs, + lhs.find_next(cur_bit), result); + cur_lhs.reset(cur_bit); + } + } +} + 
+void NonFDTreeVertex::GetGeneralsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<>& cur_lhs, size_t const rhs, + size_t cur_bit, + std::vector>& result) const { + // TODO: optimize checking via counting bits + if (IsNonFd(rhs) && lhs != cur_lhs) { + result.push_back(cur_lhs); + } + + if (!HasChildren()) { + return; + } + + for (; cur_bit != boost::dynamic_bitset<>::npos; cur_bit = lhs.find_next(cur_bit)) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->GetGeneralsRecursive(lhs, cur_lhs, rhs, lhs.find_next(cur_bit), + result); + cur_lhs.reset(cur_bit); + } + } +} + +void NonFDTreeVertex::GetSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<>& cur_lhs, size_t const rhs, + size_t cur_bit, + std::vector>& result) const { + // TODO: optimize checking via counting bits + if (IsNonFd(rhs) && lhs.is_proper_subset_of(cur_lhs)) { + result.push_back(cur_lhs); + } + + if (!HasChildren()) { + return; + } + + size_t next_lhs_bit = lhs.test(cur_bit) ? cur_bit : lhs.find_next(cur_bit); + + for (; cur_bit != num_attributes_ && + (next_lhs_bit == boost::dynamic_bitset<>::npos || cur_bit != next_lhs_bit + 1); + ++cur_bit) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->GetSpecialsRecursive(lhs, cur_lhs, rhs, cur_bit + 1, result); + cur_lhs.reset(cur_bit); + } + } +} + +void NonFDTreeVertex::GetNonFdAndSpecialsRecursive( + boost::dynamic_bitset<> const& lhs, boost::dynamic_bitset<>& cur_lhs, size_t rhs, + size_t cur_bit, std::vector>& result) const { + // A NonFD whose LHS equals lhs also qualifies. TODO: optimize checking via counting bits + if (IsNonFd(rhs) && lhs.is_subset_of(cur_lhs)) { + result.push_back(cur_lhs); + } + + if (!HasChildren()) { + return; + } + + size_t next_lhs_bit = lhs.test(cur_bit) ? 
cur_bit : lhs.find_next(cur_bit); + + for (; cur_bit != num_attributes_ && + (next_lhs_bit == boost::dynamic_bitset<>::npos || cur_bit != next_lhs_bit + 1); + ++cur_bit) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); // descend with the same inclusive (non-strict) subset check + children_[cur_bit]->GetNonFdAndSpecialsRecursive(lhs, cur_lhs, rhs, cur_bit + 1, result); + cur_lhs.reset(cur_bit); + } + } +} + +bool NonFDTreeVertex::ContainsNonFdOrGeneralRecursive(boost::dynamic_bitset<> const& lhs, + size_t const rhs, + size_t const cur_bit) const { + if (IsNonFd(rhs)) { + return true; + } + + if (cur_bit == boost::dynamic_bitset<>::npos) { + return false; + } + + if (HasChildren() && ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs) && + children_[cur_bit]->ContainsNonFdOrGeneralRecursive(lhs, rhs, lhs.find_next(cur_bit))) { + return true; + } + + return ContainsNonFdOrGeneralRecursive(lhs, rhs, lhs.find_next(cur_bit)); +} + +bool NonFDTreeVertex::ContainsNonFdOrSpecialRecursive(boost::dynamic_bitset<> const& lhs, + size_t rhs, + size_t next_after_last_lhs_set_bit, + size_t cur_bit) const { + if (IsNonFd(rhs) && cur_bit >= next_after_last_lhs_set_bit) { + return true; + } + + if (cur_bit == num_attributes_) { + return false; + } + + if (HasChildren() && ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs) && + children_[cur_bit]->ContainsNonFdOrSpecialRecursive(lhs, rhs, next_after_last_lhs_set_bit, + cur_bit + 1)) { + return true; + } + + if (lhs.test(cur_bit)) { + return false; + } + + return ContainsNonFdOrSpecialRecursive(lhs, rhs, next_after_last_lhs_set_bit, cur_bit + 1); +} + +bool NonFDTreeVertex::RemoveRecursive(boost::dynamic_bitset<> const& lhs, size_t const rhs, + size_t current_lhs_attr) { + if (current_lhs_attr == boost::dynamic_bitset<>::npos) { + RemoveNonFd(rhs); + RemoveAttribute(rhs); + return true; + } + + if (HasChildren() && ContainsChildAt(current_lhs_attr)) { + if 
(!children_[current_lhs_attr]->RemoveRecursive(lhs, rhs, + 
lhs.find_next(current_lhs_attr))) { + return false; + } + + if (!children_[current_lhs_attr]->GetAttributes().any()) { + children_[current_lhs_attr].reset(); + children_[current_lhs_attr] = nullptr; + } + } + + if (IsLastNodeOf(rhs)) { + contains_children_ = false; + RemoveAttribute(rhs); + return true; + } + return false; +} + +void NonFDTreeVertex::RemoveGeneralsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t const rhs, + size_t cur_bit) { + // TODO: optimize checking via counting bits + if (IsNonFd(rhs) && lhs != cur_lhs) { + RemoveNonFd(rhs); + RemoveAttribute(rhs); + } + + if (!HasChildren()) { + return; + } + + for (; cur_bit != boost::dynamic_bitset<>::npos; cur_bit = lhs.find_next(cur_bit)) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->RemoveGeneralsRecursive(lhs, cur_lhs, rhs, lhs.find_next(cur_bit)); + cur_lhs.reset(cur_bit); + + if (!children_[cur_bit]->GetAttributes().any()) { + children_[cur_bit].reset(); + children_[cur_bit] = nullptr; + } + } + } + + if (IsLastNodeOf(rhs)) { + contains_children_ = false; + RemoveAttribute(rhs); + } +} + +void NonFDTreeVertex::RemoveSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t const rhs, + size_t cur_bit) { + // TODO: optimize checking via counting bits + if (IsNonFd(rhs) && lhs.is_subset_of(cur_lhs)) { + RemoveNonFd(rhs); + RemoveAttribute(rhs); + } + + if (!HasChildren()) { + return; + } + + size_t next_lhs_bit = lhs.test(cur_bit) ? 
cur_bit : lhs.find_next(cur_bit); + + for (; cur_bit != num_attributes_ && + (next_lhs_bit == boost::dynamic_bitset<>::npos || cur_bit != next_lhs_bit + 1); + ++cur_bit) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->RemoveSpecialsRecursive(lhs, cur_lhs, rhs, cur_bit + 1); + cur_lhs.reset(cur_bit); + + if (!children_[cur_bit]->GetAttributes().any()) { + children_[cur_bit].reset(); + children_[cur_bit] = nullptr; + } + } + } + + if (IsLastNodeOf(rhs)) { + contains_children_ = false; + RemoveAttribute(rhs); + } +} + +bool NonFDTreeVertex::IsLastNodeOf(size_t rhs) const noexcept { + if (!HasChildren()) { + return true; + } + return std::ranges::all_of(children_, [rhs](auto const& child) { + return (child != nullptr) && child->IsAttribute(rhs); + }); +} + +std::shared_ptr NonFDTreeVertex::GetChildIfExists(size_t const pos) const { + if (children_.empty()) { + return nullptr; + } + + assert(pos < children_.size()); + return children_[pos]; +} + +void NonFDTreeVertex::FillNonFDs(std::vector& fds, boost::dynamic_bitset<>& lhs) const { + for (size_t rhs = non_fds_.find_first(); rhs != boost::dynamic_bitset<>::npos; + rhs = non_fds_.find_next(rhs)) { + fds.emplace_back(lhs, rhs); + } + + if (!contains_children_) { + return; + } + + for (size_t i = 0; i < GetNumAttributes(); i++) { + if (!ContainsChildAt(i)) { + continue; + } + + lhs.set(i); + GetChild(i)->FillNonFDs(fds, lhs); + lhs.reset(i); + } +} + +bool NonFDTreeVertex::IsNonFdViolatingPairHolds(size_t pos, + std::shared_ptr relation_) { + if (!IsNonFd(pos) || !violations_[pos]) { + return false; + } + + ViolatingRecordPair& violating_pair = violations_[pos]; + if (relation_->IsRowIndexValid(violating_pair->first) && + relation_->IsRowIndexValid(violating_pair->second)) { + return true; + } else { + violating_pair = std::nullopt; + return false; + } +} + +} // namespace algos::dynfd diff --git 
a/src/core/algorithms/fd/dynfd/model/non_fd_tree_vertex.h b/src/core/algorithms/fd/dynfd/model/non_fd_tree_vertex.h new file mode 100644 index 0000000000..71a8d43948 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/model/non_fd_tree_vertex.h @@ -0,0 +1,197 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "algorithms/fd/raw_fd.h" +#include "dynamic_relation_data.h" + +namespace algos::dynfd { + +/** + * Pair of a pointer to a NonFD tree node and the corresponding LHS. + */ + +struct NonFDTreeVertex; + +using LhsPair = std::pair, boost::dynamic_bitset<>>; +using ViolatingRecordPair = std::optional>; + +/** + * Node of NonFD prefix tree. + * + * LHS of the NonFD is represented by the path to the node; the path must be built in ascending + * order, i.e. LHS {0, 1} can be obtained by getting child with position 0, then its child with + * position 1. If we go first to child 1, it will not contain child 0. + * + * RHS of the NonFD is represented by the non_fds_ member of the node. + */ + +struct NonFDTreeVertex : public std::enable_shared_from_this { +private: + std::vector> children_; + boost::dynamic_bitset<> non_fds_; + std::vector violations_; + + /** + * Union of children RHSs + */ + boost::dynamic_bitset<> attributes_; + + /** + * Total number of attributes in the relation + */ + size_t num_attributes_; + + /** + * Flag for optimizing child existence check. 
Is true iff any children_ is set + */ + bool contains_children_ = false; + + friend class NonFDTree; + + NonFDTreeVertex* GetChild(size_t pos) { + return children_.at(pos).get(); + } + + std::shared_ptr GetChildShared(size_t pos) { + return children_.at(pos); + } + + boost::dynamic_bitset<> GetAttributes() const noexcept { + return attributes_; + } + + void SetAttribute(size_t pos) noexcept { + attributes_.set(pos); + } + + void RemoveAttribute(size_t pos) noexcept { + attributes_.reset(pos); + } + + bool IsAttribute(size_t pos) const noexcept { + return attributes_.test(pos); + } + + /** + * Constructs empty child node at the given position. Does nothing if the child already exists. + * + * @param pos child position + * @return whether a child was constructed + */ + bool AddChild(size_t pos); + + void GetLevelRecursive(unsigned target_level, unsigned cur_level, boost::dynamic_bitset<> lhs, + std::vector& vertices); + + void GetNonFdAndGeneralsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t rhs, size_t cur_bit, + std::vector>& result) const; + + void GetGeneralsRecursive(boost::dynamic_bitset<> const& lhs, boost::dynamic_bitset<>& cur_lhs, + size_t rhs, size_t cur_bit, + std::vector>& result) const; + + void GetSpecialsRecursive(boost::dynamic_bitset<> const& lhs, boost::dynamic_bitset<>& cur_lhs, + size_t rhs, size_t cur_bit, + std::vector>& result) const; + + void GetNonFdAndSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<>& cur_lhs, size_t rhs, size_t cur_bit, + std::vector>& result) const; + + bool ContainsNonFdOrGeneralRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, + size_t cur_bit) const; + + bool ContainsNonFdOrSpecialRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, + size_t next_after_last_lhs_set_bit, size_t cur_bit) const; + + bool RemoveRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, size_t current_lhs_attr); + + void 
RemoveGeneralsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t rhs, size_t cur_bit); + + void RemoveSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t rhs, size_t cur_bit); + + bool IsLastNodeOf(size_t rhs) const noexcept; + + void FillNonFDs(std::vector& fds, boost::dynamic_bitset<>& lhs) const; + +public: + explicit NonFDTreeVertex(size_t numAttributes) noexcept + : non_fds_(numAttributes), + violations_(numAttributes), + attributes_(numAttributes), + num_attributes_(numAttributes) {} + + size_t GetNumAttributes() const noexcept { + return num_attributes_; + } + + boost::dynamic_bitset<> GetNonFDs() const noexcept { + return non_fds_; + } + + void SetViolation(size_t pos, ViolatingRecordPair violationPair) { + violations_[pos] = std::move(violationPair); + } + + void SetNonFd(size_t pos, ViolatingRecordPair violationPair) { + non_fds_.set(pos); + SetViolation(pos, std::move(violationPair)); + } + + /** + * Replaces stored RHS with provided one. + * @param new_non_fds RHS to replace with. 
+ * @param violations + * */ + void SetNonFds(boost::dynamic_bitset<> new_non_fds, + std::vector violations) noexcept { + non_fds_ = std::move(new_non_fds); + violations_ = std::move(violations); + } + + void RemoveNonFd(size_t const pos) noexcept { + non_fds_.reset(pos); + violations_[pos] = std::nullopt; + } + + bool IsNonFd(size_t const pos) const noexcept { + return non_fds_.test(pos); + } + + bool IsNonFdViolatingPairHolds(size_t const pos, + std::shared_ptr relation_); + + std::shared_ptr GetChildPtr(size_t const pos) { + return children_.at(pos); + } + + NonFDTreeVertex const* GetChild(size_t const pos) const { + return children_.at(pos).get(); + } + + std::shared_ptr GetChildShared(size_t const pos) const { + return children_.at(pos); + } + + std::shared_ptr GetChildIfExists(size_t pos) const; + + bool ContainsChildAt(size_t const pos) const { + return children_.at(pos) != nullptr; + } + + bool HasChildren() const noexcept { + return contains_children_; + } +}; + +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/validator.cpp b/src/core/algorithms/fd/dynfd/validator.cpp new file mode 100644 index 0000000000..b3eeb954d3 --- /dev/null +++ b/src/core/algorithms/fd/dynfd/validator.cpp @@ -0,0 +1,212 @@ +#include "validator.h" + +#include +#include +#include + +#include "fd/hycommon/preprocessor.h" + +namespace algos::dynfd { + +/** + * Splits `cluster` by the next LHS PLI and recurses into every resulting sub-cluster; once all + * LHS PLIs are consumed, compares the RHS values of the records left in the cluster. + * + * @return a pair of record ids from one fully refined cluster whose `rhs` values differ, or + * std::nullopt if no such pair is found. 
+ */ +ViolatingRecordPair Validator::FindClusterViolating(const DPLI::Cluster& cluster, + size_t sortedPlisIndex, + std::vector>& sorted_plis, + size_t const rhs) { + sortedPlisIndex++; + if (sortedPlisIndex < sorted_plis.size()) { + auto pli = sorted_plis[sortedPlisIndex]; + auto hash_index = pli->GetHashIndex(); + std::vector intersection; + // Indices stay valid when intersection reallocates; element pointers would dangle. + std::unordered_map<int, size_t> inverted_index; // value -> index into intersection + for (size_t record_id : cluster) { + int value = hash_index[record_id]; + if (auto it = inverted_index.find(value); it == inverted_index.end()) { + intersection.emplace_back(std::vector({record_id})); + inverted_index[value] = 
intersection.size() - 1; + } else { + intersection[it->second].PushBack(record_id); + } + } + // Inspect every sub-cluster; stop only once a violation is found. + for (auto const& next_cluster : intersection) { + if (auto violation = FindClusterViolating(next_cluster, sortedPlisIndex, sorted_plis, + rhs)) { + return violation; + } + } + } else { + auto rhs_pli = relation_->GetColumnData(rhs).GetPositionListIndex(); + auto it = cluster.begin(); + int value = rhs_pli->GetRecordValue(*it); + size_t first_record = *it; + ++it; + for (; it != cluster.end(); ++it) { + if (value != rhs_pli->GetRecordValue(*it)) { + return {{first_record, *it}}; + } + } + } + return std::nullopt; +} + +ViolatingRecordPair Validator::FindEmptyLhsViolation(size_t const rhs) const { + auto const rhs_pli = relation_->GetColumnData(rhs).GetPositionListIndex(); + if (rhs_pli->GetClustersNum() <= 1) { + return std::nullopt; + } + + return {{rhs_pli->GetCluster(0).Back(), rhs_pli->GetCluster(1).Back()}}; +} + +ViolatingRecordPair Validator::FindNewViolation(RawFD const& nonFd) { + if (nonFd.lhs_.count() == 0) { + return FindEmptyLhsViolation(nonFd.rhs_); + } + + std::vector> sorted_plis = GetSortedPlisForLhs(nonFd.lhs_); + + for (auto const& cluster : *sorted_plis[0]) { + if (auto violation = FindClusterViolating(cluster, 0, sorted_plis, nonFd.rhs_)) { + return violation; + } + } + return std::nullopt; +} + +std::vector> Validator::GetSortedPlisForLhs( + boost::dynamic_bitset<> const& lhs) const { + std::vector> sorted_plis; + sorted_plis.reserve(lhs.count()); + for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; + bit = lhs.find_next(bit)) { + sorted_plis.push_back(relation_->GetColumnData(bit).GetPositionListIndex()); + } + + std::ranges::sort(sorted_plis, [](auto const& lhs, auto const& rhs) { + return lhs->GetClustersNum() > rhs->GetClustersNum(); + }); + + return sorted_plis; +} + +ViolatingRecordPair Validator::IsFdInvalidated(RawFD const& fd, size_t first_insert_batch_id) { + if 
(fd.lhs_.count() == 0) { + return FindEmptyLhsViolation(fd.rhs_); + } + + for (std::vector> sorted_plis = 
GetSortedPlisForLhs(fd.lhs_); + auto const& cluster : *sorted_plis[0]) { + if (cluster.Back() >= first_insert_batch_id) { + if (auto violation = FindClusterViolating(cluster, 0, sorted_plis, fd.rhs_)) { + return violation; + } + } + } + return std::nullopt; +} + +bool Validator::NeedsValidation([[maybe_unused]] RawFD const& non_fd) const { + auto const vertex = negative_cover_tree_->FindNonFdVertex(non_fd.lhs_); + if (vertex == nullptr) { + return false; + } + + return !vertex->IsNonFdViolatingPairHolds(non_fd.rhs_, relation_); +} + +void Validator::ValidateFds(size_t first_insert_batch_id) { + for (size_t level = 0; level <= relation_->GetNumColumns(); ++level) { + struct NonFd { + RawFD rawFd; + ViolatingRecordPair violation; + }; + + std::vector invalid_fds; + auto level_fds = positive_cover_tree_->GetLevel(level); + for (auto& [vertex, lhs] : level_fds) { + boost::dynamic_bitset<> fds = vertex->GetFDs(); + for (size_t rhs = fds.find_first(); rhs != boost::dynamic_bitset<>::npos; + rhs = fds.find_next(rhs)) { + RawFD fd(lhs, rhs); + if (auto violation = IsFdInvalidated(fd, first_insert_batch_id)) { + invalid_fds.push_back({std::move(fd), std::move(violation)}); + } + } + } + + for (auto const& non_fd : invalid_fds) { + positive_cover_tree_->Remove(non_fd.rawFd.lhs_, non_fd.rawFd.rhs_); + if (non_fd.rawFd.lhs_.count() > 0) { + negative_cover_tree_->RemoveGenerals(non_fd.rawFd.lhs_, non_fd.rawFd.rhs_); + } + negative_cover_tree_->AddNonFD(non_fd.rawFd.lhs_, non_fd.rawFd.rhs_, non_fd.violation); + for (size_t new_lhs_attribute = 0; new_lhs_attribute < relation_->GetNumColumns(); + ++new_lhs_attribute) { + if (new_lhs_attribute == non_fd.rawFd.rhs_ || + non_fd.rawFd.lhs_.test(new_lhs_attribute)) { + continue; + } + + boost::dynamic_bitset<> new_lhs = non_fd.rawFd.lhs_; + new_lhs.set(new_lhs_attribute); + if (!positive_cover_tree_->ContainsFdOrGeneral(new_lhs, non_fd.rawFd.rhs_)) { + positive_cover_tree_->AddFD(new_lhs, non_fd.rawFd.rhs_); + } + } + } + + if 
(static_cast(invalid_fds.size()) > 0.1 * static_cast(level_fds.size())) { + // TODO: progressive violation search + } + } +} + +void Validator::ValidateNonFds() { + for (int level = static_cast(relation_->GetNumColumns()); level >= 0; --level) { + std::vector valid_fds; + auto level_non_fds = negative_cover_tree_->GetLevel(level); + for (auto& [vertex, lhs] : level_non_fds) { + boost::dynamic_bitset<> non_fds = vertex->GetNonFDs(); + for (size_t rhs = non_fds.find_first(); rhs != boost::dynamic_bitset<>::npos; + rhs = non_fds.find_next(rhs)) { + if (RawFD non_fd(lhs, rhs); NeedsValidation(non_fd)) { + if (auto violation = FindNewViolation(non_fd)) { + vertex->SetViolation(rhs, violation); + } else { + valid_fds.push_back(non_fd); + } + } + } + } + + for (auto const& fd : valid_fds) { + negative_cover_tree_->Remove(fd.lhs_, fd.rhs_); + positive_cover_tree_->RemoveSpecials(fd.lhs_, fd.rhs_); + positive_cover_tree_->AddFD(fd.lhs_, fd.rhs_); + for (size_t removed_lhs_attribute = fd.lhs_.find_first(); + removed_lhs_attribute != boost::dynamic_bitset<>::npos; + removed_lhs_attribute = fd.lhs_.find_next(removed_lhs_attribute)) { + boost::dynamic_bitset<> new_lhs = fd.lhs_; + new_lhs.reset(removed_lhs_attribute); + if (!negative_cover_tree_->ContainsNonFdOrSpecial(new_lhs, fd.rhs_)) { + negative_cover_tree_->AddNonFD(new_lhs, fd.rhs_, std::nullopt); + } + } + } + + if (static_cast(valid_fds.size()) > + 0.1 * static_cast(level_non_fds.size())) { + // TODO: depth first search + } + } +} + +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/dynfd/validator.h b/src/core/algorithms/fd/dynfd/validator.h new file mode 100644 index 0000000000..7d74afee3a --- /dev/null +++ b/src/core/algorithms/fd/dynfd/validator.h @@ -0,0 +1,43 @@ +#pragma once +#include +#include + +#include "model/FDTrees/fd_tree.h" +#include "model/dynamic_position_list_index.h" +#include "model/dynamic_relation_data.h" +#include "model/non_fd_tree.h" + +namespace algos::dynfd { +class Validator { 
+ std::shared_ptr positive_cover_tree_; + std::shared_ptr negative_cover_tree_; + std::shared_ptr relation_; + + ViolatingRecordPair FindClusterViolating(const DPLI::Cluster &cluster, size_t sortedPlisIndex, + std::vector> &sorted_plis, + size_t rhs); + + [[nodiscard]] ViolatingRecordPair FindEmptyLhsViolation(size_t rhs) const; + + ViolatingRecordPair FindNewViolation(RawFD const &nonFd); + + ViolatingRecordPair IsFdInvalidated(RawFD const &fd, size_t first_insert_batch_id); + + [[nodiscard]] std::vector> GetSortedPlisForLhs( + boost::dynamic_bitset<> const &lhs) const; + + [[nodiscard]] bool NeedsValidation(RawFD const &non_fd) const; + +public: + Validator(std::shared_ptr positive_cover_tree, + std::shared_ptr negative_cover_tree, + std::shared_ptr relation) noexcept + : positive_cover_tree_(std::move(positive_cover_tree)), + negative_cover_tree_(std::move(negative_cover_tree)), + relation_(std::move(relation)) {} + + void ValidateFds(size_t first_insert_batch_id); + + void ValidateNonFds(); +}; +} // namespace algos::dynfd diff --git a/src/core/algorithms/fd/hycommon/validator_helpers.cpp b/src/core/algorithms/fd/hycommon/validator_helpers.cpp index 291e7bd261..5e4442d7c9 100644 --- a/src/core/algorithms/fd/hycommon/validator_helpers.cpp +++ b/src/core/algorithms/fd/hycommon/validator_helpers.cpp @@ -1,7 +1,7 @@ #include "validator_helpers.h" #include "algorithms/fd/hycommon/util/pli_util.h" -#include "algorithms/fd/hyfd/model/fd_tree_vertex.h" +#include "model/FDTrees/fd_tree_vertex.h" #include "ucc/hyucc/model/ucc_tree_vertex.h" namespace algos::hy { @@ -46,8 +46,8 @@ std::vector CollectCurrentChildren( return next_level; } -using UCCLhsPair = algos::hyucc::LhsPair; -using FDLhsPair = algos::hyfd::fd_tree::LhsPair; +using UCCLhsPair = hyucc::LhsPair; +using FDLhsPair = model::LhsPair; template std::vector CollectCurrentChildren( std::vector const& cur_level_vertices, size_t num_attributes); template std::vector CollectCurrentChildren( diff --git 
a/src/core/algorithms/fd/hyfd/hyfd.cpp b/src/core/algorithms/fd/hyfd/hyfd.cpp index ce4adf07d7..9220ed6266 100644 --- a/src/core/algorithms/fd/hyfd/hyfd.cpp +++ b/src/core/algorithms/fd/hyfd/hyfd.cpp @@ -32,8 +32,7 @@ unsigned long long HyFD::ExecuteInternal() { Sampler sampler(plis_shared, pli_records_shared); - auto const positive_cover_tree = - std::make_shared(GetRelation().GetNumColumns()); + auto const positive_cover_tree = std::make_shared(GetRelation().GetNumColumns()); Inductor inductor(positive_cover_tree); Validator validator(positive_cover_tree, plis_shared, pli_records_shared); diff --git a/src/core/algorithms/fd/hyfd/inductor.cpp b/src/core/algorithms/fd/hyfd/inductor.cpp index 56a792bfc8..8165577bb9 100644 --- a/src/core/algorithms/fd/hyfd/inductor.cpp +++ b/src/core/algorithms/fd/hyfd/inductor.cpp @@ -33,7 +33,7 @@ void Inductor::SpecializeTreeForNonFd(boost::dynamic_bitset<> const& lhs_bits, s invalid_lhs_bits.set(i); - if (tree_->FindFdOrGeneral(invalid_lhs_bits, rhs_id)) { + if (tree_->ContainsFdOrGeneral(invalid_lhs_bits, rhs_id)) { invalid_lhs_bits.reset(i); continue; } diff --git a/src/core/algorithms/fd/hyfd/inductor.h b/src/core/algorithms/fd/hyfd/inductor.h index 51864f0b8f..95bcf9b14f 100644 --- a/src/core/algorithms/fd/hyfd/inductor.h +++ b/src/core/algorithms/fd/hyfd/inductor.h @@ -2,19 +2,19 @@ #include -#include "algorithms/fd/hyfd/model/fd_tree.h" #include "algorithms/fd/hyfd/model/non_fd_list.h" +#include "model/FDTrees/fd_tree.h" namespace algos::hyfd { class Inductor { private: - std::shared_ptr tree_; + std::shared_ptr tree_; void SpecializeTreeForNonFd(boost::dynamic_bitset<> const& lhs_bits, size_t rhs_id); public: - explicit Inductor(std::shared_ptr tree) noexcept : tree_(std::move(tree)) {} + explicit Inductor(std::shared_ptr tree) noexcept : tree_(std::move(tree)) {} void UpdateFdTree(NonFDList&& non_fds); }; diff --git a/src/core/algorithms/fd/hyfd/model/fd_tree.cpp b/src/core/algorithms/fd/hyfd/model/fd_tree.cpp deleted 
file mode 100644 index 3f76ab6b16..0000000000 --- a/src/core/algorithms/fd/hyfd/model/fd_tree.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "fd_tree.h" - -#include -#include - -#include - -namespace algos::hyfd::fd_tree { - -std::shared_ptr FDTree::AddFD(boost::dynamic_bitset<> const& lhs, size_t rhs) { - FDTreeVertex* cur_node = root_.get(); - cur_node->SetAttribute(rhs); - - for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; - bit = lhs.find_next(bit)) { - bool is_new = cur_node->AddChild(bit); - - if (is_new && lhs.find_next(bit) == boost::dynamic_bitset<>::npos) { - auto added_node = cur_node->GetChildPtr(bit); - added_node->SetAttribute(rhs); - added_node->SetFd(rhs); - return added_node; - } - - cur_node = cur_node->GetChild(bit); - cur_node->SetAttribute(rhs); - } - cur_node->SetFd(rhs); - return nullptr; -} - -bool FDTree::ContainsFD(boost::dynamic_bitset<> const& lhs, size_t rhs) { - FDTreeVertex const* cur_node = root_.get(); - - for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; - bit = lhs.find_next(bit)) { - if (!cur_node->HasChildren() || !cur_node->ContainsChildAt(bit)) { - return false; - } - - cur_node = cur_node->GetChild(bit); - } - - return cur_node->IsFd(rhs); -} - -std::vector> FDTree::GetFdAndGenerals(boost::dynamic_bitset<> const& lhs, - size_t rhs) const { - assert(lhs.count() != 0); - - std::vector> result; - boost::dynamic_bitset<> const empty_lhs(GetNumAttributes()); - size_t const starting_bit = lhs.find_first(); - - root_->GetFdAndGeneralsRecursive(lhs, empty_lhs, rhs, starting_bit, result); - - return result; -} - -std::vector FDTree::GetLevel(unsigned target_level) { - boost::dynamic_bitset<> const empty_lhs(GetNumAttributes()); - - std::vector vertices; - root_->GetLevelRecursive(target_level, 0, empty_lhs, vertices); - return vertices; -} - -} // namespace algos::hyfd::fd_tree diff --git a/src/core/algorithms/fd/hyfd/model/fd_tree.h b/src/core/algorithms/fd/hyfd/model/fd_tree.h deleted 
file mode 100644 index a1f14094fc..0000000000 --- a/src/core/algorithms/fd/hyfd/model/fd_tree.h +++ /dev/null @@ -1,85 +0,0 @@ -#pragma once - -#include -#include - -#include - -#include "algorithms/fd/raw_fd.h" -#include "fd_tree_vertex.h" - -namespace algos::hyfd::fd_tree { - -/** - * FD prefix tree. - * - * Provides global tree manipulation and traversing methods. - * - * @see FDTreeVertex - */ -class FDTree { -private: - std::shared_ptr root_; - -public: - explicit FDTree(size_t num_attributes) : root_(std::make_shared(num_attributes)) { - for (size_t id = 0; id < num_attributes; id++) { - root_->SetFd(id); - } - } - - [[nodiscard]] size_t GetNumAttributes() const noexcept { - return root_->GetNumAttributes(); - } - - std::shared_ptr GetRootPtr() noexcept { - return root_; - } - - [[nodiscard]] FDTreeVertex const& GetRoot() const noexcept { - return *root_; - } - - std::shared_ptr AddFD(boost::dynamic_bitset<> const& lhs, size_t rhs); - - bool ContainsFD(boost::dynamic_bitset<> const& lhs, size_t rhs); - - /** - * Recursively finds node representing given lhs and removes given rhs bit from it. - * Destroys vertices whose children became empty. - */ - void Remove(boost::dynamic_bitset<> const& lhs, size_t rhs) { - root_->RemoveRecursive(lhs, rhs, lhs.find_first()); - } - - /** - * Gets LHSs of all FDs having at least given lhs and rhs. - */ - [[nodiscard]] std::vector> GetFdAndGenerals( - boost::dynamic_bitset<> const& lhs, size_t rhs) const; - - /** - * Checks if any FD has at least given lhs and rhs. - */ - [[nodiscard]] bool FindFdOrGeneral(boost::dynamic_bitset<> const& lhs, size_t rhs) const { - return root_->FindFdOrGeneralRecursive(lhs, rhs, lhs.find_first()); - } - - /** - * Gets nodes representing FDs with LHS of given arity. 
- * @param target_level arity of returned FDs LHSs - */ - std::vector GetLevel(unsigned target_level); - - /** - * @return vector of all FDs - */ - [[nodiscard]] std::vector FillFDs() const { - std::vector result; - boost::dynamic_bitset<> lhs_for_traverse(GetRoot().GetNumAttributes()); - GetRoot().FillFDs(result, lhs_for_traverse); - return result; - } -}; - -} // namespace algos::hyfd::fd_tree diff --git a/src/core/algorithms/fd/hyfd/model/fd_tree_vertex.cpp b/src/core/algorithms/fd/hyfd/model/fd_tree_vertex.cpp deleted file mode 100644 index 23c7e47079..0000000000 --- a/src/core/algorithms/fd/hyfd/model/fd_tree_vertex.cpp +++ /dev/null @@ -1,139 +0,0 @@ -#include "fd_tree_vertex.h" - -#include -#include - -#include - -namespace algos::hyfd::fd_tree { - -void FDTreeVertex::GetLevelRecursive(unsigned target_level, unsigned cur_level, - boost::dynamic_bitset<> lhs, std::vector& vertices) { - if (cur_level == target_level) { - vertices.emplace_back(shared_from_this(), lhs); - return; - } - - if (!HasChildren()) { - return; - } - - for (size_t i = 0; i < num_attributes_; ++i) { - if (ContainsChildAt(i)) { - lhs.set(i); - - children_[i]->GetLevelRecursive(target_level, cur_level + 1, lhs, vertices); - - lhs.reset(i); - } - } -} - -void FDTreeVertex::GetFdAndGeneralsRecursive(boost::dynamic_bitset<> const& lhs, - boost::dynamic_bitset<> cur_lhs, size_t rhs, - size_t cur_bit, - std::vector>& result) const { - if (IsFd(rhs)) { - result.push_back(cur_lhs); - } - - if (!HasChildren()) { - return; - } - - for (; cur_bit != boost::dynamic_bitset<>::npos; cur_bit = lhs.find_next(cur_bit)) { - if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { - cur_lhs.set(cur_bit); - children_[cur_bit]->GetFdAndGeneralsRecursive(lhs, cur_lhs, rhs, lhs.find_next(cur_bit), - result); - cur_lhs.reset(cur_bit); - } - } -} - -bool FDTreeVertex::FindFdOrGeneralRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, - size_t cur_bit) const { - if (IsFd(rhs)) { - return true; 
- } - - if (cur_bit == boost::dynamic_bitset<>::npos) { - return false; - } - - if (HasChildren() && ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs) && - children_[cur_bit]->FindFdOrGeneralRecursive(lhs, rhs, lhs.find_next(cur_bit))) { - return true; - } - - return FindFdOrGeneralRecursive(lhs, rhs, lhs.find_next(cur_bit)); -} - -bool FDTreeVertex::RemoveRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, - size_t current_lhs_attr) { - if (current_lhs_attr == boost::dynamic_bitset<>::npos) { - RemoveFd(rhs); - RemoveAttribute(rhs); - return true; - } - - if (HasChildren() && ContainsChildAt(current_lhs_attr)) { - if (!children_[current_lhs_attr]->RemoveRecursive(lhs, rhs, - lhs.find_next(current_lhs_attr))) { - return false; - } - - if (!children_[current_lhs_attr]->GetAttributes().any()) { - children_[current_lhs_attr].reset(); - children_[current_lhs_attr] = nullptr; - } - } - - if (IsLastNodeOf(rhs)) { - contains_children_ = false; - RemoveAttribute(rhs); - return true; - } - return false; -} - -bool FDTreeVertex::IsLastNodeOf(size_t rhs) const noexcept { - if (!HasChildren()) { - return true; - } - return std::all_of(children_.cbegin(), children_.cend(), [rhs](auto const& child) { - return (child != nullptr) && child->IsAttribute(rhs); - }); -} - -std::shared_ptr FDTreeVertex::GetChildIfExists(size_t pos) const { - if (children_.empty()) { - return nullptr; - } - - assert(pos < children_.size()); - return children_[pos]; -} - -void FDTreeVertex::FillFDs(std::vector& fds, boost::dynamic_bitset<>& lhs) const { - for (size_t rhs = fds_.find_first(); rhs != boost::dynamic_bitset<>::npos; - rhs = fds_.find_next(rhs)) { - fds.emplace_back(lhs, rhs); - } - - if (!contains_children_) { - return; - } - - for (size_t i = 0; i < GetNumAttributes(); i++) { - if (!ContainsChildAt(i)) { - continue; - } - - lhs.set(i); - GetChild(i)->FillFDs(fds, lhs); - lhs.reset(i); - } -} - -} // namespace algos::hyfd::fd_tree diff --git 
a/src/core/algorithms/fd/hyfd/validator.cpp b/src/core/algorithms/fd/hyfd/validator.cpp index 54fbf23a4d..815ee0af22 100644 --- a/src/core/algorithms/fd/hyfd/validator.cpp +++ b/src/core/algorithms/fd/hyfd/validator.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -12,6 +13,7 @@ #include "algorithms/fd/hycommon/util/pli_util.h" #include "algorithms/fd/hycommon/validator_helpers.h" #include "hyfd_config.h" +#include "table/position_list_index.h" namespace { @@ -108,13 +110,13 @@ boost::dynamic_bitset<> Refine(algos::hy::IdPairs& comparison_suggestions, } size_t AddExtendedCandidatesFromInvalid(std::vector& next_level, - algos::hyfd::fd_tree::FDTree& fds_tree, + model::FDTree& fds_tree, std::vector const& invalid_fds, size_t num_attributes) { size_t candidates = 0; for (auto const& [lhs, rhs] : invalid_fds) { for (size_t attr = 0; attr < num_attributes; ++attr) { - if (lhs.test(attr) || rhs == attr || fds_tree.FindFdOrGeneral(lhs, attr) || + if (lhs.test(attr) || rhs == attr || fds_tree.ContainsFdOrGeneral(lhs, attr) || (fds_tree.GetRoot().HasChildren() && fds_tree.GetRoot().ContainsChildAt(attr) && fds_tree.GetRoot().GetChild(attr)->IsFd(rhs))) { continue; @@ -123,7 +125,7 @@ size_t AddExtendedCandidatesFromInvalid(std::vector& next_ boost::dynamic_bitset<> lhs_ext = lhs; lhs_ext.set(attr); - if (fds_tree.FindFdOrGeneral(lhs_ext, rhs)) { + if (fds_tree.ContainsFdOrGeneral(lhs_ext, rhs)) { continue; } diff --git a/src/core/algorithms/fd/hyfd/validator.h b/src/core/algorithms/fd/hyfd/validator.h index 4278a112eb..f4b359083b 100644 --- a/src/core/algorithms/fd/hyfd/validator.h +++ b/src/core/algorithms/fd/hyfd/validator.h @@ -5,20 +5,19 @@ #include #include "algorithms/fd/hycommon/primitive_validations.h" -#include "algorithms/fd/hyfd/model/fd_tree.h" #include "algorithms/fd/raw_fd.h" +#include "model/FDTrees/fd_tree.h" #include "model/table/position_list_index.h" #include "types.h" namespace algos::hyfd { -using LhsPair = fd_tree::LhsPair; 
+using LhsPair = model::LhsPair; class Validator { -private: using FDValidations = hy::PrimitiveValidations; - std::shared_ptr fds_; + std::shared_ptr fds_; hy::PLIsPtr plis_; hy::RowsPtr compressed_records_; @@ -38,7 +37,7 @@ class Validator { } public: - Validator(std::shared_ptr fds, hy::PLIsPtr plis, + Validator(std::shared_ptr fds, hy::PLIsPtr plis, hy::RowsPtr compressed_records) noexcept : fds_(std::move(fds)), plis_(std::move(plis)), diff --git a/src/core/algorithms/fd/mining_algorithms.h b/src/core/algorithms/fd/mining_algorithms.h index b12a806906..0b487ebc66 100644 --- a/src/core/algorithms/fd/mining_algorithms.h +++ b/src/core/algorithms/fd/mining_algorithms.h @@ -3,6 +3,7 @@ #include "algorithms/fd/aidfd/aid.h" #include "algorithms/fd/depminer/depminer.h" #include "algorithms/fd/dfd/dfd.h" +#include "algorithms/fd/dynfd/dynfd.h" #include "algorithms/fd/fastfds/fastfds.h" #include "algorithms/fd/fd_mine/fd_mine.h" #include "algorithms/fd/fdep/fdep.h" diff --git a/src/core/model/FDTrees/fd_tree.cpp b/src/core/model/FDTrees/fd_tree.cpp new file mode 100644 index 0000000000..61043f1fac --- /dev/null +++ b/src/core/model/FDTrees/fd_tree.cpp @@ -0,0 +1,8 @@ +#include "fd_tree.h" + +#include +#include + +#include + +namespace model {} // namespace model diff --git a/src/core/model/FDTrees/fd_tree.h b/src/core/model/FDTrees/fd_tree.h new file mode 100644 index 0000000000..25a472218c --- /dev/null +++ b/src/core/model/FDTrees/fd_tree.h @@ -0,0 +1,200 @@ +#pragma once + +#include +#include + +#include + +#include "algorithms/fd/raw_fd.h" +#include "model/FDTrees/fd_tree_vertex.h" + +namespace model { + +/** + * FD prefix tree. + * + * Provides global tree manipulation and traversing methods. 
+ * + * @see FDTreeVertex + */ + +class FDTree { +private: + std::shared_ptr root_; + + using LhsPair = std::pair, boost::dynamic_bitset<>>; + +public: + explicit FDTree(size_t num_attributes) : root_(std::make_shared(num_attributes)) { + for (size_t id = 0; id < num_attributes; id++) { + root_->SetFd(id); + } + } + + [[nodiscard]] size_t GetNumAttributes() const noexcept { + return root_->GetNumAttributes(); + } + + std::shared_ptr GetRootPtr() noexcept { + return root_; + } + + [[nodiscard]] FDTreeVertex const& GetRoot() const noexcept { + return *root_; + } + + std::shared_ptr AddFD(boost::dynamic_bitset<> const& lhs, size_t rhs) { + FDTreeVertex* cur_node = root_.get(); + cur_node->SetAttribute(rhs); + + for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; + bit = lhs.find_next(bit)) { + bool is_new = cur_node->AddChild(bit); + + if (is_new && lhs.find_next(bit) == boost::dynamic_bitset<>::npos) { + auto added_node = cur_node->GetChildPtr(bit); + added_node->SetAttribute(rhs); + added_node->SetFd(rhs); + return added_node; + } + + cur_node = cur_node->GetChild(bit); + cur_node->SetAttribute(rhs); + } + cur_node->SetFd(rhs); + return nullptr; + } + + bool ContainsFD(boost::dynamic_bitset<> const& lhs, size_t rhs) { + FDTreeVertex const* cur_node = root_.get(); + + for (size_t bit = lhs.find_first(); bit != boost::dynamic_bitset<>::npos; + bit = lhs.find_next(bit)) { + if (!cur_node->HasChildren() || !cur_node->ContainsChildAt(bit)) { + return false; + } + + cur_node = cur_node->GetChild(bit); + } + + return cur_node->IsFd(rhs); + } + + /** + * Recursively finds node representing given lhs and removes given rhs bit from it. + * Destroys vertices whose children became empty. + */ + void Remove(boost::dynamic_bitset<> const& lhs, size_t rhs) { + root_->RemoveRecursive(lhs, rhs, lhs.find_first()); + } + + /** + * Gets LHSs of all FDs having at least given lhs and rhs. 
+ */ + [[nodiscard]] std::vector> GetFdAndGenerals( + boost::dynamic_bitset<> const& lhs, size_t rhs) const { + assert(lhs.count() != 0); + + std::vector> result; + boost::dynamic_bitset const empty_lhs(GetNumAttributes()); + size_t const starting_bit = lhs.find_first(); + + root_->GetFdAndGeneralsRecursive(lhs, empty_lhs, rhs, starting_bit, result); + + return result; + } + + /** + * Gets LHSs of all FDs having a proper subset of giving lhs and rhs. + */ + std::vector> GetGenerals(boost::dynamic_bitset<> const& lhs, + size_t rhs) { + assert(lhs.count() != 0); + + std::vector> result; + boost::dynamic_bitset empty_lhs(GetNumAttributes()); + size_t const starting_bit = lhs.find_first(); + + root_->GetGeneralsRecursive(lhs, empty_lhs, rhs, starting_bit, result); + + return result; + } + + void RemoveGenerals(boost::dynamic_bitset<> const& lhs, size_t rhs) { + assert(lhs.count() != 0); + + boost::dynamic_bitset<> empty_lhs(GetNumAttributes()); + root_->RemoveGeneralsRecursive(lhs, empty_lhs, rhs, lhs.find_first()); + } + + std::vector> GetFdAndSpecials(boost::dynamic_bitset<> const& lhs, + size_t rhs) { + std::vector> result; + boost::dynamic_bitset empty_lhs(GetNumAttributes()); + + root_->GetFdAndSpecialsRecursive(lhs, empty_lhs, rhs, 0, result); + + return result; + } + + /** + * Gets LHSs of all FDs having given lhs as a proper subset and rhs. + */ + std::vector> GetSpecials(boost::dynamic_bitset<> const& lhs, + size_t rhs) { + std::vector> result; + boost::dynamic_bitset empty_lhs(GetNumAttributes()); + + root_->GetSpecialsRecursive(lhs, empty_lhs, rhs, 0, result); + + return result; + } + + void RemoveSpecials(boost::dynamic_bitset<> const& lhs, size_t rhs) { + boost::dynamic_bitset empty_lhs(GetNumAttributes()); + root_->RemoveSpecialsRecursive(lhs, empty_lhs, rhs, 0); + } + + /** + * Checks if any FD has at least given lhs and rhs. 
+ */ + [[nodiscard]] bool ContainsFdOrGeneral(boost::dynamic_bitset<> const& lhs, size_t rhs) const { + return root_->ContainsFdOrGeneralRecursive(lhs, rhs, lhs.find_first()); + } + + [[nodiscard]] bool ContainsFdOrSpecial(boost::dynamic_bitset<> const& lhs, size_t rhs) const { + size_t next_after_last_lhs_set_bit = 0; + if (lhs.find_first() != boost::dynamic_bitset<>::npos) { + next_after_last_lhs_set_bit = lhs.find_first(); + while (lhs.find_next(next_after_last_lhs_set_bit) != boost::dynamic_bitset<>::npos) { + next_after_last_lhs_set_bit = lhs.find_next(next_after_last_lhs_set_bit); + } + ++next_after_last_lhs_set_bit; + } + + return root_->ContainsFdOrSpecialRecursive(lhs, rhs, next_after_last_lhs_set_bit, 0); + } + + /** + * Gets nodes representing FDs with LHS of given arity. + * @param target_level arity of returned FDs LHSs + */ + std::vector GetLevel(unsigned target_level) { + boost::dynamic_bitset const empty_lhs(GetNumAttributes()); + + std::vector vertices; + root_->GetLevelRecursive(target_level, 0, empty_lhs, vertices); + return vertices; + } + + /** + * @return vector of all FDs + */ + [[nodiscard]] std::vector FillFDs() const { + std::vector result; + boost::dynamic_bitset<> lhs_for_traverse(GetRoot().GetNumAttributes()); + GetRoot().FillFDs(result, lhs_for_traverse); + return result; + } +}; +} // namespace model diff --git a/src/core/model/FDTrees/fd_tree_vertex.cpp b/src/core/model/FDTrees/fd_tree_vertex.cpp new file mode 100644 index 0000000000..821539ccbb --- /dev/null +++ b/src/core/model/FDTrees/fd_tree_vertex.cpp @@ -0,0 +1,306 @@ +#include "fd_tree_vertex.h" + +#include +#include + +#include + +namespace model { + +void FDTreeVertex::GetLevelRecursive(unsigned target_level, unsigned cur_level, + boost::dynamic_bitset<> lhs, std::vector& vertices) { + if (cur_level == target_level) { + vertices.emplace_back(shared_from_this(), lhs); + return; + } + + if (!HasChildren()) { + return; + } + + for (size_t i = 0; i < num_attributes_; ++i) { + if 
(ContainsChildAt(i)) { + lhs.set(i); + + children_[i]->GetLevelRecursive(target_level, cur_level + 1, lhs, vertices); + + lhs.reset(i); + } + } +} + +void FDTreeVertex::GetGeneralsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<>& cur_lhs, size_t rhs, + size_t cur_bit, + std::vector>& result) const { + // TODO: optimize checking via counting bits + if (IsFd(rhs) && lhs != cur_lhs) { + result.push_back(cur_lhs); + } + + if (!HasChildren()) { + return; + } + + for (; cur_bit != boost::dynamic_bitset<>::npos; cur_bit = lhs.find_next(cur_bit)) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->GetGeneralsRecursive(lhs, cur_lhs, rhs, lhs.find_next(cur_bit), + result); + cur_lhs.reset(cur_bit); + } + } +} + +void FDTreeVertex::GetFdAndGeneralsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t rhs, + size_t cur_bit, + std::vector>& result) const { + if (IsFd(rhs)) { + result.push_back(cur_lhs); + } + + if (!HasChildren()) { + return; + } + + for (; cur_bit != boost::dynamic_bitset<>::npos; cur_bit = lhs.find_next(cur_bit)) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->GetFdAndGeneralsRecursive(lhs, cur_lhs, rhs, lhs.find_next(cur_bit), + result); + cur_lhs.reset(cur_bit); + } + } +} + +void FDTreeVertex::GetSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<>& cur_lhs, size_t rhs, + size_t cur_bit, + std::vector>& result) const { + // TODO: optimize checking via counting bits + if (IsFd(rhs) && lhs.is_proper_subset_of(cur_lhs)) { + result.push_back(cur_lhs); + } + + if (!HasChildren()) { + return; + } + + size_t next_lhs_bit = lhs.test(cur_bit) ? 
cur_bit : lhs.find_next(cur_bit); + + for (; cur_bit != num_attributes_ && + (next_lhs_bit == boost::dynamic_bitset<>::npos || cur_bit != next_lhs_bit + 1); + ++cur_bit) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->GetSpecialsRecursive(lhs, cur_lhs, rhs, cur_bit + 1, result); + cur_lhs.reset(cur_bit); + } + } +} + +void FDTreeVertex::GetFdAndSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<>& cur_lhs, size_t rhs, + size_t cur_bit, + std::vector>& result) const { + // TODO: optimize checking via counting bits + if (IsFd(rhs) && lhs.is_subset_of(cur_lhs)) { + result.push_back(cur_lhs); + } + + if (!HasChildren()) { + return; + } + + size_t next_lhs_bit = lhs.test(cur_bit) ? cur_bit : lhs.find_next(cur_bit); + + for (; cur_bit != num_attributes_ && + (next_lhs_bit == boost::dynamic_bitset<>::npos || cur_bit != next_lhs_bit + 1); + ++cur_bit) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->GetSpecialsRecursive(lhs, cur_lhs, rhs, cur_bit + 1, result); + cur_lhs.reset(cur_bit); + } + } +} + +bool FDTreeVertex::ContainsFdOrGeneralRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, + size_t cur_bit) const { + if (IsFd(rhs)) { + return true; + } + + if (cur_bit == boost::dynamic_bitset<>::npos) { + return false; + } + + if (HasChildren() && ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs) && + children_[cur_bit]->ContainsFdOrGeneralRecursive(lhs, rhs, lhs.find_next(cur_bit))) { + return true; + } + + return ContainsFdOrGeneralRecursive(lhs, rhs, lhs.find_next(cur_bit)); +} + +bool FDTreeVertex::ContainsFdOrSpecialRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, + size_t next_after_last_lhs_set_bit, + size_t cur_bit) const { + if (IsFd(rhs) && cur_bit >= next_after_last_lhs_set_bit) { + return true; + } + + if (cur_bit == num_attributes_) { + return false; + } + + if 
(HasChildren() && ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs) && + children_[cur_bit]->ContainsFdOrSpecialRecursive(lhs, rhs, next_after_last_lhs_set_bit, + cur_bit + 1)) { + return true; + } + + if (lhs.test(cur_bit)) { + return false; + } + + return ContainsFdOrSpecialRecursive(lhs, rhs, next_after_last_lhs_set_bit, cur_bit + 1); +} + +bool FDTreeVertex::RemoveRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, + size_t current_lhs_attr) { + if (current_lhs_attr == boost::dynamic_bitset<>::npos) { + RemoveFd(rhs); + RemoveAttribute(rhs); + return true; + } + + if (HasChildren() && ContainsChildAt(current_lhs_attr)) { + if (!children_[current_lhs_attr]->RemoveRecursive(lhs, rhs, + lhs.find_next(current_lhs_attr))) { + return false; + } + + if (!children_[current_lhs_attr]->GetAttributes().any()) { + children_[current_lhs_attr].reset(); + children_[current_lhs_attr] = nullptr; + } + } + + if (IsLastNodeOf(rhs)) { + contains_children_ = false; + RemoveAttribute(rhs); + return true; + } + return false; +} + +void FDTreeVertex::RemoveGeneralsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t rhs, + size_t cur_bit) { + // TODO: optimize checking via counting bits + if (IsFd(rhs) && lhs != cur_lhs) { + RemoveFd(rhs); + RemoveAttribute(rhs); + } + + if (!HasChildren()) { + return; + } + + for (; cur_bit != boost::dynamic_bitset<>::npos; cur_bit = lhs.find_next(cur_bit)) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->RemoveGeneralsRecursive(lhs, cur_lhs, rhs, lhs.find_next(cur_bit)); + cur_lhs.reset(cur_bit); + + if (!children_[cur_bit]->GetAttributes().any()) { + children_[cur_bit].reset(); + children_[cur_bit] = nullptr; + } + } + } + + if (IsLastNodeOf(rhs)) { + contains_children_ = false; + RemoveAttribute(rhs); + } +} + +void FDTreeVertex::RemoveSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> 
cur_lhs, size_t rhs, + size_t cur_bit) { + // TODO: optimize checking via counting bits + if (IsFd(rhs) && lhs.is_subset_of(cur_lhs)) { + RemoveFd(rhs); + RemoveAttribute(rhs); + } + + if (!HasChildren()) { + return; + } + + size_t next_lhs_bit = lhs.test(cur_bit) ? cur_bit : lhs.find_next(cur_bit); + + for (; cur_bit != num_attributes_ && + (next_lhs_bit == boost::dynamic_bitset<>::npos || cur_bit != next_lhs_bit + 1); + ++cur_bit) { + if (ContainsChildAt(cur_bit) && children_[cur_bit]->IsAttribute(rhs)) { + cur_lhs.set(cur_bit); + children_[cur_bit]->RemoveSpecialsRecursive(lhs, cur_lhs, rhs, cur_bit + 1); + cur_lhs.reset(cur_bit); + + if (!children_[cur_bit]->GetAttributes().any()) { + children_[cur_bit].reset(); + children_[cur_bit] = nullptr; + } + } + } + + if (IsLastNodeOf(rhs)) { + contains_children_ = false; + RemoveAttribute(rhs); + } +} + +bool FDTreeVertex::IsLastNodeOf(size_t rhs) const noexcept { + if (!HasChildren()) { + return true; + } + return std::all_of(children_.cbegin(), children_.cend(), [rhs](auto const& child) { + return (child != nullptr) && child->IsAttribute(rhs); + }); +} + +std::shared_ptr FDTreeVertex::GetChildIfExists(size_t pos) const { + if (children_.empty()) { + return nullptr; + } + + assert(pos < children_.size()); + return children_[pos]; +} + +void FDTreeVertex::FillFDs(std::vector& fds, boost::dynamic_bitset<>& lhs) const { + for (size_t rhs = fds_.find_first(); rhs != boost::dynamic_bitset<>::npos; + rhs = fds_.find_next(rhs)) { + fds.emplace_back(lhs, rhs); + } + + if (!contains_children_) { + return; + } + + for (size_t i = 0; i < GetNumAttributes(); i++) { + if (!ContainsChildAt(i)) { + continue; + } + + lhs.set(i); + GetChild(i)->FillFDs(fds, lhs); + lhs.reset(i); + } +} + +} // namespace model diff --git a/src/core/algorithms/fd/hyfd/model/fd_tree_vertex.h b/src/core/model/FDTrees/fd_tree_vertex.h similarity index 71% rename from src/core/algorithms/fd/hyfd/model/fd_tree_vertex.h rename to 
src/core/model/FDTrees/fd_tree_vertex.h index 22b6725d73..9b36d5abdb 100644 --- a/src/core/algorithms/fd/hyfd/model/fd_tree_vertex.h +++ b/src/core/model/FDTrees/fd_tree_vertex.h @@ -8,13 +8,10 @@ #include "algorithms/fd/raw_fd.h" -namespace algos::hyfd::fd_tree { +namespace model { class FDTreeVertex; -/** - * Pair of pointer ot FD tree node and the corresponding LHS. - */ using LhsPair = std::pair, boost::dynamic_bitset<>>; /** @@ -27,6 +24,10 @@ using LhsPair = std::pair, boost::dynamic_bitset<> * RHS of the FD is represented by the fds attribute of the node. */ class FDTreeVertex : public std::enable_shared_from_this { + /** + * Pair of pointer ot FD tree node and the corresponding LHS. + */ + private: std::vector> children_; boost::dynamic_bitset<> fds_; @@ -95,15 +96,37 @@ class FDTreeVertex : public std::enable_shared_from_this { void GetLevelRecursive(unsigned target_level, unsigned cur_level, boost::dynamic_bitset<> lhs, std::vector& vertices); + void GetGeneralsRecursive(boost::dynamic_bitset<> const& lhs, boost::dynamic_bitset<>& cur_lhs, + size_t rhs, size_t cur_bit, + std::vector>& result) const; + void GetFdAndGeneralsRecursive(boost::dynamic_bitset<> const& lhs, boost::dynamic_bitset<> cur_lhs, size_t rhs, size_t cur_bit, std::vector>& result) const; - bool FindFdOrGeneralRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, - size_t cur_bit) const; + void GetSpecialsRecursive(boost::dynamic_bitset<> const& lhs, boost::dynamic_bitset<>& cur_lhs, + size_t rhs, size_t cur_bit, + std::vector>& result) const; + + void GetFdAndSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<>& cur_lhs, size_t rhs, size_t cur_bit, + std::vector>& result) const; + + bool ContainsFdOrGeneralRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, + size_t cur_bit) const; + + // TODO: make shorter the name of the 3rd parameter + bool ContainsFdOrSpecialRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, + size_t 
next_after_last_lhs_set_bit, size_t cur_bit) const; bool RemoveRecursive(boost::dynamic_bitset<> const& lhs, size_t rhs, size_t current_lhs_attr); + void RemoveGeneralsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t rhs, size_t cur_bit); + + void RemoveSpecialsRecursive(boost::dynamic_bitset<> const& lhs, + boost::dynamic_bitset<> cur_lhs, size_t rhs, size_t cur_bit); + bool IsLastNodeOf(size_t rhs) const noexcept; void FillFDs(std::vector& fds, boost::dynamic_bitset<>& lhs) const; @@ -155,4 +178,4 @@ class FDTreeVertex : public std::enable_shared_from_this { } }; -} // namespace algos::hyfd::fd_tree +} // namespace model diff --git a/src/core/model/table/vertical_map.cpp b/src/core/model/table/vertical_map.cpp index 1e57a1ac6a..ff81142bbb 100644 --- a/src/core/model/table/vertical_map.cpp +++ b/src/core/model/table/vertical_map.cpp @@ -1,6 +1,7 @@ #include "vertical_map.h" #include +#include #include #include diff --git a/src/python_bindings/fd/bind_fd.cpp b/src/python_bindings/fd/bind_fd.cpp index 609f7eb8d8..79c25ecf5d 100644 --- a/src/python_bindings/fd/bind_fd.cpp +++ b/src/python_bindings/fd/bind_fd.cpp @@ -54,12 +54,12 @@ void BindFd(py::module_& main_module) { static constexpr auto kPyroName = "Pyro"; static constexpr auto kTaneName = "Tane"; static constexpr auto kPFDTaneName = "PFDTane"; - auto fd_algos_module = - BindPrimitive(fd_module, py::overload_cast<>(&FDAlgorithm::FdList, py::const_), - "FdAlgorithm", "get_fds", - {"HyFD", "Aid", "Depminer", "DFD", "FastFDs", "FDep", "FdMine", - "FUN", kPyroName, kTaneName, kPFDTaneName}); + auto fd_algos_module = BindPrimitive( + fd_module, py::overload_cast<>(&FDAlgorithm::FdList, py::const_), "FdAlgorithm", + "get_fds", + {"HyFD", "Aid", "Depminer", "DFD", "DynFD", "Fast FDs", "FDep", "FdMine", "FUN", + kPyroName, kTaneName, kPFDTaneName}); auto define_submodule = [&fd_algos_module, &main_module](char const* name, std::vector algorithms) { diff --git 
a/src/tests/all_csv_configs.cpp b/src/tests/all_csv_configs.cpp index 869a7a6ed3..85025dc727 100644 --- a/src/tests/all_csv_configs.cpp +++ b/src/tests/all_csv_configs.cpp @@ -101,6 +101,18 @@ CSVConfig const kTestDif1 = CreateCsvConfig("dif_tables/TestDif1.csv", ',', true CSVConfig const kTestDif2 = CreateCsvConfig("dif_tables/TestDif2.csv", ',', true); CSVConfig const kTestDif3 = CreateCsvConfig("dif_tables/TestDif3.csv", ',', true); CSVConfig const kSimpleTypes1 = CreateCsvConfig("SimpleTypes1.csv", ',', true); +CSVConfig const kTestDynamicFDAfterAll = + CreateCsvConfig("dynamic_fd/TestDynamicAfterAll.csv", ',', true); +CSVConfig const kTestDynamicFDAfterDelete = + CreateCsvConfig("dynamic_fd/TestDynamicAfterDelete.csv", ',', true); +CSVConfig const kTestDynamicFDAfterInsert = + CreateCsvConfig("dynamic_fd/TestDynamicAfterInsert.csv", ',', true); +CSVConfig const kTestDynamicFDAfterInsertAndDelete = + CreateCsvConfig("dynamic_fd/TestDynamicAfterInsertAndDelete.csv", ',', true); +CSVConfig const kTestDynamicFDAfterInsertAndUpdate = + CreateCsvConfig("dynamic_fd/TestDynamicAfterInsertAndUpdate.csv", ',', true); +CSVConfig const kTestDynamicFDAfterUpdateAndDelete = + CreateCsvConfig("dynamic_fd/TestDynamicAfterUpdateAndDelete.csv", ',', true); CSVConfig const kTestDynamicFDInit = CreateCsvConfig("dynamic_fd/TestDynamicInit.csv", ',', true); CSVConfig const kTestDynamicFDEmpty = CreateCsvConfig("dynamic_fd/TestDynamicEmpty.csv", ',', true); CSVConfig const kTestDynamicFDInsert = diff --git a/src/tests/all_csv_configs.h b/src/tests/all_csv_configs.h index 40e7c25e86..63640b3956 100644 --- a/src/tests/all_csv_configs.h +++ b/src/tests/all_csv_configs.h @@ -89,6 +89,12 @@ extern CSVConfig const kTestDif1; extern CSVConfig const kTestDif2; extern CSVConfig const kTestDif3; extern CSVConfig const kSimpleTypes1; +extern CSVConfig const kTestDynamicFDAfterAll; +extern CSVConfig const kTestDynamicFDAfterDelete; +extern CSVConfig const kTestDynamicFDAfterInsert; +extern 
CSVConfig const kTestDynamicFDAfterInsertAndDelete; +extern CSVConfig const kTestDynamicFDAfterInsertAndUpdate; +extern CSVConfig const kTestDynamicFDAfterUpdateAndDelete; extern CSVConfig const kTestDynamicFDInit; extern CSVConfig const kTestDynamicFDEmpty; extern CSVConfig const kTestDynamicFDInsert; diff --git a/src/tests/test_dynamic_fd_algorithm.cpp b/src/tests/test_dynamic_fd_algorithm.cpp new file mode 100644 index 0000000000..b0326c61dc --- /dev/null +++ b/src/tests/test_dynamic_fd_algorithm.cpp @@ -0,0 +1,86 @@ +#include + +#include +#include + +#include "algorithms/algo_factory.h" +#include "all_csv_configs.h" +#include "config/exceptions.h" +#include "config/indices/type.h" +#include "config/names.h" +#include "csv_config_util.h" +#include "fd/dynfd/dynfd.h" +#include "test_fd_util.h" + +namespace tests { +namespace onam = config::names; + +struct DynFDAlgorithmParams { + algos::StdParamsMap params_; + CSVConfig result_config_; + + explicit DynFDAlgorithmParams(CSVConfig const& insert_config = {}, + CSVConfig const& update_config = {}, + std::unordered_set delete_config = {}, + CSVConfig const& csv_config = kTestDynamicFDEmpty, + CSVConfig const& result_config = {}) + : params_({{onam::kCsvConfig, csv_config}}) { + if (!IsEmpty(insert_config)) { + params_[onam::kInsertStatements] = MakeInputTable(insert_config); + } + if (!IsEmpty(update_config)) { + params_[onam::kUpdateStatements] = MakeInputTable(update_config); + } + if (!delete_config.empty()) { + params_[onam::kDeleteStatements] = std::move(delete_config); + } + result_config_ = result_config; + params_[config::names::kMaximumLhs] = std::numeric_limits::max(); + } + + static bool IsEmpty(CSVConfig const& config) { + static CSVConfig const kEmpty{}; + return config.has_header == kEmpty.has_header && config.path == kEmpty.path && + config.separator == kEmpty.separator; + } +}; + +// clang-format on + +class TestDynFDAlgorithmModify : public ::testing::TestWithParam {}; + 
+TEST_P(TestDynFDAlgorithmModify, ModifyingTest) { + auto const& p = GetParam(); + auto const mp = algos::StdParamsMap(p.params_); + auto const algorithm = algos::CreateAndLoadAlgorithm(mp); + + algorithm->Execute(); + auto dynfd_list = algorithm->FdList(); + + auto const verification_algo_params = algos::StdParamsMap{ + {config::names::kCsvConfig, p.result_config_}, + {config::names::kMaximumLhs, std::numeric_limits::max()}}; + auto const verification_algorithm = + algos::CreateAndLoadAlgorithm(verification_algo_params); + + verification_algorithm->Execute(); + auto hyfd_list = verification_algorithm->FdList(); + + ASSERT_EQ(FDsToSet(dynfd_list), FDsToSet(hyfd_list)); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P( + DynamicFDAlgorihmTestSuite, TestDynFDAlgorithmModify, + ::testing::Values( + DynFDAlgorithmParams(kTestDynamicFDInsert, {}, {}, kTestDynamicFDEmpty, kTestDynamicFDInsert), + DynFDAlgorithmParams(kTestDynamicFDInsert, {}, {}, kTestDynamicFDInit, kTestDynamicFDAfterInsert), + DynFDAlgorithmParams({}, {}, {1, 6, 3}, kTestDynamicFDInit, kTestDynamicFDAfterDelete), + DynFDAlgorithmParams(kTestDynamicFDInsert, kTestDynamicFDUpdate, {}, kTestDynamicFDInit, kTestDynamicFDAfterInsertAndUpdate), + DynFDAlgorithmParams({}, kTestDynamicFDUpdate, {1, 6, 3}, kTestDynamicFDInit, kTestDynamicFDAfterUpdateAndDelete), + DynFDAlgorithmParams(kTestDynamicFDInsert, kTestDynamicFDUpdate, {1, 6, 3}, kTestDynamicFDInit, kTestDynamicFDAfterAll) + )); + +// clang-format onÏ + +} // namespace tests diff --git a/src/tests/test_fd_algorithm.cpp b/src/tests/test_fd_algorithm.cpp index 091cb1462f..4c0c3b4b8c 100644 --- a/src/tests/test_fd_algorithm.cpp +++ b/src/tests/test_fd_algorithm.cpp @@ -5,6 +5,7 @@ #include "algorithms/fd/depminer/depminer.h" #include "algorithms/fd/dfd/dfd.h" +#include "algorithms/fd/dynfd/dynfd.h" #include "algorithms/fd/fastfds/fastfds.h" #include "algorithms/fd/fdep/fdep.h" #include "algorithms/fd/fun/fun.h" @@ -34,15 +35,6 @@ namespace tests { * 2. 
in CreateAlgorithmInstance replace "Tane" with * */ -std::vector BitsetToIndexVector(boost::dynamic_bitset<> const& bitset) { - std::vector res; - for (size_t index = bitset.find_first(); index != boost::dynamic_bitset<>::npos; - index = bitset.find_next(index)) { - res.push_back(index); - } - return res; -} - testing::AssertionResult CheckFdListEquality( std::set, unsigned int>> actual, std::list const& expected) { @@ -63,15 +55,6 @@ testing::AssertionResult CheckFdListEquality( : testing::AssertionFailure() << "some FDs remain undiscovered"; } -std::set, unsigned int>> FDsToSet(std::list const& fds) { - std::set, unsigned int>> set; - for (auto const& fd : fds) { - auto const& raw_fd = fd.ToRawFD(); - set.emplace(BitsetToIndexVector(raw_fd.lhs_), raw_fd.rhs_); - } - return set; -} - TYPED_TEST_SUITE_P(AlgorithmTest); TYPED_TEST_P(AlgorithmTest, ThrowsOnEmpty) { @@ -156,9 +139,9 @@ REGISTER_TYPED_TEST_SUITE_P(AlgorithmTest, ThrowsOnEmpty, ReturnsEmptyOnSingleNo HeavyDatasetsConsistentHash, ConsistentRepeatedExecution, MaxLHSOptionWork); -using Algorithms = - ::testing::Types; +using Algorithms = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(AlgorithmTest, AlgorithmTest, Algorithms); } // namespace tests diff --git a/src/tests/test_fd_util.h b/src/tests/test_fd_util.h index 0ec5fbf73b..4281f226a8 100644 --- a/src/tests/test_fd_util.h +++ b/src/tests/test_fd_util.h @@ -77,4 +77,24 @@ class AlgorithmTest : public ::testing::Test { {tests::kIowa1kk, 28573}, {tests::kLegacyPayors, 43612}}}; }; + +inline std::vector BitsetToIndexVector(boost::dynamic_bitset<> const& bitset) { + std::vector res; + for (size_t index = bitset.find_first(); index != boost::dynamic_bitset<>::npos; + index = bitset.find_next(index)) { + res.push_back(index); + } + return res; +} + +inline std::set, unsigned int>> FDsToSet( + std::list const& fds) { + std::set, unsigned int>> set; + for (auto const& fd : fds) { + auto const& raw_fd = fd.ToRawFD(); + 
set.emplace(BitsetToIndexVector(raw_fd.lhs_), raw_fd.rhs_); + } + return set; +} + } // namespace tests diff --git a/test_input_data/dynamic_fd/TestDynamicAfterAll.csv b/test_input_data/dynamic_fd/TestDynamicAfterAll.csv new file mode 100644 index 0000000000..c7c6cf5aa2 --- /dev/null +++ b/test_input_data/dynamic_fd/TestDynamicAfterAll.csv @@ -0,0 +1,13 @@ +Col0,Col1,Col2,Col3,Col4,Col5 +2,1,1,999,-,10 +1,1,2.1,0,8,3 +1,2,2,hjkl,444,5 +1,2,,hjkl,444,5 +1,3,3,0,9,6 +1,3,NULL,999,-,7 +1,4,4,hjkl,555,8 +1,4,4,hjkl,555,8 +1,4,abc,hjkl,555,9 +1,5,5,mnop,10,10 +1,5,5,mnop,10,11 +1,5,666,hjkl,666,11 diff --git a/test_input_data/dynamic_fd/TestDynamicAfterDelete.csv b/test_input_data/dynamic_fd/TestDynamicAfterDelete.csv new file mode 100644 index 0000000000..597a694bab --- /dev/null +++ b/test_input_data/dynamic_fd/TestDynamicAfterDelete.csv @@ -0,0 +1,10 @@ +Col0,Col1,Col2,Col3,Col4,Col5 +1,1,1,1.233,-,1 +1,1,2.1,0,8,3 +1,2,2,hjkl,444,4 +1,2,,hjkl,444,5 +1,3,3,0,9,6 +1,3,NULL,999,-,7 +1,4,4,hjkl,555,8 +1,4,4,hjkl,555,8 +1,4,abc,hjkl,555,9 diff --git a/test_input_data/dynamic_fd/TestDynamicAfterInsert.csv b/test_input_data/dynamic_fd/TestDynamicAfterInsert.csv new file mode 100644 index 0000000000..ac6a62dbda --- /dev/null +++ b/test_input_data/dynamic_fd/TestDynamicAfterInsert.csv @@ -0,0 +1,16 @@ +Col0,Col1,Col2,Col3,Col4,Col5 +1,1,1,1.233,-,1 +1,1,1,0,8,2 +1,1,2.1,0,8,3 +1,2,2,hjkl,444,4 +1,2,2,hjkl,444,4 +1,2,,hjkl,444,5 +1,3,3,0,9,6 +1,3,3,0,9,6 +1,3,NULL,999,-,7 +1,4,4,hjkl,555,8 +1,4,4,hjkl,555,8 +1,4,abc,hjkl,555,9 +1,5,5,mnop,10,10 +1,5,5,mnop,10,11 +1,5,666,hjkl,666,11 diff --git a/test_input_data/dynamic_fd/TestDynamicAfterInsertAndDelete.csv b/test_input_data/dynamic_fd/TestDynamicAfterInsertAndDelete.csv new file mode 100644 index 0000000000..8c8e75b6b4 --- /dev/null +++ b/test_input_data/dynamic_fd/TestDynamicAfterInsertAndDelete.csv @@ -0,0 +1,13 @@ +Col0,Col1,Col2,Col3,Col4,Col5 +1,1,1,1.233,-,1 +1,1,2.1,0,8,3 +1,2,2,hjkl,444,4 +1,2,,hjkl,444,5 
+1,3,3,0,9,6 +1,3,NULL,999,-,7 +1,4,4,hjkl,555,8 +1,4,4,hjkl,555,8 +1,4,abc,hjkl,555,9 +1,5,5,mnop,10,10 +1,5,5,mnop,10,11 +1,5,666,hjkl,666,11 diff --git a/test_input_data/dynamic_fd/TestDynamicAfterInsertAndUpdate.csv b/test_input_data/dynamic_fd/TestDynamicAfterInsertAndUpdate.csv new file mode 100644 index 0000000000..3f980618eb --- /dev/null +++ b/test_input_data/dynamic_fd/TestDynamicAfterInsertAndUpdate.csv @@ -0,0 +1,16 @@ +Col0,Col1,Col2,Col3,Col4,Col5 +2,1,1,999,-,10 +1,1,1,0,8,2 +1,1,2.1,0,8,3 +1,2,2,hjkl,444,4 +1,2,2,hjkl,444,5 +1,2,,hjkl,444,5 +1,3,3,0,9,6 +1,3,3,0,9,6 +1,3,NULL,999,-,7 +1,4,4,hjkl,555,8 +1,4,4,hjkl,555,8 +1,4,abc,hjkl,555,9 +1,5,5,mnop,10,10 +1,5,5,mnop,10,11 +1,5,666,hjkl,666,11 diff --git a/test_input_data/dynamic_fd/TestDynamicAfterUpdateAndDelete.csv b/test_input_data/dynamic_fd/TestDynamicAfterUpdateAndDelete.csv new file mode 100644 index 0000000000..2df8eef971 --- /dev/null +++ b/test_input_data/dynamic_fd/TestDynamicAfterUpdateAndDelete.csv @@ -0,0 +1,10 @@ +Col0,Col1,Col2,Col3,Col4,Col5 +2,1,1,999,-,10 +1,1,2.1,0,8,3 +1,2,2,hjkl,444,5 +1,2,,hjkl,444,5 +1,3,3,0,9,6 +1,3,NULL,999,-,7 +1,4,4,hjkl,555,8 +1,4,4,hjkl,555,8 +1,4,abc,hjkl,555,9