From 22aee43e477f99d71195370e25399ffd5cf9d8fa Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Sat, 7 Sep 2024 00:31:14 +0000 Subject: [PATCH] Update vendored DuckDB sources to ebcae2f2 --- src/duckdb/extension/icu/icu-datefunc.cpp | 1 + src/duckdb/src/common/cgroups.cpp | 25 +++++++++++++++++++ .../tree_renderer/text_tree_renderer.cpp | 14 +++++++---- .../scalar/array/array_functions.cpp | 3 ++- src/duckdb/src/execution/index/art/art.cpp | 2 +- .../src/execution/index/art/iterator.cpp | 15 ++++++----- .../scanner/column_count_scanner.cpp | 2 +- .../csv_scanner/sniffer/dialect_detection.cpp | 2 +- .../csv_scanner/sniffer/type_detection.cpp | 19 ++++++++++++++ .../operator/csv_scanner/util/csv_error.cpp | 4 +-- .../function/table/version/pragma_version.cpp | 6 ++--- .../duckdb/execution/index/art/iterator.hpp | 7 +++++- .../operator/csv_scanner/csv_error.hpp | 2 +- .../csv_scanner/csv_state_machine.hpp | 4 +++ .../duckdb/planner/logical_operator.hpp | 1 + src/duckdb/src/planner/logical_operator.cpp | 17 +++++++++---- .../planner/operator/logical_aggregate.cpp | 1 + .../src/planner/operator/logical_any_join.cpp | 1 + .../operator/logical_comparison_join.cpp | 2 ++ .../src/planner/operator/logical_cteref.cpp | 1 + .../src/planner/operator/logical_distinct.cpp | 1 + .../src/planner/operator/logical_get.cpp | 1 + .../operator/logical_materialized_cte.cpp | 1 + .../src/planner/operator/logical_order.cpp | 1 + src/duckdb/src/storage/table/column_data.cpp | 8 ++++-- .../storage/table/row_group_collection.cpp | 4 +++ 26 files changed, 114 insertions(+), 31 deletions(-) diff --git a/src/duckdb/extension/icu/icu-datefunc.cpp b/src/duckdb/extension/icu/icu-datefunc.cpp index fd2f269c..b0202f8d 100644 --- a/src/duckdb/extension/icu/icu-datefunc.cpp +++ b/src/duckdb/extension/icu/icu-datefunc.cpp @@ -74,6 +74,7 @@ unique_ptr ICUDateFunc::Bind(ClientContext &context, ScalarFunctio void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) { auto tz = 
icu_66::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_id.GetString()))); if (*tz == icu::TimeZone::getUnknown()) { + delete tz; throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString()); } calendar->adoptTimeZone(tz); diff --git a/src/duckdb/src/common/cgroups.cpp b/src/duckdb/src/common/cgroups.cpp index e565e473..b9d2b820 100644 --- a/src/duckdb/src/common/cgroups.cpp +++ b/src/duckdb/src/common/cgroups.cpp @@ -22,6 +22,9 @@ optional_idx CGroups::GetMemoryLimit(FileSystem &fs) { } optional_idx CGroups::GetCGroupV2MemoryLimit(FileSystem &fs) { +#ifdef DUCKDB_WASM + return optional_idx(); +#else const char *cgroup_self = "/proc/self/cgroup"; const char *memory_max = "/sys/fs/cgroup/%s/memory.max"; @@ -42,9 +45,13 @@ optional_idx CGroups::GetCGroupV2MemoryLimit(FileSystem &fs) { } return ReadCGroupValue(fs, memory_max_path); +#endif } optional_idx CGroups::GetCGroupV1MemoryLimit(FileSystem &fs) { +#ifdef DUCKDB_WASM + return optional_idx(); +#else const char *cgroup_self = "/proc/self/cgroup"; const char *memory_limit = "/sys/fs/cgroup/memory/%s/memory.limit_in_bytes"; @@ -65,9 +72,13 @@ optional_idx CGroups::GetCGroupV1MemoryLimit(FileSystem &fs) { } return ReadCGroupValue(fs, memory_limit_path); +#endif } string CGroups::ReadCGroupPath(FileSystem &fs, const char *cgroup_file) { +#ifdef DUCKDB_WASM + return ""; +#else auto handle = fs.OpenFile(cgroup_file, FileFlags::FILE_FLAGS_READ); char buffer[1024]; auto bytes_read = fs.Read(*handle, buffer, sizeof(buffer) - 1); @@ -81,9 +92,13 @@ string CGroups::ReadCGroupPath(FileSystem &fs, const char *cgroup_file) { } return ""; +#endif } string CGroups::ReadMemoryCGroupPath(FileSystem &fs, const char *cgroup_file) { +#ifdef DUCKDB_WASM + return ""; +#else auto handle = fs.OpenFile(cgroup_file, FileFlags::FILE_FLAGS_READ); char buffer[1024]; auto bytes_read = fs.Read(*handle, buffer, sizeof(buffer) - 1); @@ -102,9 +117,13 @@ string CGroups::ReadMemoryCGroupPath(FileSystem &fs, 
const char *cgroup_file) { } return ""; +#endif } optional_idx CGroups::ReadCGroupValue(FileSystem &fs, const char *file_path) { +#ifdef DUCKDB_WASM + return optional_idx(); +#else auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ); char buffer[100]; auto bytes_read = fs.Read(*handle, buffer, 99); @@ -115,9 +134,14 @@ optional_idx CGroups::ReadCGroupValue(FileSystem &fs, const char *file_path) { return optional_idx(value); } return optional_idx(); +#endif } idx_t CGroups::GetCPULimit(FileSystem &fs, idx_t physical_cores) { +#ifdef DUCKDB_WASM + return physical_cores; +#else + static constexpr const char *cpu_max = "/sys/fs/cgroup/cpu.max"; static constexpr const char *cfs_quota = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"; static constexpr const char *cfs_period = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"; @@ -159,6 +183,7 @@ idx_t CGroups::GetCPULimit(FileSystem &fs, idx_t physical_cores) { } else { return physical_cores; } +#endif } } // namespace duckdb diff --git a/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp b/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp index 5b057d65..8e0fa425 100644 --- a/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +++ b/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp @@ -65,17 +65,20 @@ static bool NodeHasMultipleChildren(RenderTreeNode &node) { } static bool ShouldRenderWhitespace(RenderTree &root, idx_t x, idx_t y) { + idx_t found_children = 0; for (;; x--) { auto node = root.GetNode(x, y); + if (root.HasNode(x, y + 1)) { + found_children++; + } if (node) { if (NodeHasMultipleChildren(*node)) { - return true; + if (found_children < node->child_positions.size()) { + return true; + } } return false; } - if (root.HasNode(x, y + 1)) { - break; - } if (x == 0) { break; } @@ -190,11 +193,12 @@ void TextTreeRenderer::RenderBoxContent(RenderTree &root, std::ostream &ss, idx_ if (root.HasNode(x, y + 1)) { // node right below this one ss << StringUtil::Repeat(config.HORIZONTAL, 
config.node_render_width / 2); - ss << config.RTCORNER; if (has_child_to_the_right) { + ss << config.TMIDDLE; // but we have another child to the right! keep rendering the line ss << StringUtil::Repeat(config.HORIZONTAL, config.node_render_width / 2); } else { + ss << config.RTCORNER; if (has_adjacent_nodes) { // only a child below this one: fill the rest with spaces ss << StringUtil::Repeat(" ", config.node_render_width / 2); diff --git a/src/duckdb/src/core_functions/scalar/array/array_functions.cpp b/src/duckdb/src/core_functions/scalar/array/array_functions.cpp index df6bc922..347ffcbd 100644 --- a/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +++ b/src/duckdb/src/core_functions/scalar/array/array_functions.cpp @@ -123,10 +123,11 @@ static void ArrayFixedCombine(DataChunk &args, ExpressionState &state, Vector &r throw InvalidInputException( StringUtil::Format("%s: right argument can not contain NULL values", func_name)); } + const auto result_offset = i * N; const auto lhs_data_ptr = lhs_data + left_offset; const auto rhs_data_ptr = rhs_data + right_offset; - const auto res_data_ptr = res_data + right_offset; + const auto res_data_ptr = res_data + result_offset; OP::Operation(lhs_data_ptr, rhs_data_ptr, res_data_ptr, N); } diff --git a/src/duckdb/src/execution/index/art/art.cpp b/src/duckdb/src/execution/index/art/art.cpp index a9bba85e..be4beef1 100644 --- a/src/duckdb/src/execution/index/art/art.cpp +++ b/src/duckdb/src/execution/index/art/art.cpp @@ -873,7 +873,7 @@ bool ART::SearchLess(ARTKey &upper_bound, bool equal, idx_t max_count, unsafe_ve it.FindMinimum(tree); // Early-out, if the minimum value is higher than the upper bound. 
- if (it.current_key.GreaterThan(upper_bound, equal)) { + if (it.current_key.GreaterThan(upper_bound, equal, it.GetNestedDepth())) { return true; } diff --git a/src/duckdb/src/execution/index/art/iterator.cpp b/src/duckdb/src/execution/index/art/iterator.cpp index ca6f5c71..3f1f1f4f 100644 --- a/src/duckdb/src/execution/index/art/iterator.cpp +++ b/src/duckdb/src/execution/index/art/iterator.cpp @@ -23,7 +23,7 @@ bool IteratorKey::Contains(const ARTKey &key) const { return true; } -bool IteratorKey::GreaterThan(const ARTKey &key, const bool equal) const { +bool IteratorKey::GreaterThan(const ARTKey &key, const bool equal, const uint8_t nested_depth) const { for (idx_t i = 0; i < MinValue(Size(), key.len); i++) { if (key_bytes[i] > key.data[i]) { return true; @@ -31,12 +31,11 @@ bool IteratorKey::GreaterThan(const ARTKey &key, const bool equal) const { return false; } } - if (equal) { - // Returns true, if current_key is greater than key. - return Size() > key.len; - } - // Returns true, if current_key and key match or current_key is greater than key. - return Size() >= key.len; + + // Returns true, if current_key is greater than (or equal to) key. + D_ASSERT(Size() >= nested_depth); + auto this_len = Size() - nested_depth; + return equal ? this_len > key.len : this_len >= key.len; } //===--------------------------------------------------------------------===// @@ -48,7 +47,7 @@ bool Iterator::Scan(const ARTKey &upper_bound, const idx_t max_count, unsafe_vec do { // An empty upper bound indicates that no upper bound exists. 
if (!upper_bound.Empty() && status == GateStatus::GATE_NOT_SET) { - if (current_key.GreaterThan(upper_bound, equal)) { + if (current_key.GreaterThan(upper_bound, equal, nested_depth)) { return true; } } diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp index 6e0d6048..f66b180e 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp @@ -36,7 +36,7 @@ bool ColumnCountResult::AddRow(ColumnCountResult &result, idx_t buffer_pos) { } void ColumnCountResult::SetComment(ColumnCountResult &result, idx_t buffer_pos) { - if (result.current_column_count == 0) { + if (!result.states.WasStandard()) { result.cur_line_starts_as_comment = true; } result.comment = true; diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp index de859d5b..44d17909 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp @@ -518,7 +518,7 @@ void CSVSniffer::DetectDialect() { // if no dialect candidate was found, we throw an exception if (candidates.empty()) { - auto error = CSVError::DialectSniffingError(options, dialect_candidates.Print()); + auto error = CSVError::SniffingError(options, dialect_candidates.Print()); error_handler->Error(error); } } diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp index a61acd6a..11d79c40 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp @@ -413,6 +413,20 @@ void 
CSVSniffer::DetectTypes() { SetUserDefinedDateTimeFormat(*candidate->state_machine); // Parse chunk and read csv with info candidate auto &data_chunk = candidate->ParseChunk().ToChunk(); + if (!candidate->error_handler->errors.empty()) { + bool break_loop = false; + for (auto &errors : candidate->error_handler->errors) { + for (auto &error : errors.second) { + if (error.type != CSVErrorType::MAXIMUM_LINE_SIZE) { + break_loop = true; + break; + } + } + } + if (break_loop) { + continue; + } + } idx_t start_idx_detection = 0; idx_t chunk_size = data_chunk.size(); if (chunk_size > 1 && @@ -465,6 +479,11 @@ void CSVSniffer::DetectTypes() { } } } + if (!best_candidate) { + DialectCandidates dialect_candidates(options.dialect_options.state_machine_options); + auto error = CSVError::SniffingError(options, dialect_candidates.Print()); + error_handler->Error(error); + } // Assert that it's all good at this point. D_ASSERT(best_candidate && !best_format_candidates.empty()); } diff --git a/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp index 30450d5c..e7a41f3a 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -226,12 +226,12 @@ CSVError CSVError::HeaderSniffingError(const CSVReaderOptions &options, const ve return CSVError(error.str(), SNIFFING, {}); } -CSVError CSVError::DialectSniffingError(const CSVReaderOptions &options, const string &search_space) { +CSVError CSVError::SniffingError(const CSVReaderOptions &options, const string &search_space) { std::ostringstream error; // 1. Which file error << "Error when sniffing file \"" << options.file_path << "\"." << '\n'; // 2. What's the error - error << "It was not possible to automatically detect the CSV Parsing dialect" << '\n'; + error << "It was not possible to automatically detect the CSV Parsing dialect/types" << '\n'; // 2. 
What was the search space? error << "The search space used was:" << '\n'; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 9ea02723..5696a926 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "1-dev5272" +#define DUCKDB_PATCH_VERSION "1-dev5313" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 0 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.0.1-dev5272" +#define DUCKDB_VERSION "v1.0.1-dev5313" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "4d18b9d05c" +#define DUCKDB_SOURCE_ID "64bacde85e" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp b/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp index 161b65d0..58a0f106 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp @@ -47,7 +47,7 @@ class IteratorKey { //! Returns true, if key_bytes contains all bytes of key. bool Contains(const ARTKey &key) const; //! Returns true, if key_bytes is greater than [or equal to] the key. - bool GreaterThan(const ARTKey &key, bool equal) const; + bool GreaterThan(const ARTKey &key, const bool equal, const uint8_t nested_depth) const; private: unsafe_vector key_bytes; @@ -72,6 +72,11 @@ class Iterator { //! bound exceeds the maximum value of the ART. bool LowerBound(const Node &node, const ARTKey &key, const bool equal, idx_t depth); + //! Returns the nested depth. + uint8_t GetNestedDepth() const { + return nested_depth; + } + private: //! The ART. 
ART &art; diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index 1b4a71f9..2c254e26 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -65,7 +65,7 @@ class CSVError { static CSVError LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info, string &csv_row, idx_t byte_position, const string &current_path); //! Produces an error message for a dialect sniffing error. - static CSVError DialectSniffingError(const CSVReaderOptions &options, const string &search_space); + static CSVError SniffingError(const CSVReaderOptions &options, const string &search_space); //! Produces an error message for a header sniffing error. static CSVError HeaderSniffingError(const CSVReaderOptions &options, const vector &best_header_row, idx_t column_count, char delimiter); diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp index 048dbbf2..13933a18 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp @@ -31,6 +31,10 @@ struct CSVStates { (states[1] == CSVState::RECORD_SEPARATOR || states[1] == CSVState::CARRIAGE_RETURN); } + inline bool WasStandard() { + return states[0] == CSVState::STANDARD; + } + inline bool EmptyLastValue() { // It is a new row, if the previous state is not a record separator, and the current one is return states[0] == CSVState::DELIMITER && diff --git a/src/duckdb/src/include/duckdb/planner/logical_operator.hpp b/src/duckdb/src/include/duckdb/planner/logical_operator.hpp index 33488d1f..b0611f7e 100644 ---
a/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +++ b/src/duckdb/src/include/duckdb/planner/logical_operator.hpp @@ -64,6 +64,7 @@ class LogicalOperator { void AddChild(unique_ptr child); virtual idx_t EstimateCardinality(ClientContext &context); void SetEstimatedCardinality(idx_t _estimated_cardinality); + void SetParamsEstimatedCardinality(InsertionOrderPreservingMap &result) const; virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); diff --git a/src/duckdb/src/planner/logical_operator.cpp b/src/duckdb/src/planner/logical_operator.cpp index 185e7887..17cd2d98 100644 --- a/src/duckdb/src/planner/logical_operator.cpp +++ b/src/duckdb/src/planner/logical_operator.cpp @@ -26,6 +26,17 @@ vector LogicalOperator::GetColumnBindings() { return {ColumnBinding(0, 0)}; } +void LogicalOperator::SetParamsEstimatedCardinality(InsertionOrderPreservingMap &result) const { + if (has_estimated_cardinality) { + result[RenderTreeNode::ESTIMATED_CARDINALITY] = StringUtil::Format("%llu", estimated_cardinality); + } +} + +void LogicalOperator::SetEstimatedCardinality(idx_t _estimated_cardinality) { + estimated_cardinality = _estimated_cardinality; + has_estimated_cardinality = true; +} + // LCOV_EXCL_START string LogicalOperator::ColumnBindingsToString(const vector &bindings) { string result = "{"; @@ -57,6 +68,7 @@ InsertionOrderPreservingMap LogicalOperator::ParamsToString() const { expressions_info += expressions[i]->GetName(); } result["Expressions"] = expressions_info; + SetParamsEstimatedCardinality(result); return result; } @@ -191,11 +203,6 @@ idx_t LogicalOperator::EstimateCardinality(ClientContext &context) { return estimated_cardinality; } -void LogicalOperator::SetEstimatedCardinality(idx_t _estimated_cardinality) { - estimated_cardinality = _estimated_cardinality; - has_estimated_cardinality = true; -} - void LogicalOperator::Print() { Printer::Print(ToString()); } diff --git 
a/src/duckdb/src/planner/operator/logical_aggregate.cpp b/src/duckdb/src/planner/operator/logical_aggregate.cpp index beb17c5b..3ee244c2 100644 --- a/src/duckdb/src/planner/operator/logical_aggregate.cpp +++ b/src/duckdb/src/planner/operator/logical_aggregate.cpp @@ -59,6 +59,7 @@ InsertionOrderPreservingMap LogicalAggregate::ParamsToString() const { expressions_info += expressions[i]->GetName(); } result["Expressions"] = expressions_info; + SetParamsEstimatedCardinality(result); return result; } diff --git a/src/duckdb/src/planner/operator/logical_any_join.cpp b/src/duckdb/src/planner/operator/logical_any_join.cpp index d5c0f7a6..07587dad 100644 --- a/src/duckdb/src/planner/operator/logical_any_join.cpp +++ b/src/duckdb/src/planner/operator/logical_any_join.cpp @@ -8,6 +8,7 @@ LogicalAnyJoin::LogicalAnyJoin(JoinType type) : LogicalJoin(type, LogicalOperato InsertionOrderPreservingMap LogicalAnyJoin::ParamsToString() const { InsertionOrderPreservingMap result; result["Condition"] = condition->ToString(); + SetParamsEstimatedCardinality(result); return result; } diff --git a/src/duckdb/src/planner/operator/logical_comparison_join.cpp b/src/duckdb/src/planner/operator/logical_comparison_join.cpp index 083e9f97..f844b182 100644 --- a/src/duckdb/src/planner/operator/logical_comparison_join.cpp +++ b/src/duckdb/src/planner/operator/logical_comparison_join.cpp @@ -23,6 +23,8 @@ InsertionOrderPreservingMap LogicalComparisonJoin::ParamsToString() cons conditions_info += expr->ToString(); } result["Conditions"] = conditions_info; + SetParamsEstimatedCardinality(result); + return result; } diff --git a/src/duckdb/src/planner/operator/logical_cteref.cpp b/src/duckdb/src/planner/operator/logical_cteref.cpp index 28082aa7..e82b1448 100644 --- a/src/duckdb/src/planner/operator/logical_cteref.cpp +++ b/src/duckdb/src/planner/operator/logical_cteref.cpp @@ -7,6 +7,7 @@ namespace duckdb { InsertionOrderPreservingMap LogicalCTERef::ParamsToString() const { 
InsertionOrderPreservingMap result; result["CTE Index"] = StringUtil::Format("%llu", cte_index); + SetParamsEstimatedCardinality(result); return result; } diff --git a/src/duckdb/src/planner/operator/logical_distinct.cpp b/src/duckdb/src/planner/operator/logical_distinct.cpp index fb87b66e..f6983b5e 100644 --- a/src/duckdb/src/planner/operator/logical_distinct.cpp +++ b/src/duckdb/src/planner/operator/logical_distinct.cpp @@ -18,6 +18,7 @@ InsertionOrderPreservingMap LogicalDistinct::ParamsToString() const { StringUtil::Join(distinct_targets, distinct_targets.size(), "\n", [](const unique_ptr &child) { return child->GetName(); }); } + SetParamsEstimatedCardinality(result); return result; } diff --git a/src/duckdb/src/planner/operator/logical_get.cpp b/src/duckdb/src/planner/operator/logical_get.cpp index 1ed160a7..d6e9fda9 100644 --- a/src/duckdb/src/planner/operator/logical_get.cpp +++ b/src/duckdb/src/planner/operator/logical_get.cpp @@ -59,6 +59,7 @@ InsertionOrderPreservingMap LogicalGet::ParamsToString() const { if (function.to_string) { result["__text__"] = function.to_string(bind_data.get()); } + SetParamsEstimatedCardinality(result); return result; } diff --git a/src/duckdb/src/planner/operator/logical_materialized_cte.cpp b/src/duckdb/src/planner/operator/logical_materialized_cte.cpp index 3b2dc54e..043695bb 100644 --- a/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +++ b/src/duckdb/src/planner/operator/logical_materialized_cte.cpp @@ -5,6 +5,7 @@ namespace duckdb { InsertionOrderPreservingMap LogicalMaterializedCTE::ParamsToString() const { InsertionOrderPreservingMap result; result["Table Index"] = StringUtil::Format("%llu", table_index); + SetParamsEstimatedCardinality(result); return result; } diff --git a/src/duckdb/src/planner/operator/logical_order.cpp b/src/duckdb/src/planner/operator/logical_order.cpp index bb8de9e6..78a0c357 100644 --- a/src/duckdb/src/planner/operator/logical_order.cpp +++ 
b/src/duckdb/src/planner/operator/logical_order.cpp @@ -29,6 +29,7 @@ InsertionOrderPreservingMap LogicalOrder::ParamsToString() const { orders_info += orders[i].expression->GetName(); } result["__order_by__"] = orders_info; + SetParamsEstimatedCardinality(result); return result; } diff --git a/src/duckdb/src/storage/table/column_data.cpp b/src/duckdb/src/storage/table/column_data.cpp index dd28a9bf..cc19a550 100644 --- a/src/duckdb/src/storage/table/column_data.cpp +++ b/src/duckdb/src/storage/table/column_data.cpp @@ -230,8 +230,12 @@ void ColumnData::UpdateInternal(TransactionData transaction, idx_t column_index, template idx_t ColumnData::ScanVector(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result, idx_t target_scan) { - auto scan_count = ScanVector(state, result, target_scan, GetVectorScanType(state, target_scan)); - FetchUpdates(transaction, vector_index, result, scan_count, ALLOW_UPDATES, SCAN_COMMITTED); + auto scan_type = GetVectorScanType(state, target_scan); + auto scan_count = ScanVector(state, result, target_scan, scan_type); + if (scan_type != ScanVectorType::SCAN_ENTIRE_VECTOR) { + // if we are scanning an entire vector we cannot have updates + FetchUpdates(transaction, vector_index, result, scan_count, ALLOW_UPDATES, SCAN_COMMITTED); + } return scan_count; } diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index 62140832..f0e56ac1 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -417,6 +417,9 @@ void RowGroupCollection::FinalizeAppend(TransactionData transaction, TableAppend continue; } auto &local_stats = state.stats.GetStats(*local_stats_lock, col_idx); + if (!local_stats.HasDistinctStats()) { + continue; + } global_stats.DistinctStats().Merge(local_stats.DistinctStats()); } @@ -809,6 +812,7 @@ class VacuumTask : public BaseCheckpointTask { if 
(scan_chunk.size() == 0) { break; } + scan_chunk.Flatten(); idx_t remaining = scan_chunk.size(); while (remaining > 0) { idx_t append_count =