From f6bb0742addfc0ed4272066825a92e46c676c60f Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 24 Jan 2025 10:16:02 +0800 Subject: [PATCH 01/12] tmp save Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnAggregateFunction.h | 10 +- dbms/src/Columns/ColumnArray.cpp | 49 ++-- dbms/src/Columns/ColumnArray.h | 20 +- dbms/src/Columns/ColumnConst.h | 10 +- dbms/src/Columns/ColumnDecimal.cpp | 74 ++++-- dbms/src/Columns/ColumnDecimal.h | 64 ++--- dbms/src/Columns/ColumnFixedString.cpp | 75 +++++- dbms/src/Columns/ColumnFixedString.h | 56 +++-- dbms/src/Columns/ColumnFunction.h | 10 +- dbms/src/Columns/ColumnNullable.cpp | 31 ++- dbms/src/Columns/ColumnNullable.h | 12 +- dbms/src/Columns/ColumnString.cpp | 233 +++++++++++++----- dbms/src/Columns/ColumnString.h | 53 ++-- dbms/src/Columns/ColumnTuple.h | 17 +- dbms/src/Columns/ColumnVector.cpp | 47 ++-- dbms/src/Columns/ColumnVector.h | 28 ++- dbms/src/Columns/IColumn.h | 13 +- dbms/src/Columns/IColumnDummy.h | 10 +- .../gtest_column_serialize_deserialize.cpp | 62 ++--- 19 files changed, 577 insertions(+), 297 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.h b/dbms/src/Columns/ColumnAggregateFunction.h index f5e7963ef4d..1c1cd619d67 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.h +++ b/dbms/src/Columns/ColumnAggregateFunction.h @@ -167,7 +167,8 @@ class ColumnAggregateFunction final : public COWPtrHelper & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */) const override + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const override { throw Exception( "Method countSerializeByteSizeForCmp is not supported for " + getName(), @@ -180,7 +181,8 @@ class ColumnAggregateFunction final : public COWPtrHelper & /* byte_size */, const IColumn::Offsets & /* offsets */, - const TiDB::TiDBCollatorPtr & /* collator */) const override + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const override { throw Exception( "Method 
countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), @@ -199,7 +201,7 @@ class ColumnAggregateFunction final : public COWPtrHelper & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const override { @@ -217,7 +219,7 @@ class ColumnAggregateFunction final : public COWPtrHelper & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const IColumn::Offsets & /* offsets */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const override diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 0ba742e092c..d661461b62f 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -220,18 +220,19 @@ const char * ColumnArray::deserializeAndInsertFromArena(const char * pos, const void ColumnArray::countSerializeByteSizeForCmp( PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator) const + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const { - countSerializeByteSizeImpl(byte_size, collator); + countSerializeByteSizeImpl(byte_size, collator, nullmap); } void ColumnArray::countSerializeByteSize(PaddedPODArray & byte_size) const { - countSerializeByteSizeImpl(byte_size, nullptr); + countSerializeByteSizeImpl(byte_size, nullptr, nullptr); } -template -void ColumnArray::countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) +template +void ColumnArray::countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); @@ -251,8 +252,8 @@ void ColumnArray::countSerializeByteSizeImpl(PaddedPODArray & byte_size, for (size_t i = 0; i 
< size; ++i) byte_size[i] += sizeof(UInt32); - if constexpr (for_compare) - getData().countSerializeByteSizeForCmpColumnArray(byte_size, getOffsets(), collator); + if constexpr (compare_semantics) + getData().countSerializeByteSizeForCmpColumnArray(byte_size, getOffsets(), collator, nullmap); else getData().countSerializeByteSizeForColumnArray(byte_size, getOffsets()); } @@ -261,35 +262,39 @@ void ColumnArray::serializeToPosForCmp( PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const { - if (has_null) - serializeToPosImpl(pos, start, length, collator, sort_key_container); + if (nullmap != nullptr) + serializeToPosImpl(pos, start, length, collator, sort_key_container, nullmap); else - serializeToPosImpl(pos, start, length, collator, sort_key_container); + serializeToPosImpl(pos, start, length, collator, sort_key_container, nullmap); } void ColumnArray::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const { if (has_null) - serializeToPosImpl(pos, start, length, nullptr, nullptr); + serializeToPosImpl(pos, start, length, nullptr, nullptr, nullptr); else - serializeToPosImpl(pos, start, length, nullptr, nullptr); + serializeToPosImpl(pos, start, length, nullptr, nullptr, nullptr); } -template +template void ColumnArray::serializeToPosImpl( PaddedPODArray & pos, size_t start, size_t length, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const + String * sort_key_container, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == size())); + /// countSerializeByteSize has already checked that the size of one element 
is not greater than UINT32_MAX for (size_t i = 0; i < length; ++i) { @@ -298,14 +303,20 @@ void ColumnArray::serializeToPosImpl( if (pos[i] == nullptr) continue; } + UInt32 len = sizeAt(start + i); + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + len = 0; + } tiflash_compiler_builtin_memcpy(pos[i], &len, sizeof(UInt32)); pos[i] += sizeof(UInt32); } - if constexpr (for_compare) + if constexpr (compare_semantics) getData() - .serializeToPosForCmpColumnArray(pos, start, length, has_null, getOffsets(), collator, sort_key_container); + .serializeToPosForCmpColumnArray(pos, start, length, nullmap, getOffsets(), collator, sort_key_container); else getData().serializeToPosForColumnArray(pos, start, length, has_null, getOffsets()); } @@ -320,7 +331,7 @@ void ColumnArray::deserializeAndInsertFromPos(PaddedPODArray & pos, bool deserializeAndInsertFromPosImpl(pos, use_nt_align_buffer); } -template +template void ColumnArray::deserializeAndInsertFromPosImpl(PaddedPODArray & pos, bool use_nt_align_buffer) { auto & offsets = getOffsets(); @@ -336,7 +347,7 @@ void ColumnArray::deserializeAndInsertFromPosImpl(PaddedPODArray & pos, pos[i] += sizeof(UInt32); } - if constexpr (for_compare) + if constexpr (compare_semantics) getData().deserializeForCmpAndInsertFromPosColumnArray(pos, offsets, use_nt_align_buffer); else getData().deserializeAndInsertFromPosForColumnArray(pos, offsets, use_nt_align_buffer); diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 7eb91feb028..64b890a354e 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -44,18 +44,19 @@ class ColumnArray final : public COWPtrHelper ColumnArray(const ColumnArray &) = default; - template - void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) const; + template + void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const; - template + 
template void serializeToPosImpl( PaddedPODArray & pos, size_t start, size_t length, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const; + String * sort_key_container, + const NullMap * nullmap) const; - template + template void deserializeAndInsertFromPosImpl(PaddedPODArray & pos, bool use_nt_align_buffer); public: @@ -96,14 +97,15 @@ class ColumnArray final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const IColumn::Offsets & /* array_offsets */, - const TiDB::TiDBCollatorPtr & /* collator */) const override + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), @@ -122,7 +124,7 @@ class ColumnArray final : public COWPtrHelper PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const override; void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override; @@ -131,7 +133,7 @@ class ColumnArray final : public COWPtrHelper PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const IColumn::Offsets & /* array_offsets */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const override diff --git a/dbms/src/Columns/ColumnConst.h 
b/dbms/src/Columns/ColumnConst.h index c17ba694ead..6d506236116 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -114,7 +114,8 @@ class ColumnConst final : public COWPtrHelper void countSerializeByteSizeForCmp( PaddedPODArray & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */) const override + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const override { throw Exception( "Method countSerializeByteSizeForCmp is not supported for " + getName(), @@ -128,7 +129,8 @@ class ColumnConst final : public COWPtrHelper void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const IColumn::Offsets & /* array_offsets */, - const TiDB::TiDBCollatorPtr & /* collaotr */) const override + const TiDB::TiDBCollatorPtr & /* collaotr */, + const NullMap * /* nullmap */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), @@ -147,7 +149,7 @@ class ColumnConst final : public COWPtrHelper PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const override { @@ -166,7 +168,7 @@ class ColumnConst final : public COWPtrHelper PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const IColumn::Offsets & /* array_offsets */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const override diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 36a774975ca..d2a3b2dc735 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -140,13 +140,13 @@ const char * ColumnDecimal::deserializeAndInsertFromArena(const char * pos, c } template -template -void ColumnDecimal::countSerializeByteSizeImpl(PaddedPODArray & 
byte_size) const +template +void ColumnDecimal::countSerializeByteSizeImpl(PaddedPODArray & byte_size, const NullMap *) const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); size_t size = byte_size.size(); - if constexpr (for_compare && is_Decimal256) + if constexpr (compare_semantics && is_Decimal256) { for (size_t i = 0; i < size; ++i) { @@ -162,7 +162,7 @@ void ColumnDecimal::countSerializeByteSizeImpl(PaddedPODArray & byte_ // TODO add unit test template -template +template void ColumnDecimal::countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const @@ -173,7 +173,7 @@ void ColumnDecimal::countSerializeByteSizeForColumnArrayImpl( byte_size.size(), array_offsets.size()); - if constexpr (for_compare && is_Decimal256) + if constexpr (compare_semantics && is_Decimal256) { size_t size = array_offsets.size(); for (size_t i = 0; i < size; ++i) @@ -194,12 +194,16 @@ void ColumnDecimal::countSerializeByteSizeForColumnArrayImpl( } template -template -void ColumnDecimal::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length) const +template +void ColumnDecimal::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == size())); + + T def_val{}; for (size_t i = 0; i < length; ++i) { if constexpr (has_null) @@ -208,12 +212,29 @@ void ColumnDecimal::serializeToPosImpl(PaddedPODArray & pos, size_t s continue; } - if constexpr (for_compare && is_Decimal256) + if constexpr (compare_semantics && is_Decimal256) { + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + { + pos[i] = 
serializeDecimal256Helper(pos[i], def_val); + continue; + } + } pos[i] = serializeDecimal256Helper(pos[i], data[start + i]); } else { + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + { + tiflash_compiler_builtin_memcpy(pos[i], &def_val, sizeof(T)); + pos[i] += sizeof(T); + continue; + } + } tiflash_compiler_builtin_memcpy(pos[i], &data[start + i], sizeof(T)); pos[i] += sizeof(T); } @@ -221,12 +242,13 @@ void ColumnDecimal::serializeToPosImpl(PaddedPODArray & pos, size_t s } template -template +template void ColumnDecimal::serializeToPosForColumnArrayImpl( PaddedPODArray & pos, size_t start, size_t length, - const IColumn::Offsets & array_offsets) const + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG( @@ -241,6 +263,9 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( array_offsets.back(), size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); + for (size_t i = 0; i < length; ++i) { if constexpr (has_null) @@ -250,32 +275,31 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( } size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; - if constexpr (for_compare && is_Decimal256) + if constexpr (compare_semantics && is_Decimal256) { + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + continue; + } for (size_t j = 0; j < len; ++j) pos[i] = serializeDecimal256Helper(pos[i], data[array_offsets[start + i - 1] + j]); } else { - if (len <= 4) - { - for (size_t j = 0; j < len; ++j) - tiflash_compiler_builtin_memcpy( - pos[i] + j * sizeof(T), - &data[array_offsets[start + i - 1] + j], - sizeof(T)); - } - else + if constexpr (has_nullmap) { - inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T)); + if ((*nullmap)[i] != 0) + continue; } + inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], 
len * sizeof(T)); pos[i] += len * sizeof(T); } } } template -template +template void ColumnDecimal::deserializeAndInsertFromPosImpl( PaddedPODArray & pos, bool use_nt_align_buffer [[maybe_unused]]) @@ -285,7 +309,7 @@ void ColumnDecimal::deserializeAndInsertFromPosImpl( // is_complex_decimal256 is true means Decimal256 is serialized by [bool, limb_count, n * limb]. // NT optimization is not implemented for simplicity. - static const bool is_complex_decimal256 = (for_compare && is_Decimal256); + static const bool is_complex_decimal256 = (compare_semantics && is_Decimal256); #ifdef TIFLASH_ENABLE_AVX_SUPPORT if (use_nt_align_buffer) @@ -383,7 +407,7 @@ void ColumnDecimal::deserializeAndInsertFromPosImpl( } template -template +template void ColumnDecimal::deserializeAndInsertFromPosForColumnArrayImpl( PaddedPODArray & pos, const IColumn::Offsets & array_offsets, @@ -410,7 +434,7 @@ void ColumnDecimal::deserializeAndInsertFromPosForColumnArrayImpl( for (size_t i = 0; i < size; ++i) { size_t len = array_offsets[start_point + i] - array_offsets[start_point + i - 1]; - if constexpr (for_compare && is_Decimal256) + if constexpr (compare_semantics && is_Decimal256) { for (size_t j = 0; j < len; ++j) pos[i] = const_cast( diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h index 18211b13177..1f02419df8b 100644 --- a/dbms/src/Columns/ColumnDecimal.h +++ b/dbms/src/Columns/ColumnDecimal.h @@ -101,26 +101,27 @@ class ColumnDecimal final : public COWPtrHelper - void countSerializeByteSizeImpl(PaddedPODArray & byte_size) const; - template + template + void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const NullMap *) const; + template void countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const; - template - void serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length) const; - template + template + void serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t 
length, const NullMap * nullmap) const; + template void serializeToPosForColumnArrayImpl( PaddedPODArray & pos, size_t start, size_t length, - const IColumn::Offsets & array_offsets) const; + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const; - template + template void deserializeAndInsertFromPosImpl(PaddedPODArray & pos, bool use_nt_align_buffer [[maybe_unused]]); - template + template void deserializeAndInsertFromPosForColumnArrayImpl( PaddedPODArray & pos, const IColumn::Offsets & array_offsets, @@ -173,19 +174,20 @@ class ColumnDecimal final : public COWPtrHelper & byte_size, const TiDB::TiDBCollatorPtr &) const override + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr &, const NullMap *) const override { - countSerializeByteSizeImpl(byte_size); + countSerializeByteSizeImpl(byte_size, nullptr); } void countSerializeByteSize(PaddedPODArray & byte_size) const override { - countSerializeByteSizeImpl(byte_size); + countSerializeByteSizeImpl(byte_size, nullptr); } void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr &) const override + const TiDB::TiDBCollatorPtr &, + const NullMap *) const override { countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets); } @@ -200,44 +202,46 @@ class ColumnDecimal final : public COWPtrHelper & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr &, String *) const override { - if (has_null) - serializeToPosImpl(pos, start, length); + if (nullmap != nullptr) + serializeToPosImpl(pos, start, length, nullmap); else - serializeToPosImpl(pos, start, length); + serializeToPosImpl(pos, start, length, nullptr); } void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override { if (has_null) - serializeToPosImpl(pos, start, length); + serializeToPosImpl(pos, start, length, nullptr); 
else - serializeToPosImpl(pos, start, length); + serializeToPosImpl(pos, start, length, nullptr); } void serializeToPosForCmpColumnArray( PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr &, String *) const override { - if (has_null) - serializeToPosForColumnArrayImpl( + if (nullmap != nullptr) + serializeToPosForColumnArrayImpl( pos, start, length, - array_offsets); + array_offsets, + nullmap); else - serializeToPosForColumnArrayImpl( + serializeToPosForColumnArrayImpl( pos, start, length, - array_offsets); + array_offsets, + nullptr); } void serializeToPosForColumnArray( PaddedPODArray & pos, @@ -247,17 +251,19 @@ class ColumnDecimal final : public COWPtrHelper( + serializeToPosForColumnArrayImpl( pos, start, length, - array_offsets); + array_offsets, + nullptr); else - serializeToPosForColumnArrayImpl( + serializeToPosForColumnArrayImpl( pos, start, length, - array_offsets); + array_offsets, + nullptr); } void deserializeForCmpAndInsertFromPos(PaddedPODArray & pos, bool use_nt_align_buffer) override diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index b15e420a1ce..dde147faef5 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -134,18 +134,33 @@ const char * ColumnFixedString::deserializeAndInsertFromArena(const char * pos, return pos + n; } -void ColumnFixedString::countSerializeByteSize(PaddedPODArray & byte_size) const +template +void ColumnFixedString::countSerializeByteSizeImpl(PaddedPODArray & byte_size, const NullMap * nullmap) const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); + assert(!nullmap || nullmap->size() == size()); + size_t size = byte_size.size(); for (size_t i = 0; i < size; ++i) + { + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + { + byte_size[i] += 1; 
+ continue; + } + } byte_size[i] += n; + } } -void ColumnFixedString::countSerializeByteSizeForColumnArray( +template +void ColumnFixedString::countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets) const + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG( byte_size.size() == array_offsets.size(), @@ -153,25 +168,40 @@ void ColumnFixedString::countSerializeByteSizeForColumnArray( byte_size.size(), array_offsets.size()); + assert(!nullmap || nullmap->size() == array_offsets.size()); + size_t size = array_offsets.size(); for (size_t i = 0; i < size; ++i) + { + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + { + byte_size[i] += array_offsets[i] - array_offsets[i - 1]; + continue; + } + } byte_size[i] += n * (array_offsets[i] - array_offsets[i - 1]); + } } void ColumnFixedString::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const { if (has_null) - serializeToPosImpl(pos, start, length); + serializeToPosImpl(pos, start, length, nullptr); else - serializeToPosImpl(pos, start, length); + serializeToPosImpl(pos, start, length, nullptr); } -template -void ColumnFixedString::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length) const +template +void ColumnFixedString::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == size())); + for (size_t i = 0; i < length; ++i) { if constexpr (has_null) @@ -179,6 +209,18 @@ void ColumnFixedString::serializeToPosImpl(PaddedPODArray & pos, size_t if (pos[i] == nullptr) continue; } + if constexpr (has_nullmap) + { + if 
((*nullmap)[i] != 0) + { + for (size_t j = 0; j < n; ++j) + { + *(pos[i]) = '\0'; + pos[i] += 1; + } + continue; + } + } inline_memcpy(pos[i], &chars[n * (start + i)], n); pos[i] += n; } @@ -192,17 +234,18 @@ void ColumnFixedString::serializeToPosForColumnArray( const IColumn::Offsets & array_offsets) const { if (has_null) - serializeToPosForColumnArrayImpl(pos, start, length, array_offsets); + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); else - serializeToPosForColumnArrayImpl(pos, start, length, array_offsets); + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); } -template +template void ColumnFixedString::serializeToPosForColumnArrayImpl( PaddedPODArray & pos, size_t start, size_t length, - const IColumn::Offsets & array_offsets) const + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG( @@ -217,6 +260,9 @@ void ColumnFixedString::serializeToPosForColumnArrayImpl( array_offsets.back(), size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); + for (size_t i = 0; i < length; ++i) { if constexpr (has_null) @@ -224,8 +270,13 @@ void ColumnFixedString::serializeToPosForColumnArrayImpl( if (pos[i] == nullptr) continue; } - size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + continue; + } + inline_memcpy(pos[i], &chars[n * array_offsets[start + i - 1]], n * len); pos[i] += n * len; } diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h index 161cef5cf8a..0ac2ff89aa4 100644 --- a/dbms/src/Columns/ColumnFixedString.h +++ b/dbms/src/Columns/ColumnFixedString.h @@ -54,16 +54,25 @@ class ColumnFixedString final : public COWPtrHelper , chars(src.chars.begin(), src.chars.end()) , n(src.n){}; 
+ template + void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const NullMap * nullmap) const; - template - void serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length) const; + template + void countSerializeByteSizeForColumnArrayImpl( + PaddedPODArray & byte_size, + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const; - template + template + void serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const; + + template void serializeToPosForColumnArrayImpl( PaddedPODArray & pos, size_t start, size_t length, - const IColumn::Offsets & array_offsets) const; + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const; public: std::string getName() const override { return "FixedString(" + std::to_string(n) + ")"; } @@ -115,7 +124,7 @@ class ColumnFixedString final : public COWPtrHelper const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const override { // collator->sortKey() will change the string length, which may exceeds n. 
@@ -123,35 +132,51 @@ class ColumnFixedString final : public COWPtrHelper !collator, "{} doesn't support countSerializeByteSizeForCmp when collator is not null", getName()); - countSerializeByteSize(byte_size); + if (nullmap != nullptr) + countSerializeByteSizeImpl(byte_size, nullmap); + else + countSerializeByteSizeImpl(byte_size, nullptr); + } + void countSerializeByteSize(PaddedPODArray & byte_size) const override + { + countSerializeByteSizeImpl(byte_size, nullptr); } - void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator) const override + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override { RUNTIME_CHECK_MSG( !collator, "{} doesn't support countSerializeByteSizeForCmpColumnArray when collator is not null", getName()); - countSerializeByteSizeForColumnArray(byte_size, array_offsets); + if (nullmap != nullptr) + countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, nullmap); + else + countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, nullptr); } void countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets) const override; + const IColumn::Offsets & array_offsets) const override + { + countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, nullptr); + } void serializeToPosForCmp( PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, String *) const override { RUNTIME_CHECK_MSG(!collator, "{} doesn't support serializeToPosForCmp when collator is not null", getName()); - serializeToPos(pos, start, length, has_null); + if (nullmap != nullptr) + serializeToPosImpl(pos, start, length, nullmap); + else + serializeToPosImpl(pos, start, length, nullptr); } void serializeToPos(PaddedPODArray & pos, 
size_t start, size_t length, bool has_null) const override; @@ -159,7 +184,7 @@ class ColumnFixedString final : public COWPtrHelper PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, String *) const override @@ -168,7 +193,10 @@ class ColumnFixedString final : public COWPtrHelper !collator, "{} doesn't support serializeToPosForCmpColumnArray when collator is not null", getName()); - serializeToPosForColumnArray(pos, start, length, has_null, array_offsets); + if (nullmap != nullptr) + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullmap); + else + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); } void serializeToPosForColumnArray( PaddedPODArray & pos, diff --git a/dbms/src/Columns/ColumnFunction.h b/dbms/src/Columns/ColumnFunction.h index fd43650d255..24fdcb3f1e7 100644 --- a/dbms/src/Columns/ColumnFunction.h +++ b/dbms/src/Columns/ColumnFunction.h @@ -122,7 +122,8 @@ class ColumnFunction final : public COWPtrHelper void countSerializeByteSizeForCmp( PaddedPODArray & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */) const override + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const override { throw Exception( "Method countSerializeByteSizeForCmp is not supported for " + getName(), @@ -136,7 +137,8 @@ class ColumnFunction final : public COWPtrHelper void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const IColumn::Offsets & /* offsets */, - const TiDB::TiDBCollatorPtr & /* collator */) const override + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), @@ -155,7 +157,7 @@ class ColumnFunction final : public COWPtrHelper PaddedPODArray & /* pos */, size_t /* start */, 
size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const TiDB::TiDBCollatorPtr & /* collator */, String * /*sort_key_container */) const override { @@ -174,7 +176,7 @@ class ColumnFunction final : public COWPtrHelper PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const IColumn::Offsets & /* array_offsets */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const override diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index acfa920ca7d..fa175995cd4 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -284,10 +284,12 @@ const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos, con void ColumnNullable::countSerializeByteSizeForCmp( PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator) const + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const { - getNullMapColumn().countSerializeByteSizeForCmp(byte_size, collator); - getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator); + assert(!nullmap); + getNullMapColumn().countSerializeByteSizeForCmp(byte_size, collator, nullptr); + getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, &getNullMapData()); } void ColumnNullable::countSerializeByteSize(PaddedPODArray & byte_size) const { @@ -298,10 +300,12 @@ void ColumnNullable::countSerializeByteSize(PaddedPODArray & byte_size) void ColumnNullable::countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator) const + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const { - getNullMapColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator); - getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator); + 
assert(!nullmap); + getNullMapColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr); + getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &getNullMapData()); } void ColumnNullable::countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, @@ -315,13 +319,15 @@ void ColumnNullable::serializeToPosForCmp( PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const { - getNullMapColumn().serializeToPosForCmp(pos, start, length, has_null, collator, sort_key_container); - getNestedColumn().serializeToPosForCmp(pos, start, length, has_null, collator, sort_key_container); + assert(!nullmap); + getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); + getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container); } + void ColumnNullable::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const { getNullMapColumn().serializeToPos(pos, start, length, has_null); @@ -332,15 +338,16 @@ void ColumnNullable::serializeToPosForCmpColumnArray( PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const { + assert(!nullmap); getNullMapColumn() - .serializeToPosForCmpColumnArray(pos, start, length, has_null, array_offsets, collator, sort_key_container); + .serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); getNestedColumn() - .serializeToPosForCmpColumnArray(pos, start, length, has_null, array_offsets, collator, sort_key_container); + .serializeToPosForCmpColumnArray(pos, start, length, &getNullMapData(), array_offsets, collator, sort_key_container); } void 
ColumnNullable::serializeToPosForColumnArray( PaddedPODArray & pos, diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 76b5a6708b5..6f79c5b7b81 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -21,8 +21,7 @@ namespace DB { -using NullMap = ColumnUInt8::Container; -using ConstNullMapPtr = const NullMap *; +static_assert(std::is_same_v); /// Class that specifies nullable columns. A nullable column represents /// a column, which may have any type, provided with the possibility of @@ -78,14 +77,15 @@ class ColumnNullable final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator) const override; + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override; void countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const override; @@ -94,7 +94,7 @@ class ColumnNullable final : public COWPtrHelper PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const override; void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override; @@ -103,7 +103,7 @@ class ColumnNullable final : public COWPtrHelper PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const IColumn::Offsets 
& array_offsets, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const override; diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 537ea145a59..50a65d26836 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -484,30 +484,44 @@ void ColumnString::getPermutationWithCollationImpl( void ColumnString::countSerializeByteSizeForCmp( PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator) const + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const { if likely (collator != nullptr) { if (collator->maxBytesForOneChar() > 1) - countSerializeByteSizeImpl(byte_size, collator); + countSerializeByteSizeNullMap(byte_size, collator, nullmap); else - countSerializeByteSizeImpl(byte_size, collator); + countSerializeByteSizeNullMap(byte_size, collator, nullmap); } else { - countSerializeByteSizeImpl(byte_size, nullptr); + countSerializeByteSizeNullMap(byte_size, nullptr, nullmap); } } void ColumnString::countSerializeByteSize(PaddedPODArray & byte_size) const { - countSerializeByteSizeImpl(byte_size, nullptr); + countSerializeByteSizeNullMap(byte_size, nullptr, nullptr); +} + +template +ALWAYS_INLINE inline void ColumnString::countSerializeByteSizeNullMap( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const +{ + if (nullmap != nullptr) + countSerializeByteSizeImpl(byte_size, collator, nullmap); + else + countSerializeByteSizeImpl(byte_size, collator, nullptr); } -template +template void ColumnString::countSerializeByteSizeImpl( PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator) const + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); @@ -522,15 +536,25 @@ void ColumnString::countSerializeByteSizeImpl( sizeAt(i)); } - if constexpr 
(has_collator) + if constexpr (compare_semantics) { RUNTIME_CHECK(collator); + assert(!has_nullmap || (nullmap && nullmap->size() == size())); const size_t size = byte_size.size(); const size_t max_bytes_one_char = collator->maxBytesForOneChar(); for (size_t i = 0; i < size; ++i) { assert(sizeAt(i) > 0); + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + { + byte_size[i] += sizeof(UInt32) + 1; + continue; + } + } + if constexpr (count_code_points) { const auto num_char = UTF8::countCodePoints(&chars[offsetAt(i)], sizeAt(i) - 1); @@ -545,6 +569,7 @@ void ColumnString::countSerializeByteSizeImpl( } else { + assert(!has_nullmap); size_t size = byte_size.size(); for (size_t i = 0; i < size; ++i) byte_size[i] += sizeof(UInt32) + sizeAt(i); @@ -554,26 +579,30 @@ void ColumnString::countSerializeByteSizeImpl( void ColumnString::countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator) const + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const { if likely (collator != nullptr) { if (collator->maxBytesForOneChar() > 1) - countSerializeByteSizeForColumnArrayImpl( + countSerializeByteSizeForColumnArrayNullMap( byte_size, array_offsets, - collator); + collator, + nullmap); else - countSerializeByteSizeForColumnArrayImpl( + countSerializeByteSizeForColumnArrayNullMap( byte_size, array_offsets, - collator); + collator, + nullmap); } else { - countSerializeByteSizeForColumnArrayImpl( + countSerializeByteSizeForColumnArrayNullMap( byte_size, array_offsets, + nullptr, nullptr); } } @@ -582,17 +611,32 @@ void ColumnString::countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const { - countSerializeByteSizeForColumnArrayImpl( + countSerializeByteSizeForColumnArrayNullMap( byte_size, array_offsets, + nullptr, nullptr); } -template +template +void ColumnString::countSerializeByteSizeForColumnArrayNullMap( + 
PaddedPODArray & byte_size, + const IColumn::Offsets & array_offsets, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const +{ + if (nullmap != nullptr) + countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, collator, nullmap); + else + countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, collator, nullptr); +} + +template void ColumnString::countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator) const + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG( byte_size.size() == array_offsets.size(), @@ -616,9 +660,10 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( sizeAt(i)); } - if constexpr (has_collator) + if constexpr (compare_semantics) { RUNTIME_CHECK(collator); + assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); size_t size = array_offsets.size(); const auto max_bytes_one_char = collator->maxBytesForOneChar(); @@ -626,6 +671,15 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( { const size_t ele_count = array_offsets[i] - array_offsets[i - 1]; assert(offsetAt(array_offsets[i]) - offsetAt(array_offsets[i - 1]) >= ele_count); + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + { + byte_size[i] += (sizeof(UInt32) + 1) * ele_count; + continue; + } + } + if constexpr (count_code_points) { size_t cur_row_bytes = 0; @@ -641,6 +695,7 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( } else { + // NOTE: didn't check nullmap because we have to iterate through all rows, it's slow. 
byte_size[i] += sizeof(UInt32) * ele_count + offsetAt(array_offsets[i]) - offsetAt(array_offsets[i - 1]); } @@ -648,6 +703,7 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( } else { + assert(!has_nullmap); size_t size = array_offsets.size(); for (size_t i = 0; i < size; ++i) byte_size[i] += sizeof(UInt32) * (array_offsets[i] - array_offsets[i - 1]) + offsetAt(array_offsets[i]) @@ -659,60 +715,63 @@ void ColumnString::serializeToPosForCmp( PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const { - if (has_null) + if (nullmap != nullptr) { if likely (collator != nullptr) - serializeToPosImplType( + serializeToPosImplType( pos, start, length, collator, - sort_key_container); + sort_key_container, + nullmap); else - serializeToPosImplType(pos, start, length, nullptr, nullptr); + serializeToPosImplType(pos, start, length, nullptr, nullptr, nullmap); } else { if likely (collator != nullptr) - serializeToPosImplType( + serializeToPosImplType( pos, start, length, collator, - sort_key_container); + sort_key_container, + nullptr); else - serializeToPosImplType(pos, start, length, nullptr, nullptr); + serializeToPosImplType(pos, start, length, nullptr, nullptr, nullptr); } } void ColumnString::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const { if (has_null) - serializeToPosImplType(pos, start, length, nullptr, nullptr); + serializeToPosImplType(pos, start, length, nullptr, nullptr, nullptr); else - serializeToPosImplType(pos, start, length, nullptr, nullptr); + serializeToPosImplType(pos, start, length, nullptr, nullptr, nullptr); } -template +template void ColumnString::serializeToPosImplType( PaddedPODArray & pos, size_t start, size_t length, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const + String * sort_key_container, + const NullMap * nullmap) const { - if constexpr 
(has_collator) + if constexpr (compare_semantics) { RUNTIME_CHECK(collator && sort_key_container); #define M(VAR_PREFIX, COLLATOR_NAME, IMPL_TYPE, COLLATOR_ID) \ case (COLLATOR_ID): \ { \ - serializeToPosImpl(pos, start, length, collator, sort_key_container); \ + serializeToPosImpl(pos, start, length, collator, sort_key_container, nullmap); \ break; \ } @@ -728,41 +787,52 @@ void ColumnString::serializeToPosImplType( } else { - serializeToPosImpl( + assert(!nullmap); + serializeToPosImpl( pos, start, length, collator, - sort_key_container); + sort_key_container, + nullptr); } } -template +template void ColumnString::serializeToPosImpl( PaddedPODArray & pos, size_t start, size_t length, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const + String * sort_key_container, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == size())); + /// To avoid virtual function call of sortKey(). 
const auto * derived_collator = static_cast(collator); /// countSerializeByteSizeImpl has already checked that the size of one element is not greater than UINT32_MAX for (size_t i = 0; i < length; ++i) { - if constexpr (has_null) - { - if (pos[i] == nullptr) - continue; - } - - UInt32 str_size = sizeAt(start + i); - const void * src = &chars[offsetAt(start + i)]; - if constexpr (has_collator) + if constexpr (compare_semantics) { + UInt32 str_size = sizeAt(start + i); + const void * src = &chars[offsetAt(start + i)]; + if constexpr (has_nullmap) + { + if ((*nullmap)[i] != 0) + { + UInt32 str_size = 1; + tiflash_compiler_builtin_memcpy(pos[i], &str_size, sizeof(UInt32)); + *(pos[i]) = '\0'; + pos[i] += 1; + continue; + } + } auto sort_key = derived_collator->sortKey(reinterpret_cast(src), str_size - 1, *sort_key_container); // For terminating zero. @@ -777,6 +847,17 @@ void ColumnString::serializeToPosImpl( } else { + assert(!has_nullmap); + if constexpr (has_null) + { + if (pos[i] == nullptr) + continue; + } + + UInt32 str_size = sizeAt(start + i); + const void * src = &chars[offsetAt(start + i)]; + + assert(!nullmap); tiflash_compiler_builtin_memcpy(pos[i], &str_size, sizeof(UInt32)); pos[i] += sizeof(UInt32); inline_memcpy(pos[i], src, str_size); @@ -789,47 +870,51 @@ void ColumnString::serializeToPosForCmpColumnArray( PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const { - if (has_null) + if (nullmap != nullptr) { if likely (collator != nullptr) - serializeToPosForColumnArrayImplType( + serializeToPosForColumnArrayImplType( pos, start, length, array_offsets, collator, - sort_key_container); + sort_key_container, + nullmap); else - serializeToPosForColumnArrayImplType( + serializeToPosForColumnArrayImplType( pos, start, length, array_offsets, nullptr, - nullptr); + nullptr, + nullmap); } else { if likely 
(collator != nullptr) - serializeToPosForColumnArrayImplType( + serializeToPosForColumnArrayImplType( pos, start, length, array_offsets, collator, - sort_key_container); + sort_key_container, + nullptr); else - serializeToPosForColumnArrayImplType( + serializeToPosForColumnArrayImplType( pos, start, length, array_offsets, nullptr, + nullptr, nullptr); } } @@ -842,46 +927,50 @@ void ColumnString::serializeToPosForColumnArray( const IColumn::Offsets & array_offsets) const { if (has_null) - serializeToPosForColumnArrayImplType( + serializeToPosForColumnArrayImplType( pos, start, length, array_offsets, nullptr, + nullptr, nullptr); else - serializeToPosForColumnArrayImplType( + serializeToPosForColumnArrayImplType( pos, start, length, array_offsets, nullptr, + nullptr, nullptr); } -template +template void ColumnString::serializeToPosForColumnArrayImplType( PaddedPODArray & pos, size_t start, size_t length, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const + String * sort_key_container, + const NullMap * nullmap) const { - if constexpr (has_collator) + if constexpr (compare_semantics) { RUNTIME_CHECK(collator && sort_key_container); #define M(VAR_PREFIX, COLLATOR_NAME, IMPL_TYPE, COLLATOR_ID) \ case (COLLATOR_ID): \ { \ - serializeToPosForColumnArrayImpl( \ + serializeToPosForColumnArrayImpl( \ pos, \ start, \ length, \ array_offsets, \ collator, \ - sort_key_container); \ + sort_key_container, \ + nullmap); \ break; \ } @@ -897,24 +986,27 @@ void ColumnString::serializeToPosForColumnArrayImplType( } else { - serializeToPosForColumnArrayImpl( + assert(!nullmap); + serializeToPosForColumnArrayImpl( pos, start, length, array_offsets, collator, - sort_key_container); + sort_key_container, + nullptr); } } -template +template void ColumnString::serializeToPosForColumnArrayImpl( PaddedPODArray & pos, size_t start, size_t length, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, - 
String * sort_key_container) const + String * sort_key_container, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG( @@ -929,18 +1021,22 @@ void ColumnString::serializeToPosForColumnArrayImpl( array_offsets.back(), size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); + /// countSerializeByteSizeForCmpColumnArray has already checked that the size of one element is not greater than UINT32_MAX - if constexpr (has_collator) + if constexpr (compare_semantics) { /// To avoid virtual function call of sortKey(). const auto * derived_collator = static_cast(collator); for (size_t i = 0; i < length; ++i) { - if constexpr (has_null) + if constexpr (has_nullmap) { - if (pos[i] == nullptr) + if ((*nullmap)[i] != 0) continue; } + for (size_t j = array_offsets[start + i - 1]; j < array_offsets[start + i]; ++j) { UInt32 str_size = sizeAt(j); @@ -961,6 +1057,7 @@ void ColumnString::serializeToPosForColumnArrayImpl( } else { + assert(!has_nullmap); for (size_t i = 0; i < length; ++i) { if constexpr (has_null) diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index da9eec75cb8..e187bc30a10 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -107,45 +107,65 @@ class ColumnString final : public COWPtrHelper } } - template - void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) const; - template + template + ALWAYS_INLINE inline void countSerializeByteSizeNullMap( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const; + template + void countSerializeByteSizeImpl( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const; + + template + void countSerializeByteSizeForColumnArrayNullMap( + PaddedPODArray & 
byte_size, + const IColumn::Offsets & array_offsets, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const; + template void countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator) const; + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const; - template + template void serializeToPosImplType( PaddedPODArray & pos, size_t start, size_t length, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const; - template + String * sort_key_container, + const NullMap * nullmap) const; + template void serializeToPosImpl( PaddedPODArray & pos, size_t start, size_t length, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const; + String * sort_key_container, + const NullMap * nullmap) const; - template + template void serializeToPosForColumnArrayImplType( PaddedPODArray & pos, size_t start, size_t length, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const; - template + String * sort_key_container, + const NullMap * nullmap) const; + template void serializeToPosForColumnArrayImpl( PaddedPODArray & pos, size_t start, size_t length, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const; + String * sort_key_container, + const NullMap * nullmap) const; void deserializeAndInsertFromPosImpl(PaddedPODArray & pos, bool use_nt_align_buffer); template @@ -297,14 +317,15 @@ class ColumnString final : public COWPtrHelper return pos + string_size; } - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void 
countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator) const override; + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override; void countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const override; @@ -313,7 +334,7 @@ class ColumnString final : public COWPtrHelper PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const override; void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override; @@ -322,7 +343,7 @@ class ColumnString final : public COWPtrHelper PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const override; diff --git a/dbms/src/Columns/ColumnTuple.h b/dbms/src/Columns/ColumnTuple.h index 9e89ff7b937..6629f3390db 100644 --- a/dbms/src/Columns/ColumnTuple.h +++ b/dbms/src/Columns/ColumnTuple.h @@ -95,11 +95,11 @@ class ColumnTuple final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const override { for (const auto & column : columns) - column->countSerializeByteSizeForCmp(byte_size, collator); + column->countSerializeByteSizeForCmp(byte_size, collator, nullmap); } void countSerializeByteSize(PaddedPODArray & byte_size) const override { @@ -110,10 +110,11 @@ class ColumnTuple final : public COWPtrHelper void 
countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator) const override + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override { for (const auto & column : columns) - column->countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator); + column->countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullmap); } void countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, @@ -127,12 +128,12 @@ class ColumnTuple final : public COWPtrHelper PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const override { for (const auto & column : columns) - column->serializeToPosForCmp(pos, start, length, has_null, collator, sort_key_container); + column->serializeToPosForCmp(pos, start, length, nullmap, collator, sort_key_container); } void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override { @@ -144,7 +145,7 @@ class ColumnTuple final : public COWPtrHelper PaddedPODArray & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const override @@ -154,7 +155,7 @@ class ColumnTuple final : public COWPtrHelper pos, start, length, - has_null, + nullmap, array_offsets, collator, sort_key_container); diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 20a823124ed..f7d7692b965 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -88,18 +88,22 @@ template void ColumnVector::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const { if (has_null) - serializeToPosImpl(pos, start, length); + serializeToPosImpl(pos, start, length, 
nullptr); else - serializeToPosImpl(pos, start, length); + serializeToPosImpl(pos, start, length, nullptr); } template -template -void ColumnVector::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length) const +template +void ColumnVector::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == size())); + + T val{}; for (size_t i = 0; i < length; ++i) { if constexpr (has_null) @@ -107,6 +111,15 @@ void ColumnVector::serializeToPosImpl(PaddedPODArray & pos, size_t st if (pos[i] == nullptr) continue; } + if constexpr (has_nullmap) + { + if ((*nullmap)[start + i] != 0) + { + tiflash_compiler_builtin_memcpy(pos[i], &val, sizeof(T)); + pos[i] += sizeof(T); + continue; + } + } tiflash_compiler_builtin_memcpy(pos[i], &data[start + i], sizeof(T)); pos[i] += sizeof(T); } @@ -121,18 +134,19 @@ void ColumnVector::serializeToPosForColumnArray( const IColumn::Offsets & array_offsets) const { if (has_null) - serializeToPosForColumnArrayImpl(pos, start, length, array_offsets); + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); else - serializeToPosForColumnArrayImpl(pos, start, length, array_offsets); + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); } template -template +template void ColumnVector::serializeToPosForColumnArrayImpl( PaddedPODArray & pos, size_t start, size_t length, - const IColumn::Offsets & array_offsets) const + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG( @@ -147,6 +161,9 @@ void 
ColumnVector::serializeToPosForColumnArrayImpl( array_offsets.back(), size()); + static_assert(!(has_null && has_nullmap)); + assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); + for (size_t i = 0; i < length; ++i) { if constexpr (has_null) @@ -155,18 +172,12 @@ void ColumnVector::serializeToPosForColumnArrayImpl( continue; } size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; - if (len <= 4) + if constexpr (has_nullmap) { - for (size_t j = 0; j < len; ++j) - tiflash_compiler_builtin_memcpy( - pos[i] + j * sizeof(T), - &data[array_offsets[start + i - 1] + j], - sizeof(T)); - } - else - { - inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T)); + if ((*nullmap)[i] != 0) + continue; } + inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T)); pos[i] += len * sizeof(T); } } diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 57275b6905a..212ca33f3b5 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -198,15 +198,16 @@ class ColumnVector final : public COWPtrHelper - void serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length) const; + template + void serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const; - template + template void serializeToPosForColumnArrayImpl( PaddedPODArray & pos, size_t start, size_t length, - const IColumn::Offsets & array_offsets) const; + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const; public: bool isNumeric() const override { return is_arithmetic_v; } @@ -327,7 +328,7 @@ class ColumnVector final : public COWPtrHelper & byte_size, const TiDB::TiDBCollatorPtr &) const override + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr &, const NullMap *) const override { countSerializeByteSize(byte_size); } @@ -336,7 +337,8 @@ class ColumnVector final : public COWPtrHelper 
& byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr &) const override + const TiDB::TiDBCollatorPtr &, + const NullMap *) const override { countSerializeByteSizeForColumnArray(byte_size, array_offsets); } @@ -348,11 +350,14 @@ class ColumnVector final : public COWPtrHelper & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const TiDB::TiDBCollatorPtr &, String *) const override { - serializeToPos(pos, start, length, has_null); + if (nullmap != nullptr) + serializeToPosImpl(pos, start, length, nullmap); + else + serializeToPosImpl(pos, start, length, nullptr); } void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override; @@ -360,12 +365,15 @@ class ColumnVector final : public COWPtrHelper & pos, size_t start, size_t length, - bool has_null, + const NullMap * nullmap, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr &, String *) const override { - serializeToPosForColumnArray(pos, start, length, has_null, array_offsets); + if (nullmap != nullptr) + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullmap); + else + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); } void serializeToPosForColumnArray( PaddedPODArray & pos, diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 4321ed0755a..0011d8454e0 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -38,6 +38,9 @@ extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; class Arena; class ColumnGathererStream; +using NullMap = PaddedPODArray; +using ConstNullMapPtr = const NullMap *; + /// Declares interface to store columns in memory. class IColumn : public COWPtr { @@ -240,7 +243,8 @@ class IColumn : public COWPtr /// The byte_size.size() must be equal to the column size. 
virtual void countSerializeByteSizeForCmp( PaddedPODArray & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */) const + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const = 0; virtual void countSerializeByteSize(PaddedPODArray & /* byte_size */) const = 0; @@ -250,7 +254,8 @@ class IColumn : public COWPtr virtual void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const Offsets & /* array_offsets */, - const TiDB::TiDBCollatorPtr & /* collator */) const + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const = 0; virtual void countSerializeByteSizeForColumnArray( PaddedPODArray & /* byte_size */, @@ -266,7 +271,7 @@ class IColumn : public COWPtr PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /*nullmap*/, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const = 0; @@ -284,7 +289,7 @@ class IColumn : public COWPtr PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /*nullmap*/, const Offsets & /* array_offsets */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const diff --git a/dbms/src/Columns/IColumnDummy.h b/dbms/src/Columns/IColumnDummy.h index 3882cb4080f..3b0550a9651 100644 --- a/dbms/src/Columns/IColumnDummy.h +++ b/dbms/src/Columns/IColumnDummy.h @@ -90,7 +90,8 @@ class IColumnDummy : public IColumn void countSerializeByteSizeForCmp( PaddedPODArray & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */) const override + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const override { throw Exception( "Method countSerializeByteSizeForCmp is not supported for " + getName(), @@ -104,7 +105,8 @@ class IColumnDummy : public IColumn void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const IColumn::Offsets & 
/* array_offsets */, - const TiDB::TiDBCollatorPtr & /* collator */) const override + const TiDB::TiDBCollatorPtr & /* collator */, + const NullMap * /* nullmap */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), @@ -123,7 +125,7 @@ class IColumnDummy : public IColumn PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const override { @@ -142,7 +144,7 @@ class IColumnDummy : public IColumn PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */, + const NullMap * /* nullmap */, const IColumn::Offsets & /* array_offsets */, const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const override diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index 6cf5742aba3..fd414d8c5e1 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -31,14 +31,14 @@ class TestColumnSerializeDeserialize : public ::testing::Test static void testCountSerializeByteSize( const ColumnPtr & column_ptr, const PaddedPODArray & result_byte_size, - bool for_compare = false, + bool compare_semantics = false, const TiDB::TiDBCollatorPtr & collator = nullptr) { PaddedPODArray byte_size; byte_size.resize_fill_zero(column_ptr->size()); for (size_t i = 0; i < column_ptr->size(); ++i) byte_size[i] = i; - if (!for_compare) + if (!compare_semantics) column_ptr->countSerializeByteSize(byte_size); else column_ptr->countSerializeByteSizeForCmp(byte_size, collator); @@ -51,7 +51,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test const ColumnPtr & column_ptr, const ColumnPtr & offsets, const PaddedPODArray & result_byte_size, - bool 
for_compare = false, + bool compare_semantics = false, const TiDB::TiDBCollatorPtr & collator = nullptr) { auto column_array = ColumnArray::create(column_ptr->cloneFullColumn(), offsets->cloneFullColumn()); @@ -59,7 +59,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test byte_size.resize_fill_zero(column_array->size()); for (size_t i = 0; i < column_array->size(); ++i) byte_size[i] = i; - if (!for_compare) + if (!compare_semantics) column_array->countSerializeByteSize(byte_size); else column_array->countSerializeByteSizeForCmp(byte_size, collator); @@ -164,26 +164,26 @@ class TestColumnSerializeDeserialize : public ::testing::Test static void testSerializeAndDeserialize( const ColumnPtr & column_ptr, - bool for_compare = false, + bool compare_semantics = false, const TiDB::TiDBCollatorPtr & collator = nullptr, String * sort_key_container = nullptr) { - doTestSerializeAndDeserialize(column_ptr, false, for_compare, collator, sort_key_container); - doTestSerializeAndDeserialize2(column_ptr, false, for_compare, collator, sort_key_container); - doTestSerializeAndDeserialize(column_ptr, true, for_compare, collator, sort_key_container); - doTestSerializeAndDeserialize2(column_ptr, true, for_compare, collator, sort_key_container); + doTestSerializeAndDeserialize(column_ptr, false, compare_semantics, collator, sort_key_container); + doTestSerializeAndDeserialize2(column_ptr, false, compare_semantics, collator, sort_key_container); + doTestSerializeAndDeserialize(column_ptr, true, compare_semantics, collator, sort_key_container); + doTestSerializeAndDeserialize2(column_ptr, true, compare_semantics, collator, sort_key_container); } static void doTestSerializeAndDeserialize( const ColumnPtr & column_ptr, bool use_nt_align_buffer, - bool for_compare = false, + bool compare_semantics = false, const TiDB::TiDBCollatorPtr & collator = nullptr, String * sort_key_container = nullptr) { PaddedPODArray byte_size; byte_size.resize_fill_zero(column_ptr->size()); - if 
(!for_compare) + if (!compare_semantics) column_ptr->countSerializeByteSize(byte_size); else column_ptr->countSerializeByteSizeForCmp(byte_size, collator); @@ -201,15 +201,15 @@ class TestColumnSerializeDeserialize : public ::testing::Test PaddedPODArray ori_pos; for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!for_compare) + if (!compare_semantics) column_ptr->serializeToPos(pos, 0, byte_size.size() / 2, false); else - column_ptr->serializeToPosForCmp(pos, 0, byte_size.size() / 2, false, collator, sort_key_container); + column_ptr->serializeToPosForCmp(pos, 0, byte_size.size() / 2, nullptr, collator, sort_key_container); auto new_col_ptr = column_ptr->cloneEmpty(); if (use_nt_align_buffer) new_col_ptr->reserveAlign(byte_size.size(), FULL_VECTOR_SIZE_AVX2); - if (!for_compare) + if (!compare_semantics) new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); else new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); @@ -225,20 +225,20 @@ class TestColumnSerializeDeserialize : public ::testing::Test pos.push_back(nullptr); for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!for_compare) + if (!compare_semantics) column_ptr->serializeToPos(pos, byte_size.size() / 2, byte_size.size() - byte_size.size() / 2, true); else column_ptr->serializeToPosForCmp( pos, byte_size.size() / 2, byte_size.size() - byte_size.size() / 2, - true, + nullptr, collator, sort_key_container); pos.resize(pos.size() - 1); ori_pos.resize(ori_pos.size() - 1); - if (!for_compare) + if (!compare_semantics) new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); else new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); @@ -253,12 +253,12 @@ class TestColumnSerializeDeserialize : public ::testing::Test } for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!for_compare) + if (!compare_semantics) column_ptr->serializeToPos(pos, 0, byte_size.size(), true); else - column_ptr->serializeToPosForCmp(pos, 0, 
byte_size.size(), true, collator, sort_key_container); + column_ptr->serializeToPosForCmp(pos, 0, byte_size.size(), nullptr, collator, sort_key_container); - if (!for_compare) + if (!compare_semantics) new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); else new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); @@ -279,7 +279,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test static void doTestSerializeAndDeserialize2( const ColumnPtr & column_ptr, bool use_nt_align_buffer, - bool for_compare = false, + bool compare_semantics = false, const TiDB::TiDBCollatorPtr & collator = nullptr, String * sort_key_container = nullptr) { @@ -287,7 +287,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test return; PaddedPODArray byte_size; byte_size.resize_fill_zero(column_ptr->size()); - if (!for_compare) + if (!compare_semantics) column_ptr->countSerializeByteSize(byte_size); else column_ptr->countSerializeByteSizeForCmp(byte_size, collator); @@ -306,17 +306,17 @@ class TestColumnSerializeDeserialize : public ::testing::Test pos.push_back(nullptr); for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!for_compare) + if (!compare_semantics) column_ptr->serializeToPos(pos, 0, byte_size.size() / 2, true); else - column_ptr->serializeToPosForCmp(pos, 0, byte_size.size() / 2, true, collator, sort_key_container); + column_ptr->serializeToPosForCmp(pos, 0, byte_size.size() / 2, nullptr, collator, sort_key_container); pos.resize(pos.size() - 1); ori_pos.resize(ori_pos.size() - 1); auto new_col_ptr = column_ptr->cloneEmpty(); if (use_nt_align_buffer) new_col_ptr->reserveAlign(byte_size.size(), FULL_VECTOR_SIZE_AVX2); - if (!for_compare) + if (!compare_semantics) new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); else new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); @@ -331,7 +331,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test } for (auto * ptr : 
pos) ori_pos.push_back(ptr); - if (!for_compare) + if (!compare_semantics) column_ptr ->serializeToPos(pos, byte_size.size() / 2 - 1, byte_size.size() - byte_size.size() / 2 + 1, false); else @@ -339,10 +339,10 @@ class TestColumnSerializeDeserialize : public ::testing::Test pos, byte_size.size() / 2 - 1, byte_size.size() - byte_size.size() / 2 + 1, - false, + nullptr, collator, sort_key_container); - if (!for_compare) + if (!compare_semantics) new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); else new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); @@ -357,12 +357,12 @@ class TestColumnSerializeDeserialize : public ::testing::Test } for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!for_compare) + if (!compare_semantics) column_ptr->serializeToPos(pos, 0, byte_size.size(), true); else - column_ptr->serializeToPosForCmp(pos, 0, byte_size.size(), true, collator, sort_key_container); + column_ptr->serializeToPosForCmp(pos, 0, byte_size.size(), nullptr, collator, sort_key_container); - if (!for_compare) + if (!compare_semantics) new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); else new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); From 51e24d86ae7367996e0c4acaf5308553d5626670 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 24 Jan 2025 10:49:19 +0800 Subject: [PATCH 02/12] refine Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnVector.cpp | 4 ++-- .../Columns/tests/gtest_column_serialize_deserialize.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index f7d7692b965..51dc2869183 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -103,7 +103,7 @@ void ColumnVector::serializeToPosImpl(PaddedPODArray & pos, size_t st static_assert(!(has_null && has_nullmap)); assert(!has_nullmap || (nullmap && nullmap->size() == size())); - T 
val{}; + T def_val{}; for (size_t i = 0; i < length; ++i) { if constexpr (has_null) @@ -115,7 +115,7 @@ void ColumnVector::serializeToPosImpl(PaddedPODArray & pos, size_t st { if ((*nullmap)[start + i] != 0) { - tiflash_compiler_builtin_memcpy(pos[i], &val, sizeof(T)); + tiflash_compiler_builtin_memcpy(pos[i], &def_val, sizeof(T)); pos[i] += sizeof(T); continue; } diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index fd414d8c5e1..035da450b7e 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -41,7 +41,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test if (!compare_semantics) column_ptr->countSerializeByteSize(byte_size); else - column_ptr->countSerializeByteSizeForCmp(byte_size, collator); + column_ptr->countSerializeByteSizeForCmp(byte_size, collator, nullptr); ASSERT_EQ(byte_size.size(), result_byte_size.size()); for (size_t i = 0; i < byte_size.size(); ++i) ASSERT_EQ(byte_size[i], i + result_byte_size[i]); @@ -62,7 +62,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test if (!compare_semantics) column_array->countSerializeByteSize(byte_size); else - column_array->countSerializeByteSizeForCmp(byte_size, collator); + column_array->countSerializeByteSizeForCmp(byte_size, collator, nullptr); ASSERT_EQ(byte_size.size(), result_byte_size.size()); for (size_t i = 0; i < byte_size.size(); ++i) ASSERT_EQ(byte_size[i], sizeof(UInt32) + i + result_byte_size[i]); @@ -186,7 +186,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test if (!compare_semantics) column_ptr->countSerializeByteSize(byte_size); else - column_ptr->countSerializeByteSizeForCmp(byte_size, collator); + column_ptr->countSerializeByteSizeForCmp(byte_size, collator, nullptr); size_t total_size = 0; for (const auto size : byte_size) total_size += size; @@ -290,7 +290,7 @@ class 
TestColumnSerializeDeserialize : public ::testing::Test if (!compare_semantics) column_ptr->countSerializeByteSize(byte_size); else - column_ptr->countSerializeByteSizeForCmp(byte_size, collator); + column_ptr->countSerializeByteSizeForCmp(byte_size, collator, nullptr); size_t total_size = 0; for (const auto size : byte_size) total_size += size; From 543f1b826b201a97e3783475de18dc8d2dc94864 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 24 Jan 2025 10:50:46 +0800 Subject: [PATCH 03/12] plan 1 code Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnArray.cpp | 6 +- dbms/src/Columns/ColumnArray.h | 11 ++- dbms/src/Columns/ColumnDecimal.cpp | 6 +- dbms/src/Columns/ColumnDecimal.h | 29 +++++- dbms/src/Columns/ColumnFixedString.cpp | 6 +- dbms/src/Columns/ColumnFixedString.h | 12 ++- dbms/src/Columns/ColumnNullable.cpp | 10 +- dbms/src/Columns/ColumnNullable.h | 6 +- dbms/src/Columns/ColumnString.cpp | 124 ++++++++++++++++++------- dbms/src/Columns/ColumnString.h | 6 +- dbms/src/Columns/ColumnTuple.h | 6 +- dbms/src/Columns/ColumnVector.cpp | 6 +- dbms/src/Columns/ColumnVector.h | 5 +- 13 files changed, 170 insertions(+), 63 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index d661461b62f..8223cc5ced1 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -232,8 +232,10 @@ void ColumnArray::countSerializeByteSize(PaddedPODArray & byte_size) con } template -void ColumnArray::countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) - const +void ColumnArray::countSerializeByteSizeImpl( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 64b890a354e..48c3c044ea6 100644 --- 
a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -45,7 +45,10 @@ class ColumnArray final : public COWPtrHelper ColumnArray(const ColumnArray &) = default; template - void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const; + void countSerializeByteSizeImpl( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const; template void serializeToPosImpl( @@ -97,8 +100,10 @@ class ColumnArray final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) - const override; + void countSerializeByteSizeForCmp( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index d2a3b2dc735..cd1ea095b66 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -195,7 +195,11 @@ void ColumnDecimal::countSerializeByteSizeForColumnArrayImpl( template template -void ColumnDecimal::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const +void ColumnDecimal::serializeToPosImpl( + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h index 1f02419df8b..91ea2ede0db 100644 --- 
a/dbms/src/Columns/ColumnDecimal.h +++ b/dbms/src/Columns/ColumnDecimal.h @@ -174,7 +174,10 @@ class ColumnDecimal final : public COWPtrHelper & byte_size, const TiDB::TiDBCollatorPtr &, const NullMap *) const override + void countSerializeByteSizeForCmp( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr &, + const NullMap *) const override { countSerializeByteSizeImpl(byte_size, nullptr); } @@ -207,16 +210,32 @@ class ColumnDecimal final : public COWPtrHelper(pos, start, length, nullmap); + serializeToPosImpl( + pos, + start, + length, + nullmap); else - serializeToPosImpl(pos, start, length, nullptr); + serializeToPosImpl( + pos, + start, + length, + nullptr); } void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override { if (has_null) - serializeToPosImpl(pos, start, length, nullptr); + serializeToPosImpl( + pos, + start, + length, + nullptr); else - serializeToPosImpl(pos, start, length, nullptr); + serializeToPosImpl( + pos, + start, + length, + nullptr); } void serializeToPosForCmpColumnArray( diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index dde147faef5..3176fab6783 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -194,7 +194,11 @@ void ColumnFixedString::serializeToPos(PaddedPODArray & pos, size_t star } template -void ColumnFixedString::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const +void ColumnFixedString::serializeToPosImpl( + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h index 0ac2ff89aa4..1cd2713542b 100644 --- 
a/dbms/src/Columns/ColumnFixedString.h +++ b/dbms/src/Columns/ColumnFixedString.h @@ -59,9 +59,9 @@ class ColumnFixedString final : public COWPtrHelper template void countSerializeByteSizeForColumnArrayImpl( - PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets, - const NullMap * nullmap) const; + PaddedPODArray & byte_size, + const IColumn::Offsets & array_offsets, + const NullMap * nullmap) const; template void serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const; @@ -124,8 +124,10 @@ class ColumnFixedString final : public COWPtrHelper const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) - const override + void countSerializeByteSizeForCmp( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override { // collator->sortKey() will change the string length, which may exceeds n. 
RUNTIME_CHECK_MSG( diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index fa175995cd4..40a8c1a339b 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -346,8 +346,14 @@ void ColumnNullable::serializeToPosForCmpColumnArray( assert(!nullmap); getNullMapColumn() .serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); - getNestedColumn() - .serializeToPosForCmpColumnArray(pos, start, length, &getNullMapData(), array_offsets, collator, sort_key_container); + getNestedColumn().serializeToPosForCmpColumnArray( + pos, + start, + length, + &getNullMapData(), + array_offsets, + collator, + sort_key_container); } void ColumnNullable::serializeToPosForColumnArray( PaddedPODArray & pos, diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 6f79c5b7b81..ba747fe8a8e 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -77,8 +77,10 @@ class ColumnNullable final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) - const override; + void countSerializeByteSizeForCmp( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 50a65d26836..0789a6ac1fb 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -490,19 +490,31 @@ void ColumnString::countSerializeByteSizeForCmp( if likely (collator != nullptr) { if (collator->maxBytesForOneChar() > 1) - 
countSerializeByteSizeNullMap(byte_size, collator, nullmap); + countSerializeByteSizeNullMap( + byte_size, + collator, + nullmap); else - countSerializeByteSizeNullMap(byte_size, collator, nullmap); + countSerializeByteSizeNullMap( + byte_size, + collator, + nullmap); } else { - countSerializeByteSizeNullMap(byte_size, nullptr, nullmap); + countSerializeByteSizeNullMap( + byte_size, + nullptr, + nullmap); } } void ColumnString::countSerializeByteSize(PaddedPODArray & byte_size) const { - countSerializeByteSizeNullMap(byte_size, nullptr, nullptr); + countSerializeByteSizeNullMap( + byte_size, + nullptr, + nullptr); } template @@ -512,9 +524,15 @@ ALWAYS_INLINE inline void ColumnString::countSerializeByteSizeNullMap( const NullMap * nullmap) const { if (nullmap != nullptr) - countSerializeByteSizeImpl(byte_size, collator, nullmap); + countSerializeByteSizeImpl( + byte_size, + collator, + nullmap); else - countSerializeByteSizeImpl(byte_size, collator, nullptr); + countSerializeByteSizeImpl( + byte_size, + collator, + nullptr); } template @@ -626,9 +644,17 @@ void ColumnString::countSerializeByteSizeForColumnArrayNullMap( const NullMap * nullmap) const { if (nullmap != nullptr) - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, collator, nullmap); + countSerializeByteSizeForColumnArrayImpl( + byte_size, + array_offsets, + collator, + nullmap); else - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, collator, nullptr); + countSerializeByteSizeForColumnArrayImpl( + byte_size, + array_offsets, + collator, + nullptr); } template @@ -730,7 +756,13 @@ void ColumnString::serializeToPosForCmp( sort_key_container, nullmap); else - serializeToPosImplType(pos, start, length, nullptr, nullptr, nullmap); + serializeToPosImplType( + pos, + start, + length, + nullptr, + nullptr, + nullmap); } else { @@ -743,16 +775,34 @@ void ColumnString::serializeToPosForCmp( sort_key_container, nullptr); else - serializeToPosImplType(pos, start, length, 
nullptr, nullptr, nullptr); + serializeToPosImplType( + pos, + start, + length, + nullptr, + nullptr, + nullptr); } } void ColumnString::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const { if (has_null) - serializeToPosImplType(pos, start, length, nullptr, nullptr, nullptr); + serializeToPosImplType( + pos, + start, + length, + nullptr, + nullptr, + nullptr); else - serializeToPosImplType(pos, start, length, nullptr, nullptr, nullptr); + serializeToPosImplType( + pos, + start, + length, + nullptr, + nullptr, + nullptr); } template @@ -768,11 +818,17 @@ void ColumnString::serializeToPosImplType( { RUNTIME_CHECK(collator && sort_key_container); -#define M(VAR_PREFIX, COLLATOR_NAME, IMPL_TYPE, COLLATOR_ID) \ - case (COLLATOR_ID): \ - { \ - serializeToPosImpl(pos, start, length, collator, sort_key_container, nullmap); \ - break; \ +#define M(VAR_PREFIX, COLLATOR_NAME, IMPL_TYPE, COLLATOR_ID) \ + case (COLLATOR_ID): \ + { \ + serializeToPosImpl( \ + pos, \ + start, \ + length, \ + collator, \ + sort_key_container, \ + nullmap); \ + break; \ } switch (collator->getCollatorId()) @@ -908,14 +964,10 @@ void ColumnString::serializeToPosForCmpColumnArray( sort_key_container, nullptr); else - serializeToPosForColumnArrayImplType( - pos, - start, - length, - array_offsets, - nullptr, - nullptr, - nullptr); + serializeToPosForColumnArrayImplType< + /*has_null=*/false, + /*compare_semantics=*/false, + /*has_nullmap=*/false>(pos, start, length, array_offsets, nullptr, nullptr, nullptr); } } @@ -960,18 +1012,18 @@ void ColumnString::serializeToPosForColumnArrayImplType( { RUNTIME_CHECK(collator && sort_key_container); -#define M(VAR_PREFIX, COLLATOR_NAME, IMPL_TYPE, COLLATOR_ID) \ - case (COLLATOR_ID): \ - { \ +#define M(VAR_PREFIX, COLLATOR_NAME, IMPL_TYPE, COLLATOR_ID) \ + case (COLLATOR_ID): \ + { \ serializeToPosForColumnArrayImpl( \ - pos, \ - start, \ - length, \ - array_offsets, \ - collator, \ - sort_key_container, \ - nullmap); \ - 
break; \ + pos, \ + start, \ + length, \ + array_offsets, \ + collator, \ + sort_key_container, \ + nullmap); \ + break; \ } switch (collator->getCollatorId()) diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index e187bc30a10..8d9ee722c14 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -317,8 +317,10 @@ class ColumnString final : public COWPtrHelper return pos + string_size; } - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) - const override; + void countSerializeByteSizeForCmp( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( diff --git a/dbms/src/Columns/ColumnTuple.h b/dbms/src/Columns/ColumnTuple.h index 6629f3390db..651a7bfca12 100644 --- a/dbms/src/Columns/ColumnTuple.h +++ b/dbms/src/Columns/ColumnTuple.h @@ -95,8 +95,10 @@ class ColumnTuple final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) - const override + void countSerializeByteSizeForCmp( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr & collator, + const NullMap * nullmap) const override { for (const auto & column : columns) column->countSerializeByteSizeForCmp(byte_size, collator, nullmap); diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 51dc2869183..48db5882a7a 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -95,7 +95,11 @@ void ColumnVector::serializeToPos(PaddedPODArray & pos, size_t start, template template -void 
ColumnVector::serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const +void ColumnVector::serializeToPosImpl( + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap) const { RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 212ca33f3b5..344a3d10fbe 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -328,7 +328,10 @@ class ColumnVector final : public COWPtrHelper & byte_size, const TiDB::TiDBCollatorPtr &, const NullMap *) const override + void countSerializeByteSizeForCmp( + PaddedPODArray & byte_size, + const TiDB::TiDBCollatorPtr &, + const NullMap *) const override { countSerializeByteSize(byte_size); } From 370bf0ae1346af260b4c7dc061e94ae0d3d9b572 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 24 Jan 2025 15:56:55 +0800 Subject: [PATCH 04/12] refine Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnArray.cpp | 3 +- dbms/src/Columns/ColumnDecimal.cpp | 22 ++- dbms/src/Columns/ColumnFixedString.cpp | 8 +- dbms/src/Columns/ColumnNullable.cpp | 111 +++++++++-- dbms/src/Columns/ColumnString.cpp | 23 +-- dbms/src/Columns/ColumnVector.cpp | 19 +- dbms/src/Columns/IColumn.h | 16 ++ .../gtest_column_serialize_deserialize.cpp | 186 ++++++++++-------- 8 files changed, 255 insertions(+), 133 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 8223cc5ced1..d6d2657c402 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -238,6 +238,7 @@ void ColumnArray::countSerializeByteSizeImpl( const NullMap * nullmap) const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); + 
assert(!nullmap || (nullmap->size() == size())); if unlikely (!getOffsets().empty() && getOffsets().back() > UINT32_MAX) { @@ -309,7 +310,7 @@ void ColumnArray::serializeToPosImpl( UInt32 len = sizeAt(start + i); if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) len = 0; } tiflash_compiler_builtin_memcpy(pos[i], &len, sizeof(UInt32)); diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index cd1ea095b66..867b86e1b31 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -160,7 +160,6 @@ void ColumnDecimal::countSerializeByteSizeImpl(PaddedPODArray & byte_ } } -// TODO add unit test template template void ColumnDecimal::countSerializeByteSizeForColumnArrayImpl( @@ -220,7 +219,7 @@ void ColumnDecimal::serializeToPosImpl( { if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) { pos[i] = serializeDecimal256Helper(pos[i], def_val); continue; @@ -232,7 +231,7 @@ void ColumnDecimal::serializeToPosImpl( { if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) { tiflash_compiler_builtin_memcpy(pos[i], &def_val, sizeof(T)); pos[i] += sizeof(T); @@ -283,7 +282,7 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( { if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) continue; } for (size_t j = 0; j < len; ++j) @@ -293,10 +292,21 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( { if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) continue; } - inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T)); + if (len <= 4) + { + for (size_t j = 0; j < len; ++j) + tiflash_compiler_builtin_memcpy( + pos[i] + j * sizeof(T), + &data[array_offsets[start + i - 1] + j], + sizeof(T)); + } + else + { + inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T)); + } pos[i] += len * sizeof(T); } } diff 
--git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 3176fab6783..615d5b68b55 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -146,7 +146,7 @@ void ColumnFixedString::countSerializeByteSizeImpl(PaddedPODArray & byte { if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) { byte_size[i] += 1; continue; @@ -175,7 +175,7 @@ void ColumnFixedString::countSerializeByteSizeForColumnArrayImpl( { if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) { byte_size[i] += array_offsets[i] - array_offsets[i - 1]; continue; @@ -215,7 +215,7 @@ void ColumnFixedString::serializeToPosImpl( } if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) { for (size_t j = 0; j < n; ++j) { @@ -277,7 +277,7 @@ void ColumnFixedString::serializeToPosForColumnArrayImpl( size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) continue; } diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 40a8c1a339b..b5e098cd38e 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -287,9 +287,18 @@ void ColumnNullable::countSerializeByteSizeForCmp( const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const { - assert(!nullmap); - getNullMapColumn().countSerializeByteSizeForCmp(byte_size, collator, nullptr); - getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, &getNullMapData()); + if unlikely (nullmap != nullptr) + { + auto new_nullmap_col = ColumnUInt8::create(); + DB::mergeNullMap(*nullmap, getNullMapData(), new_nullmap_col->getData()); + new_nullmap_col->countSerializeByteSizeForCmp(byte_size, collator, nullptr); + getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, 
&(new_nullmap_col->getData())); + } + else + { + getNullMapColumn().countSerializeByteSizeForCmp(byte_size, collator, nullptr); + getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, &getNullMapData()); + } } void ColumnNullable::countSerializeByteSize(PaddedPODArray & byte_size) const { @@ -303,9 +312,32 @@ void ColumnNullable::countSerializeByteSizeForCmpColumnArray( const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const { - assert(!nullmap); - getNullMapColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr); - getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &getNullMapData()); + const auto & nested_nullmap = getNullMapData(); + assert(nested_nullmap.size() == array_offsets.back()); + if unlikely (nullmap != nullptr) + { + assert(nullmap->size() == array_offsets.size()); + auto new_nullmap_col = ColumnUInt8::create(); + auto & new_nullmap_data = new_nullmap_col->getData(); + new_nullmap_data.assign(nested_nullmap); + for (size_t i = 0; i < array_offsets.size(); ++i) + { + if (DB::isNullAt(*nullmap, i)) + { + const auto row_size = array_offsets[i] - array_offsets[i - 1]; + const auto row_offset = array_offsets[i - 1]; + for (size_t j = row_offset; j < row_offset + row_size; ++j) + setNullAt(new_nullmap_data, j); + } + } + new_nullmap_col->countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr); + getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &new_nullmap_data); + } + else + { + getNullMapColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr); + getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &nested_nullmap); + } } void ColumnNullable::countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, @@ -323,9 +355,18 @@ void ColumnNullable::serializeToPosForCmp( const TiDB::TiDBCollatorPtr 
& collator, String * sort_key_container) const { - assert(!nullmap); - getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); - getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container); + if unlikely (nullmap != nullptr) + { + auto new_nullmap_col = ColumnUInt8::create(); + DB::mergeNullMap(*nullmap, getNullMapData(), new_nullmap_col->getData()); + getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); + getNestedColumn().serializeToPosForCmp(pos, start, length, &(new_nullmap_col->getData()), collator, sort_key_container); + } + else + { + getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); + getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container); + } } void ColumnNullable::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const @@ -343,17 +384,47 @@ void ColumnNullable::serializeToPosForCmpColumnArray( const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const { - assert(!nullmap); - getNullMapColumn() - .serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); - getNestedColumn().serializeToPosForCmpColumnArray( - pos, - start, - length, - &getNullMapData(), - array_offsets, - collator, - sort_key_container); + const auto & nested_nullmap = getNullMapData(); + assert(nested_nullmap.size() == array_offsets.back()); + if unlikely (nullmap != nullptr) + { + assert(nullmap->size() == array_offsets.size()); + auto new_nullmap_col = ColumnUInt8::create(); + auto & new_nullmap_data = new_nullmap_col->getData(); + new_nullmap_data.assign(nested_nullmap); + for (size_t i = start; i < start + length; ++i) + { + if (DB::isNullAt(*nullmap, i)) + { + const auto row_size = array_offsets[i] - array_offsets[i - 1]; + const auto row_offset = 
array_offsets[i - 1]; + for (size_t j = row_offset; j < row_offset + row_size; ++j) + setNullAt(new_nullmap_data, j); + } + } + new_nullmap_col->serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); + getNestedColumn().serializeToPosForCmpColumnArray( + pos, + start, + length, + &new_nullmap_data, + array_offsets, + collator, + sort_key_container); + } + else + { + getNullMapColumn() + .serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); + getNestedColumn().serializeToPosForCmpColumnArray( + pos, + start, + length, + &getNullMapData(), + array_offsets, + collator, + sort_key_container); + } } void ColumnNullable::serializeToPosForColumnArray( PaddedPODArray & pos, diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 0789a6ac1fb..8d54b16ae65 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -566,7 +566,7 @@ void ColumnString::countSerializeByteSizeImpl( assert(sizeAt(i) > 0); if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) { byte_size[i] += sizeof(UInt32) + 1; continue; @@ -699,11 +699,8 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( assert(offsetAt(array_offsets[i]) - offsetAt(array_offsets[i - 1]) >= ele_count); if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) - { - byte_size[i] += (sizeof(UInt32) + 1) * ele_count; + if (DB::isNullAt(*nullmap, i)) continue; - } } if constexpr (count_code_points) @@ -721,7 +718,6 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( } else { - // NOTE: didn't check nullmap because we have to iterate through all rows, it's slow. 
byte_size[i] += sizeof(UInt32) * ele_count + offsetAt(array_offsets[i]) - offsetAt(array_offsets[i - 1]); } @@ -866,7 +862,6 @@ void ColumnString::serializeToPosImpl( RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); - static_assert(!(has_null && has_nullmap)); assert(!has_nullmap || (nullmap && nullmap->size() == size())); /// To avoid virtual function call of sortKey(). @@ -876,14 +871,16 @@ void ColumnString::serializeToPosImpl( { if constexpr (compare_semantics) { + static_assert(!has_null); UInt32 str_size = sizeAt(start + i); const void * src = &chars[offsetAt(start + i)]; if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) { UInt32 str_size = 1; tiflash_compiler_builtin_memcpy(pos[i], &str_size, sizeof(UInt32)); + pos[i] += sizeof(UInt32); *(pos[i]) = '\0'; pos[i] += 1; continue; @@ -903,7 +900,7 @@ void ColumnString::serializeToPosImpl( } else { - assert(!has_nullmap); + static_assert(!has_nullmap); if constexpr (has_null) { if (pos[i] == nullptr) @@ -913,7 +910,6 @@ void ColumnString::serializeToPosImpl( UInt32 str_size = sizeAt(start + i); const void * src = &chars[offsetAt(start + i)]; - assert(!nullmap); tiflash_compiler_builtin_memcpy(pos[i], &str_size, sizeof(UInt32)); pos[i] += sizeof(UInt32); inline_memcpy(pos[i], src, str_size); @@ -1038,7 +1034,6 @@ void ColumnString::serializeToPosForColumnArrayImplType( } else { - assert(!nullmap); serializeToPosForColumnArrayImpl( pos, start, @@ -1073,19 +1068,19 @@ void ColumnString::serializeToPosForColumnArrayImpl( array_offsets.back(), size()); - static_assert(!(has_null && has_nullmap)); assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); /// countSerializeByteSizeForCmpColumnArray has already checked that the size of one element is not greater than UINT32_MAX if constexpr 
(compare_semantics) { + static_assert(!has_null); /// To avoid virtual function call of sortKey(). const auto * derived_collator = static_cast(collator); for (size_t i = 0; i < length; ++i) { if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) continue; } @@ -1109,7 +1104,7 @@ void ColumnString::serializeToPosForColumnArrayImpl( } else { - assert(!has_nullmap); + static_assert(!has_nullmap); for (size_t i = 0; i < length; ++i) { if constexpr (has_null) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 48db5882a7a..7a8a159e217 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -117,7 +117,7 @@ void ColumnVector::serializeToPosImpl( } if constexpr (has_nullmap) { - if ((*nullmap)[start + i] != 0) + if (DB::isNullAt(*nullmap, start + i)) { tiflash_compiler_builtin_memcpy(pos[i], &def_val, sizeof(T)); pos[i] += sizeof(T); @@ -175,13 +175,24 @@ void ColumnVector::serializeToPosForColumnArrayImpl( if (pos[i] == nullptr) continue; } - size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; if constexpr (has_nullmap) { - if ((*nullmap)[i] != 0) + if (DB::isNullAt(*nullmap, i)) continue; } - inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T)); + size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; + if (len <= 4) + { + for (size_t j = 0; j < len; ++j) + tiflash_compiler_builtin_memcpy( + pos[i] + j * sizeof(T), + &data[array_offsets[start + i - 1] + j], + sizeof(T)); + } + else + { + inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T)); + } pos[i] += len * sizeof(T); } } diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 0011d8454e0..e668841edf2 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -41,6 +41,22 @@ class ColumnGathererStream; using NullMap = PaddedPODArray; using ConstNullMapPtr = const NullMap *; +inline bool isNullAt(const 
NullMap & nullmap, size_t n) +{ + return nullmap[n] != 0; +} +inline void mergeNullMap(const NullMap & m1, const NullMap & m2, NullMap & m3) +{ + RUNTIME_CHECK(m1.size() == m2.size()); + m3.resize_fill_zero(m1.size()); + for (size_t i = 0; i < m1.size(); ++i) + m3[i] = (DB::isNullAt(m1, i) || DB::isNullAt(m2, i)); +} +inline void setNullAt(NullMap & nullmap, size_t n) +{ + nullmap[n] = 1; +} + /// Declares interface to store columns in memory. class IColumn : public COWPtr { diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index 035da450b7e..d2b0e629aaa 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -168,25 +168,26 @@ class TestColumnSerializeDeserialize : public ::testing::Test const TiDB::TiDBCollatorPtr & collator = nullptr, String * sort_key_container = nullptr) { - doTestSerializeAndDeserialize(column_ptr, false, compare_semantics, collator, sort_key_container); - doTestSerializeAndDeserialize2(column_ptr, false, compare_semantics, collator, sort_key_container); - doTestSerializeAndDeserialize(column_ptr, true, compare_semantics, collator, sort_key_container); - doTestSerializeAndDeserialize2(column_ptr, true, compare_semantics, collator, sort_key_container); + if (compare_semantics) + { + doTestSerializeAndDeserializeForCmp(column_ptr, compare_semantics, collator, sort_key_container); + } + else + { + doTestSerializeAndDeserialize(column_ptr, false); + doTestSerializeAndDeserialize2(column_ptr, false); + doTestSerializeAndDeserialize(column_ptr, true); + doTestSerializeAndDeserialize2(column_ptr, true); + } } static void doTestSerializeAndDeserialize( const ColumnPtr & column_ptr, - bool use_nt_align_buffer, - bool compare_semantics = false, - const TiDB::TiDBCollatorPtr & collator = nullptr, - String * sort_key_container = nullptr) + bool use_nt_align_buffer) { 
PaddedPODArray byte_size; byte_size.resize_fill_zero(column_ptr->size()); - if (!compare_semantics) - column_ptr->countSerializeByteSize(byte_size); - else - column_ptr->countSerializeByteSizeForCmp(byte_size, collator, nullptr); + column_ptr->countSerializeByteSize(byte_size); size_t total_size = 0; for (const auto size : byte_size) total_size += size; @@ -201,18 +202,12 @@ class TestColumnSerializeDeserialize : public ::testing::Test PaddedPODArray ori_pos; for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!compare_semantics) - column_ptr->serializeToPos(pos, 0, byte_size.size() / 2, false); - else - column_ptr->serializeToPosForCmp(pos, 0, byte_size.size() / 2, nullptr, collator, sort_key_container); + column_ptr->serializeToPos(pos, 0, byte_size.size() / 2, false); auto new_col_ptr = column_ptr->cloneEmpty(); if (use_nt_align_buffer) new_col_ptr->reserveAlign(byte_size.size(), FULL_VECTOR_SIZE_AVX2); - if (!compare_semantics) - new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); - else - new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); + new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); current_size = 0; pos.clear(); @@ -225,23 +220,11 @@ class TestColumnSerializeDeserialize : public ::testing::Test pos.push_back(nullptr); for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!compare_semantics) - column_ptr->serializeToPos(pos, byte_size.size() / 2, byte_size.size() - byte_size.size() / 2, true); - else - column_ptr->serializeToPosForCmp( - pos, - byte_size.size() / 2, - byte_size.size() - byte_size.size() / 2, - nullptr, - collator, - sort_key_container); + column_ptr->serializeToPos(pos, byte_size.size() / 2, byte_size.size() - byte_size.size() / 2, true); pos.resize(pos.size() - 1); ori_pos.resize(ori_pos.size() - 1); - if (!compare_semantics) - new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); - else - new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, 
use_nt_align_buffer); + new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); current_size = 0; pos.clear(); @@ -253,15 +236,9 @@ class TestColumnSerializeDeserialize : public ::testing::Test } for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!compare_semantics) - column_ptr->serializeToPos(pos, 0, byte_size.size(), true); - else - column_ptr->serializeToPosForCmp(pos, 0, byte_size.size(), nullptr, collator, sort_key_container); + column_ptr->serializeToPos(pos, 0, byte_size.size(), true); - if (!compare_semantics) - new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); - else - new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); + new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); if (use_nt_align_buffer) new_col_ptr->flushNTAlignBuffer(); @@ -270,27 +247,18 @@ class TestColumnSerializeDeserialize : public ::testing::Test for (size_t i = 0; i < column_ptr->size(); ++i) result_col_ptr->insertFrom(*column_ptr, i); - if (collator != nullptr) - checkForColumnWithCollator(std::move(result_col_ptr), std::move(new_col_ptr), collator); - else - ASSERT_COLUMN_EQ(std::move(result_col_ptr), std::move(new_col_ptr)); + ASSERT_COLUMN_EQ(std::move(result_col_ptr), std::move(new_col_ptr)); } static void doTestSerializeAndDeserialize2( const ColumnPtr & column_ptr, - bool use_nt_align_buffer, - bool compare_semantics = false, - const TiDB::TiDBCollatorPtr & collator = nullptr, - String * sort_key_container = nullptr) + bool use_nt_align_buffer) { if (column_ptr->size() < 2) return; PaddedPODArray byte_size; byte_size.resize_fill_zero(column_ptr->size()); - if (!compare_semantics) - column_ptr->countSerializeByteSize(byte_size); - else - column_ptr->countSerializeByteSizeForCmp(byte_size, collator, nullptr); + column_ptr->countSerializeByteSize(byte_size); size_t total_size = 0; for (const auto size : byte_size) total_size += size; @@ -306,20 +274,14 @@ class TestColumnSerializeDeserialize : 
public ::testing::Test pos.push_back(nullptr); for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!compare_semantics) - column_ptr->serializeToPos(pos, 0, byte_size.size() / 2, true); - else - column_ptr->serializeToPosForCmp(pos, 0, byte_size.size() / 2, nullptr, collator, sort_key_container); + column_ptr->serializeToPos(pos, 0, byte_size.size() / 2, true); pos.resize(pos.size() - 1); ori_pos.resize(ori_pos.size() - 1); auto new_col_ptr = column_ptr->cloneEmpty(); if (use_nt_align_buffer) new_col_ptr->reserveAlign(byte_size.size(), FULL_VECTOR_SIZE_AVX2); - if (!compare_semantics) - new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); - else - new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); + new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); current_size = 0; pos.clear(); @@ -331,21 +293,9 @@ class TestColumnSerializeDeserialize : public ::testing::Test } for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!compare_semantics) - column_ptr - ->serializeToPos(pos, byte_size.size() / 2 - 1, byte_size.size() - byte_size.size() / 2 + 1, false); - else - column_ptr->serializeToPosForCmp( - pos, - byte_size.size() / 2 - 1, - byte_size.size() - byte_size.size() / 2 + 1, - nullptr, - collator, - sort_key_container); - if (!compare_semantics) - new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); - else - new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); + column_ptr + ->serializeToPos(pos, byte_size.size() / 2 - 1, byte_size.size() - byte_size.size() / 2 + 1, false); + new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); current_size = 0; pos.clear(); @@ -357,15 +307,83 @@ class TestColumnSerializeDeserialize : public ::testing::Test } for (auto * ptr : pos) ori_pos.push_back(ptr); - if (!compare_semantics) - column_ptr->serializeToPos(pos, 0, byte_size.size(), true); - else - column_ptr->serializeToPosForCmp(pos, 0, byte_size.size(), 
nullptr, collator, sort_key_container); + column_ptr->serializeToPos(pos, 0, byte_size.size(), true); - if (!compare_semantics) new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); - else - new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); + if (use_nt_align_buffer) + new_col_ptr->flushNTAlignBuffer(); + + auto result_col_ptr = column_ptr->cloneFullColumn(); + for (size_t i = 0; i < column_ptr->size(); ++i) + result_col_ptr->insertFrom(*column_ptr, i); + + ASSERT_COLUMN_EQ(std::move(result_col_ptr), std::move(new_col_ptr)); + } + + static void doTestSerializeAndDeserializeForCmp( + const ColumnPtr & column_ptr, + bool use_nt_align_buffer, + const TiDB::TiDBCollatorPtr & collator = nullptr, + String * sort_key_container = nullptr) + { + PaddedPODArray byte_size; + byte_size.resize_fill_zero(column_ptr->size()); + column_ptr->countSerializeByteSizeForCmp(byte_size, collator, nullptr); + size_t total_size = 0; + for (const auto size : byte_size) + total_size += size; + PaddedPODArray memory(total_size); + PaddedPODArray pos; + size_t current_size = 0; + for (size_t i = 0; i < byte_size.size() / 2; ++i) + { + pos.push_back(memory.data() + current_size); + current_size += byte_size[i]; + } + PaddedPODArray ori_pos; + for (auto * ptr : pos) + ori_pos.push_back(ptr); + column_ptr->serializeToPosForCmp(pos, 0, byte_size.size() / 2, nullptr, collator, sort_key_container); + + auto new_col_ptr = column_ptr->cloneEmpty(); + if (use_nt_align_buffer) + new_col_ptr->reserveAlign(byte_size.size(), FULL_VECTOR_SIZE_AVX2); + new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); + + current_size = 0; + pos.clear(); + ori_pos.clear(); + for (size_t i = byte_size.size() / 2; i < byte_size.size(); ++i) + { + pos.push_back(memory.data() + current_size); + current_size += byte_size[i]; + } + for (auto * ptr : pos) + ori_pos.push_back(ptr); + column_ptr->serializeToPosForCmp( + pos, + byte_size.size() / 2, + 
byte_size.size() - byte_size.size() / 2, + nullptr, + collator, + sort_key_container); + + new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); + + current_size = 0; + pos.clear(); + ori_pos.clear(); + for (const auto size : byte_size) + { + pos.push_back(memory.data() + current_size); + current_size += size; + } + for (auto * ptr : pos) + ori_pos.push_back(ptr); + + column_ptr->serializeToPosForCmp(pos, 0, byte_size.size(), nullptr, collator, sort_key_container); + new_col_ptr->deserializeForCmpAndInsertFromPos(ori_pos, use_nt_align_buffer); + if (use_nt_align_buffer) new_col_ptr->flushNTAlignBuffer(); From a13008f5065fbd93058270df8aff5016d87a34a8 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Sat, 25 Jan 2025 22:00:43 +0800 Subject: [PATCH 05/12] remove changes in count Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnAggregateFunction.h | 6 +- dbms/src/Columns/ColumnArray.cpp | 16 +-- dbms/src/Columns/ColumnArray.h | 14 +-- dbms/src/Columns/ColumnConst.h | 6 +- dbms/src/Columns/ColumnDecimal.h | 8 +- dbms/src/Columns/ColumnFixedString.cpp | 31 +---- dbms/src/Columns/ColumnFixedString.h | 30 ++--- dbms/src/Columns/ColumnFunction.h | 6 +- dbms/src/Columns/ColumnNullable.cpp | 54 ++------- dbms/src/Columns/ColumnNullable.h | 9 +- dbms/src/Columns/ColumnString.cpp | 113 +++--------------- dbms/src/Columns/ColumnString.h | 27 ++--- dbms/src/Columns/ColumnTuple.h | 13 +- dbms/src/Columns/ColumnVector.h | 8 +- dbms/src/Columns/IColumn.h | 6 +- dbms/src/Columns/IColumnDummy.h | 6 +- .../gtest_column_serialize_deserialize.cpp | 13 +- 17 files changed, 83 insertions(+), 283 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.h b/dbms/src/Columns/ColumnAggregateFunction.h index 1c1cd619d67..61b2d8d515a 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.h +++ b/dbms/src/Columns/ColumnAggregateFunction.h @@ -167,8 +167,7 @@ class ColumnAggregateFunction final : public COWPtrHelper & /* byte_size */, - const 
TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collator */) const override { throw Exception( "Method countSerializeByteSizeForCmp is not supported for " + getName(), @@ -181,8 +180,7 @@ class ColumnAggregateFunction final : public COWPtrHelper & /* byte_size */, const IColumn::Offsets & /* offsets */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collator */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index d6d2657c402..6fb2d039111 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -220,25 +220,21 @@ const char * ColumnArray::deserializeAndInsertFromArena(const char * pos, const void ColumnArray::countSerializeByteSizeForCmp( PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const + const TiDB::TiDBCollatorPtr & collator) const { - countSerializeByteSizeImpl(byte_size, collator, nullmap); + countSerializeByteSizeImpl(byte_size, collator); } void ColumnArray::countSerializeByteSize(PaddedPODArray & byte_size) const { - countSerializeByteSizeImpl(byte_size, nullptr, nullptr); + countSerializeByteSizeImpl(byte_size, nullptr); } template -void ColumnArray::countSerializeByteSizeImpl( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const +void ColumnArray::countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); - assert(!nullmap || (nullmap->size() == size())); if unlikely (!getOffsets().empty() && getOffsets().back() > UINT32_MAX) { @@ -256,7 +252,7 @@ void 
ColumnArray::countSerializeByteSizeImpl( byte_size[i] += sizeof(UInt32); if constexpr (compare_semantics) - getData().countSerializeByteSizeForCmpColumnArray(byte_size, getOffsets(), collator, nullmap); + getData().countSerializeByteSizeForCmpColumnArray(byte_size, getOffsets(), collator); else getData().countSerializeByteSizeForColumnArray(byte_size, getOffsets()); } diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 48c3c044ea6..9052890d1fe 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -45,10 +45,7 @@ class ColumnArray final : public COWPtrHelper ColumnArray(const ColumnArray &) = default; template - void countSerializeByteSizeImpl( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const; + void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) const; template void serializeToPosImpl( @@ -100,17 +97,14 @@ class ColumnArray final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const override; + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const IColumn::Offsets & /* array_offsets */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collator */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 6d506236116..a2a46c04754 
100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -114,8 +114,7 @@ class ColumnConst final : public COWPtrHelper void countSerializeByteSizeForCmp( PaddedPODArray & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collator */) const override { throw Exception( "Method countSerializeByteSizeForCmp is not supported for " + getName(), @@ -129,8 +128,7 @@ class ColumnConst final : public COWPtrHelper void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const IColumn::Offsets & /* array_offsets */, - const TiDB::TiDBCollatorPtr & /* collaotr */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collaotr */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h index 91ea2ede0db..ddfacb98bae 100644 --- a/dbms/src/Columns/ColumnDecimal.h +++ b/dbms/src/Columns/ColumnDecimal.h @@ -174,10 +174,7 @@ class ColumnDecimal final : public COWPtrHelper & byte_size, - const TiDB::TiDBCollatorPtr &, - const NullMap *) const override + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr &) const override { countSerializeByteSizeImpl(byte_size, nullptr); } @@ -189,8 +186,7 @@ class ColumnDecimal final : public COWPtrHelper & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr &, - const NullMap *) const override + const TiDB::TiDBCollatorPtr &) const override { countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets); } diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 615d5b68b55..8c808726b68 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -134,33 +134,18 @@ const char * 
ColumnFixedString::deserializeAndInsertFromArena(const char * pos, return pos + n; } -template -void ColumnFixedString::countSerializeByteSizeImpl(PaddedPODArray & byte_size, const NullMap * nullmap) const +void ColumnFixedString::countSerializeByteSizeImpl(PaddedPODArray & byte_size) const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); - assert(!nullmap || nullmap->size() == size()); - size_t size = byte_size.size(); for (size_t i = 0; i < size; ++i) - { - if constexpr (has_nullmap) - { - if (DB::isNullAt(*nullmap, i)) - { - byte_size[i] += 1; - continue; - } - } byte_size[i] += n; - } } -template void ColumnFixedString::countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets, - const NullMap * nullmap) const + const IColumn::Offsets & array_offsets) const { RUNTIME_CHECK_MSG( byte_size.size() == array_offsets.size(), @@ -168,21 +153,9 @@ void ColumnFixedString::countSerializeByteSizeForColumnArrayImpl( byte_size.size(), array_offsets.size()); - assert(!nullmap || nullmap->size() == array_offsets.size()); - size_t size = array_offsets.size(); for (size_t i = 0; i < size; ++i) - { - if constexpr (has_nullmap) - { - if (DB::isNullAt(*nullmap, i)) - { - byte_size[i] += array_offsets[i] - array_offsets[i - 1]; - continue; - } - } byte_size[i] += n * (array_offsets[i] - array_offsets[i - 1]); - } } void ColumnFixedString::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h index 1cd2713542b..b909cf37927 100644 --- a/dbms/src/Columns/ColumnFixedString.h +++ b/dbms/src/Columns/ColumnFixedString.h @@ -54,14 +54,11 @@ class ColumnFixedString final : public COWPtrHelper , chars(src.chars.begin(), src.chars.end()) , n(src.n){}; - template - void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const NullMap * nullmap) 
const; + void countSerializeByteSizeImpl(PaddedPODArray & byte_size) const; - template void countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets, - const NullMap * nullmap) const; + const IColumn::Offsets & array_offsets) const; template void serializeToPosImpl(PaddedPODArray & pos, size_t start, size_t length, const NullMap * nullmap) const; @@ -124,46 +121,37 @@ class ColumnFixedString final : public COWPtrHelper const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const override + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + const override { // collator->sortKey() will change the string length, which may exceeds n. RUNTIME_CHECK_MSG( !collator, "{} doesn't support countSerializeByteSizeForCmp when collator is not null", getName()); - if (nullmap != nullptr) - countSerializeByteSizeImpl(byte_size, nullmap); - else - countSerializeByteSizeImpl(byte_size, nullptr); + countSerializeByteSizeImpl(byte_size); } void countSerializeByteSize(PaddedPODArray & byte_size) const override { - countSerializeByteSizeImpl(byte_size, nullptr); + countSerializeByteSizeImpl(byte_size); } void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const override + const TiDB::TiDBCollatorPtr & collator) const override { RUNTIME_CHECK_MSG( !collator, "{} doesn't support countSerializeByteSizeForCmpColumnArray when collator is not null", getName()); - if (nullmap != nullptr) - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, nullmap); - else - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, nullptr); + 
countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets); } void countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const override { - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets, nullptr); + countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets); } void serializeToPosForCmp( diff --git a/dbms/src/Columns/ColumnFunction.h b/dbms/src/Columns/ColumnFunction.h index 24fdcb3f1e7..8343ea4d6c5 100644 --- a/dbms/src/Columns/ColumnFunction.h +++ b/dbms/src/Columns/ColumnFunction.h @@ -122,8 +122,7 @@ class ColumnFunction final : public COWPtrHelper void countSerializeByteSizeForCmp( PaddedPODArray & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collator */) const override { throw Exception( "Method countSerializeByteSizeForCmp is not supported for " + getName(), @@ -137,8 +136,7 @@ class ColumnFunction final : public COWPtrHelper void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const IColumn::Offsets & /* offsets */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collator */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index b5e098cd38e..7ae5e4c6ba4 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -284,21 +284,10 @@ const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos, con void ColumnNullable::countSerializeByteSizeForCmp( PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const + const TiDB::TiDBCollatorPtr & collator) const { - if unlikely (nullmap != nullptr) - { - auto new_nullmap_col = 
ColumnUInt8::create(); - DB::mergeNullMap(*nullmap, getNullMapData(), new_nullmap_col->getData()); - new_nullmap_col->countSerializeByteSizeForCmp(byte_size, collator, nullptr); - getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, &(new_nullmap_col->getData())); - } - else - { - getNullMapColumn().countSerializeByteSizeForCmp(byte_size, collator, nullptr); - getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, &getNullMapData()); - } + getNullMapColumn().countSerializeByteSizeForCmp(byte_size, collator); + getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator); } void ColumnNullable::countSerializeByteSize(PaddedPODArray & byte_size) const { @@ -309,35 +298,10 @@ void ColumnNullable::countSerializeByteSize(PaddedPODArray & byte_size) void ColumnNullable::countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const + const TiDB::TiDBCollatorPtr & collator) const { - const auto & nested_nullmap = getNullMapData(); - assert(nested_nullmap.size() == array_offsets.back()); - if unlikely (nullmap != nullptr) - { - assert(nullmap->size() == array_offsets.size()); - auto new_nullmap_col = ColumnUInt8::create(); - auto & new_nullmap_data = new_nullmap_col->getData(); - new_nullmap_data.assign(nested_nullmap); - for (size_t i = 0; i < array_offsets.size(); ++i) - { - if (DB::isNullAt(*nullmap, i)) - { - const auto row_size = array_offsets[i] - array_offsets[i - 1]; - const auto row_offset = array_offsets[i - 1]; - for (size_t j = row_offset; j < row_offset + row_size; ++j) - setNullAt(new_nullmap_data, j); - } - } - new_nullmap_col->countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr); - getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &new_nullmap_data); - } - else - { - 
getNullMapColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr); - getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &nested_nullmap); - } + getNullMapColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator); + getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator); } void ColumnNullable::countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, @@ -360,7 +324,8 @@ void ColumnNullable::serializeToPosForCmp( auto new_nullmap_col = ColumnUInt8::create(); DB::mergeNullMap(*nullmap, getNullMapData(), new_nullmap_col->getData()); getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); - getNestedColumn().serializeToPosForCmp(pos, start, length, &(new_nullmap_col->getData()), collator, sort_key_container); + getNestedColumn() + .serializeToPosForCmp(pos, start, length, &(new_nullmap_col->getData()), collator, sort_key_container); } else { @@ -402,7 +367,8 @@ void ColumnNullable::serializeToPosForCmpColumnArray( setNullAt(new_nullmap_data, j); } } - new_nullmap_col->serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); + new_nullmap_col + ->serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); getNestedColumn().serializeToPosForCmpColumnArray( pos, start, diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index ba747fe8a8e..ad4abce3916 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -77,17 +77,14 @@ class ColumnNullable final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap 
* nullmap) const override; + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const override; + const TiDB::TiDBCollatorPtr & collator) const override; void countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const override; diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 8d54b16ae65..b2ef0e5187e 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -484,62 +484,31 @@ void ColumnString::getPermutationWithCollationImpl( void ColumnString::countSerializeByteSizeForCmp( PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const + const TiDB::TiDBCollatorPtr & collator) const { if likely (collator != nullptr) { if (collator->maxBytesForOneChar() > 1) - countSerializeByteSizeNullMap( - byte_size, - collator, - nullmap); + countSerializeByteSizeImpl(byte_size, collator); else - countSerializeByteSizeNullMap( - byte_size, - collator, - nullmap); + countSerializeByteSizeImpl(byte_size, collator); } else { - countSerializeByteSizeNullMap( - byte_size, - nullptr, - nullmap); + countSerializeByteSizeImpl(byte_size, nullptr); } } -void ColumnString::countSerializeByteSize(PaddedPODArray & byte_size) const -{ - countSerializeByteSizeNullMap( - byte_size, - nullptr, - nullptr); -} -template -ALWAYS_INLINE inline void ColumnString::countSerializeByteSizeNullMap( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const +void ColumnString::countSerializeByteSize(PaddedPODArray & byte_size) const { - if (nullmap != nullptr) - 
countSerializeByteSizeImpl( - byte_size, - collator, - nullmap); - else - countSerializeByteSizeImpl( - byte_size, - collator, - nullptr); + countSerializeByteSizeImpl(byte_size, nullptr); } -template +template void ColumnString::countSerializeByteSizeImpl( PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const + const TiDB::TiDBCollatorPtr & collator) const { RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size()); @@ -554,24 +523,15 @@ void ColumnString::countSerializeByteSizeImpl( sizeAt(i)); } - if constexpr (compare_semantics) + if constexpr (has_collator) { RUNTIME_CHECK(collator); - assert(!has_nullmap || (nullmap && nullmap->size() == size())); const size_t size = byte_size.size(); const size_t max_bytes_one_char = collator->maxBytesForOneChar(); for (size_t i = 0; i < size; ++i) { assert(sizeAt(i) > 0); - if constexpr (has_nullmap) - { - if (DB::isNullAt(*nullmap, i)) - { - byte_size[i] += sizeof(UInt32) + 1; - continue; - } - } if constexpr (count_code_points) { @@ -587,7 +547,6 @@ void ColumnString::countSerializeByteSizeImpl( } else { - assert(!has_nullmap); size_t size = byte_size.size(); for (size_t i = 0; i < size; ++i) byte_size[i] += sizeof(UInt32) + sizeAt(i); @@ -597,30 +556,26 @@ void ColumnString::countSerializeByteSizeImpl( void ColumnString::countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const + const TiDB::TiDBCollatorPtr & collator) const { if likely (collator != nullptr) { if (collator->maxBytesForOneChar() > 1) - countSerializeByteSizeForColumnArrayNullMap( + countSerializeByteSizeForColumnArrayImpl( byte_size, array_offsets, - collator, - nullmap); + collator); else - countSerializeByteSizeForColumnArrayNullMap( + countSerializeByteSizeForColumnArrayImpl( byte_size, array_offsets, - collator, - 
nullmap); + collator); } else { - countSerializeByteSizeForColumnArrayNullMap( + countSerializeByteSizeForColumnArrayImpl( byte_size, array_offsets, - nullptr, nullptr); } } @@ -629,40 +584,17 @@ void ColumnString::countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const { - countSerializeByteSizeForColumnArrayNullMap( + countSerializeByteSizeForColumnArrayImpl( byte_size, array_offsets, - nullptr, nullptr); } -template -void ColumnString::countSerializeByteSizeForColumnArrayNullMap( - PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const -{ - if (nullmap != nullptr) - countSerializeByteSizeForColumnArrayImpl( - byte_size, - array_offsets, - collator, - nullmap); - else - countSerializeByteSizeForColumnArrayImpl( - byte_size, - array_offsets, - collator, - nullptr); -} - -template +template void ColumnString::countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const + const TiDB::TiDBCollatorPtr & collator) const { RUNTIME_CHECK_MSG( byte_size.size() == array_offsets.size(), @@ -686,10 +618,9 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( sizeAt(i)); } - if constexpr (compare_semantics) + if constexpr (has_collator) { RUNTIME_CHECK(collator); - assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); size_t size = array_offsets.size(); const auto max_bytes_one_char = collator->maxBytesForOneChar(); @@ -697,11 +628,6 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( { const size_t ele_count = array_offsets[i] - array_offsets[i - 1]; assert(offsetAt(array_offsets[i]) - offsetAt(array_offsets[i - 1]) >= ele_count); - if constexpr (has_nullmap) - { - if (DB::isNullAt(*nullmap, i)) - continue; - } if constexpr (count_code_points) { @@ -725,7 +651,6 @@ 
void ColumnString::countSerializeByteSizeForColumnArrayImpl( } else { - assert(!has_nullmap); size_t size = array_offsets.size(); for (size_t i = 0; i < size; ++i) byte_size[i] += sizeof(UInt32) * (array_offsets[i] - array_offsets[i - 1]) + offsetAt(array_offsets[i]) diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index 8d9ee722c14..97de1b9cf73 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -112,24 +112,14 @@ class ColumnString final : public COWPtrHelper PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator, const NullMap * nullmap) const; - template - void countSerializeByteSizeImpl( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const; - template - void countSerializeByteSizeForColumnArrayNullMap( - PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const; - template + void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) const; + + template void countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const; + const TiDB::TiDBCollatorPtr & collator) const; template void serializeToPosImplType( @@ -317,17 +307,14 @@ class ColumnString final : public COWPtrHelper return pos + string_size; } - void countSerializeByteSizeForCmp( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const override; + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + const override; void countSerializeByteSize(PaddedPODArray & byte_size) const override; void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & 
collator, - const NullMap * nullmap) const override; + const TiDB::TiDBCollatorPtr & collator) const override; void countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets) const override; diff --git a/dbms/src/Columns/ColumnTuple.h b/dbms/src/Columns/ColumnTuple.h index 651a7bfca12..d049fc74ebf 100644 --- a/dbms/src/Columns/ColumnTuple.h +++ b/dbms/src/Columns/ColumnTuple.h @@ -95,13 +95,11 @@ class ColumnTuple final : public COWPtrHelper String &) const override; const char * deserializeAndInsertFromArena(const char * pos, const TiDB::TiDBCollatorPtr &) override; - void countSerializeByteSizeForCmp( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const override + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) + const override { for (const auto & column : columns) - column->countSerializeByteSizeForCmp(byte_size, collator, nullmap); + column->countSerializeByteSizeForCmp(byte_size, collator); } void countSerializeByteSize(PaddedPODArray & byte_size) const override { @@ -112,11 +110,10 @@ class ColumnTuple final : public COWPtrHelper void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const override + const TiDB::TiDBCollatorPtr & collator) const override { for (const auto & column : columns) - column->countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullmap); + column->countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator); } void countSerializeByteSizeForColumnArray( PaddedPODArray & byte_size, diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 344a3d10fbe..307eba1c40c 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -328,10 +328,7 @@ class ColumnVector final : 
public COWPtrHelper & byte_size, - const TiDB::TiDBCollatorPtr &, - const NullMap *) const override + void countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr &) const override { countSerializeByteSize(byte_size); } @@ -340,8 +337,7 @@ class ColumnVector final : public COWPtrHelper & byte_size, const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr &, - const NullMap *) const override + const TiDB::TiDBCollatorPtr &) const override { countSerializeByteSizeForColumnArray(byte_size, array_offsets); } diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index e668841edf2..5286e581808 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -259,8 +259,7 @@ class IColumn : public COWPtr /// The byte_size.size() must be equal to the column size. virtual void countSerializeByteSizeForCmp( PaddedPODArray & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const + const TiDB::TiDBCollatorPtr & /* collator */) const = 0; virtual void countSerializeByteSize(PaddedPODArray & /* byte_size */) const = 0; @@ -270,8 +269,7 @@ class IColumn : public COWPtr virtual void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const Offsets & /* array_offsets */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const + const TiDB::TiDBCollatorPtr & /* collator */) const = 0; virtual void countSerializeByteSizeForColumnArray( PaddedPODArray & /* byte_size */, diff --git a/dbms/src/Columns/IColumnDummy.h b/dbms/src/Columns/IColumnDummy.h index 3b0550a9651..250f4bedc14 100644 --- a/dbms/src/Columns/IColumnDummy.h +++ b/dbms/src/Columns/IColumnDummy.h @@ -90,8 +90,7 @@ class IColumnDummy : public IColumn void countSerializeByteSizeForCmp( PaddedPODArray & /* byte_size */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collator 
*/) const override { throw Exception( "Method countSerializeByteSizeForCmp is not supported for " + getName(), @@ -105,8 +104,7 @@ class IColumnDummy : public IColumn void countSerializeByteSizeForCmpColumnArray( PaddedPODArray & /* byte_size */, const IColumn::Offsets & /* array_offsets */, - const TiDB::TiDBCollatorPtr & /* collator */, - const NullMap * /* nullmap */) const override + const TiDB::TiDBCollatorPtr & /* collator */) const override { throw Exception( "Method countSerializeByteSizeForCmpColumnArray is not supported for " + getName(), diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index d2b0e629aaa..de6f0c5e49d 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -181,9 +181,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test } } - static void doTestSerializeAndDeserialize( - const ColumnPtr & column_ptr, - bool use_nt_align_buffer) + static void doTestSerializeAndDeserialize(const ColumnPtr & column_ptr, bool use_nt_align_buffer) { PaddedPODArray byte_size; byte_size.resize_fill_zero(column_ptr->size()); @@ -250,9 +248,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test ASSERT_COLUMN_EQ(std::move(result_col_ptr), std::move(new_col_ptr)); } - static void doTestSerializeAndDeserialize2( - const ColumnPtr & column_ptr, - bool use_nt_align_buffer) + static void doTestSerializeAndDeserialize2(const ColumnPtr & column_ptr, bool use_nt_align_buffer) { if (column_ptr->size() < 2) return; @@ -293,8 +289,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test } for (auto * ptr : pos) ori_pos.push_back(ptr); - column_ptr - ->serializeToPos(pos, byte_size.size() / 2 - 1, byte_size.size() - byte_size.size() / 2 + 1, false); + column_ptr->serializeToPos(pos, byte_size.size() / 2 - 1, byte_size.size() - byte_size.size() / 2 + 1, false); 
new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); current_size = 0; @@ -309,7 +304,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test ori_pos.push_back(ptr); column_ptr->serializeToPos(pos, 0, byte_size.size(), true); - new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); + new_col_ptr->deserializeAndInsertFromPos(ori_pos, use_nt_align_buffer); if (use_nt_align_buffer) new_col_ptr->flushNTAlignBuffer(); From 817854b7a9418c3b3be90ab9529fc505ca4b51ab Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Sun, 26 Jan 2025 11:45:04 +0800 Subject: [PATCH 06/12] refine Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnNullable.cpp | 14 +++++++++----- dbms/src/Columns/ColumnString.h | 8 +------- dbms/src/Columns/IColumn.h | 5 +++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 7ae5e4c6ba4..f2de4b51bb8 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -321,9 +321,12 @@ void ColumnNullable::serializeToPosForCmp( { if unlikely (nullmap != nullptr) { + // This code path is not efficient, because of the temporary `new_nullmap_col`. + // But only got this code path when the column is like ColumnNullable(ColumnTuple(ColumnNullable)), + // which is rare for TiFlash, because ColumnTuple is not used for now. 
auto new_nullmap_col = ColumnUInt8::create(); - DB::mergeNullMap(*nullmap, getNullMapData(), new_nullmap_col->getData()); - getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); + DB::mergeNullMap(start, length, *nullmap, getNullMapData(), new_nullmap_col->getData()); + new_nullmap_col->serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); getNestedColumn() .serializeToPosForCmp(pos, start, length, &(new_nullmap_col->getData()), collator, sort_key_container); } @@ -350,10 +353,11 @@ void ColumnNullable::serializeToPosForCmpColumnArray( String * sort_key_container) const { const auto & nested_nullmap = getNullMapData(); - assert(nested_nullmap.size() == array_offsets.back()); - if unlikely (nullmap != nullptr) + RUNTIME_CHECK(nested_nullmap.size() == array_offsets.back()); + if (nullmap != nullptr) { - assert(nullmap->size() == array_offsets.size()); + // Got this code path when the column is like ColumnNullable(ColumnArray(ColumnNullable)), + RUNTIME_CHECK(nullmap->size() == array_offsets.size()); auto new_nullmap_col = ColumnUInt8::create(); auto & new_nullmap_data = new_nullmap_col->getData(); new_nullmap_data.assign(nested_nullmap); diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index 97de1b9cf73..dd4f3c1f70f 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -107,14 +107,8 @@ class ColumnString final : public COWPtrHelper } } - template - ALWAYS_INLINE inline void countSerializeByteSizeNullMap( - PaddedPODArray & byte_size, - const TiDB::TiDBCollatorPtr & collator, - const NullMap * nullmap) const; - template + template void countSerializeByteSizeImpl(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr & collator) const; - template void countSerializeByteSizeForColumnArrayImpl( PaddedPODArray & byte_size, diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 5286e581808..be2171a0f4b 100644 --- 
a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -45,11 +45,12 @@ inline bool isNullAt(const NullMap & nullmap, size_t n) { return nullmap[n] != 0; } -inline void mergeNullMap(const NullMap & m1, const NullMap & m2, NullMap & m3) +inline void mergeNullMap(size_t start, size_t length, const NullMap & m1, const NullMap & m2, NullMap & m3) { RUNTIME_CHECK(m1.size() == m2.size()); + RUNTIME_CHECK(start + length < m1.size()); m3.resize_fill_zero(m1.size()); - for (size_t i = 0; i < m1.size(); ++i) + for (size_t i = start; i < start + length; ++i) m3[i] = (DB::isNullAt(m1, i) || DB::isNullAt(m2, i)); } inline void setNullAt(NullMap & nullmap, size_t n) From d9aeae7e502010f45dddb63b38e10454b1983dd9 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Sun, 26 Jan 2025 13:08:22 +0800 Subject: [PATCH 07/12] refine Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnArray.cpp | 10 ++-- dbms/src/Columns/ColumnDecimal.cpp | 46 ++++++------------- dbms/src/Columns/ColumnFixedString.cpp | 7 ++- dbms/src/Columns/ColumnNullable.cpp | 41 +++++++++++++---- dbms/src/Columns/ColumnString.cpp | 8 +--- dbms/src/Columns/ColumnVector.cpp | 41 +++++++++++++++-- dbms/src/Columns/ColumnVector.h | 16 +------ dbms/src/Columns/IColumn.h | 29 ++++++------ .../gtest_column_serialize_deserialize.cpp | 39 ++++++++++++++-- 9 files changed, 144 insertions(+), 93 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 6fb2d039111..6f5f68b9a7b 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -266,17 +266,17 @@ void ColumnArray::serializeToPosForCmp( String * sort_key_container) const { if (nullmap != nullptr) - serializeToPosImpl(pos, start, length, collator, sort_key_container, nullmap); + serializeToPosImpl(pos, start, length, collator, sort_key_container, nullmap); else - serializeToPosImpl(pos, start, length, collator, sort_key_container, nullmap); + serializeToPosImpl(pos, start, length, collator, 
sort_key_container, nullptr); } void ColumnArray::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const { if (has_null) - serializeToPosImpl(pos, start, length, nullptr, nullptr, nullptr); + serializeToPosImpl(pos, start, length, nullptr, nullptr, nullptr); else - serializeToPosImpl(pos, start, length, nullptr, nullptr, nullptr); + serializeToPosImpl(pos, start, length, nullptr, nullptr, nullptr); } template @@ -306,7 +306,7 @@ void ColumnArray::serializeToPosImpl( UInt32 len = sizeAt(start + i); if constexpr (has_nullmap) { - if (DB::isNullAt(*nullmap, i)) + if (DB::isNullAt(*nullmap, start + i)) len = 0; } tiflash_compiler_builtin_memcpy(pos[i], &len, sizeof(UInt32)); diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 867b86e1b31..a15e4734396 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -214,30 +214,19 @@ void ColumnDecimal::serializeToPosImpl( if (pos[i] == nullptr) continue; } + if constexpr (has_nullmap) + { + if (DB::isNullAt(*nullmap, start + i)) + pos[i] = serializeDecimal256Helper(pos[i], def_val); + continue; + } if constexpr (compare_semantics && is_Decimal256) { - if constexpr (has_nullmap) - { - if (DB::isNullAt(*nullmap, i)) - { - pos[i] = serializeDecimal256Helper(pos[i], def_val); - continue; - } - } pos[i] = serializeDecimal256Helper(pos[i], data[start + i]); } else { - if constexpr (has_nullmap) - { - if (DB::isNullAt(*nullmap, i)) - { - tiflash_compiler_builtin_memcpy(pos[i], &def_val, sizeof(T)); - pos[i] += sizeof(T); - continue; - } - } tiflash_compiler_builtin_memcpy(pos[i], &data[start + i], sizeof(T)); pos[i] += sizeof(T); } @@ -267,7 +256,7 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( size()); static_assert(!(has_null && has_nullmap)); - assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); + assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.back())); for (size_t i = 
0; i < length; ++i) { @@ -276,32 +265,27 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( if (pos[i] == nullptr) continue; } + if constexpr (has_nullmap) + { + if (DB::isNullAt(*nullmap, start + i)) + continue; + } size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; if constexpr (compare_semantics && is_Decimal256) { - if constexpr (has_nullmap) - { - if (DB::isNullAt(*nullmap, i)) - continue; - } for (size_t j = 0; j < len; ++j) pos[i] = serializeDecimal256Helper(pos[i], data[array_offsets[start + i - 1] + j]); } else { - if constexpr (has_nullmap) - { - if (DB::isNullAt(*nullmap, i)) - continue; - } if (len <= 4) { for (size_t j = 0; j < len; ++j) tiflash_compiler_builtin_memcpy( - pos[i] + j * sizeof(T), - &data[array_offsets[start + i - 1] + j], - sizeof(T)); + pos[i] + j * sizeof(T), + &data[array_offsets[start + i - 1] + j], + sizeof(T)); } else { diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 8c808726b68..244f21bbc54 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -188,7 +188,7 @@ void ColumnFixedString::serializeToPosImpl( } if constexpr (has_nullmap) { - if (DB::isNullAt(*nullmap, i)) + if (DB::isNullAt(*nullmap, start + i)) { for (size_t j = 0; j < n; ++j) { @@ -247,13 +247,12 @@ void ColumnFixedString::serializeToPosForColumnArrayImpl( if (pos[i] == nullptr) continue; } - size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; if constexpr (has_nullmap) { - if (DB::isNullAt(*nullmap, i)) + if (DB::isNullAt(*nullmap, start + i)) continue; } - + size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; inline_memcpy(pos[i], &chars[n * array_offsets[start + i - 1]], n * len); pos[i] += n * len; } diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index f2de4b51bb8..5cd04385a57 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ 
-354,6 +354,9 @@ void ColumnNullable::serializeToPosForCmpColumnArray( { const auto & nested_nullmap = getNullMapData(); RUNTIME_CHECK(nested_nullmap.size() == array_offsets.back()); + const auto nested_start = array_offsets[start - 1]; + const auto nested_length = array_offsets[start + length - 1] - array_offsets[start - 1]; + if (nullmap != nullptr) { // Got this code path when the column is like ColumnNullable(ColumnArray(ColumnNullable)), @@ -371,27 +374,45 @@ void ColumnNullable::serializeToPosForCmpColumnArray( setNullAt(new_nullmap_data, j); } } + // new_nullmap_col + // ->serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); + // getNestedColumn().serializeToPosForCmpColumnArray( + // pos, + // start, + // length, + // &new_nullmap_data, + // array_offsets, + // collator, + // sort_key_container); new_nullmap_col - ->serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); - getNestedColumn().serializeToPosForCmpColumnArray( + ->serializeToPosForCmp(pos, nested_start, nested_length, nullptr, collator, sort_key_container); + getNestedColumn().serializeToPosForCmp( pos, - start, - length, + nested_start, + nested_length, &new_nullmap_data, - array_offsets, collator, sort_key_container); } else { + // getNullMapColumn() + // .serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); + // getNestedColumn().serializeToPosForCmpColumnArray( + // pos, + // start, + // length, + // &getNullMapData(), + // array_offsets, + // collator, + // sort_key_container); getNullMapColumn() - .serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); - getNestedColumn().serializeToPosForCmpColumnArray( + .serializeToPosForCmp(pos, nested_start, nested_length, nullptr, collator, sort_key_container); + getNestedColumn().serializeToPosForCmp( pos, - start, - length, + nested_start, 
+ nested_length, &getNullMapData(), - array_offsets, collator, sort_key_container); } diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index b2ef0e5187e..8af794c664e 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -499,7 +499,6 @@ void ColumnString::countSerializeByteSizeForCmp( } } - void ColumnString::countSerializeByteSize(PaddedPODArray & byte_size) const { countSerializeByteSizeImpl(byte_size, nullptr); @@ -532,7 +531,6 @@ void ColumnString::countSerializeByteSizeImpl( for (size_t i = 0; i < size; ++i) { assert(sizeAt(i) > 0); - if constexpr (count_code_points) { const auto num_char = UTF8::countCodePoints(&chars[offsetAt(i)], sizeAt(i) - 1); @@ -628,7 +626,6 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl( { const size_t ele_count = array_offsets[i] - array_offsets[i - 1]; assert(offsetAt(array_offsets[i]) - offsetAt(array_offsets[i - 1]) >= ele_count); - if constexpr (count_code_points) { size_t cur_row_bytes = 0; @@ -801,7 +798,7 @@ void ColumnString::serializeToPosImpl( const void * src = &chars[offsetAt(start + i)]; if constexpr (has_nullmap) { - if (DB::isNullAt(*nullmap, i)) + if (DB::isNullAt(*nullmap, start + i)) { UInt32 str_size = 1; tiflash_compiler_builtin_memcpy(pos[i], &str_size, sizeof(UInt32)); @@ -1005,10 +1002,9 @@ void ColumnString::serializeToPosForColumnArrayImpl( { if constexpr (has_nullmap) { - if (DB::isNullAt(*nullmap, i)) + if (DB::isNullAt(*nullmap, start + i)) continue; } - for (size_t j = array_offsets[start + i - 1]; j < array_offsets[start + i]; ++j) { UInt32 str_size = sizeAt(j); diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 7a8a159e217..f1e07bef6bc 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -93,6 +93,21 @@ void ColumnVector::serializeToPos(PaddedPODArray & pos, size_t start, serializeToPosImpl(pos, start, length, nullptr); } +template +void 
ColumnVector::serializeToPosForCmp( + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap, + const TiDB::TiDBCollatorPtr &, + String *) const +{ + if (nullmap != nullptr) + serializeToPosImpl(pos, start, length, nullmap); + else + serializeToPosImpl(pos, start, length, nullptr); +} + template template void ColumnVector::serializeToPosImpl( @@ -143,6 +158,22 @@ void ColumnVector::serializeToPosForColumnArray( serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); } +template +void ColumnVector::serializeToPosForCmpColumnArray( + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap, + const IColumn::Offsets & array_offsets, + const TiDB::TiDBCollatorPtr &, + String *) const +{ + if (nullmap != nullptr) + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullmap); + else + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); +} + template template void ColumnVector::serializeToPosForColumnArrayImpl( @@ -175,19 +206,19 @@ void ColumnVector::serializeToPosForColumnArrayImpl( if (pos[i] == nullptr) continue; } + size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; if constexpr (has_nullmap) { - if (DB::isNullAt(*nullmap, i)) + if (DB::isNullAt(*nullmap, start + i)) continue; } - size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; if (len <= 4) { for (size_t j = 0; j < len; ++j) tiflash_compiler_builtin_memcpy( - pos[i] + j * sizeof(T), - &data[array_offsets[start + i - 1] + j], - sizeof(T)); + pos[i] + j * sizeof(T), + &data[array_offsets[start + i - 1] + j], + sizeof(T)); } else { diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 307eba1c40c..fd49028bf70 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -351,13 +351,7 @@ class ColumnVector final : public COWPtrHelper(pos, start, length, nullmap); - else - serializeToPosImpl(pos, start, length, 
nullptr); - } + String *) const override; void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override; void serializeToPosForCmpColumnArray( @@ -367,13 +361,7 @@ class ColumnVector final : public COWPtrHelper(pos, start, length, array_offsets, nullmap); - else - serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); - } + String *) const override; void serializeToPosForColumnArray( PaddedPODArray & pos, size_t start, diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index be2171a0f4b..7912208e514 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -282,24 +282,34 @@ class IColumn : public COWPtr /// Note: /// 1. The pos.size() must be greater than or equal to length. /// 2. If has_null is true, then the pos[i] could be nullptr, which means the i-th element does not need to be serialized. - virtual void serializeToPosForCmp( + virtual void serializeToPos( PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - const NullMap * /*nullmap*/, - const TiDB::TiDBCollatorPtr & /* collator */, - String * /* sort_key_container */) const + bool /* has_null */) const = 0; - virtual void serializeToPos( + // Similar to serializeToPos, but there are two changes to make sure compare semantics is kept: + // 1. For ColumnString with collator, this method decode using collator first and then serialize to pos. + // 2. For ColumnNullable, a default value of nested column will be serialized if this row is null. + virtual void serializeToPosForCmp( PaddedPODArray & /* pos */, size_t /* start */, size_t /* length */, - bool /* has_null */) const + const NullMap * /*nullmap*/, + const TiDB::TiDBCollatorPtr & /* collator */, + String * /* sort_key_container */) const = 0; /// Serialize data of column from start to start + length into pointer of pos and forward each pos[i] to the end of /// serialized data. /// Only called by ColumnArray. 
+ virtual void serializeToPosForColumnArray( + PaddedPODArray & /* pos */, + size_t /* start */, + size_t /* length */, + bool /* has_null */, + const Offsets & /* array_offsets */) const + = 0; virtual void serializeToPosForCmpColumnArray( PaddedPODArray & /* pos */, size_t /* start */, @@ -309,13 +319,6 @@ class IColumn : public COWPtr const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const = 0; - virtual void serializeToPosForColumnArray( - PaddedPODArray & /* pos */, - size_t /* start */, - size_t /* length */, - bool /* has_null */, - const Offsets & /* array_offsets */) const - = 0; /// Deserialize and insert data from pos and forward each pos[i] to the end of serialized data. /// Note: diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index de6f0c5e49d..53aaea3e972 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -41,7 +41,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test if (!compare_semantics) column_ptr->countSerializeByteSize(byte_size); else - column_ptr->countSerializeByteSizeForCmp(byte_size, collator, nullptr); + column_ptr->countSerializeByteSizeForCmp(byte_size, collator); ASSERT_EQ(byte_size.size(), result_byte_size.size()); for (size_t i = 0; i < byte_size.size(); ++i) ASSERT_EQ(byte_size[i], i + result_byte_size[i]); @@ -62,7 +62,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test if (!compare_semantics) column_array->countSerializeByteSize(byte_size); else - column_array->countSerializeByteSizeForCmp(byte_size, collator, nullptr); + column_array->countSerializeByteSizeForCmp(byte_size, collator); ASSERT_EQ(byte_size.size(), result_byte_size.size()); for (size_t i = 0; i < byte_size.size(); ++i) ASSERT_EQ(byte_size[i], sizeof(UInt32) + i + result_byte_size[i]); @@ -148,6 +148,19 @@ class 
TestColumnSerializeDeserialize : public ::testing::Test } } } + // else if (result_col_ptr->getFamilyName() == String("Nullable")) + // { + // for (size_t i = 0; i < result_col_ptr->size(); ++i) + // { + // ASSERT_EQ(result_col_ptr->isNullAt(i), new_col_ptr->isNullAt(i)); + // } + // const auto & nested_result_col_ptr = checkAndGetColumn(result_col_ptr.get())->getNestedColumnPtr(); + // const auto & nested_new_col_ptr = checkAndGetColumn(new_col_ptr.get())->getNestedColumnPtr(); + // checkForColumnWithCollator( + // nested_result_col_ptr, + // nested_new_col_ptr, + // collator); + // } else { for (size_t i = 0; i < result_col_ptr->size(); ++i) @@ -156,8 +169,9 @@ class TestColumnSerializeDeserialize : public ::testing::Test if (result_col_ptr->isNullAt(i)) continue; auto res = result_col_ptr->getDataAt(i); - auto sort_key = collator->sortKey(res.data, res.size, sort_key_container); - ASSERT_TRUE(sort_key == new_col_ptr->getDataAt(i)); + auto res_sort_key = collator->sortKey(res.data, res.size, sort_key_container); + auto act = new_col_ptr->getDataAt(i); + ASSERT_TRUE(res_sort_key == act); } } } @@ -323,7 +337,7 @@ class TestColumnSerializeDeserialize : public ::testing::Test { PaddedPODArray byte_size; byte_size.resize_fill_zero(column_ptr->size()); - column_ptr->countSerializeByteSizeForCmp(byte_size, collator, nullptr); + column_ptr->countSerializeByteSizeForCmp(byte_size, collator); size_t total_size = 0; for (const auto size : byte_size) total_size += size; @@ -603,6 +617,21 @@ try testCountSerializeByteSize(col_nullable_array_vec, {1 + 4 + 4, 1 + 4 + 8, 1 + 4 + 12}); testSerializeAndDeserialize(col_nullable_array_vec); testSerializeAndDeserialize(col_nullable_array_vec, true, nullptr, nullptr); + + // ColumnNullable(ColumnArray(ColumnNullable(ColumnString))) + auto col_offsets_1 = createColumn({1, 3, 6}).column; + auto col_array_string = ColumnArray::create(col_nullable_string, col_offsets_1); + auto col_nullable_array_string = 
ColumnNullable::create(col_array_string, createColumn({0, 1, 0}).column); + testCountSerializeByteSize(col_nullable_array_string, + {1 + 4 + 1 + 4 + 4, + 1 + 4 + 2 + 8 + 4, + 1 + 4 + 3 + 12 + 7}, true, nullptr); + testSerializeAndDeserialize(col_nullable_array_string); + testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_bin, &sort_key_container); + testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_general_ci, &sort_key_container); + testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_unicode_ci, &sort_key_container); + + // ColumnNullable(ColumnTuple(ColumnNullable(ColumnString))) } CATCH From eb01d9eb3ca6d1653a3f9107d5690f228171c9ac Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Sun, 26 Jan 2025 18:16:57 +0800 Subject: [PATCH 08/12] refine Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnArray.cpp | 32 +++++- dbms/src/Columns/ColumnDecimal.cpp | 8 +- dbms/src/Columns/ColumnFixedString.cpp | 2 +- dbms/src/Columns/ColumnNullable.cpp | 104 +++--------------- dbms/src/Columns/ColumnString.cpp | 2 +- dbms/src/Columns/ColumnVector.cpp | 8 +- .../gtest_column_serialize_deserialize.cpp | 41 ++++--- 7 files changed, 78 insertions(+), 119 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 6f5f68b9a7b..c3dbbcf5cac 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -266,17 +266,41 @@ void ColumnArray::serializeToPosForCmp( String * sort_key_container) const { if (nullmap != nullptr) - serializeToPosImpl(pos, start, length, collator, sort_key_container, nullmap); + serializeToPosImpl( + pos, + start, + length, + collator, + sort_key_container, + nullmap); else - serializeToPosImpl(pos, start, length, collator, sort_key_container, nullptr); + serializeToPosImpl( + pos, + start, + length, + collator, + sort_key_container, + nullptr); } void ColumnArray::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, 
bool has_null) const { if (has_null) - serializeToPosImpl(pos, start, length, nullptr, nullptr, nullptr); + serializeToPosImpl( + pos, + start, + length, + nullptr, + nullptr, + nullptr); else - serializeToPosImpl(pos, start, length, nullptr, nullptr, nullptr); + serializeToPosImpl( + pos, + start, + length, + nullptr, + nullptr, + nullptr); } template diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index a15e4734396..fe57e19cba8 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -256,7 +256,7 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( size()); static_assert(!(has_null && has_nullmap)); - assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.back())); + RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); for (size_t i = 0; i < length; ++i) { @@ -283,9 +283,9 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( { for (size_t j = 0; j < len; ++j) tiflash_compiler_builtin_memcpy( - pos[i] + j * sizeof(T), - &data[array_offsets[start + i - 1] + j], - sizeof(T)); + pos[i] + j * sizeof(T), + &data[array_offsets[start + i - 1] + j], + sizeof(T)); } else { diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 244f21bbc54..de75137a705 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -238,7 +238,7 @@ void ColumnFixedString::serializeToPosForColumnArrayImpl( size()); static_assert(!(has_null && has_nullmap)); - assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); + RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); for (size_t i = 0; i < length; ++i) { diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 5cd04385a57..d93a4d6c983 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -319,22 +319,10 @@ void 
ColumnNullable::serializeToPosForCmp( const TiDB::TiDBCollatorPtr & collator, String * sort_key_container) const { - if unlikely (nullmap != nullptr) - { - // This code path is not efficient, because of the temporary `new_nullmap_col`. - // But only got this code path when the column is like ColumnNullable(ColumnTuple(ColumnNullable)), - // which is rare for TiFlash, because ColumnTuple is not used for now. - auto new_nullmap_col = ColumnUInt8::create(); - DB::mergeNullMap(start, length, *nullmap, getNullMapData(), new_nullmap_col->getData()); - new_nullmap_col->serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); - getNestedColumn() - .serializeToPosForCmp(pos, start, length, &(new_nullmap_col->getData()), collator, sort_key_container); - } - else - { - getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); - getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container); - } + // Nested ColumnNullable like ColumnNullable(ColumnArray(ColumnNullable(ColumnXXX))) not support. 
+ RUNTIME_CHECK_MSG(!nullmap, "serializeToPosForCmp cannot handle nested nullable"); + getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container); + getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container); } void ColumnNullable::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const @@ -344,78 +332,18 @@ void ColumnNullable::serializeToPos(PaddedPODArray & pos, size_t start, } void ColumnNullable::serializeToPosForCmpColumnArray( - PaddedPODArray & pos, - size_t start, - size_t length, - const NullMap * nullmap, - const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr & collator, - String * sort_key_container) const -{ - const auto & nested_nullmap = getNullMapData(); - RUNTIME_CHECK(nested_nullmap.size() == array_offsets.back()); - const auto nested_start = array_offsets[start - 1]; - const auto nested_length = array_offsets[start + length - 1] - array_offsets[start - 1]; - - if (nullmap != nullptr) - { - // Got this code path when the column is like ColumnNullable(ColumnArray(ColumnNullable)), - RUNTIME_CHECK(nullmap->size() == array_offsets.size()); - auto new_nullmap_col = ColumnUInt8::create(); - auto & new_nullmap_data = new_nullmap_col->getData(); - new_nullmap_data.assign(nested_nullmap); - for (size_t i = start; i < start + length; ++i) - { - if (DB::isNullAt(*nullmap, i)) - { - const auto row_size = array_offsets[i] - array_offsets[i - 1]; - const auto row_offset = array_offsets[i - 1]; - for (size_t j = row_offset; j < row_offset + row_size; ++j) - setNullAt(new_nullmap_data, j); - } - } - // new_nullmap_col - // ->serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); - // getNestedColumn().serializeToPosForCmpColumnArray( - // pos, - // start, - // length, - // &new_nullmap_data, - // array_offsets, - // collator, - // sort_key_container); - new_nullmap_col - 
->serializeToPosForCmp(pos, nested_start, nested_length, nullptr, collator, sort_key_container); - getNestedColumn().serializeToPosForCmp( - pos, - nested_start, - nested_length, - &new_nullmap_data, - collator, - sort_key_container); - } - else - { - // getNullMapColumn() - // .serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container); - // getNestedColumn().serializeToPosForCmpColumnArray( - // pos, - // start, - // length, - // &getNullMapData(), - // array_offsets, - // collator, - // sort_key_container); - getNullMapColumn() - .serializeToPosForCmp(pos, nested_start, nested_length, nullptr, collator, sort_key_container); - getNestedColumn().serializeToPosForCmp( - pos, - nested_start, - nested_length, - &getNullMapData(), - collator, - sort_key_container); - } + PaddedPODArray & /* pos */, + size_t /* start */, + size_t /* length */, + const NullMap * /* nullmap */, + const IColumn::Offsets & /* array_offsets */, + const TiDB::TiDBCollatorPtr & /* collator */, + String * /* sort_key_container */) const +{ + // Doesn't support ColumnArray(ColumnNullable(ColumnXXX)) + throw Exception( + "Method serializeToPosForCmpColumnArray is not supported for " + getName(), + ErrorCodes::NOT_IMPLEMENTED); } void ColumnNullable::serializeToPosForColumnArray( PaddedPODArray & pos, diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 8af794c664e..6d498491638 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -990,7 +990,7 @@ void ColumnString::serializeToPosForColumnArrayImpl( array_offsets.back(), size()); - assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); + RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); /// countSerializeByteSizeForCmpColumnArray has already checked that the size of one element is not greater than UINT32_MAX if constexpr (compare_semantics) diff --git 
a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index f1e07bef6bc..2c449b06915 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -197,7 +197,7 @@ void ColumnVector::serializeToPosForColumnArrayImpl( size()); static_assert(!(has_null && has_nullmap)); - assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); + RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); for (size_t i = 0; i < length; ++i) { @@ -216,9 +216,9 @@ void ColumnVector::serializeToPosForColumnArrayImpl( { for (size_t j = 0; j < len; ++j) tiflash_compiler_builtin_memcpy( - pos[i] + j * sizeof(T), - &data[array_offsets[start + i - 1] + j], - sizeof(T)); + pos[i] + j * sizeof(T), + &data[array_offsets[start + i - 1] + j], + sizeof(T)); } else { diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index 53aaea3e972..a6d5419030e 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -618,20 +618,25 @@ try testSerializeAndDeserialize(col_nullable_array_vec); testSerializeAndDeserialize(col_nullable_array_vec, true, nullptr, nullptr); - // ColumnNullable(ColumnArray(ColumnNullable(ColumnString))) - auto col_offsets_1 = createColumn({1, 3, 6}).column; - auto col_array_string = ColumnArray::create(col_nullable_string, col_offsets_1); - auto col_nullable_array_string = ColumnNullable::create(col_array_string, createColumn({0, 1, 0}).column); - testCountSerializeByteSize(col_nullable_array_string, - {1 + 4 + 1 + 4 + 4, - 1 + 4 + 2 + 8 + 4, - 1 + 4 + 3 + 12 + 7}, true, nullptr); - testSerializeAndDeserialize(col_nullable_array_string); - testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_bin, &sort_key_container); - testSerializeAndDeserialize(col_nullable_array_string, true, 
collator_utf8_general_ci, &sort_key_container); - testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_unicode_ci, &sort_key_container); - - // ColumnNullable(ColumnTuple(ColumnNullable(ColumnString))) + // ColumnNullable(ColumnArray(ColumnString)) + auto col_string = createColumn({"123", "2", "34", "456", "5678", "6"}).column; + auto col_array_string = ColumnArray::create(col_vector, col_offsets); + auto col_nullable_array_string = ColumnNullable::create(col_array_vec, createColumn({1, 0, 1}).column); + testSerializeAndDeserialize(col_nullable_array_vec); + testSerializeAndDeserialize(col_nullable_array_vec, true, nullptr, nullptr); + + // Nested ColumnNullable like ColumnNullable(ColumnArray(ColumnNullable(ColumnString))) not support. + // auto col_offsets_1 = createColumn({1, 3, 6}).column; + // auto col_array_string = ColumnArray::create(col_nullable_string, col_offsets_1); + // auto col_nullable_array_string = ColumnNullable::create(col_array_string, createColumn({0, 1, 0}).column); + // testCountSerializeByteSize(col_nullable_array_string, + // {1 + 4 + 1 + 4 + 4, + // 1 + 4 + 2 + 8 + 4, + // 1 + 4 + 3 + 12 + 7}, true, nullptr); + // testSerializeAndDeserialize(col_nullable_array_string); + // testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_bin, &sort_key_container); + // testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_general_ci, &sort_key_container); + // testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_unicode_ci, &sort_key_container); } CATCH @@ -667,9 +672,10 @@ try auto col_array_nullable_string = ColumnArray::create(col_nullable_string, col_offsets); testCountSerializeByteSize(col_array_nullable_string, {4 + 5 + 4, 4 + 10 + 4, 4 + 15 + 7}); testSerializeAndDeserialize(col_array_nullable_string); - testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_bin, &sort_key_container); - 
testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_general_ci, &sort_key_container); - testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_unicode_ci, &sort_key_container); + // compare semantics not support ColumnArray(ColumnNullable(ColumnString)). + // testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_bin, &sort_key_container); + // testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_general_ci, &sort_key_container); + // testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_unicode_ci, &sort_key_container); // ColumnArray(ColumnDecimal) auto col_decimal_256 = createColumn( @@ -820,6 +826,7 @@ try true, collator_utf8_unicode_ci); + // ColumnString String sort_key_container; testSerializeAndDeserialize(col_string, true, collator_utf8_bin, &sort_key_container); testSerializeAndDeserialize(col_string, true, collator_utf8_general_ci, &sort_key_container); From 62fbbede009c9a949ca677d1eb4055c550730283 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Sun, 26 Jan 2025 18:20:56 +0800 Subject: [PATCH 09/12] refine Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnArray.cpp | 2 +- dbms/src/Columns/ColumnDecimal.cpp | 2 +- dbms/src/Columns/ColumnFixedString.cpp | 2 +- dbms/src/Columns/ColumnString.cpp | 2 +- dbms/src/Columns/ColumnVector.cpp | 2 +- .../tests/gtest_column_serialize_deserialize.cpp | 13 ------------- 6 files changed, 5 insertions(+), 18 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index c3dbbcf5cac..ad20ed2a43b 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -316,7 +316,7 @@ void ColumnArray::serializeToPosImpl( RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); static_assert(!(has_null && has_nullmap)); - assert(!has_nullmap || (nullmap && nullmap->size() == size())); + 
RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == size())); /// countSerializeByteSize has already checked that the size of one element is not greater than UINT32_MAX for (size_t i = 0; i < length; ++i) diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index fe57e19cba8..0ae87c6ab66 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -204,7 +204,7 @@ void ColumnDecimal::serializeToPosImpl( RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); static_assert(!(has_null && has_nullmap)); - assert(!has_nullmap || (nullmap && nullmap->size() == size())); + RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == size())); T def_val{}; for (size_t i = 0; i < length; ++i) diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index de75137a705..b945fba690e 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -177,7 +177,7 @@ void ColumnFixedString::serializeToPosImpl( RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); static_assert(!(has_null && has_nullmap)); - assert(!has_nullmap || (nullmap && nullmap->size() == size())); + RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == size())); for (size_t i = 0; i < length; ++i) { diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 6d498491638..2fcd6d83c5b 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -784,7 +784,7 @@ void ColumnString::serializeToPosImpl( RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); - assert(!has_nullmap || (nullmap && nullmap->size() == size())); + 
RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == size())); /// To avoid virtual function call of sortKey(). const auto * derived_collator = static_cast(collator); diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 2c449b06915..9b0769436f8 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -120,7 +120,7 @@ void ColumnVector::serializeToPosImpl( RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); static_assert(!(has_null && has_nullmap)); - assert(!has_nullmap || (nullmap && nullmap->size() == size())); + RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == size())); T def_val{}; for (size_t i = 0; i < length; ++i) diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index a6d5419030e..69052507f65 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -148,19 +148,6 @@ class TestColumnSerializeDeserialize : public ::testing::Test } } } - // else if (result_col_ptr->getFamilyName() == String("Nullable")) - // { - // for (size_t i = 0; i < result_col_ptr->size(); ++i) - // { - // ASSERT_EQ(result_col_ptr->isNullAt(i), new_col_ptr->isNullAt(i)); - // } - // const auto & nested_result_col_ptr = checkAndGetColumn(result_col_ptr.get())->getNestedColumnPtr(); - // const auto & nested_new_col_ptr = checkAndGetColumn(new_col_ptr.get())->getNestedColumnPtr(); - // checkForColumnWithCollator( - // nested_result_col_ptr, - // nested_new_col_ptr, - // collator); - // } else { for (size_t i = 0; i < result_col_ptr->size(); ++i) From 0ce2f534078ce84c3e373c5f725ba8e0786f4508 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 27 Jan 2025 13:52:48 +0800 Subject: [PATCH 10/12] refine Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnNullable.cpp | 6 
++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index d93a4d6c983..c9c7e6e82ee 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -340,9 +340,11 @@ void ColumnNullable::serializeToPosForCmpColumnArray( const TiDB::TiDBCollatorPtr & /* collator */, String * /* sort_key_container */) const { - // Doesn't support ColumnArray(ColumnNullable(ColumnXXX)) + // Unable to handle ColumnArray(ColumnNullable(ColumnXXX)). Because the pos vector corresponds to the rows of ColumnArray, + // while ColumnNullable::nullmap corresponds to the rows of ColumnNullable. + // This means it's not easy to correctly serialize the row in ColumnNullable to the corresponding position in pos. throw Exception( - "Method serializeToPosForCmpColumnArray is not supported for " + getName(), + "serializeToPosForCmpColumnArray cannot handle ColumnArray(" + getName() + ")", ErrorCodes::NOT_IMPLEMENTED); } void ColumnNullable::serializeToPosForColumnArray( From c4c0eb9d3c10eda4d9ecb89287165af88715820b Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 27 Jan 2025 16:54:02 +0800 Subject: [PATCH 11/12] refine Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnDecimal.cpp | 2 +- dbms/src/Columns/ColumnFixedString.cpp | 7 ++--- dbms/src/Columns/ColumnNullable.h | 5 +++- dbms/src/Columns/ColumnString.cpp | 29 ++++++++++++------- dbms/src/Columns/ColumnVector.cpp | 4 +-- dbms/src/Columns/IColumn.h | 19 +++--------- .../gtest_column_serialize_deserialize.cpp | 2 ++ 7 files changed, 33 insertions(+), 35 deletions(-) diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 0ae87c6ab66..610739d15c9 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -206,7 +206,7 @@ void ColumnDecimal::serializeToPosImpl( static_assert(!(has_null && has_nullmap)); RUNTIME_CHECK(!has_nullmap || (nullmap && 
nullmap->size() == size())); - T def_val{}; + static constexpr T def_val{}; for (size_t i = 0; i < length; ++i) { if constexpr (has_null) diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index b945fba690e..217b19cf913 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -190,11 +190,8 @@ void ColumnFixedString::serializeToPosImpl( { if (DB::isNullAt(*nullmap, start + i)) { - for (size_t j = 0; j < n; ++j) - { - *(pos[i]) = '\0'; - pos[i] += 1; - } + memset(pos[i], '\0', n); + pos[i] += n; continue; } } diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index ad4abce3916..5b216572e66 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -60,7 +60,10 @@ class ColumnNullable final : public COWPtrHelper std::string getName() const override { return "Nullable(" + nested_column->getName() + ")"; } MutableColumnPtr cloneResized(size_t size) const override; size_t size() const override { return static_cast(*null_map).size(); } - bool isNullAt(size_t n) const override { return static_cast(*null_map).getData()[n] != 0; } + bool isNullAt(size_t n) const override + { + return DB::isNullAt(static_cast(*null_map).getData(), n); + } Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; UInt64 get64(size_t n) const override { return nested_column->get64(n); } diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 2fcd6d83c5b..d2e1ab47bec 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -35,6 +35,18 @@ extern const int PARAMETER_OUT_OF_BOUND; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } // namespace ErrorCodes +struct ColumnStringDefaultValue +{ + char mem[sizeof(UInt32) + 1] = {0}; + ColumnStringDefaultValue() + { + UInt32 str_size = 1; + tiflash_compiler_builtin_memcpy(&mem[0], &str_size, sizeof(str_size)); + 
} +}; + +static ColumnStringDefaultValue col_str_def_val; + MutableColumnPtr ColumnString::cloneResized(size_t to_size) const { auto res = ColumnString::create(); @@ -784,6 +796,7 @@ void ColumnString::serializeToPosImpl( RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size()); RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size()); + static_assert(!(has_null && has_nullmap)); RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == size())); /// To avoid virtual function call of sortKey(). @@ -793,21 +806,17 @@ void ColumnString::serializeToPosImpl( { if constexpr (compare_semantics) { - static_assert(!has_null); - UInt32 str_size = sizeAt(start + i); - const void * src = &chars[offsetAt(start + i)]; if constexpr (has_nullmap) { if (DB::isNullAt(*nullmap, start + i)) { - UInt32 str_size = 1; - tiflash_compiler_builtin_memcpy(pos[i], &str_size, sizeof(UInt32)); - pos[i] += sizeof(UInt32); - *(pos[i]) = '\0'; - pos[i] += 1; + tiflash_compiler_builtin_memcpy(pos[i], &col_str_def_val.mem[0], sizeof(col_str_def_val.mem)); + pos[i] += sizeof(col_str_def_val.mem); continue; } } + UInt32 str_size = sizeAt(start + i); + const void * src = &chars[offsetAt(start + i)]; auto sort_key = derived_collator->sortKey(reinterpret_cast(src), str_size - 1, *sort_key_container); // For terminating zero. 
@@ -822,7 +831,6 @@ void ColumnString::serializeToPosImpl( } else { - static_assert(!has_nullmap); if constexpr (has_null) { if (pos[i] == nullptr) @@ -990,12 +998,12 @@ void ColumnString::serializeToPosForColumnArrayImpl( array_offsets.back(), size()); + static_assert(!(has_null && has_nullmap)); RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size())); /// countSerializeByteSizeForCmpColumnArray has already checked that the size of one element is not greater than UINT32_MAX if constexpr (compare_semantics) { - static_assert(!has_null); /// To avoid virtual function call of sortKey(). const auto * derived_collator = static_cast(collator); for (size_t i = 0; i < length; ++i) @@ -1025,7 +1033,6 @@ void ColumnString::serializeToPosForColumnArrayImpl( } else { - static_assert(!has_nullmap); for (size_t i = 0; i < length; ++i) { if constexpr (has_null) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 9b0769436f8..be92ec62c5f 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -122,7 +122,7 @@ void ColumnVector::serializeToPosImpl( static_assert(!(has_null && has_nullmap)); RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == size())); - T def_val{}; + static constexpr T def_val{}; for (size_t i = 0; i < length; ++i) { if constexpr (has_null) @@ -206,12 +206,12 @@ void ColumnVector::serializeToPosForColumnArrayImpl( if (pos[i] == nullptr) continue; } - size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; if constexpr (has_nullmap) { if (DB::isNullAt(*nullmap, start + i)) continue; } + size_t len = array_offsets[start + i] - array_offsets[start + i - 1]; if (len <= 4) { for (size_t j = 0; j < len; ++j) diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 7912208e514..309c0c5dcac 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -45,18 +45,6 @@ inline bool isNullAt(const NullMap & nullmap, size_t 
n) { return nullmap[n] != 0; } -inline void mergeNullMap(size_t start, size_t length, const NullMap & m1, const NullMap & m2, NullMap & m3) -{ - RUNTIME_CHECK(m1.size() == m2.size()); - RUNTIME_CHECK(start + length < m1.size()); - m3.resize_fill_zero(m1.size()); - for (size_t i = start; i < start + length; ++i) - m3[i] = (DB::isNullAt(m1, i) || DB::isNullAt(m2, i)); -} -inline void setNullAt(NullMap & nullmap, size_t n) -{ - nullmap[n] = 1; -} /// Declares interface to store columns in memory. class IColumn : public COWPtr @@ -288,9 +276,9 @@ class IColumn : public COWPtr size_t /* length */, bool /* has_null */) const = 0; - // Similar to serializeToPos, but there are two changes to make sure compare semantics is kept: - // 1. For ColumnString with collator, this method decode using collator first and then serialize to pos. - // 2. For ColumnNullable, a default value of nested column will be serialized if this row is null. + /// Similar to serializeToPos, but there are two changes to make sure compare semantics is kept: + /// 1. For ColumnString with collator, this method first computes the sort key with the collator and then serializes it to pos. + /// 2. For ColumnNullable(ColumnXXX), a default value of the nested column will be serialized if this row is null. virtual void serializeToPosForCmp( PaddedPODArray & /* pos */, size_t /* start */, @@ -310,6 +298,7 @@ class IColumn : public COWPtr bool /* has_null */, const Offsets & /* array_offsets */) const = 0; + /// Similar to serializeToPosForCmp, but only called by ColumnArray.
virtual void serializeToPosForCmpColumnArray( PaddedPODArray & /* pos */, size_t /* start */, diff --git a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp index 69052507f65..26eabc0b616 100644 --- a/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp +++ b/dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp @@ -612,6 +612,8 @@ try testSerializeAndDeserialize(col_nullable_array_vec); testSerializeAndDeserialize(col_nullable_array_vec, true, nullptr, nullptr); + // ColumnArray(ColumnNullable(ColumnVector)) is not supported. + // Nested ColumnNullable like ColumnNullable(ColumnArray(ColumnNullable(ColumnString))) is not supported. // auto col_offsets_1 = createColumn({1, 3, 6}).column; // auto col_array_string = ColumnArray::create(col_nullable_string, col_offsets_1); From 2a48af6e8f2391a2593370bffa086dcea7b7806c Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Thu, 6 Feb 2025 11:13:06 +0800 Subject: [PATCH 12/12] fix Signed-off-by: guo-shaoge --- dbms/src/Columns/ColumnDecimal.cpp | 173 +++++++++++++++---------- dbms/src/Columns/ColumnFixedString.cpp | 34 +++++ dbms/src/Columns/ColumnFixedString.h | 21 +-- 3 files changed, 138 insertions(+), 90 deletions(-) diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 03a41894877..f8efecc8585 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -139,6 +139,7 @@ const char * ColumnDecimal::deserializeAndInsertFromArena(const char * pos, c } } +template void ColumnDecimal::countSerializeByteSizeForCmp(PaddedPODArray & byte_size, const TiDB::TiDBCollatorPtr &) const { @@ -151,6 +152,23 @@ void ColumnDecimal::countSerializeByteSize(PaddedPODArray & byte_size countSerializeByteSizeImpl(byte_size); } +template +void ColumnDecimal::countSerializeByteSizeForCmpColumnArray( + PaddedPODArray & byte_size, + const IColumn::Offsets & array_offsets, + const TiDB::TiDBCollatorPtr
&) const +{ + countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets); +} + +template +void ColumnDecimal::countSerializeByteSizeForColumnArray( + PaddedPODArray & byte_size, + const IColumn::Offsets & array_offsets) const +{ + countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets); +} + template template void ColumnDecimal::countSerializeByteSizeImpl(PaddedPODArray & byte_size) const @@ -172,23 +190,6 @@ void ColumnDecimal::countSerializeByteSizeImpl(PaddedPODArray & byte_ } } -template -void ColumnDecimal::countSerializeByteSizeForCmpColumnArray( - PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr &) const -{ - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets); -} - -template -void ColumnDecimal::countSerializeByteSizeForColumnArray( - PaddedPODArray & byte_size, - const IColumn::Offsets & array_offsets) const -{ - countSerializeByteSizeForColumnArrayImpl(byte_size, array_offsets); -} - template template void ColumnDecimal::countSerializeByteSizeForColumnArrayImpl( @@ -223,25 +224,25 @@ void ColumnDecimal::countSerializeByteSizeForColumnArrayImpl( template void ColumnDecimal::serializeToPosForCmp( - PaddedPODArray & pos, - size_t start, - size_t length, - const NullMap * nullmap, - const TiDB::TiDBCollatorPtr &, - String *) const + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap, + const TiDB::TiDBCollatorPtr &, + String *) const { if (nullmap != nullptr) serializeToPosImpl( - pos, - start, - length, - nullmap); + pos, + start, + length, + nullmap); else serializeToPosImpl( - pos, - start, - length, - nullptr); + pos, + start, + length, + nullptr); } template @@ -249,66 +250,66 @@ void ColumnDecimal::serializeToPos(PaddedPODArray & pos, size_t start { if (has_null) serializeToPosImpl( - pos, - start, - length, - nullptr); + pos, + start, + length, + nullptr); else serializeToPosImpl( - pos, - start, - length, - nullptr); + pos, + start, + 
length, + nullptr); } template void ColumnDecimal::serializeToPosForCmpColumnArray( - PaddedPODArray & pos, - size_t start, - size_t length, - const NullMap * nullmap, - const IColumn::Offsets & array_offsets, - const TiDB::TiDBCollatorPtr &, - String *) const + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap, + const IColumn::Offsets & array_offsets, + const TiDB::TiDBCollatorPtr &, + String *) const { if (nullmap != nullptr) serializeToPosForColumnArrayImpl( - pos, - start, - length, - array_offsets, - nullmap); + pos, + start, + length, + array_offsets, + nullmap); else serializeToPosForColumnArrayImpl( - pos, - start, - length, - array_offsets, - nullptr); + pos, + start, + length, + array_offsets, + nullptr); } template void ColumnDecimal::serializeToPosForColumnArray( - PaddedPODArray & pos, - size_t start, - size_t length, - bool has_null, - const IColumn::Offsets & array_offsets) const + PaddedPODArray & pos, + size_t start, + size_t length, + bool has_null, + const IColumn::Offsets & array_offsets) const { if (has_null) serializeToPosForColumnArrayImpl( - pos, - start, - length, - array_offsets, - nullptr); + pos, + start, + length, + array_offsets, + nullptr); else serializeToPosForColumnArrayImpl( - pos, - start, - length, - array_offsets, - nullptr); + pos, + start, + length, + array_offsets, + nullptr); } template @@ -415,6 +416,36 @@ void ColumnDecimal::serializeToPosForColumnArrayImpl( } } +template +void ColumnDecimal::deserializeForCmpAndInsertFromPos(PaddedPODArray & pos, bool use_nt_align_buffer) +{ + deserializeAndInsertFromPosImpl(pos, use_nt_align_buffer); +} + +template +void ColumnDecimal::deserializeAndInsertFromPos(PaddedPODArray & pos, bool use_nt_align_buffer) +{ + deserializeAndInsertFromPosImpl(pos, use_nt_align_buffer); +} + +template +void ColumnDecimal::deserializeForCmpAndInsertFromPosColumnArray( + PaddedPODArray & pos, + const IColumn::Offsets & array_offsets, + bool use_nt_align_buffer) +{ + 
deserializeAndInsertFromPosForColumnArrayImpl(pos, array_offsets, use_nt_align_buffer); +} + +template +void ColumnDecimal::deserializeAndInsertFromPosForColumnArray( + PaddedPODArray & pos, + const IColumn::Offsets & array_offsets, + bool use_nt_align_buffer) +{ + deserializeAndInsertFromPosForColumnArrayImpl(pos, array_offsets, use_nt_align_buffer); +} + template template void ColumnDecimal::deserializeAndInsertFromPosImpl( diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 217b19cf913..0ea7cc3fd07 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -158,6 +158,21 @@ void ColumnFixedString::countSerializeByteSizeForColumnArrayImpl( byte_size[i] += n * (array_offsets[i] - array_offsets[i - 1]); } +void ColumnFixedString::serializeToPosForCmp( + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap, + const TiDB::TiDBCollatorPtr & collator, + String *) const +{ + RUNTIME_CHECK_MSG(!collator, "{} doesn't support serializeToPosForCmp when collator is not null", getName()); + if (nullmap != nullptr) + serializeToPosImpl(pos, start, length, nullmap); + else + serializeToPosImpl(pos, start, length, nullptr); +} + void ColumnFixedString::serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const { if (has_null) @@ -200,6 +215,25 @@ void ColumnFixedString::serializeToPosImpl( } } +void ColumnFixedString::serializeToPosForCmpColumnArray( + PaddedPODArray & pos, + size_t start, + size_t length, + const NullMap * nullmap, + const IColumn::Offsets & array_offsets, + const TiDB::TiDBCollatorPtr & collator, + String *) const +{ + RUNTIME_CHECK_MSG( + !collator, + "{} doesn't support serializeToPosForCmpColumnArray when collator is not null", + getName()); + if (nullmap != nullptr) + serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullmap); + else + serializeToPosForColumnArrayImpl(pos, start, length, 
array_offsets, nullptr); +} + void ColumnFixedString::serializeToPosForColumnArray( PaddedPODArray & pos, size_t start, diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h index b909cf37927..d6fbdafcdfb 100644 --- a/dbms/src/Columns/ColumnFixedString.h +++ b/dbms/src/Columns/ColumnFixedString.h @@ -160,14 +160,7 @@ class ColumnFixedString final : public COWPtrHelper size_t length, const NullMap * nullmap, const TiDB::TiDBCollatorPtr & collator, - String *) const override - { - RUNTIME_CHECK_MSG(!collator, "{} doesn't support serializeToPosForCmp when collator is not null", getName()); - if (nullmap != nullptr) - serializeToPosImpl(pos, start, length, nullmap); - else - serializeToPosImpl(pos, start, length, nullptr); - } + String *) const override; void serializeToPos(PaddedPODArray & pos, size_t start, size_t length, bool has_null) const override; void serializeToPosForCmpColumnArray( @@ -177,17 +170,7 @@ class ColumnFixedString final : public COWPtrHelper const NullMap * nullmap, const IColumn::Offsets & array_offsets, const TiDB::TiDBCollatorPtr & collator, - String *) const override - { - RUNTIME_CHECK_MSG( - !collator, - "{} doesn't support serializeToPosForCmpColumnArray when collator is not null", - getName()); - if (nullmap != nullptr) - serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullmap); - else - serializeToPosForColumnArrayImpl(pos, start, length, array_offsets, nullptr); - } + String *) const override; void serializeToPosForColumnArray( PaddedPODArray & pos, size_t start,