Skip to content

Commit

Permalink
refine
Browse files Browse the repository at this point in the history
Signed-off-by: guo-shaoge <[email protected]>
  • Loading branch information
guo-shaoge committed Jan 25, 2025
1 parent 543f1b8 commit 370bf0a
Show file tree
Hide file tree
Showing 8 changed files with 255 additions and 133 deletions.
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ void ColumnArray::countSerializeByteSizeImpl(
const NullMap * nullmap) const
{
RUNTIME_CHECK_MSG(byte_size.size() == size(), "size of byte_size({}) != column size({})", byte_size.size(), size());
assert(!nullmap || (nullmap->size() == size()));

if unlikely (!getOffsets().empty() && getOffsets().back() > UINT32_MAX)
{
Expand Down Expand Up @@ -309,7 +310,7 @@ void ColumnArray::serializeToPosImpl(
UInt32 len = sizeAt(start + i);
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
len = 0;
}
tiflash_compiler_builtin_memcpy(pos[i], &len, sizeof(UInt32));
Expand Down
22 changes: 16 additions & 6 deletions dbms/src/Columns/ColumnDecimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@ void ColumnDecimal<T>::countSerializeByteSizeImpl(PaddedPODArray<size_t> & byte_
}
}

// TODO add unit test
template <typename T>
template <bool compare_semantics>
void ColumnDecimal<T>::countSerializeByteSizeForColumnArrayImpl(
Expand Down Expand Up @@ -220,7 +219,7 @@ void ColumnDecimal<T>::serializeToPosImpl(
{
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
{
pos[i] = serializeDecimal256Helper(pos[i], def_val);
continue;
Expand All @@ -232,7 +231,7 @@ void ColumnDecimal<T>::serializeToPosImpl(
{
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
{
tiflash_compiler_builtin_memcpy(pos[i], &def_val, sizeof(T));
pos[i] += sizeof(T);
Expand Down Expand Up @@ -283,7 +282,7 @@ void ColumnDecimal<T>::serializeToPosForColumnArrayImpl(
{
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
continue;
}
for (size_t j = 0; j < len; ++j)
Expand All @@ -293,10 +292,21 @@ void ColumnDecimal<T>::serializeToPosForColumnArrayImpl(
{
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
continue;
}
inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T));
if (len <= 4)
{
for (size_t j = 0; j < len; ++j)
tiflash_compiler_builtin_memcpy(
pos[i] + j * sizeof(T),
&data[array_offsets[start + i - 1] + j],
sizeof(T));
}
else
{
inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T));
}
pos[i] += len * sizeof(T);
}
}
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Columns/ColumnFixedString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void ColumnFixedString::countSerializeByteSizeImpl(PaddedPODArray<size_t> & byte
{
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
{
byte_size[i] += 1;
continue;
Expand Down Expand Up @@ -175,7 +175,7 @@ void ColumnFixedString::countSerializeByteSizeForColumnArrayImpl(
{
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
{
byte_size[i] += array_offsets[i] - array_offsets[i - 1];
continue;
Expand Down Expand Up @@ -215,7 +215,7 @@ void ColumnFixedString::serializeToPosImpl(
}
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
{
for (size_t j = 0; j < n; ++j)
{
Expand Down Expand Up @@ -277,7 +277,7 @@ void ColumnFixedString::serializeToPosForColumnArrayImpl(
size_t len = array_offsets[start + i] - array_offsets[start + i - 1];
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
continue;
}

Expand Down
111 changes: 91 additions & 20 deletions dbms/src/Columns/ColumnNullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,18 @@ void ColumnNullable::countSerializeByteSizeForCmp(
const TiDB::TiDBCollatorPtr & collator,
const NullMap * nullmap) const
{
assert(!nullmap);
getNullMapColumn().countSerializeByteSizeForCmp(byte_size, collator, nullptr);
getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, &getNullMapData());
if unlikely (nullmap != nullptr)
{
auto new_nullmap_col = ColumnUInt8::create();
DB::mergeNullMap(*nullmap, getNullMapData(), new_nullmap_col->getData());
new_nullmap_col->countSerializeByteSizeForCmp(byte_size, collator, nullptr);
getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, &(new_nullmap_col->getData()));
}
else
{
getNullMapColumn().countSerializeByteSizeForCmp(byte_size, collator, nullptr);
getNestedColumn().countSerializeByteSizeForCmp(byte_size, collator, &getNullMapData());
}
}
void ColumnNullable::countSerializeByteSize(PaddedPODArray<size_t> & byte_size) const
{
Expand All @@ -303,9 +312,32 @@ void ColumnNullable::countSerializeByteSizeForCmpColumnArray(
const TiDB::TiDBCollatorPtr & collator,
const NullMap * nullmap) const
{
assert(!nullmap);
getNullMapColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr);
getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &getNullMapData());
const auto & nested_nullmap = getNullMapData();
assert(nested_nullmap.size() == array_offsets.back());
if unlikely (nullmap != nullptr)
{
assert(nullmap->size() == array_offsets.size());
auto new_nullmap_col = ColumnUInt8::create();
auto & new_nullmap_data = new_nullmap_col->getData();
new_nullmap_data.assign(nested_nullmap);
for (size_t i = 0; i < array_offsets.size(); ++i)
{
if (DB::isNullAt(*nullmap, i))
{
const auto row_size = array_offsets[i] - array_offsets[i - 1];
const auto row_offset = array_offsets[i - 1];
for (size_t j = row_offset; j < row_offset + row_size; ++j)
setNullAt(new_nullmap_data, j);
}
}
new_nullmap_col->countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr);
getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &new_nullmap_data);
}
else
{
getNullMapColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, nullptr);
getNestedColumn().countSerializeByteSizeForCmpColumnArray(byte_size, array_offsets, collator, &nested_nullmap);
}
}
void ColumnNullable::countSerializeByteSizeForColumnArray(
PaddedPODArray<size_t> & byte_size,
Expand All @@ -323,9 +355,18 @@ void ColumnNullable::serializeToPosForCmp(
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const
{
assert(!nullmap);
getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container);
getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container);
if unlikely (nullmap != nullptr)
{
auto new_nullmap_col = ColumnUInt8::create();
DB::mergeNullMap(*nullmap, getNullMapData(), new_nullmap_col->getData());
getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container);
getNestedColumn().serializeToPosForCmp(pos, start, length, &(new_nullmap_col->getData()), collator, sort_key_container);
}
else
{
getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container);
getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container);
}
}

void ColumnNullable::serializeToPos(PaddedPODArray<char *> & pos, size_t start, size_t length, bool has_null) const
Expand All @@ -343,17 +384,47 @@ void ColumnNullable::serializeToPosForCmpColumnArray(
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const
{
assert(!nullmap);
getNullMapColumn()
.serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container);
getNestedColumn().serializeToPosForCmpColumnArray(
pos,
start,
length,
&getNullMapData(),
array_offsets,
collator,
sort_key_container);
const auto & nested_nullmap = getNullMapData();
assert(nested_nullmap.size() == array_offsets.back());
if unlikely (nullmap != nullptr)
{
assert(nullmap->size() == array_offsets.size());
auto new_nullmap_col = ColumnUInt8::create();
auto & new_nullmap_data = new_nullmap_col->getData();
new_nullmap_data.assign(nested_nullmap);
for (size_t i = start; i < start + length; ++i)
{
if (DB::isNullAt(*nullmap, i))
{
const auto row_size = array_offsets[i] - array_offsets[i - 1];
const auto row_offset = array_offsets[i - 1];
for (size_t j = row_offset; j < row_offset + row_size; ++j)
setNullAt(new_nullmap_data, j);
}
}
new_nullmap_col->serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container);
getNestedColumn().serializeToPosForCmpColumnArray(
pos,
start,
length,
&new_nullmap_data,
array_offsets,
collator,
sort_key_container);
}
else
{
getNullMapColumn()
.serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container);
getNestedColumn().serializeToPosForCmpColumnArray(
pos,
start,
length,
&getNullMapData(),
array_offsets,
collator,
sort_key_container);
}
}
void ColumnNullable::serializeToPosForColumnArray(
PaddedPODArray<char *> & pos,
Expand Down
23 changes: 9 additions & 14 deletions dbms/src/Columns/ColumnString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ void ColumnString::countSerializeByteSizeImpl(
assert(sizeAt(i) > 0);
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
{
byte_size[i] += sizeof(UInt32) + 1;
continue;
Expand Down Expand Up @@ -699,11 +699,8 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl(
assert(offsetAt(array_offsets[i]) - offsetAt(array_offsets[i - 1]) >= ele_count);
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
{
byte_size[i] += (sizeof(UInt32) + 1) * ele_count;
if (DB::isNullAt(*nullmap, i))
continue;
}
}

if constexpr (count_code_points)
Expand All @@ -721,7 +718,6 @@ void ColumnString::countSerializeByteSizeForColumnArrayImpl(
}
else
{
// NOTE: didn't check nullmap because we have to iterate through all rows, it's slow.
byte_size[i]
+= sizeof(UInt32) * ele_count + offsetAt(array_offsets[i]) - offsetAt(array_offsets[i - 1]);
}
Expand Down Expand Up @@ -866,7 +862,6 @@ void ColumnString::serializeToPosImpl(
RUNTIME_CHECK_MSG(length <= pos.size(), "length({}) > size of pos({})", length, pos.size());
RUNTIME_CHECK_MSG(start + length <= size(), "start({}) + length({}) > size of column({})", start, length, size());

static_assert(!(has_null && has_nullmap));
assert(!has_nullmap || (nullmap && nullmap->size() == size()));

/// To avoid virtual function call of sortKey().
Expand All @@ -876,14 +871,16 @@ void ColumnString::serializeToPosImpl(
{
if constexpr (compare_semantics)
{
static_assert(!has_null);
UInt32 str_size = sizeAt(start + i);
const void * src = &chars[offsetAt(start + i)];
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
{
UInt32 str_size = 1;
tiflash_compiler_builtin_memcpy(pos[i], &str_size, sizeof(UInt32));
pos[i] += sizeof(UInt32);
*(pos[i]) = '\0';
pos[i] += 1;
continue;
Expand All @@ -903,7 +900,7 @@ void ColumnString::serializeToPosImpl(
}
else
{
assert(!has_nullmap);
static_assert(!has_nullmap);
if constexpr (has_null)
{
if (pos[i] == nullptr)
Expand All @@ -913,7 +910,6 @@ void ColumnString::serializeToPosImpl(
UInt32 str_size = sizeAt(start + i);
const void * src = &chars[offsetAt(start + i)];

assert(!nullmap);
tiflash_compiler_builtin_memcpy(pos[i], &str_size, sizeof(UInt32));
pos[i] += sizeof(UInt32);
inline_memcpy(pos[i], src, str_size);
Expand Down Expand Up @@ -1038,7 +1034,6 @@ void ColumnString::serializeToPosForColumnArrayImplType(
}
else
{
assert(!nullmap);
serializeToPosForColumnArrayImpl<has_null, compare_semantics, TiDB::ITiDBCollator, false>(
pos,
start,
Expand Down Expand Up @@ -1073,19 +1068,19 @@ void ColumnString::serializeToPosForColumnArrayImpl(
array_offsets.back(),
size());

static_assert(!(has_null && has_nullmap));
assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size()));

/// countSerializeByteSizeForCmpColumnArray has already checked that the size of one element is not greater than UINT32_MAX
if constexpr (compare_semantics)
{
static_assert(!has_null);
/// To avoid virtual function call of sortKey().
const auto * derived_collator = static_cast<const DerivedCollator *>(collator);
for (size_t i = 0; i < length; ++i)
{
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
continue;
}

Expand All @@ -1109,7 +1104,7 @@ void ColumnString::serializeToPosForColumnArrayImpl(
}
else
{
assert(!has_nullmap);
static_assert(!has_nullmap);
for (size_t i = 0; i < length; ++i)
{
if constexpr (has_null)
Expand Down
19 changes: 15 additions & 4 deletions dbms/src/Columns/ColumnVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ void ColumnVector<T>::serializeToPosImpl(
}
if constexpr (has_nullmap)
{
if ((*nullmap)[start + i] != 0)
if (DB::isNullAt(*nullmap, start + i))
{
tiflash_compiler_builtin_memcpy(pos[i], &def_val, sizeof(T));
pos[i] += sizeof(T);
Expand Down Expand Up @@ -175,13 +175,24 @@ void ColumnVector<T>::serializeToPosForColumnArrayImpl(
if (pos[i] == nullptr)
continue;
}
size_t len = array_offsets[start + i] - array_offsets[start + i - 1];
if constexpr (has_nullmap)
{
if ((*nullmap)[i] != 0)
if (DB::isNullAt(*nullmap, i))
continue;
}
inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T));
size_t len = array_offsets[start + i] - array_offsets[start + i - 1];
if (len <= 4)
{
for (size_t j = 0; j < len; ++j)
tiflash_compiler_builtin_memcpy(
pos[i] + j * sizeof(T),
&data[array_offsets[start + i - 1] + j],
sizeof(T));
}
else
{
inline_memcpy(pos[i], &data[array_offsets[start + i - 1]], len * sizeof(T));
}
pos[i] += len * sizeof(T);
}
}
Expand Down
Loading

0 comments on commit 370bf0a

Please sign in to comment.