Skip to content

Commit

Permalink
Update vendored DuckDB sources to 2981fc6
Browse files Browse the repository at this point in the history
  • Loading branch information
duckdblabs-bot committed Sep 5, 2024
1 parent 2981fc6 commit 412147b
Show file tree
Hide file tree
Showing 35 changed files with 713 additions and 559 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions src/duckdb/extension/icu/icu-datefunc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ unique_ptr<FunctionData> ICUDateFunc::Bind(ClientContext &context, ScalarFunctio

//! Replaces the time zone of `calendar` with the zone named by `tz_id`.
//! Throws NotImplementedException when ICU resolves the identifier to its "unknown" zone.
void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
	const auto tz_name = tz_id.GetString();
	// createTimeZone hands back a heap-allocated zone that the caller owns. Keep it in a unique_ptr so it is
	// destroyed if we throw below, instead of leaking on the unknown-zone path.
	duckdb::unique_ptr<icu::TimeZone> tz(
	    icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_name))));
	if (*tz == icu::TimeZone::getUnknown()) {
		throw NotImplementedException("Unknown TimeZone '%s'", tz_name);
	}
	// adoptTimeZone takes over ownership, so give up ours.
	calendar->adoptTimeZone(tz.release());
}

Expand All @@ -83,7 +86,7 @@ timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros)
if (U_FAILURE(status)) {
throw InternalException("Unable to get ICU calendar time.");
}
return timestamp_t(millis * Interval::MICROS_PER_MSEC + micros);
return timestamp_t(millis * Interval::MICROS_PER_MSEC + int64_t(micros));
}

bool ICUDateFunc::TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result) {
Expand All @@ -98,7 +101,7 @@ bool ICUDateFunc::TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp
if (!TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(millis, Interval::MICROS_PER_MSEC, millis)) {
return false;
}
if (!TryAddOperator::Operation<int64_t, int64_t, int64_t>(millis, micros, millis)) {
if (!TryAddOperator::Operation<int64_t, int64_t, int64_t>(millis, int64_t(micros), millis)) {
return false;
}

Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/extension/icu/icu-timezone.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc {
int32_t secs;
int32_t frac;
Time::Convert(local_time, hr, mn, secs, frac);
int32_t millis = frac / Interval::MICROS_PER_MSEC;
int32_t millis = frac / int32_t(Interval::MICROS_PER_MSEC);
uint64_t micros = frac % Interval::MICROS_PER_MSEC;

// Use them to set the time in the time zone
Expand Down Expand Up @@ -199,7 +199,7 @@ struct ICUToNaiveTimestamp : public ICUDateFunc {
}

// Extract the time zone parts
auto micros = SetTime(calendar, instant);
auto micros = int32_t(SetTime(calendar, instant));
const auto era = ExtractField(calendar, UCAL_ERA);
const auto year = ExtractField(calendar, UCAL_YEAR);
const auto mm = ExtractField(calendar, UCAL_MONTH) + 1;
Expand All @@ -216,7 +216,7 @@ struct ICUToNaiveTimestamp : public ICUDateFunc {
const auto secs = ExtractField(calendar, UCAL_SECOND);
const auto millis = ExtractField(calendar, UCAL_MILLISECOND);

micros += millis * Interval::MICROS_PER_MSEC;
micros += millis * int32_t(Interval::MICROS_PER_MSEC);
dtime_t local_time = Time::FromTime(hr, mn, secs, micros);

timestamp_t naive;
Expand Down
13 changes: 7 additions & 6 deletions src/duckdb/extension/icu/icu_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct IcuBindData : public FunctionData {
string country;
string tag;

IcuBindData(duckdb::unique_ptr<icu::Collator> collator_p) : collator(std::move(collator_p)) {
explicit IcuBindData(duckdb::unique_ptr<icu::Collator> collator_p) : collator(std::move(collator_p)) {
}

IcuBindData(string language_p, string country_p) : language(std::move(language_p)), country(std::move(country_p)) {
Expand All @@ -59,7 +59,7 @@ struct IcuBindData : public FunctionData {
}
}

IcuBindData(string tag_p) : tag(std::move(tag_p)) {
explicit IcuBindData(string tag_p) : tag(std::move(tag_p)) {
UErrorCode status = U_ZERO_ERROR;
UCollator *ucollator = ucol_open(tag.c_str(), &status);
if (U_FAILURE(status)) {
Expand Down Expand Up @@ -120,7 +120,7 @@ const string IcuBindData::FUNCTION_PREFIX = "icu_collate_";
static int32_t ICUGetSortKey(icu::Collator &collator, string_t input, duckdb::unique_ptr<char[]> &buffer,
int32_t &buffer_size) {
icu::UnicodeString unicode_string =
icu::UnicodeString::fromUTF8(icu::StringPiece(input.GetData(), input.GetSize()));
icu::UnicodeString::fromUTF8(icu::StringPiece(input.GetData(), int32_t(input.GetSize())));
int32_t string_size = collator.getSortKey(unicode_string, reinterpret_cast<uint8_t *>(buffer.get()), buffer_size);
if (string_size > buffer_size) {
// have to resize the buffer
Expand Down Expand Up @@ -204,18 +204,19 @@ static ScalarFunction GetICUCollateFunction(const string &collation, const strin
string fname = IcuBindData::EncodeFunctionName(collation);
ScalarFunction result(fname, {LogicalType::VARCHAR}, LogicalType::VARCHAR, ICUCollateFunction, ICUCollateBind);
//! collation tag is added into the Function extra info
result.extra_info = std::move(tag);
result.extra_info = tag;
result.serialize = IcuBindData::Serialize;
result.deserialize = IcuBindData::Deserialize;
return result;
}

static void SetICUTimeZone(ClientContext &context, SetScope scope, Value &parameter) {
icu::StringPiece utf8(StringValue::Get(parameter));
auto str = StringValue::Get(parameter);
icu::StringPiece utf8(str);
const auto uid = icu::UnicodeString::fromUTF8(utf8);
duckdb::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createTimeZone(uid));
if (*tz == icu::TimeZone::getUnknown()) {
throw NotImplementedException("Unknown TimeZone setting");
throw NotImplementedException("Unknown TimeZone '%s'", str);
}
}

Expand Down
10 changes: 10 additions & 0 deletions src/duckdb/src/common/arrow/arrow_appender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,16 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
InitializeAppenderForType<ArrowScalarData<int64_t>>(append_data);
break;
case LogicalTypeId::UUID:
if (append_data.options.arrow_lossless_conversion) {
InitializeAppenderForType<ArrowScalarData<hugeint_t, hugeint_t, ArrowUUIDBlobConverter>>(append_data);
} else {
if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter>>(append_data);
} else {
InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, int32_t>>(append_data);
}
}
break;
case LogicalTypeId::HUGEINT:
InitializeAppenderForType<ArrowScalarData<hugeint_t>>(append_data);
break;
Expand Down
22 changes: 17 additions & 5 deletions src/duckdb/src/common/arrow/arrow_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,23 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
child.format = "g";
break;
case LogicalTypeId::UUID: {
// This is a canonical extension, hence needs the "arrow." prefix
child.format = "w:16";
auto schema_metadata = ArrowSchemaMetadata::MetadataFromName("arrow.uuid");
root_holder.metadata_info.emplace_back(schema_metadata.SerializeMetadata());
child.metadata = root_holder.metadata_info.back().get();
if (options.arrow_lossless_conversion) {
// This is a canonical extension, hence needs the "arrow." prefix
child.format = "w:16";
auto schema_metadata = ArrowSchemaMetadata::MetadataFromName("arrow.uuid");
root_holder.metadata_info.emplace_back(schema_metadata.SerializeMetadata());
child.metadata = root_holder.metadata_info.back().get();
} else {
if (options.produce_arrow_string_view) {
child.format = "vu";
} else {
if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
child.format = "U";
} else {
child.format = "u";
}
}
}
break;
}
case LogicalTypeId::VARCHAR:
Expand Down
24 changes: 24 additions & 0 deletions src/duckdb/src/common/enums/file_compression_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,28 @@ FileCompressionType FileCompressionTypeFromString(const string &input) {
}
}

//! Maps a file compression type onto the file-name extension used for it.
//! GZIP yields ".gz" and ZSTD yields ".zst"; any other type throws NotImplementedException.
string CompressionExtensionFromType(const FileCompressionType type) {
	if (type == FileCompressionType::GZIP) {
		return ".gz";
	}
	if (type == FileCompressionType::ZSTD) {
		return ".zst";
	}
	throw NotImplementedException("Compression Extension of file compression type is not implemented");
}

//! Returns true when `path` ends with the extension that CompressionExtensionFromType reports for `type`.
//! Everything from the first '?' onwards is dropped before the comparison, unless the path begins with
//! the literal prefix "\\?\", in which case the path is compared untouched.
bool IsFileCompressed(string path, FileCompressionType type) {
	const auto suffix = CompressionExtensionFromType(type);
	// NOTE(review): "\\?\" looks like the Windows extended-length path prefix, and the '?' cut-off looks
	// like it strips a URL query string - confirm against the callers.
	const bool keep_question_marks = StringUtil::StartsWith(path, "\\\\?\\");
	const std::size_t cutoff = keep_question_marks ? std::string::npos : path.find('?');
	// substr(0, npos) leaves the path unchanged
	path = path.substr(0, cutoff);
	return StringUtil::EndsWith(path, suffix);
}

} // namespace duckdb
6 changes: 3 additions & 3 deletions src/duckdb/src/common/virtual_file_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ unique_ptr<FileHandle> VirtualFileSystem::OpenFile(const string &path, FileOpenF
optional_ptr<FileOpener> opener) {
auto compression = flags.Compression();
if (compression == FileCompressionType::AUTO_DETECT) {
// auto detect compression settings based on file name
// auto-detect compression settings based on file name
auto lower_path = StringUtil::Lower(path);
if (StringUtil::EndsWith(lower_path, ".tmp")) {
// strip .tmp
lower_path = lower_path.substr(0, lower_path.length() - 4);
}
if (StringUtil::EndsWith(lower_path, ".gz")) {
if (IsFileCompressed(path, FileCompressionType::GZIP)) {
compression = FileCompressionType::GZIP;
} else if (StringUtil::EndsWith(lower_path, ".zst")) {
} else if (IsFileCompressed(path, FileCompressionType::ZSTD)) {
compression = FileCompressionType::ZSTD;
} else {
compression = FileCompressionType::UNCOMPRESSED;
Expand Down
10 changes: 8 additions & 2 deletions src/duckdb/src/core_functions/function_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ static const StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION_SET(BitwiseAndFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(ListHasAnyFunAlias),
DUCKDB_SCALAR_FUNCTION(PowOperatorFun),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ListInnerProductFunAlias),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ListNegativeInnerProductFunAlias),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ListDistanceFunAlias),
DUCKDB_SCALAR_FUNCTION_SET(LeftShiftFun),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ListCosineSimilarityFunAlias),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ListCosineDistanceFunAlias),
DUCKDB_SCALAR_FUNCTION_ALIAS(ListHasAllFunAlias2),
DUCKDB_SCALAR_FUNCTION_SET(RightShiftFun),
DUCKDB_SCALAR_FUNCTION_SET(AbsOperatorFun),
Expand All @@ -82,6 +82,7 @@ static const StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayAggrFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayAggregateFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayApplyFun),
DUCKDB_SCALAR_FUNCTION_SET(ArrayCosineDistanceFun),
DUCKDB_SCALAR_FUNCTION_SET(ArrayCosineSimilarityFun),
DUCKDB_SCALAR_FUNCTION_SET(ArrayCrossProductFun),
DUCKDB_SCALAR_FUNCTION_SET(ArrayDistanceFun),
Expand All @@ -92,6 +93,8 @@ static const StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayHasAllFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayHasAnyFun),
DUCKDB_SCALAR_FUNCTION_SET(ArrayInnerProductFun),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArrayNegativeDotProductFun),
DUCKDB_SCALAR_FUNCTION_SET(ArrayNegativeInnerProductFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayReduceFun),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArrayReverseSortFun),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArraySliceFun),
Expand Down Expand Up @@ -230,6 +233,7 @@ static const StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION_ALIAS(ListAggrFun),
DUCKDB_SCALAR_FUNCTION(ListAggregateFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(ListApplyFun),
DUCKDB_SCALAR_FUNCTION_SET(ListCosineDistanceFun),
DUCKDB_SCALAR_FUNCTION_SET(ListCosineSimilarityFun),
DUCKDB_SCALAR_FUNCTION_SET(ListDistanceFun),
DUCKDB_SCALAR_FUNCTION(ListDistinctFun),
Expand All @@ -239,6 +243,8 @@ static const StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION(ListHasAllFun),
DUCKDB_SCALAR_FUNCTION(ListHasAnyFun),
DUCKDB_SCALAR_FUNCTION_SET(ListInnerProductFun),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ListNegativeDotProductFun),
DUCKDB_SCALAR_FUNCTION_SET(ListNegativeInnerProductFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(ListPackFun),
DUCKDB_SCALAR_FUNCTION(ListReduceFun),
DUCKDB_SCALAR_FUNCTION_SET(ListReverseSortFun),
Expand Down
Loading

0 comments on commit 412147b

Please sign in to comment.