Update vendored DuckDB sources to eadda4a
duckdblabs-bot committed Sep 19, 2024
1 parent eadda4a commit be8126c
Showing 23 changed files with 176 additions and 52 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt

Large diffs are not rendered by default.

18 changes: 14 additions & 4 deletions src/duckdb/extension/json/include/json_common.hpp
@@ -241,12 +241,16 @@ struct JSONCommon {
 	};
 
 	//! Get JSON value using JSON path query (safe, checks the path query)
-	static inline yyjson_val *Get(yyjson_val *val, const string_t &path_str) {
+	static inline yyjson_val *Get(yyjson_val *val, const string_t &path_str, bool integral_argument) {
 		auto ptr = path_str.GetData();
 		auto len = path_str.GetSize();
 		if (len == 0) {
 			return GetUnsafe(val, ptr, len);
 		}
+		if (integral_argument) {
+			auto str = "$[" + path_str.GetString() + "]";
+			return GetUnsafe(val, str.c_str(), str.length());
+		}
 		switch (*ptr) {
 		case '/': {
 			// '/' notation must be '\0'-terminated
@@ -260,9 +264,15 @@ }
 			}
 			return GetUnsafe(val, ptr, len);
 		}
-		default:
-			auto str = "/" + string(ptr, len);
-			return GetUnsafe(val, str.c_str(), len + 1);
+		default: {
+			string path;
+			if (memchr(ptr, '"', len)) {
+				path = "/" + string(ptr, len);
+			} else {
+				path = "$.\"" + path_str.GetString() + "\"";
+			}
+			return GetUnsafe(val, path.c_str(), path.length());
+		}
 		}
 	}

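As a reading aid, the rewrite rules above condense into a standalone sketch (std::string in place of DuckDB's string_t; the '$' passthrough is taken from the bind-time CheckPath further down, and the '\0'-terminated handling of the '/' case is glossed over). This is also why the json_group_structure macro below moves from ->'0' to ->0: the string '0' now binds as the object key $."0", while the integer 0 indexes the array.

#include <cstring>
#include <iostream>
#include <string>

// Sketch only: integral arguments become "$[i]" array indexing, keys containing
// a double quote fall back to "/key" pointer syntax (they cannot be wrapped in
// $."..."), and any other bare key becomes the quoted path $."key".
static std::string NormalizePath(const std::string &path, bool integral_argument) {
	if (path.empty()) {
		return path;
	}
	if (integral_argument) {
		return "$[" + path + "]";
	}
	if (path[0] == '/' || path[0] == '$') {
		return path; // already pointer / path syntax
	}
	if (memchr(path.data(), '"', path.size())) {
		return "/" + path;
	}
	return "$.\"" + path + "\"";
}

int main() {
	std::cout << NormalizePath("0", true) << '\n';     // $[0]   (array index)
	std::cout << NormalizePath("0", false) << '\n';    // $."0"  (object key)
	std::cout << NormalizePath("a.b", false) << '\n';  // $."a.b"
	std::cout << NormalizePath("a\"b", false) << '\n'; // /a"b
}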
14 changes: 11 additions & 3 deletions src/duckdb/extension/json/include/json_executors.hpp
@@ -8,6 +8,7 @@
 
 #pragma once
 
+#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/execution/expression_executor.hpp"
 #include "json_functions.hpp"

@@ -88,11 +89,18 @@ struct JSONExecutors {
 			}
 		} else { // Columnref path
 			D_ASSERT(info.path_type == JSONCommon::JSONPathType::REGULAR);
-			auto &paths = args.data[1];
+			unique_ptr<Vector> casted_paths;
+			if (args.data[1].GetType().id() == LogicalTypeId::VARCHAR) {
+				casted_paths = make_uniq<Vector>(args.data[1]);
+			} else {
+				casted_paths = make_uniq<Vector>(LogicalTypeId::VARCHAR);
+				VectorOperations::DefaultCast(args.data[1], *casted_paths, args.size(), true);
+			}
 			BinaryExecutor::ExecuteWithNulls<string_t, string_t, T>(
-			    inputs, paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
+			    inputs, *casted_paths, result, args.size(),
+			    [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
 				    auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
-				    auto val = JSONCommon::Get(doc->root, path);
+				    auto val = JSONCommon::Get(doc->root, path, args.data[1].GetType().IsIntegral());
 				    if (SET_NULL_IF_NOT_FOUND && !val) {
 					    mask.SetInvalid(idx);
 					    return T {};
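The shape of that fix, as a standalone sketch (illustrative names, std::vector standing in for DuckDB's Vector): the row-wise executor only consumes strings, so a non-VARCHAR path column is rendered to strings once per chunk, and whether the original column was integral is decided once rather than per row.

#include <cstdint>
#include <string>
#include <vector>

// Illustrative stand-in, not DuckDB's API: prepare the path column before the
// per-row loop runs, mirroring the casted_paths logic above.
struct PreparedPaths {
	bool integral;                   // drives "$[i]" vs "$.\"key\"" in Get
	std::vector<std::string> values; // VARCHAR-rendered paths
};

static PreparedPaths PreparePaths(const std::vector<int64_t> &ints) {
	PreparedPaths out {true, {}};
	out.values.reserve(ints.size());
	for (auto v : ints) {
		out.values.push_back(std::to_string(v)); // the DefaultCast equivalent
	}
	return out;
}

static PreparedPaths PreparePaths(const std::vector<std::string> &strs) {
	return {false, strs}; // already VARCHAR: reuse as-is
}

int main() {
	auto a = PreparePaths(std::vector<int64_t> {0, 1, 2});    // integral column
	auto b = PreparePaths(std::vector<std::string> {"duck"}); // VARCHAR column
	return a.integral && !b.integral ? 0 : 1;
}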
2 changes: 1 addition & 1 deletion src/duckdb/extension/json/json_extension.cpp
@@ -27,7 +27,7 @@ static DefaultMacro json_macros[] = {
     "json_group_structure",
     {"x", nullptr},
     {{nullptr, nullptr}},
-    "json_structure(json_group_array(x))->'0'"},
+    "json_structure(json_group_array(x))->0"},
    {DEFAULT_SCHEMA, "json", {"x", nullptr}, {{nullptr, nullptr}}, "json_extract(x, '$')"},
    {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}};

23 changes: 16 additions & 7 deletions src/duckdb/extension/json/json_functions.cpp
@@ -21,21 +21,25 @@ static JSONPathType CheckPath(const Value &path_val, string &path, size_t &len)
 	const auto path_str_val = path_val.DefaultCastAs(LogicalType::VARCHAR);
 	auto path_str = path_str_val.GetValueUnsafe<string_t>();
 	len = path_str.GetSize();
-	auto ptr = path_str.GetData();
+	const auto ptr = path_str.GetData();
 	// Empty strings and invalid $ paths yield an error
 	if (len == 0) {
 		throw BinderException("Empty JSON path");
 	}
 	JSONPathType path_type = JSONPathType::REGULAR;
-	if (*ptr == '$') {
-		path_type = JSONCommon::ValidatePath(ptr, len, true);
-	}
 	// Copy over string to the bind data
 	if (*ptr == '/' || *ptr == '$') {
 		path = string(ptr, len);
-	} else {
+	} else if (path_val.type().IsIntegral()) {
+		path = "$[" + string(ptr, len) + "]";
+	} else if (memchr(ptr, '"', len)) {
 		path = "/" + string(ptr, len);
-		len++;
+	} else {
+		path = "$.\"" + string(ptr, len) + "\"";
 	}
+	len = path.length();
+	if (*path.c_str() == '$') {
+		path_type = JSONCommon::ValidatePath(path.c_str(), len, true);
+	}
 	return path_type;
 }
@@ -67,7 +71,11 @@ unique_ptr<FunctionData> JSONReadFunctionData::Bind(ClientContext &context, Scal
 			path_type = CheckPath(path_val, path, len);
 		}
 	}
-	bound_function.arguments[1] = LogicalType::VARCHAR;
+	if (arguments[1]->return_type.IsIntegral()) {
+		bound_function.arguments[1] = LogicalType::BIGINT;
+	} else {
+		bound_function.arguments[1] = LogicalType::VARCHAR;
+	}
 	if (path_type == JSONCommon::JSONPathType::WILDCARD) {
 		bound_function.return_type = LogicalType::LIST(bound_function.return_type);
 	}
@@ -117,6 +125,7 @@ unique_ptr<FunctionData> JSONReadManyFunctionData::Bind(ClientContext &context,
 
 JSONFunctionLocalState::JSONFunctionLocalState(Allocator &allocator) : json_allocator(allocator) {
 }
+
 JSONFunctionLocalState::JSONFunctionLocalState(ClientContext &context)
     : JSONFunctionLocalState(BufferAllocator::Get(context)) {
 }
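End to end, the new binding (BIGINT for integral path arguments) and the path rewrites are observable through the stable C API. A hedged sketch against this vendored build, with error handling mostly elided; the expected outputs assume the new $[i] and $."key" rewrites:

#include <stdio.h>
#include "duckdb.h"

int main() {
	duckdb_database db;
	duckdb_connection con;
	duckdb_result res;
	if (duckdb_open(NULL, &db) != DuckDBSuccess || duckdb_connect(db, &con) != DuckDBSuccess) {
		return 1;
	}
	// Integral argument: binds as BIGINT and indexes the array -> prints 2
	duckdb_query(con, "SELECT '[1, 2, 3]'::JSON -> 1", &res);
	char *v = duckdb_value_varchar(&res, 0, 0);
	printf("%s\n", v);
	duckdb_free(v);
	duckdb_destroy_result(&res);
	// Bare key containing '.': bound as the single quoted key $."a.b" -> prints 42
	duckdb_query(con, "SELECT '{\"a.b\": 42}'::JSON -> 'a.b'", &res);
	v = duckdb_value_varchar(&res, 0, 0);
	printf("%s\n", v);
	duckdb_free(v);
	duckdb_destroy_result(&res);
	duckdb_disconnect(&con);
	duckdb_close(&db);
	return 0;
}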
3 changes: 2 additions & 1 deletion src/duckdb/extension/parquet/parquet_writer.cpp
@@ -350,7 +350,8 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file
 	file_meta_data.version = 1;
 
 	file_meta_data.__isset.created_by = true;
-	file_meta_data.created_by = "DuckDB";
+	file_meta_data.created_by =
+	    StringUtil::Format("DuckDB version %s (build %s)", DuckDB::LibraryVersion(), DuckDB::SourceID());
 
 	file_meta_data.schema.resize(1);

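Illustrative only: with the version macros from this tree (see pragma_version.cpp below), the Parquet footer's created_by field now carries a string of this shape rather than the bare "DuckDB". The two values are hard-coded here for illustration; the writer takes them from DuckDB::LibraryVersion() and DuckDB::SourceID() at runtime.

#include <cstdio>

int main() {
	// Mirrors the format string in the diff above.
	std::printf("DuckDB version %s (build %s)\n", "v1.1.1-dev238", "f0f78913a5");
}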
2 changes: 1 addition & 1 deletion src/duckdb/src/common/arrow/arrow_converter.cpp
@@ -166,7 +166,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		break;
 	}
 	case LogicalTypeId::VARCHAR:
-		if (type.IsJSONType()) {
+		if (type.IsJSONType() && options.arrow_lossless_conversion) {
 			auto schema_metadata = ArrowSchemaMetadata::MetadataFromName("arrow.json");
 			root_holder.metadata_info.emplace_back(schema_metadata.SerializeMetadata());
 			child.metadata = root_holder.metadata_info.back().get();
16 changes: 16 additions & 0 deletions src/duckdb/src/common/types/column/column_data_allocator.cpp
@@ -2,6 +2,7 @@
 
 #include "duckdb/common/types/column/column_data_collection_segment.hpp"
 #include "duckdb/storage/buffer/block_handle.hpp"
+#include "duckdb/storage/buffer/buffer_pool.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
 
 namespace duckdb {
@@ -45,6 +46,21 @@ ColumnDataAllocator::ColumnDataAllocator(ColumnDataAllocator &other) {
 	}
 }
 
+ColumnDataAllocator::~ColumnDataAllocator() {
+	if (type == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR) {
+		return;
+	}
+	for (auto &block : blocks) {
+		block.handle->SetDestroyBufferUpon(DestroyBufferUpon::UNPIN);
+	}
+	const auto data_size = SizeInBytes();
+	blocks.clear();
+	if (Allocator::SupportsFlush() &&
+	    data_size > alloc.buffer_manager->GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
+		Allocator::FlushAll();
+	}
+}
+
 BufferHandle ColumnDataAllocator::Pin(uint32_t block_id) {
 	D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR || type == ColumnDataAllocatorType::HYBRID);
 	shared_ptr<BlockHandle> handle;
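The destructor follows a pattern worth spelling out: measure the data, free it in bulk, then hand freed memory back to the OS if the deallocation was large. A minimal sketch with simplified stand-ins for DuckDB's Allocator and BufferPool; the threshold value and the flush behavior are assumptions for illustration, not DuckDB's defaults.

#include <cstddef>
#include <cstdio>
#include <vector>

// Stand-ins, illustration only; the real check depends on the allocator backend.
struct Allocator {
	static bool SupportsFlush() { return true; }
	static void FlushAll() { std::puts("flushing allocator caches"); }
};

struct BufferPool {
	std::size_t threshold = 128ULL << 20; // assumed value; configurable in DuckDB
	std::size_t GetAllocatorBulkDeallocationFlushThreshold() const { return threshold; }
};

// Mirrors the destructor above: measure, free in bulk, then flush if the
// deallocation was large enough to be worth returning pages to the OS.
void ReleaseAll(std::vector<std::vector<char>> &blocks, const BufferPool &pool) {
	std::size_t data_size = 0;
	for (const auto &b : blocks) {
		data_size += b.size();
	}
	blocks.clear(); // the bulk deallocation itself
	if (Allocator::SupportsFlush() && data_size > pool.GetAllocatorBulkDeallocationFlushThreshold()) {
		Allocator::FlushAll();
	}
}

int main() {
	BufferPool pool;
	std::vector<std::vector<char>> blocks(10, std::vector<char>(32 << 20)); // 10 x 32 MiB
	ReleaseAll(blocks, pool); // 320 MiB > threshold: flushes
}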
5 changes: 5 additions & 0 deletions src/duckdb/src/common/types/row/tuple_data_segment.cpp
@@ -1,6 +1,7 @@
 #include "duckdb/common/types/row/tuple_data_segment.hpp"
 
 #include "duckdb/common/types/row/tuple_data_allocator.hpp"
+#include "duckdb/storage/buffer/buffer_pool.hpp"
 
 namespace duckdb {

@@ -118,6 +119,10 @@ TupleDataSegment::~TupleDataSegment() {
 	}
 	pinned_row_handles.clear();
 	pinned_heap_handles.clear();
+	if (Allocator::SupportsFlush() && allocator &&
+	    data_size > allocator->GetBufferManager().GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
+		Allocator::FlushAll();
+	}
 	allocator.reset();
 }

src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp
@@ -618,7 +618,12 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
 
 		if (tail_count < result_count) {
 			result_count = tail_count;
-			chunk.Slice(*sel, result_count);
+			if (result_count == 0) {
+				// Need to reset here otherwise we may use the non-flat chunk when constructing LEFT/OUTER
+				chunk.Reset();
+			} else {
+				chunk.Slice(*sel, result_count);
+			}
 		}
 	}

36 changes: 33 additions & 3 deletions src/duckdb/src/function/cast/decimal_cast.cpp
@@ -114,11 +114,42 @@ struct DecimalScaleDownOperator {
 	}
 };
 
+// This function detects if we can scale a decimal down to another.
+template <class INPUT_TYPE>
+bool CanScaleDownDecimal(INPUT_TYPE input, DecimalScaleInput<INPUT_TYPE> &data) {
+	int64_t divisor = UnsafeNumericCast<int64_t>(NumericHelper::POWERS_OF_TEN[data.source_scale]);
+	auto value = input % divisor;
+	auto rounded_input = input;
+	if (rounded_input < 0) {
+		rounded_input *= -1;
+		value *= -1;
+	}
+	if (value >= divisor / 2) {
+		rounded_input += divisor;
+	}
+	return rounded_input < data.limit && rounded_input > -data.limit;
+}
+
+template <>
+bool CanScaleDownDecimal<hugeint_t>(hugeint_t input, DecimalScaleInput<hugeint_t> &data) {
+	auto divisor = UnsafeNumericCast<hugeint_t>(Hugeint::POWERS_OF_TEN[data.source_scale]);
+	hugeint_t value = input % divisor;
+	hugeint_t rounded_input = input;
+	if (rounded_input < 0) {
+		rounded_input *= -1;
+		value *= -1;
+	}
+	if (value >= divisor / 2) {
+		rounded_input += divisor;
+	}
+	return rounded_input < data.limit && rounded_input > -data.limit;
+}
+
 struct DecimalScaleDownCheckOperator {
 	template <class INPUT_TYPE, class RESULT_TYPE>
 	static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) {
-		auto data = (DecimalScaleInput<INPUT_TYPE> *)dataptr;
-		if (input >= data->limit || input <= -data->limit) {
+		auto data = static_cast<DecimalScaleInput<INPUT_TYPE> *>(dataptr);
+		if (!CanScaleDownDecimal(input, *data)) {
 			auto error = StringUtil::Format("Casting value \"%s\" to type %s failed: value is out of range!",
 			                                Decimal::ToString(input, data->source_width, data->source_scale),
 			                                data->result.GetType().ToString());
@@ -145,7 +176,6 @@ bool TemplatedDecimalScaleDown(Vector &source, Vector &result, idx_t count, Cast
 		return true;
 	} else {
 		// type might not fit: check limit
-
 		auto limit = UnsafeNumericCast<SOURCE>(POWERS_SOURCE::POWERS_OF_TEN[target_width]);
 		DecimalScaleInput<SOURCE> input(result, limit, divide_factor, parameters, source_width, source_scale);
 		UnaryExecutor::GenericExecute<SOURCE, DEST, DecimalScaleDownCheckOperator>(source, result, count, &input,
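A worked example of the new check, with the template machinery stripped down to plain int64_t (divisor and limit passed explicitly; a sketch, not the DuckDB entry point). For DECIMAL(4,2) -> DECIMAL(3,1), the limit is 10^4 in source scale, and 99.99 is now rejected because it rounds to 100.0, which does not fit DECIMAL(3,1) — the old `input >= limit` check would have let it through.

#include <cstdint>
#include <cstdio>

// Plain-int64_t rendering of CanScaleDownDecimal above.
static bool CanScaleDown(int64_t input, int64_t divisor, int64_t limit) {
	int64_t value = input % divisor;
	int64_t rounded_input = input;
	if (rounded_input < 0) {
		rounded_input *= -1;
		value *= -1;
	}
	if (value >= divisor / 2) {
		rounded_input += divisor; // rounding up pushes the magnitude over the edge
	}
	return rounded_input < limit && rounded_input > -limit;
}

int main() {
	// DECIMAL(4,2) -> DECIMAL(3,1): divisor = 10^2 (source scale), limit = 10^4 (target width in source scale)
	std::printf("%d\n", CanScaleDown(9999, 100, 10000)); // 0: 99.99 rounds to 100.0, overflows DECIMAL(3,1)
	std::printf("%d\n", CanScaleDown(1234, 100, 10000)); // 1: 12.34 rounds to 12.3, fits
}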
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "1-dev196"
+#define DUCKDB_PATCH_VERSION "1-dev238"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.1.1-dev196"
+#define DUCKDB_VERSION "v1.1.1-dev238"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "44bba02cea"
+#define DUCKDB_SOURCE_ID "f0f78913a5"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
2 changes: 2 additions & 0 deletions src/duckdb/src/include/duckdb.h
@@ -133,6 +133,8 @@ typedef enum DUCKDB_TYPE {
 	DUCKDB_TYPE_ANY = 34,
 	// duckdb_varint
 	DUCKDB_TYPE_VARINT = 35,
+	// SQLNULL type
+	DUCKDB_TYPE_SQLNULL = 36,
 } duckdb_type;
 //! An enum over the returned state of different functions.
 typedef enum duckdb_state { DuckDBSuccess = 0, DuckDBError = 1 } duckdb_state;
src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp
@@ -32,6 +32,7 @@ class ColumnDataAllocator {
 	explicit ColumnDataAllocator(BufferManager &buffer_manager);
 	ColumnDataAllocator(ClientContext &context, ColumnDataAllocatorType allocator_type);
 	ColumnDataAllocator(ColumnDataAllocator &allocator);
+	~ColumnDataAllocator();
 
 	//! Returns an allocator object to allocate with. This returns the allocator in IN_MEMORY_ALLOCATOR, and a buffer
 	//! allocator in case of BUFFER_MANAGER_ALLOCATOR.
1 change: 0 additions & 1 deletion src/duckdb/src/include/duckdb/common/types/hugeint.hpp
@@ -128,7 +128,6 @@ class Hugeint {
 
 	static int Sign(hugeint_t n);
 	static hugeint_t Abs(hugeint_t n);
-
 	// comparison operators
 	// note that everywhere here we intentionally use bitwise ops
 	// this is because they seem to be consistently much faster (benchmarked on a Macbook Pro)
src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp
@@ -50,6 +50,7 @@ class BufferPool {
 
 	//! If bulk deallocation larger than this occurs, flush outstanding allocations
 	void SetAllocatorBulkDeallocationFlushThreshold(idx_t threshold);
+	idx_t GetAllocatorBulkDeallocationFlushThreshold();
 
 	void UpdateUsedMemory(MemoryTag tag, int64_t size);

4 changes: 4 additions & 0 deletions src/duckdb/src/main/capi/helper-c.cpp
@@ -74,6 +74,8 @@ LogicalTypeId ConvertCTypeToCPP(duckdb_type c_type) {
 		return LogicalTypeId::TIMESTAMP_TZ;
 	case DUCKDB_TYPE_ANY:
 		return LogicalTypeId::ANY;
+	case DUCKDB_TYPE_SQLNULL:
+		return LogicalTypeId::SQLNULL;
 	default: // LCOV_EXCL_START
 		D_ASSERT(0);
 		return LogicalTypeId::INVALID;
@@ -154,6 +156,8 @@ duckdb_type ConvertCPPTypeToC(const LogicalType &sql_type) {
 		return DUCKDB_TYPE_ARRAY;
 	case LogicalTypeId::ANY:
 		return DUCKDB_TYPE_ANY;
+	case LogicalTypeId::SQLNULL:
+		return DUCKDB_TYPE_SQLNULL;
 	default: // LCOV_EXCL_START
 		D_ASSERT(0);
 		return DUCKDB_TYPE_INVALID;
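With the enum value and the two-way mapping in place, C API consumers can detect NULL-typed columns directly instead of hitting the DUCKDB_TYPE_INVALID default branch. A small sketch, assuming a bare NULL literal binds as SQLNULL in this build:

#include <stdio.h>
#include "duckdb.h"

int main() {
	duckdb_database db;
	duckdb_connection con;
	duckdb_result res;
	duckdb_open(NULL, &db);
	duckdb_connect(db, &con);
	duckdb_query(con, "SELECT NULL AS n", &res);
	if (duckdb_column_type(&res, 0) == DUCKDB_TYPE_SQLNULL) {
		printf("column '%s' has the SQLNULL logical type\n", duckdb_column_name(&res, 0));
	}
	duckdb_destroy_result(&res);
	duckdb_disconnect(&con);
	duckdb_close(&db);
	return 0;
}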
7 changes: 6 additions & 1 deletion src/duckdb/src/optimizer/filter_pushdown.cpp
@@ -93,9 +93,14 @@ unique_ptr<LogicalOperator> FilterPushdown::Rewrite(unique_ptr<LogicalOperator>
 		// we can just push directly through these operations without any rewriting
 		op->children[0] = Rewrite(std::move(op->children[0]));
 		return op;
-	case LogicalOperatorType::LOGICAL_MATERIALIZED_CTE:
+	case LogicalOperatorType::LOGICAL_MATERIALIZED_CTE: {
+		// we can't push filters into the materialized CTE (LHS), but we do want to recurse into it
+		FilterPushdown pushdown(optimizer, convert_mark_joins);
+		op->children[0] = pushdown.Rewrite(std::move(op->children[0]));
+		// we can push filters into the rest of the query plan (RHS)
+		op->children[1] = Rewrite(std::move(op->children[1]));
+		return op;
+	}
 	case LogicalOperatorType::LOGICAL_GET:
 		return PushdownGet(std::move(op));
 	case LogicalOperatorType::LOGICAL_LIMIT:
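The control flow, reduced to a toy operator tree (names illustrative, not DuckDB's): the CTE definition gets a fresh pushdown context so filters gathered above it never leak into the materialized side, while its consumer keeps the current context.

#include <memory>
#include <vector>

// Toy sketch of the recursion above; `filters` stands in for the pushdown state.
struct Op {
	bool is_materialized_cte = false;
	std::vector<std::unique_ptr<Op>> children;
};

struct Pushdown {
	std::vector<int> filters; // illustrative filter set

	std::unique_ptr<Op> Rewrite(std::unique_ptr<Op> op) {
		if (op->is_materialized_cte) {
			Pushdown fresh;                                        // empty context for the CTE definition (LHS)
			op->children[0] = fresh.Rewrite(std::move(op->children[0]));
			op->children[1] = Rewrite(std::move(op->children[1])); // current filters continue into the consumer (RHS)
			return op;
		}
		for (auto &child : op->children) {
			child = Rewrite(std::move(child));
		}
		return op;
	}
};

int main() {
	auto cte = std::make_unique<Op>();
	cte->is_materialized_cte = true;
	cte->children.push_back(std::make_unique<Op>()); // definition
	cte->children.push_back(std::make_unique<Op>()); // consumer
	Pushdown pd;
	pd.filters = {1, 2};
	cte = pd.Rewrite(std::move(cte));
}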