Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Logging #132

Merged
merged 8 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@ jobs:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
with:
# pip install duckdb==1.1.4.dev2005
duckdb_version: b470dea7ee
duckdb_version: main
ci_tools_version: main
extension_name: delta
enable_rust: true
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw'
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl'
extra_toolchains: 'python3'
vcpkg_commit: c82f74667287d3dc386bce81e44964370c91a289

duckdb-stable-deploy:
name: Deploy extension binaries
Expand All @@ -32,6 +30,6 @@ jobs:
secrets: inherit
with:
extension_name: delta
duckdb_version: main
ci_tools_version: main
duckdb_version: b470dea7ee
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw'
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl'
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/k
test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat

# Core extensions that we need for testing
#CORE_EXTENSIONS='tpcds;tpch;aws;azure;httpfs'
CORE_EXTENSIONS='tpcds;tpch;aws;azure;httpfs'

# Set this flag during building to enable the benchmark runner
ifeq (${BUILD_BENCHMARK}, 1)
Expand Down
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 1887 files
12 changes: 11 additions & 1 deletion src/delta_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

#include "delta_extension.hpp"

#include "delta_utils.hpp"
#include "delta_functions.hpp"
#include "duckdb.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/main/extension_util.hpp"
#include "duckdb/storage/storage_extension.hpp"
#include "storage/delta_catalog.hpp"
#include "storage/delta_transaction_manager.hpp"
#include "duckdb/main/config.hpp"

namespace duckdb {

Expand Down Expand Up @@ -59,9 +61,17 @@ static void LoadInternal(DatabaseInstance &instance) {
config.storage_extensions["delta"] = make_uniq<DeltaStorageExtension>();

config.AddExtensionOption("delta_scan_explain_files_filtered",
"Adds the filtered files to the explain output. Warning: this may change performance of "
"Adds the filtered files to the explain output. Warning: this may impact performance of "
"delta scan during explain analyze queries.",
LogicalType::BOOLEAN, Value(true));

config.AddExtensionOption(
"delta_kernel_logging",
"Forwards the internal logging of the Delta Kernel to the duckdb logger. Warning: this may impact "
"performance even with DuckDB logging disabled.",
LogicalType::BOOLEAN, Value(false), LoggerCallback::DuckDBSettingCallBack);

LoggerCallback::Initialize(instance);
}

void DeltaExtension::Load(DuckDB &db) {
Expand Down
72 changes: 71 additions & 1 deletion src/delta_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "duckdb.hpp"
#include "duckdb/main/extension_util.hpp"
#include "duckdb/main/database.hpp"

#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
#include <duckdb/planner/filter/null_filter.hpp>
Expand Down Expand Up @@ -171,7 +172,7 @@
}

void ExpressionVisitor::VisitTimestampLiteral(void *state, uintptr_t sibling_list_id, int64_t value) {
auto expression = make_uniq<ConstantExpression>(Value::TIMESTAMPTZ(static_cast<timestamp_t>(value)));
auto expression = make_uniq<ConstantExpression>(Value::TIMESTAMPTZ(timestamp_tz_t(value)));
static_cast<ExpressionVisitor *>(state)->AppendToList(sibling_list_id, std::move(expression));
}

Expand Down Expand Up @@ -634,4 +635,73 @@
}
}

void LoggerCallback::Initialize(DatabaseInstance &db_p) {
auto &instance = GetInstance();
unique_lock<mutex> lck(instance.lock);
if (instance.db.expired()) {
instance.db = db_p.shared_from_this();
}
}

void LoggerCallback::CallbackEvent(ffi::Event event) {
auto &instance = GetInstance();
auto db_locked = instance.db.lock();
if (db_locked) {
auto transformed_log_level = GetDuckDBLogLevel(event.level);
string constructed_log_message;
Logger::Log("delta.Kernel", *db_locked, transformed_log_level, [&]() {
auto log_type = KernelUtils::FromDeltaString(event.target);
auto message = KernelUtils::FromDeltaString(event.message);
auto file = KernelUtils::FromDeltaString(event.file);
if (!file.empty()) {
constructed_log_message = StringUtil::Format("[%s] %s@%u : %s ", log_type, file, event.line, message);
} else {
constructed_log_message = message;
}

return constructed_log_message.c_str();
});
}
}

LogLevel LoggerCallback::GetDuckDBLogLevel(ffi::Level level) {
switch (level) {
case ffi::Level::TRACE:
return LogLevel::LOG_TRACE;
case ffi::Level::DEBUGGING:
return LogLevel::LOG_DEBUG;
case ffi::Level::INFO:
return LogLevel::LOG_INFO;
case ffi::Level::WARN:
return LogLevel::LOG_WARN;
case ffi::Level::ERROR:
return LogLevel::LOG_ERROR;
}
}

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Azurite (local azure test server) tests (Linux)

control reaches end of non-void function [-Wreturn-type]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Azurite (local azure test server) tests (Linux)

control reaches end of non-void function [-Wreturn-type]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Generated Tests (Linux)

control reaches end of non-void function [-Wreturn-type]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Generated Tests (Linux)

control reaches end of non-void function [-Wreturn-type]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Performance Regression Tests

control reaches end of non-void function [-Wreturn-type]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Performance Regression Tests

control reaches end of non-void function [-Wreturn-type]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Minio (local S3 test server) tests (Linux)

control reaches end of non-void function [-Wreturn-type]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Minio (local S3 test server) tests (Linux)

control reaches end of non-void function [-Wreturn-type]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Windows (windows_amd64, x64-windows-static-md)

'duckdb::LoggerCallback::GetDuckDBLogLevel': not all control paths return a value [D:\a\duckdb-delta\duckdb-delta\build\release\extension\delta\delta_extension.vcxproj]

Check warning on line 680 in src/delta_utils.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Windows (windows_amd64, x64-windows-static-md)

'duckdb::LoggerCallback::GetDuckDBLogLevel': not all control paths return a value [D:\a\duckdb-delta\duckdb-delta\build\release\extension\delta\delta_loadable_extension.vcxproj]

LoggerCallback &LoggerCallback::GetInstance() {
static LoggerCallback instance;
return instance;
}

void LoggerCallback::DuckDBSettingCallBack(ClientContext &context, SetScope scope, Value &parameter) {
Value current_setting;
auto res = context.TryGetCurrentSetting("delta_kernel_logging", current_setting);

if (res.GetScope() == SettingScope::INVALID) {
throw InternalException("Failed to find setting 'delta_kernel_logging'");
}

if (current_setting.GetValue<bool>() && !parameter.GetValue<bool>()) {
throw InvalidInputException("Can not disable 'delta_kernel_logging' after enabling it. You can disable DuckDB "
"logging with SET enable_logging=false, but there will still be some performance "
"overhead from 'delta_kernel_logging'"
"that can only be mitigated by restarting DuckDB");
}

if (!current_setting.GetValue<bool>() && parameter.GetValue<bool>()) {
ffi::enable_event_tracing(LoggerCallback::CallbackEvent, ffi::Level::TRACE);
}
}

}; // namespace duckdb
22 changes: 22 additions & 0 deletions src/include/delta_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// TODO: clean up this file as we go

namespace duckdb {
class DatabaseInstance;

class ExpressionVisitor : public ffi::EngineExpressionVisitor {
using FieldList = vector<unique_ptr<ParsedExpression>>;
Expand Down Expand Up @@ -336,4 +337,25 @@ class PredicateVisitor : public ffi::EnginePredicate {
uintptr_t VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState *state);
};

// Singleton class to forward logs to DuckDB
class LoggerCallback {
public:
//! The Callback for the DuckDB setting to hook up Delta Kernel Logging to the DuckDB logger
static void DuckDBSettingCallBack(ClientContext &context, SetScope scope, Value &parameter);

//! Singleton GetInstance
static LoggerCallback &GetInstance();
static void Initialize(DatabaseInstance &db);
static void CallbackEvent(ffi::Event log_line);

static LogLevel GetDuckDBLogLevel(ffi::Level);

protected:
LoggerCallback() {
}

mutex lock;
weak_ptr<DatabaseInstance> db;
};

} // namespace duckdb
12 changes: 6 additions & 6 deletions src/storage/delta_catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,22 +60,22 @@ bool DeltaCatalog::UseCachedSnapshot() {

optional_idx DeltaCatalog::GetCatalogVersion(ClientContext &context) {
auto &delta_transaction = DeltaTransaction::Get(context, *this);
idx_t version = DConstants::INVALID_INDEX;
idx_t version = DConstants::INVALID_INDEX;

// Option 1: snapshot is cached table-wide
auto cached_snapshot = main_schema->GetCachedTable();
if (cached_snapshot) {
version = cached_snapshot->snapshot->GetVersion();
version = cached_snapshot->snapshot->GetVersion();
}

// Option 2: snapshot is cached in transaction
if (delta_transaction.table_entry) {
version = delta_transaction.table_entry->snapshot->GetVersion();
version = delta_transaction.table_entry->snapshot->GetVersion();
}

if (version != DConstants::INVALID_INDEX) {
return version;
}
if (version != DConstants::INVALID_INDEX) {
return version;
}

return optional_idx::Invalid();
}
Expand Down
44 changes: 44 additions & 0 deletions test/sql/delta_kernel_rs/logging.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# name: test/sql/delta_kernel_rs/logging.test
# description: test that delta kernel log entries are properly propagated to the DuckDB logger
# group: [delta_kernel_rs]

require parquet

require delta

require-env DELTA_KERNEL_TESTS_PATH

statement ok
set enable_logging=true;

statement ok
SELECT * FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/basic_partitioned')

# No kernel logging available yet: we need to set delta_kernel_logging=true
query I
SELECT count(*) FROM duckdb_logs WHERE starts_with(type, 'delta.Kernel')
----
0

statement ok
set delta_kernel_logging=true;

statement ok
set logging_level = 'TRACE';

statement ok
SELECT * FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/basic_partitioned')

# Now we have log!
query I
SELECT count(*) > 50 FROM duckdb_logs WHERE starts_with(type, 'delta.Kernel')
----
true

statement ok
set delta_kernel_logging=true;

statement error
set delta_kernel_logging=false;
----
Invalid Input Error: Can not disable 'delta_kernel_logging' after enabling it
Loading