Skip to content

Commit

Permalink
Fixing style and review issues
Browse files Browse the repository at this point in the history
Signed-off-by: Teague Sterling <[email protected]>
  • Loading branch information
teaguesterling committed Nov 24, 2024
1 parent 0d73160 commit 617a914
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 17 deletions.
35 changes: 22 additions & 13 deletions src/common/iceberg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,30 +196,26 @@ string IcebergSnapshot::GetMetaDataPath(ClientContext &context, const string &pa
return path;
} else if (!fs.DirectoryExists(meta_path)) {
// Make sure we have a metadata directory to look in
throw IOException("Cannot open \""+path+"\": Metadata directory does not exist");
throw IOException("Cannot open \"%s\": Metadata directory does not exist", path);
} else if(StringUtil::EndsWith(table_version, ".text")||StringUtil::EndsWith(table_version, ".txt")) {
// We were given a hint filename
version_hint = GetTableVersionFromHint(meta_path, fs, table_version);
return GenerateMetaDataUrl(fs, meta_path, version_hint, metadata_compression_codec, version_format);
} else if(!StringUtil::StartsWith(table_version, "?")) {
} else if (table_version != UNKNOWN_TABLE_VERSION) {
// We were given an explicit version number
version_hint = table_version;
return GenerateMetaDataUrl(fs, meta_path, version_hint, metadata_compression_codec, version_format);
} else if(fs.FileExists(fs.JoinPath(meta_path, DEFAULT_VERSION_HINT_FILE))) {
} else if (fs.FileExists(fs.JoinPath(meta_path, DEFAULT_VERSION_HINT_FILE))) {
// We're guessing, but a version-hint.text exists so we'll use that
version_hint = GetTableVersionFromHint(meta_path, fs, DEFAULT_VERSION_HINT_FILE);
return GenerateMetaDataUrl(fs, meta_path, version_hint, metadata_compression_codec, version_format);
} else if (!UnsafeVersionGuessingEnabled(context)) {
// Make sure we're allowed to guess versions
throw InvalidInputException("No version was provided and no version-hint could be found, globbing the filesystem to locate the latest version is disabled by default as this is considered unsafe and could result in reading uncommitted data. To enable this use 'SET %s = true;'", VERSION_GUESSING_CONFIG_VARIABLE);
} else {
Value result;
(void)context.TryGetCurrentSetting("unsafe_enable_version_guessing", result);
if (result.IsNull() || !result.GetValue<bool>()) {
throw InvalidInputException("No version was provided and no version-hint could be found, globbing the filesystem to locate the latest version is disabled by default as this is considered unsafe and could result in reading uncommitted data. To enable this use 'SET unsafe_enable_version_guessing = true;'");
}

// We need to guess from file paths
// We are allowed to guess from file paths
return GuessTableVersion(meta_path, fs, table_version, metadata_compression_codec, version_format);
}

}


Expand Down Expand Up @@ -270,10 +266,19 @@ string IcebergSnapshot::GetTableVersionFromHint(const string &meta_path, FileSys
}
}

// Reports whether the setting named by VERSION_GUESSING_CONFIG_VARIABLE is
// present (non-NULL) and evaluates to true for the given client context.
// A NULL setting value is treated as "disabled".
bool IcebergSnapshot::UnsafeVersionGuessingEnabled(ClientContext &context) {
	Value guessing_setting;
	// The lookup's own return value is deliberately ignored; only the
	// resulting Value is inspected below.
	(void)context.TryGetCurrentSetting(VERSION_GUESSING_CONFIG_VARIABLE, guessing_setting);
	if (guessing_setting.IsNull()) {
		return false;
	}
	return guessing_setting.GetValue<bool>();
}


string IcebergSnapshot::GuessTableVersion(const string &meta_path, FileSystem &fs, string &table_version, string &metadata_compression_codec, string &version_format = DEFAULT_TABLE_VERSION_FORMAT) {
string selected_metadata;
string version_pattern = "*"; // TODO: Different "table_version" strings could customize this
string compression_suffix = "";


if (metadata_compression_codec == "gzip") {
compression_suffix = ".gz";
}
Expand All @@ -298,8 +303,12 @@ string IcebergSnapshot::GuessTableVersion(const string &meta_path, FileSystem &f
// Picks the metadata file to use from the candidates in found_metadata.
//
// found_metadata is sorted in place using std::string ordering and the
// greatest entry is returned. If no candidates were found an empty string
// is returned instead.
//
// Note: the ordering is a plain string comparison, so e.g. "v10..." sorts
// before "v9...".
//
// version_pattern and glob are currently unused; they are kept so the
// selection strategy can be customized later (see TODO below).
string IcebergSnapshot::PickTableVersion(vector<string> &found_metadata, string &version_pattern, string &glob) {
	// TODO: Different "table_version" strings could customize this
	// For now: just sort the versions and take the largest
	if (found_metadata.empty()) {
		// back() on an empty vector is undefined behavior, so this check
		// must come before any access to the candidates
		return string();
	}
	std::sort(found_metadata.begin(), found_metadata.end());
	return found_metadata.back();
}


Expand Down
19 changes: 15 additions & 4 deletions src/include/iceberg_metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,24 @@ using namespace duckdb_yyjson;

namespace duckdb {

static string VERSION_GUESSING_CONFIG_VARIABLE = "unsafe_enable_version_guessing";

// When this is provided (and unsafe_enable_version_guessing is true)
// we first look for DEFAULT_VERSION_HINT_FILE; if it doesn't exist we
// then search for versions matching the DEFAULT_TABLE_VERSION_FORMAT.
// We take the lexicographically "greatest" one as the latest version.
// Note that this will violate ACID constraints in some situations.
static string UNKNOWN_TABLE_VERSION = "?";

// First arg is the version string, second arg is either empty or ".gz" if gzip
// Allows for both "v###.gz.metadata.json" and "###.metadata.json" styles
static string DEFAULT_TABLE_VERSION_FORMAT = "v%s%s.metadata.json,%s%s.metadata.json";

// This isn't explicitly in the standard, but is a commonly used technique
static string DEFAULT_VERSION_HINT_FILE = "version-hint.text";

// Will first look for DEFAULT_VERSION_HINT_FILE then search for versions
// matching the DEFAULT_TABLE_VERSION_FORMAT, taking the "last" one
static string DEFAULT_TABLE_VERSION = "?";
// By default we will use the unknown version behavior mentioned above
static string DEFAULT_TABLE_VERSION = UNKNOWN_TABLE_VERSION;

struct IcebergColumnDefinition {
public:
Expand Down Expand Up @@ -83,10 +92,12 @@ class IcebergSnapshot {
static unique_ptr<SnapshotParseInfo> GetParseInfo(yyjson_doc &metadata_json);

protected:
//! Internal JSON parsing functions
//! Version extraction and identification
static bool UnsafeVersionGuessingEnabled(ClientContext &context);
static string GetTableVersionFromHint(const string &path, FileSystem &fs, string version_format);
static string GuessTableVersion(const string &meta_path, FileSystem &fs, string &table_version, string &metadata_compression_codec, string &version_format);
static string PickTableVersion(vector<string> &found_metadata, string &version_pattern, string &glob);
//! Internal JSON parsing functions
static yyjson_val *FindLatestSnapshotInternal(yyjson_val *snapshots);
static yyjson_val *FindSnapshotByIdInternal(yyjson_val *snapshots, idx_t target_id);
static yyjson_val *FindSnapshotByIdTimestampInternal(yyjson_val *snapshots, timestamp_t timestamp);
Expand Down

0 comments on commit 617a914

Please sign in to comment.