Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enum type support #193

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ SRCS = src/scan/heap_reader.cpp \
src/scan/postgres_seq_scan.cpp \
src/utility/copy.cpp \
src/vendor/pg_explain.cpp \
src/types/pgduckdb_enum.cpp \
src/pgduckdb_metadata_cache.cpp \
src/pgduckdb_detoast.cpp \
src/pgduckdb_duckdb.cpp \
Expand All @@ -26,6 +27,7 @@ SRCS = src/scan/heap_reader.cpp \
src/catalog/pgduckdb_storage.cpp \
src/catalog/pgduckdb_schema.cpp \
src/catalog/pgduckdb_table.cpp \
src/catalog/pgduckdb_type.cpp \
src/catalog/pgduckdb_transaction.cpp \
src/catalog/pgduckdb_transaction_manager.cpp \
src/catalog/pgduckdb_catalog.cpp
Expand Down
3 changes: 3 additions & 0 deletions include/pgduckdb/catalog/pgduckdb_transaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "duckdb/transaction/transaction.hpp"
#include "pgduckdb/catalog/pgduckdb_table.hpp"
#include "pgduckdb/catalog/pgduckdb_type.hpp"
#include "pgduckdb/catalog/pgduckdb_schema.hpp"

namespace duckdb {
Expand All @@ -15,11 +16,13 @@ class SchemaItems {

public:
optional_ptr<CatalogEntry> GetTable(const string &name, PlannerInfo *planner_info);
optional_ptr<CatalogEntry> GetType(const string &name);

public:
string name;
unique_ptr<PostgresSchema> schema;
case_insensitive_map_t<unique_ptr<PostgresTable>> tables;
case_insensitive_map_t<unique_ptr<PostgresType>> types;
};

class PostgresTransaction : public Transaction {
Expand Down
31 changes: 31 additions & 0 deletions include/pgduckdb/catalog/pgduckdb_type.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#pragma once

#include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
#include "duckdb/parser/parsed_data/create_type_info.hpp"
#include "duckdb/storage/table_storage_info.hpp"

extern "C" {
#include "postgres.h"
#include "utils/snapshot.h"
#include "postgres.h"
#include "catalog/namespace.h"
#include "catalog/pg_class.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "utils/builtins.h"
#include "utils/regproc.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "access/htup_details.h"
}

namespace duckdb {

class PostgresType : public TypeCatalogEntry {
public:
~PostgresType() {
}
PostgresType(Catalog &catalog, SchemaCatalogEntry &schema, CreateTypeInfo &info);
};

} // namespace duckdb
2 changes: 2 additions & 0 deletions include/pgduckdb/duckdb_vendor/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
DisableFormat: true
SortIncludes: false
57 changes: 57 additions & 0 deletions include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#pragma once
Tishj marked this conversation as resolved.
Show resolved Hide resolved

#include "duckdb/common/extra_type_info.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/common/string_map_set.hpp"
#include "duckdb/common/serializer/deserializer.hpp"

// This is copied directly without changes from 'duckdb/src/common/extra_type_info.cpp'
// The reason this is copied is to be able to inherit from it.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// The reason this is copied is to be able to inherit from it.
// The reason this is copied is to be able to inherit from it.
// This can be removed once DuckDB 1.2.0 is released, which contains this PR:
// https://github.com/duckdb/duckdb/pull/14038


namespace duckdb {
template <class T>
struct EnumTypeInfoTemplated : public EnumTypeInfo {
explicit EnumTypeInfoTemplated(Vector &values_insert_order_p, idx_t size_p)
: EnumTypeInfo(values_insert_order_p, size_p) {
D_ASSERT(values_insert_order_p.GetType().InternalType() == PhysicalType::VARCHAR);

UnifiedVectorFormat vdata;
values_insert_order.ToUnifiedFormat(size_p, vdata);

auto data = UnifiedVectorFormat::GetData<string_t>(vdata);
for (idx_t i = 0; i < size_p; i++) {
auto idx = vdata.sel->get_index(i);
if (!vdata.validity.RowIsValid(idx)) {
throw InternalException("Attempted to create ENUM type with NULL value");
}
if (values.count(data[idx]) > 0) {
throw InvalidInputException("Attempted to create ENUM type with duplicate value %s",
data[idx].GetString());
}
values[data[idx]] = UnsafeNumericCast<T>(i);
}
}

static shared_ptr<EnumTypeInfoTemplated> Deserialize(Deserializer &deserializer, uint32_t size) {
Vector values_insert_order(LogicalType::VARCHAR, size);
auto strings = FlatVector::GetData<string_t>(values_insert_order);

deserializer.ReadList(201, "values", [&](Deserializer::List &list, idx_t i) {
strings[i] = StringVector::AddStringOrBlob(values_insert_order, list.ReadElement<string>());
});
return make_shared_ptr<EnumTypeInfoTemplated>(values_insert_order, size);
}

const string_map_t<T> &GetValues() const {
return values;
}

EnumTypeInfoTemplated(const EnumTypeInfoTemplated &) = delete;
EnumTypeInfoTemplated &operator=(const EnumTypeInfoTemplated &) = delete;

private:
string_map_t<T> values;
};

} // namespace duckdb
65 changes: 65 additions & 0 deletions include/pgduckdb/types/pgduckdb_enum.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#pragma once

#include "pgduckdb/duckdb_vendor/enum_type_info_templated.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/common/types.hpp"

extern "C" {
#include "postgres.h"
#include "catalog/pg_enum.h"
#include "catalog/pg_type.h"
#include "utils/syscache.h"
#include "access/htup_details.h"
}

namespace pgduckdb {

using duckdb::idx_t;
using duckdb::LogicalType;
using duckdb::Vector;

// To store additional metadata for a type, DuckDB's LogicalType class contains a 'type_info_' field.
// The type of this is ExtraTypeInfo, ENUMs make use of this in the form of EnumTypeInfo (see
// duckdb/include/common/extra_type_info.hpp). We derive from this further to store the ENUMs connection with the oids
// of Postgres' enum members.
Comment on lines +21 to +24
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a super helpful comment, thanks a lot.


template <class T>
Tishj marked this conversation as resolved.
Show resolved Hide resolved
class PGDuckDBEnumTypeInfo : public duckdb::EnumTypeInfoTemplated<T> {
public:
PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Vector &enum_member_oids)
: duckdb::EnumTypeInfoTemplated<T>(values_insert_order_p, dict_size_p), enum_member_oids(enum_member_oids) {
}

public:
const Vector &
GetMemberOids() const {
return enum_member_oids;
}

duckdb::shared_ptr<duckdb::ExtraTypeInfo>
Copy() const override {
auto &insert_order = this->GetValuesInsertOrder();
Vector values_insert_order_copy(LogicalType::VARCHAR, false, false, 0);
values_insert_order_copy.Reference(insert_order);

Vector enum_member_oids_copy(LogicalType::UINTEGER, false, false, 0);
enum_member_oids_copy.Reference(enum_member_oids);

return duckdb::make_shared_ptr<PGDuckDBEnumTypeInfo>(values_insert_order_copy, this->GetDictSize(),
enum_member_oids_copy);
}

private:
Vector enum_member_oids;
};

struct PGDuckDBEnum {
Tishj marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be changed to a namespace? Now that this struct doesn't have any fields anymore?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea possibly, in our codebase we're not too strict on static-membered-structs vs namespaces, I think when we also need a constexpr value here we can't use a namespace but can use a struct, so I almost always go for using a struct

static LogicalType CreateEnumType(std::vector<HeapTuple> &enum_members);
static idx_t GetDuckDBEnumPosition(duckdb::Value &val);
static idx_t GetEnumPosition(Datum enum_member_oid, const duckdb::LogicalType &type);
static bool IsEnumType(Oid type_oid);
static Oid GetEnumTypeOid(const Vector &oids);
static const Vector &GetMemberOids(const duckdb::LogicalType &type);
};

} // namespace pgduckdb
77 changes: 68 additions & 9 deletions src/catalog/pgduckdb_transaction.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
#include "pgduckdb/catalog/pgduckdb_catalog.hpp"
#include "pgduckdb/catalog/pgduckdb_transaction.hpp"
#include "pgduckdb/catalog/pgduckdb_table.hpp"
#include "pgduckdb/catalog/pgduckdb_type.hpp"
#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/scan/postgres_scan.hpp"
#include "duckdb/parser/parsed_data/create_table_info.hpp"
#include "duckdb/parser/parsed_data/create_type_info.hpp"
#include "duckdb/parser/parsed_data/create_schema_info.hpp"
#include "duckdb/catalog/catalog.hpp"

extern "C" {
#include "postgres.h"
#include "catalog/namespace.h"
#include "catalog/pg_class.h"
#include "catalog/pg_type.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "utils/builtins.h"
Expand All @@ -23,6 +27,7 @@ extern "C" {
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "parser/parsetree.h"
#include "parser/parse_type.h"
#include "utils/rel.h"
}

Expand Down Expand Up @@ -68,6 +73,56 @@ IsIndexScan(const Path *nodePath) {
return false;
}

optional_ptr<CatalogEntry>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a little comment about the assumptions of this function (feel free to correct this comment if my assumptions are incorrect)

Suggested change
optional_ptr<CatalogEntry>
/* Looks up the relevant postgres type and caches it in the SchemaItems.types for the next lookup. This is currently only used to lookup enum types. */
optional_ptr<CatalogEntry>

SchemaItems::GetType(const string &entry_name) {
auto it = types.find(entry_name);
if (it != types.end()) {
return it->second.get();
}

auto &catalog = schema->catalog;

List *name_list = NIL;
name_list = lappend(name_list, makeString(pstrdup(name.c_str())));
name_list = lappend(name_list, makeString(pstrdup(entry_name.c_str())));

TypeName *type_name = makeTypeNameFromNameList(name_list);

// Try to look up the type by name
Oid type_oid = InvalidOid;
HeapTuple type_entry = LookupTypeName(NULL, type_name, NULL, false);

if (HeapTupleIsValid(type_entry)) {
// Cast to the correct form to access the fields
Form_pg_type type_form = (Form_pg_type)GETSTRUCT(type_entry);

// Extract the OID from the type entry
type_oid = type_form->oid;

// Release the system cache entry
ReleaseSysCache(type_entry);
}

// If the type could not be found, handle it here
if (type_oid == InvalidOid) {
return nullptr;
}

FormData_pg_attribute dummy_attr;
dummy_attr.atttypid = type_oid;
dummy_attr.atttypmod = 0;
dummy_attr.attndims = 0;
Comment on lines +113 to +114
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems incorrect if e.g. it's an array an of enum. I think it would be good to add a test for that, it doesn't have to actually work but at least it should throw a somewhat clear error and not crash.

Also I think it would be good to check here for now that the type is an enum, because that's really the only kind of type that we're comfortable with handling here.

Form_pg_attribute attribute = &dummy_attr;

CreateTypeInfo info;
info.name = entry_name;
info.type = pgduckdb::ConvertPostgresToDuckColumnType(attribute);

auto type = make_uniq<PostgresType>(catalog, *schema, info);
types[entry_name] = std::move(type);
return types[entry_name].get();
}

optional_ptr<CatalogEntry>
SchemaItems::GetTable(const string &entry_name, PlannerInfo *planner_info) {
auto it = tables.find(entry_name);
Expand Down Expand Up @@ -157,23 +212,27 @@ PostgresTransaction::GetCatalogEntry(CatalogType type, const string &schema, con
throw InternalException("Could not find 'postgres_state' in 'PostgresTransaction::GetCatalogEntry'");
}
auto planner_info = scan_data->m_query_planner_info;
if (type == CatalogType::SCHEMA_ENTRY) {
return GetSchema(schema);
}

auto it = schemas.find(schema);
if (it == schemas.end()) {
return nullptr;
}
auto &schema_entry = it->second;

switch (type) {
case CatalogType::TABLE_ENTRY: {
auto it = schemas.find(schema);
if (it == schemas.end()) {
return nullptr;
}
auto &schema_entry = it->second;
return schema_entry.GetTable(name, planner_info);
}
case CatalogType::SCHEMA_ENTRY: {
return GetSchema(schema);
case CatalogType::TYPE_ENTRY: {
return schema_entry.GetType(name);
}
default:
D_ASSERT(type != CatalogType::SCHEMA_ENTRY);
return nullptr;
}
}

} // namespace duckdb

// namespace duckdb
28 changes: 28 additions & 0 deletions src/catalog/pgduckdb_type.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/catalog/pgduckdb_schema.hpp"
#include "pgduckdb/catalog/pgduckdb_type.hpp"

extern "C" {
#include "postgres.h"
#include "access/tableam.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "catalog/namespace.h"
#include "catalog/pg_class.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "utils/builtins.h"
#include "utils/regproc.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "access/htup_details.h"
#include "parser/parsetree.h"
}

namespace duckdb {

PostgresType::PostgresType(Catalog &catalog, SchemaCatalogEntry &schema, CreateTypeInfo &info)
: TypeCatalogEntry(catalog, schema, info) {
}

} // namespace duckdb
17 changes: 16 additions & 1 deletion src/pgduckdb_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern "C" {
}

#include "pgduckdb/pgduckdb_filter.hpp"
#include "pgduckdb/types/pgduckdb_enum.hpp"
#include "pgduckdb/pgduckdb_detoast.hpp"
#include "pgduckdb/pgduckdb_types.hpp"

Expand Down Expand Up @@ -71,8 +72,22 @@ FilterOperationSwitch(Datum &value, duckdb::Value &constant, Oid type_oid) {
case VARCHAROID:
return StringFilterOperation<OP>(value, constant);
default:
if (PGDuckDBEnum::IsEnumType(type_oid)) {
auto position = PGDuckDBEnum::GetEnumPosition(value, constant.type());
auto physical_type = duckdb::EnumType::GetPhysicalType(constant.type());
switch (physical_type) {
case duckdb::PhysicalType::UINT8:
return TemplatedFilterOperation<uint8_t, OP>(static_cast<uint8_t>(position), constant);
case duckdb::PhysicalType::UINT16:
return TemplatedFilterOperation<uint16_t, OP>(static_cast<uint16_t>(position), constant);
case duckdb::PhysicalType::UINT32:
return TemplatedFilterOperation<uint32_t, OP>(static_cast<uint32_t>(position), constant);
default:
throw duckdb::InternalException("Invalid Physical Type for ENUMs");
}
}
throw duckdb::InvalidTypeException(
duckdb::string("(DuckDB/FilterOperationSwitch) Unsupported duckdb type: %d", type_oid));
duckdb::StringUtil::Format("(DuckDB/FilterOperationSwitch) Unsupported duckdb type: %d", type_oid));
}
}

Expand Down
Loading