From 13a430bcf48470bfcfc122a1c9563a8580ba5d7c Mon Sep 17 00:00:00 2001 From: dentiny Date: Thu, 26 Dec 2024 01:36:24 +0000 Subject: [PATCH 1/3] fix pg star expression bind --- src/core/functions/table/match.cpp | 102 +++++++++++++++--- .../duckpgq/core/functions/table/match.hpp | 10 +- test/sql/optional_columns.test | 2 +- .../pattern_matching/inheritance_support.test | 24 ++--- 4 files changed, 111 insertions(+), 27 deletions(-) diff --git a/src/core/functions/table/match.cpp b/src/core/functions/table/match.cpp index 9f621c3e..e5f3227c 100644 --- a/src/core/functions/table/match.cpp +++ b/src/core/functions/table/match.cpp @@ -82,6 +82,53 @@ case_insensitive_set_t GetFullyQualifiedColFromPg( return col_names; } +// Get all fully-qualified column names from the given property graph [pg] for +// the given relation [alias]. +// +// Return a vector of column names, each of them represents a stack of names in +// order of which they appear +// (column_names[0].column_names[1].column_names[2]....). +vector> GetRegisteredColFromPg( + const case_insensitive_map_t> &alias_map, + const std::string &alias) { + vector> registered_col_names; + auto iter = alias_map.find(alias); + D_ASSERT(iter != alias_map.end()); + const auto &tbl = iter->second; + registered_col_names.reserve(tbl->column_names.size()); + for (const auto &cur_col : tbl->column_names) { + registered_col_names.emplace_back(vector{"", ""}); + auto &new_col_names = registered_col_names.back(); + new_col_names[0] = alias; + new_col_names[1] = cur_col; + } + return registered_col_names; +} + +// Get all fully-qualified column names from the given property graph [pg] for +// all relations. +// +// Return a vector of column names, each of them represents a stack of names in +// order of which they appear +// (column_names[0].column_names[1].column_names[2]....). +vector> GetRegisteredColFromPg( + const case_insensitive_map_t> &alias_map) { + vector> registered_col_names; + for (const auto &alias_and_table : alias_map) { + const auto &alias = alias_and_table.first; + const auto &tbl = alias_and_table.second; + registered_col_names.reserve(registered_col_names.size() + + tbl->column_names.size()); + for (const auto &cur_col : tbl->column_names) { + registered_col_names.emplace_back(vector{"", ""}); + auto &new_col_names = registered_col_names.back(); + new_col_names[0] = alias; + new_col_names[1] = cur_col; + } + } + return registered_col_names; +} + } // namespace shared_ptr @@ -970,7 +1017,7 @@ void PGQMatchFunction::ProcessPathList( } } -void PGQMatchFunction::PopulateGraphTableAliasMap( +/*static*/ void PGQMatchFunction::PopulateGraphTableAliasMap( const CreatePropertyGraphInfo &pg_table, const unique_ptr &path_reference, case_insensitive_map_t> @@ -999,20 +1046,26 @@ void PGQMatchFunction::PopulateGraphTableAliasMap( } } -/*static*/ void -PGQMatchFunction::CheckColumnBinding(const CreatePropertyGraphInfo &pg_table, - const MatchExpression &ref) { - // Maps from table alias to table, including vertex and edge tables. +/*static*/ case_insensitive_map_t> +PGQMatchFunction::PopulateGraphTableAliasMap( + const CreatePropertyGraphInfo &pg_table, + const MatchExpression &match_expr) { case_insensitive_map_t> alias_to_vertex_and_edge_tables; - for (idx_t idx_i = 0; idx_i < ref.path_patterns.size(); idx_i++) { - const auto &path_list = ref.path_patterns[idx_i]->path_elements; + for (idx_t idx_i = 0; idx_i < match_expr.path_patterns.size(); idx_i++) { + const auto &path_list = match_expr.path_patterns[idx_i]->path_elements; for (const auto &cur_path : path_list) { PopulateGraphTableAliasMap(pg_table, cur_path, alias_to_vertex_and_edge_tables); } } + return alias_to_vertex_and_edge_tables; +} +/*static*/ void PGQMatchFunction::CheckColumnBinding( + const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref, + const case_insensitive_map_t> + &alias_to_vertex_and_edge_tables) { // All fully-qualified column names for vertex tables and edge tables. const auto all_fq_col_names = GetFullyQualifiedColFromPg(pg_table, alias_to_vertex_and_edge_tables); @@ -1081,7 +1134,10 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, conditions.push_back(std::move(ref->where_clause)); } - CheckColumnBinding(*pg_table, *ref); + // Maps from table alias to table, including vertex and edge tables. + auto alias_to_vertex_and_edge_tables = + PopulateGraphTableAliasMap(*pg_table, *ref); + CheckColumnBinding(*pg_table, *ref, alias_to_vertex_and_edge_tables); std::vector> final_column_list; @@ -1134,10 +1190,32 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, continue; } - // TODO(hjiang): For star expression, only select columns in vertex or edge - // table, but not those unspecified in property graph. - // Issue reference: https://github.com/cwida/duckpgq-extension/issues/192 - final_column_list.push_back(std::move(expression)); + // Handle StarExpression. + auto *star_expression = dynamic_cast(expression.get()); + if (star_expression != nullptr) { + auto selected_col_names = + star_expression->relation_name.empty() + ? GetRegisteredColFromPg(alias_to_vertex_and_edge_tables) + : GetRegisteredColFromPg(alias_to_vertex_and_edge_tables, + star_expression->relation_name); + + // Fallback to star expression if cannot figure out the columns to query. + if (selected_col_names.empty()) { + final_column_list.emplace_back(std::move(expression)); + continue; + } + + final_column_list.reserve(final_column_list.size() + + selected_col_names.size()); + for (auto &col : selected_col_names) { + final_column_list.emplace_back( + make_uniq(std::move(col))); + } + continue; + } + + // By default, directly handle expression without further processing. + final_column_list.emplace_back(std::move(expression)); } final_select_node->where_clause = CreateWhereClause(conditions); diff --git a/src/include/duckpgq/core/functions/table/match.hpp b/src/include/duckpgq/core/functions/table/match.hpp index 9d83b5c2..409909ff 100644 --- a/src/include/duckpgq/core/functions/table/match.hpp +++ b/src/include/duckpgq/core/functions/table/match.hpp @@ -62,6 +62,10 @@ struct PGQMatchFunction : public TableFunction { case_insensitive_map_t> &alias_to_vertex_and_edge_tables); + static case_insensitive_map_t> + PopulateGraphTableAliasMap(const CreatePropertyGraphInfo &pg_table, + const MatchExpression &match_expr); + static PathElement * GetPathElement(const unique_ptr &path_reference); @@ -171,8 +175,10 @@ struct PGQMatchFunction : public TableFunction { // Check whether columns to query are valid against the property graph, throws // BinderException if error. - static void CheckColumnBinding(const CreatePropertyGraphInfo &pg_table, - const MatchExpression &ref); + static void CheckColumnBinding( + const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref, + const case_insensitive_map_t> + &alias_to_vertex_and_edge_tables); }; } // namespace core diff --git a/test/sql/optional_columns.test b/test/sql/optional_columns.test index 1c20303d..a1faaea8 100644 --- a/test/sql/optional_columns.test +++ b/test/sql/optional_columns.test @@ -25,7 +25,7 @@ EDGE TABLES ( query IIIIIIIIIII -FROM GRAPH_TABLE (snb MATCH (p:Person)) limit 1; ---- -2010-01-03 23:10:31.499+00 14 Hossein Forouhar male 1984-03-11 77.245.239.11 Firefox 1166 fa;ku;en Hossein14@hotmail.com +1166 1984-03-11 Firefox 2010-01-03 23:10:31.499+00 Hossein14@hotmail.com Hossein male 14 Forouhar 77.245.239.11 fa;ku;en query I -FROM GRAPH_TABLE (snb MATCH (p:Person) COLUMNS (p.id)) limit 10; diff --git a/test/sql/pattern_matching/inheritance_support.test b/test/sql/pattern_matching/inheritance_support.test index 3f716df6..4e03b10f 100644 --- a/test/sql/pattern_matching/inheritance_support.test +++ b/test/sql/pattern_matching/inheritance_support.test @@ -191,23 +191,23 @@ EDGE TABLES ( DESTINATION KEY(collegeID) REFERENCES College(id) PROPERTIES (classYear) LABEL studiesAt ); -query IIII +query III -FROM GRAPH_TABLE (pg MATCH (a:Student) COLUMNS(*)) tmp; ---- -1 Ana 2000-10-01 1 -2 Bo 2000-01-10 3 -2 Ed 2001-10-10 1 -2 Jo 2001-01-01 1 +1 Ana 2000-10-01 +2 Bo 2000-01-10 +2 Ed 2001-10-10 +2 Jo 2001-01-01 -query IIII +query III -FROM GRAPH_TABLE (pg MATCH (a:Person) COLUMNS(*)) tmp; ---- -1 Ana 2000-10-01 1 -2 Bo 2000-01-10 3 -2 Ed 2001-10-10 1 -2 Jo 2001-01-01 1 +1 Ana 2000-10-01 +2 Bo 2000-01-10 +2 Ed 2001-10-10 +2 Jo 2001-01-01 -query IIII +query III -FROM GRAPH_TABLE (pg MATCH (a:TA) COLUMNS(*)) tmp; ---- -2 Bo 2000-01-10 3 +2 Bo 2000-01-10 From 8c73d82ca5dbe142367d357ed48d2336e03ec93e Mon Sep 17 00:00:00 2001 From: dentiny Date: Sat, 25 Jan 2025 19:55:57 +0000 Subject: [PATCH 2/3] skip edge tables --- src/core/functions/table/match.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/core/functions/table/match.cpp b/src/core/functions/table/match.cpp index b088c492..af190752 100644 --- a/src/core/functions/table/match.cpp +++ b/src/core/functions/table/match.cpp @@ -83,7 +83,7 @@ case_insensitive_set_t GetFullyQualifiedColFromPg( } // Get all fully-qualified column names from the given property graph [pg] for -// the given relation [alias]. +// the given relation [alias], only vertex table is selected. // // Return a vector of column names, each of them represents a stack of names in // order of which they appear @@ -95,6 +95,10 @@ vector> GetRegisteredColFromPg( auto iter = alias_map.find(alias); D_ASSERT(iter != alias_map.end()); const auto &tbl = iter->second; + // Skip edge table. + if (!tbl->is_vertex_table) { + return registered_col_names; + } registered_col_names.reserve(tbl->column_names.size()); for (const auto &cur_col : tbl->column_names) { registered_col_names.emplace_back(vector{"", ""}); @@ -106,7 +110,7 @@ vector> GetRegisteredColFromPg( } // Get all fully-qualified column names from the given property graph [pg] for -// all relations. +// all vertex relations. // // Return a vector of column names, each of them represents a stack of names in // order of which they appear @@ -117,6 +121,10 @@ vector> GetRegisteredColFromPg( for (const auto &alias_and_table : alias_map) { const auto &alias = alias_and_table.first; const auto &tbl = alias_and_table.second; + // Skip edge table. + if (!tbl->is_vertex_table) { + continue; + } registered_col_names.reserve(registered_col_names.size() + tbl->column_names.size()); for (const auto &cur_col : tbl->column_names) { From 655dc15974c5e9d1d69a8f15b6ef5a4d2e556f7c Mon Sep 17 00:00:00 2001 From: dentiny Date: Tue, 4 Feb 2025 06:09:38 +0000 Subject: [PATCH 3/3] return column ref expression --- src/core/functions/table/match.cpp | 43 ++++++++++++++---------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/core/functions/table/match.cpp b/src/core/functions/table/match.cpp index af190752..713125fd 100644 --- a/src/core/functions/table/match.cpp +++ b/src/core/functions/table/match.cpp @@ -85,13 +85,11 @@ case_insensitive_set_t GetFullyQualifiedColFromPg( // Get all fully-qualified column names from the given property graph [pg] for // the given relation [alias], only vertex table is selected. // -// Return a vector of column names, each of them represents a stack of names in -// order of which they appear -// (column_names[0].column_names[1].column_names[2]....). -vector> GetRegisteredColFromPg( +// Return column reference expressions which represent columns to select. +vector> GetColRefExprFromPg( const case_insensitive_map_t> &alias_map, const std::string &alias) { - vector> registered_col_names; + vector> registered_col_names; auto iter = alias_map.find(alias); D_ASSERT(iter != alias_map.end()); const auto &tbl = iter->second; @@ -101,10 +99,11 @@ vector> GetRegisteredColFromPg( } registered_col_names.reserve(tbl->column_names.size()); for (const auto &cur_col : tbl->column_names) { - registered_col_names.emplace_back(vector{"", ""}); - auto &new_col_names = registered_col_names.back(); + auto new_col_names = vector{"", ""}; new_col_names[0] = alias; new_col_names[1] = cur_col; + registered_col_names.emplace_back( + make_uniq(std::move(new_col_names))); } return registered_col_names; } @@ -112,12 +111,10 @@ vector> GetRegisteredColFromPg( // Get all fully-qualified column names from the given property graph [pg] for // all vertex relations. // -// Return a vector of column names, each of them represents a stack of names in -// order of which they appear -// (column_names[0].column_names[1].column_names[2]....). -vector> GetRegisteredColFromPg( +// Return column reference expressions which represent columns to select. +vector> GetColRefExprFromPg( const case_insensitive_map_t> &alias_map) { - vector> registered_col_names; + vector> registered_col_names; for (const auto &alias_and_table : alias_map) { const auto &alias = alias_and_table.first; const auto &tbl = alias_and_table.second; @@ -128,10 +125,11 @@ vector> GetRegisteredColFromPg( registered_col_names.reserve(registered_col_names.size() + tbl->column_names.size()); for (const auto &cur_col : tbl->column_names) { - registered_col_names.emplace_back(vector{"", ""}); - auto &new_col_names = registered_col_names.back(); + auto new_col_names = vector{"", ""}; new_col_names[0] = alias; new_col_names[1] = cur_col; + registered_col_names.emplace_back( + make_uniq(std::move(new_col_names))); } } return registered_col_names; @@ -1206,23 +1204,22 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, // Handle StarExpression. auto *star_expression = dynamic_cast(expression.get()); if (star_expression != nullptr) { - auto selected_col_names = + auto selected_col_exprs = star_expression->relation_name.empty() - ? GetRegisteredColFromPg(alias_to_vertex_and_edge_tables) - : GetRegisteredColFromPg(alias_to_vertex_and_edge_tables, - star_expression->relation_name); + ? GetColRefExprFromPg(alias_to_vertex_and_edge_tables) + : GetColRefExprFromPg(alias_to_vertex_and_edge_tables, + star_expression->relation_name); // Fallback to star expression if cannot figure out the columns to query. - if (selected_col_names.empty()) { + if (selected_col_exprs.empty()) { final_column_list.emplace_back(std::move(expression)); continue; } final_column_list.reserve(final_column_list.size() + - selected_col_names.size()); - for (auto &col : selected_col_names) { - final_column_list.emplace_back( - make_uniq(std::move(col))); + selected_col_exprs.size()); + for (auto &expr : selected_col_exprs) { + final_column_list.emplace_back(std::move(expr)); } continue; }