Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix star expression bind for property graph #193

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 97 additions & 10 deletions src/core/functions/table/match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,61 @@ case_insensitive_set_t GetFullyQualifiedColFromPg(
return col_names;
}

// Collect the fully-qualified column names registered in the property graph
// for the single relation bound to [alias]. Only vertex tables contribute
// columns; an edge table yields an empty result.
//
// [alias_map] maps a table alias to its property graph table.
//
// Return a vector of column names, each of them represents a stack of names in
// order of which they appear (column_names[0].column_names[1]), here
// {alias, column name}. An empty vector means no column could be resolved:
// either [alias] is not present in [alias_map], or it refers to an edge table.
vector<vector<string>> GetRegisteredColFromPg(
    const case_insensitive_map_t<shared_ptr<PropertyGraphTable>> &alias_map,
    const std::string &alias) {
  vector<vector<string>> registered_col_names;
  const auto iter = alias_map.find(alias);
  D_ASSERT(iter != alias_map.end());
  // The assertion above may be disabled in non-debug builds, so guard
  // explicitly instead of dereferencing end() for an unknown alias.
  if (iter == alias_map.end()) {
    return registered_col_names;
  }
  const auto &tbl = iter->second;
  // Skip edge table.
  if (!tbl->is_vertex_table) {
    return registered_col_names;
  }
  registered_col_names.reserve(tbl->column_names.size());
  for (const auto &cur_col : tbl->column_names) {
    // Build the {alias, column} pair directly rather than filling in
    // placeholder strings afterwards.
    registered_col_names.emplace_back(vector<string>{alias, cur_col});
  }
  return registered_col_names;
}

// Collect the fully-qualified column names registered in the property graph
// for every vertex relation in [alias_map]; edge tables are skipped.
//
// [alias_map] maps a table alias to its property graph table.
//
// Return a vector of column names, each of them represents a stack of names in
// order of which they appear (column_names[0].column_names[1]), here
// {alias, column name}. Entries follow the iteration order of [alias_map].
// NOTE(review): if case_insensitive_map_t is an unordered container, the
// relative order of columns from different aliases is unspecified; confirm
// whether callers rely on a stable column order.
vector<vector<string>> GetRegisteredColFromPg(
    const case_insensitive_map_t<shared_ptr<PropertyGraphTable>> &alias_map) {
  // Count the columns up front so the result is reserved exactly once.
  // Reserving "current size + n" inside the loop can force a reallocation per
  // table instead of letting the vector grow geometrically.
  size_t total_col_count = 0;
  for (const auto &alias_and_table : alias_map) {
    const auto &tbl = alias_and_table.second;
    if (tbl->is_vertex_table) {
      total_col_count += tbl->column_names.size();
    }
  }

  vector<vector<string>> registered_col_names;
  registered_col_names.reserve(total_col_count);
  for (const auto &alias_and_table : alias_map) {
    const auto &alias = alias_and_table.first;
    const auto &tbl = alias_and_table.second;
    // Skip edge table.
    if (!tbl->is_vertex_table) {
      continue;
    }
    for (const auto &cur_col : tbl->column_names) {
      // Build the {alias, column} pair directly rather than filling in
      // placeholder strings afterwards.
      registered_col_names.emplace_back(vector<string>{alias, cur_col});
    }
  }
  return registered_col_names;
}

} // namespace

shared_ptr<PropertyGraphTable>
Expand Down Expand Up @@ -999,19 +1054,26 @@ void PGQMatchFunction::PopulateGraphTableAliasMap(
}
}

void PGQMatchFunction::CheckColumnBinding(
const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref) {
// Maps from table alias to table, including vertex and edge tables.
case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
PGQMatchFunction::PopulateGraphTableAliasMap(
const CreatePropertyGraphInfo &pg_table,
const MatchExpression &match_expr) {
case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
alias_to_vertex_and_edge_tables;
for (idx_t idx_i = 0; idx_i < ref.path_patterns.size(); idx_i++) {
const auto &path_list = ref.path_patterns[idx_i]->path_elements;
for (idx_t idx_i = 0; idx_i < match_expr.path_patterns.size(); idx_i++) {
const auto &path_list = match_expr.path_patterns[idx_i]->path_elements;
for (const auto &cur_path : path_list) {
PopulateGraphTableAliasMap(pg_table, cur_path,
alias_to_vertex_and_edge_tables);
}
}
return alias_to_vertex_and_edge_tables;
}

void PGQMatchFunction::CheckColumnBinding(
const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref,
const case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
&alias_to_vertex_and_edge_tables) {
// All fully-qualified column names for vertex tables and edge tables.
const auto all_fq_col_names =
GetFullyQualifiedColFromPg(pg_table, alias_to_vertex_and_edge_tables);
Expand Down Expand Up @@ -1085,7 +1147,10 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context,
conditions.push_back(std::move(ref->where_clause));
}

CheckColumnBinding(*pg_table, *ref);
// Maps from table alias to table, including vertex and edge tables.
auto alias_to_vertex_and_edge_tables =
PopulateGraphTableAliasMap(*pg_table, *ref);
CheckColumnBinding(*pg_table, *ref, alias_to_vertex_and_edge_tables);

std::vector<unique_ptr<ParsedExpression>> final_column_list;

Expand Down Expand Up @@ -1138,10 +1203,32 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context,
continue;
}

// TODO(hjiang): For star expression, only select columns in vertex or edge
// table, but not those unspecified in property graph.
// Issue reference: https://github.com/cwida/duckpgq-extension/issues/192
final_column_list.push_back(std::move(expression));
// Handle StarExpression.
auto *star_expression = dynamic_cast<StarExpression *>(expression.get());
if (star_expression != nullptr) {
auto selected_col_names =
star_expression->relation_name.empty()
? GetRegisteredColFromPg(alias_to_vertex_and_edge_tables)
: GetRegisteredColFromPg(alias_to_vertex_and_edge_tables,
star_expression->relation_name);

// Fall back to the star expression if its columns cannot be determined.
if (selected_col_names.empty()) {
final_column_list.emplace_back(std::move(expression));
continue;
}

final_column_list.reserve(final_column_list.size() +
selected_col_names.size());
for (auto &col : selected_col_names) {
final_column_list.emplace_back(
make_uniq<ColumnRefExpression>(std::move(col)));
}
continue;
}

// By default, directly handle expression without further processing.
final_column_list.emplace_back(std::move(expression));
}

final_select_node->where_clause = CreateWhereClause(conditions);
Expand Down
10 changes: 8 additions & 2 deletions src/include/duckpgq/core/functions/table/match.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ struct PGQMatchFunction : public TableFunction {
case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
&alias_to_vertex_and_edge_tables);

static case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
PopulateGraphTableAliasMap(const CreatePropertyGraphInfo &pg_table,
const MatchExpression &match_expr);

static PathElement *
GetPathElement(const unique_ptr<PathReference> &path_reference);

Expand Down Expand Up @@ -171,8 +175,10 @@ struct PGQMatchFunction : public TableFunction {

// Check whether columns to query are valid against the property graph, throws
// BinderException if error.
static void CheckColumnBinding(const CreatePropertyGraphInfo &pg_table,
const MatchExpression &ref);
static void CheckColumnBinding(
const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref,
const case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
&alias_to_vertex_and_edge_tables);
};

} // namespace core
Expand Down
2 changes: 1 addition & 1 deletion test/sql/optional_columns.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ EDGE TABLES (
query IIIIIIIIIII
-FROM GRAPH_TABLE (snb MATCH (p:Person)) limit 1;
----
2010-01-03 23:10:31.499+00 14 Hossein Forouhar male 1984-03-11 77.245.239.11 Firefox 1166 fa;ku;en [email protected]
1166 1984-03-11 Firefox 2010-01-03 23:10:31.499+00 [email protected] Hossein male 14 Forouhar 77.245.239.11 fa;ku;en
Copy link
Contributor Author

@dentiny dentiny Dec 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NOTICE: This is a behavior change.


query I
-FROM GRAPH_TABLE (snb MATCH (p:Person) COLUMNS (p.id)) limit 10;
Expand Down
24 changes: 12 additions & 12 deletions test/sql/pattern_matching/inheritance_support.test
Original file line number Diff line number Diff line change
Expand Up @@ -191,23 +191,23 @@ EDGE TABLES (
DESTINATION KEY(collegeID) REFERENCES College(id)
PROPERTIES (classYear) LABEL studiesAt );

query IIII
query III
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test cases to verify only registered columns are returned.

-FROM GRAPH_TABLE (pg MATCH (a:Student) COLUMNS(*)) tmp;
----
1 Ana 2000-10-01 1
2 Bo 2000-01-10 3
2 Ed 2001-10-10 1
2 Jo 2001-01-01 1
1 Ana 2000-10-01
2 Bo 2000-01-10
2 Ed 2001-10-10
2 Jo 2001-01-01

query IIII
query III
-FROM GRAPH_TABLE (pg MATCH (a:Person) COLUMNS(*)) tmp;
----
1 Ana 2000-10-01 1
2 Bo 2000-01-10 3
2 Ed 2001-10-10 1
2 Jo 2001-01-01 1
1 Ana 2000-10-01
2 Bo 2000-01-10
2 Ed 2001-10-10
2 Jo 2001-01-01

query IIII
query III
-FROM GRAPH_TABLE (pg MATCH (a:TA) COLUMNS(*)) tmp;
----
2 Bo 2000-01-10 3
2 Bo 2000-01-10
Loading