diff --git a/.changes/unreleased/Fixes-20250118-084103.yaml b/.changes/unreleased/Fixes-20250118-084103.yaml
new file mode 100644
index 00000000..66dcd3ec
--- /dev/null
+++ b/.changes/unreleased/Fixes-20250118-084103.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Optimize slow query that uses a high amount of temporary disk space to find relations
+time: 2025-01-18T08:41:03.022013Z
+custom:
+  Author: michalc
+  Issue: "189"
diff --git a/dbt/include/postgres/macros/relations.sql b/dbt/include/postgres/macros/relations.sql
index dd50cf00..79961950 100644
--- a/dbt/include/postgres/macros/relations.sql
+++ b/dbt/include/postgres/macros/relations.sql
@@ -7,68 +7,36 @@
   #}
 
   {%- call statement('relations', fetch_result=True) -%}
-    with relation as (
-        select
-            pg_rewrite.ev_class as class,
-            pg_rewrite.oid as id
-        from pg_rewrite
-    ),
-    class as (
-        select
-            oid as id,
-            relname as name,
-            relnamespace as schema,
-            relkind as kind
-        from pg_class
-    ),
-    dependency as (
-        select distinct
-            pg_depend.objid as id,
-            pg_depend.refobjid as ref
-        from pg_depend
-    ),
-    schema as (
-        select
-            pg_namespace.oid as id,
-            pg_namespace.nspname as name
-        from pg_namespace
-        where nspname != 'information_schema' and nspname not like 'pg\_%'
-    ),
-    referenced as (
-        select
-            relation.id AS id,
-            referenced_class.name ,
-            referenced_class.schema ,
-            referenced_class.kind
-        from relation
-        join class as referenced_class on relation.class=referenced_class.id
-        where referenced_class.kind in ('r', 'v', 'm')
-    ),
-    relationships as (
-        select
-            referenced.name as referenced_name,
-            referenced.schema as referenced_schema_id,
-            dependent_class.name as dependent_name,
-            dependent_class.schema as dependent_schema_id,
-            referenced.kind as kind
-        from referenced
-        join dependency on referenced.id=dependency.id
-        join class as dependent_class on dependency.ref=dependent_class.id
-        where
-            (referenced.name != dependent_class.name or
-             referenced.schema != dependent_class.schema)
-    )
+    select distinct
+        dependent_namespace.nspname as dependent_schema,
+        dependent_class.relname as dependent_name,
+        referenced_namespace.nspname as referenced_schema,
+        referenced_class.relname as referenced_name
 
-    select
-        referenced_schema.name as referenced_schema,
-        relationships.referenced_name as referenced_name,
-        dependent_schema.name as dependent_schema,
-        relationships.dependent_name as dependent_name
-    from relationships
-    join schema as dependent_schema on relationships.dependent_schema_id=dependent_schema.id
-    join schema as referenced_schema on relationships.referenced_schema_id=referenced_schema.id
-    group by referenced_schema, referenced_name, dependent_schema, dependent_name
-    order by referenced_schema, referenced_name, dependent_schema, dependent_name;
+    -- Query for views: views are entries in pg_class with an entry in pg_rewrite
+    from pg_class as dependent_class
+    join pg_namespace as dependent_namespace on dependent_namespace.oid = dependent_class.relnamespace
+    join pg_rewrite as dependent_rewrite on dependent_rewrite.ev_class = dependent_class.oid
+
+    -- ... and via pg_depend (OIDs are only unique within one catalog, so objid and refobjid
+    -- are qualified with classid and refclassid; pg_depend can hold several rows for the same
+    -- pair of relations, hence the "distinct" above)
+    join pg_depend on pg_depend.objid = dependent_rewrite.oid
+        and pg_depend.classid = 'pg_rewrite'::regclass
+        and pg_depend.refclassid = 'pg_class'::regclass
+
+    -- ... we can find the tables they query from in pg_class
+    join pg_class as referenced_class on referenced_class.oid = pg_depend.refobjid
+    join pg_namespace as referenced_namespace on referenced_namespace.oid = referenced_class.relnamespace
+
+    -- ... and we exclude system catalogs, and exclude views depending on themselves
+    where
+        dependent_class.oid != referenced_class.oid
+        and dependent_namespace.nspname != 'information_schema' and dependent_namespace.nspname not like 'pg\_%'
+        and referenced_namespace.nspname != 'information_schema' and referenced_namespace.nspname not like 'pg\_%'
+
+    order by
+        dependent_schema, dependent_name, referenced_schema, referenced_name;
 
   {%- endcall -%}
 