diff --git a/docs/generated/UNDERLAY_CONFIG.md b/docs/generated/UNDERLAY_CONFIG.md index ac8cbd37f..21765749b 100644 --- a/docs/generated/UNDERLAY_CONFIG.md +++ b/docs/generated/UNDERLAY_CONFIG.md @@ -140,6 +140,13 @@ List of attributes grouped together for search optimization. Order matter. Each entry is a list of attributes that are search for together. For example search is typically performed for contig and position together. +### SZAttributeSearch.includeEntityMainColumns +**optional** boolean + +Whether all columns in the entity main table should also be included in this search table. Improves performance if other attributes are also fetched when performing this search by attributes. + +*Default value:* `false` + ### SZAttributeSearch.includeNullValues **optional** boolean diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntitySearchByAttributes.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntitySearchByAttributes.java index 7736ebf05..370231716 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntitySearchByAttributes.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntitySearchByAttributes.java @@ -2,6 +2,7 @@ import bio.terra.tanagra.indexing.job.BigQueryJob; import bio.terra.tanagra.indexing.job.dataflow.beam.BigQueryBeamUtils; +import bio.terra.tanagra.underlay.ColumnSchema; import bio.terra.tanagra.underlay.entitymodel.Entity; import bio.terra.tanagra.underlay.indextable.ITEntityMain; import bio.terra.tanagra.underlay.indextable.ITEntitySearchByAttributes; @@ -52,45 +53,26 @@ public JobStatus checkStatus() { @Override public void run(boolean isDryRun) { - List fields = - searchTable.getColumnSchemas().stream() - .map( - columnSchema -> - Field.newBuilder( - columnSchema.getColumnName(), - BigQueryBeamUtils.fromDataType(columnSchema.getDataType())) - .setMode(searchTable.includeNullValues() ? Mode.NULLABLE : Mode.REQUIRED) - .build()) - .toList(); - - // Build a clustering specification. - Clustering clustering = - Clustering.newBuilder().setFields(searchTable.getAttributeNames()).build(); - - // Create an empty table with this schema. - TableId destinationTable = - TableId.of( - indexerConfig.bigQuery.indexData.projectId, - indexerConfig.bigQuery.indexData.datasetId, - getOutputTableName()); - googleBigQuery.createTableFromSchema(destinationTable, Schema.of(fields), clustering, isDryRun); - - // Build the query to insert to the search table using a select from the main entity table. + // Create table definition & build the query to insert into the search table + // using a select from the main entity table. + List fields = new ArrayList<>(); List insertColumns = new ArrayList<>(); - insertColumns.add(entity.getIdAttribute().getName()); - List selectColumns = new ArrayList<>(); - selectColumns.add(entity.getIdAttribute().getName()); - List crossJoins = new ArrayList<>(); List whereClauses = new ArrayList<>(); + searchTable - .getAttributeNames() + .getColumnSchemas() .forEach( - attribute -> { + colSchema -> { + String attribute = colSchema.getColumnName(); insertColumns.add(attribute); - if (entity.getAttribute(attribute).isDataTypeRepeated()) { + if (entityTable.getColumnSchemas().stream() + .filter(col -> col.getColumnName().equals(attribute)) + .anyMatch(ColumnSchema::isDataTypeRepeated) + != colSchema.isDataTypeRepeated()) { + // entityTable.repeated != searchTable.notRepeated String alias = "flattened_" + attribute; selectColumns.add(alias); crossJoins.add(" CROSS JOIN UNNEST(" + attribute + ") AS " + alias); @@ -98,11 +80,42 @@ public void run(boolean isDryRun) { selectColumns.add(attribute); } - if (!searchTable.includeNullValues()) { - whereClauses.add(attribute + " IS NOT NULL"); + Mode mode; + if (searchTable.getAttributeNames().contains(attribute)) { + if (searchTable.includeNullValues()) { + mode = Mode.NULLABLE; + } else { + mode = Mode.REQUIRED; + whereClauses.add(attribute + " IS NOT NULL"); + } + } else { + // all other attributes + mode = + colSchema.isRequired() + ? Mode.REQUIRED + : (colSchema.isDataTypeRepeated() ? Mode.REPEATED : Mode.NULLABLE); } + + fields.add( + Field.newBuilder( + colSchema.getColumnName(), + BigQueryBeamUtils.fromDataType(colSchema.getDataType())) + .setMode(mode) + .build()); }); + // Build a clustering specification. + Clustering clustering = + Clustering.newBuilder().setFields(searchTable.getAttributeNames()).build(); + + // Create an empty table with this schema. + TableId destinationTable = + TableId.of( + indexerConfig.bigQuery.indexData.projectId, + indexerConfig.bigQuery.indexData.datasetId, + getOutputTableName()); + googleBigQuery.createTableFromSchema(destinationTable, Schema.of(fields), clustering, isDryRun); + String whereSql = whereClauses.isEmpty() ? StringUtils.EMPTY : " WHERE " + String.join(" AND ", whereClauses); diff --git a/ui/src/tanagra-underlay/underlayConfig.ts b/ui/src/tanagra-underlay/underlayConfig.ts index ef1c06ea0..15c13043d 100644 --- a/ui/src/tanagra-underlay/underlayConfig.ts +++ b/ui/src/tanagra-underlay/underlayConfig.ts @@ -15,6 +15,7 @@ export type SZAttribute = { export type SZAttributeSearch = { attributes: string[]; + includeEntityMainColumns?: boolean; includeNullValues?: boolean; }; diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java index 9460eb7f8..0941f0e28 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java @@ -57,8 +57,9 @@ public AttributeFilter( this.values = ImmutableList.copyOf(values); } - public Attribute getAttribute() { - return attribute; + @Override + public List getFilterAttributes() { + return List.of(attribute); } public UnaryOperator getUnaryOperator() { diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/BooleanAndOrFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/BooleanAndOrFilter.java index 24f1b6365..b1f2a7aa2 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/BooleanAndOrFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/BooleanAndOrFilter.java @@ -1,6 +1,7 @@ package bio.terra.tanagra.api.filter; import bio.terra.tanagra.exception.InvalidQueryException; +import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.entitymodel.Entity; import com.google.common.collect.ImmutableList; import java.util.List; @@ -43,6 +44,15 @@ private static Entity getSubFiltersEntity(List filters) { return entity; } + @Override + public List getFilterAttributes() { + return subFilters.stream() + .map(EntityFilter::getFilterAttributes) + .flatMap(List::stream) + .distinct() + .toList(); + } + @Override public boolean equals(Object o) { if (!super.equals(o)) { diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/BooleanNotFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/BooleanNotFilter.java index bf3db2316..3d251e850 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/BooleanNotFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/BooleanNotFilter.java @@ -1,5 +1,7 @@ package bio.terra.tanagra.api.filter; +import bio.terra.tanagra.underlay.entitymodel.Attribute; +import java.util.List; import java.util.Objects; import org.slf4j.LoggerFactory; @@ -18,6 +20,11 @@ public EntityFilter getSubFilter() { return subFilter; } + @Override + public List getFilterAttributes() { + return subFilter.getFilterAttributes(); + } + @Override public boolean equals(Object o) { if (!super.equals(o)) { diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/EntityFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/EntityFilter.java index a4ea34096..dfcd333fa 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/EntityFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/EntityFilter.java @@ -1,6 +1,7 @@ package bio.terra.tanagra.api.filter; import bio.terra.tanagra.underlay.Underlay; +import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.entitymodel.Entity; import java.util.List; import java.util.Objects; @@ -30,6 +31,11 @@ public Entity getEntity() { return entity; } + public List getFilterAttributes() { + // not supported or not implemented + return List.of(); + } + // TODO: Add logic here to merge filters automatically to get a simpler filter overall. public boolean isMergeable(EntityFilter entityFilter) { return false; diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/OccurrenceForPrimaryFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/OccurrenceForPrimaryFilter.java index c8f4858b2..4c90d40e8 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/OccurrenceForPrimaryFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/OccurrenceForPrimaryFilter.java @@ -1,9 +1,11 @@ package bio.terra.tanagra.api.filter; import bio.terra.tanagra.underlay.Underlay; +import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.entitymodel.Entity; import bio.terra.tanagra.underlay.entitymodel.entitygroup.CriteriaOccurrence; import jakarta.annotation.Nullable; +import java.util.List; import java.util.Objects; import org.slf4j.LoggerFactory; @@ -25,6 +27,15 @@ public OccurrenceForPrimaryFilter( this.criteriaSubFilter = criteriaSubFilter; } + @Override + public List getFilterAttributes() { + Attribute attribute = + criteriaOccurrence + .getOccurrencePrimaryRelationship(getOccurrenceEntity().getName()) + .getForeignKeyAttribute(getOccurrenceEntity()); + return attribute != null ? List.of(attribute) : List.of(); + } + public CriteriaOccurrence getCriteriaOccurrence() { return criteriaOccurrence; } diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/RelationshipFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/RelationshipFilter.java index fd5fcef08..e92d6207d 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/RelationshipFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/RelationshipFilter.java @@ -47,6 +47,12 @@ public RelationshipFilter( this.groupByCountValue = groupByCountValue; } + @Override + public List getFilterAttributes() { + Attribute attribute = relationship.getForeignKeyAttribute(getSelectEntity()); + return attribute != null ? List.of(attribute) : List.of(); + } + public EntityGroup getEntityGroup() { return entityGroup; } diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/TextSearchFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/TextSearchFilter.java index eddcdb001..d179a2a6a 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/TextSearchFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/TextSearchFilter.java @@ -4,6 +4,7 @@ import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.entitymodel.Entity; import jakarta.annotation.Nullable; +import java.util.List; import java.util.Objects; import org.slf4j.LoggerFactory; @@ -30,13 +31,9 @@ public TextSearchFilter( this.attribute = attribute; } - public boolean isForSpecificAttribute() { - return attribute != null; - } - - @Nullable - public Attribute getAttribute() { - return attribute; + @Override + public List getFilterAttributes() { + return (attribute != null) ? List.of(attribute) : List.of(); } public TextSearchOperator getOperator() { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQExecutor.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQExecutor.java index 7814b27ae..9be0a476e 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQExecutor.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQExecutor.java @@ -43,8 +43,7 @@ public BQExecutor(BQExecutorInfrastructure queryInfrastructure) { } public SqlQueryResult run(SqlQueryRequest queryRequest) { - // Log the SQL statement with parameters substituted locally (i.e. not by BQ) to help with - // debugging. + // Log the SQL statement with parameters substituted locally (i.e. not by BQ) for debugging. String sqlNoParams = queryRequest.getSql(); for (String paramName : queryRequest.getSqlParams().getParamNamesLongestFirst()) { sqlNoParams = diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java index 9ce5fc5ba..509c8ca9e 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java @@ -34,8 +34,9 @@ import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.entitymodel.Entity; import bio.terra.tanagra.underlay.indextable.ITEntityLevelDisplayHints; -import bio.terra.tanagra.underlay.indextable.ITEntityMain; +import bio.terra.tanagra.underlay.indextable.ITEntitySearchByAttributes; import bio.terra.tanagra.underlay.indextable.ITInstanceLevelDisplayHints; +import bio.terra.tanagra.underlay.indextable.IndexTable; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.HashBasedTable; import com.google.common.collect.Table; @@ -48,6 +49,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.tuple.Pair; @@ -395,17 +397,13 @@ private SqlQueryRequest buildQuerySqlAgainstIndexData( BQApiTranslator bqTranslator = new BQApiTranslator(); // Build the list of entities we need for the select fields and filters. - Set entities = new HashSet<>(); - selectFields.forEach(selectField -> entities.add(selectField.getEntity())); - entities.addAll(filters.keySet()); + Set entities = + selectFields.stream().map(ValueDisplayField::getEntity).collect(Collectors.toSet()); if (entities.size() > 1) { throw new NotImplementedException("Queries with more than one entity are not yet supported"); } - // Build the list of entity main tables we need to query or join. - Entity singleEntity = entities.iterator().next(); - ITEntityMain entityMain = underlay.getIndexSchema().getEntityMain(singleEntity.getName()); - + List selectFieldNames = new ArrayList<>(); List selectFieldSqls = new ArrayList<>(); List joinTableSqls = new ArrayList<>(); selectFields.forEach( @@ -428,14 +426,40 @@ private SqlQueryRequest buildQuerySqlAgainstIndexData( } sqlQueryFields.forEach( - sqlQueryField -> selectFieldSqls.add(sqlQueryField.renderForSelect())); + sqlQueryField -> { + selectFieldNames.add(sqlQueryField.getField().getColumnName()); + selectFieldSqls.add(sqlQueryField.renderForSelect()); + }); }); + // Build the list of entity tables we need to query or join. + Entity singleEntity = entities.iterator().next(); + EntityFilter singleEntityFilter = filters.get(singleEntity); + + // get a list of attributes filtered on, if implemented for the filter + List filterAttributeNames = + singleEntityFilter != null + ? singleEntityFilter.getFilterAttributes().stream().map(Attribute::getName).toList() + : List.of(); + + // default: use entityMain table + // check if: entity is optimized for search on filterAttributes & + // (either search table contains entityMain fields OR all selectFields) + IndexTable entityTable = underlay.getIndexSchema().getEntityMain(singleEntity.getName()); + if (singleEntity.containsOptimizeSearchByAttributes(filterAttributeNames)) { + ITEntitySearchByAttributes searchTable = + underlay.getIndexSchema().getEntitySearchByAttributes(singleEntity, filterAttributeNames); + if (searchTable.includeEntityMainColumns() + || new HashSet<>(searchTable.getColumnNames()).containsAll(selectFieldNames)) { + entityTable = searchTable; + } + } + // SELECT [select fields] FROM [entity main] JOIN [join tables] sql.append("SELECT ") .append(String.join(", ", selectFieldSqls)) .append(" FROM ") - .append(entityMain.getTablePointer().render()); + .append(entityTable.getTablePointer().render()); // JOIN [join tables] if (!joinTableSqls.isEmpty()) { @@ -443,7 +467,6 @@ private SqlQueryRequest buildQuerySqlAgainstIndexData( } // WHERE [filter] - EntityFilter singleEntityFilter = filters.get(singleEntity); if (singleEntityFilter != null) { sql.append(" WHERE ") .append(bqTranslator.translator(singleEntityFilter).buildSql(sqlParams, null)); diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java index 1e8614ad1..a7301c04d 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java @@ -56,7 +56,7 @@ private String buildSqlForSingleFilter(SqlParams sqlParams, String tableAlias) { Entity entity = attributeFilter.getEntity(); ITEntityMain entityTable = attributeFilter.getUnderlay().getIndexSchema().getEntityMain(entity.getName()); - Attribute attribute = attributeFilter.getAttribute(); + Attribute attribute = attributeFilter.getFilterAttributes().get(0); SqlField valueField = fetchSelectField(entityTable, attribute); // search attribute-specific table if attribute is optimized for search @@ -110,7 +110,7 @@ private String buildSqlForList(SqlParams sqlParams, String tableAlias) { filter, sqlParams, tableAlias, - fetchSelectField(entityTable, filter.getAttribute()))) + fetchSelectField(entityTable, filter.getFilterAttributes().get(0)))) .toList() .toArray(new String[0]); @@ -126,7 +126,9 @@ private String searchOptimizedSql(AttributeFilter filter, String tableAlias, Str filter .getUnderlay() .getIndexSchema() - .getEntitySearchByAttributes(firstEntity, List.of(filter.getAttribute().getName())); + .getEntitySearchByAttributes( + firstEntity, + filter.getFilterAttributes().stream().map(Attribute::getName).toList()); SqlQueryField id = SqlQueryField.of(fetchSelectField(searchTable, firstEntity.getIdAttribute())); return id.renderForWhere(tableAlias) @@ -157,16 +159,18 @@ private String buildWhereSql( @Override public boolean isFilterOnAttribute(Attribute attribute) { - return attributeFilter != null && attribute.equals(attributeFilter.getAttribute()); + return attributeFilter != null + && attribute.equals(attributeFilter.getFilterAttributes().get(0)); } public static boolean canMergeTranslation(List attributeFilters) { // Can merge (AND) the 'where' clauses if are all optimized on search together AttributeFilter firstFilter = attributeFilters.get(0); Entity firstEntity = firstFilter.getEntity(); - List firstAttributeName = List.of(firstFilter.getAttribute().getName()); + List attributeNames = + firstFilter.getFilterAttributes().stream().map(Attribute::getName).toList(); - if (!firstEntity.containsOptimizeSearchByAttributes(firstAttributeName)) { + if (!firstEntity.containsOptimizeSearchByAttributes(List.of(attributeNames.get(0)))) { // first attribute itself is not optimized for search return false; } @@ -175,7 +179,7 @@ public static boolean canMergeTranslation(List attributeFilters firstFilter .getUnderlay() .getIndexSchema() - .getEntitySearchByAttributes(firstEntity, firstAttributeName) + .getEntitySearchByAttributes(firstEntity, attributeNames) .getAttributeNames(); // check if all attributes in the filters are in the same search table for the same entity @@ -183,6 +187,7 @@ public static boolean canMergeTranslation(List attributeFilters .allMatch( filter -> filter.getEntity().getName().equals(firstEntity.getName()) - && searchTableAttributes.contains(filter.getAttribute().getName())); + && searchTableAttributes.contains( + filter.getFilterAttributes().get(0).getName())); } } diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQTextSearchFilterTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQTextSearchFilterTranslator.java index cc28b47d7..de274fe9f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQTextSearchFilterTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQTextSearchFilterTranslator.java @@ -8,6 +8,7 @@ import bio.terra.tanagra.query.sql.translator.ApiTranslator; import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.indextable.ITEntityMain; +import java.util.List; import java.util.Map; public class BQTextSearchFilterTranslator extends ApiFilterTranslator { @@ -28,14 +29,15 @@ public String buildSql(SqlParams sqlParams, String tableAlias) { .getUnderlay() .getIndexSchema() .getEntityMain(textSearchFilter.getEntity().getName()); - SqlField textSearchField; - if (textSearchFilter.isForSpecificAttribute()) { - // Search only on the specified attribute. - textSearchField = fetchSelectField(indexTable, textSearchFilter.getAttribute()); - } else { - // Search the text index specified in the underlay config. - textSearchField = indexTable.getTextSearchField(); - } + + // Search the text index specified in the underlay config if not filtered + // on a specific attribute + List filterAttributes = textSearchFilter.getFilterAttributes(); + SqlField textSearchField = + filterAttributes.isEmpty() + ? indexTable.getTextSearchField() + : fetchSelectField(indexTable, textSearchFilter.getFilterAttributes().get(0)); + return apiTranslator.textSearchFilterSql( textSearchField, textSearchFilter.getOperator(), @@ -46,7 +48,6 @@ public String buildSql(SqlParams sqlParams, String tableAlias) { @Override public boolean isFilterOnAttribute(Attribute attribute) { - return textSearchFilter.isForSpecificAttribute() - && textSearchFilter.getAttribute().equals(attribute); + return textSearchFilter.getFilterAttributes().stream().anyMatch(attr -> attr.equals(attribute)); } } diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/IndexSchema.java b/underlay/src/main/java/bio/terra/tanagra/underlay/IndexSchema.java index 3ef8cd910..3f0774709 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/IndexSchema.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/IndexSchema.java @@ -210,8 +210,7 @@ private static void fromConfigEntity( entityGroupsWithCount.add(szCriteriaOccurrence.name); } }); - entityMainTables.put( - szEntity.name, + ITEntityMain entityMain = new ITEntityMain( nameHelper, szBigQueryIndexData, @@ -219,7 +218,8 @@ private static void fromConfigEntity( szEntity.attributes, szEntity.hierarchies, szEntity.textSearch != null, - entityGroupsWithCount)); + entityGroupsWithCount); + entityMainTables.put(szEntity.name, entityMain); // EntityLevelDisplayHints table. entityLevelDisplayHintTables.put( @@ -232,7 +232,7 @@ private static void fromConfigEntity( attributeSearch -> entitySearchByAttributesTables.add( new ITEntitySearchByAttributes( - nameHelper, szBigQueryIndexData, szEntity, attributeSearch))); + nameHelper, szBigQueryIndexData, entityMain, szEntity, attributeSearch))); } szEntity.hierarchies.forEach( diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntitySearchByAttributes.java b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntitySearchByAttributes.java index 37f39b633..38ed35c1f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntitySearchByAttributes.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntitySearchByAttributes.java @@ -1,9 +1,7 @@ package bio.terra.tanagra.underlay.indextable; import bio.terra.tanagra.underlay.ColumnSchema; -import bio.terra.tanagra.underlay.ConfigReader; import bio.terra.tanagra.underlay.NameHelper; -import bio.terra.tanagra.underlay.serialization.SZAttribute; import bio.terra.tanagra.underlay.serialization.SZAttributeSearch; import bio.terra.tanagra.underlay.serialization.SZBigQuery; import bio.terra.tanagra.underlay.serialization.SZEntity; @@ -13,44 +11,51 @@ public class ITEntitySearchByAttributes extends IndexTable { public static final String TABLE_NAME = "ESA"; - private final String entity; private final ImmutableList attributeNames; private final boolean includeNullValues; + private final boolean includeEntityMainColumns; private final ImmutableList columnSchemas; public ITEntitySearchByAttributes( NameHelper namer, SZBigQuery.IndexData bigQueryConfig, + ITEntityMain entityMain, SZEntity entity, SZAttributeSearch attributeSearch) { super(namer, bigQueryConfig); this.entity = entity.name; - // id + columns in tables optimized for search (clustered) cannot be repeated - List attrNames = new ArrayList<>(); - List attrSchemas = new ArrayList<>(); - - SZAttribute idAttribute = entity.getAttribute(entity.idAttribute); - attrSchemas.add( - new ColumnSchema( - idAttribute.name, ConfigReader.deserializeDataType(idAttribute.dataType), false, true)); + this.attributeNames = + ImmutableList.copyOf( + attributeSearch.attributes.stream() + .map(attribute -> entity.getAttribute(attribute).name) + .toList()); - attributeSearch.attributes.forEach( - attribute -> { - SZAttribute searchAttribute = entity.getAttribute(attribute); - attrNames.add(searchAttribute.name); - attrSchemas.add( - new ColumnSchema( - searchAttribute.name, - ConfigReader.deserializeDataType(searchAttribute.dataType), - false, - false)); - }); + List attrSchemas = new ArrayList<>(); + entityMain + .getColumnSchemas() + .forEach( + colSchema -> { + String colName = colSchema.getColumnName(); - this.attributeNames = ImmutableList.copyOf(attrNames); - this.includeNullValues = attributeSearch.includeNullValues; + if (entity.idAttribute.equals(colName) || (attributeNames.contains(colName))) { + // id + attr in tables optimized for search (clustered) cannot be repeated + attrSchemas.add( + new ColumnSchema( + colName, colSchema.getDataType(), false, colSchema.isRequired())); + } else if (attributeSearch.includeEntityMainColumns) { + attrSchemas.add( + new ColumnSchema( + colName, + colSchema.getDataType(), + colSchema.isDataTypeRepeated(), + colSchema.isRequired())); + } + }); this.columnSchemas = ImmutableList.copyOf(attrSchemas); + this.includeNullValues = attributeSearch.includeNullValues; + this.includeEntityMainColumns = attributeSearch.includeEntityMainColumns; } @Override @@ -74,4 +79,8 @@ public ImmutableList getAttributeNames() { public boolean includeNullValues() { return includeNullValues; } + + public boolean includeEntityMainColumns() { + return includeEntityMainColumns; + } } diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/IndexTable.java b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/IndexTable.java index 1c7ee6b00..391549359 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/IndexTable.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/IndexTable.java @@ -8,6 +8,7 @@ import bio.terra.tanagra.underlay.indextable.ITEntityMain.ColumnTemplate; import bio.terra.tanagra.underlay.serialization.SZBigQuery; import com.google.common.collect.ImmutableList; +import java.util.List; public abstract class IndexTable { private final NameHelper namer; @@ -39,6 +40,10 @@ public BQTable getTablePointer() { public abstract ImmutableList getColumnSchemas(); + public List getColumnNames() { + return getColumnSchemas().stream().map(ColumnSchema::getColumnName).toList(); + } + public boolean isGeneratedIndexTable() { return true; } diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttributeSearch.java b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttributeSearch.java index 38b6b477e..879733875 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttributeSearch.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttributeSearch.java @@ -25,4 +25,14 @@ public class SZAttributeSearch { optional = true, defaultValue = "false") public boolean includeNullValues; + + @AnnotatedField( + name = "SZAttributeSearch.includeEntityMainColumns", + markdown = + "Whether all columns in the entity main table should also be included " + + "in this search table. Improves performance if other attributes are also fetched " + + "when performing this search by attributes.", + optional = true, + defaultValue = "false") + public boolean includeEntityMainColumns; } diff --git a/underlay/src/main/java/bio/terra/tanagra/utils/GoogleBigQuery.java b/underlay/src/main/java/bio/terra/tanagra/utils/GoogleBigQuery.java index 091021862..bac51ad32 100644 --- a/underlay/src/main/java/bio/terra/tanagra/utils/GoogleBigQuery.java +++ b/underlay/src/main/java/bio/terra/tanagra/utils/GoogleBigQuery.java @@ -12,6 +12,7 @@ import com.google.cloud.bigquery.DatasetId; import com.google.cloud.bigquery.ExtractJobConfiguration; import com.google.cloud.bigquery.Job; +import com.google.cloud.bigquery.JobId; import com.google.cloud.bigquery.JobInfo; import com.google.cloud.bigquery.JobStatistics; import com.google.cloud.bigquery.QueryJobConfiguration; @@ -252,23 +253,24 @@ public TableResult runQuery( destinationTable, clustering, queryTimeout); + JobId jobId = JobId.of(); + LOGGER.info("BQ SQL run: jobId: {}, sql: {}", jobId.getJob(), sql); return callWithRetries( () -> { - Job job = bigQuery.create(JobInfo.newBuilder(queryJobConfig.getLeft()).build()); + Job job = + bigQuery.create(JobInfo.newBuilder(queryJobConfig.getLeft()).setJobId(jobId).build()); TableResult tableResult = job.getQueryResults( queryJobConfig.getRight().toArray(new BigQuery.QueryResultsOption[0])); Job completedJob = job.waitFor(); JobStatistics.QueryStatistics stats = completedJob.getStatistics(); - Long totalBytesProcessed = stats.getTotalBytesProcessed(); - String jobId = completedJob.getJobId().getJob(); LOGGER.info( - "BQ job: {}, total rows: {}, cache hit: {}, total data processed: {}MB, sql: {}", - jobId, + "BQ SQL run stats: jobId={}, totalRows={}, cacheHit={}, totalMegaBytesProcessed={}, totalSlotMs={}", + jobId.getJob(), tableResult.getTotalRows(), stats.getCacheHit(), - totalBytesProcessed / 1_048_576, - sql); + stats.getTotalBytesProcessed() / 1_048_576, + stats.getTotalSlotMs()); return tableResult; }, "Error running query: " + queryJobConfig.getLeft().getQuery()); @@ -284,17 +286,20 @@ public JobStatistics.QueryStatistics dryRunQuery( Pair> queryJobConfig = buildQueryJobConfig( sql, true, queryParams, pageToken, pageSize, destinationTable, clustering, null); + JobId jobId = JobId.of(); + LOGGER.info("BQ SQL dry run sql: jobId: {}, sql: {}", jobId.getJob(), sql); return callWithRetries( () -> { - Job job = bigQuery.create(JobInfo.newBuilder(queryJobConfig.getLeft()).build()); - JobStatistics.QueryStatistics queryStatistics = job.getStatistics(); + Job job = + bigQuery.create(JobInfo.newBuilder(queryJobConfig.getLeft()).setJobId(jobId).build()); + JobStatistics.QueryStatistics stats = job.getStatistics(); LOGGER.info( - "SQL dry run: statementType={}, cacheHit={}, totalBytesProcessed={}, totalSlotMs={}", - queryStatistics.getStatementType(), - queryStatistics.getCacheHit(), - queryStatistics.getTotalBytesProcessed(), - queryStatistics.getTotalSlotMs()); - return queryStatistics; + "BQ SQL dry run stats: statementType={}, cacheHit={}, totalMegaBytesProcessed={}, totalSlotMs={}", + stats.getStatementType(), + stats.getCacheHit(), + stats.getTotalBytesProcessed() / 1_048_576, + stats.getTotalSlotMs()); + return stats; }, "Error getting job statistics for query: " + queryJobConfig.getLeft().getQuery()); } diff --git a/underlay/src/main/resources/config/datamapping/aouRT/entity/conditionOccurrence/entity.json b/underlay/src/main/resources/config/datamapping/aouRT/entity/conditionOccurrence/entity.json index aa4aee970..513cb3ca6 100644 --- a/underlay/src/main/resources/config/datamapping/aouRT/entity/conditionOccurrence/entity.json +++ b/underlay/src/main/resources/config/datamapping/aouRT/entity/conditionOccurrence/entity.json @@ -77,6 +77,9 @@ ], "idAttribute": "id", "optimizeGroupByAttributes": [ "condition_concept_id" ], + "optimizeSearchByAttributes": [ + { "attributes": [ "person_id" ], "includeEntityMainColumns": true } + ], "temporalQuery": { "visitDateAttribute": "condition_start_datetime", "visitIdAttribute": "visit_occurrence_id" diff --git a/underlay/src/main/resources/config/datamapping/aouRT/entity/ingredientOccurrence/entity.json b/underlay/src/main/resources/config/datamapping/aouRT/entity/ingredientOccurrence/entity.json index 186938da1..7e8614294 100644 --- a/underlay/src/main/resources/config/datamapping/aouRT/entity/ingredientOccurrence/entity.json +++ b/underlay/src/main/resources/config/datamapping/aouRT/entity/ingredientOccurrence/entity.json @@ -84,6 +84,9 @@ ], "idAttribute": "id", "optimizeGroupByAttributes": [ "drug_concept_id" ], + "optimizeSearchByAttributes": [ + { "attributes": [ "person_id" ], "includeEntityMainColumns": true } + ], "temporalQuery": { "visitDateAttribute": "drug_exposure_start_datetime", "visitIdAttribute": "visit_occurrence_id" diff --git a/underlay/src/main/resources/config/datamapping/aouRT/entity/measurementOccurrence/entity.json b/underlay/src/main/resources/config/datamapping/aouRT/entity/measurementOccurrence/entity.json index d10c81959..ccdbec6d1 100644 --- a/underlay/src/main/resources/config/datamapping/aouRT/entity/measurementOccurrence/entity.json +++ b/underlay/src/main/resources/config/datamapping/aouRT/entity/measurementOccurrence/entity.json @@ -97,6 +97,9 @@ ], "idAttribute": "id", "optimizeGroupByAttributes": [ "measurement_concept_id" ], + "optimizeSearchByAttributes": [ + { "attributes": [ "person_id" ], "includeEntityMainColumns": true } + ], "temporalQuery": { "visitDateAttribute": "measurement_datetime", "visitIdAttribute": "visit_occurrence_id" diff --git a/underlay/src/main/resources/config/datamapping/aouRT/entity/observationOccurrence/entity.json b/underlay/src/main/resources/config/datamapping/aouRT/entity/observationOccurrence/entity.json index a7515d127..152c45bb6 100644 --- a/underlay/src/main/resources/config/datamapping/aouRT/entity/observationOccurrence/entity.json +++ b/underlay/src/main/resources/config/datamapping/aouRT/entity/observationOccurrence/entity.json @@ -97,6 +97,9 @@ ], "idAttribute": "id", "optimizeGroupByAttributes": [ "observation_concept_id" ], + "optimizeSearchByAttributes": [ + { "attributes": [ "person_id" ], "includeEntityMainColumns": true } + ], "temporalQuery": { "visitDateAttribute": "observation_datetime", "visitIdAttribute": "visit_occurrence_id" diff --git a/underlay/src/main/resources/config/datamapping/aouRT/entity/procedureOccurrence/entity.json b/underlay/src/main/resources/config/datamapping/aouRT/entity/procedureOccurrence/entity.json index 61ba2337b..1647687c6 100644 --- a/underlay/src/main/resources/config/datamapping/aouRT/entity/procedureOccurrence/entity.json +++ b/underlay/src/main/resources/config/datamapping/aouRT/entity/procedureOccurrence/entity.json @@ -76,6 +76,9 @@ ], "idAttribute": "id", "optimizeGroupByAttributes": [ "procedure_concept_id" ], + "optimizeSearchByAttributes": [ + { "attributes": [ "person_id" ], "includeEntityMainColumns": true } + ], "temporalQuery": { "visitDateAttribute": "procedure_datetime", "visitIdAttribute": "visit_occurrence_id"