From 063600b9fc682fca2d665c8c11bfcd42b804ae8a Mon Sep 17 00:00:00 2001 From: Brian Freeman Date: Tue, 21 Jan 2025 11:12:24 -0600 Subject: [PATCH] DT-323 Update variant configs (#1138) * DT-323 adding config for variants * updating config for variant search --- .../criteriaselector/variant/selector.json | 15 ++ .../criteriaselector/variant/variant.json | 137 ++++++++++++++++++ .../datamapping/aouCT/entity/variant/all.sql | 49 +++++++ .../aouCT/entity/variant/entity.json | 24 +++ .../variantPerson/entityGroup.json | 14 ++ .../entitygroup/variantPerson/idPairs.sql | 3 + .../variantPerson/rollupCounts.sql | 3 + .../underlay/aouC2024Q3R4/underlay.json | 9 +- .../underlay/aouSC2023Q3R2/underlay.json | 9 +- 9 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json create mode 100644 underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql diff --git a/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json b/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json new file mode 100644 index 000000000..6834b3f0f --- /dev/null +++ b/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json @@ -0,0 +1,15 @@ +{ + "name": "tanagra-variant", + "displayName": "SNP/Indel Variant", + "isEnabledForCohorts": true, + 
"isEnabledForDataFeatureSets": false, + "display": { + "category": "Genomics", + "tags": null + }, + "filterBuilder": "core.FilterableGroupFilterBuilder", + "plugin": "filterableGroup", + "pluginConfig": null, + "pluginConfigFile": "variant.json", + "modifiers": null +} diff --git a/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json b/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json new file mode 100644 index 000000000..702c416a3 --- /dev/null +++ b/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json @@ -0,0 +1,137 @@ +{ + "columns": [ + { + "key": "id", + "widthString": "100%", + "title": "Variant id" + }, + { + "key": "gene", + "widthDouble": 100, + "title": "Gene" + }, + { + "key": "rs_number", + "widthDouble": 100, + "title": "RS number" + }, + { + "key": "consequence", + "widthDouble": 100, + "title": "Consequence" + }, + { + "key": "clinvar_significance", + "widthDouble": 100, + "title": "ClinVar significance" + }, + { + "key": "protein_change", + "widthDouble": 100, + "title": "Protein change" + }, + { + "key": "allele_count", + "widthDouble": 100, + "title": "Allele count" + }, + { + "key": "allele_number", + "widthDouble": 100, + "title": "Allele number" + }, + { + "key": "allele_frequency", + "widthDouble": 100, + "title": "Allele frequency" + }, + { + "key": "t_item_count", + "widthDouble": 150, + "title": "Participant count" + } + ], + "entityGroup": "variantPerson", + "valueConfigs": [ + { + "attribute": "gene", + "title": "Gene" + }, + { + "attribute": "consequence", + "title": "Consequence" + }, + { + "attribute": "clinvar_significance", + "title": "ClinVar significance" + }, + { + "attribute": "allele_count", + "title": "Allele count" + }, + { + "attribute": "allele_number", + "title": "Allele number" + }, + { + "attribute": "allele_frequency", + "title": "Allele frequency" + } + ], + "searchConfigs": [ + { + "name": "RS number", + 
"example": "rs558865434", + "regex": "rs\\d+", + "parameters": [ + { + "attribute": "rs_number", + "operator": "OPERATOR_EQUALS" + } + ] + }, + { + "name": "Variant id", + "example": "20-38623282-G-A", + "regex": "\\d+-\\d+-\\w+-\\w+", + "parameters": [ + { + "attribute": "id", + "operator": "OPERATOR_EQUALS" + } + ] + }, + { + "name": "Genomic region", + "example": "chr20:38623000-38623379", + "regex": "(\\w+):(\\d+)-(\\d+)", + "parameters": [ + { + "attribute": "contig", + "operator": "OPERATOR_EQUALS" + }, + { + "attribute": "position", + "operator": "OPERATOR_GREATER_THAN_OR_EQUAL" + }, + { + "attribute": "position", + "operator": "OPERATOR_LESS_THAN_OR_EQUAL" + } + ] + }, + { + "name": "Gene", + "example": "WFDC2", + "regex": "\\w+", + "displayOrder": -1, + "parameters": [ + { + "attribute": "gene", + "operator": "OPERATOR_EQUALS", + "case": "CASE_UPPER" + } + ] + } + ] +} diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql new file mode 100644 index 000000000..1d119813a --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql @@ -0,0 +1,49 @@ +/* Variant attributes are read from prep_vat rows that are on the canonical transcript or have no transcript. NOTE(review): rows with a NULL transcript are kept whatever their rank, so one vid can yield several rows - confirm this is intended, since vid backs the entity id. */ +WITH ranked_transcripts AS ( + SELECT vid, + consequence, + aa_change, + contig, + position, + dbsnp_rsid, + transcript, + clinvar_classification, + gvs_all_ac, + gvs_all_an, + gvs_all_af, + /* Rank rows within a variant so upstream/downstream-only consequences come last; ties are broken arbitrarily. */ + ROW_NUMBER() OVER( + PARTITION BY vid ORDER BY + CASE ARRAY_TO_STRING(consequence, ', ') + WHEN 'upstream_gene_variant' + THEN 4 + WHEN 'downstream_gene_variant' + THEN 5 + ELSE 1 + END) AS transcript_rank + FROM `${omopDataset}.prep_vat` + WHERE is_canonical_transcript OR transcript IS NULL), + + /* Distinct non-null gene symbols per variant, collected from every prep_vat row. */ + genes AS ( + SELECT vid, ARRAY_AGG(DISTINCT gene_symbol IGNORE NULLS ORDER BY gene_symbol) AS genes + FROM `${omopDataset}.prep_vat` + GROUP BY vid + ) + +SELECT + ranked_transcripts.vid, + genes.genes AS 
gene_symbol, + ranked_transcripts.dbsnp_rsid, + ranked_transcripts.consequence, + ranked_transcripts.aa_change, + ranked_transcripts.clinvar_classification, + ranked_transcripts.gvs_all_ac, + ranked_transcripts.gvs_all_an, + ranked_transcripts.gvs_all_af, + ranked_transcripts.contig, + ranked_transcripts.position +FROM ranked_transcripts +INNER JOIN genes + ON genes.vid = ranked_transcripts.vid +WHERE ranked_transcripts.transcript_rank = 1 OR ranked_transcripts.transcript IS NULL diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json new file mode 100644 index 000000000..a7d37b306 --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json @@ -0,0 +1,24 @@ +{ + "name": "variant", + "allInstancesSqlFile": "all.sql", + "attributes": [ + { "name": "id", "dataType": "STRING", "valueFieldName": "vid" }, + { "name": "gene", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "gene_symbol", "isComputeDisplayHint": true }, + { "name": "rs_number", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "dbsnp_rsid" }, + { "name": "consequence", "dataType": "STRING", "isDataTypeRepeated": true, "isComputeDisplayHint": true }, + { "name": "protein_change", "dataType": "STRING", "valueFieldName": "aa_change" }, + { "name": "clinvar_significance", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "clinvar_classification", "isComputeDisplayHint": true }, + { "name": "allele_count", "dataType": "INT64", "valueFieldName": "gvs_all_ac", "isComputeDisplayHint": true }, + { "name": "allele_number", "dataType": "INT64", "valueFieldName": "gvs_all_an", "isComputeDisplayHint": true }, + { "name": "allele_frequency", "dataType": "DOUBLE", "valueFieldName": "gvs_all_af", "isComputeDisplayHint": true }, + { "name": "contig", "dataType": "STRING" }, + { "name": "position", "dataType": "INT64" } + ], + 
"idAttribute": "id", + "optimizeGroupByAttributes": [ "id" ], + "optimizeSearchByAttributes": [ + { "attributes": [ "gene" ] }, + { "attributes": [ "rs_number" ] }, + { "attributes": [ "contig", "position" ] } + ] +} \ No newline at end of file diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json new file mode 100644 index 000000000..c39900630 --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json @@ -0,0 +1,14 @@ +{ + "name": "variantPerson", + "groupEntity": "variant", + "itemsEntity": "person", + "idPairsSqlFile": "idPairs.sql", + "useSourceIdPairsSql": true, + "groupEntityIdFieldName": "vid", + "itemsEntityIdFieldName": "flattened_person_id", + "rollupCountsSql": { + "sqlFile": "rollupCounts.sql", + "entityIdFieldName": "vid", + "rollupCountFieldName": "num_persons" + } +} diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql new file mode 100644 index 000000000..2c2a98d83 --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql @@ -0,0 +1,3 @@ +/* One distinct (vid, person id) row per pair, flattened from the person_ids array. */ SELECT DISTINCT vid, flattened_person_id +FROM `${omopDataset}.cb_variant_to_person` +CROSS JOIN UNNEST(person_ids) AS flattened_person_id diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql new file mode 100644 index 000000000..92a852ce7 --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql @@ -0,0 +1,3 @@ +SELECT vid, ARRAY_LENGTH(person_ids) AS num_persons +/* num_persons is the length of the person_ids array for each vid. NOTE(review): ARRAY_LENGTH also counts any repeated id inside person_ids, whereas idPairs.sql de-duplicates (vid, person) pairs - confirm the arrays hold unique ids. NOTE(review): {indexIdRegex} has no leading "$" unlike ${omopDataset} - confirm it is substituted before this query runs. Wrap variant_to_person table in a SELECT 
DISTINCT because there is a duplicate row in the test data. */ +FROM (SELECT DISTINCT vid, person_ids FROM `${omopDataset}.cb_variant_to_person` WHERE REGEXP_CONTAINS(vid, r"{indexIdRegex}")) diff --git a/underlay/src/main/resources/config/underlay/aouC2024Q3R4/underlay.json b/underlay/src/main/resources/config/underlay/aouC2024Q3R4/underlay.json index efb1ceb05..5bb39fe90 100644 --- a/underlay/src/main/resources/config/underlay/aouC2024Q3R4/underlay.json +++ b/underlay/src/main/resources/config/underlay/aouC2024Q3R4/underlay.json @@ -59,7 +59,9 @@ "aouRT/surveySocialDeterminantsOfHealth", "aouRT/surveyCovidVaccine", "aouRT/surveyCope", - "aouRT/surveyOccurrence" + "aouRT/surveyOccurrence", + + "aouCT/variant" ], "groupItemsEntityGroups": [ "aouRT/brandIngredientConcept", @@ -79,7 +81,9 @@ "aouRT/weightPerson", "aouRT/bmiPerson", "aouRT/waistCircumferencePerson", - "aouRT/hipCircumferencePerson" + "aouRT/hipCircumferencePerson", + + "aouCT/variantPerson" ], "criteriaOccurrenceEntityGroups": [ "aouRT/conditionPerson", @@ -149,6 +153,7 @@ "aouCT/longReadWGS", "aouCT/globalDiversityArray", "aouCT/structuralVariants", + "aouCT/variant", "aouRT/hasPMData", "aouRT/bloodPressure", "aouRT/heartRate", diff --git a/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json b/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json index 2fe1ee8c2..132c6ea60 100644 --- a/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json +++ b/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json @@ -59,7 +59,9 @@ "aouRT/surveySocialDeterminantsOfHealth", "aouRT/surveyCovidVaccine", "aouRT/surveyCope", - "aouRT/surveyOccurrence" + "aouRT/surveyOccurrence", + + "aouCT/variant" ], "groupItemsEntityGroups": [ "aouRT/brandIngredientConcept", @@ -79,7 +81,9 @@ "aouRT/weightPerson", "aouRT/bmiPerson", "aouRT/waistCircumferencePerson", - "aouRT/hipCircumferencePerson" + "aouRT/hipCircumferencePerson", + + "aouCT/variantPerson" ], 
"criteriaOccurrenceEntityGroups": [ "aouRT/conditionPerson", @@ -149,6 +153,7 @@ "aouCT/longReadWGS", "aouCT/globalDiversityArray", "aouCT/structuralVariants", + "aouCT/variant", "aouRT/hasPMData", "aouRT/bloodPressure", "aouRT/heartRate",