-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DT-323 Update variant configs (#1138)
* DT-323 adding config for variants
* updating config for variant search
- Loading branch information
Showing
9 changed files
with
259 additions
and
4 deletions.
There are no files selected for viewing
15 changes: 15 additions & 0 deletions
15
underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"name": "tanagra-variant", | ||
"displayName": "SNP/Indel Variant", | ||
"isEnabledForCohorts": true, | ||
"isEnabledForDataFeatureSets": false, | ||
"display": { | ||
"category": "Genomics", | ||
"tags": null | ||
}, | ||
"filterBuilder": "core.FilterableGroupFilterBuilder", | ||
"plugin": "filterableGroup", | ||
"pluginConfig": null, | ||
"pluginConfigFile": "variant.json", | ||
"modifiers": null | ||
} |
137 changes: 137 additions & 0 deletions
137
underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
{ | ||
"columns": [ | ||
{ | ||
"key": "id", | ||
"widthString": "100%", | ||
"title": "Variant id" | ||
}, | ||
{ | ||
"key": "gene", | ||
"widthDouble": 100, | ||
"title": "Gene" | ||
}, | ||
{ | ||
"key": "rs_number", | ||
"widthDouble": 100, | ||
"title": "RS number" | ||
}, | ||
{ | ||
"key": "consequence", | ||
"widthDouble": 100, | ||
"title": "Consequence" | ||
}, | ||
{ | ||
"key": "clinvar_significance", | ||
"widthDouble": 100, | ||
"title": "ClinVar significance" | ||
}, | ||
{ | ||
"key": "protein_change", | ||
"widthDouble": 100, | ||
"title": "Protein change" | ||
}, | ||
{ | ||
"key": "allele_count", | ||
"widthDouble": 100, | ||
"title": "Allele count" | ||
}, | ||
{ | ||
"key": "allele_number", | ||
"widthDouble": 100, | ||
"title": "Allele number" | ||
}, | ||
{ | ||
"key": "allele_frequency", | ||
"widthDouble": 100, | ||
"title": "Allele frequency" | ||
}, | ||
{ | ||
"key": "t_item_count", | ||
"widthDouble": 150, | ||
"title": "Participant count" | ||
} | ||
], | ||
"entityGroup": "variantPerson", | ||
"valueConfigs": [ | ||
{ | ||
"attribute": "gene", | ||
"title": "Gene" | ||
}, | ||
{ | ||
"attribute": "consequence", | ||
"title": "Consequence" | ||
}, | ||
{ | ||
"attribute": "clinvar_significance", | ||
"title": "ClinVar significance" | ||
}, | ||
{ | ||
"attribute": "allele_count", | ||
"title": "Allele count" | ||
}, | ||
{ | ||
"attribute": "allele_number", | ||
"title": "Allele number" | ||
}, | ||
{ | ||
"attribute": "allele_frequency", | ||
"title": "Allele frequency" | ||
} | ||
], | ||
"searchConfigs": [ | ||
{ | ||
"name": "RS number", | ||
"example": "rs558865434", | ||
"regex": "rs\\d+", | ||
"parameters": [ | ||
{ | ||
"attribute": "rs_number", | ||
"operator": "OPERATOR_EQUALS" | ||
} | ||
] | ||
}, | ||
{ | ||
"name": "Variant id", | ||
"example": "20-38623282-G-A", | ||
"regex": "\\d+-\\d+-\\w+-\\w+", | ||
"parameters": [ | ||
{ | ||
"attribute": "id", | ||
"operator": "OPERATOR_EQUALS" | ||
} | ||
] | ||
}, | ||
{ | ||
"name": "Genomic region", | ||
"example": "chr20:38623000-38623379", | ||
"regex": "(\\w+):(\\d+)-(\\d+)", | ||
"parameters": [ | ||
{ | ||
"attribute": "contig", | ||
"operator": "OPERATOR_EQUALS" | ||
}, | ||
{ | ||
"attribute": "position", | ||
"operator": "OPERATOR_GREATER_THAN_OR_EQUAL" | ||
}, | ||
{ | ||
"attribute": "position", | ||
"operator": "OPERATOR_LESS_THAN_OR_EQUAL" | ||
} | ||
] | ||
}, | ||
{ | ||
"name": "Gene", | ||
"example": "WFDC2", | ||
"regex": "\\w+", | ||
"displayOrder": -1, | ||
"parameters": [ | ||
{ | ||
"attribute": "gene", | ||
"operator": "OPERATOR_EQUALS", | ||
"case": "CASE_UPPER" | ||
} | ||
] | ||
} | ||
] | ||
} |
49 changes: 49 additions & 0 deletions
49
underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/* Variant entity source query: annotation columns come from the ranked | ||
 * candidate rows of prep_vat, gene symbols are aggregated per vid. */ | ||
WITH ranked_transcripts AS ( | ||
    /* Number the candidate rows of each vid. Rows whose joined consequence | ||
     * list equals 'upstream_gene_variant' (4) or 'downstream_gene_variant' (5) | ||
     * sort after every other consequence (1). | ||
     * NOTE(review): rows sharing the same sort value are numbered in an | ||
     * arbitrary order; add a tie-breaker if a stable pick is required. */ | ||
    SELECT | ||
        vid, | ||
        consequence, | ||
        aa_change, | ||
        contig, | ||
        position, | ||
        dbsnp_rsid, | ||
        transcript, | ||
        clinvar_classification, | ||
        gvs_all_ac, | ||
        gvs_all_an, | ||
        gvs_all_af, | ||
        ROW_NUMBER() OVER ( | ||
            PARTITION BY vid | ||
            ORDER BY | ||
                CASE ARRAY_TO_STRING(consequence, ', ') | ||
                    WHEN 'upstream_gene_variant' THEN 4 | ||
                    WHEN 'downstream_gene_variant' THEN 5 | ||
                    ELSE 1 | ||
                END | ||
        ) AS transcript_rank | ||
    FROM `${omopDataset}.prep_vat` | ||
    /* Only canonical-transcript rows, or rows that carry no transcript. */ | ||
    WHERE is_canonical_transcript OR transcript IS NULL | ||
), | ||
genes_by_variant AS ( | ||
    /* Distinct non-NULL gene symbols of each vid, sorted, as one array. */ | ||
    SELECT | ||
        vid, | ||
        ARRAY_AGG(DISTINCT gene_symbol IGNORE NULLS ORDER BY gene_symbol) AS genes | ||
    FROM `${omopDataset}.prep_vat` | ||
    GROUP BY vid | ||
) | ||
SELECT | ||
    rt.vid, | ||
    g.genes AS gene_symbol, | ||
    rt.dbsnp_rsid, | ||
    rt.consequence, | ||
    rt.aa_change, | ||
    rt.clinvar_classification, | ||
    rt.gvs_all_ac, | ||
    rt.gvs_all_an, | ||
    rt.gvs_all_af, | ||
    rt.contig, | ||
    rt.position | ||
FROM ranked_transcripts AS rt | ||
INNER JOIN genes_by_variant AS g | ||
    ON g.vid = rt.vid | ||
/* Keep the first-ranked row of each vid plus every row without a transcript. | ||
 * NOTE(review): this can return more than one row for a vid that has both | ||
 * kinds of rows; confirm that is intended. */ | ||
WHERE rt.transcript_rank = 1 OR rt.transcript IS NULL |
24 changes: 24 additions & 0 deletions
24
underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{ | ||
"name": "variant", | ||
"allInstancesSqlFile": "all.sql", | ||
"attributes": [ | ||
{ "name": "id", "dataType": "STRING", "valueFieldName": "vid" }, | ||
{ "name": "gene", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "gene_symbol", "isComputeDisplayHint": true }, | ||
{ "name": "rs_number", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "dbsnp_rsid" }, | ||
{ "name": "consequence", "dataType": "STRING", "isDataTypeRepeated": true, "isComputeDisplayHint": true }, | ||
{ "name": "protein_change", "dataType": "STRING", "valueFieldName": "aa_change" }, | ||
{ "name": "clinvar_significance", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "clinvar_classification", "isComputeDisplayHint": true }, | ||
{ "name": "allele_count", "dataType": "INT64", "valueFieldName": "gvs_all_ac", "isComputeDisplayHint": true }, | ||
{ "name": "allele_number", "dataType": "INT64", "valueFieldName": "gvs_all_an", "isComputeDisplayHint": true }, | ||
{ "name": "allele_frequency", "dataType": "DOUBLE", "valueFieldName": "gvs_all_af", "isComputeDisplayHint": true }, | ||
{ "name": "contig", "dataType": "STRING" }, | ||
{ "name": "position", "dataType": "INT64" } | ||
], | ||
"idAttribute": "id", | ||
"optimizeGroupByAttributes": [ "id" ], | ||
"optimizeSearchByAttributes": [ | ||
{ "attributes": [ "gene" ] }, | ||
{ "attributes": [ "rs_number" ] }, | ||
{ "attributes": [ "contig", "position" ] } | ||
] | ||
} |
14 changes: 14 additions & 0 deletions
14
...ay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"name": "variantPerson", | ||
"groupEntity": "variant", | ||
"itemsEntity": "person", | ||
"idPairsSqlFile": "idPairs.sql", | ||
"useSourceIdPairsSql": true, | ||
"groupEntityIdFieldName": "vid", | ||
"itemsEntityIdFieldName": "flattened_person_id", | ||
"rollupCountsSql": { | ||
"sqlFile": "rollupCounts.sql", | ||
"entityIdFieldName": "vid", | ||
"rollupCountFieldName": "num_persons" | ||
} | ||
} |
3 changes: 3 additions & 0 deletions
3
underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/* One distinct (vid, person id) pair per element of each row's person_ids array. */ | ||
SELECT DISTINCT | ||
    variant_people.vid, | ||
    flattened_person_id | ||
FROM `${omopDataset}.cb_variant_to_person` AS variant_people | ||
CROSS JOIN UNNEST(variant_people.person_ids) AS flattened_person_id |
3 changes: 3 additions & 0 deletions
3
...ay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/* Rollup count per variant: the length of the vid's person_ids array. */ | ||
WITH distinct_variant_rows AS ( | ||
    /* Wrap variant_to_person table in a SELECT DISTINCT because there is a duplicate row in the test data. */ | ||
    SELECT DISTINCT | ||
        vid, | ||
        person_ids | ||
    FROM `${omopDataset}.cb_variant_to_person` | ||
    /* Restrict to the vids matching the {indexIdRegex} placeholder pattern. */ | ||
    WHERE REGEXP_CONTAINS(vid, r"{indexIdRegex}") | ||
) | ||
SELECT | ||
    vid, | ||
    ARRAY_LENGTH(person_ids) AS num_persons | ||
FROM distinct_variant_rows |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters