Skip to content

Commit

Permalink
DT-323 Update variant configs (#1138)
Browse files Browse the repository at this point in the history
* DT-323 adding config for variants

* updating config for variant search
  • Loading branch information
freemabd authored Jan 21, 2025
1 parent 7eac2f0 commit 063600b
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"name": "tanagra-variant",
"displayName": "SNP/Indel Variant",
"isEnabledForCohorts": true,
"isEnabledForDataFeatureSets": false,
"display": {
"category": "Genomics",
"tags": null
},
"filterBuilder": "core.FilterableGroupFilterBuilder",
"plugin": "filterableGroup",
"pluginConfig": null,
"pluginConfigFile": "variant.json",
"modifiers": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
{
"columns": [
{
"key": "id",
"widthString": "100%",
"title": "Variant id"
},
{
"key": "gene",
"widthDouble": 100,
"title": "Gene"
},
{
"key": "rs_number",
"widthDouble": 100,
"title": "RS number"
},
{
"key": "consequence",
"widthDouble": 100,
"title": "Consequence"
},
{
"key": "clinvar_significance",
"widthDouble": 100,
"title": "ClinVar significance"
},
{
"key": "protein_change",
"widthDouble": 100,
"title": "Protein change"
},
{
"key": "allele_count",
"widthDouble": 100,
"title": "Allele count"
},
{
"key": "allele_number",
"widthDouble": 100,
"title": "Allele number"
},
{
"key": "allele_frequency",
"widthDouble": 100,
"title": "Allele frequency"
},
{
"key": "t_item_count",
"widthDouble": 150,
"title": "Participant count"
}
],
"entityGroup": "variantPerson",
"valueConfigs": [
{
"attribute": "gene",
"title": "Gene"
},
{
"attribute": "consequence",
"title": "Consequence"
},
{
"attribute": "clinvar_significance",
"title": "ClinVar significance"
},
{
"attribute": "allele_count",
"title": "Allele count"
},
{
"attribute": "allele_number",
"title": "Allele number"
},
{
"attribute": "allele_frequency",
"title": "Allele frequency"
}
],
"searchConfigs": [
{
"name": "RS number",
"example": "rs558865434",
"regex": "rs\\d+",
"parameters": [
{
"attribute": "rs_number",
"operator": "OPERATOR_EQUALS"
}
]
},
{
"name": "Variant id",
"example": "20-38623282-G-A",
"regex": "\\d+-\\d+-\\w+-\\w+",
"parameters": [
{
"attribute": "id",
"operator": "OPERATOR_EQUALS"
}
]
},
{
"name": "Genomic region",
"example": "chr20:38623000-38623379",
"regex": "(\\w+):(\\d+)-(\\d+)",
"parameters": [
{
"attribute": "contig",
"operator": "OPERATOR_EQUALS"
},
{
"attribute": "position",
"operator": "OPERATOR_GREATER_THAN_OR_EQUAL"
},
{
"attribute": "position",
"operator": "OPERATOR_LESS_THAN_OR_EQUAL"
}
]
},
{
"name": "Gene",
"example": "WFDC2",
"regex": "\\w+",
"displayOrder": -1,
"parameters": [
{
"attribute": "gene",
"operator": "OPERATOR_EQUALS",
"case": "CASE_UPPER"
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
 * All-instances query for the variant entity.
 *
 * Reads prep_vat and returns, for each vid, the annotation row ranked first by
 * the consequence rule below, plus every row whose transcript is NULL. Each
 * returned row is paired with the sorted, de-duplicated list of gene symbols
 * recorded for that vid.
 *
 * Output columns: vid, gene_symbol (array), dbsnp_rsid, consequence, aa_change,
 * clinvar_classification, gvs_all_ac, gvs_all_an, gvs_all_af, contig, position.
 */
WITH ranked_transcripts AS (
    /*
     * Candidate rows: canonical transcripts, or rows with no transcript at all.
     * Within a vid, rows whose consequence list is exactly
     * 'upstream_gene_variant' (4) or 'downstream_gene_variant' (5) rank after
     * every other row (1). Rows that share a rank are ordered arbitrarily.
     */
    SELECT
        vid,
        consequence,
        aa_change,
        contig,
        position,
        dbsnp_rsid,
        transcript,
        clinvar_classification,
        gvs_all_ac,
        gvs_all_an,
        gvs_all_af,
        ROW_NUMBER() OVER (
            PARTITION BY vid
            ORDER BY
                CASE ARRAY_TO_STRING(consequence, ', ')
                    WHEN 'upstream_gene_variant' THEN 4
                    WHEN 'downstream_gene_variant' THEN 5
                    ELSE 1
                END
        ) AS transcript_rank
    FROM `${omopDataset}.prep_vat`
    WHERE is_canonical_transcript OR transcript IS NULL
),

variant_genes AS (
    /* Every distinct non-NULL gene symbol seen for a vid, in alphabetical order. */
    SELECT
        vid,
        ARRAY_AGG(DISTINCT gene_symbol IGNORE NULLS ORDER BY gene_symbol) AS genes
    FROM `${omopDataset}.prep_vat`
    GROUP BY vid
)

SELECT
    ranked_transcripts.vid,
    variant_genes.genes AS gene_symbol,
    ranked_transcripts.dbsnp_rsid,
    ranked_transcripts.consequence,
    ranked_transcripts.aa_change,
    ranked_transcripts.clinvar_classification,
    ranked_transcripts.gvs_all_ac,
    ranked_transcripts.gvs_all_an,
    ranked_transcripts.gvs_all_af,
    ranked_transcripts.contig,
    ranked_transcripts.position
FROM ranked_transcripts
INNER JOIN variant_genes
    ON variant_genes.vid = ranked_transcripts.vid
/* Keep the top-ranked row per vid, and any row that has no transcript. */
WHERE ranked_transcripts.transcript_rank = 1
    OR ranked_transcripts.transcript IS NULL
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "variant",
"allInstancesSqlFile": "all.sql",
"attributes": [
{ "name": "id", "dataType": "STRING", "valueFieldName": "vid" },
{ "name": "gene", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "gene_symbol", "isComputeDisplayHint": true },
{ "name": "rs_number", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "dbsnp_rsid" },
{ "name": "consequence", "dataType": "STRING", "isDataTypeRepeated": true, "isComputeDisplayHint": true },
{ "name": "protein_change", "dataType": "STRING", "valueFieldName": "aa_change" },
{ "name": "clinvar_significance", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "clinvar_classification", "isComputeDisplayHint": true },
{ "name": "allele_count", "dataType": "INT64", "valueFieldName": "gvs_all_ac", "isComputeDisplayHint": true },
{ "name": "allele_number", "dataType": "INT64", "valueFieldName": "gvs_all_an", "isComputeDisplayHint": true },
{ "name": "allele_frequency", "dataType": "DOUBLE", "valueFieldName": "gvs_all_af", "isComputeDisplayHint": true },
{ "name": "contig", "dataType": "STRING" },
{ "name": "position", "dataType": "INT64" }
],
"idAttribute": "id",
"optimizeGroupByAttributes": [ "id" ],
"optimizeSearchByAttributes": [
{ "attributes": [ "gene" ] },
{ "attributes": [ "rs_number" ] },
{ "attributes": [ "contig", "position" ] }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"name": "variantPerson",
"groupEntity": "variant",
"itemsEntity": "person",
"idPairsSqlFile": "idPairs.sql",
"useSourceIdPairsSql": true,
"groupEntityIdFieldName": "vid",
"itemsEntityIdFieldName": "flattened_person_id",
"rollupCountsSql": {
"sqlFile": "rollupCounts.sql",
"entityIdFieldName": "vid",
"rollupCountFieldName": "num_persons"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/*
 * Id pairs for the variant-person relationship: expands each row's person_ids
 * array into one (vid, flattened_person_id) row per element. DISTINCT collapses
 * repeated pairs.
 */
SELECT DISTINCT
    variant_to_person.vid,
    flattened_person_id
FROM `${omopDataset}.cb_variant_to_person` AS variant_to_person
CROSS JOIN UNNEST(variant_to_person.person_ids) AS flattened_person_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/*
 * Rollup counts per variant: num_persons is the length of the row's person_ids
 * array, restricted to vids matching the {indexIdRegex} pattern.
 * NOTE(review): {indexIdRegex} is presumably substituted by the caller before
 * the query runs - confirm against the indexing code.
 */
SELECT
    deduped_variants.vid,
    ARRAY_LENGTH(deduped_variants.person_ids) AS num_persons
FROM (
    /* Wrap variant_to_person table in a SELECT DISTINCT because there is a duplicate row in the test data. */
    SELECT DISTINCT
        vid,
        person_ids
    FROM `${omopDataset}.cb_variant_to_person`
    WHERE REGEXP_CONTAINS(vid, r"{indexIdRegex}")
) AS deduped_variants
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@
"aouRT/surveySocialDeterminantsOfHealth",
"aouRT/surveyCovidVaccine",
"aouRT/surveyCope",
"aouRT/surveyOccurrence"
"aouRT/surveyOccurrence",

"aouCT/variant"
],
"groupItemsEntityGroups": [
"aouRT/brandIngredientConcept",
Expand All @@ -79,7 +81,9 @@
"aouRT/weightPerson",
"aouRT/bmiPerson",
"aouRT/waistCircumferencePerson",
"aouRT/hipCircumferencePerson"
"aouRT/hipCircumferencePerson",

"aouCT/variantPerson"
],
"criteriaOccurrenceEntityGroups": [
"aouRT/conditionPerson",
Expand Down Expand Up @@ -149,6 +153,7 @@
"aouCT/longReadWGS",
"aouCT/globalDiversityArray",
"aouCT/structuralVariants",
"aouCT/variant",
"aouRT/hasPMData",
"aouRT/bloodPressure",
"aouRT/heartRate",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@
"aouRT/surveySocialDeterminantsOfHealth",
"aouRT/surveyCovidVaccine",
"aouRT/surveyCope",
"aouRT/surveyOccurrence"
"aouRT/surveyOccurrence",

"aouCT/variant"
],
"groupItemsEntityGroups": [
"aouRT/brandIngredientConcept",
Expand All @@ -79,7 +81,9 @@
"aouRT/weightPerson",
"aouRT/bmiPerson",
"aouRT/waistCircumferencePerson",
"aouRT/hipCircumferencePerson"
"aouRT/hipCircumferencePerson",

"aouCT/variantPerson"
],
"criteriaOccurrenceEntityGroups": [
"aouRT/conditionPerson",
Expand Down Expand Up @@ -149,6 +153,7 @@
"aouCT/longReadWGS",
"aouCT/globalDiversityArray",
"aouCT/structuralVariants",
"aouCT/variant",
"aouRT/hasPMData",
"aouRT/bloodPressure",
"aouRT/heartRate",
Expand Down

0 comments on commit 063600b

Please sign in to comment.