Skip to content

Commit

Permalink
Merge pull request #323 from bioinfo-chru-strasbourg/fix_rename_info_tags
Browse files Browse the repository at this point in the history

fix info/tags rename #307, docs #4
  • Loading branch information
antonylebechec authored Dec 9, 2024
2 parents c159f13 + 81498ee commit bb96703
Show file tree
Hide file tree
Showing 16 changed files with 247 additions and 221 deletions.
Binary file modified README.pdf
Binary file not shown.
Binary file modified RELEASE_NOTES.pdf
Binary file not shown.
Binary file modified docs/docs.pdf
Binary file not shown.
Binary file modified docs/help.configuration.calculation.pdf
Binary file not shown.
Binary file modified docs/help.configuration.pdf
Binary file not shown.
Binary file modified docs/help.configuration.prioritization.pdf
Binary file not shown.
Binary file modified docs/help.parameters.databases.pdf
Binary file not shown.
Binary file modified docs/help.parameters.pdf
Binary file not shown.
Binary file modified docs/help.pdf
Binary file not shown.
445 changes: 230 additions & 215 deletions docs/pdoc/howard/objects/variants.html

Large diffs are not rendered by default.

Binary file modified docs/tips.pdf
Binary file not shown.
Binary file modified docs/user_guide.pdf
Binary file not shown.
9 changes: 7 additions & 2 deletions howard/objects/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11713,9 +11713,9 @@ def rename_info_fields(
del header.infos[field_to_rename]

# Rename INFO patterns
field_pattern = rf'(^|;)({field_to_rename})=([^;]*)'
field_pattern = rf'(^|;)({field_to_rename})($|;|=[^;]*)'
if field_renamed is not None:
field_renamed_pattern = rf'\1{field_renamed}=\3'
field_renamed_pattern = rf'\1{field_renamed}\3'
else:
field_renamed_pattern = ''

Expand All @@ -11736,6 +11736,11 @@ def rename_info_fields(
else:
log.info(f"Rename or remove fields - field '{field_to_rename}' removed")

else:

log.warning(f"Rename or remove fields - field '{field_to_rename}' not in header")


# Rename INFO
for regex_replace_key, regex_replace in regex_replace_dict.items():
log.info(f"Rename or remove fields - Process [{regex_replace_key+1}/{len(regex_replace_dict)}]...")
Expand Down
Binary file modified plugins/README.pdf
Binary file not shown.
7 changes: 4 additions & 3 deletions tests/data/example.annotation_names.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
##INFO=<ID=CLNSIG,Number=1,Type=String,Description="CLNSIG">
##INFO=<ID=PREFIXCLNSIG,Number=1,Type=String,Description="CLNSIG">
##INFO=<ID=CLNSIGSUFFIX,Number=1,Type=String,Description="CLNSIG">
##INFO=<ID=SPiP_Alt,Number=0,Type=Flag,Description="SPiP">
##contig=<ID=chr1,length=249250621,assembly=hg19>
##contig=<ID=chr7,length=159138663,assembly=hg19>
##contig=<ID=1,length=249250621,assembly=hg19>
Expand Down Expand Up @@ -57,8 +58,8 @@
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 sample3 sample4
chr1 28736 . A C 100 PASS CLNSIG=pathogenic GT:AD:DP:GQ 0/1:525,204:729:99 0/1:12659,4994:17664:99 1/1:12658,4995:17663:99 1/1:401,175:576:99
chr1 35144 . A C 100 PASS CLNSIG=non-pathogenic GT:AD:DP:GQ ./.:.:.:. 0/1:12659,4994:17664:99 0/1:12658,4995:17663:99 0/1:401,175:576:99
chr1 69101 . A G 100 PASS DP=50;CLNSIG=non-pathogenic;SIFT=D GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99
chr1 768251 . A G 100 PASS CLNSIG=NP;PREFIXCLNSIG=NP;CLNSIGSUFFIX=P GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99
chr1 69101 . A G 100 PASS DP=50;CLNSIG=non-pathogenic;SIFT=D;SPiP_Alt GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99
chr1 768251 . A G 100 PASS CLNSIG=NP;PREFIXCLNSIG=NP;SPiP_Alt;CLNSIGSUFFIX=P GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99
chr1 768252 . A G 100 PASS PREFIXCLNSIG=NP GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99
chr1 768253 . A G 100 PASS CLNSIGSUFFIX=P GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99
chr1 768253 . A G 100 PASS SPiP_Alt;CLNSIGSUFFIX=P GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99
chr7 55249063 rs1050171 G A 5777 PASS CLNSIG=NP;DP=125;CLNSIG=NP GT:AD:DP:GQ 0/1:525,204:729:99 0/1:12659,4994:17664:99 ./.:.:.:. 0/1:401,175:576:99
7 changes: 6 additions & 1 deletion tests/test_objects_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2196,13 +2196,16 @@ def test_rename_fields():
"field_not_in_header": "field_not_in_header_renamed",
"": "",
"SIFT": None,
"SPiP_Alt": "SPiP_alternative",
"SPiP_alternative": None,
}

# Rename fields
fields_renamed = variants.rename_info_fields(fields_to_rename=fields_to_rename)
assert fields_renamed == {'CLNSIG': 'CLNSIG_renamed', 'PREFIXCLNSIG': 'PREFIXCLNSIG_renamed', 'DP': 'depth', 'SIFT': None}
assert fields_renamed == {'CLNSIG': 'CLNSIG_renamed', 'PREFIXCLNSIG': 'PREFIXCLNSIG_renamed', 'DP': 'depth', 'SIFT': None, 'SPiP_Alt': 'SPiP_alternative', 'SPiP_alternative': None}
assert len(variants.get_query_to_df("SELECT INFO FROM variants WHERE INFO LIKE '%SIFT%'")) == 0
assert len(variants.get_query_to_df("SELECT INFO FROM variants WHERE INFO LIKE '%None=%'")) == 0
assert len(variants.get_query_to_df("SELECT INFO FROM variants WHERE INFO LIKE '%SPiP%'")) == 0

# Check if VCF is in correct format with pyVCF
remove_if_exists([output_vcf])
Expand Down Expand Up @@ -2235,6 +2238,8 @@ def test_rename_fields_to_param_and_export():
"field_not_in_header": "field_not_in_header_renamed",
"": "",
"SIFT": None,
"SPiP_Alt": "SPiP_alternative",
"SPiP_alternative": None,
}
}
}
Expand Down

0 comments on commit bb96703

Please sign in to comment.