From 7daee23c9afe8136680ca53904f9ae0756cafd28 Mon Sep 17 00:00:00 2001 From: Xiao Chen Date: Thu, 27 Aug 2020 10:17:58 -0700 Subject: [PATCH 01/10] Added code to call variants --- caller/call_variants.py | 63 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 caller/call_variants.py diff --git a/caller/call_variants.py b/caller/call_variants.py new file mode 100644 index 0000000..cdafd31 --- /dev/null +++ b/caller/call_variants.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# +# SMNCopyNumberCaller +# Copyright 2019-2020 Illumina, Inc. +# All rights reserved. +# +# Author: Xiao Chen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import os +import sys + +dir_name = os.path.join(os.path.dirname(os.path.dirname(__file__)), "depth_calling") +if os.path.exists(dir_name): + sys.path.append(dir_name) +from depth_calling.copy_number_call import call_reg1_cn, process_raw_call_denovo + + +def call_cn_var_homo(total_cn, lsnp1, lsnp2, max_cn=None): + """ + Call CN for variant sites in homology regions. + """ + cn_prob = [] + for i, count1 in enumerate(lsnp1): + count2 = lsnp2[i] + cn_prob.append(call_reg1_cn(total_cn, count1, count2, 4)) + cn_call = [] + for site_call in process_raw_call_denovo(cn_prob, 0.8, 0.65): + if site_call is None: + cn_call.append(None) + else: + if max_cn is None: + cn_call.append(min(site_call, total_cn - 2)) + else: + cn_call.append(min(site_call, max_cn)) + return cn_call + + +def get_called_variants(var_list, cn_prob_processed, starting_index=0): + """ + Return called variant names based on called copy number and list of variant names + """ + total_callset = [] + if starting_index != 0: + assert len(var_list) == len(cn_prob_processed) + starting_index + for i, cn_called in enumerate(cn_prob_processed): + if cn_called is not None and cn_called != 0: + for _ in range(cn_called): + total_callset.append(var_list[i + starting_index]) + return total_callset From 221b0ab9938016987f4cf943fbf3ab967a8267a6 Mon Sep 17 00:00:00 2001 From: Joseph Date: Wed, 4 Nov 2020 03:51:15 -0800 Subject: [PATCH 02/10] HJ updates to call_smn12.py and smn_caller.py --- caller/call_smn12.py | 25 ++++++++++++++++++++----- smn_caller.py | 12 ++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/caller/call_smn12.py b/caller/call_smn12.py index c79dac0..c33d727 100644 --- a/caller/call_smn12.py +++ b/caller/call_smn12.py @@ -28,13 +28,16 @@ from scipy.stats import poisson dir_name = os.path.join(os.path.dirname(os.path.dirname(__file__)), "depth_calling") -if os.path.exists(dir_name): - sys.path.append(dir_name) + from depth_calling.copy_number_call import ( call_reg1_cn, process_raw_call_gc, process_raw_call_denovo, ) +from caller.call_variants import ( + call_cn_var_homo, + get_called_variants, +) SMA_CUTOFF = 1e-6 TOTAL_NUM_SITES = 16 @@ -203,13 +206,13 @@ def get_carrier_status(site_calls, cn_prob, cn_smn1, sma_likelihood_ratio): return None -def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth): +def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name): """Return the copy nubmer call of SMN1, SMN2 and SMNstar.""" smn1_fraction = get_fraction(lsnp1, lsnp2) smn_call = namedtuple( "smn_call", "SMN1 SMN2 SMN2delta78 isCarrier isSMA \ - SMN1_CN_raw Info Confidence g27134TG_raw g27134TG_CN", + SMN1_CN_raw Info Confidence g27134TG_raw g27134TG_CN variants_called", ) raw_cn_call = update_full_length_cn(raw_cn_call) full_length_cn = raw_cn_call.exon78_cn @@ -292,10 +295,21 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth): ) # targeted variant(s) + #Adding from -add_pathogenic_variants + #The lines for printing can be deleted here + + print(var_alt, var_ref) + cn = call_cn_var_homo(full_length_cn, var_alt, var_ref) + #print(cn) + new_call = get_called_variants(var_name, cn) + print(new_call) + + # + # Chaning from 0 to 5 var_cn_confident = None raw_var_cn = None var_fraction = get_fraction(var_alt, var_ref) - raw_var_cn = get_raw_smn1_cn(full_length_cn, var_fraction)[0] + raw_var_cn = get_raw_smn1_cn(full_length_cn, var_fraction)[0] var_cn = [call_reg1_cn(full_length_cn, var_alt[0], var_ref[0])] var_cn_filtered = process_raw_call_denovo( var_cn, POSTERIOR_CUTOFF_MEDIUM, POSTERIOR_CUTOFF_LOOSE, keep_none=False @@ -330,6 +344,7 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth): cn_prob, raw_var_cn, var_cn_confident, + new_call, ) return dout diff --git a/smn_caller.py b/smn_caller.py index 7489076..8f664a2 100755 --- a/smn_caller.py +++ b/smn_caller.py @@ -92,6 +92,7 @@ def smn_cn_caller( gmm_parameter, snp_db, variant_db, + var_name, #add variant name threads, count_file=None, reference_fasta=None, @@ -153,6 +154,7 @@ def smn_cn_caller( var_ref_count, var_alt_count, normalized_depth.mediandepth, + var_name, ) # 5. Prepare final call set @@ -193,6 +195,7 @@ def write_to_tsv(final_output, out_tsv): "Full_length_CN_raw", "g.27134T>G_CN", "SMN1_CN_raw", + "variants_called", ] with open(out_tsv, "w") as tsv_output: tsv_output.write("\t".join(header) + "\n") @@ -209,6 +212,7 @@ def write_to_tsv(final_output, out_tsv): final_call["Full_length_CN_raw"], final_call["g27134TG_CN"], ",".join([str(a) for a in final_call["SMN1_CN_raw"]]), + final_call["variants_called"] ] tsv_output.write("\t".join([str(a) for a in output_per_sample]) + "\n") @@ -226,9 +230,16 @@ def main(): datadir = os.path.join(os.path.dirname(__file__), "data") # Region file to use + var_name = [] region_file = os.path.join(datadir, "SMN_region_%s.bed" % genome) snp_file = os.path.join(datadir, "SMN_SNP_%s.txt" % genome) variant_file = os.path.join(datadir, "SMN_target_variant_%s.txt" % genome) + #Open variant files + with open(variant_file, 'r') as j: + for line in j: + variants = line.split() + annotation = variants[5] + var_name.append(annotation) gmm_file = os.path.join(datadir, "SMN_gmm.txt") for required_file in [region_file, snp_file, variant_file, gmm_file]: @@ -270,6 +281,7 @@ def main(): gmm_parameter, snp_db, variant_db, + var_name, threads, count_file, reference_fasta, From 5a4d737ecc251f69f6ad1541922ea5dd022a8996 Mon Sep 17 00:00:00 2001 From: Joseph Date: Wed, 4 Nov 2020 09:06:48 -0800 Subject: [PATCH 03/10] here what changes are made --- caller/call_smn12.py | 1 - 1 file changed, 1 deletion(-) diff --git a/caller/call_smn12.py b/caller/call_smn12.py index c33d727..3ca2040 100644 --- a/caller/call_smn12.py +++ b/caller/call_smn12.py @@ -305,7 +305,6 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name print(new_call) # - # Chaning from 0 to 5 var_cn_confident = None raw_var_cn = None var_fraction = get_fraction(var_alt, var_ref) From 78787c80d92e865587e5ba5a33d3af46623722fa Mon Sep 17 00:00:00 2001 From: Joseph Date: Wed, 4 Nov 2020 09:18:33 -0800 Subject: [PATCH 04/10] HJ updates to caller.py + caller_smn12.py --- caller/call_smn12.py | 1 - 1 file changed, 1 deletion(-) diff --git a/caller/call_smn12.py b/caller/call_smn12.py index 3ca2040..31f9ef8 100644 --- a/caller/call_smn12.py +++ b/caller/call_smn12.py @@ -303,7 +303,6 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name #print(cn) new_call = get_called_variants(var_name, cn) print(new_call) - # var_cn_confident = None raw_var_cn = None From d187344c0276d1c74aed7282b4fdddc4f0e23e05 Mon Sep 17 00:00:00 2001 From: Joseph Date: Fri, 6 Nov 2020 12:13:29 -0800 Subject: [PATCH 05/10] removed the print command from call_smn12.py --- caller/call_smn12.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/caller/call_smn12.py b/caller/call_smn12.py index 31f9ef8..28c5817 100644 --- a/caller/call_smn12.py +++ b/caller/call_smn12.py @@ -296,14 +296,11 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name # targeted variant(s) #Adding from -add_pathogenic_variants - #The lines for printing can be deleted here - print(var_alt, var_ref) + (var_alt, var_ref) cn = call_cn_var_homo(full_length_cn, var_alt, var_ref) - #print(cn) new_call = get_called_variants(var_name, cn) - print(new_call) - # + (new_call) var_cn_confident = None raw_var_cn = None var_fraction = get_fraction(var_alt, var_ref) From 76419d25dd6e7ffc10f87777e299b1a9b4c5a345 Mon Sep 17 00:00:00 2001 From: Joseph Date: Thu, 12 Nov 2020 17:12:15 -0800 Subject: [PATCH 06/10] Updating Variant File --- data/SMN_target_variant_38.txt | 29 +++++++++++++++++++++++++++ hj | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 hj diff --git a/data/SMN_target_variant_38.txt b/data/SMN_target_variant_38.txt index 956069a..0962979 100644 --- a/data/SMN_target_variant_38.txt +++ b/data/SMN_target_variant_38.txt @@ -1,2 +1,31 @@ #chr pos_SMN1 base_REF pos_SMN2 base_ALT annotation chr5 70952074 T 70076654 G g.27134T>G +chr5 70925108 C 70049690 G c.5C>G +chr5 70925146 C 70049728 T c.43C>T +chr5 70938845 G 70063421 A c.88G>A +chr5 70938888 A 70063464 T c.131A>T +chr5 70941423 C 70065999 A c.188C>A +chr5 70942359 G 70066935 C c.275G>C +chr5 70942367 G 70066943 C c.283G>C +chr5 70942389 G 70066965 A c.305G>A +chr5 70942394 GAAG 70066970 GAAAG c.312dupA +chr5 70942416 C 70066992 G c.332C>G +chr5 70942430 A 70067006 T c.346A>T +chr5 70942472 T 70067048 C c.388T>C +chr5 70942473 A 70067049 G c.389A>G +chr5 70942480 TAGAGAGG 70067056 TAGG c.399_402delAGAG +chr5 70942484 G 70067060 A c.400G>A +chr5 70942490 C 70067066 G c.406C>G +chr5 70944713 T 70069290 A c.683T>A +chr5 70946065 G 70070640 C c.724-1G>C +chr5 70946076 C 70070651 T c.734C>T +chr5 70946112 CTGATGCTTTGGG 70070687 CTGATGCTTTGCT c.770_780dup11 +chr5 70946126 A 70070701 G c.784A>G +chr5 70946127 G 70070702 T c.785G>T +chr5 70946150 A 70070725 C c.808A>C +chr5 70946157 A 70070732 G c.815A>G +chr5 70946163 C 70070738 T c.821C>T +chr5 70946165 G 70070740 A c.823G>A +chr5 70951941 G 70076521 T c.835G>T +chr5 70951942 G 70076522 T c.836G>T +chr5 70952074 T 70076654 G g.27134T>G diff --git a/hj b/hj new file mode 100644 index 0000000..dab80a2 --- /dev/null +++ b/hj @@ -0,0 +1,36 @@ +diff --git a/data/SMN_target_variant_38.txt b/data/SMN_target_variant_38.txt +index 956069a..0962979 100644 +--- a/data/SMN_target_variant_38.txt ++++ b/data/SMN_target_variant_38.txt +@@ -1,2 +1,31 @@ + #chr pos_SMN1 base_REF pos_SMN2 base_ALT annotation + chr5 70952074 T 70076654 G g.27134T>G ++chr5 70925108 C 70049690 G c.5C>G ++chr5 70925146 C 70049728 T c.43C>T ++chr5 70938845 G 70063421 A c.88G>A ++chr5 70938888 A 70063464 T c.131A>T ++chr5 70941423 C 70065999 A c.188C>A ++chr5 70942359 G 70066935 C c.275G>C ++chr5 70942367 G 70066943 C c.283G>C ++chr5 70942389 G 70066965 A c.305G>A ++chr5 70942394 GAAG 70066970 GAAAG c.312dupA ++chr5 70942416 C 70066992 G c.332C>G ++chr5 70942430 A 70067006 T c.346A>T ++chr5 70942472 T 70067048 C c.388T>C ++chr5 70942473 A 70067049 G c.389A>G ++chr5 70942480 TAGAGAGG 70067056 TAGG c.399_402delAGAG ++chr5 70942484 G 70067060 A c.400G>A ++chr5 70942490 C 70067066 G c.406C>G ++chr5 70944713 T 70069290 A c.683T>A ++chr5 70946065 G 70070640 C c.724-1G>C ++chr5 70946076 C 70070651 T c.734C>T ++chr5 70946112 CTGATGCTTTGGG 70070687 CTGATGCTTTGCT c.770_780dup11 ++chr5 70946126 A 70070701 G c.784A>G ++chr5 70946127 G 70070702 T c.785G>T ++chr5 70946150 A 70070725 C c.808A>C ++chr5 70946157 A 70070732 G c.815A>G ++chr5 70946163 C 70070738 T c.821C>T ++chr5 70946165 G 70070740 A c.823G>A ++chr5 70951941 G 70076521 T c.835G>T ++chr5 70951942 G 70076522 T c.836G>T ++chr5 70952074 T 70076654 G g.27134T>G From f50f2bcc12c3e016d13b3e1d45ff21a6bfec385d Mon Sep 17 00:00:00 2001 From: Joseph Date: Fri, 20 Nov 2020 11:16:15 -0800 Subject: [PATCH 07/10] Removing trailing spaces, and replacing necessary commands --- caller/call_smn12.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/caller/call_smn12.py b/caller/call_smn12.py index 28c5817..fecb01f 100644 --- a/caller/call_smn12.py +++ b/caller/call_smn12.py @@ -28,7 +28,8 @@ from scipy.stats import poisson dir_name = os.path.join(os.path.dirname(os.path.dirname(__file__)), "depth_calling") - +if os.path.exists(dir_name): + sys.path.append(dir_name) from depth_calling.copy_number_call import ( call_reg1_cn, process_raw_call_gc, @@ -296,7 +297,6 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name # targeted variant(s) #Adding from -add_pathogenic_variants - (var_alt, var_ref) cn = call_cn_var_homo(full_length_cn, var_alt, var_ref) new_call = get_called_variants(var_name, cn) @@ -304,7 +304,7 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name var_cn_confident = None raw_var_cn = None var_fraction = get_fraction(var_alt, var_ref) - raw_var_cn = get_raw_smn1_cn(full_length_cn, var_fraction)[0] + raw_var_cn = get_raw_smn1_cn(full_length_cn, var_fraction)[0] var_cn = [call_reg1_cn(full_length_cn, var_alt[0], var_ref[0])] var_cn_filtered = process_raw_call_denovo( var_cn, POSTERIOR_CUTOFF_MEDIUM, POSTERIOR_CUTOFF_LOOSE, keep_none=False From 8828319c51fd4eee84c5afaf906bcc17c3ed32ba Mon Sep 17 00:00:00 2001 From: Joseph Date: Wed, 2 Dec 2020 11:01:31 -0800 Subject: [PATCH 08/10] HJ updates to caller.py, removing header from the variant output --- smn_caller.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/smn_caller.py b/smn_caller.py index 8f664a2..0440bc3 100755 --- a/smn_caller.py +++ b/smn_caller.py @@ -198,7 +198,9 @@ def write_to_tsv(final_output, out_tsv): "variants_called", ] with open(out_tsv, "w") as tsv_output: - tsv_output.write("\t".join(header) + "\n") + f = open(out_tsv,'r') + lines = f.readlines()[1:] + f.close() for sample_id in final_output: final_call = final_output[sample_id] output_per_sample = [ From 4254f55a67e085f56449ab16dce4b6904abbdb5a Mon Sep 17 00:00:00 2001 From: Joseph Date: Wed, 23 Dec 2020 18:40:17 -0800 Subject: [PATCH 09/10] Target Varinat Files and SMN_Caller.py --- data/SMN_target_variant_19.txt | 31 +++++++++++++++++++++++++++++-- data/SMN_target_variant_37.txt | 31 +++++++++++++++++++++++++++++-- smn_caller.py | 17 ++++++++--------- 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/data/SMN_target_variant_19.txt b/data/SMN_target_variant_19.txt index 1d4d322..77f7545 100644 --- a/data/SMN_target_variant_19.txt +++ b/data/SMN_target_variant_19.txt @@ -1,2 +1,29 @@ -#chr pos_SMN1 base_REF pos_SMN2 base_ALT annotation -chr5 70247901 T 69372481 G g.27134T>G +chr5 70247901 T 69372481 G g.27134T>G +chr5 70220935 C 69345517 G c.5C>G +chr5 70220973 C 69345555 T c.43C>T +chr5 70234672 G 69359248 A c.88G>A +chr5 70234715 A 69359291 T c.131A>T +chr5 70237250 C 69361826 A c.188C>A +chr5 70238186 G 69362762 C c.275G>C +chr5 70238194 G 69362770 C c.283G>C +chr5 70238216 G 69362792 A c.305G>A +chr5 70238221 GAAG 69362797 GAAAG c.312dupA +chr5 70238243 C 69362819 G c.332C>G +chr5 70238257 A 69362833 T c.346A>T +chr5 70238299 T 69362875 C c.388T>C +chr5 70238300 A 69362876 G c.389A>G +chr5 70238307 TAGAGAGG 69362883 TAGG c.399_402delAGAG +chr5 70238311 G 69362887 A c.400G>A +chr5 70238317 C 69362893 G c.406C>G +chr5 70240540 T 69365117 A c.683T>A +chr5 70241892 G 69366467 C c.724-1G>C +chr5 70241903 C 69366478 T c.734C>T +chr5 70241939 CTGATGCTTTGGG 69366514 CTGATGCTTTGCT c.770_780dup11 +chr5 70241953 A 69366528 G c.784A>G +chr5 70241954 G 69366529 T c.785G>T +chr5 70241977 A 69366552 C c.808A>C +chr5 70241984 A 69366559 G c.815A>G +chr5 70241990 C 69366565 T c.821C>T +chr5 70241992 G 69366567 A c.823G>A +chr5 70247768 G 69372348 T c.835G>T +chr5 70247769 G 69372349 T c.836G>T diff --git a/data/SMN_target_variant_37.txt b/data/SMN_target_variant_37.txt index 5c5479e..f25ce78 100644 --- a/data/SMN_target_variant_37.txt +++ b/data/SMN_target_variant_37.txt @@ -1,2 +1,29 @@ -#chr pos_SMN1 base_REF pos_SMN2 base_ALT annotation -5 70247901 T 69372481 G g.27134T>G +chr5 70247901 T 69372481 G g.27134T>G +chr5 70220935 C 69345517 G c.5C>G +chr5 70220973 C 69345555 T c.43C>T +chr5 70234672 G 69359248 A c.88G>A +chr5 70234715 A 69359291 T c.131A>T +chr5 70237250 C 69361826 A c.188C>A +chr5 70238186 G 69362762 C c.275G>C +chr5 70238194 G 69362770 C c.283G>C +chr5 70238216 G 69362792 A c.305G>A +chr5 70238221 GAAG 69362797 GAAAG c.312dupA +chr5 70238243 C 69362819 G c.332C>G +chr5 70238257 A 69362833 T c.346A>T +chr5 70238299 T 69362875 C c.388T>C +chr5 70238300 A 69362876 G c.389A>G +chr5 70238307 TAGAGAGG 69362883 TAGG c.399_402delAGAG +chr5 70238311 G 69362887 A c.400G>A +chr5 70238317 C 69362893 G c.406C>G +chr5 70240540 T 69365117 A c.683T>A +chr5 70241892 G 69366467 C c.724-1G>C +chr5 70241903 C 69366478 T c.734C>T +chr5 70241939 CTGATGCTTTGGG 69366514 CTGATGCTTTGCT c.770_780dup11 +chr5 70241953 A 69366528 G c.784A>G +chr5 70241954 G 69366529 T c.785G>T +chr5 70241977 A 69366552 C c.808A>C +chr5 70241984 A 69366559 G c.815A>G +chr5 70241990 C 69366565 T c.821C>T +chr5 70241992 G 69366567 A c.823G>A +chr5 70247768 G 69372348 T c.835G>T +chr5 70247769 G 69372349 T c.836G>T \ No newline at end of file diff --git a/smn_caller.py b/smn_caller.py index 0440bc3..6b8a177 100755 --- a/smn_caller.py +++ b/smn_caller.py @@ -198,9 +198,7 @@ def write_to_tsv(final_output, out_tsv): "variants_called", ] with open(out_tsv, "w") as tsv_output: - f = open(out_tsv,'r') - lines = f.readlines()[1:] - f.close() + tsv_output.write("\t".join(header) + "\n") for sample_id in final_output: final_call = final_output[sample_id] output_per_sample = [ @@ -237,13 +235,14 @@ def main(): snp_file = os.path.join(datadir, "SMN_SNP_%s.txt" % genome) variant_file = os.path.join(datadir, "SMN_target_variant_%s.txt" % genome) #Open variant files - with open(variant_file, 'r') as j: - for line in j: - variants = line.split() - annotation = variants[5] - var_name.append(annotation) - gmm_file = os.path.join(datadir, "SMN_gmm.txt") + with open(variant_file, 'r') as work: + for line in work: + if not line.startswith("#"): + variants = line.split() + annotation = variants[5] + var_name.append(annotation) + gmm_file = os.path.join(datadir, "SMN_gmm.txt") for required_file in [region_file, snp_file, variant_file, gmm_file]: if os.path.exists(required_file) == 0: raise Exception("File %s not found." % required_file) From 6c15d0ea63d9951f193e95a79b4af4131bb7c4e9 Mon Sep 17 00:00:00 2001 From: Joseph Date: Tue, 12 Jan 2021 22:46:35 -0800 Subject: [PATCH 10/10] Making the code more reader friendly --- caller/call_smn12.py | 11 ++++--- data/SMN_target_variant_19.txt | 1 + data/SMN_target_variant_37.txt | 59 +++++++++++++++++----------------- data/SMN_target_variant_38.txt | 2 +- smn_caller.py | 10 +++--- 5 files changed, 43 insertions(+), 40 deletions(-) diff --git a/caller/call_smn12.py b/caller/call_smn12.py index fecb01f..485bb88 100644 --- a/caller/call_smn12.py +++ b/caller/call_smn12.py @@ -242,6 +242,7 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name [None] * TOTAL_NUM_SITES, None, None, + None, ) elif sma_likelihood_ratio < SMA_CUTOFF: dout = smn_call( @@ -255,6 +256,7 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name [None] * TOTAL_NUM_SITES, None, None, + None, ) else: dout = smn_call( @@ -268,6 +270,7 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name [None] * TOTAL_NUM_SITES, None, None, + None, ) else: @@ -297,10 +300,8 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name # targeted variant(s) #Adding from -add_pathogenic_variants - (var_alt, var_ref) - cn = call_cn_var_homo(full_length_cn, var_alt, var_ref) - new_call = get_called_variants(var_name, cn) - (new_call) + copy_number_variant = call_cn_var_homo(full_length_cn, var_alt, var_ref) + new_variants_being_called = get_called_variants(var_name, copy_number_variant) var_cn_confident = None raw_var_cn = None var_fraction = get_fraction(var_alt, var_ref) @@ -339,7 +340,7 @@ def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth, var_name cn_prob, raw_var_cn, var_cn_confident, - new_call, + new_variants_being_called, ) return dout diff --git a/data/SMN_target_variant_19.txt b/data/SMN_target_variant_19.txt index 77f7545..7b24907 100644 --- a/data/SMN_target_variant_19.txt +++ b/data/SMN_target_variant_19.txt @@ -1,3 +1,4 @@ +#chr pos_SMN1 base_REF pos_SMN2 base_ALT annotation chr5 70247901 T 69372481 G g.27134T>G chr5 70220935 C 69345517 G c.5C>G chr5 70220973 C 69345555 T c.43C>T diff --git a/data/SMN_target_variant_37.txt b/data/SMN_target_variant_37.txt index f25ce78..38ac551 100644 --- a/data/SMN_target_variant_37.txt +++ b/data/SMN_target_variant_37.txt @@ -1,29 +1,30 @@ -chr5 70247901 T 69372481 G g.27134T>G -chr5 70220935 C 69345517 G c.5C>G -chr5 70220973 C 69345555 T c.43C>T -chr5 70234672 G 69359248 A c.88G>A -chr5 70234715 A 69359291 T c.131A>T -chr5 70237250 C 69361826 A c.188C>A -chr5 70238186 G 69362762 C c.275G>C -chr5 70238194 G 69362770 C c.283G>C -chr5 70238216 G 69362792 A c.305G>A -chr5 70238221 GAAG 69362797 GAAAG c.312dupA -chr5 70238243 C 69362819 G c.332C>G -chr5 70238257 A 69362833 T c.346A>T -chr5 70238299 T 69362875 C c.388T>C -chr5 70238300 A 69362876 G c.389A>G -chr5 70238307 TAGAGAGG 69362883 TAGG c.399_402delAGAG -chr5 70238311 G 69362887 A c.400G>A -chr5 70238317 C 69362893 G c.406C>G -chr5 70240540 T 69365117 A c.683T>A -chr5 70241892 G 69366467 C c.724-1G>C -chr5 70241903 C 69366478 T c.734C>T -chr5 70241939 CTGATGCTTTGGG 69366514 CTGATGCTTTGCT c.770_780dup11 -chr5 70241953 A 69366528 G c.784A>G -chr5 70241954 G 69366529 T c.785G>T -chr5 70241977 A 69366552 C c.808A>C -chr5 70241984 A 69366559 G c.815A>G -chr5 70241990 C 69366565 T c.821C>T -chr5 70241992 G 69366567 A c.823G>A -chr5 70247768 G 69372348 T c.835G>T -chr5 70247769 G 69372349 T c.836G>T \ No newline at end of file +#chr pos_SMN1 base_REF pos_SMN2 base_ALT annotation +5 70247901 T 69372481 G g.27134T>G +5 70220935 C 69345517 G c.5C>G +5 70220973 C 69345555 T c.43C>T +5 70234672 G 69359248 A c.88G>A +5 70234715 A 69359291 T c.131A>T +5 70237250 C 69361826 A c.188C>A +5 70238186 G 69362762 C c.275G>C +5 70238194 G 69362770 C c.283G>C +5 70238216 G 69362792 A c.305G>A +5 70238221 GAAG 69362797 GAAAG c.312dupA +5 70238243 C 69362819 G c.332C>G +5 70238257 A 69362833 T c.346A>T +5 70238299 T 69362875 C c.388T>C +5 70238300 A 69362876 G c.389A>G +5 70238307 TAGAGAGG 69362883 TAGG c.399_402delAGAG +5 70238311 G 69362887 A c.400G>A +5 70238317 C 69362893 G c.406C>G +5 70240540 T 69365117 A c.683T>A +5 70241892 G 69366467 C c.724-1G>C +5 70241903 C 69366478 T c.734C>T +5 70241939 CTGATGCTTTGGG 69366514 CTGATGCTTTGCT c.770_780dup11 +5 70241953 A 69366528 G c.784A>G +5 70241954 G 69366529 T c.785G>T +5 70241977 A 69366552 C c.808A>C +5 70241984 A 69366559 G c.815A>G +5 70241990 C 69366565 T c.821C>T +5 70241992 G 69366567 A c.823G>A +5 70247768 G 69372348 T c.835G>T +5 70247769 G 69372349 T c.836G>T \ No newline at end of file diff --git a/data/SMN_target_variant_38.txt b/data/SMN_target_variant_38.txt index 0962979..80ecfe8 100644 --- a/data/SMN_target_variant_38.txt +++ b/data/SMN_target_variant_38.txt @@ -28,4 +28,4 @@ chr5 70946163 C 70070738 T c.821C>T chr5 70946165 G 70070740 A c.823G>A chr5 70951941 G 70076521 T c.835G>T chr5 70951942 G 70076522 T c.836G>T -chr5 70952074 T 70076654 G g.27134T>G + diff --git a/smn_caller.py b/smn_caller.py index 6b8a177..6ca22b9 100755 --- a/smn_caller.py +++ b/smn_caller.py @@ -236,11 +236,11 @@ def main(): variant_file = os.path.join(datadir, "SMN_target_variant_%s.txt" % genome) #Open variant files - with open(variant_file, 'r') as work: - for line in work: + with open(variant_file, 'r') as explore_variants: + for line in explore_variants: if not line.startswith("#"): variants = line.split() - annotation = variants[5] + annotation = variants [6] var_name.append(annotation) gmm_file = os.path.join(datadir, "SMN_gmm.txt") for required_file in [region_file, snp_file, variant_file, gmm_file]: @@ -254,7 +254,7 @@ def main(): variant_db = get_snp_position(variant_file) gmm_parameter = parse_gmm_file(gmm_file) region_dic = parse_region_file(region_file) - out_json = os.path.join(outdir, prefix + ".json") + out_json = os.path.join(outdir, prefix + ".json") out_tsv = os.path.join(outdir, prefix + ".tsv") final_output = {} with open(manifest) as read_manifest: @@ -267,7 +267,7 @@ def main(): if count_file is None and os.path.exists(bam_name) == 0: logging.warning( "Input alignmet file for sample %s does not exist.", sample_id - ) + ) elif count_file is not None and os.path.exists(count_file) == 0: logging.warning( "Input count file for sample %s does not exist", sample_id