Skip to content

Commit

Permalink
added test for vs2
Browse files Browse the repository at this point in the history
  • Loading branch information
KateSakharova committed Oct 18, 2024
1 parent 638c18e commit 7a94edb
Show file tree
Hide file tree
Showing 9 changed files with 1,537 additions and 14 deletions.
12 changes: 7 additions & 5 deletions bin/parse_viral_pred.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ def parse_virus_sorter2(sorter_files, vs_cutoff):
prophages = dict()

final_boundary_file, final_score_file, final_combined_fa_file = "", "", ""
print('SORTER',sorter_files)
for i in sorter_files:
if "final-viral-boundary.tsv" in i:
final_boundary_file = i
Expand Down Expand Up @@ -294,16 +295,17 @@ def merge_annotations(pprmeta, finder, sorter, sorter2, assembly, vs_cutoff):
sorter2, vs_cutoff)

for seq_record in SeqIO.parse(assembly, "fasta"):
# HC
if seq_record.id in sorter_hc:
hc_predictions_contigs.append(sorter_hc.get(seq_record.id).get_seq_record())
# Pro
elif seq_record.id in sorter_prophages:
# TODO: check this decision because for VirSorter2 sequence can be in 2 categories
if seq_record.id in sorter_prophages:
# a contig may have several prophages
# for prophages write the record as it holds the
# sliced fasta
for record in sorter_prophages.get(seq_record.id):
prophage_predictions_contigs.append(record.get_seq_record())
# HC
if seq_record.id in sorter_hc:
hc_predictions_contigs.append(sorter_hc.get(seq_record.id).get_seq_record())
# LC
elif seq_record.id in finder_lc:
lc_predictions_contigs.append(seq_record)
Expand Down Expand Up @@ -331,7 +333,7 @@ def main(pprmeta, finder, sorter, sorter2, assembly, outdir, vs_cutoff, prefix=F
sorter_hc,
sorter_lc,
sorter_prophages,
) = merge_annotations(pprmeta, finder, sorter, sorter2, assembly, args.vs_cutoff)
) = merge_annotations(pprmeta, finder, sorter, sorter2, assembly, vs_cutoff)

at_least_one = False
name_prefix = ""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,5 @@ TTT
G
>seq6
C
>seq7|phage-circular
T
>seq8
AA
>seq9
GG

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>seq1|prophage-1:23146
AAA
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ seq2 high_confidence False
seq4 high_confidence False
seq3 high_confidence False
seq5 high_confidence False
seq7 high_confidence True
seq6 high_confidence False
seq8 high_confidence False
seq9 high_confidence False
seq7 low_confidence True
seq8 low_confidence False
seq10 low_confidence False
seq1||partial prophage False 1 23146
seq1 prophage False 1 23146
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ seq1||partial 0.913 0.053 0.913 dsDNAphage 105648 0 67.800 1.300
seq4||full 1.000 0.087 1.000 dsDNAphage 62420 2 66.000 0.000
seq2||full 0.987 0.027 0.987 dsDNAphage 57882 15 83.000 1.900
seq3||full 1.000 0.413 1.000 dsDNAphage 47566 3 87.700 0.000
seq7||full 1.000 0.173 1.000 dsDNAphage 45193 1 85.500 0.000
seq7||full 1.000 0.173 0.7 dsDNAphage 45193 1 85.500 0.000
seq6||full 1.000 0.513 1.000 dsDNAphage 45227 4 74.000 0.000
seq5||full 1.000 0.387 1.000 dsDNAphage 40543 7 70.700 0.000
seq8||full 1.000 0.300 1.000 dsDNAphage 38823 13 95.100 0.000
seq8||full 1.000 0.300 0.7 dsDNAphage 38823 13 95.100 0.000
seq9||full 0.993 0.147 0.993 dsDNAphage 37936 7 88.900 0.000
seq10||lt2gene nan nan nan dsDNAphage 5899 1 100.000 0.000
52 changes: 52 additions & 0 deletions tests/test_parse_viral_preds.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,59 @@ def test_virsorter_precedence(self):
hashlib.md5(expected.encode("utf-8")).hexdigest())

shutil.rmtree(test_dir)

def test_virsorter2_precedence(self):
"""VirSorter2 results take precedence over the other tools
"""
pprmeta_path = self._build_path("/virsorter_precedence/pprmeta.csv")
vf_path = self._build_path("/virsorter_precedence/virfinder.txt")
vs_path = self._build_path("/virsorter_precedence/virsorter2")
assembly = self._build_path("/virsorter_precedence/assembly_renamed_filt1500bp.fasta")

test_dir = tempfile.mkdtemp()

sorter_files = [os.path.join(vs_path, f) for f in os.listdir(vs_path)]

main(pprmeta_path, vf_path, None, sorter_files, assembly, test_dir, 0.9)

with open(test_dir + "/high_confidence_viral_contigs.fna", "rb") as hc_f:
with open(
self._build_path("/virsorter_precedence/expected_vs2/"
"/high_confidence_viral_contigs.fna"), "rb") as hc_e:
self.assertEqual(hashlib.md5(hc_f.read()).hexdigest(),
hashlib.md5(hc_e.read()).hexdigest())

with open(test_dir + "/low_confidence_viral_contigs.fna", "rb") as lc_f:
content = lc_f.readlines()
self.assertEqual(">seq1\n" in content, False)
lc_f.seek(0)
with open(
self._build_path("/virsorter_precedence/expected_vs2/"
"/low_confidence_viral_contigs.fna"), "rb") as lc_e:
self.assertEqual(hashlib.md5(lc_f.read()).hexdigest(),
hashlib.md5(lc_e.read()).hexdigest())

with open(test_dir + "/prophages.fna", "rb") as p_f:
self.assertEqual(p_f.readline(), b">seq1|prophage-1:23146\n")
p_f.seek(0)
with open(
self._build_path("/virsorter_precedence/expected_vs2/"
"/prophages.fna"), "rb") as p_e:
self.assertEqual(hashlib.md5(p_f.read()).hexdigest(),
hashlib.md5(p_e.read()).hexdigest())

# seq1 has to be in prophages and not in low_confidence
with open(test_dir + "/virsorter_metadata.tsv", "rb") as vs_f:
# sort the lines as the script doesn't guarantee order
obtained = str(sorted(vs_f.readlines()))
with open(
self._build_path("/virsorter_precedence/expected_vs2/"
"/virsorter_metadata.tsv"), "rb") as vs_e:
expected = str(sorted(vs_e.readlines()))
self.assertEqual(hashlib.md5(obtained.encode("utf-8")).hexdigest(),
hashlib.md5(expected.encode("utf-8")).hexdigest())

shutil.rmtree(test_dir)

if __name__ == "__main__":
unittest.main()

0 comments on commit 7a94edb

Please sign in to comment.