From 7fd9ace0cb7134dfb2679ed803650607de27a7bb Mon Sep 17 00:00:00 2001
From: Dom Laetsch <dominik@Rambler.local>
Date: Sun, 17 Apr 2016 23:54:24 +0100
Subject: [PATCH] v0.9.14

- added new output format for bam2cov
	- parses readcov as well
---
 bam2cov.py    | 76 ++++++++++++++++++++++++------------------
 lib/BtCore.py | 91 ++++++++++++++++++++++++++-------------------------
 lib/BtIO.py   | 52 ++++++++++++++++++++++-------
 lib/BtLog.py  |  6 ++--
 lib/BtPlot.py | 28 +++++++++++-----
 5 files changed, 154 insertions(+), 99 deletions(-)
diff --git a/bam2cov.py b/bam2cov.py
index 5f38afa..4321b76 100644
--- a/bam2cov.py
+++ b/bam2cov.py
@@ -1,12 +1,16 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-"""usage: blobtools bam2cov         -i FASTA -b BAM [-h|--help] 
-    
+"""usage: blobtools bam2cov         -i FASTA -b BAM [--mq MQ] [--no_base_cov]
+                                    [-h|--help]
+
     Options:
         -h --help                   show this
-        -i, --infile FASTA          FASTA file of assembly. Headers are split at whitespaces.  
+        -i, --infile FASTA          FASTA file of assembly. Headers are split at whitespaces.
         -b, --bam <BAM>             BAM file (requires samtools in $PATH)
+        --mq <MQ>                   minimum Mapping Quality (MQ) [default: 1]
+        --no_base_cov               only parse read coverage (faster, but ...
+                                        can only be used for "blobtools blobplot --noblobs")
 """
 
 from __future__ import division
@@ -18,11 +22,12 @@
 
 class Fasta():
     def __init__(self, name, seq):
-        self.name = name 
+        self.name = name
         self.length = len(seq)
         self.n_count = seq.count('N')
         self.agct_count = self.length - self.n_count
-        self.cov = 0.0
+        self.base_cov = 0.0
+        self.read_cov = 0
 
 def which(program):
     def is_exe(fpath):
@@ -47,9 +52,9 @@ def runCmd(command):
     return iter(p.stdout.readline, b'')
 
 def readFasta(infile):
-    with open(infile) as fh: 
+    with open(infile) as fh:
         header, seqs = '', []
-        for l in fh: 
+        for l in fh:
             if l[0] == '>':
                 if (header):
                     yield header, ''.join(seqs)
@@ -86,49 +91,58 @@ def readBam(infile, fasta_headers):
     progress_unit = int(int(reads_total)/1000)
     base_cov_dict = {}
     cigar_match_re = re.compile(r"(\d+)M") # only gets digits before M's
-    # execute samtools to get only mapped reads
-    command = "samtools view -F 4 " + infile
+
+    read_cov_dict = {}
+    # execute samtools to get only mapped reads from primary alignment
+    command = "samtools view -q " + str(mq) + " -F 256 -F 4 " + infile
     # only one counter since only yields mapped reads
-    parsed_reads = 0 
+    parsed_reads = 0
     for line in runCmd(command):
         match = line.split("\t")
-        if match >= 11:
-            seq_name = match[2]
-            base_cov = sum([int(matching) for matching in cigar_match_re.findall(match[5])])
-            if (base_cov):
-                parsed_reads += 1
-                if seq_name not in fasta_headers:
-                    print BtLog.warn_d['2'] % (seq_name, infile)
-                else:
-                    base_cov_dict[seq_name] = base_cov_dict.get(seq_name, 0) + base_cov 
+        seq_name = match[2]
+        if seq_name not in fasta_headers:
+            print BtLog.warn_d['2'] % (seq_name, infile)
+        else:
+            read_cov_dict[seq_name] = read_cov_dict.get(seq_name, 0) + 1
+            if not (no_base_cov_flag):
+                base_cov = sum([int(matching) for matching in cigar_match_re.findall(match[5])])
+                if (base_cov):
+                    base_cov_dict[seq_name] = base_cov_dict.get(seq_name, 0) + base_cov
+            parsed_reads += 1
         BtLog.progress(parsed_reads, progress_unit, reads_total)
     BtLog.progress(reads_total, progress_unit, reads_total)
-
-    if not int(reads_mapped) == int(parsed_reads):
-        print warn_d['3'] % (reads_mapped, parsed_reads)
-    return base_cov_dict, reads_total, parsed_reads
+    return base_cov_dict, read_cov_dict, reads_total, parsed_reads
 
 def parseBam(bam_f, fasta_dict):
-    base_cov_dict, reads_total, reads_mapped = readBam(bam_f, set(fasta_dict.keys()))
+    base_cov_dict, read_cov_dict, reads_total, reads_mapped = readBam(bam_f, set(fasta_dict.keys()))
     if reads_total == 0:
         print BtLog.warn_d['4'] % bam_f
     for name, base_cov in base_cov_dict.items():
-        fasta_dict[name].cov = base_cov / fasta_dict[name].agct_count
-    return fasta_dict
+        fasta_dict[name].base_cov = base_cov / fasta_dict[name].agct_count
+    for name, read_cov in read_cov_dict.items():
+        fasta_dict[name].read_cov = read_cov
+    return fasta_dict, reads_total, reads_mapped
 
-def writeCov(fasta_dict, out_f):
+def writeCov(fasta_dict, reads_total, reads_mapped, out_f):
     with open(out_f, 'w') as fh:
+        fh.write("# Total Reads = %s\n" % (reads_total))
+        fh.write("# Mapped Reads = %s\n" % (reads_mapped))
+        fh.write("# Unmapped Reads = %s\n" % (reads_total - reads_mapped))
+        fh.write("# Parameters : MQ = %s, No_base_cov_flag = %s\n" % (mq, no_base_cov_flag))
+        fh.write("# %s\t%s\t%s\n" % ("contig_id", "read_cov", "base_cov"))
         for name, fasta_obj in fasta_dict.items():
-            fh.write("%s\t%s\n" % (name, fasta_obj.cov))
+            fh.write("%s\t%s\t%s\n" % (name, fasta_obj.read_cov, fasta_obj.base_cov))
 
 if __name__ == '__main__':
     args = docopt(__doc__)
-    
+
     fasta_f = args['--infile']
     bam_f = args['--bam']
     out_f = os.path.basename(bam_f) + ".cov"
+    mq = int(args['--mq'])
+    no_base_cov_flag = args['--no_base_cov']
 
     fasta_dict = parseFasta(fasta_f)
-    fasta_dict = parseBam(bam_f, fasta_dict)
-    writeCov(fasta_dict, out_f)
+    fasta_dict, reads_total, reads_mapped = parseBam(bam_f, fasta_dict)
+    writeCov(fasta_dict, reads_total, reads_mapped, out_f)
 
diff --git a/lib/BtCore.py b/lib/BtCore.py
index f78586e..82cdaec 100644
--- a/lib/BtCore.py
+++ b/lib/BtCore.py
@@ -14,14 +14,14 @@ class BlobDB holds all information parsed from files
     def __init__(self, title):
         self.title = title
         self.assembly_f = ''
-        self.dict_of_blobs = {}  
+        self.dict_of_blobs = {}
         self.order_of_blobs = []
         self.set_of_taxIds = set()
         self.lineages = {}
         self.length = 0
         self.seqs = 0
         self.n_count = 0
-        self.covLibs = {} 
+        self.covLibs = {}
         self.hitLibs = {}
         self.nodesDB_f = ''
         self.taxrules = []
@@ -36,8 +36,8 @@ def view(self, out_f, ranks, taxrule, hits_flag, seqs):
         header += "%s\n" % "\n".join("## " + hitLib['name'] + " : " + hitLib["f"] for hitLib in self.hitLibs.values())
         header += "## nodesDB : %s\n" % self.nodesDB_f
         header += "## taxrule : %s\n" % taxrule
-        header += "##\n"    
-        header += "# %s" % sep.join(map(str, [ "name", "length", "GC", "N"  ])) 
+        header += "##\n"
+        header += "# %s" % sep.join(map(str, [ "name", "length", "GC", "N"  ]))
         header += "%s%s" % (sep, sep.join([cov_lib_name for cov_lib_name in self.covLibs]))
         if (len(self.covLibs)) > 1:
             header += "%s%s" % (sep, "cov_sum")
@@ -59,10 +59,10 @@ def view(self, out_f, ranks, taxrule, hits_flag, seqs):
         else:
             with open(out_f, 'w') as fh:
                 fh.write(header + body)
-    
+
     def getViewLine(self, blob, taxrule, ranks, sep, hits_flag):
         line = ''
-        line += "\n%s" % sep.join(map(str, [ blob['name'], blob['length'], blob['gc'], blob['n_count']  ])) 
+        line += "\n%s" % sep.join(map(str, [ blob['name'], blob['length'], blob['gc'], blob['n_count']  ]))
         line += sep
         line += "%s" % sep.join(map(str, [ blob['covs'][covLib] for covLib in self.covLibs]))
         if len(self.covLibs) > 1:
@@ -81,7 +81,7 @@ def getViewLine(self, blob, taxrule, ranks, sep, hits_flag):
                         line += "%s=" % tax_lib_name
                         sum_dict = {}
                         for hit in blob['hits'][tax_lib_name]:
-                            tax_rank = self.lineages[hit['taxId']][rank] 
+                            tax_rank = self.lineages[hit['taxId']][rank]
                             sum_dict[tax_rank] = sum_dict.get(tax_rank, 0.0) + hit['score']
                         line += "%s" % "|".join([":".join(map(str, [tax_rank, sum_dict[tax_rank]])) for tax_rank in sorted(sum_dict, key=sum_dict.get, reverse=True)])
                     else:
@@ -110,7 +110,7 @@ def load(self, BlobDb_f):
         for k, v in blobDict.items():
             setattr(self, k, v)
         self.set_of_taxIds = blobDict['lineages'].keys()
-        #for k, v in self.__dict__.items(): 
+        #for k, v in self.__dict__.items():
         #    print k, type(v), v # this seems to work
 
         #self.title = blobDict['title']
@@ -119,7 +119,7 @@ def load(self, BlobDb_f):
         #self.lineages = blobDict['lineages']
         #self.set_of_taxIds = blobDict['lineages'].keys()
         #self.order_of_blobs = blobDict['order_of_blobs']
-        #self.dict_of_blobs = blobDict['dict_of_blobs'] 
+        #self.dict_of_blobs = blobDict['dict_of_blobs']
         #self.length = int(blobDict['length'])
         #self.seqs = int(blobDict['seqs'])
         #self.n_count = int(blobDict['n_count'])
@@ -129,14 +129,14 @@ def load(self, BlobDb_f):
 
         # self.title = title
         # self.assembly_f = ''
-        # self.dict_of_blobs = {}  
+        # self.dict_of_blobs = {}
         # self.order_of_blobs = []
         # self.set_of_taxIds = set()
         # self.lineages = {}
         # self.length = 0
         # self.seqs = 0
         # self.n_count = 0
-        # self.covLibs = {} 
+        # self.covLibs = {}
         # self.hitLibs = {}
         # self.nodesDB_f = ''
         # self.taxrules = []
@@ -149,13 +149,13 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
         cov_lib_dict = self.covLibs
         #print cov_lib_dict
         cov_lib_names_l = self.covLibs.keys() # does not include cov_sum
-        
+
         if len(cov_lib_names_l) > 1:
             # more than one cov_lib, cov_sum_lib has to be created
             cov_lib_dict['sum'] = CovLibObj('sum', 'sum', None).__dict__ # ugly
             cov_lib_dict['sum']['reads_total'] = sum([cov_lib_dict[x]['reads_total'] for x in cov_lib_dict])
             cov_lib_dict['sum']['reads_mapped'] = sum([cov_lib_dict[x]['reads_mapped'] for x in cov_lib_dict])
-            
+
         #print self.covLibs
         #cov_libs_reads_total = {cov_lib : data['reads_total'] for cov_lib, data in self.covLibs.items()}
         #print cov_libs_reads_total # correct
@@ -164,32 +164,32 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
 
         for blob in self.dict_of_blobs.values():
             name, gc, length, group = blob['name'], blob['gc'], blob['length'], ''
-            
+
             if (catcolour_dict): # annotation with categories specified in catcolour
                 group = str(catcolour_dict[name])
             elif (c_index): # annotation with c_index instead of taxonomic group
                 group = str(blob['taxonomy'][taxrule][rank]['c_index'])
             else: # annotation with taxonomic group
                 group = str(blob['taxonomy'][taxrule][rank]['tax'])
-            
-            if not group in data_dict: 
+
+            if not group in data_dict:
                 data_dict[group] = {
-                                    'name' : [], 
-                                    'length' : [], 
-                                    'gc' : [], 
-                                    'covs' : {covLib : [] for covLib in cov_lib_dict.keys()},           # includes cov_sum if it exists 
+                                    'name' : [],
+                                    'length' : [],
+                                    'gc' : [],
+                                    'covs' : {covLib : [] for covLib in cov_lib_dict.keys()},           # includes cov_sum if it exists
                                     'reads_mapped' : {covLib : 0 for covLib in cov_lib_dict.keys()},    # includes cov_sum if it exists
                                     'count' : 0,
                                     'count_hidden' : 0,
                                     'count_visible' : 0,
-                                    'span': 0, 
-                                    'span_hidden' : 0, 
+                                    'span': 0,
+                                    'span_hidden' : 0,
                                     'span_visible' : 0,
                                     }
 
             data_dict[group]['count'] = data_dict[group].get('count', 0) + 1
             data_dict[group]['span'] = data_dict[group].get('span', 0) + int(length)
-            
+
             if ((hide_nohits) and group == 'no-hit') or length < min_length: # hidden
                 data_dict[group]['count_hidden'] = data_dict[group].get('count_hidden', 0) + 1
                 data_dict[group]['span_hidden'] = data_dict[group].get('span_hidden', 0) + int(length)
@@ -212,16 +212,16 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
                         max_cov = cov
 
                     # add cov of blob to group
-                    data_dict[group]['covs'][cov_lib].append(cov) 
+                    data_dict[group]['covs'][cov_lib].append(cov)
 
                     cov_sum += cov
 
                     # add readcov
-                    if cov_lib in blob['read_cov']:                 
+                    if cov_lib in blob['read_cov']:
                         reads_mapped = blob['read_cov'][cov_lib]
-                        data_dict[group]['reads_mapped'][cov_lib] += reads_mapped  
+                        data_dict[group]['reads_mapped'][cov_lib] += reads_mapped
                         reads_mapped_sum += reads_mapped
-                
+
                 if len(cov_lib_names_l) > 1:
                     if cov_sum < 0.02 :
                         cov_sum = 0.02
@@ -232,8 +232,8 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
                         data_dict[group]['reads_mapped']['sum'] += reads_mapped_sum
         #if len(cov_lib_names_l) > 1:
         #    for cov_lib, data in self.covLibs.items():
-        #        cov_libs_reads_total['cov_sum'] = cov_libs_reads_total.get('cov_sum', 0) + data['reads_total'] 
-        
+        #        cov_libs_reads_total['cov_sum'] = cov_libs_reads_total.get('cov_sum', 0) + data['reads_total']
+
 
         #for group in data_dict:
         #    print "#", group
@@ -246,7 +246,7 @@ def addCovLib(self, covLib):
         self.covLibs[covLib.name] = covLib
         for blObj in self.dict_of_blobs.values():
             blObj.addCov(covLib.name, 0.0)
-    
+
     def parseFasta(self, fasta_f, fasta_type):
         print BtLog.status_d['1'] % ('FASTA', fasta_f)
         self.assembly_f = abspath(fasta_f)
@@ -260,7 +260,7 @@ def parseFasta(self, fasta_f, fasta_type):
                 self.seqs += 1
                 self.length += blObj.length
                 self.n_count += blObj.n_count
-                
+
                 if (fasta_type):
                     cov = BtIO.parseCovFromHeader(fasta_type, blObj.name)
                     self.covLibs[fasta_type].cov_sum += cov
@@ -270,16 +270,15 @@ def parseFasta(self, fasta_f, fasta_type):
                 self.dict_of_blobs[blObj.name] = blObj
             else:
                 BtLog.error('5', blObj.name)
-        
+
         if self.seqs == 0 or self.length == 0:
             BtLog.error('1')
-            
+
     def parseCovs(self, covLibObjs):
         for covLib in covLibObjs:
             self.addCovLib(covLib)
             print BtLog.status_d['1'] % (covLib.name, covLib.f)
             if covLib.fmt == 'bam' or covLib.fmt == 'sam':
-                
                 base_cov_dict = {}
                 if covLib.fmt == 'bam':
                     base_cov_dict, covLib.reads_total, covLib.reads_mapped, read_cov_dict = BtIO.readBam(covLib.f, set(self.dict_of_blobs))
@@ -305,17 +304,19 @@ def parseCovs(self, covLibObjs):
                     self.dict_of_blobs[name].addReadCov(covLib.name, read_cov_dict[name])
 
             elif covLib.fmt == 'cov':
-                cov_dict = BtIO.readCov(covLib.f, set(self.dict_of_blobs))
-                if not len(cov_dict) == self.seqs:
+                base_cov_dict, covLib.reads_total, covLib.reads_mapped, read_cov_dict = BtIO.readCov(covLib.f, set(self.dict_of_blobs))
+                #cov_dict = BtIO.readCov(covLib.f, set(self.dict_of_blobs))
+                if not len(base_cov_dict) == self.seqs:
                     print BtLog.warn_d['4'] % covLib.f
-                for name, cov in cov_dict.items():
+                for name, cov in base_cov_dict.items():
                     covLib.cov_sum += cov
                     self.dict_of_blobs[name].addCov(covLib.name, cov)
+                    self.dict_of_blobs[name].addReadCov(covLib.name, read_cov_dict[name])
             else:
-                pass        
+                pass
             covLib.mean_cov = covLib.cov_sum/self.seqs
             if covLib.cov_sum == 0.0:
-                BtLog.error('26', covLib.name)
+                print BtLog.warn_d['6'] % (covLib.name)
             self.covLibs[covLib.name] = covLib
 
 
@@ -327,10 +328,10 @@ def parseHits(self, hitLibs):
             for hitDict in BtIO.readTax(hitLib.f, set(self.dict_of_blobs)):
                 if ";" in hitDict['taxId']:
                     hitDict['taxId'] = hitDict['taxId'].split(";")[0]
-                    print BtLog.warn['5'] % (hitDict['name'], hitLib)
+                    print BtLog.warn_d['5'] % (hitDict['name'], hitLib)
                 self.set_of_taxIds.add(hitDict['taxId'])
                 self.dict_of_blobs[hitDict['name']].addHits(hitLib.name, hitDict)
-    
+
     def computeTaxonomy(self, taxrules, nodesDB):
         tree_lists = BtTax.getTreeList(self.set_of_taxIds, nodesDB)
         self.lineages = BtTax.getLineages(tree_lists, nodesDB)
@@ -344,14 +345,14 @@ def computeTaxonomy(self, taxrules, nodesDB):
                     blObj.taxonomy[taxrule] = BtTax.taxRule(taxrule, blObj.hits, self.lineages)
                 else:
                     blObj.taxonomy[taxrule] = BtTax.noHit()
-    
+
     def getBlobs(self):
         for blObj in [self.dict_of_blobs[key] for key in self.order_of_blobs]:
             yield blObj
 
 class BlObj():
     def __init__(self, name, seq):
-        self.name = name 
+        self.name = name
         self.length = len(seq)
         self.n_count = seq.count('N')
         self.agct_count = self.length - self.n_count
@@ -360,7 +361,7 @@ def __init__(self, name, seq):
         self.read_cov = {}
         self.hits = {}
         self.taxonomy = {}
-        
+
     def calculateGC(self, seq):
         return float((seq.count('G') + seq.count('C') ) / self.agct_count \
                      if self.agct_count > 0 else 0.0)
@@ -393,4 +394,4 @@ def __init__(self, name, fmt, f):
         self.f = abspath(f) if (f) else ''
 
 if __name__ == '__main__':
-    pass
\ No newline at end of file
+    pass
diff --git a/lib/BtIO.py b/lib/BtIO.py
index 3f61faa..94879f2 100644
--- a/lib/BtIO.py
+++ b/lib/BtIO.py
@@ -146,22 +146,52 @@ def parseCovFromHeader(fasta_type, header ):
         pass
 
 def readCov(infile, set_of_blobs):
-    cov_line_re = re.compile(r"^(\S+)\t(\d+\.*\d*)")
-    cov_dict = {}
+    old_cov_line_re = re.compile(r"^(\S+)\t(\d+\.*\d*)")
+    base_cov_dict = {}
+
+    cov_line_re = re.compile(r"^(\S+)\t(\d+\.*\d*)\t(\d+\.*\d*)")
+    reads_total = 0
+    reads_mapped = 0
+    read_cov_dict = {}
+
     seqs_parsed = 0
     progress_unit = 1
+    old_format = 1
     with open(infile) as fh:
         for line in fh:
-            match = cov_line_re.search(line)
-            if match:
-                seqs_parsed += 1
-                name, cov = match.group(1), float(match.group(2))
-                if name not in set_of_blobs:
-                    print BtLog.warn_d['2'] % (name, infile)
-                cov_dict[name] = cov
+            if line.startswith("#"):
+                old_format = 0
+            if old_format == 0:
+                if line.startswith("# Total Reads"):
+                    reads_total = int(line.split(" = ")[1])
+                elif line.startswith("# Mapped Reads"):
+                    reads_mapped = int(line.split(" = ")[1])
+                elif line.startswith("# Unmapped Reads"):
+                    pass
+                elif line.startswith("# Parameters"):
+                    pass
+                elif line.startswith("# contig_id"):
+                    pass
+                else:
+                    match = cov_line_re.search(line)
+                    if match:
+                        seqs_parsed += 1
+                        name, read_cov, base_cov = match.group(1), int(match.group(2)), float(match.group(3))
+                        if name not in set_of_blobs:
+                            print BtLog.warn_d['2'] % (name, infile)
+                        read_cov_dict[name] = read_cov
+                        base_cov_dict[name] = base_cov
+            else:
+                match = old_cov_line_re.search(line)
+                if match:
+                    seqs_parsed += 1
+                    name, base_cov = match.group(1), float(match.group(2))
+                    if name not in set_of_blobs:
+                        print BtLog.warn_d['2'] % (name, infile)
+                    base_cov_dict[name] = base_cov
             BtLog.progress(seqs_parsed, progress_unit, len(set_of_blobs))
-        BtLog.progress(len(set_of_blobs), progress_unit, len(set_of_blobs))
-    return cov_dict
+        #BtLog.progress(len(set_of_blobs), progress_unit, len(set_of_blobs))
+    return base_cov_dict, reads_total, reads_mapped, read_cov_dict
 
 def checkCas(infile):
     print BtLog.status_d['12']
diff --git a/lib/BtLog.py b/lib/BtLog.py
index c00c424..044e8e4 100644
--- a/lib/BtLog.py
+++ b/lib/BtLog.py
@@ -56,8 +56,7 @@ def progress(iteration, steps, max_value):
     '22' : '[ERROR:22]\t: Tax file %s seems to have no taxids.',
     '23' : '[ERROR:23]\t: Catcolour file %s does not seem to have the right format.',
     '24' : '[ERROR:24]\t: Catcolour file incompatible with c-index colouring.',
-    '25' : '[ERROR:25]\t: Cov file %s does not seem to have the right format.',
-    '26' : '[ERROR:26]\t: The cumulative coverage of cov lib %s is 0.0. Please check the mapping/coverage file.'
+    '25' : '[ERROR:25]\t: Cov file %s does not seem to have the right format.'
 }
 
 warn_d = {
@@ -66,7 +65,8 @@ def progress(iteration, steps, max_value):
     '2' : '[WARN]\t: %s in file %s is not part of the assembly',
     '3' : '[WARN]\t: samtools flagstat reported %s mapped reads, %s mapped reads were parsed',
     '4' : '[WARN]\t: No coverage data found in %s',
-    '5' : '[WARN]\t: Hit for sequence %s in tax file %s has multiple taxIds, only first one is used. '
+    '5' : '[WARN]\t: Hit for sequence %s in tax file %s has multiple taxIds, only first one is used.',
+    '6' : '[WARN]\t: The cumulative coverage of cov lib %s is 0.0. Please check the mapping/coverage file.'
 }
 status_d = {
     '1' : '[STATUS]\t: Parsing %s - %s',
diff --git a/lib/BtPlot.py b/lib/BtPlot.py
index 23c7e08..9ee8005 100644
--- a/lib/BtPlot.py
+++ b/lib/BtPlot.py
@@ -74,16 +74,26 @@ def parseCatColour(catcolour_f):
 
 def parseCovFile(cov_f):
     cov_dict = {}
+    old_format = 1
+    seq_name = ''
+    cov = 0.0
     with open(cov_f) as fh:
         for l in fh:
-            try:
-                seq_name, cov = l.rstrip("\n").split("\t")
-                if float(cov) < 0.02:
-                    cov_dict[seq_name] = 0.02
-                else:
-                    cov_dict[seq_name] = float(cov)
-            except:
-                BtLog.error('25', cov_f)
+            if l.startswith("#"):
+                old_format = 0
+            else:
+                try:
+                    field = l.rstrip("\n").split("\t")
+                    if not (old_format):
+                        seq_name, cov = field[0], field[2]
+                    else:
+                        seq_name, cov = field[0], field[1]
+                    if float(cov) < 0.02:
+                        cov_dict[seq_name] = 0.02
+                    else:
+                        cov_dict[seq_name] = float(cov)
+                except:
+                    BtLog.error('25', cov_f)
     return cov_dict
 
 
@@ -407,7 +417,7 @@ def plotScatterCov(self, cov_lib, cov_dict, info_flag, x_label, y_label, scale,
         for group in self.plot_order:
             i += 1
             group_length_array = array(self.stats[group]['length'])
-            group_cov_y_array = array([cov_dict[name] for name in self.stats[group]['name']]) # fix it!
+            group_cov_y_array = array([cov_dict.get(name, 0.02) for name in self.stats[group]['name']])
             group_cov_x_array = array(self.stats[group]['covs'][cov_lib])
             # calculate values for legend
             if len(group_length_array) > 0: