Add option to skip transcriptome quantification

For users interested only in the assembled sequences, this option can substantially reduce runtime: the base transcriptome is left out of the file used to build the Kallisto index, so only the assembled sequences are indexed and quantified.
Teichlab · Nov 15, 2018 · cfc9d93 · cfc9d93
1 parent c32f80a
commit cfc9d93
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 4 deletions.
diff --git a/bracerlib/bracer_func.py b/bracerlib/bracer_func.py
@@ -2063,7 +2063,7 @@ def quantify_with_kallisto(kallisto, cell, output_dir, cell_name,
                            kallisto_base_transcriptome, fastq1, fastq2, ncores, 
                            should_resume, single_end, fragment_length, 
                            fragment_sd, trimmed_fastq1, trimmed_fastq2,
-                           keep_trimmed_reads):
+                           keep_trimmed_reads, no_transcriptome_quant):
 
     print("##Running Kallisto##")
     if should_resume:
@@ -2097,8 +2097,13 @@ def quantify_with_kallisto(kallisto, cell, output_dir, cell_name,
     output_transcriptome = "{}/expression_quantification/kallisto_index/{}_transcriptome.fa".format(
                                                             output_dir, cell_name)
 
+    if no_transcriptome_quant:
+        idx_filenames = [fasta_filename]
+    else:
+        idx_filenames = [kallisto_base_transcriptome, fasta_filename]
+
     with open(output_transcriptome, 'w') as outfile:
-        for fname in [kallisto_base_transcriptome, fasta_filename]:
+        for fname in idx_filenames:
             with open(fname) as infile:
                 for line in infile:
                     outfile.write(line)

diff --git a/bracerlib/tasks.py b/bracerlib/tasks.py
@@ -233,6 +233,11 @@ def __init__(self, **kwargs):
             parser.add_argument('--keep_trimmed_reads',
                                 help='Do not delete the output files from the trimming step.',
                                 action = "store_true")
+            parser.add_argument('--no_transcriptome_quant',
+                                help='Kallisto quantification is performed only '
+                                'on the assembled sequences and not the entire '
+                                'transcriptome.',
+                                action = "store_true")
             parser.add_argument('cell_name', metavar="<CELL_NAME>", 
                                 help='name of cell for file labels')
             parser.add_argument('output_dir', metavar="<OUTPUT_DIR>",
@@ -260,6 +265,7 @@ def __init__(self, **kwargs):
             self.max_junc_len = args.max_junc_len
             self.no_trimming = args.no_trimming
             self.keep_trimmed_reads = args.keep_trimmed_reads
+            self.no_transcriptome_quant = args.no_transcriptome_quant
             config_file = args.config_file
 
 
@@ -281,6 +287,7 @@ def __init__(self, **kwargs):
             self.max_junc_len = kwargs.get('max_junc_len')
             self.no_trimming = kwargs.get('no_trimming')
             self.keep_trimmed_reads = kwargs.get('keep_trimmed_reads')
+            self.no_transcriptome_quant = kwargs.get('no_transcriptome_quant')
             config_file = kwargs.get('config_file')
 
         self.trimmed_fastq1 = None
@@ -606,7 +613,8 @@ def quantify(self, cell):
                 kallisto_base_transcriptome, self.fastq1, self.fastq2, 
                 self.ncores, self.resume_with_existing_files, self.single_end, 
                 self.fragment_length, self.fragment_sd, self.trimmed_fastq1,
-                self.trimmed_fastq2, self.keep_trimmed_reads)
+                self.trimmed_fastq2, self.keep_trimmed_reads,
+                self.no_transcriptome_quant)
         print()
 
         counts = bracer_func.load_kallisto_counts(
@@ -1887,6 +1895,11 @@ def __init__(self, **kwargs):
             parser.add_argument('--infer_lineage', help='Construct lineage trees '
                                 'for clone groups shown in clonal network', 
                                 action ="store_true")
+            parser.add_argument('--no_transcriptome_quant',
+                                help='Kallisto quantification is performed only '
+                                'on the assembled sequences and not the entire '
+                                'transcriptome.',
+                                action = "store_true")
             parser.add_argument('--output', '-o', 
                                 help='Directory for output data of test')
             #parser.add_argument('--no_trimming', help='Do not trim reads',
@@ -1902,6 +1915,7 @@ def __init__(self, **kwargs):
             self.no_networks = args.no_networks
             self.resume = args.resume_with_existing_files
             self.infer_lineage = args.infer_lineage
+            self.no_transcriptome_quant = args.no_transcriptome_quant
             #self.no_trimming = args.no_trimming
         else:
             self.resource_dir = kwargs.get('resource_dir')
@@ -1912,6 +1926,7 @@ def __init__(self, **kwargs):
             self.no_networks = kwargs.get('no_networks')
             self.resume = kwargs.get('resume_with_existing_files')
             self.infer_lineage = kwargs.get('infer_lineage')
+            self.no_transcriptome_quant = kwargs.get('no_transcriptome_quant')
             #self.no_trimming = kwargs.get('no_trimming')
 
         self.trimmed_fastq1 = None
@@ -1942,7 +1957,8 @@ def run(self):
                       single_end=False, fragment_length=False, fragment_sd=False, 
                       loci=['H', 'K', 'L'], max_junc_len=100, 
                       no_trimming=True, trimmed_fastq1=self.trimmed_fastq1,
-                      trimmed_fastq2=self.trimmed_fastq2).run()
+                      trimmed_fastq2=self.trimmed_fastq2,
+                      no_transcriptome_quant=self.no_transcriptome_quant).run()
 
         Summariser(resource_dir=self.resource_dir, config_file=self.config_file, 
                    use_unfiltered=False, graph_format=self.graph_format,