Skip to content

Commit

Permalink
Add option to skip transcriptome quantification
Browse files (browse the repository at this point in the history)
For those interested only in the assembled sequences, this option can
substantially reduce runtime: the Kallisto index is built from the
assembled sequences alone, leaving out the base transcriptome.
  • Loading branch information
dcroote committed Nov 15, 2018
1 parent c32f80a commit cfc9d93
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 4 deletions.
9 changes: 7 additions & 2 deletions bracerlib/bracer_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -2063,7 +2063,7 @@ def quantify_with_kallisto(kallisto, cell, output_dir, cell_name,
kallisto_base_transcriptome, fastq1, fastq2, ncores,
should_resume, single_end, fragment_length,
fragment_sd, trimmed_fastq1, trimmed_fastq2,
keep_trimmed_reads):
keep_trimmed_reads, no_transcriptome_quant):

print("##Running Kallisto##")
if should_resume:
Expand Down Expand Up @@ -2097,8 +2097,13 @@ def quantify_with_kallisto(kallisto, cell, output_dir, cell_name,
output_transcriptome = "{}/expression_quantification/kallisto_index/{}_transcriptome.fa".format(
output_dir, cell_name)

if no_transcriptome_quant:
idx_filenames = [fasta_filename]
else:
idx_filenames = [kallisto_base_transcriptome, fasta_filename]

with open(output_transcriptome, 'w') as outfile:
for fname in [kallisto_base_transcriptome, fasta_filename]:
for fname in idx_filenames:
with open(fname) as infile:
for line in infile:
outfile.write(line)
Expand Down
20 changes: 18 additions & 2 deletions bracerlib/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,11 @@ def __init__(self, **kwargs):
parser.add_argument('--keep_trimmed_reads',
help='Do not delete the output files from the trimming step.',
action = "store_true")
parser.add_argument('--no_transcriptome_quant',
help='Kallisto quantification is performed only '
'on the assembled sequences and not the entire '
'transcriptome.',
action = "store_true")
parser.add_argument('cell_name', metavar="<CELL_NAME>",
help='name of cell for file labels')
parser.add_argument('output_dir', metavar="<OUTPUT_DIR>",
Expand Down Expand Up @@ -260,6 +265,7 @@ def __init__(self, **kwargs):
self.max_junc_len = args.max_junc_len
self.no_trimming = args.no_trimming
self.keep_trimmed_reads = args.keep_trimmed_reads
self.no_transcriptome_quant = args.no_transcriptome_quant
config_file = args.config_file


Expand All @@ -281,6 +287,7 @@ def __init__(self, **kwargs):
self.max_junc_len = kwargs.get('max_junc_len')
self.no_trimming = kwargs.get('no_trimming')
self.keep_trimmed_reads = kwargs.get('keep_trimmed_reads')
self.no_transcriptome_quant = kwargs.get('no_transcriptome_quant')
config_file = kwargs.get('config_file')

self.trimmed_fastq1 = None
Expand Down Expand Up @@ -606,7 +613,8 @@ def quantify(self, cell):
kallisto_base_transcriptome, self.fastq1, self.fastq2,
self.ncores, self.resume_with_existing_files, self.single_end,
self.fragment_length, self.fragment_sd, self.trimmed_fastq1,
self.trimmed_fastq2, self.keep_trimmed_reads)
self.trimmed_fastq2, self.keep_trimmed_reads,
self.no_transcriptome_quant)
print()

counts = bracer_func.load_kallisto_counts(
Expand Down Expand Up @@ -1887,6 +1895,11 @@ def __init__(self, **kwargs):
parser.add_argument('--infer_lineage', help='Construct lineage trees '
'for clone groups shown in clonal network',
action ="store_true")
parser.add_argument('--no_transcriptome_quant',
help='Kallisto quantification is performed only '
'on the assembled sequences and not the entire '
'transcriptome.',
action = "store_true")
parser.add_argument('--output', '-o',
help='Directory for output data of test')
#parser.add_argument('--no_trimming', help='Do not trim reads',
Expand All @@ -1902,6 +1915,7 @@ def __init__(self, **kwargs):
self.no_networks = args.no_networks
self.resume = args.resume_with_existing_files
self.infer_lineage = args.infer_lineage
self.no_transcriptome_quant = args.no_transcriptome_quant
#self.no_trimming = args.no_trimming
else:
self.resource_dir = kwargs.get('resource_dir')
Expand All @@ -1912,6 +1926,7 @@ def __init__(self, **kwargs):
self.no_networks = kwargs.get('no_networks')
self.resume = kwargs.get('resume_with_existing_files')
self.infer_lineage = kwargs.get('infer_lineage')
self.no_transcriptome_quant = kwargs.get('no_transcriptome_quant')
#self.no_trimming = kwargs.get('no_trimming')

self.trimmed_fastq1 = None
Expand Down Expand Up @@ -1942,7 +1957,8 @@ def run(self):
single_end=False, fragment_length=False, fragment_sd=False,
loci=['H', 'K', 'L'], max_junc_len=100,
no_trimming=True, trimmed_fastq1=self.trimmed_fastq1,
trimmed_fastq2=self.trimmed_fastq2).run()
trimmed_fastq2=self.trimmed_fastq2,
no_transcriptome_quant=self.no_transcriptome_quant).run()

Summariser(resource_dir=self.resource_dir, config_file=self.config_file,
use_unfiltered=False, graph_format=self.graph_format,
Expand Down

0 comments on commit cfc9d93

Please sign in to comment.