Skip to content

Commit

Permalink
Minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
snayfach committed Nov 1, 2016
1 parent 09aaa4d commit 51e31ef
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 24 deletions.
3 changes: 1 addition & 2 deletions midas/merge/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def annotate_site(site, genes, gene_index, contigs):
# gene_index: current position in list of genes; global variable
site.snp_types = {}
site.amino_acids = {}

while True:
# 1. fetch next gene
# if there are no more genes, snp must be non-coding so break
Expand All @@ -120,7 +119,7 @@ def annotate_site(site, genes, gene_index, contigs):
# 4. otherwise, snp must be in gene
# annotate site (1D-4D) and snp (SYN, NS)
else:
site.gene_id = gene['gene_id'].split('|')[-1]
site.gene_id = gene['gene_id']
site.ref_codon, site.codon_pos = fetch_ref_codon(site, gene)
if not all([_ in ['A','T','C','G'] for _ in site.ref_codon]): # check for invalid bases in codon
site.site_type = 'NA'; site.gene_id = ''
Expand Down
2 changes: 1 addition & 1 deletion midas/merge/merge_snps.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def filter_snp_matrix(species_id, samples, args):
write_matrices(site, matrices)

def write_readme(args, sp):
outfile = open('%s/%s/README' % (args['outdir'], sp.id), 'w')
outfile = open('%s/%s/readme.txt' % (args['outdir'], sp.id), 'w')
outfile.write("""
Description of output files and file formats from 'merge_midas.py snps'
Expand Down
2 changes: 1 addition & 1 deletion midas/merge/merge_species.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def identify_samples(args):
return samples

def write_readme(args):
outfile = open('%s/README' % args['outdir'], 'w')
outfile = open('%s/readme.txt' % args['outdir'], 'w')
outfile.write("""
Description of output files and file formats from 'merge_midas.py species'
Expand Down
9 changes: 2 additions & 7 deletions midas/merge/snp_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,13 @@ def __init__(self, files, samples, info=None):
self.id, self.depth = next(files['depth'])
self.id, self.alt_allele = next(files['alt_allele'])
self.info = next(info) if info else None
self.ref_id, self.ref_pos, self.ref_allele = self.parse_id()
self.ref_id, self.ref_pos, self.ref_allele = self.id.rsplit('|', 2)
self.ref_pos = int(self.ref_pos)
self.samples = samples

except StopIteration:
self.id = None

def parse_id(self):
ref_id = self.id.rsplit('|')[0]
ref_pos = int(self.id.split('|')[1])
ref_allele = self.id.split('|')[2]
return ref_id, ref_pos, ref_allele

def sample_values(self): # return a dic mapping sample id to site values
d = {}
for index, sample in enumerate(self.samples):
Expand Down
12 changes: 9 additions & 3 deletions scripts/merge_midas.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,18 @@ def genes_arguments():
species.add_argument('--species_id', dest='species_id', type=str, metavar='CHAR',
help="""Comma-separated list of species ids""")
species.add_argument('--max_species', type=int, metavar='INT',
help="""Maximum number of species to merge. useful for testing (use all)""")
help="""Maximum number of species to merge. Useful for testing (use all)""")
sample = parser.add_argument_group('Sample filters (select subset of samples from INPUT)')
sample.add_argument('--sample_depth', type=float, default=1.0, metavar='FLOAT',
help="""Minimum read-depth across all genes with non-zero coverage (1.0)""")
sample.add_argument('--max_samples', type=int, metavar='INT',
help="""Maximum number of samples to process. useful for testing (use all)""")
gene = parser.add_argument_group('Presence/Absence')
help="""Maximum number of samples to process. Useful for testing (use all)""")
gene = parser.add_argument_group('Quantification')
gene.add_argument('--cluster_pid', type=str, dest='cluster_pid', default='95', choices=['75', '80', '85', '90', '95', '99'],
help="""In the database, pan-genomes are defined at 6 different %% identity clustering cutoffs
CLUSTER_PID allows you to quantify gene content for any of these sets of gene clusters
By default, gene content is reported for genes clustered at 95%% identity (95)
""")
gene.add_argument('--min_copy', type=float, default=0.35, metavar='FLOAT',
help="""Genes >= MIN_COPY are classified as present
Genes < MIN_COPY are classified as absent (0.35)""")
Expand Down Expand Up @@ -295,6 +300,7 @@ def print_genes_arguments(args):
print (" keep samples with >=%s mean coverage across genes with non-zero coverage" % args['sample_depth'])
if args['max_samples']: print (" keep <= %s samples" % args['max_samples'])
print ("Gene quantification criterea:")
print (" quantify genes clustered at %s%% identity" % args['cluster_pid'])
print (" present (1): genes with copy number >= %s" % args['min_copy'])
print (" absent (0): genes with copy number < %s" % args['min_copy'])
print ("===============================")
Expand Down
22 changes: 12 additions & 10 deletions scripts/run_midas.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,12 @@ def print_species_arguments(args):
lines.append("Input reads (2nd mate): %s" % args['m2'])
lines.append("Remove temporary files: %s" % args['remove_temp'])
lines.append("Word size for database search: %s" % args['word_size'])
if args['mapid']: lines.append("Minimum mapping identity: %s" % args['mapid'])
if args['mapid']:
lines.append("Minimum mapping identity: %s" % args['mapid'])
lines.append("Minimum mapping alignment coverage: %s" % args['aln_cov'])
lines.append("Number of reads to use from input: %s" % (args['max_reads'] if args['max_reads'] else 'use all'))
if args['read_length']: lines.append("Trim reads to %s-bp and discard reads with length < %s-bp" % (args['read_length'], args['read_length']))
if args['read_length']:
lines.append("Trim reads from 3'/right end to %s-bp and discard reads with length < %s-bp" % (args['read_length'], args['read_length']))
lines.append("Number of threads for database search: %s" % args['threads'])
lines.append("================================")
args['log'].write('\n'.join(lines)+'\n')
Expand Down Expand Up @@ -258,11 +260,11 @@ def gene_arguments():
map.add_argument('--mapid', type=float, metavar='FLOAT',
default=94.0, help='Discard reads with alignment identity < MAPID (94.0)')
map.add_argument('--mapq', type=int, metavar='INT',
default=20, help='Discard reads with mapping quality < MAPQ (10)')
default=0, help=argparse.SUPPRESS) # help='Discard reads with mapping quality < MAPQ (0)'
map.add_argument('--aln_cov', type=float, metavar='FLOAT',
default=0.75, help='Discard reads with alignment coverage < ALN_COV (0.75)')
map.add_argument('--trim', type=int, default=0, metavar='INT',
help='Trim N base-pairs from read-tails (0)')
help='Trim N base-pairs from 3\'/right end of read (0)')
args = vars(parser.parse_args())
if args['species_id']: args['species_id'] = args['species_id'].split(',')
return args
Expand Down Expand Up @@ -303,7 +305,7 @@ def print_gene_arguments(args):
lines.append(" minimum alignment coverage of reads: %s" % args['aln_cov'])
lines.append(" minimum read quality score: %s" % args['readq'])
lines.append(" minimum mapping quality score: %s" % args['mapq'])
lines.append(" trim %s base-pairs from read-tails" % args['trim'])
lines.append(" trim %s base-pairs from 3'/right end of read" % args['trim'])
lines.append("================================")
args['log'].write('\n'.join(lines)+'\n')
sys.stdout.write('\n'.join(lines)+'\n')
Expand Down Expand Up @@ -377,7 +379,7 @@ def snp_arguments():
snps.add_argument('--readq', type=int, metavar='INT',
default=20, help='Discard reads with mean quality < READQ (20)')
snps.add_argument('--trim', metavar='INT', type=int, default=0,
help='Trim N base-pairs from read-tails (0)')
help='Trim N base-pairs from 3\'/right end of read')
snps.add_argument('--discard', default=False, action='store_true',
help='Discard discordant read-pairs')
snps.add_argument('--baq', default=False, action='store_true',
Expand Down Expand Up @@ -424,7 +426,7 @@ def print_snp_arguments(args):
lines.append(" minimum mapping quality score: %s" % args['mapq'])
lines.append(" minimum base quality score: %s" % args['baseq'])
lines.append(" minimum read quality score: %s" % args['readq'])
lines.append(" trim %s base-pairs from read-tails" % args['trim'])
lines.append(" trim %s base-pairs from 3'/right end of read" % args['trim'])
if args['discard']: lines.append(" discard discordant read-pairs")
if args['baq']: lines.append(" enable BAQ (per-base alignment quality)")
if args['adjust_mq']: lines.append(" adjust MAPQ")
Expand Down Expand Up @@ -574,7 +576,7 @@ def check_snps(args):
sys.exit("\nError: BASEQ must be between 0 and 100\n")

def write_readme(program, args):
outfile = open('%s/%s/README' % (args['outdir'], program), 'w')
outfile = open('%s/%s/readme.txt' % (args['outdir'], program), 'w')
if program == 'species':
outfile.write("""
Description of output files and file formats from 'run_midas.py species'
Expand All @@ -599,7 +601,7 @@ def write_readme(program, args):
coverage: estimated genome-coverage (i.e. read-depth) of species in metagenome
relative_abundance: estimated relative abundance of species in metagenome
**Additional information for each species can be found in the reference database:
Additional information for each species can be found in the reference database:
%s/marker_genes
""" % args['db'])
elif program == 'genes':
Expand Down Expand Up @@ -638,7 +640,7 @@ def write_readme(program, args):
mean_coverage: average read-depth across genes with at least 1 mapped read
marker_coverage: median read-depth across 15 universal single copy genes
**Additional information for each species can be found in the reference database:
Additional information for each species can be found in the reference database:
%s/pan_genomes
""" % args['db'])
elif program == 'snps':
Expand Down

0 comments on commit 51e31ef

Please sign in to comment.