Skip to content

Commit

Permalink
Merge pull request #581 from nf-core/custom-script-licence
Browse files (browse the repository at this point in the history)
Add custom script licences to all tools
  • Loading branch information
jfy133 authored Feb 12, 2024
2 parents 90c7007 + 7359174 commit 9a67aad
Show file tree
Hide file tree
Showing 12 changed files with 246 additions and 58 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Changed`

- [#581](https://github.com/nf-core/mag/pull/581) - Added explicit licence text to headers of all custom scripts (reported by @FriederikeHanssen and @maxibor, fix by @jfy133)

### `Fixed`

### `Dependencies`
Expand Down
82 changes: 67 additions & 15 deletions bin/combine_tables.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#!/usr/bin/env python

## Originally written by Daniel Straub and Sabrina Krakau and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.


import sys
import argparse
import os.path
Expand All @@ -8,11 +12,25 @@

def parse_args(args=None):
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--depths_summary", required=True, metavar="FILE", help="Bin depths summary file.")
parser.add_argument("-b", "--busco_summary", metavar="FILE", help="BUSCO summary file.")
parser.add_argument("-c", "--checkm_summary", metavar="FILE", help="CheckM summary file.")
parser.add_argument("-q", "--quast_summary", metavar="FILE", help="QUAST BINS summary file.")
parser.add_argument("-g", "--gtdbtk_summary", metavar="FILE", help="GTDB-Tk summary file.")
parser.add_argument(
"-d",
"--depths_summary",
required=True,
metavar="FILE",
help="Bin depths summary file.",
)
parser.add_argument(
"-b", "--busco_summary", metavar="FILE", help="BUSCO summary file."
)
parser.add_argument(
"-c", "--checkm_summary", metavar="FILE", help="CheckM summary file."
)
parser.add_argument(
"-q", "--quast_summary", metavar="FILE", help="QUAST BINS summary file."
)
parser.add_argument(
"-g", "--gtdbtk_summary", metavar="FILE", help="GTDB-Tk summary file."
)
parser.add_argument("-a", "--cat_summary", metavar="FILE", help="CAT table file.")
parser.add_argument(
"-o",
Expand Down Expand Up @@ -45,7 +63,14 @@ def parse_cat_table(cat_table):
for line in f:
maxcol = max(maxcol, len(line.split("\t")))

header = ["bin", "classification", "reason", "lineage", "lineage scores", "full lineage names"]
header = [
"bin",
"classification",
"reason",
"lineage",
"lineage scores",
"full lineage names",
]

df = pd.read_table(
cat_table,
Expand All @@ -55,7 +80,11 @@ def parse_cat_table(cat_table):
skiprows=1,
)
# merge all rank columns into a single column
df["CAT_rank"] = df.filter(regex="rank_\d+").apply(lambda x: ";".join(x.dropna()), axis=1).str.lstrip()
df["CAT_rank"] = (
df.filter(regex="rank_\d+")
.apply(lambda x: ";".join(x.dropna()), axis=1)
.str.lstrip()
)
# remove rank_* columns
df.drop(df.filter(regex="rank_\d+").columns, axis=1, inplace=True)

Expand All @@ -65,21 +94,34 @@ def parse_cat_table(cat_table):
def main(args=None):
args = parse_args(args)

if not args.busco_summary and not args.checkm_summary and not args.quast_summary and not args.gtdbtk_summary:
sys.exit("No summary specified! Please specify at least BUSCO, CheckM or QUAST summary.")
if (
not args.busco_summary
and not args.checkm_summary
and not args.quast_summary
and not args.gtdbtk_summary
):
sys.exit(
"No summary specified! Please specify at least BUSCO, CheckM or QUAST summary."
)

# GTDB-Tk can only be run in combination with BUSCO or CheckM
if args.gtdbtk_summary and not (args.busco_summary or args.checkm_summary):
sys.exit("Invalid parameter combination: GTDB-TK summary specified, but no BUSCO or CheckM summary!")
sys.exit(
"Invalid parameter combination: GTDB-TK summary specified, but no BUSCO or CheckM summary!"
)

# handle bin depths
results = pd.read_csv(args.depths_summary, sep="\t")
results.columns = ["Depth " + str(col) if col != "bin" else col for col in results.columns]
results.columns = [
"Depth " + str(col) if col != "bin" else col for col in results.columns
]
bins = results["bin"].sort_values().reset_index(drop=True)

if args.busco_summary:
busco_results = pd.read_csv(args.busco_summary, sep="\t")
if not bins.equals(busco_results["GenomeBin"].sort_values().reset_index(drop=True)):
if not bins.equals(
busco_results["GenomeBin"].sort_values().reset_index(drop=True)
):
sys.exit("Bins in BUSCO summary do not match bins in bin depths summary!")
results = pd.merge(
results, busco_results, left_on="bin", right_on="GenomeBin", how="outer"
Expand Down Expand Up @@ -107,7 +149,9 @@ def main(args=None):
]
checkm_results = pd.read_csv(args.checkm_summary, usecols=use_columns, sep="\t")
checkm_results["Bin Id"] = checkm_results["Bin Id"] + ".fa"
if not bins.equals(checkm_results["Bin Id"].sort_values().reset_index(drop=True)):
if not bins.equals(
checkm_results["Bin Id"].sort_values().reset_index(drop=True)
):
sys.exit("Bins in CheckM summary do not match bins in bin depths summary!")
results = pd.merge(
results, checkm_results, left_on="bin", right_on="Bin Id", how="outer"
Expand All @@ -116,7 +160,9 @@ def main(args=None):

if args.quast_summary:
quast_results = pd.read_csv(args.quast_summary, sep="\t")
if not bins.equals(quast_results["Assembly"].sort_values().reset_index(drop=True)):
if not bins.equals(
quast_results["Assembly"].sort_values().reset_index(drop=True)
):
sys.exit("Bins in QUAST summary do not match bins in bin depths summary!")
results = pd.merge(
results, quast_results, left_on="bin", right_on="Assembly", how="outer"
Expand All @@ -134,7 +180,13 @@ def main(args=None):
cat_results = parse_cat_table(args.cat_summary)
if len(set(cat_results["bin"].to_list()).difference(set(bins))) > 0:
sys.exit("Bins in CAT summary do not match bins in bin depths summary!")
results = pd.merge(results, cat_results[["bin", "CAT_rank"]], left_on="bin", right_on="bin", how="outer")
results = pd.merge(
results,
cat_results[["bin", "CAT_rank"]],
left_on="bin",
right_on="bin",
how="outer",
)

results.to_csv(args.out, sep="\t")

Expand Down
4 changes: 2 additions & 2 deletions bin/domain_classification.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env Rscript

# Written by Jim Downie and released under the MIT license.
# See git repository (https://github.com/nf-core/mag) for full license text.
## Written by Jim Downie and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

library(optparse)
library(tidyverse)
Expand Down
15 changes: 12 additions & 3 deletions bin/filter_ssu.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env python

## Originally written by Hadrien Gourlé and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

from __future__ import print_function

import os
Expand Down Expand Up @@ -28,10 +31,16 @@ def filter(args):


def main():
parser = argparse.ArgumentParser(prog="filter_ssu.py", usage="filter ssu hits from refinem")
parser = argparse.ArgumentParser(
prog="filter_ssu.py", usage="filter ssu hits from refinem"
)
parser.add_argument("--evalue", help="evalue threshold")
parser.add_argument("ssu", metavar="ssu.tsv", help="ssu tsv file generated by refinem")
parser.add_argument("output", metavar="output.tsv", default="output.tsv", help="output file name")
parser.add_argument(
"ssu", metavar="ssu.tsv", help="ssu tsv file generated by refinem"
)
parser.add_argument(
"output", metavar="output.tsv", default="output.tsv", help="output file name"
)
parser.set_defaults(func=filter)
args = parser.parse_args()

Expand Down
35 changes: 28 additions & 7 deletions bin/get_mag_depths.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env python

## Originally written by Sabrina Krakau and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

import sys
import argparse
import os.path
Expand All @@ -14,7 +17,12 @@
def parse_args(args=None):
parser = argparse.ArgumentParser()
parser.add_argument(
"-b", "--bins", required=True, nargs="+", metavar="FILE", help="Bins: FASTA containing all contigs."
"-b",
"--bins",
required=True,
nargs="+",
metavar="FILE",
help="Bins: FASTA containing all contigs.",
)
parser.add_argument(
"-d",
Expand All @@ -23,9 +31,15 @@ def parse_args(args=None):
metavar="FILE",
help="(Compressed) TSV file containing contig depths for each sample: contigName, contigLen, totalAvgDepth, sample1_avgDepth, sample1_var [, sample2_avgDepth, sample2_var, ...].",
)
parser.add_argument("-a", "--assembler", required=True, type=str, help="Assembler name.")
parser.add_argument("-i", "--id", required=True, type=str, help="Sample or group id.")
parser.add_argument("-m", "--binner", required=True, type=str, help="Binning method.")
parser.add_argument(
"-a", "--assembler", required=True, type=str, help="Assembler name."
)
parser.add_argument(
"-i", "--id", required=True, type=str, help="Sample or group id."
)
parser.add_argument(
"-m", "--binner", required=True, type=str, help="Binning method."
)
return parser.parse_args(args)


Expand Down Expand Up @@ -56,7 +70,9 @@ def main(args=None):

# Initialize output files
n_samples = len(sample_names)
with open(args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "w") as outfile:
with open(
args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "w"
) as outfile:
print("bin", "\t".join(sample_names), sep="\t", file=outfile)

# for each bin, access contig depths and compute mean bin depth (for all samples)
Expand All @@ -77,10 +93,15 @@ def main(args=None):
all_depths[sample].append(contig_depths[sample])

binname = os.path.basename(file)
with open(args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "a") as outfile:
with open(
args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "a"
) as outfile:
print(
binname,
"\t".join(str(statistics.median(sample_depths)) for sample_depths in all_depths),
"\t".join(
str(statistics.median(sample_depths))
for sample_depths in all_depths
),
sep="\t",
file=outfile,
)
Expand Down
3 changes: 3 additions & 0 deletions bin/get_mag_depths_summary.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env python

## Originally written by Sabrina Krakau and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

import sys
import argparse
import pandas as pd
Expand Down
28 changes: 21 additions & 7 deletions bin/multiqc_to_custom_tsv.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env python
# copied from nf-core/viralrecon and adjusted

## Copied from nf-core/viralrecon and adjusted
## See git repository (https://github.com/nf-core/viralrecon) for full license text.


import os
import sys
Expand All @@ -9,9 +12,7 @@


def parse_args(args=None):
Description = (
"Create custom spreadsheet for pertinent MultiQC bowtie 2 metrics generated by the nf-core/mag pipeline."
)
Description = "Create custom spreadsheet for pertinent MultiQC bowtie 2 metrics generated by the nf-core/mag pipeline."
Epilog = "Example usage: python multiqc_to_custom_tsv.py"
parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
parser.add_argument(
Expand Down Expand Up @@ -86,7 +87,9 @@ def metrics_dict_to_file(FileFieldList, MultiQCDataDir, OutFile, se):
for yamlFile, mappingList in FileFieldList:
yamlFile = os.path.join(MultiQCDataDir, yamlFile)
if os.path.exists(yamlFile):
MetricsDict = yaml_fields_to_dict(YAMLFile=yamlFile, AppendDict=MetricsDict, FieldMappingList=mappingList)
MetricsDict = yaml_fields_to_dict(
YAMLFile=yamlFile, AppendDict=MetricsDict, FieldMappingList=mappingList
)
FieldList += [x[0] for x in mappingList]
else:
print("WARNING: File does not exist: {}".format(yamlFile))
Expand All @@ -96,7 +99,15 @@ def metrics_dict_to_file(FileFieldList, MultiQCDataDir, OutFile, se):
with open(OutFile, "w") as fout:
if se:
fout.write(
"{}\n".format("\t".join(["Sample", "SE reads not mapped (kept)", "SE reads mapped (discarded)"]))
"{}\n".format(
"\t".join(
[
"Sample",
"SE reads not mapped (kept)",
"SE reads mapped (discarded)",
]
)
)
)
else:
fout.write(
Expand All @@ -118,7 +129,10 @@ def metrics_dict_to_file(FileFieldList, MultiQCDataDir, OutFile, se):
[
k,
str(MetricsDict[k][FieldList[0]]),
str(MetricsDict[k][FieldList[1]] + MetricsDict[k][FieldList[2]]),
str(
MetricsDict[k][FieldList[1]]
+ MetricsDict[k][FieldList[2]]
),
]
)
)
Expand Down
18 changes: 15 additions & 3 deletions bin/plot_mag_depths.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env python

# Originally written by Sabrina Krakau and released under the MIT license.
# See git repository (https://github.com/nf-core/mag) for full license text.

import sys
import argparse
import os.path
Expand All @@ -26,7 +29,9 @@ def parse_args(args=None):
metavar="FILE",
help="File in TSV format containing group information for samples: sample, group",
)
parser.add_argument("-o", "--out", required=True, metavar="FILE", type=str, help="Output file.")
parser.add_argument(
"-o", "--out", required=True, metavar="FILE", type=str, help="Output file."
)
return parser.parse_args(args)


Expand All @@ -43,12 +48,19 @@ def main(args=None):
# compute centered log-ratios
# divide df by sample-wise geometric means
gmeans = stats.gmean(df, axis=0) # apply on axis=0: 'index'
df = np.log(df.div(gmeans, axis="columns")) # divide column-wise (axis=1|'columns'), take natural logarithm
df = np.log(
df.div(gmeans, axis="columns")
) # divide column-wise (axis=1|'columns'), take natural logarithm
df.index.name = "MAGs"
df.columns.name = "Samples"

# prepare colors for group information
color_map = dict(zip(groups["group"].unique(), sns.color_palette(n_colors=len(groups["group"].unique()))))
color_map = dict(
zip(
groups["group"].unique(),
sns.color_palette(n_colors=len(groups["group"].unique())),
)
)

# plot
plt.figure()
Expand Down
6 changes: 5 additions & 1 deletion bin/run_busco.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#! /usr/bin/env bash

# Originally written by Sabrina Krakau and James Fellows Yates and released
# under the MIT license.
# See git repository (https://github.com/nf-core/mag) for full license text.

p=$1
cp_augustus_config=$2
db=$3
Expand Down Expand Up @@ -148,7 +152,7 @@ if [ -f BUSCO/logs/prodigal_out.log ]; then
fi

# output value of most_spec_db
echo ${most_spec_db} > info_most_spec_db.txt
echo ${most_spec_db} >info_most_spec_db.txt

# if needed delete temporary BUSCO files
if [ ${busco_clean} = "Y" ]; then
Expand Down
Loading

0 comments on commit 9a67aad

Please sign in to comment.