From 8137628714b63990e71363a8c1bfb119e337333a Mon Sep 17 00:00:00 2001 From: alienzj Date: Sat, 25 Apr 2020 18:44:51 +0800 Subject: [PATCH] update to v0.7.1 --- README.md | 211 ++++++++++++++++++++++++++++++++++++++++++++++++ README.org | 113 -------------------------- build.sh | 5 +- conda/meta.yaml | 9 ++- setup.py | 57 +++++-------- 5 files changed, 238 insertions(+), 157 deletions(-) create mode 100644 README.md delete mode 100644 README.org diff --git a/README.md b/README.md new file mode 100644 index 00000000..8f40bfbe --- /dev/null +++ b/README.md @@ -0,0 +1,211 @@ +# metapi + +[![bioconda-badge](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io) +[![PyPI version](https://badge.fury.io/py/metapi.svg)](https://badge.fury.io/py/metapi) +[![star this repo](http://githubbadges.com/star.svg?user=ohmeta&repo=metapi&style=flat)](https://github.com/ohmeta/metapi) +[![Anaconda-Server Badge](https://anaconda.org/bioconda/metapi/badges/downloads.svg)](https://anaconda.org/bioconda/metapi) + +A pipeline to construct a genome catalogue from metagenomics data. + +## Installation + +metapi works with Python 3.6+. +You can install it via [bioconda](https://bioconda.github.io/): + +``` +$ conda install metapi +``` + +Or via pip: + +``` +$ pip install metapi +``` + +## Run + +### help + +``` +$ metapi --help + + .___ ___. _______ .___________. 
___ .______ __ + | \/ | | ____|| | / \ | _ \ | | + | \ / | | |__ `---| |----` / ^ \ | |_) | | | + | |\/| | | __| | | / /_\ \ | ___/ | | + | | | | | |____ | | / _____ \ | | | | + |__| |__| |_______| |__| /__/ \__\ | _| |__| + + Omics for All, Open Source for All + + A pipeline to construct a genome catalogue from metagenomics data + + optional arguments: + -h, --help show this help message and exit + -v, --version print software version and exit + + available subcommands: + + init init project + denovo_wf denovo_wf pipeline +``` + +### init + +``` +$ metapi init --help + + usage: metapi init [-h] [-d WORKDIR] [-s SAMPLES] + [-b {simulate,trimmingrmhost,assembly}] + + arguments: + -h, --help show this help message and exit + -d WORKDIR, --workdir WORKDIR + project workdir + -s SAMPLES, --samples SAMPLES + samples list, tsv format required if begin from + trimming, rmhost, or assembly: if it is fastq: the + header is [id, fq1, fq2], else it is sra: the header + is [id, sra] else begin from simulate: the header is + [id, genome, abundance, reads_num, model] + -b {simulate,trimming,rmhost,assembly}, --begin {simulate,trimming,rmhost,assembly} + pipeline starting point +``` + +### denovo_wf + +``` +$ metapi denovo_wf --help + + usage: metapi denovo_wf [-h] [-d WORKDIR] [--cores CORES] [--jobs JOBS] + [--list] [--run] [--debug] [--dry_run] [--qsub] + [--wait WAIT] [--snake [SNAKEMAKEARGS]] + [TASK] + + positional arguments: + TASK pipeline end point. 
Allowed values are simulate_all, + prepare_reads_all, raw_fastqc_all, rmhost_bwa_all, + rmhost_bowtie2_all, rmhost_all, assebmly_megahit_all, + assembly_idba_ud_all, assembly_metaspades_all, + assembly_spades_all, assembly_metaquast_all, + assembly_report_all, assembly_all, + alignment_base_depth_all, alignment_all, + binning_metabat2_all, binning_maxbin2_all, + binning_all, predcit_scafitgs_gene_all, + predict_bins_gene_all, predcit_all, checkm_link_bins, + checkm_all, dereplicate_drep_all, dereplicate_all, + classify_short_reads_kraken2_all, + classify_hmq_bins_gtdbtk_all, classify_all, + profiling_metaphlan2_all, profiling_jgi_all, + profiling_humann2_all, profiling_all, + upload_sequencing_all, upload_assembly_all, + upload_all, all + + arguments: + -h, --help show this help message and exit + -d WORKDIR, --workdir WORKDIR + project workdir, default: ./ + --cores CORES CPU cores + --jobs JOBS qsub job numbers + --list list pipeline rules + --run run pipeline + --debug debug pipeline + --dry_run dry run pipeline + --qsub qsub pipeline + --wait WAIT wait given seconds + --snake [SNAKEMAKEARGS] + other snakemake command options, if want --touch, just + --snake touch +``` + +## input requirements + +The input samples file `samples.tsv` must use one of the following formats: + +Note: If several rows share the same `id`, they belong to one sample and their reads will be merged. 
+ +- begin from trimming, rmhost or assembly: + + - `Paired-end fastq` + + | id | fq1 | fq2 | + | :-: | :-----: | :-----: | + | s1 | aa.1.fq | aa.2.fq | + | s2 | bb.1.fq | bb.2.fq | + | s2 | cc.1.fq | cc.2.fq | + | s3 | dd.1.fq | dd.2.fq | + + - `Single-end fastq` + + | id | fq1 | fq2 | + | :-: | :-----: | :-: | + | s1 | aa.1.fq | | + | s2 | bb.1.fq | | + | s2 | cc.1.fq | | + | s3 | dd.1.fq | | + + - `SRA`: + + SRA can be dumped to paired-end fastq reads + + | id | sra | + | :-: | :----: | + | s1 | aa.sra | + | s2 | bb.sra | + | s2 | cc.sra | + | s3 | dd.sra | + +- begin from simulate + + | id | genome | abundance | reads_num | model | + | :-: | :----: | :-------: | :-------: | :---: | + | s1 | g1.fa | 1.0 | 1M | hiseq | + | s2 | g1.fa | 0.5 | 2M | hiseq | + | s2 | g2.fa | 0.5 | 2M | hiseq | + | s3 | g1.fa | 0.2 | 3M | hiseq | + | s3 | g2.fa | 0.3 | 3M | hiseq | + | s3 | g3.fa | 0.5 | 3M | hiseq | + +It means: + +Sample s1 contains 1M reads, all of which come from g1; the relative abundance of +species g1 is 1.0. + +Sample s2 contains 2M reads: 1M reads come from g1 +and 1M reads come from g2. The relative abundance of +species g1 is 0.5, and the relative abundance of +species g2 is 0.5. + +Sample s3 contains 3M reads: 0.6M reads come from g1, 0.9M reads come from +g2 and 1.5M reads come from g3. The relative abundance of +species g1 is 0.2, the relative abundance of +species g2 is 0.3, and the relative abundance of +species g3 is 0.5. + +Then metapi will use [InSilicoSeq](https://github.com/HadrienG/InSilicoSeq) to generate metagenomic shotgun reads. + +## Getting help + +If you want to report a bug or issue, or have problems with installing or +running the software, please create [a new +issue](https://github.com/ohmeta/metapi/issues). +This is the preferred way of getting support. Alternatively, you can [mail me](mailto:alienchuj@gmail.com). + +## Contributing + +Contributions welcome! 
Send me a pull request or get in [touch](mailto:alienchuj@gmail.com). + +When contributing a PR, please use the [dev](https://github.com/ohmeta/metapi/tree/dev) branch. +For style, code will be checked using flake8, +[black](https://github.com/psf/black), and +[snakefmt](https://github.com/snakemake/snakefmt). These modules can be +installed via conda, `conda install black flake8 flake8-bugbear snakefmt` or via +pip `pip install black flake8 flake8-bugbear snakefmt`. + +## Contributors + +- Jie Zhu - @alienzj + +## License + +This module is licensed under the terms of the [GPLv3 license](https://opensource.org/licenses/GPL-3.0). diff --git a/README.org b/README.org deleted file mode 100644 index 6a9bfcb1..00000000 --- a/README.org +++ /dev/null @@ -1,113 +0,0 @@ -#+TITLE: metapi - -* Install -#+BEGIN_SRC bash -conda install metapi -#+END_SRC - -* Run -** help -#+BEGIN_SRC bash -$ metapi --help - -.___ ___. _______ .___________. ___ .______ __ -| \/ | | ____|| | / \ | _ \ | | -| \ / | | |__ `---| |----` / ^ \ | |_) | | | -| |\/| | | __| | | / /_\ \ | ___/ | | -| | | | | |____ | | / _____ \ | | | | -|__| |__| |_______| |__| /__/ \__\ | _| |__| - -Omics for All, Open Source for All - -A pipeline to construct a genome catalogue from metagenomics data - -optional arguments: --h, --help show this help message and exit --v, --version print software version and exit - -available subcommands: - -init init project -denovo_wf denovo_wf pipeline -#+END_SRC -** init -#+BEGIN_SRC bash -$ metapi init --help - -usage: metapi init [-h] [-d WORKDIR] [-s SAMPLES] - [-b {simulate,trimmingrmhost,assembly}] - -arguments: - -h, --help show this help message and exit - -d WORKDIR, --workdir WORKDIR - project workdir - -s SAMPLES, --samples SAMPLES - samples list, tsv format required if begin from - trimming, rmhost, or assembly: if it is fastq: the - header is [id, fq1, fq2], else it is sra: the header - is [id, sra] else begin from simulate: the header is - [id, genome, abundance, 
reads_num, model] --b {simulate,trimming,rmhost,assembly}, --begin {simulate,trimming,rmhost,assembly} - pipeline starting point -#+END_SRC - -** denovo_wf -#+BEGIN_SRC bash -$ metapi denovo_wf --help - -usage: metapi denovo_wf [-h] [-d WORKDIR] [--cores CORES] [--jobs JOBS] - [--list] [--run] [--debug] [--dry_run] [--qsub] - [--wait WAIT] [--snake [SNAKEMAKEARGS]] - [TASK] - -positional arguments: - TASK pipeline end point. Allowed values are simulate_all, - prepare_reads_all, raw_fastqc_all, rmhost_bwa_all, - rmhost_bowtie2_all, rmhost_all, assebmly_megahit_all, - assembly_idba_ud_all, assembly_metaspades_all, - assembly_spades_all, assembly_metaquast_all, - assembly_report_all, assembly_all, - alignment_base_depth_all, alignment_all, - binning_metabat2_all, binning_maxbin2_all, - binning_all, predcit_scafitgs_gene_all, - predict_bins_gene_all, predcit_all, checkm_link_bins, - checkm_all, dereplicate_drep_all, dereplicate_all, - classify_short_reads_kraken2_all, - classify_hmq_bins_gtdbtk_all, classify_all, - profiling_metaphlan2_all, profiling_jgi_all, - profiling_humann2_all, profiling_all, - upload_sequencing_all, upload_assembly_all, - upload_all, all - -arguments: - -h, --help show this help message and exit - -d WORKDIR, --workdir WORKDIR - project workdir, default: ./ - --cores CORES CPU cores - --jobs JOBS qsub job numbers - --list list pipeline rules - --run run pipeline - --debug debug pipeline - --dry_run dry run pipeline - --qsub qsub pipeline - --wait WAIT wait given seconds - --snake [SNAKEMAKEARGS] - other snakemake command options, if want --touch, just - --snake touch -#+END_SRC - -** input requirements -Please supply samples.tsv format like below: -| id | fq1 | fq2 | -|----+---------------+---------------| -| s1 | s1.1.fq.gz | s1.2.fq.gz | -| s2 | s2.1.fq.gz | s2.2.fq.gz | -| s2 | s21.1.fq.gz | s21.2.fq.g | -| s3 | hello.1.fq.gz | hello.2.fq.gz | -The header must be: id fq1 fq2. 
-If id is same, the fq1 and fq2 belong to one sample, and will be merged to -process. - -* License -- Omics for all, Open Source for all. -- GPLv3+ diff --git a/build.sh b/build.sh index 9bfee8e2..e9ac904b 100755 --- a/build.sh +++ b/build.sh @@ -1,3 +1,6 @@ #!/bin/sh +rm -rf build +rm -rf metapi.egg-info +rm -rf dist python3 setup.py sdist bdist_wheel -twine upload dist/* \ No newline at end of file +twine upload dist/* diff --git a/conda/meta.yaml b/conda/meta.yaml index bf6948a6..453733df 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ {% set name = "metapi" %} {% set version = "0.7.1" %} -{% set sha256 = "5943b213b97f42bd13379c43c66949c543b35f1a84ff552d450021677306bf60" %} +{% set sha256 = "bd5bf907cbe90d512d77f4fff75d3c16352f232aa22e7e43ba088cb7b6a6f583" %} package: name: {{ name|lower }} @@ -20,12 +20,12 @@ build: requirements: build: - - python >=3.5 + - python >=3.6 - setuptools - pip host: - - python >=3.5 + - python >=3.6 - setuptools - pip - pandas @@ -33,7 +33,7 @@ requirements: - snakemake >=5.10 run: - - python >=3.5 + - python >=3.6 - numpy - pandas - openpyxl @@ -41,6 +41,7 @@ requirements: - ruamel.yaml - snakemake >=5.10 - insilicoseq + - fastqc - seqtk - seqkit - multiqc diff --git a/setup.py b/setup.py index a578700d..d7b543ec 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import setuptools -with open("README.org") as f: +with open("README.md") as f: long_description = f.read() version = "0.7.1" @@ -23,47 +23,26 @@ author_email="zhujie@genomics.cn", description="a pipeline to construct a genome catalogue from metagenomics data", long_description=long_description, - long_description_content_type="text/org", + long_description_content_type="text/markdown", packages=["metapi"], package_data={ - "metapi": [ - "metapi/config.yaml", - "metapi/cluster.yaml", - "metapi/__init__.py", - "metapi/corer.py", - "metapi/configer.py", - "metapi/simulator.py", - "metapi/sampler.py", - "metapi/tooler.py", - 
"metapi/qcer.py", - "metapi/assembler.py", - "metapi/aligner.py", - "metapi/binner.py", - "metapi/checkmer.py", - "metapi/classifier.py", - "metapi/profiler.py", - "metapi/uploader.py", - "metapi/Snakefile", - "metapi/rules/simulate.smk", - "metapi/rules/raw.smk", - "metapi/rules/trimming.smk", - "metapi/rules/rmhost.smk", - "metapi/rules/qcreport.smk", - "metapi/rules/assembly.smk", - "metapi/rules/coassembly.smk", - "metapi/rules/alignment.smk", - "metapi/rules/binning.smk", - "metapi/rules/cobinning.smk", - "metapi/rules/predict.smk", - "metapi/rules/checkm.smk", - "metapi/rules/dereplicate.smk", - "metapi/rules/classify.smk", - "metapi/rules/porfiling.smk", - "metapi/rules/upload.smk", - ] + "metapi": ["metapi/*.yaml", "metapi/Snakefile", "metapi/rules/*.smk",] }, include_package_data=True, - install_requires=["pandas", "ruamel.yaml"], + install_requires=[ + "numpy", + "pandas", + "openpyxl", + "snakemake", + "ruamel.yaml", + "biopython", + "InSilicoSeq", + "multiqc", + "quast", + "checkm-genome", + "gtdbtk", + "drep", + ], zip_safe=False, entry_points={"console_scripts": ["metapi = metapi.corer:main"]}, classifiers=[ @@ -80,7 +59,7 @@ "Programming Language :: Python :: 3.8", "Topic :: Scientific/Engineering :: Bio-Informatics", ], - python_requires=">=3.5", + python_requires=">=3.6", ) print(