# install dependencies
conda install \
pandas ruamel_yaml \
snakemake pigz sra-tools \
insilicoseq ncbi-genome-download \
sickle-trim fastp \
bwa bowtie2 samtools \
spades idba megahit quast \
metabat maxbin2 checkm-genome \
prodigal prokka gtdbtk metaphlan2 humann2
conda install -c alienzj seqtk
# install metapi
git clone -b dev https://github.com/ohmeta/metapi
# configure PYTHONPATH
echo 'export PYTHONPATH="/path/to/metapi:$PYTHONPATH"' >> ~/.bashrc
python /path/to/metapi/metapi/core.py --help
Please supply a samples.tsv file formatted like the example below:
id | fq1 | fq2 |
---|---|---|
s1 | s1.1.fq.gz | s1.2.fq.gz |
s2 | s2.1.fq.gz | s2.2.fq.gz |
s2 | s21.1.fq.gz | s21.2.fq.gz |
s3 | hello.1.fq.gz | hello.2.fq.gz |
The header must be: id fq1 fq2. Rows that share the same id belong to one sample; their fq1 and fq2 files will be merged before processing.
# Assume you are in a directory named hello_meta; all operations will be performed in this directory
cd hello_meta
python /path/to/metapi/metapi/core.py init -d ./ -s samples.tsv -b raw -a metaspades
# -d ./ : work in hello_meta directory
# -s samples.tsv: input data
# -b raw : begin from raw data
# -a metaspades : use MetaSPAdes assembler
After that, inside the hello_meta directory, you will see: assay results scripts sources study config.yaml cluster.yaml
See what metapi can do:
snakemake --snakefile /path/to/metapi/metapi/Snakefile \
--configfile ./config.yaml --list
Debug a specific rule, e.g. checkm_lineage_wf:
snakemake --snakefile /path/to/metapi/metapi/Snakefile \
--configfile ./config.yaml \
-p -r -n --debug-dag \
--until checkm_lineage_wf
snakemake --snakefile /path/to/metapi/metapi/Snakefile \
--configfile ./config.yaml \
--until checkm_lineage_wf
snakemake --snakefile /path/to/metapi/metapi/Snakefile \
--configfile ./config.yaml \
--cluster-config ./cluster.yaml \
--jobs 80 \
--cluster "qsub -S /bin/bash -cwd \
-q {cluster.queue} \
-P {cluster.project} \
-l vf={cluster.mem},p={cluster.cores} \
-binding linear:{cluster.cores} \
-o {cluster.output} \
-e {cluster.error}" \
--latency-wait 360 \
-k \
--until checkm_lineage_wf
Omics for all, Open Source for all. GPLv3+.