Add the toolbox that worked on the EIDF virtual machine.

EdinburghGenomics · Oct 9, 2024 · e3aa577 · e3aa577
1 parent 5866106
commit e3aa577
Show file tree

Hide file tree

Showing 18 changed files with 117 additions and 0 deletions.
diff --git a/toolbox_eidfvm/README.txt b/toolbox_eidfvm/README.txt
@@ -0,0 +1,20 @@
+Illuminatus uses various tools to process the sequence files. Some are called directly
+and others are called indirectly as part of other tools. Aside from those contained
+directly within the Illuminatus codebase and those managed under the Python3 VEnv,
+everything called by Illuminatus should be collected here.
+
+Every executable in this directory should be either a link to a tool installed under
+/lustre/software or else a thin wrapper shell script that references the tool. The
+link or reference should be to the specific numbered version, never to the 'current'
+symlink or to whatever is sitting under /lustre/software/bin.
+
+The upshot of this rule is that we can tinker with the 'current' symlinks and the contents
+of '/lustre/software/bin' without breaking the pipeline. We can also see exactly what the
+pipeline is using by just examining this directory. I'm in two minds if this directory should
+sit up in /lustre/software or should be kept in the Illuminatus GIT repository
+with the rest of the code. For now I'm doing the latter as it seems important for provenance.
+(After several years - yes this is the best idea!)
+
+If you want to test with a new version of anything and for some reason you don't want to
+do a full GIT checkout of the pipeline, you should copy the whole toolbox directory and
+set the alternative $TOOLBOX in your test environment (eg. in environ.sh).
diff --git a/toolbox_eidfvm/apngasm-noopt b/toolbox_eidfvm/apngasm-noopt
@@ -0,0 +1 @@
+/mnt/vdb/software/apngasm/apngasm-2.91/apngasm-noopt
diff --git a/toolbox_eidfvm/bcl2fastq b/toolbox_eidfvm/bcl2fastq
@@ -0,0 +1 @@
+/mnt/vdb/software/bcl2fastq/bcl2fastq_v2.20.0.422/bcl2fastq
diff --git a/toolbox_eidfvm/bootstrap_python3 b/toolbox_eidfvm/bootstrap_python3
@@ -0,0 +1 @@
+/home/eidf163/eidf163/tbooth_eidf_vm/miniforge3/envs/py3.6/bin/python3
diff --git a/toolbox_eidfvm/fastq_screen b/toolbox_eidfvm/fastq_screen
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -euo pipefail
+
+# Wrapper that runs the pinned fastq_screen build with the preferred databases
+# and configuration. The conf file is expected to define databases only. This
+# newer fastq_screen neither accepts nor expects --seqcount, and the subsample
+# size will be approximate.
+#
+# Callers should pass --threads and --subset 1000000 (or whatever), may pass
+# --quiet to reduce log spew, and probably want --bowtie '--trim3 {N}' where
+# N=max(readlen-50,0) - which requires knowing readlen.
+
+vdb_root="/mnt/vdb"
+screen_exe="$vdb_root/software/fastq_screen/fastq_screen_v0.11.3_debug/fastq_screen"
+screen_conf="$vdb_root/references/fastqscreen/fastq_screen.conf_bowtie1_20200922"
+
+# Put the pinned Bowtie 1 directory at the front of PATH.
+export PATH="$vdb_root/software/bowtie/bowtie-1.2.2-beta2-linux-x86_64:$PATH"
+
+# Remove these lines to disable graph generation. Under 'set -e' the perl line
+# aborts this wrapper if GD::Graph::bars cannot be loaded.
+#export LD_LIBRARY_PATH="$vdb_root/software/fastq_screen/perl_libs/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+#export PERL5LIB="$vdb_root/software/fastq_screen/perl_libs/lib64/perl5/:$vdb_root/software/fastq_screen/perl_libs/share/perl5/"
+perl -e 'use GD::Graph::bars'
+
+exec "$screen_exe" --aligner bowtie --conf "$screen_conf" "$@"
diff --git a/toolbox_eidfvm/fastqc b/toolbox_eidfvm/fastqc
@@ -0,0 +1 @@
+/mnt/vdb/software/FastQC/FastQC_v0.12.1/fastqc
diff --git a/toolbox_eidfvm/gnuplot b/toolbox_eidfvm/gnuplot
@@ -0,0 +1 @@
+/home/eidf163/eidf163/tbooth_eidf_vm/miniforge3/envs/gnuplot/bin/gnuplot
diff --git a/toolbox_eidfvm/interop_plot_by_cycle b/toolbox_eidfvm/interop_plot_by_cycle
@@ -0,0 +1 @@
+/mnt/vdb/software/interop/interop-1.1.1/bin/plot_by_cycle
diff --git a/toolbox_eidfvm/interop_plot_by_lane b/toolbox_eidfvm/interop_plot_by_lane
@@ -0,0 +1 @@
+/mnt/vdb/software/interop/interop-1.1.1/bin/plot_by_lane
diff --git a/toolbox_eidfvm/interop_plot_flowcell b/toolbox_eidfvm/interop_plot_flowcell
@@ -0,0 +1 @@
+/mnt/vdb/software/interop/interop-1.1.1/bin/plot_flowcell
diff --git a/toolbox_eidfvm/interop_plot_qscore_heatmap b/toolbox_eidfvm/interop_plot_qscore_heatmap
@@ -0,0 +1 @@
+/mnt/vdb/software/interop/interop-1.1.1/bin/plot_qscore_heatmap
diff --git a/toolbox_eidfvm/interop_plot_qscore_histogram b/toolbox_eidfvm/interop_plot_qscore_histogram
@@ -0,0 +1 @@
+/mnt/vdb/software/interop/interop-1.1.1/bin/plot_qscore_histogram
diff --git a/toolbox_eidfvm/interop_plot_sample_qc b/toolbox_eidfvm/interop_plot_sample_qc
@@ -0,0 +1 @@
+/mnt/vdb/software/interop/interop-1.1.1/bin/plot_sample_qc
diff --git a/toolbox_eidfvm/multiqc b/toolbox_eidfvm/multiqc
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+# The version of MultiQC used by the pipeline needs to be selected from the
+# active Python virtualenv. If the system version gets run instead then we get
+# weird errors. This script tries to block that from happening and give a meaningful
+# error.
+
+# If you really aren't using a virtualenv and you want the default version of MultiQC to
+# be run then simply remove this file from the toolbox.
+
+if [ -z "${VIRTUAL_ENV:-}" ] ; then
+    echo "The pipeline should always run MultiQC from the active Python3 VEnv. But no VEnv is active!"  >&2
+    exit 1
+else
+    # We're good, assuming multiqc really is installed in the VEnv
+    exec "${VIRTUAL_ENV}/bin/multiqc" "$@"
+fi
+
diff --git a/toolbox_eidfvm/post_activate_venv.sh b/toolbox_eidfvm/post_activate_venv.sh
@@ -0,0 +1,5 @@
+# On the old cluster I decided to put Cutadapt into the VEnv, so I added
+# this hook to give me a programmatic way to do it. You can add extra packages to this
+# file if they want to be in the VEnv for a specific deployment.
+
+pip_install cutadapt==1.18
diff --git a/toolbox_eidfvm/snakemake_jobscript.sh b/toolbox_eidfvm/snakemake_jobscript.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Default jobscript for Snakemake on the GSEG environment.
+# Made by Tim B on 2016-11-01
+# Updated after DRMAA fix on 2017-06-13
+
+( module purge -s ; unset module ) || true
+
+# Where am I running? Weird syntax avoids fopen on /dev/stderr
+# which is bad if /dev/stderr is a real file.
+# Could add more debugging info here, maybe?
+echo "Running on `hostname`" | tee >(cat >&2)
+
+# Set TMPDIR, which most programs will respect, including Picard if run
+# via my wrapper scripts.
+export TMPDIR=/lustre-gseg/tmp/"$USER@$HOSTNAME"
+mkdir -p "$TMPDIR"
+
+# Also, we can have a pre-run script.  Useful for qc_tools_python/activate
+# and any other case where you want specific settings on each node.
+# Under Snakemake+SLURM, you can simply set this as an env var.
+if [ -r "$SNAKE_PRERUN" ] ; then
+    echo "+ source $SNAKE_PRERUN" >&2
+    source "$SNAKE_PRERUN"
+fi
+
+# properties = {properties}
+{exec_job}
diff --git a/toolbox_eidfvm/wd_count_well_duplicates b/toolbox_eidfvm/wd_count_well_duplicates
@@ -0,0 +1 @@
+/mnt/vdb/software/well_duplicates/git_20180212/count_well_duplicates.py
diff --git a/toolbox_eidfvm/wd_get_cached_targets b/toolbox_eidfvm/wd_get_cached_targets
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Choose which well_duplicates to use and where to cache the cluster lists.
+BASE="/mnt/vdb"
+
+exec env PATH="$BASE/software/well_duplicates/git_20180212:$PATH" \
+         CLUSTER_LISTS="$BASE/software/well_duplicates/shared_cluster_lists" \
+         get_cached_targets.sh "$@"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		/mnt/vdb/software/apngasm/apngasm-2.91/apngasm-noopt
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		/mnt/vdb/software/bcl2fastq/bcl2fastq_v2.20.0.422/bcl2fastq
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		/home/eidf163/eidf163/tbooth_eidf_vm/miniforge3/envs/py3.6/bin/python3
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		/mnt/vdb/software/interop/interop-1.1.1/bin/plot_by_cycle
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		/mnt/vdb/software/well_duplicates/git_20180212/count_well_duplicates.py