From e81df22475a72f0388547d8e4e42575274eb0d77 Mon Sep 17 00:00:00 2001 From: Nathan Siemers Date: Mon, 31 May 2021 19:23:02 -0700 Subject: [PATCH] removed Bio Alphabet from dependencies. 'tracer test' fails because of Bio Alphabet dependencies in old pkl objects. --- Dockerfile | 29 +++++++++++++++++++++++------ tracer | 2 +- tracerlib/core.py | 5 ++++- tracerlib/tracer_func.py | 16 ++++++++++++---- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0ebc9db..9fa584e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,6 +8,7 @@ FROM debian:latest RUN apt-get update && apt-get -y upgrade RUN apt-get -y install wget curl unzip build-essential zlib1g-dev git python3 python3-pip bowtie2 default-jre procps cmake libcairo2-dev pkg-config samtools jellyfish salmon + #Trinity - depends on zlib1g-dev and openjdk-8-jre installed previously RUN wget https://github.com/trinityrnaseq/trinityrnaseq/releases/download/v2.11.0/trinityrnaseq-v2.11.0.FULL.tar.gz RUN tar xvzf trinityrnaseq-v2.11.0.FULL.tar.gz && rm trinityrnaseq-v2.11.0.FULL.tar.gz @@ -17,10 +18,12 @@ RUN cd /trinityrnaseq-v2.11.0 && make #and then on top of that, the environmental variable thing facilitates the creation of a shell wrapper. fun RUN wget ftp://ftp.ncbi.nih.gov/blast/executables/igblast/release/1.7.0/ncbi-igblast-1.7.0-x64-linux.tar.gz RUN tar -xzvf ncbi-igblast-1.7.0-x64-linux.tar.gz && rm ncbi-igblast-1.7.0-x64-linux.tar.gz -RUN cd /ncbi-igblast-1.7.0/bin/ && wget -r ftp://ftp.ncbi.nih.gov/blast/executables/igblast/release/internal_data && \ - wget -r ftp://ftp.ncbi.nih.gov/blast/executables/igblast/release/optional_file && \ - mv ftp.ncbi.nih.gov/blast/executables/igblast/release/internal_data . && \ - mv ftp.ncbi.nih.gov/blast/executables/igblast/release/optional_file . 
&& \ +RUN cd /ncbi-igblast-1.7.0/bin/ && wget -r ftp://ftp.ncbi.nih.gov/blast/executables/igblast/release/old_internal_data && \ + wget -r ftp://ftp.ncbi.nih.gov/blast/executables/igblast/release/old_optional_file && \ + mv ftp.ncbi.nih.gov/blast/executables/igblast/release/old_internal_data . && \ + mv ftp.ncbi.nih.gov/blast/executables/igblast/release/old_optional_file . && \ + ln -s old_internal_data internal_data && \ + ln -s old_optional_file optional_file && \ rm -r ftp.ncbi.nih.gov #aligners - kallisto and salmon @@ -34,16 +37,30 @@ RUN apt-get -y install graphviz #tracer proper COPY . /tracer + + +#placing a preconfigured tracer.conf in ~/.tracerrc +RUN cp /tracer/docker_helper_files/docker_tracer.conf ~/.tracerrc + + +## update some python packages to remove install version error +RUN pip3 install numpy --upgrade +RUN pip3 install pyparsing --upgrade RUN cd /tracer && pip3 install -r docker_helper_files/requirements_stable.txt && python3 setup.py install + +################################################################ #obtaining the transcript sequences. 
no salmon/kallisto indices as they make dockerhub unhappy for some reason + + RUN mkdir GRCh38 && cd GRCh38 && wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_27/gencode.v27.transcripts.fa.gz && \ gunzip gencode.v27.transcripts.fa.gz && python3 /tracer/docker_helper_files/gencode_parse.py gencode.v27.transcripts.fa && rm gencode.v27.transcripts.fa + + RUN mkdir GRCm38 && cd GRCm38 && wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M15/gencode.vM15.transcripts.fa.gz && \ gunzip gencode.vM15.transcripts.fa.gz && python3 /tracer/docker_helper_files/gencode_parse.py gencode.vM15.transcripts.fa && rm gencode.vM15.transcripts.fa -#placing a preconfigured tracer.conf in ~/.tracerrc -RUN cp /tracer/docker_helper_files/docker_tracer.conf ~/.tracerrc #this is a tracer container, so let's point it at a tracer wrapper that sets the silly IgBLAST environment variable thing ENTRYPOINT ["bash", "/tracer/docker_helper_files/docker_wrapper.sh"] + diff --git a/tracer b/tracer index 7a0511c..30c51a6 100755 --- a/tracer +++ b/tracer @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 ############################################################################## # TraCeR - a tool to reconstruct TCR sequences from single-cell RNA-seq data # diff --git a/tracerlib/core.py b/tracerlib/core.py index 24d948d..98a6484 100644 --- a/tracerlib/core.py +++ b/tracerlib/core.py @@ -2,7 +2,10 @@ from collections import Counter, defaultdict import six -from Bio.Alphabet import generic_dna +try: + from Bio.Alphabet import generic_dna +except: + print("Bio.Alphabet IUPAC not imported. 
Possibly missing, possibly you are running a modern version of python that doesn't need it") from Bio.Seq import Seq import pdb diff --git a/tracerlib/tracer_func.py b/tracerlib/tracer_func.py index 1d05f9d..bfc9c86 100644 --- a/tracerlib/tracer_func.py +++ b/tracerlib/tracer_func.py @@ -26,7 +26,12 @@ import networkx as nx import six from Bio import SeqIO -from Bio.Alphabet import IUPAC + +try: + from Bio.Alphabet import IUPAC +except: + print("Bio.Alphabet IUPAC not imported. Possibly missing, possibly you are running a modern version of python that doesn't need it") + from Bio.Seq import Seq from tracerlib.core import Cell, Recombinant, Invar_cell @@ -462,8 +467,10 @@ def is_rearrangement_productive(seq): in_frame = True else: in_frame = False - - seq = Seq(seq, IUPAC.unambiguous_dna) + # original + #seq = Seq(seq, IUPAC.unambiguous_dna) + # modified + seq = Seq(seq) aa_seq = seq.translate() contains_stop = "*" in aa_seq @@ -553,7 +560,8 @@ def get_fasta_line_for_contig_assembly(trinity_seq, hit_table, locus, IMGT_seqs, end_base_removal_count = (1 - end_padding) % 3 seq = trinity_seq[start_base_removal_count:-(end_base_removal_count)] - seq = Seq(seq, IUPAC.unambiguous_dna) + # seq = Seq(seq, IUPAC.unambiguous_dna) + seq = Seq(seq) aa_seq = seq.translate() contains_stop = "*" in aa_seq