Skip to content

Commit

Permalink
Merge pull request #3 from PyEED/blastToolsLocally
Browse files Browse the repository at this point in the history
Blast tools locally
  • Loading branch information
NiklasAbraham authored Nov 11, 2024
2 parents fe3d564 + 9a3cb05 commit 3aabaf1
Show file tree
Hide file tree
Showing 11 changed files with 263 additions and 1 deletion.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,11 @@
[![Publish Docker image](https://github.com/EnzymeML/EnzymeML_JupyterLab/actions/workflows/release_image.yaml/badge.svg)](https://github.com/EnzymeML/EnzymeML_JupyterLab/actions/workflows/release_image.yaml)

🐳 Docker image with JupyterLab and `pyeed`

#### Ports used by the different containers

- `cytoscape` uses `6080` and `8787`
- `jupyterlab` uses `8888`
- `clustalo` uses `5001`
- `blast` uses `6001`
- `mmseqs2` uses `8000`
17 changes: 17 additions & 0 deletions blast/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Start from the ncbi/blast image (no tag is given, so the default `latest` tag is used)
FROM ncbi/blast

# Install Python 3 and pip, then the FastAPI framework and the Uvicorn server
RUN apt-get update && apt-get install -y python3 python3-pip
RUN pip3 install fastapi uvicorn

# Add the Python script to the container
COPY app.py /usr/local/bin/app.py

# Set the working directory to the folder that holds app.py
WORKDIR /usr/local/bin

# Expose the port the server will run on (app.py starts Uvicorn on 6001)
EXPOSE 6001

# Run the Python server script
CMD ["python3", "app.py"]
71 changes: 71 additions & 0 deletions blast/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
import subprocess
import os
import uuid
from typing import Optional
import logging

# ASGI application object; the route decorators further down register their handlers on it.
app = FastAPI()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

class BlastRequest(BaseModel):
    """Schema of the JSON payload read by the ``/run_blast`` endpoint.

    All values are strings because each one is placed verbatim into the BLAST
    command line.

    NOTE: ``run_blast`` parses the raw JSON body itself and does not validate
    it against this model; the model documents the expected keys.
    """

    tool: str  # name of the BLAST executable to run
    query: str  # query sequence; written to a FASTA file before the run
    db: str  # value passed to -db
    evalue: str  # value passed to -evalue
    outfmt: str  # value passed to -outfmt
    # Value passed to -num_threads. run_blast reads this key, but the schema did
    # not declare it; it defaults to one thread so existing users of the model
    # keep working.
    num_threads: str = "1"


## ENDPOINTS --------------------------

def create_fastas_file_from_seq(seq, filename):
    """Write ``seq`` to ``filename`` as a single-record FASTA file.

    The record always gets the fixed header ``>seq`` and the sequence is
    written on one line, followed by a newline. An existing file is
    overwritten.

    Args:
        seq: Sequence text to store.
        filename: Path of the FASTA file to create.
    """
    # Explicit encoding so the output does not depend on the container's locale.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(f">seq\n{seq}\n")

@app.get("/")
async def read_root():
    """Return a fixed welcome message for requests to the root path."""
    greeting = "Welcome to the BLAST API!"
    return dict(message=greeting)

# Executables /run_blast is allowed to launch; any other "tool" value is rejected
# so that a client cannot make the server run an arbitrary program.
_BLAST_TOOLS = frozenset({
    "blastn", "blastp", "blastx", "tblastn", "tblastx",
    "psiblast", "rpsblast", "rpstblastn", "deltablast",
})


# Parameters are read from the JSON body of the request.
@app.post("/run_blast")
async def run_blast(request: Request):
    """Run a BLAST search for one query sequence and return the raw output.

    Expected JSON keys: ``tool``, ``query``, ``db``, ``evalue``, ``outfmt`` and,
    optionally, ``num_threads`` (defaults to ``"1"``). Values are converted to
    strings before they are placed on the command line, so JSON numbers are
    accepted as well.

    Returns:
        The contents of the BLAST output file as a string.

    Raises:
        HTTPException: 400 for a body that is not a JSON object, for missing
            keys or for a tool that is not in ``_BLAST_TOOLS``; 500 when the
            BLAST process cannot be started or exits with a non-zero status.
    """
    import tempfile  # local import: only this handler needs it

    try:
        payload = await request.json()
    except ValueError as e:
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from e
    if not isinstance(payload, dict):
        raise HTTPException(status_code=400, detail="Request body must be a JSON object")

    missing = [key for key in ("tool", "query", "db", "evalue", "outfmt") if key not in payload]
    if missing:
        raise HTTPException(
            status_code=400,
            detail=f"Missing required field(s): {', '.join(missing)}",
        )

    tool = str(payload["tool"])
    if tool not in _BLAST_TOOLS:
        raise HTTPException(status_code=400, detail=f"Unsupported BLAST tool: {tool!r}")

    # Each request gets its own scratch directory: fixed file names in the
    # working directory would be overwritten by concurrent requests. The
    # directory and both files are removed when the block exits.
    with tempfile.TemporaryDirectory() as workdir:
        query_filename = os.path.join(workdir, "in.fasta")
        result_filename = os.path.join(workdir, "out.out")

        # Create an empty result file so there is always something to read back.
        open(result_filename, 'w').close()

        # Create the FASTA file
        create_fastas_file_from_seq(payload["query"], query_filename)

        # Build the BLAST command as an argument list (no shell involved).
        command = [
            tool,
            '-query', query_filename,
            '-db', str(payload["db"]),
            '-evalue', str(payload["evalue"]),
            '-outfmt', str(payload["outfmt"]),
            '-num_threads', str(payload.get("num_threads", "1")),
            '-out', result_filename,
            '-max_target_seqs', '10000',
        ]

        try:
            # Blocks until BLAST has finished.
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers an executable that is missing or not runnable.
            raise HTTPException(status_code=500, detail=str(e)) from e

        with open(result_filename, 'r') as file:
            return file.read()



if __name__ == "__main__":
    # Start the server only when this file is executed as a script.
    from uvicorn import run as serve

    serve("app:app", host="0.0.0.0", port=6001, reload=True)
4 changes: 4 additions & 0 deletions blast/reload_development.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Development helper: rebuild the blast_docker image and restart its container.

# Stop the running container named blast_docker.
sudo docker stop blast_docker
# Delete the stopped container so the name can be reused by `docker run` below.
# NOTE(review): the documented top-level command is `docker rm`; confirm the installed Docker CLI accepts `docker remove`.
sudo docker remove blast_docker
# Rebuild the image from the current directory without using the layer cache.
sudo docker build --no-cache -t blast_docker .
# Start a new container: the host's /mnt/databases is mounted at /blast/blastdb and port 6001 is published.
sudo docker run --name blast_docker --volume /mnt/databases:/blast/blastdb -p 6001:6001 blast_docker
3 changes: 3 additions & 0 deletions clustalo/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,7 @@ RUN wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 \
COPY requirements.txt .
RUN pip install -r requirements.txt

COPY app.py .


CMD ["python", "app.py"]
Binary file modified clustalo/__pycache__/app.cpython-312.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion clustalo/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
fastapi
python-multipart
uvicorn
uvicorn
19 changes: 19 additions & 0 deletions mmseqs2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Base image for mmseqs2
FROM soedinglab/mmseqs2:latest

# Replace /etc/apt/sources.list with the standard Debian bullseye repositories
# (the first echo overwrites the file, the second appends) so that all packages can be installed.
# NOTE(review): assumes the base image is built on Debian bullseye - confirm.
RUN echo "deb http://deb.debian.org/debian bullseye main contrib non-free" > /etc/apt/sources.list
RUN echo "deb http://security.debian.org/debian-security bullseye-security main contrib non-free" >> /etc/apt/sources.list

# Install Python 3 and pip, then the FastAPI framework and the Uvicorn server
RUN apt-get update && apt-get install -y python3 python3-pip
RUN pip3 install fastapi uvicorn

# Copy the FastAPI app to the container (no WORKDIR is set here, so the path is relative to the base image's working directory)
COPY app.py app.py

# Expose the port on which FastAPI will run (app.py starts Uvicorn on 8000)
EXPOSE 8000

# Start the FastAPI server when the container starts
# NOTE(review): if the base image defines an ENTRYPOINT, this CMD is passed to it as arguments instead of being executed directly - confirm.
CMD ["python3", "app.py"]
133 changes: 133 additions & 0 deletions mmseqs2/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import subprocess
import os
from uuid import uuid4
import shutil

# ASGI application object; the route decorators further down register their handlers on it.
app = FastAPI()

class MMSeqsParams(BaseModel):
    """Input parameters accepted by the MMseqs2 search endpoint.

    Fields:
        query: The query sequence.
        database: Target database handed to ``mmseqs search``.
        output: The output directory.
        sensitivity: Sensitivity parameter for mmseqs2 (default 7.5).
        threads: Number of threads to use (default 4).
        blast_format: Whether to convert the result to BLAST+ format
            (default True).
    """

    query: str
    database: str
    output: str
    sensitivity: float = 7.5
    threads: int = 4
    blast_format: bool = True

# In-memory registry of finished jobs, keyed by job id. Each value is a dict
# with a "status" string and the "result_path" of the file holding the output.
# Being a plain module-level dict, its contents are lost when the process exits.
job_results = {}

def create_fastas_file_from_seq(seq, filename):
    """Write ``seq`` to ``filename`` as a single-record FASTA file.

    The record always gets the fixed header ``>seq`` and the sequence is
    written on one line, followed by a newline. An existing file is
    overwritten.

    Args:
        seq: Sequence text to store.
        filename: Path of the FASTA file to create.
    """
    # Explicit encoding so the output does not depend on the container's locale.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(f">seq\n{seq}\n")

def create_queryDB_from_seq(filename):
    """Build an MMseqs2 sequence database from a single-sequence FASTA file.

    Runs ``mmseqs createdb <input> <output>``. The output path is the input
    path with every occurrence of ``fasta`` removed and ``.db`` appended, so
    ``query.fasta`` becomes ``query..db``.

    Args:
        filename: Path of the FASTA file to convert.

    Raises:
        HTTPException: 500 when ``mmseqs`` cannot be started or exits with a
            non-zero status.
    """
    # run_mmseqs derives the database path with this exact expression; the two
    # must be changed together.
    db_path = filename.replace('fasta', '') + ".db"

    command = [
        "mmseqs", "createdb",
        filename,
        db_path,
    ]

    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # 600 is outside the valid HTTP status range (100-599); report a
        # regular server error. OSError covers a missing mmseqs binary.
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/")
async def read_root():
    """Return a fixed welcome message for requests to the root path."""
    greeting = "Welcome to the MMSeqs2 API!"
    return dict(message=greeting)

@app.post("/run_mmseqs")
async def run_mmseqs(params: MMSeqsParams):
    """Run an MMseqs2 search for one query sequence and register its result file.

    Steps: create a per-job directory under ``/tmp``, write the query to a
    FASTA file, convert it to a query database, run ``mmseqs search`` against
    ``params.database`` and export the result database to a text file. The
    export uses ``convertalis`` (BLAST tabular ``.m8``) when
    ``params.blast_format`` is true and ``createtsv`` otherwise.

    Args:
        params: Search parameters; ``params.output`` is not used here.

    Returns:
        ``{"job_id": <id>}``; the id can be passed to ``/results/{job_id}``.

    Raises:
        HTTPException: 500 when an ``mmseqs`` command cannot be started or
            exits with a non-zero status.
    """
    # Create a unique job id; it also names the per-job working directory.
    job_id = str(uuid4())
    output_dir = f"/tmp/{job_id}"
    os.makedirs(output_dir, exist_ok=True)

    # Prepare paths
    result_db_path = os.path.join(output_dir, "result")
    result_m8_path = os.path.join(output_dir, "result.m8")
    result_tsv_path = os.path.join(output_dir, "result.tsv")

    # Create the FASTA file and the query database built from it.
    path_query = os.path.join(output_dir, "query.fasta")
    # Same expression create_queryDB_from_seq uses to name the database it writes.
    path_queryDB = path_query.replace('fasta', '') + ".db"
    create_fastas_file_from_seq(params.query, path_query)
    create_queryDB_from_seq(path_query)

    # mmseqs search <queryDB> <targetDB> <resultDB> <tmpDir> [options]
    search_command = [
        "mmseqs", "search",
        path_queryDB,
        params.database,
        result_db_path,
        output_dir,  # the job directory doubles as the tmp directory
        "--threads", str(params.threads),
        "-s", str(params.sensitivity),  # sensitivity is the short option -s
    ]

    # Both exporters take: <queryDB> <targetDB> <resultDB> <outputFile>.
    # The first argument must be the query *database*, not the raw sequence.
    if params.blast_format:
        export_tool, result_path = "convertalis", result_m8_path
    else:
        # Without this step result.tsv would never exist.
        export_tool, result_path = "createtsv", result_tsv_path
    export_command = [
        "mmseqs", export_tool,
        path_queryDB,
        params.database,
        result_db_path,
        result_path,
    ]

    try:
        subprocess.run(search_command, check=True)
        subprocess.run(export_command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable mmseqs binary.
        raise HTTPException(status_code=500, detail=f"mmseqs2 failed: {e}") from e

    # Register the job only after its result file has been written.
    job_results[job_id] = {
        "status": "completed",
        "result_path": result_path,
    }
    return {"job_id": job_id}

@app.get("/results/{job_id}")
async def get_results(job_id: str):
    """Return the stored status and the result file contents for ``job_id``.

    Raises:
        HTTPException: 404 when the job id is unknown or when the registered
            result file does not exist.
    """
    # Unknown job ids are rejected up front.
    try:
        job = job_results[job_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Job not found") from None

    # The registered path must point at an existing file before it is read.
    output_path = job["result_path"]
    if not os.path.exists(output_path):
        raise HTTPException(status_code=404, detail="Result file not found")

    with open(output_path, "r") as handle:
        contents = handle.read()
    return {"status": job["status"], "results": contents}


if __name__ == "__main__":
    # Start the server only when this file is executed as a script.
    from uvicorn import run as serve

    serve("app:app", host="0.0.0.0", port=8000, reload=True)
4 changes: 4 additions & 0 deletions mmseqs2/reload_development.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Development helper: rebuild the mmseq_docker image and restart its container.

# Stop the running container named mmseq_docker.
sudo docker stop mmseq_docker
# Delete the stopped container so the name can be reused by `docker run` below.
# NOTE(review): the documented top-level command is `docker rm`; confirm the installed Docker CLI accepts `docker remove`.
sudo docker remove mmseq_docker
# Rebuild the image from the current directory without using the layer cache.
sudo docker build --no-cache -t mmseq_docker .
# Start a new container: the host's /mnt/databases is mounted at /app and port 8000 is published.
sudo docker run --name mmseq_docker --volume /mnt/databases:/app -p 8000:8000 mmseq_docker
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
pyeed @ git+https://github.com/PyEED/pyeed.git@main
chromatopy @ git+https://github.com/FAIRChemistry/chromatopy.git
MTPHandler @ git+https://github.com/FAIRChemistry/MTPHandler.git
python-multipart

0 comments on commit 3aabaf1

Please sign in to comment.