diff --git a/README.md b/README.md
index b83e73a..913bdbc 100644
--- a/README.md
+++ b/README.md
@@ -2,3 +2,11 @@
 [![Publish Docker image](https://github.com/EnzymeML/EnzymeML_JupyterLab/actions/workflows/release_image.yaml/badge.svg)](https://github.com/EnzymeML/EnzymeML_JupyterLab/actions/workflows/release_image.yaml)
 
 🐳 Docker image with JupyterLab and `pyeed`
+
+#### Ports used by the different containers
+
+- `cytoscape` uses `6080` and `8787`
+- `jupyterlab` uses `8888`
+- `clustalo` uses `5001`
+- `blast` uses `6001`
+- `mmseqs2` uses `8000`
diff --git a/blast/Dockerfile b/blast/Dockerfile
new file mode 100644
index 0000000..4dc9f89
--- /dev/null
+++ b/blast/Dockerfile
@@ -0,0 +1,17 @@
+FROM ncbi/blast
+
+# Install Python and the FastAPI stack
+RUN apt-get update && apt-get install -y python3 python3-pip
+RUN pip3 install fastapi uvicorn
+
+# Add the Python script to the container
+COPY app.py /usr/local/bin/app.py
+
+# Set the working directory
+WORKDIR /usr/local/bin
+
+# Expose the port the server will run on
+EXPOSE 6001
+
+# Run the Python server script
+CMD ["python3", "app.py"]
diff --git a/blast/app.py b/blast/app.py
new file mode 100644
index 0000000..08bc9dc
--- /dev/null
+++ b/blast/app.py
@@ -0,0 +1,71 @@
+from fastapi import FastAPI, HTTPException, Request
+from pydantic import BaseModel
+import subprocess
+import os
+import uuid
+from typing import Optional
+import logging
+
+app = FastAPI()
+logger = logging.getLogger(__name__)
+
+class BlastRequest(BaseModel):
+    tool: str
+    query: str
+    db: str
+    evalue: str
+    outfmt: str
+
+
+## ENDPOINTS --------------------------
+
+def create_fastas_file_from_seq(seq, filename):
+    with open(filename, 'w') as file:
+        file.write(f">seq\n{seq}\n")
+
+@app.get("/")
+async def read_root():
+    return {"message": "Welcome to the BLAST API!"}
+
+# This endpoint reads its parameters from the request JSON body
+@app.post("/run_blast")
+async def run_blast(request: Request):
+    request = await request.json()
+
+    query_filename = "in.fasta"
+    result_filename = "out.out"
+    # create an empty output file
+    open(result_filename, 'w').close()
+
+    # Create the FASTA file
+    create_fastas_file_from_seq(request['query'], query_filename)
+
+    # Run the BLAST command
+    command = [
+        request['tool'],
+        '-query', query_filename,
+        '-db', request['db'],
+        '-evalue', request['evalue'],
+        '-outfmt', request['outfmt'],
+        '-num_threads', request['num_threads'],
+        '-out', result_filename,
+        '-max_target_seqs', '10000'
+    ]
+
+    try:
+        subprocess.run(command, check=True)
+    except subprocess.CalledProcessError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+    with open(result_filename, 'r') as file:
+        result = file.read()
+
+    return result
+
+
+
+if __name__ == '__main__':
+    import uvicorn
+
+    uvicorn.run("app:app", host="0.0.0.0", port=6001, reload=True)
diff --git a/blast/reload_development.sh b/blast/reload_development.sh
new file mode 100644
index 0000000..5409855
--- /dev/null
+++ b/blast/reload_development.sh
@@ -0,0 +1,4 @@
+sudo docker stop blast_docker
+sudo docker remove blast_docker
+sudo docker build --no-cache -t blast_docker .
+sudo docker run --name blast_docker --volume /mnt/databases:/blast/blastdb -p 6001:6001 blast_docker
\ No newline at end of file
diff --git a/clustalo/Dockerfile b/clustalo/Dockerfile
index 6d4f188..591a20f 100644
--- a/clustalo/Dockerfile
+++ b/clustalo/Dockerfile
@@ -11,4 +11,7 @@ RUN wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 \
 
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 
+COPY app.py .
+ + CMD ["python", "app.py"] diff --git a/clustalo/__pycache__/app.cpython-312.pyc b/clustalo/__pycache__/app.cpython-312.pyc index 157be52..f62b98c 100644 Binary files a/clustalo/__pycache__/app.cpython-312.pyc and b/clustalo/__pycache__/app.cpython-312.pyc differ diff --git a/clustalo/requirements.txt b/clustalo/requirements.txt index cdbd180..80d5c68 100644 --- a/clustalo/requirements.txt +++ b/clustalo/requirements.txt @@ -1,3 +1,3 @@ fastapi python-multipart -uvicorn +uvicorn \ No newline at end of file diff --git a/mmseqs2/Dockerfile b/mmseqs2/Dockerfile new file mode 100644 index 0000000..f747526 --- /dev/null +++ b/mmseqs2/Dockerfile @@ -0,0 +1,19 @@ +# Base image for mmseqs2 +FROM soedinglab/mmseqs2:latest + +# Add the standard Debian repositories to ensure we can install all packages +RUN echo "deb http://deb.debian.org/debian bullseye main contrib non-free" > /etc/apt/sources.list +RUN echo "deb http://security.debian.org/debian-security bullseye-security main contrib non-free" >> /etc/apt/sources.list + +# Install Python and Flask +RUN apt-get update && apt-get install -y python3 python3-pip +RUN pip3 install fastapi uvicorn + +# Copy the FastAPI app to the container +COPY app.py app.py + +# Expose the port on which FastAPI will run +EXPOSE 8000 + +# Start the FastAPI server when the container starts +CMD ["python3", "app.py"] diff --git a/mmseqs2/app.py b/mmseqs2/app.py new file mode 100644 index 0000000..98b1586 --- /dev/null +++ b/mmseqs2/app.py @@ -0,0 +1,133 @@ +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +import subprocess +import os +from uuid import uuid4 +import shutil + +app = FastAPI() + +# Define a model for the input parameters +class MMSeqsParams(BaseModel): + query: str # The query sequence + database: str + output: str # The output directory + sensitivity: float = 7.5 # Sensitivity parameter for mmseqs2 + threads: int = 4 # Number of threads to use + blast_format: bool = True # Option to convert to BLAST+ format + +# Dictionary to keep track of running jobs and results +job_results = {} + +def create_fastas_file_from_seq(seq, filename): + with open(filename, 'w') as file: + file.write(f">seq\n{seq}\n") + +def create_queryDB_from_seq(filename): + # this will create a db from a single sequence file + # the command is mmseqs createdb + # the output should be a file with the same name as the input but with the extension .db + + command = [ + "mmseqs", "createdb", + filename, + filename.replace('fasta', '') + ".db" + ] + + try: + subprocess.run(command, check=True) + + except subprocess.CalledProcessError as e: + raise HTTPException(status_code=600, detail=str(e)) + + +@app.get("/") +async def read_root(): + return {"message": "Welcome to the MMSeqs2 API!"} + +@app.post("/run_mmseqs") +async def run_mmseqs(params: MMSeqsParams): + # Create a unique job id + job_id = str(uuid4()) + output_dir = f"/tmp/{job_id}" + + # Prepare the output directory + os.makedirs(output_dir, exist_ok=True) + + # Prepare paths + result_m8_path = os.path.join(output_dir, "result.m8") + result_tsv_path = os.path.join(output_dir, "result.tsv") + + # Create the FASTA file + path_query = os.path.join(output_dir, "query.fasta") + path_queryDB = path_query.replace('fasta', '') + ".db" + create_fastas_file_from_seq(params.query, path_query) + create_queryDB_from_seq(path_query) + + # Run the mmseqs2 search command + command = [ + "mmseqs", "search", + path_queryDB, + params.database, + os.path.join(output_dir, "result"), + output_dir, + "--threads", 
+        "-s", str(params.sensitivity)
+    ]
+
+    try:
+        # Execute mmseqs search
+        subprocess.run(command, check=True)
+
+        # Convert the results to BLAST+ format if requested
+        if params.blast_format:
+            # mmseqs convertalis queryDB targetDB resultDB resultDB.m8
+            # Convert to BLAST tabular format (BLAST m8 format)
+            convert_command = [
+                "mmseqs", "convertalis",
+                path_queryDB,
+                params.database,
+                os.path.join(output_dir, "result"),
+                result_m8_path,
+            ]
+            subprocess.run(convert_command, check=True)
+
+            # Store the result path for m8 format
+            job_results[job_id] = {
+                "status": "completed",
+                "result_path": result_m8_path
+            }
+        else:
+            # Store the result path for standard mmseqs2 output (TSV format)
+            job_results[job_id] = {
+                "status": "completed",
+                "result_path": result_tsv_path
+            }
+
+        return {"job_id": job_id}
+    except subprocess.CalledProcessError as e:
+        raise HTTPException(status_code=500, detail=f"mmseqs2 failed: {str(e)}")
+
+@app.get("/results/{job_id}")
+async def get_results(job_id: str):
+    # Check if the job exists
+    if job_id not in job_results:
+        raise HTTPException(status_code=404, detail="Job not found")
+
+    # Get the result path
+    result = job_results[job_id]
+
+    # Read and return the result (assuming it's a text file you want to read and return)
+    result_file = result["result_path"]
+    if os.path.exists(result_file):
+        with open(result_file, "r") as file:
+            data = file.read()
+        return {"status": result["status"], "results": data}
+    else:
+        raise HTTPException(status_code=404, detail="Result file not found")
+
+
+if __name__ == '__main__':
+    import uvicorn
+
+    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
\ No newline at end of file
diff --git a/mmseqs2/reload_development.sh b/mmseqs2/reload_development.sh
new file mode 100644
index 0000000..d074569
--- /dev/null
+++ b/mmseqs2/reload_development.sh
@@ -0,0 +1,4 @@
+sudo docker stop mmseq_docker
+sudo docker remove mmseq_docker
+sudo docker build --no-cache -t mmseq_docker .
+sudo docker run --name mmseq_docker --volume /mnt/databases:/app -p 8000:8000 mmseq_docker
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 341cafa..90779b1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,5 @@
 pyeed @ git+https://github.com/PyEED/pyeed.git@main
+chromatopy @ git+https://github.com/FAIRChemistry/chromatopy.git
+MTPHandler @ git+https://github.com/FAIRChemistry/MTPHandler.git
 python-multipart
+
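
Usage note: a minimal client sketch for the two services added in this diff. The ports come from the README port list and the JSON fields mirror what `blast/app.py` and `mmseqs2/app.py` read; the hostname, database paths, and example sequence are assumptions and must be adapted to the mounted volumes.

```python
# Minimal client sketch for the BLAST and MMseqs2 services defined in this diff.
# Host/port values follow the README port list; database paths and the example
# sequence are placeholders (assumptions), not values taken from the repository.
import requests

SEQ = "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ"  # placeholder query sequence

# blast/app.py reads these fields from the raw request JSON
blast_payload = {
    "tool": "blastp",
    "query": SEQ,
    "db": "/blast/blastdb/my_db",   # assumed database path inside the blast container
    "evalue": "0.001",
    "outfmt": "6",
    "num_threads": "4",
}
print(requests.post("http://localhost:6001/run_blast", json=blast_payload).text)

# mmseqs2/app.py validates the payload against the MMSeqsParams model
mmseqs_payload = {
    "query": SEQ,
    "database": "/app/targetDB",    # assumed MMseqs2 target database inside the container
    "output": "/tmp",
    "sensitivity": 7.5,
    "threads": 4,
    "blast_format": True,
}
job = requests.post("http://localhost:8000/run_mmseqs", json=mmseqs_payload).json()
result = requests.get(f"http://localhost:8000/results/{job['job_id']}").json()
print(result["status"])
```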