Skip to content

Folding

Benjamin Fry edited this page Apr 16, 2024 · 3 revisions

AlphaFold

Running ColabFold in single-sequence mode on an SBGrid workstation with GPUs

import os
import multiprocessing
import subprocess

def submit_colabfold(
    path_to_fasta,
    output_dir,
    device=0,
    colabfold_bin="/nfs/sbgrid/programs/x86_64-linux/colabfold/1.5.2/bin/colabfold_batch",
):
    """
    Runs AF2 in single sequence mode.

    Launches ``colabfold_batch`` on one FASTA file and blocks until the
    command exits. The exit status is not checked.

    Args:
        path_to_fasta: Path of the input FASTA file (may contain spaces).
        output_dir: Directory passed to ``colabfold_batch`` for its results.
        device: Index of the desired device as it appears in nvidia-smi.
        colabfold_bin: Path of the ``colabfold_batch`` executable. Defaults to
            the SBGrid 1.5.2 install.
    """
    # subprocess uses `env` *instead of* the inherited environment, so start
    # from a copy of ours; otherwise the child loses PATH, HOME, etc.
    # PCI_BUS_ID ordering makes CUDA device indices match nvidia-smi's listing.
    child_env = {
        **os.environ,
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": str(device),
    }
    # Build argv as a list rather than splitting a formatted string, so paths
    # containing whitespace stay a single argument.
    command = [
        str(colabfold_bin),
        str(path_to_fasta),
        str(output_dir),
        "--msa-mode",
        "single_sequence",
        "--overwrite-existing-results",
    ]
    subprocess.run(command, env=child_env)

def worker_submit_colabfold(tup):
    """
    Pool worker: unpacks one (fasta path, output directory, device index)
    job, makes sure the output directory exists, then runs ColabFold on it.
    """
    (fasta_file, results_dir, gpu_index) = tup
    # exist_ok so a directory left over from an earlier run is not an error.
    os.makedirs(results_dir, exist_ok=True)
    submit_colabfold(fasta_file, results_dir, device=gpu_index)

if __name__ == '__main__':
    # Directory holding the design .fasta files; replace the placeholder before running.
    path_to_designs = '...'

    # Assumes number of design fasta files is less than or equal to the number of GPUs.
    # Sorted because os.listdir order is arbitrary; sorting keeps the
    # file -> GPU index / chunk_{idx} assignment the same from run to run.
    all_fasta_paths = sorted(
        fasta_path
        for fasta_path in (
            os.path.join(path_to_designs, entry) for entry in os.listdir(path_to_designs)
        )
        if os.path.isfile(fasta_path) and fasta_path.endswith('.fasta')
    )

    # multiprocessing.Pool(0) raises ValueError, so stop with a clear message instead.
    if not all_fasta_paths:
        raise SystemExit(f'No .fasta files found in {path_to_designs}')

    # One job per fasta file: (input file, its own output directory, GPU index).
    jobs = [
        (fasta_path, os.path.join(path_to_designs, f'chunk_{idx}'), idx)
        for idx, fasta_path in enumerate(all_fasta_paths)
    ]

    # Submits one file chunk per GPU.
    with multiprocessing.Pool(len(jobs)) as p:
        # Drain the iterator so the pool is not torn down before every job finishes.
        for _ in p.imap(worker_submit_colabfold, jobs):
            pass

ESMFold

OmegaFold