From 1180b200f9d532b8562d10697c1f617e9a820ddf Mon Sep 17 00:00:00 2001 From: Jalil Nourisa Date: Tue, 8 Oct 2024 20:04:21 +0200 Subject: [PATCH] scenicplus pipeline fixed --- runs.ipynb | 32 +++++++++---------- src/methods/multi_omics/scenicplus/main.py | 2 -- src/methods/multi_omics/scenicplus/script.py | 30 ++++++++--------- .../{config.novsh.yaml => config.vsh.yaml} | 8 +++-- src/methods/multi_omics/scenicplus_ns/main.nf | 4 +-- src/methods/multi_omics/scglue/main.py | 18 +++++------ src/methods/multi_omics/scglue/script.py | 4 +-- 7 files changed, 50 insertions(+), 48 deletions(-) rename src/methods/multi_omics/scenicplus_ns/{config.novsh.yaml => config.vsh.yaml} (81%) diff --git a/runs.ipynb b/runs.ipynb index 0472805bc..356159157 100644 --- a/runs.ipynb +++ b/runs.ipynb @@ -819,38 +819,38 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Submitted batch job 7757148\n" + "Submitted batch job 7757181\n" ] } ], "source": [ "if True:\n", + " par = {\n", + " 'methods': ['scglue'],\n", + " 'models_dir': 'resources/grn_models/',\n", + " 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad', \n", + " 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad', \n", + " 'num_workers': 20,\n", + " 'mem': \"120GB\",\n", + " 'time': \"48:00:00\"\n", + " }\n", + " \n", " # par = {\n", - " # 'methods': ['scglue'],\n", - " # 'models_dir': 'resources/grn_models/',\n", - " # 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad', \n", - " # 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad', \n", + " # 'methods': ['scenicplus'],\n", + " # 'models_dir': 'resources/grn_models/d0_hvg',\n", + " # 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad', \n", + " # 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac_d0.h5ad', \n", " # 'num_workers': 20,\n", " # 'mem': \"250GB\",\n", " # 'time': \"48:00:00\"\n", " # }\n", - " \n", - " par = {\n", - " 'methods': ['scenicplus'],\n", - " 'models_dir': 'resources/grn_models/d0_hvg',\n", - " 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad', \n", - " 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac_d0.h5ad', \n", - " 'num_workers': 20,\n", - " 'mem': \"250GB\",\n", - " 'time': \"48:00:00\"\n", - " }\n", "\n", " for method in par['methods']:\n", " par['prediction'] = f\"{par['models_dir']}/{method}.csv\"\n", diff --git a/src/methods/multi_omics/scenicplus/main.py b/src/methods/multi_omics/scenicplus/main.py index cf3e4f6eb..a14b26651 100644 --- a/src/methods/multi_omics/scenicplus/main.py +++ b/src/methods/multi_omics/scenicplus/main.py @@ -721,8 +721,6 @@ def snakemake_pipeline(par): cwd = os.getcwd() print(cwd) - - settings['input_data']['cisTopic_obj_fname'] = f"{cwd}/{par['cistopic_object']}" settings['input_data']['GEX_anndata_fname'] = f"{cwd}/{os.path.join(par['temp_dir'], 'rna.h5ad')}" settings['input_data']['region_set_folder'] = f"{cwd}/{os.path.join(par['temp_dir'], 'region_sets')}" diff --git a/src/methods/multi_omics/scenicplus/script.py b/src/methods/multi_omics/scenicplus/script.py index e56f0c399..3b9f66601 100644 --- a/src/methods/multi_omics/scenicplus/script.py +++ b/src/methods/multi_omics/scenicplus/script.py @@ -68,21 +68,21 @@ def main(par): par['MALLET_PATH'] = os.path.join(par['temp_dir'], 'Mallet-202108', 'bin', 'mallet') os.makedirs(par['atac_dir'], exist_ok=True) - # print('------- download_databases -------') - # download_databases(par) - # print_memory_usage() - # print('------- process_peak -------') - # process_peak(par) - # print_memory_usage() - # print('------- run_cistopic -------') - # run_cistopic(par) - # print_memory_usage() - # print('------- process_topics -------') - # process_topics(par) - # print_memory_usage() - # print('------- preprocess_rna -------') - # preprocess_rna(par) - # print_memory_usage() + print('------- download_databases -------') + download_databases(par) + print_memory_usage() + print('------- process_peak -------') + process_peak(par) + print_memory_usage() + print('------- run_cistopic -------') + run_cistopic(par) + print_memory_usage() + print('------- process_topics -------') + process_topics(par) + print_memory_usage() + print('------- preprocess_rna -------') + preprocess_rna(par) + print_memory_usage() print('------- snakemake_pipeline -------') snakemake_pipeline(par) print_memory_usage() diff --git a/src/methods/multi_omics/scenicplus_ns/config.novsh.yaml b/src/methods/multi_omics/scenicplus_ns/config.vsh.yaml similarity index 81% rename from src/methods/multi_omics/scenicplus_ns/config.novsh.yaml rename to src/methods/multi_omics/scenicplus_ns/config.vsh.yaml index ac7060938..77e27e2d7 100644 --- a/src/methods/multi_omics/scenicplus_ns/config.novsh.yaml +++ b/src/methods/multi_omics/scenicplus_ns/config.vsh.yaml @@ -20,10 +20,14 @@ functionality: description: "Whether to perform quality control." - name: --cell_topic type: file - default: output/cell_topic.csv required: false direction: output description: "Cell-topics prob scores" + - name: --grn_extended + type: file + required: false + direction: output + description: "Source-target-peak triplets" resources: @@ -36,4 +40,4 @@ functionality: platforms: - type: nextflow directives: - label: [ midtime, highmem, highcpu ] + label: [ onedaytime, highmem, highcpu ] diff --git a/src/methods/multi_omics/scenicplus_ns/main.nf b/src/methods/multi_omics/scenicplus_ns/main.nf index 4a27b82d0..743cd319a 100644 --- a/src/methods/multi_omics/scenicplus_ns/main.nf +++ b/src/methods/multi_omics/scenicplus_ns/main.nf @@ -13,10 +13,10 @@ workflow run_wf { num_workers: "num_workers" ], - toState: [prediction:"prediction", cell_topic:"cell_topic", scplus_mdata:"scplus_mdata"] + toState: [prediction:"prediction", cell_topic:"cell_topic", scplus_mdata:"scplus_mdata", grn_extended:"grn_extended"] ) - | setState(["prediction", "cell_topic", "scplus_mdata"]) + | setState(["prediction", "cell_topic", "scplus_mdata", "grn_extended"]) emit: output_ch diff --git a/src/methods/multi_omics/scglue/main.py b/src/methods/multi_omics/scglue/main.py index ab48fa89e..c860201f3 100644 --- a/src/methods/multi_omics/scglue/main.py +++ b/src/methods/multi_omics/scglue/main.py @@ -262,10 +262,10 @@ def prune_grn(par): "--annotations_fname", f"{par['temp_dir']}/ctx_annotation.tsv", "--expression_mtx_fname", f"{par['temp_dir']}/rna.loom", "--output", f"{par['temp_dir']}/pruned_grn.csv", - # "--top_n_targets", str(par['top_n_targets']), - # "--rank_threshold", str(par['rank_threshold']), - # "--auc_threshold", "0.1", - # "--nes_threshold", str(par['nes_threshold']), + "--top_n_targets", str(par['top_n_targets']), + "--rank_threshold", str(par['rank_threshold']), + "--auc_threshold", "0.1", + "--nes_threshold", str(par['nes_threshold']), "--min_genes", "1", "--num_workers", f"{par['num_workers']}", "--cell_id_attribute", "obs_id", # be sure that obs_id is in obs and name is in var @@ -300,11 +300,11 @@ def main(par): # preprocess(par) # print('----- training ---- ', flush=True) # training(par) - print('----- create_prior ---- ', flush=True) - create_prior(par) - print('----- pyscenic_grn ---- ', flush=True) - pyscenic_grn(par) - print('----- prune_grn ---- ', flush=True) + # print('----- create_prior ---- ', flush=True) + # create_prior(par) + # print('----- pyscenic_grn ---- ', flush=True) + # pyscenic_grn(par) + # print('----- prune_grn ---- ', flush=True) prune_grn(par) print('Curate predictions', flush=True) pruned_grn = pd.read_csv( diff --git a/src/methods/multi_omics/scglue/script.py b/src/methods/multi_omics/scglue/script.py index 6d25d8a58..cf8e34f17 100644 --- a/src/methods/multi_omics/scglue/script.py +++ b/src/methods/multi_omics/scglue/script.py @@ -11,8 +11,8 @@ "num_workers": 20, "prediction": "output/scglue_d0_hvg.csv", "max_n_links": 50000, - "nes_threshold": 1.5, - "rank_threshold": 1500, + "nes_threshold": 1, + "rank_threshold": 5000, "top_n_targets": 100, 'normalize': False, 'extend_range': 150000