metric ws distance added
janursa committed Jan 5, 2025
1 parent de0ca29 commit 5f374ff
Showing 20 changed files with 1,565 additions and 528 deletions.
1,363 changes: 974 additions & 389 deletions runs.ipynb

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions scripts/sbatch/calculate_scores.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 #SBATCH --job-name=scores
-#SBATCH --time=48:00:00
+#SBATCH --time=10:00:00
 #SBATCH --output=logs/%j.out
 #SBATCH --error=logs/%j.err
 #SBATCH --mail-type=END
 #SBATCH [email protected]
 #SBATCH --mem=64G
-#SBATCH --cpus-per-task=20
+#SBATCH --cpus-per-task=1
 
-python src/metrics/script_all.py
+# python src/metrics/script_all.py
+python src/metrics/all_metrics/script_all.py
3 changes: 2 additions & 1 deletion scripts/sbatch/run_helper.sh
@@ -8,4 +8,5 @@
 #SBATCH --mem=64G
 #SBATCH --cpus-per-task=20
 
-python src/helper.py
+# python src/helper.py
+python src/metrics/wasserstein/background_score.py
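
For orientation, the new target src/metrics/wasserstein/background_score.py is not shown in this diff. Below is a minimal sketch of what a per-gene Wasserstein background-distance computation could look like; the obs column name, control label, placeholder TF, and the assumption that the raw single-cell file carries an 'X_norm' layer are illustrative, not taken from the repository.

import anndata as ad
import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance


def _dense(x):
    # Densify a layer slice if it is stored as a sparse matrix.
    return np.asarray(x.todense()) if hasattr(x, 'todense') else np.asarray(x)


def ws_distances_for_tf(adata, tf, layer='X_norm', pert_col='perturbation', control_label='control'):
    # Per-gene 1-D Wasserstein distance between cells perturbed for `tf` and control cells.
    ctrl = _dense(adata[adata.obs[pert_col] == control_label].layers[layer])
    pert = _dense(adata[adata.obs[pert_col] == tf].layers[layer])
    records = []
    for j, gene in enumerate(adata.var_names):
        d = wasserstein_distance(pert[:, j], ctrl[:, j])
        records.append({'tf': tf, 'gene': gene, 'ws_distance': d})
    return pd.DataFrame(records)


if __name__ == '__main__':
    adata = ad.read_h5ad('resources/datasets_raw/adamson_sc_counts.h5ad')
    tf = 'EXAMPLE_TF'  # placeholder: use a TF that actually appears in adata.obs[pert_col]
    print(ws_distances_for_tf(adata, tf).head())
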
6 changes: 6 additions & 0 deletions src/api/comp_metric.yaml
@@ -70,6 +70,12 @@ functionality:
       type: boolean
       direction: input
       default: false
+    - name: --dataset_id
+      type: string
+      direction: input
+      required: true
+      default: op
+
 
   test_resources:
     - type: python_script
18 changes: 11 additions & 7 deletions src/methods/script_all.py
@@ -115,6 +115,9 @@ def run_grn_inference(dataset='op', subsample=None):
     elif method in ["scenicplus"]:
         mem = "250GB"
         time = "12:00:00"
+    elif method in ["scenic"]:
+        mem = "250GB"
+        time = "24:00:00"
 
     # Prepare sbatch command
     tag = f"--job-name={method}"  # No spaces around '='
@@ -143,23 +146,24 @@
print(f"Command error output: {e.stderr}")

if __name__ == '__main__':
force = False
force = True
sbatch = True
# methods = ["positive_control", "negative_control", "pearson_corr", "portia", "grnboost2", "ppcor", "scenic"],
# methods = ["portia", "grnboost2"]
methods = ["scenicplus"]
methods = ["scenic"]
datasets = ['adamson']


partition='cpu'

mem = "120GB"
time = "12:00:00"
# mem = "120GB"
# time = "24:00:00"

if False: # normal run
for dataset in ['op','replogle2', 'norman', 'adamson', 'nakatake']:
if True: # normal run
for dataset in datasets:
run_grn_inference(dataset, subsample=None)

if True: # subsample
if False: # subsample
# for dataset in ['replogle2', 'norman', 'adamson', 'nakatake']: # 'replogle2' 'op' norman
for dataset in ['op']:
if dataset == 'op':
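
The "# Prepare sbatch command" step shown above is only partially visible in this hunk. A minimal sketch of how such a submission could be assembled with subprocess is given below; the resource values mirror the branches in the diff, while the wrapper script path and function name are assumptions.

import subprocess


def submit_method(method, mem='64GB', time='12:00:00', partition='cpu'):
    # Build the sbatch command; per-method resources mirror the branches above
    # (e.g. scenic -> 250GB / 24:00:00). The wrapped runner script is hypothetical.
    cmd = [
        'sbatch',
        f'--job-name={method}',          # no spaces around '=', as the source notes
        f'--mem={mem}',
        f'--time={time}',
        f'--partition={partition}',
        'scripts/sbatch/run_method.sh',  # hypothetical wrapper for the method
        method,
    ]
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        print(result.stdout.strip())     # e.g. "Submitted batch job <id>"
    except subprocess.CalledProcessError as e:
        print(f'Command error output: {e.stderr}')


submit_method('scenic', mem='250GB', time='24:00:00')
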
66 changes: 66 additions & 0 deletions src/metrics/all_metrics/helper.py
@@ -0,0 +1,66 @@
import os
import pandas as pd
from regression_2.consensus.script import main as main_consensus_reg2
from wasserstein.consensus.script import main as main_consensus_ws
from wasserstein.background_distance.script import main as main_ws_background_distance
from all_metrics.script import main as main_scores
from all_metrics.script import par as main_par



def run_scores_all(datasets, models):
    scores_dir = 'resources/scores/'
    save_file_name = f"{scores_dir}/default_scores.csv"

    scores_store = []
    for dataset in datasets:
        for model in models:
            par = main_par.copy()
            # - adjust par
            par['dataset_id'] = dataset
            par['prediction'] = f'resources/grn_models/{dataset}/{model}.csv'
            if not os.path.exists(par['prediction']):
                print('Skipping ', par['prediction'])
                continue
            # - run
            scores_model = main_scores(par)
            scores_model['model'] = model
            scores_model['dataset'] = dataset

            scores_store.append(scores_model)
    scores_all = pd.concat(scores_store)
    scores_all.to_csv(save_file_name)

def run_consensus(datasets):
    models = ['positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus']

    for dataset in datasets:
        par = {
            'models': models,
            'evaluation_data': f'resources/evaluation_datasets/{dataset}_perturbation.h5ad',
            'evaluation_data_sc': f'resources/datasets_raw/{dataset}_sc_counts.h5ad',
            'models_dir': f'resources/grn_models/{dataset}/',
            'regulators_consensus': f'resources/prior/regulators_consensus_{dataset}.json',
            'ws_consensus': f'resources/prior/ws_consensus_{dataset}.csv',
            'tf_all': 'resources/prior/tf_all.csv',
        }
        # - reg2 consensus
        print(f'--determining consensus for reg2--{dataset}')
        main_consensus_reg2(par)

        # - ws consensus
        print(f'--determining consensus for ws--{dataset}')
        if dataset in ['norman', 'adamson']:
            main_consensus_ws(par)


def run_ws_distance_background(datasets):
    for dataset in datasets:
        par = {
            'evaluation_data_sc': f'resources/datasets_raw/{dataset}_sc_counts.h5ad',
            'background_distance': f'resources/prior/ws_distance_background_{dataset}.csv',
            'tf_all': 'resources/prior/tf_all.csv',
            'layer': 'X_norm'
        }
        print(f'--run ws distance background --{dataset}')
        if dataset in ['norman', 'adamson']:
            main_ws_background_distance(par)
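
As a quick sanity check of the aggregated file written by run_scores_all above, the CSV can be reloaded and grouped per dataset (a usage sketch; the metric column names depend on what the individual metric scripts return):

import pandas as pd

scores = pd.read_csv('resources/scores/default_scores.csv', index_col=0)
# Each row holds the metric columns returned by main_scores plus the
# 'model' and 'dataset' labels appended in run_scores_all.
for dataset, group in scores.groupby('dataset'):
    print(dataset)
    print(group.set_index('model').drop(columns=['dataset']))
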
82 changes: 82 additions & 0 deletions src/metrics/all_metrics/script.py
@@ -0,0 +1,82 @@
import pandas as pd
import anndata as ad
import sys
import numpy as np
import os


## VIASH START
par = {
    'prediction': f'resources/grn_models/norman/grnboost2.csv',
    'method_id': 'grnboost2',

    "tf_all": f"resources/prior/tf_all.csv",
    'skeleton': f'resources/prior/skeleton.csv',
    'dataset_id': 'norman',
    'layer': 'X_norm',
    "apply_tf": True,
    'subsample': -1,
    'verbose': 4,
    'num_workers': 20,
    'binarize': False,
    'max_n_links': 50000,
    'apply_skeleton': False,
    'reg_type': 'ridge',
    'score': 'output/score.h5ad'
}
## VIASH END

meta = {
"resources_dir": 'src/metrics/',
"util": 'src/utils'
}
sys.path.append(meta["resources_dir"])
sys.path.append(meta["util"])
from regression_1.main import main as main_reg1
from regression_2.main import main as main_reg2
from wasserstein.script import main as main_ws



def main(par):
"""
Calculate all scores for a given model and daatset.
"""
assert par['dataset_id']
dataset = par['dataset_id']

par['evaluation_data'] = f'resources/evaluation_datasets/{dataset}_perturbation.h5ad'
par['evaluation_data_sc'] = f'resources/datasets_raw/{dataset}_sc_counts.h5ad'
par['regulators_consensus'] = f'resources/prior/regulators_consensus_{dataset}.json'
par['ws_consensus'] = f'resources/prior/ws_consensus_{dataset}.csv'
par['ws_distance_background'] = f'resources/prior/ws_distance_background_{dataset}.csv'

scores_all = []

scores_reg1 = main_reg1(par)
scores_all.append(scores_reg1)
scores_reg2 = main_reg2(par)
scores_all.append(scores_reg2)
if dataset in ['norman', 'adamson']:
print(par)
_, scores_ws = main_ws(par)
scores_all.append(scores_ws)

scores_all = pd.concat(scores_all, axis=1)

return scores_all
if __name__ == '__main__':
    scores_all = main(par)

    output = ad.AnnData(
        X=np.empty((0, 0)),
        uns={
            "dataset_id": par["dataset_id"],
            "method_id": par['method_id'],
            "metric_ids": scores_all.columns.values,
            "metric_values": scores_all.values[0]
        }
    )
    print(output)
    output.write_h5ad(par['score'], compression='gzip')
    print('Completed', flush=True)
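
The score file written above can be read back and reshaped into a one-row table per method (a minimal read-back sketch using the defaults in this script):

import anndata as ad
import pandas as pd

score = ad.read_h5ad('output/score.h5ad')
# metric_ids / metric_values were written to .uns by the block above.
df = pd.DataFrame(
    [score.uns['metric_values']],
    columns=list(score.uns['metric_ids']),
    index=[score.uns['method_id']],
)
print(score.uns['dataset_id'])
print(df)
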
162 changes: 162 additions & 0 deletions src/metrics/all_metrics/script_all.py
@@ -0,0 +1,162 @@
import pandas as pd
import anndata as ad
import sys
import numpy as np
import os

meta = {
"resources_dir": 'src/metrics/',
"util": 'src/utils'
}
sys.path.append(meta["resources_dir"])
sys.path.append(meta["util"])

from all_metrics.helper import run_consensus, run_ws_distance_background, run_scores_all
from regression_1.main import main as main_reg1
from regression_2.main import main as main_reg2

par = {
    'layer': 'X_norm',
    "tf_all": "resources/prior/tf_all.csv",
    'skeleton': 'resources/prior/skeleton.csv',
    "apply_tf": True,
    'subsample': -1,
    'verbose': 4,
    'num_workers': 20,
    'binarize': False,
    'max_n_links': 50000,
    'apply_skeleton': False,
    'reg_type': 'ridge'
}


def run_evaluation(dataset, models, models_dir, save_file_name):
    print('------ ', dataset, '------')

    # - determines models to run
    grn_files_dict = {}
    # - add models
    for model in models:
        print(model)
        grn_file = f"{models_dir}/{model}.csv"
        if not os.path.exists(grn_file):
            print(f"{grn_file} doesn't exist. Skipped.")
            continue
        grn_files_dict[model] = grn_file

    # - actual runs
    i = 0
    for model, grn_file in grn_files_dict.items():
        par['prediction'] = grn_file
        reg1 = main_reg1(par)
        reg2 = main_reg2(par)
        score = pd.concat([reg1, reg2], axis=1)
        score.index = [model]
        if i == 0:
            df_all = score
        else:
            df_all = pd.concat([df_all, score])
        df_all.to_csv(save_file_name)
        print(df_all)
        i += 1

if __name__ == '__main__':
    run_scores_flag = True
    run_consensus_flag = False
    run_ws_distance_background_flag = False
    datasets = ['op', 'replogle2', 'nakatake', 'norman', 'adamson']

    if run_consensus_flag:  # run consensus
        run_consensus(datasets)

    if run_ws_distance_background_flag:  # run background scores for ws distance
        run_ws_distance_background(datasets)

    if run_scores_flag:
        models = ['negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus']

        run_scores_all(datasets, models=models)


    sys.exit(0)  # stop here; the disabled blocks below are kept for reference

    if False:  # default run
        for dataset in datasets:
            models_dir = f"resources/grn_models/{dataset}"
            scores_dir = f"resources/scores/{dataset}"
            run_consensus([dataset])
            save_file_name = f"{scores_dir}/default_scores.csv"

            run_evaluation(dataset, models, models_dir, save_file_name)

    if True:  # subsample
        # for dataset in ['op', 'replogle2', 'nakatake', 'norman', 'adamson']: #'op', 'replogle2', 'nakatake', 'norman', 'adamson'
        for dataset in ['op']:
            if dataset == 'op':
                models_subsampled = [f'{model}_{subsample}' for subsample in [1, 2] for model in models]
            else:
                models_subsampled = [f'{model}_{subsample}' for subsample in [0.2, 0.5] for model in models]
            models_dir = f"resources/grn_models/{dataset}"
            scores_dir = f"resources/scores/{dataset}"

            save_file_name = f"{scores_dir}/subsampled.csv"

            run_evaluation(dataset, models_subsampled, models_dir, save_file_name)



    if False:  # run global models
        models = ['pearson_corr']
        dataset = 'op'

        models_dir = "resources/grn_models/global/"
        scores_dir = f"resources/scores/{dataset}"
        # run_consensus(dataset)
        save_file_name = f"{scores_dir}/X_norm-50000-skeleton_False-binarize_False-ridge-global-True.csv"

        run_evaluation(dataset, models, models_dir, save_file_name)

    if False:  # run skeleton
        models = ['negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus']

        dataset = 'op'

        models_dir = f"resources/grn_models/{dataset}"
        scores_dir = f"resources/scores/{dataset}"
        save_file_name = f"{scores_dir}/X_norm-50000-skeleton_True-binarize_False-ridge-global-False.csv"

        # run_consensus(dataset)
        par['apply_skeleton'] = True
        run_evaluation(dataset, models, models_dir, save_file_name)

    if False:  # run GB
        models = ['negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus']

        dataset = 'op'

        models_dir = f"resources/grn_models/{dataset}"
        scores_dir = f"resources/scores/{dataset}"
        save_file_name = f"{scores_dir}/X_norm-50000-skeleton_True-binarize_False-GB-global-False.csv"

        # run_consensus(dataset)
        par['apply_skeleton'] = True
        par['reg_type'] = 'GB'
        run_evaluation(dataset, models, models_dir, save_file_name)






# def define_par(dataset):

# par = {
# "evaluation_data": f"resources/evaluation_datasets/{dataset}_perturbation.h5ad",
# 'consensus': f'resources/prior/{dataset}_consensus-num-regulators.json',

# 'layer': 'X_norm',

# "tf_all": "resources/prior/tf_all.csv",
# 'skeleton': 'resources/prior/skeleton.csv',
# "apply_tf": True,
# 'subsample': -1,
# 'verbose': 4,
# 'num_workers': 20
# }

# return par
4 changes: 0 additions & 4 deletions src/metrics/consensus/create-consensus.sh

This file was deleted.
