Commit
Showing 20 changed files with 1,565 additions and 528 deletions.
SLURM batch script:

@@ -1,11 +1,12 @@
 #!/bin/bash
 #SBATCH --job-name=scores
-#SBATCH --time=48:00:00
+#SBATCH --time=10:00:00
 #SBATCH --output=logs/%j.out
 #SBATCH --error=logs/%j.err
 #SBATCH --mail-type=END
 #SBATCH [email protected]
 #SBATCH --mem=64G
-#SBATCH --cpus-per-task=20
+#SBATCH --cpus-per-task=1

-python src/metrics/script_all.py
+# python src/metrics/script_all.py
+python src/metrics/all_metrics/script_all.py
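For reference, the batch script above would be submitted with sbatch. A minimal sketch, assuming a hypothetical path for the script (the diff does not show its filename):

import subprocess

# Minimal sketch: submit the SLURM script and report the job id.
# "scripts/scores.sh" is a hypothetical path; the diff does not show
# the script's actual filename.
result = subprocess.run(
    ["sbatch", "scripts/scores.sh"],
    capture_output=True, text=True, check=True,
)
# sbatch prints e.g. "Submitted batch job 123456"; per the #SBATCH
# directives, logs then land in logs/<jobid>.out and logs/<jobid>.err.
print(result.stdout.strip())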
src/metrics/all_metrics/helper.py (new file):

@@ -0,0 +1,66 @@
import os
import pandas as pd
from regression_2.consensus.script import main as main_consensus_reg2
from wasserstein.consensus.script import main as main_consensus_ws
from wasserstein.background_distance.script import main as main_ws_background_distance
from all_metrics.script import main as main_scores
from all_metrics.script import par as main_par


def run_scores_all(datasets, models):
    """Score every available model on every dataset and write one combined CSV."""
    scores_dir = 'resources/scores/'
    save_file_name = f"{scores_dir}/default_scores.csv"

    scores_store = []
    for dataset in datasets:
        for model in models:
            par = main_par.copy()
            # - adjust par
            par['dataset_id'] = dataset
            par['prediction'] = f'resources/grn_models/{dataset}/{model}.csv'
            if not os.path.exists(par['prediction']):
                print('Skipping', par['prediction'])
                continue
            # - run
            scores_model = main_scores(par)
            scores_model['model'] = model
            scores_model['dataset'] = dataset

            scores_store.append(scores_model)
    scores_all = pd.concat(scores_store)
    scores_all.to_csv(save_file_name)


def run_consensus(datasets):
    """Build the regression-2 and Wasserstein consensus priors for each dataset."""
    models = ['positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic',
              'granie', 'scglue', 'celloracle', 'figr', 'scenicplus']

    for dataset in datasets:
        par = {
            'models': models,
            'evaluation_data': f'resources/evaluation_datasets/{dataset}_perturbation.h5ad',
            'evaluation_data_sc': f'resources/datasets_raw/{dataset}_sc_counts.h5ad',
            'models_dir': f'resources/grn_models/{dataset}/',
            'regulators_consensus': f'resources/prior/regulators_consensus_{dataset}.json',
            'ws_consensus': f'resources/prior/ws_consensus_{dataset}.csv',
            'tf_all': 'resources/prior/tf_all.csv',
        }
        # - reg2 consensus
        print(f'--determining consensus for reg2--{dataset}')
        main_consensus_reg2(par)

        # - ws consensus (only datasets with single-cell perturbation data)
        print(f'--determining consensus for ws--{dataset}')
        if dataset in ['norman', 'adamson']:
            main_consensus_ws(par)


def run_ws_distance_background(datasets):
    """Precompute the Wasserstein-distance background for each dataset."""
    for dataset in datasets:
        par = {
            'evaluation_data_sc': f'resources/datasets_raw/{dataset}_sc_counts.h5ad',
            'background_distance': f'resources/prior/ws_distance_background_{dataset}.csv',
            'tf_all': 'resources/prior/tf_all.csv',
            'layer': 'X_norm'
        }
        print(f'--run ws distance background--{dataset}')
        if dataset in ['norman', 'adamson']:
            main_ws_background_distance(par)
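For orientation, a minimal usage sketch of these helpers, assuming the repository root as working directory and the resources/ layout the functions already expect:

import sys
sys.path.append('src/metrics')  # the helpers resolve their imports relative to src/metrics

from all_metrics.helper import run_consensus, run_ws_distance_background, run_scores_all

datasets = ['norman', 'adamson']        # the two datasets with single-cell perturbation data
models = ['pearson_corr', 'grnboost2']  # arbitrary example subset

run_consensus(datasets)               # writes the reg2 and ws consensus priors
run_ws_distance_background(datasets)  # writes the ws-distance background CSVs
run_scores_all(datasets, models)      # writes resources/scores/default_scores.csv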
src/metrics/all_metrics/script.py (new file):

@@ -0,0 +1,82 @@
import pandas as pd
import anndata as ad
import sys
import numpy as np
import os


## VIASH START
par = {
    'prediction': 'resources/grn_models/norman/grnboost2.csv',
    'method_id': 'grnboost2',
    "tf_all": "resources/prior/tf_all.csv",
    'skeleton': 'resources/prior/skeleton.csv',
    'dataset_id': 'norman',
    'layer': 'X_norm',
    "apply_tf": True,
    'subsample': -1,
    'verbose': 4,
    'num_workers': 20,
    'binarize': False,
    'max_n_links': 50000,
    'apply_skeleton': False,
    'reg_type': 'ridge',
    'score': 'output/score.h5ad'
}
## VIASH END

meta = {
    "resources_dir": 'src/metrics/',
    "util": 'src/utils'
}
sys.path.append(meta["resources_dir"])
sys.path.append(meta["util"])
from regression_1.main import main as main_reg1
from regression_2.main import main as main_reg2
from wasserstein.script import main as main_ws


def main(par):
    """Calculate all scores for a given model and dataset."""
    assert par['dataset_id']
    dataset = par['dataset_id']

    par['evaluation_data'] = f'resources/evaluation_datasets/{dataset}_perturbation.h5ad'
    par['evaluation_data_sc'] = f'resources/datasets_raw/{dataset}_sc_counts.h5ad'
    par['regulators_consensus'] = f'resources/prior/regulators_consensus_{dataset}.json'
    par['ws_consensus'] = f'resources/prior/ws_consensus_{dataset}.csv'
    par['ws_distance_background'] = f'resources/prior/ws_distance_background_{dataset}.csv'

    scores_all = []

    scores_reg1 = main_reg1(par)
    scores_all.append(scores_reg1)
    scores_reg2 = main_reg2(par)
    scores_all.append(scores_reg2)
    # Wasserstein scores need single-cell perturbation data, which only
    # these two datasets provide.
    if dataset in ['norman', 'adamson']:
        _, scores_ws = main_ws(par)
        scores_all.append(scores_ws)

    scores_all = pd.concat(scores_all, axis=1)

    return scores_all


if __name__ == '__main__':
    scores_all = main(par)

    output = ad.AnnData(
        X=np.empty((0, 0)),
        uns={
            "dataset_id": par["dataset_id"],
            "method_id": par['method_id'],
            "metric_ids": scores_all.columns.values,
            "metric_values": scores_all.values[0]
        }
    )
    print(output)
    output.write_h5ad(par['score'], compression='gzip')
    print('Completed', flush=True)
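A quick usage sketch of main above: score a single prediction and inspect the one-row result ('portia' is an arbitrary example model; the paths follow the conventions the script already uses):

eval_par = dict(par)  # reuse the module's VIASH defaults
eval_par.update({
    'dataset_id': 'norman',
    'method_id': 'portia',                                   # arbitrary example model
    'prediction': 'resources/grn_models/norman/portia.csv',  # conventional path
})

scores = main(eval_par)  # one-row DataFrame, one column per metric
print(scores.T)          # transpose so metric names read down the page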
src/metrics/all_metrics/script_all.py (new file):

@@ -0,0 +1,162 @@
import pandas as pd
import anndata as ad
import sys
import numpy as np
import os

meta = {
    "resources_dir": 'src/metrics/',
    "util": 'src/utils'
}
sys.path.append(meta["resources_dir"])
sys.path.append(meta["util"])

from all_metrics.helper import run_consensus, run_ws_distance_background, run_scores_all
# regression metrics used by run_evaluation below
from regression_1.main import main as main_reg1
from regression_2.main import main as main_reg2

par = {
    'layer': 'X_norm',
    "tf_all": "resources/prior/tf_all.csv",
    'skeleton': 'resources/prior/skeleton.csv',
    "apply_tf": True,
    'subsample': -1,
    'verbose': 4,
    'num_workers': 20,
    'binarize': False,
    'max_n_links': 50000,
    'apply_skeleton': False,
    'reg_type': 'ridge'
}


def run_evaluation(dataset, models, models_dir, save_file_name):
    print('------ ', dataset, '------')

    # - determine which models to run
    grn_files_dict = {}
    for model in models:
        print(model)
        grn_file = f"{models_dir}/{model}.csv"
        if not os.path.exists(grn_file):
            print(f"{grn_file} doesn't exist. Skipped.")
            continue
        grn_files_dict[model] = grn_file

    # - actual runs
    i = 0
    for model, grn_file in grn_files_dict.items():
        par['prediction'] = grn_file
        reg1 = main_reg1(par)
        reg2 = main_reg2(par)
        score = pd.concat([reg1, reg2], axis=1)
        score.index = [model]
        if i == 0:
            df_all = score
        else:
            df_all = pd.concat([df_all, score])
        df_all.to_csv(save_file_name)
        print(df_all)
        i += 1


if __name__ == '__main__':
    run_scores_flag = True
    run_consensus_flag = False
    run_ws_distance_background_flag = False
    datasets = ['op', 'replogle2', 'nakatake', 'norman', 'adamson']

    if run_consensus_flag:  # run consensus
        run_consensus(datasets)

    if run_ws_distance_background_flag:  # run background scores for ws distance
        run_ws_distance_background(datasets)

    if run_scores_flag:
        models = ['negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor',
                  'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus']

        run_scores_all(datasets, models=models)

    if False:  # default run
        for dataset in datasets:
            models_dir = f"resources/grn_models/{dataset}"
            scores_dir = f"resources/scores/{dataset}"
            run_consensus([dataset])
            save_file_name = f"{scores_dir}/default_scores.csv"

            run_evaluation(dataset, models, models_dir, save_file_name)

    if True:  # subsample
        # for dataset in ['op', 'replogle2', 'nakatake', 'norman', 'adamson']:
        for dataset in ['op']:
            if dataset == 'op':
                models_subsampled = [f'{model}_{subsample}' for subsample in [1, 2] for model in models]
            else:
                models_subsampled = [f'{model}_{subsample}' for subsample in [0.2, 0.5] for model in models]
            models_dir = f"resources/grn_models/{dataset}"
            scores_dir = f"resources/scores/{dataset}"

            save_file_name = f"{scores_dir}/subsampled.csv"

            run_evaluation(dataset, models_subsampled, models_dir, save_file_name)

    if False:  # run global models
        models = ['pearson_corr']
        dataset = 'op'

        models_dir = "resources/grn_models/global/"
        scores_dir = f"resources/scores/{dataset}"
        # run_consensus([dataset])
        save_file_name = f"{scores_dir}/X_norm-50000-skeleton_False-binarize_False-ridge-global-True.csv"

        run_evaluation(dataset, models, models_dir, save_file_name)

    if False:  # run skeleton
        models = ['negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor',
                  'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus']

        dataset = 'op'

        models_dir = f"resources/grn_models/{dataset}"
        scores_dir = f"resources/scores/{dataset}"
        save_file_name = f"{scores_dir}/X_norm-50000-skeleton_True-binarize_False-ridge-global-False.csv"

        # run_consensus([dataset])
        par['apply_skeleton'] = True
        run_evaluation(dataset, models, models_dir, save_file_name)

    if False:  # run GB
        models = ['negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor',
                  'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus']

        dataset = 'op'

        models_dir = f"resources/grn_models/{dataset}"
        scores_dir = f"resources/scores/{dataset}"
        save_file_name = f"{scores_dir}/X_norm-50000-skeleton_True-binarize_False-GB-global-False.csv"

        # run_consensus([dataset])
        par['apply_skeleton'] = True
        par['reg_type'] = 'GB'
        run_evaluation(dataset, models, models_dir, save_file_name)


# def define_par(dataset):
#     par = {
#         "evaluation_data": f"resources/evaluation_datasets/{dataset}_perturbation.h5ad",
#         'consensus': f'resources/prior/{dataset}_consensus-num-regulators.json',
#         'layer': 'X_norm',
#         "tf_all": "resources/prior/tf_all.csv",
#         'skeleton': 'resources/prior/skeleton.csv',
#         "apply_tf": True,
#         'subsample': -1,
#         'verbose': 4,
#         'num_workers': 20
#     }
#     return par
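One possible follow-up: the edit-in-place booleans in __main__ above could be surfaced as CLI switches. A purely illustrative sketch; the flag names are hypothetical and not part of this commit:

import argparse

# Hypothetical CLI for the run flags; not part of this commit.
parser = argparse.ArgumentParser(description='Run GRN metric scoring passes.')
parser.add_argument('--scores', action='store_true', help='score all models on all datasets')
parser.add_argument('--consensus', action='store_true', help='rebuild the consensus priors')
parser.add_argument('--ws-background', action='store_true', help='rebuild the ws-distance background')
args = parser.parse_args()

run_scores_flag = args.scores
run_consensus_flag = args.consensus
run_ws_distance_background_flag = args.ws_background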
One further file was deleted in this commit; its contents are not rendered.