max n links constraint added to reg2

openproblems-bio · Sep 17, 2024 · ada5099 · ada5099
1 parent aa3da45
commit ada5099
Show file tree

Hide file tree

Showing 5 changed files with 35 additions and 14 deletions.
diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh
@@ -43,7 +43,6 @@ append_entry() {
   - id: ${reg_type}_${1}
     metric_ids: ${metric_ids}
     perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
-    multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
     reg_type: $reg_type
     method_id: $1
     subsample: $subsample

diff --git a/src/metrics/regression_1/main.py b/src/metrics/regression_1/main.py
@@ -211,6 +211,10 @@ def main(par):
     # subset to keep only those links with source as tf
     if par['apply_tf']:
         net = net[net.source.isin(tf_all)]
+    # if 'cell_type' in net.columns:
+    #     print('Taking mean of cell type specific grns')
+    #     net.drop(columns=['cell_type'], inplace=True)
+    #     net = net.groupby(['source', 'target']).mean().reset_index()
 
     subsample = par['subsample']
     max_workers = par['max_workers']

diff --git a/src/metrics/regression_1/script.py b/src/metrics/regression_1/script.py
@@ -7,7 +7,7 @@
 par = {
   "perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad",
   "tf_all": "resources/prior/tf_all.csv",
-  "prediction": "output/portia_celltype_0.csv",
+  "prediction": "resources/grn_models/donor_0_celltype/grnboost2.csv",
   "method_id": "scenic",
   "min_tf": False,
   "max_n_links": 50000,

diff --git a/src/metrics/regression_2/main.py b/src/metrics/regression_2/main.py
@@ -16,17 +16,30 @@
 SEED = 0xCAFE
 N_POINTS_TO_ESTIMATE_BACKGROUND = 20
 
+def select_top_links(net, par):
+    """Keep only the strongest links of a GRN edge table.
+
+    Rows are ranked by the absolute value of their ``weight`` column, largest
+    first, and the first ``par['max_n_links']`` rows are returned with a fresh
+    0..n-1 index.
+    """
+    limit = par['max_n_links']
+    print("Number of links reduced to ", limit)
+    # Order the row labels by descending |weight|, then pull rows in that order.
+    ranked_index = net['weight'].abs().sort_values(ascending=False).index
+    strongest = net.reindex(ranked_index).head(limit)
+    return strongest.reset_index(drop=True)
 
-def load_grn(filepath: str, gene_names: np.ndarray) -> np.ndarray:
+def load_grn(filepath: str, gene_names: np.ndarray, par: Dict[str, Any]) -> np.ndarray:
+    """Load an inferred GRN from CSV into a dense gene-by-gene weight matrix.
+
+    The CSV is read with its first column as the index and must provide
+    ``source``, ``target`` and ``weight`` columns. If a ``cell_type`` column
+    is present, weights are averaged per (source, target) pair. When the
+    table has more than ``par['max_n_links']`` rows, only the links with the
+    largest absolute weight are kept.
+
+    Returns a ``len(gene_names) x len(gene_names)`` float array ``A`` where
+    ``A[i, j]`` is the weight of the link from gene ``i`` to gene ``j``;
+    links whose source or target is not in ``gene_names`` are ignored.
+    """
     gene_dict = {gene_name: i for i, gene_name in enumerate(gene_names)}
     A = np.zeros((len(gene_names), len(gene_names)), dtype=float)
     df = pd.read_csv(filepath, sep=',', header='infer', index_col=0)
+    if 'cell_type' in df.columns:
+        print('Taking mean of cell type specific grns')
+        df.drop(columns=['cell_type'], inplace=True)
+        df = df.groupby(['source', 'target']).mean().reset_index()
+
+    # Truncate BEFORE filling the matrix: truncating afterwards would leave A
+    # holding every link and make max_n_links a no-op.
+    if df.shape[0] > par['max_n_links']:
+        df = select_top_links(df, par)
+
     for source, target, weight in zip(df['source'], df['target'], df['weight']):
         if (source not in gene_dict) or (target not in gene_dict):
             continue
         i = gene_dict[source]
         j = gene_dict[target]
         A[i, j] = float(weight)
     return A
 
 
@@ -276,12 +289,8 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
 
     # Load inferred GRN
     print(f'Loading GRN', flush=True)
-    grn = load_grn(par['prediction'], gene_names)
-    # if 'cell_type' in grn.columns:
-    #     print('Non specific')
-    #     grn.drop(columns=['cell_type'], inplace=True)
-    #     grn = grn.groupby(['source', 'target']).mean().reset_index()
-
+    grn = load_grn(par['prediction'], gene_names, par)
+
     # Load and standardize perturbation data
     layer = par['layer']
     X = perturbation_data.layers[layer]

diff --git a/src/metrics/regression_2/script.py b/src/metrics/regression_2/script.py
@@ -8,16 +8,25 @@
 par = {
+    # Each key must appear only once: a repeated key in a dict literal
+    # silently overrides the earlier value.
     'perturbation_data': 'resources/grn-benchmark/perturbation_data.h5ad',
     'layer': 'scgen_pearson',
-    'prediction': 'resources/grn_models/collectri.csv',
-    'tfs': 'resources/prior/tf_all.csv',
-    'consensus': 'resources/grn-benchmark/consensus-num-regulators.json',
+    "prediction": "resources/grn_models/donor_0_celltype/grnboost2.csv",
+    'tf_all': 'resources/prior/tf_all.csv',
+    "max_n_links": 50000,
+    'consensus': 'resources/prior/consensus-num-regulators.json',
     'score': 'output/score_regression2.csv',
     'reg_type': 'ridge',
-    'static_only': True
+    'static_only': True,
+    'subsample': -2,
+    'max_workers': 4,
+    'apply_tf': True,
+    'clip_scores': True,
+    'method_id': 'grnboost'
 
 }
 ## VIASH END
-
+# meta = {
+#   "resources_dir":'src/metrics/regression_1/'
+# }
 print(par)
 sys.path.append(meta['resources_dir'])
 from main import main