Skip to content

Commit

Permalink
max n links constraint added to reg2
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 17, 2024
1 parent aa3da45 commit ada5099
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 14 deletions.
1 change: 0 additions & 1 deletion scripts/run_grn_evaluation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ append_entry() {
- id: ${reg_type}_${1}
metric_ids: ${metric_ids}
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
reg_type: $reg_type
method_id: $1
subsample: $subsample
Expand Down
4 changes: 4 additions & 0 deletions src/metrics/regression_1/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ def main(par):
# subset to keep only those links with source as tf
if par['apply_tf']:
net = net[net.source.isin(tf_all)]
# if 'cell_type' in net.columns:
# print('Taking mean of cell type specific grns')
# net.drop(columns=['cell_type'], inplace=True)
# net = net.groupby(['source', 'target']).mean().reset_index()

subsample = par['subsample']
max_workers = par['max_workers']
Expand Down
2 changes: 1 addition & 1 deletion src/metrics/regression_1/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
par = {
"perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad",
"tf_all": "resources/prior/tf_all.csv",
"prediction": "output/portia_celltype_0.csv",
"prediction": "resources/grn_models/donor_0_celltype/grnboost2.csv",
"method_id": "scenic",
"min_tf": False,
"max_n_links": 50000,
Expand Down
23 changes: 16 additions & 7 deletions src/metrics/regression_2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,30 @@
# Seed constant (0xCAFE == 51966). Where it is consumed is outside this hunk.
SEED = 0xCAFE
# NOTE(review): presumably the number of points used when estimating a
# background score distribution -- confirm against the code that reads it.
N_POINTS_TO_ESTIMATE_BACKGROUND = 20

def select_top_links(net, par):
    """Keep the ``par['max_n_links']`` links with the largest absolute weight.

    Args:
        net: DataFrame with at least a numeric ``weight`` column.
        par: Parameter dict; ``par['max_n_links']`` is the number of rows kept.

    Returns:
        A new DataFrame sorted by descending ``|weight|``, truncated to
        ``par['max_n_links']`` rows, with a fresh 0..n-1 index.
    """
    max_n_links = par['max_n_links']
    print("Number of links reduced to ", max_n_links)
    # Work on a unique positional index: ``reindex`` is label based and raises
    # when the incoming index carries duplicate labels (possible when the CSV
    # was read with ``index_col=0``).
    net = net.reset_index(drop=True)
    order = net['weight'].abs().sort_values(ascending=False).index
    return net.reindex(order).head(max_n_links).reset_index(drop=True)


def load_grn(filepath: str, gene_names: np.ndarray, par: Dict[str, Any]) -> np.ndarray:
    """Load an inferred GRN from CSV into a dense gene-by-gene weight matrix.

    The CSV is read with its first column as the index and must provide
    ``source``, ``target`` and ``weight`` columns. If a ``cell_type`` column is
    present, weights are averaged per (source, target) pair. The network is
    then limited to the ``par['max_n_links']`` strongest links *before* the
    matrix is filled, so the limit is reflected in the returned matrix.

    Args:
        filepath: Path to the GRN CSV file.
        gene_names: Gene identifiers; position ``i`` maps to row/column ``i``.
        par: Parameter dict; ``par['max_n_links']`` caps the number of links.

    Returns:
        Array ``A`` of shape ``(len(gene_names), len(gene_names))`` where
        ``A[i, j]`` is the weight of the link from gene ``i`` (source) to
        gene ``j`` (target); entries without a link are 0.
    """
    df = pd.read_csv(filepath, sep=',', header='infer', index_col=0)
    if 'cell_type' in df.columns:
        print('Taking mean of cell type specific grns')
        df = df.drop(columns=['cell_type'])
        df = df.groupby(['source', 'target']).mean().reset_index()

    # Apply the link budget before building the matrix; doing it afterwards
    # would leave the returned matrix unaffected.
    if df.shape[0] > par['max_n_links']:
        df = select_top_links(df, par)

    gene_dict = {gene_name: i for i, gene_name in enumerate(gene_names)}
    A = np.zeros((len(gene_names), len(gene_names)), dtype=float)
    for source, target, weight in zip(df['source'], df['target'], df['weight']):
        # Links touching genes that are not in ``gene_names`` are ignored.
        if (source not in gene_dict) or (target not in gene_dict):
            continue
        A[gene_dict[source], gene_dict[target]] = float(weight)
    return A


Expand Down Expand Up @@ -276,12 +289,8 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:

# Load inferred GRN
print(f'Loading GRN', flush=True)
grn = load_grn(par['prediction'], gene_names)
# if 'cell_type' in grn.columns:
# print('Non specific')
# grn.drop(columns=['cell_type'], inplace=True)
# grn = grn.groupby(['source', 'target']).mean().reset_index()

grn = load_grn(par['prediction'], gene_names, par)

# Load and standardize perturbation data
layer = par['layer']
X = perturbation_data.layers[layer]
Expand Down
19 changes: 14 additions & 5 deletions src/metrics/regression_2/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,25 @@
# Default parameters for running this script directly during development.
# Each key appears exactly once; where the rendered diff listed a key twice,
# the later value is the one kept.
par = {
    'perturbation_data': 'resources/grn-benchmark/perturbation_data.h5ad',
    'layer': 'scgen_pearson',
    'prediction': 'resources/grn_models/donor_0_celltype/grnboost2.csv',
    # NOTE(review): 'tfs' looks like the pre-rename spelling of 'tf_all';
    # confirm nothing reads it and drop it.
    'tfs': 'resources/prior/tf_all.csv',
    'consensus': 'resources/prior/consensus-num-regulators.json',
    'tf_all': 'resources/prior/tf_all.csv',
    'max_n_links': 50000,
    'score': 'output/score_regression2.csv',
    'reg_type': 'ridge',
    'static_only': True,
    'subsample': -2,
    'max_workers': 4,
    'apply_tf': True,
    'clip_scores': True,
    'method_id': 'grnboost',
}
## VIASH END

# meta = {
# "resources_dir":'src/metrics/regression_1/'
# }
print(par)
sys.path.append(meta['resources_dir'])
from main import main
Expand Down

0 comments on commit ada5099

Please sign in to comment.