Skip to content

Commit

Permalink
Fix bug in regression 1
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Aug 25, 2024
1 parent dbe0511 commit 3651b47
Show file tree
Hide file tree
Showing 10 changed files with 1,046 additions and 51 deletions.
864 changes: 864 additions & 0 deletions runs.ipynb

Large diffs are not rendered by default.

24 changes: 11 additions & 13 deletions scripts/run_grn_evaluation_tw.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#!/bin/bash

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"

RUN_ID="pearson_gb"
RUN_ID="pearson_gb_subsample"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
# grn_models_folder="${resources_dir}/supplementary/grn_models_noised"
grn_models_folder="${resources_dir}/grn_models"
reg_type=GB
subsample=-2
max_workers=20
max_workers=10

param_file="./params/${RUN_ID}.yaml"

Expand Down Expand Up @@ -77,14 +75,14 @@ HERE
# -c src/common/nextflow_helpers/labels_ci.config \
# -params-file ${param_file}

./tw-windows-x86_64.exe launch `
https://github.com/openproblems-bio/task_grn_benchmark.git `
--revision build/main `
--pull-latest `
--main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
--workspace 53907369739130 `
--compute-env 6TeIFgV5OY4pJCk8I0bfOh `
--params-file ./params/scgen_pearson_gb_pcs.yaml `
--config src/common/nextflow_helpers/labels_tw.config
# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_benchmark.git `
# --revision build/main `
# --pull-latest `
# --main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
# --workspace 53907369739130 `
# --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/scgen_pearson_gb_pcs.yaml `
# --config src/common/nextflow_helpers/labels_tw.config


86 changes: 86 additions & 0 deletions scripts/run_pc_vs_nc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash
# Compare GRN-model performance on protein-coding vs non-coding controls:
# generate a Nextflow params file pairing every GRN model with several
# subsampling codes, then launch the robustness-analysis workflow locally.
# Usage: ./scripts/run_pc_vs_nc.sh <run_suffix>
#   $1 is appended to RUN_ID and names the params/output directories.

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"

# Subsample codes interpreted by src/metrics/regression_1/main.py:
#   -2: one sample per (cell_type, sm_name) combination
#   -3: negative control (DMSO)   -4: positive control (Dabrafenib/Belinostat)
subsamples=(-2 -3 -4)

RUN_ID="robust_analy_$1"
resources_dir="resources"
publish_dir="output/${RUN_ID}"

# resources_dir="s3://openproblems-data/resources/grn"
# publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"

grn_models_folder="${resources_dir}/grn_models"


# Evaluation settings shared by every generated param_list entry.
reg_type=ridge
max_workers=10
layer=pearson

param_file="./params/${RUN_ID}.yaml"

# GRN inference methods to evaluate; each needs ${grn_models_folder}/<name>.csv.
grn_names=(
"collectri"
"celloracle"
"scenicplus"
"figr"
"granie"
"scglue"
)



# Start writing to the YAML file
cat > $param_file << HERE
param_list:
HERE

# Append one param_list entry to $param_file.
#   $1 = GRN model name, $2 = subsample code.
# NOTE: the heredoc body is the generated YAML — indentation is significant.
append_entry() {
  cat >> $param_file << HERE
  - id: ${1}_${2}
    perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
    layer: ${layer}
    reg_type: $reg_type
    method_id: ${2}-${1}
    subsample: $2
    max_workers: $max_workers
    consensus: ${resources_dir}/prior/consensus-num-regulators.json
    prediction: ${grn_models_folder}/$1.csv
    degree: 0
HERE
}
# One entry per (subsample code, GRN model) pair.
for subsample in "${subsamples[@]}"; do
    for grn_name in "${grn_names[@]}"; do
        append_entry "$grn_name" "$subsample"
    done
done



# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

# Run the workflow locally with Docker against the generated params file.
nextflow run . \
-main-script target/nextflow/workflows/run_robustness_analysis/main.nf \
-profile docker \
-with-trace \
-c src/common/nextflow_helpers/labels_ci.config \
-params-file ${param_file}

# Alternative: Seqera/Tower launch from Windows (kept for reference).
# NOTE(review): this commented command targets run_grn_evaluation, not the
# robustness workflow above — confirm before reusing.
# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_benchmark.git `
# --revision build/main `
# --pull-latest `
# --main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
# --workspace 53907369739130 `
# --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/scgen_pearson_gb_pcs.yaml `
# --config src/common/nextflow_helpers/labels_tw.config


27 changes: 14 additions & 13 deletions scripts/run_robust_analys.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"

RUN_ID="robust_analy"
degrees=(0 10 20 50 100)
noise_type="$1"
echo $noise_type

RUN_ID="robust_analy_$1"
resources_dir="resources"
publish_dir="output/${RUN_ID}"

Expand All @@ -19,7 +23,6 @@ layer=pearson

param_file="./params/${RUN_ID}.yaml"


grn_names=(
"collectri"
"celloracle"
Expand All @@ -29,8 +32,7 @@ grn_names=(
"scglue"
)

degrees=(10 20 50 100)
types=(links weight)


# Start writing to the YAML file
cat > $param_file << HERE
Expand All @@ -39,28 +41,27 @@ HERE

append_entry() {
cat >> $param_file << HERE
- id: ${1}_${2}_${3}
- id: ${1}_${2}
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
layer: ${layer}
reg_type: $reg_type
method_id: $1
method_id: ${2}-${1}
subsample: $subsample
max_workers: $max_workers
consensus: ${resources_dir}/prior/consensus-num-regulators.json
prediction: ${grn_models_folder}/$1.csv
degree: ${3}
type: ${2}
degree: ${2}
noise_type: ${noise_type}
HERE
}
# Loop through grn_names and layers
for type in "${types[@]}"; do
for degree in "${degrees[@]}"; do
for grn_name in "${grn_names[@]}"; do
append_entry "$grn_name" "$type" "$degree"
done
for degree in "${degrees[@]}"; do
for grn_name in "${grn_names[@]}"; do
append_entry "$grn_name" "$degree"
done
done


# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
output_state: "state.yaml"
Expand Down
2 changes: 0 additions & 2 deletions src/methods/multi_omics/scglue/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ def preprocess(rna, atac, par):
sc.pp.neighbors(atac, use_rep="X_lsi", metric="cosine")
sc.tl.umap(atac)
print('step 2 completed')



scglue.data.get_gene_annotation(
rna, gtf=par['annotation_file'],
Expand Down
9 changes: 7 additions & 2 deletions src/metrics/regression_1/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,6 @@ def main(par):
reg_type = par['reg_type']
max_workers = par['max_workers']
layer = par["layer"]
pert_df = pd.DataFrame(perturbation_data.layers[layer], columns=gene_names)

if subsample == -1:
pass
elif subsample == -2: # one combination of cell_type, sm_name
Expand All @@ -192,11 +190,18 @@ def main(par):
for _, row in obs.iterrows():
mask.append((sampled_obs==row).all(axis=1).any())
perturbation_data = perturbation_data[mask,:]
elif subsample == -3: #negative control
mask = perturbation_data.obs.sm_name == 'Dimethyl Sulfoxide'
perturbation_data = perturbation_data[mask,:]
elif subsample == -4: #positive control
mask = perturbation_data.obs.sm_name.isin(['Dabrafenib', 'Belinostat'])
perturbation_data = perturbation_data[mask,:]
else:
perturbation_data = perturbation_data[np.random.choice(perturbation_data.n_obs, subsample, replace=False), :]

print(perturbation_data.shape)

pert_df = pd.DataFrame(perturbation_data.layers[layer], columns=gene_names)
pert_df = pert_df.T # make it gene*sample

# process net
Expand Down
74 changes: 60 additions & 14 deletions src/robustness_analysis/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,74 @@
"prediction": "resources/grn_models/collectri.csv",
"prediction_n": "output/grn_noised.csv",
'degree': 20,
'type': 'links'
'noise_type': 'links'
}

## VIASH END

degree = par['degree']/100
type = par['noise_type']


prediction = pd.read_csv(par['prediction'])
assert 'weight' in prediction.columns

if type =='weight':
print('Add noise to weight')
std_dev = prediction['weight'].std()
noise = np.random.normal(0, degree * std_dev, size=prediction['weight'].shape)
prediction['weight'] += noise

elif type =='links':
print('Permute links')
num_rows_to_permute = int(len(prediction) * degree)
permute_indices = np.random.choice(prediction.index, size=num_rows_to_permute, replace=False)


if type == 'weight': # add noise to weight
assert 'weight' in prediction.columns
print('Add noise to weight')
std_dev = prediction['weight'].std()
noise = np.random.normal(0, degree * std_dev, size=prediction['weight'].shape)
prediction['weight'] += noise

elif type == 'links': # shuffle source-target-weight
print('Permute links')
num_rows_to_permute = int(len(prediction) * degree)
permute_indices = np.random.choice(prediction.index, size=num_rows_to_permute, replace=False)
prediction.loc[permute_indices, 'weight'] = np.random.permutation(prediction.loc[permute_indices, 'weight'].values)

elif type == 'net': # shuffle source-target matrix
print('Permute links')

prediction.loc[permute_indices, 'weight'] = np.random.permutation(prediction.loc[permute_indices, 'weight'].values)
# 1. Pivot the GRN with target as index and source as columns
pivot_df = prediction.pivot(index='target', columns='source', values='weight')

# Fill NaNs with 0 or a value of your choice
pivot_df.fillna(0, inplace=True)

# 2. Randomly choose 20% of the matrix to shuffle
matrix_flattened = pivot_df.values.flatten()
n_elements = len(matrix_flattened)
n_shuffle = int(n_elements * degree)

# Randomly select 20% of the matrix elements' indices
shuffle_indices = np.random.choice(n_elements, n_shuffle, replace=False)

# Get the values that will be shuffled
shuffle_values = matrix_flattened[shuffle_indices]

# 3. Shuffle the selected values
np.random.shuffle(shuffle_values)

# Assign the shuffled values back to the selected positions
matrix_flattened[shuffle_indices] = shuffle_values

# Reshape the flattened array back into the matrix
pivot_df_shuffled = pd.DataFrame(matrix_flattened.reshape(pivot_df.shape),
index=pivot_df.index,
columns=pivot_df.columns)

flat_df = pivot_df_shuffled.reset_index()

# Melt the DataFrame to turn it back into long-form (source-target-weight)
prediction = flat_df.melt(id_vars='target', var_name='source', value_name='weight')


prediction = prediction[prediction['weight'] !=0 ].reset_index(drop=True)


else:
raise ValueError(f'Wrong type ({type}) for adding noise')

print('Output noised GRN')
prediction.to_csv(par['prediction_n'])

3 changes: 1 addition & 2 deletions src/workflows/run_grn_evaluation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ workflow run_wf {

// construct list of metrics
metrics = [
regression_1,
regression_2
regression_1
]

/***************************
Expand Down
2 changes: 1 addition & 1 deletion src/workflows/run_robustness_analysis/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ functionality:
required: false
direction: input
default: 20
- name: --type
- name: --noise_type
type: string
required: false
direction: input
Expand Down
6 changes: 2 additions & 4 deletions src/workflows/run_robustness_analysis/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ workflow run_wf {

| noise_grn.run(
fromState: [
prediction: "prediction"
prediction: "prediction", degree: "degree", noise_type: "noise_type"
],
toState: [
prediction_n: "prediction_n"
Expand All @@ -49,9 +49,7 @@ workflow run_wf {
reg_type: "reg_type",
method_id: "method_id",
max_workers: "max_workers",
consensus: "consensus",
degree: "degree",
type: "type"
consensus: "consensus"
],
// use 'toState' to publish that component's outputs to the overall state
toState: { id, output, state, comp ->
Expand Down

0 comments on commit 3651b47

Please sign in to comment.