Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deep Learning Analysis Updates and Reproducibility #1

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 37 additions & 7 deletions mpnst/calculate_statistics.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
import argparse
import numpy as np

def calculate_statistics(file_path):
Expand All @@ -7,9 +9,19 @@ def calculate_statistics(file_path):
lines = file.readlines()[1:] # Skip header
for line in lines:
values = line.strip().split('\t')
rmse = float(values[1])
pearson = float(values[2])
data.append((rmse, pearson))
try:
rmse = float(values[1])
pearson = float(values[2])
data.append((rmse, pearson))
except ValueError:
# Skip lines with non-numeric data
continue

# Ensure there is valid data to process
num_seeds = len(data)
if num_seeds == 0:
print(f"No valid data found in file: {file_path}")
return

# Separate RMSE and Pearson Correlation
rmse_values = [item[0] for item in data]
Expand All @@ -21,12 +33,30 @@ def calculate_statistics(file_path):
mean_pearson = np.mean(pearson_values)
std_pearson = np.std(pearson_values)

# Print results
print(f"Results for file: {file_path}")
print(f"Number of seeds used: {num_seeds}")
print(f"Mean ± Standard Deviation of RMSE: {mean_rmse:.3f} ± {std_rmse:.3f}")
print(f"Mean ± Standard Deviation of Pearson Correlation: {mean_pearson:.3f} ± {std_pearson:.3f}")
print("-" * 40)

def process_all_files_in_folder(results_folder):
    """Run ``calculate_statistics`` on every ``.txt`` file in a folder.

    Args:
        results_folder: Path of the directory to scan. Only its direct
            entries are considered; sub-directories are not descended into.

    Entries are visited in sorted name order so the printed report is
    reproducible from run to run (``os.listdir`` alone gives no ordering
    guarantee). Entries that end in ``.txt`` but are not regular files
    are skipped.
    """
    for filename in sorted(os.listdir(results_folder)):
        if not filename.endswith(".txt"):
            continue
        file_path = os.path.join(results_folder, filename)
        # A directory that happens to be named "*.txt" cannot be read as a
        # results table, so only regular files are passed on.
        if os.path.isfile(file_path):
            calculate_statistics(file_path)

def main():
    """Parse the command line and report statistics for one results folder.

    Takes a single positional argument, ``results_folder``, and hands it to
    ``process_all_files_in_folder``. No file name is hard-coded: every file
    to analyse is discovered from that folder.
    """
    parser = argparse.ArgumentParser(
        description="Process result files in a folder to calculate statistics."
    )
    parser.add_argument(
        'results_folder',
        type=str,
        help="Path to the folder containing the result files.",
    )
    args = parser.parse_args()

    process_all_files_in_folder(args.results_folder)


if __name__ == "__main__":
    main()
197 changes: 109 additions & 88 deletions mpnst/coderdata_auc_deep_TTA_check_self.py

Large diffs are not rendered by default.

264 changes: 162 additions & 102 deletions mpnst/coderdata_auc_deep_TTA_cross_test.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/csh
# SLURM batch job: runs coderdata_auc_deep_TTA_cross_test.py with BeatAML as the
# training study and CCLE as the test study, using the dss dose-response metric.
#SBATCH -A coderdata_mpnst # Account (project) charged for the job
#SBATCH -t 12:00:00 # Wall-clock time limit (12 hours)
#SBATCH -N 1 # Number of nodes
#SBATCH -n 4 # Number of tasks (adjust as needed)
#SBATCH -J deeptta_BeatAML_CCLE # Job name
#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/BeatAML_CCLE_cross_%j.out # Standard output file, %j will be replaced by job ID
#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/BeatAML_CCLE_cross_%j.err # Standard error file, %j will be replaced by job ID
#SBATCH --partition=dlv # Partition to submit to
#SBATCH --gres=gpu:1 # Request 1 GPU
# Make sure the module commands are available
source /etc/profile.d/modules.csh
# NOTE(review): the path below looks like a conda environment directory, not a
# sourceable csh script - confirm this line (and the `conda activate` further down) works as intended.
source /people/jaco059/.conda/envs/CDRP # location of conda environment

# Start from a clean module environment, then activate the conda environment
module purge # Clear all loaded modules
conda activate CDRP

# Unlimit system resources (for csh/tcsh)
unlimit

# It is extremely useful to record the modules you have loaded, your limit settings,
# your current environment variables and the dynamically loaded libraries
# that your executable is linked against in your job output file.
echo "loaded modules"
# Capture stdout+stderr of `module list` in a scratch file so it can be echoed into the job log
module list >& _modules.lis_
cat _modules.lis_
/bin/rm -f _modules.lis_

echo "limits"
limit

echo "Environment Variables"
printenv

# Run the cross-study experiment: train on BeatAML, test on CCLE (100 epochs, data split seed 10)
python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
--data_split_seed 10 \
--n_epochs 100 \
--train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_transcriptomics.csv \
--train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_experiments.tsv \
--train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_drugs.tsv \
--test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_transcriptomics.csv \
--test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_experiments.tsv \
--test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_drugs.tsv \
--output_prefix BeatAML_CCLE_DSS \
--train_study_description BeatAML \
--test_study_description CCLE \
--dose_response_metric dss \
--train_log_transform True \
--test_log_transform True \
--checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/beataml_ccle_best.pt
# Wait for background processes to finish (no command above is launched with `&`)
wait
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/csh
# SLURM batch job: runs coderdata_auc_deep_TTA_cross_test.py with BeatAML as the
# training study and MPNST as the test study, using the dss dose-response metric.
#SBATCH -A coderdata_mpnst # Account (project) charged for the job
#SBATCH -t 12:00:00 # Wall-clock time limit (12 hours)
#SBATCH -N 1 # Number of nodes
#SBATCH -n 4 # Number of tasks (adjust as needed)
#SBATCH -J deeptta_BeatAML_MPNST # Job name
#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/BeatAML_MPNST_cross_%j.out # Standard output file, %j will be replaced by job ID
#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/BeatAML_MPNST_cross_%j.err # Standard error file, %j will be replaced by job ID
#SBATCH --partition=dlv # Partition to submit to
#SBATCH --gres=gpu:1 # Request 1 GPU
# Make sure the module commands are available
source /etc/profile.d/modules.csh
# NOTE(review): the path below looks like a conda environment directory, not a
# sourceable csh script - confirm this line (and the `conda activate` further down) works as intended.
source /people/jaco059/.conda/envs/CDRP # location of conda environment

# Start from a clean module environment, then activate the conda environment
module purge # Clear all loaded modules
conda activate CDRP

# Unlimit system resources (for csh/tcsh)
unlimit

# It is extremely useful to record the modules you have loaded, your limit settings,
# your current environment variables and the dynamically loaded libraries
# that your executable is linked against in your job output file.
echo "loaded modules"
# Capture stdout+stderr of `module list` in a scratch file so it can be echoed into the job log
module list >& _modules.lis_
cat _modules.lis_
/bin/rm -f _modules.lis_

echo "limits"
limit

echo "Environment Variables"
printenv

# Run the cross-study experiment: train on BeatAML, test on MPNST (100 epochs, data split seed 10)
python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
--data_split_seed 10 \
--n_epochs 100 \
--train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_transcriptomics.csv \
--train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_experiments.tsv \
--train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_drugs.tsv \
--test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_transcriptomics.csv \
--test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_remapped_experiments.tsv \
--test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_drugs.tsv \
--output_prefix BeatAML_MPNST_DSS \
--train_study_description BeatAML \
--test_study_description MPNST \
--dose_response_metric dss \
--train_log_transform True \
--test_log_transform True \
--checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/beataml_mpnst_best.pt
# Wait for background processes to finish (no command above is launched with `&`)
wait
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/csh
# SLURM batch job: runs coderdata_auc_deep_TTA_cross_test.py with CCLE as the
# training study and BeatAML as the test study, using the dss dose-response metric.
#SBATCH -A coderdata_mpnst # Account (project) charged for the job
#SBATCH -t 12:00:00 # Wall-clock time limit (12 hours)
#SBATCH -N 1 # Number of nodes
#SBATCH -n 4 # Number of tasks (adjust as needed)
#SBATCH -J deeptta_CCLE_beataml # Job name
#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/CCLE_BeatAML_cross_%j.out # Standard output file, %j will be replaced by job ID
#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/CCLE_BeatAML_cross_%j.err # Standard error file, %j will be replaced by job ID
#SBATCH --partition=dlv # Partition to submit to
#SBATCH --gres=gpu:1 # Request 1 GPU
# Make sure the module commands are available
source /etc/profile.d/modules.csh
# NOTE(review): the path below looks like a conda environment directory, not a
# sourceable csh script - confirm this line (and the `conda activate` further down) works as intended.
source /people/jaco059/.conda/envs/CDRP # location of conda environment

# Start from a clean module environment, then activate the conda environment
module purge # Clear all loaded modules
conda activate CDRP

# Unlimit system resources (for csh/tcsh)
unlimit

# It is extremely useful to record the modules you have loaded, your limit settings,
# your current environment variables and the dynamically loaded libraries
# that your executable is linked against in your job output file.
echo "loaded modules"
# Capture stdout+stderr of `module list` in a scratch file so it can be echoed into the job log
module list >& _modules.lis_
cat _modules.lis_
/bin/rm -f _modules.lis_

echo "limits"
limit

echo "Environment Variables"
printenv

# Run the cross-study experiment: train on CCLE, test on BeatAML (100 epochs, data split seed 10)
# NOTE(review): --output_prefix below ends in lower-case `_dss`; confirm the casing
# matches what downstream tooling expects.
python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
--data_split_seed 10 \
--n_epochs 100 \
--train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_transcriptomics.csv \
--train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_experiments.tsv \
--train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_drugs.tsv \
--test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_transcriptomics.csv \
--test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_experiments.tsv \
--test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_drugs.tsv \
--output_prefix CCLE_BeatAML_dss \
--train_study_description CCLE \
--test_study_description BeatAML \
--dose_response_metric dss \
--train_log_transform True \
--test_log_transform True \
--checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/ccle_beataml_best.pt
# Wait for background processes to finish (no command above is launched with `&`)
wait
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/csh
# SLURM batch job: runs coderdata_auc_deep_TTA_cross_test.py with CCLE as the
# training study and MPNST as the test study, using the dss dose-response metric.
#SBATCH -A coderdata_mpnst # Account (project) charged for the job
#SBATCH -t 12:00:00 # Wall-clock time limit (12 hours)
#SBATCH -N 1 # Number of nodes
#SBATCH -n 4 # Number of tasks (adjust as needed)
#SBATCH -J deeptta_CCLE_mpnst # Job name
#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/CCLE_MPNST_cross_%j.out # Standard output file, %j will be replaced by job ID
#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/CCLE_MPNST_cross_%j.err # Standard error file, %j will be replaced by job ID
#SBATCH --partition=dlv # Partition to submit to
#SBATCH --gres=gpu:1 # Request 1 GPU
# Make sure the module commands are available
source /etc/profile.d/modules.csh
# NOTE(review): the path below looks like a conda environment directory, not a
# sourceable csh script - confirm this line (and the `conda activate` further down) works as intended.
source /people/jaco059/.conda/envs/CDRP # location of conda environment

# Start from a clean module environment, then activate the conda environment
module purge # Clear all loaded modules
conda activate CDRP

# Unlimit system resources (for csh/tcsh)
unlimit

# It is extremely useful to record the modules you have loaded, your limit settings,
# your current environment variables and the dynamically loaded libraries
# that your executable is linked against in your job output file.
echo "loaded modules"
# Capture stdout+stderr of `module list` in a scratch file so it can be echoed into the job log
module list >& _modules.lis_
cat _modules.lis_
/bin/rm -f _modules.lis_

echo "limits"
limit

echo "Environment Variables"
printenv

# Run the cross-study experiment: train on CCLE, test on MPNST (100 epochs, data split seed 10).
# The script path starts with a single "/" - a leading "//" has implementation-defined meaning.
python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
--data_split_seed 10 \
--n_epochs 100 \
--train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_transcriptomics.csv \
--train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_experiments.tsv \
--train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_drugs.tsv \
--test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_transcriptomics.csv \
--test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_remapped_experiments.tsv \
--test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_drugs.tsv \
--output_prefix CCLE_MPNST_DSS \
--train_study_description CCLE \
--test_study_description MPNST \
--dose_response_metric dss \
--train_log_transform True \
--test_log_transform True \
--checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/ccle_mpnst_best.pt
# Wait for background processes to finish (no command above is launched with `&`)
wait
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/csh
# SLURM batch job: runs coderdata_auc_deep_TTA_cross_test.py with MPNST as the
# training study and BeatAML as the test study, using the dss dose-response metric.
#SBATCH -A coderdata_mpnst # Account (project) charged for the job
#SBATCH -t 12:00:00 # Wall-clock time limit (12 hours)
#SBATCH -N 1 # Number of nodes
#SBATCH -n 4 # Number of tasks (adjust as needed)
#SBATCH -J deeptta_MPNST_BeatAML # Job name
#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/MPNST_BeatAML_cross_%j.out # Standard output file, %j will be replaced by job ID
#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/MPNST_BeatAML_cross_%j.err # Standard error file, %j will be replaced by job ID
#SBATCH --partition=dlv # Partition to submit to
#SBATCH --gres=gpu:1 # Request 1 GPU
# Make sure the module commands are available
source /etc/profile.d/modules.csh
# NOTE(review): the path below looks like a conda environment directory, not a
# sourceable csh script - confirm this line (and the `conda activate` further down) works as intended.
source /people/jaco059/.conda/envs/CDRP # location of conda environment

# Start from a clean module environment, then activate the conda environment
module purge # Clear all loaded modules
conda activate CDRP

# Unlimit system resources (for csh/tcsh)
unlimit

# It is extremely useful to record the modules you have loaded, your limit settings,
# your current environment variables and the dynamically loaded libraries
# that your executable is linked against in your job output file.
echo "loaded modules"
# Capture stdout+stderr of `module list` in a scratch file so it can be echoed into the job log
module list >& _modules.lis_
cat _modules.lis_
/bin/rm -f _modules.lis_

echo "limits"
limit

echo "Environment Variables"
printenv

# Run the cross-study experiment: train on MPNST, test on BeatAML (100 epochs, data split seed 10).
# The script path starts with a single "/" - a leading "//" has implementation-defined meaning.
python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
--data_split_seed 10 \
--n_epochs 100 \
--train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_transcriptomics.csv \
--train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_remapped_experiments.tsv \
--train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_drugs.tsv \
--test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_transcriptomics.csv \
--test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_experiments.tsv \
--test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_drugs.tsv \
--output_prefix MPNST_BeatAML_DSS \
--train_study_description MPNST \
--test_study_description BeatAML \
--dose_response_metric dss \
--train_log_transform True \
--test_log_transform True \
--checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/mpnst_beataml_best.pt
# Wait for background processes to finish (no command above is launched with `&`)
wait
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/csh
# SLURM batch job: runs coderdata_auc_deep_TTA_cross_test.py with MPNST as the
# training study and CCLE as the test study, using the dss dose-response metric.
#SBATCH -A coderdata_mpnst # Account (project) charged for the job
#SBATCH -t 12:00:00 # Wall-clock time limit (12 hours)
#SBATCH -N 1 # Number of nodes
#SBATCH -n 4 # Number of tasks (adjust as needed)
#SBATCH -J deeptta_MPNST_CCLE # Job name
#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/MPNST_CCLE_cross_%j.out # Standard output file, %j will be replaced by job ID
#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/MPNST_CCLE_cross_%j.err # Standard error file, %j will be replaced by job ID
#SBATCH --partition=dlv # Partition to submit to
#SBATCH --gres=gpu:1 # Request 1 GPU
# Make sure the module commands are available
source /etc/profile.d/modules.csh
# NOTE(review): the path below looks like a conda environment directory, not a
# sourceable csh script - confirm this line (and the `conda activate` further down) works as intended.
source /people/jaco059/.conda/envs/CDRP # location of conda environment

# Start from a clean module environment, then activate the conda environment
module purge # Clear all loaded modules
conda activate CDRP

# Unlimit system resources (for csh/tcsh)
unlimit

# It is extremely useful to record the modules you have loaded, your limit settings,
# your current environment variables and the dynamically loaded libraries
# that your executable is linked against in your job output file.
echo "loaded modules"
# Capture stdout+stderr of `module list` in a scratch file so it can be echoed into the job log
module list >& _modules.lis_
cat _modules.lis_
/bin/rm -f _modules.lis_

echo "limits"
limit

echo "Environment Variables"
printenv

# Run the cross-study experiment: train on MPNST, test on CCLE (100 epochs, data split seed 10)
python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
--data_split_seed 10 \
--n_epochs 100 \
--train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_transcriptomics.csv \
--train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_remapped_experiments.tsv \
--train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_drugs.tsv \
--test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_transcriptomics.csv \
--test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_experiments.tsv \
--test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_drugs.tsv \
--output_prefix MPNST_CCLE_DSS \
--train_study_description MPNST \
--test_study_description CCLE \
--dose_response_metric dss \
--train_log_transform True \
--test_log_transform True \
--checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/mpnst_ccle_best.pt
# Wait for background processes to finish (no command above is launched with `&`)
wait
Loading