PNNL-CompBio · jjacobson95 · Sep 13, 2024 · Sep 16, 2024 · Sep 16, 2024
diff --git a/mpnst/calculate_statistics.py b/mpnst/calculate_statistics.py
@@ -1,3 +1,5 @@
+import os
+import argparse
 import numpy as np
 
 def calculate_statistics(file_path):
@@ -7,9 +9,19 @@ def calculate_statistics(file_path):
         lines = file.readlines()[1:]  # Skip header
         for line in lines:
             values = line.strip().split('\t')
-            rmse = float(values[1])
-            pearson = float(values[2])
-            data.append((rmse, pearson))
+            try:
+                rmse = float(values[1])
+                pearson = float(values[2])
+                data.append((rmse, pearson))
+            except (ValueError, IndexError):
+                # Skip blank, truncated (fewer than 3 columns), or non-numeric lines
+                continue
+
+    # Without any parsed rows there is nothing to average, so report and stop
+    num_seeds = len(data)
+    if num_seeds == 0:
+        print(f"No valid data found in file: {file_path}")
+        return
 
     # Separate RMSE and Pearson Correlation
     rmse_values = [item[0] for item in data]
@@ -21,12 +33,30 @@ def calculate_statistics(file_path):
     mean_pearson = np.mean(pearson_values)
     std_pearson = np.std(pearson_values)
 
+    # Print results
+    print(f"Results for file: {file_path}")
+    print(f"Number of seeds used: {num_seeds}")
     print(f"Mean ± Standard Deviation of RMSE: {mean_rmse:.3f} ± {std_rmse:.3f}")
     print(f"Mean ± Standard Deviation of Pearson Correlation: {mean_pearson:.3f} ± {std_pearson:.3f}")
+    print("-" * 40)
+
+def process_all_files_in_folder(results_folder):
+    """Run calculate_statistics on each ``.txt`` file in results_folder, in sorted name order."""
+    for filename in sorted(os.listdir(results_folder)):  # listdir order is arbitrary; sort for a stable report
+        if filename.endswith(".txt"):
+            file_path = os.path.join(results_folder, filename)
+            calculate_statistics(file_path)
+
+def main() -> None:
+    """Parse the ``results_folder`` argument and pass it to process_all_files_in_folder."""
+    parser = argparse.ArgumentParser(description="Process result files in a folder to calculate statistics.")
+    parser.add_argument('results_folder', type=str, help="Path to the folder containing the result files.")
 
-# File path
-# file_path = 'path_to_your_file/seed_10_epoch_100_GDSCv2_early_exit_rna_train_results_table.txt'
+    # Read the folder path from the command line
+    args = parser.parse_args()
 
-file_path = 'seed_10_epoch_100_CCLE_MPNST_dss_rna_train_results_table.txt'
+    # Report statistics for the result files in that folder
+    process_all_files_in_folder(args.results_folder)
 
-calculate_statistics(file_path)
+if __name__ == "__main__":
+    main()
diff --git a/mpnst/coderdata_auc_deep_TTA_check_self.py b/mpnst/coderdata_auc_deep_TTA_check_self.py
diff --git a/mpnst/coderdata_auc_deep_TTA_cross_test.py b/mpnst/coderdata_auc_deep_TTA_cross_test.py
diff --git a/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_BeatAML_CCLE.sh b/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_BeatAML_CCLE.sh
@@ -0,0 +1,54 @@
+#!/bin/csh
+#SBATCH -A coderdata_mpnst                # Account (project) the job is charged to
+#SBATCH -t 12:00:00                    # Wall-clock time limit (12 hours)
+#SBATCH -N 1                          # Number of nodes
+#SBATCH -n 4                          # Number of tasks (adjust as needed)
+#SBATCH -J deeptta_BeatAML_CCLE       # Job name
+#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/BeatAML_CCLE_cross_%j.out  # Standard output file, %j will be replaced by job ID
+#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/BeatAML_CCLE_cross_%j.err  # Standard error file, %j will be replaced by job ID
+#SBATCH --partition=dlv               # Partition to submit to
+#SBATCH --gres=gpu:1                  # Request 1 GPU
+# Make sure the module commands are available
+source /etc/profile.d/modules.csh
+source /people/jaco059/.conda/envs/CDRP # NOTE(review): this looks like the env directory, not a sourceable csh script - confirm
+
+# Start from a clean module state, then activate the CDRP conda environment
+module purge                          # Clear all loaded modules
+conda activate CDRP
+
+# Unlimit system resources (for csh/tcsh)
+unlimit
+
+# Record the loaded modules, the resource limits, and the environment
+# variables in the job output file; this makes failed or surprising runs
+# much easier to diagnose afterwards.
+echo "loaded modules"
+module list >& _modules.lis_
+cat _modules.lis_
+/bin/rm -f _modules.lis_
+
+echo "limits"
+limit
+
+echo "Environment Variables"
+printenv
+
+# Run the cross-test script with the BeatAML files as train inputs and the broad_sanger files (described as CCLE) as test inputs
+python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
+    --data_split_seed 10 \
+    --n_epochs 100 \
+    --train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_transcriptomics.csv \
+    --train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_experiments.tsv \
+    --train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_drugs.tsv \
+    --test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_transcriptomics.csv \
+    --test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_experiments.tsv \
+    --test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_drugs.tsv \
+    --output_prefix BeatAML_CCLE_DSS \
+    --train_study_description BeatAML \
+    --test_study_description CCLE \
+    --dose_response_metric dss \
+    --train_log_transform True \
+    --test_log_transform True \
+    --checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/beataml_ccle_best.pt
+# Wait for background jobs; none are started above, so this should return immediately
+wait
diff --git a/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_BeatAML_MPNST.sh b/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_BeatAML_MPNST.sh
@@ -0,0 +1,54 @@
+#!/bin/csh
+#SBATCH -A coderdata_mpnst                # Account (project) the job is charged to
+#SBATCH -t 12:00:00                    # Wall-clock time limit (12 hours)
+#SBATCH -N 1                          # Number of nodes
+#SBATCH -n 4                          # Number of tasks (adjust as needed)
+#SBATCH -J deeptta_BeatAML_MPNST        # Job name
+#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/BeatAML_MPNST_cross_%j.out  # Standard output file, %j will be replaced by job ID
+#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/BeatAML_MPNST_cross_%j.err  # Standard error file, %j will be replaced by job ID
+#SBATCH --partition=dlv               # Partition to submit to
+#SBATCH --gres=gpu:1                  # Request 1 GPU
+# Make sure the module commands are available
+source /etc/profile.d/modules.csh
+source /people/jaco059/.conda/envs/CDRP # NOTE(review): this looks like the env directory, not a sourceable csh script - confirm
+
+# Start from a clean module state, then activate the CDRP conda environment
+module purge                          # Clear all loaded modules
+conda activate CDRP
+
+# Unlimit system resources (for csh/tcsh)
+unlimit
+
+# Record the loaded modules, the resource limits, and the environment
+# variables in the job output file; this makes failed or surprising runs
+# much easier to diagnose afterwards.
+echo "loaded modules"
+module list >& _modules.lis_
+cat _modules.lis_
+/bin/rm -f _modules.lis_
+
+echo "limits"
+limit
+
+echo "Environment Variables"
+printenv
+
+# Run the cross-test script with the BeatAML files as train inputs and the MPNST files as test inputs
+python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
+    --data_split_seed 10 \
+    --n_epochs 100 \
+    --train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_transcriptomics.csv \
+    --train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_experiments.tsv \
+    --train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_drugs.tsv \
+    --test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_transcriptomics.csv \
+    --test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_remapped_experiments.tsv \
+    --test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_drugs.tsv \
+    --output_prefix BeatAML_MPNST_DSS \
+    --train_study_description BeatAML \
+    --test_study_description MPNST \
+    --dose_response_metric dss \
+    --train_log_transform True \
+    --test_log_transform True \
+    --checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/beataml_mpnst_best.pt
+# Wait for background jobs; none are started above, so this should return immediately
+wait
diff --git a/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_CCLE_BeatAML.sh b/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_CCLE_BeatAML.sh
@@ -0,0 +1,54 @@
+#!/bin/csh
+#SBATCH -A coderdata_mpnst                # Account (project) the job is charged to
+#SBATCH -t 12:00:00                    # Wall-clock time limit (12 hours)
+#SBATCH -N 1                          # Number of nodes
+#SBATCH -n 4                          # Number of tasks (adjust as needed)
+#SBATCH -J deeptta_CCLE_beataml       # Job name
+#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/CCLE_BeatAML_cross_%j.out  # Standard output file, %j will be replaced by job ID
+#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/CCLE_BeatAML_cross_%j.err  # Standard error file, %j will be replaced by job ID
+#SBATCH --partition=dlv               # Partition to submit to
+#SBATCH --gres=gpu:1                  # Request 1 GPU
+# Make sure the module commands are available
+source /etc/profile.d/modules.csh
+source /people/jaco059/.conda/envs/CDRP # NOTE(review): this looks like the env directory, not a sourceable csh script - confirm
+
+# Start from a clean module state, then activate the CDRP conda environment
+module purge                          # Clear all loaded modules
+conda activate CDRP
+
+# Unlimit system resources (for csh/tcsh)
+unlimit
+
+# Record the loaded modules, the resource limits, and the environment
+# variables in the job output file; this makes failed or surprising runs
+# much easier to diagnose afterwards.
+echo "loaded modules"
+module list >& _modules.lis_
+cat _modules.lis_
+/bin/rm -f _modules.lis_
+
+echo "limits"
+limit
+
+echo "Environment Variables"
+printenv
+
+# Run the cross-test script with the broad_sanger files (described as CCLE) as train inputs and the BeatAML files as test inputs
+python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
+    --data_split_seed 10 \
+    --n_epochs 100 \
+    --train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_transcriptomics.csv \
+    --train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_experiments.tsv \
+    --train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_drugs.tsv \
+    --test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_transcriptomics.csv \
+    --test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_experiments.tsv \
+    --test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_drugs.tsv \
+    --output_prefix CCLE_BeatAML_dss \
+    --train_study_description CCLE \
+    --test_study_description BeatAML \
+    --dose_response_metric dss \
+    --train_log_transform True \
+    --test_log_transform True \
+    --checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/ccle_beataml_best.pt
+# Wait for background jobs; none are started above, so this should return immediately
+wait
diff --git a/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_CCLE_MPNST.sh b/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_CCLE_MPNST.sh
@@ -0,0 +1,54 @@
+#!/bin/csh
+#SBATCH -A coderdata_mpnst                # Account (project) the job is charged to
+#SBATCH -t 12:00:00                    # Wall-clock time limit (12 hours)
+#SBATCH -N 1                          # Number of nodes
+#SBATCH -n 4                          # Number of tasks (adjust as needed)
+#SBATCH -J deeptta_CCLE_mpnst        # Job name
+#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/CCLE_MPNST_cross_%j.out  # Standard output file, %j will be replaced by job ID
+#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/CCLE_MPNST_cross_%j.err  # Standard error file, %j will be replaced by job ID
+#SBATCH --partition=dlv               # Partition to submit to
+#SBATCH --gres=gpu:1                  # Request 1 GPU
+# Make sure the module commands are available
+source /etc/profile.d/modules.csh
+source /people/jaco059/.conda/envs/CDRP # NOTE(review): this looks like the env directory, not a sourceable csh script - confirm
+
+# Start from a clean module state, then activate the CDRP conda environment
+module purge                          # Clear all loaded modules
+conda activate CDRP
+
+# Unlimit system resources (for csh/tcsh)
+unlimit
+
+# Record the loaded modules, the resource limits, and the environment
+# variables in the job output file; this makes failed or surprising runs
+# much easier to diagnose afterwards.
+echo "loaded modules"
+module list >& _modules.lis_
+cat _modules.lis_
+/bin/rm -f _modules.lis_
+
+echo "limits"
+limit
+
+echo "Environment Variables"
+printenv
+
+# Run the cross-test script with the broad_sanger files (described as CCLE) as train inputs and the MPNST files as test inputs
+python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
+    --data_split_seed 10 \
+    --n_epochs 100 \
+    --train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_transcriptomics.csv \
+    --train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_experiments.tsv \
+    --train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_drugs.tsv \
+    --test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_transcriptomics.csv \
+    --test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_remapped_experiments.tsv \
+    --test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_drugs.tsv \
+    --output_prefix CCLE_MPNST_DSS \
+    --train_study_description CCLE \
+    --test_study_description MPNST \
+    --dose_response_metric dss \
+    --train_log_transform True \
+    --test_log_transform True \
+    --checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/ccle_mpnst_best.pt
+# Wait for background jobs; none are started above, so this should return immediately
+wait
diff --git a/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_MPNST_BeatAML.sh b/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_MPNST_BeatAML.sh
@@ -0,0 +1,54 @@
+#!/bin/csh
+#SBATCH -A coderdata_mpnst                # Account (project) the job is charged to
+#SBATCH -t 12:00:00                    # Wall-clock time limit (12 hours)
+#SBATCH -N 1                          # Number of nodes
+#SBATCH -n 4                          # Number of tasks (adjust as needed)
+#SBATCH -J deeptta_MPNST_BeatAML        # Job name
+#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/MPNST_BeatAML_cross_%j.out  # Standard output file, %j will be replaced by job ID
+#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/MPNST_BeatAML_cross_%j.err  # Standard error file, %j will be replaced by job ID
+#SBATCH --partition=dlv               # Partition to submit to
+#SBATCH --gres=gpu:1                  # Request 1 GPU
+# Make sure the module commands are available
+source /etc/profile.d/modules.csh
+source /people/jaco059/.conda/envs/CDRP # NOTE(review): this looks like the env directory, not a sourceable csh script - confirm
+
+# Start from a clean module state, then activate the CDRP conda environment
+module purge                          # Clear all loaded modules
+conda activate CDRP
+
+# Unlimit system resources (for csh/tcsh)
+unlimit
+
+# Record the loaded modules, the resource limits, and the environment
+# variables in the job output file; this makes failed or surprising runs
+# much easier to diagnose afterwards.
+echo "loaded modules"
+module list >& _modules.lis_
+cat _modules.lis_
+/bin/rm -f _modules.lis_
+
+echo "limits"
+limit
+
+echo "Environment Variables"
+printenv
+
+# Run the cross-test script with the MPNST files as train inputs and the BeatAML files as test inputs
+python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
+    --data_split_seed 10 \
+    --n_epochs 100 \
+    --train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_transcriptomics.csv \
+    --train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_remapped_experiments.tsv \
+    --train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_drugs.tsv \
+    --test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_transcriptomics.csv \
+    --test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_experiments.tsv \
+    --test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/beataml_drugs.tsv \
+    --output_prefix MPNST_BeatAML_DSS \
+    --train_study_description MPNST \
+    --test_study_description BeatAML \
+    --dose_response_metric dss \
+    --train_log_transform True \
+    --test_log_transform True \
+    --checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/mpnst_beataml_best.pt
+# Wait for background jobs; none are started above, so this should return immediately
+wait
diff --git a/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_MPNST_CCLE.sh b/mpnst/model_generation_slurm_scripts_3_x_3/coderdata_SLURM_cross_MPNST_CCLE.sh
@@ -0,0 +1,54 @@
+#!/bin/csh
+#SBATCH -A coderdata_mpnst                # Account (project) the job is charged to
+#SBATCH -t 12:00:00                    # Wall-clock time limit (12 hours)
+#SBATCH -N 1                          # Number of nodes
+#SBATCH -n 4                          # Number of tasks (adjust as needed)
+#SBATCH -J deeptta_MPNST_CCLE        # Job name
+#SBATCH -o /people/jaco059/CoderData_Moon_Project/logs/MPNST_CCLE_cross_%j.out  # Standard output file, %j will be replaced by job ID
+#SBATCH -e /people/jaco059/CoderData_Moon_Project/logs/MPNST_CCLE_cross_%j.err  # Standard error file, %j will be replaced by job ID
+#SBATCH --partition=dlv               # Partition to submit to
+#SBATCH --gres=gpu:1                  # Request 1 GPU
+# Make sure the module commands are available
+source /etc/profile.d/modules.csh
+source /people/jaco059/.conda/envs/CDRP # NOTE(review): this looks like the env directory, not a sourceable csh script - confirm
+
+# Start from a clean module state, then activate the CDRP conda environment
+module purge                          # Clear all loaded modules
+conda activate CDRP
+
+# Unlimit system resources (for csh/tcsh)
+unlimit
+
+# Record the loaded modules, the resource limits, and the environment
+# variables in the job output file; this makes failed or surprising runs
+# much easier to diagnose afterwards.
+echo "loaded modules"
+module list >& _modules.lis_
+cat _modules.lis_
+/bin/rm -f _modules.lis_
+
+echo "limits"
+limit
+
+echo "Environment Variables"
+printenv
+
+# Run the cross-test script with the MPNST files as train inputs and the broad_sanger files (described as CCLE) as test inputs
+python /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/coderdata_auc_deep_TTA_cross_test.py \
+    --data_split_seed 10 \
+    --n_epochs 100 \
+    --train_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_transcriptomics.csv \
+    --train_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_remapped_experiments.tsv \
+    --train_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/mpnst_drugs.tsv \
+    --test_omics_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_transcriptomics.csv \
+    --test_exp_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_experiments.tsv \
+    --test_drugs_input_path /people/jaco059/CoderData_Moon_Project/data_coderdata_0_1_40/broad_sanger_drugs.tsv \
+    --output_prefix MPNST_CCLE_DSS \
+    --train_study_description MPNST \
+    --test_study_description CCLE \
+    --dose_response_metric dss \
+    --train_log_transform True \
+    --test_log_transform True \
+    --checkpoint_path /people/jaco059/CoderData_Moon_Project/cdrp/mpnst/tmp/mpnst_ccle_best.pt
+# Wait for background jobs; none are started above, so this should return immediately
+wait