Commit a677c87

Make lint happy
JoseEspinosa committed Oct 21, 2024
1 parent 84f8372 commit a677c87
Showing 35 changed files with 141 additions and 145 deletions.
2 changes: 1 addition & 1 deletion Dockerfiles/ps/Dockerfile
@@ -12,4 +12,4 @@ RUN micromamba install -y -n base -c defaults -c bioconda -c conda-forge \
&& micromamba clean -a -y

ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH"
-USER root
+USER root
2 changes: 1 addition & 1 deletion Dockerfiles/stimulus/Dockerfile
@@ -18,4 +18,4 @@ RUN pip install \
ray[train,tune,default]==2.23.0 \
matplotlib==3.9.0 \
pandas==2.2.0 \
-    safetensors==0.4.5
+    safetensors==0.4.5
2 changes: 1 addition & 1 deletion bin/tests/test_data/dna_experiment/test.csv
@@ -1,3 +1,3 @@
hello:input:dna,hola:label:float,pet:meta:str
ACTGACTGATCGATGC,12,cat
-ACTGACTGATCGATGC,12,dog
+ACTGACTGATCGATGC,12,dog
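The header rows of these test CSVs appear to encode each column as name:role:dtype (for example hello:input:dna, hola:label:float, pet:meta:str). As an editor's illustration only, the hypothetical helper below sketches how such a header could be split; the pipeline's actual parsing lives in the stimulus code base and may differ.

```python
# Hypothetical sketch of parsing the "name:role:dtype" header convention seen in the
# test CSVs above. Editor's illustration, not the pipeline's implementation.
from typing import NamedTuple, List


class ColumnSpec(NamedTuple):
    name: str   # e.g. "hello"
    role: str   # "input", "label" or "meta"
    dtype: str  # e.g. "dna", "float", "str"


def parse_header(header: str) -> List[ColumnSpec]:
    """Split a header row like 'hello:input:dna,hola:label:float,pet:meta:str'."""
    specs = []
    for column in header.split(","):
        name, role, dtype = column.split(":")
        specs.append(ColumnSpec(name, role, dtype))
    return specs


if __name__ == "__main__":
    print(parse_header("hello:input:dna,hola:label:float,pet:meta:str"))
    # -> [ColumnSpec(name='hello', role='input', dtype='dna'), ...]
```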
2 changes: 1 addition & 1 deletion bin/tests/test_data/dna_experiment/test_config.json
@@ -15,7 +15,7 @@
"column_name": "hello:input:dna",
"name": "ReverseComplement",
"params": {}
-        }
+        }
],
"split": {
"name": "RandomSplitter",
@@ -2,4 +2,4 @@ hello:input:dna,hola:label:float
ACTGACTGATCGATGC,5
ACTGACTGATCGATGC,5
ATCAGTCAG,2.3
-aTCnndhaksdhjtcgaysdgyagctaggat,10
+aTCnndhaksdhjtcgaysdgyagctaggat,10
2 changes: 1 addition & 1 deletion bin/tests/test_data/dna_experiment/test_with_split.csv
@@ -46,4 +46,4 @@ CGGTAGTTCACTGAC,1,2,bird
CCGGAAGTTCACTGA,1,2,bird
TCCGTAAGTTCACTG,1,2,bird
ATCGGTAAGTTCACT,1,2,bird
-ATCCGGTAAGTTCAC,1,2,bird
+ATCCGGTAAGTTCAC,1,2,bird
2 changes: 1 addition & 1 deletion bin/tests/test_data/prot_dna_experiment/test.csv
@@ -1,3 +1,3 @@
hello:input:dna,bonjour:input:prot,hola:label:float,pet:meta:str
ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,cat
-ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,dog
+ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,dog
2 changes: 1 addition & 1 deletion bin/tests/test_data/prot_dna_experiment/test_config.json
@@ -20,7 +20,7 @@
"column_name": "hello:input:dna",
"name": "ReverseComplement",
"params": {}
-        }
+        }
],
"split": {
"name": "RandomSplitter",
@@ -1,4 +1,4 @@
hello:input:dna,bonjour:input:prot,hola:label:float,split:split:int,pet:meta:str
ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,0,cat
ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,1,dog
-ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,2,bird
+ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,2,bird
4 changes: 2 additions & 2 deletions bin/tests/test_data/titanic/process_titanic_to_stimulus.py
@@ -5,7 +5,7 @@ def arg_parser():
parser = argparse.ArgumentParser(description="Process Titanic dataset to stimulus format")
parser.add_argument("--input", type=str, help="Path to input csv file, should be identical to Kaggle download of the Titanic dataset, see : https://www.kaggle.com/c/titanic/data", required=True)
parser.add_argument("--output", type=str, help="Path to output csv file", default="titanic_stimulus.csv", required=False)
-    return parser.parse_args()
+    return parser.parse_args()

def main():
args = arg_parser()
@@ -41,4 +41,4 @@ def main():
df.write_csv(args.output)

if __name__ == "__main__":
-    main()
+    main()
2 changes: 1 addition & 1 deletion bin/tests/test_data/titanic/titanic_stimulus.json
@@ -1,6 +1,6 @@
{
"experiment": "TitanicExperiment",
"interpret_params_mode": "column_wise",
"interpret_params_mode": "column_wise",
"split": [
{
"name": "RandomSplitter",
22 changes: 11 additions & 11 deletions bin/tests/test_model/dnatofloat_model.py
@@ -1,14 +1,14 @@
-import torch
+import torch
import torch.nn as nn
from typing import Callable, Optional, Tuple

class ModelSimple(torch.nn.Module):
"""
A simple model example.
-    It takes as input a 1D tensor of any size,
-    apply some convolutional layer and
+    It takes as input a 1D tensor of any size,
+    apply some convolutional layer and
outputs a single value using a maxpooling layer and a softmax function.
All functions `forward`, `compute_loss` and `batch` need to be implemented for any new model.
"""
def __init__(self, kernel_size: int = 3, pool_size: int = 2):
@@ -18,20 +18,20 @@ def __init__(self, kernel_size: int = 3, pool_size: int = 2):
self.softmax = nn.Softmax(dim=1)
        # had to change to 6 because dna sequence is shorter
self.linear = nn.Linear(6, 1)

def forward(self, hello: torch.Tensor) -> dict:
"""
Forward pass of the model.
It should return the output as a dictionary, with the same keys as `y`.
"""
-        x = hello.permute(0, 2, 1).to(torch.float32)  # permute the two last dimensions of hello
+        x = hello.permute(0, 2, 1).to(torch.float32)  # permute the two last dimensions of hello
x = self.conv1(x)
x = self.pool(x)
x = self.softmax(x)
x = self.linear(x)
x = x.squeeze()
return x

def compute_loss(self, output: torch.Tensor, hola: torch.Tensor, loss_fn: Callable) -> torch.Tensor:
"""
Compute the loss.
@@ -40,14 +40,14 @@ def compute_loss(self, output: torch.Tensor, hola: torch.Tensor, loss_fn: Callab
`loss_fn` is the loss function to be used.
"""
return loss_fn(output, hola.to(torch.float32))

def batch(self, x: dict, y: dict, loss_fn1: Callable, loss_fn2: Callable, optimizer: Optional[Callable] = None) -> Tuple[torch.Tensor, dict]:
"""
Perform one batch step.
`x` is a dictionary with the input tensors.
`y` is a dictionary with the target tensors.
`loss_fn1` and `loss_fn2` are the loss function to be used.
If `optimizer` is passed, it will perform the optimization step -> training step
Otherwise, only return the forward pass output and loss -> evaluation step
@@ -62,4 +62,4 @@ def batch(self, x: dict, y: dict, loss_fn1: Callable, loss_fn2: Callable, optimi
loss1.backward(retain_graph=True)
loss2.backward(retain_graph=True)
optimizer.step()
-        return loss1, output
+        return loss1, output
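The docstrings in this file spell out the contract every model has to honour: `forward`, `compute_loss` and `batch`, where `batch` acts as a training step when an optimizer is supplied and as an evaluation step otherwise. The sketch below is an editor's illustration of driving that contract; the import path, tensor shapes and loss functions are assumptions, and in the pipeline the actual training loop is driven elsewhere (e.g. by Ray Tune).

```python
# Editor's sketch of using the ModelSimple interface described above.
# The import path, input shapes and loss functions are assumptions, not pipeline code.
import torch

from dnatofloat_model import ModelSimple  # hypothetical import path

model = ModelSimple(kernel_size=3, pool_size=2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()

# x and y mirror the dictionary contract in `batch`: keys match the forward
# arguments ("hello") and the label columns ("hola") respectively.
x = {"hello": torch.randn(8, 15, 4)}  # assumed batch of one-hot encoded sequences
y = {"hola": torch.randn(8)}

# Training step: passing an optimizer makes `batch` run the backward pass and update.
train_loss, train_output = model.batch(x, y, loss_fn, loss_fn, optimizer=optimizer)

# Evaluation step: without an optimizer, only the forward pass output and loss are returned.
with torch.no_grad():
    eval_loss, eval_output = model.batch(x, y, loss_fn, loss_fn)
```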
4 changes: 2 additions & 2 deletions bin/tests/test_model/titanic_model.py
@@ -28,7 +28,7 @@ def forward(self, pclass: torch.Tensor, sex: torch.Tensor, age: torch.Tensor, si
x = self.relu(layer(x))
x = self.softmax(self.output_layer(x))
return x

def compute_loss(self, output: torch.Tensor, survived: torch.Tensor, loss_fn: Callable) -> torch.Tensor:
"""
Compute the loss.
@@ -37,7 +37,7 @@ def compute_loss(self, output: torch.Tensor, survived: torch.Tensor, loss_fn: Ca
`loss_fn` is the loss function to be used.
"""
return loss_fn(output, survived)

def batch(self, x: dict, y: dict, loss_fn: Callable, optimizer: Optional[Callable] = None) -> Tuple[torch.Tensor, dict]:
"""
Perform one batch step.
20 changes: 10 additions & 10 deletions conf/crg.config
@@ -1,6 +1,6 @@
params {
-    config_profile_name = 'CRG profile'
-    config_profile_description = 'Configuration to run on CRG cluster'
+    config_profile_name = 'CRG profile'
+    config_profile_description = 'Configuration to run on CRG cluster'
}


@@ -16,16 +16,16 @@ process {
errorStrategy = params.err_start

withLabel:process_low {
-        queue = 'cn-el7,short-centos79'
-        cpus = { 1 }
-        memory = { 4.GB * task.attempt }
-        time = { 1.h * task.attempt }
+        queue = 'cn-el7,short-centos79'
+        cpus = { 1 }
+        memory = { 4.GB * task.attempt }
+        time = { 1.h * task.attempt }
}
withLabel:process_medium{
-        queue = 'cn-el7,short-centos79'
-        cpus = { 4 }
-        memory = { 10.GB * task.attempt }
-        time = { 6.h * task.attempt }
+        queue = 'cn-el7,short-centos79'
+        cpus = { 4 }
+        memory = { 10.GB * task.attempt }
+        time = { 6.h * task.attempt }
}
withLabel:process_high {
queue = 'cn-el7,long-centos79'
22 changes: 11 additions & 11 deletions conf/crg_slurm.config
@@ -1,6 +1,6 @@
params {
-    config_profile_name = 'CRG profile - slurm new cluster'
-    config_profile_description = 'Configuration to run on CRG new cluster'
+    config_profile_name = 'CRG profile - slurm new cluster'
+    config_profile_description = 'Configuration to run on CRG new cluster'

}

@@ -15,18 +15,18 @@ process {
executor = "slurm"
maxRetries = params.max_retries
errorStrategy = params.err_start

withLabel:process_low {
-        clusterOptions = '--qos=shorter'
-        cpus = { 1 }
-        memory = { 4.GB * task.attempt }
-        time = { 1.h * task.attempt }
+        clusterOptions = '--qos=shorter'
+        cpus = { 1 }
+        memory = { 4.GB * task.attempt }
+        time = { 1.h * task.attempt }
}
withLabel:process_medium{
-        clusterOptions = '--qos=short'
-        cpus = { 4 }
-        memory = { 10.GB * task.attempt }
-        time = { 6.h * task.attempt }
+        clusterOptions = '--qos=short'
+        cpus = { 4 }
+        memory = { 10.GB * task.attempt }
+        time = { 6.h * task.attempt }
}
withLabel:process_high {
clusterOptions = '--qos=normal'
26 changes: 13 additions & 13 deletions conf/local.config
@@ -1,27 +1,27 @@
params {
-    config_profile_name = 'Local profile'
-    config_profile_description = 'Configuration to run on local machine'
+    config_profile_name = 'Local profile'
+    config_profile_description = 'Configuration to run on local machine'

}


process {
-    maxRetries = params.max_retries
+    maxRetries = params.max_retries
errorStrategy = params.err_start

withLabel:process_low {
-        cpus = { 1 }
-        memory = { 4.GB * task.attempt }
-        time = { 1.h * task.attempt }
-    }
+        cpus = { 1 }
+        memory = { 4.GB * task.attempt }
+        time = { 1.h * task.attempt }
+    }
withLabel:process_medium{
-        cpus = { 4 }
-        memory = { 10.GB * task.attempt }
-        time = { 6.h * task.attempt }
+        cpus = { 4 }
+        memory = { 10.GB * task.attempt }
+        time = { 6.h * task.attempt }
}
withLabel:process_medium_high {
-        cpus = { 12 }
-        memory = { 50.GB * task.attempt }
-        time = { 12.h * task.attempt }
+        cpus = { 12 }
+        memory = { 50.GB * task.attempt }
+        time = { 12.h * task.attempt }
}
}
10 changes: 5 additions & 5 deletions conf/modules.config
@@ -38,9 +38,9 @@ process {

        // the prefix is the input data csv filename without its last extension (all other dots are changed to underscores) + the specific experimental config json name (dynamically created during the interpret json step)
ext.prefix = { "${combination_key}".split(' ')[0].split('\\.')[0..-2].join("_") + "-" + "${combination_key}".split(' ')[2] }

// the output directory is called by the unique name of the run + the time of launch.
-        // all subidrs of the output dir are called with the same schema of the prefix above, which makes them process instance specific (aka unique).
+        // all subidrs of the output dir are called with the same schema of the prefix above, which makes them process instance specific (aka unique).
        // at the time of publishing, files are copied and then renamed to a standard naming convention based on their extension and suffixes. This is done through saveAs mode. The files need to be copied because the original, unrenamed file will be further used by the pipeline and its name has to stay unique (thanks to the prefix), otherwise there will be filename conflicts.
publishDir = [
path: { "${params.outdir}/${workflow.runName}_" + "${workflow.start}/".replaceAll('[-:]', '_').split('\\.')[0] + "/" + "${combination_key}".split(' ')[0].split('\\.')[0..-2].join("_") + "-" + "${combination_key}".split(' ')[2] },
@@ -53,15 +53,15 @@ process {
else if (filename.endsWith(".csv")) "training_data.csv"
else if (filename.endsWith(".json")) "experiment_config.json"
else filename
-            }
+            }
]
}

withName: "STIMULUS_ANALYSIS_DEFAULT" {

// the output directory is called by the unique name of the run + the time of launch.
-        // it has subdirs that identify the class of analysises like -> /analysis_default/.
-        // the subsubdirs have the name of the split informstion used to create the test set. So conceptually all analyses carried on the same test set are under the same subsubdir.
+        // it has subdirs that identify the class of analysises like -> /analysis_default/.
+        // the subsubdirs have the name of the split informstion used to create the test set. So conceptually all analyses carried on the same test set are under the same subsubdir.
publishDir = [
path: { "${params.outdir}/${workflow.runName}_" + "${workflow.start}/".replaceAll('[-:]', '_').split('\\.')[0] + "/analysis_default/" + "${split_transform_key}" },
mode: params.publish_dir_mode,
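The comments above describe how ext.prefix is built from combination_key: the CSV filename with its last extension dropped and the remaining dots turned into underscores, joined by a hyphen to the dynamically generated experiment config JSON name (the third space-separated field). A rough Python rendering of that Groovy expression, using a made-up combination_key purely for illustration:

```python
# Editor's illustration of the ext.prefix construction described in the comments above.
# The combination_key value is made up; the real keys are assembled by the pipeline.
def stimulus_prefix(combination_key: str) -> str:
    fields = combination_key.split(" ")
    csv_name, config_name = fields[0], fields[2]
    # drop the last extension and join the remaining parts with underscores
    stem = "_".join(csv_name.split(".")[:-1])
    return f"{stem}-{config_name}"


print(stimulus_prefix("test.v1.csv model.py experiment_split-0.json"))
# -> "test_v1-experiment_split-0.json"
```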
2 changes: 1 addition & 1 deletion conf/test_learn.config
@@ -5,7 +5,7 @@ params {

config_profile_name = 'Test Learn profile'
config_profile_description = 'Minimal test dataset to check if a model that should learn actually does'

// Input data
csv = "${projectDir}/bin/tests/test_data/titanic/titanic_stimulus.csv"
exp_conf = "${projectDir}/bin/tests/test_data/titanic/titanic_stimulus.json" // TODO make the data transformations available to titanic data types
2 changes: 1 addition & 1 deletion main.nf
@@ -73,7 +73,7 @@ workflow {
params.outdir //,
// params.input
)

//
// WORKFLOW: Run main workflow
//
2 changes: 1 addition & 1 deletion modules/local/check_torch_model.nf
@@ -5,7 +5,7 @@ process CHECK_TORCH_MODEL {
label 'process_medium'
// TODO: push image to nf-core quay.io
container "docker.io/mathysgrapotte/stimulus-py:latest"

input:
tuple path(original_csv), path(model), path(experiment_config), path(ray_tune_config), path(initial_weights)

4 changes: 2 additions & 2 deletions modules/local/interpret_json.nf
@@ -5,7 +5,7 @@ process INTERPRET_JSON {
label 'process_low'
// TODO: push image to nf-core quay.io
container "docker.io/mathysgrapotte/stimulus-py:latest"

input:
path user_json
val message_from_check_model // only here to ensure that this module waits for check_model module to actually run
@@ -17,7 +17,7 @@

script:
"""
-    stimulus-interpret-json -j ${user_json}
+    stimulus-interpret-json -j ${user_json}
"""

stub:
14 changes: 7 additions & 7 deletions modules/local/stimulus_analysis_default.nf
@@ -8,13 +8,13 @@ process STIMULUS_ANALYSIS_DEFAULT {

input:
tuple val(split_transform_key), \
-          val(combination_key), \
-          path(data), \
-          path(experiment_config), \
-          path(model_config), \
-          path(weights), \
-          path(optimizer), \
-          path(metrics)
+          val(combination_key), \
+          path(data), \
+          path(experiment_config), \
+          path(model_config), \
+          path(weights), \
+          path(optimizer), \
+          path(metrics)
path(model)

output:
2 changes: 1 addition & 1 deletion modules/local/stimulus_split_csv.nf
@@ -1,6 +1,6 @@

process STIMULUS_SPLIT_CSV {

tag "${original_csv} - ${split_transform_key}"
label 'process_low'
// TODO: push image to nf-core quay.io