Commit a677c87

Make lint happy
JoseEspinosa committed Oct 21, 2024
1 parent 84f8372 commit a677c87
Showing 35 changed files with 141 additions and 145 deletions.
2 changes: 1 addition & 1 deletion Dockerfiles/ps/Dockerfile
@@ -12,4 +12,4 @@ RUN micromamba install -y -n base -c defaults -c bioconda -c conda-forge \
&& micromamba clean -a -y

ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH"
-USER root
+USER root
2 changes: 1 addition & 1 deletion Dockerfiles/stimulus/Dockerfile
@@ -18,4 +18,4 @@ RUN pip install \
ray[train,tune,default]==2.23.0 \
matplotlib==3.9.0 \
pandas==2.2.0 \
-    safetensors==0.4.5
+    safetensors==0.4.5
2 changes: 1 addition & 1 deletion bin/tests/test_data/dna_experiment/test.csv
@@ -1,3 +1,3 @@
hello:input:dna,hola:label:float,pet:meta:str
ACTGACTGATCGATGC,12,cat
-ACTGACTGATCGATGC,12,dog
+ACTGACTGATCGATGC,12,dog
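The header rows of these test CSVs appear to encode each column as name:role:dtype (for example hello:input:dna, hola:label:float, pet:meta:str). As an editor's illustration only, the hypothetical helper below sketches how such a header could be split; the pipeline's actual parsing lives in the stimulus code base and may differ.

```python
# Hypothetical sketch of parsing the "name:role:dtype" header convention seen in the
# test CSVs above. Editor's illustration, not the pipeline's implementation.
from typing import NamedTuple, List


class ColumnSpec(NamedTuple):
    name: str   # e.g. "hello"
    role: str   # "input", "label" or "meta"
    dtype: str  # e.g. "dna", "float", "str"


def parse_header(header: str) -> List[ColumnSpec]:
    """Split a header row like 'hello:input:dna,hola:label:float,pet:meta:str'."""
    specs = []
    for column in header.split(","):
        name, role, dtype = column.split(":")
        specs.append(ColumnSpec(name, role, dtype))
    return specs


if __name__ == "__main__":
    print(parse_header("hello:input:dna,hola:label:float,pet:meta:str"))
    # -> [ColumnSpec(name='hello', role='input', dtype='dna'), ...]
```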
2 changes: 1 addition & 1 deletion bin/tests/test_data/dna_experiment/test_config.json
@@ -15,7 +15,7 @@
"column_name": "hello:input:dna",
"name": "ReverseComplement",
"params": {}
-        }
+        }
],
"split": {
"name": "RandomSplitter",
@@ -2,4 +2,4 @@ hello:input:dna,hola:label:float
ACTGACTGATCGATGC,5
ACTGACTGATCGATGC,5
ATCAGTCAG,2.3
-aTCnndhaksdhjtcgaysdgyagctaggat,10
+aTCnndhaksdhjtcgaysdgyagctaggat,10
2 changes: 1 addition & 1 deletion bin/tests/test_data/dna_experiment/test_with_split.csv
@@ -46,4 +46,4 @@ CGGTAGTTCACTGAC,1,2,bird
CCGGAAGTTCACTGA,1,2,bird
TCCGTAAGTTCACTG,1,2,bird
ATCGGTAAGTTCACT,1,2,bird
-ATCCGGTAAGTTCAC,1,2,bird
+ATCCGGTAAGTTCAC,1,2,bird
2 changes: 1 addition & 1 deletion bin/tests/test_data/prot_dna_experiment/test.csv
@@ -1,3 +1,3 @@
hello:input:dna,bonjour:input:prot,hola:label:float,pet:meta:str
ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,cat
-ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,dog
+ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,dog
2 changes: 1 addition & 1 deletion bin/tests/test_data/prot_dna_experiment/test_config.json
@@ -20,7 +20,7 @@
"column_name": "hello:input:dna",
"name": "ReverseComplement",
"params": {}
-        }
+        }
],
"split": {
"name": "RandomSplitter",
@@ -1,4 +1,4 @@
hello:input:dna,bonjour:input:prot,hola:label:float,split:split:int,pet:meta:str
ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,0,cat
ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,1,dog
-ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,2,bird
+ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,2,bird
4 changes: 2 additions & 2 deletions bin/tests/test_data/titanic/process_titanic_to_stimulus.py
@@ -5,7 +5,7 @@ def arg_parser():
parser = argparse.ArgumentParser(description="Process Titanic dataset to stimulus format")
parser.add_argument("--input", type=str, help="Path to input csv file, should be identical to Kaggle download of the Titanic dataset, see : https://www.kaggle.com/c/titanic/data", required=True)
parser.add_argument("--output", type=str, help="Path to output csv file", default="titanic_stimulus.csv", required=False)
-    return parser.parse_args()
+    return parser.parse_args()

def main():
args = arg_parser()
@@ -41,4 +41,4 @@ def main():
df.write_csv(args.output)

if __name__ == "__main__":
-    main()
+    main()
2 changes: 1 addition & 1 deletion bin/tests/test_data/titanic/titanic_stimulus.json
@@ -1,6 +1,6 @@
{
"experiment": "TitanicExperiment",
"interpret_params_mode": "column_wise",
"interpret_params_mode": "column_wise",
"split": [
{
"name": "RandomSplitter",
22 changes: 11 additions & 11 deletions bin/tests/test_model/dnatofloat_model.py
@@ -1,14 +1,14 @@
-import torch
+import torch
import torch.nn as nn
from typing import Callable, Optional, Tuple

class ModelSimple(torch.nn.Module):
"""
A simple model example.
-    It takes as input a 1D tensor of any size,
-    apply some convolutional layer and
+    It takes as input a 1D tensor of any size,
+    apply some convolutional layer and
outputs a single value using a maxpooling layer and a softmax function.
All functions `forward`, `compute_loss` and `batch` need to be implemented for any new model.
"""
def __init__(self, kernel_size: int = 3, pool_size: int = 2):
@@ -18,20 +18,20 @@ def __init__(self, kernel_size: int = 3, pool_size: int = 2):
self.softmax = nn.Softmax(dim=1)
        # had to change to 6 because dna sequence is shorter
self.linear = nn.Linear(6, 1)

def forward(self, hello: torch.Tensor) -> dict:
"""
Forward pass of the model.
It should return the output as a dictionary, with the same keys as `y`.
"""
-        x = hello.permute(0, 2, 1).to(torch.float32)  # permute the two last dimensions of hello
+        x = hello.permute(0, 2, 1).to(torch.float32)  # permute the two last dimensions of hello
x = self.conv1(x)
x = self.pool(x)
x = self.softmax(x)
x = self.linear(x)
x = x.squeeze()
return x

def compute_loss(self, output: torch.Tensor, hola: torch.Tensor, loss_fn: Callable) -> torch.Tensor:
"""
Compute the loss.
@@ -40,14 +40,14 @@ def compute_loss(self, output: torch.Tensor, hola: torch.Tensor, loss_fn: Callab
`loss_fn` is the loss function to be used.
"""
return loss_fn(output, hola.to(torch.float32))

def batch(self, x: dict, y: dict, loss_fn1: Callable, loss_fn2: Callable, optimizer: Optional[Callable] = None) -> Tuple[torch.Tensor, dict]:
"""
Perform one batch step.
`x` is a dictionary with the input tensors.
`y` is a dictionary with the target tensors.
`loss_fn1` and `loss_fn2` are the loss function to be used.
If `optimizer` is passed, it will perform the optimization step -> training step
Otherwise, only return the forward pass output and loss -> evaluation step
@@ -62,4 +62,4 @@ def batch(self, x: dict, y: dict, loss_fn1: Callable, loss_fn2: Callable, optimi
loss1.backward(retain_graph=True)
loss2.backward(retain_graph=True)
optimizer.step()
-        return loss1, output
+        return loss1, output
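The docstrings in this file spell out the contract every model has to honour: `forward`, `compute_loss` and `batch`, where `batch` acts as a training step when an optimizer is supplied and as an evaluation step otherwise. The sketch below is an editor's illustration of driving that contract; the import path, tensor shapes and loss functions are assumptions, and in the pipeline the actual training loop is driven elsewhere (e.g. by Ray Tune).

```python
# Editor's sketch of using the ModelSimple interface described above.
# The import path, input shapes and loss functions are assumptions, not pipeline code.
import torch

from dnatofloat_model import ModelSimple  # hypothetical import path

model = ModelSimple(kernel_size=3, pool_size=2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()

# x and y mirror the dictionary contract in `batch`: keys match the forward
# arguments ("hello") and the label columns ("hola") respectively.
x = {"hello": torch.randn(8, 15, 4)}  # assumed batch of one-hot encoded sequences
y = {"hola": torch.randn(8)}

# Training step: passing an optimizer makes `batch` run the backward pass and update.
train_loss, train_output = model.batch(x, y, loss_fn, loss_fn, optimizer=optimizer)

# Evaluation step: without an optimizer, only the forward pass output and loss are returned.
with torch.no_grad():
    eval_loss, eval_output = model.batch(x, y, loss_fn, loss_fn)
```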
4 changes: 2 additions & 2 deletions bin/tests/test_model/titanic_model.py
@@ -28,7 +28,7 @@ def forward(self, pclass: torch.Tensor, sex: torch.Tensor, age: torch.Tensor, si
x = self.relu(layer(x))
x = self.softmax(self.output_layer(x))
return x

def compute_loss(self, output: torch.Tensor, survived: torch.Tensor, loss_fn: Callable) -> torch.Tensor:
"""
Compute the loss.
@@ -37,7 +37,7 @@ def compute_loss(self, output: torch.Tensor, survived: torch.Tensor, loss_fn: Ca
`loss_fn` is the loss function to be used.
"""
return loss_fn(output, survived)

def batch(self, x: dict, y: dict, loss_fn: Callable, optimizer: Optional[Callable] = None) -> Tuple[torch.Tensor, dict]:
"""
Perform one batch step.
20 changes: 10 additions & 10 deletions conf/crg.config
@@ -1,6 +1,6 @@
params {
-    config_profile_name = 'CRG profile'
-    config_profile_description = 'Configuration to run on CRG cluster'
+    config_profile_name = 'CRG profile'
+    config_profile_description = 'Configuration to run on CRG cluster'
}


@@ -16,16 +16,16 @@ process {
errorStrategy = params.err_start

withLabel:process_low {
-        queue = 'cn-el7,short-centos79'
-        cpus = { 1 }
-        memory = { 4.GB * task.attempt }
-        time = { 1.h * task.attempt }
+        queue = 'cn-el7,short-centos79'
+        cpus = { 1 }
+        memory = { 4.GB * task.attempt }
+        time = { 1.h * task.attempt }
}
withLabel:process_medium{
-        queue = 'cn-el7,short-centos79'
-        cpus = { 4 }
-        memory = { 10.GB * task.attempt }
-        time = { 6.h * task.attempt }
+        queue = 'cn-el7,short-centos79'
+        cpus = { 4 }
+        memory = { 10.GB * task.attempt }
+        time = { 6.h * task.attempt }
}
withLabel:process_high {
queue = 'cn-el7,long-centos79'
22 changes: 11 additions & 11 deletions conf/crg_slurm.config
@@ -1,6 +1,6 @@
params {
-    config_profile_name = 'CRG profile - slurm new cluster'
-    config_profile_description = 'Configuration to run on CRG new cluster'
+    config_profile_name = 'CRG profile - slurm new cluster'
+    config_profile_description = 'Configuration to run on CRG new cluster'

}

@@ -15,18 +15,18 @@ process {
executor = "slurm"
maxRetries = params.max_retries
errorStrategy = params.err_start

withLabel:process_low {
-        clusterOptions = '--qos=shorter'
-        cpus = { 1 }
-        memory = { 4.GB * task.attempt }
-        time = { 1.h * task.attempt }
+        clusterOptions = '--qos=shorter'
+        cpus = { 1 }
+        memory = { 4.GB * task.attempt }
+        time = { 1.h * task.attempt }
}
withLabel:process_medium{
-        clusterOptions = '--qos=short'
-        cpus = { 4 }
-        memory = { 10.GB * task.attempt }
-        time = { 6.h * task.attempt }
+        clusterOptions = '--qos=short'
+        cpus = { 4 }
+        memory = { 10.GB * task.attempt }
+        time = { 6.h * task.attempt }
}
withLabel:process_high {
clusterOptions = '--qos=normal'
26 changes: 13 additions & 13 deletions conf/local.config
@@ -1,27 +1,27 @@
params {
-    config_profile_name = 'Local profile'
-    config_profile_description = 'Configuration to run on local machine'
+    config_profile_name = 'Local profile'
+    config_profile_description = 'Configuration to run on local machine'

}


process {
-    maxRetries = params.max_retries
+    maxRetries = params.max_retries
errorStrategy = params.err_start

withLabel:process_low {
-        cpus = { 1 }
-        memory = { 4.GB * task.attempt }
-        time = { 1.h * task.attempt }
-    }
+        cpus = { 1 }
+        memory = { 4.GB * task.attempt }
+        time = { 1.h * task.attempt }
+    }
withLabel:process_medium{
-        cpus = { 4 }
-        memory = { 10.GB * task.attempt }
-        time = { 6.h * task.attempt }
+        cpus = { 4 }
+        memory = { 10.GB * task.attempt }
+        time = { 6.h * task.attempt }
}
withLabel:process_medium_high {
-        cpus = { 12 }
-        memory = { 50.GB * task.attempt }
-        time = { 12.h * task.attempt }
+        cpus = { 12 }
+        memory = { 50.GB * task.attempt }
+        time = { 12.h * task.attempt }
}
}
10 changes: 5 additions & 5 deletions conf/modules.config
@@ -38,9 +38,9 @@ process {

        // the prefix is the input data csv filename without its last extension (all other dots are changed to underscores) + the specific experimental config json name (dynamically created during the interpret json step)
ext.prefix = { "${combination_key}".split(' ')[0].split('\\.')[0..-2].join("_") + "-" + "${combination_key}".split(' ')[2] }

// the output directory is called by the unique name of the run + the time of launch.
-        // all subidrs of the output dir are called with the same schema of the prefix above, which makes them process instance specific (aka unique).
+        // all subidrs of the output dir are called with the same schema of the prefix above, which makes them process instance specific (aka unique).
        // at the time of publishing, files are copied and then renamed to a standard naming convention based on their extension and suffixes. This is done through saveAs mode. The files need to be copied because the original, unrenamed file will be further used by the pipeline and its name has to stay unique (thanks to the prefix), otherwise there will be filename conflicts.
publishDir = [
path: { "${params.outdir}/${workflow.runName}_" + "${workflow.start}/".replaceAll('[-:]', '_').split('\\.')[0] + "/" + "${combination_key}".split(' ')[0].split('\\.')[0..-2].join("_") + "-" + "${combination_key}".split(' ')[2] },
@@ -53,15 +53,15 @@ process {
else if (filename.endsWith(".csv")) "training_data.csv"
else if (filename.endsWith(".json")) "experiment_config.json"
else filename
-            }
+            }
]
}

withName: "STIMULUS_ANALYSIS_DEFAULT" {

// the output directory is called by the unique name of the run + the time of launch.
-        // it has subdirs that identify the class of analysises like -> /analysis_default/.
-        // the subsubdirs have the name of the split informstion used to create the test set. So conceptually all analyses carried on the same test set are under the same subsubdir.
+        // it has subdirs that identify the class of analysises like -> /analysis_default/.
+        // the subsubdirs have the name of the split informstion used to create the test set. So conceptually all analyses carried on the same test set are under the same subsubdir.
publishDir = [
path: { "${params.outdir}/${workflow.runName}_" + "${workflow.start}/".replaceAll('[-:]', '_').split('\\.')[0] + "/analysis_default/" + "${split_transform_key}" },
mode: params.publish_dir_mode,
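The comments above describe how ext.prefix is built from combination_key: the CSV filename with its last extension dropped and the remaining dots turned into underscores, joined by a hyphen to the dynamically generated experiment config JSON name (the third space-separated field). A rough Python rendering of that Groovy expression, using a made-up combination_key purely for illustration:

```python
# Editor's illustration of the ext.prefix construction described in the comments above.
# The combination_key value is made up; the real keys are assembled by the pipeline.
def stimulus_prefix(combination_key: str) -> str:
    fields = combination_key.split(" ")
    csv_name, config_name = fields[0], fields[2]
    # drop the last extension and join the remaining parts with underscores
    stem = "_".join(csv_name.split(".")[:-1])
    return f"{stem}-{config_name}"


print(stimulus_prefix("test.v1.csv model.py experiment_split-0.json"))
# -> "test_v1-experiment_split-0.json"
```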
2 changes: 1 addition & 1 deletion conf/test_learn.config
@@ -5,7 +5,7 @@ params {

config_profile_name = 'Test Learn profile'
config_profile_description = 'Minimal test dataset to check if a model that should learn actually does'

// Input data
csv = "${projectDir}/bin/tests/test_data/titanic/titanic_stimulus.csv"
exp_conf = "${projectDir}/bin/tests/test_data/titanic/titanic_stimulus.json" // TODO make the data transformations available to titanic data types
2 changes: 1 addition & 1 deletion main.nf
@@ -73,7 +73,7 @@ workflow {
params.outdir //,
// params.input
)

//
// WORKFLOW: Run main workflow
//
2 changes: 1 addition & 1 deletion modules/local/check_torch_model.nf
@@ -5,7 +5,7 @@ process CHECK_TORCH_MODEL {
label 'process_medium'
// TODO: push image to nf-core quay.io
container "docker.io/mathysgrapotte/stimulus-py:latest"

input:
tuple path(original_csv), path(model), path(experiment_config), path(ray_tune_config), path(initial_weights)

4 changes: 2 additions & 2 deletions modules/local/interpret_json.nf
@@ -5,7 +5,7 @@ process INTERPRET_JSON {
label 'process_low'
// TODO: push image to nf-core quay.io
container "docker.io/mathysgrapotte/stimulus-py:latest"

input:
path user_json
val message_from_check_model // only here to ensure that this module waits for check_model module to actually run
@@ -17,7 +17,7 @@

script:
"""
-    stimulus-interpret-json -j ${user_json}
+    stimulus-interpret-json -j ${user_json}
"""

stub:
14 changes: 7 additions & 7 deletions modules/local/stimulus_analysis_default.nf
@@ -8,13 +8,13 @@ process STIMULUS_ANALYSIS_DEFAULT {

input:
tuple val(split_transform_key), \
-          val(combination_key), \
-          path(data), \
-          path(experiment_config), \
-          path(model_config), \
-          path(weights), \
-          path(optimizer), \
-          path(metrics)
+          val(combination_key), \
+          path(data), \
+          path(experiment_config), \
+          path(model_config), \
+          path(weights), \
+          path(optimizer), \
+          path(metrics)
path(model)

output:
2 changes: 1 addition & 1 deletion modules/local/stimulus_split_csv.nf
@@ -1,6 +1,6 @@

process STIMULUS_SPLIT_CSV {

tag "${original_csv} - ${split_transform_key}"
label 'process_low'
// TODO: push image to nf-core quay.io