From e872c171fbeead6a85bd7f57be469f03d7bb3d54 Mon Sep 17 00:00:00 2001 From: Tim Booth Date: Tue, 22 Oct 2024 12:19:18 +0100 Subject: [PATCH] Try to get all the tests working --- activate_venv | 1 + doc/ragic.txt | 6 ++++++ driver.sh | 11 +++-------- samplesheet_fetch.sh | 6 ++++-- test/requirements.txt | 1 + test/test_driver.py | 2 +- test/test_samplesheet_fetch.py | 8 ++++---- 7 files changed, 20 insertions(+), 15 deletions(-) diff --git a/activate_venv b/activate_venv index 9f29e2a..a2b45d2 100644 --- a/activate_venv +++ b/activate_venv @@ -109,6 +109,7 @@ else pip_install python-Levenshtein==0.12.1 # snakemake and drmaa (note the settings/wrapper in shell_helper_functions.sh) + # FIXME - I'm using Snakemake 5 for the pipeline and 7 for the unit tests! pip_install snakemake==5.5.3 pip_install drmaa==0.7.9 diff --git a/doc/ragic.txt b/doc/ragic.txt index f06b4df..fe5f842 100644 --- a/doc/ragic.txt +++ b/doc/ragic.txt @@ -33,3 +33,9 @@ Now let's: 1) Implement the revcomp logic 2) Test the whole thing on the MiSeq run 3) Look at spreadsheet import (and thus do a much larger project) +4) Look at auto_redo.sh + +4 is going to be a bit of work. I think that since there is now no explicit "generate sample sheet" +step we want to actually have a "re-run" button in Ragic. Then we want to be able to ask Ragic +"which runs have a re-run flag set" and also allow the pipeline to clear off the flag as it goes. +Not too hard to implement. Not a big priority. 
diff --git a/driver.sh b/driver.sh index 507e57d..dd4c209 100755 --- a/driver.sh +++ b/driver.sh @@ -39,8 +39,7 @@ if [ -e "$ENVIRON_SH" ] ; then CLUSTER_PARTITION EXTRA_SLURM_FLAGS \ SSPP_HOOK TOOLBOX VERBOSE \ USE_RAGIC WRITE_TO_RAGIC DRY_RUN \ - SNAKE_THREADS LOCAL_CORES EXTRA_SNAKE_FLAGS \ - REDO_HOURS_TO_LOOK_BACK + SNAKE_THREADS LOCAL_CORES EXTRA_SNAKE_FLAGS fi # Just because I renamed it @@ -766,13 +765,9 @@ get_run_status() { # run_dir fi } -# **** And now the main processing actions, starting with a search for updated sample sheets for -# **** previously processed runs. +# **** And now the main processing actions -if [ -n "${REDO_HOURS_TO_LOOK_BACK:-}" ] ; then - log "Looking for new replacement sample sheets from the last $REDO_HOURS_TO_LOOK_BACK hours." - auto_redo.sh |& log || true -fi +# TODO - add a Ragic version of auto_redo.sh here log "Looking for run directories matching regex $SEQDATA_LOCATION/$RUN_NAME_REGEX/" diff --git a/samplesheet_fetch.sh b/samplesheet_fetch.sh index 9fd1398..f5c2e10 100755 --- a/samplesheet_fetch.sh +++ b/samplesheet_fetch.sh @@ -133,6 +133,7 @@ done export SSPP_FILE="$(readlink -f "SampleSheet.csv.$counter")" samplesheet_from_ragic.py --empty_on_missing -f "${UFLOWCELLID}" | \ "$SSPP_HOOK" >> "SampleSheet.csv.$counter" +echo "Extracted new SampleSheet.csv.$counter from Ragic with filter ($SSPP_HOOK)" if [ ! -s "SampleSheet.csv.$counter" ] ; then echo "New SampleSheet.csv for ${FLOWCELLID} is empty - ie. not found in Ragic" @@ -140,8 +141,9 @@ if [ ! -s "SampleSheet.csv.$counter" ] ; then exit 0 fi -# Now see if the new sheet is different. -if diff -q "SampleSheet.csv.$counter" SampleSheet.csv ; then +# Now see if the new sheet is different. We do want to ignore the Date line because +# this can change if, e.g., we just push the run ID back. 
+if diff -I '^Date,' -q "SampleSheet.csv.$counter" SampleSheet.csv ; then echo "SampleSheet.csv for ${FLOWCELLID} is already up-to-date" rm -f "SampleSheet.csv.$counter" exit 0 diff --git a/test/requirements.txt b/test/requirements.txt index efc9f4a..e4ef0e3 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -2,5 +2,6 @@ coverage bashmocker==0.3.0 pyyaml==6.0.1 yamlloader<2 +setuptools==71.1.0 snakemake==7.18.2 rt==2.2.2 diff --git a/test/test_driver.py b/test/test_driver.py index 1aac7c6..ae3ef35 100755 --- a/test/test_driver.py +++ b/test/test_driver.py @@ -234,7 +234,7 @@ def test_new(self, test_data=None): # This may or may not be mocked. If so, and REDO_HOURS_TO_LOOK_BACK is set, it should # be called. - if 'auto_redo.sh' in expected_calls and self.environment.get('REDO_HOURS_TO_LOOK_BACK'): + if 'auto_redo.sh' in expected_calls and self.environment.get('USE_RAGIC'): expected_calls['auto_redo.sh'] = [[]] # The call to rt_runticket_manager.py is non-deterministic, so we have to doctor it... 
diff --git a/test/test_samplesheet_fetch.py b/test/test_samplesheet_fetch.py index 979bc17..180b6a7 100755 --- a/test/test_samplesheet_fetch.py +++ b/test/test_samplesheet_fetch.py @@ -83,7 +83,7 @@ def test_replace_original(self): self.assertEqual(os.readlink('SampleSheet.csv'), 'SampleSheet.csv.1') self.assertEqual(last_stdout[0], "SampleSheet.csv renamed as SampleSheet.csv.0") - self.assertEqual(last_stdout[2], "SampleSheet.csv for XXXX is now linked to new SampleSheet.csv.1") + self.assertEqual(last_stdout[3], "SampleSheet.csv for XXXX is now linked to new SampleSheet.csv.1") with open("SampleSheet.csv") as fh: self.assertEqual(fh.read().rstrip(), 'this one') @@ -112,7 +112,7 @@ def test_keep_original(self): self.assertEqual(os.readlink('SampleSheet.csv'), 'SampleSheet.csv.0') self.assertEqual(last_stdout[0], "SampleSheet.csv renamed as SampleSheet.csv.0") - self.assertEqual(last_stdout[1][:37], "New SampleSheet.csv for XXXX is empty") + self.assertEqual(last_stdout[2][:37], "New SampleSheet.csv for XXXX is empty") def test_none_found(self): @@ -230,7 +230,7 @@ def test_always_touch(self): # Run the thingy again time.sleep(0.1) second_stdout = self.bm_run_fetch() - self.assertEqual(second_stdout[0], "SampleSheet.csv for XXXX is already up-to-date") + self.assertEqual(second_stdout[1], "SampleSheet.csv for XXXX is already up-to-date") self.assertGreater(os.lstat('SampleSheet.csv').st_mtime, utimestamp) def test_case_mismatch(self): @@ -248,7 +248,7 @@ def test_case_mismatch(self): self.assertEqual(os.readlink('SampleSheet.csv'), 'SampleSheet.csv.1') self.assertEqual(last_stdout[0], "SampleSheet.csv renamed as SampleSheet.csv.0") - self.assertEqual(last_stdout[2], "SampleSheet.csv for jd7l6 is now linked to new SampleSheet.csv.1") + self.assertEqual(last_stdout[3], "SampleSheet.csv for jd7l6 is now linked to new SampleSheet.csv.1") with open("SampleSheet.csv") as fh: self.assertEqual(fh.read().rstrip(), '--empty_on_missing -f JD7L6')