From e872c171fbeead6a85bd7f57be469f03d7bb3d54 Mon Sep 17 00:00:00 2001
From: Tim Booth <tim.booth@ed.ac.uk>
Date: Tue, 22 Oct 2024 12:19:18 +0100
Subject: [PATCH] Try to get all the tests working

---
 activate_venv                  |  1 +
 doc/ragic.txt                  |  6 ++++++
 driver.sh                      | 11 +++--------
 samplesheet_fetch.sh           |  6 ++++--
 test/requirements.txt          |  1 +
 test/test_driver.py            |  2 +-
 test/test_samplesheet_fetch.py |  8 ++++----
 7 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/activate_venv b/activate_venv
index 9f29e2a..a2b45d2 100644
--- a/activate_venv
+++ b/activate_venv
@@ -109,6 +109,7 @@ else
         pip_install python-Levenshtein==0.12.1
 
         # snakemake and drmaa (note the settings/wrapper in shell_helper_functions.sh)
+        # FIXME - the pipeline pins Snakemake 5.5.3 here but test/requirements.txt pins 7.18.2 for the unit tests!
         pip_install snakemake==5.5.3
         pip_install drmaa==0.7.9
 
diff --git a/doc/ragic.txt b/doc/ragic.txt
index f06b4df..fe5f842 100644
--- a/doc/ragic.txt
+++ b/doc/ragic.txt
@@ -33,3 +33,9 @@ Now let's:
 1) Implement the revcomp logic
 2) Test the whole thing on the MiSeq run
 3) Look at spreadsheet import (and thus do a much larger project)
+4) Look at auto_redo.sh
+
+4 is going to be a bit of work. I think that since there is now no explicit "generate sample sheet"
+step we want to actually have a "re-run" button in Ragic. Then we want to be able to ask Ragic
+"which runs have a re-run flag set" and also allow the pipeline to clear off the flag as it goes.
+Not too hard to implement. Not a big priority.
diff --git a/driver.sh b/driver.sh
index 507e57d..dd4c209 100755
--- a/driver.sh
+++ b/driver.sh
@@ -39,8 +39,7 @@ if [ -e "$ENVIRON_SH" ] ; then
            CLUSTER_PARTITION   EXTRA_SLURM_FLAGS \
            SSPP_HOOK           TOOLBOX           VERBOSE \
            USE_RAGIC           WRITE_TO_RAGIC    DRY_RUN           \
-           SNAKE_THREADS       LOCAL_CORES       EXTRA_SNAKE_FLAGS \
-           REDO_HOURS_TO_LOOK_BACK
+           SNAKE_THREADS       LOCAL_CORES       EXTRA_SNAKE_FLAGS
 fi
 
 # Just because I renamed it
@@ -766,13 +765,9 @@ get_run_status() { # run_dir
   fi
 }
 
-# **** And now the main processing actions, starting with a search for updated sample sheets for
-# **** previously processed runs.
+# **** And now the main processing actions
 
-if [ -n "${REDO_HOURS_TO_LOOK_BACK:-}" ] ; then
-    log "Looking for new replacement sample sheets from the last $REDO_HOURS_TO_LOOK_BACK hours."
-    auto_redo.sh |& log || true
-fi
+# TODO - add a Ragic version of auto_redo.sh here
 
 log "Looking for run directories matching regex $SEQDATA_LOCATION/$RUN_NAME_REGEX/"
 
diff --git a/samplesheet_fetch.sh b/samplesheet_fetch.sh
index 9fd1398..f5c2e10 100755
--- a/samplesheet_fetch.sh
+++ b/samplesheet_fetch.sh
@@ -133,6 +133,7 @@ done
 export SSPP_FILE="$(readlink -f "SampleSheet.csv.$counter")"
 samplesheet_from_ragic.py --empty_on_missing -f "${UFLOWCELLID}" | \
     "$SSPP_HOOK" >> "SampleSheet.csv.$counter"
+echo "Extracted new SampleSheet.csv.$counter from Ragic with filter ($SSPP_HOOK)"
 
 if [ ! -s "SampleSheet.csv.$counter" ] ; then
     echo "New SampleSheet.csv for ${FLOWCELLID} is empty - ie. not found in Ragic"
@@ -140,8 +141,9 @@ if [ ! -s "SampleSheet.csv.$counter" ] ; then
     exit 0
 fi
 
-# Now see if the new sheet is different.
-if diff -q "SampleSheet.csv.$counter" SampleSheet.csv ; then
+# Now see if the new sheet is different. We do want to ignore the Date line because
+# this can change if, e.g., we just push the run ID back.
+if diff -I '^Date,' -q "SampleSheet.csv.$counter" SampleSheet.csv ; then
     echo "SampleSheet.csv for ${FLOWCELLID} is already up-to-date"
     rm -f "SampleSheet.csv.$counter"
     exit 0
diff --git a/test/requirements.txt b/test/requirements.txt
index efc9f4a..e4ef0e3 100644
--- a/test/requirements.txt
+++ b/test/requirements.txt
@@ -2,5 +2,6 @@ coverage
 bashmocker==0.3.0
 pyyaml==6.0.1
 yamlloader<2
+setuptools==71.1.0
 snakemake==7.18.2
 rt==2.2.2
diff --git a/test/test_driver.py b/test/test_driver.py
index 1aac7c6..ae3ef35 100755
--- a/test/test_driver.py
+++ b/test/test_driver.py
@@ -234,7 +234,7 @@ def test_new(self, test_data=None):
 
         # This may or may not be mocked. If so, and REDO_HOURS_TO_LOOK_BACK is set, it should
         # be called.
-        if 'auto_redo.sh' in expected_calls and self.environment.get('REDO_HOURS_TO_LOOK_BACK'):
+        if 'auto_redo.sh' in expected_calls and self.environment.get('USE_RAGIC'):
             expected_calls['auto_redo.sh'] = [[]]
 
         # The call to rt_runticket_manager.py is non-deterministic, so we have to doctor it...
diff --git a/test/test_samplesheet_fetch.py b/test/test_samplesheet_fetch.py
index 979bc17..180b6a7 100755
--- a/test/test_samplesheet_fetch.py
+++ b/test/test_samplesheet_fetch.py
@@ -83,7 +83,7 @@ def test_replace_original(self):
         self.assertEqual(os.readlink('SampleSheet.csv'), 'SampleSheet.csv.1')
 
         self.assertEqual(last_stdout[0], "SampleSheet.csv renamed as SampleSheet.csv.0")
-        self.assertEqual(last_stdout[2], "SampleSheet.csv for XXXX is now linked to new SampleSheet.csv.1")
+        self.assertEqual(last_stdout[3], "SampleSheet.csv for XXXX is now linked to new SampleSheet.csv.1")
 
         with open("SampleSheet.csv") as fh:
             self.assertEqual(fh.read().rstrip(), 'this one')
@@ -112,7 +112,7 @@ def test_keep_original(self):
         self.assertEqual(os.readlink('SampleSheet.csv'), 'SampleSheet.csv.0')
 
         self.assertEqual(last_stdout[0], "SampleSheet.csv renamed as SampleSheet.csv.0")
-        self.assertEqual(last_stdout[1][:37], "New SampleSheet.csv for XXXX is empty")
+        self.assertEqual(last_stdout[2][:37], "New SampleSheet.csv for XXXX is empty")
 
 
     def test_none_found(self):
@@ -230,7 +230,7 @@ def test_always_touch(self):
         # Run the thingy again
         time.sleep(0.1)
         second_stdout = self.bm_run_fetch()
-        self.assertEqual(second_stdout[0], "SampleSheet.csv for XXXX is already up-to-date")
+        self.assertEqual(second_stdout[1], "SampleSheet.csv for XXXX is already up-to-date")
         self.assertGreater(os.lstat('SampleSheet.csv').st_mtime, utimestamp)
 
     def test_case_mismatch(self):
@@ -248,7 +248,7 @@ def test_case_mismatch(self):
         self.assertEqual(os.readlink('SampleSheet.csv'), 'SampleSheet.csv.1')
 
         self.assertEqual(last_stdout[0], "SampleSheet.csv renamed as SampleSheet.csv.0")
-        self.assertEqual(last_stdout[2], "SampleSheet.csv for jd7l6 is now linked to new SampleSheet.csv.1")
+        self.assertEqual(last_stdout[3], "SampleSheet.csv for jd7l6 is now linked to new SampleSheet.csv.1")
 
         with open("SampleSheet.csv") as fh:
             self.assertEqual(fh.read().rstrip(), '--empty_on_missing -f JD7L6')