From fb01a1219d91a22f2aa1f26fcad16f86075b98f4 Mon Sep 17 00:00:00 2001 From: RachelDuffin Date: Fri, 5 Jul 2024 01:48:55 +0100 Subject: [PATCH] Update readme, improve TSO500 log messages --- README.md | 56 ++++++++++++++++++++---------------------- wscleaner/__main__.py | 3 +-- wscleaner/wscleaner.py | 10 ++++---- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 9ef1bb9..a1b828f 100644 --- a/README.md +++ b/README.md @@ -2,49 +2,48 @@ The Synnovis Genome Informatics team use a linux workstation to manage sequencing files. These files are uploaded to the DNAnexus service for storage, however clearing the workstation is time intensive. Workstation Cleaner (wscleaner) automates the deletion of local directories that have been uploaded to the DNAnexus cloud storage service. -A RunFolderManager class will instatiate objects for local Runfolders, each of which has an associated DNA Nexus project object. The manager loops over the runfolders and deletes them if all checks pass. +A RunFolderManager class will instantiate objects for local runfolders, each of which has an associated DNAnexus project object. The manager loops over the runfolders and deletes them if all checks pass. DNAnexus projects are accessed with the dxpy module, a python wrapper for the DNAnexus API. -DNA Nexus projects are accessed with the dxpy module, a python wrapper for the DNA Nexus API. 
+## Protocol -When executed, Runfolders in the input (root) directory are deleted based on the following criteria: +When executed, runfolders in the input (root) directory are identified based on: +* Matching the expected runfolder regex pattern +Runfolders are identified for deletion if they meet the following criteria: * A single DNAnexus project is found matching the runfolder name -* All local FASTQ files are uploaded and in a 'closed' state -* X logfiles are present in the DNA Nexus project /Logfiles directory (NB X can be added as a command line argument - default is 5) +* All local FASTQ files are uploaded and in a 'closed' state (for TSO runfolders, there are no local fastqs so this check automatically passes) +* X logfiles are present in the DNAnexus project `automated_scripts_logfiles` directory (NB X can be added as a command line argument - default is 6) +* Runfolder's upload runfolder log file contains no errors -OR if the run is identified as a TSO500 run, based on: - * the bcl2fastq2_output.log file created by the automated scripts - AND - * Presence of `_TSO` in the human readable DNANexus project name +TSO runfolders must meet the following additional criteria to be identified for deletion: +* Presence of bcl2fastq2_output.log file +* Presence of `TSO500 run.` in the bcl2fastq log file +* Presence of `_TSO` in the human readable DNAnexus project name -A DNAnexus API key must be cached locally using the `--set-key` option. +## Usage +The script takes the following arguments, and can be run in either dry run mode (doesn't delete runfolders) or live mode (deletes runfolders). The script has been developed using python 3.10.6. 
-## Manual Usage +_**When running on the workstation, the conda environment must be activated prior to running the wscleaner command.**_ ``` -usage: wscleaner [-h] [--auth AUTH] [--dry-run] [--logfile LOGFILE] - [--min-age MIN_AGE] [--logfile-count LOGFILE_COUNT] - [--version] - root +usage: __main__.py [-h] --auth_token_file AUTH_TOKEN_FILE [--dry-run] --runfolders_dir RUNFOLDERS_DIR --log_dir LOG_DIR [--min-age MIN_AGE] + [--logfile-count LOGFILE_COUNT] [--version] -positional arguments: - root A directory containing runfolders to process - -optional arguments: +options: -h, --help show this help message and exit - --runfolders_dir A directory containing runfolders to process - --auth_token_file AUTH A text file containing the DNANexus authentication - token + --auth_token_file AUTH_TOKEN_FILE + A text file containing the DNANexus authentication token --dry-run Perform a dry run without deleting files + --runfolders_dir RUNFOLDERS_DIR + A directory containing runfolders to process + --log_dir LOG_DIR Directory to save log file to --min-age MIN_AGE The age (days) a runfolder must be to be deleted --logfile-count LOGFILE_COUNT - The number of logfiles a runfolder must have in - /Logfiles + The number of logfiles a runfolder must have in /Logfiles --version Print version ``` -**The conda environment must be activated prior to running the wscleaner command.** ### Dry run mode @@ -62,16 +61,13 @@ If running in production mode: conda activate python3.10.6 && python3 -m wscleaner --runfolders_dir $RUNFOLDERS_DIR --auth_token_file $AUTH_TOKEN_FILEPATH --log_dir $LOG_DIR ``` -## Test +## Testing Tests should be run and all passing prior to any new release. 
```bash -# Run from the cloned repo directory after installation python3 -m pytest -v --auth_token_file=$FULL_PATH_TO_FILE_CONTAINING_AUTH_TOKEN -mp.txt ``` -## License -Developed by Synnovis Genome Informatics +### Developed by Synnovis Genome Informatics \ No newline at end of file diff --git a/wscleaner/__main__.py b/wscleaner/__main__.py index 1b74ec3..c498bfe 100644 --- a/wscleaner/__main__.py +++ b/wscleaner/__main__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """wscleaner -Delete runfolders in a root directory on the condition that it has uploaded to DNA Nexus. +Delete runfolders in a root directory on the condition that it has uploaded to DNAnexus. Methods: cli_parser(): Parses command line arguments @@ -45,7 +45,6 @@ def cli_parser(): """Parses command line arguments. Args: None. The argparse.ArgumentParser auto-collects arguments from sys.args Returns: Argument parser object with a 'root' attribute if root directory given. - Otherwise, --set-key and --print-key exit after actions are performed. """ parser = argparse.ArgumentParser() parser.add_argument( diff --git a/wscleaner/wscleaner.py b/wscleaner/wscleaner.py index 004ce33..6b7182d 100644 --- a/wscleaner/wscleaner.py +++ b/wscleaner/wscleaner.py @@ -87,8 +87,8 @@ def TSO500_check(self): Checks if the run is a TSO500 run. These need to be cleaned up but do not contain fastqs Returns True if TSO run detected. 
""" - logfile_check = False project_name = False + logfile_check = False bcl2fastq_filepath = os.path.join(self.path, "bcl2fastq2_output.log") # ensure not trying to open files that don't exist if os.path.isdir(self.path) and os.path.exists(bcl2fastq_filepath): @@ -97,14 +97,14 @@ def TSO500_check(self): # take last line of the logfile - look for statement produced by automated scripts for TSO runs if demultiplexing_file.readlines()[-1].startswith("TSO500 run."): logfile_check = True - self.logger.debug( - f"bcl2fastq2_output.log for {self.name} contains the string expected for TSO500 runs" + self.logger.info( + f"{self.name} - bcl2fastq2_output.log contains the string expected for TSO500 runs" ) else: self.logger.debug( - f"bcl2fastq2_output.log for {self.name} DOES NOT contain expected TSO500 string" + f"{self.name} - bcl2fastq2_output.log DOES NOT contain expected TSO500 string" ) - # may be an issue identifying the DNAnexus project + # May be an issue identifying the DNAnexus project # get the dnanexus project name to assess if contains "_TSO" if self.dx_project.id: nexus_project_name = dxpy.describe(self.dx_project.id)["name"]