From ba5fc18bbe0a342dbe4ff51711bd29c8e9f7ca01 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 30 May 2019 09:47:07 +0100 Subject: [PATCH 01/15] Add wscleaner root dir. Add git ignore for python package files. --- .gitignore | 3 +++ wscleaner/DESIGN.md | 32 ++++++++++++++++++++++++++++++++ wscleaner/README.md | 36 ++++++++++++++++++++++++++++++++++++ wscleaner/setup.py | 24 ++++++++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 .gitignore create mode 100644 wscleaner/DESIGN.md create mode 100644 wscleaner/README.md create mode 100644 wscleaner/setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d560825 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.pyc +*.egg-info +config.json \ No newline at end of file diff --git a/wscleaner/DESIGN.md b/wscleaner/DESIGN.md new file mode 100644 index 0000000..8259925 --- /dev/null +++ b/wscleaner/DESIGN.md @@ -0,0 +1,32 @@ +# Workstation Cleaner Design Document + +Owner: Nana Mensah +Date: 30/05/19 +Status: Draft + +## Brief + +The Viapath Genome Informatics team use a linux workstation to manage sequencing files. These files are uploaded to the DNAnexus service for storage, however clearing the workstation is time intensive. + +## User Story + +As a Clinical Bioinformatician, I need to automate the deletion of sequencing folders that have been successfully backed up, so that I can free up time for other duties. + +## Functional requirements + +FR1. Accurately detect sequencing folders that have been successfully backed up +FR2. Delete old sequencing folders that are successfully backed up +FR3. Log all activity to a local logfile + +## Non-functional requirements + +NF1. Run from the Linux command line +NF2. Process runfolders within 24 hours +NF3. Use any available DNAnexus SDKs +NF4. 
Attempt to process all folders at least once + +## Design Summary + +A RunFolderManager class will instantiate objects for local Runfolders, each of which has an associated DNA Nexus project object. The manager loops over the runfolders and deletes them if all checks pass. + +DNA Nexus projects are accessed with the dxpy module, a python wrapper for the DNA Nexus API. Credentials are cached locally using the command-line option '--set-key'. diff --git a/wscleaner/README.md b/wscleaner/README.md new file mode 100644 index 0000000..af4641f --- /dev/null +++ b/wscleaner/README.md @@ -0,0 +1,36 @@ +# Workstation Cleaner + +Workstation Cleaner (wscleaner) deletes local directories that have been uploaded to the DNAnexus cloud storage service. + +When executed, Runfolders in the input (root) directory are deleted based on the following criteria: + +* A single DNAnexus project is found matching the runfolder name +* All local FASTQ files are uploaded and in a 'closed' state +* At least six logfiles are present in the DNA Nexus project /Logfiles directory + +A DNAnexus API key must be cached locally using the `--set-key` option. + +## Install + +```bash +git clone https://github.com/moka-guys/wscleaner.git +pip install ./wscleaner +``` + +## Usage + +```bash +wscleaner --set-key DNA_NEXUS_KEY # Locally caches the DNAnexus API key +wscleaner ROOT_DIRECTORY --logfile LOGFILE_PATH +``` + +## Test + +```bash +# Run from the cloned repo directory after installation +pytest . 
--auth_token DNA_NEXUS_KEY +``` + +## License + +Developed by Viapath Genome Informatics diff --git a/wscleaner/setup.py b/wscleaner/setup.py new file mode 100644 index 0000000..186120a --- /dev/null +++ b/wscleaner/setup.py @@ -0,0 +1,24 @@ +from setuptools import setup, find_packages + +setup(name='wscleaner', + version='1.0', + description='Package to remove uploaded runfolders from \ + the Viapath Genome Informatics NGS workstation', + url='https://github.com/NMNS93/wscleaner', + author='Nana Mensah', + author_email='gst-tr.MokaGuys@nhs.net', + license='MIT', + packages=find_packages(), + zip_safe=False, + + python_requires = '>=3.6.8', + install_requires = ['docutils>=0.3', 'dxpy==0.279.0', 'pytest==4.4.0', 'pytest-cov==2.6.1', + 'Sphinx==2.0.1', 'psutil==5.6.1'], + + package_data = {}, + + entry_points={ + 'console_scripts': 'wscleaner = wscleaner.main:main' + } + + ) From eeee376ff43cfa5866fcb0b8c47f1ad8cb86da0d Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 30 May 2019 09:47:29 +0100 Subject: [PATCH 02/15] Add wscleaner tests --- wscleaner/test/conftest.py | 39 +++++++++++ wscleaner/test/coverage.txt | 21 ++++++ wscleaner/test/generate.py | 30 +++++++++ wscleaner/test/test_all.py | 97 ++++++++++++++++++++++++++++ wscleaner/test/test_dir_1_fastqs.txt | 54 ++++++++++++++++ wscleaner/test/test_dir_2_fastqs.txt | 24 +++++++ 6 files changed, 265 insertions(+) create mode 100644 wscleaner/test/conftest.py create mode 100644 wscleaner/test/coverage.txt create mode 100644 wscleaner/test/generate.py create mode 100644 wscleaner/test/test_all.py create mode 100644 wscleaner/test/test_dir_1_fastqs.txt create mode 100644 wscleaner/test/test_dir_2_fastqs.txt diff --git a/wscleaner/test/conftest.py b/wscleaner/test/conftest.py new file mode 100644 index 0000000..6ac8d67 --- /dev/null +++ b/wscleaner/test/conftest.py @@ -0,0 +1,39 @@ +"""conftest.py + +Config for pytest. 
+""" +import pytest +import pathlib + +def pytest_addoption(parser): + """Add command line options to pytest""" + parser.addoption("--auth_token", action="store", default=None, help="A DNANexus authentication key") + +@pytest.fixture +def auth_token(request): + """Create pytest fixture from command line argument for authentication token""" + return request.config.getoption("--auth_token") + +@pytest.fixture(scope="session") +def data_test_runfolders(): + """A fixture that returns a list of tuples containing (runfolder_name, fastq_list_file).""" + return [ + ('190408_NB551068_0234_AHJ7MTAFXY_NGS265B', 'test/test_dir_1_fastqs.txt'), + ('190410_NB551068_0235_AHKGMGAFXY_NGS265C', 'test/test_dir_2_fastqs.txt') + ] + +@pytest.fixture(scope="session", autouse=True) +def create_test_dirs(request, data_test_runfolders): + """Create test data for testing. + + This is an autouse fixture with session scope, meaning it is run once before any tests are collected. + """ + for runfolder_name, fastq_list_file in data_test_runfolders: + # Create the runfolder directory as per Illumina spec + test_path = f'test/data/{runfolder_name}/Data/Intensities/BaseCalls' + pathlib.Path(test_path).mkdir(parents=True, exist_ok=True) + # Generate empty fastqfiles in runfolder + with open(fastq_list_file) as f: + fastq_list = f.read().splitlines() + for fastq_file in fastq_list: + pathlib.Path(test_path, fastq_file).touch(mode=777, exist_ok=True) diff --git a/wscleaner/test/coverage.txt b/wscleaner/test/coverage.txt new file mode 100644 index 0000000..d94e13e --- /dev/null +++ b/wscleaner/test/coverage.txt @@ -0,0 +1,21 @@ +============================= test session starts ============================== +platform linux -- Python 3.6.8, pytest-4.4.0, py-1.8.0, pluggy-0.9.0 +rootdir: /home/nana/Documents/MOKAGUYS/wscleaner +plugins: cov-2.6.1 +collected 9 items + +test/test_all.py ......... 
[100%] + +----------- coverage: platform linux, python 3.6.8-final-0 ----------- +Name Stmts Miss Cover +-------------------------------------------------- +wscleaner/__init__.py 0 0 100% +wscleaner/auth.py 35 14 60% +wscleaner/lib.py 101 6 94% +wscleaner/main.py 43 26 40% +wscleaner/mokaguys_logger.py 10 5 50% +-------------------------------------------------- +TOTAL 189 51 73% + + +========================== 9 passed in 44.68 seconds =========================== diff --git a/wscleaner/test/generate.py b/wscleaner/test/generate.py new file mode 100644 index 0000000..5f45155 --- /dev/null +++ b/wscleaner/test/generate.py @@ -0,0 +1,30 @@ +"""generate.py + +Generates dummy data for testing. +""" + +import pathlib + +def data_test_runfolders(): + """A fixture that returns a list of tuples containing (runfolder_name, fastq_list_file).""" + return [ + ('190408_NB551068_0234_AHJ7MTAFXY_NGS265B', 'test/test_dir_1_fastqs.txt'), + ('190410_NB551068_0235_AHKGMGAFXY_NGS265C', 'test/test_dir_2_fastqs.txt') + ] + +def create_test_dirs(test_data): + """Create test data for testing. + + This is an autouse fixture with session scope, meaning it is run once before any tests are collected. 
+ """ + for runfolder_name, fastq_list_file in test_data: + # Create the runfolder directory as per Illumina spec + test_path = f'test/data/{runfolder_name}/Data/Intensities/BaseCalls' + pathlib.Path(test_path).mkdir(parents=True, exist_ok=True) + # Generate empty fastqfiles in runfolder + with open(fastq_list_file) as f: + fastq_list = f.read().splitlines() + for fastq_file in fastq_list: + pathlib.Path(test_path, fastq_file).touch(mode=777, exist_ok=True) + +create_test_dirs(data_test_runfolders()) \ No newline at end of file diff --git a/wscleaner/test/test_all.py b/wscleaner/test/test_all.py new file mode 100644 index 0000000..27106cd --- /dev/null +++ b/wscleaner/test/test_all.py @@ -0,0 +1,97 @@ +import pytest +import dxpy +from pathlib import Path +import argparse +import json +import sys +import shutil + +from pkg_resources import resource_filename +from wscleaner.auth import SetKeyAction, dx_set_auth +from wscleaner.main import cli_parser +from wscleaner.lib import RunFolderManager, RunFolder + +# AUTH: Set DNAnexus authentication for tests +def test_auth(auth_token): + """Test that an authentication token is passed to pytest as a command line argument""" + assert auth_token != None + +@pytest.fixture(autouse=True) +def set_auth(auth_token): + """Set the authenticatino token for all subsequent tests""" + dx_set_auth(auth_token) + + +# FIXTURES: Define functions to use in downstream tests +@pytest.fixture +def rfm(): + """Return an instance of the runfolder manager with the test/data directory""" + test_path = Path(str(Path(__file__).parent), 'data') + rfm = RunFolderManager(str(test_path)) + return rfm + + +# TESTS +class TestAuth: + def test_set_auth(self, auth_token): + """test that the authentication token is set correctly""" + authobj = dx_set_auth(auth_token) + assert dxpy.SECURITY_CONTEXT['auth_token'] == auth_token + + def test_setkey(self, monkeypatch, auth_token): + """test that the --set-key command-line argument caches the authentication 
token""" + # Set setkey cli arguments + sys.argv = ['python', 'wscleaner', '--set-key', auth_token] + # Mock Action object + # Parse args + with pytest.raises(SystemExit) as err: + args = cli_parser() + # Make assertions on created config file + fn = resource_filename('wscleaner','config.json') + with open(fn, 'r') as f: + assert auth_token in f.read() + # Delete temp config + Path(fn).unlink() + +class TestFolders: + def test_runfolders_ready(self, data_test_runfolders, rfm): + """Test that runfolders in the test directory pass checks for deletion. Est. 20 seconds.""" + for runfolder in rfm.find_runfolders(min_age=0): + assert all([runfolder.dx_project, rfm.check_fastqs(runfolder), rfm.check_logfiles(runfolder)]) + + def test_find_fastqs(self, data_test_runfolders): + """Tests the correct number of fastqs are present in local and uploaded directories""" + for runfolder_name, fastq_list_file in data_test_runfolders: + rf = RunFolder(Path('test/data', runfolder_name)) + with open(fastq_list_file) as f: + test_folder_fastqs = len(f.readlines()) + assert len(rf.find_fastqs()) == test_folder_fastqs + assert len(rf.dx_project.find_fastqs()) == test_folder_fastqs + + def test_min_age(self, rfm): + """test that the runfolder age function records age""" + runfolders = rfm.find_runfolders(min_age=0) + # Asser that this test runfolder was recently generated + assert all([ rf.age < 14 for rf in runfolders ]) + +class TestRFM: + def test_find_runfolders(self, data_test_runfolders, rfm): + """test the runfolder manager directory finding function""" + rfm_runfolders = rfm.find_runfolders(min_age=0) + runfolder_names = [str(folder.path.name) for folder in rfm_runfolders] + test_runfolder_names = [ rf for rf, fastq_list_file in data_test_runfolders ] + runfolders_bools = [ item in runfolder_names for item in test_runfolder_names ] + assert all(runfolders_bools) + + def test_validate(self, rfm): + """test the runfoldermanager _validate function correctly reads the path""" + assert 
rfm.root.name == Path(str(Path(__file__).parent), 'data').name + + def test_delete(self, monkeypatch, rfm): + """test that the runfolder manager delete call creates the log of deleted files. + Here, the pytest monkeypatch fixture is used to overwrite the delete function and persist the test directories. + """ + test_folder = rfm.find_runfolders(min_age=0)[0] + monkeypatch.setattr(shutil, 'rmtree', lambda x: 'TEST_DELETED') + rfm.delete(test_folder) + assert test_folder.name in rfm.deleted diff --git a/wscleaner/test/test_dir_1_fastqs.txt b/wscleaner/test/test_dir_1_fastqs.txt new file mode 100644 index 0000000..a813f35 --- /dev/null +++ b/wscleaner/test/test_dir_1_fastqs.txt @@ -0,0 +1,54 @@ +NGS265B_77_217354_KB_M_IMDv2GSD_Pan1063_S24_R2_001.fastq.gz +NGS265B_74_215597_MA_U_NGSEQ1FH_Pan1965_S22_R1_001.fastq.gz +NGS265B_79_216375_MK_F_IMDv2GSD_Pan1063_S26_R1_001.fastq.gz +NGS265B_57_216008_RH_F_NGSEQ1ALP_Pan1965_S5_R2_001.fastq.gz +NGS265B_63_215584_AM_F_NGSEQ1FH_Pan1965_S11_R2_001.fastq.gz +NGS265B_65_215862_FA_F_NGSEQ1FH_Pan1965_S13_R2_001.fastq.gz +NGS265B_78_216530_JD_M_IMDv2GSD_Pan1063_S25_R1_001.fastq.gz +NGS265B_61_215368_NW_F_NGSEQ1Cys_Pan1965_S9_R1_001.fastq.gz +NGS265B_57_216008_RH_F_NGSEQ1ALP_Pan1965_S5_R1_001.fastq.gz +NGS265B_56_215743_KC_F_NGSEQ1ALP_Pan1965_S4_R2_001.fastq.gz +NGS265B_67_215593_AB_M_NGSEQ1FH_Pan1965_S15_R2_001.fastq.gz +NGS265B_56_215743_KC_F_NGSEQ1ALP_Pan1965_S4_R1_001.fastq.gz +NGS265B_70_215922_AF_M_NGSEQ1FH_Pan1965_S18_R2_001.fastq.gz +NGS265B_55_215661_EW_F_NGSEQ1ALP_Pan1965_S3_R1_001.fastq.gz +NGS265B_66_215583_HA_M_NGSEQ1FH_Pan1965_S14_R1_001.fastq.gz +NGS265B_72_215598_FA_U_NGSEQ1FH_Pan1965_S20_R2_001.fastq.gz +NGS265B_78_216530_JD_M_IMDv2GSD_Pan1063_S25_R2_001.fastq.gz +NGS265B_67_215593_AB_M_NGSEQ1FH_Pan1965_S15_R1_001.fastq.gz +NGS265B_64_215921_BM_F_NGSEQ1FH_Pan1965_S12_R2_001.fastq.gz +NGS265B_75_216919_DD_M_IMDv2GSD_Pan1063_S23_R2_001.fastq.gz +NGS265B_74_215597_MA_U_NGSEQ1FH_Pan1965_S22_R2_001.fastq.gz 
+NGS265B_62_215592_HA_F_NGSEQ1FH_Pan1965_S10_R1_001.fastq.gz +NGS265B_69_215594_MA_M_NGSEQ1FH_Pan1965_S17_R1_001.fastq.gz +NGS265B_58_213847_TS_M_NGSEQ1ALP_Pan1965_S6_R1_001.fastq.gz +NGS265B_63_215584_AM_F_NGSEQ1FH_Pan1965_S11_R1_001.fastq.gz +NGS265B_55_215661_EW_F_NGSEQ1ALP_Pan1965_S3_R2_001.fastq.gz +NGS265B_54_215749_VT_F_NGSEQ1ALP_Pan1965_S2_R1_001.fastq.gz +NGS265B_80_215482_AK_F_IMDv2UCD_Pan1063_S27_R2_001.fastq.gz +NGS265B_71_215696_WS_M_NGSEQ1FH_Pan1965_S19_R1_001.fastq.gz +NGS265B_61_215368_NW_F_NGSEQ1Cys_Pan1965_S9_R2_001.fastq.gz +NGS265B_69_215594_MA_M_NGSEQ1FH_Pan1965_S17_R2_001.fastq.gz +NGS265B_68_215591_SA_M_NGSEQ1FH_Pan1965_S16_R2_001.fastq.gz +NGS265B_53_215680_LW_F_NGSEQ1ALP_Pan1965_S1_R2_001.fastq.gz +NGS265B_75_216919_DD_M_IMDv2GSD_Pan1063_S23_R1_001.fastq.gz +NGS265B_79_216375_MK_F_IMDv2GSD_Pan1063_S26_R2_001.fastq.gz +NGS265B_73_215590_SA_U_NGSEQ1FH_Pan1965_S21_R1_001.fastq.gz +NGS265B_72_215598_FA_U_NGSEQ1FH_Pan1965_S20_R1_001.fastq.gz +NGS265B_77_217354_KB_M_IMDv2GSD_Pan1063_S24_R1_001.fastq.gz +NGS265B_80_215482_AK_F_IMDv2UCD_Pan1063_S27_R1_001.fastq.gz +NGS265B_60_215663_HA_M_NGSEQ1ALP_Pan1965_S8_R2_001.fastq.gz +NGS265B_60_215663_HA_M_NGSEQ1ALP_Pan1965_S8_R1_001.fastq.gz +NGS265B_73_215590_SA_U_NGSEQ1FH_Pan1965_S21_R2_001.fastq.gz +NGS265B_54_215749_VT_F_NGSEQ1ALP_Pan1965_S2_R2_001.fastq.gz +NGS265B_65_215862_FA_F_NGSEQ1FH_Pan1965_S13_R1_001.fastq.gz +NGS265B_71_215696_WS_M_NGSEQ1FH_Pan1965_S19_R2_001.fastq.gz +NGS265B_66_215583_HA_M_NGSEQ1FH_Pan1965_S14_R2_001.fastq.gz +NGS265B_53_215680_LW_F_NGSEQ1ALP_Pan1965_S1_R1_001.fastq.gz +NGS265B_64_215921_BM_F_NGSEQ1FH_Pan1965_S12_R1_001.fastq.gz +NGS265B_58_213847_TS_M_NGSEQ1ALP_Pan1965_S6_R2_001.fastq.gz +NGS265B_70_215922_AF_M_NGSEQ1FH_Pan1965_S18_R1_001.fastq.gz +NGS265B_59_216014_HB_M_NGSEQ1ALP_Pan1965_S7_R1_001.fastq.gz +NGS265B_68_215591_SA_M_NGSEQ1FH_Pan1965_S16_R1_001.fastq.gz +NGS265B_62_215592_HA_F_NGSEQ1FH_Pan1965_S10_R2_001.fastq.gz 
+NGS265B_59_216014_HB_M_NGSEQ1ALP_Pan1965_S7_R2_001.fastq.gz diff --git a/wscleaner/test/test_dir_2_fastqs.txt b/wscleaner/test/test_dir_2_fastqs.txt new file mode 100644 index 0000000..e960fb7 --- /dev/null +++ b/wscleaner/test/test_dir_2_fastqs.txt @@ -0,0 +1,24 @@ +NGS265C_85_216424_SA_F_CMCMD_Pan2022_S5_R2_001.fastq.gz +NGS265C_83_217371_KL_F_CMCMD_Pan2022_S4_R1_001.fastq.gz +NGS265C_81_216659_EB_F_CMCMD_Pan2022_S2_R2_001.fastq.gz +NGS265C_83_217371_KL_F_CMCMD_Pan2022_S4_R2_001.fastq.gz +NGS265C_94_216362_GN_M_CMCMD_Pan2022_S11_R1_001.fastq.gz +NGS265C_90_214535_MI_M_CMCMD_Pan2022_S9_R2_001.fastq.gz +NGS265C_82_216242_IN_F_CMCMD_Pan2022_S3_R2_001.fastq.gz +NGS265C_89_217353_JS_M_CMCMD_Pan2022_S8_R1_001.fastq.gz +NGS265C_88_217359_AA_M_CMCMD_Pan2022_S7_R1_001.fastq.gz +NGS265C_91_217055_IL_M_CMCMD_Pan2022_S10_R1_001.fastq.gz +NGS265C_91_217055_IL_M_CMCMD_Pan2022_S10_R2_001.fastq.gz +NGS265C_85_216424_SA_F_CMCMD_Pan2022_S5_R1_001.fastq.gz +NGS265C_39_214774_AH_M_NGSEQ2DMD_Pan1158_S1_R1_001.fastq.gz +NGS265C_39_214774_AH_M_NGSEQ2DMD_Pan1158_S1_R2_001.fastq.gz +NGS265C_96_216127_DO_M_CMCMD_Pan2022_S12_R1_001.fastq.gz +NGS265C_81_216659_EB_F_CMCMD_Pan2022_S2_R1_001.fastq.gz +NGS265C_82_216242_IN_F_CMCMD_Pan2022_S3_R1_001.fastq.gz +NGS265C_86_213909_DP_F_CMCMD_Pan2022_S6_R2_001.fastq.gz +NGS265C_86_213909_DP_F_CMCMD_Pan2022_S6_R1_001.fastq.gz +NGS265C_94_216362_GN_M_CMCMD_Pan2022_S11_R2_001.fastq.gz +NGS265C_96_216127_DO_M_CMCMD_Pan2022_S12_R2_001.fastq.gz +NGS265C_90_214535_MI_M_CMCMD_Pan2022_S9_R1_001.fastq.gz +NGS265C_89_217353_JS_M_CMCMD_Pan2022_S8_R2_001.fastq.gz +NGS265C_88_217359_AA_M_CMCMD_Pan2022_S7_R2_001.fastq.gz From ad2882c895590997711f3c45cfc709758a01129c Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 30 May 2019 09:47:54 +0100 Subject: [PATCH 03/15] Add wscleaner package version 1 --- wscleaner/wscleaner/__init__.py | 0 wscleaner/wscleaner/auth.py | 83 +++++++++ wscleaner/wscleaner/lib.py | 222 +++++++++++++++++++++++++ 
wscleaner/wscleaner/main.py | 84 ++++++++++ wscleaner/wscleaner/mokaguys_logger.py | 41 +++++ 5 files changed, 430 insertions(+) create mode 100644 wscleaner/wscleaner/__init__.py create mode 100644 wscleaner/wscleaner/auth.py create mode 100644 wscleaner/wscleaner/lib.py create mode 100644 wscleaner/wscleaner/main.py create mode 100644 wscleaner/wscleaner/mokaguys_logger.py diff --git a/wscleaner/wscleaner/__init__.py b/wscleaner/wscleaner/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/wscleaner/wscleaner/auth.py b/wscleaner/wscleaner/auth.py new file mode 100644 index 0000000..54be2bb --- /dev/null +++ b/wscleaner/wscleaner/auth.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +"""auth.py + +Utlily classes for the workstation cleaner module. + +Methods: + get_config(): Read the DNANexus API token from the application cache file + dx_set_auth(): Set the DNAnexus authentication token used in each instance of the application + +Objects: + SetKeyAction: Set the DNAnexus authentication token used in future instances of the application and exit + PrintKeyAction: Print the cached DNAnexus authentication key +""" + +from pathlib import Path +from pkg_resources import resource_filename +import json +import argparse +import dxpy + +import logging +logger = logging.getLogger(__name__) + +CONFIG_FILE = 'config.json' + +def get_config(config=CONFIG_FILE): + """Read the DNANexus API token from the application cache file + + Returns: + filename (object): A python file object + Raises: + AttirbuteError: Config file not found. + """ + # Return the file object containing the cached DNAnexus token if it exists + filename = resource_filename('wscleaner',config) + logger.debug(f'Config: {Path(filename).name}') + if Path(filename).is_file(): + return filename + else: + raise AttributeError('Config file not found. 
Set auth key with --set-key.') + +def dx_set_auth(auth_token=None): + """Set the DNAnexus authentication token used in future instances of the application and exit + + Args: + auth_token (str): A DNAnexus authentication key""" + if auth_token: + security_context = {'auth_token_type': 'Bearer', 'auth_token': auth_token} + else: + filename = get_config() + with open(filename, 'r') as f: + # Password is written to the cache as a dictionary. Loaded here using json module + pwd = json.load(f) + security_context = {'auth_token_type': 'Bearer', 'auth_token': pwd['auth_token']} + dxpy.set_security_context(security_context) + +class SetKeyAction(argparse.Action): + """Set the DNAnexus authentication key based on command line arguments and exit the program. + + Inherits from argparse.Action, which initiates __call__() when the linked argument is present. + """ + # Override argparse.Action.__call__() with desired behaviour + def __call__(self, parser, namespace, values, option_string=None, config=CONFIG_FILE): + filename = resource_filename('wscleaner',config) + with open(filename, 'w') as f: + # 'values' contains authentication token given on the command line. Store for future + # wscleaner calls to set as the DNAnexus dxpy security context. + json.dump({'auth_token': values}, f) + parser.exit() + + +class PrintKeyAction(argparse.Action): + """Print the cached DNAnexus authentication key + + Inherits from argparse.Action, which initiates __call__() when the linked argument is present. 
+ """ + # Override argparse.Action.__call__() with desired behaviour + def __call__(self, parser, namespace, values, option_string=None): + filename = get_config() + with open(filename, 'r') as f: + pwd = json.load(f) + print(pwd) + parser.exit() diff --git a/wscleaner/wscleaner/lib.py b/wscleaner/wscleaner/lib.py new file mode 100644 index 0000000..3869edc --- /dev/null +++ b/wscleaner/wscleaner/lib.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +"""lib.py + +Utlily classes for the workstation cleaner module. + +Objects: + RunFolder: A local directory containing files with the 'fastq.gz' extension + DxProjectRunFolder: A DNAnexus project + RunFolderManager: Contains methods for finding, checking and deleting runfolders in a root directory. +""" + +from pathlib import Path +import dxpy +import shutil +import time + +from functools import partial + +import logging +logger = logging.getLogger(__name__) + +class RunFolder(): + """A local directory containing files with the 'fastq.gz' extension + + Arguments: + path (str): The path of a local directory + Attributes: + path (Pathlib.Path): A path object created from the input directory + name (str): The runfolder/directory name + dx_project (DxProjectRunfolder): A DX Project object + age (int): Age of the runfolder in days + Methods: + find_fastqs: Returns a list of local files with the 'fastq.gz' extension + """ + def __init__(self, path): + self.logger = logging.getLogger(__name__ + '.RunFolder') + self.path = Path(path) + self.name = self.path.name + self.logger.debug(f'Initiating RunFolder instance for {self.name}') + self.dx_project = DxProjectRunFolder(self.name) + + @property + def age(self): + """Returns runfolder age in days""" + age_in_seconds = time.time() - self.path.stat().st_mtime + age_in_days = age_in_seconds // (24 * 3600) + self.logger.debug(f'{self.name} age is {age_in_days}') + return age_in_days + + def find_fastqs(self, count=False): + """Returns a list or count of local files with the 'fastq.gz' 
extension + Args: + count(bool): Returns number of fastqs if True. + """ + # Find paths of files with fastq.gz extension + fastq_paths = self.path.rglob('*.fastq.gz') + # Sort fastq filenames for cleaner logfile outputs + fastq_filenames_unsorted = [ path.name for path in fastq_paths ] + fastq_filenames = sorted(fastq_filenames_unsorted) + # Return number of fastqs if count is True, otherwise return fastq file names + if count: + self.logger.debug(f'{self.name} contains {len(fastq_filenames)} fastq files') + return len(fastq_filenames) + else: + self.logger.debug(f'{self.name} contains {len(fastq_filenames)} fastq files: {fastq_filenames}') + return fastq_filenames + + +class DxProjectRunFolder(): + """A DNAnexus project. + + Arguments: + runfolder_name (str): The name of a local runfolder + Attributes: + id (str): A DNAnexus project ID. Project matches input runfolder based on business rules. + Methods: + find_fastqs: Returns a list of files in the DNAnexus project (self.id) with the fastq.gz extension + count_logfiles: Count logfiles in the DNAnexus project (self.id). Logfiles are in an expected location + """ + def __init__(self, runfolder_name): + self.logger = logging.getLogger(__name__ + '.DXProjectRunFolder') + self.runfolder = runfolder_name + self.id = self.__dx_find_one_project() + + def find_fastqs(self): + """Returns a list of files in the DNAnexus project (self.id) with the fastq.gz extension""" + fastq_regex = 'fastq.gz' + # Search dnanexus for files with the fastq.gz extension. + # name_mode='regexp' tells dxpy to look for any occurence of 'fastq.gz' in the filename + search_response = dxpy.find_data_objects( + project=self.id, classname='file', name='fastq.gz', name_mode='regexp' + ) + file_ids = [ result['id'] for result in search_response ] + + # Gather a list of uploaded fastq files with the state 'closed', indicating a completed upload. 
+ fastq_filenames_unsorted = [] + for dx_file in file_ids: + file_description = dxpy.describe(dx_file) + if file_description['state'] == 'closed': + fastq_filenames_unsorted.append(file_description['name']) + # Sort fastq filenames for cleaner logfile output + fastq_filenames = sorted(fastq_filenames_unsorted) + self.logger.debug(f'{self.id} contains {len(fastq_filenames)} "closed" fastq files: {fastq_filenames}') + return fastq_filenames + + def count_logfiles(self): + """Count logfiles in the DNAnexus project (self.id). Logfiles are in an expected location. + Returns: + logfile_count (int): A count of logfiles""" + # Set uploaded runfolder name. Runfolder is renamed upon upload to the DNANexus project + # without the first four characters + uploaded_runfolder = dxpy.describe(self.id)['name'][4:] + # Set logfiles locatoin. This is expected in 'Logfiles/', a subdirectory of the uploaded runfolder + logfile_dir = str(Path('/',uploaded_runfolder,'Logfiles')) + logfile_list = dxpy.find_data_objects(project=self.id, folder=logfile_dir, classname='file') + logfile_count = len(list(logfile_list)) + return logfile_count + + def __dx_find_one_project(self): + """Find a single DNAnexus project from the input runfolder name + + Returns: + A DNAnexus project ID. If the search fails, returns None. + """ + try: + # Search for the project matching self.runfolder. + # name_mode='regexp' - look for any occurence of the runfolder name in the project name. + # Setting more_ok/zero_ok to False ensures only one project is succesfully returned. 
+ project = dxpy.find_one_project(name=self.runfolder, name_mode='regexp', more_ok=False, zero_ok=False) + self.logger.debug(f'{self.runfolder} DNAnexus project: {project["id"]}') + return project['id'] + except dxpy.exceptions.DXSearchError: + # Catch exception and raise none + self.logger.debug(f'0 or >1 DNAnexus projects found for {self.runfolder}') + return None + + def __bool__(self): + """Boolean expressions on class instances will return True if a single DNAnexus project was found.""" + if self.id: + return True + else: + return False + +class RunFolderManager(): + """Contains methods for finding, checking and deleting runfolders in a root directory. + + Args: + directory (str): A parent directory containing runfolders to process + Attributes: + root(pathlib.Path): A path object to the root directory + deleted(List): A list of deleted runfolders populated by calls to self.delete() + Methods: + find_runfolders(): Search the parent directory for subdirectories containing fastq.gz files. + Returns wscleaner.lib.RunFolder objects. + check_fastqs(): Returns true if a runfolder's fastq.gz files match those in it's DNAnexus project. + check_logfiles(): Returns true if a runfolder's DNAnexus project contains 6 logfiles in the + expected location + delete(): Delete the local runfolder from the root directory and append name to self.deleted. + Raises: + __validate():ValueError: The directory passed to the class instance does not exist. + """ + def __init__(self, directory): + self.logger = logging.getLogger(__name__ + '.RunFolderManager') + self.__validate(directory) + self.root = Path(directory) + self.deleted = [] # Delete runfolders appended here by self.deleted + + def __validate(self, directory): + """Check that input directory exists. 
Log and raise error if otherwise.""" + try: + assert Path(directory).is_dir() + except AssertionError: + self.logger.error(f'Directory does not exist: {directory}', exc_info=True) + raise + + def find_runfolders(self, min_age=14): + """Search the parent directory for subdirectories containing fastq.gz files. + Args: + min_age(int): Minimum age in days of runfolders returned. + Returns: + runfolder_objects(list): List of wscleaner.lib.RunFolder objects. + """ + subdirectories = self.root.iterdir() + runfolder_objects = [] + for directory in subdirectories: + rf = RunFolder(directory) + # Criteria for runfolder: Older than or equal to min_age and containsn fastq.gz files + if (rf.age >= min_age) and (rf.find_fastqs(count=True) > 0): + self.logger.debug(f'{rf.name} IS RUNFOLDER.') + runfolder_objects.append(rf) + else: + self.logger.debug(f'{rf.name} IS NOT RUNFOLDER.') + return runfolder_objects + + def check_fastqs(self, runfolder): + """Returns true if a runfolder's fastq.gz files match those in it's DNAnexus project.""" + dx_fastqs = runfolder.dx_project.find_fastqs() + local_fastqs = runfolder.find_fastqs() + fastqs_equal = sorted(dx_fastqs) == sorted(local_fastqs) + # Fastq files with "Undetermined" in their name may not be uploaded to DNAnexus. + # We accept local runfolders with 2 more fastq files than their dna nexus project. 
+ equal_without_undetermined_fastqs = ( + len(local_fastqs) == len(dx_fastqs) + 2 and + all([fastq in local_fastqs for fastq in dx_fastqs]) + ) + fastq_bool = fastqs_equal or equal_without_undetermined_fastqs + self.logger.debug(f'{runfolder.name} FASTQ BOOL: {fastq_bool}') + return fastq_bool + + def check_logfiles(self, runfolder): + """Returns true if a runfolder's DNAnexus project contains 6 logfiles in the + expected location""" + dx_logfiles = runfolder.dx_project.count_logfiles() + logfile_bool = dx_logfiles >= 6 + self.logger.debug(f'{runfolder.name} LOGFILE BOOL: {logfile_bool}') + return logfile_bool + + def delete(self, runfolder): + """Delete the local runfolder from the root directory and append name to self.deleted.""" + self.deleted.append(runfolder.name) + shutil.rmtree(runfolder.path) + self.logger.debug(f'{runfolder.name} DELETED.') diff --git a/wscleaner/wscleaner/main.py b/wscleaner/wscleaner/main.py new file mode 100644 index 0000000..fa0a971 --- /dev/null +++ b/wscleaner/wscleaner/main.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +"""wscleaner + +Delete runfolders in a root directory on the condition that it has uploaded to DNA Nexus. + +Methods: + cli_parser(): Parses command line arguments + main(): Process input directory or API keys +""" + +import argparse +import logging +import pkg_resources +from wscleaner import mokaguys_logger +from wscleaner.auth import SetKeyAction, PrintKeyAction, dx_set_auth +from wscleaner.lib import RunFolder, RunFolderManager + + + +def cli_parser(): + """Parses command line arguments. + Args: None. The argparse.ArgumentParser auto-collects arguments from sys.args + Returns: Argument parser object with a 'root' attribute if root directory given. + Otherwise, --set-key and --print-key exit after actions are performed. + """ + parser = argparse.ArgumentParser() + # argparse API for adding custom routines. 
SetKeyAction and PrintKeyAction are classes with + # routines that exit the software after an action is performed. + parser.register('action', 'setkey', SetKeyAction) + parser.register('action', 'printkey', PrintKeyAction) + # Define CLI arguments + parser.add_argument('--set-key', action='setkey', help='Cache a DNA Nexus API key') + parser.add_argument('--print-key', nargs=0, action='printkey', help='Print the cached DNA Nexus API key') + parser.add_argument('root', help='A directory containing runfolders to process') + parser.add_argument('--logfile', help='A path for the application logfile', default='mokaguys_logger.log') + # Get version from setup.py as version CLI response + version_number = pkg_resources.require("wscleaner")[0].version + parser.add_argument('--version', help='Programe version', action='version', version=f"wscleaner v{version_number}") + args = parser.parse_args() + return args + +def main(): + # Parse CLI arguments. Some arguments will exit the program intentionally. See docstring for detail. + args = cli_parser() + + # Setup logging for module. 
Submodules inherit log handlers and filters + mokaguys_logger.log_setup(args.logfile) + logger = logging.getLogger(__name__) + logger.info(f'START') + + # Setup dxpy with cached authentication token + dx_set_auth() + + # Set root directory and search it for runfolders + RFM = RunFolderManager(args.root) + logger.info(f'Root directory {args.root}') + local_runfolders = RFM.find_runfolders(min_age=0) + logger.debug(f'Found local runfolders: {[rf.name for rf in local_runfolders]}') + + for runfolder in local_runfolders: + logger.info(f'Processing {runfolder.name}') + # Delete runfolder if it meets the backup criteria + # runfolder.dx_project is evaluated first as following criteria checks depend on it + if runfolder.dx_project: + fastqs_uploaded = RFM.check_fastqs(runfolder) + logfiles_uploaded = RFM.check_logfiles(runfolder) + if fastqs_uploaded and logfiles_uploaded: + RFM.delete(runfolder) + logger.info(f'{runfolder.name} - DELETED') + elif not fastqs_uploaded: + logger.warning(f'{runfolder.name} - FASTQ MISMATCH') + elif not logfiles_uploaded: + logger.warning(f'{runfolder.name} - LOGFILE MISMATCH') + else: + logger.warning(f'{runfolder.name} - DX PROJECT MISMATCH') + + # Record runfolders removed by this iteration + logger.info(f'Runfolders deleted in this instance: {RFM.deleted}') + logger.info(f'END') + # END + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/wscleaner/wscleaner/mokaguys_logger.py b/wscleaner/wscleaner/mokaguys_logger.py new file mode 100644 index 0000000..83f605b --- /dev/null +++ b/wscleaner/wscleaner/mokaguys_logger.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +"""mokaguys_logger.py + +Log messages using the python standard library logging module. 
+ +Version: 1.0 +Timestamp: 30/05/19 +""" + +import logging +from logging.config import dictConfig + +def log_setup(logfile_name, syslog='/dev/log'): + """Setup application logging using python's standard library logging module + + Args: + logfile_name(str): The name of the output logfile written to by the file handler + syslog(str): Output target for the system log handler + """ + logging_config = dict( + version=1.0, + formatters={'log_formatter': {'format': "{asctime} {name}.{module}: {levelname} - {message}", + 'style': '{', 'datefmt': r'%Y-%m-%d %H:%M:%S'}}, + handlers={ + # DEBUG message are ommitted from the console output by setting the stream handler level + # to INFO, making console outputs easier to read. DEBUG messages are still written to + # the application logfile and system log. + 'stream_handler': {'class': 'logging.StreamHandler', 'formatter': 'log_formatter', 'level': logging.INFO}, + 'file_handler': {'class': 'logging.FileHandler', 'formatter': 'log_formatter', 'level': logging.DEBUG, + 'filename': logfile_name}, + 'syslog_handler': {'class': 'logging.handlers.SysLogHandler', 'formatter': 'log_formatter', 'level': logging.DEBUG, + 'address': syslog}}, + root={'handlers': ['file_handler', 'stream_handler', 'syslog_handler'], 'level': logging.DEBUG} + ) + dictConfig(logging_config) + + +if __name__ == '__main__': + log_setup() + log = logging.getLogger('TEST') + log.info('TEST') From 0c0d3e401d6162a42cbbeff738436c18d34b5171 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 30 May 2019 09:54:43 +0100 Subject: [PATCH 04/15] Add wscleaner 1.0 usage to readme --- README.md | 37 +++++++++++++++++++++++++++++++++---- wscleaner/README.md | 7 ++++--- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index babc175..b76a4d3 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,25 @@ # Workstation Housekeeping v1.4 + Scripts to manage data on the NGS workstation --- ## backup_runfolder.py + Uploads an Illumina runfolder to 
DNANexus. -### Quickstart -``` - usage: backup_runfolder.py [-h] -i RUNFOLDER [-a AUTH_TOKEN] [--ignore IGNORE] [-p PROJECT] [--logpath LOGPATH] +### Usage + +```bash +backup_runfolder.py [-h] -i RUNFOLDER [-a AUTH_TOKEN] [--ignore IGNORE] [-p PROJECT] [--logpath LOGPATH] ``` ### What are the dependencies for this script? + This tool requires the DNAnexus utilities `ua` (upload agent) and `dx` (DNAnexus toolkit) to be available in the system PATH. Python3 is required, and this tool uses packages from the standard library. ### How does this tool work? + * The script parses the input parameters, asserting that the given runfolder exists. * If the `-p` option is given, the script attempts to find a matching DNAnexus project. Otherwise, it looks for a single project matching the runfolder name. If more or less than 1 project matches, the script logs an error and exits. * The runfolder is traversed and a list of files in each folder is obtained. If any comma-separated strings passed to the `--ignore` argument are present within the filepath, or filename the file is excluded. @@ -26,14 +31,38 @@ This tool requires the DNAnexus utilities `ua` (upload agent) and `dx` (DNAnexus * (If relevant) A count of files in the DNA Nexus project containing a pattern to be ignored. NB this may not be accurate if the ignore term is found in the result of dx find data (eg present in project name) * Logs from this and the script are written to a logfile, named after the runfolder. A destination for this file can be passed to the `--logpath` flag. +--- ## findfastqs.sh + Report the number of gzipped fastq files in an Illumina runfolder. ### Usage -``` + +```bash $ findfastqs.sh RUNFOLDER >>> RUNFOLDER has 156 demultiplexed fastq files with 2 undetermined. Total: 158 ``` --- + +## Workstation Cleaner (wscleaner) + +Delete local directories that have been uploaded to the DNAnexus cloud storage service. 
+ +### Install + +```bash +git clone https://github.com/moka-guys/workstation_housekeeping.git +pip install workstation_housekeeping/wscleaner +wscleaner --version # Print version number +``` + +### Usage + +```bash +wscleaner --set-key DNA_NEXUS_KEY # Cache dnanexus api key +wscleaner ROOT_DIRECTORY --logfile LOGFILE_PATH +``` + +--- \ No newline at end of file diff --git a/wscleaner/README.md b/wscleaner/README.md index af4641f..d470659 100644 --- a/wscleaner/README.md +++ b/wscleaner/README.md @@ -13,14 +13,15 @@ A DNAnexus API key must be cached locally using the `--set-key` option. ## Install ```bash -git clone https://github.com/moka-guys/wscleaner.git -pip install ./wscleaner +git clone https://github.com/moka-guys/workstation_housekeeping.git +pip install workstation_housekeeping/wscleaner +wscleaner --version # Print version number ``` ## Usage ```bash -wscleaner --set-key DNA_NEXUS_KEY # Localyl caches dnanexus api key +wscleaner --set-key DNA_NEXUS_KEY # Cache dnanexus api key wscleaner ROOT_DIRECTORY --logfile LOGFILE_PATH ``` From cbd93c34c4ace25d2c2bee3cfa182f699ea2f6f2 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 30 May 2019 10:01:25 +0100 Subject: [PATCH 05/15] Remove test data. 
Update gitignore --- .gitignore | 3 +- wscleaner/test/test_dir_1_fastqs.txt | 54 ---------------------------- wscleaner/test/test_dir_2_fastqs.txt | 24 ------------- 3 files changed, 2 insertions(+), 79 deletions(-) delete mode 100644 wscleaner/test/test_dir_1_fastqs.txt delete mode 100644 wscleaner/test/test_dir_2_fastqs.txt diff --git a/.gitignore b/.gitignore index d560825..2bc7e99 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.pyc *.egg-info -config.json \ No newline at end of file +wscleaner/wscleaner/config.json +wscleaner/test/test_dir*.txt \ No newline at end of file diff --git a/wscleaner/test/test_dir_1_fastqs.txt b/wscleaner/test/test_dir_1_fastqs.txt deleted file mode 100644 index a813f35..0000000 --- a/wscleaner/test/test_dir_1_fastqs.txt +++ /dev/null @@ -1,54 +0,0 @@ -NGS265B_77_217354_KB_M_IMDv2GSD_Pan1063_S24_R2_001.fastq.gz -NGS265B_74_215597_MA_U_NGSEQ1FH_Pan1965_S22_R1_001.fastq.gz -NGS265B_79_216375_MK_F_IMDv2GSD_Pan1063_S26_R1_001.fastq.gz -NGS265B_57_216008_RH_F_NGSEQ1ALP_Pan1965_S5_R2_001.fastq.gz -NGS265B_63_215584_AM_F_NGSEQ1FH_Pan1965_S11_R2_001.fastq.gz -NGS265B_65_215862_FA_F_NGSEQ1FH_Pan1965_S13_R2_001.fastq.gz -NGS265B_78_216530_JD_M_IMDv2GSD_Pan1063_S25_R1_001.fastq.gz -NGS265B_61_215368_NW_F_NGSEQ1Cys_Pan1965_S9_R1_001.fastq.gz -NGS265B_57_216008_RH_F_NGSEQ1ALP_Pan1965_S5_R1_001.fastq.gz -NGS265B_56_215743_KC_F_NGSEQ1ALP_Pan1965_S4_R2_001.fastq.gz -NGS265B_67_215593_AB_M_NGSEQ1FH_Pan1965_S15_R2_001.fastq.gz -NGS265B_56_215743_KC_F_NGSEQ1ALP_Pan1965_S4_R1_001.fastq.gz -NGS265B_70_215922_AF_M_NGSEQ1FH_Pan1965_S18_R2_001.fastq.gz -NGS265B_55_215661_EW_F_NGSEQ1ALP_Pan1965_S3_R1_001.fastq.gz -NGS265B_66_215583_HA_M_NGSEQ1FH_Pan1965_S14_R1_001.fastq.gz -NGS265B_72_215598_FA_U_NGSEQ1FH_Pan1965_S20_R2_001.fastq.gz -NGS265B_78_216530_JD_M_IMDv2GSD_Pan1063_S25_R2_001.fastq.gz -NGS265B_67_215593_AB_M_NGSEQ1FH_Pan1965_S15_R1_001.fastq.gz -NGS265B_64_215921_BM_F_NGSEQ1FH_Pan1965_S12_R2_001.fastq.gz 
-NGS265B_75_216919_DD_M_IMDv2GSD_Pan1063_S23_R2_001.fastq.gz -NGS265B_74_215597_MA_U_NGSEQ1FH_Pan1965_S22_R2_001.fastq.gz -NGS265B_62_215592_HA_F_NGSEQ1FH_Pan1965_S10_R1_001.fastq.gz -NGS265B_69_215594_MA_M_NGSEQ1FH_Pan1965_S17_R1_001.fastq.gz -NGS265B_58_213847_TS_M_NGSEQ1ALP_Pan1965_S6_R1_001.fastq.gz -NGS265B_63_215584_AM_F_NGSEQ1FH_Pan1965_S11_R1_001.fastq.gz -NGS265B_55_215661_EW_F_NGSEQ1ALP_Pan1965_S3_R2_001.fastq.gz -NGS265B_54_215749_VT_F_NGSEQ1ALP_Pan1965_S2_R1_001.fastq.gz -NGS265B_80_215482_AK_F_IMDv2UCD_Pan1063_S27_R2_001.fastq.gz -NGS265B_71_215696_WS_M_NGSEQ1FH_Pan1965_S19_R1_001.fastq.gz -NGS265B_61_215368_NW_F_NGSEQ1Cys_Pan1965_S9_R2_001.fastq.gz -NGS265B_69_215594_MA_M_NGSEQ1FH_Pan1965_S17_R2_001.fastq.gz -NGS265B_68_215591_SA_M_NGSEQ1FH_Pan1965_S16_R2_001.fastq.gz -NGS265B_53_215680_LW_F_NGSEQ1ALP_Pan1965_S1_R2_001.fastq.gz -NGS265B_75_216919_DD_M_IMDv2GSD_Pan1063_S23_R1_001.fastq.gz -NGS265B_79_216375_MK_F_IMDv2GSD_Pan1063_S26_R2_001.fastq.gz -NGS265B_73_215590_SA_U_NGSEQ1FH_Pan1965_S21_R1_001.fastq.gz -NGS265B_72_215598_FA_U_NGSEQ1FH_Pan1965_S20_R1_001.fastq.gz -NGS265B_77_217354_KB_M_IMDv2GSD_Pan1063_S24_R1_001.fastq.gz -NGS265B_80_215482_AK_F_IMDv2UCD_Pan1063_S27_R1_001.fastq.gz -NGS265B_60_215663_HA_M_NGSEQ1ALP_Pan1965_S8_R2_001.fastq.gz -NGS265B_60_215663_HA_M_NGSEQ1ALP_Pan1965_S8_R1_001.fastq.gz -NGS265B_73_215590_SA_U_NGSEQ1FH_Pan1965_S21_R2_001.fastq.gz -NGS265B_54_215749_VT_F_NGSEQ1ALP_Pan1965_S2_R2_001.fastq.gz -NGS265B_65_215862_FA_F_NGSEQ1FH_Pan1965_S13_R1_001.fastq.gz -NGS265B_71_215696_WS_M_NGSEQ1FH_Pan1965_S19_R2_001.fastq.gz -NGS265B_66_215583_HA_M_NGSEQ1FH_Pan1965_S14_R2_001.fastq.gz -NGS265B_53_215680_LW_F_NGSEQ1ALP_Pan1965_S1_R1_001.fastq.gz -NGS265B_64_215921_BM_F_NGSEQ1FH_Pan1965_S12_R1_001.fastq.gz -NGS265B_58_213847_TS_M_NGSEQ1ALP_Pan1965_S6_R2_001.fastq.gz -NGS265B_70_215922_AF_M_NGSEQ1FH_Pan1965_S18_R1_001.fastq.gz -NGS265B_59_216014_HB_M_NGSEQ1ALP_Pan1965_S7_R1_001.fastq.gz 
-NGS265B_68_215591_SA_M_NGSEQ1FH_Pan1965_S16_R1_001.fastq.gz -NGS265B_62_215592_HA_F_NGSEQ1FH_Pan1965_S10_R2_001.fastq.gz -NGS265B_59_216014_HB_M_NGSEQ1ALP_Pan1965_S7_R2_001.fastq.gz diff --git a/wscleaner/test/test_dir_2_fastqs.txt b/wscleaner/test/test_dir_2_fastqs.txt deleted file mode 100644 index e960fb7..0000000 --- a/wscleaner/test/test_dir_2_fastqs.txt +++ /dev/null @@ -1,24 +0,0 @@ -NGS265C_85_216424_SA_F_CMCMD_Pan2022_S5_R2_001.fastq.gz -NGS265C_83_217371_KL_F_CMCMD_Pan2022_S4_R1_001.fastq.gz -NGS265C_81_216659_EB_F_CMCMD_Pan2022_S2_R2_001.fastq.gz -NGS265C_83_217371_KL_F_CMCMD_Pan2022_S4_R2_001.fastq.gz -NGS265C_94_216362_GN_M_CMCMD_Pan2022_S11_R1_001.fastq.gz -NGS265C_90_214535_MI_M_CMCMD_Pan2022_S9_R2_001.fastq.gz -NGS265C_82_216242_IN_F_CMCMD_Pan2022_S3_R2_001.fastq.gz -NGS265C_89_217353_JS_M_CMCMD_Pan2022_S8_R1_001.fastq.gz -NGS265C_88_217359_AA_M_CMCMD_Pan2022_S7_R1_001.fastq.gz -NGS265C_91_217055_IL_M_CMCMD_Pan2022_S10_R1_001.fastq.gz -NGS265C_91_217055_IL_M_CMCMD_Pan2022_S10_R2_001.fastq.gz -NGS265C_85_216424_SA_F_CMCMD_Pan2022_S5_R1_001.fastq.gz -NGS265C_39_214774_AH_M_NGSEQ2DMD_Pan1158_S1_R1_001.fastq.gz -NGS265C_39_214774_AH_M_NGSEQ2DMD_Pan1158_S1_R2_001.fastq.gz -NGS265C_96_216127_DO_M_CMCMD_Pan2022_S12_R1_001.fastq.gz -NGS265C_81_216659_EB_F_CMCMD_Pan2022_S2_R1_001.fastq.gz -NGS265C_82_216242_IN_F_CMCMD_Pan2022_S3_R1_001.fastq.gz -NGS265C_86_213909_DP_F_CMCMD_Pan2022_S6_R2_001.fastq.gz -NGS265C_86_213909_DP_F_CMCMD_Pan2022_S6_R1_001.fastq.gz -NGS265C_94_216362_GN_M_CMCMD_Pan2022_S11_R2_001.fastq.gz -NGS265C_96_216127_DO_M_CMCMD_Pan2022_S12_R2_001.fastq.gz -NGS265C_90_214535_MI_M_CMCMD_Pan2022_S9_R1_001.fastq.gz -NGS265C_89_217353_JS_M_CMCMD_Pan2022_S8_R2_001.fastq.gz -NGS265C_88_217359_AA_M_CMCMD_Pan2022_S7_R2_001.fastq.gz From f21b2f56709d972363296bf4c8be7b1287017d67 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Sun, 2 Jun 2019 18:52:02 +0100 Subject: [PATCH 06/15] Bump version number in README. 
Fix module docstring typo in lib.py --- README.md | 2 +- wscleaner/wscleaner/lib.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b76a4d3..a177f44 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Workstation Housekeeping v1.4 +# Workstation Housekeeping v1.5 Scripts to manage data on the NGS workstation diff --git a/wscleaner/wscleaner/lib.py b/wscleaner/wscleaner/lib.py index 3869edc..9da732d 100644 --- a/wscleaner/wscleaner/lib.py +++ b/wscleaner/wscleaner/lib.py @@ -3,7 +3,7 @@ Utlily classes for the workstation cleaner module. -Objects: +Classes: RunFolder: A local directory containing files with the 'fastq.gz' extension DxProjectRunFolder: A DNAnexus project RunFolderManager: Contains methods for finding, checking and deleting runfolders in a root directory. From 33298a28e17b7a13ae8e7adc1bf322d82873594f Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Sun, 2 Jun 2019 18:52:24 +0100 Subject: [PATCH 07/15] Fix module docstring typo in auth --- wscleaner/wscleaner/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wscleaner/wscleaner/auth.py b/wscleaner/wscleaner/auth.py index 54be2bb..f823574 100644 --- a/wscleaner/wscleaner/auth.py +++ b/wscleaner/wscleaner/auth.py @@ -7,7 +7,7 @@ get_config(): Read the DNANexus API token from the application cache file dx_set_auth(): Set the DNAnexus authentication token used in each instance of the application -Objects: +Classes: SetKeyAction: Set the DNAnexus authentication token used in future instances of the application and exit PrintKeyAction: Print the cached DNAnexus authentication key """ From 6d00cc36150895f060091aa987ea4b3282356298 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Sun, 9 Jun 2019 21:44:21 +0100 Subject: [PATCH 08/15] Add dry run command line flag and test. Fix typo. 
--- .gitignore | 3 ++- wscleaner/test/test_all.py | 16 ++++++++++++++-- wscleaner/wscleaner/lib.py | 14 ++++++++++---- wscleaner/wscleaner/main.py | 7 ++++--- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 2bc7e99..cf1f3f1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc *.egg-info wscleaner/wscleaner/config.json -wscleaner/test/test_dir*.txt \ No newline at end of file +wscleaner/test/test_dir*.txt +wscleaner/test/data \ No newline at end of file diff --git a/wscleaner/test/test_all.py b/wscleaner/test/test_all.py index 27106cd..5d44940 100644 --- a/wscleaner/test/test_all.py +++ b/wscleaner/test/test_all.py @@ -7,7 +7,7 @@ import shutil from pkg_resources import resource_filename -from wscleaner.auth import SetKeyAction, dx_set_auth +from wscleaner.auth import SetKeyAction, dx_set_auth, CONFIG_FILE from wscleaner.main import cli_parser from wscleaner.lib import RunFolderManager, RunFolder @@ -30,6 +30,12 @@ def rfm(): rfm = RunFolderManager(str(test_path)) return rfm +@pytest.fixture +def rfm_dry(): + """Return an instance of the runfolder manager with the test/data directory""" + test_path = Path(str(Path(__file__).parent), 'data') + rfm_dry = RunFolderManager(str(test_path), dry_run=True) + return rfm_dry # TESTS class TestAuth: @@ -47,7 +53,7 @@ def test_setkey(self, monkeypatch, auth_token): with pytest.raises(SystemExit) as err: args = cli_parser() # Make assertions on created config file - fn = resource_filename('wscleaner','config.json') + fn = resource_filename('wscleaner',CONFIG_FILE) with open(fn, 'r') as f: assert auth_token in f.read() # Delete temp config @@ -95,3 +101,9 @@ def test_delete(self, monkeypatch, rfm): monkeypatch.setattr(shutil, 'rmtree', lambda x: 'TEST_DELETED') rfm.delete(test_folder) assert test_folder.name in rfm.deleted + + def test_dry_run(self, rfm_dry): + """test that the dry_run option does not cause the test directory to be deleted""" + test_folder = 
rfm_dry.find_runfolders(min_age=0)[0] + rfm_dry.delete(test_folder) + assert test_folder.name not in rfm_dry.deleted diff --git a/wscleaner/wscleaner/lib.py b/wscleaner/wscleaner/lib.py index 9da732d..b30334f 100644 --- a/wscleaner/wscleaner/lib.py +++ b/wscleaner/wscleaner/lib.py @@ -146,6 +146,7 @@ class RunFolderManager(): Args: directory (str): A parent directory containing runfolders to process + dry_run (bool): Do not delete directories Attributes: root(pathlib.Path): A path object to the root directory deleted(List): A list of deleted runfolders populated by calls to self.delete() @@ -159,10 +160,11 @@ class RunFolderManager(): Raises: __validate():ValueError: The directory passed to the class instance does not exist. """ - def __init__(self, directory): + def __init__(self, directory, dry_run=False): self.logger = logging.getLogger(__name__ + '.RunFolderManager') self.__validate(directory) self.root = Path(directory) + self.__dry_run = dry_run self.deleted = [] # Delete runfolders appended here by self.deleted def __validate(self, directory): @@ -217,6 +219,10 @@ def check_logfiles(self, runfolder): def delete(self, runfolder): """Delete the local runfolder from the root directory and append name to self.deleted.""" - self.deleted.append(runfolder.name) - shutil.rmtree(runfolder.path) - self.logger.debug(f'{runfolder.name} DELETED.') + print(self.__dry_run) + if self.__dry_run: + self.logger.info(f'DRY RUN DELETE {runfolder.name}') + else: + self.deleted.append(runfolder.name) + shutil.rmtree(runfolder.path) + self.logger.info(f'{runfolder.name} DELETED.') diff --git a/wscleaner/wscleaner/main.py b/wscleaner/wscleaner/main.py index fa0a971..af2110a 100644 --- a/wscleaner/wscleaner/main.py +++ b/wscleaner/wscleaner/main.py @@ -31,11 +31,12 @@ def cli_parser(): # Define CLI arguments parser.add_argument('--set-key', action='setkey', help='Cache a DNA Nexus API key') parser.add_argument('--print-key', nargs=0, action='printkey', help='Print the cached DNA 
Nexus API key') + parser.add_argument('--dry-run', help='Perform a dry run without deleting files', action='store_true', default=False) parser.add_argument('root', help='A directory containing runfolders to process') parser.add_argument('--logfile', help='A path for the application logfile', default='mokaguys_logger.log') # Get version from setup.py as version CLI response version_number = pkg_resources.require("wscleaner")[0].version - parser.add_argument('--version', help='Programe version', action='version', version=f"wscleaner v{version_number}") + parser.add_argument('--version', help='Print version', action='version', version=f"wscleaner v{version_number}") args = parser.parse_args() return args @@ -52,7 +53,8 @@ def main(): dx_set_auth() # Set root directory and search it for runfolders - RFM = RunFolderManager(args.root) + # If dry-run CLI flag is given, no directories are deleted by the runfolder manager. + RFM = RunFolderManager(args.root, dry_run=args.dry_run) logger.info(f'Root directory {args.root}') local_runfolders = RFM.find_runfolders(min_age=0) logger.debug(f'Found local runfolders: {[rf.name for rf in local_runfolders]}') @@ -66,7 +68,6 @@ def main(): logfiles_uploaded = RFM.check_logfiles(runfolder) if fastqs_uploaded and logfiles_uploaded: RFM.delete(runfolder) - logger.info(f'{runfolder.name} - DELETED') elif not fastqs_uploaded: logger.warning(f'{runfolder.name} - FASTQ MISMATCH') elif not logfiles_uploaded: From 83f3de33648f2401ee7b2d42328f51c8b551a465 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 24 Oct 2019 10:32:02 +0100 Subject: [PATCH 09/15] Update None comparison in test_all.py --- wscleaner/test/test_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wscleaner/test/test_all.py b/wscleaner/test/test_all.py index 5d44940..38a7495 100644 --- a/wscleaner/test/test_all.py +++ b/wscleaner/test/test_all.py @@ -14,7 +14,7 @@ # AUTH: Set DNAnexus authentication for tests def test_auth(auth_token): """Test that an 
authentication token is passed to pytest as a command line argument""" - assert auth_token != None + assert auth_token is not None @pytest.fixture(autouse=True) def set_auth(auth_token): From 47aa777b7cb2ec2dbf636c34fcf35852f0e20d0f Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 24 Oct 2019 10:33:39 +0100 Subject: [PATCH 10/15] Cleanup import statements --- wscleaner/wscleaner/lib.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/wscleaner/wscleaner/lib.py b/wscleaner/wscleaner/lib.py index b30334f..c494c8c 100644 --- a/wscleaner/wscleaner/lib.py +++ b/wscleaner/wscleaner/lib.py @@ -8,15 +8,14 @@ DxProjectRunFolder: A DNAnexus project RunFolderManager: Contains methods for finding, checking and deleting runfolders in a root directory. """ - -from pathlib import Path -import dxpy +import logging import shutil import time +from pathlib import Path + +import dxpy -from functools import partial -import logging logger = logging.getLogger(__name__) class RunFolder(): From ff6814225773285b152ee03d0ba29c571930fcf7 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 24 Oct 2019 11:23:57 +0100 Subject: [PATCH 11/15] Clarify attributes in DxProjectRunFolder --- wscleaner/wscleaner/lib.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wscleaner/wscleaner/lib.py b/wscleaner/wscleaner/lib.py index c494c8c..13b8ce0 100644 --- a/wscleaner/wscleaner/lib.py +++ b/wscleaner/wscleaner/lib.py @@ -71,7 +71,8 @@ class DxProjectRunFolder(): Arguments: runfolder_name (str): The name of a local runfolder Attributes: - id (str): A DNAnexus project ID. Project matches input runfolder based on business rules. + runfolder (str): Runfolder name + id (str): Project ID of the matching runfolder project in DNANexus. Methods: find_fastqs: Returns a list of files in the DNAnexus project (self.id) with the fastq.gz extension count_logfiles: Count logfiles in the DNAnexus project (self.id). 
Logfiles are in an expected location From fd55920690e83d67ba745f9fb483c88a6751e629 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 24 Oct 2019 11:24:33 +0100 Subject: [PATCH 12/15] Remove redundant variable from find_fastqs --- wscleaner/wscleaner/lib.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wscleaner/wscleaner/lib.py b/wscleaner/wscleaner/lib.py index 13b8ce0..a2caee6 100644 --- a/wscleaner/wscleaner/lib.py +++ b/wscleaner/wscleaner/lib.py @@ -84,7 +84,6 @@ def __init__(self, runfolder_name): def find_fastqs(self): """Returns a list of files in the DNAnexus project (self.id) with the fastq.gz extension""" - fastq_regex = 'fastq.gz' # Search dnanexus for files with the fastq.gz extension. # name_mode='regexp' tells dxpy to look for any occurence of 'fastq.gz' in the filename search_response = dxpy.find_data_objects( From b1b1f8ddb7fc9588791c2cd7862133bd42d5a407 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 24 Oct 2019 11:37:25 +0100 Subject: [PATCH 13/15] Refactor runfoldermanager.check_fastqs. Various comment updates. --- wscleaner/wscleaner/lib.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/wscleaner/wscleaner/lib.py b/wscleaner/wscleaner/lib.py index a2caee6..72c8804 100644 --- a/wscleaner/wscleaner/lib.py +++ b/wscleaner/wscleaner/lib.py @@ -109,7 +109,7 @@ def count_logfiles(self): # Set uploaded runfolder name. Runfolder is renamed upon upload to the DNANexus project # without the first four characters uploaded_runfolder = dxpy.describe(self.id)['name'][4:] - # Set logfiles locatoin. This is expected in 'Logfiles/', a subdirectory of the uploaded runfolder + # Set logfile location in DNANexus project. 
This is expected in 'Logfiles/', a subdirectory of the uploaded runfolder logfile_dir = str(Path('/',uploaded_runfolder,'Logfiles')) logfile_list = dxpy.find_data_objects(project=self.id, folder=logfile_dir, classname='file') logfile_count = len(list(logfile_list)) @@ -134,7 +134,7 @@ def __dx_find_one_project(self): return None def __bool__(self): - """Boolean expressions on class instances will return True if a single DNAnexus project was found.""" + """Allows boolean expressions on class instances which return True if a single DNAnexus project was found.""" if self.id: return True else: @@ -185,7 +185,7 @@ def find_runfolders(self, min_age=14): runfolder_objects = [] for directory in subdirectories: rf = RunFolder(directory) - # Criteria for runfolder: Older than or equal to min_age and containsn fastq.gz files + # Criteria for runfolder: Older than or equal to min_age and contains fastq.gz files if (rf.age >= min_age) and (rf.find_fastqs(count=True) > 0): self.logger.debug(f'{rf.name} IS RUNFOLDER.') runfolder_objects.append(rf) @@ -197,14 +197,7 @@ def check_fastqs(self, runfolder): """Returns true if a runfolder's fastq.gz files match those in it's DNAnexus project.""" dx_fastqs = runfolder.dx_project.find_fastqs() local_fastqs = runfolder.find_fastqs() - fastqs_equal = sorted(dx_fastqs) == sorted(local_fastqs) - # Fastq files with "Undetermined" in their name may not be uploaded to DNAnexus. - # We accept local runfolders with 2 more fastq files than their dna nexus project. 
- equal_without_undetermined_fastqs = ( - len(local_fastqs) == len(dx_fastqs) + 2 and - all([fastq in local_fastqs for fastq in dx_fastqs]) - ) - fastq_bool = fastqs_equal or equal_without_undetermined_fastqs + fastq_bool = all([fastq in dx_fastqs for fastq in local_fastqs]) self.logger.debug(f'{runfolder.name} FASTQ BOOL: {fastq_bool}') return fastq_bool From 54712f54a2eff307a3b08958d4921bf5ff7dbcc0 Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Thu, 24 Oct 2019 11:48:01 +0100 Subject: [PATCH 14/15] Fix test_min_age --- wscleaner/test/test_all.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wscleaner/test/test_all.py b/wscleaner/test/test_all.py index 38a7495..fcc7f2a 100644 --- a/wscleaner/test/test_all.py +++ b/wscleaner/test/test_all.py @@ -76,9 +76,9 @@ def test_find_fastqs(self, data_test_runfolders): def test_min_age(self, rfm): """test that the runfolder age function records age""" - runfolders = rfm.find_runfolders(min_age=0) + runfolders = rfm.find_runfolders(min_age=10) # Asser that this test runfolder was recently generated - assert all([ rf.age < 14 for rf in runfolders ]) + assert all([ rf.age > 10 for rf in runfolders ]) class TestRFM: def test_find_runfolders(self, data_test_runfolders, rfm): From 38f37be611a6fd65622d3a9d694a74e221fad90e Mon Sep 17 00:00:00 2001 From: Nana Mensah Date: Mon, 28 Oct 2019 10:59:35 +0000 Subject: [PATCH 15/15] Add minumum age option and set default age to 14. Add usage to readme. 
--- wscleaner/README.md | 24 ++++++++++++++++++++++-- wscleaner/wscleaner/lib.py | 3 +-- wscleaner/wscleaner/main.py | 3 ++- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/wscleaner/README.md b/wscleaner/README.md index d470659..515e4cb 100644 --- a/wscleaner/README.md +++ b/wscleaner/README.md @@ -18,11 +18,31 @@ pip install workstation_housekeeping/wscleaner wscleaner --version # Print version number ``` -## Usage +## Quickstart ```bash wscleaner --set-key DNA_NEXUS_KEY # Cache dnanexus api key -wscleaner ROOT_DIRECTORY --logfile LOGFILE_PATH +wscleaner ROOT_DIRECTORY +``` + +## Usage + +``` +wscleaner [-h] [--set-key SET_KEY] [--print-key] [--dry-run] + [--logfile LOGFILE] [--min-age MIN_AGE] [--version] + root + +positional arguments: + root A directory containing runfolders to process + +optional arguments: + -h, --help show this help message and exit + --set-key SET_KEY Cache a DNA Nexus API key + --print-key Print the cached DNA Nexus API key + --dry-run Perform a dry run without deleting files + --logfile LOGFILE A path for the application logfile + --min-age MIN_AGE The age (days) a runfolder must be to be deleted + --version Print version ``` ## Test diff --git a/wscleaner/wscleaner/lib.py b/wscleaner/wscleaner/lib.py index 72c8804..7fc1040 100644 --- a/wscleaner/wscleaner/lib.py +++ b/wscleaner/wscleaner/lib.py @@ -174,7 +174,7 @@ def __validate(self, directory): self.logger.error(f'Directory does not exist: {directory}', exc_info=True) raise - def find_runfolders(self, min_age=14): + def find_runfolders(self, min_age=None): """Search the parent directory for subdirectories containing fastq.gz files. Args: min_age(int): Minimum age in days of runfolders returned. 
@@ -211,7 +211,6 @@ def check_logfiles(self, runfolder): def delete(self, runfolder): """Delete the local runfolder from the root directory and append name to self.deleted.""" - print(self.__dry_run) if self.__dry_run: self.logger.info(f'DRY RUN DELETE {runfolder.name}') else: diff --git a/wscleaner/wscleaner/main.py b/wscleaner/wscleaner/main.py index af2110a..cf8a51e 100644 --- a/wscleaner/wscleaner/main.py +++ b/wscleaner/wscleaner/main.py @@ -34,6 +34,7 @@ def cli_parser(): parser.add_argument('--dry-run', help='Perform a dry run without deleting files', action='store_true', default=False) parser.add_argument('root', help='A directory containing runfolders to process') parser.add_argument('--logfile', help='A path for the application logfile', default='mokaguys_logger.log') + parser.add_argument('--min-age', help='The age (days) a runfolder must be to be deleted', type=int, default=14) # Get version from setup.py as version CLI response version_number = pkg_resources.require("wscleaner")[0].version parser.add_argument('--version', help='Print version', action='version', version=f"wscleaner v{version_number}") @@ -56,7 +57,7 @@ def main(): # If dry-run CLI flag is given, no directories are deleted by the runfolder manager. RFM = RunFolderManager(args.root, dry_run=args.dry_run) logger.info(f'Root directory {args.root}') - local_runfolders = RFM.find_runfolders(min_age=0) + local_runfolders = RFM.find_runfolders(min_age=args.min_age) logger.debug(f'Found local runfolders: {[rf.name for rf in local_runfolders]}') for runfolder in local_runfolders: