From 76bb464439f45b971b128ebc56f2278d509aabbd Mon Sep 17 00:00:00 2001 From: Aledj2 Date: Fri, 26 Apr 2019 13:35:46 +0100 Subject: [PATCH] upload 100 at a time. change version number. fix #11 --- README.md | 4 ++-- backup_runfolder.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index fdb7c0a..7f19a6e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Workstation Housekeeping v1.2 +# Workstation Housekeeping v1.3 Scripts to manage data on the NGS workstation --- @@ -19,7 +19,7 @@ This tool requires the DNAnexus utilities `ua` (upload agent) and `dx` (DNAnexus * If the `-p` option is given, the script attempts to find a matching DNAnexus project. Otherwise, it looks for a single project matching the runfolder name. If more or less than 1 project matches, the script logs an error and exits. * The runfolder is traversed and a list of files in each folder is obtained. If any comma-separated strings passed to the `--ignore` argument are present within the filepath, or filename the file is excluded. -* The DNAnexus `ua` utility is used to upload files in batches of 500 at a time. The number of upload tries is set to 100 with the `--tries` flag. +* The DNAnexus `ua` utility is used to upload files in batches of 100 at a time. The number of upload tries is set to 100 with the `--tries` flag. 
* Orthogonal tests are performed to: * A count of files that should be uploaded (using the ignore terms if provided) * A count of files in the DNA Nexus project diff --git a/backup_runfolder.py b/backup_runfolder.py index d326f34..18070ac 100755 --- a/backup_runfolder.py +++ b/backup_runfolder.py @@ -41,7 +41,7 @@ def log_setup(args): # If logfile path passed to --logpath, prepend to logfile name, else write to current directory logpath = args.logpath if args.logpath else "" # Set logfile name as runfolder name with '.log' extension - logfile_name = "".join([os.path.basename(args.runfolder), ".log"]) + logfile_name = "".join([os.path.basename(args.runfolder.strip("/")), ".log"]) logfile_fullpath = os.path.join(logpath, logfile_name) # Create dictionary with logging config parameters. @@ -281,14 +281,14 @@ def call_upload_agent(self): # create the nexus path for each dir nexus_path, project_filepath = self.get_nexus_filepath(path) self.logger.info('Calling upload agent on %s to location %s', path, project_filepath) - # upload agent has a max number of uploads of 1000 per command - # count number of files in list and divide by 500.0 eg 20/500.0 = 0.04. ceil rounds up to the nearest integer (0.04->1). If there are 500, ceil(500/500.0)=1.0 if there are 750 ceil(750/500.0)=2.0 - iterations_needed = math.ceil(len(file_dict[path]) / 500.0) + # upload agent has a max number of uploads of 1000 per command. uploading multiple files at a time is quicker, but uploading too many at a time has caused it to hang. + # count number of files in list and divide by 100.0 eg 20/100.0 = 0.2. ceil rounds up to the nearest integer (0.2->1). 
If there are 100, ceil(100/100.0)=1.0 if there are 750 ceil(750/100.0)=8.0 + iterations_needed = math.ceil(len(file_dict[path]) / 100.0) # set the iterations count to 1 iteration_count = 1 # will pass a slice of the file list to the upload agent so set variables for start and stop so it uploads files 0-999 start = 0 - stop = 500 + stop = 100 # while we haven't finished the iterations while iteration_count <= iterations_needed: # if it's the last iteration, set stop == length of list so not to ask for elements that aren't in the list (if 4 items in list len(list)=4 and slice of 0:4 won't miss the last element) @@ -303,8 +303,8 @@ def call_upload_agent(self): # increase the iteration_count and start and stop by 1000 for the next iteration so second iteration will do files 1000-1999 iteration_count += 1 - start += 500 - stop += 500 + start += 100 + stop += 100 # Create DNAnexus upload command nexus_upload_command = ('ua --auth-token {auth_token} --project {nexus_project} --folder {nexus_folder} --do-not-compress --upload-threads 10 --tries 100 {files}'.format(