diff --git a/.github/workflows/qiita-ci.yml b/.github/workflows/qiita-ci.yml index 181970579..618232707 100644 --- a/.github/workflows/qiita-ci.yml +++ b/.github/workflows/qiita-ci.yml @@ -96,7 +96,9 @@ jobs: export REDBIOM_HOST="http://localhost:7379" pip install . --no-binary redbiom - conda install -c conda-forge --yes biom-format + # 10.2022 + # this is for redbiom / biom-format (so fine to delete in the future) + pip install future pwd mkdir ~/.qiita_plugins @@ -182,6 +184,8 @@ jobs: conda activate qiita export QIITA_SERVER_CERT=`pwd`/qiita_core/support_files/server.crt export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.cfg + # for testing we only need to have this set, not actually exist + export QIITA_JOB_SCHEDULER_EPILOGUE=`/path/to/epilogue/file` export REDBIOM_HOST="http://localhost:7379" nosetests $COVER_PACKAGE --with-doctest --with-coverage --with-timer -v --cover-package=${COVER_PACKAGE// / --cover-package=} -e 'test_submit_EBI_parse_EBI_reply_failure' -e 'test_full_submission' diff --git a/CHANGELOG.md b/CHANGELOG.md index 62acf3bf7..4ff3d6d1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Qiita changelog +Version 2022.09 +--------------- + +* Moved Qiita's code base and plugins to SLURM (from Torque). The plugins updated are: qiita-spots/qp-woltka, biocore/mg-scripts, qiita-spots/qp-spades, qiita-spots/qp-meta, qiita-spots/qp-fastp-minimap2. +* Pinned the paramiko version to < 2.9 [as newer versions were causing issues with older systems](https://github.com/paramiko/paramiko/issues/1961#issuecomment-1008119073). +* Pinned the scipy version to < 1.8 to avoid issues with the biom-format library. +* Updates to the INSTALL instructions (thank you @aliu104 !) + Version 2022.07 --------------- diff --git a/INSTALL.md b/INSTALL.md index af59f74e6..bbe8b0c99 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -6,7 +6,7 @@ Qiita is pip installable, but depends on specific versions of python and non-pyt ## Install and setup miniconda -Download the appropriate installer [here](http://conda.pydata.org/docs/install/quick.html) corresponding to your operating system and execute it. +Download the appropriate installer [here](https://repo.anaconda.com/miniconda/) corresponding to your operating system and execute it. Next, ensure conda is up-to-date. @@ -19,6 +19,7 @@ conda update conda Setup a virtual environment in conda named `qiita` by executing the following: ```bash +conda config --add channels conda-forge conda create -q --yes -n qiita python=3.6 pip libgfortran numpy nginx ``` @@ -40,7 +41,7 @@ $ which python (qiita) ``` -If you don't see this output, your `$PATH` variable was setup incorrectly or you haven't restarted your shell. Consult the [conda documentation](http://conda.pydata.org/docs/install/quick.html). +If you don't see this output, your `$PATH` variable was setup incorrectly or you haven't restarted your shell. Consult the [conda documentation](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html). As long as you are in the active qiita environment, commands such as `pip install` or `python` will refer to and be contained within this virtual environment. 
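As a quick sanity check of the new conda-forge step added above (a minimal sketch; `conda config --show channels` is a standard conda command, not part of this diff), you can confirm the channel was registered before creating the environment:

```bash
# list the configured channels; conda-forge should appear in the output
conda config --show channels
```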
@@ -54,7 +55,7 @@ source deactivate Install the non-python dependencies ----------------------------------- -* [PostgreSQL](http://www.postgresql.org/download/) (minimum required version 9.5.14, we have tested most extensively with 9.5.15) +* [PostgreSQL](http://www.postgresql.org/download/) (currently using v13) * [redis-server](http://redis.io) (we have tested most extensively with 2.8.17) * [webdis] (https://github.com/nicolasff/webdis) (latest version should be fine but we have tested the most with 9ee6fe2 - Feb 6, 2016) @@ -64,9 +65,43 @@ There are several options to install these dependencies depending on your needs: - Alternatively, you could install them via conda. However, the conda repository may not have the exact versions of these dependencies that you want. - You could setup a full development environment with [Vagrant](https://www.vagrantup.com/), and continue using conda under it to primarily manage python dependencies. Note that we don't cover Vagrant in these instructions. +### PostgreSQL installation on Linux +The following instructions have been adapted from [this site](https://computingforgeeks.com/how-to-install-postgresql-13-on-ubuntu/) and tested on Ubuntu v20.04.4 for Postgres v13. + +First, ensure that you have updated packages and reboot the system with: +```bash +sudo apt update && sudo apt -y full-upgrade +[ -f /var/run/reboot-required ] && sudo reboot -f +``` +You can reboot the system with `sudo reboot` in case any packages were updated. + +Next, we need to add the Postgres repository to our system: +```bash +sudo apt update +sudo apt install curl gpg gnupg2 software-properties-common apt-transport-https lsb-release ca-certificates +curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc|sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg +echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" |sudo tee /etc/apt/sources.list.d/pgdg.list +``` +Adding the repository has added many different packages, which allows us to now install Postgres v13 with the following commands: +```bash +sudo apt update +sudo apt install postgresql-13 postgresql-client-13 +``` +Now, we need to reconfigure the `pg_hba.conf` file and change all occurrences of `md5` and `peer` to `trust`. You can access the file with: +```bash +sudo vim /etc/postgresql/13/main/pg_hba.conf +``` +To make sure all changes have been reflected, restart the Postgres server: +```bash +sudo service postgresql restart +``` +Installing Postgres is now complete. Note that you will need to start the Postgres server every time you start the Qiita server. You can do this with the following command: +```bash +sudo service postgresql start +``` ### PostgreSQL installation on Mac OS X -For Mac OS X, you can either install postgres through the [Postgres.app](https://postgresapp.com/downloads.html). These instructions were tested with the Postgres.app v9.5. +For Mac OS X, you can either install postgres through the [Postgres.app](https://postgresapp.com/downloads.html). These instructions were tested with the Postgres.app v9.5, v13. You'll then need to ensure that the postgres binaries (for example, ``psql``) are in your executable search path (``$PATH`` environment variable). 
If you are using Postgres.app on OS X, you can do this by running the following, though you may have to replace`~/.bash_profile`with `~/.zshrc` if you're using zshell rather than the built-in bash, and you may have to change the version number `Versions/9.3/` to the exact one that you are installing: @@ -75,14 +110,20 @@ echo 'export PATH="$PATH:/Applications/Postgres.app/Contents/Versions/9.5/bin/"' source ~/.bash_profile ``` -### Redis-server installation on Mac OS X +### Redis-server installation using Homebrew (Mac OS X, Linux) -Assuming you have [homebrew](http://www.brew.sh) installed, you can install redis-server v2.8.x as follows: +Assuming you have [homebrew](http://brew.sh) installed, you can install the latest version of the redis-server as follows: ```bash brew update brew install homebrew/versions/redis28 ``` +### Redis-server installation using apt-get (Linux) + +Alternatively, you can sudo install redis: +```bash +sudo apt-get install redis-server +``` ### webdis @@ -112,7 +153,7 @@ Install Qiita development version and its python dependencies Clone the git repository with the development version of Qiita into your current directory: ```bash -git clone https://github.com/biocore/qiita.git +git clone https://github.com/qiita-spots/qiita.git ``` Navigate to the cloned directory and ensure your conda environment is active: @@ -121,12 +162,17 @@ Navigate to the cloned directory and ensure your conda environment is active: cd qiita source activate qiita ``` - +If you are using Ubuntu or a Windows Subsystem for Linux (WSL), you will need to ensure that you have a C++ compiler and that development libraries and include files for PostgreSQL are available. Type `cc` into your system to ensure that it doesn't result in `program not found`. The following commands will install a C++ compiler and `libpq-dev`: +```bash +sudo apt install gcc # alternatively, you can install clang instead +sudo apt-get install libpq-dev +``` Install Qiita (this occurs through setuptools' `setup.py` file in the qiita directory): ```bash pip install . --no-binary redbiom ``` +Note that if you get any errors or warnings with 'certifi', you can add the `--ignore-installed` tag to the command above. At this point, Qiita will be installed and the system will start. However, you will need to install plugins in order to process any kind of data. For a list @@ -148,7 +194,7 @@ Move the Qiita sample configuration file to a different directory by executing: cp ./qiita_core/support_files/config_test.cfg ~/.qiita_config_test.cfg ``` -Note that you will need to change `BASE_URL = https://localhost:8383` to `BASE_URL = https://localhost:21174` if you are not using NGINX. +Note that you will need to change `BASE_URL = https://localhost:8383` to `BASE_URL = https://localhost:21174` in the new copy of the configuration file if you are not using NGINX. Additionally, you will also need to change all URLs that start with `/home/runner/work/qiita/qiita/...` into wherever your qiita directory is (e.g. `/home//qiita/...`). Set your `QIITA_CONFIG_FP` environment variable to point to that file (into `.bashrc` if using bash; `.zshrc` if using zshell): @@ -162,6 +208,10 @@ Set your `QIITA_CONFIG_FP` environment variable to point to that file (into `.ba Update paths in the newly copied configuration file to match your settings, e.g. replace /home/travis/ with your user home directory. 
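For example, a minimal sketch of that path update (assuming GNU sed, a clone at `$HOME/qiita`, and the copy at `~/.qiita_config_test.cfg` made above; adjust the old prefix to whatever your copied file actually contains):

```bash
# rewrite the CI paths baked into the sample config so they point at your checkout
sed -i "s|/home/runner/work/qiita/qiita|$HOME/qiita|g" ~/.qiita_config_test.cfg
```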
+If you are working on WSL, you will need to start the redis server with the following command before making a test environment: +```bash +redis-server --daemonize yes --port 7777 +``` Next, make a test environment: ```bash diff --git a/qiita_core/__init__.py b/qiita_core/__init__.py index 97e580f13..c27c17601 100644 --- a/qiita_core/__init__.py +++ b/qiita_core/__init__.py @@ -6,4 +6,4 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -__version__ = "2022.07" +__version__ = "2022.09" diff --git a/qiita_core/configuration_manager.py b/qiita_core/configuration_manager.py index 933c97f34..6ae47e134 100644 --- a/qiita_core/configuration_manager.py +++ b/qiita_core/configuration_manager.py @@ -42,11 +42,11 @@ class ConfigurationManager(object): Max upload size valid_upload_extension : str The extensions that are valid to upload, comma separated - trq_owner : str - Email address of submitter of Torque jobs - trq_poll_val : int - Interval (in seconds) to wait between calls to Torque's qstat program - trq_dependency_q_cnt : int + job_scheduler_owner : str + Email address of submitter of jobs + job_scheduler_poll_val : int + Interval (in seconds) to wait between calls to job_scheduler program + job_scheduler_dependency_q_cnt : int Hard upper-limit on the number of an artifact's concurrent validation processes. user : str @@ -145,7 +145,7 @@ def __init__(self): self._get_main(config) self._get_smtp(config) - self._get_torque(config) + self._get_job_scheduler(config) self._get_postgres(config) self._get_redis(config) self._get_ebi(config) @@ -234,22 +234,22 @@ def _get_main(self, config): self.key_file = join(install_dir, 'qiita_core', 'support_files', 'server.key') - def _get_torque(self, config): - """Get the configuration of the torque section""" - self.trq_owner = config.get('torque', 'TORQUE_JOB_OWNER') - self.trq_poll_val = int(config.get('torque', 'TORQUE_POLLING_VALUE')) - self.trq_dependency_q_cnt = config.get('torque', - 'TORQUE_PROCESSING_QUEUE_COUNT') - self.trq_dependency_q_cnt = int(self.trq_dependency_q_cnt) - - if not self.trq_owner: - self.trq_owner = None - - if not self.trq_poll_val: - self.trq_poll_val = None - - if not self.trq_dependency_q_cnt: - self.trq_dependency_q_cnt = None + def _get_job_scheduler(self, config): + """Get the configuration of the job_scheduler section""" + self.job_scheduler_owner = config.get( + 'job_scheduler', 'JOB_SCHEDULER_JOB_OWNER', fallback=None) + self.job_scheduler_poll_val = config.get( + 'job_scheduler', 'JOB_SCHEDULER_POLLING_VALUE', fallback=None) + self.job_scheduler_dependency_q_cnt = config.get( + 'job_scheduler', 'JOB_SCHEDULER_PROCESSING_QUEUE_COUNT', + fallback=None) + + if self.job_scheduler_poll_val is not None: + self.job_scheduler_poll_val = int(self.job_scheduler_poll_val) + + if self.job_scheduler_dependency_q_cnt is not None: + self.job_scheduler_dependency_q_cnt = int( + self.job_scheduler_dependency_q_cnt) def _get_postgres(self, config): """Get the configuration of the postgres section""" diff --git a/qiita_core/support_files/config_test.cfg b/qiita_core/support_files/config_test.cfg index 8630849b4..e6423f609 100644 --- a/qiita_core/support_files/config_test.cfg +++ b/qiita_core/support_files/config_test.cfg @@ -123,16 +123,16 @@ PASSWORD = postgres # The postgres password for the admin_user ADMIN_PASSWORD = postgres -# ----------------------------- Torque settings ----------------------------- -[torque] -# The email 
address of the submitter of Torque jobs -TORQUE_JOB_OWNER = torque_user@somewhere.org +# ----------------------------- Job Scheduler Settings ----------------------------- +[job_scheduler] +# The email address of the submitter of jobs +JOB_SCHEDULER_JOB_OWNER = user@somewhere.org -# The number of seconds to wait between successive qstat calls -TORQUE_POLLING_VALUE = 15 +# The number of seconds to wait between successive calls +JOB_SCHEDULER_POLLING_VALUE = 15 # Hard upper-limit on concurrently running validator jobs -TORQUE_PROCESSING_QUEUE_COUNT = 2 +JOB_SCHEDULER_PROCESSING_QUEUE_COUNT = 2 # ----------------------------- EBI settings ----------------------------- [ebi] diff --git a/qiita_core/tests/test_configuration_manager.py b/qiita_core/tests/test_configuration_manager.py index a00210eab..b07c801ff 100644 --- a/qiita_core/tests/test_configuration_manager.py +++ b/qiita_core/tests/test_configuration_manager.py @@ -59,10 +59,10 @@ def test_init(self): self.assertEqual(obs.cookie_secret, "SECRET") self.assertEqual(obs.key_file, "/tmp/server.key") - # Torque section - self.assertEqual(obs.trq_owner, "torque_user@somewhere.org") - self.assertEqual(obs.trq_poll_val, 15) - self.assertEqual(obs.trq_dependency_q_cnt, 2) + # job_scheduler section + self.assertEqual(obs.job_scheduler_owner, "user@somewhere.org") + self.assertEqual(obs.job_scheduler_poll_val, 15) + self.assertEqual(obs.job_scheduler_dependency_q_cnt, 2) # Postgres section self.assertEqual(obs.user, "postgres") @@ -180,13 +180,13 @@ def test_get_main(self): self.assertEqual(obs.qiita_env, "") - def test_get_torque(self): + def test_get_job_scheduler(self): obs = ConfigurationManager() - conf_setter = partial(self.conf.set, 'torque') - conf_setter('TORQUE_JOB_OWNER', '') - obs._get_torque(self.conf) - self.assertIsNone(obs.trq_owner) + conf_setter = partial(self.conf.set, 'job_scheduler') + conf_setter('JOB_SCHEDULER_JOB_OWNER', '') + obs._get_job_scheduler(self.conf) + self.assertEqual('', obs.job_scheduler_owner) def test_get_postgres(self): obs = ConfigurationManager() @@ -329,16 +329,16 @@ def test_get_portal(self): # The postgres password for the admin_user ADMIN_PASSWORD = thishastobesecure -# ----------------------------- Torque settings ----------------------------- -[torque] -# The email address of the submitter of Torque jobs -TORQUE_JOB_OWNER = torque_user@somewhere.org +# ------------------------- job_scheduler settings ------------------------- +[job_scheduler] +# The email address of the submitter of jobs +JOB_SCHEDULER_JOB_OWNER = user@somewhere.org -# The number of seconds to wait between successive qstat calls -TORQUE_POLLING_VALUE = 15 +# The number of seconds to wait between successive calls +JOB_SCHEDULER_POLLING_VALUE = 15 # Hard upper-limit on concurrently running validator jobs -TORQUE_PROCESSING_QUEUE_COUNT = 2 +JOB_SCHEDULER_PROCESSING_QUEUE_COUNT = 2 # ----------------------------- EBI settings ----------------------------- [ebi] diff --git a/qiita_db/__init__.py b/qiita_db/__init__.py index 3bcecc7c7..afee149ae 100644 --- a/qiita_db/__init__.py +++ b/qiita_db/__init__.py @@ -27,7 +27,7 @@ from . import user from . 
import processing_job -__version__ = "2022.07" +__version__ = "2022.09" __all__ = ["analysis", "artifact", "archive", "base", "commands", "environment_manager", "exceptions", "investigation", "logger", diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index 1e8342a36..3820f2b21 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -203,16 +203,12 @@ def make_environment(load_ontologies, download_reference, add_demo_user): # Insert the settings values to the database sql = """INSERT INTO settings - (test, base_data_dir, base_work_dir, trq_owner, - trq_poll_val, trq_dependency_q_cnt) - VALUES (%s, %s, %s, %s, %s, %s)""" + (test, base_data_dir, base_work_dir) + VALUES (%s, %s, %s)""" qdb.sql_connection.TRN.add( sql, [test, qiita_config.base_data_dir, - qiita_config.working_dir, - qiita_config.trq_owner, - qiita_config.trq_poll_val, - qiita_config.trq_dependency_q_cnt]) + qiita_config.working_dir]) qdb.sql_connection.TRN.execute() create_layout(test=test, verbose=verbose) diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index cb2bdba9e..ec5cbeba9 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -1537,7 +1537,7 @@ def _get_accession_numbers(self, column): return result def _update_accession_numbers(self, column, values): - """Update accession numbers stored in `column` with the ones in `values` + """Update accession numbers stored in `column` with `values` Parameters ---------- diff --git a/qiita_db/processing_job.py b/qiita_db/processing_job.py index 5abc7bdb0..cb99442b9 100644 --- a/qiita_db/processing_job.py +++ b/qiita_db/processing_job.py @@ -17,13 +17,12 @@ from multiprocessing import Process, Queue, Event from re import search, findall from subprocess import Popen, PIPE -from time import sleep, time +from time import sleep from uuid import UUID from os.path import join -from threading import Thread from humanize import naturalsize +from os import environ -from qiita_core.exceptions import IncompetentQiitaDeveloperError from qiita_core.qiita_settings import qiita_config from qiita_db.util import create_nested_path @@ -53,27 +52,25 @@ class Watcher(Process): # 'running' in Qiita, as 'waiting' in Qiita connotes that the # main job itself has completed, and is waiting on validator # jobs to finish, etc. Revisit - torque_to_qiita_state_map = {'completed': 'completed', - 'held': 'queued', - 'queued': 'queued', - 'exiting': 'running', - 'running': 'running', - 'moving': 'running', - 'waiting': 'running', - 'suspended': 'running', - 'DROPPED': 'error'} + job_scheduler_to_qiita_state_map = {'completed': 'completed', + 'held': 'queued', + 'queued': 'queued', + 'exiting': 'running', + 'running': 'running', + 'moving': 'running', + 'waiting': 'running', + 'suspended': 'running', + 'DROPPED': 'error'} def __init__(self): super(Watcher, self).__init__() # set self.owner to qiita, or whomever owns processes we need to watch. - self.owner = qiita_config.trq_owner + self.owner = qiita_config.job_scheduler_owner - # Torque is set to drop jobs from its queue 60 seconds after - # completion, by default. Setting a polling value less than - # that allows for multiple chances to catch the exit status - # before it disappears. - self.polling_value = qiita_config.trq_poll_val + # Setting a polling value less than 60 seconds allows for multiple + # chances to catch the exit status before it disappears. 
+ self.polling_value = qiita_config.job_scheduler_poll_val # the cross-process method by which to communicate across # process boundaries. Note that when Watcher object runs, @@ -204,7 +201,8 @@ def stop(self): def launch_local(env_script, start_script, url, job_id, job_dir): - # launch_local() differs from launch_torque(), as no Watcher() is used. + # launch_local() differs from launch_job_scheduler(), as no Watcher() is + # used. # each launch_local() process will execute the cmd as a child process, # wait, and update the database once cmd has completed. # @@ -248,99 +246,53 @@ def launch_local(env_script, start_script, url, job_id, job_dir): ProcessingJob(job_id).complete(False, error=error) -def launch_torque(env_script, start_script, url, job_id, job_dir, - dependent_job_id, resource_params): +def launch_job_scheduler(env_script, start_script, url, job_id, job_dir, + dependent_job_id, resource_params): - # note that job_id is Qiita's UUID, not a Torque job ID + # note that job_id is Qiita's UUID, not a job_scheduler job ID cmd = [start_script, url, job_id, job_dir] - # generating file contents to be used with qsub - lines = [] - - # TODO: is PBS_JOBID is being set correctly? - lines.append("echo $PBS_JOBID") - - # TODO: revisit below + lines = [ + '#!/bin/bash', + f'#SBATCH --error {job_dir}/slurm-error.txt', + f'#SBATCH --output {job_dir}/slurm-output.txt'] + lines.append("echo $SLURM_JOBID") lines.append("source ~/.bash_profile") lines.append(env_script) + + epilogue = environ.get('QIITA_JOB_SCHEDULER_EPILOGUE', '') + if epilogue: + lines.append(f"#SBATCH --epilog {epilogue}") + lines.append(' '.join(cmd)) - # writing the file to be used with qsub + # writing the script file create_nested_path(job_dir) fp = join(job_dir, '%s.txt' % job_id) - with open(fp, 'w') as torque_job_file: - torque_job_file.write("\n".join(lines)) + with open(fp, 'w') as job_file: + job_file.write("\n".join(lines)) - qsub_cmd = ['qsub'] + sbatch_cmd = ['sbatch'] if dependent_job_id: # note that a dependent job should be submitted before the - # 'parent' job ends, most likely. Torque doesn't keep job state - # around forever, and creating a dependency on a job already - # completed has not been tested. - qsub_cmd.append("-W") - qsub_cmd.append("depend=afterok:%s" % dependent_job_id) - - qsub_cmd.append(resource_params) - qsub_cmd.append(fp) - qsub_cmd.append("-o") - qsub_cmd.append("%s/qsub-output.txt" % job_dir) - qsub_cmd.append("-e") - qsub_cmd.append("%s/qsub-error.txt" % job_dir) - # TODO: revisit epilogue - qsub_cmd.append("-l") - qsub_cmd.append("epilogue=/home/qiita/qiita-epilogue.sh") + # 'parent' job ends + sbatch_cmd.append("-d") + sbatch_cmd.append("afterok:%s" % dependent_job_id) - # Popen() may also need universal_newlines=True - # may also need stdout = stdout.decode("utf-8").rstrip() - qsub_cmd = ' '.join(qsub_cmd) - - # Qopen is a wrapper for Popen() that allows us to wait on a qsub - # call, but return if the qsub command is not returning after a - # prolonged period of time. - q = Qopen(qsub_cmd) - q.start() - - # wait for qsub_cmd to finish, but not longer than the number of - # seconds specified below. - init_time = time() - q.join(5) - total_time = time() - init_time - # for internal use, logging if the time is larger than 2 seconds - if total_time > 2: - qdb.logger.LogEntry.create('Runtime', 'qsub return time', info={ - 'time_in_seconds': str(total_time)}) - - # if q.returncode is None, it's because qsub did not return. 
- if q.returncode is None: - e = "Error Torque configuration information incorrect: %s" % qsub_cmd - raise IncompetentQiitaDeveloperError(e) - - # q.returncode in this case means qsub successfully pushed the job - # onto Torque's queue. - if q.returncode != 0: - raise AssertionError("Error Torque could not launch %s (%d)" % - (qsub_cmd, q.returncode)) - - torque_job_id = q.stdout.decode('ascii').strip('\n') - - return torque_job_id - - -class Qopen(Thread): - def __init__(self, cmd): - super(Qopen, self).__init__() - self.cmd = cmd - self.stdout = None - self.stderr = None - self.returncode = None + sbatch_cmd.append(resource_params) + sbatch_cmd.append(fp) - def run(self): - proc = Popen(self.cmd, shell=True, stdout=PIPE, stderr=PIPE) - self.stdout, self.stderr = proc.communicate() - self.returncode = proc.returncode + stdout, stderr, return_value = _system_call(' '.join(sbatch_cmd)) + + if return_value != 0: + raise AssertionError(f'Error submitting job: {sbatch_cmd} :: {stderr}') + + job_id = stdout.strip('\n').split(" ")[-1] + + return job_id def _system_call(cmd): @@ -395,8 +347,8 @@ class ProcessingJob(qdb.base.QiitaObject): _launch_map = {'qiita-plugin-launcher': {'function': launch_local, 'execute_in_process': False}, - 'qiita-plugin-launcher-qsub': - {'function': launch_torque, + 'qiita-plugin-launcher-slurm': + {'function': launch_job_scheduler, 'execute_in_process': True}} @classmethod @@ -432,7 +384,7 @@ def by_external_id(cls, external_id): Parameters ---------- external_id : str - An external id (e.g. Torque Job ID) + An external id (e.g. job scheduler Job ID) Returns ------- @@ -506,21 +458,20 @@ def get_resource_allocation_info(self): '{input_size}' in allocation): samples, columns, input_size = self.shape parts = [] + error_msg = ('Obvious incorrect allocation. Please ' + 'contact qiita.help@gmail.com') for part in allocation.split(' '): if ('{samples}' in part or '{columns}' in part or '{input_size}' in part): - variable, value = part.split('=') - error_msg = ('Obvious incorrect allocation. 
Please ' - 'contact qiita.help@gmail.com') # to make sure that the formula is correct and avoid # possible issues with conversions, we will check that # all the variables {samples}/{columns}/{input_size} # present in the formula are not None, if any is None # we will set the job's error (will stop it) and the # message is gonna be shown to the user within the job - if (('{samples}' in value and samples is None) or - ('{columns}' in value and columns is None) or - ('{input_size}' in value and input_size is + if (('{samples}' in part and samples is None) or + ('{columns}' in part and columns is None) or + ('{input_size}' in part and input_size is None)): self._set_error(error_msg) return 'Not valid' @@ -528,7 +479,7 @@ def get_resource_allocation_info(self): try: # if eval has something that can't be processed # it will raise a NameError - mem = eval(value.format( + mem = eval(part.format( samples=samples, columns=columns, input_size=input_size)) except NameError: @@ -538,8 +489,7 @@ def get_resource_allocation_info(self): if mem <= 0: self._set_error(error_msg) return 'Not valid' - value = naturalsize(mem, gnu=True, format='%.0f') - part = '%s=%s' % (variable, value) + part = naturalsize(mem, gnu=True, format='%.0f') parts.append(part) @@ -1353,7 +1303,7 @@ def _complete_artifact_transformation(self, artifacts_data): self._set_validator_jobs(validator_jobs) # Submit m validator jobs as n lists of jobs - n = qiita_config.trq_dependency_q_cnt + n = qiita_config.job_scheduler_dependency_q_cnt # taken from: # https://www.geeksforgeeks.org/break-list-chunks-size-n-python/ diff --git a/qiita_db/support_files/patches/87.sql b/qiita_db/support_files/patches/87.sql new file mode 100644 index 000000000..e6415c3d4 --- /dev/null +++ b/qiita_db/support_files/patches/87.sql @@ -0,0 +1,18 @@ +-- Aug 11, 2022 +-- updating resource allocations to use slurm +UPDATE qiita.processing_job_resource_allocation SET allocation = +REPLACE( + REPLACE( + REPLACE( + REPLACE( + REPLACE( + REPLACE( + REPLACE(allocation, '-q qiita', '-p qiita'), + '-l nodes=', '-N '), + ':ppn=', ' -n '), + '-l pmem=', '--mem-per-cpu '), + '-l mem=', '--mem '), + '-l walltime=', '--time '), +'-p 1023', '--qos=qiita_prio'); + +INSERT INTO qiita.filepath_type (filepath_type) VALUES ('bam'); diff --git a/qiita_db/support_files/qiita-db-settings.sql b/qiita_db/support_files/qiita-db-settings.sql index 2c7c0709b..23a16ac70 100644 --- a/qiita_db/support_files/qiita-db-settings.sql +++ b/qiita_db/support_files/qiita-db-settings.sql @@ -1,9 +1,6 @@ -CREATE TABLE settings ( +CREATE TABLE settings ( test bool DEFAULT True NOT NULL, base_data_dir varchar NOT NULL, base_work_dir varchar NOT NULL, - current_patch varchar DEFAULT 'unpatched' NOT NULL, - trq_owner varchar, - trq_poll_val int, - trq_dependency_q_cnt int + current_patch varchar DEFAULT 'unpatched' NOT NULL ); diff --git a/qiita_db/test/test_processing_job.py b/qiita_db/test/test_processing_job.py index 51f957275..24447a78b 100644 --- a/qiita_db/test/test_processing_job.py +++ b/qiita_db/test/test_processing_job.py @@ -551,8 +551,9 @@ def test_complete_success(self): # here we can test that the validator shape and allocation is correct validator = validators[0] self.assertEqual(validator.parameters.values['artifact_type'], 'BIOM') - self.assertEqual(validator.get_resource_allocation_info(), '-q qiita ' - '-l nodes=1:ppn=1 -l mem=90gb -l walltime=150:00:00') + self.assertEqual( + validator.get_resource_allocation_info(), + '-p qiita -N 1 -n 1 --mem 90gb --time 150:00:00') 
self.assertEqual(validator.shape, (27, 31, None)) # Test the output artifact is going to be named based on the # input parameters @@ -787,16 +788,16 @@ def test_get_resource_allocation_info(self): jids = { # Split libraries FASTQ '6d368e16-2242-4cf8-87b4-a5dc40bb890b': - '-q qiita -l nodes=1:ppn=1 -l mem=120gb -l walltime=80:00:00', + '-p qiita -N 1 -n 1 --mem 120gb --time 80:00:00', # Pick closed-reference OTUs '80bf25f3-5f1d-4e10-9369-315e4244f6d5': - '-q qiita -l nodes=1:ppn=5 -l mem=120gb -l walltime=130:00:00', + '-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00', # Single Rarefaction / Analysis '8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0': - '-q qiita -l nodes=1:ppn=5 -l pmem=8gb -l walltime=168:00:00', + '-p qiita -N 1 -n 5 --mem-per-cpu 8gb --time 168:00:00', # Split libraries 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d': - '-q qiita -l nodes=1:ppn=1 -l mem=60gb -l walltime=25:00:00'} + '-p qiita -N 1 -n 1 --mem 60gb --time 25:00:00'} for jid, allocation in jids.items(): job = qdb.processing_job.ProcessingJob(jid) @@ -813,7 +814,7 @@ def _set_allocation(memory): sql = """UPDATE qiita.processing_job_resource_allocation SET allocation = '{0}' WHERE name = 'Split libraries FASTQ'""".format( - '-q qiita -l mem=%s' % memory) + '-p qiita --mem %s' % memory) qdb.sql_connection.perform_as_transaction(sql) # let's start with something simple, samples*1000 @@ -821,27 +822,27 @@ def _set_allocation(memory): _set_allocation('{samples}*1000') self.assertEqual( job_not_changed.get_resource_allocation_info(), - '-q qiita -l nodes=1:ppn=5 -l mem=120gb -l walltime=130:00:00') + '-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00') self.assertEqual(job_changed.get_resource_allocation_info(), - '-q qiita -l mem=26K') + '-p qiita --mem 26K') # a little more complex ((samples+columns)*1000000)+4000000 # (( 27 + 31 )*1000000)+4000000 ~ 62000000 _set_allocation('(({samples}+{columns})*1000000)+4000000') self.assertEqual( job_not_changed.get_resource_allocation_info(), - '-q qiita -l nodes=1:ppn=5 -l mem=120gb -l walltime=130:00:00') + '-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00') self.assertEqual(job_changed.get_resource_allocation_info(), - '-q qiita -l mem=59M') + '-p qiita --mem 59M') # now something real input_size+(2*1e+9) # 116 +(2*1e+9) ~ 2000000116 _set_allocation('{input_size}+(2*1e+9)') self.assertEqual( job_not_changed.get_resource_allocation_info(), - '-q qiita -l nodes=1:ppn=5 -l mem=120gb -l walltime=130:00:00') + '-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00') self.assertEqual(job_changed.get_resource_allocation_info(), - '-q qiita -l mem=2G') + '-p qiita --mem 2G') @qiita_test_checker() diff --git a/qiita_db/test/test_setup.py b/qiita_db/test/test_setup.py index cfc373d8f..bb6e01463 100644 --- a/qiita_db/test/test_setup.py +++ b/qiita_db/test/test_setup.py @@ -36,7 +36,7 @@ def test_filepath(self): self.assertEqual(get_count("qiita.filepath"), 23) def test_filepath_type(self): - self.assertEqual(get_count("qiita.filepath_type"), 24) + self.assertEqual(get_count("qiita.filepath_type"), 25) def test_study_prep_template(self): self.assertEqual(get_count("qiita.study_prep_template"), 2) diff --git a/qiita_db/test/test_software.py b/qiita_db/test/test_software.py index f971fe1f3..04930a181 100644 --- a/qiita_db/test/test_software.py +++ b/qiita_db/test/test_software.py @@ -539,13 +539,13 @@ def test_iter(self): # Command 2 is Split libraries and has defined resources self.assertEqual( qdb.software.Command(2).resource_allocation, - '-q qiita -l nodes=1:ppn=1 -l mem=60gb -l walltime=25:00:00') + '-p 
qiita -N 1 -n 1 --mem 60gb --time 25:00:00') # Command 9 is Summarize Taxa and has no defined resources so it goes # to defaults self.assertEqual( qdb.software.Command(9).resource_allocation, - '-q qiita -l nodes=1:ppn=5 -l pmem=8gb -l walltime=168:00:00') + '-p qiita -N 1 -n 5 --mem-per-cpu 8gb --time 168:00:00') # delete allocations to test errors qdb.sql_connection.perform_as_transaction( diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py index e616d7b84..08288c20c 100644 --- a/qiita_db/test/test_util.py +++ b/qiita_db/test/test_util.py @@ -135,6 +135,7 @@ def test_get_filepath_types(self): 'directory': 8, 'plain_text': 9, 'reference_seqs': 10, 'reference_tax': 11, 'reference_tree': 12, 'log': 13, 'sample_template': 14, 'prep_template': 15, 'qiime_map': 16, + 'bam': 17 } with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT filepath_type,filepath_type_id " diff --git a/qiita_db/util.py b/qiita_db/util.py index db4c8107f..b8410434d 100644 --- a/qiita_db/util.py +++ b/qiita_db/util.py @@ -2080,7 +2080,7 @@ def generate_analyses_list_per_study(study_id): def create_nested_path(path): - """Wraps makedirs() to make it safe to use across multiple concurrent calls. + """Wraps makedirs() to make it safe across multiple concurrent calls. Returns successfully if the path was created, or if it already exists. (Note, this alters the normal makedirs() behavior, where False is returned if the full path already exists.) diff --git a/qiita_pet/__init__.py b/qiita_pet/__init__.py index 97e580f13..c27c17601 100644 --- a/qiita_pet/__init__.py +++ b/qiita_pet/__init__.py @@ -6,4 +6,4 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -__version__ = "2022.07" +__version__ = "2022.09" diff --git a/qiita_pet/handlers/api_proxy/__init__.py b/qiita_pet/handlers/api_proxy/__init__.py index c5d992301..01639f883 100644 --- a/qiita_pet/handlers/api_proxy/__init__.py +++ b/qiita_pet/handlers/api_proxy/__init__.py @@ -38,7 +38,7 @@ from .user import (user_jobs_get_req) from .util import check_access, check_fp -__version__ = "2022.07" +__version__ = "2022.09" __all__ = ['prep_template_summary_get_req', 'data_types_get_req', 'study_get_req', 'sample_template_filepaths_get_req', diff --git a/qiita_pet/templates/admin_processing_job.html b/qiita_pet/templates/admin_processing_job.html index b9fa88f38..cd010d3ef 100644 --- a/qiita_pet/templates/admin_processing_job.html +++ b/qiita_pet/templates/admin_processing_job.html @@ -70,7 +70,6 @@ 'running': 'text-info' }; - //row[8] is PBS/Torque/Slurm id out.push('' + status + ' ' + row[0] + ' [ ' + row[9] + ' ]
'); diff --git a/qiita_ware/__init__.py b/qiita_ware/__init__.py index 97e580f13..c27c17601 100644 --- a/qiita_ware/__init__.py +++ b/qiita_ware/__init__.py @@ -6,4 +6,4 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -__version__ = "2022.07" +__version__ = "2022.09" diff --git a/qiita_ware/ebi.py b/qiita_ware/ebi.py index 55a72cf92..694dcd01f 100644 --- a/qiita_ware/ebi.py +++ b/qiita_ware/ebi.py @@ -1080,9 +1080,9 @@ def _generate_demultiplexed_fastq_demux(self, mtime): if wrote_sequences: demux_samples.add(s) else: - del(self.samples[s]) - del(self.samples_prep[s]) - del(self.sample_demux_fps[s]) + del (self.samples[s]) + del (self.samples_prep[s]) + del (self.sample_demux_fps[s]) remove(sample_fp) return demux_samples @@ -1164,9 +1164,9 @@ def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None): if missing_samples: for ms in missing_samples: - del(self.samples[ms]) - del(self.samples_prep[ms]) - del(self.sample_demux_fps[ms]) + del (self.samples[ms]) + del (self.samples_prep[ms]) + del (self.sample_demux_fps[ms]) if not demux_samples: error_msg = ("All samples were removed from the submission " diff --git a/qiita_ware/test/test_ebi.py b/qiita_ware/test/test_ebi.py index 0e796d788..106c377f1 100644 --- a/qiita_ware/test/test_ebi.py +++ b/qiita_ware/test/test_ebi.py @@ -191,8 +191,8 @@ def test_generate_sample_xml(self): '1.SKM7.640188', '1.SKD7.640191', '1.SKB6.640176', '1.SKM4.640180'] for k in keys_to_del: - del(submission.samples[k]) - del(submission.samples_prep[k]) + del (submission.samples[k]) + del (submission.samples_prep[k]) obs = ET.tostring(submission.generate_sample_xml()) self.assertEqual(obs.decode('ascii'), exp) @@ -524,8 +524,8 @@ def test_generate_experiment_xml(self): '1.SKM7.640188', '1.SKD7.640191', '1.SKB6.640176', '1.SKM4.640180'] for k in keys_to_del: - del(submission.samples[k]) - del(submission.samples_prep[k]) + del (submission.samples[k]) + del (submission.samples_prep[k]) obs = ET.tostring(submission.generate_experiment_xml()) self.assertEqual(obs.decode('ascii'), exp) @@ -583,8 +583,8 @@ def test_generate_run_xml(self): keys_to_del = ['1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', '1.SKM8.640201', '1.SKM2.640199'] for k in keys_to_del: - del(submission.samples[k]) - del(submission.samples_prep[k]) + del (submission.samples[k]) + del (submission.samples_prep[k]) submission.generate_demultiplexed_fastq(mtime=1) self.files_to_remove.append(submission.full_ebi_dir) @@ -1001,8 +1001,8 @@ def test_parse_EBI_reply(self): keys_to_del = ['1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', '1.SKM8.640201', '1.SKM2.640199', '1.SKB3.640195'] for k in keys_to_del: - del(e.samples[k]) - del(e.samples_prep[k]) + del (e.samples[k]) + del (e.samples_prep[k]) # Genereate the XML files so the aliases are generated # and stored internally diff --git a/qiita_ware/test/test_private_plugin.py b/qiita_ware/test/test_private_plugin.py index 40b329687..3cb659b61 100644 --- a/qiita_ware/test/test_private_plugin.py +++ b/qiita_ware/test/test_private_plugin.py @@ -388,17 +388,17 @@ def _set_allocation(memory): sql = """UPDATE qiita.processing_job_resource_allocation SET allocation = '{0}' WHERE name = 'build_analysis_files'""".format( - '-q qiita -l mem=%s' % memory) + '-p qiita --mem %s' % memory) TRN.add(sql) TRN.execute() self.assertEqual(job.shape, (4, None, 1256812)) self.assertEqual( job.get_resource_allocation_info(), - '-q qiita -l nodes=1:ppn=1 -l 
mem=16gb -l walltime=10:00:00') + '-p qiita -N 1 -n 1 --mem 16gb --time 10:00:00') _set_allocation('{samples}*1000') self.assertEqual(job.get_resource_allocation_info(), - '-q qiita -l mem=4K') + '-p qiita --mem 4K') _set_allocation('{columns}*1000') self.assertEqual(job.get_resource_allocation_info(), 'Not valid') self.assertEqual(job.status, 'error') diff --git a/scripts/qiita b/scripts/qiita index efd45ab60..99e61e989 100755 --- a/scripts/qiita +++ b/scripts/qiita @@ -332,7 +332,7 @@ def start(port, master): # register the job's changed status in Qiita. qjob.complete(job_state, error=job_error) - if qiita_config.plugin_launcher == 'qiita-plugin-launcher-qsub': + if qiita_config.plugin_launcher == 'qiita-plugin-launcher-slurm': if master: # Only a single Watcher() process is desired gWatcher = qdb.processing_job.Watcher() @@ -350,7 +350,7 @@ def start(port, master): # Thread() can be replaced with Process() if need be # update_database_func() requires a defined Watcher object. - # if plugin_launcher is defined as qsub, it should be + # if plugin_launcher is defined as slurm, it should be # defined. p = Thread(target=update_database_func) p.start() diff --git a/scripts/qiita-private-launcher-qsub b/scripts/qiita-private-launcher-slurm old mode 100755 new mode 100644 similarity index 76% rename from scripts/qiita-private-launcher-qsub rename to scripts/qiita-private-launcher-slurm index 17c1f7c77..282fc4c96 --- a/scripts/qiita-private-launcher-qsub +++ b/scripts/qiita-private-launcher-slurm @@ -12,17 +12,18 @@ from subprocess import Popen, PIPE from datetime import datetime from tempfile import mkdtemp from os.path import join +from os import environ import click -PBSFILE = """#!/bin/bash -#PBS -l nodes=1:ppn=1 -#PBS -o %s/qsub-output.txt -#PBS -e %s/qsub-error.txt -#PBS -l epilogue=/home/qiita/qiita-epilogue.sh +SBATCHFILE = """#!/bin/bash +#SBATCH -N=1 +#SBATCH -n=1 +#SBATCH --output=%s/slurm-output.txt +#SBATCH --error=%s/slurm-error.txt # Commands to run -echo $PBS_JOBID +echo $SLURM_JOBID %s """ @@ -42,11 +43,17 @@ def start(qiita_env, command, arguments): datestr = datetime.now().strftime("%Y%m%d_%I%M%S.%f") dirpath = mkdtemp(prefix=datestr, dir='/projects/qiita_data/working_dir/private-jobs/') - fp = join(dirpath, 'private.qsub') + fp = join(dirpath, 'private') with open(fp, 'w') as f: - f.write(PBSFILE % (dirpath, dirpath, "\n".join(lines))) - qsub_cmd = "qsub %s" % fp - proc = Popen(qsub_cmd, shell=True, stdout=PIPE, stderr=PIPE) + f.write(SBATCHFILE % (dirpath, dirpath, "\n".join(lines))) + + cmd = "sbatch %s" % fp + + epilogue = environ.get('QIITA_JOB_SCHEDULER_EPILOGUE', '') + if epilogue: + cmd = f'{cmd} --epilog {epilogue}' + + proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) stdout, stderr = proc.communicate() if proc.returncode and proc.returncode != 0: raise ValueError( diff --git a/scripts/qiita-recover-jobs b/scripts/qiita-recover-jobs index fc81c56f7..75939ad7e 100644 --- a/scripts/qiita-recover-jobs +++ b/scripts/qiita-recover-jobs @@ -12,10 +12,10 @@ from qiita_db.sql_connection import TRN from qiita_db.processing_job import ProcessingJob import pandas as pd from time import sleep -from datetime import timedelta from math import ceil +from io import StringIO + -QIITA_QUEUE_LOG = '/home/qiita/qiita-queues-logs-DONT-DELETE.log' SLEEP_TIME = 6 CHANCES = 3 SQL = """SELECT processing_job_id @@ -23,7 +23,6 @@ SQL = """SELECT processing_job_id JOIN qiita.processing_job_status USING (processing_job_status_id) WHERE processing_job_status = %s""" -ARRAY_COMMANDS = 
set(['Woltka v0.1.1']) def _submit_jobs(jids_to_recover, recover_type): @@ -42,37 +41,17 @@ def _submit_jobs(jids_to_recover, recover_type): def _retrieve_queue_jobs(): - lines = check_output(["qstat", "-f"]).decode('ascii').split("\n") - - # looking for qiita jobs - # i-1: the line before is the job name, which is the internal qiita job id - job_names = [lines[i-1] for i, l in enumerate(lines) - if l.startswith(' Job_Owner = qiita')] - - qiita_jids = [] - for job in job_names: - # this should always be false but rather check - if 'Job_Name' not in job: - continue - # ignoring interactive jobs - if 'STDIN' in job: - continue - - # removing extra info - jid = job[15:].replace('merge-', '').replace('.txt', '') - qiita_jids.append(jid) + # getting all the jobs in the queues + all_jobs = pd.read_csv(StringIO( + check_output(['squeue', '-o', '%all']).decode('ascii')), sep='|') - return set(qiita_jids) + # just keeping the qiita jobs + jobs = all_jobs[all_jobs.GROUP == 'qiita'] + # ignore the merge-jobs and get unique values + qiita_jids = jobs.NAME.str.replace('merge-', '').unique() -def _count_jobs_in_scheduler(): - # first let's count all regular jobs - j1 = len(check_output(['qstat']).decode('ascii').split("\n")) - # now, let's count the jobs in job arrays - lines = check_output(['qstat', '-f']).decode('ascii').split("\n") - j2 = sum([int(x.split(' ')[-1].split(',')[-1].split('-')[-1]) - for x in lines if 'job_array_request' in x]) - return j1 + j2 + return set(qiita_jids) def _get_jids_to_recover(recover_type): @@ -84,57 +63,35 @@ def _get_jids_to_recover(recover_type): return jids_to_recover -def _parse_queue_values(d): - max_mem = 0 - max_pmem = 0 - max_vmem = 0 - max_wt = timedelta(hours=0, minutes=0, seconds=0) - d = d.split(',') - for dd in d: - if dd.startswith('mem'): - v = int(dd[4:-2]) - if v > max_mem: - max_mem = v - elif dd.startswith('pmem'): - v = int(dd[5:-2]) - if v > max_pmem: - max_pmem = v - elif dd.startswith('vmem'): - v = int(dd[5:-2]) - if v > max_mem: - max_mem = v - elif dd.startswith('walltime'): - v = map(int, dd[9:].split(':')) - v = timedelta(hours=v[0], minutes=v[1], seconds=v[2]) - if v > max_wt: - max_wt = v - return max_mem, max_pmem, max_vmem, max_wt - - def _qiita_queue_log_parse(jids_to_recover): - df = pd.read_csv(QIITA_QUEUE_LOG, sep='\t', - index_col=None, header=None, dtype=str, names=[ - 'bjid', 'user', 'group', 'jid', 'session', - 'resource-list', 'resource-used', 'queue', 'account', - 'exit-code', 'node']) - # remove the register and empty fields to avoid errors - df = df[(df.bjid != '0') & - (~df.bjid.isnull()) & - (~df.user.isnull()) & - (df.jid != 'register.txt')] - # generate the qiita job id - df['qjid'] = df.jid.apply(lambda x: x.split('.')[0]) - results = [] - for jid, ddf in df.groupby('qjid'): - if jid in jids_to_recover: - vals = [] - for _, r in ddf.iterrows(): - vals.append({ - 'exit-code': r['exit-code'], - 'resource-list': _parse_queue_values(r['resource-list']), - 'resource-used': _parse_queue_values(r['resource-used'])}) - results.append((ProcessingJob(jid), vals)) + for jid in jids_to_recover: + job = ProcessingJob(jid) + if job.external_id: + bvals = pd.read_csv(StringIO(check_output([ + 'sacct', '-p', + '--format=ExitCode,ReqMem,MaxRSS,CPUTimeRAW,TimelimitRaw', + '-j', f'{job.external_id}.batch']).decode( + 'ascii')), sep='|').iloc[0].to_dict() + vals = pd.read_csv(StringIO(check_output([ + 'sacct', '-p', + '--format=ExitCode,ReqMem,MaxRSS,CPUTimeRAW,TimelimitRaw', + '-j', f'{job.external_id}']).decode( + 'ascii')), 
sep='|').iloc[0].to_dict() + data = { + 'exit-code': bvals['ExitCode'], + 'mem-requested': bvals['ReqMem'], + 'time-requested': vals['TimelimitRaw'], + 'mem-used': bvals['MaxRSS'], + 'time-used': bvals['CPUTimeRAW']} + else: + data = { + 'exit-code': None, + 'mem-requested': None, + 'time-requested': None, + 'mem-used': None, + 'time-used': None} + results.append((job, data)) return results diff --git a/setup.py b/setup.py index 3389ec2c0..ab49f7ffb 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ from setuptools import setup from glob import glob -__version__ = "2022.07" +__version__ = "2022.09" classes = """ @@ -108,7 +108,8 @@ 'networkx', 'humanize', 'wtforms<3.0.0', 'nltk', 'openpyxl', 'sphinx-bootstrap-theme', 'Sphinx<3.0', 'gitpython', 'redbiom', 'pyzmq', 'sphinx_rtd_theme', - 'paramiko', 'seaborn', 'matplotlib', 'scipy', 'nose', + 'paramiko<2.9', 'seaborn', 'matplotlib', 'scipy<1.8', + 'nose', 'flake8', 'six', 'qiita-files @ https://github.com/' 'qiita-spots/qiita-files/archive/master.zip', 'mock', 'python-jose', 'markdown2', 'iteration_utilities',
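If you already have a working environment, you can check whether the installed versions satisfy the new `paramiko<2.9` and `scipy<1.8` pins from `setup.py`. A minimal sketch, assuming the `qiita` conda environment is active:

```bash
# report the currently installed versions of the newly pinned dependencies
python -c "import paramiko, scipy; print(paramiko.__version__, scipy.__version__)"
# if either is too new, reinstall within the pinned ranges
pip install 'paramiko<2.9' 'scipy<1.8'
```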