From 9de072ff219d8272da95a3be838ac73a5175acf1 Mon Sep 17 00:00:00 2001 From: Vanessasaurus <814322+vsoch@users.noreply.github.com> Date: Fri, 20 Jan 2023 20:07:31 -0800 Subject: [PATCH] testing waiting for the minicluster to be cleaned up before submitting another (#18) * testing waiting for the minicluster to be cleaned up before submitting another Signed-off-by: vsoch --- CHANGELOG.md | 1 + docs/getting_started/commands.md | 3 +++ .../clouds/shared/scripts/minicluster-run | 17 +++++++++++++++ fluxcloud/version.py | 2 +- ...uster-run-lmp-size-2-minicluster-size-2.sh | 21 +++++++++++++++++-- ...uster-run-lmp-size-4-minicluster-size-4.sh | 21 +++++++++++++++++-- 6 files changed, 60 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0c71e5..7f0bf30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are: The versions coincide with releases on pip. Only major versions will be released as tags on Github. ## [0.0.x](https://github.com/converged-computing/flux-cloud/tree/main) (0.0.x) + - wait until pods terminated and removed between applies (0.1.13) - add support for custom placement group name (0.1.12) - experiment class to support better template rendering - scripts are generated from templates (jinja2) without getopt diff --git a/docs/getting_started/commands.md b/docs/getting_started/commands.md index 812fed7..0c5ca71 100644 --- a/docs/getting_started/commands.md +++ b/docs/getting_started/commands.md @@ -138,6 +138,9 @@ To force overwrite of existing results (by default they are skipped) $ flux-cloud apply -e n1-standard-1-2 --force ``` +Note that by default, we always wait for a previous run to be cleaned up +before continuing. 
+ ## down And then bring down your first (or named) cluster: diff --git a/fluxcloud/main/clouds/shared/scripts/minicluster-run b/fluxcloud/main/clouds/shared/scripts/minicluster-run index 367bdec..31a081b 100755 --- a/fluxcloud/main/clouds/shared/scripts/minicluster-run +++ b/fluxcloud/main/clouds/shared/scripts/minicluster-run @@ -18,6 +18,23 @@ print_magenta "logfile : ${LOGFILE}" is_installed kubectl +# Ensure we wait for the space to be cleaned up +echo +podsCleaned="false" +print_blue "Waiting for previous pods to be cleaned up..." +while [[ "${podsCleaned}" == "false" ]]; do + echo -n "." + sleep 2 + state=$(kubectl get pods --namespace ${NAMESPACE} 2>&1) + lines=$(echo $state | wc -l) + if [[ "${lines}" == "1" ]] && [[ "${state}" == *"No resources found in"* ]]; then + echo + print_green "🌀️ Previous pods are cleaned up." + podsCleaned="true" + break + fi +done + # Create the namespace (ok if already exists) run_echo_allow_fail kubectl create namespace ${NAMESPACE} diff --git a/fluxcloud/version.py b/fluxcloud/version.py index 07c65da..05b29d7 100644 --- a/fluxcloud/version.py +++ b/fluxcloud/version.py @@ -1,7 +1,7 @@ # Copyright 2022-2023 Lawrence Livermore National Security, LLC # SPDX-License-Identifier: Apache-2.0 -__version__ = "0.1.12" +__version__ = "0.1.13" AUTHOR = "Vanessa Sochat" EMAIL = "vsoch@users.noreply.github.com" NAME = "flux-cloud" diff --git a/tests/lammps/data/k8s-size-4-local/.scripts/minicluster-run-lmp-size-2-minicluster-size-2.sh b/tests/lammps/data/k8s-size-4-local/.scripts/minicluster-run-lmp-size-2-minicluster-size-2.sh index 81e6f1b..0e3fe98 100755 --- a/tests/lammps/data/k8s-size-4-local/.scripts/minicluster-run-lmp-size-2-minicluster-size-2.sh +++ b/tests/lammps/data/k8s-size-4-local/.scripts/minicluster-run-lmp-size-2-minicluster-size-2.sh @@ -133,9 +133,9 @@ function with_exponential_backoff { } NAMESPACE="flux-operator" 
-CRD="/home/vanessa/Desktop/Code/flux/flux-cloud/tests/lammps/data/k8s-size-4-local/.scripts/minicluster.yaml" +CRD="/tmp/lammps-data-WpiC0E/k8s-size-4-local/.scripts/minicluster.yaml" JOB="lammps" -LOGFILE="/home/vanessa/Desktop/Code/flux/flux-cloud/tests/lammps/data/k8s-size-4-local/lmp-size-2-minicluster-size-2/log.out" +LOGFILE="/tmp/lammps-data-WpiC0E/k8s-size-4-local/lmp-size-2-minicluster-size-2/log.out" print_magenta " apply : ${CRD}" print_magenta " job : ${JOB}" @@ -143,6 +143,23 @@ print_magenta "logfile : ${LOGFILE}" is_installed kubectl +# Ensure we wait for the space to be cleaned up +echo +podsCleaned="false" +print_blue "Waiting for previous pods to be cleaned up..." +while [[ "${podsCleaned}" == "false" ]]; do + echo -n "." + sleep 2 + state=$(kubectl get pods --namespace ${NAMESPACE} 2>&1) + lines=$(echo $state | wc -l) + if [[ "${lines}" == "1" ]] && [[ "${state}" == *"No resources found in"* ]]; then + echo + print_green "🌀️ Previous pods are cleaned up." + podsCleaned="true" + break + fi +done + # Create the namespace (ok if already exists) run_echo_allow_fail kubectl create namespace ${NAMESPACE} diff --git a/tests/lammps/data/k8s-size-4-local/.scripts/minicluster-run-lmp-size-4-minicluster-size-4.sh b/tests/lammps/data/k8s-size-4-local/.scripts/minicluster-run-lmp-size-4-minicluster-size-4.sh index 4ca150d..9d7912f 100755 --- a/tests/lammps/data/k8s-size-4-local/.scripts/minicluster-run-lmp-size-4-minicluster-size-4.sh +++ b/tests/lammps/data/k8s-size-4-local/.scripts/minicluster-run-lmp-size-4-minicluster-size-4.sh @@ -133,9 +133,9 @@ function with_exponential_backoff { } NAMESPACE="flux-operator" -CRD="/home/vanessa/Desktop/Code/flux/flux-cloud/tests/lammps/data/k8s-size-4-local/.scripts/minicluster.yaml" +CRD="/tmp/lammps-data-WpiC0E/k8s-size-4-local/.scripts/minicluster.yaml" JOB="lammps" -LOGFILE="/home/vanessa/Desktop/Code/flux/flux-cloud/tests/lammps/data/k8s-size-4-local/lmp-size-4-minicluster-size-4/log.out" 
+LOGFILE="/tmp/lammps-data-WpiC0E/k8s-size-4-local/lmp-size-4-minicluster-size-4/log.out" print_magenta " apply : ${CRD}" print_magenta " job : ${JOB}" @@ -143,6 +143,23 @@ print_magenta "logfile : ${LOGFILE}" is_installed kubectl +# Ensure we wait for the space to be cleaned up +echo +podsCleaned="false" +print_blue "Waiting for previous pods to be cleaned up..." +while [[ "${podsCleaned}" == "false" ]]; do + echo -n "." + sleep 2 + state=$(kubectl get pods --namespace ${NAMESPACE} 2>&1) + lines=$(echo $state | wc -l) + if [[ "${lines}" == "1" ]] && [[ "${state}" == *"No resources found in"* ]]; then + echo + print_green "🌀️ Previous pods are cleaned up." + podsCleaned="true" + break + fi +done + # Create the namespace (ok if already exists) run_echo_allow_fail kubectl create namespace ${NAMESPACE}