Skip to content

Commit

Permalink
WIP preparing to refactor flux cloud (#30)
Browse files Browse the repository at this point in the history
* preparing to refactor flux cloud

Signed-off-by: vsoch <[email protected]>
  • Loading branch information
vsoch authored Mar 20, 2023
1 parent 97f2777 commit 3af9240
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 47 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
The versions coincide with releases on pip. Only major versions will be released as tags on GitHub.

## [0.0.x](https://github.com/converged-computing/flux-cloud/tree/main) (0.0.x)
- changes to flux operator sdk (0.2.1)
- refactor flux submit and apply to use fluxoperator Python SDK (0.2.0)
- This reduces scripts in output folder, but is a good tradeoff for fewer errors
- remove "ui" command, flux-cloud is intended mostly for automation
Expand Down
35 changes: 11 additions & 24 deletions fluxcloud/main/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import uuid

from flux_restful_client.main import get_client
from fluxoperator.client import FluxOperator
from fluxoperator.client import FluxMiniCluster

import fluxcloud.utils as utils
from fluxcloud.logger import logger
Expand Down Expand Up @@ -80,7 +80,7 @@ def _create_minicluster(

try:
# The operator will time creation through pods being ready
result = operator.create_minicluster(**minicluster, container=job)
operator.create(**minicluster, container=job)
except Exception as e:
# Give the user the option to delete and recreate or just exit
logger.error(f"There was an issue creating the MiniCluster: {e}")
Expand All @@ -91,7 +91,7 @@ def _create_minicluster(
"Would you like to delete this mini cluster and re-create?"
):
logger.info("Cleaning up MiniCluster...")
operator.delete_minicluster(name=name, namespace=namespace)
operator.delete()
return self._create_minicluster(
operator, minicluster, experiment, job, interactive=interactive
)
Expand All @@ -109,16 +109,14 @@ def _create_minicluster(
# Save MiniCluster metadata
image_slug = re.sub("(:|/)", "-", image)
uid = f"{size}-{name}-{image_slug}"
experiment.save_json(result, f"minicluster-size-{uid}.json")
experiment.save_json(operator.metadata, f"minicluster-size-{uid}.json")

# This is a good point to also save nodes metadata
nodes = operator.get_nodes()
operator.wait_pods(quiet=True)
pods = operator.get_pods()

experiment.save_file(nodes.to_str(), f"nodes-{uid}.json")
experiment.save_file(pods.to_str(), f"pods-size-{uid}.json")
return result
return operator.metadata

def apply(
self,
Expand All @@ -132,34 +130,28 @@ def apply(
"""
Use the client to apply (1:1 job, minicluster) the jobs programmatically.
"""
namespace = minicluster["namespace"]
name = minicluster["name"]

# Interact with the Flux Operator Python SDK
operator = FluxOperator(namespace)
operator = FluxMiniCluster()

self._create_minicluster(
operator, minicluster, experiment, job, interactive=interactive
)

# Get the broker pod (this would also wait for all pods to be ready)
broker = operator.get_broker_pod()

# Time from when the broker pod (and all other pods) are ready
start = time.time()

# Get the pod to stream output from directly
if outfile is not None:
operator.stream_output(outfile, pod=broker, stdout=stdout)
operator.stream_output(outfile, stdout=stdout)

# When output done streaming, job is done
end = time.time()
logger.info(f"Job {name} is complete! Cleaning up MiniCluster...")

# This also waits for termination (and pods to be gone) and times it
operator.delete_minicluster(name=name, namespace=namespace)

# TODO likely need to separate minicluster up/down times.
operator.delete()
results = {"times": operator.times}
results["times"][name] = end - start
return results
Expand All @@ -170,26 +162,21 @@ def submit(
"""
Use the client to submit the jobs programmatically.
"""
namespace = minicluster["namespace"]
image = job["image"]
name = minicluster["name"]
size = minicluster["size"]

# Interact with the Flux Operator Python SDK
operator = FluxOperator(namespace)
operator = FluxMiniCluster()

self._create_minicluster(
operator, minicluster, experiment, job, interactive=interactive
)

# Get the broker pod (this would also wait for all pods to be ready)
broker = operator.get_broker_pod()

# Return results (and times) to calling client
results = {}

# Submit jobs via port forward - this waits until the server is ready
with operator.port_forward(broker) as forward_url:
with operator.port_forward() as forward_url:
print(f"Port forward opened to {forward_url}")

# See https://flux-framework.org/flux-restful-api/getting_started/api.html
Expand Down Expand Up @@ -268,7 +255,7 @@ def submit(
"Would you like to delete this mini cluster?"
):
logger.info("Cleaning up MiniCluster...")
operator.delete_minicluster(name=name, namespace=namespace)
operator.delete()

# Get times recorded by FluxOperator Python SDK
results["jobs"] = completed
Expand Down
4 changes: 2 additions & 2 deletions fluxcloud/version.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2022-2023 Lawrence Livermore National Security, LLC
# SPDX-License-Identifier: Apache-2.0

__version__ = "0.2.0"
__version__ = "0.2.1"
AUTHOR = "Vanessa Sochat"
EMAIL = "[email protected]"
NAME = "flux-cloud"
Expand All @@ -15,7 +15,7 @@

INSTALL_REQUIRES = (
("kubernetes", {"min_version": None}),
("fluxoperator", {"min_version": "0.0.12"}),
("fluxoperator", {"min_version": "0.0.19"}),
("ruamel.yaml", {"min_version": None}),
("jsonschema", {"min_version": None}),
("requests", {"min_version": None}),
Expand Down
4 changes: 2 additions & 2 deletions tests/lammps/experiments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ minicluster:
jobs:
lmp-size-2:
command: lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite
image: ghcr.io/rse-ops/lammps:flux-sched-focal-v0.24.0
image: ghcr.io/rse-ops/lammps:flux-sched-focal
size: 2
lmp-size-4:
command: lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite
image: ghcr.io/rse-ops/lammps:flux-sched-focal-v0.24.0
image: ghcr.io/rse-ops/lammps:flux-sched-focal
size: 4
19 changes: 0 additions & 19 deletions tests/lammps/minicluster-template.yaml

This file was deleted.

0 comments on commit 3af9240

Please sign in to comment.