diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..8b3d8a5
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,142 @@
+name: metrics operator tag and release
+
+on:
+  workflow_dispatch:
+    inputs:
+      release_tag:
+        description: Custom release tag
+        type: string
+        required: true
+
+jobs:
+  build-arm:
+    runs-on: ubuntu-latest
+    name: make and build arm
+    steps:
+    - name: Checkout Repository
+      uses: actions/checkout@v3
+    - name: Set tag
+      run: |
+        echo "Tag for release is ${{ inputs.release_tag }}"
+        echo "tag=${{ inputs.release_tag }}" >> ${GITHUB_ENV}
+    - uses: actions/setup-go@v3
+      with:
+        go-version: ^1.20
+    - name: GHCR Login
+      uses: docker/login-action@v2
+      with:
+        registry: ghcr.io
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+
+    - name: Add custom buildx ARM builder
+      run: |
+        docker buildx create --name armbuilder
+        docker buildx use armbuilder
+        docker buildx inspect --bootstrap
+
+    - name: Deploy Container
+      env:
+        tag: ${{ env.tag }}
+      run: make arm-deploy ARMIMG=ghcr.io/converged-computing/metrics-operator:${tag}-arm
+
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        command: [docker]
+    name: make and build ${{ matrix.command }}
+    steps:
+    - name: Checkout Repository
+      uses: actions/checkout@v3
+    - uses: actions/setup-go@v3
+      with:
+        go-version: ^1.20
+    - name: Set tag
+      run: |
+        echo "Tag for release is ${{ inputs.release_tag }}"
+        echo "tag=${{ inputs.release_tag }}" >> ${GITHUB_ENV}
+    - name: GHCR Login
+      uses: docker/login-action@v2
+      with:
+        registry: ghcr.io
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+
+    - name: Build Container
+      env:
+        tag: ${{ env.tag }}
+      run: |
+        image=ghcr.io/converged-computing/metrics-operator-${{ matrix.command }}:v${tag}
+        img=ghcr.io/converged-computing/metrics-operator:v${tag}
+        make ${{ matrix.command }}-build BUNDLE_IMG=${image} IMG=${img} CATALOG_IMG=${image}
+
+    - name: Deploy Container
+      env:
+        tag: ${{ env.tag }}
+      run: |
+        image=ghcr.io/converged-computing/metrics-operator-${{ matrix.command }}:v${tag}
+        img=ghcr.io/converged-computing/metrics-operator:v${tag}
+        make ${{ matrix.command }}-push BUNDLE_IMG=${image} IMG=${img} CATALOG_IMG=${image}
+
+  release:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+    - uses: actions/setup-go@v3
+      with:
+        go-version: ^1.20
+    - name: Set tag
+      run: |
+        echo "Tag for release is ${{ inputs.release_tag }}"
+        echo "tag=${{ inputs.release_tag }}" >> ${GITHUB_ENV}
+    - name: Install
+      run: conda create --quiet --name mo twine
+    - name: Install dependencies
+      run: |
+        export PATH="/usr/share/miniconda/bin:$PATH"
+        source activate mo
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USER }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASS }}
+        tag: ${{ env.tag }}
+      run: |
+        export PATH="/usr/share/miniconda/bin:$PATH"
+        source activate mo
+        cd sdk/python/v1alpha1/
+        pip install -e .
+        python setup.py sdist bdist_wheel
+        cd dist
+        wheelfile=$(ls metricsoperator-*.whl)
+        wheelfile=$(echo "$wheelfile" | sed "s/metricsoperator-//")
+        wheelfile=$(echo "$wheelfile" | sed "s/-py3-none-any.whl//")
+        echo "Release for Python is ${wheelfile}"
+        echo "Release for metrics operator is ${tag}"
+        cd ../
+        if [[ "${wheelfile}" == "${tag}" ]]; then
+            echo "Versions are correct, publishing."
+            twine upload dist/*
+        else
+            echo "Versions are not correct, please fix and upload locally."
+        fi
+
+    - name: Build release manifests
+      env:
+        tag: ${{ env.tag }}
+      run: |
+        make build-config-arm ARMIMG=ghcr.io/converged-computing/metrics-operator:${tag}-arm
+        make build-config IMG=ghcr.io/converged-computing/metrics-operator:v${tag}
+    - name: Release metrics Operator
+      uses: softprops/action-gh-release@v1
+      with:
+        name: Metrics Operator Release v${{ env.tag }}
+        tag_name: ${{ env.tag }}
+        generate_release_notes: true
+        files: |
+          examples/dist/metrics-operator-arm.yaml
+          examples/dist/metrics-operator.yaml
+      env:
+        GITHUB_REPOSITORY: converged-computing/metrics-operator
diff --git a/README.md b/README.md
index facec62..1e1dede 100644
--- a/README.md
+++ b/README.md
@@ -5,18 +5,17 @@
 Developing metrics and a catalog of applications to assess different kinds of Kubernetes performance.
 We likely will choose different metrics that are important for HPC.
 Note that I haven't started the operator yet because I'm [testing ideas for the design](hack/test).
+To learn more:
 
-View our ⭐️ [Documentation](https://converged-computing.github.io/metrics-operator/) ⭐️
+- ⭐️ [Documentation](https://converged-computing.github.io/metrics-operator/) ⭐️
+- 🐯️ [Python module](https://pypi.org/project/metricsoperator/0.0.0/) 🐯️
 
 ## Dinosaur TODO
 
 - Find better logging library for logging outside of controller
-- Python function to save entire spec to yaml (for MetricSet and JobSet)
-- When first metric ready for use with Python (storage) do first releases
-- We should have Python SDK with parsers for output (e.g., run metric, parse output meaningfully)
-- Need a strategy for storing metrics output / logs
+- For larger metric collections, we should have a log streaming mode (and not wait for Completed/Successful)
 - For services we are measuring, we likely need to be able to kill after N seconds (to complete job) or to specify the success policy on the metrics containers instead of the application
-- TBA
+- Python function to save entire spec to yaml (for MetricSet and JobSet)?
 - Metrics parsers to do (need to add separators, formatting, python parser):
   - perf-sysstat
   - netmark / osu-benchmark
diff --git a/docs/_static/data/metrics.json b/docs/_static/data/metrics.json
index 0a1682d..6b5f5dd 100644
--- a/docs/_static/data/metrics.json
+++ b/docs/_static/data/metrics.json
@@ -1,11 +1,4 @@
 [
-    {
-        "name": "network-osu-benchmark",
-        "description": "point to point MPI benchmarks",
-        "type": "standalone",
-        "image": "ghcr.io/converged-computing/metric-osu-benchmark:latest",
-        "url": "https://mvapich.cse.ohio-state.edu/benchmarks/"
-    },
     {
         "name": "perf-sysstat",
         "description": "statistics for Linux tasks (processes) : I/O, CPU, memory, etc.",
@@ -26,5 +19,12 @@
         "type": "standalone",
         "image": "vanessa/netmark:latest",
         "url": ""
+    },
+    {
+        "name": "network-osu-benchmark",
+        "description": "point to point MPI benchmarks",
+        "type": "standalone",
+        "image": "ghcr.io/converged-computing/metric-osu-benchmark:latest",
+        "url": "https://mvapich.cse.ohio-state.edu/benchmarks/"
     }
 ]
\ No newline at end of file
diff --git a/docs/development/developer-guide.md b/docs/development/developer-guide.md
index c113207..b962a93 100644
--- a/docs/development/developer-guide.md
+++ b/docs/development/developer-guide.md
@@ -108,6 +108,13 @@ I run this before I push to a GitHub branch.
 $ make pre-push
 ```
 
+We also use pre-commit for Python formatting:
+
+```bash
+pip install -r .github/dev-requirements.txt
+pre-commit run --all-files
+```
+
 ## Writing Metric Containers
 
 This section will include instructions for how to write a metrics container.
diff --git a/docs/getting_started/user-guide.md b/docs/getting_started/user-guide.md
index 38928f8..c5abb63 100644
--- a/docs/getting_started/user-guide.md
+++ b/docs/getting_started/user-guide.md
@@ -98,8 +98,7 @@ Let's first review how this works.
 3. The metric output is printed in pod logs with a standard packaging (e.g., sections and headers) to distinguish output sections.
 4. We provide a Python module [metricsoperator](https://pypi.org/project/metricsoperator/) that can help you run an experiment, applying the metrics.yaml and then retrieving and parsing logs.
 
-For the last step, this is important because every metric tool is a special snowflake, outputting some custom format that is hard to parse and then plot. We hope to provide
-an easy means to do this so you can go from data collection to results more quickly. Now let's review a suggested set of steps for you as a new user! You can:
+For the last step, this is important because every metric tool is a special snowflake, outputting some custom format that is hard to parse and then plot. By providing a parser paired with each metric, we hope to provide an easy means to do this so you can go from data collection to results more quickly. Now let's review a suggested set of steps for you as a new user! You can:
 
 1. First choose one or more [metrics](metrics.md), [request a metric be added](https://github.com/converged-computing/metrics-operator/issues), or start with a pre-created [examples](https://github.com/converged-computing/metrics-operator/tree/main/examples). Often if you want to measure an application or storage or "other" (e.g., networking) we already have a metrics.yaml and associated parser suited for your needs.
 2. Run the metric directly from the metrics.yaml, or use the Python module [metricsoperator](https://pypi.org/project/metricsoperator/) to run and collect output.
@@ -119,8 +118,8 @@ and experiments that you put together here for others to use.
 For all metric types, the following applies:
 
 1. You can create more than one pod (scale the metric) as you see fit.
-2. There is always a headless service provided for metrics within the JobSet to make use of
-3. The definition of metrics in your metrics.yaml file is consistent across types
+2. There is always a headless service provided for metrics within the JobSet to make use of.
+3. The definition of metrics in your metrics.yaml file is consistent across types.
 4. Each metric type in the list can take a rate, completions, and custom options.
 
 For another overview of these designs, please see the [developer docs](../development/index.md).
diff --git a/docs/index.md b/docs/index.md
index bf77d8e..5e9d61a 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -7,12 +7,12 @@ that you can install to your cluster to easily measure different aspects of perf
 
  - I/O metrics to assess storage options
  - system performance metrics (memory, cpu, etc.)
- - timings
+ - network or other custom metrics / timings
 
-For this project, we aim to provide both of the following:
+For this project, we aim to provide the following:
 
 1. A catalog of pre-defined metrics and associated containers you can quickly use
-2. A number of containerized HPC applications to demonstrate using the operator.
+2. A number of containerized HPC applications and experiments to demonstrate using the operator.
 
 We wanted to create this operator because we didn't have a solid understanding of our application
 performance (typically from the high performance computing space)
diff --git a/sdk/python/v1alpha1/.gitignore b/sdk/python/v1alpha1/.gitignore
index e2b2ffd..6b87408 100644
--- a/sdk/python/v1alpha1/.gitignore
+++ b/sdk/python/v1alpha1/.gitignore
@@ -2,3 +2,4 @@ metricsoperator.egg-info
 build
 __pycache__
 .eggs
+dist
diff --git a/sdk/python/v1alpha1/CHANGELOG.md b/sdk/python/v1alpha1/CHANGELOG.md
new file mode 100644
index 0000000..95cc208
--- /dev/null
+++ b/sdk/python/v1alpha1/CHANGELOG.md
@@ -0,0 +1,18 @@
+# CHANGELOG
+
+This is a manually generated log to track changes to the repository for each release.
+Each section should include general headers such as **Implemented enhancements**
+and **Merged pull requests**. Critical items to know are:
+
+ - renamed commands
+ - deprecated / removed commands
+ - changed defaults
+ - backward incompatible changes (recipe file format? image file format?)
+ - migration guidance (how to convert images?)
+ - changed behaviour (recipe sections work differently)
+
+The versions coincide with releases on pip. Only major versions will be released as tags on Github.
+
+## [0.0.x](https://github.com/converged-computing/metrics-operator/tree/main) (0.0.x)
+ - First release with support for parsing io-sysstat output (0.0.1)
+ - Skeleton release (0.0.0)
diff --git a/sdk/python/v1alpha1/README.md b/sdk/python/v1alpha1/README.md
index 71258cb..71b00ec 100644
--- a/sdk/python/v1alpha1/README.md
+++ b/sdk/python/v1alpha1/README.md
@@ -1,3 +1,22 @@
 # Metrics Operator Python
 
 > Helpers for submitting CRD with Python and parsing logs.
+
+![https://raw.githubusercontent.com/converged-computing/metrics-operator/main/docs/images/metrics-operator-banner.png](https://raw.githubusercontent.com/converged-computing/metrics-operator/main/docs/images/metrics-operator-banner.png)
+
+Welcome to the Metrics Operator Python! You can learn more about the operator
+at the ⭐️ [Documentation](https://converged-computing.github.io/metrics-operator/) ⭐️
+and see our [python examples](https://github.com/converged-computing/metrics-operator/tree/main/examples/python/) for examples of using this module.
+
+## License
+
+HPCIC DevTools is distributed under the terms of the MIT license.
+All new contributions must be made under this license.
+
+See [LICENSE](https://github.com/converged-computing/cloud-select/blob/main/LICENSE),
+[COPYRIGHT](https://github.com/converged-computing/cloud-select/blob/main/COPYRIGHT), and
+[NOTICE](https://github.com/converged-computing/cloud-select/blob/main/NOTICE) for details.
+
+SPDX-License-Identifier: (MIT)
+
+LLNL-CODE- 842614
diff --git a/sdk/python/v1alpha1/setup.py b/sdk/python/v1alpha1/setup.py
index 97e2839..642aeb5 100644
--- a/sdk/python/v1alpha1/setup.py
+++ b/sdk/python/v1alpha1/setup.py
@@ -30,7 +30,7 @@ if __name__ == "__main__":
     setup(
         name="metricsoperator",
-        version="0.0.0",
+        version="0.0.1",
         author="Vanessasaurus",
         author_email="vsoch@users.noreply.github.com",
         maintainer="Vanessasaurus",
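
Reviewer note (not part of the diff): the new workflow only runs via `workflow_dispatch`, so cutting a release stays a manual step. A minimal sketch of dispatching it from the GitHub CLI, assuming `gh` is installed and authenticated for this repository, and using `0.0.1` purely as an example tag:

```bash
# Dispatch the release workflow with a custom tag. The tag should match the
# version in sdk/python/v1alpha1/setup.py (0.0.1 after this change); the
# release job compares the built wheel version against the tag and skips
# the PyPI upload if they differ.
gh workflow run release.yaml -f release_tag=0.0.1

# Check on the dispatched run.
gh run list --workflow=release.yaml --limit 1
```

Note that the container images are pushed as `v${tag}` while the PyPI version check uses the bare tag, so the input should be given without a leading "v".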