Remove rate completions (#44)

* remove rate/completions from hard coding into base metric Signed-off-by: vsoch <[email protected]>
converged-computing · Aug 16, 2023 · 69a8c03 · 69a8c03
1 parent 609cbf6
commit 69a8c03
Show file tree

Hide file tree

Showing 29 changed files with 136 additions and 165 deletions.
diff --git a/api/v1alpha1/metric_types.go b/api/v1alpha1/metric_types.go
@@ -230,13 +230,6 @@ type Volume struct {
 type Metric struct {
 	Name string `json:"name"`
 
-	// Global attributes shared by all metrics
-	// Sampling rate in seconds. Defaults to every 10 seconds
-	// +kubebuilder:default=10
-	// +default=10
-	// +optional
-	Rate int32 `json:"rate"`
-
 	// Metric Options
 	// Metric specific options
 	// +optional
@@ -251,12 +244,6 @@ type Metric struct {
 	// +optional
 	MapOptions map[string]map[string]intstr.IntOrString `json:"mapOptions"`
 
-	// Completions
-	// Number of completions to do, more relevant for service type applications
-	// that run forever, or a storage metric. If not set (0) then don't set a limit
-	// +optional
-	Completions int32 `json:"completions"`
-
 	// Container Spec has attributes for the container
 	//+optional
 	Attributes ContainerSpec `json:"attributes"`
@@ -328,10 +315,8 @@ func (m *MetricSet) Validate() bool {
 		fmt.Printf("😥️ One or more metrics are required.\n")
 		return false
 	}
-
-	// Storage or an application can have completions (replicas)
 	if m.Spec.Pods < 1 {
-		fmt.Printf("😥️ Completions must be >= 1.")
+		fmt.Printf("😥️ Pods must be >= 1.")
 		return false
 	}
 
@@ -363,12 +348,6 @@ func (m *MetricSet) Validate() bool {
 	if m.Spec.Completions == 0 {
 		m.Spec.Completions = m.Spec.Pods
 	}
-	// Validation for each metric
-	for _, metric := range m.Spec.Metrics {
-		if metric.Rate <= 0 {
-			metric.Rate = 10
-		}
-	}
 
 	// A standalone metric by definition runs alone
 	if len(m.Spec.Metrics) > 1 && m.IsStandalone() {

diff --git a/config/crd/bases/flux-framework.org_metricsets.yaml b/config/crd/bases/flux-framework.org_metricsets.yaml
@@ -168,12 +168,6 @@ spec:
                           - privileged
                           type: object
                       type: object
-                    completions:
-                      description: Completions Number of completions to do, more relevant
-                        for service type applications that run forever, or a storage
-                        metric. If not set (0) then don't set a limit
-                      format: int32
-                      type: integer
                     listOptions:
                       additionalProperties:
                         items:
@@ -204,12 +198,6 @@ spec:
                         x-kubernetes-int-or-string: true
                       description: Metric Options Metric specific options
                       type: object
-                    rate:
-                      default: 10
-                      description: Global attributes shared by all metrics Sampling
-                        rate in seconds. Defaults to every 10 seconds
-                      format: int32
-                      type: integer
                     resources:
                       description: Resources include limits and requests for the metric
                         container

diff --git a/docs/_static/data/metrics.json b/docs/_static/data/metrics.json
@@ -2,15 +2,15 @@
  {
   "name": "app-amg",
   "description": "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids",
-  "family": "simulation",
+  "family": "solver",
   "type": "standalone",
   "image": "ghcr.io/converged-computing/metric-amg:latest",
   "url": "https://github.com/LLNL/AMG"
  },
  {
   "name": "app-kripke",
   "description": "parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids",
-  "family": "simulation",
+  "family": "solver",
   "type": "standalone",
   "image": "ghcr.io/converged-computing/metric-kripke:latest",
   "url": "https://github.com/LLNL/Kripke"

diff --git a/docs/_static/data/table.html b/docs/_static/data/table.html
@@ -459,6 +459,9 @@
       if(data.family == 'storage'){
         $(row).find('td:eq(2)').css('background-color', 'lavender');
       }
+      if(data.family == 'solver'){
+        $(row).find('td:eq(2)').css('background-color', 'lightgreen');
+      }
       if(data.family == 'performance'){
         $(row).find('td:eq(2)').css('background-color', '#f79fb7');
       }

diff --git a/docs/getting_started/metrics.md b/docs/getting_started/metrics.md
@@ -11,14 +11,8 @@ Each of the above is a metric design, which is primarily represented in the Metr
 there are different families of metrics (e.g., storage, network, performance, simulation, solver) shown in the table below as the "Family" column. 
 We likely will tweak and improve upon these categories.
 
-<iframe src="../_static/data/table.html" style="width:100%; height:800px;" frameBorder="0"></iframe>
+<iframe src="../_static/data/table.html" style="width:100%; height:850px;" frameBorder="0"></iframe>
 
-All metrics can be customized with the following variables
-
-|Name | Description | Type | Default |
-|-----|-------------|------------|------|
-| completions | Number of times to run metric | int32 | unset (runs for lifetime of application or indefinitely) |
-| rate | Seconds to pause between measurements | int32 | 10 |
 
 ## Implemented Metrics
 
@@ -43,6 +37,8 @@ This metric provides the "pidstat" executable of the sysstat library. The follow
 | color | Set to turn on color parsing | Anything set | unset |
 | pids | For debugging, show consistent output of ps aux | Anything set | unset |
 | threads | add `-t` to each pidstat command to indicate wanting thread-level output | Anything set | unset |
+| completions | Number of times to run metric | int32 | unset (runs for lifetime of application or indefinitely) |
+| rate | Seconds to pause between measurements | int32 | 10 |
 
 By default color and pids are set to false anticipating log parsing.
 And we also provide the option to see "commands" or specific commands based on a job index to the metric.
@@ -51,11 +47,14 @@ and the rest (workers).
 
 ```yaml
 - name: perf-sysstat
-  rate: 2
   options:
     pids: "true"
+    rate: 2
 
   # Look for pids based on commands matched to index
   mapOptions:
     commands:
        # First set all to use the worker command, but give the lead broker a special command
@@ -72,7 +71,7 @@ for how we use them.  If there is an option or command that is not exposed that
 
 These metrics are intended to assess storage volumes.
 
-#### io-sfio
+#### io-fio
 
  - [Storage Metric Set](user-guide.md#application-metric-set)
  - *[io-host-volume](https://github.com/converged-computing/metrics-operator/tree/main/examples/storage/google/io-fusion)*
@@ -101,6 +100,8 @@ This is the "iostat" executable of the sysstat library.
 |Name | Description | Type | Default |
 |-----|-------------|------------|------|
 | human | Show tabular, human-readable output inside of json | string "true" or "false" | "false" |
+| completions | Number of times to run metric | int32 | unset (runs for lifetime of application or indefinitely) |
+| rate | Seconds to pause between measurements | int32 | 10 |
 
 This is good for mounted storage that can be seen by the operating system, but may not work for something like NFS.
 

diff --git a/examples/dist/metrics-operator-arm.yaml b/examples/dist/metrics-operator-arm.yaml
@@ -160,10 +160,6 @@ spec:
                           - privileged
                           type: object
                       type: object
-                    completions:
-                      description: Completions Number of completions to do, more relevant for service type applications that run forever, or a storage metric. If not set (0) then don't set a limit
-                      format: int32
-                      type: integer
                     listOptions:
                       additionalProperties:
                         items:
@@ -194,11 +190,6 @@ spec:
                         x-kubernetes-int-or-string: true
                       description: Metric Options Metric specific options
                       type: object
-                    rate:
-                      default: 10
-                      description: Global attributes shared by all metrics Sampling rate in seconds. Defaults to every 10 seconds
-                      format: int32
-                      type: integer
                     resources:
                       description: Resources include limits and requests for the metric container
                       properties:

diff --git a/examples/dist/metrics-operator.yaml b/examples/dist/metrics-operator.yaml
@@ -160,10 +160,6 @@ spec:
                           - privileged
                           type: object
                       type: object
-                    completions:
-                      description: Completions Number of completions to do, more relevant for service type applications that run forever, or a storage metric. If not set (0) then don't set a limit
-                      format: int32
-                      type: integer
                     listOptions:
                       additionalProperties:
                         items:
@@ -194,11 +190,6 @@ spec:
                         x-kubernetes-int-or-string: true
                       description: Metric Options Metric specific options
                       type: object
-                    rate:
-                      default: 10
-                      description: Global attributes shared by all metrics Sampling rate in seconds. Defaults to every 10 seconds
-                      format: int32
-                      type: integer
                     resources:
                       description: Resources include limits and requests for the metric container
                       properties:

diff --git a/examples/tests/app-amg/README.md b/examples/tests/app-amg/README.md
@@ -57,7 +57,7 @@ and then AMG running a test, and the log is printed to the console.
 kubectl logs metricset-sample-l-0-0-lt782 -f
 ```
 ```console
-METADATA START {"pods":2,"completions":2,"metricName":"app-amg","metricDescription":"parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids","metricType":"standalone","metricOptions":{"command":"amg","completions":0,"mpirun":"mpirun --hostfile ./hostlist.txt","rate":10,"workdir":"/opt/AMG"}}
+METADATA START {"pods":2,"completions":2,"metricName":"app-amg","metricDescription":"parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids","metricType":"standalone","metricOptions":{"command":"amg","mpirun":"mpirun --hostfile ./hostlist.txt","workdir":"/opt/AMG"}}
 METADATA END
 Sleeping for 10 seconds waiting for network...
 METRICS OPERATOR COLLECTION START
@@ -129,7 +129,7 @@ find sections of data. Also note that the worker will only be alive long enough
 finish, and once it does, the worker goes away! Here is what you'll see in its brief life:
 
 ```console
-METADATA START {"pods":2,"completions":2,"metricName":"app-amg","metricDescription":"parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids","metricType":"standalone","metricOptions":{"command":"amg","completions":0,"mpirun":"mpirun --hostfile ./hostlist.txt","rate":10,"workdir":"/opt/AMG"}}
+METADATA START {"pods":2,"completions":2,"metricName":"app-amg","metricDescription":"parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids","metricType":"standalone","metricOptions":{"command":"amg","mpirun":"mpirun --hostfile ./hostlist.txt","workdir":"/opt/AMG"}}
 METADATA END
 Sleeping for 10 seconds waiting for network...
 METRICS OPERATOR COLLECTION START

diff --git a/examples/tests/app-kripke/README.md b/examples/tests/app-kripke/README.md
@@ -56,7 +56,7 @@ and then AMG running a test, and the log is printed to the console.
 kubectl logs metricset-sample-l-0-0-lt782 -f
 ```
 ```console
-METADATA START {"pods":2,"completions":2,"metricName":"app-kripke","metricDescription":"parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids","metricType":"standalone","metricOptions":{"command":"kripke","completions":0,"mpirun":"","rate":10,"workdir":"/opt/kripke"}}
+METADATA START {"pods":2,"completions":2,"metricName":"app-kripke","metricDescription":"parallel algebraic multigrid solver for linear systems arising from problems on unstructured grids","metricType":"standalone","metricOptions":{"command":"kripke","mpirun":"","workdir":"/opt/kripke"}}
 METADATA END
 /metrics_operator/kripke-launcher.sh: line 7: cd: /opt/kripke: No such file or directory
 Sleeping for 10 seconds waiting for network...

diff --git a/examples/tests/app-lammps/README.md b/examples/tests/app-lammps/README.md
@@ -52,7 +52,7 @@ and then LAMMPS running, and the log is printed to the console.
 kubectl logs metricset-sample-l-0-0-lt782 -f
 ```
 ```console
-METADATA START {"pods":2,"completions":2,"metricName":"app-lammps","metricDescription":"LAMMPS molecular dynamic simulation","metricType":"standalone","metricOptions":{"command":"mpirun --hostfile ./hostlist.txt -np 2 --map-by socket lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite","completions":0,"rate":10,"workdir":"/opt/lammps/examples/reaxff/HNS"}}
+METADATA START {"pods":2,"completions":2,"metricName":"app-lammps","metricDescription":"LAMMPS molecular dynamic simulation","metricType":"standalone","metricOptions":{"command":"mpirun --hostfile ./hostlist.txt -np 2 --map-by socket lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite","workdir":"/opt/lammps/examples/reaxff/HNS"}}
 METADATA END
 Sleeping for 10 seconds waiting for network...
 METRICS OPERATOR COLLECTION START
@@ -145,7 +145,7 @@ find sections of data. Also note that the worker will only be alive long enough
 finish, and once it does, the worker goes away! Here is what you'll see in its brief life:
 
 ```console
-METADATA START {"pods":2,"completions":2,"metricName":"app-lammps","metricDescription":"LAMMPS molecular dynamic simulation","metricType":"standalone","metricOptions":{"command":"mpirun --hostfile ./hostlist.txt -np 2 --map-by socket lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite","completions":0,"rate":10,"workdir":"/opt/lammps/examples/reaxff/HNS"}}
+METADATA START {"pods":2,"completions":2,"metricName":"app-lammps","metricDescription":"LAMMPS molecular dynamic simulation","metricType":"standalone","metricOptions":{"command":"mpirun --hostfile ./hostlist.txt -np 2 --map-by socket lmp -v x 2 -v y 2 -v z 2 -in in.reaxc.hns -nocite","workdir":"/opt/lammps/examples/reaxff/HNS"}}
 METADATA END
 Sleeping for 10 seconds waiting for network...
 METRICS OPERATOR COLLECTION START

diff --git a/examples/tests/io-fio/README.md b/examples/tests/io-fio/README.md
@@ -57,7 +57,7 @@ And see the fio result!
 
 ```console
 $ kubectl logs metricset-sample-m-0-4x56g 
-METADATA START {"pods":1,"completions":1,"storageVolumePath":"/workflow","storageVolumeHostPath":"/tmp/workflow","metricName":"io-fio","metricDescription":"Flexible IO Tester (FIO)","metricType":"storage","metricOptions":{"blocksize":"4k","completions":0,"directory":"/tmp","iodepth":64,"rate":10,"size":"4G","testname":"test"}}
+METADATA START {"pods":1,"completions":1,"storageVolumePath":"/workflow","storageVolumeHostPath":"/tmp/workflow","metricName":"io-fio","metricDescription":"Flexible IO Tester (FIO)","metricType":"storage","metricOptions":{"blocksize":"4k","directory":"/tmp","iodepth":64,"size":"4G","testname":"test"}}
 METADATA END
 FIO COMMAND START
 fio --randrepeat=1 --ioengine=libaio --direct=1 --gtod_reduce=1 --name=test --bs=4k --iodepth=64 --readwrite=randrw --rwmixread=75 --size=4G --filename=/tmp/test-b273108fb88ca182ac07dad8b6fe4e61 --output-format=json

diff --git a/examples/tests/io-host-volume/README.md b/examples/tests/io-host-volume/README.md
@@ -59,7 +59,7 @@ added by the Metrics operator for easy parsing by the metricsoperator Python mod
 
 ```console
 root
-METADATA START {"pods":2,"completions":2,"metricName":"network-netmark","metricDescription":"point to point networking tool","metricType":"standalone","metricOptions":{"completions":0,"messageSize":0,"rate":10,"sendReceiveCycles":20,"storeEachTrial":"true","tasks":2,"trials":20,"warmups":10}}
+METADATA START {"pods":2,"completions":2,"metricName":"network-netmark","metricDescription":"point to point networking tool","metricType":"standalone","metricOptions":{"messageSize":0,"sendReceiveCycles":20,"storeEachTrial":"true","tasks":2,"trials":20,"warmups":10}}
 METADATA END
 Sleeping for 10 seconds waiting for network...
 METRICS OPERATOR COLLECTION START
@@ -72,7 +72,7 @@ Error from server (BadRequest): container "io-sysstat" in pod "metricset-sample-
 (env) (base) vanessa@vanessa-ThinkPad-T490s:~/Desktop/Code/metrics-operator$ kubectl logs metricset-sample-m-0-tr58z -f
 Error from server (BadRequest): container "io-sysstat" in pod "metricset-sample-m-0-tr58z" is waiting to start: ContainerCreating
 (env) (base) vanessa@vanessa-ThinkPad-T490s:~/Desktop/Code/metrics-operator$ kubectl logs metricset-sample-m-0-tr58z -f
-METADATA START {"pods":1,"completions":1,"storageVolumePath":"/workflow","storageVolumeHostPath":"/tmp/workflow","metricName":"io-sysstat","metricDescription":"statistics for Linux tasks (processes) : I/O, CPU, memory, etc.","metricType":"storage","metricOptions":{"completions":2,"human":"false","rate":10}}
+METADATA START {"pods":1,"completions":1,"storageVolumePath":"/workflow","storageVolumeHostPath":"/tmp/workflow","metricName":"io-sysstat","metricDescription":"statistics for Linux tasks (processes) : I/O, CPU, memory, etc.","metricType":"storage","metricOptions":{"completions":2,"human":"false"}}
 METADATA END
 METRICS OPERATOR COLLECTION START
 METRICS OPERATOR TIMEPOINT
@@ -273,7 +273,7 @@ You'll see a more tabular format:
 <summary>Output with options->human set to "true"</summary>
 
 ```console
-METADATA START {"pods":1,"completions":1,"storageVolumePath":"/workflow","storageVolumeHostPath":"/tmp/workflow","metricName":"io-sysstat","metricDescription":"statistics for Linux tasks (processes) : I/O, CPU, memory, etc.","metricType":"storage","metricOptions":{"completions":2,"human":"true","rate":10}}
+METADATA START {"pods":1,"completions":1,"storageVolumePath":"/workflow","storageVolumeHostPath":"/tmp/workflow","metricName":"io-sysstat","metricDescription":"statistics for Linux tasks (processes) : I/O, CPU, memory, etc.","metricType":"storage","metricOptions":{"completions":2,"human":"true"}}
 METADATA END
 METRICS OPERATOR COLLECTION START
 ...

diff --git a/examples/tests/io-host-volume/metrics.yaml b/examples/tests/io-host-volume/metrics.yaml
@@ -16,10 +16,9 @@ spec:
 
   metrics:
     - name: io-sysstat
-      rate: 10
-      completions: 2
-
-      # Add human readable output (in a table instead of json)
-      # options:
-      #  human: "true"
+      options:
+        rate: 10
+        completions: 2
+        # Add human readable output (in a table instead of json)
+        # human: "true"
 
diff --git a/examples/tests/network-netmark/README.md b/examples/tests/network-netmark/README.md
@@ -58,12 +58,12 @@ kubectl logs metricset-sample-n-0-0-lt782 -f
 ```
 ```console
 root
-METADATA START {"pods":2,"completions":2,"metricName":"network-netmark","metricDescription":"point to point networking tool","metricType":"standalone","metricOptions":{"completions":0,"messageSize":0,"rate":10,"sendReceiveCycles":20,"storeEachTrial":"true","tasks":2,"trials":20,"warmups":10}}
+METADATA START {"pods":2,"completions":2,"metricName":"network-netmark","metricDescription":"point to point networking tool","metricType":"standalone","metricOptions":{"messageSize":0,"sendReceiveCycles":20,"storeEachTrial":"true","tasks":2,"trials":20,"warmups":10}}
 METADATA END
 Sleeping for 10 seconds waiting for network...
 (env) (base) vanessa@vanessa-ThinkPad-T490s:~/Desktop/Code/metrics-operator$ kubectl logs metricset-sample-n-0-0-82jz4 -f
 root
-METADATA START {"pods":2,"completions":2,"metricName":"network-netmark","metricDescription":"point to point networking tool","metricType":"standalone","metricOptions":{"completions":0,"messageSize":0,"rate":10,"sendReceiveCycles":20,"storeEachTrial":"true","tasks":2,"trials":20,"warmups":10}}
+METADATA START {"pods":2,"completions":2,"metricName":"network-netmark","metricDescription":"point to point networking tool","metricType":"standalone","metricOptions":{"messageSize":0,"sendReceiveCycles":20,"storeEachTrial":"true","tasks":2,"trials":20,"warmups":10}}
 METADATA END
 Sleeping for 10 seconds waiting for network...
 METRICS OPERATOR COLLECTION START
@@ -195,7 +195,7 @@ finish, and once it does, the worker goes away! Here is what you'll see in its b
 
 ```console
 root
-METADATA START {"pods":2,"completions":2,"metricName":"network-netmark","metricDescription":"point to point networking tool","metricType":"standalone","metricOptions":{"completions":0,"messageSize":0,"rate":10,"sendReceiveCycles":20,"storeEachTrial":"true","tasks":2,"trials":20,"warmups":10}}
+METADATA START {"pods":2,"completions":2,"metricName":"network-netmark","metricDescription":"point to point networking tool","metricType":"standalone","metricOptions":{"messageSize":0,"sendReceiveCycles":20,"storeEachTrial":"true","tasks":2,"trials":20,"warmups":10}}
 METADATA END
 Sleeping for 10 seconds waiting for network...
 METRICS OPERATOR COLLECTION START