forked from bacalhau-project/bacalhau
-
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: implement local Telemetry suite (bacalhau-project#3302)
- closes bacalhau-project#3301 --------- Co-authored-by: frrist <[email protected]>
- Loading branch information
Showing
8 changed files
with
341 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Usage | ||
**Start containers:** | ||
```shell | ||
docker-compose up | ||
``` | ||
**Export collection endpoint for bacalhau** | ||
```shell | ||
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 | ||
``` | ||
**Start Bacalhau** | ||
```shell | ||
bacalhau serve --node-type=compute,requester | ||
``` | ||
**Open Browser** | ||
- Grafana: http://localhost:3000 | ||
- Username: `admin` | ||
- Password: `admin` | ||
- Jaeger: http://localhost:16686 | ||
|
||
**Clean up** | ||
- Remove the volumes associated with the containers to reset state, e.g. `docker-compose down -v`. | ||
|
||
**Saving Changes to a Grafana Dashboard** | ||
- Export the dashboard from Grafana as JSON. | ||
- Save it to the file `./grafana/provisioning/dashboards/dashboard.json`. | ||
|
||
# Best Practices for Telemetry Collections | ||
[OpenTelemetry In Bacalhau](../../docs/docs/dev/open_telemetry_in_bacalhau.md) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Local telemetry stack: an OpenTelemetry collector receives OTLP data,
# Prometheus scrapes the collector's metrics endpoint, Grafana visualises
# Prometheus, and Jaeger stores the traces forwarded by the collector.
version: '3.5'

services:
  prometheus:
    image: prom/prometheus:latest
    volumes:
      - ./prometheus/:/etc/prometheus/
      # Named volume so the TSDB survives container restarts.
      - prometheus-storage:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    ports:
      - "9090:9090"
    restart: always

  grafana:
    image: grafana/grafana
    depends_on:
      - prometheus
    volumes:
      - ./grafana/provisioning/datasources:/etc/grafana/provisioning/datasources  # Datasource provisioning
      - ./grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards  # Dashboard provisioning
    ports:
      - "3000:3000"
    restart: always

  opentelemetry-collector:
    image: otel/opentelemetry-collector:latest
    command: ["--config=/etc/otel-collector-config.yaml"]  # Command to use the custom config
    volumes:
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
    ports:
      # OTLP over HTTP, published on the loopback interface only.
      - "127.0.0.1:4318:4318"
      # OpenTelemetry protocol.
      # NOTE(review): the collector config mounted above appears to enable only
      # the HTTP receiver on 4318 - confirm whether this mapping is still needed.
      - "55681:55681"
    depends_on:
      - prometheus

  jaeger:
    container_name: jaeger
    image: jaegertracing/all-in-one:latest
    ports:
      - "6831:6831/udp"
      - "5778:5778"
      # OTLP gRPC: the collector's otlp/jaeger exporter targets jaeger:4317
      # (this mapping was previously 4316, which matches no Jaeger listener).
      - "4317:4317"
      # Jaeger web UI.
      - "16686:16686"
      - "14268:14268"

volumes:
  prometheus-storage: {}
187 changes: 187 additions & 0 deletions
187
ops/metrics/grafana/provisioning/dashboards/dashboard.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
{ | ||
"annotations": { | ||
"list": [ | ||
{ | ||
"builtIn": 1, | ||
"datasource": { | ||
"type": "grafana", | ||
"uid": "-- Grafana --" | ||
}, | ||
"enable": true, | ||
"hide": true, | ||
"iconColor": "rgba(0, 211, 255, 1)", | ||
"name": "Annotations & Alerts", | ||
"type": "dashboard" | ||
} | ||
] | ||
}, | ||
"editable": true, | ||
"fiscalYearStartMonth": 0, | ||
"graphTooltip": 0, | ||
"links": [], | ||
"liveNow": false, | ||
"panels": [ | ||
{ | ||
"datasource": { | ||
"type": "prometheus", | ||
"uid": "P6EBD7EB59B5FF381" | ||
}, | ||
"fieldConfig": { | ||
"defaults": { | ||
"color": { | ||
"mode": "thresholds" | ||
}, | ||
"mappings": [], | ||
"thresholds": { | ||
"mode": "absolute", | ||
"steps": [ | ||
{ | ||
"color": "green", | ||
"value": null | ||
}, | ||
{ | ||
"color": "red", | ||
"value": 80 | ||
} | ||
] | ||
}, | ||
"unitScale": true | ||
}, | ||
"overrides": [] | ||
}, | ||
"gridPos": { | ||
"h": 8, | ||
"w": 24, | ||
"x": 0, | ||
"y": 0 | ||
}, | ||
"id": 2, | ||
"options": { | ||
"colorMode": "value", | ||
"graphMode": "area", | ||
"justifyMode": "auto", | ||
"orientation": "auto", | ||
"reduceOptions": { | ||
"calcs": [ | ||
"lastNotNull" | ||
], | ||
"fields": "", | ||
"values": false | ||
}, | ||
"showPercentChange": false, | ||
"textMode": "auto", | ||
"wideLayout": true | ||
}, | ||
"pluginVersion": "10.3.1", | ||
"targets": [ | ||
{ | ||
"datasource": { | ||
"type": "prometheus", | ||
"uid": "P6EBD7EB59B5FF381" | ||
}, | ||
"disableTextWrap": false, | ||
"editorMode": "builder", | ||
"expr": "bacalhau_jobs_received_total", | ||
"fullMetaSearch": false, | ||
"includeNullMetadata": true, | ||
"instant": false, | ||
"legendFormat": "__auto", | ||
"range": true, | ||
"refId": "A", | ||
"useBackend": false | ||
} | ||
], | ||
"title": "Jobs Received", | ||
"type": "stat" | ||
}, | ||
{ | ||
"datasource": { | ||
"type": "prometheus", | ||
"uid": "P6EBD7EB59B5FF381" | ||
}, | ||
"fieldConfig": { | ||
"defaults": { | ||
"color": { | ||
"mode": "thresholds" | ||
}, | ||
"mappings": [], | ||
"thresholds": { | ||
"mode": "absolute", | ||
"steps": [ | ||
{ | ||
"color": "green", | ||
"value": null | ||
}, | ||
{ | ||
"color": "red", | ||
"value": 80 | ||
} | ||
] | ||
}, | ||
"unitScale": true | ||
}, | ||
"overrides": [] | ||
}, | ||
"gridPos": { | ||
"h": 9, | ||
"w": 24, | ||
"x": 0, | ||
"y": 8 | ||
}, | ||
"id": 1, | ||
"options": { | ||
"colorMode": "value", | ||
"graphMode": "area", | ||
"justifyMode": "auto", | ||
"orientation": "auto", | ||
"reduceOptions": { | ||
"calcs": [ | ||
"lastNotNull" | ||
], | ||
"fields": "", | ||
"values": false | ||
}, | ||
"showPercentChange": false, | ||
"textMode": "auto", | ||
"wideLayout": true | ||
}, | ||
"pluginVersion": "10.3.1", | ||
"targets": [ | ||
{ | ||
"datasource": { | ||
"type": "prometheus", | ||
"uid": "P6EBD7EB59B5FF381" | ||
}, | ||
"disableTextWrap": false, | ||
"editorMode": "builder", | ||
"expr": "bacalhau_jobs_completed_total", | ||
"fullMetaSearch": false, | ||
"includeNullMetadata": true, | ||
"instant": false, | ||
"legendFormat": "__auto", | ||
"range": true, | ||
"refId": "A", | ||
"useBackend": false | ||
} | ||
], | ||
"title": "Jobs Completed", | ||
"type": "stat" | ||
} | ||
], | ||
"refresh": "", | ||
"schemaVersion": 39, | ||
"tags": [], | ||
"templating": { | ||
"list": [] | ||
}, | ||
"time": { | ||
"from": "now-5m", | ||
"to": "now" | ||
}, | ||
"timepicker": {}, | ||
"timezone": "", | ||
"title": "Bacalhau Metrics", | ||
"uid": "cbe6c668-d74b-4a27-be8b-431c19b2d4ca", | ||
"version": 1, | ||
"weekStart": "" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Grafana dashboard provisioning: registers a single file-based provider.
apiVersion: 1

providers:
  # A unique name for this provider.
  - name: "default"
    # Optional: organization ID; 1 is the default.
    orgId: 1
    # The folder to save dashboards in Grafana (empty string here).
    folder: ""
    type: file
    options:
      # Path within the container.
      path: /etc/grafana/provisioning/dashboards
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Grafana datasource provisioning: one Prometheus datasource.
apiVersion: 1

datasources:
  - name: "Prometheus OTEL"
    type: "prometheus"
    # Queries are proxied through the Grafana backend.
    access: "proxy"
    # Prometheus is addressed by its compose service name and port.
    url: "http://prometheus:9090"
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Receive telemetry data from the Bacalhau OTel SDK over OTLP/HTTP.
receivers:
  otlp:
    protocols:
      http:
        endpoint: "0.0.0.0:4318"

# Batch-process data and label it with 'otel' as the service collector.
processors:
  # Batch processor with its default settings.
  batch:
  memory_limiter:
    check_interval: 5s
    limit_mib: 4000
    spike_limit_mib: 500
  resource:
    attributes:
      - key: service.collector
        value: otel
        action: insert
  # Despite the name, this processor is used by both pipelines below.
  attributes/metrics:
    actions:
      # Drop every attribute whose key matches net.sock.*
      - pattern: net\.sock.+
        action: delete

exporters:
  # Metrics are exposed on this endpoint for Prometheus to scrape.
  prometheus:
    endpoint: "0.0.0.0:9095"
    namespace: "bacalhau"
  # Uncomment for debugging; prints all telemetry to stdout.
  # (The `debug` exporter replaces the deprecated `logging` exporter.)
  # debug:
  #   verbosity: detailed
  # Traces go to the Jaeger instance.
  otlp/jaeger:
    endpoint: "jaeger:4317"
    tls:
      # Plain-text gRPC. With TLS disabled, `insecure_skip_verify` has no
      # effect, so it is intentionally not set.
      insecure: true

service:
  pipelines:
    metrics:
      receivers: [otlp]
      # memory_limiter runs first and batch last.
      processors: [memory_limiter, resource, attributes/metrics, batch]
      exporters: [prometheus]
      # exporters: [prometheus, debug]
    traces:
      receivers: [otlp]
      processors: [memory_limiter, resource, attributes/metrics, batch]
      exporters: [otlp/jaeger]
      # exporters: [debug, otlp/jaeger]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Prometheus scrape configuration: a single job with one static target.
scrape_configs:
  - job_name: "otel-collector"
    scrape_interval: 5s
    static_configs:
      - targets:
          - "opentelemetry-collector:9095"