Skip to content

Commit

Permalink
feat(qrm): add message tag in qrm plugins alloc_failed, get_topology_hints_failed and remove_pod_failed metric
Browse files Browse the repository at this point in the history
  • Loading branch information
xu282934741 committed Jan 17, 2025
1 parent 0c24ce6 commit 52ccd74
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
14 changes: 11 additions & 3 deletions pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package dynamicpolicy
import (
"context"
"fmt"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -694,7 +695,10 @@ func (p *DynamicPolicy) GetTopologyHints(ctx context.Context,
defer func() {
p.RUnlock()
if err != nil {
_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", err), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})

general.ErrorS(err, "GetTopologyHints failed",
"podNamespace", req.PodNamespace,
"podName", req.PodName,
Expand Down Expand Up @@ -840,7 +844,9 @@ func (p *DynamicPolicy) Allocate(ctx context.Context,
}
} else if respErr != nil {
_ = p.removeContainer(req.PodUid, req.ContainerName)
_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", respErr), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})
}

p.Unlock()
Expand Down Expand Up @@ -933,7 +939,9 @@ func (p *DynamicPolicy) RemovePod(ctx context.Context,
defer func() {
p.Unlock()
if err != nil {
_ = p.emitter.StoreInt64(util.MetricNameRemovePodFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", err), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameRemovePodFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})
general.ErrorS(err, "RemovePod failed", "podUID", req.PodUid)
}
general.InfoS("finished", "duration", time.Since(startTime).String(), "podUID", req.PodUid)
Expand Down
17 changes: 13 additions & 4 deletions pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"errors"
"fmt"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -584,7 +585,9 @@ func (p *DynamicPolicy) GetTopologyHints(ctx context.Context,
defer func() {
p.RUnlock()
if err != nil {
_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", err), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})
general.ErrorS(err, "GetTopologyHints failed",
"podNamespace", req.PodNamespace,
"podName", req.PodName,
Expand Down Expand Up @@ -626,7 +629,9 @@ func (p *DynamicPolicy) RemovePod(ctx context.Context,
defer func() {
p.Unlock()
if err != nil {
_ = p.emitter.StoreInt64(util.MetricNameRemovePodFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", err), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameRemovePodFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})
general.ErrorS(err, "RemovePod failed", "podUID", req.PodUid)
}
general.InfoS("finished", "duration", time.Since(startTime), "podUID", req.PodUid)
Expand All @@ -652,7 +657,9 @@ func (p *DynamicPolicy) RemovePod(ctx context.Context,
err = p.removePod(req.PodUid)
if err != nil {
general.ErrorS(err, "remove pod failed with error", "podUID", req.PodUid)
_ = p.emitter.StoreInt64(util.MetricNameRemovePodFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", err), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameRemovePodFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})
return nil, err
}

Expand Down Expand Up @@ -944,7 +951,9 @@ func (p *DynamicPolicy) Allocate(ctx context.Context,
}
} else if respErr != nil {
_ = p.removeContainer(req.PodUid, req.ContainerName)
_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", respErr), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})
}

p.Unlock()
Expand Down
8 changes: 6 additions & 2 deletions pkg/agent/qrm-plugins/network/staticpolicy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,9 @@ func (p *StaticPolicy) GetTopologyHints(_ context.Context,
defer func() {
p.Unlock()
if err != nil {
_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", err), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})
}
}()

Expand Down Expand Up @@ -531,7 +533,9 @@ func (p *StaticPolicy) Allocate(_ context.Context,
defer func() {
p.Unlock()
if err != nil {
_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw)
errorMessage := strings.ReplaceAll(fmt.Sprintf("%v", err), " ", "_")
_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: errorMessage})
}
}()

Expand Down

0 comments on commit 52ccd74

Please sign in to comment.