-
Notifications
You must be signed in to change notification settings - Fork 544
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
block-builder: Add cortex_blockbuilder_blocks_produced_total metric #10538
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ import ( | |
otgrpc "github.com/opentracing-contrib/go-grpc" | ||
"github.com/opentracing/opentracing-go" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/prometheus/prometheus/tsdb" | ||
"github.com/thanos-io/objstore" | ||
"github.com/twmb/franz-go/pkg/kadm" | ||
"github.com/twmb/franz-go/pkg/kgo" | ||
|
@@ -74,10 +75,6 @@ func newWithSchedulerClient( | |
limits *validation.Overrides, | ||
schedulerClient schedulerpb.SchedulerClient, | ||
) (*BlockBuilder, error) { | ||
if cfg.NoPartiallyConsumedRegion { | ||
// We should not have a large buffer if we are putting all the records into a block. | ||
cfg.ConsumeIntervalBuffer = 5 * time.Minute | ||
} | ||
|
||
b := &BlockBuilder{ | ||
cfg: cfg, | ||
|
@@ -489,7 +486,7 @@ func (b *BlockBuilder) consumePartitionSection( | |
return state, nil | ||
} | ||
|
||
var numBlocks int | ||
var blockMetas []tsdb.BlockMeta | ||
defer func(t time.Time, startState PartitionState) { | ||
// No need to log or track time of the unfinished section. Just bail out. | ||
if errors.Is(retErr, context.Canceled) { | ||
|
@@ -507,7 +504,7 @@ func (b *BlockBuilder) consumePartitionSection( | |
level.Info(logger).Log("msg", "done consuming", "duration", dur, | ||
"last_block_end", startState.LastBlockEnd, "curr_block_end", blockEnd, | ||
"last_seen_offset", startState.LastSeenOffset, "curr_seen_offset", retState.LastSeenOffset, | ||
"num_blocks", numBlocks) | ||
"num_blocks", len(blockMetas)) | ||
}(time.Now(), state) | ||
|
||
// We always rewind the partition's offset to the commit offset by reassigning the partition to the client (this triggers partition assignment). | ||
|
@@ -610,11 +607,16 @@ consumerLoop: | |
} | ||
|
||
var err error | ||
numBlocks, err = builder.CompactAndUpload(ctx, b.uploadBlocks) | ||
blockMetas, err = builder.CompactAndUpload(ctx, b.uploadBlocks) | ||
if err != nil { | ||
return state, err | ||
} | ||
|
||
prev, curr, next := getBlockCategoryCount(sectionEndTime, blockMetas) | ||
b.blockBuilderMetrics.blockCounts.WithLabelValues("previous").Add(float64(prev)) | ||
b.blockBuilderMetrics.blockCounts.WithLabelValues("current").Add(float64(curr)) | ||
b.blockBuilderMetrics.blockCounts.WithLabelValues("next").Add(float64(next)) | ||
Comment on lines
+615
to
+618
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have you considered putting these metrics inside |
||
|
||
// We should take the max of last seen offsets. If the partition was lagging due to some record not being processed | ||
// because of a future sample, we might be coming back to the same consume cycle again. | ||
lastSeenOffset := max(lastRec.Offset, state.LastSeenOffset) | ||
|
@@ -643,6 +645,22 @@ consumerLoop: | |
return newState, nil | ||
} | ||
|
||
func getBlockCategoryCount(sectionEndTime time.Time, blockMetas []tsdb.BlockMeta) (prev, curr, next int) { | ||
// Doing -30m will take care | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (nit) What does this comment say? |
||
currHour := sectionEndTime.Add(-30 * time.Minute).Truncate(2 * time.Hour).Hour() | ||
for _, m := range blockMetas { | ||
hour := time.UnixMilli(m.MinTime/2 + m.MaxTime/2).Truncate(2 * time.Hour).Hour() | ||
if hour < currHour { | ||
prev++ | ||
} else if hour > currHour { | ||
next++ | ||
} else { | ||
curr++ | ||
} | ||
} | ||
return | ||
} | ||
|
||
type stateCommitter interface { | ||
commitState(context.Context, *BlockBuilder, log.Logger, string, PartitionState) error | ||
} | ||
|
@@ -684,21 +702,18 @@ func (c *noOpCommitter) commitState(_ context.Context, _ *BlockBuilder, _ log.Lo | |
|
||
var _ stateCommitter = &noOpCommitter{} | ||
|
||
func (b *BlockBuilder) uploadBlocks(ctx context.Context, tenantID, dbDir string, blockIDs []string) error { | ||
func (b *BlockBuilder) uploadBlocks(ctx context.Context, tenantID, dbDir string, metas []tsdb.BlockMeta) error { | ||
buc := bucket.NewUserBucketClient(tenantID, b.bucket, b.limits) | ||
for _, bid := range blockIDs { | ||
blockDir := path.Join(dbDir, bid) | ||
meta, err := block.ReadMetaFromDir(blockDir) | ||
if err != nil { | ||
return fmt.Errorf("read block meta for block %s (tenant %s): %w", bid, tenantID, err) | ||
} | ||
|
||
if meta.Stats.NumSamples == 0 { | ||
for _, m := range metas { | ||
if m.Stats.NumSamples == 0 { | ||
// No need to upload empty block. | ||
level.Info(b.logger).Log("msg", "skip uploading empty block", "tenant", tenantID, "block", bid) | ||
level.Info(b.logger).Log("msg", "skip uploading empty block", "tenant", tenantID, "block", m.ULID.String()) | ||
return nil | ||
} | ||
|
||
meta := &block.Meta{BlockMeta: m} | ||
blockDir := path.Join(dbDir, meta.ULID.String()) | ||
|
||
meta.Thanos.Source = block.BlockBuilderSource | ||
meta.Thanos.SegmentFiles = block.GetSegmentFiles(blockDir) | ||
|
||
|
@@ -717,11 +732,11 @@ func (b *BlockBuilder) uploadBlocks(ctx context.Context, tenantID, dbDir string, | |
if err == nil { | ||
break | ||
} | ||
level.Warn(b.logger).Log("msg", "failed to upload block; will retry", "err", err, "block", bid, "tenant", tenantID) | ||
level.Warn(b.logger).Log("msg", "failed to upload block; will retry", "err", err, "block", meta.ULID.String(), "tenant", tenantID) | ||
boff.Wait() | ||
} | ||
if err := boff.ErrCause(); err != nil { | ||
return fmt.Errorf("upload block %s (tenant %s): %w", bid, tenantID, err) | ||
return fmt.Errorf("upload block %s (tenant %s): %w", meta.ULID.String(), tenantID, err) | ||
} | ||
} | ||
return nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By removing this override, we can now try different buffer times when `NoPartiallyConsumedRegion` is set to true.