From 2f144fd9f38facb361308f3208957cecc00523bc Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 23 Jan 2025 19:11:30 +0800 Subject: [PATCH] This is an automated cherry-pick of #59131 Signed-off-by: ti-chi-bot --- pkg/statistics/BUILD.bazel | 2 +- .../globalstats/global_stats_internal_test.go | 36 +++++++++++++++++++ .../handle/globalstats/merge_worker.go | 2 +- pkg/statistics/handle/globalstats/topn.go | 2 +- pkg/statistics/histogram.go | 15 ++++---- pkg/statistics/histogram_test.go | 18 ---------- 6 files changed, 46 insertions(+), 29 deletions(-) diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index 29fa59bd61302..eb79d50812f0c 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -82,7 +82,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, - shard_count = 38, + shard_count = 37, deps = [ "//pkg/config", "//pkg/meta/model", diff --git a/pkg/statistics/handle/globalstats/global_stats_internal_test.go b/pkg/statistics/handle/globalstats/global_stats_internal_test.go index e656a0b0e6dfe..d2d2518cd0d4e 100644 --- a/pkg/statistics/handle/globalstats/global_stats_internal_test.go +++ b/pkg/statistics/handle/globalstats/global_stats_internal_test.go @@ -359,10 +359,46 @@ func testIssues24349(t *testing.T, testKit *testkit.TestKit, store kv.Storage) { require.NoError(t, statsHandle.DumpColStatsUsageToKV()) testKit.MustExec("analyze table t with 1 topn, 3 buckets") testKit.MustExec("explain select * from t where a > 0 and b > 0") +<<<<<<< HEAD testKit.MustQuery("show stats_buckets where partition_name='global'").Check(testkit.Rows( "test t global a 0 0 2 2 0 2 0", "test t global b 0 0 3 1 1 2 0", "test t global b 0 1 10 1 4 4 0", +======= + testKit.MustQuery("show stats_topn where table_name = 't'").Sort().Check(testkit.Rows( + "test t global a 0 1 6", + "test t global b 0 2 4", + "test t p0 a 0 0 4", + "test t p0 b 0 3 3", + "test t p1 a 0 1 6", + "test t p1 b 0 2 3", + "test t p2 a 0 2 2", + "test t p2 b 0 1 2", + )) + // column a is trival. + // column b: + // TopN: + // p0: b=3, occurs 3 times + // p1: b=2, occurs 3 times + // p2: b=1, occurs 2 times + // Histogram: + // p0: hist of b: [2, 2] count=repeat=2 + // p1: hist of b: [1, 3] count=2, repeat=3. [4, 4] count==repeat=1 + // After merging global TopN, it should be 2 with 4 as the repeat.(constructed by p1's TopN and p0's histogram) + // Kicking it out, the remained buckets for b are:(consider TopN as a bucket whose lower bound is the same as upper bound and count is the same as repeat) + // [3, 3] count=repeat=4 + // [1, 1] count=repeat=2 + // [1, 3] count=1, repeat=0(merged into TopN) + // [4, 4] count=repeat=1 + // Finally, get one global bucket [1, 4] count=8, repeat=1 + testKit.MustQuery("show stats_buckets where table_name='t'").Sort().Check(testkit.Rows( + "test t global a 0 0 4 4 0 0 0", + "test t global a 0 1 6 2 2 2 0", + "test t global b 0 0 8 1 1 4 0", + "test t p0 b 0 0 1 1 2 2 0", + "test t p1 b 0 0 2 1 1 3 0", + "test t p1 b 0 1 3 1 4 4 0", +>>>>>>> 41c3b01dbc1 (statistics: BinarySearchRemoveVal should use decoded val instead of encoded (#59131)) )) } diff --git a/pkg/statistics/handle/globalstats/merge_worker.go b/pkg/statistics/handle/globalstats/merge_worker.go index b702acaa797f7..93915275cf70f 100644 --- a/pkg/statistics/handle/globalstats/merge_worker.go +++ b/pkg/statistics/handle/globalstats/merge_worker.go @@ -150,7 +150,7 @@ func (worker *topnStatsMergeWorker) Run(timeZone *time.Location, isIndex bool, v count, _ := allHists[j].EqualRowCount(nil, datum, isIndex) if count != 0 { // Remove the value corresponding to encodedVal from the histogram. - worker.statsWrapper.AllHg[j].BinarySearchRemoveVal(statistics.TopNMeta{Encoded: datum.GetBytes(), Count: uint64(count)}) + worker.statsWrapper.AllHg[j].BinarySearchRemoveVal(&datum, int64(count)) } worker.shardMutex[j].Unlock() if count != 0 { diff --git a/pkg/statistics/handle/globalstats/topn.go b/pkg/statistics/handle/globalstats/topn.go index 171756b82357b..b8dd2ce57658b 100644 --- a/pkg/statistics/handle/globalstats/topn.go +++ b/pkg/statistics/handle/globalstats/topn.go @@ -194,7 +194,7 @@ func MergePartTopN2GlobalTopN( if count != 0 { counter[encodedVal] += count // Remove the value corresponding to encodedVal from the histogram. - hists[j].BinarySearchRemoveVal(statistics.TopNMeta{Encoded: datum.GetBytes(), Count: uint64(count)}) + hists[j].BinarySearchRemoveVal(&datum, int64(count)) } } } diff --git a/pkg/statistics/histogram.go b/pkg/statistics/histogram.go index fc3b948f27aaa..b2640ea9f8076 100644 --- a/pkg/statistics/histogram.go +++ b/pkg/statistics/histogram.go @@ -297,15 +297,14 @@ func (hg *Histogram) BucketToString(bktID, idxCols int) string { } // BinarySearchRemoveVal removes the value from the TopN using binary search. -func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) { +func (hg *Histogram) BinarySearchRemoveVal(val *types.Datum, count int64) { lowIdx, highIdx := 0, hg.Len()-1 - column := hg.Bounds.Column(0) // if hg is too small, we don't need to check the branch. because the cost is more than binary search. if hg.Len() > 4 { - if cmpResult := bytes.Compare(column.GetRaw(highIdx*2+1), valCntPairs.Encoded); cmpResult < 0 { + if cmpResult := chunk.Compare(hg.Bounds.GetRow(highIdx*2+1), 0, val); cmpResult < 0 { return } - if cmpResult := bytes.Compare(column.GetRaw(lowIdx), valCntPairs.Encoded); cmpResult > 0 { + if cmpResult := chunk.Compare(hg.Bounds.GetRow(lowIdx), 0, val); cmpResult > 0 { return } } @@ -313,12 +312,12 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) { var found bool for lowIdx <= highIdx { midIdx = (lowIdx + highIdx) / 2 - cmpResult := bytes.Compare(column.GetRaw(midIdx*2), valCntPairs.Encoded) + cmpResult := chunk.Compare(hg.Bounds.GetRow(midIdx*2), 0, val) if cmpResult > 0 { highIdx = midIdx - 1 continue } - cmpResult = bytes.Compare(column.GetRaw(midIdx*2+1), valCntPairs.Encoded) + cmpResult = chunk.Compare(hg.Bounds.GetRow(midIdx*2+1), 0, val) if cmpResult < 0 { lowIdx = midIdx + 1 continue @@ -331,7 +330,7 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) { if cmpResult == 0 { midbucket.Repeat = 0 } - midbucket.Count -= int64(valCntPairs.Count) + midbucket.Count -= count if midbucket.Count < 0 { midbucket.Count = 0 } @@ -340,7 +339,7 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) { } if found { for midIdx++; midIdx <= hg.Len()-1; midIdx++ { - hg.Buckets[midIdx].Count -= int64(valCntPairs.Count) + hg.Buckets[midIdx].Count -= count if hg.Buckets[midIdx].Count < 0 { hg.Buckets[midIdx].Count = 0 } diff --git a/pkg/statistics/histogram_test.go b/pkg/statistics/histogram_test.go index cf26b1cfd97d1..5550758ed2c11 100644 --- a/pkg/statistics/histogram_test.go +++ b/pkg/statistics/histogram_test.go @@ -714,21 +714,3 @@ func generateData(t *testing.T) *Histogram { } return genHist4Test(t, data, 0) } - -func TestVerifyHistsBinarySearchRemoveValAndRemoveVals(t *testing.T) { - data1 := generateData(t) - data2 := generateData(t) - - require.Equal(t, data1, data2) - ctx := mock.NewContext() - sc := ctx.GetSessionVars().StmtCtx - b, err := codec.EncodeKey(sc.TimeZone(), nil, types.NewIntDatum(150)) - require.NoError(t, err) - tmp := TopNMeta{ - Encoded: b, - Count: 2, - } - data1.RemoveVals([]TopNMeta{tmp}) - data2.BinarySearchRemoveVal(tmp) - require.Equal(t, data1, data2) -}