Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#59131
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <[email protected]>
  • Loading branch information
winoros authored and ti-chi-bot committed Jan 23, 2025
1 parent b313166 commit 2f144fd
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 29 deletions.
2 changes: 1 addition & 1 deletion pkg/statistics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":statistics"],
flaky = True,
shard_count = 38,
shard_count = 37,
deps = [
"//pkg/config",
"//pkg/meta/model",
Expand Down
36 changes: 36 additions & 0 deletions pkg/statistics/handle/globalstats/global_stats_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,10 +359,46 @@ func testIssues24349(t *testing.T, testKit *testkit.TestKit, store kv.Storage) {
require.NoError(t, statsHandle.DumpColStatsUsageToKV())
testKit.MustExec("analyze table t with 1 topn, 3 buckets")
testKit.MustExec("explain select * from t where a > 0 and b > 0")
<<<<<<< HEAD
testKit.MustQuery("show stats_buckets where partition_name='global'").Check(testkit.Rows(
"test t global a 0 0 2 2 0 2 0",
"test t global b 0 0 3 1 1 2 0",
"test t global b 0 1 10 1 4 4 0",
=======
testKit.MustQuery("show stats_topn where table_name = 't'").Sort().Check(testkit.Rows(
"test t global a 0 1 6",
"test t global b 0 2 4",
"test t p0 a 0 0 4",
"test t p0 b 0 3 3",
"test t p1 a 0 1 6",
"test t p1 b 0 2 3",
"test t p2 a 0 2 2",
"test t p2 b 0 1 2",
))
// column a is trival.
// column b:
// TopN:
// p0: b=3, occurs 3 times
// p1: b=2, occurs 3 times
// p2: b=1, occurs 2 times
// Histogram:
// p0: hist of b: [2, 2] count=repeat=2
// p1: hist of b: [1, 3] count=2, repeat=3. [4, 4] count==repeat=1
// After merging global TopN, it should be 2 with 4 as the repeat.(constructed by p1's TopN and p0's histogram)
// Kicking it out, the remained buckets for b are:(consider TopN as a bucket whose lower bound is the same as upper bound and count is the same as repeat)
// [3, 3] count=repeat=4
// [1, 1] count=repeat=2
// [1, 3] count=1, repeat=0(merged into TopN)
// [4, 4] count=repeat=1
// Finally, get one global bucket [1, 4] count=8, repeat=1
testKit.MustQuery("show stats_buckets where table_name='t'").Sort().Check(testkit.Rows(
"test t global a 0 0 4 4 0 0 0",
"test t global a 0 1 6 2 2 2 0",
"test t global b 0 0 8 1 1 4 0",
"test t p0 b 0 0 1 1 2 2 0",
"test t p1 b 0 0 2 1 1 3 0",
"test t p1 b 0 1 3 1 4 4 0",
>>>>>>> 41c3b01dbc1 (statistics: BinarySearchRemoveVal should use decoded val instead of encoded (#59131))
))
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/statistics/handle/globalstats/merge_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ func (worker *topnStatsMergeWorker) Run(timeZone *time.Location, isIndex bool, v
count, _ := allHists[j].EqualRowCount(nil, datum, isIndex)
if count != 0 {
// Remove the value corresponding to encodedVal from the histogram.
worker.statsWrapper.AllHg[j].BinarySearchRemoveVal(statistics.TopNMeta{Encoded: datum.GetBytes(), Count: uint64(count)})
worker.statsWrapper.AllHg[j].BinarySearchRemoveVal(&datum, int64(count))
}
worker.shardMutex[j].Unlock()
if count != 0 {
Expand Down
2 changes: 1 addition & 1 deletion pkg/statistics/handle/globalstats/topn.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func MergePartTopN2GlobalTopN(
if count != 0 {
counter[encodedVal] += count
// Remove the value corresponding to encodedVal from the histogram.
hists[j].BinarySearchRemoveVal(statistics.TopNMeta{Encoded: datum.GetBytes(), Count: uint64(count)})
hists[j].BinarySearchRemoveVal(&datum, int64(count))
}
}
}
Expand Down
15 changes: 7 additions & 8 deletions pkg/statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,28 +297,27 @@ func (hg *Histogram) BucketToString(bktID, idxCols int) string {
}

// BinarySearchRemoveVal removes the value from the TopN using binary search.
func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
func (hg *Histogram) BinarySearchRemoveVal(val *types.Datum, count int64) {
lowIdx, highIdx := 0, hg.Len()-1
column := hg.Bounds.Column(0)
// if hg is too small, we don't need to check the branch. because the cost is more than binary search.
if hg.Len() > 4 {
if cmpResult := bytes.Compare(column.GetRaw(highIdx*2+1), valCntPairs.Encoded); cmpResult < 0 {
if cmpResult := chunk.Compare(hg.Bounds.GetRow(highIdx*2+1), 0, val); cmpResult < 0 {
return
}
if cmpResult := bytes.Compare(column.GetRaw(lowIdx), valCntPairs.Encoded); cmpResult > 0 {
if cmpResult := chunk.Compare(hg.Bounds.GetRow(lowIdx), 0, val); cmpResult > 0 {
return
}
}
var midIdx = 0
var found bool
for lowIdx <= highIdx {
midIdx = (lowIdx + highIdx) / 2
cmpResult := bytes.Compare(column.GetRaw(midIdx*2), valCntPairs.Encoded)
cmpResult := chunk.Compare(hg.Bounds.GetRow(midIdx*2), 0, val)
if cmpResult > 0 {
highIdx = midIdx - 1
continue
}
cmpResult = bytes.Compare(column.GetRaw(midIdx*2+1), valCntPairs.Encoded)
cmpResult = chunk.Compare(hg.Bounds.GetRow(midIdx*2+1), 0, val)
if cmpResult < 0 {
lowIdx = midIdx + 1
continue
Expand All @@ -331,7 +330,7 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
if cmpResult == 0 {
midbucket.Repeat = 0
}
midbucket.Count -= int64(valCntPairs.Count)
midbucket.Count -= count
if midbucket.Count < 0 {
midbucket.Count = 0
}
Expand All @@ -340,7 +339,7 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
}
if found {
for midIdx++; midIdx <= hg.Len()-1; midIdx++ {
hg.Buckets[midIdx].Count -= int64(valCntPairs.Count)
hg.Buckets[midIdx].Count -= count
if hg.Buckets[midIdx].Count < 0 {
hg.Buckets[midIdx].Count = 0
}
Expand Down
18 changes: 0 additions & 18 deletions pkg/statistics/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -714,21 +714,3 @@ func generateData(t *testing.T) *Histogram {
}
return genHist4Test(t, data, 0)
}

func TestVerifyHistsBinarySearchRemoveValAndRemoveVals(t *testing.T) {
data1 := generateData(t)
data2 := generateData(t)

require.Equal(t, data1, data2)
ctx := mock.NewContext()
sc := ctx.GetSessionVars().StmtCtx
b, err := codec.EncodeKey(sc.TimeZone(), nil, types.NewIntDatum(150))
require.NoError(t, err)
tmp := TopNMeta{
Encoded: b,
Count: 2,
}
data1.RemoveVals([]TopNMeta{tmp})
data2.BinarySearchRemoveVal(tmp)
require.Equal(t, data1, data2)
}

0 comments on commit 2f144fd

Please sign in to comment.