client: Cache tikv request in tidb client side #1098

Merged Feb 21, 2024 · 33 commits
Changes from 15 commits

Commits
2d2427c
impl priority queue
bufferflies Dec 28, 2023
d249194
replace priority queue
bufferflies Dec 28, 2023
9a0739e
cache request in tidb side
bufferflies Dec 28, 2023
c68dbeb
fix gosimple
bufferflies Dec 28, 2023
7f3cc02
impl priority
bufferflies Dec 28, 2023
4f42240
Merge branch 'master' into cache_request
bufferflies Jan 3, 2024
8d1fca6
pass ut
bufferflies Jan 3, 2024
9e7283f
Merge branch 'cache_request' of github.com:bufferflies/client-go into…
bufferflies Jan 3, 2024
55e98de
Merge branch 'cache_request' of github.com:bufferflies/client-go into…
bufferflies Jan 3, 2024
e3a4cab
resolve conflict
bufferflies Jan 3, 2024
8e939c9
add comment
bufferflies Jan 3, 2024
cabd454
add
bufferflies Jan 5, 2024
709b464
remove request if the request has been canceled
bufferflies Jan 8, 2024
38d9dfd
remove request if it has been canceled
bufferflies Jan 8, 2024
5766a91
add comment for cancel
bufferflies Jan 8, 2024
3602ee7
not make the loop is busy
bufferflies Jan 15, 2024
2b9969c
Merge branch 'master' into cache_request
bufferflies Jan 16, 2024
66a3525
lint
bufferflies Jan 16, 2024
98824f1
revert busy loop
bufferflies Jan 17, 2024
2d3bbf0
add unit test
bufferflies Jan 18, 2024
1d876f1
not limit ehigh prioirty test
bufferflies Jan 18, 2024
c492f4e
pass lint
bufferflies Jan 18, 2024
e1cca1c
support all
bufferflies Jan 23, 2024
c85b218
Merge branch 'master' into cache_request
bufferflies Jan 24, 2024
d14af95
Merge branch 'master' into cache_request
bufferflies Jan 25, 2024
dbce3da
Merge branch 'master' into cache_request
bufferflies Jan 30, 2024
1ee7caa
add comment
bufferflies Jan 30, 2024
bbe0543
squash
bufferflies Jan 31, 2024
94a0d9c
revert all to All
bufferflies Jan 31, 2024
27aca2a
remove index from entry
bufferflies Jan 31, 2024
a01352e
make fail reasons more clear
bufferflies Feb 2, 2024
f48967c
Merge branch 'master' into cache_request
bufferflies Feb 5, 2024
85f81ab
resolve conflict
bufferflies Feb 20, 2024
7 changes: 6 additions & 1 deletion config/client.go
@@ -36,6 +36,7 @@ package config

import (
"fmt"
"math"
"time"

"google.golang.org/grpc/encoding/gzip"
@@ -87,6 +88,9 @@ type TiKVClient struct {
// TTLRefreshedTxnSize controls whether a transaction should update its TTL or not.
TTLRefreshedTxnSize int64 `toml:"ttl-refreshed-txn-size" json:"ttl-refreshed-txn-size"`
ResolveLockLiteThreshold uint64 `toml:"resolve-lock-lite-threshold" json:"resolve-lock-lite-threshold"`
// MaxConcurrencyRequestLimit is the max number of requests that can be sent to tikv but not yet responded to (in flight).
// 0 means auto-adjust by feedback.
MaxConcurrencyRequestLimit int64 `toml:"max-concurrency-request-limit" json:"max-concurrency-request-limit"`
Contributor:

This would be a public TiDB configuration. Should it be approved by a PM according to the current process requirements?

}

// AsyncCommit is the config for the async commit feature. The switch to enable it is a system variable.
@@ -155,7 +159,8 @@ func DefaultTiKVClient() TiKVClient {
},
CoprReqTimeout: 60 * time.Second,

ResolveLockLiteThreshold: 16,
ResolveLockLiteThreshold: 16,
MaxConcurrencyRequestLimit: math.MaxInt64,
}
}
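
For reference, a minimal sketch of how a caller could tune the new limit. It assumes the client-go v2 import path and that the `config.UpdateGlobal` helper is available; the field name and toml key come from this diff, everything else is illustrative:

```go
package main

import (
	"fmt"

	"github.com/tikv/client-go/v2/config"
)

func main() {
	// Lower the per-client in-flight request cap from the default above
	// (math.MaxInt64, i.e. effectively unlimited) to a concrete bound.
	// The toml key added in this diff is `max-concurrency-request-limit`.
	config.UpdateGlobal(func(c *config.Config) {
		c.TiKVClient.MaxConcurrencyRequestLimit = 1024
	})
	fmt.Println(config.GetGlobalConfig().TiKVClient.MaxConcurrencyRequestLimit)
}
```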

5 changes: 3 additions & 2 deletions internal/client/client.go
@@ -315,6 +315,7 @@ func (a *connArray) Init(addr string, security config.Security, idleNotify *uint
dialTimeout: a.dialTimeout,
tryLock: tryLock{sync.NewCond(new(sync.Mutex)), false},
}
batchClient.maxConcurrencyRequestLimit.Store(cfg.TiKVClient.MaxConcurrencyRequestLimit)
a.batchCommandsClients = append(a.batchCommandsClients, batchClient)
}
}
@@ -621,9 +622,9 @@ func (c *RPCClient) sendRequest(ctx context.Context, addr string, req *tikvrpc.R
// TiDB RPC server supports batch RPC, but the batch connection sends heartbeats, which is unnecessary since
// requests to TiDB are not high frequency.
if config.GetGlobalConfig().TiKVClient.MaxBatchSize > 0 && enableBatch {
if batchReq := req.ToBatchCommandsRequest(); batchReq != nil {
if batchReq, pri := req.ToBatchCommandsRequest(); batchReq != nil {
defer trace.StartRegion(ctx, req.Type.String()).End()
return sendBatchRequest(ctx, addr, req.ForwardedHost, connArray.batchConn, batchReq, timeout)
return sendBatchRequest(ctx, addr, req.ForwardedHost, connArray.batchConn, batchReq, timeout, pri)
}
}

81 changes: 58 additions & 23 deletions internal/client/client_batch.go
@@ -70,15 +70,15 @@ type batchCommandsEntry struct {
// canceled indicates whether the request has been canceled.
canceled int32
err error
pri uint64
}

func (b *batchCommandsEntry) isCanceled() bool {
return atomic.LoadInt32(&b.canceled) == 1
}

// TODO: implement by the request priority.
func (b *batchCommandsEntry) priority() int {
return 0
func (b *batchCommandsEntry) priority() uint64 {
return b.pri
}

func (b *batchCommandsEntry) error(err error) {
@@ -107,14 +107,14 @@ func (b *batchCommandsBuilder) push(entry *batchCommandsEntry) {
b.entries.Push(entry)
}

// build builds BatchCommandsRequests and calls collect() for each valid entry.
// buildWithLimit builds BatchCommandsRequests and calls collect() for each valid entry.
// The first return value is the request that doesn't need forwarding.
// The second is a map that maps forwarded hosts to requests.
func (b *batchCommandsBuilder) build(
collect func(id uint64, e *batchCommandsEntry),
func (b *batchCommandsBuilder) buildWithLimit(limit int64, collect func(id uint64, e *batchCommandsEntry),
) (*tikvpb.BatchCommandsRequest, map[string]*tikvpb.BatchCommandsRequest) {
for _, entry := range b.entries.All() {
e := entry.(*batchCommandsEntry)
pri, pending := uint64(0), b.entries.Len()
for count, i := int64(0), 0; i < pending; i++ {
e := b.entries.Pop().(*batchCommandsEntry)
if e.isCanceled() {
continue
}
@@ -133,7 +133,15 @@ func (b *batchCommandsBuilder) build(
batchReq.RequestIds = append(batchReq.RequestIds, b.idAlloc)
batchReq.Requests = append(batchReq.Requests, e.req)
}
if count == 0 {
pri = e.priority()
}
count++
b.idAlloc++
// keep one batch for each priority; don't mix requests with different priorities into one batch.
if count >= limit || e.priority() != pri {
break
}
}
var req *tikvpb.BatchCommandsRequest
if len(b.requests) > 0 {
@@ -145,20 +153,22 @@ func (b *batchCommandsBuilder) build(
return req, b.forwardingReqs
}
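
To make the selection rule above easier to follow, here is a standalone sketch of the intent: take pending entries in order and stop once the per-send limit is reached or the priority changes, so each batch carries a single priority level. It is deliberately simplified (a plain slice instead of the PR's priority queue, and the cutoff is checked before appending, whereas the loop above breaks after appending), with hypothetical names throughout:

```go
package main

import "fmt"

type entry struct {
	id  uint64
	pri uint64
}

// takeBatch returns the next batch: at most `limit` entries, all sharing the
// priority of the first pending entry, plus the entries left for later sends.
func takeBatch(pending []entry, limit int64) (batch, rest []entry) {
	var count int64
	var pri uint64
	for i, e := range pending {
		if count == 0 {
			pri = e.pri // the first entry fixes this batch's priority
		}
		if count >= limit || e.pri != pri {
			return pending[:i], pending[i:]
		}
		count++
	}
	return pending, nil
}

func main() {
	pending := []entry{{1, 8}, {2, 8}, {3, 8}, {4, 1}}
	batch, rest := takeBatch(pending, 16)
	fmt.Println(len(batch), len(rest)) // 3 1: the lower-priority entry waits for a later batch
}
```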

// cancel all requests, only used in test.
func (b *batchCommandsBuilder) cancel(e error) {
for _, entry := range b.entries.All() {
entry.(*batchCommandsEntry).error(e)
}
b.entries.Reset()
}

// reset resets the builder to the initial state.
// Should call it before collecting a new batch.
func (b *batchCommandsBuilder) reset() {
b.entries.clean()
// NOTE: We can't simply set entries = entries[:0] here.
// The data in the cap part of the slice would reference the prewrite keys whose
// underlying memory is borrowed from memdb. The reference cause GC can't release
// the memdb, leading to serious memory leak problems in the large transaction case.
b.entries.Reset()
for i := 0; i < len(b.requests); i++ {
b.requests[i] = nil
}
@@ -336,8 +346,7 @@ func (a *batchConn) batchSendLoop(cfg config.TiKVClient) {
a.fetchMorePendingRequests(int(cfg.MaxBatchSize), int(bestBatchWaitSize), cfg.MaxBatchWaitTime)
}
}
a.pendingRequests.Observe(float64(len(a.batchCommandsCh)))
a.batchSize.Observe(float64(a.reqBuilder.len()))
a.pendingRequests.Observe(float64(len(a.batchCommandsCh) + a.reqBuilder.len()))
length := a.reqBuilder.len()
if uint(length) == 0 {
// The batch command channel is closed.
@@ -349,12 +358,20 @@ func (a *batchConn) batchSendLoop(cfg config.TiKVClient) {
bestBatchWaitSize++
}

a.getClientAndSend()
batch := a.getClientAndSend()
if batch != 0 {
a.batchSize.Observe(float64(a.reqBuilder.len()))
}
metrics.TiKVBatchSendLatency.Observe(float64(time.Since(start)))
}
}

func (a *batchConn) getClientAndSend() {
const (
SendFailedReasonNoAvailableLimit = "no available limit"
SendFailedReasonTryLockForSendFail = "tryLockForSend fail"
)

func (a *batchConn) getClientAndSend() int {
if val, err := util.EvalFailpoint("mockBatchClientSendDelay"); err == nil {
if timeout, ok := val.(int); ok && timeout > 0 {
time.Sleep(time.Duration(timeout * int(time.Millisecond)))
@@ -366,37 +383,47 @@ func (a *batchConn) getClientAndSend() {
cli *batchCommandsClient
target string
)
reason := ""
for i := 0; i < len(a.batchCommandsClients); i++ {
a.index = (a.index + 1) % uint32(len(a.batchCommandsClients))
target = a.batchCommandsClients[a.index].target
// The lock protects the batchCommandsClient from been closed while it's in use.
if a.batchCommandsClients[a.index].tryLockForSend() {
cli = a.batchCommandsClients[a.index]
break
if c := a.batchCommandsClients[a.index]; c.tryLockForSend() {
if c.sent.Load() <= c.maxConcurrencyRequestLimit.Load() {
cli = c
break
} else {
reason = SendFailedReasonNoAvailableLimit
c.unlockForSend()
}
} else {
reason = SendFailedReasonTryLockForSendFail
}
}
if cli == nil {
logutil.BgLogger().Warn("no available connections", zap.String("target", target))
logutil.BgLogger().Warn("no available connections", zap.String("target", target), zap.String("reason", reason))
metrics.TiKVNoAvailableConnectionCounter.Inc()

// Please ensure the error is handled in region cache correctly.
a.reqBuilder.cancel(errors.New("no available connections"))
Contributor:

After this change these requests are not canceled, and they are only retried when a new request arrives. So they will be blocked if there are no new incoming requests. Is that the intended behavior?

Another issue: if maxConcurrencyRequestLimit is not very large, the request builder can cache a lot of requests when the incoming request volume is high, which may lead to issues such as OOM.

Contributor Author:

> After this change these requests are not canceled ... Is that the intended behavior?

Yes, the requests may be blocked if the configured limit is small and no new requests arrive. They will time out and then be retried. I will fix this with a notification mechanism.

> Another issue ... may lead to issues such as OOM.

Yes, that can also happen with the original logic; the request objects can be garbage-collected after their responses are received.

Contributor:

For the first issue, we may handle it in fetchAllPendingRequests. That is, we can skip waiting for headEntry when entries is not empty.

Contributor Author:

It may cause a busy loop if no client has send tokens available. I will optimize it by using a channel to notify the sender to send requests again.

return
return 0
}
defer cli.unlockForSend()

req, forwardingReqs := a.reqBuilder.build(func(id uint64, e *batchCommandsEntry) {
available := cli.maxConcurrencyRequestLimit.Load() - cli.sent.Load()
batch := 0
req, forwardingReqs := a.reqBuilder.buildWithLimit(available, func(id uint64, e *batchCommandsEntry) {
cli.batched.Store(id, e)
cli.sent.Add(1)
if trace.IsEnabled() {
trace.Log(e.ctx, "rpc", "send")
}
})
if req != nil {
batch += len(req.RequestIds)
cli.send("", req)
}
for forwardedHost, req := range forwardingReqs {
batch += len(req.RequestIds)
cli.send(forwardedHost, req)
}
return batch
}

type tryLock struct {
Expand Down Expand Up @@ -507,6 +534,10 @@ type batchCommandsClient struct {
closed int32
// tryLock protects client when re-create the streaming.
tryLock
// sent is the number of requests that have been sent to tikv but have not yet received a response.
sent atomic.Int64
// maxConcurrencyRequestLimit is the max number of requests that can be in flight (sent but not yet responded to).
maxConcurrencyRequestLimit atomic.Int64
}

func (c *batchCommandsClient) isStopped() bool {
Expand Down Expand Up @@ -549,6 +580,7 @@ func (c *batchCommandsClient) failPendingRequests(err error) {
id, _ := key.(uint64)
entry, _ := value.(*batchCommandsEntry)
c.batched.Delete(id)
c.sent.Add(-1)
entry.error(err)
return true
})
Expand Down Expand Up @@ -661,6 +693,7 @@ func (c *batchCommandsClient) batchRecvLoop(cfg config.TiKVClient, tikvTransport
entry.res <- responses[i]
}
c.batched.Delete(requestID)
c.sent.Add(-1)
}

transportLayerLoad := resp.GetTransportLayerLoad()
Expand Down Expand Up @@ -779,6 +812,7 @@ func sendBatchRequest(
batchConn *batchConn,
req *tikvpb.BatchCommandsRequest_Request,
timeout time.Duration,
priority uint64,
) (*tikvrpc.Response, error) {
entry := &batchCommandsEntry{
ctx: ctx,
Expand All @@ -787,6 +821,7 @@ func sendBatchRequest(
forwardedHost: forwardedHost,
canceled: 0,
err: nil,
pri: priority,
}
timer := time.NewTimer(timeout)
defer timer.Stop()
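A minimal, self-contained sketch of the in-flight accounting this file adds around batchCommandsClient: `sent` goes up when an entry is handed to the stream and back down when its response arrives or the pending request fails, and a send is only attempted while budget remains. The type and helpers below are hypothetical; the diff itself gates client selection with `sent <= maxConcurrencyRequestLimit` under the client's send lock and then passes `limit - sent` to buildWithLimit:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

type inflightLimiter struct {
	sent  atomic.Int64 // requests sent but not yet answered
	limit atomic.Int64 // maximum allowed in-flight requests
}

// tryAcquire reserves a slot if one is available.
func (l *inflightLimiter) tryAcquire() bool {
	if l.sent.Load() >= l.limit.Load() {
		return false
	}
	l.sent.Add(1)
	return true
}

// release frees a slot once a response is received or the request fails.
func (l *inflightLimiter) release() {
	l.sent.Add(-1)
}

func main() {
	var l inflightLimiter
	l.limit.Store(2)
	fmt.Println(l.tryAcquire(), l.tryAcquire(), l.tryAcquire()) // true true false
	l.release()
	fmt.Println(l.tryAcquire()) // true again once a response frees a slot
}
```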
40 changes: 26 additions & 14 deletions internal/client/client_test.go
@@ -37,6 +37,7 @@ package client
import (
"context"
"fmt"
"math"
"math/rand"
"runtime"
"strconv"
@@ -111,10 +112,10 @@ func TestCancelTimeoutRetErr(t *testing.T) {

ctx, cancel := context.WithCancel(context.TODO())
cancel()
_, err := sendBatchRequest(ctx, "", "", a, req, 2*time.Second)
_, err := sendBatchRequest(ctx, "", "", a, req, 2*time.Second, 0)
assert.Equal(t, errors.Cause(err), context.Canceled)

_, err = sendBatchRequest(context.Background(), "", "", a, req, 0)
_, err = sendBatchRequest(context.Background(), "", "", a, req, 0, 0)
assert.Equal(t, errors.Cause(err), context.DeadlineExceeded)
}

Expand All @@ -134,8 +135,8 @@ func TestSendWhenReconnect(t *testing.T) {
}

req := tikvrpc.NewRequest(tikvrpc.CmdEmpty, &tikvpb.BatchCommandsEmptyRequest{})
_, err = rpcClient.SendRequest(context.Background(), addr, req, 100*time.Second)
assert.True(t, err.Error() == "no available connections")
_, err = rpcClient.SendRequest(context.Background(), addr, req, 5*time.Second)
assert.True(t, strings.Contains(err.Error(), "timeout"))
server.Stop()
}

@@ -386,7 +387,7 @@ func TestBatchCommandsBuilder(t *testing.T) {
assert.Equal(t, builder.len(), i+1)
}
entryMap := make(map[uint64]*batchCommandsEntry)
batchedReq, forwardingReqs := builder.build(func(id uint64, e *batchCommandsEntry) {
batchedReq, forwardingReqs := builder.buildWithLimit(math.MaxInt64, func(id uint64, e *batchCommandsEntry) {
entryMap[id] = e
})
assert.Equal(t, len(batchedReq.GetRequests()), 10)
@@ -412,7 +413,7 @@ func TestBatchCommandsBuilder(t *testing.T) {
}
}
entryMap = make(map[uint64]*batchCommandsEntry)
batchedReq, forwardingReqs = builder.build(func(id uint64, e *batchCommandsEntry) {
batchedReq, forwardingReqs = builder.buildWithLimit(math.MaxInt64, func(id uint64, e *batchCommandsEntry) {
entryMap[id] = e
})
assert.Equal(t, len(batchedReq.GetRequests()), 1)
@@ -422,8 +423,8 @@ func TestBatchCommandsBuilder(t *testing.T) {
assert.Equal(t, len(forwardingReqs[host].GetRequests()), i+2)
assert.Equal(t, len(forwardingReqs[host].GetRequestIds()), i+2)
}
assert.Equal(t, builder.idAlloc, uint64(10+builder.len()))
assert.Equal(t, len(entryMap), builder.len())
assert.Equal(t, int(builder.idAlloc), 20)
assert.Equal(t, len(entryMap), 10)
for host, forwardingReq := range forwardingReqs {
for i, id := range forwardingReq.GetRequestIds() {
assert.Equal(t, entryMap[id].req, forwardingReq.GetRequests()[i])
@@ -444,7 +445,7 @@ func TestBatchCommandsBuilder(t *testing.T) {
builder.push(entry)
}
entryMap = make(map[uint64]*batchCommandsEntry)
batchedReq, forwardingReqs = builder.build(func(id uint64, e *batchCommandsEntry) {
batchedReq, forwardingReqs = builder.buildWithLimit(math.MaxInt64, func(id uint64, e *batchCommandsEntry) {
entryMap[id] = e
})
assert.Equal(t, len(batchedReq.GetRequests()), 2)
Expand Down Expand Up @@ -475,7 +476,6 @@ func TestBatchCommandsBuilder(t *testing.T) {
// Test reset
builder.reset()
assert.Equal(t, builder.len(), 0)
assert.Equal(t, builder.entries.Len(), 0)
assert.Equal(t, len(builder.requests), 0)
assert.Equal(t, len(builder.requestIDs), 0)
assert.Equal(t, len(builder.forwardingReqs), 0)
@@ -665,7 +665,7 @@ func TestBatchClientRecoverAfterServerRestart(t *testing.T) {
assert.Nil(t, err)
// send some request, it should be success.
for i := 0; i < 100; i++ {
_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Second*20)
_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Second*20, 0)
require.NoError(t, err)
}

@@ -674,8 +674,8 @@ func TestBatchClientRecoverAfterServerRestart(t *testing.T) {
require.False(t, server.IsRunning())

// send some request, it should be failed since server is down.
for i := 0; i < 200; i++ {
_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Second*20)
for i := 0; i < 10; i++ {
_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Millisecond*100, 0)
require.Error(t, err)
time.Sleep(time.Millisecond * time.Duration(rand.Intn(300)))
grpcConn := conn.Get()
@@ -718,7 +718,19 @@ func TestBatchClientRecoverAfterServerRestart(t *testing.T) {

// send some request, it should be success again.
for i := 0; i < 100; i++ {
_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Second*20)
_, err = sendBatchRequest(context.Background(), addr, "", conn.batchConn, req, time.Second*20, 0)
require.NoError(t, err)
}
}

func TestLimitConcurrency(t *testing.T) {
re := require.New(t)
batch := newBatchConn(1, 128, nil)
for i := 0; i < 100; i++ {
batch.reqBuilder.push(&batchCommandsEntry{req: &tikvpb.BatchCommandsRequest_Request{}})
}
re.Equal(100, batch.reqBuilder.len())
req, _ := batch.reqBuilder.buildWithLimit(1, func(_ uint64, _ *batchCommandsEntry) {})
re.Len(req.RequestIds, 1)
re.Equal(99, batch.reqBuilder.len())
}