From 3007cab19998a05f692e3fc0c6e5611fddb9ea46 Mon Sep 17 00:00:00 2001 From: Erez Rokah Date: Mon, 26 Aug 2024 12:05:48 +0300 Subject: [PATCH] fix: Attempt to launch plugin up to 3 times (#395) #### Summary Fixes https://github.com/cloudquery/cloudquery-issues/issues/2280 (internal issue). Sometimes plugins fail to launch the first time (we're not sure why; it seems to happen only on macOS and with large plugins). Retrying the same binary works, so this PR implements that. Example logs: ``` 2024-08-26T08:48:08Z INF Loading spec(s) args=["examples/aws-postgres.yml"] invocation_id=4356ddcc-f66d-4231-bdcd-90d4e803adb5 module=cli 2024-08-26T08:48:08Z DBG starting plugin attempt=1 invocation_id=4356ddcc-f66d-4231-bdcd-90d4e803adb5 module=cli path=****** 2024-08-26T08:48:13Z INF Plugin server listening address=/var/folders/9s/pgf_fjl160j81k95zb5gm7vw0000gn/T/cq-VIAmPPIOcYreNayw.sock invocation_id=4356ddcc-f66d-4231-bdcd-90d4e803adb5 module=cli 2024-08-26T08:48:13Z DBG failed to start plugin, retrying error="failed to run plugin ******: signal: killed" attempt=0 invocation_id=4356ddcc-f66d-4231-bdcd-90d4e803adb5 module=cli 2024-08-26T08:48:14Z DBG starting plugin attempt=2 invocation_id=4356ddcc-f66d-4231-bdcd-90d4e803adb5 module=cli path=****** 2024-08-26T08:48:14Z INF Plugin server listening address=/var/folders/9s/pgf_fjl160j81k95zb5gm7vw0000gn/T/cq-AscwEzGdxCtujUjO.sock invocation_id=4356ddcc-f66d-4231-bdcd-90d4e803adb5 module=cli 2024-08-26T08:48:15Z DBG plugin started successfully attempt=2 invocation_id=4356ddcc-f66d-4231-bdcd-90d4e803adb5 module=cli path=****** ``` --- --- managedplugin/plugin.go | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/managedplugin/plugin.go b/managedplugin/plugin.go index c966d76..a9d7338 100644 --- a/managedplugin/plugin.go +++ b/managedplugin/plugin.go @@ -423,15 +423,36 @@ func getFreeTCPAddr() (string, error) { } func (c *Client) startLocal(ctx context.Context, path string) 
error { - if c.useTCP { - tcpAddr, err := getFreeTCPAddr() - if err != nil { - return fmt.Errorf("failed to get free port: %w", err) - } - c.tcpAddr = tcpAddr - return c.startLocalTCP(ctx, path) - } - return c.startLocalUnixSocket(ctx, path) + attempt := 0 + return retry.Do( + func() error { + attempt++ + c.logger.Debug().Str("path", path).Int("attempt", attempt).Msg("starting plugin") + var err error + if c.useTCP { + var tcpAddr string + tcpAddr, err = getFreeTCPAddr() + if err != nil { + err = fmt.Errorf("failed to get free port: %w", err) + } else { + c.tcpAddr = tcpAddr + err = c.startLocalTCP(ctx, path) + } + } else { + err = c.startLocalUnixSocket(ctx, path) + } + if err == nil { + c.logger.Debug().Str("path", path).Int("attempt", attempt).Msg("plugin started successfully") + } + return err + }, + retry.Attempts(3), + retry.Delay(1*time.Second), + retry.LastErrorOnly(true), + retry.OnRetry(func(n uint, err error) { + c.logger.Debug().Err(err).Int("attempt", int(n)).Msg("failed to start plugin, retrying") + }), + ) } func (c *Client) startLocalTCP(ctx context.Context, path string) error {