Skip to content

Commit

Permalink
reduce potential lock contention; avoid goroutine leak
Browse files Browse the repository at this point in the history
Hitting the wrench process with `kill -SIGQUIT <pid>` showed in the goroutine
stack trace dump that one goroutine was hung on an HTTP request doomed to
never finish. Unfortunately, that also meant it held the mutex used to prevent
concurrent writes forever - which meant the cache was unable to add new Zig
versions - leading to bug reports like mlugg/setup-zig#5 (comment)
indicating fetching a new Zig version was impossible.

This fixes it by reducing the lock contention, and using an HTTP client with
an actual timeout (Go's defaults suck here, crazy that the default function
for 'make an GET request' can hang forever and leak a goroutine, or worse as
in this case.)

```
goroutine 25991611 [select, 1612 minutes]:
runtime.gopark(0xc00040e9f0?, 0x4?, 0xa8?, 0x83?, 0xc00040e948?)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/runtime/proc.go:398 +0xce fp=0xc00040e7e0 sp=0xc00040e7c0 pc=0x43be2e
runtime.selectgo(0xc00040e9f0, 0xc00040e940, 0x3?, 0x0, 0x4159fa?, 0x1)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/runtime/select.go:327 +0x725 fp=0xc00040e900 sp=0xc00040e7e0 pc=0x44b8e5
net/http.(*http2ClientConn).RoundTrip(0xc000446000, 0xc000662300)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/h2_bundle.go:8381 +0x44f fp=0xc00040ea40 sp=0xc00040e900 pc=0x6cf4cf
net/http.(*http2Transport).RoundTripOpt(0xc000128360, 0xc000662300, {0x70?})
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/h2_bundle.go:7649 +0x1b1 fp=0xc00040eb70 sp=0xc00040ea40 pc=0x6cb431
net/http.(*http2Transport).RoundTrip(0x16b3000?, 0xc0001479c0?)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/h2_bundle.go:7598 +0x15 fp=0xc00040eb98 sp=0xc00040eb70 pc=0x6cafb5
net/http.(*Transport).roundTrip(0x16b3000, 0xc000662300)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/transport.go:602 +0x78b fp=0xc00040edc0 sp=0xc00040eb98 pc=0x6fd3eb
net/http.(*Transport).RoundTrip(0x0?, 0xf98720?)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/roundtrip.go:17 +0x13 fp=0xc00040ede0 sp=0xc00040edc0 pc=0x6e6253
net/http.send(0xc000662300, {0xf98720, 0x16b3000}, {0x1?, 0xffffffffffffffff?, 0x0?})
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/client.go:260 +0x606 fp=0xc00040f010 sp=0xc00040ede0 pc=0x6a4166
net/http.(*Client).send(0x16d3380, 0xc000662300, {0xc00040f0c0?, 0x5f6bc5?, 0x0?})
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/client.go:181 +0x98 fp=0xc00040f088 sp=0xc00040f010 pc=0x6a39f8
net/http.(*Client).do(0x16d3380, 0xc000662300)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/client.go:724 +0x912 fp=0xc00040f298 sp=0xc00040f088 pc=0x6a5d32
net/http.(*Client).Do(...)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/client.go:590
net/http.(*Client).Get(0xf97a60?, {0xc00032f450?, 0xe5a75c?})
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/client.go:488 +0x5f fp=0xc00040f2f0 sp=0xc00040f298 pc=0x6a52bf
net/http.Get(...)
        /opt/homebrew/Cellar/go/1.21.6/libexec/src/net/http/client.go:457
github.com/hexops/wrench/internal/wrench.(*Bot).httpPkgEnsureZigDownloadCached(0xc0000979a0, {0xc0005d4f5a, 0x19}, {0xe50bab, 0x4}, {0xc0005d4f49, 0x2a})
        /Volumes/data/hexops/wrench/internal/wrench/http_pkg.go:417 +0x8a7 fp=0xc00040f7a0 sp=0xc00040f2f0 pc=0xc42467
```

Signed-off-by: Stephen Gutekanst <[email protected]>
  • Loading branch information
emidoots committed Jun 20, 2024
1 parent 020e4f4 commit d06eafd
Showing 1 changed file with 26 additions and 6 deletions.
32 changes: 26 additions & 6 deletions internal/wrench/http_pkg.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package wrench

import (
"context"
"encoding/json"
"fmt"
"io"
Expand Down Expand Up @@ -355,6 +356,7 @@ func (b *Bot) httpPkgEnsureZigVersionCached(version, versionKind string) {
var (
cachedResponsesMu sync.Mutex
cachedResponses = map[string]error{}
fsParallelismLock sync.Mutex
)

func (b *Bot) httpPkgEnsureZigDownloadCached(version, versionKind, fname string) error {
Expand Down Expand Up @@ -406,15 +408,15 @@ func (b *Bot) httpPkgEnsureZigDownloadCached(version, versionKind, fname string)
logWriter := b.idWriter("zig")

cachedResponsesMu.Lock()
defer cachedResponsesMu.Unlock()

if cachedError, ok := cachedResponses[url]; ok {
cachedError, isCachedError := cachedResponses[url]
cachedResponsesMu.Unlock()
if isCachedError {
fmt.Fprintf(logWriter, "not fetching: %s (cached error %s)\n", url, cachedError)
return cachedError
}
fmt.Fprintf(logWriter, "fetch: %s > %s\n", url, filePath)

resp, err := http.Get(url)
resp, err := httpGet(url, 60*time.Second)
if err != nil {
return errors.Wrap(err, "Get")
}
Expand All @@ -423,13 +425,20 @@ func (b *Bot) httpPkgEnsureZigDownloadCached(version, versionKind, fname string)
if resp.StatusCode >= 400 && resp.StatusCode <= 500 {
// 404 not found, 403 forbidden, etc.
err := fmt.Errorf("bad response status: %s", resp.Status)
cachedResponsesMu.Lock()
cachedResponses[url] = err
cachedResponsesMu.Unlock()
return err
}
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("bad response status: %s", resp.Status)
}

// Two goroutines may have fetched this file at the same time (assuming neither hit the cache,
// new file download) - which is fine - but we can't have them write to disk at the same time.
fsParallelismLock.Lock()
defer fsParallelismLock.Unlock()

if err := os.MkdirAll(dirPath, os.ModePerm); err != nil {
return errors.Wrap(err, "MkdirAll "+dirPath)
}
Expand Down Expand Up @@ -625,7 +634,7 @@ func (b *Bot) httpPkgZigIndexCached() ([]byte, error) {
}

// Fetch the latest upstream Zig index.json
resp, err := http.Get("https://ziglang.org/download/index.json")
resp, err := httpGet("https://ziglang.org/download/index.json", 60*time.Second)
if err != nil {
return nil, errors.Wrap(err, "fetching upstream https://ziglang.org/download/index.json")
}
Expand All @@ -637,7 +646,7 @@ func (b *Bot) httpPkgZigIndexCached() ([]byte, error) {

// Fetch the Mach index.json which contains Mach nominated versions, but is otherwise not as
// up-to-date as ziglang.org's version.
resp, err = http.Get("https://machengine.org/zig/index.json")
resp, err = httpGet("https://machengine.org/zig/index.json", 60*time.Second)
if err != nil {
return nil, errors.Wrap(err, "fetching mach https://machengine.org/zig/index.json")
}
Expand Down Expand Up @@ -703,3 +712,14 @@ func (b *Bot) httpPkgZigIndexCached() ([]byte, error) {

return httpPkgZigIndexCached, nil
}

// Like http.Get, but actually respects a timeout instead of leaking a goroutine to forever run.
func httpGet(url string, timeout time.Duration) (*http.Response, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
return http.DefaultClient.Do(req)
}

0 comments on commit d06eafd

Please sign in to comment.