Skip to content

Commit

Permalink
Revert "feat: limiting processing the response body via `limitBodyToNBytes` when `searchForBodyPatterns==true`"
Browse files Browse the repository at this point in the history

This reverts commit 634e1cf.
  • Loading branch information
d-led committed Sep 17, 2024
1 parent 34c725b commit e84fe12
Show file tree
Hide file tree
Showing 5 changed files with 2 additions and 68 deletions.
1 change: 0 additions & 1 deletion .link-checker-service.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ regex = "Login Service"

[HTTPClient]
maxRedirectsCount = 15
limitBodyToNBytes = 100_000
timeoutSeconds = 45
userAgent = "lcs/0.9"
browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36"
Expand Down
6 changes: 0 additions & 6 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,6 @@

Notable changes will be documented here

## 0.9.37

- limiting processing the response body via `limitBodyToNBytes` when `searchForBodyPatterns==true`
- upgraded dependencies
- Go v1.23

## 0.9.36

- upgraded dependencies
Expand Down
3 changes: 0 additions & 3 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ const (
proxyKey = "proxy"
pacScriptURLKey = "pacScriptURL"
maxRedirectsCountKey = "maxRedirectsCount"
limitBodyToNBytesKey = "limitBodyToNBytes"
timeoutSecondsKey = "timeoutSeconds"
userAgentKey = "userAgent"
browserUserAgentKey = "browserUserAgent"
Expand Down Expand Up @@ -103,8 +102,6 @@ func init() {
_ = viper.BindPFlag(httpClientMapKey+skipCertificateCheckKey, rootCmd.PersistentFlags().Lookup(skipCertificateCheckKey))
rootCmd.PersistentFlags().Bool(enableRequestTracingKey, false, "HTTP client: enable request tracing")
_ = viper.BindPFlag(httpClientMapKey+enableRequestTracingKey, rootCmd.PersistentFlags().Lookup(enableRequestTracingKey))
rootCmd.PersistentFlags().Uint(limitBodyToNBytesKey, 0, "HTTP client: maximum number of bytes to read from the body when searching for patterns. Unlimited if 0!")
_ = viper.BindPFlag(httpClientMapKey+limitBodyToNBytesKey, rootCmd.PersistentFlags().Lookup(limitBodyToNBytesKey))
// service
rootCmd.PersistentFlags().UintP(maxConcurrentHTTPRequestsKey, "c", 256, "maximum number of total concurrent HTTP requests")
_ = viper.BindPFlag(maxConcurrentHTTPRequestsKey, rootCmd.PersistentFlags().Lookup(maxConcurrentHTTPRequestsKey))
Expand Down
26 changes: 1 addition & 25 deletions infrastructure/url_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"context"
"crypto/tls"
"fmt"
"io"
"log"
"net"
"net/http"
Expand All @@ -31,7 +30,6 @@ import (
"github.com/go-resty/resty/v2"
)

// defaultLimitBodyToNBytes is the initial value of the LimitBodyToNBytes
// setting; 0 means the response body is read without a size limit.
const defaultLimitBodyToNBytes = 0
// defaultMaxRedirectsCount is the fallback for the maximum number of redirects
// (presumably applied when no configuration value is present — confirm in getURLCheckerSettings).
const defaultMaxRedirectsCount = 15
// defaultTimeoutSeconds is the fallback HTTP client timeout, expressed in seconds.
const defaultTimeoutSeconds = 10
// defaultUserAgent is the initial User-Agent setting; it can be overridden via HTTPClient.userAgent.
const defaultUserAgent = "lcs/0.9"
Expand Down Expand Up @@ -74,7 +72,6 @@ type urlCheckerSettings struct {
EnableRequestTracing bool
URLCheckerPlugins []string
PacScriptURL string
LimitBodyToNBytes uint
}

// URLChecker interface that all layers should conform to
Expand Down Expand Up @@ -206,7 +203,6 @@ func getURLCheckerSettings() urlCheckerSettings {
UserAgent: defaultUserAgent,
BrowserUserAgent: defaultBrowserUserAgent,
AcceptHeader: defaultAcceptHeader,
LimitBodyToNBytes: defaultLimitBodyToNBytes,
}

if proxyURL := viper.GetString("proxy"); proxyURL != "" {
Expand All @@ -224,7 +220,6 @@ func getURLCheckerSettings() urlCheckerSettings {
}

s.MaxRedirectsCount = viper.GetUint("HTTPClient.maxRedirectsCount")
s.LimitBodyToNBytes = viper.GetUint("HTTPClient.limitBodyToNBytes")
s.TimeoutSeconds = viper.GetUint("HTTPClient.timeoutSeconds")
if v := viper.GetString("HTTPClient.userAgent"); v != "" {
s.UserAgent = v
Expand All @@ -245,7 +240,6 @@ func getURLCheckerSettings() urlCheckerSettings {
log.Printf("HTTP client AcceptHeader: %v", s.AcceptHeader)
log.Printf("HTTP client SkipCertificateCheck: %v", s.SkipCertificateCheck)
log.Printf("HTTP client EnableRequestTracing: %v", s.EnableRequestTracing)
log.Printf("HTTP client LimitBodyToNBytes: %v", s.LimitBodyToNBytes)

// advanced configuration feature: only configurable via the config file
s.SearchForBodyPatterns = viper.GetBool("searchForBodyPatterns")
Expand Down Expand Up @@ -490,13 +484,12 @@ func (c *URLCheckerClient) tryGetRequestAndProcessResponseBody(ctx context.Conte
shouldRetryBasedOnStatus(res.Code) {
response, err := client.R().
SetHeader("Accept", c.settings.AcceptHeader).
SetDoNotParseResponse(true).
SetContext(ctx).
SetHeader("User-Agent", c.settings.BrowserUserAgent).
Get(urlToCheck)
res = c.processResponse(urlToCheck, response, err)
if c.settings.SearchForBodyPatterns && response != nil {
body = c.limitedBody(response)
body = response.String()
}
}

Expand Down Expand Up @@ -648,23 +641,6 @@ func (c *URLCheckerClient) tryHeadRequestAsBrowserIfForbidden(ctx context.Contex
return res
}

// limitedBody reads the raw response body and returns it as a string.
//
// When settings.LimitBodyToNBytes is greater than zero, at most that many
// bytes are read; otherwise the whole body is read. The raw body is closed
// before returning.
//
// An empty string is returned when the response is nil, when it carries no raw
// body (RawBody returns nil if there is no underlying HTTP response, e.g.
// after a transport-level failure), or when reading the body fails.
func (c *URLCheckerClient) limitedBody(response *resty.Response) string {
	if response == nil {
		return ""
	}

	body := response.RawBody()
	if body == nil {
		// Closing or reading a nil io.ReadCloser would panic.
		return ""
	}
	// Best effort: there is nothing useful to do if closing fails.
	defer func() { _ = body.Close() }()

	var reader io.Reader = body
	if c.settings.LimitBodyToNBytes > 0 {
		reader = io.LimitReader(body, int64(c.settings.LimitBodyToNBytes))
	}

	b, err := io.ReadAll(reader)
	if err != nil {
		return ""
	}
	return string(b)
}

func buildClient(settings urlCheckerSettings) *resty.Client {
client := resty.New()
client.SetTimeout(time.Second * time.Duration(settings.TimeoutSeconds))
Expand Down
34 changes: 1 addition & 33 deletions infrastructure/url_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,8 @@ package infrastructure

import (
"context"
"fmt"
"github.com/stretchr/testify/require"
"log"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
"time"

Expand All @@ -41,11 +36,10 @@ func TestOkUrls(t *testing.T) {
// TestSearchingForBodyPatterns checks https://google.com with body-pattern
// searching enabled and no body size limit, and expects exactly one pattern,
// "google", to be reported. It issues a real request, so it needs network access.
func TestSearchingForBodyPatterns(t *testing.T) {
setUpViperTestConfiguration()
viper.Set("searchForBodyPatterns", true)
// 0 disables the body size limit, so the whole page is searched.
viper.Set("HTTPClient.limitBodyToNBytes", uint(0))
res := NewURLCheckerClient().CheckURL(context.Background(), "https://google.com")
assert.Nil(t, res.Error)
assert.Equal(t, http.StatusOK, res.Code)
// require aborts the test here if the pattern is missing, which protects
// the index access below from running on an empty slice.
require.Contains(t, res.BodyPatternsFound, "google")
assert.Len(t, res.BodyPatternsFound, 1)
assert.Equal(t, "google", res.BodyPatternsFound[0], "should have found at least one mention of google")
}

Expand Down Expand Up @@ -77,16 +71,13 @@ func setUpViperTestConfiguration() {
viper.Set("HTTPClient.timeoutSeconds", uint(15))
viper.Set("HTTPClient.maxRedirectsCount", uint(15))
viper.Set("HTTPClient.enableRequestTracing", false)
viper.Set("HTTPClient.limitBodyToNBytes", uint(0))
viper.Set("searchForBodyPatterns", false)
viper.Set("urlCheckerPlugins", []string{})
patterns := []struct {
Name string
Regex string
}{
{"google", "google"},
{"start-a", "start-a"},
{"ab", "ab"},
}
viper.Set("bodyPatterns", patterns)
}
Expand Down Expand Up @@ -152,26 +143,3 @@ func TestResponseTimeout(t *testing.T) {
assert.NotNil(t, res.Error, "the response should have failed due to the abort")
assert.NotEqual(t, http.StatusOK, res.Code)
}

// TestLimitingBodyReading verifies that, with HTTPClient.limitBodyToNBytes set
// to 100, only patterns located within the first 100 bytes of the response
// body are reported.
func TestLimitingBodyReading(t *testing.T) {
	// "start-" followed by 100 'a's and then 100 'b's: the 100-byte limit ends
	// inside the run of 'a's, so "start-a" is reachable while "ab" is not.
	payload := "start-" + strings.Repeat("a", 100) + strings.Repeat("b", 100)
	handler := func(w http.ResponseWriter, _ *http.Request) {
		_, _ = fmt.Fprintln(w, payload)
	}

	server := httptest.NewServer(http.HandlerFunc(handler))
	log.Println("Test server started at:", server.URL)
	defer server.Close()

	setUpViperTestConfiguration()
	viper.Set("searchForBodyPatterns", true)
	viper.Set("HTTPClient.limitBodyToNBytes", uint(100))

	checker := NewURLCheckerClient()
	result := checker.CheckURL(context.Background(), server.URL)

	assert.Equal(t, http.StatusOK, result.Code)

	found := result.BodyPatternsFound
	assert.Contains(t, found, "start-a")
	assert.NotContains(
		t,
		found,
		"ab",
		"the repeated 'b' part of the message should have not been processed",
	)
}

0 comments on commit e84fe12

Please sign in to comment.