Skip to content

Commit

Permalink
Remove ANSI codes from hostnames in proxy logs
Browse files Browse the repository at this point in the history
- Added a function to strip ANSI escape codes from hostname fields in parsed proxy logs, ensuring cleaner data.
- Updated tests to verify that ANSI codes are correctly removed from hostnames.
  • Loading branch information
Tim committed Dec 9, 2024
1 parent 59e5ff2 commit 6979722
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
8 changes: 8 additions & 0 deletions logline/proxy_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ func (parser ProxyParser) Parse(logLine string) (HttpRequest, error) {
}
//fmt.Printf("%+v\n", result)

// Data quality fixes
result["hostname"] = removeANSICodes(result["hostname"])

httpRequest := HttpRequest{}
httpRequest.Host = result["hostname"]
httpRequest.SourceIp = result["remote_addr"]
Expand All @@ -48,3 +51,8 @@ func (parser ProxyParser) Parse(logLine string) (HttpRequest, error) {

return httpRequest, nil
}

// ansiEscapeRe matches ANSI SGR (colour/style) sequences such as "\x1b[0m" or
// "\x1b[1;31m". The leading ESC byte (0x1b) is optional so that a bare "[0m",
// i.e. a sequence whose ESC byte is no longer present, is matched as well.
// It is compiled once at package scope rather than on every call.
var ansiEscapeRe = regexp.MustCompile(`\x1b?\[[0-9;]*m`)

// removeANSICodes returns input with every ANSI SGR sequence removed.
// Input that contains no such sequence, including the empty string, is
// returned unchanged.
func removeANSICodes(input string) string {
	return ansiEscapeRe.ReplaceAllString(input, "")
}
10 changes: 10 additions & 0 deletions logline/proxy_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,13 @@ func TestProxyLogLogParserImplementation(t *testing.T) {
}
fmt.Printf("%+v\n", result)
}

// TestProxyLogRemovesANSICodesFromHostname parses a proxy log line whose
// hostname field is prefixed with "[0m" and checks that the parsed request's
// Host holds the hostname without that prefix.
func TestProxyLogRemovesANSICodesFromHostname(t *testing.T) {
	wantHost := "prom-grafana.tyranus.de"
	rawLine := `[0mprom-grafana.tyranus.de 127.0.0.1 - - [29/Aug/2023:07:47:30 +0000] "GET / HTTP/1.1" 200 12345 "-" "Mozilla/5.0" "-"`

	parsed, parseErr := logParser.Parse(rawLine)
	failNowIfErr(t, parseErr, rawLine)

	assert.Equal(t, wantHost, parsed.Host, "The hostname should have ANSI codes removed")
}

0 comments on commit 6979722

Please sign in to comment.