From 58884eef6b4e5b87482cc8b13b1f67accf4a1628 Mon Sep 17 00:00:00 2001 From: Corentin Barreau Date: Wed, 25 Sep 2024 21:14:17 +0200 Subject: [PATCH] add: DNS WARC records & DNS caching --- client.go | 7 ++++++- dialer.go | 15 ++++++++++----- dns.go | 34 +++++++++++++++++++++++++++++----- metadata.go => record.go | 8 +++----- 4 files changed, 48 insertions(+), 16 deletions(-) rename metadata.go => record.go (77%) diff --git a/client.go b/client.go index 404bfbe..c8c2ee8 100644 --- a/client.go +++ b/client.go @@ -23,6 +23,7 @@ type HTTPClientSettings struct { DialTimeout time.Duration ResponseHeaderTimeout time.Duration DNSResolutionTimeout time.Duration + DNSRecordsTTL time.Duration TLSHandshakeTimeout time.Duration TCPTimeout time.Duration MaxReadBeforeTruncate int @@ -167,10 +168,14 @@ func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient HTTPClientSettings.DNSResolutionTimeout = 5 * time.Second } + if HTTPClientSettings.DNSRecordsTTL == 0 { + HTTPClientSettings.DNSRecordsTTL = 5 * time.Minute + } + httpClient.TLSHandshakeTimeout = HTTPClientSettings.TLSHandshakeTimeout // Configure custom dialer / transport - customDialer, err := newCustomDialer(httpClient, HTTPClientSettings.Proxy, HTTPClientSettings.DialTimeout, HTTPClientSettings.DNSResolutionTimeout, HTTPClientSettings.DNSServers, HTTPClientSettings.DisableIPv4, HTTPClientSettings.DisableIPv6) + customDialer, err := newCustomDialer(httpClient, HTTPClientSettings.Proxy, HTTPClientSettings.DialTimeout, HTTPClientSettings.DNSRecordsTTL, HTTPClientSettings.DNSResolutionTimeout, HTTPClientSettings.DNSServers, HTTPClientSettings.DisableIPv4, HTTPClientSettings.DisableIPv6) if err != nil { return nil, err } diff --git a/dialer.go b/dialer.go index 08a2f38..87ba8fc 100644 --- a/dialer.go +++ b/dialer.go @@ -23,17 +23,20 @@ import ( ) type customDialer struct { - proxyDialer proxy.Dialer - client *CustomHTTPClient - DNSConfig *dns.ClientConfig - DNSClient *dns.Client + proxyDialer proxy.Dialer + client *CustomHTTPClient + DNSConfig *dns.ClientConfig + DNSClient *dns.Client + DNSRecords *sync.Map + // This defines the TTL for DNS records in the cache + DNSRecordsTTL time.Duration net.Dialer DNSServer string disableIPv4 bool disableIPv6 bool } -func newCustomDialer(httpClient *CustomHTTPClient, proxyURL string, DialTimeout, DNSResolutionTimeout time.Duration, DNSServers []string, disableIPv4, disableIPv6 bool) (d *customDialer, err error) { +func newCustomDialer(httpClient *CustomHTTPClient, proxyURL string, DialTimeout, DNSRecordsTTL, DNSResolutionTimeout time.Duration, DNSServers []string, disableIPv4, disableIPv6 bool) (d *customDialer, err error) { d = new(customDialer) d.Timeout = DialTimeout @@ -41,6 +44,8 @@ func newCustomDialer(httpClient *CustomHTTPClient, proxyURL string, DialTimeout, d.disableIPv4 = disableIPv4 d.disableIPv6 = disableIPv6 + d.DNSRecordsTTL = 5 * time.Minute + d.DNSRecords = new(sync.Map) d.DNSConfig, err = dns.ClientConfigFromFile("/etc/resolv.conf") if err != nil { return nil, err diff --git a/dns.go b/dns.go index 8c0321c..3df6bc0 100644 --- a/dns.go +++ b/dns.go @@ -3,11 +3,27 @@ package warc import ( "fmt" "net" + "time" "github.com/miekg/dns" ) +type cachedIP struct { + ip net.IP + expiresAt time.Time +} + func (d *customDialer) resolveDNS(address string) (net.IP, error) { + // Check cache first + if cached, ok := d.DNSRecords.Load(address); ok { + cachedEntry := cached.(cachedIP) + if time.Now().Before(cachedEntry.expiresAt) { + return cachedEntry.ip, nil + } + // Cache entry expired, remove it + d.DNSRecords.Delete(address) + } + m := new(dns.Msg) m.SetQuestion(dns.Fqdn(address), dns.TypeA) @@ -16,8 +32,8 @@ func (d *customDialer) resolveDNS(address string) (net.IP, error) { return nil, err } - // Print raw DNS output - // fmt.Printf("Raw DNS response for %s:\n%s\n", address, r.String()) + // Record the DNS response + d.client.WriteRecord("dns:"+address, "resource", "text/dns", r.String()) var ipv4, ipv6 net.IP @@ -30,13 +46,21 @@ func (d *customDialer) resolveDNS(address string) (net.IP, error) { } } + var resolvedIP net.IP // Prioritize IPv6 if both are available and enabled if ipv6 != nil { - return ipv6, nil + resolvedIP = ipv6 + } else if ipv4 != nil { + resolvedIP = ipv4 } - if ipv4 != nil { - return ipv4, nil + if resolvedIP != nil { + // Cache the result + d.DNSRecords.Store(address, cachedIP{ + ip: resolvedIP, + expiresAt: time.Now().Add(d.DNSRecordsTTL), + }) + return resolvedIP, nil } return nil, fmt.Errorf("no suitable IP address found for %s", address) diff --git a/metadata.go b/record.go similarity index 77% rename from metadata.go rename to record.go index 8874c75..00c1588 100644 --- a/metadata.go +++ b/record.go @@ -1,15 +1,13 @@ package warc -import ( - "time" -) +import "time" -func (c *CustomHTTPClient) WriteMetadataRecord(WARCTargetURI, contentType, payload string) { +func (c *CustomHTTPClient) WriteRecord(WARCTargetURI, WARCType, contentType, payload string) { // Initialize the record metadataRecord := NewRecord("", false) // Set the headers - metadataRecord.Header.Set("WARC-Type", "metadata") + metadataRecord.Header.Set("WARC-Type", WARCType) metadataRecord.Header.Set("Content-Type", contentType) metadataRecord.Header.Set("WARC-Target-URI", WARCTargetURI)