From b1e21b02e09871593ff56dce8b8c4ab6cf14cadc Mon Sep 17 00:00:00 2001
From: Malte Tashiro
Date: Sat, 18 Jan 2025 12:22:19 +0000
Subject: [PATCH] Handle loops in CNAME chains

---
 iyp/crawlers/openintel/__init__.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/iyp/crawlers/openintel/__init__.py b/iyp/crawlers/openintel/__init__.py
index 6ad2894..78fd21b 100644
--- a/iyp/crawlers/openintel/__init__.py
+++ b/iyp/crawlers/openintel/__init__.py
@@ -165,6 +165,9 @@ def recurse_chain(current_chain: list, chain_links: dict, records: dict, state:
                     state[link][record_type].update(ips)
         if chain_tail in chain_links:
             for link in chain_links[chain_tail]:
+                if link in current_chain:
+                    # Prevent infinite recursion due to CNAME loops.
+                    continue
                 current_chain.append(link)
                 OpenIntelCrawler.recurse_chain(current_chain, chain_links, records, state)
                 current_chain.pop()
@@ -430,10 +433,14 @@ def normalize_ipv6(address):
         return address
 
     @staticmethod
-    def recurse_cnames(source: str, cnames: set, ips: set, state: dict):
+    def recurse_cnames(source: str, cnames: dict, ips: set, state: dict, processed_cnames: set):
         for target in cnames[source]:
+            if target in processed_cnames:
+                # Prevent infinite recursion due to CNAME loops.
+                continue
+            processed_cnames.add(target)
             state[target].update(ips)
-            DnsgraphCrawler.recurse_cnames(target, cnames, ips, state)
+            DnsgraphCrawler.recurse_cnames(target, cnames, ips, state, processed_cnames)
 
     def run(self):
         # Extract current date for partitioning
@@ -554,7 +561,7 @@ def run(self):
         # pointing to it.
         cname_resolves = defaultdict(set)
         for name, ips in resolves_to.items():
-            self.recurse_cnames(name, cnames, ips, cname_resolves)
+            self.recurse_cnames(name, cnames, ips, cname_resolves, {name})
         for hostname, ips in cname_resolves.items():
             host_qid = hosts_id[hostname]
             for ip in ips: