From 6df8a1d2450b88bf7a946a00ba75b19497ecc9b3 Mon Sep 17 00:00:00 2001 From: Malte Tashiro Date: Thu, 8 Feb 2024 02:40:42 +0000 Subject: [PATCH] Use itertuples to improve DnsDependencyCrawler performance --- iyp/crawlers/openintel/__init__.py | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/iyp/crawlers/openintel/__init__.py b/iyp/crawlers/openintel/__init__.py index a8a6dbc..7651925 100644 --- a/iyp/crawlers/openintel/__init__.py +++ b/iyp/crawlers/openintel/__init__.py @@ -293,39 +293,39 @@ def run(self): logging.info('Computing relationships...') start_ts = datetime.now().timestamp() - for index, connection in connections.iterrows(): - if connection['relation_name'] == 'PARENT': + for connection in connections.itertuples(): + if connection.relation_name == 'PARENT': links_parent.append({ - 'src_id': domains_id[connection['from_nodeKey']], - 'dst_id': domains_id[connection['to_nodeKey']], - 'props': [self.reference, connection['properties']], + 'src_id': domains_id[connection.from_nodeKey], + 'dst_id': domains_id[connection.to_nodeKey], + 'props': [self.reference, connection.properties], }) - elif connection['relation_name'] == 'MANAGED_BY': + elif connection.relation_name == 'MANAGED_BY': links_managed_by.append({ - 'src_id': domains_id[connection['from_nodeKey']], - 'dst_id': hosts_id[connection['to_nodeKey']], - 'props': [self.reference, connection['properties']], + 'src_id': domains_id[connection.from_nodeKey], + 'dst_id': hosts_id[connection.to_nodeKey], + 'props': [self.reference, connection.properties], }) - elif connection['relation_name'] == 'PART_OF': + elif connection.relation_name == 'PART_OF': links_part_of.append({ - 'src_id': hosts_id[connection['from_nodeKey']], - 'dst_id': domains_id[connection['to_nodeKey']], - 'props': [self.reference, connection['properties']], + 'src_id': hosts_id[connection.from_nodeKey], + 'dst_id': domains_id[connection.to_nodeKey], + 'props': [self.reference, connection.properties], }) - elif connection['relation_name'] == 'ALIAS_OF': + elif connection.relation_name == 'ALIAS_OF': links_alias_of.append({ - 'src_id': hosts_id[connection['from_nodeKey']], - 'dst_id': hosts_id[connection['to_nodeKey']], - 'props': [self.reference, connection['properties']], + 'src_id': hosts_id[connection.from_nodeKey], + 'dst_id': hosts_id[connection.to_nodeKey], + 'props': [self.reference, connection.properties], }) - elif connection['relation_name'] == 'RESOLVES_TO': + elif connection.relation_name == 'RESOLVES_TO': links_resolves_to.append({ - 'src_id': hosts_id[connection['from_nodeKey']], - 'dst_id': ips_id[connection['to_nodeKey']], - 'props': [self.reference, connection['properties']], + 'src_id': hosts_id[connection.from_nodeKey], + 'dst_id': ips_id[connection.to_nodeKey], + 'props': [self.reference, connection.properties], }) else: - logging.error(f'Unknown relationship type: {connection["relation_name"]}') + logging.error(f'Unknown relationship type: {connection.relation_name}') stop_ts = datetime.now().timestamp() logging.info(f'{stop_ts - start_ts:.2f}s elapsed')