From 4c4d4f404b8a4a6ac6ed0bae1909842e1c79bedf Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 26 Jun 2024 16:56:19 +0200
Subject: [PATCH 01/87] readme. Add documentation for BlockIP action

---
 README.md | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3814573a..0e879985 100755
--- a/README.md
+++ b/README.md
@@ -62,7 +62,17 @@ The [scenarios](#definition-of-the-network-topology) define the **topology** of
 6. Parameters of `ScanNetwork` and `FindServices` can be chosen arbitrarily (they don't have to be listed in `known_newtworks`/`known_hosts`)
 
 ### Actions for the defender
-In this version of the environment, the defender does not have actions, and it is not an agent. It is an omnipresent entity in the network that can detect actions from the attacker. This follows the logic that in real computer networks, the admins have tools that consume logs from all computers simultaneously, and they can detect actions from a central position (such as a SIEM). There are several modes of the defender (see [Task Configuration - Defender](#defender-configuration) for details.
+The defender does have the action to block an IP address in a target host. 
+
+In this version, there is no global defender as there was before, because now it is a multi-agent system.
+
+The actions are:
+
+- BlockIP(). That takes as parameters:
+  - "target_host": IP object where the block will be applied.
+  - "source_host": IP object where this action is executed from.
+  - "blocked_host": IP object to block in ANY direction as seen in the target_host.
+
 
 ### Starting the game
 The environment should be created prior strating the agents. The properties of the environment can be defined in a YAML file. The game server can be started by running:

From 9eea200d1a60d7f24c1b09ac08916e603fb06cde Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 26 Jun 2024 16:56:35 +0200
Subject: [PATCH 02/87] game_components. Add BlockIP action

---
 env/game_components.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/env/game_components.py b/env/game_components.py
index d38c9bc1..772f1270 100755
--- a/env/game_components.py
+++ b/env/game_components.py
@@ -125,6 +125,7 @@ class ActionType(enum.Enum):
     - FindData
     - ExploitService
     - ExfiltrateData
+    - BlockIP
     - JoinGame
     - QuitGame
     """
@@ -152,6 +153,8 @@ def from_string(cls, string:str):
                 return ActionType.FindData
             case "ActionType.ExfiltrateData":
                 return ActionType.ExfiltrateData
+            case "ActionType.BlockIP":
+                return ActionType.BlockIP
             case "ActionType.JoinGame":
                 return ActionType.JoinGame
             case "ActionType.ResetGame":
@@ -167,6 +170,7 @@ def from_string(cls, string:str):
     FindData = 0.8
     ExploitService = 0.7
     ExfiltrateData = 0.8
+    BlockIP = 1
     JoinGame = 1
     QuitGame = 1
     ResetGame = 1
@@ -191,6 +195,7 @@ class Action():
     - FindData {"target_host": IP object, "source_host": IP object}
     - ExploitService {"target_host": IP object, "target_service": Service object, "source_host": IP object}
     - ExfiltrateData {"target_host": IP object, "source_host": IP object, "data": Data object}
+    - BlockIP {"target_host": IP object, "source_host": IP object, "blocked_host": IP object}
     """
     def __init__(self, action_type: ActionType, params: dict={}) -> None:
         self._type = action_type
@@ -227,6 +232,8 @@ def from_dict(cls, data_dict:dict):
                     params[k] = IP(v)
                 case "target_host":
                     params[k] = IP(v)
+                case "blocked_host":
+                    params[k] = IP(v)
                 case "target_network":
                     net,mask = v.split("/")
                     params[k] = Network(net ,int(mask))
@@ -291,6 +298,10 @@ def from_json(cls, json_string:str):
                 parameters = {"target_host": IP(parameters_dict["target_host"]["ip"]),
                                 "source_host": IP(parameters_dict["source_host"]["ip"]),
                               "data": Data(parameters_dict["data"]["owner"],parameters_dict["data"]["id"])}
+            case ActionType.BlockIP:
+                parameters = {"target_host": IP(parameters_dict["target_host"]["ip"]),
+                              "source_host": IP(parameters_dict["source_host"]["ip"]),
+                              "blocked_host": IP(parameters_dict["blocked_host"]["ip"])}
             case ActionType.JoinGame:
                 parameters = {"agent_info":AgentInfo(parameters_dict["agent_info"]["name"], parameters_dict["agent_info"]["role"])}
             case ActionType.QuitGame:

From 7496b73a68aaca2124efecf304a07c8838ff1501 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 26 Jun 2024 16:57:07 +0200
Subject: [PATCH 03/87] netsecenv_conf. Delete the global defender and add a
 new defender with goal and start_position.

---
 env/netsecenv_conf.yaml | 71 +++++++++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 28 deletions(-)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 9666ceac..3da90c2b 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -6,7 +6,6 @@ coordinator:
     attackers:
       goal:
         description: "Exfiltrate data to '213.47.23.195'"
-        # Put randomize_goal_every_episode in True if you put any of the goal values in 'random'
         is_any_part_of_goal_random: True
         known_networks: []
         #known_networks: [192.168.1.0/24, 192.168.3.0/24]
@@ -42,33 +41,49 @@ coordinator:
         known_data: {}
 
     defenders:
-      # types are StochasticDefender and NoDefender
-      #type: 'StochasticDefender'
-      #type: 'StochasticWithThreshold'
-      type: 'NoDefender'
-      tw_size: 5
-      thresholds:
-        scan_network: # if both conditions are true, you are never detected
-          consecutive_actions: 2 # min amount of consecutive actions you can do without detection
-          tw_ratio: 0.25 # min ratio of actions in the tw below which you are not detected
-        find_services:
-          consecutive_actions: 3
-          tw_ratio: 0.3
-        exploit_service:
-          repeated_actions_episode: 2
-          tw_ratio: 0.25
-        find_data:
-          tw_ratio: 0.5
-          repeated_actions_episode: 2
-        exfiltrate_data:
-          consecutive_actions: 2
-          tw_ratio: 0.25
-      action_detetection_prob:
-        scan_network: 0.05
-        find_services: 0.075
-        exploit_service: 0.1
-        find_data: 0.025
-        exfiltrate_data: 0.025
+      goal:
+            description: "Block all attackers"
+            is_any_part_of_goal_random: False
+            known_networks: []
+            # Example
+            #known_networks: [192.168.1.0/24, 192.168.3.0/24]
+            known_hosts: []
+            # Example
+            #known_hosts: [192.168.1.1, 192.168.1.2]
+            controlled_hosts: []
+            # Example
+            #controlled_hosts: [213.47.23.195, 192.168.1.3]
+            # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+            known_services: {}
+            # Example
+            #known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+            # In data, put the target host that must have the data and which data in format user,data
+            # Example to fix the data in one host
+            known_data: {}
+            # Example to fix two data in one host
+            #known_data: {213.47.23.195: [[User1,DataFromServer1], [User5,DataFromServer5]]}
+            # Example to fix the data in two host
+            #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
+            # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
+            # known_data: {213.47.23.195: [random]}
+            blocked_ips: {all_attackers}
+
+      start_position:
+            known_networks: [all_local]
+            known_hosts: [all_local]
+            # The attacker must always at least control the CC if the goal is to exfiltrate there
+            # Example of fixing the starting point of the agent in a local host
+            controlled_hosts: [all_local]
+            # Example of asking a random position to start the agent
+            # controlled_hosts: [213.47.23.195, random]
+            # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+            known_services: {all_local}
+            # known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+            # Same format as before
+            known_data: {all_local}
+            # Blocked IPs
+            blocked_ips: {}
+
 env:
   # random means to choose the seed in a random way, so it is not fixed
   random_seed: 'random'

From 7ea6cbc999eb2de81d6e0fa0d22af34e14c529a4 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 27 Jun 2024 11:36:11 +0200
Subject: [PATCH 04/87] utils. First adaptation to add a defender. Make the
 functions to read the conf to be useful for attackers and defenders alike.

---
 utils/utils.py | 100 ++++++++++++++-----------------------------------
 1 file changed, 29 insertions(+), 71 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index 8dda6f34..f4eba017 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -112,13 +112,6 @@ def read_config_file(self, conf_file_name):
             self.logger.error(f'Error loading the configuration file{e}')
             pass
     
-    def read_defender_detection_prob(self, action_name: str) -> dict:
-        if self.config["coordinator"]["agents"]["defenders"]["type"] in ["StochasticWithThreshold", "StochasticDefender"]:
-            action_detect_p = self.config["coordinator"]["agents"]["defenders"]["action_detetection_prob"][action_name]
-        else:
-            action_detect_p = 0
-        return action_detect_p  
-
     def read_env_action_data(self, action_name: str) -> dict:
         """
         Generic function to read the known data for any agent and goal of position
@@ -226,62 +219,63 @@ def read_agents_controlled_hosts(self, type_agent: str, type_data: str) -> dict:
                     self.logger('Configuration problem with the known hosts')
         return controlled_hosts
 
-    
-    def get_attackers_win_conditions(self):
+    def get_player_win_conditions(self, type_of_player):
         """
-        Get the goal of the attacker 
+        Get the goal of the player
+        type_of_player: Can be 'attackers' or 'defenders' 
         """
         # Read known nets
-        known_networks = self.read_agents_known_networks('attackers', 'goal')
+        known_networks = self.read_agents_known_networks(type_of_player, 'goal')
 
         # Read known hosts
-        known_hosts = self.read_agents_known_hosts('attackers', 'goal')
+        known_hosts = self.read_agents_known_hosts(type_of_player, 'goal')
 
         # Read controlled hosts
-        controlled_hosts = self.read_agents_controlled_hosts('attackers', 'goal')
+        controlled_hosts = self.read_agents_controlled_hosts(type_of_player, 'goal')
 
         # Goal services
-        known_services = self.read_agents_known_services('attackers', 'goal')
+        known_services = self.read_agents_known_services(type_of_player, 'goal')
 
         # Goal data
-        known_data = self.read_agents_known_data('attackers', 'goal')
+        known_data = self.read_agents_known_data(type_of_player, 'goal')
 
-        attackers_goal = {}
-        attackers_goal['known_networks'] = known_networks
-        attackers_goal['controlled_hosts'] = controlled_hosts
-        attackers_goal['known_hosts'] = known_hosts
-        attackers_goal['known_data'] = known_data
-        attackers_goal['known_services'] = known_services
+        player_goal = {}
+        player_goal['known_networks'] = known_networks
+        player_goal['controlled_hosts'] = controlled_hosts
+        player_goal['known_hosts'] = known_hosts
+        player_goal['known_data'] = known_data
+        player_goal['known_services'] = known_services
 
-        return attackers_goal
+        return player_goal
     
-    def get_attackers_start_position(self):
+    def get_player_start_position(self, type_of_player):
         """
         Generate the starting position of an attacking agent
+        type_of_player: Can be 'attackers' or 'defenders' 
         """
         # Read known nets
-        known_networks = self.read_agents_known_networks('attackers', 'start_position')
+        known_networks = self.read_agents_known_networks(type_of_player, 'start_position')
 
         # Read known hosts
-        known_hosts = self.read_agents_known_hosts('attackers', 'start_position')
+        known_hosts = self.read_agents_known_hosts(type_of_player, 'start_position')
 
         # Read controlled hosts
-        controlled_hosts = self.read_agents_controlled_hosts('attackers', 'start_position')
+        controlled_hosts = self.read_agents_controlled_hosts(type_of_player, 'start_position')
 
         # Start services
-        known_services = self.read_agents_known_services('attackers', 'start_position')
+        known_services = self.read_agents_known_services(type_of_player, 'start_position')
 
         # Start data
-        known_data = self.read_agents_known_data('attackers', 'start_position')
+        known_data = self.read_agents_known_data(type_of_player, 'start_position')
 
-        attackers_start_position = {}
-        attackers_start_position['known_networks'] = known_networks
-        attackers_start_position['controlled_hosts'] = controlled_hosts
-        attackers_start_position['known_hosts'] = known_hosts
-        attackers_start_position['known_data'] = known_data
-        attackers_start_position['known_services'] = known_services
+        player_start_position = {}
+        player_start_position['known_networks'] = known_networks
+        player_start_position['controlled_hosts'] = controlled_hosts
+        player_start_position['known_hosts'] = known_hosts
+        player_start_position['known_data'] = known_data
+        player_start_position['known_services'] = known_services
 
-        return attackers_start_position
+        return player_start_position
 
     def get_start_position(self, agent_role):
         match agent_role:
@@ -404,42 +398,6 @@ def get_store_trajectories(self):
             store_rb = False
         return store_rb
     
-    def get_defender_type(self):
-        """
-        Get the type of the defender
-        """
-        try:
-            defender_placements = self.config["coordinator"]['agents']['defenders']['type']
-        except KeyError:
-            # Option is not in the configuration - default to no defender present
-            defender_placements = "NoDefender"
-        return defender_placements
-    
-    def get_defender_tw_size(self):
-        tw_size = self.config["coordinator"]['agents']['defenders']['tw_size']
-        return tw_size
-    
-    def get_defender_thresholds(self):
-        """Function to read thresholds for stochastic defender with thresholds"""
-        thresholds = {}
-        config_thresholds = self.config["coordinator"]['agents']['defenders']["thresholds"]
-        # ScanNetwork
-        thresholds[ActionType.ScanNetwork] = {"consecutive_actions": config_thresholds["scan_network"]["consecutive_actions"]}
-        thresholds[ActionType.ScanNetwork]["tw_ratio"] = config_thresholds["scan_network"]["tw_ratio"]
-        # FindServices
-        thresholds[ActionType.FindServices] = {"consecutive_actions": config_thresholds["find_services"]["consecutive_actions"]}
-        thresholds[ActionType.FindServices]["tw_ratio"] = config_thresholds["find_services"]["tw_ratio"]
-        # FindData
-        thresholds[ActionType.FindData] = {"repeated_actions_episode": config_thresholds["find_data"]["repeated_actions_episode"]}
-        thresholds[ActionType.FindData]["tw_ratio"] = config_thresholds["find_data"]["tw_ratio"]
-        # ExploitService
-        thresholds[ActionType.ExploitService] = {"repeated_actions_episode": config_thresholds["exploit_service"]["repeated_actions_episode"]}
-        thresholds[ActionType.ExploitService]["tw_ratio"] = config_thresholds["exploit_service"]["tw_ratio"]
-        # ExfiltrateData
-        thresholds[ActionType.ExfiltrateData] = {"consecutive_actions": config_thresholds["exfiltrate_data"]["consecutive_actions"]}
-        thresholds[ActionType.ExfiltrateData]["tw_ratio"] = config_thresholds["exfiltrate_data"]["tw_ratio"]
-        return thresholds
-
     def get_scenario(self):
         """
         Get the scenario config object

From 08710e5ebf54f937ceffa79c7507eb492f1113f8 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 27 Jun 2024 11:36:56 +0200
Subject: [PATCH 05/87] netsecenv. Delete the simplistic defender in netsecenv.
 Refactor function names to be generic

---
 env/network_security_game.py | 257 ++++++++++++++++++-----------------
 1 file changed, 129 insertions(+), 128 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 45ef7061..5559dc95 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -18,123 +18,6 @@
 # Set the logging
 logger = logging.getLogger('Netsecenv')
 
-class SimplisticDefender:
-    def __init__(self, config_file) -> None:
-        self.task_config = ConfigParser(config_file)
-        self.logger = logging.getLogger('Netsecenv-Defender')
-        defender_type = self.task_config.get_defender_type()
-        self.logger.info(f"Defender set to be of type '{defender_type}'")
-        match defender_type:
-            case "NoDefender":
-                self._defender_type = None
-            case 'StochasticDefender':
-                # For now there is only one type of defender
-                self._defender_type = "Stochastic"
-                self.detection_probability = self._read_detection_probabilities()
-            case "StochasticWithThreshold":
-                self._defender_type = "StochasticWithThreshold"
-                self.detection_probability = self._read_detection_probabilities()
-                self._defender_thresholds = self.task_config.get_defender_thresholds()
-                self._defender_thresholds["tw_size"] = self.task_config.get_defender_tw_size()
-                self._actions_played = []
-            case _: # Default option - no defender
-                self._defender_type = None
-    
-    def _read_detection_probabilities(self)->dict:
-        """
-        Method to read detection probabilities from the task config task.
-        """
-        detection_probability = {}
-        detection_probability[components.ActionType.ScanNetwork] = self.task_config.read_defender_detection_prob('scan_network')
-        detection_probability[components.ActionType.FindServices] = self.task_config.read_defender_detection_prob('find_services')
-        detection_probability[components.ActionType.ExploitService] = self.task_config.read_defender_detection_prob('exploit_service')
-        detection_probability[components.ActionType.FindData] = self.task_config.read_defender_detection_prob('find_data')
-        detection_probability[components.ActionType.ExfiltrateData] = self.task_config.read_defender_detection_prob('exfiltrate_data')
-        self.logger.info(f"Detection probabilities:{detection_probability}")
-        return detection_probability
-
-    def detect(self, state:components.GameState, action:components.Action, actions_played):
-        """
-        Checks if current action was detected based on the defendr type:
-        """
-        if self._defender_type is not None: # There is a defender present
-            match self._defender_type:
-                case "Stochastic":
-                    detection = self._stochastic_detection(action)
-                    self.logger.info(f"\tAction detected?: {detection}")
-                    return detection
-                case "StochasticWithThreshold":
-                    self.logger.info(f"Checking detection based on rules: {action}")
-                    detection = self._stochastic_detection_with_thresholds(action, actions_played)
-                    self.logger.info(f"\tAction detected?: {detection}")
-                    return detection
-        else: # No defender in the environment
-            logger.info("\tNo defender present")
-            return False
-    
-    def _stochastic_detection_with_thresholds(self, action:components.Action, actions_played)->bool:        
-        """ Method used for detection with stochastic defender with minimal thresholds"""
-        if len(actions_played) > self._defender_thresholds["tw_size"]: # single action is never detected:
-            last_n_actions = actions_played[-self._defender_thresholds["tw_size"]:]
-            last_n_action_types = [action.type for action in last_n_actions]
-            repeated_action_episode = actions_played.count(action)
-            self.logger.info('\tThreshold check')
-            # update threh
-            match action.type: # thresholds are based on action type
-                case components.ActionType.ScanNetwork:
-                    tw_ratio = last_n_action_types.count(components.ActionType.ScanNetwork)/self._defender_thresholds["tw_size"]
-                    num_consecutive_scans = max(sum(1 for item in grouped if item == components.ActionType.ScanNetwork)
-                                                for _,grouped in itertools.groupby(last_n_action_types))
-                    if tw_ratio < self._defender_thresholds[components.ActionType.ScanNetwork]["tw_ratio"] and num_consecutive_scans < self._defender_thresholds[components.ActionType.ScanNetwork]["consecutive_actions"]:
-                        return False
-                    else:
-                        self.logger.info(f"\t\t Threshold crossed - TW ratio:{tw_ratio}(T={self._defender_thresholds[components.ActionType.ScanNetwork]['tw_ratio']}), #consecutive actions:{num_consecutive_scans} (T={self._defender_thresholds[components.ActionType.ScanNetwork]['consecutive_actions']})")
-                        return self._stochastic_detection(action)
-                case components.ActionType.FindServices:
-                    tw_ratio = last_n_action_types.count(components.ActionType.FindServices)/self._defender_thresholds["tw_size"]
-                    num_consecutive_scans = max(sum(1 for item in grouped if item == components.ActionType.FindServices)
-                                                for _,grouped in itertools.groupby(last_n_action_types))
-                    if tw_ratio < self._defender_thresholds[components.ActionType.FindServices]["tw_ratio"] and num_consecutive_scans < self._defender_thresholds[components.ActionType.FindServices]["consecutive_actions"]:
-                        return False
-                    else:
-                        self.logger.info(f"\t\t Threshold crossed - TW ratio:{tw_ratio}(T={self._defender_thresholds[components.ActionType.FindServices]['tw_ratio']}), #consecutive actions:{num_consecutive_scans} (T={self._defender_thresholds[components.ActionType.FindServices]['consecutive_actions']})")
-                        return self._stochastic_detection(action)
-                case components.ActionType.FindData:
-                    tw_ratio = last_n_action_types.count(components.ActionType.FindData)/self._defender_thresholds["tw_size"]
-                    if tw_ratio < self._defender_thresholds[components.ActionType.FindData]["tw_ratio"] and repeated_action_episode < self._defender_thresholds[components.ActionType.FindData]["repeated_actions_episode"]:
-                        return False
-                    else:
-                        self.logger.info(f"\t\t Threshold crossed - TW ratio:{tw_ratio}(T={self._defender_thresholds[components.ActionType.FindData]['tw_ratio']}), #repeated actions:{repeated_action_episode}")
-                        return self._stochastic_detection(action)
-                case components.ActionType.ExploitService:
-                    tw_ratio = last_n_action_types.count(components.ActionType.ExploitService)/self._defender_thresholds["tw_size"]
-                    if tw_ratio < self._defender_thresholds[components.ActionType.ExploitService]["tw_ratio"] and repeated_action_episode < self._defender_thresholds[components.ActionType.ExploitService]["repeated_actions_episode"]:
-                        return False
-                    else:
-                        self.logger.info(f"\t\t Threshold crossed - TW ratio:{tw_ratio}(T={self._defender_thresholds[components.ActionType.ExploitService]['tw_ratio']}), #repeated actions:{repeated_action_episode}")
-                        return self._stochastic_detection(action)
-                case components.ActionType.ExfiltrateData:
-                    tw_ratio = last_n_action_types.count(components.ActionType.ExfiltrateData)/self._defender_thresholds["tw_size"]
-                    num_consecutive_scans = max(sum(1 for item in grouped if item == components.ActionType.ExfiltrateData)
-                                                for _,grouped in itertools.groupby(last_n_action_types))
-                    if tw_ratio < self._defender_thresholds[components.ActionType.ExfiltrateData]["tw_ratio"] and num_consecutive_scans < self._defender_thresholds[components.ActionType.ExfiltrateData]["consecutive_actions"]:
-                        return False
-                    else:
-                        self.logger.info(f"\t\t Threshold crossed - TW ratio:{tw_ratio}(T={self._defender_thresholds[components.ActionType.ExfiltrateData]['tw_ratio']}), #consecutive actions:{num_consecutive_scans} (T={self._defender_thresholds[components.ActionType.ExfiltrateData]['consecutive_actions']})")
-                        return self._stochastic_detection(action)
-                case _: # default case - No detection
-                    return False
-        return False
-    
-    def _stochastic_detection(self, action: components.Action)->bool:
-        """ Method stochastic detection based on action default probability"""
-        roll = random.random()
-        self.logger.info(f"\tRunning stochastic detection. {roll} < {self.detection_probability[action.type]}")
-        return roll < self.detection_probability[action.type]
-    
-    def reset(self)->None:
-        self.logger.info("Defender resetted")
-
 class NetworkSecurityEnvironment(object):
     """
     Class to manage the whole network security game
@@ -187,24 +70,27 @@ def __init__(self, task_config_file) -> None:
         components.ActionType.ExploitService.default_success_p = self.task_config.read_env_action_data('exploit_service')
         components.ActionType.FindData.default_success_p = self.task_config.read_env_action_data('find_data')
         components.ActionType.ExfiltrateData.default_success_p = self.task_config.read_env_action_data('exfiltrate_data')
+        components.ActionType.BlockIP.default_success_p = self.task_config.read_env_action_data('block_ip')
 
-        # Place the defender
-        self._defender = SimplisticDefender(task_config_file)
-        
         # Get attacker start
-        self._attacker_start_position = self.task_config.get_attackers_start_position()
+        self._attackers_start_position = self.task_config.get_player_start_position('attackers')
 
         # should be randomized once or every episode?
-        self._randomize_goal_every_episode = self.task_config.get_randomize_goal_every_episode()
+        self._randomize_attacker_goal_every_episode = self.task_config.get_randomize_goal_every_episode()
         
+        # store goal definition
+        self._attacker_goal_conditions = self.task_config.get_player_win_conditions('attackers')
+
+        # store goal description
+        self._goal_description = self.task_config.get_goal_description()
 
-        # # Process episodic randomization of goal position
-        # if not self._randomize_goal_every_episode:
-        #     # REPLACE 'random' keyword once
-        #     logger.info("Episodic randomization disabled, generating static goal_conditions")
-        #     self._goal_conditions = self._process_win_conditions(self._goal_conditions)
+        # Process episodic randomization of goal position
+        # if not self._randomize_attacker_goal_every_episode:
+            # # REPLACE 'random' keyword once
+            # logger.info("Episodic randomization disabled, generating static goal_conditions")
+            # self._attacker_goal_conditions = self._process_win_conditions(self._attacker_goal_conditions)
         # else:
-        #     logger.info("Episodic randomization enabled, keeping 'random' keyword in the goal description.")
+            # logger.info("Episodic randomization enabled, keeping 'random' keyword in the goal description.")
 
         # At this point all 'random' values should be assigned to something
         # Check if dynamic network and ip adddresses are required
@@ -299,6 +185,121 @@ def get_all_actions(self):
                             actions.add(components.Action(components.ActionType.ExploitService, {"target_host":ip, "target_service":service, "source_host":src_ip}))
         return {k:v for k,v in enumerate(actions)}
     
+    def _create_starting_state(self) -> components.GameState:
+        """
+        Builds the starting GameState from 'self._attacker_start_position'.
+        If there is a keyword 'random' used, it is replaced by a valid option at random.
+
+        Currently, we artificially extend the knonw_networks with +- 1 in the third octet.
+        """
+        known_networks = set()
+        controlled_hosts = set()
+        logger.info('Generating starting state')
+        for controlled_host in self._attackers_start_position['controlled_hosts']:
+            if isinstance(controlled_host, components.IP):
+                controlled_hosts.add(controlled_host)
+                logger.info(f'\tThe attacker has control of host {str(controlled_host)}.')
+            elif controlled_host == 'random':
+                # Random start
+                logger.info('\tAdding random starting position of agent')
+                logger.info(f'\t\tChoosing from {self.hosts_to_start}')
+                controlled_hosts.add(random.choice(self.hosts_to_start))
+                logger.info(f'\t\tMaking agent start in {controlled_hosts}')
+            else:
+                logger.error(f"Unsupported value encountered in start_position['controlled_hosts']: {controlled_host}")
+
+        # Add all controlled hosts to known_hosts
+        known_hosts = self._attackers_start_position["known_hosts"].union(controlled_hosts)
+        
+        # Extend the known networks with the neighbouring networks
+        # This is to solve in the env (and not in the agent) the problem
+        # of not knowing other networks appart from the one the agent is in
+        # This is wrong and should be done by the agent, not here
+        # TODO remove this!
+        for controlled_host in controlled_hosts:
+            for net in self._get_networks_from_host(controlled_host): #TODO
+                net_obj = netaddr.IPNetwork(str(net))
+                if net_obj.ip.is_ipv4_private_use(): #TODO
+                    known_networks.add(net)
+                    net_obj.value += 256
+                    if net_obj.ip.is_ipv4_private_use():
+                        ip = components.Network(str(net_obj.ip), net_obj.prefixlen)
+                        logger.info(f'\tAdding {ip} to agent')
+                        known_networks.add(ip)
+                    net_obj.value -= 2*256
+                    if net_obj.ip.is_ipv4_private_use():
+                        ip = components.Network(str(net_obj.ip), net_obj.prefixlen)
+                        logger.info(f'\tAdding {ip} to agent')
+                        known_networks.add(ip)
+                    #return value back to the original
+                    net_obj.value += 256
+       
+        game_state = components.GameState(controlled_hosts, known_hosts, self._attackers_start_position["known_services"], self._attackers_start_position["known_data"], known_networks)
+        return game_state
+
+    def _process_win_conditions(self, win_conditions)->dict:
+        """
+        Method which analyses win_conditions and randomizes parts if required
+        """
+        logger.info("Processing win conditions")
+        updated_win_conditions = {}
+        
+        # networks
+        if win_conditions["known_networks"] == "random":
+            updated_win_conditions["known_networks"] = {random.choice(list(self._networks.keys()))}
+            logger.info("\t\tRadnomizing known_networks")
+        else:
+            updated_win_conditions["known_networks"] = copy.deepcopy(win_conditions["known_networks"])
+        logger.info(f"\tGoal known_networks: {updated_win_conditions['known_networks']}")
+        # known_hosts
+        if win_conditions["known_hosts"] == "random":
+            logger.info("\t\tRandomizing known_host")
+            updated_win_conditions["known_hosts"] = {random.choice(list(self._ip_to_hostname.keys()))}
+        else:
+            updated_win_conditions["known_hosts"] = copy.deepcopy(win_conditions["known_hosts"])
+        logger.info(f"\tGoal known_hosts: {updated_win_conditions['known_hosts']}")
+        
+        # controlled_hosts
+        if win_conditions["controlled_hosts"] == "random":
+            logger.info("\tRandomizing controlled_hots")
+            updated_win_conditions["controlled_hosts"] = {random.choice(list(self._ip_to_hostname.keys()))}
+        else:
+            updated_win_conditions["controlled_hosts"] = copy.deepcopy(win_conditions["controlled_hosts"])
+        logger.info(f"\tGoal controlled_hosts: {updated_win_conditions['controlled_hosts']}")
+        
+        # services
+        updated_win_conditions["known_services"] = {}
+        for host, service_list in win_conditions["known_services"].items():
+            # Was the position defined as random?
+            if isinstance(service_list, str) and service_list.lower() == "random":
+                available_services = []
+                for service in self._services[self._ip_to_hostname[host]]:
+                    available_services.append(components.Service(service.name, service.type, service.version, service.is_local))
+                logger.info(f"\tRandomizing known_services in {host}")
+                updated_win_conditions["known_services"][host] = random.choice(available_services)
+            else:
+                updated_win_conditions["known_services"][host] = copy.deepcopy(win_conditions["known_services"][host])
+        logger.info(f"\tGoal known_services: {updated_win_conditions['known_services']}")
+        
+        # data
+        # prepare all available data if randomization is needed
+        available_data = set()
+        for data in self._data.values():
+            for datapoint in data:
+                available_data.add(components.Data(datapoint.owner, datapoint.id))
+        
+        updated_win_conditions["known_data"] = {}
+        for host, data_set in win_conditions["known_data"].items():
+            # Was random data required in this host?
+            if isinstance(data_set, str) and data_set.lower() == "random":
+                # From all available data, randomly pick the one that is going to be requested in this host
+                updated_win_conditions["known_data"][host] = {random.choice(list(available_data))}
+                logger.info(f"\tRandomizing known_data in {host}")
+            else:
+                updated_win_conditions["known_data"][host] = copy.deepcopy(win_conditions["known_data"][host])
+        logger.info(f"\tGoal known_data: {updated_win_conditions['known_data']}")
+        return updated_win_conditions
+
     def _process_cyst_config(self, configuration_objects:list)-> None:
         """
         Process the cyst configuration file

From 0193c870ba5ace2888aea3cda733261592e58673 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 27 Jun 2024 11:37:24 +0200
Subject: [PATCH 06/87] netsecenv. In the yaml conf, add the new action BlockIP
 and its prob of success

---
 env/netsecenv_conf.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 3da90c2b..81b197c2 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -108,4 +108,6 @@ env:
     find_data:
       prob_success: 1.0
     exfiltrate_data:
+      prob_success: 1.0
+    block_ip:
       prob_success: 1.0
\ No newline at end of file

From 228f173869045404ecf9bef3b36595bfc9699599 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 27 Jun 2024 15:03:11 +0200
Subject: [PATCH 07/87] Use latest agents

---
 NetSecGameAgents | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NetSecGameAgents b/NetSecGameAgents
index 2308a4df..460072d7 160000
--- a/NetSecGameAgents
+++ b/NetSecGameAgents
@@ -1 +1 @@
-Subproject commit 2308a4df829467f49f7b031e91a1c31372bef9b5
+Subproject commit 460072d7a13301cc2e53ccb2a215e20e0db2322c

From 22c764893a42c77137eaa3cd6bb5637ea1724315 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:47:55 +0200
Subject: [PATCH 08/87] netsecenv. Delete unused initializations
 pre-multiagents

---
 env/network_security_game.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 5559dc95..1a24b59a 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -72,26 +72,6 @@ def __init__(self, task_config_file) -> None:
         components.ActionType.ExfiltrateData.default_success_p = self.task_config.read_env_action_data('exfiltrate_data')
         components.ActionType.BlockIP.default_success_p = self.task_config.read_env_action_data('block_ip')
 
-        # Get attacker start
-        self._attackers_start_position = self.task_config.get_player_start_position('attackers')
-
-        # should be randomized once or every episode?
-        self._randomize_attacker_goal_every_episode = self.task_config.get_randomize_goal_every_episode()
-        
-        # store goal definition
-        self._attacker_goal_conditions = self.task_config.get_player_win_conditions('attackers')
-
-        # store goal description
-        self._goal_description = self.task_config.get_goal_description()
-
-        # Process episodic randomization of goal position
-        # if not self._randomize_attacker_goal_every_episode:
-            # # REPLACE 'random' keyword once
-            # logger.info("Episodic randomization disabled, generating static goal_conditions")
-            # self._attacker_goal_conditions = self._process_win_conditions(self._attacker_goal_conditions)
-        # else:
-            # logger.info("Episodic randomization enabled, keeping 'random' keyword in the goal description.")
-
         # At this point all 'random' values should be assigned to something
         # Check if dynamic network and ip adddresses are required
         if self.task_config.get_use_dynamic_addresses():

From 14d0c91e45298831523aba6bb907deede77e094d Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:48:56 +0200
Subject: [PATCH 09/87] netsecenv. Remove the goal_description property in
 favor of the new function to get goal description

---
 env/network_security_game.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 1a24b59a..6499533c 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -105,10 +105,6 @@ def seed(self)->int:
     def num_actions(self):
         return len(self.get_all_actions())
     
-    @property
-    def goal_description(self):
-       return self._goal_description
-        
     def get_all_states(self):
         import itertools
         def all_combs(data):

From 11d0c83d36f503459cf5c75fb0506f0d0aa5bcaf Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:49:26 +0200
Subject: [PATCH 10/87] netsecenv. Delete old create starting state. Migrated

---
 env/network_security_game.py | 52 ------------------------------------
 1 file changed, 52 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 6499533c..7bd3e3a3 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -161,58 +161,6 @@ def get_all_actions(self):
                             actions.add(components.Action(components.ActionType.ExploitService, {"target_host":ip, "target_service":service, "source_host":src_ip}))
         return {k:v for k,v in enumerate(actions)}
     
-    def _create_starting_state(self) -> components.GameState:
-        """
-        Builds the starting GameState from 'self._attacker_start_position'.
-        If there is a keyword 'random' used, it is replaced by a valid option at random.
-
-        Currently, we artificially extend the knonw_networks with +- 1 in the third octet.
-        """
-        known_networks = set()
-        controlled_hosts = set()
-        logger.info('Generating starting state')
-        for controlled_host in self._attackers_start_position['controlled_hosts']:
-            if isinstance(controlled_host, components.IP):
-                controlled_hosts.add(controlled_host)
-                logger.info(f'\tThe attacker has control of host {str(controlled_host)}.')
-            elif controlled_host == 'random':
-                # Random start
-                logger.info('\tAdding random starting position of agent')
-                logger.info(f'\t\tChoosing from {self.hosts_to_start}')
-                controlled_hosts.add(random.choice(self.hosts_to_start))
-                logger.info(f'\t\tMaking agent start in {controlled_hosts}')
-            else:
-                logger.error(f"Unsupported value encountered in start_position['controlled_hosts']: {controlled_host}")
-
-        # Add all controlled hosts to known_hosts
-        known_hosts = self._attackers_start_position["known_hosts"].union(controlled_hosts)
-        
-        # Extend the known networks with the neighbouring networks
-        # This is to solve in the env (and not in the agent) the problem
-        # of not knowing other networks appart from the one the agent is in
-        # This is wrong and should be done by the agent, not here
-        # TODO remove this!
-        for controlled_host in controlled_hosts:
-            for net in self._get_networks_from_host(controlled_host): #TODO
-                net_obj = netaddr.IPNetwork(str(net))
-                if net_obj.ip.is_ipv4_private_use(): #TODO
-                    known_networks.add(net)
-                    net_obj.value += 256
-                    if net_obj.ip.is_ipv4_private_use():
-                        ip = components.Network(str(net_obj.ip), net_obj.prefixlen)
-                        logger.info(f'\tAdding {ip} to agent')
-                        known_networks.add(ip)
-                    net_obj.value -= 2*256
-                    if net_obj.ip.is_ipv4_private_use():
-                        ip = components.Network(str(net_obj.ip), net_obj.prefixlen)
-                        logger.info(f'\tAdding {ip} to agent')
-                        known_networks.add(ip)
-                    #return value back to the original
-                    net_obj.value += 256
-       
-        game_state = components.GameState(controlled_hosts, known_hosts, self._attackers_start_position["known_services"], self._attackers_start_position["known_data"], known_networks)
-        return game_state
-
     def _process_win_conditions(self, win_conditions)->dict:
         """
         Method which analyses win_conditions and randomizes parts if required

From e06b17b8a0c6d8dc6e2baaa871195abec30dccff Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:49:55 +0200
Subject: [PATCH 11/87] netsecenv. Refactor so we don't use acronyms. Always
 very clear

---
 env/network_security_game.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 7bd3e3a3..1564ac8d 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -883,7 +883,7 @@ def create_state_from_view(self, view:dict, add_neighboring_nets=True)->componen
         for ip, data_list in view["known_data"]:
             known_data[self._ip_mapping[ip]] = data_list
         game_state = components.GameState(controlled_hosts, known_hosts, known_services, known_data, known_networks)
-        logger.info(f"Generated GS:{game_state}")
+        logger.info(f"Generated GameState:{game_state}")
         return game_state
 
     def re_map_goal_dict(self, goal_dict)->dict:

From c757ae190b8e68780358b78c47d66bb851a1b970 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:50:24 +0200
Subject: [PATCH 12/87] netsecenv. Delete old reset() for the defender that
 does not exist anymore

---
 env/network_security_game.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 1564ac8d..393e5a8b 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -987,7 +987,6 @@ def reset(self, trajectory_filename=None)->None:
       
 
         self._actions_played = []
-        self._defender.reset()
 
     def step(self, state:components.GameState, action:components.Action, agent_id:tuple,action_type='netsecenv')-> components.GameState:
         """

From 18781ef77c8375e6f98dcf8717dd8d4d6c13e929 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:50:45 +0200
Subject: [PATCH 13/87] netsecenv. Fix bug that the action was added twice to
 the list of played actions

---
 env/network_security_game.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 393e5a8b..cdd4bfd9 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -997,7 +997,6 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
         logger.info(f"Agent's action: {action}")
         # Reward for taking an action
         reward = self._rewards["step"]
-        self._actions_played.append(action)
 
         # 1. Perform the action
         self._actions_played.append(action)

From 2ff7b45175bac47825df7ab145f874dcc0f82c5a Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:51:14 +0200
Subject: [PATCH 14/87] netsecenv conf. Migrate attackers to Attacker and
 defenders to Defender, so they can be found by the new code

---
 env/netsecenv_conf.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 81b197c2..93c92fce 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -3,7 +3,7 @@
 coordinator:
   # Define the conf for all the 'attacker' agents.
   agents:
-    attackers:
+    Attacker:
       goal:
         description: "Exfiltrate data to '213.47.23.195'"
         is_any_part_of_goal_random: True
@@ -40,7 +40,7 @@ coordinator:
         # Same format as before
         known_data: {}
 
-    defenders:
+    Defender:
       goal:
             description: "Block all attackers"
             is_any_part_of_goal_random: False
@@ -68,7 +68,7 @@ coordinator:
             # known_data: {213.47.23.195: [random]}
             blocked_ips: {all_attackers}
 
-          start_position:
+      start_position:
             known_networks: [all_local]
             known_hosts: [all_local]
             # The attacker must always at least control the CC if the goal is to exfiltrate there

From aea863280b1a6046d60e64b738c205ff5ff21b56 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:51:37 +0200
Subject: [PATCH 15/87] utils. Migrate the functions to be generic and useful
 for attackers and defenders

---
 utils/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index f4eba017..165b94a1 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -280,7 +280,7 @@ def get_player_start_position(self, type_of_player):
     def get_start_position(self, agent_role):
         match agent_role:
             case "Attacker":
-                return self.get_attackers_start_position()
+                return self.get_player_start_position(agent_role)
             case "Defender":
                 return {}
             case "Benign":
@@ -297,7 +297,7 @@ def get_start_position(self, agent_role):
     def get_win_conditions(self, agent_role):
          match agent_role:
             case "Attacker":
-                return self.get_attackers_win_conditions()
+                return self.get_player_win_conditions(agent_role)
             case "Defender":
                 return {}
             case "Benign":
@@ -326,7 +326,7 @@ def get_goal_description(self, agent_role)->dict:
         match agent_role:
             case "Attacker":
                 try:
-                    description = self.config['coordinator']['agents']["attackers"]["goal"]["description"]
+                    description = self.config['coordinator']['agents'][agent_role]["goal"]["description"]
                 except KeyError:
                     description = ""
             case "Defender":

From 7b3076323e1130b0d7dd7bd0bb66abd0ca5c6d48 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:51:49 +0200
Subject: [PATCH 16/87] coor. Small refactor of error

---
 coordinator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/coordinator.py b/coordinator.py
index 8cae5e1c..ac7105b6 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -265,7 +265,7 @@ async def run(self):
         except asyncio.CancelledError:
             self.logger.info("\tTerminating by CancelledError")
         except Exception as e:
-            self.logger.error(f"Exception in main_coordinator(): {e}")
+            self.logger.error(f"Exception in Class coordinator(): {e}")
             raise e
 
     def _initialize_new_player(self, agent_addr:tuple, agent_name:str, agent_role:str) -> Observation:

From 9f97c16c8a00f844ddc34810612678d4c9223e38 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 15:43:48 +0200
Subject: [PATCH 17/87] game_components. Add better debugging

---
 env/game_components.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/game_components.py b/env/game_components.py
index 772f1270..da26abf6 100755
--- a/env/game_components.py
+++ b/env/game_components.py
@@ -37,7 +37,7 @@ def __post_init__(self):
         try:
             ipaddress.ip_address(self.ip)
         except ValueError:
-            raise ValueError("Invalid IP address provided")
+            raise ValueError(f"Invalid IP address provided: {self.ip}")
 
     def __repr__(self):
         return self.ip

From 0bf50fd15e45f2b45e7596c83e0cbd124d221211 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 15:44:38 +0200
Subject: [PATCH 18/87] netsecenv. Add BlockIP action

---
 env/network_security_game.py | 108 +++++++++++++++++++++++++----------
 1 file changed, 79 insertions(+), 29 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index cdd4bfd9..54970b44 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -159,6 +159,12 @@ def get_all_actions(self):
                     for ip, host in self._ip_to_hostname.items():
                         if host_id == host:
                             actions.add(components.Action(components.ActionType.ExploitService, {"target_host":ip, "target_service":service, "source_host":src_ip}))
+        # Get BlockIP actions
+        for src_ip in self._ip_to_hostname:
+            for trg_ip in self._ip_to_hostname:
+                for block_ip in self._ip_to_hostname:
+                    actions.add(components.Action(components.ActionType.BlockIP, {"target_host":trg_ip, "source_host":src_ip, "blocked_host":block_ip}))
+
         return {k:v for k,v in enumerate(actions)}
     
     def _process_win_conditions(self, win_conditions)->dict:
@@ -545,7 +551,7 @@ def _get_data_content(self, host_ip:str, data_id:str)->str:
             logger.debug("Data content not found because target IP does not exists.")
         return content
     
-    def _execute_action(self, current:components.GameState, action:components.Action, action_type='netsecenv')-> components.GameState:
+    def _execute_action(self, current_state:components.GameState, action:components.Action, action_type='netsecenv')-> components.GameState:
         """
         Execute the action and update the values in the state
         Before this function it was checked if the action was successful
@@ -559,20 +565,22 @@ def _execute_action(self, current:components.GameState, action:components.Action
         match action.type:
             case components.ActionType.ScanNetwork:
                 if action_type == "realworld":
-                    next_state = self._execute_scan_network_action_real_world(current, action)
+                    next_state = self._execute_scan_network_action_real_world(current_state, action)
                 else:
-                    next_state = self._execute_scan_network_action(current, action)
+                    next_state = self._execute_scan_network_action(current_state, action)
             case components.ActionType.FindServices:
                 if action_type == "realworld":
-                    next_state = self._execute_find_services_real_world(current, action)
+                    next_state = self._execute_find_services_real_world(current_state, action)
                 else:
-                    next_state = self._execute_find_services_action(current, action)
+                    next_state = self._execute_find_services_action(current_state, action)
             case components.ActionType.FindData:
-                next_state = self._execute_find_data_action(current, action)
+                next_state = self._execute_find_data_action(current_state, action)
             case components.ActionType.ExploitService:
-                next_state = self._execute_exploit_service_action(current, action)
+                next_state = self._execute_exploit_service_action(current_state, action)
             case components.ActionType.ExfiltrateData:
-                next_state = self._execute_exfiltrate_data_action(current, action)
+                next_state = self._execute_exfiltrate_data_action(current_state, action)
+            case components.ActionType.BlockIP:
+                next_state = self._execute_block_ip_action(current_state, action)
             case _:
                 raise ValueError(f"Unknown Action type or other error: '{action.type}'")
         return next_state
@@ -593,13 +601,13 @@ def _firewall_check(self, src_ip:components.IP, dst_ip:components.IP)->bool:
             connection_allowed = False
         return connection_allowed
 
-    def _execute_scan_network_action(self, current:components.GameState, action:components.Action)->components.GameState:
+    def _execute_scan_network_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the ScanNetwork action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tScanning {action.parameters['target_network']}")
-        if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current.controlled_hosts:
+        if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             new_ips = set()
             for ip in self._ip_to_hostname.keys(): #check if IP exists
                 logger.debug(f"\t\tChecking if {ip} in {action.parameters['target_network']}")
@@ -614,15 +622,15 @@ def _execute_scan_network_action(self, current:components.GameState, action:comp
             logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
 
-    def _execute_find_services_action(self, current:components.GameState, action:components.Action)->components.GameState:
+    def _execute_find_services_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the FindServices action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tSearching for services in {action.parameters['target_host']}")
-        if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current.controlled_hosts:
+        if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
-                found_services = self._get_services_from_host(action.parameters["target_host"], current.controlled_hosts)
+                found_services = self._get_services_from_host(action.parameters["target_host"], current_state.controlled_hosts)
                 logger.debug(f"\t\t\tFound {len(found_services)}: {found_services}")
                 if len(found_services) > 0:
                     next_services[action.parameters["target_host"]] = found_services
@@ -659,22 +667,22 @@ def _execute_find_data_action(self, current:components.GameState, action:compone
             logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
     
-    def _execute_exfiltrate_data_action(self, current:components.GameState, action:components.Action)->components.GameState:
+    def _execute_exfiltrate_data_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the ExfiltrateData action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tAttempting to Exfiltrate {action.parameters['data']} from {action.parameters['source_host']} to {action.parameters['target_host']}")
         # Is the target host controlled?
-        if action.parameters["target_host"] in current.controlled_hosts:
-            logger.debug(f"\t\t\t {action.parameters['target_host']} is under-control: {current.controlled_hosts}")
+        if action.parameters["target_host"] in current_state.controlled_hosts:
+            logger.debug(f"\t\t\t {action.parameters['target_host']} is under-control: {current_state.controlled_hosts}")
             # Is the source host controlled?
-            if action.parameters["source_host"] in current.controlled_hosts:
-                logger.debug(f"\t\t\t {action.parameters['source_host']} is under-control: {current.controlled_hosts}")
+            if action.parameters["source_host"] in current_state.controlled_hosts:
+                logger.debug(f"\t\t\t {action.parameters['source_host']} is under-control: {current_state.controlled_hosts}")
                 # Is the source host in the list of hosts we know data from? (this is to avoid the keyerror later in the if)
                 # Does the current state for THIS source already know about this data?
                 if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
-                    if action.parameters['source_host'] in current.known_data.keys() and action.parameters["data"] in current.known_data[action.parameters["source_host"]]:
+                    if action.parameters['source_host'] in current_state.known_data.keys() and action.parameters["data"] in current_state.known_data[action.parameters["source_host"]]:
                         # Does the source host have any data?
                         if self._ip_to_hostname[action.parameters["source_host"]] in self._data.keys():
                             # Does the source host have this data?
@@ -703,14 +711,14 @@ def _execute_exfiltrate_data_action(self, current:components.GameState, action:c
             logger.debug("\t\t\tCan not exfiltrate. Target host is not controlled.")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
     
-    def _execute_exploit_service_action(self, current:components.GameState, action:components.Action)->components.GameState:
+    def _execute_exploit_service_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the ExploitService action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
         # We don't check if the target is a known_host because it can be a blind attempt to attack
         logger.info(f"\t\tAttempting to ExploitService in '{action.parameters['target_host']}':'{action.parameters['target_service']}'")
-        if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current.controlled_hosts:
+        if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             if action.parameters["target_host"] in self._ip_to_hostname: #is it existing IP?
                 if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
                     if self._ip_to_hostname[action.parameters["target_host"]] in self._services: #does it have any services?
@@ -740,11 +748,53 @@ def _execute_exploit_service_action(self, current:components.GameState, action:c
             logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
     
-    def _execute_scan_network_action_real_world(self, current:components.GameState, action:components.Action)->components.GameState:
+    def _execute_block_ip_action(self, current_state, action):
+        """
+        Executes the BlockIP action 
+        - The action has BlockIP("target_host": IP object, "source_host": IP object, "blocked_host": IP object)
+        - The target host is the host where the blocking will be applied (the FW)
+        - The source host is the host that the agent uses to connect to the target host. A host that must be controlled by the agent
+        - The blocked host is the host that will be included in the FW list to be blocked.
+
+        Logic:
+        - Check if the agent controls the source host
+        - Check if the agent controls the target host
+        - Add the rule to the FW list
+        """
+        blocked_host = action.parameters['blocked_host']
+
+        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
+        logger.info(f"\t\tBlockIP {action.parameters['target_host']}")
+        if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
+            if "target_host" in action.parameters.keys() and action.parameters["target_host"] in current_state.controlled_hosts:
+                # For now there is only one FW in the main router, but this should change in the future. 
+                # This means we ignore the 'target_host' that would be the router where this is applied.
+
+                # Stop the blocked host to connect to any other IP
+                try:
+                    self._firewall[blocked_host] = set()
+                except KeyError:
+                    # The blocked_host host was not in the list
+                    pass
+                # Stop the other hosts to connect to the blocked_host
+                for host in self._firewall.keys():
+                    try:
+                        self._firewall[host].remove(blocked_host)
+                    except KeyError:
+                        # The blocked_host host was not in the list
+                        pass
+            else:
+                logger.info(f"\t\t\t Invalid target_host:'{action.parameters['target_host']}'")
+        else:
+            logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+
+
+    def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the ScanNetwork action in the the real world
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
         nmap_file_xml = 'nmap-result.xml'
         command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
@@ -778,11 +828,11 @@ def _execute_scan_network_action_real_world(self, current:components.GameState,
         next_known_h = next_known_h.union(new_ips)
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
     
-    def _execute_find_services_action_real_world(self, current:components.GameState, action:components.Action)->components.GameState:
+    def _execute_find_services_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the FindServices action in the real world
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
         nmap_file_xml = 'nmap-result.xml'
         command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"

From 5054a6edaaa1b961c69c9b4ce74987a310056779 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 16:07:29 +0200
Subject: [PATCH 19/87] coord. Print in log also when the goal was not reached

---
 coordinator.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/coordinator.py b/coordinator.py
index ac7105b6..2222aaf0 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -237,7 +237,7 @@ async def run(self):
                             self.logger.info(f"Coordinator received from RESET request from agent {agent_addr}")
                             if all(self._reset_requests.values()):
                                 # should we discard the queue here?
-                                self.logger.info(f"All agents requested reset, action_q:{self._actions_queue.empty()}, answers_q{self._answers_queue.empty()}")
+                                self.logger.info(f"All agents requested reset, action_q:{self._actions_queue.empty()}, answers_q:{self._answers_queue.empty()}")
                                 self._world.reset()
                                 self._get_goal_description_per_role()
                                 self._get_win_condition_per_role()
@@ -526,6 +526,8 @@ def _goal_reached(self, agent_addr:tuple)->bool:
         goal_check = self._check_goal(agents_state, win_condition)
         if goal_check:
             self.logger.info("\tGoal reached!")
+        else:
+            self.logger.info("\tGoal not reached!")
         return goal_check
     
     def _check_goal(self, state:GameState, goal_conditions:dict)->bool:

From 663b7191bd1f2429861a7da4fe41ed989933ae17 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 16:13:36 +0200
Subject: [PATCH 20/87] Improve main README for block actions

---
 README.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 0e879985..67bdcf3b 100755
--- a/README.md
+++ b/README.md
@@ -47,9 +47,8 @@ The [scenarios](#definition-of-the-network-topology) define the **topology** of
 
 ### Assumptions of the NetSecGame
 1. NetSecGame works with the closed-world assumption. Only the defined entities exist in the simulation.
-2. Actions have no `Delete` effect. No entity is removed from the environment, agents do not forget discovered assets.
-3. If the attacker does a successful action in the same step that the defender successfully detects the action, the priority goes to the defender. The reward is a penalty, and the game ends.
-
+2. If the attacker does a successful action in the same step that the defender successfully detects the action, the priority goes to the defender. The reward is a penalty, and the game ends.
+(From commit d6d4ac9, July 18th, 2024, the new action BlockIP removes controlled hosts from the state of others. So the state can get smaller)
 
 - The action FindServices finds the new services in a host. If in a subsequent call to FindServices there are less services, they completely replace the list of previous services found. That is, each list of services is the final one and no memory is retained of previous open services.
 
@@ -60,14 +59,14 @@ The [scenarios](#definition-of-the-network-topology) define the **topology** of
 4. Playing `ExfiltrateData` requires controlling **BOTH** source and target hosts
 5. Playing `Find Services` can be used to discover hosts (if those have any active services)
 6. Parameters of `ScanNetwork` and `FindServices` can be chosen arbitrarily (they don't have to be listed in `known_newtworks`/`known_hosts`)
+7. The `BlockIP` action needs its three parameters (Source host, Target host and Blocked host) to be in the controlled list of the Agent. 
 
 ### Actions for the defender
 The defender does have the action to block an IP address in a target host. 
 
-In this version, there is no global defender as there was before, because now it is a multi-agent system.
+There is no global defender anymore as there was before, because now it is a multi-agent system.
 
 The actions are:
-
 - BlockIP(). That takes as parameters:
   - "target_host": IP object where the block will be applied.
   - "source_host": IP object where this actions is executed from.

From 3a2e1de2cea324415aef37f09de892f8fc779902 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 17:51:51 +0200
Subject: [PATCH 21/87] components documentation. Add blockip description

---
 docs/Components.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/Components.md b/docs/Components.md
index 4bd5c9cd..6cc19182 100644
--- a/docs/Components.md
+++ b/docs/Components.md
@@ -43,6 +43,7 @@ GameState is an object that represents a view of the NetSecGame environment in a
 - `known_services`: Dictionary of services that the agent is aware of.
 The dictionary format: {`IP`: {`Service`}} where [IP](#ip) object is a key and the value is a set of [Service](#service) objects located in the `IP`.
 - `known_data`: Dictionary of data instances that the agent is aware of. The dictionary format: {`IP`: {`Data`}} where [IP](#ip) object is a key and the value is a set of [Data](#data) objects located in the `IP`.
+- `known_blocks`: Dictionary of firewall blocks the agent is aware of. It is a dictionary with format: {`target_IP`: {`blocked_IP`, `blocked_IP`}}. Where `target_IP` is the [IP](#ip) where the FW rule was applied (usually a router) and `blocked_IP` is the IP address that is blocked. For now the blocks happen in both input and output direction simultaneously.
 
 
 ## Actions

From bddfea67e5c46ac8fe6e132a2a1a7b1fc70913f1 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 17:52:22 +0200
Subject: [PATCH 22/87] In conf of the env for goals/start position. Add the
 known_blocks

---
 env/netsecenv_conf.yaml | 90 +++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 39 deletions(-)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 93c92fce..66acc162 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -25,6 +25,9 @@ coordinator:
         #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
         # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
         # known_data: {213.47.23.195: [random]}
+        known_blocks: {}
+        # Example of known blocks. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
+        # known_blocks: {192.168.2.2: {192.168.1.3}}
 
       start_position:
         known_networks: []
@@ -39,50 +42,59 @@ coordinator:
         # known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
         # Same format as before
         known_data: {}
+        known_blocks: {}
+        # Example of known blocks to start with. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
+        # known_blocks: {192.168.2.2: {192.168.1.3}}
 
     Defender:
       goal:
-            description: "Block all attackers"
-            is_any_part_of_goal_random: False
-            known_networks: []
-            # Example
-            #known_networks: [192.168.1.0/24, 192.168.3.0/24]
-            known_hosts: []
-            # Example
-            #known_hosts: [192.168.1.1, 192.168.1.2]
-            controlled_hosts: []
-            # Example
-            #controlled_hosts: [213.47.23.195, 192.168.1.3]
-            # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
-            known_services: {}
-            # Example
-            #known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
-            # In data, put the target host that must have the data and which data in format user,data
-            # Example to fix the data in one host
-            known_data: {}
-            # Example to fix two data in one host
-            #known_data: {213.47.23.195: [[User1,DataFromServer1], [User5,DataFromServer5]]}
-            # Example to fix the data in two host
-            #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
-            # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
-            # known_data: {213.47.23.195: [random]}
-            blocked_ips: {all_attackers}
+        description: "Block all attackers"
+        is_any_part_of_goal_random: False
+        known_networks: []
+        # Example
+        #known_networks: [192.168.1.0/24, 192.168.3.0/24]
+        known_hosts: []
+        # Example
+        #known_hosts: [192.168.1.1, 192.168.1.2]
+        controlled_hosts: []
+        # Example
+        #controlled_hosts: [213.47.23.195, 192.168.1.3]
+        # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+        known_services: {}
+        # Example
+        #known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+        # In data, put the target host that must have the data and which data in format user,data
+        # Example to fix the data in one host
+        known_data: {}
+        # Example to fix two data in one host
+        #known_data: {213.47.23.195: [[User1,DataFromServer1], [User5,DataFromServer5]]}
+        # Example to fix the data in two host
+        #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
+        # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
+        # known_data: {213.47.23.195: [random]}
+        known_blocks: {'all_routers': 'all_attackers'}
+        # Example of known blocks. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
+        # known_blocks: {192.168.2.2: {192.168.1.3}}
+        # You can also use the wildcard string 'all_routers', and 'all_attackers', to mean that all the controlled hosts of all the attackers should be in this list in order to win
 
       start_position:
-            known_networks: [all_local]
-            known_hosts: [all_local]
-            # The attacker must always at least control the CC if the goal is to exfiltrate there
-            # Example of fixing the starting point of the agent in a local host
-            controlled_hosts: [all_local]
-            # Example of asking a random position to start the agent
-            # controlled_hosts: [213.47.23.195, random]
-            # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
-            known_services: {all_local}
-            # known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
-            # Same format as before
-            known_data: {all_local}
-            # Blocked IPs
-            blocked_ips: {}
+        known_networks: [all_local]
+        known_hosts: [all_local]
+        # The attacker must always at least control the CC if the goal is to exfiltrate there
+        # Example of fixing the starting point of the agent in a local host
+        controlled_hosts: [all_local]
+        # Example of asking a random position to start the agent
+        # controlled_hosts: [213.47.23.195, random]
+        # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+        known_services: {all_local}
+        # known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+        # Same format as before
+        known_data: {all_local}
+        # Blocked IPs
+        blocked_ips: {}
+        known_blocks: {}
+        # Example of known blocks to start with. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
+        # known_blocks: {192.168.2.2: {192.168.1.3}}
 
 env:
   # random means to choose the seed in a random way, so it is not fixed

From fc652f64fcb5b9d7761c86d36f50dc5b5338d16f Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 17:52:44 +0200
Subject: [PATCH 23/87] game_comp. Add the known_blocks to the state

---
 env/game_components.py | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/env/game_components.py b/env/game_components.py
index da26abf6..9c69dffe 100755
--- a/env/game_components.py
+++ b/env/game_components.py
@@ -323,10 +323,11 @@ class GameState():
     known_services: dict = field(default_factory=dict, hash=True)
     known_data: dict = field(default_factory=dict, hash=True)
     known_networks: set = field(default_factory=set, hash=True)
+    known_blocks: dict = field(default_factory=dict, hash=True)
     
     @property
     def as_graph(self):
-        node_types = {"network":0, "host":1, "service":2, "datapoint":3}
+        node_types = {"network":0, "host":1, "service":2, "datapoint":3, "blocks": 4}
         graph_nodes = {}
         node_features = []
         controlled = []
@@ -387,7 +388,7 @@ def as_graph(self):
         return node_features, controlled, edges, {v:k for k, v in graph_nodes.items()}
 
     def __str__(self) -> str:
-        return f"State<nets:{self.known_networks}; known:{self.known_hosts}; owned:{self.controlled_hosts}; services:{self.known_services}; data:{self.known_data}>"    
+        return f"State<nets:{self.known_networks}; known:{self.known_hosts}; owned:{self.controlled_hosts}; services:{self.known_services}; data:{self.known_data}; blocks:{self.known_blocks}>"    
 
     def as_json(self) -> str:
         """
@@ -405,17 +406,22 @@ def as_dict(self)->dict:
             "known_hosts":[dataclasses.asdict(x) for x in self.known_hosts],
             "controlled_hosts":[dataclasses.asdict(x) for x in self.controlled_hosts],
             "known_services": {str(host):[dataclasses.asdict(s) for s in services] for host,services in self.known_services.items()},
-            "known_data":{str(host):[dataclasses.asdict(d) for d in data] for host,data in self.known_data.items()}}
+            "known_data":{str(host):[dataclasses.asdict(d) for d in data] for host,data in self.known_data.items()},
+            "known_blocks":{str(target_host):[dataclasses.asdict(blocked_host) for blocked_host in blocked_hosts] for target_host, blocked_hosts in self.known_blocks.items()}
+                    }
         return ret_dict
 
     @classmethod
     def from_dict(cls, data_dict:dict):
-        state = GameState(known_networks={Network(x["ip"], x["mask"]) for x in data_dict["known_networks"]},
-            known_hosts={IP(x["ip"]) for x in data_dict["known_hosts"]},
-            controlled_hosts={IP(x["ip"]) for x in data_dict["controlled_hosts"]},
-            known_services={IP(k):{Service(s["name"], s["type"], s["version"], s["is_local"])
+        state = GameState(
+            known_networks = {Network(x["ip"], x["mask"]) for x in data_dict["known_networks"]},
+            known_hosts = {IP(x["ip"]) for x in data_dict["known_hosts"]},
+            controlled_hosts = {IP(x["ip"]) for x in data_dict["controlled_hosts"]},
+            known_services = {IP(k):{Service(s["name"], s["type"], s["version"], s["is_local"])
                 for s in services} for k,services in data_dict["known_services"].items()},  
-            known_data={IP(k):{Data(v["owner"], v["id"]) for v in values} for k,values in data_dict["known_data"].items()}) 
+            known_data = {IP(k):{Data(v["owner"], v["id"]) for v in values} for k,values in data_dict["known_data"].items()},
+            known_blocks = {IP(target_host):{IP(blocked_host) for blocked_host in blocked_hosts} for target_host, blocked_hosts in data_dict["known_blocks"].items()}
+                )
         return state
 
     @classmethod
@@ -424,12 +430,15 @@ def from_json(cls, json_string):
         Creates GameState object from json representation in string
         """
         json_data = json.loads(json_string)
-        state = GameState(known_networks={Network(x["ip"], x["mask"]) for x in json_data["known_networks"]},
-                    known_hosts={IP(x["ip"]) for x in json_data["known_hosts"]},
-                    controlled_hosts={IP(x["ip"]) for x in json_data["controlled_hosts"]},
-                    known_services={IP(k):{Service(s["name"], s["type"], s["version"], s["is_local"])
-                        for s in services} for k,services in json_data["known_services"].items()},  
-                    known_data={IP(k):{Data(v["owner"], v["id"]) for v in values} for k,values in json_data["known_data"].items()}) 
+        state = GameState(
+            known_networks = {Network(x["ip"], x["mask"]) for x in json_data["known_networks"]},
+            known_hosts = {IP(x["ip"]) for x in json_data["known_hosts"]},
+            controlled_hosts = {IP(x["ip"]) for x in json_data["controlled_hosts"]},
+            known_services = {IP(k):{Service(s["name"], s["type"], s["version"], s["is_local"])
+                for s in services} for k,services in json_data["known_services"].items()},  
+            known_data = {IP(k):{Data(v["owner"], v["id"]) for v in values} for k,values in json_data["known_data"].items()},
+            known_blocks = {IP(target_host):{IP(blocked_host) for blocked_host in blocked_hosts} for target_host, blocked_hosts in json_data["known_blocks"].items()}
+            )
         return state
 
 
@@ -465,8 +474,9 @@ def from_string(cls, string:str):
     def __repr__(self) -> str:
         return str(self)
 if __name__ == "__main__":
-    data1 = Data(owner="test", id="test_data", content="content", type="db")
-    data2 = Data(owner="test", id="test_data", content="content", type="db")
+    pass
+    #data1 = Data(owner="test", id="test_data", content="content", type="db")
+    #data2 = Data(owner="test", id="test_data", content="content", type="db")
     # print(data)
     # print(data.size)
 

From 42e4f5bb6f97c93629fb23809e64aebc8f829a8a Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 17:53:01 +0200
Subject: [PATCH 24/87] netsecenv. Add the known_blocks to the state

---
 env/network_security_game.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 54970b44..2c352982 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -946,7 +946,8 @@ def re_map_goal_dict(self, goal_dict)->dict:
             "known_hosts":set(),
             "controlled_hosts":set(),
             "known_services": {},
-            "known_data": {}
+            "known_data": {},
+            "known_blocks": {}
         }
         for net in goal_dict["known_networks"]:
             if net in self._network_mapping:
@@ -978,6 +979,12 @@ def re_map_goal_dict(self, goal_dict)->dict:
             else:
                 # Unknown IP, do not map
                 new_dict["known_data"][host] = items
+        for host, items in goal_dict["known_blocks"].items():
+            if host in self._ip_mapping:
+                new_dict["known_blocks"][self._ip_mapping[host]] = items
+            else:
+                # Unknown IP, do not map
+                new_dict["known_blocks"][host] = items
         return new_dict    
 
     def update_goal_descriptions(self, goal_description):

From b2c8085e985d06eb3848115b2ec7ddf12e56892d Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 17:53:20 +0200
Subject: [PATCH 25/87] utils. Add the known_blocks to the state

---
 utils/utils.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index 165b94a1..703c9e86 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -146,6 +146,27 @@ def read_agents_known_data(self, type_agent: str, type_data: str) -> dict:
             except (ValueError, netaddr.AddrFormatError):
                 known_data = {}
         return known_data
+
+    def read_agents_known_blocks(self, type_agent: str, type_data: str) -> dict:
+        """
+        Generic function to read the known blocks for any agent and goal of position
+        """
+        known_blocks_conf = self.config["coordinator"]['agents'][type_agent][type_data]['known_blocks']
+        known_blocks = {}
+        for target_host, dict_blocked_hosts in known_blocks_conf.items():
+            try:
+                # Check the host is a good ip
+                _ = netaddr.IPAddress(target_host)
+                target_host_IP = IP(target_host)
+                for known_blocked_host in dict_blocked_hosts.values():
+                    known_blocked_host_IP = IP(known_blocked_host)
+                    known_blocks[target_host_IP].append(known_blocked_host_IP)
+            except (ValueError, netaddr.AddrFormatError):
+                if target_host.lower() == "all_routers":
+                    known_blocks["all_routers"] = dict_blocked_hosts
+            except (ValueError):
+                known_blocks = {}
+        return known_blocks
     
     def read_agents_known_services(self, type_agent: str, type_data: str) -> dict:
         """
@@ -236,6 +257,9 @@ def get_player_win_conditions(self, type_of_player):
         # Goal services
         known_services = self.read_agents_known_services(type_of_player, 'goal')
 
+        # Read known blocks 
+        known_blocks = self.read_agents_known_blocks(type_of_player, 'goal')
+
         # Goal data
         known_data = self.read_agents_known_data(type_of_player, 'goal')
 
@@ -245,6 +269,7 @@ def get_player_win_conditions(self, type_of_player):
         player_goal['known_hosts'] = known_hosts
         player_goal['known_data'] = known_data
         player_goal['known_services'] = known_services
+        player_goal['known_blocks'] = known_blocks
 
         return player_goal
     
@@ -299,7 +324,7 @@ def get_win_conditions(self, agent_role):
             case "Attacker":
                 return self.get_player_win_conditions(agent_role)
             case "Defender":
-                return {}
+                return self.get_player_win_conditions(agent_role)
             case "Benign":
                 # create goal that is unreachable so we have infinite play by the benign agent
                 return {
@@ -307,7 +332,8 @@ def get_win_conditions(self, agent_role):
                     'controlled_hosts': set(),
                     'known_hosts': set(),
                     'known_data': {IP("1.1.1.1"): {Data(owner='User1', id='DataFromInternet', size=0, type='')}},
-                    'known_services': {}
+                    'known_services': {},
+                    'known_blocks': {}
                 }
             case _:
                 raise ValueError(f"Unsupported agent role: {agent_role}")

From ddb48511a964c3a20644e6607cf6494e8756655f Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 16:34:34 +0200
Subject: [PATCH 26/87] netsecenv. conf file adapt for defender

---
 tests/netsecenv-task-for-testing.yaml | 80 +++++++++++++++++----------
 1 file changed, 52 insertions(+), 28 deletions(-)

diff --git a/tests/netsecenv-task-for-testing.yaml b/tests/netsecenv-task-for-testing.yaml
index 1a33236b..c5041855 100644
--- a/tests/netsecenv-task-for-testing.yaml
+++ b/tests/netsecenv-task-for-testing.yaml
@@ -14,6 +14,7 @@ coordinator:
         controlled_hosts: []
         known_services: {}
         known_data: {213.47.23.195: [[User1,DatabaseData]]}
+        known_blocks: {}
       start_position:
         known_networks: []
         known_hosts: []
@@ -23,34 +24,57 @@ coordinator:
         known_services: {}
         # Same format as before
         known_data: {}
-    defenders:
-      # types are StochasticDefender and NoDefender
-      #type: 'StochasticDefender'
-      #type: 'StochasticWithThreshold'
-      type: 'NoDefender'
-      tw_size: 5
-      thresholds:
-        scan_network: # if both conditions are true, you are never detected
-          consecutive_actions: 2 # min amount of consecutive actions you can do without detection
-          tw_ratio: 0.25 # min ratio of actions in the tw below which you are not detected
-        find_services:
-          consecutive_actions: 3
-          tw_ratio: 0.3
-        exploit_service:
-          repeated_actions_episode: 2
-          tw_ratio: 0.25
-        find_data:
-          tw_ratio: 0.5
-          repeated_actions_episode: 2
-        exfiltrate_data:
-          consecutive_actions: 2
-          tw_ratio: 0.25
-      action_detetection_prob:
-        scan_network: 0.05
-        find_services: 0.075
-        exploit_service: 0.1
-        find_data: 0.025
-        exfiltrate_data: 0.025
+        known_blocks: {}
+    Defender:
+      goal:
+        description: "Block all attackers"
+        is_any_part_of_goal_random: False
+        known_networks: []
+        # Example
+        #known_networks: [192.168.1.0/24, 192.168.3.0/24]
+        known_hosts: []
+        # Example
+        #known_hosts: [192.168.1.1, 192.168.1.2]
+        controlled_hosts: []
+        # Example
+        #controlled_hosts: [213.47.23.195, 192.168.1.3]
+        # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+        known_services: {}
+        # Example
+        #known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+        # In data, put the target host that must have the data and which data in format user,data
+        # Example to fix the data in one host
+        known_data: {}
+        # Example to fix two data in one host
+        #known_data: {213.47.23.195: [[User1,DataFromServer1], [User5,DataFromServer5]]}
+        # Example to fix the data in two host
+        #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
+        # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
+        # known_data: {213.47.23.195: [random]}
+        known_blocks: {'all_routers': 'all_attackers'}
+        # Example of known blocks. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
+        # known_blocks: {192.168.2.2: {192.168.1.3}}
+        # You can also use the wildcard strings 'all_routers' and 'all_attackers' to mean that all the controlled hosts of all the attackers should be in this list in order to win
+
+      start_position:
+        known_networks: [all_local]
+        known_hosts: [all_local]
+        # The attacker must always at least control the CC if the goal is to exfiltrate there
+        # Example of fixing the starting point of the agent in a local host
+        controlled_hosts: [all_local]
+        # Example of asking a random position to start the agent
+        # controlled_hosts: [213.47.23.195, random]
+        # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+        known_services: {all_local}
+        # known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+        # Same format as before
+        known_data: {all_local}
+        # Blocked IPs
+        blocked_ips: {}
+        known_blocks: {}
+        # Example of known blocks to start with. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
+        # known_blocks: {192.168.2.2: {192.168.1.3}}
+
 env:
   # random means to choose the seed in a random way, so it is not fixed
   random_seed: 'random'

From 75399ca61c258ea3168a639c7682735e3730f265 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 16:52:27 +0200
Subject: [PATCH 27/87] test. Fix configuration of env

---
 tests/netsecenv-task-for-testing.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/netsecenv-task-for-testing.yaml b/tests/netsecenv-task-for-testing.yaml
index c5041855..a7883b12 100644
--- a/tests/netsecenv-task-for-testing.yaml
+++ b/tests/netsecenv-task-for-testing.yaml
@@ -4,7 +4,7 @@
 coordinator:
   # Define the conf for all the 'attacker' agents.
   agents:
-    attackers:
+    Attacker:
       goal:
         description: "Exfiltrate DatabaseData to '213.47.23.195'"
         # Put randomize_goal_every_episode in True if you put any of the goal values in 'random'
@@ -15,6 +15,7 @@ coordinator:
         known_services: {}
         known_data: {213.47.23.195: [[User1,DatabaseData]]}
         known_blocks: {}
+
       start_position:
         known_networks: []
         known_hosts: []
@@ -25,6 +26,7 @@ coordinator:
         # Same format as before
         known_data: {}
         known_blocks: {}
+
     Defender:
       goal:
         description: "Block all attackers"

From 952a4f2c3d873ad07b4f0dbc5ebea24be2b2db3f Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 26 Jun 2024 16:57:07 +0200
Subject: [PATCH 28/87] netsecenv_conf. Delete the global defender and add a
 new defender with goal and start_position.

---
 env/netsecenv_conf.yaml | 44 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 66acc162..b1ec4a3a 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -96,6 +96,50 @@ coordinator:
         # Example of known blocks to start with. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
         # known_blocks: {192.168.2.2: {192.168.1.3}}
 
+    defenders:
+      goal:
+            description: "Block all attackers"
+            is_any_part_of_goal_random: False
+            known_networks: []
+            # Example
+            #known_networks: [192.168.1.0/24, 192.168.3.0/24]
+            known_hosts: []
+            # Example
+            #known_hosts: [192.168.1.1, 192.168.1.2]
+            controlled_hosts: []
+            # Example
+            #controlled_hosts: [213.47.23.195, 192.168.1.3]
+            # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+            known_services: {}
+            # Example
+            #known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+            # In data, put the target host that must have the data and which data in format user,data
+            # Example to fix the data in one host
+            known_data: {}
+            # Example to fix two data in one host
+            #known_data: {213.47.23.195: [[User1,DataFromServer1], [User5,DataFromServer5]]}
+            # Example to fix the data in two host
+            #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
+            # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
+            # known_data: {213.47.23.195: [random]}
+            blocked_ips: {all_attackers}
+
+          start_position:
+            known_networks: [all_local]
+            known_hosts: [all_local]
+            # The attacker must always at least control the CC if the goal is to exfiltrate there
+            # Example of fixing the starting point of the agent in a local host
+            controlled_hosts: [all_local]
+            # Example of asking a random position to start the agent
+            # controlled_hosts: [213.47.23.195, random]
+            # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+            known_services: {all_local}
+            # known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+            # Same format as before
+            known_data: {all_local}
+            # Blocked IPs
+            blocked_ips: {}
+
 env:
   # random means to choose the seed in a random way, so it is not fixed
   random_seed: 'random'

From 994f47f1d589d87bfdf54116fd068e7c8af6f32e Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Wed, 17 Jul 2024 11:51:14 +0200
Subject: [PATCH 29/87] netsecenv conf. Migrate attackers to Attacker and
 defenders to Defender. So it can be found by the new code

---
 env/netsecenv_conf.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index b1ec4a3a..a1bb41cb 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -96,7 +96,7 @@ coordinator:
         # Example of known blocks to start with. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
         # known_blocks: {192.168.2.2: {192.168.1.3}}
 
-    defenders:
+    Defender:
       goal:
             description: "Block all attackers"
             is_any_part_of_goal_random: False
@@ -124,7 +124,7 @@ coordinator:
             # known_data: {213.47.23.195: [random]}
             blocked_ips: {all_attackers}
 
-          start_position:
+      start_position:
             known_networks: [all_local]
             known_hosts: [all_local]
             # The attacker must always at least control the CC if the goal is to exfiltrate there

From d27bd59d5a12369424ab927af94befb6f1d3b1c5 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 18 Jul 2024 17:52:22 +0200
Subject: [PATCH 30/87] In conf of the env for goals/start position. Add the
 known_blocks

---
 env/netsecenv_conf.yaml | 44 -----------------------------------------
 1 file changed, 44 deletions(-)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index a1bb41cb..66acc162 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -96,50 +96,6 @@ coordinator:
         # Example of known blocks to start with. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
         # known_blocks: {192.168.2.2: {192.168.1.3}}
 
-    Defender:
-      goal:
-            description: "Block all attackers"
-            is_any_part_of_goal_random: False
-            known_networks: []
-            # Example
-            #known_networks: [192.168.1.0/24, 192.168.3.0/24]
-            known_hosts: []
-            # Example
-            #known_hosts: [192.168.1.1, 192.168.1.2]
-            controlled_hosts: []
-            # Example
-            #controlled_hosts: [213.47.23.195, 192.168.1.3]
-            # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
-            known_services: {}
-            # Example
-            #known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
-            # In data, put the target host that must have the data and which data in format user,data
-            # Example to fix the data in one host
-            known_data: {}
-            # Example to fix two data in one host
-            #known_data: {213.47.23.195: [[User1,DataFromServer1], [User5,DataFromServer5]]}
-            # Example to fix the data in two host
-            #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
-            # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
-            # known_data: {213.47.23.195: [random]}
-            blocked_ips: {all_attackers}
-
-      start_position:
-            known_networks: [all_local]
-            known_hosts: [all_local]
-            # The attacker must always at least control the CC if the goal is to exfiltrate there
-            # Example of fixing the starting point of the agent in a local host
-            controlled_hosts: [all_local]
-            # Example of asking a random position to start the agent
-            # controlled_hosts: [213.47.23.195, random]
-            # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
-            known_services: {all_local}
-            # known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
-            # Same format as before
-            known_data: {all_local}
-            # Blocked IPs
-            blocked_ips: {}
-
 env:
   # random means to choose the seed in a random way, so it is not fixed
   random_seed: 'random'

From 9319e574967b9b0826e92584bbe14bc6d482de41 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 16:34:34 +0200
Subject: [PATCH 31/87] netsecenv. conf file adapt for defender

---
 tests/netsecenv-task-for-testing.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/netsecenv-task-for-testing.yaml b/tests/netsecenv-task-for-testing.yaml
index a7883b12..fb3dbbdf 100644
--- a/tests/netsecenv-task-for-testing.yaml
+++ b/tests/netsecenv-task-for-testing.yaml
@@ -15,7 +15,6 @@ coordinator:
         known_services: {}
         known_data: {213.47.23.195: [[User1,DatabaseData]]}
         known_blocks: {}
-
       start_position:
         known_networks: []
         known_hosts: []
@@ -26,7 +25,6 @@ coordinator:
         # Same format as before
         known_data: {}
         known_blocks: {}
-
     Defender:
       goal:
         description: "Block all attackers"

From 64c12b8ec30b0c44026b0dd7e398b42204d1557d Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 16:52:27 +0200
Subject: [PATCH 32/87] test. Fix configuration of env

---
 tests/netsecenv-task-for-testing.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/netsecenv-task-for-testing.yaml b/tests/netsecenv-task-for-testing.yaml
index fb3dbbdf..a7883b12 100644
--- a/tests/netsecenv-task-for-testing.yaml
+++ b/tests/netsecenv-task-for-testing.yaml
@@ -15,6 +15,7 @@ coordinator:
         known_services: {}
         known_data: {213.47.23.195: [[User1,DatabaseData]]}
         known_blocks: {}
+
       start_position:
         known_networks: []
         known_hosts: []
@@ -25,6 +26,7 @@ coordinator:
         # Same format as before
         known_data: {}
         known_blocks: {}
+
     Defender:
       goal:
         description: "Block all attackers"

From 90162861e91e30fd4587ff1fc6f64a231a8e3eea Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 17:06:27 +0200
Subject: [PATCH 33/87] README. Add the new defender agent conf and description

---
 README.md | 65 ++++++++++++++++++++++++-------------------------------
 1 file changed, 28 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 67bdcf3b..37cb8c4e 100755
--- a/README.md
+++ b/README.md
@@ -156,7 +156,7 @@ Configuration of the attacking agents. Consists of two parts:
 Example attacker configuration:
 ```YAML
 agents:
-  attackers:
+  Attacker:
     goal:
       randomize_goal_every_episode: False
       known_networks: []
@@ -164,6 +164,7 @@ agents:
       controlled_hosts: []
       known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
       known_data: {213.47.23.195: ["random"]}
+      known_blocks: {'all_routers': 'all_attackers'}
 
     start_position:
       known_networks: []
@@ -174,49 +175,39 @@ agents:
       # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
       known_services: {}
       known_data: {}
+      known_blocks: {}
 ```
 ### Defender configuration (`defenders`)
-Definition of defending agent's properties. Currently, the defender is **NOT** a separate agent but it is considered part of the environment.
-`type` - Type of the defender. Three types are currently implemented:
-  1. `NoDefender` (default) - interation without defender
-  2. `StochasticDefender` - detections are based on ActionType probabilities (defined in the task configuraion, section `action_detetection_prob`).
-  3. `StochasticDefenderWithThreshold` - Modification of stochastic defender. Detection probabilities are used *IF* threasholds in the particular ActionType is reached. Thresholds are computed in time windows defined by `tw_size` (`tw_size=5` means that 5 previous actions are taken into account). If ratio of some ActionType within the timewindow is above the threshold, the probability defined in the task configuraion, section `action_detetection_prob` is used to determine if the action was detected. For action *BELOW* the thresholds, no detection is made. Additionally, thresholds for consecutive action type is defined in `consecutive_actions`. For example with
-```YAML
-  scan_network:
-    consecutive_actions: 2
-```
-if the agent uses action ScanNetwork (regardless of the parameters) twice or more, the detection can occur. Action types `FindData` and `exploit_service` have additional thresholds for repeated actions (with parameters) throughout the **WHOLE** episode (e.g. if action `<ActionType.FindData|{'target_host': 192.168.2.2}>` is played more than 2 with following configuration, the detection can happen based on the defined probability).  
+Currently, the defender **is** a separate agent.
+
+If you want to have a defender in the game, you need to connect a defender agent. If you don't want to have a defender, just don't use any.
 
 Example of defender configuration:
 ```YAML
-agents:
-  defenders:
-    type: 'StochasticWithThreshold'
-    tw_size: 5
-    thresholds:
-      scan_network:
-        consecutive_actions: 2
-        tw_ratio: 0.25
-      find_services:
-        consecutive_actions: 3
-        tw_ratio: 0.3
-      exploit_service:
-        repeated_actions_episode: 2
-        tw_ratio: 0.25
-      find_data:
-        tw_ratio: 0.5
-        repeated_actions_episode: 2
-      exfiltrate_data:
-        consecutive_actions: 2
-        tw_ratio: 0.25
-    action_detetection_prob:
-        scan_network: 0.05
-        find_services: 0.075
-        exploit_service: 0.1
-        find_data: 0.025
-        exfiltrate_data: 0.025
+   Defender:
+      goal:
+        description: "Block all attackers"
+        is_any_part_of_goal_random: False
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: []
+        known_services: {}
+        known_data: {}
+        known_blocks: {'all_routers': 'all_attackers'}
+
+      start_position:
+        known_networks: [all_local]
+        known_hosts: [all_local]
+        controlled_hosts: [all_local]
+        known_services: {all_local}
+        known_data: {all_local}
+        blocked_ips: {}
+        known_blocks: {}
 ```
 
+As with other agents, the description is only text for the agent, so that it knows what it is supposed to do to win. In this example the goal of the defender is determined by a state where the known blocks can be applied in any router's firewall and must include all the controlled hosts of all the attackers. These are `magic` words that make the coordinator check these positions without revealing them to the defender.
+
+
 ## Definition of the network topology
 The network topology and rules are defined using a [CYST](https://pypi.org/project/cyst/) simulator configuration. Cyst defines a complex network configuration, and this environment does not use all Cyst features for now. CYST components currently used are:
 

From 0731c522c09b20c8712852eb6cbe4ece90df0ce3 Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 17:13:42 +0200
Subject: [PATCH 34/87] utils. changes for ruff

---
 utils/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index 703c9e86..295d9a2a 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -9,7 +9,7 @@
 from env.scenarios import scenario_configuration
 from env.scenarios import smaller_scenario_configuration
 from env.scenarios import tiny_scenario_configuration
-from env.game_components import IP, Data, Network, Service, GameState, Action, ActionType, Observation
+from env.game_components import IP, Data, Network, Service, GameState, Action, Observation
 import netaddr
 import logging
 import csv
@@ -157,10 +157,10 @@ def read_agents_known_blocks(self, type_agent: str, type_data: str) -> dict:
             try:
                 # Check the host is a good ip
                 _ = netaddr.IPAddress(target_host)
-                target_host_IP = IP(target_host)
+                target_host_ip = IP(target_host)
                 for known_blocked_host in dict_blocked_hosts.values():
-                    known_blocked_host_IP = IP(known_blocked_host)
-                    known_blocks[target_host_IP].append(known_blocked_host_IP)
+                    known_blocked_host_ip = IP(known_blocked_host)
+                    known_blocks[target_host_ip].append(known_blocked_host_ip)
             except (ValueError, netaddr.AddrFormatError):
                 if target_host.lower() == "all_routers":
                     known_blocks["all_routers"] = dict_blocked_hosts

From c6940ba0ea102733596268997a979eb0ef2f7d2c Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 17:13:54 +0200
Subject: [PATCH 35/87] netsecgame. Changes for ruff

---
 env/network_security_game.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 2c352982..ae116f05 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -106,7 +106,6 @@ def num_actions(self):
         return len(self.get_all_actions())
     
     def get_all_states(self):
-        import itertools
         def all_combs(data):
             combs = []
             for i in range(1, len(data)+1):
@@ -591,6 +590,7 @@ def _state_parts_deep_copy(self, current:components.GameState)->tuple:
         next_controlled_h = copy.deepcopy(current.controlled_hosts)
         next_services = copy.deepcopy(current.known_services)
         next_data = copy.deepcopy(current.known_data)
+        next_blocked = copy.deepcopy(current.known_blocked)
         return next_nets, next_known_h, next_controlled_h, next_services, next_data
 
     def _firewall_check(self, src_ip:components.IP, dst_ip:components.IP)->bool:

From 14d00f92e468bd30607bf5c9633fc2d0efcf69cf Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 18:39:27 +0200
Subject: [PATCH 36/87] netsecgame. Add the blocks to the state execution
 action.

---
 env/network_security_game.py | 60 +++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 24 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index ae116f05..28b45386 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -550,13 +550,14 @@ def _get_data_content(self, host_ip:str, data_id:str)->str:
             logger.debug("Data content not found because target IP does not exists.")
         return content
     
-    def _execute_action(self, current_state:components.GameState, action:components.Action, action_type='netsecenv')-> components.GameState:
+    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id, action_type='netsecenv')-> components.GameState:
         """
         Execute the action and update the values in the state
         Before this function it was checked if the action was successful
         So in here all actions were already successful.
 
         - actions_type: Define if the action is simulated in netsecenv or in the real world
+        - agent_id: is the name or type of agent that requested the action
 
         Returns: A new GameState
         """
@@ -590,8 +591,8 @@ def _state_parts_deep_copy(self, current:components.GameState)->tuple:
         next_controlled_h = copy.deepcopy(current.controlled_hosts)
         next_services = copy.deepcopy(current.known_services)
         next_data = copy.deepcopy(current.known_data)
-        next_blocked = copy.deepcopy(current.known_blocked)
-        return next_nets, next_known_h, next_controlled_h, next_services, next_data
+        next_blocked = copy.deepcopy(current.known_blocks)
+        return next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked
 
     def _firewall_check(self, src_ip:components.IP, dst_ip:components.IP)->bool:
         """Checks if firewall allows connection from 'src_ip to ''dst_ip'"""
@@ -605,7 +606,7 @@ def _execute_scan_network_action(self, current_state:components.GameState, actio
         """
         Executes the ScanNetwork action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tScanning {action.parameters['target_network']}")
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             new_ips = set()
@@ -620,13 +621,13 @@ def _execute_scan_network_action(self, current_state:components.GameState, actio
             next_known_h = next_known_h.union(new_ips)
         else:
             logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
     def _execute_find_services_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the FindServices action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tSearching for services in {action.parameters['target_host']}")
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
@@ -644,13 +645,13 @@ def _execute_find_services_action(self, current_state:components.GameState, acti
                 logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
         else:
             logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_find_data_action(self, current:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the FindData action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current)
         logger.info(f"\t\tSearching for data in {action.parameters['target_host']}")
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current.controlled_hosts:
             if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
@@ -665,13 +666,13 @@ def _execute_find_data_action(self, current:components.GameState, action:compone
                 logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
         else:
             logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_exfiltrate_data_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the ExfiltrateData action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tAttempting to Exfiltrate {action.parameters['data']} from {action.parameters['source_host']} to {action.parameters['target_host']}")
         # Is the target host controlled?
         if action.parameters["target_host"] in current_state.controlled_hosts:
@@ -709,13 +710,13 @@ def _execute_exfiltrate_data_action(self, current_state:components.GameState, ac
                 logger.debug("\t\t\tCan not exfiltrate. Source host is not controlled.")
         else:
             logger.debug("\t\t\tCan not exfiltrate. Target host is not controlled.")
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_exploit_service_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the ExploitService action in the environment
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
         # We don't check if the target is a known_host because it can be a blind attempt to attack
         logger.info(f"\t\tAttempting to ExploitService in '{action.parameters['target_host']}':'{action.parameters['target_service']}'")
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
@@ -746,7 +747,7 @@ def _execute_exploit_service_action(self, current_state:components.GameState, ac
                 logger.debug("\t\t\tCan not exploit. Target host does not exist.")
         else:
             logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_block_ip_action(self, current_state, action):
         """
@@ -760,41 +761,53 @@ def _execute_block_ip_action(self, current_state, action):
         - Check if the agent controls the source host
         - Check if the agent controls the target host
         - Add the rule to the FW list
+        - Update the state
         """
         blocked_host = action.parameters['blocked_host']
 
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
+        next_nets, next_known_h, next_controlled_h, next_services, next_blocked, next_blocked = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tBlockIP {action.parameters['target_host']}")
+        # Is the src in the controlled hosts?
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
+            # Is the target in the controlled hosts?
             if "target_host" in action.parameters.keys() and action.parameters["target_host"] in current_state.controlled_hosts:
                 # For now there is only one FW in the main router, but this should change in the future. 
                 # This means we ignore the 'target_host' that would be the router where this is applied.
 
-                # Stop the blocked host to connect to any other IP
+                # Stop the blocked host to connect _to_ any other IP
                 try:
                     self._firewall[blocked_host] = set()
                 except KeyError:
                     # The blocked_host host was not in the list
                     pass
-                # Stop the other hosts to connect to the blocked_host
+                # Stop the other hosts to connect _to the blocked_host_
                 for host in self._firewall.keys():
                     try:
                         self._firewall[host].remove(blocked_host)
                     except KeyError:
                         # The blocked_host host was not in the list
                         pass
+                # Update the state of blocked ips. It is a dict with key target_host and a set with blocked hosts inside
+                new_blocked = set()
+                # Store the blocked host IP in the set of blocked hosts
+                new_blocked.add(action.parameters["blocked_host"])
+                if len(new_blocked) > 0:
+                    if action.parameters["target_host"] not in next_blocked.keys():
+                        next_blocked[action.parameters["target_host"]] = new_blocked
+                    else:
+                        next_blocked[action.parameters["target_host"]] = next_blocked[action.parameters["target_host"]].union(new_blocked)
             else:
                 logger.info(f"\t\t\t Invalid target_host:'{action.parameters['target_host']}'")
         else:
             logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_blocked, next_nets, next_blocked)
 
 
     def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the ScanNetwork action in the the real world
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
         nmap_file_xml = 'nmap-result.xml'
         command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
@@ -826,13 +839,13 @@ def _execute_scan_network_action_real_world(self, current_state:components.GameS
             logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
             new_ips.add(ip)
         next_known_h = next_known_h.union(new_ips)
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_find_services_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the FindServices action in the real world
         """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data = self._state_parts_deep_copy(current_state)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
         nmap_file_xml = 'nmap-result.xml'
         command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"
@@ -868,8 +881,7 @@ def _execute_find_services_action_real_world(self, current_state:components.Game
             logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
             next_known_h.add(action.parameters["target_host"])
             next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
-    
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
     def create_state_from_view(self, view:dict, add_neighboring_nets=True)->components.GameState:
         """
@@ -1051,14 +1063,14 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
         in: action
         out: observation of the state of the env
         """
-        logger.info(f"Agent's action: {action}")
+        logger.info(f"Agent {agent_id}. Action: {action}")
         # Reward for taking an action
         reward = self._rewards["step"]
 
         # 1. Perform the action
         self._actions_played.append(action)
         if random.random() <= action.type.default_success_p or action_type == 'realworld':
-            next_state = self._execute_action(state, action, action_type=action_type)
+            next_state = self._execute_action(state, action, agent_id, action_type=action_type)
         else:
             logger.info("\tAction NOT sucessful")
             next_state = state

From bebeba041ad0dd87f016c3ce15d27942735d1bfb Mon Sep 17 00:00:00 2001
From: Sebastian Garcia <eldraco@gmail.com>
Date: Thu, 1 Aug 2024 18:39:45 +0200
Subject: [PATCH 37/87] coord. Add the blocks to the coordinator

---
 coordinator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/coordinator.py b/coordinator.py
index 2222aaf0..2417b19e 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -558,6 +558,7 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
         goal_reached["controlled_hosts"] = set(goal_conditions["controlled_hosts"]) <= set(state.controlled_hosts)
         goal_reached["services"] = goal_dict_satistfied(goal_conditions["known_services"], state.known_services)
         goal_reached["data"] = goal_dict_satistfied(goal_conditions["known_data"], state.known_data)
+        goal_reached["known_blocks"] = goal_dict_satistfied(goal_conditions["known_blocks"], state.known_blocks)
         self.logger.debug(f"\t{goal_reached}")
         return all(goal_reached.values())
 

From 7b78febf38d138c1f5bdcbd4074915349f9e05a2 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 7 Oct 2024 14:50:50 +0200
Subject: [PATCH 38/87] Add termination variable to the server

---
 coordinator.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/coordinator.py b/coordinator.py
index 2417b19e..24504e1f 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -90,7 +90,14 @@ def __init__(self, actions_queue, answers_queue, max_connections):
         self.max_connections = max_connections
         self.current_connections = 0
         self.logger = logging.getLogger("AIDojo-Server")
+        self._stop = False
 
+    def close(self)->None:
+        self.logger.info(
+           "Stopping server"
+        )
+        self._stop = True
+    
     async def handle_new_agent(self, reader, writer):
         async def send_data_to_agent(writer, data: str) -> None:
             """
@@ -113,7 +120,7 @@ async def send_data_to_agent(writer, data: str) -> None:
         try:
             addr = writer.get_extra_info("peername")
             self.logger.info(f"New agent connected: {addr}")
-            while True:
+            while not self._stop:
                 data = await reader.read(500)
                 raw_message = data.decode().strip()
                 if len(raw_message):
@@ -151,6 +158,9 @@ async def send_data_to_agent(writer, data: str) -> None:
         finally:
             # Decrement the count of current connections
             self.current_connections -= 1
+            writer.close()
+            return
+            
     async def __call__(self, reader, writer):
         await self.handle_new_agent(reader, writer)
 
@@ -188,7 +198,8 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
     @property
     def episode_end(self)->bool:
         # Terminate episode if at least one player wins or reaches the timeout
-        return any(self._agent_episode_ends.values())
+        self.logger.debug(f"End evaluation: {self._agent_episode_ends.values()}")
+        return all(self._agent_episode_ends.values())
     
     def convert_msg_dict_to_json(self, msg_dict)->str:
             try:
@@ -604,7 +615,7 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
         action="store",
         required=False,
         type=str,
-        default="WARNING",
+        default="INFO",
     )
 
     args = parser.parse_args()

From 5d8a92dfeceee4115716b9f59d26c3630b811260 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 7 Oct 2024 16:26:32 +0200
Subject: [PATCH 39/87] Fix processing of start state and goal description

---
 utils/utils.py | 60 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index 295d9a2a..eaba5497 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -153,19 +153,29 @@ def read_agents_known_blocks(self, type_agent: str, type_data: str) -> dict:
         """
         known_blocks_conf = self.config["coordinator"]['agents'][type_agent][type_data]['known_blocks']
         known_blocks = {}
-        for target_host, dict_blocked_hosts in known_blocks_conf.items():
+        for target_host, block_list in known_blocks_conf.items():
             try:
-                # Check the host is a good ip
-                _ = netaddr.IPAddress(target_host)
-                target_host_ip = IP(target_host)
-                for known_blocked_host in dict_blocked_hosts.values():
-                    known_blocked_host_ip = IP(known_blocked_host)
-                    known_blocks[target_host_ip].append(known_blocked_host_ip)
-            except (ValueError, netaddr.AddrFormatError):
-                if target_host.lower() == "all_routers":
-                    known_blocks["all_routers"] = dict_blocked_hosts
-            except (ValueError):
-                known_blocks = {}
+                target_host  = IP(target_host)
+            except ValueError:
+                self.logger.error(f"Error when converting {target_host} to IP address object")
+            if isinstance(block_list,list):
+                known_blocks[target_host] = map(lambda x: IP(x), block_list)
+            elif block_list == "all_attackers":
+                known_blocks[target_host] = block_list
+            else:
+                raise ValueError(f"Unsupported value in 'known_blocks': {known_blocks_conf}")
+            # try:
+            #     # Check the host is a good ip
+            #     _ = netaddr.IPAddress(target_host)
+            #     target_host_ip = IP(target_host)
+            #     for known_blocked_host in dict_blocked_hosts.values():
+            #         known_blocked_host_ip = IP(known_blocked_host)
+            #         known_blocks[target_host_ip].append(known_blocked_host_ip)
+            # except (ValueError, netaddr.AddrFormatError):
+            #     if target_host.lower() == "all_routers":
+            #         known_blocks["all_routers"] = dict_blocked_hosts
+            # except (ValueError):
+            #     known_blocks = {}
         return known_blocks
     
     def read_agents_known_services(self, type_agent: str, type_data: str) -> dict:
@@ -218,8 +228,14 @@ def read_agents_known_hosts(self, type_agent: str, type_data: str) -> dict:
             try:
                 _ = netaddr.IPAddress(ip)
                 known_hosts.add(IP(ip))
-            except (ValueError, netaddr.AddrFormatError):
-                self.logger('Configuration problem with the known hosts')
+            except (ValueError, netaddr.AddrFormatError) as e :
+                if ip == 'random':
+                    # A random start ip was asked for
+                    known_hosts.add('random')
+                elif ip == 'all_local':
+                    known_hosts.add('all_local')
+                else:
+                    self.logger.error(f'Configuration problem with the known hosts: {e}')
         return known_hosts
 
     def read_agents_controlled_hosts(self, type_agent: str, type_data: str) -> dict:
@@ -232,12 +248,14 @@ def read_agents_controlled_hosts(self, type_agent: str, type_data: str) -> dict:
             try:
                 _ = netaddr.IPAddress(ip)
                 controlled_hosts.add(IP(ip))
-            except (ValueError, netaddr.AddrFormatError):
+            except (ValueError, netaddr.AddrFormatError) as e:
                 if ip == 'random' :
                     # A random start ip was asked for
                     controlled_hosts.add('random')
+                elif ip == 'all_local':
+                    controlled_hosts.add('all_local')
                 else:
-                    self.logger('Configuration problem with the known hosts')
+                    self.logger.error(f'Configuration problem with the controlled hosts: {e}')
         return controlled_hosts
 
     def get_player_win_conditions(self, type_of_player):
@@ -263,6 +281,9 @@ def get_player_win_conditions(self, type_of_player):
         # Goal data
         known_data = self.read_agents_known_data(type_of_player, 'goal')
 
+        # Blocks
+        known_blocks = self.read_agents_known_blocks(type_of_player, 'goal')
+
         player_goal = {}
         player_goal['known_networks'] = known_networks
         player_goal['controlled_hosts'] = controlled_hosts
@@ -307,7 +328,7 @@ def get_start_position(self, agent_role):
             case "Attacker":
                 return self.get_player_start_position(agent_role)
             case "Defender":
-                return {}
+                return self.get_player_start_position(agent_role)
             case "Benign":
                 return {
                     'known_networks': set(),
@@ -356,7 +377,10 @@ def get_goal_description(self, agent_role)->dict:
                 except KeyError:
                     description = ""
             case "Defender":
-                description = ""
+                try:
+                    description = self.config['coordinator']['agents'][agent_role]["goal"]["description"]
+                except KeyError:
+                    description = ""
             case "Benign":
                 description = ""
             case _:

From ee3cb1c1d93d359ee35690b3298d00e92fb9b0cb Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 7 Oct 2024 17:15:48 +0200
Subject: [PATCH 40/87] add support for a wild card 'all_local' in controlled
 hosts

---
 env/network_security_game.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 28b45386..8de57a3d 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -883,6 +883,15 @@ def _execute_find_services_action_real_world(self, current_state:components.Game
             next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
+    def _get_all_local_ips(self)->set:
+        local_ips = set()
+        for net, ips in self._networks.items():
+            if netaddr.IPNetwork(str(net)).ip.is_ipv4_private_use():
+                for ip in ips:
+                    local_ips.add(self._ip_mapping[ip])
+        logger.info(f"\t\t\tLocal ips: {local_ips}")
+        return local_ips
+
     def create_state_from_view(self, view:dict, add_neighboring_nets=True)->components.GameState:
         """
         Builds a GameState from given view.
@@ -908,6 +917,10 @@ def create_state_from_view(self, view:dict, add_neighboring_nets=True)->componen
                 selected = random.choice(self.hosts_to_start)
                 controlled_hosts.add(selected)
                 logger.info(f'\t\tMaking agent start in {selected}')
+            elif host == "all_local":
+                # all local ips
+                logger.info('\t\tAdding all local hosts to agent')
+                controlled_hosts.union(self._get_all_local_ips())
             else:
                 logger.error(f"Unsupported value encountered in start_position['controlled_hosts']: {host}")
         # re-map all known based on current mapping in self._ip_mapping
@@ -1005,11 +1018,7 @@ def update_goal_descriptions(self, goal_description):
             new_description = new_description.replace(str(ip), str(self._ip_mapping[ip]))
         return new_description
     
-    # def store_trajectories_to_file(self, filename:str)->None:
-    #     if self._trajectories:
-    #         logger.info(f"Saving trajectories to '{filename}'")
-    #         with open(filename, "w") as outfile:
-    #             json.dump(self._trajectories, outfile)
+
         
     # def save_trajectories(self, trajectory_filename=None):
     #     steps = []

From 84d2ea5bbd7f8dcdfddf0f9ec122023039616244 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 7 Oct 2024 17:18:14 +0200
Subject: [PATCH 41/87] Improve start position for defender

---
 env/netsecenv_conf.yaml | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 66acc162..4ce2e3e3 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -72,24 +72,20 @@ coordinator:
         #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
         # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
         # known_data: {213.47.23.195: [random]}
-        known_blocks: {'all_routers': 'all_attackers'}
+        known_blocks: {213.47.23.195: 'all_attackers'}
         # Example of known blocks. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
         # known_blocks: {192.168.2.2: {192.168.1.3}}
         # You can also use the wildcard string 'all_routers', and 'all_attackers', to mean that all the controlled hosts of all the attackers should be in this list in order to win
 
       start_position:
-        known_networks: [all_local]
-        known_hosts: [all_local]
-        # The attacker must always at least control the CC if the goal is to exfiltrate there
-        # Example of fixing the starting point of the agent in a local host
+        # should be empty for defender - will be extracted from controlled hosts
+        known_networks: []
+        # should be empty for defender - will be extracted from controlled hosts
+        known_hosts: []
+        # list of controlled hosts, wildard "all_local" can be used to include all local IPs
         controlled_hosts: [all_local]
-        # Example of asking a random position to start the agent
-        # controlled_hosts: [213.47.23.195, random]
-        # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
-        known_services: {all_local}
-        # known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
-        # Same format as before
-        known_data: {all_local}
+        known_services: {}
+        known_data: {}
         # Blocked IPs
         blocked_ips: {}
         known_blocks: {}

From 50ac5b0d059c4c2c0bd8471e233ace255f50142f Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 7 Oct 2024 17:35:36 +0200
Subject: [PATCH 42/87] Fixed error in not returning data in
 _execute_block_ip_action

---
 env/network_security_game.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 8de57a3d..84ba0668 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -765,7 +765,7 @@ def _execute_block_ip_action(self, current_state, action):
         """
         blocked_host = action.parameters['blocked_host']
 
-        next_nets, next_known_h, next_controlled_h, next_services, next_blocked, next_blocked = self._state_parts_deep_copy(current_state)
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
         logger.info(f"\t\tBlockIP {action.parameters['target_host']}")
         # Is the src in the controlled hosts?
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
@@ -920,7 +920,7 @@ def create_state_from_view(self, view:dict, add_neighboring_nets=True)->componen
             elif host == "all_local":
                 # all local ips
                 logger.info('\t\tAdding all local hosts to agent')
-                controlled_hosts.union(self._get_all_local_ips())
+                controlled_hosts = controlled_hosts.union(self._get_all_local_ips())
             else:
                 logger.error(f"Unsupported value encountered in start_position['controlled_hosts']: {host}")
         # re-map all known based on current mapping in self._ip_mapping
@@ -1017,8 +1017,6 @@ def update_goal_descriptions(self, goal_description):
         for ip in self._ip_mapping:
             new_description = new_description.replace(str(ip), str(self._ip_mapping[ip]))
         return new_description
-    
-
         
     # def save_trajectories(self, trajectory_filename=None):
     #     steps = []

From 56a40a851b6dc87d4f1024119bfc9af36e94dd0c Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 7 Oct 2024 17:38:32 +0200
Subject: [PATCH 43/87] Fix return values

---
 env/network_security_game.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 84ba0668..def21e6e 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -800,7 +800,7 @@ def _execute_block_ip_action(self, current_state, action):
                 logger.info(f"\t\t\t Invalid target_host:'{action.parameters['target_host']}'")
         else:
             logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_blocked, next_nets, next_blocked)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
 
     def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:

From 861927eefac2db735a3e9c5a0bbd4ddfd429d4ae Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 7 Oct 2024 17:43:16 +0200
Subject: [PATCH 44/87] Fix restoring GameState Object from dict

---
 env/game_components.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/game_components.py b/env/game_components.py
index 9c69dffe..f5352f91 100755
--- a/env/game_components.py
+++ b/env/game_components.py
@@ -420,7 +420,7 @@ def from_dict(cls, data_dict:dict):
             known_services = {IP(k):{Service(s["name"], s["type"], s["version"], s["is_local"])
                 for s in services} for k,services in data_dict["known_services"].items()},  
             known_data = {IP(k):{Data(v["owner"], v["id"]) for v in values} for k,values in data_dict["known_data"].items()},
-            known_blocks = {IP(target_host):{IP(blocked_host) for blocked_host in blocked_hosts} for target_host, blocked_hosts in data_dict["known_blocks"].items()}
+            known_blocks = {IP(target_host):{IP(blocked_host["ip"]) for blocked_host in blocked_hosts} for target_host, blocked_hosts in data_dict["known_blocks"].items()}
                 )
         return state
 

From b6c6fe263cae0f0b6500e420870b9faab222fa22 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 7 Oct 2024 18:02:28 +0200
Subject: [PATCH 45/87] Add 'known_blocks' to tests

---
 tests/test_coordinator.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index ee3d9479..f3de65fd 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -133,7 +133,9 @@ def test_check_goal_valid(self, coordinator_init):
             known_data={
                 IP("1.1.1.1"):[Data("Joe Doe", "password", 10, "txt")]
             },
-            known_networks=[Network("1.1.1.1","24")]
+            known_networks=[Network("1.1.1.1","24")],
+            known_blocks={}
+
         )
         win_conditions = {
             "known_networks":[],
@@ -144,7 +146,8 @@ def test_check_goal_valid(self, coordinator_init):
             },
             "known_data":{
                 
-            }
+            },
+            "known_blocks":{}
         }
 
         assert coordinator_init._check_goal(game_state, win_conditions) is True
@@ -159,7 +162,8 @@ def test_check_goal_invalid(self, coordinator_init):
                 known_data={
                     IP("1.1.1.1"):[Data("Joe Doe", "password", 10, "txt")]
                 },
-                known_networks=[Network("1.1.1.1","24")]
+                known_networks=[Network("1.1.1.1","24")],
+                known_blocks={}
             )
             win_conditions = {
                 "known_networks":[],
@@ -170,7 +174,8 @@ def test_check_goal_invalid(self, coordinator_init):
                 },
                 "known_data":{
                     
-                }
+                },
+                "known_blocks":{}
             }
 
             assert coordinator_init._check_goal(game_state, win_conditions) is False
@@ -185,13 +190,15 @@ def test_check_goal_empty(self, coordinator_init):
         known_data={
             IP("1.1.1.1"):[Data("Joe Doe", "password", 10, "txt")]
         },
-        known_networks=[Network("1.1.1.1","24")]
+        known_networks=[Network("1.1.1.1","24")],
+        known_blocks={}
         )
         win_conditions = {
             "known_networks":[],
             "known_hosts":[],
             "controlled_hosts":[],
             "known_services":{},
-            "known_data":{}
+            "known_data":{},
+            "known_blocks":{}
         }
         assert coordinator_init._check_goal(game_state, win_conditions) is True
\ No newline at end of file

From 961e953089dce5ff7c58e59e214663a65bf775fe Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 13:31:49 +0200
Subject: [PATCH 46/87] remove unused code

---
 env/network_security_game.py | 134 ++++++++++++++++-------------------
 1 file changed, 60 insertions(+), 74 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index def21e6e..a378a41d 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -78,13 +78,6 @@ def __init__(self, task_config_file) -> None:
             logger.info("Dynamic change of the IP and network addresses enabled")
             self._faker_object = Faker()
             Faker.seed(seed)
-        # # read if replay buffer should be store on disc
-        # if self.task_config.get_store_replay_buffer():
-        #     logger.info("Storing of replay buffer enabled")
-        #     self._episode_replay_buffer = []
-        #     self._trajectories = []
-        # else:
-        #     logger.info("Storing of replay buffer disabled")
         self._episode_replay_buffer = None
 
         # Make a copy of data placements so it is possible to reset to it when episode ends
@@ -166,68 +159,68 @@ def get_all_actions(self):
 
         return {k:v for k,v in enumerate(actions)}
     
-    def _process_win_conditions(self, win_conditions)->dict:
-        """
-        Method which analyses win_conditions and randomizes parts if required
-        """
-        logger.info("Processing win conditions")
-        updated_win_conditions = {}
+    # def _process_win_conditions(self, win_conditions)->dict:
+    #     """
+    #     Method which analyses win_conditions and randomizes parts if required
+    #     """
+    #     logger.info("Processing win conditions")
+    #     updated_win_conditions = {}
         
-        # networks
-        if win_conditions["known_networks"] == "random":
-            updated_win_conditions["known_networks"] = {random.choice(list(self._networks.keys()))}
-            logger.info("\t\tRadnomizing known_networks")
-        else:
-            updated_win_conditions["known_networks"] = copy.deepcopy(win_conditions["known_networks"])
-        logger.info(f"\tGoal known_networks: {updated_win_conditions['known_networks']}")
-        # known_hosts
-        if win_conditions["known_hosts"] == "random":
-            logger.info("\t\tRandomizing known_host")
-            updated_win_conditions["known_hosts"] = {random.choice(list(self._ip_to_hostname.keys()))}
-        else:
-            updated_win_conditions["known_hosts"] = copy.deepcopy(win_conditions["known_hosts"])
-        logger.info(f"\tGoal known_hosts: {updated_win_conditions['known_hosts']}")
+    #     # networks
+    #     if win_conditions["known_networks"] == "random":
+    #         updated_win_conditions["known_networks"] = {random.choice(list(self._networks.keys()))}
+    #         logger.info("\t\tRadnomizing known_networks")
+    #     else:
+    #         updated_win_conditions["known_networks"] = copy.deepcopy(win_conditions["known_networks"])
+    #     logger.info(f"\tGoal known_networks: {updated_win_conditions['known_networks']}")
+    #     # known_hosts
+    #     if win_conditions["known_hosts"] == "random":
+    #         logger.info("\t\tRandomizing known_host")
+    #         updated_win_conditions["known_hosts"] = {random.choice(list(self._ip_to_hostname.keys()))}
+    #     else:
+    #         updated_win_conditions["known_hosts"] = copy.deepcopy(win_conditions["known_hosts"])
+    #     logger.info(f"\tGoal known_hosts: {updated_win_conditions['known_hosts']}")
         
-        # controlled_hosts
-        if win_conditions["controlled_hosts"] == "random":
-            logger.info("\tRandomizing controlled_hots")
-            updated_win_conditions["controlled_hosts"] = {random.choice(list(self._ip_to_hostname.keys()))}
-        else:
-            updated_win_conditions["controlled_hosts"] = copy.deepcopy(win_conditions["controlled_hosts"])
-        logger.info(f"\tGoal controlled_hosts: {updated_win_conditions['controlled_hosts']}")
+    #     # controlled_hosts
+    #     if win_conditions["controlled_hosts"] == "random":
+    #         logger.info("\tRandomizing controlled_hots")
+    #         updated_win_conditions["controlled_hosts"] = {random.choice(list(self._ip_to_hostname.keys()))}
+    #     else:
+    #         updated_win_conditions["controlled_hosts"] = copy.deepcopy(win_conditions["controlled_hosts"])
+    #     logger.info(f"\tGoal controlled_hosts: {updated_win_conditions['controlled_hosts']}")
         
-        # services
-        updated_win_conditions["known_services"] = {}
-        for host, service_list in win_conditions["known_services"].items():
-            # Was the position defined as random?
-            if isinstance(service_list, str) and service_list.lower() == "random":
-                available_services = []
-                for service in self._services[self._ip_to_hostname[host]]:
-                    available_services.append(components.Service(service.name, service.type, service.version, service.is_local))
-                logger.info(f"\tRandomizing known_services in {host}")
-                updated_win_conditions["known_services"][host] = random.choice(available_services)
-            else:
-                updated_win_conditions["known_services"][host] = copy.deepcopy(win_conditions["known_services"][host])
-        logger.info(f"\tGoal known_services: {updated_win_conditions['known_services']}")
+    #     # services
+    #     updated_win_conditions["known_services"] = {}
+    #     for host, service_list in win_conditions["known_services"].items():
+    #         # Was the position defined as random?
+    #         if isinstance(service_list, str) and service_list.lower() == "random":
+    #             available_services = []
+    #             for service in self._services[self._ip_to_hostname[host]]:
+    #                 available_services.append(components.Service(service.name, service.type, service.version, service.is_local))
+    #             logger.info(f"\tRandomizing known_services in {host}")
+    #             updated_win_conditions["known_services"][host] = random.choice(available_services)
+    #         else:
+    #             updated_win_conditions["known_services"][host] = copy.deepcopy(win_conditions["known_services"][host])
+    #     logger.info(f"\tGoal known_services: {updated_win_conditions['known_services']}")
         
-        # data
-        # prepare all available data if randomization is needed
-        available_data = set()
-        for data in self._data.values():
-            for datapoint in data:
-                available_data.add(components.Data(datapoint.owner, datapoint.id))
+    #     # data
+    #     # prepare all available data if randomization is needed
+    #     available_data = set()
+    #     for data in self._data.values():
+    #         for datapoint in data:
+    #             available_data.add(components.Data(datapoint.owner, datapoint.id))
         
-        updated_win_conditions["known_data"] = {}
-        for host, data_set in win_conditions["known_data"].items():
-            # Was random data required in this host?
-            if isinstance(data_set, str) and data_set.lower() == "random":
-                # From all available data, randomly pick the one that is going to be requested in this host
-                updated_win_conditions["known_data"][host] = {random.choice(list(available_data))}
-                logger.info(f"\tRandomizing known_data in {host}")
-            else:
-                updated_win_conditions["known_data"][host] = copy.deepcopy(win_conditions["known_data"][host])
-        logger.info(f"\tGoal known_data: {updated_win_conditions['known_data']}")
-        return updated_win_conditions
+    #     updated_win_conditions["known_data"] = {}
+    #     for host, data_set in win_conditions["known_data"].items():
+    #         # Was random data required in this host?
+    #         if isinstance(data_set, str) and data_set.lower() == "random":
+    #             # From all available data, randomly pick the one that is going to be requested in this host
+    #             updated_win_conditions["known_data"][host] = {random.choice(list(available_data))}
+    #             logger.info(f"\tRandomizing known_data in {host}")
+    #         else:
+    #             updated_win_conditions["known_data"][host] = copy.deepcopy(win_conditions["known_data"][host])
+    #     logger.info(f"\tGoal known_data: {updated_win_conditions['known_data']}")
+    #     return updated_win_conditions
 
     def _process_cyst_config(self, configuration_objects:list)-> None:
         """
@@ -802,7 +795,6 @@ def _execute_block_ip_action(self, current_state, action):
             logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
-
     def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the ScanNetwork action in the the real world
@@ -1020,17 +1012,11 @@ def update_goal_descriptions(self, goal_description):
         
     # def save_trajectories(self, trajectory_filename=None):
     #     steps = []
-    #     for state,action,reward,next_state in self._episode_replay_buffer:
+    #     for state, action, reward, next_state in self._episode_replay_buffer:
     #         steps.append({"s": state.as_dict, "a":action.as_dict, "r":reward, "s_next":next_state.as_dict})
-    #     goal_state = components.GameState(
-    #         known_hosts=self._goal_conditions["known_hosts"],
-    #         known_networks=self._goal_conditions["known_networks"],
-    #         controlled_hosts=self._goal_conditions["controlled_hosts"],
-    #         known_services=self._goal_conditions["known_services"],
-    #         known_data=self._goal_conditions["known_data"]
-    #     )
+
     #     trajectory = {
-    #         "goal": goal_state.as_dict,
+    #         # "goals": goals_dics,
     #         "end_reason":self._end_reason,
     #         "trajectory":steps
     #     }

From d34ead6d5ea8b991c6b6a49fa6e3de937a343295 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 13:33:08 +0200
Subject: [PATCH 47/87] More code cleanup

---
 env/network_security_game.py | 82 +-----------------------------------
 1 file changed, 1 insertion(+), 81 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index a378a41d..66884d6b 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -159,69 +159,6 @@ def get_all_actions(self):
 
         return {k:v for k,v in enumerate(actions)}
     
-    # def _process_win_conditions(self, win_conditions)->dict:
-    #     """
-    #     Method which analyses win_conditions and randomizes parts if required
-    #     """
-    #     logger.info("Processing win conditions")
-    #     updated_win_conditions = {}
-        
-    #     # networks
-    #     if win_conditions["known_networks"] == "random":
-    #         updated_win_conditions["known_networks"] = {random.choice(list(self._networks.keys()))}
-    #         logger.info("\t\tRadnomizing known_networks")
-    #     else:
-    #         updated_win_conditions["known_networks"] = copy.deepcopy(win_conditions["known_networks"])
-    #     logger.info(f"\tGoal known_networks: {updated_win_conditions['known_networks']}")
-    #     # known_hosts
-    #     if win_conditions["known_hosts"] == "random":
-    #         logger.info("\t\tRandomizing known_host")
-    #         updated_win_conditions["known_hosts"] = {random.choice(list(self._ip_to_hostname.keys()))}
-    #     else:
-    #         updated_win_conditions["known_hosts"] = copy.deepcopy(win_conditions["known_hosts"])
-    #     logger.info(f"\tGoal known_hosts: {updated_win_conditions['known_hosts']}")
-        
-    #     # controlled_hosts
-    #     if win_conditions["controlled_hosts"] == "random":
-    #         logger.info("\tRandomizing controlled_hots")
-    #         updated_win_conditions["controlled_hosts"] = {random.choice(list(self._ip_to_hostname.keys()))}
-    #     else:
-    #         updated_win_conditions["controlled_hosts"] = copy.deepcopy(win_conditions["controlled_hosts"])
-    #     logger.info(f"\tGoal controlled_hosts: {updated_win_conditions['controlled_hosts']}")
-        
-    #     # services
-    #     updated_win_conditions["known_services"] = {}
-    #     for host, service_list in win_conditions["known_services"].items():
-    #         # Was the position defined as random?
-    #         if isinstance(service_list, str) and service_list.lower() == "random":
-    #             available_services = []
-    #             for service in self._services[self._ip_to_hostname[host]]:
-    #                 available_services.append(components.Service(service.name, service.type, service.version, service.is_local))
-    #             logger.info(f"\tRandomizing known_services in {host}")
-    #             updated_win_conditions["known_services"][host] = random.choice(available_services)
-    #         else:
-    #             updated_win_conditions["known_services"][host] = copy.deepcopy(win_conditions["known_services"][host])
-    #     logger.info(f"\tGoal known_services: {updated_win_conditions['known_services']}")
-        
-    #     # data
-    #     # prepare all available data if randomization is needed
-    #     available_data = set()
-    #     for data in self._data.values():
-    #         for datapoint in data:
-    #             available_data.add(components.Data(datapoint.owner, datapoint.id))
-        
-    #     updated_win_conditions["known_data"] = {}
-    #     for host, data_set in win_conditions["known_data"].items():
-    #         # Was random data required in this host?
-    #         if isinstance(data_set, str) and data_set.lower() == "random":
-    #             # From all available data, randomly pick the one that is going to be requested in this host
-    #             updated_win_conditions["known_data"][host] = {random.choice(list(available_data))}
-    #             logger.info(f"\tRandomizing known_data in {host}")
-    #         else:
-    #             updated_win_conditions["known_data"][host] = copy.deepcopy(win_conditions["known_data"][host])
-    #     logger.info(f"\tGoal known_data: {updated_win_conditions['known_data']}")
-    #     return updated_win_conditions
-
     def _process_cyst_config(self, configuration_objects:list)-> None:
         """
         Process the cyst configuration file
@@ -1009,24 +946,7 @@ def update_goal_descriptions(self, goal_description):
         for ip in self._ip_mapping:
             new_description = new_description.replace(str(ip), str(self._ip_mapping[ip]))
         return new_description
-        
-    # def save_trajectories(self, trajectory_filename=None):
-    #     steps = []
-    #     for state, action, reward, next_state in self._episode_replay_buffer:
-    #         steps.append({"s": state.as_dict, "a":action.as_dict, "r":reward, "s_next":next_state.as_dict})
-
-    #     trajectory = {
-    #         # "goals": goals_dics,
-    #         "end_reason":self._end_reason,
-    #         "trajectory":steps
-    #     }
-    #     if not trajectory_filename:
-    #         trajectory_filename = "NSG_trajectories.json"
-    #     if trajectory["end_reason"]:
-    #         self._trajectories.append(trajectory)
-    #         logger.info("Saving trajectories")
-    #         self.store_trajectories_to_file(trajectory_filename)
-    
+            
     def reset(self, trajectory_filename=None)->None: 
         """
         Function to reset the state of the game

From d9ee1a04ef0bf34b1d6a8bf4a5b3178751fef8a7 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 13:37:16 +0200
Subject: [PATCH 48/87] Add types to parameters and output

---
 env/network_security_game.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 66884d6b..19d3ad2a 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -95,10 +95,10 @@ def seed(self)->int:
         return self._seed
     
     @property
-    def num_actions(self):
+    def num_actions(self)->int:
         return len(self.get_all_actions())
     
-    def get_all_states(self):
+    def get_all_states(self)->set:
         def all_combs(data):
             combs = []
             for i in range(1, len(data)+1):
@@ -125,7 +125,7 @@ def all_combs(data):
         print("\n",coms_data)
         return set(itertools.product(combs_nets, coms_known_h, coms_owned_h, coms_services, coms_data))
     
-    def get_all_actions(self):
+    def get_all_actions(self)->set:
         actions = set()
         
         # Network scans
@@ -679,7 +679,7 @@ def _execute_exploit_service_action(self, current_state:components.GameState, ac
             logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
-    def _execute_block_ip_action(self, current_state, action):
+    def _execute_block_ip_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
         """
         Executes the BlockIP action 
         - The action has BlockIP("target_host": IP object, "source_host": IP object, "blocked_host": IP object)
@@ -821,7 +821,7 @@ def _get_all_local_ips(self)->set:
         logger.info(f"\t\t\tLocal ips: {local_ips}")
         return local_ips
 
-    def create_state_from_view(self, view:dict, add_neighboring_nets=True)->components.GameState:
+    def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->components.GameState:
         """
         Builds a GameState from given view.
         If there is a keyword 'random' used, it is replaced by a valid option at random.
@@ -890,7 +890,7 @@ def create_state_from_view(self, view:dict, add_neighboring_nets=True)->componen
         logger.info(f"Generated GameState:{game_state}")
         return game_state
 
-    def re_map_goal_dict(self, goal_dict)->dict:
+    def re_map_goal_dict(self, goal_dict:dict)->dict:
         """
         Updates goal dict based on the current values
         in self._network_mapping and self._ip_mapping.
@@ -941,13 +941,13 @@ def re_map_goal_dict(self, goal_dict)->dict:
                 new_dict["known_blocks"][host] = items
         return new_dict    
 
-    def update_goal_descriptions(self, goal_description):
+    def update_goal_descriptions(self, goal_description:str)->str:
         new_description = goal_description
         for ip in self._ip_mapping:
             new_description = new_description.replace(str(ip), str(self._ip_mapping[ip]))
         return new_description
             
-    def reset(self, trajectory_filename=None)->None: 
+    def reset(self, trajectory_filename:str=None)->None: 
         """
         Function to reset the state of the game
         and prepare for a new episode

From afde545de5bc5cf14ff5d688f9a49a5a844116e0 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 13:48:27 +0200
Subject: [PATCH 49/87] trajectories are stored in coordinator

---
 env/network_security_game.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 19d3ad2a..0191c066 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -18,6 +18,19 @@
 # Set the logging
 logger = logging.getLogger('Netsecenv')
 
+
+class AIDojoWorld(object):
+
+
+    def step(current_state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
+        raise NotImplementedError
+
+    def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->components.GameState:
+        raise NotImplementedError
+    
+    def reset()->None:
+        raise NotImplementedError
+
 class NetworkSecurityEnvironment(object):
     """
     Class to manage the whole network security game
@@ -954,11 +967,11 @@ def reset(self, trajectory_filename:str=None)->None:
         """
         # write all steps in the episode replay buffer in the file
         logger.info('--- Reseting env to its initial state ---')
-        if self._episode_replay_buffer is not None:
-            # Save trajectories to file
-            self.save_trajectories(trajectory_filename)
-            # reset the replay buffer
-            self._episode_replay_buffer = [] 
+        # if self._episode_replay_buffer is not None:
+        #     # Save trajectories to file
+        #     self.save_trajectories(trajectory_filename)
+        #     # reset the replay buffer
+        #     self._episode_replay_buffer = [] 
         # change IPs if needed
         if self.task_config.get_use_dynamic_addresses():
             self._create_new_network_mapping()

From 4a8bd6f22671e0efaf8d0311112f0e7a8ebee936 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 14:05:39 +0200
Subject: [PATCH 50/87] Create Base world class

---
 env/network_security_game.py | 207 +++++++++++++++++------------------
 1 file changed, 100 insertions(+), 107 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 0191c066..c1296be6 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -15,12 +15,11 @@
 import subprocess
 import xml.etree.ElementTree as ElementTree
 
-# Set the logging
-logger = logging.getLogger('Netsecenv')
-
-
 class AIDojoWorld(object):
 
+    def __init__(self, task_config_file:str, world_name:str)->None:
+        self.task_config = ConfigParser(task_config_file)
+        self.logger = logging.getLogger(world_name)
 
     def step(current_state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
         raise NotImplementedError
@@ -31,14 +30,15 @@ def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->com
     def reset()->None:
         raise NotImplementedError
 
-class NetworkSecurityEnvironment(object):
+class NetworkSecurityEnvironment(AIDojoWorld):
     """
     Class to manage the whole network security game
     It uses some Cyst libraries for the network topology
     It presents a env environment to play
     """
-    def __init__(self, task_config_file) -> None:
-        logger.info("Initializing NetSetGame environment")
+    def __init__(self, task_config_file, world_name="NetSecEnv") -> None:
+        super().__init__(task_config_file, world_name)
+        self.logger.info("Initializing NetSetGame environment")
         # Prepare data structures for all environment components (to be filled in self._process_cyst_config())
         self._ip_to_hostname = {} # Mapping of `IP`:`host_name`(str) of all nodes in the environment
         self._networks = {} # A `dict` of the networks present in the environment. Keys: `Network` objects, values `set` of `IP` objects.
@@ -50,8 +50,6 @@ def __init__(self, task_config_file) -> None:
         self._exploits = {}
         # A list of all the hosts where the attacker can start in a random start
         self.hosts_to_start = []
-        # Read the conf file passed by the agent for the rest of values
-        self.task_config = ConfigParser(task_config_file)
         self._network_mapping = {}
         self._ip_mapping = {}
         # Load CYST configuration
@@ -62,11 +60,11 @@ def __init__(self, task_config_file) -> None:
         np.random.seed(seed)
         random.seed(seed)
         self._seed = seed
-        logger.info(f'Setting env seed to {seed}')
+        self.logger.info(f'Setting env seed to {seed}')
 
         # Set maximum number of steps in one episode
         self._max_steps = self.task_config.get_max_steps()
-        logger.info(f"\tSetting max steps to {self._max_steps}")
+        self.logger.info(f"\tSetting max steps to {self._max_steps}")
         
         # Set rewards for goal/detection/step
         self._rewards = {
@@ -74,7 +72,7 @@ def __init__(self, task_config_file) -> None:
             "detection": self.task_config.get_detection_reward(),
             "step": self.task_config.get_step_reward()
         }
-        logger.info(f"\tSetting rewards - {self._rewards}")
+        self.logger.info(f"\tSetting rewards - {self._rewards}")
 
         # Set the default parameters of all actionss
         # if the values of the actions were updated in the configuration file
@@ -88,7 +86,7 @@ def __init__(self, task_config_file) -> None:
         # At this point all 'random' values should be assigned to something
         # Check if dynamic network and ip adddresses are required
         if self.task_config.get_use_dynamic_addresses():
-            logger.info("Dynamic change of the IP and network addresses enabled")
+            self.logger.info("Dynamic change of the IP and network addresses enabled")
             self._faker_object = Faker()
             Faker.seed(seed)
         self._episode_replay_buffer = None
@@ -98,7 +96,7 @@ def __init__(self, task_config_file) -> None:
         self._data_content_original = copy.deepcopy(self._data_content)
         
         self._actions_played = []
-        logger.info("Environment initialization finished")
+        self.logger.info("Environment initialization finished")
 
     @property
     def seed(self)->int:
@@ -195,14 +193,14 @@ def _process_cyst_config(self, configuration_objects:list)-> None:
                 exploits.append(o)
 
         def process_node_config(node_obj:NodeConfig) -> None:
-            logger.info(f"\tProcessing config of node '{node_obj.id}'")
+            self.logger.info(f"\tProcessing config of node '{node_obj.id}'")
             #save the complete object
             node_objects[node_obj.id] = node_obj
-            logger.info(f'\t\tAdded {node_obj.id} to the list of available nodes.')
+            self.logger.info(f'\t\tAdded {node_obj.id} to the list of available nodes.')
             node_to_id[node_obj.id] = len(node_to_id)
 
             #examine interfaces
-            logger.info(f"\t\tProcessing interfaces in node '{node_obj.id}'")
+            self.logger.info(f"\t\tProcessing interfaces in node '{node_obj.id}'")
             for interface in node_obj.interfaces:
                 net_ip, net_mask = str(interface.net).split("/")
                 net = components.Network(net_ip,int(net_mask))
@@ -211,11 +209,11 @@ def process_node_config(node_obj:NodeConfig) -> None:
                 if net not in self._networks:
                     self._networks[net] = []
                 self._networks[net].append(ip)
-                logger.info(f'\t\tAdded network {str(interface.net)} to the list of available nets, with node {node_obj.id}.')
+                self.logger.info(f'\t\tAdded network {str(interface.net)} to the list of available nets, with node {node_obj.id}.')
 
 
             #services
-            logger.info(f"\t\tProcessing services & data in node '{node_obj.id}'")
+            self.logger.info(f"\t\tProcessing services & data in node '{node_obj.id}'")
             for service in node_obj.passive_services:
                 # Check if it is a candidate for random start
                 # Becareful, it will add all the IPs for this node
@@ -227,7 +225,7 @@ def process_node_config(node_obj:NodeConfig) -> None:
                     self._services[node_obj.id] = []
                 self._services[node_obj.id].append(components.Service(service.type, "passive", service.version, service.local))
                 #data
-                logger.info(f"\t\t\tProcessing data in node '{node_obj.id}':'{service.type}' service")
+                self.logger.info(f"\t\t\tProcessing data in node '{node_obj.id}':'{service.type}' service")
                 try:
                     for data in service.private_data:
                         if node_obj.id not in self._data:
@@ -241,19 +239,19 @@ def process_node_config(node_obj:NodeConfig) -> None:
                     #service does not contain any data
 
         def process_router_config(router_obj:RouterConfig)->None:
-            logger.info(f"\tProcessing config of router '{router_obj.id}'")
+            self.logger.info(f"\tProcessing config of router '{router_obj.id}'")
             # Process a router
             # Add the router to the list of nodes. This goes
             # against CYST definition. Check if we can modify it in CYST
             if router_obj.id.lower() == 'internet':
                 # Ignore the router called 'internet' because it is not a router
                 # in our network
-                logger.info("\t\tSkipping the internet router")
+                self.logger.info("\t\tSkipping the internet router")
                 return False
 
             node_objects[router_obj.id] = router_obj
             node_to_id[router_obj.id] = len(node_to_id)
-            logger.info(f"\t\tProcessing interfaces in router '{router_obj.id}'")
+            self.logger.info(f"\t\tProcessing interfaces in router '{router_obj.id}'")
             for interface in r.interfaces:
                 net_ip, net_mask = str(interface.net).split("/")
                 net = components.Network(net_ip,int(net_mask))
@@ -264,7 +262,7 @@ def process_router_config(router_obj:RouterConfig)->None:
                 self._networks[net].append(ip)
 
             #add Firewall rules
-            logger.info(f"\t\tReading FW rules in router '{router_obj.id}'")
+            self.logger.info(f"\t\tReading FW rules in router '{router_obj.id}'")
             for tp in router_obj.traffic_processors:
                 for chain in tp.chains:
                     for rule in chain.rules:
@@ -277,7 +275,7 @@ def process_firewall()->dict:
                 all_ips.update(ips)
             firewall = {ip:set() for ip in all_ips}
             if self.task_config.get_use_firewall():
-                logger.info("Firewall enabled - processing FW rules")
+                self.logger.info("Firewall enabled - processing FW rules")
                 # LOCAL NETWORKS
                 for net, ips in self._networks.items():
                     # IF net is local, allow connection between all nodes in it
@@ -302,15 +300,15 @@ def process_firewall()->dict:
                     if rule.policy == FirewallPolicy.ALLOW:
                         src_net = netaddr.IPNetwork(rule.src_net)
                         dst_net = netaddr.IPNetwork(rule.dst_net)
-                        logger.info(f"\t{rule}")
+                        self.logger.info(f"\t{rule}")
                         for src_ip in all_ips:
                             if str(src_ip) in src_net:
                                 for dst_ip in all_ips:
                                     if str(dst_ip) in dst_net:
                                         firewall[src_ip].add(dst_ip)
-                                        logger.info(f"\t\tAdding {src_ip} -> {dst_ip}")
+                                        self.logger.info(f"\t\tAdding {src_ip} -> {dst_ip}")
             else:
-                logger.info("Firewall disabled, allowing all connections")
+                self.logger.info("Firewall disabled, allowing all connections")
                 for src_ip in all_ips:
                     for dst_ip in all_ips:
                         firewall[src_ip].add(dst_ip)
@@ -326,19 +324,19 @@ def process_firewall()->dict:
         # process firewall rules
         self._firewall = process_firewall()
         
-        logger.info("\tProcessing available exploits")
+        self.logger.info("\tProcessing available exploits")
 
         #exploits
         self._exploits = exploits
         #create initial mapping
-        logger.info("\tCreating initial mapping of IPs and Networks")
+        self.logger.info("\tCreating initial mapping of IPs and Networks")
         for net in self._networks.keys():
             self._network_mapping[net] = net
-        logger.info(f"\tintitial self._network_mapping: {self._network_mapping}")
+        self.logger.info(f"\tintitial self._network_mapping: {self._network_mapping}")
         for ip in self._ip_to_hostname.keys():
             self._ip_mapping[ip] = ip
-        logger.info(f"\tintitial self._ip_mapping: {self._ip_mapping}")
-        logger.info("CYST configuration processed successfully")
+        self.logger.info(f"\tintitial self._ip_mapping: {self._ip_mapping}")
+        self.logger.info("CYST configuration processed successfully")
 
     def _create_new_network_mapping(self)->tuple:
         """ Method that generates random IP and Network addreses
@@ -381,13 +379,13 @@ def _create_new_network_mapping(self)->tuple:
                 if False not in is_private_net_checks: # verify that ALL new networks are still in the private ranges
                     valid_valid_network_mapping = True
             except IndexError as e:
-                logger.info(f"Dynamic address sampling failed, re-trying. {e}")
+                self.logger.info(f"Dynamic address sampling failed, re-trying. {e}")
                 counter_iter +=1
                 if counter_iter > 10:
-                    logger.error("Dynamic address failed more than 10 times - stopping.")
+                    self.logger.error("Dynamic address failed more than 10 times - stopping.")
                     exit(-1)
                 # Invalid IP address boundary
-        logger.info(f"New network mapping:{mapping_nets}")
+        self.logger.info(f"New network mapping:{mapping_nets}")
         
         # genereate mapping for ips:
         for net,ips in self._networks.items():
@@ -398,7 +396,7 @@ def _create_new_network_mapping(self)->tuple:
                 mapping_ips[ip] = components.IP(str(ip_list[i]))
             # Always add random, in case random is selected for ips
             mapping_ips['random'] = 'random'
-        logger.info(f"Mapping IPs done:{mapping_ips}")
+        self.logger.info(f"Mapping IPs done:{mapping_ips}")
         
         # update ALL data structure in the environment with the new mappings
         # self._networks
@@ -432,10 +430,10 @@ def _create_new_network_mapping(self)->tuple:
         #update mappings stored in the environment
         for net, mapping in self._network_mapping.items():
             self._network_mapping[net] = mapping_nets[mapping]
-        logger.debug(f"self._network_mapping: {self._network_mapping}")
+        self.logger.debug(f"self._network_mapping: {self._network_mapping}")
         for ip, mapping in self._ip_mapping.items():
             self._ip_mapping[ip] = mapping_ips[mapping]
-        logger.debug(f"self._ip_mapping: {self._ip_mapping}")
+        self.logger.debug(f"self._ip_mapping: {self._ip_mapping}")
     
     def _get_services_from_host(self, host_ip:str, controlled_hosts:set)-> set:
         """
@@ -449,9 +447,9 @@ def _get_services_from_host(self, host_ip:str, controlled_hosts:set)-> set:
                 else:
                     found_services = {s for s in self._services[self._ip_to_hostname[host_ip]] if not s.is_local}
             else:
-                logger.debug("\tServices not found because host does have any service.")
+                self.logger.debug("\tServices not found because host does have any service.")
         else:
-            logger.debug("\tServices not found because target IP does not exists.")
+            self.logger.debug("\tServices not found because target IP does not exists.")
         return found_services
 
     def _get_networks_from_host(self, host_ip)->set:
@@ -475,7 +473,7 @@ def _get_data_in_host(self, host_ip:str, controlled_hosts:set)->set:
                 if self._ip_to_hostname[host_ip] in self._data:
                     data = self._data[self._ip_to_hostname[host_ip]]
         else:
-            logger.debug("\t\t\tCan't get data in host. The host is not controlled.")
+            self.logger.debug("\t\t\tCan't get data in host. The host is not controlled.")
         return data
 
     def _get_data_content(self, host_ip:str, data_id:str)->str:
@@ -488,9 +486,9 @@ def _get_data_content(self, host_ip:str, data_id:str)->str:
             if (hostname, data_id) in self._data_content:
                 content = self._data_content[hostname,data_id]
             else:
-                logger.info(f"\tData '{data_id}' not found in host '{hostname}'({host_ip})")
+                self.logger.info(f"\tData '{data_id}' not found in host '{hostname}'({host_ip})")
         else:
-            logger.debug("Data content not found because target IP does not exists.")
+            self.logger.debug("Data content not found because target IP does not exists.")
         return content
     
     def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id, action_type='netsecenv')-> components.GameState:
@@ -550,20 +548,20 @@ def _execute_scan_network_action(self, current_state:components.GameState, actio
         Executes the ScanNetwork action in the environment
         """
         next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        logger.info(f"\t\tScanning {action.parameters['target_network']}")
+        self.logger.info(f"\t\tScanning {action.parameters['target_network']}")
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             new_ips = set()
             for ip in self._ip_to_hostname.keys(): #check if IP exists
-                logger.debug(f"\t\tChecking if {ip} in {action.parameters['target_network']}")
+                self.logger.debug(f"\t\tChecking if {ip} in {action.parameters['target_network']}")
                 if str(ip) in netaddr.IPNetwork(str(action.parameters["target_network"])):
                     if self._firewall_check(action.parameters["source_host"], ip):
-                        logger.debug(f"\t\t\tAdding {ip} to new_ips")
+                        self.logger.debug(f"\t\t\tAdding {ip} to new_ips")
                         new_ips.add(ip)
                     else:
-                        logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {ip} blocked by FW. Skipping")
+                        self.logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {ip} blocked by FW. Skipping")
             next_known_h = next_known_h.union(new_ips)
         else:
-            logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
+            self.logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
     def _execute_find_services_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
@@ -571,23 +569,23 @@ def _execute_find_services_action(self, current_state:components.GameState, acti
         Executes the FindServices action in the environment
         """
         next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        logger.info(f"\t\tSearching for services in {action.parameters['target_host']}")
+        self.logger.info(f"\t\tSearching for services in {action.parameters['target_host']}")
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
                 found_services = self._get_services_from_host(action.parameters["target_host"], current_state.controlled_hosts)
-                logger.debug(f"\t\t\tFound {len(found_services)}: {found_services}")
+                self.logger.debug(f"\t\t\tFound {len(found_services)}: {found_services}")
                 if len(found_services) > 0:
                     next_services[action.parameters["target_host"]] = found_services
 
                     #if host was not known, add it to the known_hosts ONLY if there are some found services
                     if action.parameters["target_host"] not in next_known_h:
-                        logger.debug(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
+                        self.logger.debug(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
                         next_known_h.add(action.parameters["target_host"])
                         next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
             else:
-                logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
+                self.logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
         else:
-            logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
+            self.logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_find_data_action(self, current:components.GameState, action:components.Action)->components.GameState:
@@ -595,20 +593,20 @@ def _execute_find_data_action(self, current:components.GameState, action:compone
         Executes the FindData action in the environment
         """
         next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current)
-        logger.info(f"\t\tSearching for data in {action.parameters['target_host']}")
+        self.logger.info(f"\t\tSearching for data in {action.parameters['target_host']}")
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current.controlled_hosts:
             if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
                 new_data = self._get_data_in_host(action.parameters["target_host"], current.controlled_hosts)
-                logger.debug(f"\t\t\t Found {len(new_data)}: {new_data}")
+                self.logger.debug(f"\t\t\t Found {len(new_data)}: {new_data}")
                 if len(new_data) > 0:
                     if action.parameters["target_host"] not in next_data.keys():
                         next_data[action.parameters["target_host"]] = new_data
                     else:
                         next_data[action.parameters["target_host"]] = next_data[action.parameters["target_host"]].union(new_data)
             else:
-                logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
+                self.logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
         else:
-            logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
+            self.logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_exfiltrate_data_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
@@ -616,13 +614,13 @@ def _execute_exfiltrate_data_action(self, current_state:components.GameState, ac
         Executes the ExfiltrateData action in the environment
         """
         next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        logger.info(f"\t\tAttempting to Exfiltrate {action.parameters['data']} from {action.parameters['source_host']} to {action.parameters['target_host']}")
+        self.logger.info(f"\t\tAttempting to Exfiltrate {action.parameters['data']} from {action.parameters['source_host']} to {action.parameters['target_host']}")
         # Is the target host controlled?
         if action.parameters["target_host"] in current_state.controlled_hosts:
-            logger.debug(f"\t\t\t {action.parameters['target_host']} is under-control: {current_state.controlled_hosts}")
+            self.logger.debug(f"\t\t\t {action.parameters['target_host']} is under-control: {current_state.controlled_hosts}")
             # Is the source host controlled?
             if action.parameters["source_host"] in current_state.controlled_hosts:
-                logger.debug(f"\t\t\t {action.parameters['source_host']} is under-control: {current_state.controlled_hosts}")
+                self.logger.debug(f"\t\t\t {action.parameters['source_host']} is under-control: {current_state.controlled_hosts}")
                 # Is the source host in the list of hosts we know data from? (this is to avoid the keyerror later in the if)
                 # Does the current state for THIS source already know about this data?
                 if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
@@ -631,7 +629,7 @@ def _execute_exfiltrate_data_action(self, current_state:components.GameState, ac
                         if self._ip_to_hostname[action.parameters["source_host"]] in self._data.keys():
                             # Does the source host have this data?
                             if action.parameters["data"] in self._data[self._ip_to_hostname[action.parameters["source_host"]]]:
-                                logger.debug("\t\t\t Data present in the source_host")
+                                self.logger.debug("\t\t\t Data present in the source_host")
                                 if action.parameters["target_host"] not in next_data.keys():
                                     next_data[action.parameters["target_host"]] = {action.parameters["data"]}
                                 else:
@@ -642,17 +640,17 @@ def _execute_exfiltrate_data_action(self, current_state:components.GameState, ac
                                 else:
                                     self._data[self._ip_to_hostname[action.parameters["target_host"]]].add(action.parameters["data"])
                             else:
-                                logger.debug("\t\t\tCan not exfiltrate. Source host does not have this data.")
+                                self.logger.debug("\t\t\tCan not exfiltrate. Source host does not have this data.")
                         else:
-                            logger.debug("\t\t\tCan not exfiltrate. Source host does not have any data.")
+                            self.logger.debug("\t\t\tCan not exfiltrate. Source host does not have any data.")
                     else:
-                        logger.debug("\t\t\tCan not exfiltrate. Agent did not find this data yet.")
+                        self.logger.debug("\t\t\tCan not exfiltrate. Agent did not find this data yet.")
                 else:
-                    logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
+                    self.logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
             else:
-                logger.debug("\t\t\tCan not exfiltrate. Source host is not controlled.")
+                self.logger.debug("\t\t\tCan not exfiltrate. Source host is not controlled.")
         else:
-            logger.debug("\t\t\tCan not exfiltrate. Target host is not controlled.")
+            self.logger.debug("\t\t\tCan not exfiltrate. Target host is not controlled.")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_exploit_service_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
@@ -661,7 +659,7 @@ def _execute_exploit_service_action(self, current_state:components.GameState, ac
         """
         next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
         # We don't check if the target is a known_host because it can be a blind attempt to attack
-        logger.info(f"\t\tAttempting to ExploitService in '{action.parameters['target_host']}':'{action.parameters['target_service']}'")
+        self.logger.info(f"\t\tAttempting to ExploitService in '{action.parameters['target_host']}':'{action.parameters['target_service']}'")
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             if action.parameters["target_host"] in self._ip_to_hostname: #is it existing IP?
                 if self._firewall_check(action.parameters["source_host"], action.parameters['target_host']):
@@ -669,27 +667,27 @@ def _execute_exploit_service_action(self, current_state:components.GameState, ac
                         if action.parameters["target_service"] in self._services[self._ip_to_hostname[action.parameters["target_host"]]]: #does it have the service in question?
                             if action.parameters["target_host"] in next_services: #does the agent know about any services this host have?
                                 if action.parameters["target_service"] in next_services[action.parameters["target_host"]]:
-                                    logger.debug("\t\t\tValid service")
+                                    self.logger.debug("\t\t\tValid service")
                                     if action.parameters["target_host"] not in next_controlled_h:
                                         next_controlled_h.add(action.parameters["target_host"])
-                                        logger.debug("\t\tAdding to controlled_hosts")
+                                        self.logger.debug("\t\tAdding to controlled_hosts")
                                     new_networks = self._get_networks_from_host(action.parameters["target_host"])
-                                    logger.debug(f"\t\t\tFound {len(new_networks)}: {new_networks}")
+                                    self.logger.debug(f"\t\t\tFound {len(new_networks)}: {new_networks}")
                                     next_nets = next_nets.union(new_networks)
                                 else:
-                                    logger.debug("\t\t\tCan not exploit. Agent does not know about target host selected service")
+                                    self.logger.debug("\t\t\tCan not exploit. Agent does not know about target host selected service")
                             else:
-                                logger.debug("\t\t\tCan not exploit. Agent does not know about target host having any service")
+                                self.logger.debug("\t\t\tCan not exploit. Agent does not know about target host having any service")
                         else:
-                            logger.debug("\t\t\tCan not exploit. Target host does not the service that was attempted.")
+                            self.logger.debug("\t\t\tCan not exploit. Target host does not the service that was attempted.")
                     else:
-                        logger.debug("\t\t\tCan not exploit. Target host does not have any services.")
+                        self.logger.debug("\t\t\tCan not exploit. Target host does not have any services.")
                 else:
-                    logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
+                    self.logger.debug(f"\t\t\tConnection {action.parameters['source_host']} -> {action.parameters['target_host']} blocked by FW. Skipping")
             else:
-                logger.debug("\t\t\tCan not exploit. Target host does not exist.")
+                self.logger.debug("\t\t\tCan not exploit. Target host does not exist.")
         else:
-            logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
+            self.logger.debug(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
     def _execute_block_ip_action(self, current_state:components.GameState, action:components.Action)->components.GameState:
@@ -709,7 +707,7 @@ def _execute_block_ip_action(self, current_state:components.GameState, action:co
         blocked_host = action.parameters['blocked_host']
 
         next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        logger.info(f"\t\tBlockIP {action.parameters['target_host']}")
+        self.logger.info(f"\t\tBlockIP {action.parameters['target_host']}")
         # Is the src in the controlled hosts?
         if "source_host" in action.parameters.keys() and action.parameters["source_host"] in current_state.controlled_hosts:
             # Is the target in the controlled hosts?
@@ -740,9 +738,9 @@ def _execute_block_ip_action(self, current_state:components.GameState, action:co
                     else:
                         next_blocked[action.parameters["target_host"]] = next_blocked[action.parameters["target_host"]].union(new_blocked)
             else:
-                logger.info(f"\t\t\t Invalid target_host:'{action.parameters['target_host']}'")
+                self.logger.info(f"\t\t\t Invalid target_host:'{action.parameters['target_host']}'")
         else:
-            logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
+            self.logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
     def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
@@ -750,7 +748,7 @@ def _execute_scan_network_action_real_world(self, current_state:components.GameS
         Executes the ScanNetwork action in the the real world
         """
         next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
+        self.logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
         nmap_file_xml = 'nmap-result.xml'
         command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
         _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
@@ -778,7 +776,7 @@ def _execute_scan_network_action_real_world(self, current_state:components.GameS
                 mac_address = ""
                 vendor = ""
 
-            logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
+            self.logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
             new_ips.add(ip)
         next_known_h = next_known_h.union(new_ips)
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
@@ -788,7 +786,7 @@ def _execute_find_services_action_real_world(self, current_state:components.Game
         Executes the FindServices action in the real world
         """
         next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
+        self.logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
         nmap_file_xml = 'nmap-result.xml'
         command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"
         _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
@@ -820,7 +818,7 @@ def _execute_find_services_action_real_world(self, current_state:components.Game
         
         # If host was not known, add it to the known_hosts and known_networks ONLY if there are some found services
         if action.parameters["target_host"] not in next_known_h:
-            logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
+            self.logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
             next_known_h.add(action.parameters["target_host"])
             next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
@@ -831,7 +829,7 @@ def _get_all_local_ips(self)->set:
             if netaddr.IPNetwork(str(net)).ip.is_ipv4_private_use():
                 for ip in ips:
                     local_ips.add(self._ip_mapping[ip])
-        logger.info(f"\t\t\tLocal ips: {local_ips}")
+        self.logger.info(f"\t\t\tLocal ips: {local_ips}")
         return local_ips
 
     def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->components.GameState:
@@ -841,7 +839,7 @@ def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->com
 
         Currently, we artificially extend the knonw_networks with +- 1 in the third octet.
         """
-        logger.info(f'Generating state from view:{view}')
+        self.logger.info(f'Generating state from view:{view}')
         # re-map all networks based on current mapping in self._network_mapping
         known_networks = set([self._network_mapping[net] for net in  view["known_networks"]])
         
@@ -851,20 +849,20 @@ def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->com
         for host in view['controlled_hosts']:
             if isinstance(host, components.IP):
                 controlled_hosts.add(self._ip_mapping[host])
-                logger.info(f'\tThe attacker has control of host {self._ip_mapping[host]}.')
+                self.logger.info(f'\tThe attacker has control of host {self._ip_mapping[host]}.')
             elif host == 'random':
                 # Random start
-                logger.info('\tAdding random starting position of agent')
-                logger.info(f'\t\tChoosing from {self.hosts_to_start}')
+                self.logger.info('\tAdding random starting position of agent')
+                self.logger.info(f'\t\tChoosing from {self.hosts_to_start}')
                 selected = random.choice(self.hosts_to_start)
                 controlled_hosts.add(selected)
-                logger.info(f'\t\tMaking agent start in {selected}')
+                self.logger.info(f'\t\tMaking agent start in {selected}')
             elif host == "all_local":
                 # all local ips
-                logger.info('\t\tAdding all local hosts to agent')
+                self.logger.info('\t\tAdding all local hosts to agent')
                 controlled_hosts = controlled_hosts.union(self._get_all_local_ips())
             else:
-                logger.error(f"Unsupported value encountered in start_position['controlled_hosts']: {host}")
+                self.logger.error(f"Unsupported value encountered in start_position['controlled_hosts']: {host}")
         # re-map all known based on current mapping in self._ip_mapping
         known_hosts = set([self._ip_mapping[ip] for ip in view["known_hosts"]])
         # Add all controlled hosts to known_hosts
@@ -884,12 +882,12 @@ def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->com
                         net_obj.value += 256
                         if net_obj.ip.is_ipv4_private_use():
                             ip = components.Network(str(net_obj.ip), net_obj.prefixlen)
-                            logger.info(f'\tAdding {ip} to agent')
+                            self.logger.info(f'\tAdding {ip} to agent')
                             known_networks.add(ip)
                         net_obj.value -= 2*256
                         if net_obj.ip.is_ipv4_private_use():
                             ip = components.Network(str(net_obj.ip), net_obj.prefixlen)
-                            logger.info(f'\tAdding {ip} to agent')
+                            self.logger.info(f'\tAdding {ip} to agent')
                             known_networks.add(ip)
                         #return value back to the original
                         net_obj.value += 256
@@ -900,7 +898,7 @@ def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->com
         for ip, data_list in view["known_data"]:
             known_data[self._ip_mapping[ip]] = data_list
         game_state = components.GameState(controlled_hosts, known_hosts, known_services, known_data, known_networks)
-        logger.info(f"Generated GameState:{game_state}")
+        self.logger.info(f"Generated GameState:{game_state}")
         return game_state
 
     def re_map_goal_dict(self, goal_dict:dict)->dict:
@@ -960,18 +958,13 @@ def update_goal_descriptions(self, goal_description:str)->str:
             new_description = new_description.replace(str(ip), str(self._ip_mapping[ip]))
         return new_description
             
-    def reset(self, trajectory_filename:str=None)->None: 
+    def reset(self)->None: 
         """
         Function to reset the state of the game
         and prepare for a new episode
         """
         # write all steps in the episode replay buffer in the file
-        logger.info('--- Reseting env to its initial state ---')
-        # if self._episode_replay_buffer is not None:
-        #     # Save trajectories to file
-        #     self.save_trajectories(trajectory_filename)
-        #     # reset the replay buffer
-        #     self._episode_replay_buffer = [] 
+        self.logger.info('--- Reseting env to its initial state ---')
         # change IPs if needed
         if self.task_config.get_use_dynamic_addresses():
             self._create_new_network_mapping()
@@ -989,7 +982,7 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
         in: action
         out: observation of the state of the env
         """
-        logger.info(f"Agent {agent_id}. Action: {action}")
+        self.logger.info(f"Agent {agent_id}. Action: {action}")
         # Reward for taking an action
         reward = self._rewards["step"]
 
@@ -998,13 +991,13 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
         if random.random() <= action.type.default_success_p or action_type == 'realworld':
             next_state = self._execute_action(state, action, agent_id, action_type=action_type)
         else:
-            logger.info("\tAction NOT sucessful")
+            self.logger.info("\tAction NOT sucessful")
             next_state = state
 
         
         # Make the state we just got into, our current state
         current_state = state
-        logger.info(f'New state: {next_state} ')
+        self.logger.info(f'New state: {next_state} ')
 
 
         # Save the transition to the episode replay buffer if there is any

From 3f15fddd0f654e3ec1ea0e66fe2d10ed958acb5d Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:03:30 +0200
Subject: [PATCH 51/87] create general class for the AIDojo world

---
 coordinator.py               |  8 +++++---
 env/network_security_game.py | 24 +++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/coordinator.py b/coordinator.py
index 24504e1f..08daf529 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -164,15 +164,17 @@ async def send_data_to_agent(writer, data: str) -> None:
     async def __call__(self, reader, writer):
         await self.handle_new_agent(reader, writer)
 
-
 class Coordinator:
     def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles, world_type="netsecenv"):
+        # communication channels for asyncio
         self._actions_queue = actions_queue
         self._answers_queue = answers_queue
         self.ALLOWED_ROLES = allowed_roles
         self.logger = logging.getLogger("AIDojo-Coordinator")
+        # world definition
         self._world = NetworkSecurityEnvironment(net_sec_config)
         self.world_type = world_type
+        #  
         self._starting_positions_per_role = self._get_starting_position_per_role()
         self._win_conditions_per_role = self._get_win_condition_per_role()
         self._goal_description_per_role = self._get_goal_description_per_role()
@@ -335,7 +337,7 @@ def _get_win_condition_per_role(self)-> dict:
         win_conditions = {}
         for agent_role in self.ALLOWED_ROLES:
             try:
-                win_conditions[agent_role] = self._world.re_map_goal_dict(
+                win_conditions[agent_role] = self._world.update_goal_dict(
                     self._world.task_config.get_win_conditions(agent_role=agent_role)
                 )
             except KeyError:
@@ -533,7 +535,7 @@ def _goal_reached(self, agent_addr:tuple)->bool:
         self.logger.info(f"Goal check for {agent_addr}({self.agents[agent_addr][1]})")
         agents_state = self._agent_states[agent_addr]
         agent_role = self.agents[agent_addr][1]
-        win_condition = self._world.re_map_goal_dict(self._win_conditions_per_role[agent_role])
+        win_condition = self._world.update_goal_dict(self._win_conditions_per_role[agent_role])
         goal_check = self._check_goal(agents_state, win_condition)
         if goal_check:
             self.logger.info("\tGoal reached!")
diff --git a/env/network_security_game.py b/env/network_security_game.py
index c1296be6..078e8d6d 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -22,14 +22,36 @@ def __init__(self, task_config_file:str, world_name:str)->None:
         self.logger = logging.getLogger(world_name)
 
     def step(current_state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
+        """
+        Executes given action in a current state of the environment and produces new GameState.
+        """
         raise NotImplementedError
 
     def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->components.GameState:
+        """
+        Produces a GameState based on the view of the world.
+        """
         raise NotImplementedError
     
     def reset()->None:
+        """
+        Resets the world to its initial state.
+        """
         raise NotImplementedError
 
+    def update_goal_descriptions(self, goal_description:str)->str:
+       """
+       Takes the existing goal description (text) and updates it with respect to the world.
+       """
+       raise NotImplementedError
+    
+    def update_goal_dict(self, goal_dict:dict)->dict:
+        """
+        Takes the existing goal dict and updates it with respect to the world.
+        """
+        raise NotImplementedError
+    
+
 class NetworkSecurityEnvironment(AIDojoWorld):
     """
     Class to manage the whole network security game
@@ -901,7 +923,7 @@ def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->com
         self.logger.info(f"Generated GameState:{game_state}")
         return game_state
 
-    def re_map_goal_dict(self, goal_dict:dict)->dict:
+    def update_goal_dict(self, goal_dict:dict)->dict:
         """
         Updates goal dict based on the current values
         in self._network_mapping and self._ip_mapping.

From 08c472d5dac09ecdcfde535f46739957d456fc7a Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:09:25 +0200
Subject: [PATCH 52/87] Move aidojo_world to separate file

---
 env/aidojo_world.py          | 41 ++++++++++++++++++++++++++++++++++++
 env/network_security_game.py | 40 ++---------------------------------
 2 files changed, 43 insertions(+), 38 deletions(-)
 create mode 100644 env/aidojo_world.py

diff --git a/env/aidojo_world.py b/env/aidojo_world.py
new file mode 100644
index 00000000..64d175ce
--- /dev/null
+++ b/env/aidojo_world.py
@@ -0,0 +1,41 @@
+# Author Ondrej Lukas - ondrej.lukas@aic.fel.cvut.cz
+# Template of world to be used in AI Dojo
+import env.game_components as components
+import logging
+from utils.utils import ConfigParser
+
+class AIDojoWorld(object):
+
+    def __init__(self, task_config_file:str, world_name:str)->None:
+        self.task_config = ConfigParser(task_config_file)
+        self.logger = logging.getLogger(world_name)
+
+    def step(current_state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
+        """
+        Executes given action in a current state of the environment and produces new GameState.
+        """
+        raise NotImplementedError
+
+    def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->components.GameState:
+        """
+        Produces a GameState based on the view of the world.
+        """
+        raise NotImplementedError
+    
+    def reset()->None:
+        """
+        Resets the world to its initial state.
+        """
+        raise NotImplementedError
+
+    def update_goal_descriptions(self, goal_description:str)->str:
+       """
+       Takes the existing goal description (text) and updates it with respect to the world.
+       """
+       raise NotImplementedError
+    
+    def update_goal_dict(self, goal_dict:dict)->dict:
+        """
+        Takes the existing goal dict and updates it with respect to the world.
+        """
+        raise NotImplementedError
\ No newline at end of file
diff --git a/env/network_security_game.py b/env/network_security_game.py
index 078e8d6d..621358a5 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -9,48 +9,12 @@
 import copy
 from cyst.api.configuration import NodeConfig, RouterConfig, ConnectionConfig, ExploitConfig, FirewallPolicy
 import numpy as np
-import logging
 from faker import Faker
-from utils.utils import ConfigParser
+from env.aidojo_world import AIDojoWorld
 import subprocess
 import xml.etree.ElementTree as ElementTree
 
-class AIDojoWorld(object):
-
-    def __init__(self, task_config_file:str, world_name:str)->None:
-        self.task_config = ConfigParser(task_config_file)
-        self.logger = logging.getLogger(world_name)
-
-    def step(current_state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
-        """
-        Executes given action in a current state of the environment and produces new GameState.
-        """
-        raise NotImplementedError
-
-    def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->components.GameState:
-        """
-        Produces a GameState based on the view of the world.
-        """
-        raise NotImplementedError
-    
-    def reset()->None:
-        """
-        Resets the world to its initial state.
-        """
-        raise NotImplementedError
-
-    def update_goal_descriptions(self, goal_description:str)->str:
-       """
-       Takes the existing goal description (text) and updates it with respect to the world.
-       """
-       raise NotImplementedError
-    
-    def update_goal_dict(self, goal_dict:dict)->dict:
-        """
-        Takes the existing goal dict and updates it with respect to the world.
-        """
-        raise NotImplementedError
-    
+ 
 
 class NetworkSecurityEnvironment(AIDojoWorld):
     """

From 6a423320adeffda744299eb187eaf2ea722bbb1d Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:11:26 +0200
Subject: [PATCH 53/87] Add comments

---
 env/aidojo_world.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/env/aidojo_world.py b/env/aidojo_world.py
index 64d175ce..5777df3f 100644
--- a/env/aidojo_world.py
+++ b/env/aidojo_world.py
@@ -4,8 +4,12 @@
 import logging
 from utils.utils import ConfigParser
 
+"""
+Basic class for worlds to be used in the AI Dojo.
+Every world (environment) used in AI Dojo should extend this class and implement
+all its methods to be compatible with the game server and game coordinator.
+"""
 class AIDojoWorld(object):
-
     def __init__(self, task_config_file:str, world_name:str)->None:
         self.task_config = ConfigParser(task_config_file)
         self.logger = logging.getLogger(world_name)

From d22ad68010cc89a54bb5c76ab7fcfd3f5c4ac824 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:19:08 +0200
Subject: [PATCH 54/87] Add default value to world_name argument

---
 env/aidojo_world.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/aidojo_world.py b/env/aidojo_world.py
index 5777df3f..7ae13fe6 100644
--- a/env/aidojo_world.py
+++ b/env/aidojo_world.py
@@ -10,7 +10,7 @@
 all its methods to be compatible with the game server and game coordinator.
 """
 class AIDojoWorld(object):
-    def __init__(self, task_config_file:str, world_name:str)->None:
+    def __init__(self, task_config_file:str, world_name:str="BasicAIDojoWorld")->None:
         self.task_config = ConfigParser(task_config_file)
         self.logger = logging.getLogger(world_name)
 

From c6cbd305063a87c8be6117b946b8fb50857a7e84 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:19:44 +0200
Subject: [PATCH 55/87] Use world_type parameter to determine which environment
 to instantiate

---
 coordinator.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/coordinator.py b/coordinator.py
index 08daf529..762d233c 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -9,6 +9,7 @@
 import asyncio
 from datetime import datetime
 from env.network_security_game import NetworkSecurityEnvironment
+from env.aidojo_world import AIDojoWorld
 from env.game_components import Action, Observation, ActionType, GameStatus, GameState
 from utils.utils import observation_as_dict, get_logging_level
 from pathlib import Path
@@ -172,7 +173,11 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
         self.ALLOWED_ROLES = allowed_roles
         self.logger = logging.getLogger("AIDojo-Coordinator")
         # world definition
-        self._world = NetworkSecurityEnvironment(net_sec_config)
+        match world_type:
+            case "netsecenv":
+                self._world = NetworkSecurityEnvironment(net_sec_config)
+            case _:
+                self._world = AIDojoWorld(net_sec_config)
         self.world_type = world_type
         #  
         self._starting_positions_per_role = self._get_starting_position_per_role()

From d1dfb10d852b373dce728a0ab7236969def85827 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:30:39 +0200
Subject: [PATCH 56/87] Separate code for the real-world playing into its own
 version of the world

---
 env/network_security_game.py | 289 ++++++++++++++++++++++++++---------
 1 file changed, 216 insertions(+), 73 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 621358a5..c133b585 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -729,85 +729,85 @@ def _execute_block_ip_action(self, current_state:components.GameState, action:co
             self.logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
-    def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
-        """
-        Executes the ScanNetwork action in the the real world
-        """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        self.logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
-        nmap_file_xml = 'nmap-result.xml'
-        command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
-        _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
-        # We ignore the result variable for now
-        tree = ElementTree.parse(nmap_file_xml)
-        root = tree.getroot()
-        new_ips = set()
-        for host in root.findall('.//host'):
-            status_elem = host.find('./status')
-            if status_elem is not None:
-                status = host.find('./status').get('state')
-            else:
-                status = ""
-            ip_elem = host.find('./address[@addrtype="ipv4"]')
-            if ip_elem is not None:
-                ip = components.IP(str(ip_elem.get('addr')))
-            else:
-                ip = ""
+    # def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
+    #     """
+    #     Executes the ScanNetwork action in the the real world
+    #     """
+    #     next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
+    #     self.logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
+    #     nmap_file_xml = 'nmap-result.xml'
+    #     command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
+    #     _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
+    #     # We ignore the result variable for now
+    #     tree = ElementTree.parse(nmap_file_xml)
+    #     root = tree.getroot()
+    #     new_ips = set()
+    #     for host in root.findall('.//host'):
+    #         status_elem = host.find('./status')
+    #         if status_elem is not None:
+    #             status = host.find('./status').get('state')
+    #         else:
+    #             status = ""
+    #         ip_elem = host.find('./address[@addrtype="ipv4"]')
+    #         if ip_elem is not None:
+    #             ip = components.IP(str(ip_elem.get('addr')))
+    #         else:
+    #             ip = ""
             
-            mac_elem = host.find('./address[@addrtype="mac"]')
-            if mac_elem is not None:
-                mac_address = mac_elem.get('addr', '')
-                vendor = mac_elem.get('vendor', '')
-            else:
-                mac_address = ""
-                vendor = ""
+    #         mac_elem = host.find('./address[@addrtype="mac"]')
+    #         if mac_elem is not None:
+    #             mac_address = mac_elem.get('addr', '')
+    #             vendor = mac_elem.get('vendor', '')
+    #         else:
+    #             mac_address = ""
+    #             vendor = ""
 
-            self.logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
-            new_ips.add(ip)
-        next_known_h = next_known_h.union(new_ips)
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
+    #         self.logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
+    #         new_ips.add(ip)
+    #     next_known_h = next_known_h.union(new_ips)
+    #     return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
     
-    def _execute_find_services_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
-        """
-        Executes the FindServices action in the real world
-        """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        self.logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
-        nmap_file_xml = 'nmap-result.xml'
-        command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"
-        _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
-        # We ignore the result variable for now
-        tree = ElementTree.parse(nmap_file_xml)
-        root = tree.getroot()
+    # def _execute_find_services_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
+    #     """
+    #     Executes the FindServices action in the real world
+    #     """
+    #     next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
+    #     self.logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
+    #     nmap_file_xml = 'nmap-result.xml'
+    #     command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"
+    #     _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
+    #     # We ignore the result variable for now
+    #     tree = ElementTree.parse(nmap_file_xml)
+    #     root = tree.getroot()
 
-        # service_dict is a dict. Key=IP(), values= set of Service() objects
-        found_services = set()
-        port_id = ''
-        protocol = ''
-        for host in root.findall('.//host'):
-            status_elem = host.find('./status')
-            if status_elem is not None and status_elem.get('state') == 'up':
-                ports_elem = host.find('./ports')
-                if ports_elem is not None:
-                    for port in root.findall('.//port[@protocol="tcp"]'):
-                        state_elem = port.find('./state[@state="open"]')
-                        if state_elem is not None:
-                            port_id = port.get('portid')
-                            protocol = port.get('protocol')
-                            service_elem = port.find('./service[@name]')
-                            service_name = service_elem.get('name') if service_elem is not None else "Unknown"
-                            service_fullname = f'{port_id}/{protocol}/{service_name}'
-                            service = components.Service(name=service_fullname, type=service_name, version='', is_local=False)
-                            found_services.add(service)
+    #     # service_dict is a dict. Key=IP(), values= set of Service() objects
+    #     found_services = set()
+    #     port_id = ''
+    #     protocol = ''
+    #     for host in root.findall('.//host'):
+    #         status_elem = host.find('./status')
+    #         if status_elem is not None and status_elem.get('state') == 'up':
+    #             ports_elem = host.find('./ports')
+    #             if ports_elem is not None:
+    #                 for port in root.findall('.//port[@protocol="tcp"]'):
+    #                     state_elem = port.find('./state[@state="open"]')
+    #                     if state_elem is not None:
+    #                         port_id = port.get('portid')
+    #                         protocol = port.get('protocol')
+    #                         service_elem = port.find('./service[@name]')
+    #                         service_name = service_elem.get('name') if service_elem is not None else "Unknown"
+    #                         service_fullname = f'{port_id}/{protocol}/{service_name}'
+    #                         service = components.Service(name=service_fullname, type=service_name, version='', is_local=False)
+    #                         found_services.add(service)
 
-                next_services[action.parameters["target_host"]] = found_services
+    #             next_services[action.parameters["target_host"]] = found_services
         
-        # If host was not known, add it to the known_hosts and known_networks ONLY if there are some found services
-        if action.parameters["target_host"] not in next_known_h:
-            self.logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
-            next_known_h.add(action.parameters["target_host"])
-            next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
+    #     # If host was not known, add it to the known_hosts and known_networks ONLY if there are some found services
+    #     if action.parameters["target_host"] not in next_known_h:
+    #         self.logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
+    #         next_known_h.add(action.parameters["target_host"])
+    #         next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
+    #     return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
     def _get_all_local_ips(self)->set:
         local_ips = set()
@@ -981,6 +981,149 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
             next_state = state
 
         
+        # Make the state we just got into, our current state
+        current_state = state
+        self.logger.info(f'New state: {next_state} ')
+
+
+        # Save the transition to the episode replay buffer if there is any
+        if self._episode_replay_buffer is not None:
+            self._episode_replay_buffer.append((current_state, action, reward, next_state))
+        # Return an observation
+        return next_state
+
+class NetworkSecurityEnvironmentRealWorld(NetworkSecurityEnvironment):
+    
+    def __init__(self, task_config_file, world_name="NetSecEnvRealWorld") -> None:
+        super().__init__(task_config_file, world_name)
+
+    def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
+        """
+        Executes the ScanNetwork action in the the real world
+        """
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
+        self.logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
+        nmap_file_xml = 'nmap-result.xml'
+        command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
+        _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
+        # We ignore the result variable for now
+        tree = ElementTree.parse(nmap_file_xml)
+        root = tree.getroot()
+        new_ips = set()
+        for host in root.findall('.//host'):
+            status_elem = host.find('./status')
+            if status_elem is not None:
+                status = host.find('./status').get('state')
+            else:
+                status = ""
+            ip_elem = host.find('./address[@addrtype="ipv4"]')
+            if ip_elem is not None:
+                ip = components.IP(str(ip_elem.get('addr')))
+            else:
+                ip = ""
+            
+            mac_elem = host.find('./address[@addrtype="mac"]')
+            if mac_elem is not None:
+                mac_address = mac_elem.get('addr', '')
+                vendor = mac_elem.get('vendor', '')
+            else:
+                mac_address = ""
+                vendor = ""
+
+            self.logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
+            new_ips.add(ip)
+        next_known_h = next_known_h.union(new_ips)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
+    
+    def _execute_find_services_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
+        """
+        Executes the FindServices action in the real world
+        """
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
+        self.logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
+        nmap_file_xml = 'nmap-result.xml'
+        command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"
+        _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
+        # We ignore the result variable for now
+        tree = ElementTree.parse(nmap_file_xml)
+        root = tree.getroot()
+
+        # service_dict is a dict. Key=IP(), values= set of Service() objects
+        found_services = set()
+        port_id = ''
+        protocol = ''
+        for host in root.findall('.//host'):
+            status_elem = host.find('./status')
+            if status_elem is not None and status_elem.get('state') == 'up':
+                ports_elem = host.find('./ports')
+                if ports_elem is not None:
+                    for port in root.findall('.//port[@protocol="tcp"]'):
+                        state_elem = port.find('./state[@state="open"]')
+                        if state_elem is not None:
+                            port_id = port.get('portid')
+                            protocol = port.get('protocol')
+                            service_elem = port.find('./service[@name]')
+                            service_name = service_elem.get('name') if service_elem is not None else "Unknown"
+                            service_fullname = f'{port_id}/{protocol}/{service_name}'
+                            service = components.Service(name=service_fullname, type=service_name, version='', is_local=False)
+                            found_services.add(service)
+
+                next_services[action.parameters["target_host"]] = found_services
+        
+        # If host was not known, add it to the known_hosts and known_networks ONLY if there are some found services
+        if action.parameters["target_host"] not in next_known_h:
+            self.logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
+            next_known_h.add(action.parameters["target_host"])
+            next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
+
+    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id)-> components.GameState:
+        """
+        Execute the action and update the values in the state
+        Before this function it was checked if the action was successful
+        So in here all actions were already successful.
+
+        - actions_type: Define if the action is simulated in netsecenv or in the real world
+        - agent_id: is the name or type of agent that requested the action
+
+        Returns: A new GameState
+        """
+        next_state = None
+        match action.type:
+            case components.ActionType.ScanNetwork:
+                    next_state = self._execute_scan_network_action_real_world(current_state, action)
+            case components.ActionType.FindServices:
+                next_state = self._execute_find_services_real_world(current_state, action)
+            case components.ActionType.FindData:
+                next_state = self._execute_find_data_action(current_state, action)
+            case components.ActionType.ExploitService:
+                next_state = self._execute_exploit_service_action(current_state, action)
+            case components.ActionType.ExfiltrateData:
+                next_state = self._execute_exfiltrate_data_action(current_state, action)
+            case components.ActionType.BlockIP:
+                next_state = self._execute_block_ip_action(current_state, action)
+            case _:
+                raise ValueError(f"Unknown Action type or other error: '{action.type}'")
+        return next_state
+
+    def step(self, state:components.GameState, action:components.Action, agent_id:tuple,action_type='netsecenv')-> components.GameState:
+        """
+        Take a step in the environment given an action
+        in: action
+        out: observation of the state of the env
+        """
+        self.logger.info(f"Agent {agent_id}. Action: {action}")
+        # Reward for taking an action
+        reward = self._rewards["step"]
+
+        # 1. Perform the action
+        self._actions_played.append(action)
+        
+        # No randomness in action success - we are playing in real world
+        next_state = self._execute_action(state, action, agent_id, action_type=action_type)
+        
+
+        
         # Make the state we just got into, our current state
         current_state = state
         self.logger.info(f'New state: {next_state} ')

From 9be648339c94c0129f12ef2d4a2d994b19ea2ad1 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:32:21 +0200
Subject: [PATCH 57/87] Add comments for clarity

---
 env/network_security_game.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index c133b585..50b0b40f 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -1095,12 +1095,16 @@ def _execute_action(self, current_state:components.GameState, action:components.
             case components.ActionType.FindServices:
                 next_state = self._execute_find_services_real_world(current_state, action)
             case components.ActionType.FindData:
+                # This Action type is not implemente in real world - use the simualtion
                 next_state = self._execute_find_data_action(current_state, action)
             case components.ActionType.ExploitService:
+                # This Action type is not implemente in real world - use the simualtion
                 next_state = self._execute_exploit_service_action(current_state, action)
             case components.ActionType.ExfiltrateData:
+                # This Action type is not implemente in real world - use the simualtion
                 next_state = self._execute_exfiltrate_data_action(current_state, action)
             case components.ActionType.BlockIP:
+                # This Action type is not implemente in real world - use the simualtion
                 next_state = self._execute_block_ip_action(current_state, action)
             case _:
                 raise ValueError(f"Unknown Action type or other error: '{action.type}'")

From c2d3b9b21a09acf166ed2cf1ae819c29978dcf11 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:36:02 +0200
Subject: [PATCH 58/87] Add real-world option to the environment selection

---
 coordinator.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/coordinator.py b/coordinator.py
index 762d233c..4413ef56 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -8,7 +8,7 @@
 import json
 import asyncio
 from datetime import datetime
-from env.network_security_game import NetworkSecurityEnvironment
+from env.network_security_game import NetworkSecurityEnvironment, NetworkSecurityEnvironmentRealWorld
 from env.aidojo_world import AIDojoWorld
 from env.game_components import Action, Observation, ActionType, GameStatus, GameState
 from utils.utils import observation_as_dict, get_logging_level
@@ -176,9 +176,12 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
         match world_type:
             case "netsecenv":
                 self._world = NetworkSecurityEnvironment(net_sec_config)
+            case "netsecenv-real-world":
+                self._world = NetworkSecurityEnvironmentRealWorld(net_sec_config)
             case _:
                 self._world = AIDojoWorld(net_sec_config)
         self.world_type = world_type
+        
         #  
         self._starting_positions_per_role = self._get_starting_position_per_role()
         self._win_conditions_per_role = self._get_win_condition_per_role()

From a89c42fc75834ff22ccfe66e8ea6b69447bbf3a4 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:36:50 +0200
Subject: [PATCH 59/87] Add class description

---
 env/network_security_game.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 50b0b40f..2e8e5f64 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -14,8 +14,6 @@
 import subprocess
 import xml.etree.ElementTree as ElementTree
 
- 
-
 class NetworkSecurityEnvironment(AIDojoWorld):
     """
     Class to manage the whole network security game
@@ -993,7 +991,11 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
         return next_state
 
 class NetworkSecurityEnvironmentRealWorld(NetworkSecurityEnvironment):
-    
+    """
+    Class to manage the whole network security game in the real world (current network)
+    It uses some Cyst libraries for the network topology
+    It presents a env environment to play
+    """
     def __init__(self, task_config_file, world_name="NetSecEnvRealWorld") -> None:
         super().__init__(task_config_file, world_name)
 

From 37364e44a7814cda5ceafebb95021302e80573d5 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:48:18 +0200
Subject: [PATCH 60/87] fix typo

---
 env/network_security_game.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 2e8e5f64..d04e0caa 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -1079,7 +1079,7 @@ def _execute_find_services_action_real_world(self, current_state:components.Game
             next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
-    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id)-> components.GameState:
+    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id, action_type='netsecenv')-> components.GameState:
         """
         Execute the action and update the values in the state
         Before this function it was checked if the action was successful
@@ -1095,7 +1095,7 @@ def _execute_action(self, current_state:components.GameState, action:components.
             case components.ActionType.ScanNetwork:
                     next_state = self._execute_scan_network_action_real_world(current_state, action)
             case components.ActionType.FindServices:
-                next_state = self._execute_find_services_real_world(current_state, action)
+                next_state = self._execute_find_services_action_real_world(current_state, action)
             case components.ActionType.FindData:
                 # This Action type is not implemente in real world - use the simualtion
                 next_state = self._execute_find_data_action(current_state, action)

From 7648e9d84acb752ee7363024f64548264a17a9cd Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:52:10 +0200
Subject: [PATCH 61/87] remove action_type as action parameter

---
 coordinator.py               |  2 +-
 env/network_security_game.py | 26 ++++++++++----------------
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/coordinator.py b/coordinator.py
index 4413ef56..2c4e5914 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -479,7 +479,7 @@ def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict:
             
             current_state = self._agent_states[agent_addr]
             # Build new Observation for the agent
-            self._agent_states[agent_addr] = self._world.step(current_state, action, agent_addr, self.world_type)
+            self._agent_states[agent_addr] = self._world.step(current_state, action, agent_addr)
             self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr)
 
             reward = self._world._rewards["step"]
diff --git a/env/network_security_game.py b/env/network_security_game.py
index d04e0caa..1b946f8c 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -475,7 +475,7 @@ def _get_data_content(self, host_ip:str, data_id:str)->str:
             self.logger.debug("Data content not found because target IP does not exists.")
         return content
     
-    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id, action_type='netsecenv')-> components.GameState:
+    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id)-> components.GameState:
         """
         Execute the action and update the values in the state
         Before this function it was checked if the action was successful
@@ -489,15 +489,9 @@ def _execute_action(self, current_state:components.GameState, action:components.
         next_state = None
         match action.type:
             case components.ActionType.ScanNetwork:
-                if action_type == "realworld":
-                    next_state = self._execute_scan_network_action_real_world(current_state, action)
-                else:
-                    next_state = self._execute_scan_network_action(current_state, action)
-            case components.ActionType.FindServices:
-                if action_type == "realworld":
-                    next_state = self._execute_find_services_real_world(current_state, action)
-                else:
-                    next_state = self._execute_find_services_action(current_state, action)
+                next_state = self._execute_scan_network_action(current_state, action)
+            case components.ActionType.FindServices:   
+                next_state = self._execute_find_services_action(current_state, action)
             case components.ActionType.FindData:
                 next_state = self._execute_find_data_action(current_state, action)
             case components.ActionType.ExploitService:
@@ -960,7 +954,7 @@ def reset(self)->None:
 
         self._actions_played = []
 
-    def step(self, state:components.GameState, action:components.Action, agent_id:tuple,action_type='netsecenv')-> components.GameState:
+    def step(self, state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
         """
         Take a step in the environment given an action
         in: action
@@ -972,8 +966,8 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
 
         # 1. Perform the action
         self._actions_played.append(action)
-        if random.random() <= action.type.default_success_p or action_type == 'realworld':
-            next_state = self._execute_action(state, action, agent_id, action_type=action_type)
+        if random.random() <= action.type.default_success_p:
+            next_state = self._execute_action(state, action, agent_id)
         else:
             self.logger.info("\tAction NOT sucessful")
             next_state = state
@@ -1079,7 +1073,7 @@ def _execute_find_services_action_real_world(self, current_state:components.Game
             next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
-    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id, action_type='netsecenv')-> components.GameState:
+    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id)-> components.GameState:
         """
         Execute the action and update the values in the state
         Before this function it was checked if the action was successful
@@ -1112,7 +1106,7 @@ def _execute_action(self, current_state:components.GameState, action:components.
                 raise ValueError(f"Unknown Action type or other error: '{action.type}'")
         return next_state
 
-    def step(self, state:components.GameState, action:components.Action, agent_id:tuple,action_type='netsecenv')-> components.GameState:
+    def step(self, state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
         """
         Take a step in the environment given an action
         in: action
@@ -1126,7 +1120,7 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
         self._actions_played.append(action)
         
         # No randomness in action success - we are playing in real world
-        next_state = self._execute_action(state, action, agent_id, action_type=action_type)
+        next_state = self._execute_action(state, action, agent_id)
         
 
         

From 00bc7b06485c8275e9f639c0376bd116b85e2690 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 8 Oct 2024 17:52:33 +0200
Subject: [PATCH 62/87] Remove unused commented-out real-world action code

---
 env/network_security_game.py | 80 ------------------------------------
 1 file changed, 80 deletions(-)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 1b946f8c..8eb6944f 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -721,86 +721,6 @@ def _execute_block_ip_action(self, current_state:components.GameState, action:co
             self.logger.info(f"\t\t\t Invalid source_host:'{action.parameters['source_host']}'")
         return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
 
-    # def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
-    #     """
-    #     Executes the ScanNetwork action in the the real world
-    #     """
-    #     next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-    #     self.logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
-    #     nmap_file_xml = 'nmap-result.xml'
-    #     command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
-    #     _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
-    #     # We ignore the result variable for now
-    #     tree = ElementTree.parse(nmap_file_xml)
-    #     root = tree.getroot()
-    #     new_ips = set()
-    #     for host in root.findall('.//host'):
-    #         status_elem = host.find('./status')
-    #         if status_elem is not None:
-    #             status = host.find('./status').get('state')
-    #         else:
-    #             status = ""
-    #         ip_elem = host.find('./address[@addrtype="ipv4"]')
-    #         if ip_elem is not None:
-    #             ip = components.IP(str(ip_elem.get('addr')))
-    #         else:
-    #             ip = ""
-            
-    #         mac_elem = host.find('./address[@addrtype="mac"]')
-    #         if mac_elem is not None:
-    #             mac_address = mac_elem.get('addr', '')
-    #             vendor = mac_elem.get('vendor', '')
-    #         else:
-    #             mac_address = ""
-    #             vendor = ""
-
-    #         self.logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
-    #         new_ips.add(ip)
-    #     next_known_h = next_known_h.union(new_ips)
-    #     return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
-    
-    # def _execute_find_services_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
-    #     """
-    #     Executes the FindServices action in the real world
-    #     """
-    #     next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-    #     self.logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
-    #     nmap_file_xml = 'nmap-result.xml'
-    #     command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"
-    #     _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
-    #     # We ignore the result variable for now
-    #     tree = ElementTree.parse(nmap_file_xml)
-    #     root = tree.getroot()
-
-    #     # service_dict is a dict. Key=IP(), values= set of Service() objects
-    #     found_services = set()
-    #     port_id = ''
-    #     protocol = ''
-    #     for host in root.findall('.//host'):
-    #         status_elem = host.find('./status')
-    #         if status_elem is not None and status_elem.get('state') == 'up':
-    #             ports_elem = host.find('./ports')
-    #             if ports_elem is not None:
-    #                 for port in root.findall('.//port[@protocol="tcp"]'):
-    #                     state_elem = port.find('./state[@state="open"]')
-    #                     if state_elem is not None:
-    #                         port_id = port.get('portid')
-    #                         protocol = port.get('protocol')
-    #                         service_elem = port.find('./service[@name]')
-    #                         service_name = service_elem.get('name') if service_elem is not None else "Unknown"
-    #                         service_fullname = f'{port_id}/{protocol}/{service_name}'
-    #                         service = components.Service(name=service_fullname, type=service_name, version='', is_local=False)
-    #                         found_services.add(service)
-
-    #             next_services[action.parameters["target_host"]] = found_services
-        
-    #     # If host was not known, add it to the known_hosts and known_networks ONLY if there are some found services
-    #     if action.parameters["target_host"] not in next_known_h:
-    #         self.logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
-    #         next_known_h.add(action.parameters["target_host"])
-    #         next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
-    #     return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
-
     def _get_all_local_ips(self)->set:
         local_ips = set()
         for net, ips in self._networks.items():

From 650a92a273747b978a52e4eedf88c864708fd366 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 9 Oct 2024 10:19:31 +0200
Subject: [PATCH 63/87] Add debug logging in block_action

---
 env/network_security_game.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/env/network_security_game.py b/env/network_security_game.py
index 8eb6944f..f44d649c 100755
--- a/env/network_security_game.py
+++ b/env/network_security_game.py
@@ -696,6 +696,7 @@ def _execute_block_ip_action(self, current_state:components.GameState, action:co
                 # Stop the blocked host to connect _to_ any other IP
                 try:
                     self._firewall[blocked_host] = set()
+                    self.logger.debug(f"Removing all allowed connections from {blocked_host}")
                 except KeyError:
                     # The blocked_host host was not in the list
                     pass
@@ -703,6 +704,7 @@ def _execute_block_ip_action(self, current_state:components.GameState, action:co
                 for host in self._firewall.keys():
                     try:
                         self._firewall[host].remove(blocked_host)
+                        self.logger.debug(f"Removing {blocked_host} from allowed connections from {host}")
                     except KeyError:
                         # The blocked_host host was not in the list
                         pass

From bd8202cf405891b383fce2c62de1930fee018a47 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 9 Oct 2024 14:24:26 +0200
Subject: [PATCH 64/87] Move worlds into a separate folder

---
 coordinator.py                                |   7 +-
 env/{ => worlds}/aidojo_world.py              |   0
 env/{ => worlds}/network_security_game.py     | 155 +----------------
 .../network_security_game_real_world.py       | 160 ++++++++++++++++++
 tests/test_actions.py                         |   2 +-
 5 files changed, 166 insertions(+), 158 deletions(-)
 rename env/{ => worlds}/aidojo_world.py (100%)
 rename env/{ => worlds}/network_security_game.py (85%)
 create mode 100644 env/worlds/network_security_game_real_world.py

diff --git a/coordinator.py b/coordinator.py
index 2c4e5914..c8737851 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -8,8 +8,9 @@
 import json
 import asyncio
 from datetime import datetime
-from env.network_security_game import NetworkSecurityEnvironment, NetworkSecurityEnvironmentRealWorld
-from env.aidojo_world import AIDojoWorld
+from env.worlds.network_security_game import NetworkSecurityEnvironment
+from env.worlds.network_security_game_real_world import NetworkSecurityEnvironmentRealWorld
+from env.worlds.aidojo_world import AIDojoWorld
 from env.game_components import Action, Observation, ActionType, GameStatus, GameState
 from utils.utils import observation_as_dict, get_logging_level
 from pathlib import Path
@@ -663,6 +664,6 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
     if task_config_file is None:
         raise KeyError("Task configuration must be provided to start the coordinator! Use -h for more details.")
     # Create AI Dojo
-    ai_dojo = AIDojo(host, port, task_config_file, world_type)
+    ai_dojo = AIDojo(host, port, task_config_file, world_type="netsecenv-real-world")
     # Run it!
     ai_dojo.run()
diff --git a/env/aidojo_world.py b/env/worlds/aidojo_world.py
similarity index 100%
rename from env/aidojo_world.py
rename to env/worlds/aidojo_world.py
diff --git a/env/network_security_game.py b/env/worlds/network_security_game.py
similarity index 85%
rename from env/network_security_game.py
rename to env/worlds/network_security_game.py
index f44d649c..b4056b94 100755
--- a/env/network_security_game.py
+++ b/env/worlds/network_security_game.py
@@ -10,9 +10,7 @@
 from cyst.api.configuration import NodeConfig, RouterConfig, ConnectionConfig, ExploitConfig, FirewallPolicy
 import numpy as np
 from faker import Faker
-from env.aidojo_world import AIDojoWorld
-import subprocess
-import xml.etree.ElementTree as ElementTree
+from env.worlds.aidojo_world import AIDojoWorld
 
 class NetworkSecurityEnvironment(AIDojoWorld):
     """
@@ -895,157 +893,6 @@ def step(self, state:components.GameState, action:components.Action, agent_id:tu
             next_state = state
 
         
-        # Make the state we just got into, our current state
-        current_state = state
-        self.logger.info(f'New state: {next_state} ')
-
-
-        # Save the transition to the episode replay buffer if there is any
-        if self._episode_replay_buffer is not None:
-            self._episode_replay_buffer.append((current_state, action, reward, next_state))
-        # Return an observation
-        return next_state
-
-class NetworkSecurityEnvironmentRealWorld(NetworkSecurityEnvironment):
-    """
-    Class to manage the whole network security game in the real world (current network)
-    It uses some Cyst libraries for the network topology
-    It presents a env environment to play
-    """
-    def __init__(self, task_config_file, world_name="NetSecEnvRealWorld") -> None:
-        super().__init__(task_config_file, world_name)
-
-    def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
-        """
-        Executes the ScanNetwork action in the the real world
-        """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        self.logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
-        nmap_file_xml = 'nmap-result.xml'
-        command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
-        _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
-        # We ignore the result variable for now
-        tree = ElementTree.parse(nmap_file_xml)
-        root = tree.getroot()
-        new_ips = set()
-        for host in root.findall('.//host'):
-            status_elem = host.find('./status')
-            if status_elem is not None:
-                status = host.find('./status').get('state')
-            else:
-                status = ""
-            ip_elem = host.find('./address[@addrtype="ipv4"]')
-            if ip_elem is not None:
-                ip = components.IP(str(ip_elem.get('addr')))
-            else:
-                ip = ""
-            
-            mac_elem = host.find('./address[@addrtype="mac"]')
-            if mac_elem is not None:
-                mac_address = mac_elem.get('addr', '')
-                vendor = mac_elem.get('vendor', '')
-            else:
-                mac_address = ""
-                vendor = ""
-
-            self.logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
-            new_ips.add(ip)
-        next_known_h = next_known_h.union(new_ips)
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
-    
-    def _execute_find_services_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
-        """
-        Executes the FindServices action in the real world
-        """
-        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
-        self.logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
-        nmap_file_xml = 'nmap-result.xml'
-        command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"
-        _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
-        # We ignore the result variable for now
-        tree = ElementTree.parse(nmap_file_xml)
-        root = tree.getroot()
-
-        # service_dict is a dict. Key=IP(), values= set of Service() objects
-        found_services = set()
-        port_id = ''
-        protocol = ''
-        for host in root.findall('.//host'):
-            status_elem = host.find('./status')
-            if status_elem is not None and status_elem.get('state') == 'up':
-                ports_elem = host.find('./ports')
-                if ports_elem is not None:
-                    for port in root.findall('.//port[@protocol="tcp"]'):
-                        state_elem = port.find('./state[@state="open"]')
-                        if state_elem is not None:
-                            port_id = port.get('portid')
-                            protocol = port.get('protocol')
-                            service_elem = port.find('./service[@name]')
-                            service_name = service_elem.get('name') if service_elem is not None else "Unknown"
-                            service_fullname = f'{port_id}/{protocol}/{service_name}'
-                            service = components.Service(name=service_fullname, type=service_name, version='', is_local=False)
-                            found_services.add(service)
-
-                next_services[action.parameters["target_host"]] = found_services
-        
-        # If host was not known, add it to the known_hosts and known_networks ONLY if there are some found services
-        if action.parameters["target_host"] not in next_known_h:
-            self.logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
-            next_known_h.add(action.parameters["target_host"])
-            next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
-        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
-
-    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id)-> components.GameState:
-        """
-        Execute the action and update the values in the state
-        Before this function it was checked if the action was successful
-        So in here all actions were already successful.
-
-        - actions_type: Define if the action is simulated in netsecenv or in the real world
-        - agent_id: is the name or type of agent that requested the action
-
-        Returns: A new GameState
-        """
-        next_state = None
-        match action.type:
-            case components.ActionType.ScanNetwork:
-                    next_state = self._execute_scan_network_action_real_world(current_state, action)
-            case components.ActionType.FindServices:
-                next_state = self._execute_find_services_action_real_world(current_state, action)
-            case components.ActionType.FindData:
-                # This Action type is not implemente in real world - use the simualtion
-                next_state = self._execute_find_data_action(current_state, action)
-            case components.ActionType.ExploitService:
-                # This Action type is not implemente in real world - use the simualtion
-                next_state = self._execute_exploit_service_action(current_state, action)
-            case components.ActionType.ExfiltrateData:
-                # This Action type is not implemente in real world - use the simualtion
-                next_state = self._execute_exfiltrate_data_action(current_state, action)
-            case components.ActionType.BlockIP:
-                # This Action type is not implemente in real world - use the simualtion
-                next_state = self._execute_block_ip_action(current_state, action)
-            case _:
-                raise ValueError(f"Unknown Action type or other error: '{action.type}'")
-        return next_state
-
-    def step(self, state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
-        """
-        Take a step in the environment given an action
-        in: action
-        out: observation of the state of the env
-        """
-        self.logger.info(f"Agent {agent_id}. Action: {action}")
-        # Reward for taking an action
-        reward = self._rewards["step"]
-
-        # 1. Perform the action
-        self._actions_played.append(action)
-        
-        # No randomness in action success - we are playing in real world
-        next_state = self._execute_action(state, action, agent_id)
-        
-
-        
         # Make the state we just got into, our current state
         current_state = state
         self.logger.info(f'New state: {next_state} ')
diff --git a/env/worlds/network_security_game_real_world.py b/env/worlds/network_security_game_real_world.py
new file mode 100644
index 00000000..d7276510
--- /dev/null
+++ b/env/worlds/network_security_game_real_world.py
@@ -0,0 +1,160 @@
+#Authors
+# Ondrej Lukas - ondrej.lukas@aic.fel.cvut.cz
+# Sebastian Garcia. sebastian.garcia@agents.fel.cvut.cz
+
+import env.game_components as components
+from env.worlds.network_security_game import NetworkSecurityEnvironment
+import subprocess
+import xml.etree.ElementTree as ElementTree
+
+
+class NetworkSecurityEnvironmentRealWorld(NetworkSecurityEnvironment):
+    """
+    Class to manage the whole network security game in the real world (current network)
+    It uses some Cyst libraries for the network topology
+    It presents a env environment to play
+    """
+    def __init__(self, task_config_file, world_name="NetSecEnvRealWorld") -> None:
+        super().__init__(task_config_file, world_name)
+
+    def _execute_scan_network_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
+        """
+        Executes the ScanNetwork action in the the real world
+        """
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
+        self.logger.info(f"\t\tScanning {action.parameters['target_network']} in real world.")
+        nmap_file_xml = 'nmap-result.xml'
+        command = f"nmap -sn {action.parameters['target_network']} -oX {nmap_file_xml}"
+        _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
+        # We ignore the result variable for now
+        tree = ElementTree.parse(nmap_file_xml)
+        root = tree.getroot()
+        new_ips = set()
+        for host in root.findall('.//host'):
+            status_elem = host.find('./status')
+            if status_elem is not None:
+                status = host.find('./status').get('state')
+            else:
+                status = ""
+            ip_elem = host.find('./address[@addrtype="ipv4"]')
+            if ip_elem is not None:
+                ip = components.IP(str(ip_elem.get('addr')))
+            else:
+                ip = ""
+            
+            mac_elem = host.find('./address[@addrtype="mac"]')
+            if mac_elem is not None:
+                mac_address = mac_elem.get('addr', '')
+                vendor = mac_elem.get('vendor', '')
+            else:
+                mac_address = ""
+                vendor = ""
+
+            self.logger.debug(f"\t\t\tAdding {ip} to new_ips. {status}, {mac_address}, {vendor}")
+            new_ips.add(ip)
+        next_known_h = next_known_h.union(new_ips)
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
+    
+    def _execute_find_services_action_real_world(self, current_state:components.GameState, action:components.Action)->components.GameState:
+        """
+        Executes the FindServices action in the real world
+        """
+        next_nets, next_known_h, next_controlled_h, next_services, next_data, next_blocked = self._state_parts_deep_copy(current_state)
+        self.logger.info(f"\t\tScanning ports in {action.parameters['target_host']} in real world.")
+        nmap_file_xml = 'nmap-result.xml'
+        command = f"nmap -sT -n {action.parameters['target_host']} -oX {nmap_file_xml}"
+        _ = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
+        # We ignore the result variable for now
+        tree = ElementTree.parse(nmap_file_xml)
+        root = tree.getroot()
+
+        # service_dict is a dict. Key=IP(), values= set of Service() objects
+        found_services = set()
+        port_id = ''
+        protocol = ''
+        for host in root.findall('.//host'):
+            status_elem = host.find('./status')
+            if status_elem is not None and status_elem.get('state') == 'up':
+                ports_elem = host.find('./ports')
+                if ports_elem is not None:
+                    for port in root.findall('.//port[@protocol="tcp"]'):
+                        state_elem = port.find('./state[@state="open"]')
+                        if state_elem is not None:
+                            port_id = port.get('portid')
+                            protocol = port.get('protocol')
+                            service_elem = port.find('./service[@name]')
+                            service_name = service_elem.get('name') if service_elem is not None else "Unknown"
+                            service_fullname = f'{port_id}/{protocol}/{service_name}'
+                            service = components.Service(name=service_fullname, type=service_name, version='', is_local=False)
+                            found_services.add(service)
+
+                next_services[action.parameters["target_host"]] = found_services
+        
+        # If host was not known, add it to the known_hosts and known_networks ONLY if there are some found services
+        if action.parameters["target_host"] not in next_known_h:
+            self.logger.info(f"\t\tAdding {action.parameters['target_host']} to known_hosts")
+            next_known_h.add(action.parameters["target_host"])
+            next_nets = next_nets.union({net for net, values in self._networks.items() if action.parameters["target_host"] in values})
+        return components.GameState(next_controlled_h, next_known_h, next_services, next_data, next_nets, next_blocked)
+
+    def _execute_action(self, current_state:components.GameState, action:components.Action, agent_id)-> components.GameState:
+        """
+        Execute the action and update the values in the state
+        Before this function it was checked if the action was successful
+        So in here all actions were already successful.
+
+        - actions_type: Define if the action is simulated in netsecenv or in the real world
+        - agent_id: is the name or type of agent that requested the action
+
+        Returns: A new GameState
+        """
+        next_state = None
+        match action.type:
+            case components.ActionType.ScanNetwork:
+                    next_state = self._execute_scan_network_action_real_world(current_state, action)
+            case components.ActionType.FindServices:
+                next_state = self._execute_find_services_action_real_world(current_state, action)
+            case components.ActionType.FindData:
+                # This Action type is not implemente in real world - use the simualtion
+                next_state = self._execute_find_data_action(current_state, action)
+            case components.ActionType.ExploitService:
+                # This Action type is not implemente in real world - use the simualtion
+                next_state = self._execute_exploit_service_action(current_state, action)
+            case components.ActionType.ExfiltrateData:
+                # This Action type is not implemente in real world - use the simualtion
+                next_state = self._execute_exfiltrate_data_action(current_state, action)
+            case components.ActionType.BlockIP:
+                # This Action type is not implemente in real world - use the simualtion
+                next_state = self._execute_block_ip_action(current_state, action)
+            case _:
+                raise ValueError(f"Unknown Action type or other error: '{action.type}'")
+        return next_state
+
+    def step(self, state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
+        """
+        Take a step in the environment given an action
+        in: action
+        out: observation of the state of the env
+        """
+        self.logger.info(f"Agent {agent_id}. Action: {action}")
+        # Reward for taking an action
+        reward = self._rewards["step"]
+
+        # 1. Perform the action
+        self._actions_played.append(action)
+        
+        # No randomness in action success - we are playing in real world
+        next_state = self._execute_action(state, action, agent_id)
+        
+
+        
+        # Make the state we just got into, our current state
+        current_state = state
+        self.logger.info(f'New state: {next_state} ')
+
+
+        # Save the transition to the episode replay buffer if there is any
+        if self._episode_replay_buffer is not None:
+            self._episode_replay_buffer.append((current_state, action, reward, next_state))
+        # Return an observation
+        return next_state
\ No newline at end of file
diff --git a/tests/test_actions.py b/tests/test_actions.py
index aa2aa3d0..c0a3e66d 100644
--- a/tests/test_actions.py
+++ b/tests/test_actions.py
@@ -5,7 +5,7 @@
 import sys
 from os import path
 sys.path.append( path.dirname(path.dirname( path.abspath(__file__) ) ))
-from env.network_security_game import NetworkSecurityEnvironment
+from env.worlds.network_security_game import NetworkSecurityEnvironment
 import env.game_components as components
 import pytest
 

From 3f7c6d1de5a647dc7072e524bd564a21323422da Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 9 Oct 2024 14:25:12 +0200
Subject: [PATCH 65/87] Do not use Real World NSG as default

---
 coordinator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/coordinator.py b/coordinator.py
index c8737851..332889da 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -664,6 +664,6 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
     if task_config_file is None:
         raise KeyError("Task configuration must be provided to start the coordinator! Use -h for more details.")
     # Create AI Dojo
-    ai_dojo = AIDojo(host, port, task_config_file, world_type="netsecenv-real-world")
+    ai_dojo = AIDojo(host, port, task_config_file, world_type)
     # Run it!
     ai_dojo.run()

From 4cce0b06f981ee3b1686a0c150d7e23e0010c472 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 9 Oct 2024 14:31:58 +0200
Subject: [PATCH 66/87] Create a class for cyst wrapper

---
 env/worlds/cyst_wrapper.py | 42 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 env/worlds/cyst_wrapper.py

diff --git a/env/worlds/cyst_wrapper.py b/env/worlds/cyst_wrapper.py
new file mode 100644
index 00000000..a772b5f7
--- /dev/null
+++ b/env/worlds/cyst_wrapper.py
@@ -0,0 +1,42 @@
+# Author Ondrej Lukas - ondrej.lukas@aic.fel.cvut.cz
+import env.game_components as components
+from env.worlds.aidojo_world import AIDojoWorld
+
+class CYSTWrapper(AIDojoWorld):
+    """
+    Adapter between CYST and the AI Dojo coordinator; every world operation is an unimplemented stub.
+    """
+    def __init__(self, task_config_file, world_name="CYST") -> None:
+        super().__init__(task_config_file, world_name)
+        self.logger.info("Initializing CYST environment")
+
+
+    def step(self, current_state:components.GameState, action:components.Action, agent_id:tuple)-> components.GameState:
+        """
+        Executes given action in a current state of the environment and produces new GameState (not implemented yet).
+        """
+        raise NotImplementedError
+
+    def create_state_from_view(self, view:dict, add_neighboring_nets:bool=True)->components.GameState:
+        """
+        Produces a GameState based on the view of the world (not implemented yet).
+        """
+        raise NotImplementedError
+    
+    def reset(self)->None:
+        """
+        Resets the world to its initial state (not implemented yet).
+        """
+        raise NotImplementedError
+
+    def update_goal_descriptions(self, goal_description:str)->str:
+        """
+        Takes the existing goal description (text) and updates it with respect to the world (not implemented yet).
+        """
+        raise NotImplementedError
+    
+    def update_goal_dict(self, goal_dict:dict)->dict:
+        """
+        Takes the existing goal dict and updates it with respect to the world (not implemented yet).
+        """
+        raise NotImplementedError
\ No newline at end of file

From 267e1dbcbdc5393e5cf2829e253f5cbbac6eeb4d Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Thu, 25 Jul 2024 17:53:06 +0200
Subject: [PATCH 67/87] fix graph building

---
 utils/trajectory_analysis.py | 39 ++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/utils/trajectory_analysis.py b/utils/trajectory_analysis.py
index df19d014..a08e523f 100644
--- a/utils/trajectory_analysis.py
+++ b/utils/trajectory_analysis.py
@@ -559,11 +559,10 @@ def gameplay_graph(game_plays:list, states, actions, end_reason=None)->tuple:
     for play in game_plays:
         if end_reason and play["end_reason"] not in end_reason:
             continue
-
+        state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][0]))
         for i in range(1, len(play["trajectory"]["actions"])):
-            state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][i-1]))
-            next_state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][i-1]))
-            action = Action.from_dict((play["trajectory"]["actions"][i-1]))
+            next_state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][i]))
+            action = Action.from_dict((play["trajectory"]["actions"][i]))
             if state not in states:
                 states[state] = len(states)
             if next_state not in states:
@@ -575,6 +574,8 @@ def gameplay_graph(game_plays:list, states, actions, end_reason=None)->tuple:
             if actions[action] not in edges[states[state], states[next_state]]:
                 edges[states[state], states[next_state]][actions[action]] = 0
             edges[states[state], states[next_state]][actions[action]] += 1
+            print(states[state], states[next_state], actions[action])
+            state = next_state
     return edges
 
 def get_graph_stats(edge_list, states, actions)->tuple:
@@ -639,16 +640,38 @@ def get_change_in_nodes(edge_list1, edge_list2):
         new.add(dst)
     return {n for n in new if n not in original}, {n for n in original if n not in new}
 
+def get_graph_modificiation(edge_list1, edege_list2):
+    """
+    Produces the addition and deletion graphs
+    """
+
 if __name__ == '__main__':
     # filter trajectories based on their ending
     END_REASON = None
     #END_REASON = ["goal_reached"]
     #game_plays = read_json("./trajectories/2024-07-03_QAgent_Attacker.jsonl")
-    game_plays = read_json("trajectories/2024-07-02_BaseAgent_Attacker.jsonl")
-    for play in game_plays:
+    game_plays_optimal= read_json("trajectories/2024-07-02_BaseAgent_Attacker.jsonl")
+    for play in game_plays_optimal:
         play["model"] = "Optimal"
-    generate_mdp_from_trajecotries(game_plays, filename="mdp_test", end_reason=END_REASON)
-    generate_sankey_from_trajecotries(game_plays, filename="sankey_test", end_reason=END_REASON, threshold=0.1)
+
+    game_plays_extra_steps = read_json("trajectories/2024-07-25_BaseAgent_Attacker.jsonl")
+    for play in game_plays_optimal:
+        play["model"] = "Not-optimal"
+    states = {}
+    actions = {}
+    edges_optimal = gameplay_graph(game_plays_optimal, states, actions,end_reason=END_REASON)
+    print("---------------------------")
+    edges_not_optimal = gameplay_graph(game_plays_extra_steps, states, actions,end_reason=END_REASON)
+    print(edges_optimal)
+    state_to_id = {v:k for k,v in states.items()}
+    action_to_id = {v:k for k,v in states.items()}
+    # print("optimal")
+    # get_graph_stats(edges_optimal, state_to_id, action_to_id)
+    # print("sub-optimal")
+    # get_graph_stats(edges_not_optimal, state_to_id, action_to_id)
+
+
+
     # print(compute_mean_length(game_plays))
     # get_action_type_barplot_per_step(game_plays, end_reason=END_REASON)
     # get_action_type_histogram_per_step(game_plays, end_reason=END_REASON)

From 8156dbadc3f9d1200da765edb56b849cb00ca1b1 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Fri, 26 Jul 2024 14:59:14 +0200
Subject: [PATCH 68/87] reset trajectory at the beginning of each episode

---
 coordinator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/coordinator.py b/coordinator.py
index 332889da..32aa5891 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -419,7 +419,7 @@ def _create_response_to_reset_game_action(self, agent_addr: tuple) -> dict:
         self._store_trajectory_to_file(agent_addr)
         new_observation = Observation(self._agent_states[agent_addr], 0, self.episode_end, {})
         # reset trajectory
-        self._reset_trajectory(agent_addr)
+        self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
         output_message_dict = {
             "to_agent": agent_addr,
             "status": str(GameStatus.OK),
@@ -453,7 +453,7 @@ def _store_trajectory_to_file(self, agent_addr, location="./trajectories"):
                 writer.write(self._agent_trajectories[agent_addr])
             self.logger.info(f"Trajectory of {agent_addr} strored in {filename}")
     
-    def _reset_trajectory(self,agent_addr)->dict:
+    def _reset_trajectory(self, agent_addr)->dict:
         agent_name, agent_role = self.agents[agent_addr]
         self.logger.debug(f"Resetting trajectory of {agent_addr}")
         return {

From f92b8f5fea6665bea78adc919b450b572c237f6b Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Fri, 26 Jul 2024 14:59:48 +0200
Subject: [PATCH 69/87] Add optional parameter to limit amount of trajectories
 to read

---
 utils/trajectory_analysis.py | 97 ++++++++++++++++++++++++------------
 1 file changed, 65 insertions(+), 32 deletions(-)

diff --git a/utils/trajectory_analysis.py b/utils/trajectory_analysis.py
index a08e523f..16d3caf8 100644
--- a/utils/trajectory_analysis.py
+++ b/utils/trajectory_analysis.py
@@ -15,11 +15,15 @@
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__) )))
 from env.game_components import GameState, Action, ActionType
 
-def read_json(filename)->list:
+   
+
+def read_json(filename, max_lines=50)->list:
     trajectories = []
     with jsonlines.open(filename) as reader:
         for obj in reader:
             trajectories.append(obj)
+            if len(trajectories) >= max_lines:  # cap the result at exactly max_lines entries
+                break  # remaining lines of the file are intentionally not read
     return trajectories
 
 def compute_mean_length(game_plays:list)->float:
@@ -574,37 +578,37 @@ def gameplay_graph(game_plays:list, states, actions, end_reason=None)->tuple:
             if actions[action] not in edges[states[state], states[next_state]]:
                 edges[states[state], states[next_state]][actions[action]] = 0
             edges[states[state], states[next_state]][actions[action]] += 1
-            print(states[state], states[next_state], actions[action])
             state = next_state
     return edges
 
 def get_graph_stats(edge_list, states, actions)->tuple:
-    visited_states = set()
-    played_actions = []
-    outgoing_edges = {}
-    incoming_edges = {}
-    loops = {}
-    for (src,dst), edges in edge_list.items():
-        visited_states.add(src)
-        visited_states.add(dst)
-        if src not in outgoing_edges:
-            outgoing_edges[src] = 0
-        outgoing_edges[src] += 1
-        if dst not in incoming_edges:
-            incoming_edges[dst] = 0
-        incoming_edges[dst] += 1
-        for a in edges.keys():
-            played_actions.append(a)
-        if src == dst:
-            loops[(src,dst)] = len(edges)
-    
-    print(f"Total visited states: {len(visited_states)}")
-    print(f"Total played actions: {len(played_actions)}")
-    print(f"Unique played actions: {len(set(played_actions))}")
-    print(f"Total State transformation edges: {len(edge_list)}")
-    print(f"Node in-degree - min:{np.min(list(incoming_edges.values()))}, max:{np.max(list(incoming_edges.values()))}, mean:{np.mean(list(incoming_edges.values()))}, std:{np.std(list(incoming_edges.values()))}")
-    print(f"Node out-degree - min:{np.min(list(outgoing_edges.values()))}, max:{np.max(list(outgoing_edges.values()))}, mean:{np.mean(list(outgoing_edges.values()))}, std:{np.std(list(outgoing_edges.values()))}")
-    print(f"Loops - min:{np.min(list(loops.values()))}, max:{np.max(list(loops.values()))}, mean:{np.mean(list(loops.values()))}, std:{np.std(list(loops.values()))}")
+    nodes = set()
+    edges = set()
+    simple_edges = set()
+    node_in_degree = {}
+    node_out_degree = {}
+    loop_edges = set()
+    for (src,dst) in edge_list:
+        nodes.add(src)
+        nodes.add(dst)
+        if src not in node_out_degree.keys():
+            node_out_degree[src] = 0
+        if dst not in node_in_degree.keys():
+            node_in_degree[dst] = 0
+        node_out_degree[src] += 1
+        node_in_degree[dst] += 1
+        simple_edges.add((src,dst))
+        for a in edge_list[src, dst]:
+            edges.add((src,dst,a))
+            if src == dst:
+                loop_edges.add((src,dst,a))
+    print(f"# Nodes:{len(nodes)}")
+    print(f"# Edges:{len(edges)}")
+    print(f"# Simple:{len(simple_edges)}")
+    print(f"# loops:{len(loop_edges)}")
+    print(f"node IN-degree: {np.mean(list(node_in_degree.values()))}+-{np.std(list(node_in_degree.values()))}")
+    print(f"node OUT-degree: {np.mean(list(node_out_degree.values()))}+-{np.std(list(node_out_degree.values()))}")
+    return nodes, edges, simple_edges, node_in_degree, node_out_degree, loop_edges
 
 def get_change_in_edges(edge_list1, edge_list2):
     removed_edges = {}
@@ -640,31 +644,60 @@ def get_change_in_nodes(edge_list1, edge_list2):
         new.add(dst)
     return {n for n in new if n not in original}, {n for n in original if n not in new}
 
-def get_graph_modificiation(edge_list1, edege_list2):
+def get_graph_modificiation(edge_list1, edge_list2):
     """
     Produces the addition and deletion graphs
     """
+    deleted_edges = {}
+    for k in edge_list1.keys():
+        if k not in edge_list2:
+            deleted_edges[k] = set(edge_list1[k].keys())
+        else:
+            diff = set()
+            for a in edge_list1[k].keys():
+                if a not in edge_list2[k]:
+                    diff.add(a)
+            if len(diff) > 0:
+                deleted_edges[k] = diff
+    added_edges = {}
+    for k in edge_list2.keys():
+        if k not in edge_list1:
+            added_edges[k] = set(edge_list2[k].keys())
+        else:
+            diff = set()
+            for a in edge_list2[k].keys():
+                if a not in edge_list1[k]:
+                    diff.add(a)
+            if len(diff) > 0:
+                added_edges[k] = diff
+    return added_edges, deleted_edges
 
 if __name__ == '__main__':
     # filter trajectories based on their ending
     END_REASON = None
     #END_REASON = ["goal_reached"]
     #game_plays = read_json("./trajectories/2024-07-03_QAgent_Attacker.jsonl")
-    game_plays_optimal= read_json("trajectories/2024-07-02_BaseAgent_Attacker.jsonl")
+    game_plays_optimal= read_json("trajectories/2024-07-25_BaseAgent_Attacker_optimal.jsonl")
     for play in game_plays_optimal:
         play["model"] = "Optimal"
 
-    game_plays_extra_steps = read_json("trajectories/2024-07-25_BaseAgent_Attacker.jsonl")
+    game_plays_extra_steps = read_json("trajectories/2024-07-25_BaseAgent_Attacker_failed.jsonl")
     for play in game_plays_optimal:
         play["model"] = "Not-optimal"
     states = {}
     actions = {}
     edges_optimal = gameplay_graph(game_plays_optimal, states, actions,end_reason=END_REASON)
-    print("---------------------------")
     edges_not_optimal = gameplay_graph(game_plays_extra_steps, states, actions,end_reason=END_REASON)
     print(edges_optimal)
+    print(edges_not_optimal)
     state_to_id = {v:k for k,v in states.items()}
     action_to_id = {v:k for k,v in states.items()}
+
+    added, deleted = get_graph_modificiation(edges_optimal, edges_not_optimal)
+    print("added:", added)
+    print("deleted:", deleted)
+    get_graph_stats(edges_optimal, state_to_id, action_to_id)
+    get_graph_stats(edges_not_optimal, state_to_id, action_to_id)
     # print("optimal")
     # get_graph_stats(edges_optimal, state_to_id, action_to_id)
     # print("sub-optimal")

From 170acb5f4e3a2ad24c68964431669c0d54df4618 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Fri, 26 Jul 2024 15:00:04 +0200
Subject: [PATCH 70/87] Separate behavioral graphs code

---
 utils/gamaplay_graphs.py | 117 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 utils/gamaplay_graphs.py

diff --git a/utils/gamaplay_graphs.py b/utils/gamaplay_graphs.py
new file mode 100644
index 00000000..384414f8
--- /dev/null
+++ b/utils/gamaplay_graphs.py
@@ -0,0 +1,117 @@
+from trajectory_analysis import read_json
+import numpy as np
+import sys
+import os 
+import utils
+import argparse
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__) )))
+from env.game_components import GameState, Action, ActionType
+
+
+
+def gameplay_graph(game_plays:list, states, actions, end_reason=None)->tuple:
+    edges = {}
+    nodes_timestamps = {}
+    for play in game_plays:
+        if end_reason and play["end_reason"] not in end_reason:
+            continue
+        state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][0]))
+        print(f'Trajectory len: {len(play["trajectory"]["actions"])}')
+        for i in range(1, len(play["trajectory"]["actions"])):
+            next_state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][i]))
+            action = Action.from_dict((play["trajectory"]["actions"][i]))
+            if state not in states:  # states is the caller's dict, extended in place; a new key gets the current size as its id
+                states[state] = len(states)
+            if next_state not in states:
+                states[next_state] = len(states)
+            if action not in actions:  # actions is extended in place the same way
+                actions[action] = len(actions)
+            if (states[state],states[next_state], actions[action]) not in edges:  # edges maps (source id, destination id, action id) to a count
+                edges[states[state],states[next_state], actions[action]] = 0
+            edges[states[state], states[next_state], actions[action]] += 1
+            if states[state] not in nodes_timestamps.keys():  # nodes_timestamps maps a node id to the set of loop indices at which it appeared
+                nodes_timestamps[states[state]] = set()
+            nodes_timestamps[states[state]].add(i-1)
+            if states[next_state] not in nodes_timestamps.keys():
+                nodes_timestamps[states[next_state]] = set()
+            nodes_timestamps[states[next_state]].add(i)
+            state = next_state
+
+    return edges, nodes_timestamps
+
+def get_graph_stats(edge_list, states, actions)->tuple:
+    nodes = set()
+    # Summarise the graph described by the (src, dst, action) keys of edge_list:
+    # print node, edge, simple-edge and loop counts plus mean+-std of the degrees,
+    # then return the collected sets and the per-node degree dictionaries.
+    # The states and actions arguments are accepted but not read here.
+    edges, simple_edges, loop_edges = set(), set(), set()
+    node_in_degree, node_out_degree = {}, {}
+    for src, dst, action in edge_list:
+        nodes.update((src, dst))
+        # every edge key adds one outgoing edge to src and one incoming edge to dst
+        node_out_degree[src] = node_out_degree.get(src, 0) + 1
+        node_in_degree[dst] = node_in_degree.get(dst, 0) + 1
+        simple_edges.add((src, dst))
+        labelled_edge = (src, dst, action)
+        edges.add(labelled_edge)
+        if src == dst:
+            # self-loop: source and destination are the same node
+            loop_edges.add(labelled_edge)
+    # report the summary on stdout
+    print(f"# Nodes:{len(nodes)}")
+    print(f"# Edges:{len(edges)}")
+    print(f"# Simple:{len(simple_edges)}")
+    print(f"# loops:{len(loop_edges)}")
+    print(f"node IN-degree: {np.mean(list(node_in_degree.values()))}+-{np.std(list(node_in_degree.values()))}")
+    print(f"node OUT-degree: {np.mean(list(node_out_degree.values()))}+-{np.std(list(node_out_degree.values()))}")
+    return nodes, edges, simple_edges, node_in_degree, node_out_degree, loop_edges
+
+def node_set(edge_list)->set:
+    """Return the set of node ids used as source or destination in the (src, dst, action) edge keys."""
+    endpoints = set()
+    for source, target, _action in edge_list:
+        endpoints.update((source, target))
+    return endpoints
+
+def get_graph_modificiation(edge_list1, edge_list2):
+    """Diff two edge dicts; returns (added edges, deleted edges, added nodes, deleted nodes)."""
+    old_edges, new_edges = set(edge_list1.keys()), set(edge_list2.keys())
+    old_nodes, new_nodes = node_set(edge_list1), node_set(edge_list2)
+    edge_changes = (new_edges - old_edges, old_edges - new_edges)
+    node_changes = (new_nodes - old_nodes, old_nodes - new_nodes)
+    return (*edge_changes, *node_changes)
+
+if __name__ == '__main__':
+    # CLI: build one gameplay graph per trajectory file (both share the state/action id dicts),
+    # then print per-graph statistics and the edge/node differences between the two graphs.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--t1", help="Trajectory file #1", action='store', required=True)
+    parser.add_argument("--t2", help="Trajectory file #2", action='store', required=True)
+    parser.add_argument("--end_reason", help="Filter options for trajectories", default=None, type=str, action='store', required=False)
+    parser.add_argument("--n_trajectories", help="Limit of how many trajectories to use", action='store', default=1000, type=int, required=False)
+    
+    args = parser.parse_args()
+    trajectories1 = read_json(args.t1, max_lines=args.n_trajectories)
+    trajectories2 = read_json(args.t2, max_lines=args.n_trajectories)
+    states = {}
+    actions = {}
+    
+    graph_t1, g1_timestaps = gameplay_graph(trajectories1, states, actions,end_reason=args.end_reason)
+    graph_t2, g2_timestaps = gameplay_graph(trajectories2, states, actions,end_reason=args.end_reason)
+    
+    state_to_id = {v:k for k,v in states.items()}
+    action_to_id = {v:k for k,v in states.items()}
+
+    print(f"Trajectory 1: {args.t1}")
+    get_graph_stats(graph_t1, state_to_id, action_to_id)
+    print(f"Trajectory 2: {args.t2}")
+    get_graph_stats(graph_t2, state_to_id, action_to_id)
+
+    a_edges, d_edges, a_nodes, d_nodes = get_graph_modificiation(graph_t1, graph_t2)
+    print(len(a_edges), len(d_edges), len(a_nodes), len(d_nodes))
+    print("positions of same states:")
+    for node in node_set(graph_t1).intersection(node_set(graph_t2)):
+        print(g1_timestaps[node], g2_timestaps[node])
+        print("-----------------------")
\ No newline at end of file

From f8116370503874ed9d4305273534fab999c1c19c Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Fri, 26 Jul 2024 15:17:02 +0200
Subject: [PATCH 71/87] fix Shutdown on ctrl+C

---
 coordinator.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/coordinator.py b/coordinator.py
index 32aa5891..2aefcaca 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -69,7 +69,12 @@ async def start_tasks(self):
         stop = loop.create_future()
         
         # register the signal handler to the stopping event
-        loop.add_signal_handler(signal.SIGINT, stop.set_result, None)
+         # register the signal handler to the stopping event
+        def shutdown():
+            self.logger.info("Received exit signal, shutting down.")
+            stop.set_result(None)
+        loop.add_signal_handler(signal.SIGINT, shutdown)
+        loop.add_signal_handler(signal.SIGTERM, shutdown)
 
         await stop # Event that triggers stopping the AIDojo
         # Stop the server

From 5fbc889a3dfd2bb79f6e3ed83104cac4cc1952cb Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 10 Sep 2024 14:12:42 +0200
Subject: [PATCH 72/87] Do not work with zero-len trajectories

---
 env/netsecenv_conf.yaml  |  2 +-
 utils/gamaplay_graphs.py | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 4ce2e3e3..4e855f74 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -98,7 +98,7 @@ env:
   # Or you can fix the seed
   # random_seed: 42
   scenario: 'scenario1'
-  max_steps: 100
+  max_steps: 50
   store_replay_buffer: False
   use_dynamic_addresses: False
   use_firewall: True
diff --git a/utils/gamaplay_graphs.py b/utils/gamaplay_graphs.py
index 384414f8..8ea80af1 100644
--- a/utils/gamaplay_graphs.py
+++ b/utils/gamaplay_graphs.py
@@ -16,8 +16,10 @@ def gameplay_graph(game_plays:list, states, actions, end_reason=None)->tuple:
     for play in game_plays:
         if end_reason and play["end_reason"] not in end_reason:
             continue
+        if len(play["trajectory"]["actions"]) == 0:
+            continue
         state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][0]))
-        print(f'Trajectory len: {len(play["trajectory"]["actions"])}')
+        #print(f'Trajectory len: {len(play["trajectory"]["actions"])}')
         for i in range(1, len(play["trajectory"]["actions"])):
             next_state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][i]))
             action = Action.from_dict((play["trajectory"]["actions"][i]))
@@ -110,8 +112,8 @@ def get_graph_modificiation(edge_list1, edge_list2):
     get_graph_stats(graph_t2, state_to_id, action_to_id)
 
     a_edges, d_edges, a_nodes, d_nodes = get_graph_modificiation(graph_t1, graph_t2)
-    print(len(a_edges), len(d_edges), len(a_nodes), len(d_nodes))
-    print("positions of same states:")
-    for node in node_set(graph_t1).intersection(node_set(graph_t2)):
-        print(g1_timestaps[node], g2_timestaps[node])
-        print("-----------------------")
\ No newline at end of file
+    print(f"AE:{len(a_edges)},DE:{len(d_edges)}, AN:{len(a_nodes)},DN:{len(d_nodes)}")
+    # print("positions of same states:")
+    # for node in node_set(graph_t1).intersection(node_set(graph_t2)):
+    #     print(g1_timestaps[node], g2_timestaps[node])
+    #     print("-----------------------")
\ No newline at end of file

From 14d2427f7c8445fac04b9d054797765503c2f6a7 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 9 Oct 2024 17:00:50 +0200
Subject: [PATCH 73/87] Fix merging issues

---
 coordinator.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/coordinator.py b/coordinator.py
index 2aefcaca..bbee5d27 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -69,12 +69,7 @@ async def start_tasks(self):
         stop = loop.create_future()
         
         # register the signal handler to the stopping event
-         # register the signal handler to the stopping event
-        def shutdown():
-            self.logger.info("Received exit signal, shutting down.")
-            stop.set_result(None)
-        loop.add_signal_handler(signal.SIGINT, shutdown)
-        loop.add_signal_handler(signal.SIGTERM, shutdown)
+        loop.add_signal_handler(signal.SIGINT, stop.set_result, None)
 
         await stop # Event that triggers stopping the AIDojo
         # Stop the server
@@ -458,7 +453,7 @@ def _store_trajectory_to_file(self, agent_addr, location="./trajectories"):
                 writer.write(self._agent_trajectories[agent_addr])
             self.logger.info(f"Trajectory of {agent_addr} strored in {filename}")
     
-    def _reset_trajectory(self, agent_addr)->dict:
+    def _reset_trajectory(self,agent_addr)->dict:
         agent_name, agent_role = self.agents[agent_addr]
         self.logger.debug(f"Resetting trajectory of {agent_addr}")
         return {
@@ -671,4 +666,4 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
     # Create AI Dojo
     ai_dojo = AIDojo(host, port, task_config_file, world_type)
     # Run it!
-    ai_dojo.run()
+    ai_dojo.run()
\ No newline at end of file

From 49e5a25110dfedf0d8d9d708b8084faa95bfe872 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Thu, 10 Oct 2024 22:00:48 +0200
Subject: [PATCH 74/87] add blocks to GameState ordered string

---
 utils/utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/utils/utils.py b/utils/utils.py
index eaba5497..36d09fc7 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -57,6 +57,9 @@ def state_as_ordered_string(state:GameState)->str:
     ret += "},data:{"
     for host in sorted(state.known_data.keys()):
         ret += f"{host}:[{','.join([str(x) for x in sorted(state.known_data[host])])}]"
+    ret += "}, blocks:{"
+    for host in sorted(state.known_blocks.keys()):
+        ret += f"{host}:[{','.join([str(x) for x in sorted(state.known_blocks[host])])}]"
     ret += "}"
     return ret
 

From e905399d3d107c1c1a319b857d869e3905af52fa Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 16 Oct 2024 12:01:22 +0200
Subject: [PATCH 75/87] Set logging level to WARNING

---
 coordinator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/coordinator.py b/coordinator.py
index bbee5d27..5815f7d0 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -626,7 +626,7 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
         action="store",
         required=False,
         type=str,
-        default="INFO",
+        default="WARNING",
     )
 
     args = parser.parse_args()

From 305eaaeffbdcb17b23087019ef2b26a5afb23606 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 16 Oct 2024 12:01:38 +0200
Subject: [PATCH 76/87] Add winrate to graphs

---
 utils/gamaplay_graphs.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/utils/gamaplay_graphs.py b/utils/gamaplay_graphs.py
index 8ea80af1..20179d72 100644
--- a/utils/gamaplay_graphs.py
+++ b/utils/gamaplay_graphs.py
@@ -13,11 +13,16 @@
 def gameplay_graph(game_plays:list, states, actions, end_reason=None)->tuple:
     edges = {}
     nodes_timestamps = {}
+    wins = []
     for play in game_plays:
         if end_reason and play["end_reason"] not in end_reason:
             continue
         if len(play["trajectory"]["actions"]) == 0:
             continue
+        if play["end_reason"] == "goal_reached":
+            wins.append(1)
+        else:
+            wins.append(0)
         state = utils.state_as_ordered_string(GameState.from_dict(play["trajectory"]["states"][0]))
         #print(f'Trajectory len: {len(play["trajectory"]["actions"])}')
         for i in range(1, len(play["trajectory"]["actions"])):
@@ -40,7 +45,7 @@ def gameplay_graph(game_plays:list, states, actions, end_reason=None)->tuple:
             nodes_timestamps[states[next_state]].add(i)
             state = next_state
 
-    return edges, nodes_timestamps
+    return edges, nodes_timestamps, np.mean(wins), np.std(wins)
 
 def get_graph_stats(edge_list, states, actions)->tuple:
     nodes = set()
@@ -100,15 +105,17 @@ def get_graph_modificiation(edge_list1, edge_list2):
     states = {}
     actions = {}
     
-    graph_t1, g1_timestaps = gameplay_graph(trajectories1, states, actions,end_reason=args.end_reason)
-    graph_t2, g2_timestaps = gameplay_graph(trajectories2, states, actions,end_reason=args.end_reason)
+    graph_t1, g1_timestaps, t1_wr_mean, t1_wr_std = gameplay_graph(trajectories1, states, actions,end_reason=args.end_reason)
+    graph_t2, g2_timestaps, t2_wr_mean, t2_wr_std = gameplay_graph(trajectories2, states, actions,end_reason=args.end_reason)
     
     state_to_id = {v:k for k,v in states.items()}
     action_to_id = {v:k for k,v in states.items()}
 
     print(f"Trajectory 1: {args.t1}")
+    print(f"WR={t1_wr_mean}±{t1_wr_std}")
     get_graph_stats(graph_t1, state_to_id, action_to_id)
     print(f"Trajectory 2: {args.t2}")
+    print(f"WR={t2_wr_mean}±{t2_wr_std}")
     get_graph_stats(graph_t2, state_to_id, action_to_id)
 
     a_edges, d_edges, a_nodes, d_nodes = get_graph_modificiation(graph_t1, graph_t2)

From a3670629cc6d03f10048cfd8701c7140d70ff183 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 16 Oct 2024 12:02:08 +0200
Subject: [PATCH 77/87] add backward compatibility to GS.from_dict

---
 env/game_components.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/env/game_components.py b/env/game_components.py
index f5352f91..fbe09166 100755
--- a/env/game_components.py
+++ b/env/game_components.py
@@ -413,6 +413,10 @@ def as_dict(self)->dict:
 
     @classmethod
     def from_dict(cls, data_dict:dict):
+        if "known_blocks" in data_dict:
+            known_blocks = {IP(target_host):{IP(blocked_host["ip"]) for blocked_host in blocked_hosts} for target_host, blocked_hosts in data_dict["known_blocks"].items()}
+        else:
+            known_blocks = {}
         state = GameState(
             known_networks = {Network(x["ip"], x["mask"]) for x in data_dict["known_networks"]},
             known_hosts = {IP(x["ip"]) for x in data_dict["known_hosts"]},
@@ -420,7 +424,7 @@ def from_dict(cls, data_dict:dict):
             known_services = {IP(k):{Service(s["name"], s["type"], s["version"], s["is_local"])
                 for s in services} for k,services in data_dict["known_services"].items()},  
             known_data = {IP(k):{Data(v["owner"], v["id"]) for v in values} for k,values in data_dict["known_data"].items()},
-            known_blocks = {IP(target_host):{IP(blocked_host["ip"]) for blocked_host in blocked_hosts} for target_host, blocked_hosts in data_dict["known_blocks"].items()}
+            known_blocks = known_blocks
                 )
         return state
 

From 63b5ec16666026ed4cd447a0e99bfac76954fd86 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 21 Oct 2024 17:02:43 +0200
Subject: [PATCH 78/87] Include only the required files in the Dockerfile copy

---
 .dockerignore | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..d37ac870
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,18 @@
+.git
+.github
+.gitignore
+.gitmodules
+.pytest_cache
+.ruff_cache
+.vscode
+docs/
+figures/
+mlruns/
+tests/
+trajectories/
+NetSecGameAgents/
+notebooks/
+readme_images/
+tests/
+*trajectories*.json
+README.md
\ No newline at end of file

From 1ce2d6469da9d45ebb3555d4a579d3d866c25a01 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Mon, 21 Oct 2024 17:03:05 +0200
Subject: [PATCH 79/87] Make sure to change the ip when creating the image

---
 Dockerfile | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..45677cae
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,29 @@
+# Use an official Python 3.12 runtime as a parent image
+FROM python:3.12-slim
+
+# Directory inside the container that the project is copied into
+ENV DESTINATION_DIR=/aidojo
+
+
+# Install system dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    git \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+RUN pip install --upgrade pip
+
+COPY .  ${DESTINATION_DIR}/
+
+# Set the working directory in the container
+WORKDIR  ${DESTINATION_DIR}
+
+# Install any necessary Python dependencies
+# If a requirements.txt file is in the repository
+RUN if [ -f requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; fi
+
+# change the server ip to 0.0.0.0
+RUN sed -i 's/"host": "127.0.0.1"/"host": "0.0.0.0"/' coordinator.conf
+
+# Run the Python script when the container launches
+CMD ["python3", "coordinator.py"]

From be9a4b87a4a2399bf2f7f0ee598a76634cecfad6 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Tue, 22 Oct 2024 16:01:38 +0200
Subject: [PATCH 80/87] Fixed correct paths

---
 env/worlds/aidojo_world.py |  6 +++++-
 env/worlds/cyst_wrapper.py | 19 ++++++++++++++++---
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/env/worlds/aidojo_world.py b/env/worlds/aidojo_world.py
index 7ae13fe6..3a813ce3 100644
--- a/env/worlds/aidojo_world.py
+++ b/env/worlds/aidojo_world.py
@@ -1,6 +1,10 @@
 # Author Ondrej Lukas - ondrej.lukas@aic.fel.cvut.cz
 # Template of world to be used in AI Dojo
-import env.game_components as components
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+import game_components as components
 import logging
 from utils.utils import ConfigParser
 
diff --git a/env/worlds/cyst_wrapper.py b/env/worlds/cyst_wrapper.py
index a772b5f7..02242dc1 100644
--- a/env/worlds/cyst_wrapper.py
+++ b/env/worlds/cyst_wrapper.py
@@ -1,6 +1,12 @@
 # Author Ondrej Lukas - ondrej.lukas@aic.fel.cvut.cz
-import env.game_components as components
-from env.worlds.aidojo_world import AIDojoWorld
+
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import game_components as components
+from worlds.aidojo_world import AIDojoWorld
+from cyst.api.environment.environment import Environment 
 
 class CYSTWrapper(AIDojoWorld):
     """
@@ -39,4 +45,11 @@ def update_goal_dict(self, goal_dict:dict)->dict:
         """
         Takes the existing goal dict and updates it with respect to the world.
         """
-        raise NotImplementedError
\ No newline at end of file
+        raise NotImplementedError
+
+
+if __name__ == "__main__":
+    cyst_wrapper = CYSTWrapper("env/netsecenv_conf.yaml")
+    objects = cyst_wrapper.task_config.get_scenario()
+    print(objects)
+    #e = Environment.create().configure(target, attacker, router, exploit1, connection1, connection2)
\ No newline at end of file

From e500813826642eee49dfa8671a2d0f1936f4ee3d Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 23 Oct 2024 14:03:57 +0200
Subject: [PATCH 81/87] add init file to worlds

---
 env/worlds/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 env/worlds/__init__.py

diff --git a/env/worlds/__init__.py b/env/worlds/__init__.py
new file mode 100644
index 00000000..e69de29b

From 7d4d0de57eca2797a3e40d69fe590742c0843e4c Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 23 Oct 2024 15:58:47 +0200
Subject: [PATCH 82/87] fix path

---
 env/worlds/aidojo_world.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/worlds/aidojo_world.py b/env/worlds/aidojo_world.py
index 3a813ce3..a99ae501 100644
--- a/env/worlds/aidojo_world.py
+++ b/env/worlds/aidojo_world.py
@@ -4,7 +4,7 @@
 import os
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-import game_components as components
+import env.game_components as components
 import logging
 from utils.utils import ConfigParser
 

From 05abd19eb4cfb1a0566f3585757f62c1213f3c5d Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 23 Oct 2024 18:17:01 +0200
Subject: [PATCH 83/87] Add detection functionality

---
 coordinator.py          | 23 +++++++++++++++++++----
 env/netsecenv_conf.yaml |  2 +-
 utils/utils.py          |  7 +++++++
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/coordinator.py b/coordinator.py
index 5815f7d0..507d7eeb 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -183,7 +183,8 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
                 self._world = AIDojoWorld(net_sec_config)
         self.world_type = world_type
         
-        #  
+        
+
         self._starting_positions_per_role = self._get_starting_position_per_role()
         self._win_conditions_per_role = self._get_win_condition_per_role()
         self._goal_description_per_role = self._get_goal_description_per_role()
@@ -203,6 +204,7 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
         # goal reach status per agent_addr (bool)
         self._agent_goal_reached = {}
         self._agent_episode_ends = {}
+        self._agent_detected = {}
         # trajectories per agent_addr
         self._agent_trajectories = {}
     
@@ -302,8 +304,9 @@ def _initialize_new_player(self, agent_addr:tuple, agent_name:str, agent_role:st
         self._agent_starting_position[agent_addr] = self._starting_positions_per_role[agent_role]
         self._agent_states[agent_addr] = self._world.create_state_from_view(self._agent_starting_position[agent_addr])
         self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr) 
+        self._agent_detected[agent_addr] = self._check_detection(agent_addr, None) 
         self._agent_episode_ends[agent_addr] = False
-        if self._world.task_config.get_store_trajectories():
+        if self._world.task_config.get_store_trajectories() or self._world.task_config.get_use_global_defender():
             self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
         self.logger.info(f"\tAgent {agent_name} ({agent_addr}), registred as {agent_role}")
         return Observation(self._agent_states[agent_addr], 0, False, {})
@@ -416,7 +419,8 @@ def _create_response_to_reset_game_action(self, agent_addr: tuple) -> dict:
             f"Coordinator responding to RESET request from agent {agent_addr}"
         )
         # store trajectory in file if needed
-        self._store_trajectory_to_file(agent_addr)
+        if self._world.task_config.get_store_trajectories():
+            self._store_trajectory_to_file(agent_addr)
         new_observation = Observation(self._agent_states[agent_addr], 0, self.episode_end, {})
         # reset trajectory
         self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
@@ -483,6 +487,8 @@ def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict:
             self._agent_states[agent_addr] = self._world.step(current_state, action, agent_addr)
             self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr)
 
+            self._agent_detected[agent_addr] = self._check_detection(agent_addr, action)
+
             reward = self._world._rewards["step"]
             obs_info = {}
             end_reason = None
@@ -495,6 +501,11 @@ def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict:
                 self._agent_episode_ends[agent_addr] = True
                 obs_info = {"end_reason": "max_steps"}
                 end_reason = "max_steps"
+            elif self._agent_detected[agent_addr]:
+                reward += self._world._rewards["detection"]
+                self._agent_episode_ends[agent_addr] = True
+                obs_info = {"end_reason": "detected"}
+                end_reason = "detected"  # stored with the trajectory step recorded below
             # record step in trajecory
             self._add_step_to_trajectory(agent_addr, action, reward,self._agent_states[agent_addr], end_reason)
             new_observation = Observation(self._agent_states[agent_addr], reward, self.episode_end, info=obs_info)
@@ -584,6 +595,10 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
         self.logger.debug(f"\t{goal_reached}")
         return all(goal_reached.values())
 
+    def _check_detection(self, agent_addr:tuple, last_action:Action):
+        self.logger.info(f"Detection check for {agent_addr}({self.agents[agent_addr][1]})")
+        self.logger.info("\tNot detected!")
+        return False
 
 __version__ = "v0.2.1"
 
@@ -626,7 +641,7 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
         action="store",
         required=False,
         type=str,
-        default="WARNING",
+        default="INFO",
     )
 
     args = parser.parse_args()
diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 4e855f74..31403ecf 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -98,8 +98,8 @@ env:
   # Or you can fix the seed
   # random_seed: 42
   scenario: 'scenario1'
+  use_global_defender: True
   max_steps: 50
-  store_replay_buffer: False
   use_dynamic_addresses: False
   use_firewall: True
   save_trajectories: False
diff --git a/utils/utils.py b/utils/utils.py
index 36d09fc7..562925d0 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -498,6 +498,13 @@ def get_use_firewall(self)->bool:
             use_firewall = False
         return use_firewall
 
+    def get_use_global_defender(self)->bool:
+        """Return the `use_global_defender` flag of the `env` config section (False if missing)."""
+        try:
+            return self.config['env']['use_global_defender']
+        except KeyError:
+            return False
+    
 def get_logging_level(debug_level):
     """
     Configure logging level based on the provided debug_level string.

From 00cd35d80aac30cad611cbd0813ac9dd03e3dc0b Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 23 Oct 2024 23:50:13 +0200
Subject: [PATCH 84/87] Add option to have global defender

---
 coordinator.py         | 21 ++++++++----
 env/global_defender.py | 77 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 6 deletions(-)
 create mode 100644 env/global_defender.py

diff --git a/coordinator.py b/coordinator.py
index 507d7eeb..29cb98f0 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -16,6 +16,7 @@
 from pathlib import Path
 import os
 import signal
+from env.global_defender import stochastic_with_threshold
 
 class AIDojo:
     def __init__(self, host: str, port: int, net_sec_config: str, world_type) -> None:
@@ -189,7 +190,7 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
         self._win_conditions_per_role = self._get_win_condition_per_role()
         self._goal_description_per_role = self._get_goal_description_per_role()
         self._steps_limit = self._world.task_config.get_max_steps()
-
+        self._use_global_defender = self._world.task_config.get_use_global_defender()
         # player information
         self.agents = {}
         # step counter per agent_addr (int)
@@ -306,7 +307,7 @@ def _initialize_new_player(self, agent_addr:tuple, agent_name:str, agent_role:st
         self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr) 
         self._agent_detected[agent_addr] = self._check_detection(agent_addr, None) 
         self._agent_episode_ends[agent_addr] = False
-        if self._world.task_config.get_store_trajectories() or self._world.task_config.get_use_global_defender():
+        if self._world.task_config.get_store_trajectories() or self._use_global_defender:
             self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
         self.logger.info(f"\tAgent {agent_name} ({agent_addr}), registred as {agent_role}")
         return Observation(self._agent_states[agent_addr], 0, False, {})
@@ -595,11 +596,19 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
         self.logger.debug(f"\t{goal_reached}")
         return all(goal_reached.values())
 
-    def _check_detection(self, agent_addr:tuple, last_action:Action):
+    def _check_detection(self, agent_addr:tuple, last_action:Action)->bool:
         self.logger.info(f"Detection check for {agent_addr}({self.agents[agent_addr][1]})")
-        self.logger.info("\tNot detected!")
-        return False
-
+        detection = False
+        if last_action:
+            if self._use_global_defender:
+                self.logger.warning("Global defender - ONLY use for backward compatibility!")
+                episode_actions = self._agent_trajectories[agent_addr]["actions"] if "actions" in self._agent_trajectories[agent_addr] else []
+                detection =  stochastic_with_threshold(last_action, episode_actions)
+        if detection:
+            self.logger.info("\tDetected!")
+        else:
+            self.logger.info("\tNot detected!")
+        return detection
 __version__ = "v0.2.1"
 
 
diff --git a/env/global_defender.py b/env/global_defender.py
new file mode 100644
index 00000000..177c6c14
--- /dev/null
+++ b/env/global_defender.py
@@ -0,0 +1,77 @@
+# Author: Ondrej Lukas - ondrej.lukas@aic.fel.cvut.cz
+from itertools import groupby
+from .game_components import ActionType, Action
+from random import random
+
+# Probability that `stochastic` reports a detection for an action of the given type.
+DEFAULT_DETECTION_PROBS = {
+    ActionType.ScanNetwork: 0.05,
+    ActionType.FindServices: 0.075,
+    ActionType.ExploitService: 0.1,
+    ActionType.FindData: 0.025,
+    ActionType.ExfiltrateData: 0.025,
+    ActionType.BlockIP:0
+}
+# Share of the time window an action type must reach before a detection is attempted.
+TW_RATIOS = {
+    ActionType.ScanNetwork: 0.25,
+    ActionType.FindServices: 0.3,
+    ActionType.ExploitService: 0.25,
+    ActionType.FindData: 0.5,
+    ActionType.ExfiltrateData: 0.25,
+    ActionType.BlockIP:1
+}
+# Longest run of one action type in the time window that triggers a detection attempt.
+CONSECUTIVE_THRESHOLD = {
+    ActionType.ScanNetwork: 2,
+    ActionType.FindServices: 3,
+    ActionType.ExfiltrateData: 2,
+}
+REPEATED_THRESHOLD = {  # min. repeats of an identical action in the episode for a detection attempt
+    ActionType.ExploitService: 2,
+    ActionType.FindData: 2,
+}
+
+def stochastic(action_type:ActionType)->bool:
+    """
+    Randomly decide detection using the predefined probability of the ActionType.
+    Raises KeyError for action types missing from DEFAULT_DETECTION_PROBS.
+    """
+    return random() < DEFAULT_DETECTION_PROBS[action_type]
+
+def stochastic_with_threshold(action:Action, episode_actions:list, tw_size:int=5)-> bool:
+    """
+    Only detect based on set probabilities if pre-defined thresholds are crossed.
+
+    The action is evaluated together with `episode_actions` (the actions played
+    earlier in the episode); the caller's list is NOT modified. Nothing is detected
+    until at least `tw_size` actions (including this one) have been played.
+    """
+    # Work on a copy extended with the latest action so the caller's list stays intact
+    history = [*episode_actions, action]
+    if len(history) < tw_size:
+        return False
+    # Action types in the time window (the last tw_size actions)
+    window_types = [played.type for played in history[-tw_size:]]
+    # Ratio of this action type in the time window
+    tw_ratio = window_types.count(action.type)/tw_size
+    # Longest run of consecutive actions of this type in the time window
+    max_consecutive = max(sum(1 for item in grouped if item == action.type)
+                          for _, grouped in groupby(window_types))
+    if action.type in CONSECUTIVE_THRESHOLD:
+        # ScanNetwork, FindServices, ExfiltrateData
+        below_thresholds = (tw_ratio < TW_RATIOS[action.type]
+                            and max_consecutive < CONSECUTIVE_THRESHOLD[action.type])
+    elif action.type in REPEATED_THRESHOLD:
+        # FindData, ExploitService
+        # Count how many times this exact (parametrized) action was played in the episode
+        repeats_in_episode = history.count(action)
+        below_thresholds = (tw_ratio < TW_RATIOS[action.type]
+                            and repeats_in_episode < REPEATED_THRESHOLD[action.type])
+    else:
+        # Other actions - Do not detect
+        return False
+    if below_thresholds:
+        return False
+    # A threshold was crossed - detect with the probability set for this action type
+    return stochastic(action.type)

From 2d77899a29b89eedfdb495bfca23356836da50f1 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Wed, 23 Oct 2024 23:56:07 +0200
Subject: [PATCH 85/87] Disable global defender by default

---
 env/netsecenv_conf.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 31403ecf..45675b2e 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -98,7 +98,7 @@ env:
   # Or you can fix the seed
   # random_seed: 42
   scenario: 'scenario1'
-  use_global_defender: True
+  use_global_defender: False
   max_steps: 50
   use_dynamic_addresses: False
   use_firewall: True

From 7e8b46c155951ccb356a8b303eb3becba46e5bef Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Thu, 24 Oct 2024 00:04:38 +0200
Subject: [PATCH 86/87] Add update for the global defender in the readme

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 37cb8c4e..c89cda0c 100755
--- a/README.md
+++ b/README.md
@@ -64,7 +64,9 @@ The [scenarios](#definition-of-the-network-topology) define the **topology** of
 ### Actions for the defender
 The defender does have the action to block an IP address in a target host. 
 
-There is no global defender anymore as there was before, because now it is a multi-agent system.
+
+> [!NOTE]  
+> The global defender, which was available in the previous versions of the environment will not be supported in the fuutre. To enable backward compatibilty, the global defender functionality can be enabled by adding `use_global_defender: True` to the configuration YAML file in the `env` section. This option is disabled by default.
 
 The actions are:
 - BlockIP(). That takes as parameters:

From 8d69a1240588e5826c0a8f5663f641b3426533ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20Luk=C3=A1=C5=A1?= <ondrej.lukas95@gmail.com>
Date: Thu, 24 Oct 2024 00:09:38 +0200
Subject: [PATCH 87/87] Fix typos and grammar README.md

---
 README.md | 56 +++++++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index c89cda0c..e41404da 100755
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@ The NetSecGame environment has several components in the following files:
 
 - File `env/network_security_game.py` implements the game environment
 - File `env/game_components.py` implements a library with objects used in the environment. See [detailed explanation](docs/Components.md) of the game components.
-- File `utils/utils.py` is a collection of utils function which the agents can use
+- File `utils/utils.py` is a collection of utils functions which the agents can use
 - Files in the `env/scenarios` folder, such as `env/scenarios/scenario_configuration.py`. Implements the network game's configuration of hosts, data, services, and connections. It is taken from CYST.
 The [scenarios](#definition-of-the-network-topology) define the **topology** of a network (number of hosts, connections, networks, services, data, users, firewall rules, etc.) while the [task-configuration](#task-configuration) is to be used for definition of the exact task for the agent in one of the scenarios (with fix topology).
 - Agents compatible with the NetSecGame are located in a separate repository [NetSecGameAgents](https://github.com/stratosphereips/NetSecGameAgents/tree/main)
@@ -50,7 +50,7 @@ The [scenarios](#definition-of-the-network-topology) define the **topology** of
 2. If the attacker does a successful action in the same step that the defender successfully detects the action, the priority goes to the defender. The reward is a penalty, and the game ends.
 (From commit d6d4ac9, July 18th, 2024, the new action BlockIP removes controlled hosts from the state of others. So the state can get smaller)
 
-- The action FindServices finds the new services in a host. If in a subsequent call to FindServices there are less services, they completely replace the list of previous services found. That is, each list of services is the final one and no memory is retained of previous open services.
+- The action FindServices finds the new services in a host. If in a subsequent call to FindServices there are fewer services, they completely replace the list of previous services found. That is, each list of services is the final one, and no memory of previous open services is retained.
 
 ### Assumption and Conditions for Actions
 1. When playing the `ExploitService` action, it is expected that the agent has discovered this service before (by playing `FindServices` in the `target_host` before this action)
@@ -59,24 +59,24 @@ The [scenarios](#definition-of-the-network-topology) define the **topology** of
 4. Playing `ExfiltrateData` requires controlling **BOTH** source and target hosts
 5. Playing `Find Services` can be used to discover hosts (if those have any active services)
 6. Parameters of `ScanNetwork` and `FindServices` can be chosen arbitrarily (they don't have to be listed in `known_newtworks`/`known_hosts`)
-7. The `BlockIP` action needs its three parameters (Source host, Target host and Blocked host) to be in the controlled list of the Agent. 
+7. The `BlockIP` action needs its three parameters (Source host, Target host, and Blocked host) to be in the controlled list of the Agent. 
 
 ### Actions for the defender
 The defender does have the action to block an IP address in a target host. 
 
 
 > [!NOTE]  
-> The global defender, which was available in the previous versions of the environment will not be supported in the fuutre. To enable backward compatibilty, the global defender functionality can be enabled by adding `use_global_defender: True` to the configuration YAML file in the `env` section. This option is disabled by default.
+> The global defender, available in the previous environment versions, will not be supported in the future. To enable backward compatibility, the global defender functionality can be enabled by adding `use_global_defender: True` to the configuration YAML file in the `env` section. This option is disabled by default.
 
 The actions are:
 - BlockIP(). That takes as parameters:
   - "target_host": IP object where the block will be applied.
-  - "source_host": IP object where this actions is executed from.
+  - "source_host": IP object from which this action is executed.
   - "blocked_host": IP object to block in ANY direction as seen in the target_host.
 
 
 ### Starting the game
-The environment should be created prior strating the agents. The properties of the environment can be defined in a YAML file. The game server can be started by running:
+The environment should be created before starting the agents. The properties of the environment can be defined in a YAML file. The game server can be started by running:
 ```python3 coordinator.py```
 
 When created, the environment:
@@ -84,16 +84,16 @@ When created, the environment:
 2. loads the network configuration from the config file
 3. reads the defender type from the configuration
 4. creates starting position and goal position following the config file
-5. starts the game server in specified address and port
+5. starts the game server on the specified address and port
 
 ### Interaction with the Environment
-When the game server is created, [agents](https://github.com/stratosphereips/NetSecGameAgents/tree/main) connect to it and interact with the environment. In every step of the interaction, agents submits an [Action](./docs/Components.md#actions) and receives [Observation](./docs/Components.md#observations) with `next_state`, `reward`, `is_terminal`, `end`, and `info` values. Once the terminal state or timeout is reached, no more interaction is possible until the agent asks for game reset. Each agent should extend the `BaseAgent` class in [agents](https://github.com/stratosphereips/NetSecGameAgents/tree/main).
+When the game server is created, [agents](https://github.com/stratosphereips/NetSecGameAgents/tree/main) connect to it and interact with the environment. In every step of the interaction, each agent submits an [Action](./docs/Components.md#actions) and receives an [Observation](./docs/Components.md#observations) with `next_state`, `reward`, `is_terminal`, `end`, and `info` values. Once the terminal state or timeout is reached, no more interaction is possible until the agent asks for a game reset. Each agent should extend the `BaseAgent` class in [agents](https://github.com/stratosphereips/NetSecGameAgents/tree/main).
 
 
 ## Configuration
-The NetSecEnv is highly configurable in terms of the properties of the world, tasks and agent interacation. Modification of the world is done in the YAML configuration file in two main areas:
-1. Environment (`env` section) controls the properties of the world (taxonomy of networks, maximum allowed steps per episode, probabilities of success of actions etc.)
-2. Task configuration defines the agents properties (starting position, goal)
+The NetSecEnv is highly configurable in terms of the properties of the world, tasks, and agent interaction. Modification of the world is done in the YAML configuration file in two main areas:
+1. Environment (`env` section) controls the properties of the world (taxonomy of networks, maximum allowed steps per episode, probabilities of action success, etc.)
+2. Task configuration defines the agents' properties (starting position, goal)
 
 ### Environment configuration
 The environment part defines the properties of the environment for the task (see the example below). In particular:
@@ -102,11 +102,11 @@ The environment part defines the properties of the environment for the task (see
 - `max_steps` - sets the maximum number of steps an agent can make before an episode is terminated
 - `store_replay_buffer` - if `True`, interaction of the agents is serialized and stored in a file
 - `use_dynamic_addresses` - if `True`, the network and IP addresses defined in `scenario` are randomly changed at the beginning of **EVERY** episode (the network topology is kept as defined in the `scenario`. Relations between networks are kept, IPs inside networks are chosen at random based on the network IP and mask)
-- `  use_firewall` - if `True` firewall rules defined in `scenario` are used when executing actions. When `False`, firewall is ignored and all connections are allowed (Default)
+- `  use_firewall` - if `True` firewall rules defined in `scenario` are used when executing actions. When `False`, the firewall is ignored, and all connections are allowed (Default)
 - `goal_reward` - sets reward which agent gets when it reaches the goal (default 100)
-- `detection_reward` - sets reward which agent gets when it is detected (default -50)
+- `detection_reward` - sets the reward that the agent gets when it is detected (default -50)
 - `step_reward` - sets reward which agent gets for every step taken (default -1)
-- `actions` - defines probability of success for every ActionType
+- `actions` - defines the probability of success for every ActionType
 
 ```YAML
 env:
@@ -132,28 +132,28 @@ env:
 ```
 
 ## Task configuration
-The task configuration part (section `coordinator[agents]`) defines the starting and goal position of the attacker and type of defender that is used.
+The task configuration part (section `coordinator[agents]`) defines the starting and goal position of the attacker and the type of defender that is used.
 
 ### Attacker configuration (`attackers`)
 Configuration of the attacking agents. Consists of two parts:
-1. Goal definition (`goal`) which describes the `GameState` properties that must be fullfiled to award `goal_reward` to the attacker:
+1. Goal definition (`goal`) which describes the `GameState` properties that must be fulfilled to award `goal_reward` to the attacker:
     - `known_networks:`(set)
     - `known_hosts`(set)
     - `controlled_hosts`(set)
     - `known_services`(dict)
     - `known_data`(dict)
 
-     Each of the part can be empty (not part of the goal, exactly defined (e.g. `known_networks: [192.168.1.0/24, 192.168.3.0/24]`) or include keyword `random` (`controlled_hosts: [213.47.23.195, random]`, `known_data: {213.47.23.195: [random]}`.
-    Addtitionally  if `random` keyword is used int he goal definition, 
+     Each of the parts can be empty (not part of the goal), exactly defined (e.g., `known_networks: [192.168.1.0/24, 192.168.3.0/24]`), or include the keyword `random` (`controlled_hosts: [213.47.23.195, random]`, `known_data: {213.47.23.195: [random]}`).
+    Additionally, if the `random` keyword is used in the goal definition, its resolution is controlled by 
     `randomize_goal_every_episode`. If set to `True`, each keyword `random` is replaced with a randomly selected, valid option at the beginning of **EVERY** episode. If set to `False`, randomization is performed only **once** when the environment is 
-2. Definiton of starting position (`start_position`) which describes the `GameState` in which the attacker starts. It consists of:
+2. Definition of starting position (`start_position`), which describes the `GameState` in which the attacker starts. It consists of:
     - `known_networks:`(set)
     - `known_hosts`(set)
     - `controlled_hosts`(set)
     - `known_services`(dict)
     - `known_data`(dict)
 
-    The initial network configuration must assign at least **one** controlled host to the attacker in the network. Any item in `controlled_hosts` is copied to `known_hosts` so there is no need to include these in both sets. `known_networks` is also extended with a set of **all** networks accessible from the `controlled_hosts`
+    The initial network configuration must assign at least **one** controlled host to the attacker in the network. Any item in `controlled_hosts` is copied to `known_hosts`, so there is no need to include these in both sets. `known_networks` is also extended with a set of **all** networks accessible from the `controlled_hosts`
 
 Example attacker configuration:
 ```YAML
@@ -174,7 +174,7 @@ agents:
       # The attacker must always at least control the CC if the goal is to exfiltrate there
       # Example of fixing the starting point of the agent in a local host
       controlled_hosts: [213.47.23.195, random]
-      # Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
+      # Services are defined as a target host where the service must be, and then a description in the form 'name, type, version, is_local'
       known_services: {}
       known_data: {}
       known_blocks: {}
@@ -182,7 +182,7 @@ agents:
 ### Defender configuration (`defenders`)
 Currently, the defender **is** a separate agent.
 
-If you want to have an defender in the game, you need to connect a defender agent. If you don't want to have a defender, just don't use any.
+If you want a defender in the game, you must connect a defender agent. For playing without a defender, leave the section empty.
 
 Example of defender configuration:
 ```YAML
@@ -207,7 +207,7 @@ Example of defender configuration:
         known_blocks: {}
 ```
 
-As in other agents, the description is only a text for the agent, so it can know what is supposed to do to win. In this example the goal of the defender is determined by a state where the known blocks can be applied in any router's firewall and must include all the controlled hosts of all the attackers. These are `magic` words that will push the coordinator to check these positions without reviling them to the defender.
+As in other agents, the description is only a text for the agent, so it can know what it is supposed to do to win. In this example, the goal of the defender is determined by a state where the known blocks can be applied in any router's firewall and must include all the controlled hosts of all the attackers. These are `magic` words that will push the coordinator to check these positions without revealing them to the defender.
 
 
 ## Definition of the network topology
@@ -215,7 +215,7 @@ The network topology and rules are defined using a [CYST](https://pypi.org/proje
 
 - Server hosts (are a NodeConf in CYST)
     - Interfaces, each with one IP address
-    - Users that can login to the host
+    - Users that can log in to the host
     - Active and passive services
     - Data in the server
     - To which network is connected
@@ -238,22 +238,22 @@ The network topology and rules are defined using a [CYST](https://pypi.org/proje
 In the current state, we support a single scenario: Data exfiltration to a remote C&C server.
 
 #### Data exfiltration to a remote C&C
-For the data exfiltration we support 3 variants. The full scenario contains 5 clients (where the attacker can start) and 5 servers where the data which is supposed to be exfiltrated can be located. *scenario1_small* is a variant with a single client (attacker always starts there) and all 5 servers. *scenario1_tiny* contains only single server with data. The tiny scenario is trivial and intended only for debuggin purposes.
+For the data exfiltration we support 3 variants. The full scenario contains 5 clients (where the attacker can start) and 5 servers where the data that is supposed to be exfiltrated can be located. *scenario1_small* is a variant with a single client (the attacker always starts there) and all 5 servers. *scenario1_tiny* contains only a single server with data. The tiny scenario is trivial and intended only for debugging purposes.
 <table>
   <tr><th>Scenario 1</th><th>Scenario 1 - small</th><th>Scenario 1 -tiny</th></tr>
   <tr><td><img src="readme_images/scenario_1.png" alt="Scenario 1 - Data exfiltration" width="300"></td><td><img src="readme_images/scenario 1_small.png" alt="Scenario 1 - small" width="300"</td><td><img src="readme_images/scenario_1_tiny.png" alt="Scenario 1 - tiny" width="300"></td></tr>
 </table>
 
 ## Trajectory storing and analysis
-Trajectory is a sequence of GameStates, Actions and rewards in one run of a game. It contains the complete information of the actions played by the agent, the rewards observed and their effect on the state of the environment. Trajectory visualization and analysis tools are described in [Trajectory analysis tools](./docs/Trajectory_analysis.md)
+The trajectory is a sequence of GameStates, Actions, and rewards in one run of a game. It contains the complete information of the actions played by the agent, the rewards observed and their effect on the state of the environment. Trajectory visualization and analysis tools are described in [Trajectory analysis tools](./docs/Trajectory_analysis.md)
 
-Trajectories performed by the agents can be stored in a file using following configuration:
+Trajectories performed by the agents can be stored in a file using the following configuration:
 ```YAML
 env:
   save_trajectories: True
 ```
 > [!CAUTION]
-> Trajectory file can grow large very fast. It is recommended to use this feature on evaluation/testing runs only. By default this feature is not enabled.
+> Trajectory files can grow very fast. It is recommended to use this feature on evaluation/testing runs only. By default, this feature is not enabled.
 ## Testing the environment
 
 It is advised after every change you test if the env is running correctly by doing