# main.py (forked from mfigura/Resilient-consensus-based-MARL)
import os
import numpy as np
import gym
import argparse
import pickle
import pandas as pd
from gym import spaces
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Input, Model, Sequential, layers
from environments.grid_world import Grid_World
from agents.resilient_CAC_agents import RPBCAC_agent, RTMCAC_agent
from agents.adversarial_CAC_agents import Faulty_CAC_agent, Greedy_CAC_agent, Malicious_CAC_agent, Byzantine_CAC_agent
import training.train_agents as training
import neptune
'''
Cooperative navigation problem with resilient consensus and adversarial actor-critic agents
- This is the main file, where the user selects learning hyperparameters, environment parameters,
  and the neural network architecture for the actor, critic, and team-average reward estimates.
- The script triggers a training process whose results are saved under the directory given by --summary_dir.
'''
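# Example invocation (illustrative only; pick agent labels and hyperparameters to match your experiment):
#   python main.py --exp_name demo --n_agents 5 \
#       --agent_label Cooperative Cooperative Cooperative Cooperative Malicious \
#       --resilient_method projection-based --H 1 --n_episodes 10000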
if __name__ == '__main__':
    '''USER-DEFINED PARAMETERS'''
    parser = argparse.ArgumentParser(description='Provide parameters for training consensus AC agents')
    parser.add_argument('--exp_name', type=str, default='')
    parser.add_argument('--grid_size', type=int, default=10)
    parser.add_argument('--scn', type=str, default='coop')
    parser.add_argument('--n_agents', help='total number of agents', type=int, default=5)
    #parser.add_argument('--agent_label', help='classification of each agent (Cooperative,Malicious,Faulty,Greedy)', type=str, default=['Cooperative','Cooperative','Cooperative','Cooperative','Malicious'])
    parser.add_argument('--agent_label', nargs='+', help='classification of each agent (Cooperative,Malicious,Faulty,Greedy,Byzantine)', required=True)
    parser.add_argument('--n_actions', help='size of the action space of each agent', type=int, default=5)
    parser.add_argument('--n_states', help='state dimension of each agent', type=int, default=2)
    parser.add_argument('--n_episodes', help='number of episodes', type=int, default=10000)
    parser.add_argument('--max_ep_len', help='number of steps per episode', type=int, default=20)
    parser.add_argument('--slow_lr', help='actor network learning rate', type=float, default=0.002)
    parser.add_argument('--fast_lr', help='critic network learning rate', type=float, default=0.01)
    parser.add_argument('--gamma', help='discount factor', type=float, default=0.9)
    parser.add_argument('--H', help='max number of adversaries in the local neighborhood', type=int, default=1)
    parser.add_argument('--eps', help='exploration noise', type=float, default=0.05)
    parser.add_argument('--n_ep_fixed', help='number of episodes under a fixed policy', type=int, default=100)
    parser.add_argument('--n_epochs', help='number of gradient steps in the critic and team reward updates', type=int, default=20)
    parser.add_argument('--in_nodes', help='list of neighbors that transmit values to each agent (include the index of the agent as the first element)', type=int, default=[[0,1,2,3],[1,2,3,4],[2,3,4,0],[3,4,0,1],[4,0,1,2]])
    parser.add_argument('--randomize_state', help='set to True if the agents start at a random initial state in every episode', type=bool, default=True)
    parser.add_argument('--scaling', help='normalize states for training', type=bool, default=True)
    parser.add_argument('--resilient_method', help='choose between trimmed-mean and projection-based consensus', default='projection-based')
    parser.add_argument('--summary_dir', help='directory to save simulation results', default='./new_results/raw_data/')
    parser.add_argument('--random_seed', help='seed for the random number generators', type=int, default=20)
    parser.add_argument('--save_every', type=int, default=100)
    parser.add_argument('--common_reward', help='set to True if the agents receive the team-average reward', default=False)
    #parser.add_argument('--desired_state', help='desired state of the agents', type=int, default=np.random.randint(0,6,size=(4,2)))
    #parser.add_argument('--initial_state', help='initial state of the agents', type=int, default=np.random.randint(0,6,size=(4,2)))
    args = vars(parser.parse_args())
    np.random.seed(args['random_seed'])
    tf.random.set_seed(args['random_seed'])
    args['desired_state'] = np.random.randint(0,args['grid_size'],size=(args['n_agents'],args['n_states']))
    args['initial_state'] = np.random.randint(0,args['grid_size'],size=(args['n_agents'],args['n_states']))
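    # Note: desired_state and initial_state are integer arrays of shape (n_agents, n_states) holding
    # grid coordinates drawn uniformly from [0, grid_size); they are fixed by the random seed above.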
    #folder_path = os.path.join(os.getcwd(),'simulation_results/scenarios/' + args['resilient_method'] + '/' + args['agent_label'][-1] + '_h' + str(args['H']) + '/seed=' + str(args['random_seed']) + '/')
    #if not os.path.isdir(folder_path):
    #    os.makedirs(folder_path)
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        # Restrict TensorFlow to allocate only 2 GB of memory on the first GPU
        try:
            tf.config.set_logical_device_configuration(
                gpus[0],
                [tf.config.LogicalDeviceConfiguration(memory_limit=2048)])
            logical_gpus = tf.config.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            # Virtual devices must be configured before the GPUs are initialized
            print(e)
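    # Alternative (a sketch, not used here): instead of a hard memory cap, dynamic memory growth
    # could be enabled so TensorFlow allocates GPU memory on demand:
    #   for gpu in gpus:
    #       tf.config.experimental.set_memory_growth(gpu, True)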
    run = None
    # Neptune client
    try:
        #import neptune
        run = neptune.init_run(
            name=args["exp_name"],
            project="mpal/RPBCAC",
            api_token="ENTER YOUR API TOKEN HERE",
        )
    except Exception as e:
        print(e)
        pass
    else:
        # Neptune Param log
        run["parameters"] = args
    path2save = args['summary_dir'] + args['resilient_method'] + '/' + args['scn'] + '/' + 'H={}'.format(args['H']) + '/' + 'seed={}'.format(args['random_seed']) + '/'
    args['path2save'] = path2save
    os.makedirs(path2save, exist_ok=True)
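    # With the default arguments this resolves to a directory such as
    # './new_results/raw_data/projection-based/coop/H=1/seed=20/'.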
    #----------------------------------------------------------------------------------------------------------------------------------------
    '''NEURAL NETWORK ARCHITECTURE'''
    agents = []
    critic_template = keras.Sequential([
        keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1), input_shape=(args['n_agents']*args['n_states'],)),
        keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1)),
        #keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.3)),
        keras.layers.Dense(1)
    ])
    team_reward_template = keras.Sequential([
        keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1), input_shape=(args['n_agents']*args['n_states']+args['n_agents'],)),
        keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1)),
        #keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.3)),
        keras.layers.Dense(1)
    ])
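    # The templates above are built once so that every agent's critic and team-reward estimator
    # starts from identical weights (see the set_weights calls below); the per-agent copies are
    # then created inside the loop that follows.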
    for node in range(args['n_agents']):
        actor = keras.Sequential([
            keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1), input_shape=(args['n_agents']*args['n_states'],)),
            keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1)),
            #keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.3)),
            keras.layers.Dense(args['n_actions'], activation='softmax')
        ])
        critic = keras.Sequential([
            keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1), input_shape=(args['n_agents']*args['n_states'],)),
            keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1)),
            #keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.3)),
            keras.layers.Dense(1)
        ])
        critic2 = keras.Sequential([
            keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1), input_shape=(args['n_agents']*args['n_states'],)),
            keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1)),
            #keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.3)),
            keras.layers.Dense(1)
        ])
        team_reward = keras.Sequential([
            keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1), input_shape=(args['n_agents']*args['n_states']+args['n_agents'],)),
            keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.1)),
            #keras.layers.Dense(30, activation=keras.layers.LeakyReLU(alpha=0.3)),
            keras.layers.Dense(1)
        ])
        critic.set_weights(critic_template.get_weights())
        critic2.set_weights(critic_template.get_weights())
        team_reward.set_weights(team_reward_template.get_weights())
        if args['agent_label'][node] == 'Malicious':  # create a malicious agent
            print("This is a malicious agent")
            agents.append(Malicious_CAC_agent(actor, critic, critic2, team_reward, slow_lr=args['slow_lr'], fast_lr=args['fast_lr'], gamma=args['gamma']))
        elif args['agent_label'][node] == 'Faulty':  # create a faulty agent
            print("This is a faulty agent")
            agents.append(Faulty_CAC_agent(actor, critic, team_reward, slow_lr=args['slow_lr'], gamma=args['gamma']))
        elif args['agent_label'][node] == 'Greedy':  # create a greedy agent
            print("This is a greedy agent")
            agents.append(Greedy_CAC_agent(actor, critic, team_reward, slow_lr=args['slow_lr'], fast_lr=args['fast_lr'], gamma=args['gamma']))
        elif args['agent_label'][node] == 'Byzantine':  # create a Byzantine agent
            print("This is a Byzantine agent")
            agents.append(Byzantine_CAC_agent(actor, critic, critic2, team_reward, slow_lr=args['slow_lr'], fast_lr=args['fast_lr'], gamma=args['gamma']))
        elif args['agent_label'][node] == 'Cooperative':  # create a cooperative agent
            if args['resilient_method'] == 'projection-based':
                print("This is an RPBCAC agent")
                agents.append(RPBCAC_agent(actor, critic, team_reward, slow_lr=args['slow_lr'], fast_lr=args['fast_lr'], gamma=args['gamma'], H=args['H']))
            elif args['resilient_method'] == 'trimmed-mean':
                print("This is an RTMCAC agent")
                agents.append(RTMCAC_agent(actor, critic, team_reward, slow_lr=args['slow_lr'], fast_lr=args['fast_lr'], gamma=args['gamma'], H=args['H']))
    print(args)
    #---------------------------------------------------------------------------------------------------------------------------------------------
    '''TRAIN AGENTS'''
    env = Grid_World(nrow=args['grid_size'],
                     ncol=args['grid_size'],
                     n_agents=args['n_agents'],
                     desired_state=args['desired_state'],
                     initial_state=args['initial_state'],
                     randomize_state=args['randomize_state'],
                     scaling=args['scaling']
                     )
    if args['resilient_method'] == 'projection-based':
        trained_agents, sim_data = training.train_RPBCAC(env, agents, args, run)
    else:
        trained_agents, sim_data = training.train_RTMCAC(env, agents, args, run)
    #----------------------------------------------------------------------------------------------------
    sim_data.to_pickle(os.path.join(args['path2save'], "sim_data.pkl"))
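    # The saved results can later be inspected with pandas, e.g. (illustrative path built from the defaults):
    #   sim_data = pd.read_pickle('./new_results/raw_data/projection-based/coop/H=1/seed=20/sim_data.pkl')
    #   print(sim_data.head())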