diff --git a/examples/atari/README.md b/examples/atari/README.md
index 06f3dc8a..956f839b 100644
--- a/examples/atari/README.md
+++ b/examples/atari/README.md
@@ -8,12 +8,19 @@ Then install auto-rom via:
 or:
 ```shell
 pip install autorom
-AutoROM --accept-license
 ```
+or, if you cannot download the ROMs this way, you can download them manually from [Google Drive](https://drive.google.com/file/d/1agerLX3fP2YqUCcAkMF7v_ZtABAOhlA7/view?usp=sharing).
+Then install the ROMs via:
+```shell
+pip install autorom
+AutoROM --source-file
+```
+
+
 ## Usage
 
 ```shell
-python train_ppo.py --config atari_ppo.yaml
+python train_ppo.py
 ```
\ No newline at end of file
diff --git a/examples/atari/atari_ppo.yaml b/examples/atari/atari_ppo.yaml
index 8097cff6..d25ac301 100644
--- a/examples/atari/atari_ppo.yaml
+++ b/examples/atari/atari_ppo.yaml
@@ -2,22 +2,27 @@ seed: 0
 lr: 2.5e-4
 critic_lr: 2.5e-4
 episode_length: 128
-ppo_epoch: 4
+gamma: 0.99
+ppo_epoch: 3
 gain: 0.01
 use_linear_lr_decay: true
 use_share_model: true
 entropy_coef: 0.01
 hidden_size: 512
-num_mini_batch: 4
-clip_param: 0.1
+num_mini_batch: 8
+clip_param: 0.2
 value_loss_coef: 0.5
+max_grad_norm: 10
+
 run_dir: ./run_results/
-experiment_name: atari_ppo
+
 log_interval: 1
 use_recurrent_policy: false
 use_valuenorm: true
 use_adv_normalize: true
+
 wandb_entity: openrl-lab
+experiment_name: atari_ppo
 vec_info_class:
   id: "EPS_RewardInfo"
\ No newline at end of file
diff --git a/examples/atari/train_ppo.py b/examples/atari/train_ppo.py
index 5920e819..e6386ec3 100644
--- a/examples/atari/train_ppo.py
+++ b/examples/atari/train_ppo.py
@@ -43,11 +43,11 @@ def train():
     cfg_parser = create_config_parser()
-    cfg = cfg_parser.parse_args()
+    cfg = cfg_parser.parse_args(["--config", "atari_ppo.yaml"])
 
-    # create environment, set environment parallelism to 9
+    # create environment, set environment parallelism to 16
     env = make(
-        "ALE/Pong-v5", env_num=9, cfg=cfg, asynchronous=True, env_wrappers=env_wrappers
+        "ALE/Pong-v5", env_num=16, cfg=cfg, asynchronous=True, env_wrappers=env_wrappers
     )
 
     # create the neural network
@@ -56,7 +56,7 @@ def train():
         env, cfg=cfg, device="cuda" if "macOS" not in get_system_info()["OS"] else "cpu"
     )
     # initialize the trainer
-    agent = Agent(net, use_wandb=True)
-    # start training, set total number of training steps to 20000
+    agent = Agent(net, use_wandb=True, project_name="Pong-v5")
+    # start training, set total number of training steps to 5000000
     agent.train(total_time_steps=5000000)
diff --git a/openrl/configs/config.py b/openrl/configs/config.py
index 49bdae79..fa189385 100644
--- a/openrl/configs/config.py
+++ b/openrl/configs/config.py
@@ -726,8 +726,8 @@ def create_config_parser():
     parser.add_argument(
         "--max_grad_norm",
         type=float,
-        default=10.0,
-        help="max norm of gradients (default: 0.5)",
+        default=10,
+        help="max norm of gradients (default: 10)",
     )
     parser.add_argument(
         "--use_gae",
diff --git a/setup.py b/setup.py
index 28cffd3c..79a16101 100644
--- a/setup.py
+++ b/setup.py
@@ -41,6 +41,7 @@ def get_install_requires() -> list:
         "mujoco",
         "tqdm",
         "Jinja2",
+        "pettingzoo",
     ]
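
For context, this is roughly how the `train()` entry point in `examples/atari/train_ppo.py` reads once the patch is applied. It is only a sketch assembled from the hunks above: the import paths, the module providing `get_system_info`, and the `env_wrappers` definition are not part of this diff and are assumptions based on the rest of the example.

```python
# Sketch of examples/atari/train_ppo.py after this patch. Import paths,
# get_system_info's module, and env_wrappers are assumptions; the diff
# only shows the lines inside train() that actually changed.
from openrl.configs.config import create_config_parser
from openrl.envs.common import make
from openrl.modules.common import PPONet as Net
from openrl.runners.common import PPOAgent as Agent
from openrl.utils.util import get_system_info

env_wrappers = []  # placeholder: the real example builds its Atari wrappers here


def train():
    # read hyperparameters from atari_ppo.yaml; the config file is now hard-coded,
    # which is why the README usage drops the --config flag
    cfg_parser = create_config_parser()
    cfg = cfg_parser.parse_args(["--config", "atari_ppo.yaml"])

    # create the environment, with parallelism raised to 16 by this patch
    env = make(
        "ALE/Pong-v5", env_num=16, cfg=cfg, asynchronous=True, env_wrappers=env_wrappers
    )

    # create the neural network, falling back to CPU on macOS
    net = Net(
        env, cfg=cfg, device="cuda" if "macOS" not in get_system_info()["OS"] else "cpu"
    )

    # initialize the trainer; wandb runs are now grouped under the "Pong-v5" project
    agent = Agent(net, use_wandb=True, project_name="Pong-v5")

    # train for 5,000,000 environment steps
    agent.train(total_time_steps=5000000)


if __name__ == "__main__":
    train()
```

Running `python train_ppo.py` from `examples/atari/`, as in the updated README, should then pick up `atari_ppo.yaml` automatically.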