diff --git a/openrl/envs/common/build_envs.py b/openrl/envs/common/build_envs.py index 0893400a..76f4b35b 100644 --- a/openrl/envs/common/build_envs.py +++ b/openrl/envs/common/build_envs.py @@ -2,6 +2,7 @@ import inspect from typing import Callable, Iterable, List, Optional, Union +import gymnasium as gym from gymnasium import Env from openrl.envs.wrappers.base_wrapper import BaseWrapper @@ -33,7 +34,7 @@ def _make_env() -> Env: if need_env_id: new_kwargs["env_id"] = env_id new_kwargs["env_num"] = env_num - if id.startswith("ALE/"): + if id.startswith("ALE/") or id in gym.envs.registry.keys(): new_kwargs.pop("cfg", None) env = make( diff --git a/openrl/modules/vdn_module.py b/openrl/modules/vdn_module.py index 32987372..10a9b541 100644 --- a/openrl/modules/vdn_module.py +++ b/openrl/modules/vdn_module.py @@ -68,6 +68,8 @@ def __init__( device=device, ) self.cfg = cfg + self.obs_space = input_space + self.act_space = act_space def lr_decay(self, episode, episodes): update_linear_schedule(self.optimizers["q_net"], episode, episodes, self.lr) diff --git a/tests/test_examples/test_train_gail.py b/tests/test_examples/test_train_gail.py new file mode 100644 index 00000000..656ff2d0 --- /dev/null +++ b/tests/test_examples/test_train_gail.py @@ -0,0 +1,75 @@ +"""""" + +import os +import sys + +import pytest + +from openrl.configs.config import create_config_parser +from openrl.envs.common import make +from openrl.envs.vec_env.wrappers.gen_data import GenDataWrapper +from openrl.envs.wrappers.extra_wrappers import ZeroRewardWrapper +from openrl.envs.wrappers.monitor import Monitor +from openrl.modules.common import GAILNet as Net +from openrl.modules.common import PPONet +from openrl.runners.common import GAILAgent as Agent +from openrl.runners.common import PPOAgent + + +@pytest.fixture(scope="function") +def gen_data(tmpdir): + tmp_data_path = os.path.join(tmpdir, "data.pkl") + env_wrappers = [ + Monitor, + ] + print("generate data....") + env = make( + "CartPole-v1", + env_num=2, + asynchronous=True, + env_wrappers=env_wrappers, + ) + agent = PPOAgent(PPONet(env)) + env = GenDataWrapper(env, data_save_path=tmp_data_path, total_episode=5) + obs, info = env.reset() + done = False + while not done: + # Based on environmental observation input, predict next action. + action, _ = agent.act(obs, deterministic=True) + obs, r, done, info = env.step(action) + env.close() + print("generate data done!") + return tmp_data_path + + +@pytest.fixture( + scope="function", params=[" --gail_use_action false", " --gail_use_action true"] +) +def config(request, gen_data): + input_str = ( + "--episode_length 5 --use_recurrent_policy true --use_joint_action_loss true" + " --use_valuenorm true --use_adv_normalize true --reward_class.id GAILReward" + ) + input_str += request.param + input_str += " --expert_data " + gen_data + cfg_parser = create_config_parser() + cfg = cfg_parser.parse_args(input_str.split()) + return cfg + + +@pytest.mark.unittest +def test_train_gail(config): + env = make("CartPole-v1", env_num=2, cfg=config, env_wrappers=[ZeroRewardWrapper]) + + net = Net( + env, + cfg=config, + ) + # initialize the trainer + agent = Agent(net) + agent.train(total_time_steps=200) + env.close() + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", os.path.basename(__file__)])) diff --git a/tests/test_modules/test_common/test_ddpg_net.py b/tests/test_modules/test_common/test_ddpg_net.py new file mode 100644 index 00000000..a4c03354 --- /dev/null +++ b/tests/test_modules/test_common/test_ddpg_net.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2023 The OpenRL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""""" + +import os +import sys + +import pytest + +from openrl.configs.config import create_config_parser +from openrl.envs.common import make +from openrl.envs.wrappers.extra_wrappers import AddStep +from openrl.modules.common import DDPGNet as Net +from openrl.runners.common import DDPGAgent as Agent + +env_wrappers = [AddStep] + + +@pytest.fixture(scope="module", params=[""]) +def config(request): + cfg_parser = create_config_parser() + cfg = cfg_parser.parse_args(request.param.split()) + return cfg + + +def train(Agent, Net, env_name, env_num, total_time_steps, config): + cfg = config + env = make(env_name, env_num=env_num, cfg=cfg, env_wrappers=env_wrappers) + + net = Net( + env, + cfg=cfg, + ) + # initialize the trainer + agent = Agent(net) + # start training, set total number of training steps to 20000 + agent.train(total_time_steps=total_time_steps) + env.close() + + +@pytest.mark.unittest +def test_ddpg_net(config): + train(Agent, Net, "IdentityEnvcontinuous", 2, 100, config) + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", os.path.basename(__file__)])) diff --git a/tests/test_modules/test_common/test_dqn_net.py b/tests/test_modules/test_common/test_dqn_net.py new file mode 100644 index 00000000..292c08b4 --- /dev/null +++ b/tests/test_modules/test_common/test_dqn_net.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2023 The OpenRL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""""" + +import os +import sys + +import pytest + +from openrl.configs.config import create_config_parser +from openrl.envs.common import make +from openrl.envs.wrappers.extra_wrappers import AddStep +from openrl.modules.common import DQNNet as Net +from openrl.runners.common import DQNAgent as Agent + +env_wrappers = [AddStep] + + +@pytest.fixture(scope="module", params=[""]) +def config(request): + cfg_parser = create_config_parser() + cfg = cfg_parser.parse_args(request.param.split()) + return cfg + + +def train(Agent, Net, env_name, env_num, total_time_steps, config): + cfg = config + env = make(env_name, env_num=env_num, cfg=cfg, env_wrappers=env_wrappers) + + net = Net( + env, + cfg=cfg, + ) + # initialize the trainer + agent = Agent(net) + # start training, set total number of training steps to 20000 + agent.train(total_time_steps=total_time_steps) + env.close() + + +@pytest.mark.unittest +def test_dqn_net(config): + train(Agent, Net, "IdentityEnv", 2, 100, config) + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", os.path.basename(__file__)])) diff --git a/tests/test_modules/test_common/test_sac_net.py b/tests/test_modules/test_common/test_sac_net.py new file mode 100644 index 00000000..8839986e --- /dev/null +++ b/tests/test_modules/test_common/test_sac_net.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2023 The OpenRL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""""" + +import os +import sys + +import pytest + +from openrl.configs.config import create_config_parser +from openrl.envs.common import make +from openrl.envs.wrappers.extra_wrappers import AddStep +from openrl.modules.common import SACNet as Net +from openrl.runners.common import SACAgent as Agent + +env_wrappers = [AddStep] + + +@pytest.fixture(scope="module", params=[""]) +def config(request): + cfg_parser = create_config_parser() + cfg = cfg_parser.parse_args(request.param.split()) + return cfg + + +def train(Agent, Net, env_name, env_num, total_time_steps, config): + cfg = config + env = make(env_name, env_num=env_num, cfg=cfg, env_wrappers=env_wrappers) + + net = Net( + env, + cfg=cfg, + ) + # initialize the trainer + agent = Agent(net) + # start training, set total number of training steps to 20000 + agent.train(total_time_steps=total_time_steps) + env.close() + + +@pytest.mark.unittest +def test_sac_net(config): + train(Agent, Net, "IdentityEnvcontinuous", 2, 100, config) + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", os.path.basename(__file__)])) diff --git a/tests/test_modules/test_common/test_vdn_net.py b/tests/test_modules/test_common/test_vdn_net.py new file mode 100644 index 00000000..29f1f58f --- /dev/null +++ b/tests/test_modules/test_common/test_vdn_net.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2023 The OpenRL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""""" + +import os +import sys + +import pytest + +from openrl.configs.config import create_config_parser +from openrl.envs.common import make +from openrl.envs.wrappers.mat_wrapper import MATWrapper +from openrl.modules.common import VDNNet +from openrl.runners.common import VDNAgent as Agent + + +@pytest.fixture(scope="module", params=[""]) +def config(request): + cfg_parser = create_config_parser() + cfg = cfg_parser.parse_args(request.param.split()) + return cfg + + +@pytest.mark.unittest +def test_vdn_net(config): + env_num = 2 + env = make( + "simple_spread", + env_num=env_num, + asynchronous=True, + ) + env = MATWrapper(env) + + net = VDNNet(env, cfg=config) + # initialize the trainer + agent = Agent(net) + # start training + agent.train(total_time_steps=100) + env.close() + + +if __name__ == "__main__": + sys.exit(pytest.main(["-sv", os.path.basename(__file__)]))