From 2260821e96d0d83fa0395183f4252728a15edbaf Mon Sep 17 00:00:00 2001 From: mkghaas Date: Wed, 2 Sep 2020 23:50:48 -0400 Subject: [PATCH 1/8] added custom logging to PPO2 and VecMonitor --- roam_rl/baselines/ppo.py | 7 +++++-- roam_rl/baselines/utils/vec_env_maker.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/roam_rl/baselines/ppo.py b/roam_rl/baselines/ppo.py index 30f7e40..317e400 100644 --- a/roam_rl/baselines/ppo.py +++ b/roam_rl/baselines/ppo.py @@ -36,6 +36,9 @@ def __init__(self, config, section): self.seed = config.getint(section, 'seed') + info_keywords_str = config.get(section, 'info_keywords', fallback='') + self.info_keywords = eval('("'+info_keywords_str+'",)') + def _get_parameter_descr_dict(self): """ @@ -70,11 +73,11 @@ def learn(self, model_path=None): logdir = utils.get_log_dir(self.experiment_dir, self.seed) # setup ppo logging logger.configure(dir=logdir, format_strs=['stdout', 'log', 'csv', 'tensorboard']) monitor_file_path = os.path.join(logdir, 'monitor.csv') - env = self.vec_env_maker(self.env_maker, self.seed, monitor_file=monitor_file_path) + env = self.vec_env_maker(self.env_maker, self.seed, monitor_file=monitor_file_path, info_keywords=self.info_keywords) # Learn # pylint: disable=E1125 - model = self._learn(env=env, **self.params, seed=self.seed, load_path=model_path) # learn model + model = self._learn(env=env, **self.params, seed=self.seed, load_path=model_path, extra_keys=self.info_keywords) # learn model # Save model.save(utils.get_model_path(self.experiment_dir, self.seed)) diff --git a/roam_rl/baselines/utils/vec_env_maker.py b/roam_rl/baselines/utils/vec_env_maker.py index d6e131d..54e9323 100644 --- a/roam_rl/baselines/utils/vec_env_maker.py +++ b/roam_rl/baselines/utils/vec_env_maker.py @@ -26,7 +26,7 @@ def __init__(self, config, section): self.normalize_obs = config.getboolean(section, 'normalize_obs', fallback=False) self.normalize_ret = config.getboolean(section, 'normalize_ret', fallback=False) - def __call__(self, env_maker, seed=None, monitor_file=None): + def __call__(self, env_maker, seed=None, monitor_file=None, info_keywords=()): """ :param env_maker: instance of roam_learning.robot_env.EnvMaker :param seed: int that is used to generate seeds for vectorized envs @@ -48,7 +48,7 @@ def __call__(self, env_maker, seed=None, monitor_file=None): # Monitor the envs before normalization if monitor_file is not None: - envs = VecMonitor(envs, filename=monitor_file) + envs = VecMonitor(envs, filename=monitor_file, info_keywords=info_keywords) if self.normalize_obs or self.normalize_ret: envs = VecNormalize(envs, ob=self.normalize_obs, ret=self.normalize_ret, use_tf=True) return envs From 5ecea91417279bd33d2a88d68ec2686996288fab Mon Sep 17 00:00:00 2001 From: Maximilian Haas-Heger Date: Fri, 4 Sep 2020 13:20:00 -0400 Subject: [PATCH 2/8] fixed bug (#18) --- roam_rl/baselines/ppo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/roam_rl/baselines/ppo.py b/roam_rl/baselines/ppo.py index 317e400..dc419ee 100644 --- a/roam_rl/baselines/ppo.py +++ b/roam_rl/baselines/ppo.py @@ -36,8 +36,11 @@ def __init__(self, config, section): self.seed = config.getint(section, 'seed') - info_keywords_str = config.get(section, 'info_keywords', fallback='') - self.info_keywords = eval('("'+info_keywords_str+'",)') + info_keywords_str = config.get(section, 'info_keywords', fallback=None) + if info_keywords_str: + self.info_keywords = eval('("'+info_keywords_str+'",)') + else: + self.info_keywords = () def 
_get_parameter_descr_dict(self): From 67d081c1ce729ae291ab8068e8e48d411bb29809 Mon Sep 17 00:00:00 2001 From: Gagan Khandate Date: Sat, 17 Oct 2020 01:19:48 -0400 Subject: [PATCH 3/8] In hand manipulation merge master (#22) * removed roam_robot_world (#17) * Welcome garage (#19) * added Dockerfile for garage * added configurable SAC and training script * added parallel sampling * do not copy mjkey * Added wrappers / script for HIPPO (#20) * added Dockerfile for garage * added configurable SAC and training script * added parallel sampling * do not copy mjkey * added wrapper for hippo * set learn method * fixed bug in hippo and generalized training script * rm exec permissions * rename learn to train * ~one script to train them all~ * install baselines-hippo in docker image --- Dockerfile.garage | 96 ++++++++++++++ Makefile | 1 + README.md | 6 +- roam_rl/baselines/hippo.py | 27 ++++ roam_rl/baselines/ppo.py | 4 +- roam_rl/garage/sac.py | 122 ++++++++++++++++++ .../baselines/configs/hippo_fetch_reach.cfg | 38 ++++++ .../configs/train_ppo_point_robot_env.cfg | 71 ---------- .../train_ppo_point_robot_goal_env.cfg | 80 ------------ .../{plot_training_curves.py => plot.py} | 0 .../{run_ppo_policy.py => run_policy.py} | 9 +- scripts/garage/halfcheetah.cfg | 23 ++++ scripts/garage/run_policy.py | 75 +++++++++++ .../train_ppo.py => garage/train.py} | 14 +- scripts/train.py | 26 ++++ 15 files changed, 423 insertions(+), 169 deletions(-) create mode 100644 Dockerfile.garage create mode 100644 roam_rl/baselines/hippo.py create mode 100644 roam_rl/garage/sac.py create mode 100644 scripts/baselines/configs/hippo_fetch_reach.cfg delete mode 100644 scripts/baselines/configs/train_ppo_point_robot_env.cfg delete mode 100644 scripts/baselines/configs/train_ppo_point_robot_goal_env.cfg rename scripts/baselines/{plot_training_curves.py => plot.py} (100%) rename scripts/baselines/{run_ppo_policy.py => run_policy.py} (82%) create mode 100644 scripts/garage/halfcheetah.cfg create mode 100644 scripts/garage/run_policy.py rename scripts/{baselines/train_ppo.py => garage/train.py} (78%) mode change 100644 => 100755 create mode 100644 scripts/train.py diff --git a/Dockerfile.garage b/Dockerfile.garage new file mode 100644 index 0000000..11fdbaf --- /dev/null +++ b/Dockerfile.garage @@ -0,0 +1,96 @@ +# To be used as a base image for your project. In your project's image +# make sure you place your MuJoCo key at /root/.mujoco/ + +FROM ubuntu:18.04 + +# http://bugs.python.org/issue19846 +# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK. 
+ENV LANG C.UTF-8 + +# apt dependencies +RUN \ + apt-get -y -q update && \ + # Prevents debconf from prompting for user input + # See https://github.com/phusion/baseimage-docker/issues/58 + DEBIAN_FRONTEND=noninteractive apt-get install -y \ + # Dockerfile deps + wget \ + unzip \ + git \ + curl \ + # For building glfw + cmake \ + xorg-dev \ + # mujoco_py + # See https://github.com/openai/mujoco-py/blob/master/Dockerfile + # 18.04 repo is old, install glfw from source instead + # libglfw3 \ + libglew-dev \ + libosmesa6-dev \ + patchelf \ + # OpenAI baselines + libopenmpi-dev \ + # virtualenv + python3 \ + python3-pip \ + python3-tk \ + python3-virtualenv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Build GLFW because the Ubuntu 18.04 version is too old +# See https://github.com/glfw/glfw/issues/1004 +RUN apt-get purge -y -v libglfw* +RUN wget https://github.com/glfw/glfw/releases/download/3.3/glfw-3.3.zip && \ + unzip glfw-3.3.zip && \ + rm glfw-3.3.zip && \ + cd glfw-3.3 && \ + mkdir glfw-build && \ + cd glfw-build && \ + cmake -DBUILD_SHARED_LIBS=ON -DGLFW_BUILD_EXAMPLES=OFF -DGLFW_BUILD_TESTS=OFF -DGLFW_BUILD_DOCS=OFF .. && \ + make -j"$(nproc)" && \ + make install && \ + cd ../../ && \ + rm -rf glfw + +# MuJoCo 2.0 (for dm_control) +RUN mkdir -p /root/.mujoco && \ + wget https://www.roboti.us/download/mujoco200_linux.zip -O mujoco.zip && \ + unzip mujoco.zip -d $HOME/.mujoco && \ + rm mujoco.zip && \ + ln -s $HOME/.mujoco/mujoco200_linux $HOME/.mujoco/mujoco200 + ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco200/bin + +# Copy over just setup.py first, so the Docker cache doesn't expire until +# dependencies change +# +# Files needed to run setup.py +# - README.md +# - roam_rl/__init__.py +# - setup.py +COPY README.md /root/code/roam_rl/README.md +COPY roam_rl/__init__.py /root/code/roam_rl/roam_rl/__init__.py +COPY setup.py /root/code/roam_rl/setup.py +WORKDIR /root/code/roam_rl + +# Create virtualenv +ENV VIRTUAL_ENV=/root/venv +RUN python3 -m virtualenv --python=/usr/bin/python3 $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +# Prevent pip from complaining about available upgrades +RUN pip install --upgrade pip + +# Install pip dependencies +# Note: Empty license file for installing mujoco_py +RUN touch /root/.mujoco/mjkey.txt && pip install mujoco_py && rm /root/.mujoco/mjkey.txt && \ + pip install git+https://git@github.com/rlworkgroup/garage@v2020.06.3#egg=garage&& \ + pip install git+https://git@github.com/roamlab/confac@master#egg=confac && \ + pip install git+https://git@github.com/roamlab/roam_env@master#egg=roam_env && \ + pip install -e . && \ + rm -r /root/.cache/pip + +COPY . /root/code/roam_rl/ + +CMD /bin/bash + diff --git a/Makefile b/Makefile index bd917e1..c6df36a 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,7 @@ common: pip install git+https://git@github.com/roamlab/confac@master#egg=confac pip install git+https://git@github.com/roamlab/roam_env@master#egg=roam_env pip install --force-reinstall git+https://git@github.com/openai/baselines@master#egg=baselines + pip install --force-reinstall git+https://git@github.com/roamlab/baselines-hippo@master#egg=baselines-hippo pip install -e . gpu: test-env diff --git a/README.md b/README.md index b2d3962..b7779c3 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,7 @@ If you need to setup apt dependencies and install MuJoCo, run `sudo make setup`. For installing the pip dependencies, activate your virtual environment and then run `make default` or just `make`. 
If your machine is configured to use NVIDIA GPU you can run `make gpu` instead to make use of the GPU. -### Testing +### Testing For a quick test you can run -`python scripts/baselines/train_ppo.py scripts/baselines/configs/train_ppo_acrobat.cfg` - -Install `roam_robot_worlds` to run config files `configs/train_ppo_point_robot_env.cfg` and `configs/train_ppo_point_robot_goal_env.cfg`. +`python scripts/baselines/train_ppo.py scripts/baselines/configs/train_ppo_acrobat.cfg` \ No newline at end of file diff --git a/roam_rl/baselines/hippo.py b/roam_rl/baselines/hippo.py new file mode 100644 index 0000000..9614a3b --- /dev/null +++ b/roam_rl/baselines/hippo.py @@ -0,0 +1,27 @@ +from roam_rl.baselines.ppo import PPO +from hippo import hippo +from hippo.hippo import extract_reward_fn + +class HIPPO(PPO): + + def __init__(self, config, section): + super().__init__(config, section) + self.reward_fn = extract_reward_fn(self.env_maker) + # wrap hippo.learn to sneak-in the reward function + def learn(*args, **kwargs): + return hippo.learn(*args, **kwargs, reward_fn=self.reward_fn) + self._learn = learn + + def _get_parameter_descr_dict(self): + + parameters = super()._get_parameter_descr_dict() + parameters.update( + { + 'nbatch': 'int', + 'mode': 'str', + 'use_buffer': 'bool', + 'buffer_capacity': 'int', + 'hindsight': 'float' + } + ) + return parameters diff --git a/roam_rl/baselines/ppo.py b/roam_rl/baselines/ppo.py index dc419ee..b77754b 100644 --- a/roam_rl/baselines/ppo.py +++ b/roam_rl/baselines/ppo.py @@ -64,12 +64,12 @@ def _get_parameter_descr_dict(self): 'value_network': 'str', 'log_interval': 'int', 'save_interval': 'int', - 'total_timesteps': 'int' # to read int from int sci notation + 'total_timesteps': 'int' } return parameters - def learn(self, model_path=None): + def train(self, model_path=None): # Create vec env set_global_seeds(self.seed) diff --git a/roam_rl/garage/sac.py b/roam_rl/garage/sac.py new file mode 100644 index 0000000..af7cea2 --- /dev/null +++ b/roam_rl/garage/sac.py @@ -0,0 +1,122 @@ +import gym +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F + +from garage import wrap_experiment +from garage.envs import GarageEnv, normalize +from garage.experiment import deterministic, LocalRunner +from garage.replay_buffer import PathBuffer +from garage.sampler import LocalSampler +from garage.torch import set_gpu_mode +from garage.torch.algos import SAC as _SAC_ +from garage.torch.policies import TanhGaussianMLPPolicy + +from garage.torch.q_functions import ContinuousMLPQFunction +import os +from confac import make +from garage.sampler import RaySampler + +class SAC: + + def __init__(self, config, section): + + # Setup + self.experiment_dir = None + self.config = config + self.section = section + self.env_maker = make(config, config.get(section, 'env_maker')) + self.seed = config.getint(section, 'seed') + self.snapshot_mode = config.get(section, 'snapshot_mode', fallback='last') + + # SAC hyper parameters + self.policy_hidden_sizes = eval(config.get(section, 'policy_hidden_sizes', fallback='[256, 256]')) + self.qf_hidden_sizes = eval(config.get(section, 'qf_hidden_sizes', fallback = '[256, 256]')) + self.buffer_capacity_in_transitions = int(config.getfloat(section, 'buffer_capacity_in_transitions', fallback=1e6)) + self.gradient_steps_per_itr = config.getint(section, 'gradient_steps_per_iteration', fallback=1000) + self.max_path_length = config.getint(section, 'max_path_length', fallback=1000) + self.max_eval_path_length = 
config.getint(section, 'max_eval_path_length', fallback=1000) + self.min_buffer_size = int(config.getfloat(section, 'min_buffer_size', fallback=1e4)) + self.target_update_tau = config.getfloat(section, 'target_update_tau', fallback=5e-3) + self.discount = config.getfloat(section, 'discount', fallback=0.99) + self.buffer_batch_size = config.getint(section, 'buffer_batch_size', fallback=256) + self.reward_scale = config.getfloat(section, 'reward_scale', fallback=1.) + self.steps_per_epoch = config.getint(section, 'steps_per_epoch', fallback=1) + self.batch_size = config.getint(section, 'batch_size', fallback=1000) + self.n_epochs = config.getint(section, 'n_epochs', fallback=1000) + self.parallel_sampling = config.getboolean(section, 'parallel_sampling', fallback=False) + if self.parallel_sampling: + self.n_workers = config.getint(section, 'n_workers') + + def set_experiment_dir(self, experiment_dir): + self.experiment_dir = experiment_dir + + def train(self): + + # define + @wrap_experiment(snapshot_mode=self.snapshot_mode, log_dir=self.experiment_dir) + def run(ctxt=None): + """ Set up environment and algorithm and run the task. + + Args: + ctxt (garage.experiment.ExperimentContext): The experiment + configuration used by LocalRunner to create the snapshotter. + seed (int): Used to seed the random number generator to produce + determinism. + + """ + deterministic.set_seed(self.seed) + runner = LocalRunner(snapshot_config=ctxt, max_cpus=32) + env = GarageEnv(normalize(self.env_maker())) + + policy = TanhGaussianMLPPolicy( + env_spec=env.spec, + hidden_sizes=self.policy_hidden_sizes, + hidden_nonlinearity=nn.ReLU, + output_nonlinearity=None, + min_std=np.exp(-20.), + max_std=np.exp(2.), + ) + + qf1 = ContinuousMLPQFunction(env_spec=env.spec, + hidden_sizes=self.qf_hidden_sizes, + hidden_nonlinearity=F.relu) + + qf2 = ContinuousMLPQFunction(env_spec=env.spec, + hidden_sizes=self.qf_hidden_sizes, + hidden_nonlinearity=F.relu) + + replay_buffer = PathBuffer(capacity_in_transitions=self.buffer_capacity_in_transitions) + + algo = _SAC_(env_spec=env.spec, + policy=policy, + qf1=qf1, + qf2=qf2, + gradient_steps_per_itr=self.gradient_steps_per_itr, + max_path_length=self.max_path_length, + max_eval_path_length=self.max_eval_path_length, + replay_buffer=replay_buffer, + min_buffer_size=self.min_buffer_size, + target_update_tau=self.target_update_tau, + discount=self.discount, + buffer_batch_size=self.buffer_batch_size, + reward_scale=self.reward_scale, + steps_per_epoch=self.steps_per_epoch) + + if torch.cuda.is_available(): + set_gpu_mode(True) + else: + set_gpu_mode(False) + algo.to() + + if self.parallel_sampling: + runner.setup(algo=algo, env=env, sampler_cls=RaySampler, n_workers=self.n_workers) + else: + runner.setup(algo=algo, env=env, sampler_cls=LocalSampler) + + runner.train(n_epochs=self.n_epochs, batch_size=self.batch_size) + + run() + + diff --git a/scripts/baselines/configs/hippo_fetch_reach.cfg b/scripts/baselines/configs/hippo_fetch_reach.cfg new file mode 100644 index 0000000..cdd3c07 --- /dev/null +++ b/scripts/baselines/configs/hippo_fetch_reach.cfg @@ -0,0 +1,38 @@ +[experiment] +experiment_no = 0 +algo = hippo + +[hippo] +entrypoint = roam_rl.baselines.hippo:HIPPO +env_maker = my_env_maker +vec_env_maker = my_vec_env_maker +network = my_network +total_timesteps = 1e4 +lr = lambda f: 3.0e-4 *f +nsteps = 500 +log_interval = 1 +value_network = copy +nminibatches = 2 +noptepochs = 10 + +seed = 0 + +[my_network] +type = mlp +num_hidden = 64 +num_layers = 2 + +[my_vec_env_maker] +type 
= subproc +nenvs = 4 +normalize = True + +[my_env_maker] +entrypoint = roam_env.utils.env_maker:EnvMaker +env = my_robot_env + +[flatten] +entrypoint = roam_env.utils.wrappers:FlattenObservation + +[my_robot_env] +id = FetchReach-v1 diff --git a/scripts/baselines/configs/train_ppo_point_robot_env.cfg b/scripts/baselines/configs/train_ppo_point_robot_env.cfg deleted file mode 100644 index bd176c8..0000000 --- a/scripts/baselines/configs/train_ppo_point_robot_env.cfg +++ /dev/null @@ -1,71 +0,0 @@ -[experiment] -experiment_no = 0 -ppo = my_ppo - -[my_ppo] -env_maker = my_env_maker -vec_env_maker = my_vec_env_maker -network = my_network -total_timesteps = 1e6 -lr = lambda f: 3.0e-4 *f -nsteps = 1024 -log_interval = 1 -value_network = copy -nminibatches = 32 -noptepochs = 10 -seed = 0 - -[my_network] -type = mlp -num_hidden = 64 -num_layers = 2 - -[my_vec_env_maker] -type = shmem -nenvs = 1 -normalize = False - -[my_env_maker] -entrypoint = roam_env.utils.env_maker:WrappedEnvMaker -wrappers = ['my_wrapper_1', 'my_wrapper_2'] -env = my_robot_env - -[my_wrapper_1] -entrypoint = roam_env.utils.wrappers:FilterObservation -filter_keys = ['state'] - -[my_wrapper_2] -entrypoint = roam_env.utils.wrappers:FlattenObservation - -[my_robot_env] -entrypoint = roam_rl.robot_env:RobotEnv -robot_world = my_robot_world -state_sampler = my_robot_env_state_sampler -observation_func = my_robot_env_observation_func -reward_func = my_robot_env_reward_func -max_episode_steps = 100 - -[my_robot_world] -entrypoint = roam_robot_worlds.robot_world:SimulatedRobotWorld -dynamics = my_robot_world_dynamics -sensor_model = my_robot_world_sensor_model -steps_per_action = 100 - -[my_robot_env_state_sampler] -entrypoint = roam_rl.robot_env.components:UniformSampler -max = [1.0, 1.0] -min = [-1.0, -1.0] - -[my_robot_env_observation_func] -entrypoint = roam_rl.simple_robot_envs.point_robot_env:ObservationFunc - -[my_robot_env_reward_func] -entrypoint = roam_rl.simple_robot_envs.point_robot_env:RewardFunc - -[my_robot_world_dynamics] -entrypoint = roam_robot_worlds.simple_robots.point_numpy_robot:PointNumpyDynamics -mass = 1.0 -delta_t = 0.001 - -[my_robot_world_sensor_model] -entrypoint = roam_robot_worlds.robot_world.sensor_model:StateSensorModel diff --git a/scripts/baselines/configs/train_ppo_point_robot_goal_env.cfg b/scripts/baselines/configs/train_ppo_point_robot_goal_env.cfg deleted file mode 100644 index 96a381b..0000000 --- a/scripts/baselines/configs/train_ppo_point_robot_goal_env.cfg +++ /dev/null @@ -1,80 +0,0 @@ -[experiment] -experiment_no = 0 -ppo = my_ppo - -[my_ppo] -env_maker = my_env_maker -vec_env_maker = my_vec_env_maker -network = my_network -total_timesteps = 1e6 -lr = lambda f: 3.0e-4 *f -nsteps = 1024 -log_interval = 1 -value_network = copy -nminibatches = 32 -noptepochs = 10 -seed = 0 - -[my_network] -type = mlp -num_hidden = 64 -num_layers = 2 - -[my_vec_env_maker] -type = shmem -nenvs = 1 -normalize = False - -[my_env_maker] -entrypoint = roam_env.utils.env_maker:WrappedEnvMaker -wrappers = ['my_wrapper_1', 'my_wrapper_2'] -env = my_robot_env - -[my_wrapper_1] -entrypoint = roam_env.utils.wrappers:FilterObservation -filter_keys = ['achieved_goal', 'desired_goal'] - -[my_wrapper_2] -entrypoint = roam_env.utils.wrappers:FlattenObservation - -[my_robot_env] -entrypoint = roam_rl.robot_env:RobotGoalEnv -robot_world = my_robot_world -state_sampler = my_robot_env_state_sampler -observation_func = my_robot_env_observation_func -reward_func = my_robot_env_reward_func -goal_sampler = my_robot_env_goal_sampler 
-max_episode_steps = 100 - -[my_robot_world] -entrypoint = roam_robot_worlds.robot_world:SimulatedRobotWorld -dynamics = my_robot_world_dynamics -sensor_model = my_robot_world_sensor_model -steps_per_action = 100 - -[my_robot_env_state_sampler] -entrypoint = roam_rl.robot_env.components:UniformSampler -max = [1.0, 1.0] -min = [-1.0, -1.0] - -[my_robot_env_goal_sampler] -entrypoint = roam_rl.robot_env.components:UniformSampler -max = [2.0] -min = [-2.0] - -[my_robot_env_observation_func] -entrypoint = roam_rl.simple_robot_envs.point_robot_goal_env:ObservationFunc - -[my_robot_env_reward_func] -entrypoint = roam_rl.robot_env.components.goal_reward_func:DenseGoalRewardFunc -achieved_goal_reward = linear -action_reward = none -alpha = 1.0 - -[my_robot_world_dynamics] -entrypoint = roam_robot_worlds.simple_robots.point_numpy_robot:PointNumpyDynamics -mass = 1.0 -delta_t = 0.001 - -[my_robot_world_sensor_model] -entrypoint = roam_robot_worlds.robot_world.sensor_model:StateSensorModel \ No newline at end of file diff --git a/scripts/baselines/plot_training_curves.py b/scripts/baselines/plot.py similarity index 100% rename from scripts/baselines/plot_training_curves.py rename to scripts/baselines/plot.py diff --git a/scripts/baselines/run_ppo_policy.py b/scripts/baselines/run_policy.py similarity index 82% rename from scripts/baselines/run_ppo_policy.py rename to scripts/baselines/run_policy.py index 94aba84..780a567 100644 --- a/scripts/baselines/run_ppo_policy.py +++ b/scripts/baselines/run_policy.py @@ -26,12 +26,11 @@ def main(args): for section in copy_sections: run_config.dump_section(section, recursive=True, dump=config) - ppo_section = config.get('experiment', 'ppo') - ppo = PPO(config, ppo_section) - ppo.set_experiment_dir(experiment_dir) + algo = PPO(config, config.get('experiment', 'algo')) + algo.set_experiment_dir(experiment_dir) - model, env = ppo.load(model_seed=load_model_seed, model_checkpoint=model_checkpoint, env_seed=env_seed) - ppo.run(model=model, env=env, stochastic=stochastic) + model, env = algo.load(model_seed=load_model_seed, model_checkpoint=model_checkpoint, env_seed=env_seed) + algo.run(model=model, env=env, stochastic=stochastic) if __name__ == "__main__": diff --git a/scripts/garage/halfcheetah.cfg b/scripts/garage/halfcheetah.cfg new file mode 100644 index 0000000..4145fcb --- /dev/null +++ b/scripts/garage/halfcheetah.cfg @@ -0,0 +1,23 @@ +# To run this test you need MuJoCo and mujoco_py installed + +[experiment] +experiment_no = 0 +algo = sac + +[sac] +entrypoint = roam_rl.garage.sac:SAC +env_maker = my_env_maker +seed = 0 +snapshot_mode = all +qf_hidden_sizes = [256, 256] +policy_hidden_sizes = [256, 256] +n_epochs = 6 +parallel_sampling = True +n_workers = 12 + +[my_env_maker] +entrypoint = roam_env.utils.env_maker:EnvMaker +env = my_robot_env + +[my_robot_env] +id = HalfCheetah-v2 diff --git a/scripts/garage/run_policy.py b/scripts/garage/run_policy.py new file mode 100644 index 0000000..1cde23c --- /dev/null +++ b/scripts/garage/run_policy.py @@ -0,0 +1,75 @@ +"""Simulates pre-learned policy.""" +import argparse +import sys + +import joblib +import tensorflow as tf + +from garage.sampler.utils import rollout +import torch +from garage.torch import set_gpu_mode + +def query_yes_no(question, default='yes'): + """Ask a yes/no question via raw_input() and return their answer. + + "question" is a string that is presented to the user. + "default" is the presumed answer if the user just hits . 
+ It must be "yes" (the default), "no" or None (meaning + an answer is required of the user). + + The "answer" return value is True for "yes" or False for "no". + """ + valid = {'yes': True, 'y': True, 'ye': True, 'no': False, 'n': False} + if default is None: + prompt = ' [y/n] ' + elif default == 'yes': + prompt = ' [Y/n] ' + elif default == 'no': + prompt = ' [y/N] ' + else: + raise ValueError("invalid default answer: '%s'" % default) + + while True: + sys.stdout.write(question + prompt) + choice = input().lower() + if default is not None and choice == '': + return valid[default] + elif choice in valid: + return valid[choice] + else: + sys.stdout.write("Please respond with 'yes' or 'no' " + "(or 'y' or 'n').\n") + + +if __name__ == '__main__': + + if torch.cuda.is_available(): + set_gpu_mode(True) + else: + set_gpu_mode(False) + + parser = argparse.ArgumentParser() + parser.add_argument('file', type=str, help='path to the snapshot file') + parser.add_argument('--max_path_length', + type=int, + default=1000, + help='Max length of rollout') + parser.add_argument('--speedup', type=float, default=1, help='Speedup') + args = parser.parse_args() + + # If the snapshot file use tensorflow, do: + # import tensorflow as tf + # with tf.compat.v1.Session(): + # [rest of the code] + with tf.compat.v1.Session() as sess: + data = joblib.load(args.file) + policy = data['algo'].policy + env = data['env'] + while True: + path = rollout(env, + policy, + max_path_length=args.max_path_length, + animated=True, + speedup=args.speedup) + if not query_yes_no('Continue simulation?'): + break \ No newline at end of file diff --git a/scripts/baselines/train_ppo.py b/scripts/garage/train.py old mode 100644 new mode 100755 similarity index 78% rename from scripts/baselines/train_ppo.py rename to scripts/garage/train.py index 832e07d..50a3431 --- a/scripts/baselines/train_ppo.py +++ b/scripts/garage/train.py @@ -1,8 +1,10 @@ import argparse -from roam_rl.baselines.ppo import PPO from roam_rl import utils from confac import ConfigParser +from confac import make import os +import torch +from garage.torch import set_gpu_mode def main(args): @@ -14,15 +16,13 @@ def main(args): experiment_dir = utils.get_experiment_dir(os.environ['EXPERIMENTS_DIR'], experiment_no, mkdir=True) config_path = utils.get_config_path(experiment_dir, experiment_no) config.save(config_path) - - ppo_section = config.get('experiment', 'ppo') - ppo = PPO(config, ppo_section) - ppo.set_experiment_dir(experiment_dir) - ppo.learn() + algo = make(config, config.get('experiment', 'algo')) + algo.set_experiment_dir(experiment_dir) + algo.train() if __name__ == "__main__": + parser = argparse.ArgumentParser() parser.add_argument('config_file', help='A string specifying the path to a config file') arg = parser.parse_args() main(arg) - diff --git a/scripts/train.py b/scripts/train.py new file mode 100644 index 0000000..a2964ca --- /dev/null +++ b/scripts/train.py @@ -0,0 +1,26 @@ +import argparse +from roam_rl import utils +from confac import ConfigParser, make +import os + +def main(args): + + config_file = args.config_file + config = ConfigParser() + config.read(config_file) + experiment_no = config.get('experiment', 'experiment_no') + os.makedirs(os.environ['EXPERIMENTS_DIR'], exist_ok=True) + experiment_dir = utils.get_experiment_dir(os.environ['EXPERIMENTS_DIR'], experiment_no, mkdir=True) + config_path = utils.get_config_path(experiment_dir, experiment_no) + config.save(config_path) + + algo = make(config, config.get('experiment', 'algo')) + 
algo.set_experiment_dir(experiment_dir) + algo.train() + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('config_file', help='A string specifying the path to a config file') + arg = parser.parse_args() + main(arg) + From 982b85e9900e75adff255463c59002c018561d3b Mon Sep 17 00:00:00 2001 From: Gagan Khandate Date: Wed, 21 Oct 2020 23:34:08 -0400 Subject: [PATCH 4/8] In hand manipulation merge master (#24) * cleanup and added imports in __init__.py (#21) --- roam_rl/baselines/__init__.py | 5 +++++ roam_rl/baselines/hippo.py | 3 +-- roam_rl/garage/__init__.py | 1 + scripts/baselines/configs/hippo_fetch_reach.cfg | 7 +++---- .../{train_ppo_acrobot.cfg => ppo_acrobot.cfg} | 7 ++++--- .../{train_ppo_hopper.cfg => ppo_hopper.cfg} | 13 ++++++------- .../configs/{run_ppo_policy.cfg => run_policy.cfg} | 2 -- 7 files changed, 20 insertions(+), 18 deletions(-) create mode 100644 roam_rl/garage/__init__.py rename scripts/baselines/configs/{train_ppo_acrobot.cfg => ppo_acrobot.cfg} (88%) rename scripts/baselines/configs/{train_ppo_hopper.cfg => ppo_hopper.cfg} (80%) rename scripts/baselines/configs/{run_ppo_policy.cfg => run_policy.cfg} (79%) diff --git a/roam_rl/baselines/__init__.py b/roam_rl/baselines/__init__.py index e69de29..f5fb983 100644 --- a/roam_rl/baselines/__init__.py +++ b/roam_rl/baselines/__init__.py @@ -0,0 +1,5 @@ +from .ppo import PPO +try: + from .hippo import HIPPO +except ImportError: + pass \ No newline at end of file diff --git a/roam_rl/baselines/hippo.py b/roam_rl/baselines/hippo.py index 9614a3b..9a95335 100644 --- a/roam_rl/baselines/hippo.py +++ b/roam_rl/baselines/hippo.py @@ -1,12 +1,11 @@ from roam_rl.baselines.ppo import PPO from hippo import hippo -from hippo.hippo import extract_reward_fn class HIPPO(PPO): def __init__(self, config, section): super().__init__(config, section) - self.reward_fn = extract_reward_fn(self.env_maker) + self.reward_fn = hippo.extract_reward_fn(self.env_maker) # wrap hippo.learn to sneak-in the reward function def learn(*args, **kwargs): return hippo.learn(*args, **kwargs, reward_fn=self.reward_fn) diff --git a/roam_rl/garage/__init__.py b/roam_rl/garage/__init__.py new file mode 100644 index 0000000..b4adf0f --- /dev/null +++ b/roam_rl/garage/__init__.py @@ -0,0 +1 @@ +from .sac import SAC \ No newline at end of file diff --git a/scripts/baselines/configs/hippo_fetch_reach.cfg b/scripts/baselines/configs/hippo_fetch_reach.cfg index cdd3c07..73d8827 100644 --- a/scripts/baselines/configs/hippo_fetch_reach.cfg +++ b/scripts/baselines/configs/hippo_fetch_reach.cfg @@ -1,9 +1,9 @@ [experiment] experiment_no = 0 -algo = hippo +algo = my_hippo -[hippo] -entrypoint = roam_rl.baselines.hippo:HIPPO +[my_hippo] +entrypoint = roam_rl.baselines:HIPPO env_maker = my_env_maker vec_env_maker = my_vec_env_maker network = my_network @@ -14,7 +14,6 @@ log_interval = 1 value_network = copy nminibatches = 2 noptepochs = 10 - seed = 0 [my_network] diff --git a/scripts/baselines/configs/train_ppo_acrobot.cfg b/scripts/baselines/configs/ppo_acrobot.cfg similarity index 88% rename from scripts/baselines/configs/train_ppo_acrobot.cfg rename to scripts/baselines/configs/ppo_acrobot.cfg index ff733ca..5f20b7f 100644 --- a/scripts/baselines/configs/train_ppo_acrobot.cfg +++ b/scripts/baselines/configs/ppo_acrobot.cfg @@ -1,8 +1,9 @@ [experiment] experiment_no = 0 -ppo = my_ppo +algo = my_ppo [my_ppo] +entrypoint = roam_rl.baselines:PPO env_maker = my_env_maker vec_env_maker = my_vec_env_maker network = my_network @@ -10,8 +11,8 @@ 
total_timesteps = 1e6 lr = lambda f: 3.0e-4 *f nsteps = 2048 log_interval = 1 -value_network = shared -nminibatches = 2 +value_network = copy +nminibatches = 4 noptepochs = 10 seed = 0 diff --git a/scripts/baselines/configs/train_ppo_hopper.cfg b/scripts/baselines/configs/ppo_hopper.cfg similarity index 80% rename from scripts/baselines/configs/train_ppo_hopper.cfg rename to scripts/baselines/configs/ppo_hopper.cfg index 485e7ab..f63d225 100644 --- a/scripts/baselines/configs/train_ppo_hopper.cfg +++ b/scripts/baselines/configs/ppo_hopper.cfg @@ -1,19 +1,18 @@ -# To run this test you need MuJoCo and mujoco_py installed - [experiment] experiment_no = 0 -ppo = my_ppo +algo = my_ppo [my_ppo] +entrypoint = roam_rl.baselines:PPO env_maker = my_env_maker vec_env_maker = my_vec_env_maker network = my_network -total_timesteps = 1e4 +total_timesteps = 1e6 lr = lambda f: 3.0e-4 *f -nsteps = 500 +nsteps = 2048 log_interval = 1 -value_network = shared -nminibatches = 2 +value_network = copy +nminibatches = 4 noptepochs = 10 seed = 0 diff --git a/scripts/baselines/configs/run_ppo_policy.cfg b/scripts/baselines/configs/run_policy.cfg similarity index 79% rename from scripts/baselines/configs/run_ppo_policy.cfg rename to scripts/baselines/configs/run_policy.cfg index 912d0e5..5b09b17 100644 --- a/scripts/baselines/configs/run_ppo_policy.cfg +++ b/scripts/baselines/configs/run_policy.cfg @@ -8,5 +8,3 @@ copy_sections = ['my_vec_env_maker'] [my_vec_env_maker] type = dummy nenvs = 1 -# [my_robot_env] -# max_episode_steps = 100 From 463b8b0d2ab90f418d0d0764f88e5687a3302812 Mon Sep 17 00:00:00 2001 From: Maximilian Haas-Heger Date: Thu, 19 Nov 2020 14:59:09 -0500 Subject: [PATCH 5/8] updated PPO2 wrapper to allow for multiple extra keywords (#25) --- roam_rl/baselines/ppo.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/roam_rl/baselines/ppo.py b/roam_rl/baselines/ppo.py index b77754b..50a4055 100644 --- a/roam_rl/baselines/ppo.py +++ b/roam_rl/baselines/ppo.py @@ -36,11 +36,7 @@ def __init__(self, config, section): self.seed = config.getint(section, 'seed') - info_keywords_str = config.get(section, 'info_keywords', fallback=None) - if info_keywords_str: - self.info_keywords = eval('("'+info_keywords_str+'",)') - else: - self.info_keywords = () + self.info_keywords = eval(config.get(section, 'info_keywords', fallback='()')) def _get_parameter_descr_dict(self): From 9dfa2888d23333669c75ac7d23630b9b6fa11d37 Mon Sep 17 00:00:00 2001 From: Gagan Khandate Date: Mon, 28 Jun 2021 16:25:26 -0400 Subject: [PATCH 6/8] Remove garage --- Dockerfile.garage | 96 ---------------------------------- scripts/garage/halfcheetah.cfg | 23 -------- scripts/garage/run_policy.py | 75 -------------------------- scripts/garage/train.py | 28 ---------- 4 files changed, 222 deletions(-) delete mode 100644 Dockerfile.garage delete mode 100644 scripts/garage/halfcheetah.cfg delete mode 100644 scripts/garage/run_policy.py delete mode 100755 scripts/garage/train.py diff --git a/Dockerfile.garage b/Dockerfile.garage deleted file mode 100644 index 11fdbaf..0000000 --- a/Dockerfile.garage +++ /dev/null @@ -1,96 +0,0 @@ -# To be used as a base image for your project. In your project's image -# make sure you place your MuJoCo key at /root/.mujoco/ - -FROM ubuntu:18.04 - -# http://bugs.python.org/issue19846 -# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK. 
-ENV LANG C.UTF-8 - -# apt dependencies -RUN \ - apt-get -y -q update && \ - # Prevents debconf from prompting for user input - # See https://github.com/phusion/baseimage-docker/issues/58 - DEBIAN_FRONTEND=noninteractive apt-get install -y \ - # Dockerfile deps - wget \ - unzip \ - git \ - curl \ - # For building glfw - cmake \ - xorg-dev \ - # mujoco_py - # See https://github.com/openai/mujoco-py/blob/master/Dockerfile - # 18.04 repo is old, install glfw from source instead - # libglfw3 \ - libglew-dev \ - libosmesa6-dev \ - patchelf \ - # OpenAI baselines - libopenmpi-dev \ - # virtualenv - python3 \ - python3-pip \ - python3-tk \ - python3-virtualenv && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Build GLFW because the Ubuntu 18.04 version is too old -# See https://github.com/glfw/glfw/issues/1004 -RUN apt-get purge -y -v libglfw* -RUN wget https://github.com/glfw/glfw/releases/download/3.3/glfw-3.3.zip && \ - unzip glfw-3.3.zip && \ - rm glfw-3.3.zip && \ - cd glfw-3.3 && \ - mkdir glfw-build && \ - cd glfw-build && \ - cmake -DBUILD_SHARED_LIBS=ON -DGLFW_BUILD_EXAMPLES=OFF -DGLFW_BUILD_TESTS=OFF -DGLFW_BUILD_DOCS=OFF .. && \ - make -j"$(nproc)" && \ - make install && \ - cd ../../ && \ - rm -rf glfw - -# MuJoCo 2.0 (for dm_control) -RUN mkdir -p /root/.mujoco && \ - wget https://www.roboti.us/download/mujoco200_linux.zip -O mujoco.zip && \ - unzip mujoco.zip -d $HOME/.mujoco && \ - rm mujoco.zip && \ - ln -s $HOME/.mujoco/mujoco200_linux $HOME/.mujoco/mujoco200 - ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco200/bin - -# Copy over just setup.py first, so the Docker cache doesn't expire until -# dependencies change -# -# Files needed to run setup.py -# - README.md -# - roam_rl/__init__.py -# - setup.py -COPY README.md /root/code/roam_rl/README.md -COPY roam_rl/__init__.py /root/code/roam_rl/roam_rl/__init__.py -COPY setup.py /root/code/roam_rl/setup.py -WORKDIR /root/code/roam_rl - -# Create virtualenv -ENV VIRTUAL_ENV=/root/venv -RUN python3 -m virtualenv --python=/usr/bin/python3 $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" - -# Prevent pip from complaining about available upgrades -RUN pip install --upgrade pip - -# Install pip dependencies -# Note: Empty license file for installing mujoco_py -RUN touch /root/.mujoco/mjkey.txt && pip install mujoco_py && rm /root/.mujoco/mjkey.txt && \ - pip install git+https://git@github.com/rlworkgroup/garage@v2020.06.3#egg=garage&& \ - pip install git+https://git@github.com/roamlab/confac@master#egg=confac && \ - pip install git+https://git@github.com/roamlab/roam_env@master#egg=roam_env && \ - pip install -e . && \ - rm -r /root/.cache/pip - -COPY . 
/root/code/roam_rl/ - -CMD /bin/bash - diff --git a/scripts/garage/halfcheetah.cfg b/scripts/garage/halfcheetah.cfg deleted file mode 100644 index 4145fcb..0000000 --- a/scripts/garage/halfcheetah.cfg +++ /dev/null @@ -1,23 +0,0 @@ -# To run this test you need MuJoCo and mujoco_py installed - -[experiment] -experiment_no = 0 -algo = sac - -[sac] -entrypoint = roam_rl.garage.sac:SAC -env_maker = my_env_maker -seed = 0 -snapshot_mode = all -qf_hidden_sizes = [256, 256] -policy_hidden_sizes = [256, 256] -n_epochs = 6 -parallel_sampling = True -n_workers = 12 - -[my_env_maker] -entrypoint = roam_env.utils.env_maker:EnvMaker -env = my_robot_env - -[my_robot_env] -id = HalfCheetah-v2 diff --git a/scripts/garage/run_policy.py b/scripts/garage/run_policy.py deleted file mode 100644 index 1cde23c..0000000 --- a/scripts/garage/run_policy.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Simulates pre-learned policy.""" -import argparse -import sys - -import joblib -import tensorflow as tf - -from garage.sampler.utils import rollout -import torch -from garage.torch import set_gpu_mode - -def query_yes_no(question, default='yes'): - """Ask a yes/no question via raw_input() and return their answer. - - "question" is a string that is presented to the user. - "default" is the presumed answer if the user just hits . - It must be "yes" (the default), "no" or None (meaning - an answer is required of the user). - - The "answer" return value is True for "yes" or False for "no". - """ - valid = {'yes': True, 'y': True, 'ye': True, 'no': False, 'n': False} - if default is None: - prompt = ' [y/n] ' - elif default == 'yes': - prompt = ' [Y/n] ' - elif default == 'no': - prompt = ' [y/N] ' - else: - raise ValueError("invalid default answer: '%s'" % default) - - while True: - sys.stdout.write(question + prompt) - choice = input().lower() - if default is not None and choice == '': - return valid[default] - elif choice in valid: - return valid[choice] - else: - sys.stdout.write("Please respond with 'yes' or 'no' " - "(or 'y' or 'n').\n") - - -if __name__ == '__main__': - - if torch.cuda.is_available(): - set_gpu_mode(True) - else: - set_gpu_mode(False) - - parser = argparse.ArgumentParser() - parser.add_argument('file', type=str, help='path to the snapshot file') - parser.add_argument('--max_path_length', - type=int, - default=1000, - help='Max length of rollout') - parser.add_argument('--speedup', type=float, default=1, help='Speedup') - args = parser.parse_args() - - # If the snapshot file use tensorflow, do: - # import tensorflow as tf - # with tf.compat.v1.Session(): - # [rest of the code] - with tf.compat.v1.Session() as sess: - data = joblib.load(args.file) - policy = data['algo'].policy - env = data['env'] - while True: - path = rollout(env, - policy, - max_path_length=args.max_path_length, - animated=True, - speedup=args.speedup) - if not query_yes_no('Continue simulation?'): - break \ No newline at end of file diff --git a/scripts/garage/train.py b/scripts/garage/train.py deleted file mode 100755 index 50a3431..0000000 --- a/scripts/garage/train.py +++ /dev/null @@ -1,28 +0,0 @@ -import argparse -from roam_rl import utils -from confac import ConfigParser -from confac import make -import os -import torch -from garage.torch import set_gpu_mode - -def main(args): - - config_file = args.config_file - config = ConfigParser() - config.read(config_file) - experiment_no = config.get('experiment', 'experiment_no') - os.makedirs(os.environ['EXPERIMENTS_DIR'], exist_ok=True) - experiment_dir = 
utils.get_experiment_dir(os.environ['EXPERIMENTS_DIR'], experiment_no, mkdir=True) - config_path = utils.get_config_path(experiment_dir, experiment_no) - config.save(config_path) - algo = make(config, config.get('experiment', 'algo')) - algo.set_experiment_dir(experiment_dir) - algo.train() - -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - parser.add_argument('config_file', help='A string specifying the path to a config file') - arg = parser.parse_args() - main(arg) From 0fb805f12006e2579af537622f58e21854129631 Mon Sep 17 00:00:00 2001 From: Gagan Khandate Date: Mon, 28 Jun 2021 16:33:47 -0400 Subject: [PATCH 7/8] Remove Makefile --- Dockerfile | 14 ++++++++------ Makefile | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index dc9c5c6..4c18e6e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -72,7 +72,6 @@ RUN mkdir -p /root/.mujoco && \ COPY README.md /root/code/roam_rl/README.md COPY roam_rl/__init__.py /root/code/roam_rl/roam_rl/__init__.py COPY setup.py /root/code/roam_rl/setup.py -COPY Makefile /root/code/roam_rl/Makefile WORKDIR /root/code/roam_rl # Create virtualenv @@ -86,11 +85,14 @@ RUN pip install --upgrade pip # We need a MuJoCo key to install mujoco_py # In this step only the presence of the file mjkey.txt is required, so we only # create an empty file -RUN touch /root/.mujoco/mjkey.txt && \ - pip install mujoco_py && \ - make default && \ - rm -r /root/.cache/pip && \ - rm /root/.mujoco/mjkey.txt +RUN touch /root/.mujoco/mjkey.txt && pip install mujoco_py && rm /root/.mujoco/mjkey.txt && \ + pip uninstall --yes tensorflow tensorflow-gpu && pip install tensorflow==1.14 && \ + pip install git+https://git@github.com/roamlab/confac@master#egg=confac && \ + pip install git+https://git@github.com/roamlab/roam_env@master#egg=roam_env && \ + pip install --force-reinstall git+https://git@github.com/openai/baselines@master#egg=baselines && \ + pip install --force-reinstall git+https://git@github.com/roamlab/baselines-hippo@master#egg=baselines-hippo && \ + pip install -e . && \ + rm -r /root/.cache/pip COPY . 
/root/code/roam_rl/ diff --git a/Makefile b/Makefile index c6df36a..27124fe 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ default: test-env make common # dependencies for baselines, we use garage's well maintained setup script -setup: +setup: if [ -d "tmp" ]; then\ rm -rf tmp;\ fi @@ -27,7 +27,7 @@ test-env: exit 1;\ fi -common: +common: pip install git+https://git@github.com/roamlab/confac@master#egg=confac pip install git+https://git@github.com/roamlab/roam_env@master#egg=roam_env pip install --force-reinstall git+https://git@github.com/openai/baselines@master#egg=baselines From a4e375e9779e2110b340826fa2f4df5e9453711e Mon Sep 17 00:00:00 2001 From: Gagan Khandate Date: Mon, 28 Jun 2021 17:43:24 -0400 Subject: [PATCH 8/8] Delete garage, hippo and re-org --- roam_rl/__init__.py | 1 + roam_rl/baselines/__init__.py | 5 - roam_rl/baselines/hippo.py | 26 ---- roam_rl/baselines/utils/__init__.py | 1 - roam_rl/garage/__init__.py | 1 - roam_rl/garage/sac.py | 122 ---------------- roam_rl/{baselines => }/ppo.py | 20 +-- roam_rl/utils/__init__.py | 1 + roam_rl/utils/config_utils.py | 132 ++++++++++++++++++ roam_rl/utils/env_factory.py | 57 ++++++++ roam_rl/utils/env_maker.py | 46 ++++++ roam_rl/utils/env_wrappers.py | 14 ++ roam_rl/{baselines => utils}/models.py | 4 +- roam_rl/{utils.py => utils/path_utils.py} | 2 +- .../{baselines => }/utils/vec_env_maker.py | 2 +- scripts/train.py | 10 +- 16 files changed, 270 insertions(+), 174 deletions(-) delete mode 100644 roam_rl/baselines/__init__.py delete mode 100644 roam_rl/baselines/hippo.py delete mode 100644 roam_rl/baselines/utils/__init__.py delete mode 100644 roam_rl/garage/__init__.py delete mode 100644 roam_rl/garage/sac.py rename roam_rl/{baselines => }/ppo.py (87%) create mode 100644 roam_rl/utils/__init__.py create mode 100644 roam_rl/utils/config_utils.py create mode 100644 roam_rl/utils/env_factory.py create mode 100644 roam_rl/utils/env_maker.py create mode 100644 roam_rl/utils/env_wrappers.py rename roam_rl/{baselines => utils}/models.py (98%) rename roam_rl/{utils.py => utils/path_utils.py} (97%) rename roam_rl/{baselines => }/utils/vec_env_maker.py (98%) diff --git a/roam_rl/__init__.py b/roam_rl/__init__.py index e69de29..a7a3c87 100644 --- a/roam_rl/__init__.py +++ b/roam_rl/__init__.py @@ -0,0 +1 @@ +from .ppo import PPO \ No newline at end of file diff --git a/roam_rl/baselines/__init__.py b/roam_rl/baselines/__init__.py deleted file mode 100644 index f5fb983..0000000 --- a/roam_rl/baselines/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .ppo import PPO -try: - from .hippo import HIPPO -except ImportError: - pass \ No newline at end of file diff --git a/roam_rl/baselines/hippo.py b/roam_rl/baselines/hippo.py deleted file mode 100644 index 9a95335..0000000 --- a/roam_rl/baselines/hippo.py +++ /dev/null @@ -1,26 +0,0 @@ -from roam_rl.baselines.ppo import PPO -from hippo import hippo - -class HIPPO(PPO): - - def __init__(self, config, section): - super().__init__(config, section) - self.reward_fn = hippo.extract_reward_fn(self.env_maker) - # wrap hippo.learn to sneak-in the reward function - def learn(*args, **kwargs): - return hippo.learn(*args, **kwargs, reward_fn=self.reward_fn) - self._learn = learn - - def _get_parameter_descr_dict(self): - - parameters = super()._get_parameter_descr_dict() - parameters.update( - { - 'nbatch': 'int', - 'mode': 'str', - 'use_buffer': 'bool', - 'buffer_capacity': 'int', - 'hindsight': 'float' - } - ) - return parameters diff --git a/roam_rl/baselines/utils/__init__.py 
b/roam_rl/baselines/utils/__init__.py deleted file mode 100644 index 66c0757..0000000 --- a/roam_rl/baselines/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .vec_env_maker import VecEnvMaker diff --git a/roam_rl/garage/__init__.py b/roam_rl/garage/__init__.py deleted file mode 100644 index b4adf0f..0000000 --- a/roam_rl/garage/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .sac import SAC \ No newline at end of file diff --git a/roam_rl/garage/sac.py b/roam_rl/garage/sac.py deleted file mode 100644 index af7cea2..0000000 --- a/roam_rl/garage/sac.py +++ /dev/null @@ -1,122 +0,0 @@ -import gym -import numpy as np -import torch -from torch import nn -from torch.nn import functional as F - -from garage import wrap_experiment -from garage.envs import GarageEnv, normalize -from garage.experiment import deterministic, LocalRunner -from garage.replay_buffer import PathBuffer -from garage.sampler import LocalSampler -from garage.torch import set_gpu_mode -from garage.torch.algos import SAC as _SAC_ -from garage.torch.policies import TanhGaussianMLPPolicy - -from garage.torch.q_functions import ContinuousMLPQFunction -import os -from confac import make -from garage.sampler import RaySampler - -class SAC: - - def __init__(self, config, section): - - # Setup - self.experiment_dir = None - self.config = config - self.section = section - self.env_maker = make(config, config.get(section, 'env_maker')) - self.seed = config.getint(section, 'seed') - self.snapshot_mode = config.get(section, 'snapshot_mode', fallback='last') - - # SAC hyper parameters - self.policy_hidden_sizes = eval(config.get(section, 'policy_hidden_sizes', fallback='[256, 256]')) - self.qf_hidden_sizes = eval(config.get(section, 'qf_hidden_sizes', fallback = '[256, 256]')) - self.buffer_capacity_in_transitions = int(config.getfloat(section, 'buffer_capacity_in_transitions', fallback=1e6)) - self.gradient_steps_per_itr = config.getint(section, 'gradient_steps_per_iteration', fallback=1000) - self.max_path_length = config.getint(section, 'max_path_length', fallback=1000) - self.max_eval_path_length = config.getint(section, 'max_eval_path_length', fallback=1000) - self.min_buffer_size = int(config.getfloat(section, 'min_buffer_size', fallback=1e4)) - self.target_update_tau = config.getfloat(section, 'target_update_tau', fallback=5e-3) - self.discount = config.getfloat(section, 'discount', fallback=0.99) - self.buffer_batch_size = config.getint(section, 'buffer_batch_size', fallback=256) - self.reward_scale = config.getfloat(section, 'reward_scale', fallback=1.) - self.steps_per_epoch = config.getint(section, 'steps_per_epoch', fallback=1) - self.batch_size = config.getint(section, 'batch_size', fallback=1000) - self.n_epochs = config.getint(section, 'n_epochs', fallback=1000) - self.parallel_sampling = config.getboolean(section, 'parallel_sampling', fallback=False) - if self.parallel_sampling: - self.n_workers = config.getint(section, 'n_workers') - - def set_experiment_dir(self, experiment_dir): - self.experiment_dir = experiment_dir - - def train(self): - - # define - @wrap_experiment(snapshot_mode=self.snapshot_mode, log_dir=self.experiment_dir) - def run(ctxt=None): - """ Set up environment and algorithm and run the task. - - Args: - ctxt (garage.experiment.ExperimentContext): The experiment - configuration used by LocalRunner to create the snapshotter. - seed (int): Used to seed the random number generator to produce - determinism. 
- - """ - deterministic.set_seed(self.seed) - runner = LocalRunner(snapshot_config=ctxt, max_cpus=32) - env = GarageEnv(normalize(self.env_maker())) - - policy = TanhGaussianMLPPolicy( - env_spec=env.spec, - hidden_sizes=self.policy_hidden_sizes, - hidden_nonlinearity=nn.ReLU, - output_nonlinearity=None, - min_std=np.exp(-20.), - max_std=np.exp(2.), - ) - - qf1 = ContinuousMLPQFunction(env_spec=env.spec, - hidden_sizes=self.qf_hidden_sizes, - hidden_nonlinearity=F.relu) - - qf2 = ContinuousMLPQFunction(env_spec=env.spec, - hidden_sizes=self.qf_hidden_sizes, - hidden_nonlinearity=F.relu) - - replay_buffer = PathBuffer(capacity_in_transitions=self.buffer_capacity_in_transitions) - - algo = _SAC_(env_spec=env.spec, - policy=policy, - qf1=qf1, - qf2=qf2, - gradient_steps_per_itr=self.gradient_steps_per_itr, - max_path_length=self.max_path_length, - max_eval_path_length=self.max_eval_path_length, - replay_buffer=replay_buffer, - min_buffer_size=self.min_buffer_size, - target_update_tau=self.target_update_tau, - discount=self.discount, - buffer_batch_size=self.buffer_batch_size, - reward_scale=self.reward_scale, - steps_per_epoch=self.steps_per_epoch) - - if torch.cuda.is_available(): - set_gpu_mode(True) - else: - set_gpu_mode(False) - algo.to() - - if self.parallel_sampling: - runner.setup(algo=algo, env=env, sampler_cls=RaySampler, n_workers=self.n_workers) - else: - runner.setup(algo=algo, env=env, sampler_cls=LocalSampler) - - runner.train(n_epochs=self.n_epochs, batch_size=self.batch_size) - - run() - - diff --git a/roam_rl/baselines/ppo.py b/roam_rl/ppo.py similarity index 87% rename from roam_rl/baselines/ppo.py rename to roam_rl/ppo.py index 50a4055..86c6106 100644 --- a/roam_rl/baselines/ppo.py +++ b/roam_rl/ppo.py @@ -1,13 +1,13 @@ import os -from confac import make from baselines.common import set_global_seeds from baselines.ppo2 import ppo2 from baselines import logger -from roam_rl.baselines.utils import VecEnvMaker -from roam_rl.baselines.models import get_network +from roam_rl.utils.vec_env_maker import VecEnvMaker +from roam_rl.utils.models import get_network from gym import spaces import numpy as np -from roam_rl import utils +from roam_rl.utils import path_utils +from roam_rl.utils import config_utils class PPO: @@ -30,7 +30,7 @@ def __init__(self, config, section): # env env_maker_section = config.get(section, 'env_maker') - self.env_maker = make(config, env_maker_section) + self.env_maker = config_utils.initfromconfig(config, env_maker_section) vec_env_maker_section = config.get(section, 'vec_env_maker') self.vec_env_maker = VecEnvMaker(config, vec_env_maker_section) @@ -69,7 +69,7 @@ def train(self, model_path=None): # Create vec env set_global_seeds(self.seed) - logdir = utils.get_log_dir(self.experiment_dir, self.seed) # setup ppo logging + logdir = path_utils.get_log_dir(self.experiment_dir, self.seed) # setup ppo logging logger.configure(dir=logdir, format_strs=['stdout', 'log', 'csv', 'tensorboard']) monitor_file_path = os.path.join(logdir, 'monitor.csv') env = self.vec_env_maker(self.env_maker, self.seed, monitor_file=monitor_file_path, info_keywords=self.info_keywords) @@ -79,7 +79,7 @@ def train(self, model_path=None): model = self._learn(env=env, **self.params, seed=self.seed, load_path=model_path, extra_keys=self.info_keywords) # learn model # Save - model.save(utils.get_model_path(self.experiment_dir, self.seed)) + model.save(path_utils.get_model_path(self.experiment_dir, self.seed)) env.close() def set_experiment_dir(self, dir_name): @@ -94,7 +94,7 @@ def 
load(self, model_seed, model_checkpoint=None, env_seed=0, monitor_file=None) # train for 0 timesteps to load self.params['total_timesteps'] = 0 - model_path = utils.get_model_path(self.experiment_dir, model_seed, model_checkpoint) + model_path = path_utils.get_model_path(self.experiment_dir, model_seed, model_checkpoint) # pylint: disable=E1125 model = self._learn(env=env, **self.params, load_path=model_path) return model, env @@ -103,10 +103,10 @@ def run(self, model, env, stochastic=False): """ """ obs = env.reset() _states = None - # after training stochasticity of the policy is not relevant, + # after training stochasticity of the policy is not relevant, # set the actions to be mean of the policy if not stochastic: - model.act_model.action = model.act_model.pi + model.act_model.action = model.act_model.pi def determinstic_action(pi): if isinstance(env.action_space, spaces.Box): diff --git a/roam_rl/utils/__init__.py b/roam_rl/utils/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/roam_rl/utils/__init__.py @@ -0,0 +1 @@ + diff --git a/roam_rl/utils/config_utils.py b/roam_rl/utils/config_utils.py new file mode 100644 index 0000000..3755539 --- /dev/null +++ b/roam_rl/utils/config_utils.py @@ -0,0 +1,132 @@ +import configparser +import importlib +import ast + +def initfromconfig(config, section): + """ + Returns instance of the class specified by the entrypoint. The config and + section arguments received above are passed to the class contructor. + Args: + config: ConfigParser object of the config file + section: section name in the config file + Returns: + Module class initialized from config + Consider a config file as below. + example config: + [example_section] + entrypoint = some.path.in.module:SomeClass + parameter1 = value1 + . + . + parameterN = value1 + This function will return the object SomeClass(config, section). 
+ """ + entrypoint = config.get(section, 'entrypoint') + module, name = entrypoint.split(':') + module = importlib.import_module(module) + attr = getattr(module, name) + + return attr(config, section) + +def configfromfile(file): + config = configparser.ConfigParser() + config.read(file) + return config + + +class ConfigParser(configparser.ConfigParser): + + """ Extend configparser.ConfigParser with additional methods """ + + def __init__(self): + super().__init__() + + def save(self, path): + + """ + Save config to path + Args: + path: str + Returns: + None + """ + + with open(path, 'w') as f: + self.write(f) + + def get_section(self, section, options): + + """ + Equivalent of get but for section + Args: + section(str): section name + options(dict): dict of options and type + Returns: + dict of options with corresponding parsed values + """ + assert self.has_section(section), 'section {} not found'.format(section) + assert isinstance(options, dict), 'options must be a dict' + + sec = {} + for opt, typ in options.items(): + if self.has_option(section, opt): + if typ == 'bool': + sec[opt] = self.getboolean(section, opt) + elif typ == 'int': + sec[opt] = self.getint(section, opt) + elif typ == 'float': + sec[opt] = self.getfloat(section, opt) + elif typ == 'list': + sec[opt] = self.getlist(section, opt) + elif typ == 'eval': + sec[opt] = eval(self.get(section, opt)) + elif typ == 'str': + sec[opt] = self.get(section, opt) + else: + ValueError("invalid type {}".format(typ)) + return sec + + def dump_section(self, section, recursive=False, dump=None): + + """ + Get section and copy into dump, recurse if required + Args: + section: section name + recursive: set True to copy recursively + dump: (Optional) ConfigParser object to copy to + """ + if dump is None: + dump = ConfigParser() + else: + assert isinstance(dump, configparser.ConfigParser) + + if self.has_section(section): + if not dump.has_section(section): + dump.add_section(section) + for opt, val in self.items(section): + dump.set(section, opt, val) + if recursive: + dump = self.dump_section(val, recursive=recursive, dump=dump) + else: + return dump + + def rename_section(self, old, new): + + """ Renames section """ + + if not self.has_section(old): + raise ValueError("section {} does not exist".format(old)) + if self.has_section(new): + raise ValueError("section {} already exists".format(new)) + + self.add_section(new) + for opt, val in self.items(old): + self.set(new, opt, val) + + self.remove_section(old) + + def getlist(self, section, option): + return ast.literal_eval(self.get(section, option)) + + def getint(self, *args, **kwargs): + return int(super().getfloat(*args, **kwargs)) \ No newline at end of file diff --git a/roam_rl/utils/env_factory.py b/roam_rl/utils/env_factory.py new file mode 100644 index 0000000..c5d0392 --- /dev/null +++ b/roam_rl/utils/env_factory.py @@ -0,0 +1,57 @@ +from .config_utils import initfromconfig +import gym + + +def make_env(config, section): + """ create env using config + + envs specified by entrypoint can be created with make() but envs registered with gym + need to be created differently ( _make_gym_registered_env()) + + """ + assert (config.has_option(section, 'id') and config.has_option(section, 'entrypoint')) is False, \ + "cannot specify both id and entrypoint" + + if config.has_option(section, 'id'): + env = _make_gym_registered_env(config, section) + elif config.has_option(section, 'entrypoint'): + env = initfromconfig(config, section) + else: + raise ValueError('env unknown') + + return 
diff --git a/roam_rl/utils/env_factory.py b/roam_rl/utils/env_factory.py
new file mode 100644
index 0000000..c5d0392
--- /dev/null
+++ b/roam_rl/utils/env_factory.py
@@ -0,0 +1,57 @@
+from .config_utils import initfromconfig
+import gym
+
+
+def make_env(config, section):
+    """ create env using config
+
+    envs specified by an entrypoint are created with initfromconfig(), but envs registered with gym
+    need to be created differently (_make_gym_registered_env())
+
+    """
+    assert (config.has_option(section, 'id') and config.has_option(section, 'entrypoint')) is False, \
+        "cannot specify both id and entrypoint"
+
+    if config.has_option(section, 'id'):
+        env = _make_gym_registered_env(config, section)
+    elif config.has_option(section, 'entrypoint'):
+        env = initfromconfig(config, section)
+    else:
+        raise ValueError('env unknown')
+
+    return env
+
+
+def _make_gym_registered_env(config, section):
+    """ handles creation of envs registered with gym - including envs defined outside of gym, as long
+    as they are correctly registered with gym """
+
+    # If the env to be created lives in a different module, include the module name in the id as shown below:
+    # for id: my_module:EnvName-v0, gym will 'import my_module' and then proceed to create the env
+    env_id = config.get(section, 'id')
+    try:
+        # Try creating the env with config; this works if the environment's __init__() accepts config
+        # and section as arguments, either directly or through **kwargs
+        env = gym.make(id=env_id, config=config, section=section)
+    except TypeError:
+        # The above will fail for the environments shipped with gym, as their __init__() does not accept **kwargs,
+        # so create the environment with just the id as the argument
+        env = gym.make(id=env_id)
+
+    # for gym's robotics environments (https://github.com/openai/gym/tree/master/gym/envs/robotics) the reward type
+    # is configurable between 'sparse' (default) and 'dense'
+    if config.has_option(section, 'reward_type'):
+        if hasattr(env.env, 'reward_type'):
+            reward_type = config.get(section, 'reward_type')
+            if reward_type == 'sparse' or reward_type == 'dense':
+                env.env.reward_type = reward_type
+            else:
+                raise ValueError('reward type unknown')
+        else:
+            raise ValueError('reward_type cannot be configured for {}'.format(env.env))
+
+    return env
+
+
+
+
diff --git a/roam_rl/utils/env_maker.py b/roam_rl/utils/env_maker.py
new file mode 100644
index 0000000..9b531d4
--- /dev/null
+++ b/roam_rl/utils/env_maker.py
@@ -0,0 +1,50 @@
+from gym.utils import seeding
+from .env_factory import make_env
+import importlib
+import warnings
+
+
+class EnvMaker:
+    """ callable class for creating env
+    __call__ method creates an env and sets a seed for the env if one has been configured
+    """
+
+    def __init__(self, config, section):
+        self.seed = None
+        self.experiment_dir = None
+        self.config = config
+        self.section = section
+
+    def set_seed(self, seed):
+        assert isinstance(seed, int)
+        self.seed = seeding.hash_seed(seed)
+
+    def set_experiment_dir(self, experiment_dir):
+        self.experiment_dir = experiment_dir
+
+    def __call__(self):
+        env_section = self.config.get(self.section, 'env')
+        env = make_env(config=self.config, section=env_section)
+        if type(self.seed) is int:
+            env.seed(self.seed)
+        else:
+            warnings.warn("seed not set, using global RNG")
+
+        config = self.config
+        section = self.section
+        if config.has_option(section, 'wrappers'):
+            wrappers = config.getlist(section, 'wrappers')
+            for wrapper_section in wrappers:
+                # resolve the wrapper class from its entrypoint and wrap the env
+                entrypoint = config.get(wrapper_section, 'entrypoint')
+                module, name = entrypoint.split(':')
+                wrapper = getattr(importlib.import_module(module), name)
+                env = wrapper(env, config, wrapper_section)
+        return env
+
+    def __deepcopy__(self, memodict={}):
+        env_maker = self.__class__(self.config, self.section)
+        env_maker.seed = self.seed
+        env_maker.experiment_dir = self.experiment_dir
+        return env_maker
+
diff --git a/roam_rl/utils/env_wrappers.py b/roam_rl/utils/env_wrappers.py
new file mode 100644
index 0000000..96f25b4
--- /dev/null
+++ b/roam_rl/utils/env_wrappers.py
@@ -0,0 +1,14 @@
+import gym.wrappers
+import ast
+
+class FilterObservation(gym.wrappers.FilterObservation):
+
+    def __init__(self, env, config, section):
+        filter_keys = ast.literal_eval(config.get(section, 'filter_keys'))
+        super().__init__(env, filter_keys=filter_keys)
+
+
+class FlattenObservation(gym.wrappers.FlattenObservation):
+
+    def __init__(self, env, config, section):
+        super().__init__(env)
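Illustration (not part of the patch): a sketch of the config layout that EnvMaker and make_env consume, with an `env` option naming a gym-registered env section plus an optional `wrappers` list. The section names, the FetchReach-v1 id, and the wrapper choice are examples, not anything mandated by the code.

# Hypothetical EnvMaker wiring (section names and env id are examples only)
from roam_rl.utils.config_utils import ConfigParser
from roam_rl.utils.env_maker import EnvMaker

config = ConfigParser()
config.read_string("""
[env_maker]
env = fetch_reach
wrappers = ['flatten_obs']

[fetch_reach]
id = FetchReach-v1
reward_type = dense

[flatten_obs]
entrypoint = roam_rl.utils.env_wrappers:FlattenObservation
""")

env_maker = EnvMaker(config, 'env_maker')
env_maker.set_seed(0)
env = env_maker()  # builds the gym env, seeds it, then applies the listed wrappers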
diff --git a/roam_rl/baselines/models.py b/roam_rl/utils/models.py
similarity index 98%
rename from roam_rl/baselines/models.py
rename to roam_rl/utils/models.py
index c398258..43d85f9 100644
--- a/roam_rl/baselines/models.py
+++ b/roam_rl/utils/models.py
@@ -3,7 +3,7 @@
 from baselines.a2c import utils
 from baselines.a2c.utils import conv, fc, conv_to_fc, batch_to_seq, seq_to_batch
 from baselines.common.models import get_network_builder
-from confac import make
+from .config_utils import initfromconfig
 
 
 class NetworkFn:
@@ -149,6 +149,6 @@ def get_network(config, section):
         _type = config.get(section, 'type')
         return _mapping[_type](config, section)
     elif config.has_option(section, 'entrypoint'):
-        return make(config, section)
+        return initfromconfig(config, section)
     else:
         raise ValueError("network unknown")
diff --git a/roam_rl/utils.py b/roam_rl/utils/path_utils.py
similarity index 97%
rename from roam_rl/utils.py
rename to roam_rl/utils/path_utils.py
index 1d8acbc..2e5667e 100644
--- a/roam_rl/utils.py
+++ b/roam_rl/utils/path_utils.py
@@ -21,4 +21,4 @@ def get_log_dir(experiments_dir, seed):
     return logdir
 
 def get_config_path(load_dir, experiment_no):
-    return os.path.join(load_dir, 'config_' + str(experiment_no).zfill(2) + '.cfg')
\ No newline at end of file
+    return os.path.join(load_dir, 'config_' + str(experiment_no).zfill(2) + '.cfg')
\ No newline at end of file
diff --git a/roam_rl/baselines/utils/vec_env_maker.py b/roam_rl/utils/vec_env_maker.py
similarity index 98%
rename from roam_rl/baselines/utils/vec_env_maker.py
rename to roam_rl/utils/vec_env_maker.py
index 54e9323..b5db12d 100644
--- a/roam_rl/baselines/utils/vec_env_maker.py
+++ b/roam_rl/utils/vec_env_maker.py
@@ -7,7 +7,7 @@
 from copy import deepcopy
 
 
-class VecEnvMaker(object):
+class VecEnvMaker:
     """ Callable class that takes instance of roam_learning.robot_env.EnvMaker and returns either a DummyVecEnv,
     SubprocVecEnv or ShmemVecEnv """
 
diff --git a/scripts/train.py b/scripts/train.py
index 212f875..24606d1 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -1,6 +1,6 @@
 import argparse
-from roam_rl import utils
-from confac import ConfigParser, make
+from roam_rl.utils.path_utils import get_experiment_dir, get_config_path
+from roam_rl.utils.config_utils import ConfigParser, initfromconfig
 import os
 
 def main(args):
@@ -10,10 +10,10 @@ def main(args):
     config.read(config_file)
     experiment_no = config.get('experiment', 'experiment_no')
     os.makedirs(os.environ['EXPERIMENTS_DIR'], exist_ok=True)
-    experiment_dir = utils.get_experiment_dir(os.environ['EXPERIMENTS_DIR'], experiment_no, mkdir=True)
-    config_path = utils.get_config_path(experiment_dir, experiment_no)
+    experiment_dir = get_experiment_dir(os.environ['EXPERIMENTS_DIR'], experiment_no, mkdir=True)
+    config_path = get_config_path(experiment_dir, experiment_no)
     config.save(config_path)
-    algo = make(config, config.get('experiment', 'algo'))
+    algo = initfromconfig(config, config.get('experiment', 'algo'))
     algo.set_experiment_dir(experiment_dir)
     algo.train(model_path=args.model_path)
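Illustration (not part of the patch): how a custom network could be selected through get_network after the move to roam_rl.utils.models; the `my_pkg.networks:CustomNetworkFn` class path and the section name are hypothetical placeholders.

# Hypothetical network selection via entrypoint; get_network falls back to
# initfromconfig when no built-in 'type' option is given in the section.
from roam_rl.utils.config_utils import ConfigParser
from roam_rl.utils.models import get_network

config = ConfigParser()
config.read_string("""
[policy_network]
entrypoint = my_pkg.networks:CustomNetworkFn
""")

network_fn = get_network(config, 'policy_network')  # returns CustomNetworkFn(config, 'policy_network')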