diff --git a/Dockerfile b/Dockerfile
index dc9c5c6..4c18e6e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -72,7 +72,6 @@ RUN mkdir -p /root/.mujoco && \
 COPY README.md /root/code/roam_rl/README.md
 COPY roam_rl/__init__.py /root/code/roam_rl/roam_rl/__init__.py
 COPY setup.py /root/code/roam_rl/setup.py
-COPY Makefile /root/code/roam_rl/Makefile
 WORKDIR /root/code/roam_rl
 
 # Create virtualenv
@@ -86,11 +85,14 @@ RUN pip install --upgrade pip
 
 # We need a MuJoCo key to install mujoco_py
 # In this step only the presence of the file mjkey.txt is required, so we only
 # create an empty file
-RUN touch /root/.mujoco/mjkey.txt && \
-    pip install mujoco_py && \
-    make default && \
-    rm -r /root/.cache/pip && \
-    rm /root/.mujoco/mjkey.txt
+RUN touch /root/.mujoco/mjkey.txt && pip install mujoco_py && rm /root/.mujoco/mjkey.txt && \
+    pip uninstall --yes tensorflow tensorflow-gpu && pip install tensorflow==1.14 && \
+    pip install git+https://git@github.com/roamlab/confac@master#egg=confac && \
+    pip install git+https://git@github.com/roamlab/roam_env@master#egg=roam_env && \
+    pip install --force-reinstall git+https://git@github.com/openai/baselines@master#egg=baselines && \
+    pip install --force-reinstall git+https://git@github.com/roamlab/baselines-hippo@master#egg=baselines-hippo && \
+    pip install -e . && \
+    rm -r /root/.cache/pip
 
 COPY . /root/code/roam_rl/
diff --git a/Dockerfile.garage b/Dockerfile.garage
deleted file mode 100644
index 11fdbaf..0000000
--- a/Dockerfile.garage
+++ /dev/null
@@ -1,96 +0,0 @@
-# To be used as a base image for your project. In your project's image
-# make sure you place your MuJoCo key at /root/.mujoco/
-
-FROM ubuntu:18.04
-
-# http://bugs.python.org/issue19846
-# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
-ENV LANG C.UTF-8
-
-# apt dependencies
-RUN \
-  apt-get -y -q update && \
-  # Prevents debconf from prompting for user input
-  # See https://github.com/phusion/baseimage-docker/issues/58
-  DEBIAN_FRONTEND=noninteractive apt-get install -y \
-    # Dockerfile deps
-    wget \
-    unzip \
-    git \
-    curl \
-    # For building glfw
-    cmake \
-    xorg-dev \
-    # mujoco_py
-    # See https://github.com/openai/mujoco-py/blob/master/Dockerfile
-    # 18.04 repo is old, install glfw from source instead
-    # libglfw3 \
-    libglew-dev \
-    libosmesa6-dev \
-    patchelf \
-    # OpenAI baselines
-    libopenmpi-dev \
-    # virtualenv
-    python3 \
-    python3-pip \
-    python3-tk \
-    python3-virtualenv && \
-  apt-get clean && \
-  rm -rf /var/lib/apt/lists/*
-
-# Build GLFW because the Ubuntu 18.04 version is too old
-# See https://github.com/glfw/glfw/issues/1004
-RUN apt-get purge -y -v libglfw*
-RUN wget https://github.com/glfw/glfw/releases/download/3.3/glfw-3.3.zip && \
-  unzip glfw-3.3.zip && \
-  rm glfw-3.3.zip && \
-  cd glfw-3.3 && \
-  mkdir glfw-build && \
-  cd glfw-build && \
-  cmake -DBUILD_SHARED_LIBS=ON -DGLFW_BUILD_EXAMPLES=OFF -DGLFW_BUILD_TESTS=OFF -DGLFW_BUILD_DOCS=OFF .. && \
-  make -j"$(nproc)" && \
-  make install && \
-  cd ../../ && \
-  rm -rf glfw
-
-# MuJoCo 2.0 (for dm_control)
-RUN mkdir -p /root/.mujoco && \
-  wget https://www.roboti.us/download/mujoco200_linux.zip -O mujoco.zip && \
-  unzip mujoco.zip -d $HOME/.mujoco && \
-  rm mujoco.zip && \
-  ln -s $HOME/.mujoco/mujoco200_linux $HOME/.mujoco/mujoco200
-
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco200/bin
-
-# Copy over just setup.py first, so the Docker cache doesn't expire until
-# dependencies change
-#
-# Files needed to run setup.py
-# - README.md
-# - roam_rl/__init__.py
-# - setup.py
-COPY README.md /root/code/roam_rl/README.md
-COPY roam_rl/__init__.py /root/code/roam_rl/roam_rl/__init__.py
-COPY setup.py /root/code/roam_rl/setup.py
-WORKDIR /root/code/roam_rl
-
-# Create virtualenv
-ENV VIRTUAL_ENV=/root/venv
-RUN python3 -m virtualenv --python=/usr/bin/python3 $VIRTUAL_ENV
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-
-# Prevent pip from complaining about available upgrades
-RUN pip install --upgrade pip
-
-# Install pip dependencies
-# Note: Empty license file for installing mujoco_py
-RUN touch /root/.mujoco/mjkey.txt && pip install mujoco_py && rm /root/.mujoco/mjkey.txt && \
-  pip install git+https://git@github.com/rlworkgroup/garage@v2020.06.3#egg=garage&& \
-  pip install git+https://git@github.com/roamlab/confac@master#egg=confac && \
-  pip install git+https://git@github.com/roamlab/roam_env@master#egg=roam_env && \
-  pip install -e . && \
-  rm -r /root/.cache/pip
-
-COPY . /root/code/roam_rl/
-
-CMD /bin/bash
-
diff --git a/Makefile b/Makefile
index c6df36a..27124fe 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ default: test-env
 	make common
 
 # dependencies for baselines, we use garage's well maintained setup script
-setup: 
+setup:
 	if [ -d "tmp" ]; then\
 		rm -rf tmp;\
 	fi
@@ -27,7 +27,7 @@ test-env:
 		exit 1;\
 	fi
 
-common: 
+common:
 	pip install git+https://git@github.com/roamlab/confac@master#egg=confac
 	pip install git+https://git@github.com/roamlab/roam_env@master#egg=roam_env
 	pip install --force-reinstall git+https://git@github.com/openai/baselines@master#egg=baselines
diff --git a/roam_rl/__init__.py b/roam_rl/__init__.py
index e69de29..a7a3c87 100644
--- a/roam_rl/__init__.py
+++ b/roam_rl/__init__.py
@@ -0,0 +1 @@
+from .ppo import PPO
\ No newline at end of file
diff --git a/roam_rl/baselines/__init__.py b/roam_rl/baselines/__init__.py
deleted file mode 100644
index f5fb983..0000000
--- a/roam_rl/baselines/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .ppo import PPO
-try:
-    from .hippo import HIPPO
-except ImportError:
-    pass
\ No newline at end of file
diff --git a/roam_rl/baselines/hippo.py b/roam_rl/baselines/hippo.py
deleted file mode 100644
index 9a95335..0000000
--- a/roam_rl/baselines/hippo.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from roam_rl.baselines.ppo import PPO
-from hippo import hippo
-
-class HIPPO(PPO):
-
-    def __init__(self, config, section):
-        super().__init__(config, section)
-        self.reward_fn = hippo.extract_reward_fn(self.env_maker)
-        # wrap hippo.learn to sneak-in the reward function
-        def learn(*args, **kwargs):
-            return hippo.learn(*args, **kwargs, reward_fn=self.reward_fn)
-        self._learn = learn
-
-    def _get_parameter_descr_dict(self):
-
-        parameters = super()._get_parameter_descr_dict()
-        parameters.update(
-            {
-                'nbatch': 'int',
-                'mode': 'str',
-                'use_buffer': 'bool',
-                'buffer_capacity': 'int',
-                'hindsight': 'float'
-            }
-        )
-        return parameters
diff --git a/roam_rl/baselines/utils/__init__.py b/roam_rl/baselines/utils/__init__.py
deleted file mode 100644
index 66c0757..0000000
--- a/roam_rl/baselines/utils/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .vec_env_maker import VecEnvMaker
diff --git a/roam_rl/garage/__init__.py b/roam_rl/garage/__init__.py
deleted file mode 100644
index b4adf0f..0000000
--- a/roam_rl/garage/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .sac import SAC
\ No newline at end of file
diff --git a/roam_rl/garage/sac.py b/roam_rl/garage/sac.py
deleted file mode 100644
index af7cea2..0000000
--- a/roam_rl/garage/sac.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import gym
-import numpy as np
-import torch
-from torch import nn
-from torch.nn import functional as F
-
-from garage import wrap_experiment
-from garage.envs import GarageEnv, normalize
-from garage.experiment import deterministic, LocalRunner
-from garage.replay_buffer import PathBuffer
-from garage.sampler import LocalSampler
-from garage.torch import set_gpu_mode
-from garage.torch.algos import SAC as _SAC_
-from garage.torch.policies import TanhGaussianMLPPolicy
-
-from garage.torch.q_functions import ContinuousMLPQFunction
-import os
-from confac import make
-from garage.sampler import RaySampler
-
-class SAC:
-
-    def __init__(self, config, section):
-
-        # Setup
-        self.experiment_dir = None
-        self.config = config
-        self.section = section
-        self.env_maker = make(config, config.get(section, 'env_maker'))
-        self.seed = config.getint(section, 'seed')
-        self.snapshot_mode = config.get(section, 'snapshot_mode', fallback='last')
-
-        # SAC hyper parameters
-        self.policy_hidden_sizes = eval(config.get(section, 'policy_hidden_sizes', fallback='[256, 256]'))
-        self.qf_hidden_sizes = eval(config.get(section, 'qf_hidden_sizes', fallback = '[256, 256]'))
-        self.buffer_capacity_in_transitions = int(config.getfloat(section, 'buffer_capacity_in_transitions', fallback=1e6))
-        self.gradient_steps_per_itr = config.getint(section, 'gradient_steps_per_iteration', fallback=1000)
-        self.max_path_length = config.getint(section, 'max_path_length', fallback=1000)
-        self.max_eval_path_length = config.getint(section, 'max_eval_path_length', fallback=1000)
-        self.min_buffer_size = int(config.getfloat(section, 'min_buffer_size', fallback=1e4))
-        self.target_update_tau = config.getfloat(section, 'target_update_tau', fallback=5e-3)
-        self.discount = config.getfloat(section, 'discount', fallback=0.99)
-        self.buffer_batch_size = config.getint(section, 'buffer_batch_size', fallback=256)
-        self.reward_scale = config.getfloat(section, 'reward_scale', fallback=1.)
-        self.steps_per_epoch = config.getint(section, 'steps_per_epoch', fallback=1)
-        self.batch_size = config.getint(section, 'batch_size', fallback=1000)
-        self.n_epochs = config.getint(section, 'n_epochs', fallback=1000)
-        self.parallel_sampling = config.getboolean(section, 'parallel_sampling', fallback=False)
-        if self.parallel_sampling:
-            self.n_workers = config.getint(section, 'n_workers')
-
-    def set_experiment_dir(self, experiment_dir):
-        self.experiment_dir = experiment_dir
-
-    def train(self):
-
-        # define
-        @wrap_experiment(snapshot_mode=self.snapshot_mode, log_dir=self.experiment_dir)
-        def run(ctxt=None):
-            """ Set up environment and algorithm and run the task.
-
-            Args:
-                ctxt (garage.experiment.ExperimentContext): The experiment
-                    configuration used by LocalRunner to create the snapshotter.
-                seed (int): Used to seed the random number generator to produce
-                    determinism.
-
-            """
-            deterministic.set_seed(self.seed)
-            runner = LocalRunner(snapshot_config=ctxt, max_cpus=32)
-            env = GarageEnv(normalize(self.env_maker()))
-
-            policy = TanhGaussianMLPPolicy(
-                env_spec=env.spec,
-                hidden_sizes=self.policy_hidden_sizes,
-                hidden_nonlinearity=nn.ReLU,
-                output_nonlinearity=None,
-                min_std=np.exp(-20.),
-                max_std=np.exp(2.),
-            )
-
-            qf1 = ContinuousMLPQFunction(env_spec=env.spec,
-                                         hidden_sizes=self.qf_hidden_sizes,
-                                         hidden_nonlinearity=F.relu)
-
-            qf2 = ContinuousMLPQFunction(env_spec=env.spec,
-                                         hidden_sizes=self.qf_hidden_sizes,
-                                         hidden_nonlinearity=F.relu)
-
-            replay_buffer = PathBuffer(capacity_in_transitions=self.buffer_capacity_in_transitions)
-
-            algo = _SAC_(env_spec=env.spec,
-                         policy=policy,
-                         qf1=qf1,
-                         qf2=qf2,
-                         gradient_steps_per_itr=self.gradient_steps_per_itr,
-                         max_path_length=self.max_path_length,
-                         max_eval_path_length=self.max_eval_path_length,
-                         replay_buffer=replay_buffer,
-                         min_buffer_size=self.min_buffer_size,
-                         target_update_tau=self.target_update_tau,
-                         discount=self.discount,
-                         buffer_batch_size=self.buffer_batch_size,
-                         reward_scale=self.reward_scale,
-                         steps_per_epoch=self.steps_per_epoch)
-
-            if torch.cuda.is_available():
-                set_gpu_mode(True)
-            else:
-                set_gpu_mode(False)
-            algo.to()
-
-            if self.parallel_sampling:
-                runner.setup(algo=algo, env=env, sampler_cls=RaySampler, n_workers=self.n_workers)
-            else:
-                runner.setup(algo=algo, env=env, sampler_cls=LocalSampler)
-
-            runner.train(n_epochs=self.n_epochs, batch_size=self.batch_size)
-
-        run()
-
-
diff --git a/roam_rl/baselines/ppo.py b/roam_rl/ppo.py
similarity index 83%
rename from roam_rl/baselines/ppo.py
rename to roam_rl/ppo.py
index 1bc61ff..86c6106 100644
--- a/roam_rl/baselines/ppo.py
+++ b/roam_rl/ppo.py
@@ -1,13 +1,13 @@
 import os
-from confac import make
 from baselines.common import set_global_seeds
 from baselines.ppo2 import ppo2
 from baselines import logger
-from roam_rl.baselines.utils import VecEnvMaker
-from roam_rl.baselines.models import get_network
+from roam_rl.utils.vec_env_maker import VecEnvMaker
+from roam_rl.utils.models import get_network
 from gym import spaces
 import numpy as np
-from roam_rl import utils
+from roam_rl.utils import path_utils
+from roam_rl.utils import config_utils
 
 
 class PPO:
@@ -30,12 +30,14 @@ def __init__(self, config, section):
 
         # env
         env_maker_section = config.get(section, 'env_maker')
-        self.env_maker = make(config, env_maker_section)
+        self.env_maker = config_utils.initfromconfig(config, env_maker_section)
         vec_env_maker_section = config.get(section, 'vec_env_maker')
         self.vec_env_maker = VecEnvMaker(config, vec_env_maker_section)
 
         self.seed = config.getint(section, 'seed')
 
+        self.info_keywords = eval(config.get(section, 'info_keywords', fallback='()'))
+
     def _get_parameter_descr_dict(self):
 
         """
@@ -67,17 +69,17 @@ def train(self, model_path=None):
 
         # Create vec env
        set_global_seeds(self.seed)
-        logdir = utils.get_log_dir(self.experiment_dir, self.seed) # setup ppo logging
+        logdir = path_utils.get_log_dir(self.experiment_dir, self.seed) # setup ppo logging
         logger.configure(dir=logdir, format_strs=['stdout', 'log', 'csv', 'tensorboard'])
         monitor_file_path = os.path.join(logdir, 'monitor.csv')
-        env = self.vec_env_maker(self.env_maker, self.seed, monitor_file=monitor_file_path)
+        env = self.vec_env_maker(self.env_maker, self.seed, monitor_file=monitor_file_path, info_keywords=self.info_keywords)
 
         # Learn
         # pylint: disable=E1125
-        model = self._learn(env=env, **self.params, seed=self.seed, load_path=model_path) # learn model
+        model = self._learn(env=env, **self.params, seed=self.seed, load_path=model_path, extra_keys=self.info_keywords) # learn model
 
         # Save
-        model.save(utils.get_model_path(self.experiment_dir, self.seed))
+        model.save(path_utils.get_model_path(self.experiment_dir, self.seed))
         env.close()
 
     def set_experiment_dir(self, dir_name):
@@ -92,7 +94,7 @@ def load(self, model_seed, model_checkpoint=None, env_seed=0, monitor_file=None)
 
         # train for 0 timesteps to load
         self.params['total_timesteps'] = 0
-        model_path = utils.get_model_path(self.experiment_dir, model_seed, model_checkpoint)
+        model_path = path_utils.get_model_path(self.experiment_dir, model_seed, model_checkpoint)
         # pylint: disable=E1125
         model = self._learn(env=env, **self.params, load_path=model_path)
         return model, env
@@ -101,10 +103,10 @@ def run(self, model, env, stochastic=False):
         """ """
         obs = env.reset()
         _states = None
-        # after training stochasticity of the policy is not relevant, 
+        # after training stochasticity of the policy is not relevant,
         # set the actions to be mean of the policy
         if not stochastic:
-            model.act_model.action = model.act_model.pi 
+            model.act_model.action = model.act_model.pi
 
             def determinstic_action(pi):
                 if isinstance(env.action_space, spaces.Box):
diff --git a/roam_rl/utils/__init__.py b/roam_rl/utils/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/roam_rl/utils/__init__.py
@@ -0,0 +1 @@
+
diff --git a/roam_rl/utils/config_utils.py b/roam_rl/utils/config_utils.py
new file mode 100644
index 0000000..3755539
--- /dev/null
+++ b/roam_rl/utils/config_utils.py
@@ -0,0 +1,132 @@
+import configparser
+import importlib
+import ast
+
+def initfromconfig(config, section):
+    """
+    Returns an instance of the class specified by the entrypoint. The config and
+    section arguments received above are passed to the class constructor.
+    Args:
+        config: ConfigParser object of the config file
+        section: section name in the config file
+    Returns:
+        Instance of the class initialized from config
+    Consider a config file as below.
+    example config:
+        [example_section]
+        entrypoint = some.path.in.module:SomeClass
+        parameter1 = value1
+        .
+        .
+        parameterN = valueN
+    This function will return the object SomeClass(config, section).
+ """ + entrypoint = config.get(section, 'entrypoint') + module, name = entrypoint.split(':') + module = importlib.import_module(module) + attr = getattr(module, name) + + return attr(config, section) + +def configfromfile(file): + config = configparser.ConfigParser() + config.read(file) + return config + + +class ConfigParser(configparser.ConfigParser): + + """ Extend configparser.ConfigParser with additional methods """ + + def __init__(self): + super().__init__() + + def save(self, path): + + """ + Save config to path + Args: + path: str + Returns: + None + """ + + with open(path, 'w') as f: + self.write(f) + + def get_section(self, section, options): + + """ + Equivalent of get but for section + Args: + section(str): section name + options(dict): dict of options and type + Returns: + dict of options with corresponding parsed values + """ + assert self.has_section(section), 'section {} not found'.format(section) + assert isinstance(options, dict), 'options must be a dict' + + sec = {} + for opt, typ in options.items(): + if self.has_option(section, opt): + if typ == 'bool': + sec[opt] = self.getboolean(section, opt) + elif typ == 'int': + sec[opt] = self.getint(section, opt) + elif typ == 'float': + sec[opt] = self.getfloat(section, opt) + elif typ == 'list': + sec[opt] = self.getlist(section, opt) + elif typ == 'eval': + sec[opt] = eval(self.get(section, opt)) + elif typ == 'str': + sec[opt] = self.get(section, opt) + else: + ValueError("invalid type {}".format(typ)) + return sec + + def dump_section(self, section, recursive=False, dump=None): + + """ + Get section and copy into dump, recurse if required + Args: + section: section name + recursive: set True to copy recursively + dump: (Optional) ConfigParser object to copy to + """ + if dump is None: + dump = ConfigParser() + else: + assert isinstance(dump, configparser.ConfigParser) + + if self.has_section(section): + if not dump.has_section(section): + dump.add_section(section) + for opt, val in self.items(section): + dump.set(section, opt, val) + if recursive: + dump = self.dump_section(val, recursive=recursive, dump=dump) + else: + return dump + + def rename_section(self, old, new): + + """ Renames section """ + + if not self.has_section(old): + raise ValueError("section {} does not exist".format(old)) + if self.has_section(new): + raise ValueError("section {} already exists".format(new)) + + self.add_section(new) + for opt, val in self.items(old): + self.set(new, opt, val) + + self.remove_section(old) + + def getlist(self, section, option): + return ast.literal_eval(self.get(section, option)) + + def getint(self, *args, **kwargs): + return int(super().getfloat(*args, **kwargs)) \ No newline at end of file diff --git a/roam_rl/utils/env_factory.py b/roam_rl/utils/env_factory.py new file mode 100644 index 0000000..c5d0392 --- /dev/null +++ b/roam_rl/utils/env_factory.py @@ -0,0 +1,57 @@ +from .config_utils import initfromconfig +import gym + + +def make_env(config, section): + """ create env using config + + envs specified by entrypoint can be created with make() but envs registered with gym + need to be created differently ( _make_gym_registered_env()) + + """ + assert (config.has_option(section, 'id') and config.has_option(section, 'entrypoint')) is False, \ + "cannot specify both id and entrypoint" + + if config.has_option(section, 'id'): + env = _make_gym_registered_env(config, section) + elif config.has_option(section, 'entrypoint'): + env = initfromconfig(config, section) + else: + raise ValueError('env unknown') + + return 
+
+
+def _make_gym_registered_env(config, section):
+    """ handles creation of envs registered with gym - including envs defined outside of gym as long as they are
+    correctly registered with gym """
+
+    # If the env that is to be created is in a different module, then include the name of the module to import as shown below
+    # id: my_module:EnvName-v0, gym will 'import my_module' and then proceed to creating the env
+    env_id = config.get(section, 'id')
+    try:
+        # Try creating the env with config; this will work if the environment's __init__() either accepts config
+        # and section as arguments directly or through **kwargs
+        env = gym.make(id=env_id, config=config, section=section)
+    except TypeError:
+        # The above method will fail for all gym environments by OpenAI as their __init__() does not accept **kwargs,
+        # so create the environment with just the id as the argument
+        env = gym.make(id=env_id)
+
+    # for gym's robotics environments (https://github.com/openai/gym/tree/master/gym/envs/robotics) the reward type
+    # is configurable between the 'sparse'(default) and 'dense' reward.
+    if config.has_option(section, 'reward_type'):
+        if hasattr(env.env, 'reward_type'):
+            reward_type = config.get(section, 'reward_type')
+            if reward_type == 'sparse' or reward_type == 'dense':
+                env.env.reward_type = reward_type
+            else:
+                raise ValueError('reward type unknown')
+        else:
+            raise ValueError('reward_type cannot be configured for {}'.format(env.env))
+
+    return env
+
+
+
+
diff --git a/roam_rl/utils/env_maker.py b/roam_rl/utils/env_maker.py
new file mode 100644
index 0000000..9b531d4
--- /dev/null
+++ b/roam_rl/utils/env_maker.py
@@ -0,0 +1,46 @@
+from gym.utils import seeding
+from .env_factory import make_env
+import warnings
+
+
+class EnvMaker:
+    """ callable class for creating env
+    __call__ method creates an env and sets seed for the env if one has been configured
+    """
+
+    def __init__(self, config, section):
+        self.seed = None
+        self.experiment_dir = None
+        self.config = config
+        self.section = section
+
+    def set_seed(self, seed):
+        assert isinstance(seed, int)
+        self.seed = seeding.hash_seed(seed)
+
+    def set_experiment_dir(self, experiment_dir):
+        self.experiment_dir = experiment_dir
+
+    def __call__(self):
+        env_section = self.config.get(self.section, 'env')
+        env = make_env(config=self.config, section=env_section)
+        if type(self.seed) is int:
+            env.seed(self.seed)
+        else:
+            warnings.warn("seed not set, using global RNG ")
+
+        config = self.config
+        section = self.section
+        if config.has_option(section, 'wrappers'):
+            wrappers = config.getlist(section, 'wrappers')
+            for wrapper_section in wrappers:
+                wrapper = get(config, wrapper_section)
+                env = wrapper(env, config, wrapper_section)
+        return env
+
+    def __deepcopy__(self, memodict={}):
+        env_maker = self.__class__(self.config, self.section)
+        env_maker.seed = self.seed
+        env_maker.experiment_dir = self.experiment_dir
+        return env_maker
+
diff --git a/roam_rl/utils/env_wrappers.py b/roam_rl/utils/env_wrappers.py
new file mode 100644
index 0000000..96f25b4
--- /dev/null
+++ b/roam_rl/utils/env_wrappers.py
@@ -0,0 +1,14 @@
+import gym.wrappers
+import ast
+
+class FilterObservation(gym.wrappers.FilterObservation):
+
+    def __init__(self, env, config, section):
+        filter_keys = ast.literal_eval(config.get(section, 'filter_keys'))
+        super().__init__(env, filter_keys=filter_keys)
+
+
+class FlattenObservation(gym.wrappers.FlattenObservation):
+
+    def __init__(self, env, config, section):
+        super().__init__(env)
diff --git a/roam_rl/baselines/models.py b/roam_rl/utils/models.py
similarity index 98%
rename from roam_rl/baselines/models.py
rename to roam_rl/utils/models.py
index c398258..43d85f9 100644
--- a/roam_rl/baselines/models.py
+++ b/roam_rl/utils/models.py
@@ -3,7 +3,7 @@
 from baselines.a2c import utils
 from baselines.a2c.utils import conv, fc, conv_to_fc, batch_to_seq, seq_to_batch
 from baselines.common.models import get_network_builder
-from confac import make
+from .config_utils import initfromconfig
 
 
 class NetworkFn:
@@ -149,6 +149,6 @@ def get_network(config, section):
         _type = config.get(section, 'type')
         return _mapping[_type](config, section)
     elif config.has_option(section, 'entrypoint'):
-        return make(config, section)
+        return initfromconfig(config, section)
     else:
         raise ValueError("network unknown")
diff --git a/roam_rl/utils.py b/roam_rl/utils/path_utils.py
similarity index 97%
rename from roam_rl/utils.py
rename to roam_rl/utils/path_utils.py
index 1d8acbc..2e5667e 100644
--- a/roam_rl/utils.py
+++ b/roam_rl/utils/path_utils.py
@@ -21,4 +21,4 @@ def get_log_dir(experiments_dir, seed):
     return logdir
 
 def get_config_path(load_dir, experiment_no):
-    return os.path.join(load_dir, 'config_' + str(experiment_no).zfill(2) + '.cfg')
\ No newline at end of file
+    return os.path.join(load_dir, 'config_' + str(experiment_no).zfill(2) + '.cfg')
\ No newline at end of file
diff --git a/roam_rl/baselines/utils/vec_env_maker.py b/roam_rl/utils/vec_env_maker.py
similarity index 92%
rename from roam_rl/baselines/utils/vec_env_maker.py
rename to roam_rl/utils/vec_env_maker.py
index d6e131d..b5db12d 100644
--- a/roam_rl/baselines/utils/vec_env_maker.py
+++ b/roam_rl/utils/vec_env_maker.py
@@ -7,7 +7,7 @@ from copy import deepcopy
 
 
-class VecEnvMaker(object):
+class VecEnvMaker:
 
     """ Callable class that takes instance of roam_learning.robot_env.EnvMaker and returns either a DummyVecEnv,
     SubprocVecEnv or ShmemVecEnv """
 
@@ -26,7 +26,7 @@ def __init__(self, config, section):
         self.normalize_obs = config.getboolean(section, 'normalize_obs', fallback=False)
         self.normalize_ret = config.getboolean(section, 'normalize_ret', fallback=False)
 
-    def __call__(self, env_maker, seed=None, monitor_file=None):
+    def __call__(self, env_maker, seed=None, monitor_file=None, info_keywords=()):
         """
         :param env_maker: instance of roam_learning.robot_env.EnvMaker
         :param seed: int that is used to generate seeds for vectorized envs
@@ -48,7 +48,7 @@ def __call__(self, env_maker, seed=None, monitor_file=None):
 
         # Monitor the envs before normalization
         if monitor_file is not None:
-            envs = VecMonitor(envs, filename=monitor_file)
+            envs = VecMonitor(envs, filename=monitor_file, info_keywords=info_keywords)
         if self.normalize_obs or self.normalize_ret:
             envs = VecNormalize(envs, ob=self.normalize_obs, ret=self.normalize_ret, use_tf=True)
         return envs
diff --git a/scripts/garage/halfcheetah.cfg b/scripts/garage/halfcheetah.cfg
deleted file mode 100644
index 4145fcb..0000000
--- a/scripts/garage/halfcheetah.cfg
+++ /dev/null
@@ -1,23 +0,0 @@
-# To run this test you need MuJoCo and mujoco_py installed
-
-[experiment]
-experiment_no = 0
-algo = sac
-
-[sac]
-entrypoint = roam_rl.garage.sac:SAC
-env_maker = my_env_maker
-seed = 0
-snapshot_mode = all
-qf_hidden_sizes = [256, 256]
-policy_hidden_sizes = [256, 256]
-n_epochs = 6
-parallel_sampling = True
-n_workers = 12
-
-[my_env_maker]
-entrypoint = roam_env.utils.env_maker:EnvMaker
-env = my_robot_env
-
-[my_robot_env]
-id = HalfCheetah-v2
diff --git a/scripts/garage/run_policy.py b/scripts/garage/run_policy.py
deleted file mode 100644
index 1cde23c..0000000
--- a/scripts/garage/run_policy.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""Simulates pre-learned policy."""
-import argparse
-import sys
-
-import joblib
-import tensorflow as tf
-
-from garage.sampler.utils import rollout
-import torch
-from garage.torch import set_gpu_mode
-
-def query_yes_no(question, default='yes'):
-    """Ask a yes/no question via raw_input() and return their answer.
-
-    "question" is a string that is presented to the user.
-    "default" is the presumed answer if the user just hits <Enter>.
-        It must be "yes" (the default), "no" or None (meaning
-        an answer is required of the user).
-
-    The "answer" return value is True for "yes" or False for "no".
-    """
-    valid = {'yes': True, 'y': True, 'ye': True, 'no': False, 'n': False}
-    if default is None:
-        prompt = ' [y/n] '
-    elif default == 'yes':
-        prompt = ' [Y/n] '
-    elif default == 'no':
-        prompt = ' [y/N] '
-    else:
-        raise ValueError("invalid default answer: '%s'" % default)
-
-    while True:
-        sys.stdout.write(question + prompt)
-        choice = input().lower()
-        if default is not None and choice == '':
-            return valid[default]
-        elif choice in valid:
-            return valid[choice]
-        else:
-            sys.stdout.write("Please respond with 'yes' or 'no' "
-                             "(or 'y' or 'n').\n")
-
-
-if __name__ == '__main__':
-
-    if torch.cuda.is_available():
-        set_gpu_mode(True)
-    else:
-        set_gpu_mode(False)
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument('file', type=str, help='path to the snapshot file')
-    parser.add_argument('--max_path_length',
-                        type=int,
-                        default=1000,
-                        help='Max length of rollout')
-    parser.add_argument('--speedup', type=float, default=1, help='Speedup')
-    args = parser.parse_args()
-
-    # If the snapshot file use tensorflow, do:
-    # import tensorflow as tf
-    # with tf.compat.v1.Session():
-    #     [rest of the code]
-    with tf.compat.v1.Session() as sess:
-        data = joblib.load(args.file)
-        policy = data['algo'].policy
-        env = data['env']
-        while True:
-            path = rollout(env,
-                           policy,
-                           max_path_length=args.max_path_length,
-                           animated=True,
-                           speedup=args.speedup)
-            if not query_yes_no('Continue simulation?'):
-                break
\ No newline at end of file
diff --git a/scripts/garage/train.py b/scripts/garage/train.py
deleted file mode 100755
index 50a3431..0000000
--- a/scripts/garage/train.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import argparse
-from roam_rl import utils
-from confac import ConfigParser
-from confac import make
-import os
-import torch
-from garage.torch import set_gpu_mode
-
-def main(args):
-
-    config_file = args.config_file
-    config = ConfigParser()
-    config.read(config_file)
-    experiment_no = config.get('experiment', 'experiment_no')
-    os.makedirs(os.environ['EXPERIMENTS_DIR'], exist_ok=True)
-    experiment_dir = utils.get_experiment_dir(os.environ['EXPERIMENTS_DIR'], experiment_no, mkdir=True)
-    config_path = utils.get_config_path(experiment_dir, experiment_no)
-    config.save(config_path)
-    algo = make(config, config.get('experiment', 'algo'))
-    algo.set_experiment_dir(experiment_dir)
-    algo.train()
-
-if __name__ == "__main__":
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument('config_file', help='A string specifying the path to a config file')
-    arg = parser.parse_args()
-    main(arg)
diff --git a/scripts/train.py b/scripts/train.py
index 51d15fb..24606d1 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -1,6 +1,6 @@
 import argparse
-from roam_rl import utils
-from confac import ConfigParser, make
+from roam_rl.utils.path_utils import get_experiment_dir, get_config_path
+from roam_rl.utils.config_utils import ConfigParser, initfromconfig
 import os
 
 def main(args):
@@ -10,11 +10,10 @@
     config.read(config_file)
     experiment_no = config.get('experiment', 'experiment_no')
     os.makedirs(os.environ['EXPERIMENTS_DIR'], exist_ok=True)
-    experiment_dir = utils.get_experiment_dir(os.environ['EXPERIMENTS_DIR'], experiment_no, mkdir=True)
-    config_path = utils.get_config_path(experiment_dir, experiment_no)
+    experiment_dir = get_experiment_dir(os.environ['EXPERIMENTS_DIR'], experiment_no, mkdir=True)
+    config_path = get_config_path(experiment_dir, experiment_no)
     config.save(config_path)
-
-    algo = make(config, config.get('experiment', 'algo'))
+    algo = initfromconfig(config, config.get('experiment', 'algo'))
     algo.set_experiment_dir(experiment_dir)
     algo.train(model_path=args.model_path)
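
Example usage (illustrative sketch only, not part of the diff): after this change, scripts/train.py resolves the algorithm named in [experiment] via roam_rl.utils.config_utils.initfromconfig and PPO reads env_maker, vec_env_maker, seed and the optional info_keywords from its own section. The section names, option values and the HalfCheetah-v2 id below are assumptions modelled on the deleted scripts/garage/halfcheetah.cfg; PPO's remaining hyperparameters and the vec_env_maker options are omitted.

[experiment]
experiment_no = 0
algo = ppo

[ppo]
entrypoint = roam_rl.ppo:PPO
env_maker = my_env_maker
vec_env_maker = my_vec_env_maker
seed = 0
info_keywords = ()

[my_env_maker]
entrypoint = roam_rl.utils.env_maker:EnvMaker
env = my_robot_env

[my_robot_env]
id = HalfCheetah-v2

A config like this would be passed to the training script as: python scripts/train.py path/to/experiment.cfg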