From c443c1230ccb066e40aea1d8825b23879976e69a Mon Sep 17 00:00:00 2001
From: Xintong
Date: Thu, 11 Nov 2021 11:45:11 +0000
Subject: [PATCH] new release

---
 README.md                                     |  27 +-
 pybullet_multigoal_gym/__init__.py            |   7 +-
 .../base_envs/kuka_multi_step_base_env.py     | 348 +++++-----
 .../envs/task_envs/kuka_multi_step_envs.py    | 622 ++++++++++--------
 .../kuka_block_rearrange_curriculum.py        |  59 ++
 .../kuka_chest_push_task_decomposition.py     |  67 ++
 .../{kuka_reach_example.py => kuka_reach.py}  |   0
 pybullet_multigoal_gym/robots/kuka.py         |   8 +
 .../test/multigoal/gym_image_obs_test.py      |  29 +-
 .../test/multigoal/gym_test.py                |  43 +-
 pybullet_multigoal_gym/test/test.py           |   6 +-
 11 files changed, 729 insertions(+), 487 deletions(-)
 create mode 100644 pybullet_multigoal_gym/examples/kuka_block_rearrange_curriculum.py
 create mode 100644 pybullet_multigoal_gym/examples/kuka_chest_push_task_decomposition.py
 rename pybullet_multigoal_gym/examples/{kuka_reach_example.py => kuka_reach.py} (100%)

diff --git a/README.md b/README.md
index ab615d2..c33be21 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,8 @@ Expired mujoco license got me here. I will also retrain those agents and pose pe
 This package also provides some harder tasks for long-horizon sparse reward robotic arm manipulation tasks on this package as well. All the environments have been summarised in a paper.
 
-There are still on-going updates for this package, the [v1.0 release] is the one published on Taros 2021 and [ArXiv](https://arxiv.org/abs/2105.05985).
+The newest release is recommended. This package is still under active development; the [v1.0 release] is the version published at TAROS 2021 and on
+[ArXiv](https://arxiv.org/abs/2105.05985).
 
 ```
 @article{xyangPMG,
@@ -64,6 +65,9 @@ See examples below.
 
 ### Try it out
 
+See the [examples folder](https://github.com/IanYangChina/pybullet_multigoal_gym/tree/master/pybullet_multigoal_gym/examples)
+for more scripts to play with.
+
 ```python
 # Single-stage manipulation environments
 # Reach, Push, PickAndPlace, Slide
@@ -91,12 +95,12 @@ camera_setup = [
 env = pmg.make_env(
     # task args ['reach', 'push', 'slide', 'pick_and_place',
     #            'block_stack', 'block_rearrange', 'chest_pick_and_place', 'chest_push']
-    task='block_rearrange',
+    task='block_stack',
     gripper='parallel_jaw',
-    num_block=4,  # only meaningful for multi-block tasks
-    render=False,
+    num_block=4,  # only meaningful for multi-block tasks, up to 5 blocks
+    render=True,
     binary_reward=True,
-    max_episode_steps=5,
+    max_episode_steps=50,
     # image observation args
     image_observation=True,
     depth_image=False,
@@ -104,15 +108,10 @@ env = pmg.make_env(
     visualize_target=True,
     camera_setup=camera_setup,
     observation_cam_id=0,
-    goal_cam_id=1,
-    # curriculum args
-    use_curriculum=True,
-    num_goals_to_generate=90)
+    goal_cam_id=1)
 
 obs = env.reset()
-t = 0
 while True:
-    t += 1
     action = env.action_space.sample()
     obs, reward, done, info = env.step(action)
     print('state: ', obs['state'], '\n',
@@ -120,7 +119,7 @@ while True:
           'achieved_goal: ', obs['achieved_goal'], '\n',
           'reward: ', reward, '\n')
     plt.imshow(obs['observation'])
-    plt.pause(0.00001)
+	plt.pause(0.00001)
     if done:
         env.reset()
 ```



-### Updates
+### Update log

2020.12.26 --- Add hierarchical environments for a pick and place task,
with image observation and goal supported. See the above example.

@@ -149,3 +148,5 @@ See the above example. 
2021.03.17 --- Joint space control support

2021.03.18 --- Finish curriculum; add on-hand camera observation
+
+2021.11.11 --- Finish task decomposition and subgoal generation code, fix some compatibility issues; new release
\ No newline at end of file
diff --git a/pybullet_multigoal_gym/__init__.py b/pybullet_multigoal_gym/__init__.py
index a94279e..ff466a1 100644
--- a/pybullet_multigoal_gym/__init__.py
+++ b/pybullet_multigoal_gym/__init__.py
@@ -41,7 +41,8 @@ def print_id():
 def make_env(task='reach', gripper='parallel_jaw', num_block=5, render=False, binary_reward=True,
-             task_decomposition=False, abstract_demonstration=False,
+             grip_informed_goal=False,
+             task_decomposition=False,
              joint_control=False, max_episode_steps=50, distance_threshold=0.05,
              image_observation=False, depth_image=False, goal_image=False, visualize_target=True,
             camera_setup=None, observation_cam_id=0, goal_cam_id=0,
@@ -129,8 +130,6 @@ def make_env(task='reach', gripper='parallel_jaw', num_block=5, render=False, bi
             max_episode_steps=max_episode_steps,
         )
     else:
-        # if task == 'block_stack':
-        #     assert num_block >= 2, "need at least 2 blocks to stack"
         assert num_block <= 5, "only support up to 5 blocks"
         register(
             id=env_id,
@@ -141,7 +140,6 @@ def make_env(task='reach', gripper='parallel_jaw', num_block=5, render=False, bi
                 'joint_control': joint_control,
                 'distance_threshold': distance_threshold,
                 'task_decomposition': task_decomposition,
-                'abstract_demonstration': abstract_demonstration,
                 'image_observation': image_observation,
                 'depth_image': depth_image,
                 'goal_image': goal_image,
@@ -150,6 +148,7 @@ def make_env(task='reach', gripper='parallel_jaw', num_block=5, render=False, bi
                 'observation_cam_id': observation_cam_id,
                 'goal_cam_id': goal_cam_id,
                 'gripper_type': gripper,
+                'grip_informed_goal': grip_informed_goal,
                 'num_block': num_block,
                 'use_curriculum': use_curriculum,
                 'num_goals_to_generate': int(num_goals_to_generate)
diff --git a/pybullet_multigoal_gym/envs/base_envs/kuka_multi_step_base_env.py b/pybullet_multigoal_gym/envs/base_envs/kuka_multi_step_base_env.py
index a738e4c..e5970ba 100644
--- a/pybullet_multigoal_gym/envs/base_envs/kuka_multi_step_base_env.py
+++ b/pybullet_multigoal_gym/envs/base_envs/kuka_multi_step_base_env.py
@@ -1,5 +1,7 @@
 import os
 import numpy as np
+from warnings import warn
+from pybullet_multigoal_gym.utils.demonstrator import StepDemonstrator
 from pybullet_multigoal_gym.envs.base_envs.base_env import BaseBulletMGEnv
 from pybullet_multigoal_gym.robots.kuka import Kuka
 from pybullet_multigoal_gym.robots.chest import Chest
@@ -16,7 +18,8 @@ def __init__(self, render=True, binary_reward=True, grip_informed_goal=False,
                  gripper_type='parallel_jaw', end_effector_start_on_table=False,
                  num_block=3, joint_control=False, grasping=False, chest=False, chest_door='front_sliding',
                  obj_range=0.15, target_range=0.15, distance_threshold=0.05,
-                 use_curriculum=False, num_curriculum=5, base_curriculum_episode_steps=50, num_goals_to_generate=1e5):
+                 use_curriculum=False, task_decomposition=False,
+                 num_curriculum=5, base_curriculum_episode_steps=50, num_goals_to_generate=1e5):
         self.test = False
         self.binary_reward = binary_reward
         self.grip_informed_goal = grip_informed_goal
@@ -103,25 +106,78 @@ def __init__(self, render=True, binary_reward=True, grip_informed_goal=False,
         else:
             self.chest_door_opened_state = 0.12
 
+        self.task_decomposition = task_decomposition
         self.curriculum = use_curriculum
-        self.curriculum_update = False
-        self.num_curriculum = num_curriculum
-        # start with the easiest goal being the only possible goal
-        self.curriculum_prob = np.concatenate([[1.0], np.zeros(self.num_curriculum-1)])
-        self.base_curriculum_episode_steps = base_curriculum_episode_steps
-        # the number of episode steps increases as goals become harder to achieve
-        self.curriculum_goal_step = 0 * 25 + self.base_curriculum_episode_steps
-        # the number of goals to generate for each curriculum
-        # commonly equal to the total number of episodes / the number of curriculum
-        self.num_goals_per_curriculum = num_goals_to_generate / self.num_curriculum
-        # record the number of generated goals per curriculum
-        self.num_generated_goals_per_curriculum = np.zeros(self.num_curriculum)
+        if self.task_decomposition:
+            assert not self.curriculum, 'task_decomposition and use_curriculum cannot both be True'
+
+            demonstrations = []
+            for i in range(self.num_steps):
+                demonstrations.append([_ for _ in range(i + 1)])
+            self.step_demonstrator = StepDemonstrator(demonstrations)
+
+        if self.curriculum:
+            assert not self.task_decomposition, 'task_decomposition and use_curriculum cannot both be True'
+            self.last_curriculum_level = None  # used by the BlockStack & chest task curricula
+            self.last_ind_block_to_move = None  # used by the BlockRearrange & chest task curricula
+
+            warn("You will need to call env.activate_curriculum_update() before your training phase, "
+                 "and env.deactivate_curriculum_update() before your evaluation phase.")
+
+            self.curriculum_update = False
+            self.num_curriculum = num_curriculum
+            # start with the easiest goal being the only possible goal
+            self.curriculum_prob = np.concatenate([[1.0], np.zeros(self.num_curriculum - 1)])
+            self.base_curriculum_episode_steps = base_curriculum_episode_steps
+            # the number of episode steps increases as goals become harder to achieve
+            self.curriculum_goal_step = 0 * 25 + self.base_curriculum_episode_steps
+            # the number of goals to generate for each curriculum
+            # commonly equal to the total number of episodes / the number of curriculum
+            self.num_goals_per_curriculum = num_goals_to_generate // self.num_curriculum
+            # record the number of generated goals per curriculum
+            self.num_generated_goals_per_curriculum = np.zeros(self.num_curriculum)
 
         BaseBulletMGEnv.__init__(self, robot=robot, render=render, image_observation=image_observation, goal_image=goal_image, camera_setup=camera_setup, seed=0, timestep=0.002, frame_skip=20)
 
+    def activate_curriculum_update(self):
+        if not self.curriculum:
+            warn("This method has no effect when the curriculum is not in use.")
+            return
+        self.curriculum_update = True
+
+    def deactivate_curriculum_update(self):
+        if not self.curriculum:
+            warn("This method has no effect when the curriculum is not in use.")
+            return
+        self.curriculum_update = False
+
+    def set_sub_goal(self, sub_goal_ind):
+        if not self.task_decomposition:
+            warn("The set_sub_goal() method should only be called when using task decomposition.\n"
+                 "It does nothing and returns None when self.task_decomposition is False.")
+            return None
+        self.sub_goal_ind = sub_goal_ind
+        self.desired_goal = self.sub_goals[sub_goal_ind].copy()
+        if self.visualize_target:
+            index_offset = 0
+            if self.chest:
+                index_offset = 1
+            block_target_pos = []
+            for _ in range(self.num_block):
+                block_target_pos.append(
+                    self.desired_goal[index_offset + _ * 3:index_offset + _ * 3 + 3]
+                )
+            self._update_block_target(block_target_pos)
+            if self.grip_informed_goal:
+                if self.grasping:
+                    self._update_gripper_target(self.desired_goal[-4:-1])
+                else:
+                    
self._update_gripper_target(self.desired_goal[-3:]) + return self.desired_goal + def _task_reset(self, test=False): self.test = test if not self.objects_urdf_loaded: @@ -148,9 +204,9 @@ def _task_reset(self, test=False): baseOrientation=self.object_initial_pos[target_name][3:]) if not self.visualize_target: - self.set_object_pose(self.object_bodies[target_name], - [0.0, 0.0, -3.0], - self.object_initial_pos[target_name][3:]) + self._set_object_pose(self.object_bodies[target_name], + [0.0, 0.0, -3.0], + self.object_initial_pos[target_name][3:]) if self.grip_informed_goal: self.object_bodies[self.grip_target_key] = self._p.loadURDF( @@ -158,9 +214,9 @@ def _task_reset(self, test=False): basePosition=self.object_initial_pos[self.grip_target_key][:3], baseOrientation=self.object_initial_pos[self.grip_target_key][3:]) if not self.visualize_target: - self.set_object_pose(self.object_bodies[self.grip_target_key], - [0.0, 0.0, -3.0], - self.object_initial_pos[self.grip_target_key][3:]) + self._set_object_pose(self.object_bodies[self.grip_target_key], + [0.0, 0.0, -3.0], + self.object_initial_pos[self.grip_target_key][3:]) # randomize object positions block_poses = [] @@ -177,152 +233,20 @@ def _task_reset(self, test=False): done = True for i in range(self.num_block): - self.set_object_pose(self.object_bodies[self.block_keys[i]], - block_poses[i], - self.object_initial_pos[self.block_keys[i]][3:]) + self._set_object_pose(self.object_bodies[self.block_keys[i]], + block_poses[i], + self.object_initial_pos[self.block_keys[i]][3:]) if self.chest: - # start the episode with a closed chest if it is during evaluation - # if (not test) and (self.np_random.uniform(0, 1) <= 0.5): - # self.chest_robot.rest_joint_state = self.chest_door_opened_state - # if self.grasping: - # self.robot.set_finger_joint_state(pos=self.robot.gripper_grasp_block_state) - # self.robot.set_kuka_joint_state(pos=None, vel=None, gripper_tip_pos=block_poses[0], bullet_client=self._p) - # else: - # grip_pos = block_poses[0].copy() - # grip_pos[0] += 0.03 - # self.robot.set_kuka_joint_state(pos=None, vel=None, gripper_tip_pos=grip_pos, bullet_client=self._p) - # else: - # self.chest_robot.rest_joint_state = 0 self.chest_robot.robot_specific_reset(self._p) - # new_y = self.np_random.uniform(-self.chest_pos_y_range, self.chest_pos_y_range) - # chest_xyz = self.object_initial_pos['chest'][:3].copy() - # chest_xyz[1] = new_y - # self.chest_robot.set_base_pos(self._p, position=chest_xyz) - - # random pickup block chance for the chest-pick-and-place task - if test: - self.random_pickup_chance = 0.0 - else: - # only apply random pickup during training - self.random_pickup_chance = 0.75 + # generate goals & images self._generate_goal(block_poses, new_target=True) - self.sub_goal_ind = -1 + if self.task_decomposition: + self.sub_goal_ind = -1 if self.goal_image: self._generate_goal_image(block_poses) - def activate_curriculum_update(self): - self.curriculum_update = True - - def deactivate_curriculum_update(self): - self.curriculum_update = False - - def update_curriculum_prob(self): - # array of boolean masks - mask_finished = self.num_generated_goals_per_curriculum == self.num_goals_per_curriculum - mask_half = self.num_generated_goals_per_curriculum >= (self.num_goals_per_curriculum / 2) - # set finished curriculum prob to 0.0 - self.curriculum_prob[mask_finished] = 0.0 - # process the first curriculum separately - if mask_half[0] and not mask_finished[0]: - self.curriculum_prob[0] = 0.5 - self.curriculum_prob[1] = 0.5 - - # process the second 
to the second-last curriculums - for i in range(1, self.num_curriculum-1): - if mask_finished[i-1] and not mask_finished[i]: - - if mask_half[i]: - # set the next curriculum prob to 0.5 - # if the current one has been trained for half the total number - # and the last one has been trained completely - self.curriculum_prob[i] = 0.5 - self.curriculum_prob[i+1] = 0.5 - else: - # set the next curriculum prob to 1.0 - # if the current one has not yet been trained for half the total number - # and the last one has been trained completely - self.curriculum_prob[i] = 1.0 - - # process the last curriculum separately - if mask_finished[-2]: - self.curriculum_prob[-1] = 1.0 - - def set_sub_goal(self, sub_goal_ind): - self.sub_goal_ind = sub_goal_ind - self.desired_goal = self.sub_goals[sub_goal_ind].copy() - if self.visualize_target: - index_offset = 0 - if self.chest: - index_offset = 1 - block_target_pos = [] - for _ in range(self.num_block): - block_target_pos.append( - self.desired_goal[index_offset+_*3:index_offset+_*3+3] - ) - self._update_block_target(block_target_pos) - if self.grip_informed_goal: - if self.grasping: - self._update_gripper_target(self.desired_goal[-4:-1]) - else: - self._update_gripper_target(self.desired_goal[-3:]) - return self.desired_goal - - def _generate_goal(self, block_poses, new_target=True): - raise NotImplementedError - - def _update_block_target(self, desired_goal, index_offset=0): - for _ in range(self.num_block): - self.set_object_pose(self.object_bodies[self.target_keys[_]], - desired_goal[_+index_offset], - self.object_initial_pos[self.target_keys[_]][3:]) - - def _update_gripper_target(self, pos): - self.set_object_pose(self.object_bodies[self.grip_target_key], - pos, - self.object_initial_pos[self.grip_target_key][3:]) - - def _generate_goal_image(self, block_poses): - target_obj_pos = self.desired_goal.copy() - if self.chest: - # this is tricky... 
- self.desired_goal_image = self.render(mode=self.render_mode, camera_id=self.goal_cam_id) - elif self.grasping: - # block stacking - self.robot.set_finger_joint_state(self.robot.gripper_grasp_block_state) - target_gripper_pos = self.desired_goal[-3:].copy() - target_gripper_pos[-1] = 0.175 + self.block_size * (self.num_block - 1) - target_kuka_joint_pos = self.robot.compute_ik(self._p, target_gripper_pos) - self.robot.set_kuka_joint_state(target_kuka_joint_pos) - - for i in range(self.num_block): - self.set_object_pose(self.object_bodies[self.block_keys[i]], - target_obj_pos[i * 3:i * 3 + 3], - self.object_initial_pos[self.block_keys[i]][3:]) - - self.desired_goal_image = self.render(mode=self.render_mode, camera_id=self.goal_cam_id) - - for i in range(self.num_block): - self.set_object_pose(self.object_bodies[self.block_keys[i]], - block_poses[i], - self.object_initial_pos[self.block_keys[i]][3:]) - - self.robot.set_kuka_joint_state(self.robot.kuka_rest_pose, np.zeros(7)) - self.robot.set_finger_joint_state(self.robot.gripper_abs_joint_limit) - else: - for i in range(self.num_block): - self.set_object_pose(self.object_bodies[self.block_keys[i]], - target_obj_pos[i * 3:i * 3 + 3], - self.object_initial_pos[self.block_keys[i]][3:]) - - self.desired_goal_image = self.render(mode=self.render_mode, camera_id=self.goal_cam_id) - - for i in range(self.num_block): - self.set_object_pose(self.object_bodies[self.block_keys[i]], - block_poses[i], - self.object_initial_pos[self.block_keys[i]][3:]) - def _step_callback(self): pass @@ -362,13 +286,12 @@ def _get_obs(self): if self.chest: # door joint state represents the openness and velocity of the door - # chest_xyz, _ = self.chest_robot.get_base_pos(self._p) door_joint_pos, door_joint_vel, keypoint_state = self.chest_robot.calc_robot_state() state = state + [[door_joint_pos], [door_joint_vel]] + keypoint_state policy_state = policy_state + [[door_joint_pos]] + keypoint_state achieved_goal.insert(0, [door_joint_pos]) - # keep the door opened if the robot have somehow opened it + # keep the door opened if the robot has somehow opened it if np.abs(self.chest_door_opened_state - door_joint_pos) <= 0.01: self.chest_robot.apply_action([self.chest_door_opened_state], self._p) @@ -382,8 +305,10 @@ def _get_obs(self): policy_state = np.clip(np.concatenate(policy_state), -5.0, 5.0) achieved_goal = np.concatenate(achieved_goal) + # update goals based on new block positions self._generate_goal(block_poses=block_xyzs, new_target=False) - self.set_sub_goal(sub_goal_ind=self.sub_goal_ind) + if self.task_decomposition: + self.set_sub_goal(sub_goal_ind=self.sub_goal_ind) assert achieved_goal.shape == self.desired_goal.shape @@ -396,14 +321,15 @@ def _get_obs(self): } elif not self.goal_image: return { - 'observation': self.render(mode=self.render_mode, camera_id=self.observation_cam_id), + 'observation': self.render(mode=self.render_mode, camera_id=self.observation_cam_id).copy(), 'state': state.copy(), 'policy_state': policy_state.copy(), 'achieved_goal': achieved_goal.copy(), 'desired_goal': self.desired_goal.copy(), } else: - observation = self.render(mode=self.render_mode, camera_id=self.observation_cam_id) + observation = self.render(mode=self.render_mode, camera_id=self.observation_cam_id).copy() + self._generate_goal_image(block_poses=block_xyzs) return { 'observation': observation.copy(), 'state': state.copy(), @@ -423,7 +349,103 @@ def _compute_reward(self, achieved_goal, desired_goal): else: return -d, ~not_achieved - def set_object_pose(self, body_id, 
position, orientation=None):
+    def _generate_goal(self, block_poses, new_target=True):
+        raise NotImplementedError
+
+    def _update_curriculum_prob(self):
+        # array of boolean masks
+        mask_finished = self.num_generated_goals_per_curriculum >= self.num_goals_per_curriculum
+        mask_half = self.num_generated_goals_per_curriculum >= (self.num_goals_per_curriculum / 2)
+        # set finished curriculum prob to 0.0
+        self.curriculum_prob[mask_finished] = 0.0
+        # process the first curriculum separately
+        if mask_half[0] and not mask_finished[0]:
+            self.curriculum_prob[0] = 0.5
+            self.curriculum_prob[1] = 0.5
+
+        # process the second to the second-last curricula
+        for i in range(1, self.num_curriculum - 1):
+            if mask_finished[i - 1] and not mask_finished[i]:
+
+                if mask_half[i]:
+                    # set the next curriculum prob to 0.5
+                    # if the current one has been trained for half the total number
+                    # and the last one has been trained completely
+                    self.curriculum_prob[i] = 0.5
+                    self.curriculum_prob[i + 1] = 0.5
+                else:
+                    # set the next curriculum prob to 1.0
+                    # if the current one has not yet been trained for half the total number
+                    # and the last one has been trained completely
+                    self.curriculum_prob[i] = 1.0
+
+        # process the last curriculum separately
+        if mask_finished[-2]:
+            self.curriculum_prob[-1] = 1.0
+
+    def _set_object_pose(self, body_id, position, orientation=None):
         if orientation is None:
             orientation = self.object_initial_pos['table'][3:]
         self._p.resetBasePositionAndOrientation(body_id, position, orientation)
+
+    def _update_block_target(self, desired_goal, index_offset=0):
+        for _ in range(self.num_block):
+            self._set_object_pose(self.object_bodies[self.target_keys[_]],
+                                  desired_goal[_ + index_offset],
+                                  self.object_initial_pos[self.target_keys[_]][3:])
+
+    def _update_gripper_target(self, pos):
+        self._set_object_pose(self.object_bodies[self.grip_target_key],
+                              pos,
+                              self.object_initial_pos[self.grip_target_key][3:])
+
+    def _generate_goal_image(self, block_poses):
+        target_obj_pos = self.desired_goal.copy()
+        if self.chest:
+            warn("Tasks with a chest do not support goal image generation.")
+            self.desired_goal_image = self.render(mode=self.render_mode, camera_id=self.goal_cam_id)
+        elif self.grasping:
+            joint_poses, joint_vels = self.robot.get_kuka_joint_state()
+            finger_joint_pos, finger_joint_vel = self.robot.get_finger_joint_state()
+            # block stacking
+            if self.grip_informed_goal:
+                self.robot.set_finger_joint_state(self.robot.gripper_grasp_block_state)
+                target_gripper_pos = self.desired_goal[-4:-1]
+            else:
+                self.robot.set_finger_joint_state(self.robot.gripper_grasp_block_state)
+                target_gripper_pos = self.last_target_poses[0].copy()
+                if self.task_decomposition:
+                    target_gripper_pos[-1] = 0.175 + self.block_size * (self.sub_goal_ind)
+                elif self.curriculum:
+                    target_gripper_pos[-1] = 0.175 + self.block_size * self.last_curriculum_level
+                else:
+                    target_gripper_pos[-1] = 0.175 + self.block_size * (self.num_block - 1)
+            target_kuka_joint_pos = self.robot.compute_ik(self._p, target_gripper_pos)
+            self.robot.set_kuka_joint_state(target_kuka_joint_pos)
+
+            for i in range(self.num_block):
+                self._set_object_pose(self.object_bodies[self.block_keys[i]],
+                                      target_obj_pos[i * 3:i * 3 + 3],
+                                      self.object_initial_pos[self.block_keys[i]][3:])
+
+            self.desired_goal_image = self.render(mode=self.render_mode, camera_id=self.goal_cam_id)
+
+            for i in range(self.num_block):
+                self._set_object_pose(self.object_bodies[self.block_keys[i]],
+                                      block_poses[i],
+                                      self.object_initial_pos[self.block_keys[i]][3:])
+
+            
self.robot.set_kuka_joint_state(joint_poses, joint_vels) + self.robot.set_finger_joint_state(finger_joint_pos, finger_joint_vel) + else: + for i in range(self.num_block): + self._set_object_pose(self.object_bodies[self.block_keys[i]], + target_obj_pos[i * 3:i * 3 + 3], + self.object_initial_pos[self.block_keys[i]][3:]) + + self.desired_goal_image = self.render(mode=self.render_mode, camera_id=self.goal_cam_id) + + for i in range(self.num_block): + self._set_object_pose(self.object_bodies[self.block_keys[i]], + block_poses[i], + self.object_initial_pos[self.block_keys[i]][3:]) diff --git a/pybullet_multigoal_gym/envs/task_envs/kuka_multi_step_envs.py b/pybullet_multigoal_gym/envs/task_envs/kuka_multi_step_envs.py index 89e7701..cd10fd3 100644 --- a/pybullet_multigoal_gym/envs/task_envs/kuka_multi_step_envs.py +++ b/pybullet_multigoal_gym/envs/task_envs/kuka_multi_step_envs.py @@ -4,44 +4,34 @@ class KukaBlockStackEnv(KukaBulletMultiBlockEnv): - def __init__(self, render=True, binary_reward=True, distance_threshold=0.05, - random_order=True, + def __init__(self, render=True, binary_reward=True, distance_threshold=0.05, random_order=True, image_observation=False, goal_image=False, depth_image=False, visualize_target=True, camera_setup=None, observation_cam_id=0, goal_cam_id=0, - gripper_type='parallel_jaw', num_block=5, joint_control=False, - task_decomposition=False, abstract_demonstration=False, - use_curriculum=False, num_goals_to_generate=1e5): - self.task_decomposition = task_decomposition - self.num_steps = num_block - self.pickup_target = None - if self.task_decomposition: - self.grip_informed_goal = True - self.num_steps = num_block * 2 - self.abstract_demonstration = abstract_demonstration - if self.abstract_demonstration: - demonstrations = [] - for i in range(self.num_steps): - demonstrations.append([_ for _ in range(i+1)]) - self.step_demonstrator = StepDemonstrator(demonstrations) + gripper_type='parallel_jaw', grip_informed_goal=False, num_block=5, joint_control=False, + task_decomposition=False, use_curriculum=False, num_goals_to_generate=1e5): + + self.grip_informed_goal = grip_informed_goal + if self.grip_informed_goal: + self.num_steps = num_block*2 + else: + self.num_steps = num_block + self.random_order = random_order self.last_order = None self.last_target_poses = None + KukaBulletMultiBlockEnv.__init__(self, render=render, binary_reward=binary_reward, distance_threshold=distance_threshold, grip_informed_goal=self.grip_informed_goal, image_observation=image_observation, goal_image=goal_image, depth_image=depth_image, visualize_target=visualize_target, camera_setup=camera_setup, observation_cam_id=observation_cam_id, goal_cam_id=goal_cam_id, gripper_type=gripper_type, end_effector_start_on_table=False, - num_block=num_block,joint_control=joint_control, - grasping=True, chest=False, - obj_range=0.15, target_range=0.15, - use_curriculum=use_curriculum, - num_curriculum=num_block, - num_goals_to_generate=num_goals_to_generate) + num_block=num_block, obj_range=0.15, target_range=0.15, + joint_control=joint_control, grasping=True, chest=False, + task_decomposition=task_decomposition, use_curriculum=use_curriculum, + num_curriculum=num_block, num_goals_to_generate=num_goals_to_generate) def _generate_goal(self, block_poses, new_target=True): - desired_goal = [None for _ in range(self.num_block)] - if new_target: # generate a random order of blocks to be stacked new_order = np.arange(self.num_block, dtype=np.int) @@ -49,8 +39,6 @@ def _generate_goal(self, block_poses, 
new_target=True): self.np_random.shuffle(new_order) new_order = new_order.tolist() - # base_target_xyz = block_poses[0].copy() - # generate a random base block position base_target_xyz = None done = False while not done: @@ -77,7 +65,9 @@ def _generate_goal(self, block_poses, new_target=True): new_order = self.last_order target_xyzs = self.last_target_poses - if not self.curriculum: + if self.curriculum: + desired_goal = self._generate_curriculum(block_poses, target_xyzs, new_order, new_target) + else: # generate goal and set target poses according to the order desired_goal = [None for _ in range(self.num_block)] for _ in range(self.num_block): @@ -87,357 +77,441 @@ def _generate_goal(self, block_poses, new_target=True): desired_goal.append([0.03]) if self.task_decomposition: - self.sub_goals = [] - for _ in range(self.num_block): - sub_goal_pick = [None for _ in range(self.num_block)] - for i in range(self.num_block): - if i < _: - sub_goal_pick[new_order[i]] = target_xyzs[i].copy() - else: - sub_goal_pick[new_order[i]] = block_poses[new_order[i]].copy() - sub_goal_pick.append(block_poses[new_order[_]].copy()) - sub_goal_pick.append([0.03]) - self.sub_goals.append(np.concatenate(sub_goal_pick)) - - sub_goal_place = [None for _ in range(self.num_block)] - for i in range(self.num_block): - if i <= _: - sub_goal_place[new_order[i]] = target_xyzs[i].copy() - else: - sub_goal_place[new_order[i]] = block_poses[new_order[i]].copy() - sub_goal_place.append(target_xyzs[_].copy()) - sub_goal_place.append([0.03]) - self.sub_goals.append(np.concatenate(sub_goal_place)) + self.sub_goals = self._generate_subgoals(block_poses, target_xyzs, new_order, new_target) + + if self.visualize_target: + self._update_block_target(desired_goal) + if self.grip_informed_goal: + self._update_gripper_target(desired_goal[-2]) + + self.desired_goal = np.concatenate(desired_goal) + + def _generate_subgoals(self, block_poses, target_xyzs, new_order, new_target=False): + sub_goals = [] + if self.grip_informed_goal: + for _ in range(self.num_block): + sub_goal_pick = [None for _ in range(self.num_block)] + for i in range(self.num_block): + if i < _: + sub_goal_pick[new_order[i]] = target_xyzs[i].copy() + else: + sub_goal_pick[new_order[i]] = block_poses[new_order[i]].copy() + sub_goal_pick.append(block_poses[new_order[_]].copy()) + sub_goal_pick.append([0.03]) + sub_goals.append(np.concatenate(sub_goal_pick)) + + sub_goal_place = [None for _ in range(self.num_block)] + for i in range(self.num_block): + if i <= _: + sub_goal_place[new_order[i]] = target_xyzs[i].copy() + else: + sub_goal_place[new_order[i]] = block_poses[new_order[i]].copy() + sub_goal_place.append(target_xyzs[_].copy()) + sub_goal_place.append([0.03]) + sub_goals.append(np.concatenate(sub_goal_place)) else: + for _ in range(self.num_block): + sub_goal = [None for _ in range(self.num_block)] + for i in range(self.num_block): + if i <= _: + sub_goal[new_order[i]] = target_xyzs[i].copy() + else: + sub_goal[new_order[i]] = block_poses[new_order[i]].copy() + sub_goals.append(np.concatenate(sub_goal)) + + return sub_goals + + def _generate_curriculum(self, block_poses, target_xyzs, new_order, new_target=False): + desired_goal = [None for _ in range(self.num_block)] + + if new_target: curriculum_level = self.np_random.choice(self.num_curriculum, p=self.curriculum_prob) self.curriculum_goal_step = curriculum_level * 25 + self.base_curriculum_episode_steps - - for _ in range(self.num_block): - if _ <= curriculum_level: - desired_goal[_] = target_xyzs[_].copy() - else: 
- desired_goal[_] = block_poses[_].copy() + self.last_curriculum_level = curriculum_level if self.curriculum_update: self.num_generated_goals_per_curriculum[curriculum_level] += 1 - self.update_curriculum_prob() + self._update_curriculum_prob() + else: + curriculum_level = self.last_curriculum_level - if self.visualize_target: - self._update_block_target(desired_goal) + for i in range(self.num_block): + if i <= curriculum_level: + desired_goal[new_order[i]] = target_xyzs[i].copy() + else: + desired_goal[new_order[i]] = block_poses[new_order[i]].copy() - self.desired_goal = np.concatenate(desired_goal) + if self.grip_informed_goal: + desired_goal.append(target_xyzs[curriculum_level].copy()) + desired_goal.append([0.03]) + + return desired_goal class KukaBlockRearrangeEnv(KukaBulletMultiBlockEnv): def __init__(self, render=True, binary_reward=True, distance_threshold=0.05, image_observation=False, goal_image=False, depth_image=False, visualize_target=True, camera_setup=None, observation_cam_id=0, goal_cam_id=0, - gripper_type='parallel_jaw', num_block=5, joint_control=False, - use_curriculum=False, num_goals_to_generate=1e5): + gripper_type='parallel_jaw', grip_informed_goal=False, num_block=5, joint_control=False, + task_decomposition=False, use_curriculum=False, num_goals_to_generate=1e5): + + assert not grip_informed_goal, "Block rearranging task does not support gripper informed goal representation." + assert not task_decomposition, "Block rearranging task does not support task decomposition." + + self.last_target_poses = None + KukaBulletMultiBlockEnv.__init__(self, render=render, binary_reward=binary_reward, distance_threshold=distance_threshold, + grip_informed_goal=False, image_observation=image_observation, goal_image=goal_image, depth_image=depth_image, visualize_target=visualize_target, camera_setup=camera_setup, observation_cam_id=observation_cam_id, goal_cam_id=goal_cam_id, gripper_type=gripper_type, end_effector_start_on_table=True, - num_block=num_block, joint_control=joint_control, grasping=False, chest=False, - obj_range=0.15, target_range=0.15, - use_curriculum=use_curriculum, - num_curriculum=num_block, - num_goals_to_generate=num_goals_to_generate) + num_block=num_block, obj_range=0.15, target_range=0.15, + joint_control=joint_control, grasping=False, chest=False, + task_decomposition=task_decomposition, use_curriculum=use_curriculum, + num_curriculum=num_block, num_goals_to_generate=num_goals_to_generate) def _generate_goal(self, block_poses, new_target=True): - desired_goal = [] - - if not self.curriculum: + if new_target: + target_xyzs = [] for _ in range(self.num_block): done = False while not done: new_target_xy = self.np_random.uniform(self.robot.target_bound_lower[:-1], self.robot.target_bound_upper[:-1]) target_not_overlap = [] - for pos in desired_goal + block_poses: + for pos in target_xyzs + block_poses: target_not_overlap.append((np.linalg.norm(new_target_xy - pos[:-1]) > 0.06)) if all(target_not_overlap): - desired_goal.append(np.concatenate((new_target_xy.copy(), [0.175]))) + target_xyzs.append(np.concatenate((new_target_xy.copy(), [0.175]))) done = True + self.last_target_poses = target_xyzs.copy() else: + target_xyzs = self.last_target_poses.copy() + + if not self.curriculum: + desired_goal = target_xyzs + else: + desired_goal = self._generate_curriculum(block_poses, target_xyzs, new_target) + + if self.visualize_target: + self._update_block_target(desired_goal) + + self.desired_goal = np.concatenate(desired_goal) + + def _generate_curriculum(self, 
block_poses, target_xyzs, new_target=False): + desired_goal = [] + + if new_target: curriculum_level = self.np_random.choice(self.num_curriculum, p=self.curriculum_prob) self.curriculum_goal_step = curriculum_level * 25 + self.base_curriculum_episode_steps - ind_block_to_move = np.sort(self.np_random.choice(np.arange(self.num_block), size=curriculum_level+1), + ind_block_to_move = np.sort(self.np_random.choice(np.arange(self.num_block), + size=curriculum_level + 1, + replace=False), kind='stable').tolist() - targets = [] - for _ in range(curriculum_level + 1): - done = False - while not done: - new_target_xy = self.np_random.uniform(self.robot.target_bound_lower[:-1], - self.robot.target_bound_upper[:-1]) - target_not_overlap = [] - for pos in targets + block_poses: - target_not_overlap.append((np.linalg.norm(new_target_xy - pos[:-1]) > 0.08)) - if all(target_not_overlap): - targets.append(np.concatenate((new_target_xy.copy(), [0.175]))) - done = True - - for i in range(self.num_block): - if i in ind_block_to_move: - desired_goal.append(targets[0]) - del targets[0] - else: - desired_goal.append(block_poses[i]) + self.last_ind_block_to_move = ind_block_to_move if self.curriculum_update: self.num_generated_goals_per_curriculum[curriculum_level] += 1 - self.update_curriculum_prob() + self._update_curriculum_prob() + else: + ind_block_to_move = self.last_ind_block_to_move - if self.visualize_target: - self._update_block_target(desired_goal) + for i in range(self.num_block): + if i in ind_block_to_move: + desired_goal.append(target_xyzs[0].copy()) + del target_xyzs[0] + else: + desired_goal.append(block_poses[i].copy()) - self.desired_goal = np.concatenate(desired_goal) + return desired_goal class KukaChestPickAndPlaceEnv(KukaBulletMultiBlockEnv): def __init__(self, render=True, binary_reward=True, distance_threshold=0.05, image_observation=False, goal_image=False, depth_image=False, visualize_target=True, camera_setup=None, observation_cam_id=0, goal_cam_id=0, - gripper_type='parallel_jaw', num_block=5, joint_control=False, - task_decomposition=False, abstract_demonstration=False, - use_curriculum=False, num_goals_to_generate=1e5): - self.task_decomposition = task_decomposition - self.num_steps = num_block+1 - self.grip_informed_goal = False - self.random_pickup_chance = 0.75 - self.pickup_target = None - if self.task_decomposition: - self.grip_informed_goal = True, - self.num_steps = num_block * 3 + 1 - self.abstract_demonstration = abstract_demonstration - if self.abstract_demonstration: - demonstrations = [] - for i in range(self.num_steps): - demonstrations.append([_ for _ in range(i+1)]) - self.step_demonstrator = StepDemonstrator(demonstrations) + gripper_type='parallel_jaw', grip_informed_goal=False, num_block=5, joint_control=False, + task_decomposition=False, use_curriculum=False, num_goals_to_generate=1e5): + + assert not goal_image, "Chest tasks do not support goal images well at the moment." 
+ + self.grip_informed_goal = grip_informed_goal + if self.grip_informed_goal: + self.num_steps = num_block*3+1 + else: + self.num_steps = num_block + 1 + KukaBulletMultiBlockEnv.__init__(self, render=render, binary_reward=binary_reward, distance_threshold=distance_threshold, grip_informed_goal=self.grip_informed_goal, image_observation=image_observation, goal_image=goal_image, depth_image=depth_image, visualize_target=visualize_target, camera_setup=camera_setup, observation_cam_id=observation_cam_id, goal_cam_id=goal_cam_id, gripper_type=gripper_type, end_effector_start_on_table=False, - num_block=num_block, joint_control=joint_control, grasping=True, chest=True, chest_door='up_sliding', - obj_range=0.1, - use_curriculum=use_curriculum, - num_curriculum=num_block+1, - num_goals_to_generate=num_goals_to_generate) + num_block=num_block, obj_range=0.1, + joint_control=joint_control, grasping=True, chest=True, chest_door='up_sliding', + task_decomposition=task_decomposition, use_curriculum=use_curriculum, + num_curriculum=num_block+1, num_goals_to_generate=num_goals_to_generate) def _generate_goal(self, block_poses, new_target=True): - # the first element is the largest openness of the door (equal to the door joint pose upper limit) - desired_goal = [[0.10]] - # all blocks should go into the sphere of 0.05 radius centred at the chest centre chest_center_xyz, _ = self.chest_robot.get_base_pos(self._p) chest_center_xyz = np.array(chest_center_xyz) chest_center_xyz[0] += 0.05 chest_center_xyz[2] = 0.175 - chest_top_xyz = chest_center_xyz.copy() chest_top_xyz[-1] = 0.3 - if not self.curriculum: - current_gripper_tip_xyz = self.robot.parts['iiwa_gripper_tip'].get_position() - + # setup desired goal or subgoals + if self.curriculum: + desired_goal = self._generate_curriculum(block_poses, chest_center_xyz, chest_top_xyz, new_target) + else: + # the first element is the largest openness of the door (equal to the door joint pose upper limit) + desired_goal = [[0.10]] for _ in range(self.num_block): desired_goal.append(chest_center_xyz) - if self.grip_informed_goal: - desired_goal.append(current_gripper_tip_xyz.copy()) - desired_goal.append([0.03]) + desired_goal.append(chest_top_xyz.copy()) + desired_goal.append([0.06]) if self.task_decomposition: - self.sub_goals = [] - - sub_goal_open_door = [[0.10]] - sub_goal_open_door = sub_goal_open_door + block_poses - sub_goal_open_door.append(current_gripper_tip_xyz.copy()) - sub_goal_open_door.append([0.0]) - self.sub_goals.append(np.concatenate(sub_goal_open_door)) - - for _ in range(self.num_block): - # # block positions - sub_goal_pick = block_poses.copy() - # previous blocks should already be within the chest - for i in range(self.num_block): - if i < _: - sub_goal_pick[i] = chest_center_xyz.copy() - # gripper position - sub_goal_pick.append(block_poses[_].copy()) - # finger width - sub_goal_pick.append([0.03]) - # chest door joint state - sub_goal_pick = [[0.10]] + sub_goal_pick - self.sub_goals.append(np.concatenate(sub_goal_pick)) - - # sub_goal_lift_block = block_poses.copy() - # # previous blocks should already be within the chest - # for i in range(self.num_block): - # if i < _: - # sub_goal_lift_block[i] = chest_center_xyz.copy() - # sub_goal_lift_block[_][-1] = chest_top_xyz[-1] - # # if new_target: - # # if self.np_random.uniform() <= self.random_pickup_chance: - # # while True: - # # lift_xyz = self.np_random.uniform(self.robot.target_bound_lower+np.array([0.14, 0, 0]), - # # self.robot.target_bound_upper) - # # target_not_overlap = [] - # # 
for pos in block_poses: - # # target_not_overlap.append((np.linalg.norm(lift_xyz[:-1] - pos[:-1]) > 0.08)) - # # if all(target_not_overlap): - # # break - # # if self.np_random.uniform(0, 1) >= 0.5: - # # lift_xyz[-1] = 0.175 - # # sub_goal_lift_block[_] = lift_xyz.copy() - # # else: - # # sub_goal_lift_block[_][-1] = chest_top_xyz[-1] - # # self.pickup_target = sub_goal_lift_block[_].copy() - # # else: - # # assert self.pickup_target is not None - # # sub_goal_lift_block[_] = self.pickup_target.copy() - # sub_goal_lift_block.append(sub_goal_lift_block[_].copy()) - # sub_goal_lift_block.append([0.03]) - # sub_goal_lift_block = [[0.10]] + sub_goal_lift_block - # self.sub_goals.append(np.concatenate(sub_goal_lift_block)) - - sub_goal_move_to_chest_top = block_poses.copy() - for i in range(self.num_block): - if i < _: - sub_goal_move_to_chest_top[i] = chest_center_xyz.copy() - sub_goal_move_to_chest_top[_] = chest_top_xyz.copy() - sub_goal_move_to_chest_top.append(chest_top_xyz.copy()) - sub_goal_move_to_chest_top.append([0.03]) - sub_goal_move_to_chest_top = [[0.10]] + sub_goal_move_to_chest_top - self.sub_goals.append(np.concatenate(sub_goal_move_to_chest_top)) - - sub_goal_drop = block_poses.copy() - for i in range(self.num_block): - if i < _: - sub_goal_drop[i] = chest_center_xyz.copy() - sub_goal_drop[_] = chest_center_xyz.copy() - sub_goal_drop.append(chest_top_xyz.copy()) - sub_goal_drop.append([0.06]) - sub_goal_drop = [[0.10]] + sub_goal_drop - self.sub_goals.append(np.concatenate(sub_goal_drop)) + self.sub_goals = self._generate_subgoals(block_poses, chest_center_xyz, chest_top_xyz, new_target) + + if self.visualize_target: + self._update_block_target(desired_goal, index_offset=1) + if self.grip_informed_goal: + self._update_gripper_target(desired_goal[-2]) + + self.desired_goal = np.concatenate(desired_goal) + + def _generate_subgoals(self, block_poses, chest_center_xyz, chest_top_xyz, new_target=False): + sub_goals = [] + + sub_goal_open_door = [[0.10]] + sub_goal_open_door = sub_goal_open_door + block_poses + sub_goal_open_door.append(self.robot.parts['iiwa_gripper_tip'].get_position().copy()) + sub_goal_open_door.append(self.robot.get_finger_closeness()) + sub_goals.append(np.concatenate(sub_goal_open_door)) + + if self.grip_informed_goal: + for _ in range(self.num_block): + # # block positions + sub_goal_pick = block_poses.copy() + # previous blocks should already be within the chest + for i in range(self.num_block): + if i < _: + sub_goal_pick[i] = chest_center_xyz.copy() + # gripper position + sub_goal_pick.append(block_poses[_].copy()) + # finger width + sub_goal_pick.append([0.03]) + # chest door joint state + sub_goal_pick = [[0.10]] + sub_goal_pick + sub_goals.append(np.concatenate(sub_goal_pick)) + + sub_goal_move_to_chest_top = block_poses.copy() + for i in range(self.num_block): + if i < _: + sub_goal_move_to_chest_top[i] = chest_center_xyz.copy() + sub_goal_move_to_chest_top[_] = chest_top_xyz.copy() + sub_goal_move_to_chest_top.append(chest_top_xyz.copy()) + sub_goal_move_to_chest_top.append([0.03]) + sub_goal_move_to_chest_top = [[0.10]] + sub_goal_move_to_chest_top + sub_goals.append(np.concatenate(sub_goal_move_to_chest_top)) + + sub_goal_drop = block_poses.copy() + for i in range(self.num_block): + if i < _: + sub_goal_drop[i] = chest_center_xyz.copy() + sub_goal_drop[_] = chest_center_xyz.copy() + sub_goal_drop.append(chest_top_xyz.copy()) + sub_goal_drop.append([0.06]) + sub_goal_drop = [[0.10]] + sub_goal_drop + sub_goals.append(np.concatenate(sub_goal_drop)) 
else: + for _ in range(self.num_block): + sub_goal = block_poses.copy() + for i in range(self.num_block): + if i <= _: + sub_goal[i] = chest_center_xyz.copy() + sub_goal = [[0.10]] + sub_goal + sub_goals.append(np.concatenate(sub_goal)) + + return sub_goals + + def _generate_curriculum(self, block_poses, chest_center_xyz, chest_top_xyz, new_target=False): + # the first element is the largest openness of the door (equal to the door joint pose upper limit) + desired_goal = [[0.10]] + + if new_target: curriculum_level = self.np_random.choice(self.num_curriculum, p=self.curriculum_prob) self.curriculum_goal_step = curriculum_level * 25 + self.base_curriculum_episode_steps - ind_block_to_move = np.sort(self.np_random.choice(np.arange(self.num_block), size=curriculum_level), + ind_block_to_move = np.sort(self.np_random.choice(np.arange(self.num_block), + size=curriculum_level, + replace=False), kind='stable').tolist() - - for i in range(self.num_block): - if i in ind_block_to_move: - desired_goal.append(chest_center_xyz) - else: - desired_goal.append(block_poses[i]) + self.last_curriculum_level = curriculum_level + self.last_ind_block_to_move = ind_block_to_move if self.curriculum_update: self.num_generated_goals_per_curriculum[curriculum_level] += 1 - self.update_curriculum_prob() + self._update_curriculum_prob() + else: + ind_block_to_move = self.last_ind_block_to_move + curriculum_level = self.last_curriculum_level - if self.visualize_target: - self._update_block_target(desired_goal, index_offset=1) + for i in range(self.num_block): + if i in ind_block_to_move: + desired_goal.append(chest_center_xyz) + else: + desired_goal.append(block_poses[i]) - self.desired_goal = np.concatenate(desired_goal) + if self.grip_informed_goal: + if curriculum_level == 0: + desired_goal.append(self.robot.parts['iiwa_gripper_tip'].get_position()) + desired_goal.append(self.robot.get_finger_closeness()) + else: + desired_goal.append(chest_top_xyz.copy()) + desired_goal.append([0.06]) + + return desired_goal class KukaChestPushEnv(KukaBulletMultiBlockEnv): - def __init__(self, render=True, binary_reward=True, distance_threshold=0.05, + def __init__(self, render=True, binary_reward=True, distance_threshold=0.05, grip_informed_goal=False, image_observation=False, goal_image=False, depth_image=False, visualize_target=True, camera_setup=None, observation_cam_id=0, goal_cam_id=0, gripper_type='parallel_jaw', num_block=5, joint_control=False, - task_decomposition=False, abstract_demonstration=False, - use_curriculum=False, num_goals_to_generate=1e5): - self.grip_informed_goal = False - self.task_decomposition = task_decomposition - self.num_steps = num_block+1 - if self.task_decomposition: - self.grip_informed_goal = True + task_decomposition=False, use_curriculum=False, num_goals_to_generate=1e5): + + assert not goal_image, "Chest tasks do not support goal images well at the moment." 
+ + self.grip_informed_goal = grip_informed_goal + if self.grip_informed_goal: self.num_steps = num_block*2+1 - self.abstract_demonstration = abstract_demonstration - if self.abstract_demonstration: - demonstrations = [] - for i in range(self.num_steps): - demonstrations.append([_ for _ in range(i+1)]) - self.step_demonstrator = StepDemonstrator(demonstrations) + else: + self.num_steps = num_block + 1 + KukaBulletMultiBlockEnv.__init__(self, render=render, binary_reward=binary_reward, distance_threshold=distance_threshold, image_observation=image_observation, goal_image=goal_image, depth_image=depth_image, visualize_target=visualize_target, grip_informed_goal=self.grip_informed_goal, camera_setup=camera_setup, observation_cam_id=observation_cam_id, goal_cam_id=goal_cam_id, gripper_type=gripper_type, end_effector_start_on_table=True, - num_block=num_block, joint_control=joint_control, grasping=False, chest=True, chest_door='front_sliding', - obj_range=0.1, - use_curriculum=use_curriculum, - num_curriculum=num_block+1, - num_goals_to_generate=num_goals_to_generate) + num_block=num_block, obj_range=0.1, + joint_control=joint_control, grasping=False, chest=True, chest_door='front_sliding', + task_decomposition=task_decomposition, use_curriculum=use_curriculum, + num_curriculum=num_block+1, num_goals_to_generate=num_goals_to_generate) def _generate_goal(self, block_poses, new_target=True): - # the first element is the largest openness of the door (equal to the door joint pose upper limit) - desired_goal = [[0.12]] - - # all blocks should go into the sphere of 0.05 radius centred at the chest centre + # all blocks should go into the sphere of 0.05 radius centred at the chest bottom centre chest_center_xyz, _ = self.chest_robot.get_base_pos(self._p) chest_center_xyz = np.array(chest_center_xyz) chest_center_xyz[0] += 0.05 chest_center_xyz[2] = 0.175 - if not self.curriculum: + # setup desired goal or subgoals + if self.curriculum: + desired_goal = self._generate_curriculum(block_poses, chest_center_xyz, new_target) + else: + # the first element is the largest openness of the door (equal to the door joint pose upper limit) + desired_goal = [[0.12]] + # the final goal is when all the blocks are in the chest for _ in range(self.num_block): desired_goal.append(chest_center_xyz) - if self.grip_informed_goal: - desired_goal.append(self.robot.parts['iiwa_gripper_tip'].get_position()) + desired_goal.append(chest_center_xyz.copy()) + desired_goal[-1][0] += 0.03 if self.task_decomposition: - self.sub_goals = [] - - sub_goal_open_door = [[0.12]] - sub_goal_open_door = sub_goal_open_door + block_poses - sub_goal_open_door.append(self.robot.parts['iiwa_gripper_tip'].get_position()) - self.sub_goals.append(np.concatenate(sub_goal_open_door)) - - for _ in range(self.num_block): - sub_goal_reach = block_poses.copy() - - for i in range(self.num_block): - if i < _: - sub_goal_reach[i] = chest_center_xyz.copy() - sub_goal_reach.append(block_poses[_].copy()) - sub_goal_reach[-1][0] += 0.03 - sub_goal_reach = [[0.12]] + sub_goal_reach - self.sub_goals.append(np.concatenate(sub_goal_reach)) - - sub_goal_push = block_poses.copy() - for i in range(self.num_block): - if i < _: - sub_goal_push[i] = chest_center_xyz.copy() - sub_goal_push[_] = chest_center_xyz.copy() - sub_goal_push.append(chest_center_xyz.copy()) - sub_goal_push[-1][0] += 0.03 - sub_goal_push = [[0.12]] + sub_goal_push - self.sub_goals.append(np.concatenate(sub_goal_push)) + self.sub_goals = self._generate_subgoals(block_poses, chest_center_xyz, 
new_target) + + if self.visualize_target: + self._update_block_target(desired_goal, index_offset=1) + if self.grip_informed_goal: + self._update_gripper_target(desired_goal[-1]) + + self.desired_goal = np.concatenate(desired_goal) + + def _generate_subgoals(self, block_poses, chest_center_xyz, new_target=False): + # empty subgoal list + sub_goals = [] + + # the first subgoal is to open the door + # the first element is the largest openness of the door (equal to the door joint pose upper limit) + sub_goal_open_door = [[0.12]] + sub_goal_open_door = sub_goal_open_door + block_poses + sub_goal_open_door.append(self.robot.parts['iiwa_gripper_tip'].get_position()) + sub_goals.append(np.concatenate(sub_goal_open_door)) + + if self.grip_informed_goal: + for _ in range(self.num_block): + sub_goal_reach = block_poses.copy() + # block goal state + for i in range(self.num_block): + if i < _: + sub_goal_reach[i] = chest_center_xyz.copy() + # gripper goal state + sub_goal_reach.append(block_poses[_].copy()) + sub_goal_reach[-1][0] += 0.03 + # chest door goal state + sub_goal_reach = [[0.12]] + sub_goal_reach + sub_goals.append(np.concatenate(sub_goal_reach)) + + sub_goal_push = block_poses.copy() + # block goal state + for i in range(self.num_block): + if i <= _: + sub_goal_push[i] = chest_center_xyz.copy() + # gripper goal state + sub_goal_push.append(chest_center_xyz.copy()) + sub_goal_push[-1][0] += 0.03 + # chest door goal state + sub_goal_push = [[0.12]] + sub_goal_push + sub_goals.append(np.concatenate(sub_goal_push)) else: + for _ in range(self.num_block): + sub_goal = block_poses.copy() + # block goal state + for i in range(self.num_block): + if i <= _: + sub_goal[i] = chest_center_xyz.copy() + sub_goal = [[0.12]] + sub_goal + sub_goals.append(np.concatenate(sub_goal)) + + return sub_goals + + def _generate_curriculum(self, block_poses, chest_center_xyz, new_target=False): + # the first element is the largest openness of the door (equal to the door joint pose upper limit) + desired_goal = [[0.12]] + + if new_target: curriculum_level = self.np_random.choice(self.num_curriculum, p=self.curriculum_prob) self.curriculum_goal_step = curriculum_level * 25 + self.base_curriculum_episode_steps - ind_block_to_move = np.sort(self.np_random.choice(np.arange(self.num_block), size=curriculum_level), + ind_block_to_move = np.sort(self.np_random.choice(np.arange(self.num_block), + size=curriculum_level, + replace=False), kind='stable').tolist() - - for i in range(self.num_block): - if i in ind_block_to_move: - desired_goal.append(chest_center_xyz) - else: - desired_goal.append(block_poses[i]) + self.last_curriculum_level = curriculum_level + self.last_ind_block_to_move = ind_block_to_move if self.curriculum_update: self.num_generated_goals_per_curriculum[curriculum_level] += 1 - self.update_curriculum_prob() + self._update_curriculum_prob() + else: + ind_block_to_move = self.last_ind_block_to_move + curriculum_level = self.last_curriculum_level - if self.visualize_target: - self._update_block_target(desired_goal, index_offset=1) + for i in range(self.num_block): + if i in ind_block_to_move: + desired_goal.append(chest_center_xyz) + else: + desired_goal.append(block_poses[i]) - self.desired_goal = np.concatenate(desired_goal) + if self.grip_informed_goal: + if curriculum_level == 0: + desired_goal.append(self.robot.parts['iiwa_gripper_tip'].get_position()) + else: + desired_goal.append(chest_center_xyz.copy()) + desired_goal[-1][0] += 0.03 + + return desired_goal \ No newline at end of file diff --git 
a/pybullet_multigoal_gym/examples/kuka_block_rearrange_curriculum.py b/pybullet_multigoal_gym/examples/kuka_block_rearrange_curriculum.py
new file mode 100644
index 0000000..1724f8c
--- /dev/null
+++ b/pybullet_multigoal_gym/examples/kuka_block_rearrange_curriculum.py
@@ -0,0 +1,59 @@
+import os
+import numpy as np
+import pybullet_multigoal_gym as pmg
+import matplotlib.pyplot as plt
+f, axarr = plt.subplots(1, 2)
+
+camera_setup = [
+    {
+        'cameraEyePosition': [-1.0, 0.25, 0.6],
+        'cameraTargetPosition': [-0.6, 0.05, 0.2],
+        'cameraUpVector': [0, 0, 1],
+        'render_width': 128,
+        'render_height': 128
+    },
+    {
+        'cameraEyePosition': [-1.0, -0.25, 0.6],
+        'cameraTargetPosition': [-0.6, -0.05, 0.2],
+        'cameraUpVector': [0, 0, 1],
+        'render_width': 128,
+        'render_height': 128
+    }
+]
+
+env = pmg.make_env(
+    # task args
+    task='block_rearrange',
+    gripper='parallel_jaw',
+    grip_informed_goal=False,
+    num_block=4,  # only meaningful for multi-block tasks
+    render=True,
+    binary_reward=True,
+    max_episode_steps=5,
+    # image observation args
+    image_observation=True,
+    depth_image=False,
+    goal_image=True,
+    visualize_target=True,
+    camera_setup=camera_setup,
+    observation_cam_id=1,
+    goal_cam_id=1,
+    # curriculum args
+    use_curriculum=True,
+    num_goals_to_generate=20)
+
+"""You can expect the desired goal to change every once in a while based on the current curriculum level,
+and to eventually settle at the hardest one."""
+
+obs = env.reset()
+env.activate_curriculum_update()
+time_done = False
+while True:
+    action = env.action_space.sample()
+    obs, reward, time_done, info = env.step(action)
+    axarr[0].imshow(obs['desired_goal_img'])
+    axarr[1].imshow(obs['achieved_goal_img'])
+    plt.pause(0.00001)
+
+    if time_done:
+        obs = env.reset()
diff --git a/pybullet_multigoal_gym/examples/kuka_chest_push_task_decomposition.py b/pybullet_multigoal_gym/examples/kuka_chest_push_task_decomposition.py
new file mode 100644
index 0000000..9fc8d5f
--- /dev/null
+++ b/pybullet_multigoal_gym/examples/kuka_chest_push_task_decomposition.py
@@ -0,0 +1,67 @@
+import os
+import numpy as np
+import pybullet_multigoal_gym as pmg
+import matplotlib.pyplot as plt
+f, axarr = plt.subplots(1, 2)
+
+camera_setup = [
+    {
+        'cameraEyePosition': [-1.0, 0.25, 0.6],
+        'cameraTargetPosition': [-0.6, 0.05, 0.2],
+        'cameraUpVector': [0, 0, 1],
+        'render_width': 128,
+        'render_height': 128
+    },
+    {
+        'cameraEyePosition': [-1.0, -0.25, 0.6],
+        'cameraTargetPosition': [-0.6, -0.05, 0.2],
+        'cameraUpVector': [0, 0, 1],
+        'render_width': 128,
+        'render_height': 128
+    }
+]
+
+env = pmg.make_env(
+    # task args
+    task='chest_push',
+    gripper='parallel_jaw',
+    grip_informed_goal=False,
+    num_block=4,  # only meaningful for multi-block tasks
+    render=True,
+    binary_reward=True,
+    max_episode_steps=25,
+    # image observation args
+    image_observation=True,
+    depth_image=False,
+    goal_image=True,
+    visualize_target=True,
+    camera_setup=camera_setup,
+    observation_cam_id=0,
+    goal_cam_id=1,
+    # task decomposition
+    task_decomposition=True)
+
+"""The desired goal changes as each subgoal is set."""
+
+obs = env.reset()
+time_done = False
+env.set_sub_goal(0)
+t = 0
+while True:
+    action = env.action_space.sample()
+    obs, reward, time_done, info = env.step(action)
+    axarr[0].imshow(obs['desired_goal_img'])
+    axarr[1].imshow(obs['achieved_goal_img'])
+    plt.pause(0.00001)
+    t += 1
+    if t == 3:
+        env.set_sub_goal(1)
+    if t == 6:
+        env.set_sub_goal(2)
+    if t == 9:
+        env.set_sub_goal(3)
+    if t == 12:
+        env.set_sub_goal(4)
+
+    if time_done:
+        obs = env.reset()
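The decomposition script above advances subgoals at hard-coded timesteps, which is handy for eyeballing the goal images but not for training. A driver that visits every subgoal in order could look like the sketch below; this is a hypothetical usage example, not part of the patch, and it assumes `env` was created with `task_decomposition=True` as above and that the wrapper forwards the underlying environment's `num_steps` attribute:

```python
# Illustrative only: iterate over all subgoals instead of fixed timesteps.
obs = env.reset()
for sub_goal_ind in range(env.num_steps):
    desired_goal = env.set_sub_goal(sub_goal_ind)  # returns the new desired goal
    for _ in range(5):  # a few (here random) actions per subgoal
        obs, reward, time_done, info = env.step(env.action_space.sample())
        if time_done:
            obs = env.reset()  # note: a reset regenerates the subgoal list
```
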
diff --git a/pybullet_multigoal_gym/examples/kuka_reach_example.py b/pybullet_multigoal_gym/examples/kuka_reach.py similarity index 100% rename from pybullet_multigoal_gym/examples/kuka_reach_example.py rename to pybullet_multigoal_gym/examples/kuka_reach.py diff --git a/pybullet_multigoal_gym/robots/kuka.py b/pybullet_multigoal_gym/robots/kuka.py index 5eff58f..0268285 100644 --- a/pybullet_multigoal_gym/robots/kuka.py +++ b/pybullet_multigoal_gym/robots/kuka.py @@ -249,3 +249,11 @@ def set_finger_joint_state(self, pos, vel=None): vel = np.zeros(pos.shape[0]) for i in range(pos.shape[0]): self.jdict[self.gripper_joint_name[i]].reset_position(pos[i], vel[i]) + + def get_finger_closeness(self): + # calculate distance between the gripper finger tabs + gripper_finger1_tab_xyz = np.array(self.parts['iiwa_gripper_finger1_finger_tab_link'].get_position()) + gripper_finger2_tab_xyz = np.array(self.parts['iiwa_gripper_finger2_finger_tab_link'].get_position()) + gripper_finger_closeness = np.sqrt( + np.sum(np.square(gripper_finger1_tab_xyz - gripper_finger2_tab_xyz))).ravel() + return gripper_finger_closeness \ No newline at end of file diff --git a/pybullet_multigoal_gym/test/multigoal/gym_image_obs_test.py b/pybullet_multigoal_gym/test/multigoal/gym_image_obs_test.py index 055d9bd..922a42a 100644 --- a/pybullet_multigoal_gym/test/multigoal/gym_image_obs_test.py +++ b/pybullet_multigoal_gym/test/multigoal/gym_image_obs_test.py @@ -22,8 +22,9 @@ env = pmg.make_env( # task args - task='push', + task='block_rearrange', gripper='parallel_jaw', + grip_informed_goal=False, num_block=4, # only meaningful for multi-block tasks render=True, binary_reward=True, @@ -34,21 +35,33 @@ goal_image=True, visualize_target=True, camera_setup=camera_setup, - observation_cam_id=-1, + observation_cam_id=1, goal_cam_id=1, # curriculum args use_curriculum=True, - num_goals_to_generate=90) + num_goals_to_generate=20, + task_decomposition=False) obs = env.reset() +env.activate_curriculum_update() time_done = False f, axarr = plt.subplots(1, 2) +# env.set_sub_goal(0) +# print(env.desired_goal) +# t = 0 while True: - action = env.action_space.sample() - obs, reward, time_done, info = env.step(action) + # action = env.action_space.sample() + # obs, reward, time_done, info = env.step(action) axarr[0].imshow(obs['desired_goal_img']) axarr[1].imshow(obs['achieved_goal_img']) plt.pause(0.00001) - if time_done: - obs = env.reset() - # obs = env.reset() + # t += 1 + # if t == 3: + # env.set_sub_goal(1) + # if t == 6: + # env.set_sub_goal(2) + # if t == 9: + # env.set_sub_goal(3) + # if t == 12: + # env.set_sub_goal(4) + obs = env.reset() diff --git a/pybullet_multigoal_gym/test/multigoal/gym_test.py b/pybullet_multigoal_gym/test/multigoal/gym_test.py index 2ecba17..8b26950 100644 --- a/pybullet_multigoal_gym/test/multigoal/gym_test.py +++ b/pybullet_multigoal_gym/test/multigoal/gym_test.py @@ -3,23 +3,22 @@ import time import pybullet_multigoal_gym as pmg -num_episodes = 300 -env = pmg.make_env(task='chest_pick_and_place', +num_episodes = 32 +env = pmg.make_env(task='block_rearrange', gripper='parallel_jaw', - num_block=2, + grip_informed_goal=False, + num_block=4, render=True, visualize_target=True, binary_reward=True, joint_control=False, - task_decomposition=True, - abstract_demonstration=True, max_episode_steps=10000, image_observation=False, - use_curriculum=False, + use_curriculum=True, + task_decomposition=False, num_goals_to_generate=num_episodes) -# env = pmg.make('KukaParallelGripBlockStackRenderSparseEnv-v0') -# 
env.activate_curriculum_update()
 obs = env.reset(test=False)
 time_done = False
 while not time_done:
@@ -30,20 +29,20 @@
     # if time_done:
     #     env.reset(test=False)
     #     time_done = False
-    env.set_sub_goal(0)
-    print(env.desired_goal)
-    env.set_sub_goal(1)
-    print(env.desired_goal)
-    env.set_sub_goal(2)
-    print(env.desired_goal)
-    env.set_sub_goal(3)
-    print(env.desired_goal)
-    env.set_sub_goal(4)
-    print(env.desired_goal)
-    env.set_sub_goal(5)
-    print(env.desired_goal)
-    env.set_sub_goal(6)
-    print(env.desired_goal)
+    # env.set_sub_goal(0)
+    # print(env.desired_goal)
+    # env.set_sub_goal(1)
+    # print(env.desired_goal)
+    # env.set_sub_goal(2)
+    # print(env.desired_goal)
+    # env.set_sub_goal(3)
+    # print(env.desired_goal)
+    # env.set_sub_goal(4)
+    # print(env.desired_goal)
+    # env.set_sub_goal(5)
+    # print(env.desired_goal)
+    # env.set_sub_goal(6)
+    # print(env.desired_goal)
     # env.set_sub_goal(7)
     # print(env.desired_goal)
     env.reset(test=False)
diff --git a/pybullet_multigoal_gym/test/test.py b/pybullet_multigoal_gym/test/test.py
index 0b77a48..19faa66 100644
--- a/pybullet_multigoal_gym/test/test.py
+++ b/pybullet_multigoal_gym/test/test.py
@@ -1,5 +1,5 @@
 import numpy as np
+import warnings
 
-a = np.random.choice(np.arange(4), size=0)
-a_ = np.sort(a).tolist()
-print(a_)
+
+print("hello")
\ No newline at end of file
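
The curriculum schedule implemented in `_update_curriculum_prob()` above can be sanity-checked outside the simulator. The following standalone sketch replays the same rule; the five-level setup and the 100-goal budget per level are assumptions chosen for illustration, not values fixed by the package:

```python
import numpy as np

# Replay of the schedule in _update_curriculum_prob(): a level's sampling
# probability drops to 0.0 once its goal budget is spent, and the next level
# starts receiving probability once the current level is half spent.
num_curriculum = 5                # assumed, e.g. num_block + 1 for chest tasks
num_goals_per_curriculum = 100    # assumed per-level goal budget
curriculum_prob = np.concatenate([[1.0], np.zeros(num_curriculum - 1)])
num_generated = np.zeros(num_curriculum)

def update_curriculum_prob():
    mask_finished = num_generated >= num_goals_per_curriculum
    mask_half = num_generated >= (num_goals_per_curriculum / 2)
    curriculum_prob[mask_finished] = 0.0
    if mask_half[0] and not mask_finished[0]:
        curriculum_prob[0] = curriculum_prob[1] = 0.5
    for i in range(1, num_curriculum - 1):
        if mask_finished[i - 1] and not mask_finished[i]:
            if mask_half[i]:
                curriculum_prob[i] = curriculum_prob[i + 1] = 0.5
            else:
                curriculum_prob[i] = 1.0
    if mask_finished[-2]:
        curriculum_prob[-1] = 1.0

rng = np.random.default_rng(0)
for episode in range(num_curriculum * num_goals_per_curriculum):
    # normalising guards against floating-point drift in the 0.5/0.5 handover
    p = curriculum_prob / curriculum_prob.sum()
    level = rng.choice(num_curriculum, p=p)
    num_generated[level] += 1
    update_curriculum_prob()
    if episode % 100 == 0:
        print(episode, curriculum_prob)
```

Running it shows the probability mass shifting from level 0 to the hardest level, which is the behaviour that `env.activate_curriculum_update()` enables during training.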