From 8258b190a3d9ad590e35a8a277d24125cafbdedd Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 16 Feb 2024 16:35:30 -0500
Subject: [PATCH 1/5] added default value for log_reward_clip_min in abstract
 base class

---
 src/gfn/gflownet/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gfn/gflownet/base.py b/src/gfn/gflownet/base.py
index 5e04151d..e38bb10a 100644
--- a/src/gfn/gflownet/base.py
+++ b/src/gfn/gflownet/base.py
@@ -24,6 +24,7 @@ class GFlowNet(ABC, nn.Module, Generic[TrainingSampleType]):
     A formal definition of GFlowNets is given in Sec. 3 of
     [GFlowNet Foundations](https://arxiv.org/pdf/2111.09266).
     """
+    log_reward_clip_min = float("-inf")  # Disabled by default.
 
     @abstractmethod
     def sample_trajectories(
@@ -214,7 +215,7 @@ def get_trajectories_scores(
         total_log_pb_trajectories = log_pb_trajectories.sum(dim=0)
 
         log_rewards = trajectories.log_rewards
-        # TODO: log_reward_clip_min isn't defined in base (#155).
+
         if math.isfinite(self.log_reward_clip_min) and log_rewards is not None:
             log_rewards = log_rewards.clamp_min(self.log_reward_clip_min)
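
A note on how the new guard behaves: with the default of float("-inf"), math.isfinite() returns False and the clamp is skipped entirely, so subclasses that never override the attribute keep their raw log-rewards, while subclasses that set a finite value get them bounded from below. A minimal standalone sketch (the tensor values and the -100.0 threshold are made up for illustration):

import math

import torch

# Base-class default: clipping disabled.
log_reward_clip_min = float("-inf")
log_rewards = torch.tensor([-250.0, -3.2, 0.0])

# With the default, isfinite() is False and the rewards pass through untouched.
if math.isfinite(log_reward_clip_min) and log_rewards is not None:
    log_rewards = log_rewards.clamp_min(log_reward_clip_min)
print(log_rewards)  # tensor([-250.0000, -3.2000, 0.0000])

# A subclass that sets a finite threshold bounds very negative log-rewards,
# which would otherwise dominate the gradients of the loss.
log_reward_clip_min = -100.0
if math.isfinite(log_reward_clip_min) and log_rewards is not None:
    log_rewards = log_rewards.clamp_min(log_reward_clip_min)
print(log_rewards)  # tensor([-100.0000, -3.2000, 0.0000])
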
From 7553aac96cb5da6dd140c25cab9c69a5336da2f9 Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 16 Feb 2024 19:43:06 -0500
Subject: [PATCH 2/5] added simple script

---
 test.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 test.py

diff --git a/test.py b/test.py
new file mode 100644
index 00000000..02dcf7ff
--- /dev/null
+++ b/test.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+import torch
+from tqdm import tqdm
+
+from gfn.gym import HyperGrid
+from gfn.gflownet import TBGFlowNet
+from gfn.modules import DiscretePolicyEstimator
+from gfn.samplers import Sampler
+from gfn.utils import NeuralNet
+
+
+torch.manual_seed(0)
+exploration_rate = 0.5
+learning_rate = 0.0005
+
+env = HyperGrid(ndim=5, height=2)
+
+module_PF = NeuralNet(
+    input_dim=env.preprocessor.output_dim,
+    output_dim=env.n_actions,
+)
+module_PB = NeuralNet(
+    input_dim=env.preprocessor.output_dim,
+    output_dim=env.n_actions - 1,
+    torso=module_PF.torso,
+)
+
+pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
+pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)
+gflownet = TBGFlowNet(init_logZ=0., pf=pf_estimator, pb=pb_estimator, off_policy=True)
+sampler = Sampler(estimator=pf_estimator)
+
+# Policy parameters have their own LR.
+non_logz_params = [v for k, v in dict(gflownet.named_parameters()).items() if k != "logZ"]
+optimizer = torch.optim.Adam(non_logz_params, lr=1e-3)
+
+# Log Z gets a dedicated learning rate (typically higher).
+logz_params = [dict(gflownet.named_parameters())["logZ"]]
+optimizer.add_param_group({"params": logz_params, "lr": 1e-1})
+
+n_iterations = int(1e4)
+batch_size = int(1e5)
+
+for i in (pbar := tqdm(range(n_iterations))):
+    trajectories = sampler.sample_trajectories(
+        env,
+        n_trajectories=batch_size,
+        off_policy=True,
+        epsilon=exploration_rate,
+    )
+    optimizer.zero_grad()
+    loss = gflownet.loss(env, trajectories)
+    loss.backward()
+    optimizer.step()
+    pbar.set_postfix({"loss": loss.item()})

From 61c08be2f6a3f41aa32d7afb217d5a47a162e9a3 Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 16 Feb 2024 21:14:31 -0500
Subject: [PATCH 3/5] very simple usage example

---
 .../examples/train_hypergrid_simple.py       | 35 ++++++++++++-------
 1 file changed, 23 insertions(+), 12 deletions(-)
 rename test.py => tutorials/examples/train_hypergrid_simple.py (53%)

diff --git a/test.py b/tutorials/examples/train_hypergrid_simple.py
similarity index 53%
rename from test.py
rename to tutorials/examples/train_hypergrid_simple.py
index 02dcf7ff..d21ef349 100644
--- a/test.py
+++ b/tutorials/examples/train_hypergrid_simple.py
@@ -2,19 +2,24 @@
 import torch
 from tqdm import tqdm
 
-from gfn.gym import HyperGrid
 from gfn.gflownet import TBGFlowNet
+from gfn.gym import HyperGrid
 from gfn.modules import DiscretePolicyEstimator
 from gfn.samplers import Sampler
 from gfn.utils import NeuralNet
 
-
 torch.manual_seed(0)
 exploration_rate = 0.5
 learning_rate = 0.0005
 
-env = HyperGrid(ndim=5, height=2)
+# Set up the environment.
+env = HyperGrid(
+    ndim=5,
+    height=2,
+    device_str="cuda" if torch.cuda.is_available() else "cpu",
+)
 
+# Build the GFlowNet.
 module_PF = NeuralNet(
     input_dim=env.preprocessor.output_dim,
     output_dim=env.n_actions,
@@ -24,19 +29,25 @@
     output_dim=env.n_actions - 1,
     torso=module_PF.torso,
 )
+pf_estimator = DiscretePolicyEstimator(
+    module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor
+)
+pb_estimator = DiscretePolicyEstimator(
+    module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor
+)
+gflownet = TBGFlowNet(init_logZ=0.0, pf=pf_estimator, pb=pb_estimator, off_policy=True)
 
-pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
-pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)
-gflownet = TBGFlowNet(init_logZ=0., pf=pf_estimator, pb=pb_estimator, off_policy=True)
+# Feed pf to the sampler.
 sampler = Sampler(estimator=pf_estimator)
 
-# Policy parameters have their own LR.
-non_logz_params = [v for k, v in dict(gflownet.named_parameters()).items() if k != "logZ"]
-optimizer = torch.optim.Adam(non_logz_params, lr=1e-3)
+# Move the gflownet to the GPU, if available.
+if torch.cuda.is_available():
+    gflownet = gflownet.to("cuda")
 
-# Log Z gets a dedicated learning rate (typically higher).
-logz_params = [dict(gflownet.named_parameters())["logZ"]]
-optimizer.add_param_group({"params": logz_params, "lr": 1e-1})
+# Policy parameters have their own LR. Log Z gets a dedicated learning rate
+# (typically higher).
+optimizer = torch.optim.Adam(gflownet.pf_pb_parameters(), lr=1e-3)
+optimizer.add_param_group({"params": gflownet.logz_parameters(), "lr": 1e-1})
 
 n_iterations = int(1e4)
 batch_size = int(1e5)
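
The refactor above swaps the manual named_parameters() filtering for the gflownet.pf_pb_parameters() and gflownet.logz_parameters() helpers, but the underlying optimizer pattern is plain PyTorch parameter groups. A self-contained sketch with a toy module standing in for the GFlowNet (ToyTB is illustrative, not a library class):

import torch
import torch.nn as nn


class ToyTB(nn.Module):
    """Toy stand-in: policy weights plus a scalar logZ parameter."""

    def __init__(self):
        super().__init__()
        self.policy = nn.Linear(4, 4)
        self.logZ = nn.Parameter(torch.tensor(0.0))


model = ToyTB()

# One group for the policy parameters, a second higher-LR group for logZ:
# the scalar log-partition estimate tolerates a much larger step size than
# the network weights, hence the "typically higher" rate in the script.
non_logz = [p for n, p in model.named_parameters() if n != "logZ"]
optimizer = torch.optim.Adam(non_logz, lr=1e-3)
optimizer.add_param_group({"params": [model.logZ], "lr": 1e-1})

for group in optimizer.param_groups:
    print(len(group["params"]), group["lr"])  # 2 params at 1e-3, 1 param at 1e-1
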
From f41a31faa3d87a94aab35449d4b141fcdcb138a8 Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 16 Feb 2024 21:14:46 -0500
Subject: [PATCH 4/5] fixed typecasting error

---
 src/gfn/states.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gfn/states.py b/src/gfn/states.py
index 416e5670..53492861 100644
--- a/src/gfn/states.py
+++ b/src/gfn/states.py
@@ -410,9 +410,9 @@ def set_nonexit_action_masks(self, cond, allow_exit: bool):
             trajectory - if so, it should be set to True.
         """
         if allow_exit:
-            exit_idx = torch.zeros(self.batch_shape + (1,))
+            exit_idx = torch.zeros(self.batch_shape + (1,)).to(cond.device)
         else:
-            exit_idx = torch.ones(self.batch_shape + (1,))
+            exit_idx = torch.ones(self.batch_shape + (1,)).to(cond.device)
         self.forward_masks[torch.cat([cond, exit_idx], dim=-1).bool()] = False
 
     def set_exit_masks(self, batch_idx):

From 35908de56ff385108e52e4bfd0858d5c88bef72b Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 16 Feb 2024 21:15:24 -0500
Subject: [PATCH 5/5] no longer automatically casting tensors, which caused
 unexplained errors when the environment wasn't initialized with the correct
 device_str

---
 src/gfn/utils/modules.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/gfn/utils/modules.py b/src/gfn/utils/modules.py
index 2ffbf54a..9820fa05 100644
--- a/src/gfn/utils/modules.py
+++ b/src/gfn/utils/modules.py
@@ -54,7 +54,6 @@ def __init__(
         else:
             self.torso = torso
         self.last_layer = nn.Linear(self.torso.hidden_dim, output_dim)
-        self.device = None
 
     def forward(
         self, preprocessed_states: TT["batch_shape", "input_dim", float]
@@ -66,11 +65,6 @@ def forward(
             ingestion by the MLP.
         Returns: out, a set of continuous variables.
         """
-        if self.device is None:
-            self.device = preprocessed_states.device
-            self.to(
-                self.device
-            )  # TODO: This is maybe fine but could result in weird errors if the model keeps bouncing between devices.
         out = self.torso(preprocessed_states)
         out = self.last_layer(out)
         return out
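
These last two patches enforce the same discipline: the caller places tensors and modules on a device explicitly (as the example script now does via device_str and gflownet.to("cuda")), rather than modules silently migrating themselves at forward time. A minimal sketch of the mismatch patch 4 fixes; cond here is a made-up stand-in for the condition tensor passed to set_nonexit_action_masks:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Stand-in condition tensor; in real use it lives wherever the environment
# allocated its states.
cond = torch.ones(8, 4, device=device)

# Before patch 4: torch.zeros() allocates on the CPU by default, so the
# cat() below raised a device-mismatch RuntimeError whenever cond was on GPU.
# exit_idx = torch.zeros(cond.shape[0], 1)

# After patch 4: the new tensor explicitly follows cond's device.
exit_idx = torch.zeros(cond.shape[0], 1).to(cond.device)

mask = torch.cat([cond, exit_idx], dim=-1).bool()
assert mask.device == cond.device
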