
Merge pull request #230 from GFNOrg/recalculate
Default behavior: recalculate logprobs
saleml authored Jan 22, 2025
2 parents 7f03681 + 0cacc50 · commit bbcf21f
Showing 6 changed files with 16 additions and 13 deletions.
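
The substance of the change: save_logprobs now defaults to False across the samplers and GFlowNet classes, so log-probabilities are recalculated from the current policy at loss time unless the caller opts in. A minimal sketch of the two call patterns after this commit, assuming the setup from the library's README (env, sampler, gfn already constructed; not shown in this diff):

    # On-policy training: opt in to saving the sampling logprobs so the loss
    # can reuse them instead of re-running the forward policy.
    trajectories = sampler.sample_trajectories(env=env, n=16, save_logprobs=True)

    # Off-policy training (e.g. tempered sampling): keep the new default,
    # save_logprobs=False, so the loss recalculates logprobs under the
    # current, untempered policy.
    trajectories = sampler.sample_trajectories(env=env, n=16, temperature=1.5)

    loss = gfn.loss(env, trajectories)  # recomputes logprobs when none were saved
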
README.md (9 changes: 6 additions & 3 deletions)

@@ -102,7 +102,7 @@ optimizer.add_param_group({"params": gfn.logz_parameters(), "lr": 1e-1})
 
 # 6 - We train the GFlowNet for 1000 iterations, with 16 trajectories per iteration
 for i in (pbar := tqdm(range(1000))):
-    trajectories = sampler.sample_trajectories(env=env, n=16)
+    trajectories = sampler.sample_trajectories(env=env, n=16, save_logprobs=True)  # save_logprobs=True makes on-policy training faster
     optimizer.zero_grad()
     loss = gfn.loss(env, trajectories)
     loss.backward()
@@ -152,7 +152,7 @@ logF_estimator = ScalarEstimator(module=module_logF, preprocessor=env.preprocess
 gfn = SubTBGFlowNet(pf=pf_estimator, pb=pb_estimator, logF=logF, lamda=0.9)
 
 # 5 - We define the sampler and the optimizer.
-sampler = Sampler(estimator=pf_estimator)  # We use an on-policy sampler, based on the forward policy
+sampler = Sampler(estimator=pf_estimator)
 
 # Different policy parameters can have their own LR.
 # Log F gets dedicated learning rate (typically higher).
@@ -161,7 +161,10 @@ optimizer.add_param_group({"params": gfn.logF_parameters(), "lr": 1e-2})
 
 # 6 - We train the GFlowNet for 1000 iterations, with 16 trajectories per iteration
 for i in (pbar := tqdm(range(1000))):
-    trajectories = sampler.sample_trajectories(env=env, n=16)
+    # We sample trajectories off-policy by tempering the distribution.
+    # We do not save the sampling logprobs, since they are not used for training.
+    # We save the estimator outputs to make training faster.
+    trajectories = sampler.sample_trajectories(env=env, n=16, save_logprobs=False, save_estimator_outputs=True, temperature=1.5)
     optimizer.zero_grad()
     loss = gfn.loss(env, trajectories)
     loss.backward()
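
Why this example caches estimator outputs rather than logprobs: with tempering, the distribution used for sampling differs from the policy the loss needs, but both are functions of the same network outputs, so caching the raw outputs avoids a second forward pass. A hedged, self-contained sketch of the idea in plain PyTorch (illustrative only, not torchgfn's internal code):

    import torch
    import torch.nn.functional as F

    logits = torch.randn(16, 4)  # stand-in for cached estimator outputs: 16 states, 4 actions
    temperature = 1.5

    # Sampling distribution: tempered, hence off-policy.
    sampling_probs = F.softmax(logits / temperature, dim=-1)
    actions = torch.multinomial(sampling_probs, num_samples=1)

    # Training needs logprobs under the untempered policy; because the raw
    # logits were cached, no second pass through the network is required.
    train_logprobs = F.log_softmax(logits, dim=-1).gather(1, actions)
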
src/gfn/gflownet/base.py (4 changes: 2 additions & 2 deletions)

@@ -31,7 +31,7 @@ def sample_trajectories(
         self,
         env: Env,
         n: int,
-        save_logprobs: bool = True,
+        save_logprobs: bool = False,
         save_estimator_outputs: bool = False,
     ) -> Trajectories:
         """Sample a specific number of complete trajectories.
@@ -93,7 +93,7 @@ def sample_trajectories(
         env: Env,
         n: int,
         conditioning: torch.Tensor | None = None,
-        save_logprobs: bool = True,
+        save_logprobs: bool = False,
         save_estimator_outputs: bool = False,
         **policy_kwargs: Any,
     ) -> Trajectories:
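
A migration note, since this flips the default of an abstract-method signature: any downstream caller that relied on the old default now silently recomputes logprobs at loss time, which is correct but slower for on-policy training. A sketch of the opt-in such a caller would add (n=64 is an arbitrary illustrative batch size):

    # Before this commit, save_logprobs=True was implicit:
    trajectories = gfn.sample_trajectories(env, n=64)

    # After this commit, pass it explicitly to keep the faster on-policy path:
    trajectories = gfn.sample_trajectories(env, n=64, save_logprobs=True)
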
src/gfn/gflownet/flow_matching.py (2 changes: 1 addition & 1 deletion)

@@ -45,7 +45,7 @@ def sample_trajectories(
         env: Env,
         n: int,
         conditioning: torch.Tensor | None = None,
-        save_logprobs: bool = True,
+        save_logprobs: bool = False,
         save_estimator_outputs: bool = False,
         **policy_kwargs: Any,
     ) -> Trajectories:
src/gfn/samplers.py (8 changes: 4 additions & 4 deletions)

@@ -34,7 +34,7 @@ def sample_actions(
         states: States,
         conditioning: torch.Tensor | None = None,
         save_estimator_outputs: bool = False,
-        save_logprobs: bool = True,
+        save_logprobs: bool = False,
         **policy_kwargs: Any,
     ) -> Tuple[Actions, torch.Tensor | None, torch.Tensor | None]:
         """Samples actions from the given states.
@@ -104,7 +104,7 @@ def sample_trajectories(
         states: Optional[States] = None,
         conditioning: Optional[torch.Tensor] = None,
         save_estimator_outputs: bool = False,
-        save_logprobs: bool = True,
+        save_logprobs: bool = False,
         **policy_kwargs: Any,
     ) -> Trajectories:
         """Sample trajectories sequentially.
@@ -296,7 +296,7 @@ def local_search(
         trajectories: Trajectories,
         conditioning: torch.Tensor | None = None,
         save_estimator_outputs: bool = False,
-        save_logprobs: bool = True,
+        save_logprobs: bool = False,
         back_steps: torch.Tensor | None = None,
         back_ratio: float | None = None,
         use_metropolis_hastings: bool = True,
@@ -456,7 +456,7 @@ def sample_trajectories(
         states: Optional[States] = None,
         conditioning: Optional[torch.Tensor] = None,
         save_estimator_outputs: bool = False,  # FIXME: currently does not work when this is True
-        save_logprobs: bool = True,  # TODO: Support save_logprobs=True
+        save_logprobs: bool = False,  # TODO: Support save_logprobs=True
         n_local_search_loops: int = 0,
         back_steps: torch.Tensor | None = None,
         back_ratio: float | None = None,
tutorials/examples/train_hypergrid_simple.py (4 changes: 2 additions & 2 deletions)

@@ -57,8 +57,8 @@ def main(args):
         trajectories = sampler.sample_trajectories(
             env,
             n=args.batch_size,
-            save_logprobs=False,
-            save_estimator_outputs=True,
+            save_logprobs=True,
+            save_estimator_outputs=False,
             epsilon=args.epsilon,
         )
         visited_terminating_states.extend(trajectories.last_states)
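
This example now saves logprobs at sampling time even though epsilon-greedy exploration can make the samples off-policy; the replaced variant (caching estimator outputs) remains available. A hypothetical toggle, not part of the script, that picks what to cache based on the exploration setting (treating epsilon == 0.0 as a proxy for pure on-policy sampling is an assumption):

    # Hypothetical: epsilon == 0.0 taken as a proxy for pure on-policy sampling.
    on_policy = args.epsilon == 0.0
    trajectories = sampler.sample_trajectories(
        env,
        n=args.batch_size,
        save_logprobs=on_policy,               # reuse sampling logprobs when on-policy
        save_estimator_outputs=not on_policy,  # cache raw outputs when exploring
        epsilon=args.epsilon,
    )
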
tutorials/examples/train_ising.py (2 changes: 1 addition & 1 deletion)

@@ -85,7 +85,7 @@ def ising_n_to_ij(L, n):
         env,
         n=8,
         save_estimator_outputs=False,
-        save_logprobs=True,
+        save_logprobs=False,
     )
     training_samples = gflownet.to_training_samples(trajectories)
     optimizer.zero_grad()