pytorch · slishak-PX · Aug 2, 2024 · Aug 5, 2024 · Sep 29, 2024 · Sep 30, 2024
diff --git a/botorch/posteriors/multitask.py b/botorch/posteriors/multitask.py
@@ -36,9 +36,10 @@ def __init__(
             distribution: Posterior multivariate normal distribution.
             joint_covariance_matrix: Joint test train covariance matrix over the entire
                 tensor.
-            train_train_covar: Covariance matrix of train points in the data space.
-            test_obs_covar: Covariance matrix of test x train points in the data space.
+            test_train_covar: Covariance matrix of test x train points in the data space.
             train_diff: Difference between train mean and train responses.
+            test_mean: Test mean response.
+            train_train_covar: Covariance matrix of train points in the data space.
             train_noise: Training noise covariance.
             test_noise: Only used if posterior should contain observation noise.
                 Testing noise covariance.
@@ -226,9 +227,26 @@ def rsample_from_base_samples(
             train_diff.reshape(*train_diff.shape[:-2], -1) - updated_obs_samples
         )
         train_covar_plus_noise = self.train_train_covar + self.train_noise
-        obs_solve = train_covar_plus_noise.solve(obs_minus_samples.unsqueeze(-1))
+
+        # Permute the largest batch dimension to the end so that it serves as the
+        # column dimension of the right-hand side (more efficient than unsqueezing).
+        largest_batch_dim = torch.argmax(torch.tensor(obs_minus_samples.shape[:-1])).item()
+        # largest_batch_dim = torch.argmax(torch.tensor(sample_shape))
+        perm = list(range(obs_minus_samples.ndim))
+        perm.remove(largest_batch_dim)
+        perm.append(largest_batch_dim)
+        # perm[-1], perm[largest_batch_dim] = perm[largest_batch_dim], perm[-1]
+        inverse_perm = torch.argsort(torch.tensor(perm))
+
+        # Solve with the permuted right-hand side
+        obs_minus_samples_p = obs_minus_samples.permute(*perm)
+        obs_solve_p = train_covar_plus_noise.solve(obs_minus_samples_p)
+
+        # Undo permutation
+        obs_solve = obs_solve_p.permute(*inverse_perm).unsqueeze(-1)
 
         # and multiply the test-observed matrix against the result of the solve
+        # TODO: this might be made more efficient with obs_solve_p (permuted)
         updated_samples = self.test_train_covar.matmul(obs_solve).squeeze(-1)
 
         # finally, we add the conditioned samples to the prior samples