From 2b8dac88978d7f5e907361571ea36b5249ed453d Mon Sep 17 00:00:00 2001 From: Devesh Sarda Date: Sat, 20 Jan 2024 02:51:25 -0600 Subject: [PATCH] Updated gitignore --- .gitignore | 3 + docs/python_api/data/batch.rst | 11 - docs/python_api/data/dataloader.rst | 17 -- docs/python_api/data/dense_graph.rst | 19 -- docs/python_api/data/graph.rst | 25 -- docs/python_api/data/index.rst | 13 - docs/python_api/data/samplers/edge.rst | 9 - docs/python_api/data/samplers/index.rst | 11 - docs/python_api/data/samplers/negative.rst | 9 - docs/python_api/data/samplers/neighbor.rst | 15 -- src/cpp/include/configuration/config.h | 4 +- src/cpp/include/data/dataloader.h | 2 +- src/cpp/include/storage/graph_storage.h | 6 +- src/cpp/python_bindings/data/batch_wrap.cpp | 55 ---- .../python_bindings/data/dataloader_wrap.cpp | 182 -------------- src/cpp/python_bindings/data/graph_wrap.cpp | 49 ---- .../data/samplers/edge_wrap.cpp | 22 -- .../data/samplers/negative_wrap.cpp | 32 --- .../data/samplers/neighbor_wrap.cpp | 85 ------- src/cpp/python_bindings/data/wrap.cpp | 29 --- src/cpp/src/configuration/config.cpp | 9 + src/cpp/src/data/dataloader.cpp | 14 +- src/cpp/src/marius.cpp | 5 +- src/cpp/src/pipeline/evaluator.cpp | 4 +- src/cpp/src/pipeline/trainer.cpp | 16 +- src/cpp/src/storage/io.cpp | 19 +- src/cpp/third_party/CMakeLists.txt | 21 -- src/cpp/third_party/googletest | 1 - src/cpp/third_party/parallel-hashmap | 1 - src/cpp/third_party/pybind11 | 1 - src/cpp/third_party/spdlog | 1 - .../tools/configuration/marius_config.py | 4 +- test/cpp/unit/data/samplers/test_negative.cpp | 234 ------------------ 33 files changed, 64 insertions(+), 864 deletions(-) delete mode 100644 docs/python_api/data/batch.rst delete mode 100644 docs/python_api/data/dataloader.rst delete mode 100644 docs/python_api/data/dense_graph.rst delete mode 100644 docs/python_api/data/graph.rst delete mode 100644 docs/python_api/data/index.rst delete mode 100644 docs/python_api/data/samplers/edge.rst delete mode 100644 docs/python_api/data/samplers/index.rst delete mode 100644 docs/python_api/data/samplers/negative.rst delete mode 100644 docs/python_api/data/samplers/neighbor.rst delete mode 100644 src/cpp/python_bindings/data/batch_wrap.cpp delete mode 100644 src/cpp/python_bindings/data/dataloader_wrap.cpp delete mode 100644 src/cpp/python_bindings/data/graph_wrap.cpp delete mode 100644 src/cpp/python_bindings/data/samplers/edge_wrap.cpp delete mode 100644 src/cpp/python_bindings/data/samplers/negative_wrap.cpp delete mode 100644 src/cpp/python_bindings/data/samplers/neighbor_wrap.cpp delete mode 100644 src/cpp/python_bindings/data/wrap.cpp delete mode 100644 src/cpp/third_party/CMakeLists.txt delete mode 160000 src/cpp/third_party/googletest delete mode 160000 src/cpp/third_party/parallel-hashmap delete mode 160000 src/cpp/third_party/pybind11 delete mode 160000 src/cpp/third_party/spdlog delete mode 100644 test/cpp/unit/data/samplers/test_negative.cpp diff --git a/.gitignore b/.gitignore index 7e75fc8b..cb30fe25 100644 --- a/.gitignore +++ b/.gitignore @@ -171,3 +171,6 @@ Thumbs.db # End of https://www.toptal.com/developers/gitignore/api/python + +src/cpp/third_party +test_datasets \ No newline at end of file diff --git a/docs/python_api/data/batch.rst b/docs/python_api/data/batch.rst deleted file mode 100644 index 9472a633..00000000 --- a/docs/python_api/data/batch.rst +++ /dev/null @@ -1,11 +0,0 @@ - -Batch -================= - - -.. autoclass:: marius.data.Batch - :members: - :undoc-members: - :exclude-members: to - - .. 
method:: to(self: marius._data.Batch, device: torch.device) -> None \ No newline at end of file diff --git a/docs/python_api/data/dataloader.rst b/docs/python_api/data/dataloader.rst deleted file mode 100644 index f078038e..00000000 --- a/docs/python_api/data/dataloader.rst +++ /dev/null @@ -1,17 +0,0 @@ - -DataLoader -============================ - - -.. autoclass:: marius.data.DataLoader - :members: - :undoc-members: - :exclude-members: __init__, getBatch - - .. method:: __init__(self: marius._data.DataLoader, graph_storage: GraphModelStorage, learning_task: str, training_config: marius._config.TrainingConfig, evaluation_config: marius._config.EvaluationConfig, encoder_config: marius._config.EncoderConfig) -> None - - .. method:: __init__(self: marius._data.DataLoader, graph_storage: GraphModelStorage, learning_task: str, batch_size: int = 1000, neg_sampler: marius._data.samplers.NegativeSampler = None, nbr_sampler: marius._data.samplers.NeighborSampler = None, train: bool = False) -> None - - .. method:: __init__(self: marius._data.DataLoader, edges: Optional[torch.Tensor], learning_task: str, nodes: Optional[torch.Tensor] = None, node_features: Optional[torch.Tensor] = None, node_embeddings: Optional[torch.Tensor] = None, node_optim_state: Optional[torch.Tensor] = None, node_labels: Optional[torch.Tensor] = None, train_edges: Optional[torch.Tensor] = None, batch_size: int = 1000, neg_sampler: marius._data.samplers.NegativeSampler = None, nbr_sampler: marius._data.samplers.NeighborSampler = None, filter_edges: List[torch.Tensor] = [], train: bool = False) -> None - - .. method:: getBatch(self: marius._data.DataLoader, device: Optional[torch.device] = None, perform_map: bool = True) -> marius._data.Batch \ No newline at end of file diff --git a/docs/python_api/data/dense_graph.rst b/docs/python_api/data/dense_graph.rst deleted file mode 100644 index 7ebe0022..00000000 --- a/docs/python_api/data/dense_graph.rst +++ /dev/null @@ -1,19 +0,0 @@ - -DENSEGraph -============================ - - -.. autoclass:: marius.data.DENSEGraph - :members: - :undoc-members: - :exclude-members: __init__, getNeighborIDs, setNodeProperties, to - - .. method:: __init__(self: marius._data.DENSEGraph) -> None - - .. method:: __init__(self: marius._data.DENSEGraph, hop_offsets: torch.Tensor, node_ids: torch.Tensor, in_offsets: torch.Tensor, in_neighbors_vec: List[torch.Tensor], in_neighbors: torch.Tensor, out_offsets: torch.Tensor, out_neighbors_vec: List[torch.Tensor], out_neighbors: torch.Tensor, num_nodes_in_memory: int) -> None - - .. method:: getNeighborIDs(self: marius._data.DENSEGraph, incoming: bool = True, global_ids: bool = False) -> torch.Tensor - - .. method:: setNodeProperties(self: marius._data.DENSEGraph, node_properties: torch.Tensor) -> None - - .. method:: to(self: marius._data.DENSEGraph, device: torch.device) -> None \ No newline at end of file diff --git a/docs/python_api/data/graph.rst b/docs/python_api/data/graph.rst deleted file mode 100644 index 203900d3..00000000 --- a/docs/python_api/data/graph.rst +++ /dev/null @@ -1,25 +0,0 @@ - -MariusGraph -============================ - - -.. autoclass:: marius.data.MariusGraph - :members: - :undoc-members: - :exclude-members: __init__, getEdges, getNeighborOffsets, getNeighborsForNodeIds, getNumNeighbors, getRelationIDs, to - - .. method:: __init__(self: marius._data.MariusGraph) -> None - - .. method:: __init__(self: marius._data.MariusGraph, src_sorted_edges: torch.Tensor, dst_sorted_edges: torch.Tensor, num_nodes_in_memory: int) -> None - - .. 
method:: getEdges(self: marius._data.MariusGraph, incoming: bool = True) -> torch.Tensor - - .. method:: getNeighborOffsets(self: marius._data.MariusGraph, incoming: bool = True) -> torch.Tensor - - .. method:: getNeighborsForNodeIds(self: marius._data.MariusGraph, node_ids: torch.Tensor, incoming: bool, neighbor_sampling_layer: marius._config.NeighborSamplingLayer, max_neighbors_size: int, rate: float) -> Tuple(torch.Tensor, torch.Tensor) - - .. method:: getNumNeighbors(self: marius._data.MariusGraph, incoming: bool = True) -> torch.Tensor - - .. method:: getRelationIDs(self: marius._data.MariusGraph, incoming: bool = True) -> torch.Tensor - - .. method:: to(self: marius._data.MariusGraph, device: torch.device) -> None \ No newline at end of file diff --git a/docs/python_api/data/index.rst b/docs/python_api/data/index.rst deleted file mode 100644 index 6372b54f..00000000 --- a/docs/python_api/data/index.rst +++ /dev/null @@ -1,13 +0,0 @@ - -marius.data -******************** - -.. toctree:: - :glob: - :maxdepth: 2 - - samplers/index - batch - dataloader - dense_graph - graph.rst \ No newline at end of file diff --git a/docs/python_api/data/samplers/edge.rst b/docs/python_api/data/samplers/edge.rst deleted file mode 100644 index 77554eee..00000000 --- a/docs/python_api/data/samplers/edge.rst +++ /dev/null @@ -1,9 +0,0 @@ - -RandomEdgeSampler -======================================= - - -.. autoclass:: marius.data.samplers.RandomEdgeSampler - :members: - :undoc-members: - :special-members: __init__ \ No newline at end of file diff --git a/docs/python_api/data/samplers/index.rst b/docs/python_api/data/samplers/index.rst deleted file mode 100644 index c2a6159d..00000000 --- a/docs/python_api/data/samplers/index.rst +++ /dev/null @@ -1,11 +0,0 @@ - -samplers -******************** - -.. toctree:: - :glob: - :maxdepth: 2 - - edge - negative - neighbor \ No newline at end of file diff --git a/docs/python_api/data/samplers/negative.rst b/docs/python_api/data/samplers/negative.rst deleted file mode 100644 index 54d202c4..00000000 --- a/docs/python_api/data/samplers/negative.rst +++ /dev/null @@ -1,9 +0,0 @@ - -CorruptNodeNegativeSampler -==================================================== - - -.. autoclass:: marius.data.samplers.CorruptNodeNegativeSampler - :members: - :undoc-members: - :special-members: __init__ \ No newline at end of file diff --git a/docs/python_api/data/samplers/neighbor.rst b/docs/python_api/data/samplers/neighbor.rst deleted file mode 100644 index 99e98b23..00000000 --- a/docs/python_api/data/samplers/neighbor.rst +++ /dev/null @@ -1,15 +0,0 @@ - -LayeredNeighborSampler -==================================================== - - -.. autoclass:: marius.data.samplers.LayeredNeighborSampler - :members: - :undoc-members: - :exclude-members: __init__ - - .. method:: __init__(self: marius._data.samplers.LayeredNeighborSampler, storage: GraphModelStorage, num_neighbors: List[int], incoming: bool = True, outgoing: bool = True, use_hashmap_sets: bool = False) -> None - - .. method:: __init__(self: marius._data.samplers.LayeredNeighborSampler, graph: MariusGraph, num_neighbors: List[int], incoming: bool = True, outgoing: bool = True, use_hashmap_sets: bool = False) -> None - - .. 
method:: __init__(self: marius._data.samplers.LayeredNeighborSampler, num_neighbors: List[int], incoming: bool = True, outgoing: bool = True, use_hashmap_sets: bool = False) -> None \ No newline at end of file diff --git a/src/cpp/include/configuration/config.h b/src/cpp/include/configuration/config.h index b1cabfb3..15334def 100644 --- a/src/cpp/include/configuration/config.h +++ b/src/cpp/include/configuration/config.h @@ -143,6 +143,7 @@ struct TrainingConfig { int batch_size; shared_ptr negative_sampling = nullptr; int num_epochs; + int batches_per_epoch; shared_ptr pipeline = nullptr; int epochs_per_shuffle; int logs_per_epoch; @@ -157,6 +158,7 @@ struct EvaluationConfig { shared_ptr negative_sampling = nullptr; shared_ptr pipeline = nullptr; int epochs_per_eval; + int batches_per_epoch; string checkpoint_dir; bool full_graph_evaluation; }; @@ -210,4 +212,4 @@ PYBIND11_EXPORT shared_ptr initMariusConfig(pyobj python_config); shared_ptr loadConfig(string config_path, bool save = false); -#endif // MARIUS_CONFIG_H +#endif // MARIUS_CONFIG_H \ No newline at end of file diff --git a/src/cpp/include/data/dataloader.h b/src/cpp/include/data/dataloader.h index 93b2b0b5..4f9e40fb 100644 --- a/src/cpp/include/data/dataloader.h +++ b/src/cpp/include/data/dataloader.h @@ -86,7 +86,7 @@ class DataLoader { void setActiveNodes(); - void initializeBatches(bool prepare_encode = false); + void initializeBatches(bool prepare_encode = false, bool in_training_mode = true); void clearBatches(); diff --git a/src/cpp/include/storage/graph_storage.h b/src/cpp/include/storage/graph_storage.h index 0ff2e9ad..422deba5 100644 --- a/src/cpp/include/storage/graph_storage.h +++ b/src/cpp/include/storage/graph_storage.h @@ -11,9 +11,11 @@ struct GraphModelStoragePtrs { shared_ptr edges = nullptr; + shared_ptr edges_weights = nullptr; shared_ptr train_edges = nullptr; - shared_ptr train_edges_dst_sort = nullptr; shared_ptr train_edges_weights = nullptr; + shared_ptr train_edges_dst_sort = nullptr; + shared_ptr train_edges_dst_sort_weights = nullptr; shared_ptr validation_edges = nullptr; shared_ptr validation_edges_weights = nullptr; shared_ptr test_edges = nullptr; @@ -311,4 +313,4 @@ class GraphModelStorage { void addFilterEdges(shared_ptr filter_edges) { storage_ptrs_.filter_edges.emplace_back(filter_edges); } }; -#endif // MARIUS_SRC_CPP_INCLUDE_GRAPH_STORAGE_H_ +#endif // MARIUS_SRC_CPP_INCLUDE_GRAPH_STORAGE_H_ \ No newline at end of file diff --git a/src/cpp/python_bindings/data/batch_wrap.cpp b/src/cpp/python_bindings/data/batch_wrap.cpp deleted file mode 100644 index b51afb87..00000000 --- a/src/cpp/python_bindings/data/batch_wrap.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "common/pybind_headers.h" -#include "data/batch.h" - -void init_batch(py::module &m) { - py::enum_(m, "BatchStatus") - .value("Waiting", BatchStatus::Waiting) - .value("AccumulatedIndices", BatchStatus::AccumulatedIndices) - .value("LoadedEmbeddings", BatchStatus::LoadedEmbeddings) - .value("TransferredToDevice", BatchStatus::TransferredToDevice) - .value("PreparedForCompute", BatchStatus::PreparedForCompute) - .value("ComputedGradients", BatchStatus::ComputedGradients) - .value("AccumulatedGradients", BatchStatus::AccumulatedGradients) - .value("TransferredToHost", BatchStatus::TransferredToHost) - .value("Done", BatchStatus::Done); - - py::class_>(m, "Batch", py::dynamic_attr()) - .def_readwrite("batch_id", &Batch::batch_id_) - .def_readwrite("start_idx", &Batch::start_idx_) - .def_readwrite("batch_size", &Batch::batch_size_) - 
.def_readwrite("train", &Batch::train_) - .def_readwrite("device_id", &Batch::device_id_) - - .def_readwrite("status", &Batch::status_) - - .def_readwrite("root_node_indices", &Batch::root_node_indices_) - .def_readwrite("unique_node_indices", &Batch::unique_node_indices_) - .def_readwrite("node_embeddings", &Batch::node_embeddings_) - .def_readwrite("node_gradients", &Batch::node_gradients_) - .def_readwrite("node_embeddings_state", &Batch::node_embeddings_state_) - .def_readwrite("node_state_update", &Batch::node_state_update_) - - .def_readwrite("node_features", &Batch::node_features_) - .def_readwrite("node_labels", &Batch::node_labels_) - - .def_readwrite("src_neg_indices_mapping", &Batch::src_neg_indices_mapping_) - .def_readwrite("dst_neg_indices_mapping", &Batch::dst_neg_indices_mapping_) - - .def_readwrite("edges", &Batch::edges_) - - .def_readwrite("dense_graph", &Batch::dense_graph_) - .def_readwrite("encoded_uniques", &Batch::encoded_uniques_) - - .def_readwrite("neg_edges", &Batch::neg_edges_) - .def_readwrite("rel_neg_indices", &Batch::rel_neg_indices_) - .def_readwrite("src_neg_indices", &Batch::src_neg_indices_) - .def_readwrite("dst_neg_indices", &Batch::dst_neg_indices_) - .def_readwrite("src_neg_filter", &Batch::src_neg_filter_) - .def_readwrite("dst_neg_filter", &Batch::dst_neg_filter_) - - .def(py::init(), py::arg("train")) - .def("to", &Batch::to, py::arg("device"), py::arg("stream") = nullptr) - .def("accumulateGradients", &Batch::accumulateGradients, py::arg("learning_rate")) - .def("embeddingsToHost", &Batch::embeddingsToHost) - .def("clear", &Batch::clear); -} \ No newline at end of file diff --git a/src/cpp/python_bindings/data/dataloader_wrap.cpp b/src/cpp/python_bindings/data/dataloader_wrap.cpp deleted file mode 100644 index ba7de425..00000000 --- a/src/cpp/python_bindings/data/dataloader_wrap.cpp +++ /dev/null @@ -1,182 +0,0 @@ -#include "common/pybind_headers.h" -#include "data/dataloader.h" - -void init_dataloader(py::module &m) { - py::class_>(m, "DataLoader", py::dynamic_attr()) - .def_readwrite("graph_storage", &DataLoader::graph_storage_) - .def_readwrite("edge_sampler", &DataLoader::edge_sampler_) - .def_readwrite("negative_sampler", &DataLoader::negative_sampler_) - .def_readwrite("neighbor_sampler", &DataLoader::neighbor_sampler_) - .def_readwrite("training_config", &DataLoader::training_config_) - .def_readwrite("evaluation_config", &DataLoader::evaluation_config_) - .def_readwrite("train", &DataLoader::train_) - .def_readwrite("epochs_processed", &DataLoader::epochs_processed_) - .def_readwrite("batches_processed", &DataLoader::batches_processed_) - .def_readwrite("current_edge", &DataLoader::current_edge_) - .def_readwrite("batches", &DataLoader::batches_) - .def_readwrite("batch_id_offset", &DataLoader::batch_id_offset_) - // .def_readwrite("batch_iterator", &DataLoader::batch_iterator_) # TODO Iterator needs bindings - .def_readwrite("batches_left", &DataLoader::batches_left_) - .def_readwrite("batches_processed", &DataLoader::total_batches_processed_) - .def_readwrite("all_read", &DataLoader::all_read_) - .def_readwrite("edge_buckets_per_buffer", &DataLoader::edge_buckets_per_buffer_) - .def_readwrite("node_ids_per_buffer", &DataLoader::node_ids_per_buffer_) - .def_readwrite("training_neighbor_sampler", &DataLoader::training_neighbor_sampler_) - .def_readwrite("evaluation_neighbor_sampler", &DataLoader::evaluation_neighbor_sampler_) - .def_readwrite("training_negative_sampler", &DataLoader::training_negative_sampler_) - 
.def_readwrite("evaluation_negative_sampler", &DataLoader::evaluation_negative_sampler_) - - .def(py::init([](shared_ptr graph_storage, std::string learning_task, shared_ptr training_config, - shared_ptr evaluation_config, shared_ptr encoder_config) { - LearningTask task = getLearningTask(learning_task); - return std::make_shared(graph_storage, task, training_config, evaluation_config, encoder_config); - }), - py::arg("graph_storage"), py::arg("learning_task"), py::arg("training_config"), py::arg("evaluation_config"), py::arg("encoder_config")) - - .def(py::init([](shared_ptr graph_storage, std::string learning_task, int batch_size, shared_ptr neg_sampler, - shared_ptr nbr_sampler, bool train) { - LearningTask task = getLearningTask(learning_task); - - if (task == LearningTask::LINK_PREDICTION) { - if (train) { - graph_storage->storage_ptrs_.train_edges = graph_storage->storage_ptrs_.edges; - } else { - graph_storage->storage_ptrs_.test_edges = graph_storage->storage_ptrs_.edges; - } - } else if (task == LearningTask::NODE_CLASSIFICATION) { - if (graph_storage->storage_ptrs_.nodes == nullptr) { - throw MariusRuntimeException("Node ids must be provided for node classification"); - } - - if (train) { - if (graph_storage->storage_ptrs_.node_labels == nullptr) { - throw MariusRuntimeException("Labels for the nodes must be provided when training with node classification"); - } - graph_storage->storage_ptrs_.train_nodes = graph_storage->storage_ptrs_.nodes; - } else { - graph_storage->storage_ptrs_.test_nodes = graph_storage->storage_ptrs_.nodes; - } - } - - return std::make_shared(graph_storage, task, batch_size, neg_sampler, nbr_sampler, train); - }), - py::arg("graph_storage"), py::arg("learning_task"), py::arg("batch_size") = 1000, py::arg("neg_sampler") = nullptr, - py::arg("nbr_sampler") = nullptr, py::arg("train") = false) - - .def(py::init([](torch::optional edges, std::string learning_task, torch::optional nodes, - torch::optional node_features, torch::optional node_embeddings, - torch::optional node_optimizer_state, torch::optional node_labels, - torch::optional train_edges, int batch_size, shared_ptr neg_sampler, - shared_ptr nbr_sampler, std::vector filter_edges, bool train) { - shared_ptr edges_s = nullptr; - shared_ptr nodes_s = nullptr; - shared_ptr node_features_s = nullptr; - shared_ptr node_embeddings_s = nullptr; - shared_ptr node_optimizer_state_s = nullptr; - shared_ptr node_labels_s = nullptr; - - LearningTask task = getLearningTask(learning_task); - - if (edges.has_value()) { - edges_s = std::make_shared(edges.value()); - } else { - throw UndefinedTensorException(); - } - - if (nodes.has_value()) { - nodes_s = std::make_shared(nodes.value()); - } else { - if (task == LearningTask::NODE_CLASSIFICATION) { - throw MariusRuntimeException("Tensor of node ids must be provided for node classification"); - } - } - - if (node_features.has_value()) { - node_features_s = std::make_shared(node_features.value()); - } - - if (node_embeddings.has_value()) { - node_embeddings_s = std::make_shared(node_embeddings.value()); - } - - if (node_optimizer_state.has_value()) { - node_optimizer_state_s = std::make_shared(node_optimizer_state.value()); - } else { - if (train && node_embeddings_s != nullptr) { - OptimizerState emb_state = torch::zeros_like(node_embeddings.value()); - node_optimizer_state_s = std::make_shared(emb_state); - } - } - - if (node_labels.has_value()) { - node_labels_s = std::make_shared(node_labels.value()); - } else { - if (task == LearningTask::NODE_CLASSIFICATION && 
train) { - throw MariusRuntimeException("Labels for the nodes must be provided when training with node classification"); - } - } - - GraphModelStoragePtrs ptrs; - ptrs.edges = edges_s; - ptrs.nodes = nodes_s; - ptrs.node_features = node_features_s; - ptrs.node_embeddings = node_embeddings_s; - ptrs.node_optimizer_state = node_optimizer_state_s; - ptrs.node_labels = node_labels_s; - - for (auto f_edges : filter_edges) { - ptrs.filter_edges.emplace_back(std::make_shared(f_edges)); - } - - if (task == LearningTask::LINK_PREDICTION) { - if (train) { - ptrs.train_edges = ptrs.edges; - } else { - ptrs.test_edges = ptrs.edges; - if (train_edges.has_value()) { - ptrs.train_edges = std::make_shared(train_edges.value()); - } - } - } else if (task == LearningTask::NODE_CLASSIFICATION) { - if (train) { - ptrs.train_nodes = ptrs.nodes; - } else { - ptrs.test_nodes = ptrs.nodes; - } - } - - auto gms = std::make_shared(ptrs, false); - return std::make_shared(gms, task, batch_size, neg_sampler, nbr_sampler, train); - }), - py::arg("edges"), py::arg("learning_task"), py::arg("nodes") = nullptr, py::arg("node_features") = nullptr, py::arg("node_embeddings") = nullptr, - py::arg("node_optim_state") = nullptr, py::arg("node_labels") = nullptr, py::arg("train_edges") = nullptr, py::arg("batch_size") = 1000, - py::arg("neg_sampler") = nullptr, py::arg("nbr_sampler") = nullptr, py::arg("filter_edges") = std::vector(), - py::arg("train") = false) - - .def("setBufferOrdering", &DataLoader::setBufferOrdering) - .def("setActiveEdges", &DataLoader::setActiveEdges) - .def("setActiveNodes", &DataLoader::setActiveNodes) - .def("initializeBatches", &DataLoader::initializeBatches, py::arg("prepare_encode") = false) - .def("clearBatches", &DataLoader::clearBatches) - .def("hasNextBatch", &DataLoader::hasNextBatch) - .def("getNextBatch", &DataLoader::getNextBatch, py::return_value_policy::reference) - .def("finishedBatch", &DataLoader::finishedBatch) - .def("getBatch", &DataLoader::getBatch, py::arg("device") = py::none(), py::arg("perform_map") = false, py::arg("worker_id") = 0, - py::return_value_policy::reference) - .def("edgeSample", &DataLoader::edgeSample, py::arg("batch"), py::arg("worker_id") = 0) - .def("nodeSample", &DataLoader::nodeSample, py::arg("batch"), py::arg("worker_id") = 0) - .def("loadCPUParameters", &DataLoader::loadCPUParameters, py::arg("batch")) - .def("loadGPUParameters", &DataLoader::loadGPUParameters, py::arg("batch")) - .def("updateEmbeddings", &DataLoader::updateEmbeddings, py::arg("batch"), py::arg("gpu") = false) - .def("nextEpoch", &DataLoader::nextEpoch) - .def("loadStorage", &DataLoader::loadStorage) - .def("epochComplete", &DataLoader::epochComplete) - .def("unloadStorage", &DataLoader::unloadStorage, py::arg("write") = false) - .def("getNumEdges", &DataLoader::getNumEdges) - .def("getEpochsProcessed", &DataLoader::getEpochsProcessed) - .def("getBatchesProcessed", &DataLoader::getBatchesProcessed) - .def("isTrain", &DataLoader::isTrain) - .def("setTrainSet", &DataLoader::setTrainSet) - .def("setValidationSet", &DataLoader::setValidationSet) - .def("setTestSet", &DataLoader::setTestSet); -} \ No newline at end of file diff --git a/src/cpp/python_bindings/data/graph_wrap.cpp b/src/cpp/python_bindings/data/graph_wrap.cpp deleted file mode 100644 index 74f49f80..00000000 --- a/src/cpp/python_bindings/data/graph_wrap.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "common/pybind_headers.h" -#include "data/graph.h" - -void init_graph(py::module &m) { - py::class_>(m, "MariusGraph") - 
.def_readwrite("src_sorted_edges", &MariusGraph::src_sorted_edges_) - .def_readwrite("dst_sorted_edges", &MariusGraph::dst_sorted_edges_) - .def_readwrite("active_in_memory_subgraph", &MariusGraph::active_in_memory_subgraph_) - .def_readwrite("num_nodes_in_memory", &MariusGraph::num_nodes_in_memory_) - .def_readwrite("node_ids", &MariusGraph::node_ids_) - .def_readwrite("out_sorted_uniques", &MariusGraph::out_sorted_uniques_) - .def_readwrite("out_offsets", &MariusGraph::out_offsets_) - .def_readwrite("out_num_neighbors", &MariusGraph::out_num_neighbors_) - .def_readwrite("in_sorted_uniques", &MariusGraph::in_sorted_uniques_) - .def_readwrite("in_offsets", &MariusGraph::in_offsets_) - .def_readwrite("in_num_neighbors", &MariusGraph::in_num_neighbors_) - .def_readwrite("max_out_num_neighbors_", &MariusGraph::max_out_num_neighbors_) - .def_readwrite("max_in_num_neighbors_", &MariusGraph::max_in_num_neighbors_) - .def(py::init<>()) - .def(py::init(), py::arg("src_sorted_edges"), py::arg("dst_sorted_edges"), py::arg("num_nodes_in_memory")) - .def("getEdges", &MariusGraph::getEdges, py::arg("incoming") = true) - .def("getRelationIDs", &MariusGraph::getRelationIDs, py::arg("incoming") = true) - .def("getNeighborOffsets", &MariusGraph::getNeighborOffsets, py::arg("incoming") = true) - .def("getNumNeighbors", &MariusGraph::getNumNeighbors, py::arg("incoming") = true) - .def("getNeighborsForNodeIds", &MariusGraph::getNeighborsForNodeIds, py::arg("node_ids"), py::arg("incoming"), py::arg("neighbor_sampling_layer"), - py::arg("max_neighbors_size"), py::arg("rate")) - .def("clear", &MariusGraph::clear) - .def("to", &MariusGraph::to, py::arg("device")); - - py::class_>(m, "DENSEGraph") - .def_readwrite("hop_offsets", &DENSEGraph::hop_offsets_) - .def_readwrite("in_neighbors", &DENSEGraph::in_neighbors_mapping_, "description of the variable") - .def_readwrite("out_neighbors", &DENSEGraph::out_neighbors_mapping_) - .def_readwrite("in_neighbors_vec", &DENSEGraph::in_neighbors_vec_) - .def_readwrite("out_neighbors_vec", &DENSEGraph::out_neighbors_vec_) - .def_readwrite("node_properties", &DENSEGraph::node_properties_) - .def_readwrite("num_nodes_in_memory", &DENSEGraph::num_nodes_in_memory_) - .def(py::init<>()) - .def(py::init, Indices, Indices, std::vector, Indices, int>(), - py::arg("hop_offsets"), py::arg("node_ids"), py::arg("in_offsets"), py::arg("in_neighbors_vec"), py::arg("in_neighbors"), py::arg("out_offsets"), - py::arg("out_neighbors_vec"), py::arg("out_neighbors"), py::arg("num_nodes_in_memory")) - .def("prepareForNextLayer", &DENSEGraph::prepareForNextLayer) - .def("getNeighborIDs", &DENSEGraph::getNeighborIDs, py::arg("incoming") = true, py::arg("global_ids") = false) - .def("getLayerOffset", &DENSEGraph::getLayerOffset) - .def("performMap", &DENSEGraph::performMap) - .def("setNodeProperties", &DENSEGraph::setNodeProperties, py::arg("node_properties")) - .def("clear", &DENSEGraph::clear) - .def("to", &DENSEGraph::to, py::arg("device"), py::arg("compute_stream") = nullptr, py::arg("transfer_stream") = nullptr); -} \ No newline at end of file diff --git a/src/cpp/python_bindings/data/samplers/edge_wrap.cpp b/src/cpp/python_bindings/data/samplers/edge_wrap.cpp deleted file mode 100644 index 33aaf966..00000000 --- a/src/cpp/python_bindings/data/samplers/edge_wrap.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Created by Jason Mohoney on 2/14/22. 
-// - -#include "common/pybind_headers.h" -#include "data/samplers/edge.h" - -class PyEdgeSampler : EdgeSampler { - public: - using EdgeSampler::EdgeSampler; - EdgeList getEdges(shared_ptr batch) override { PYBIND11_OVERRIDE_PURE_NAME(EdgeList, EdgeSampler, "getEdges", getEdges, batch); } -}; - -void init_edge_samplers(py::module &m) { - py::class_>(m, "EdgeSampler") - .def_readwrite("graph_storage", &EdgeSampler::graph_storage_) - .def("getEdges", &EdgeSampler::getEdges, py::arg("batch")); - - py::class_>(m, "RandomEdgeSampler") - .def_readwrite("without_replacement", &RandomEdgeSampler::without_replacement_) - .def(py::init, bool>(), py::arg("graph_storage"), py::arg("without_replacement") = true); -} \ No newline at end of file diff --git a/src/cpp/python_bindings/data/samplers/negative_wrap.cpp b/src/cpp/python_bindings/data/samplers/negative_wrap.cpp deleted file mode 100644 index 928b1e60..00000000 --- a/src/cpp/python_bindings/data/samplers/negative_wrap.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Created by Jason Mohoney on 2/14/22. -// - -#include "common/pybind_headers.h" -#include "data/samplers/negative.h" - -class PyNegativeSampler : NegativeSampler { - public: - using NegativeSampler::NegativeSampler; - using ReturnTensorTuple = std::tuple; - std::tuple getNegatives(shared_ptr graph, torch::Tensor edges, bool inverse) override { - PYBIND11_OVERRIDE_PURE_NAME(ReturnTensorTuple, NegativeSampler, "getNegatives", getNegatives, graph, edges, inverse); - } -}; - -void init_neg_samplers(py::module &m) { - py::class_>(m, "NegativeSampler") - .def("getNegatives", &NegativeSampler::getNegatives, py::arg("graph"), py::arg("edges") = torch::Tensor(), py::arg("inverse") = false); - - py::class_>(m, "CorruptNodeNegativeSampler") - .def_readwrite("num_chunks", &CorruptNodeNegativeSampler::num_chunks_) - .def_readwrite("num_negatives", &CorruptNodeNegativeSampler::num_negatives_) - .def_readwrite("degree_fraction", &CorruptNodeNegativeSampler::degree_fraction_) - .def_readwrite("filtered", &CorruptNodeNegativeSampler::filtered_) - .def(py::init([](int num_chunks, int num_negatives, float degree_fraction, bool filtered, string filter_mode) { - auto deg_filter_mode = getLocalFilterMode(filter_mode); - return std::make_shared(num_chunks, num_negatives, degree_fraction, filtered, deg_filter_mode); - }), - py::arg("num_chunks") = 1, py::arg("num_negatives") = 500, py::arg("degree_fraction") = 0.0, py::arg("filtered") = false, - py::arg("local_filter_mode") = "deg"); -} diff --git a/src/cpp/python_bindings/data/samplers/neighbor_wrap.cpp b/src/cpp/python_bindings/data/samplers/neighbor_wrap.cpp deleted file mode 100644 index 6e170ec6..00000000 --- a/src/cpp/python_bindings/data/samplers/neighbor_wrap.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// -// Created by Jason Mohoney on 2/14/22. 
-// - -#include "common/pybind_headers.h" -#include "data/samplers/neighbor.h" - -class PyNeighborSampler : NeighborSampler { - public: - using NeighborSampler::NeighborSampler; - DENSEGraph getNeighbors(torch::Tensor node_ids, shared_ptr graph, int worker_id) override { - PYBIND11_OVERRIDE_PURE_NAME(DENSEGraph, NeighborSampler, "getNeighbors", getNeighbors, node_ids, graph, worker_id); - } -}; - -void init_neighbor_samplers(py::module &m) { - py::class_>(m, "NeighborSampler") - .def_readwrite("storage", &NeighborSampler::storage_) - .def("getNeighbors", &NeighborSampler::getNeighbors, py::arg("node_ids"), py::arg("graph") = nullptr, py::arg("worker_id") = 0); - - py::class_>(m, "LayeredNeighborSampler") - .def_readwrite("sampling_layers", &LayeredNeighborSampler::sampling_layers_) - - .def(py::init([](shared_ptr storage, std::vector num_neighbors, bool use_hashmap_sets) { - std::vector> sampling_layers; - for (auto n : num_neighbors) { - shared_ptr ptr = std::make_shared(); - if (n == -1) { - ptr->type = NeighborSamplingLayer::ALL; - ptr->options = std::make_shared(); - } else { - ptr->type = NeighborSamplingLayer::UNIFORM; - auto opts = std::make_shared(); - opts->max_neighbors = n; - ptr->options = opts; - } - ptr->use_hashmap_sets = use_hashmap_sets; - sampling_layers.emplace_back(ptr); - } - return std::make_shared(storage, sampling_layers); - }), - py::arg("storage"), py::arg("num_neighbors"), py::arg("use_hashmap_sets") = false) - - .def(py::init([](shared_ptr graph, std::vector num_neighbors, bool use_hashmap_sets) { - std::vector> sampling_layers; - - for (auto n : num_neighbors) { - shared_ptr ptr = std::make_shared(); - if (n == -1) { - ptr->type = NeighborSamplingLayer::ALL; - ptr->options = std::make_shared(); - } else { - ptr->type = NeighborSamplingLayer::UNIFORM; - auto opts = std::make_shared(); - opts->max_neighbors = n; - ptr->options = opts; - } - ptr->use_hashmap_sets = use_hashmap_sets; - sampling_layers.emplace_back(ptr); - } - return std::make_shared(graph, sampling_layers); - }), - py::arg("graph"), py::arg("num_neighbors"), py::arg("use_hashmap_sets") = false) - - .def(py::init([](std::vector num_neighbors, bool use_hashmap_sets) { - std::vector> sampling_layers; - - for (auto n : num_neighbors) { - shared_ptr ptr = std::make_shared(); - if (n == -1) { - ptr->type = NeighborSamplingLayer::ALL; - ptr->options = std::make_shared(); - } else { - ptr->type = NeighborSamplingLayer::UNIFORM; - auto opts = std::make_shared(); - opts->max_neighbors = n; - ptr->options = opts; - } - ptr->use_hashmap_sets = use_hashmap_sets; - sampling_layers.emplace_back(ptr); - } - return std::make_shared(sampling_layers); - }), - py::arg("num_neighbors"), py::arg("use_hashmap_sets") = false); -} diff --git a/src/cpp/python_bindings/data/wrap.cpp b/src/cpp/python_bindings/data/wrap.cpp deleted file mode 100644 index f7cd4167..00000000 --- a/src/cpp/python_bindings/data/wrap.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include "common/pybind_headers.h" - -// data -void init_batch(py::module &); -void init_dataloader(py::module &); -void init_graph(py::module &); - -// data/samplers -void init_edge_samplers(py::module &); -void init_neg_samplers(py::module &); -void init_neighbor_samplers(py::module &); - -PYBIND11_MODULE(_data, m) { - m.doc() = "Objects for in memory processing and sampling"; - - // data/samplers - auto samplers_m = m.def_submodule("samplers"); - - samplers_m.doc() = "Graph Samplers"; - - init_edge_samplers(samplers_m); - init_neg_samplers(samplers_m); - 
init_neighbor_samplers(samplers_m); - - // data - init_batch(m); - init_dataloader(m); - init_graph(m); -} diff --git a/src/cpp/src/configuration/config.cpp b/src/cpp/src/configuration/config.cpp index 119ad520..7561db29 100644 --- a/src/cpp/src/configuration/config.cpp +++ b/src/cpp/src/configuration/config.cpp @@ -473,6 +473,10 @@ shared_ptr initTrainingConfig(pyobj python_config) { ret_config->checkpoint = initCheckpointConfig(python_config.attr("checkpoint")); ret_config->resume_training = cast_helper(python_config.attr("resume_training")); ret_config->resume_from_checkpoint = cast_helper(python_config.attr("resume_from_checkpoint")); + ret_config->batches_per_epoch = -1; + if(pybind11::hasattr(python_config, "batches_per_epoch")) { + ret_config->batches_per_epoch = cast_helper(python_config.attr("batches_per_epoch")); + } return ret_config; } @@ -485,6 +489,11 @@ shared_ptr initEvaluationConfig(pyobj python_config) { ret_config->pipeline = initPipelineConfig(python_config.attr("pipeline")); ret_config->epochs_per_eval = cast_helper(python_config.attr("epochs_per_eval")); ret_config->checkpoint_dir = cast_helper(python_config.attr("checkpoint_dir")); + ret_config->batches_per_epoch = -1; + if(pybind11::hasattr(python_config, "batches_per_epoch")) { + ret_config->batches_per_epoch = cast_helper(python_config.attr("batches_per_epoch")); + } + return ret_config; } diff --git a/src/cpp/src/data/dataloader.cpp b/src/cpp/src/data/dataloader.cpp index 456e1ccc..da3634f0 100644 --- a/src/cpp/src/data/dataloader.cpp +++ b/src/cpp/src/data/dataloader.cpp @@ -199,7 +199,7 @@ void DataLoader::setActiveNodes() { graph_storage_->setActiveNodes(node_ids); } -void DataLoader::initializeBatches(bool prepare_encode) { +void DataLoader::initializeBatches(bool prepare_encode, bool in_training_mode) { int64_t batch_id = 0; int64_t start_idx = 0; @@ -208,6 +208,11 @@ void DataLoader::initializeBatches(bool prepare_encode) { all_read_ = false; int64_t num_items; + int max_batches = training_config_->batches_per_epoch; + if(!in_training_mode) { + max_batches = evaluation_config_->batches_per_epoch; + } + if (prepare_encode) { num_items = graph_storage_->getNumNodes(); } else { @@ -240,9 +245,14 @@ void DataLoader::initializeBatches(bool prepare_encode) { batches.emplace_back(curr_batch); batch_id++; start_idx += batch_size; + + // Only keep the first max_batches batches + if(max_batches > 0 && batches.size() >= max_batches) { + break; + } } - batches_ = batches; + batches_ = batches; batches_left_ = batches_.size(); batch_iterator_ = batches_.begin(); } diff --git a/src/cpp/src/marius.cpp b/src/cpp/src/marius.cpp index 394ac728..288cb198 100644 --- a/src/cpp/src/marius.cpp +++ b/src/cpp/src/marius.cpp @@ -108,7 +108,6 @@ void marius_train(shared_ptr marius_config) { auto graph_model_storage = std::get<1>(tup); auto dataloader = std::get<2>(tup); - /* shared_ptr trainer; shared_ptr evaluator; @@ -160,7 +159,7 @@ void marius_train(shared_ptr marius_config) { if (marius_config->storage->export_encoded_nodes) { encode_and_export(dataloader, model, marius_config); } - } */ + } } void marius_eval(shared_ptr marius_config) { @@ -171,7 +170,6 @@ void marius_eval(shared_ptr marius_config) { shared_ptr evaluator; - /* if (marius_config->evaluation->epochs_per_eval > 0) { if (marius_config->evaluation->pipeline->sync) { evaluator = std::make_shared(dataloader, model); @@ -184,7 +182,6 @@ void marius_eval(shared_ptr marius_config) { if (marius_config->storage->export_encoded_nodes) { encode_and_export(dataloader, model, 
marius_config); } - */ } void marius(int argc, char *argv[]) { diff --git a/src/cpp/src/pipeline/evaluator.cpp b/src/cpp/src/pipeline/evaluator.cpp index d4475018..256d988b 100644 --- a/src/cpp/src/pipeline/evaluator.cpp +++ b/src/cpp/src/pipeline/evaluator.cpp @@ -30,7 +30,7 @@ void PipelineEvaluator::evaluate(bool validation) { } } - dataloader_->initializeBatches(false); + dataloader_->initializeBatches(false, false); if (dataloader_->evaluation_negative_sampler_ != nullptr) { if (dataloader_->evaluation_config_->negative_sampling->filtered) { @@ -66,7 +66,7 @@ void SynchronousEvaluator::evaluate(bool validation) { } } - dataloader_->initializeBatches(false); + dataloader_->initializeBatches(false, false); if (dataloader_->evaluation_negative_sampler_ != nullptr) { if (dataloader_->evaluation_config_->negative_sampling->filtered) { diff --git a/src/cpp/src/pipeline/trainer.cpp b/src/cpp/src/pipeline/trainer.cpp index c946357c..a42178db 100644 --- a/src/cpp/src/pipeline/trainer.cpp +++ b/src/cpp/src/pipeline/trainer.cpp @@ -23,6 +23,12 @@ PipelineTrainer::PipelineTrainer(shared_ptr dataloader, shared_ptrgraph_storage_->storage_ptrs_.train_nodes->getDim0(); } + // Log for every batch + if(dataloader_->training_config_->batches_per_epoch > 0) { + num_items = dataloader_->training_config_->batches_per_epoch * dataloader_->training_config_->batch_size; + logs_per_epoch = dataloader_->training_config_->batches_per_epoch; + } + progress_reporter_ = std::make_shared(item_name, num_items, logs_per_epoch); if (model->device_.is_cuda()) { @@ -37,7 +43,7 @@ void PipelineTrainer::train(int num_epochs) { dataloader_->setTrainSet(); } - dataloader_->initializeBatches(false); + dataloader_->initializeBatches(false, true); Timer timer = Timer(false); for (int epoch = 0; epoch < num_epochs; epoch++) { @@ -88,6 +94,12 @@ SynchronousTrainer::SynchronousTrainer(shared_ptr dataloader, shared num_items = dataloader_->graph_storage_->storage_ptrs_.train_nodes->getDim0(); } + // Log for every batch + if(dataloader_->training_config_->batches_per_epoch > 0) { + num_items = dataloader_->training_config_->batches_per_epoch * dataloader_->training_config_->batch_size; + logs_per_epoch = dataloader_->training_config_->batches_per_epoch; + } + progress_reporter_ = std::make_shared(item_name, num_items, logs_per_epoch); } @@ -96,7 +108,7 @@ void SynchronousTrainer::train(int num_epochs) { dataloader_->setTrainSet(); } - dataloader_->initializeBatches(false); + dataloader_->initializeBatches(false, true); Timer timer = Timer(false); diff --git a/src/cpp/src/storage/io.cpp b/src/cpp/src/storage/io.cpp index 6d5e4494..8b49630d 100644 --- a/src/cpp/src/storage/io.cpp +++ b/src/cpp/src/storage/io.cpp @@ -16,6 +16,7 @@ inline bool does_file_exists(const std::string& file_path) { } std::map> initializeEdges(shared_ptr storage_config, LearningTask learning_task) { + // Determined the file paths string train_filename = storage_config->dataset->dataset_dir + PathConstants::edges_directory + PathConstants::training + PathConstants::edges_file + PathConstants::file_ext; @@ -106,7 +107,6 @@ std::map> initializeEdges(shared_ptr(train_filename, num_train, num_columns, dtype, torch::kCPU); if (!storage_config->train_edges_pre_sorted) { @@ -144,7 +144,6 @@ std::map> initializeEdges(shared_ptr(train_filename, num_train, num_columns, dtype, storage_config->device_type); if (!storage_config->train_edges_pre_sorted) { @@ -230,7 +229,6 @@ std::map> initializeEdges(shared_ptrshuffle_input); if (storage_config->shuffle_input) { if 
(valid_edge_storage != nullptr) { valid_edge_storage->shuffle(valid_edge_weights_storage); @@ -240,14 +238,15 @@ std::map> initializeEdges(shared_ptr> storage_ptrs{{"train_edge_storage", train_edge_storage}, - {"train_edge_storage_dst_sort", train_edge_storage_dst_sort}, - {"valid_edge_storage", valid_edge_storage}, - {"test_edge_storage", test_edge_storage}, + std::map> storage_ptrs{{"train_edges", train_edge_storage}, + {"train_edges_dst_sort", train_edge_storage_dst_sort}, + {"validation_edges", valid_edge_storage}, + {"test_edges", test_edge_storage}, {"train_edge_weights_storage", train_edge_weights_storage}, {"valid_edge_weights_storage", valid_edge_weights_storage}, {"test_edge_weights_storage", test_edge_weights_storage}, {"train_edge_dst_sort_weights_storage", train_edge_dst_sort_weights_storage}}; + return storage_ptrs; } @@ -486,7 +485,9 @@ shared_ptr initializeStorageLinkPrediction(shared_ptr storage_ptrs.train_edges_dst_sort = edge_storages["train_edges_dst_sort"]; storage_ptrs.validation_edges = edge_storages["validation_edges"]; storage_ptrs.test_edges = edge_storages["test_edges"]; + storage_ptrs.train_edges_weights = edge_storages["train_edge_weights_storage"]; + storage_ptrs.train_edges_dst_sort_weights = edge_storages["train_edge_dst_sort_weights_storage"]; storage_ptrs.validation_edges_weights = edge_storages["valid_edge_weights_storage"]; storage_ptrs.test_edges_weights = edge_storages["test_edge_weights_storage"]; @@ -512,7 +513,11 @@ shared_ptr initializeStorageNodeClassification(shared_ptr(node_id_storages); storage_ptrs.valid_nodes = std::get<1>(node_id_storages); diff --git a/src/cpp/third_party/CMakeLists.txt b/src/cpp/third_party/CMakeLists.txt deleted file mode 100644 index 1423a979..00000000 --- a/src/cpp/third_party/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -function(initialize_submodule DIRECTORY) - if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}/.git) - find_package(Git QUIET REQUIRED) - message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}/.git does not exist. 
Initializing ${DIRECTORY} submodule ...") - execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init ${DIRECTORY} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE GIT_EXIT_CODE) - if(NOT GIT_EXIT_CODE EQUAL "0") - message(FATAL_ERROR "${GIT_EXECUTABLE} submodule update --init dependencies/${DIRECTORY} failed with exit code ${GIT_EXIT_CODE}, please checkout submodules") - endif() - endif() -endfunction(initialize_submodule) - -initialize_submodule(pybind11) -initialize_submodule(spdlog) -initialize_submodule(googletest) -initialize_submodule(parallel-hashmap) - -add_subdirectory(googletest EXCLUDE_FROM_ALL) -add_subdirectory(pybind11 EXCLUDE_FROM_ALL) -add_subdirectory(spdlog EXCLUDE_FROM_ALL) diff --git a/src/cpp/third_party/googletest b/src/cpp/third_party/googletest deleted file mode 160000 index 23ef2955..00000000 --- a/src/cpp/third_party/googletest +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 23ef29555ef4789f555f1ba8c51b4c52975f0907 diff --git a/src/cpp/third_party/parallel-hashmap b/src/cpp/third_party/parallel-hashmap deleted file mode 160000 index f7fbefac..00000000 --- a/src/cpp/third_party/parallel-hashmap +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f7fbefac75c9f29538bfe7b65f3b67c082c127d6 diff --git a/src/cpp/third_party/pybind11 b/src/cpp/third_party/pybind11 deleted file mode 160000 index e08a5811..00000000 --- a/src/cpp/third_party/pybind11 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e08a58111dbea38d667b209f7543864d51a3b185 diff --git a/src/cpp/third_party/spdlog b/src/cpp/third_party/spdlog deleted file mode 160000 index c5abaedd..00000000 --- a/src/cpp/third_party/spdlog +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c5abaeddcaa67e885de99db583396899a0cf43e6 diff --git a/src/python/tools/configuration/marius_config.py b/src/python/tools/configuration/marius_config.py index a9a0a322..fd82740f 100644 --- a/src/python/tools/configuration/marius_config.py +++ b/src/python/tools/configuration/marius_config.py @@ -724,6 +724,7 @@ def merge(self, input_config: DictConfig): @dataclass class TrainingConfig: batch_size: int = 1000 + batches_per_epoch: int = -1 negative_sampling: NegativeSamplingConfig = MISSING num_epochs: int = 10 pipeline: PipelineConfig = PipelineConfig() @@ -776,6 +777,7 @@ def merge(self, input_config: DictConfig): @dataclass class EvaluationConfig: batch_size: int = 1000 + batches_per_epoch: int = -1 negative_sampling: NegativeSamplingConfig = MISSING pipeline: PipelineConfig = PipelineConfig() epochs_per_eval: int = 1 @@ -946,4 +948,4 @@ def load_config(input_config_path, save=False): check_gnn_layers_alignment(output_config) check_full_graph_evaluation(output_config) - return output_config + return output_config \ No newline at end of file diff --git a/test/cpp/unit/data/samplers/test_negative.cpp b/test/cpp/unit/data/samplers/test_negative.cpp deleted file mode 100644 index c025f081..00000000 --- a/test/cpp/unit/data/samplers/test_negative.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// -// Created by Jason Mohoney on 2/9/22. 
-// - -#include -#include - -int num_nodes = 6; - -torch::Tensor edges = torch::tensor({{0, 2}, {0, 4}, {1, 3}, {1, 5}, {4, 2}, {5, 2}}, torch::kInt64); - -torch::Tensor typed_edges = torch::tensor({{0, 0, 2}, {0, 1, 4}, {1, 1, 3}, {1, 0, 5}, {4, 0, 2}, {5, 1, 2}}, torch::kInt64); - -torch::Tensor batch_edges = torch::tensor({{1, 5}, {0, 2}, {4, 2}}, torch::kInt64); -torch::Tensor batch_typed_edges = torch::tensor({{1, 0, 5}, {0, 0, 2}, {4, 0, 2}}, torch::kInt64); - -class CorruptNodeNegativeSamplerTest : public ::testing::Test { - protected: - shared_ptr graph; - shared_ptr typed_graph; - - void SetUp() override { - torch::Tensor dst_sorted_edges = edges.index_select(0, edges.select(1, 1).argsort(0)); - torch::Tensor dst_sorted_typed_edges = typed_edges.index_select(0, typed_edges.select(1, 2).argsort(0)); - - graph = std::make_shared(edges, dst_sorted_edges, num_nodes); - typed_graph = std::make_shared(typed_edges, dst_sorted_typed_edges, num_nodes); - - graph->sortAllEdges(torch::tensor({{0, 3}}, torch::kInt64)); - typed_graph->sortAllEdges(torch::tensor({{0, 1, 3}}, torch::kInt64)); - } -}; - -void validate_sample(shared_ptr sampler, torch::Tensor sample, shared_ptr graph) { - // validate shape - ASSERT_EQ(sample.size(0), sampler->num_chunks_); - - if (sampler->num_negatives_ != -1) { - ASSERT_EQ(sample.size(1), sampler->num_negatives_); - } else { - ASSERT_EQ(sample.size(1), graph->num_nodes_in_memory_); - } - - // validate max and min ids - ASSERT_TRUE(sample.max().item() < graph->num_nodes_in_memory_); - ASSERT_TRUE(sample.min().item() >= 0); -} - -void validate_filter_local(torch::Tensor filter, torch::Tensor sample, torch::Tensor edges_t, bool inverse) { - // check filtered edges are present in the graph - auto batch_accessor = edges_t.accessor(); - auto sample_accessor = sample.accessor(); - auto filter_accessor = filter.accessor(); - - bool has_relations = false; - if (edges_t.size(1) == 3) { - has_relations = true; - } - - int64_t num_chunks = sample.size(0); - int64_t num_edges = edges_t.size(0); - int64_t chunk_size = ceil((double)num_edges / num_chunks); - - for (int i = 0; i < filter.size(0); i++) { - int64_t src; - int64_t rel; - int64_t dst; - - bool found = false; - - int64_t edge_id = filter_accessor[i][0]; - - int chunk_id = edge_id / chunk_size; - - if (inverse) { - src = sample_accessor[chunk_id][filter_accessor[i][1]]; - if (has_relations) { - rel = batch_accessor[edge_id][1]; - dst = batch_accessor[edge_id][2]; - } else { - dst = batch_accessor[edge_id][1]; - } - } else { - src = batch_accessor[edge_id][0]; - dst = sample_accessor[chunk_id][filter_accessor[i][1]]; - if (has_relations) { - rel = batch_accessor[edge_id][1]; - } - } - - if (has_relations) { - for (int k = 0; k < edges_t.size(0); k++) { - if (batch_accessor[k][0] == src && batch_accessor[k][1] == rel && batch_accessor[k][2] == dst) { - found = true; - } - } - } else { - for (int k = 0; k < edges_t.size(0); k++) { - if (batch_accessor[k][0] == src && batch_accessor[k][1] == dst) { - found = true; - } - } - } - ASSERT_TRUE(found); - } -} - -void validate_filter_global(torch::Tensor filter, torch::Tensor sample, shared_ptr graph, torch::Tensor edges_t, bool inverse) { - // check filtered edges are present in the graph - auto graph_accessor = graph->src_sorted_edges_.accessor(); - auto batch_accessor = edges_t.accessor(); - auto sample_accessor = sample.accessor(); - auto filter_accessor = filter.accessor(); - - bool has_relations = false; - if (edges_t.size(1) == 3) { - has_relations = true; - } - - 
int64_t num_chunks = sample.size(0); - int64_t num_edges = edges_t.size(0); - int64_t chunk_size = ceil((double)num_edges / num_chunks); - - for (int i = 0; i < filter.size(0); i++) { - int64_t src; - int64_t rel; - int64_t dst; - - bool found = false; - - int64_t edge_id = filter_accessor[i][0]; - - int chunk_id = edge_id / chunk_size; - - if (inverse) { - src = sample_accessor[chunk_id][filter_accessor[i][1]]; - if (has_relations) { - rel = batch_accessor[edge_id][1]; - dst = batch_accessor[edge_id][2]; - } else { - dst = batch_accessor[edge_id][1]; - } - } else { - src = batch_accessor[edge_id][0]; - dst = sample_accessor[chunk_id][filter_accessor[i][1]]; - if (has_relations) { - rel = batch_accessor[edge_id][1]; - } - } - - if (has_relations) { - for (int k = 0; k < graph->src_sorted_edges_.size(0); k++) { - if (graph_accessor[k][0] == src && graph_accessor[k][1] == rel && graph_accessor[k][2] == dst) { - found = true; - } - } - } else { - for (int k = 0; k < graph->src_sorted_edges_.size(0); k++) { - if (graph_accessor[k][0] == src && graph_accessor[k][1] == dst) { - found = true; - } - } - } - ASSERT_TRUE(found); - } -} - -void test_unfiltered_corruption_sampler(shared_ptr sampler, shared_ptr graph, torch::Tensor edges_t) { - torch::Tensor sample; - torch::Tensor filter; - - std::tie(sample, filter) = sampler->getNegatives(graph, edges_t, false); - validate_sample(sampler, sample, graph); - validate_filter_local(filter, sample, edges_t, false); - - std::tie(sample, filter) = sampler->getNegatives(graph, edges_t, true); - validate_sample(sampler, sample, graph); - validate_filter_local(filter, sample, edges_t, true); -} - -void test_filtered_corruption_sampler(shared_ptr sampler, shared_ptr graph, torch::Tensor edges_t) { - torch::Tensor sample; - torch::Tensor filter; - - std::tie(sample, filter) = sampler->getNegatives(graph, edges_t, false); - validate_sample(sampler, sample, graph); - validate_filter_global(filter, sample, graph, edges_t, false); - - std::tie(sample, filter) = sampler->getNegatives(graph, edges_t, true); - validate_sample(sampler, sample, graph); - validate_filter_global(filter, sample, graph, edges_t, true); -} - -TEST_F(CorruptNodeNegativeSamplerTest, TestUniform) { - auto corrupt_uniform = std::make_shared(1, 5, 0.0, false); - test_unfiltered_corruption_sampler(corrupt_uniform, graph, batch_edges); - test_unfiltered_corruption_sampler(corrupt_uniform, typed_graph, batch_typed_edges); -} - -TEST_F(CorruptNodeNegativeSamplerTest, TestUniformChunked) { - auto corrupt_uniform_chunked = std::make_shared(3, 5, 0.0, false); - test_unfiltered_corruption_sampler(corrupt_uniform_chunked, graph, batch_edges); - test_unfiltered_corruption_sampler(corrupt_uniform_chunked, typed_graph, batch_typed_edges); -} - -TEST_F(CorruptNodeNegativeSamplerTest, TestMix) { - auto corrupt_mix = std::make_shared(1, 5, 0.5, false); - test_unfiltered_corruption_sampler(corrupt_mix, graph, batch_edges); - test_unfiltered_corruption_sampler(corrupt_mix, typed_graph, batch_typed_edges); -} - -TEST_F(CorruptNodeNegativeSamplerTest, TestMixChunked) { - auto corrupt_mix_chunked = std::make_shared(3, 5, 0.5, false); - test_unfiltered_corruption_sampler(corrupt_mix_chunked, graph, batch_edges); - test_unfiltered_corruption_sampler(corrupt_mix_chunked, typed_graph, batch_typed_edges); -} - -TEST_F(CorruptNodeNegativeSamplerTest, TestAllDegree) { - auto corrupt_all_degree = std::make_shared(1, 5, 1.0, false); - test_unfiltered_corruption_sampler(corrupt_all_degree, graph, batch_edges); - 
test_unfiltered_corruption_sampler(corrupt_all_degree, typed_graph, batch_typed_edges); -} - -TEST_F(CorruptNodeNegativeSamplerTest, TestAllDegreeChunked) { - auto corrupt_all_degree_chunked = std::make_shared(3, 5, 1.0, false); - test_unfiltered_corruption_sampler(corrupt_all_degree_chunked, graph, batch_edges); - test_unfiltered_corruption_sampler(corrupt_all_degree_chunked, typed_graph, batch_typed_edges); -} - -TEST_F(CorruptNodeNegativeSamplerTest, TestFilter) { - auto corrupt_filtered = std::make_shared(1, -1, 0.0, true); - test_filtered_corruption_sampler(corrupt_filtered, graph, batch_edges); - test_filtered_corruption_sampler(corrupt_filtered, typed_graph, batch_typed_edges); -} \ No newline at end of file
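
The substantive change in this patch is the new batches_per_epoch field on TrainingConfig and EvaluationConfig (default -1, meaning every batch in the epoch is used). When set to a positive value, DataLoader::initializeBatches keeps only the first batches_per_epoch batches of an epoch, and the trainers size the progress reporter as batches_per_epoch * batch_size items with one log per batch. A minimal sketch of how the option could be set in a Marius YAML configuration follows; the section and key names are assumed to mirror the Python dataclasses above and are illustrative rather than verified against a real config file:

    training:
      batch_size: 1000
      num_epochs: 10
      batches_per_epoch: 500    # truncate each training epoch to its first 500 batches
    evaluation:
      batch_size: 1000
      batches_per_epoch: 100    # cap each evaluation pass at 100 batches

Leaving the value at -1 preserves the previous behavior, since the truncation in initializeBatches only triggers when the configured value is greater than zero.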