
Commit

Merge branch 'main' into Add_preprocess_comments
AnzeXie committed Aug 26, 2021
2 parents 7a4dde4 + 7133811 commit 3dcd491
Showing 169 changed files with 968 additions and 565 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -9,8 +9,8 @@ include(FindPackageHandleStandardArgs)
add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)

if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.0)
message(FATAL_ERROR "Clang version must be at least 12!")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.0)
message(FATAL_ERROR "Clang version must be at least 11!")
endif()
set(CLANG TRUE)
elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
43 changes: 33 additions & 10 deletions README.md
@@ -1,23 +1,33 @@
# Marius #

Marius is a system under active development for training embeddings for large-scale graphs on a single machine.

Training on large scale graphs requires a large amount of data movement to get embedding parameters from storage to the computational device.
Marius is designed to mitigate/reduce data movement overheads using:
- Pipelined training and IO
- Partition caching and buffer-aware data orderings
Marius is a system for large-scale graph learning. The system is currently in the alpha phase and is under active development.

Details on how Marius works can be found in our [OSDI '21 Paper](https://arxiv.org/abs/2101.08358); experiment scripts and configurations from the paper are available in the `osdi2021` branch.

Currently we support:
- Large-scale link prediction training
- Preprocessing and training of datasets in CSV format (single-file)
- Configuration file based API
- Single GPU training and evaluation
- Dataset sizes that fit in: GPU memory, CPU memory, and Disk.

See `docs/user_guide` for more details.
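The configuration-file API above is driven by a plain .ini file. A minimal illustrative fragment (section and key names follow the defaults listed in `docs/user_guide/configuration.rst`; the values here are examples, not recommendations, and a real config needs the remaining sections as well):

```ini
; Illustrative fragment only -- not a complete training configuration.
[loss]
loss=Ranking      ; margin only applies to the Ranking loss
margin=0.1
reduction=Mean    ; Mean or Sum

[reporting]
logs_per_epoch=10
log_level=info
```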

We are working on expanding the functionality of Marius to include:
- Graph neural network support
- Multi-GPU training
- Node classification
- Python API for user defined models, sampling and training procedures

## Requirements ##
(Other versions may work, but are untested)
* Ubuntu 18.04 or MacOS 10.15
* CUDA 10.1 or 10.2 (If using GPU training)
* CuDNN 7 (If using GPU training)
* CUDA >= 10 (If using GPU training)
* pytorch >= 1.7
* python >= 3.6
* pip >= 21
* GCC >= 9 (On Linux) or Clang 12.0 (On MacOS)
* GCC >= 9 (On Linux)
* Clang >= 11 (On MacOS)
* cmake >= 3.12
* make >= 3.8
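The numeric version floors above can be checked programmatically; the sketch below is illustrative only (the `meets` helper is hypothetical, and native toolchain versions such as GCC/Clang/cmake still need checking separately):

```python
# Compare dotted version strings numerically, so "3.8" < "3.12"
# (a lexical string comparison would get this wrong).
import sys

def meets(version, minimum):
    to_tuple = lambda v: tuple(int(x) for x in v.split("."))
    parts, floor = to_tuple(version), to_tuple(minimum)
    # Pad the shorter tuple with zeros so "21" compares like "21.0.0".
    width = max(len(parts), len(floor))
    pad = lambda t: t + (0,) * (width - len(t))
    return pad(parts) >= pad(floor)

# Check the running interpreter against the python >= 3.6 requirement.
assert meets("%d.%d" % sys.version_info[:2], "3.6"), "python >= 3.6 required"

print(meets("21.1.2", "21"))   # pip-style check → True
print(meets("3.8", "3.12"))    # numeric, not lexical → False
```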

@@ -179,4 +189,17 @@ Arxiv Version:
primaryClass={cs.LG}
}
```
OSDI Version (not yet available):
OSDI Version:
```
@inproceedings {273733,
author = {Jason Mohoney and Roger Waleffe and Henry Xu and Theodoros Rekatsinas and Shivaram Venkataraman},
title = {Marius: Learning Massive Graph Embeddings on a Single Machine},
booktitle = {15th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 21)},
year = {2021},
isbn = {978-1-939133-22-9},
pages = {533--549},
url = {https://www.usenix.org/conference/osdi21/presentation/mohoney},
publisher = {{USENIX} Association},
month = jul,
}
```
3 changes: 3 additions & 0 deletions docs/cpp_api/datatypes.rst
@@ -57,5 +57,8 @@ Datatypes
.. doxygenenum:: LossFunctionType
:project: Marius

.. doxygenenum:: ReductionType
:project: Marius

.. doxygenenum:: RelationOperatorType
:project: Marius
21 changes: 0 additions & 21 deletions docs/cpp_api/decoder.rst
@@ -58,27 +58,6 @@ Decoder
:protected-members:
:undoc-members:

.. doxygenclass:: LossFunction
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: SoftMax
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: RankingLoss
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: LinkPredictionDecoder
:project: Marius
:members:
52 changes: 52 additions & 0 deletions docs/cpp_api/loss.rst
@@ -0,0 +1,52 @@
.. _loss:

LossFunction
********************
.. doxygenclass:: LossFunction
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: SoftMax
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: RankingLoss
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: BCEAfterSigmoidLoss
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: BCEWithLogitsLoss
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: MSELoss
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:

.. doxygenclass:: SoftPlusLoss
:project: Marius
:members:
:private-members:
:protected-members:
:undoc-members:
28 changes: 23 additions & 5 deletions docs/user_guide/configuration.rst
@@ -142,8 +142,6 @@ learning_rate float No .1
regularization_coef float No 2e-6 Coefficient to scale the regularization loss.
regularization_norm int No 2 Norm of the regularization.
optimizer string No Adagrad [Adagrad] Currently Adagrad is the only supported optimizer.
loss string No SoftMax [SoftMax, Ranking] Sets the loss function. The Ranking loss can be tuned with the margin parameter.
margin float No 0 Sets the margin for the Ranking loss function
average_gradients bool No false If true, the gradients will be averaged when accumulating gradients for a batch. If false, the gradients will be summed.
synchronous bool No false If true, the training will be performed synchronously without use of the training pipeline. If false, the training pipeline will be used. If embedding data is stored in HostMemory or the PartitionBuffer, synchronous training will be slow due to data movement wait times.
num_epochs int No 10 The number of epochs to train to.
@@ -152,6 +150,23 @@ shuffle_interval int No 1
=========================== ====== ======== ========== =========================================== ===================


.. _loss_option:

[loss]
^^^^^^

The loss section allows for setting loss function options.

=========================== ====== ======== ========== ============================================================================================================================================================================================================ ===================
Name Type Required Default Valid Values Description
--------------------------- ------ -------- ---------- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ -------------------
loss string No SoftMax [SoftMax, Ranking, BCEAfterSigmoid, BCEWithLogits, MSE, SoftPlus] Sets the loss function. The Ranking loss can be tuned with the margin parameter.
margin float No 0 Sets the margin for the Ranking loss function.
reduction string No Mean [Mean, Sum] Sets the reduction to apply to the loss.
=========================== ====== ======== ========== ============================================================================================================================================================================================================ ===================
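As an illustration of how the `margin` and `reduction` options interact (a sketch, not Marius's implementation): the Ranking loss penalizes each negative sample whose score comes within `margin` of its positive counterpart, and `reduction` controls how the per-pair penalties are combined:

```python
# Hinge-style ranking loss over positive/negative score pairs.
# Illustrative only -- Marius computes this on tensors, not Python lists.

def ranking_loss(pos_scores, neg_scores, margin=0.0, reduction="Mean"):
    # Penalty is nonzero whenever a negative scores within `margin`
    # of (or above) its positive counterpart.
    losses = [max(0.0, margin - p + n) for p, n in zip(pos_scores, neg_scores)]
    if reduction == "Mean":
        return sum(losses) / len(losses)
    if reduction == "Sum":
        return sum(losses)
    raise ValueError("unknown reduction: " + reduction)

pos = [2.0, 1.5, 0.5]   # scores of true edges
neg = [0.5, 1.0, 1.0]   # scores of negative samples
print(ranking_loss(pos, neg, margin=1.0, reduction="Sum"))   # → 2.0
print(ranking_loss(pos, neg, margin=1.0, reduction="Mean"))  # → 0.666...
```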



[training_pipeline]
^^^^^^^^^^^^^^^^^^^

@@ -301,8 +316,6 @@ Here we show the defaults for each configuration options in .ini format.
regularization_coef=2e-6
regularization_norm=2
optimizer=Adagrad
loss=SoftMax
margin=0
average_gradients=false
synchronous=false
num_epochs=10
@@ -311,6 +324,11 @@ Here we show the defaults for each configuration options in .ini format.
checkpoint_interval=9999
shuffle_interval=1

[loss]
loss=SoftMax
margin=0
reduction=Mean

[training_pipeline]
max_batches_in_flight=16
embeddings_host_queue_size=4
@@ -359,4 +377,4 @@ Here we show the defaults for each configuration options in .ini format.

[reporting]
logs_per_epoch=10
log_level=info
log_level=info
7 changes: 5 additions & 2 deletions examples/training/configs/codex_l_cpu.ini
@@ -23,15 +23,17 @@ negatives=512
degree_fraction=.5
learning_rate=.1
optimizer=Adagrad
loss=SoftMax
synchronous=false
num_epochs=10
shuffle_interval=1

[loss]
loss=SoftMax

[training_pipeline]
max_batches_in_flight=16
num_embedding_loader_threads=2
num_compute_threads=4
num_compute_threads=4
num_embedding_update_threads=2

[evaluation]
@@ -55,3 +57,4 @@ relations_ids=./output_dir/rel_mapping.bin
[reporting]
logs_per_epoch=10
log_level=info

7 changes: 5 additions & 2 deletions examples/training/configs/codex_l_gpu.ini
@@ -23,11 +23,13 @@ negatives=512
degree_fraction=.5
learning_rate=.1
optimizer=Adagrad
loss=SoftMax
synchronous=true
num_epochs=10
shuffle_interval=1

[loss]
loss=SoftMax

[evaluation]
batch_size=1000
number_of_chunks=1
@@ -49,4 +51,5 @@ relations_ids=./output_dir/rel_mapping.bin

[reporting]
logs_per_epoch=10
log_level=info
log_level=info

11 changes: 7 additions & 4 deletions examples/training/configs/codex_l_multi_gpu.ini
@@ -25,11 +25,13 @@ degree_fraction=.5
negative_sampling_access=Uniform
learning_rate=.1
optimizer=Adagrad
loss=SoftMax
synchronous=false
num_epochs=10
shuffle_interval=1

[loss]
loss=SoftMax

[training_pipeline]
max_batches_in_flight=16
embeddings_host_queue_size=4
@@ -38,7 +40,7 @@ gradients_host_queue_size=4
gradients_device_queue_size=4
num_embedding_loader_threads=2
num_embedding_transfer_threads=2
num_compute_threads=2
num_compute_threads=2
num_gradient_transfer_threads=2
num_embedding_update_threads=2

@@ -58,7 +60,7 @@ embeddings_host_queue_size=8
embeddings_device_queue_size=8
num_embedding_loader_threads=4
num_embedding_transfer_threads=4
num_evaluate_threads=2
num_evaluate_threads=2

[path]
base_directory=data/
@@ -70,4 +72,5 @@ relations_ids=./output_dir/rel_mapping.bin

[reporting]
logs_per_epoch=10
log_level=info
log_level=info

9 changes: 6 additions & 3 deletions examples/training/configs/codex_m_cpu.ini
@@ -23,15 +23,17 @@ negatives=512
degree_fraction=.5
learning_rate=.1
optimizer=Adagrad
loss=SoftMax
synchronous=false
num_epochs=10
shuffle_interval=1

[loss]
loss=SoftMax

[training_pipeline]
max_batches_in_flight=16
num_embedding_loader_threads=2
num_compute_threads=4
num_compute_threads=4
num_embedding_update_threads=2

[evaluation]
@@ -54,4 +56,5 @@ relations_ids=./output_dir/rel_mapping.bin

[reporting]
logs_per_epoch=10
log_level=info
log_level=info

7 changes: 5 additions & 2 deletions examples/training/configs/codex_m_gpu.ini
@@ -23,11 +23,13 @@ negatives=512
degree_fraction=.5
learning_rate=.1
optimizer=Adagrad
loss=SoftMax
synchronous=true
num_epochs=10
shuffle_interval=1

[loss]
loss=SoftMax

[evaluation]
batch_size=1000
number_of_chunks=1
Expand All @@ -49,4 +51,5 @@ relations_ids=./output_dir/rel_mapping.bin

[reporting]
logs_per_epoch=10
log_level=info
log_level=info
