Skip to content

Commit

Permalink
Numa benchmarking code
Browse files Browse the repository at this point in the history
  • Loading branch information
dsarda committed Sep 11, 2024
1 parent 46954c2 commit 08af4bb
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 4 deletions.
25 changes: 25 additions & 0 deletions .bash_history
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
ls
pwd
cd /root/
ls
numact --hardware
apt install -y numactl
numactl --hardware
apt-get install -y libnuma-dev
ls
cd numa_benchmarking/
g++ -o numa_test_runner numa_test.cpp -lnuma -std=c++9
g++ -o numa_test_runner numa_test.cpp -lnuma -std=c++11
ls
./numa_test_runner
exit
exit
ls
clear
cd numa_benchmarking/
g++ -o numa_test_runner num_test.cpp -lnuma
ls
g++ -o numa_test_runner numa_test.cpp -lnuma
chmod ugo+x ./numa_test_runner
./numa_test_runner
exit
2 changes: 2 additions & 0 deletions .gitconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[safe]
directory = *
11 changes: 7 additions & 4 deletions examples/docker/cpu_ubuntu/dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM ubuntu:22.04
RUN apt update
RUN apt-get update && apt update -y && apt upgrade -y

RUN apt install -y g++ \
make \
Expand All @@ -8,7 +8,8 @@ RUN apt install -y g++ \
vim \
git \
dstat \
python3-pip
numactl \
python3-pip --fix-missing

# install gcc-9
RUN apt install -y software-properties-common
Expand All @@ -27,5 +28,7 @@ RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
# install pytorch
RUN python3 -m pip install torch --index-url https://download.pytorch.org/whl/cpu

RUN mkdir /working_dir
WORKDIR /working_dir
RUN mkdir -p /root
WORKDIR /root

RUN apt-get install -y libnuma-dev
41 changes: 41 additions & 0 deletions numa_benchmarking/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
cmake_minimum_required(VERSION 3.20)
project(SimSIMDExample)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# This project is a benchmark: without an explicit build type, single-config
# generators compile with no optimization flags and the measured throughput is
# not representative. Default to Release unless the user chose otherwise.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type (defaults to Release for benchmarking)" FORCE)
endif()

include(FetchContent)

# Declare and fetch SimSIMD
# NOTE(review): GIT_TAG main tracks a moving branch; consider pinning a release
# tag or commit so benchmark results are reproducible.
FetchContent_Declare(
  simsimd
  GIT_REPOSITORY https://github.com/ashvardanian/simsimd.git
  GIT_SHALLOW TRUE
  GIT_TAG main
)
FetchContent_MakeAvailable(simsimd)

# Find libnuma through pkg-config. IMPORTED_TARGET creates PkgConfig::NUMA,
# which carries the include directories, compile flags, library search paths
# and libraries together, so they cannot get out of sync.
find_package(PkgConfig REQUIRED)
pkg_check_modules(NUMA REQUIRED IMPORTED_TARGET numa)

# Add your executable
add_executable(main main.cpp)

# Link SimSIMD and libnuma to your executable
target_link_libraries(main PRIVATE simsimd PkgConfig::NUMA)

# Include SimSIMD headers (libnuma's include paths and compile options come
# from the PkgConfig::NUMA target linked above)
target_include_directories(main PRIVATE
  ${simsimd_SOURCE_DIR}/include
)

# Add preprocessor definitions to handle missing types
target_compile_definitions(main PRIVATE
  SIMSIMD_NATIVE_F16=0
  SIMSIMD_NATIVE_BF16=0
)
72 changes: 72 additions & 0 deletions numa_benchmarking/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#include <chrono>
#include <cstring>
#include <iostream>
#include <string>

#include <numa.h>
#include <simsimd/simsimd.h>


// Benchmark configuration constants.
// All sizes are kept in integral types so the buffer size is computed exactly;
// routing the chunk count through float would rely on an implicit
// float -> size_t conversion and silently lose precision for large sizes.
constexpr int vector_size = 128;                               // floats per vector
constexpr size_t ONE_GB = static_cast<size_t>(1024) * 1024 * 1024; // bytes in one GiB
constexpr size_t NUM_CHUNKS = 10;                              // buffer size in GiB
constexpr size_t BUFFER_SIZE = NUM_CHUNKS * ONE_GB;            // Buffer size of 10 GB

/**
 * Scans a contiguous list of vectors, computing the squared L2 distance from
 * the query to each one, and prints the average distance and the throughput.
 *
 * @param query_vec       Pointer to the query vector (vector_size floats).
 * @param search_vectors  Pointer to num_vectors vectors stored back to back,
 *                        each vector_size floats long.
 * @param num_vectors     Number of vectors in search_vectors.
 * @param benchmark_name  Label printed with the result.
 */
void benchmark_scan_list(float* query_vec, float* search_vectors, size_t num_vectors, const std::string& benchmark_name) {
    // Nothing to scan: avoid printing a 0/0 average.
    if (num_vectors == 0) {
        std::cout << "Benchmark " << benchmark_name << ": No vectors to scan" << std::endl;
        return;
    }

    // Accumulate in double: a float accumulator loses precision after many
    // millions of additions.
    double total_distance = 0.0;
    double dist_result = 0.0;

    // Run the actual benchmark. steady_clock is monotonic, so the interval
    // cannot be distorted by wall-clock adjustments.
    const auto start = std::chrono::steady_clock::now();
    for (size_t j = 0; j < num_vectors; j++) {
        float* curr_search_vec = search_vectors + j * vector_size;
        simsimd_l2sq_f32(query_vec, curr_search_vec, vector_size, &dist_result);
        total_distance += dist_result;
    }
    const auto end = std::chrono::steady_clock::now();

    // Keep fractional milliseconds; truncating to whole milliseconds turns
    // short runs into a division by zero.
    const std::chrono::duration<double, std::milli> elapsed = end - start;
    const double time_taken = elapsed.count();

    std::cout << "Benchmark " << benchmark_name << ": Got average distance of " << total_distance / num_vectors << " at throughput of " << num_vectors / time_taken << " vectors/ms" << std::endl;
}

/**
 * Benchmarks scanning a BUFFER_SIZE buffer of vectors allocated on NUMA node 1
 * versus one allocated on NUMA node 0, with the worker asked to run on node 0.
 *
 * @return 0 on success; 1 if NUMA is unavailable, fewer than two nodes exist,
 *         or either buffer cannot be allocated.
 */
int main() {
    if (numa_available() < 0) {
        std::cerr << "NUMA is not available on this system" << std::endl;
        return 1;
    }

    // Ensure we have at least 2 NUMA nodes
    if (numa_max_node() < 1) {
        std::cerr << "This system doesn't have at least 2 NUMA nodes" << std::endl;
        return 1;
    }

    // Best effort: a failure is reported but the run continues. In that case
    // the "Node0"/"Node1" labels below may not reflect local vs. remote access.
    if (numa_run_on_node(0) != 0) {
        std::cerr << "Failed to force worker to run on node 0" << std::endl;
    }

    // Initialize the query vector with deterministic values; reading it
    // uninitialized would feed indeterminate floats into the distance kernel.
    float query_vector[vector_size];
    for (int i = 0; i < vector_size; i++) {
        query_vector[i] = static_cast<float>(i) / vector_size;
    }

    // Create the target vectors on both nodes
    void* node_zero_buffer = numa_alloc_onnode(BUFFER_SIZE, 0);
    void* node_one_buffer = numa_alloc_onnode(BUFFER_SIZE, 1);
    if (!node_zero_buffer || !node_one_buffer) {
        std::cerr << "Failed to allocate memory on numa nodes" << std::endl;
        // Only release what was actually allocated: numa_free is documented
        // for pointers returned by numa_alloc_*, not for a null pointer.
        if (node_zero_buffer) {
            numa_free(node_zero_buffer, BUFFER_SIZE);
        }
        if (node_one_buffer) {
            numa_free(node_one_buffer, BUFFER_SIZE);
        }
        return 1;
    }

    // Write to every page before timing. Physical pages are normally assigned
    // on first touch, so without this the scan would read memory that was
    // never placed on the requested node and would also time page faults.
    std::memset(node_zero_buffer, 0, BUFFER_SIZE);
    std::memset(node_one_buffer, 0, BUFFER_SIZE);

    float* node_zero_vectors = static_cast<float*>(node_zero_buffer);
    float* node_one_vectors = static_cast<float*>(node_one_buffer);

    size_t single_vector_size = vector_size * sizeof(float);
    size_t num_vectors = BUFFER_SIZE / single_vector_size;

    // Run the cross node benchmark
    std::cout << "Running benchmarking for buffer size of " << NUM_CHUNKS << " GB" << std::endl;
    benchmark_scan_list(query_vector, node_one_vectors, num_vectors, "Node1 Vectors");
    numa_free(node_one_vectors, BUFFER_SIZE);

    // Run the same node benchmark
    benchmark_scan_list(query_vector, node_zero_vectors, num_vectors, "Node0 Vectors");
    numa_free(node_zero_vectors, BUFFER_SIZE);

    return 0;
}

0 comments on commit 08af4bb

Please sign in to comment.