Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fault Tolerance for LULESH #3

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Build the "lulesh" executable.
# Sources are every *.cc / *.h file found in this directory by globbing, plus
# the fault-tolerance option helpers that live in ../util.
FILE(GLOB SRCS *.cc)
FILE(GLOB HEADERS *.h)

add_executable ("lulesh"
${SRCS}
${HEADERS}
"../util/fault-tolerance-options.h"
"../util/fault-tolerance-options.c"
)

# Extra warnings for this target only.
target_compile_options("lulesh"
PRIVATE
"-Wall"
"-Wextra"
)

# Preprocessor switches. add_definitions() is directory-scoped, so these apply
# to every target defined in this directory, not just "lulesh".
# FAULT_TOLERANCE enables the checkpoint()/restore() members of laik_vector.
add_definitions("-DUSE_MPI=1" "-DREPARTITIONING" "-DFAULT_TOLERANCE")

# Per-file flags for the globbed sources: the listed diagnostics are kept as
# plain warnings even if -Werror is in effect.
# NOTE(review): no -Werror is set in this file; presumably it comes from a
# parent CMakeLists.txt - confirm.
foreach(SOURCE ${SRCS} ${HEADERS})
set_source_files_properties( ${SOURCE} PROPERTIES COMPILE_FLAGS "-Wno-error=implicit-fallthrough -Wno-error=unused-parameter -Wno-error=unused-but-set-parameter -Wno-error=unknown-pragmas" )
endforeach()

# Link against LAIK, the math library and MPI.
target_link_libraries ("lulesh"
PRIVATE "laik"
PRIVATE "m"
PRIVATE "mpi"
)

345 changes: 254 additions & 91 deletions laik-lulesh-repartition.cc

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions laik_partitioners.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include <laik_partitioners.h>
#include <lulesh.h>
#include "laik_partitioners.h"
#include "lulesh.h"

/**
* @brief Exclusive Partitioner
Expand Down
92 changes: 89 additions & 3 deletions laik_vector.cc
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
#include <laik_vector.h>
#include "laik_vector.h"
#include "laik_vector_comm_exclusive_halo.h"
#include "laik_vector_comm_overlapping_overlapping.h"
#include "laik_vector_repart_exclusive.h"
#include "laik_vector_repart_overlapping.h"

#include <laik_partitioners.h>
#include <lulesh.h>
#include "laik_partitioners.h"
#include "lulesh.h"
#include <limits.h>
#include <type_traits>
#include <string.h>
#include <assert.h>
#include <iostream>
#include <inttypes.h>

template <typename T>
laik_vector<T>::laik_vector(Laik_Instance* inst, Laik_Group* world, Laik_Space* indexSpace, Laik_Partitioning *p1, Laik_Partitioning *p2, Laik_Transition* t1, Laik_Transition* t2, Laik_ReductionOperation operation):reduction_operation(operation){
Expand Down Expand Up @@ -44,6 +47,89 @@ void laik_vector<T>::test_print(){
/**
 * @brief clearing laik_vectors
 *
 * Currently a no-op: the body is empty, so calling it changes nothing.
 */
template <typename T>
void laik_vector<T>::clear(){}

/**
 * @brief Copy this process' LAIK mappings into an STL vector.
 *
 * Walks over the local slices of the partitioning that is currently active on
 * `data`, fetches the 1d mapping with the same index and copies its elements
 * into `data_vector`. Mappings are placed back to back, i.e. mapping n starts
 * where mapping n-1 ended (for equally sized mappings this is `n * cnt`).
 *
 * @param data_vector destination vector; its capacity has to cover all copied
 *                    elements (checked by assertion)
 */
template<typename T>
void laik_vector<T>::copyLaikDataToVector(std::vector<T> &data_vector) {
    // memcpy is only valid for trivially copyable element types
    static_assert(std::is_trivially_copyable<T>::value,
                  "laik_vector elements are copied with memcpy");

    // validate the container before it is used for the first time
    assert(this->data != nullptr);
    auto *activePartitioning = laik_data_get_partitioning(this->data);
    assert(activePartitioning != nullptr);

    // copy the data back into the stl vectors
    const int nSlices = laik_my_slicecount(activePartitioning);
    uint64_t elemOffset = 0; // running write position inside data_vector
    for (int n = 0; n < nSlices; n++) {
        uint64_t cnt = 0;
        T *base = nullptr;
        laik_get_map_1d(this->data, n, (void **) &base, &cnt);
        laik_log(LAIK_LL_Debug, "Copy LAIK data to vector: vector (capacity) %zu data %" PRIu64
                                " offset %" PRIu64 " length %" PRIu64, data_vector.capacity(), cnt,
                 elemOffset, cnt);
        // NOTE(review): the bound is the capacity, not the size, of the vector;
        // kept as in the original - confirm callers resize() beforehand.
        assert(elemOffset + cnt <= data_vector.capacity());
        // skip empty mappings: base may be null and memcpy must not see that
        if (cnt > 0) {
            memcpy(data_vector.data() + elemOffset, base, cnt * sizeof(T));
        }
        elemOffset += cnt;
    }
}

/**
 * @brief Copy the content of an STL vector into this process' LAIK mappings.
 *
 * Walks over the local slices of the partitioning that is currently active on
 * `data` (the same partitioning the mappings returned by laik_get_map_1d
 * belong to) and fills each 1d mapping from `data_vector`. Mappings are read
 * back to back, i.e. mapping n starts where mapping n-1 ended (for equally
 * sized mappings this is `n * cnt`).
 *
 * @param data_vector source vector; its capacity has to cover all elements
 *                    that are read (checked by assertion)
 */
template<typename T>
void laik_vector<T>::copyVectorToLaikData(std::vector<T> &data_vector) {
    // memcpy is only valid for trivially copyable element types
    static_assert(std::is_trivially_copyable<T>::value,
                  "laik_vector elements are copied with memcpy");

    assert(this->data != nullptr);
    auto *activePartitioning = laik_data_get_partitioning(this->data);
    assert(activePartitioning != nullptr);

    // copy the data from stl vector into the laik container
    const int nSlices = laik_my_slicecount(activePartitioning);
    uint64_t elemOffset = 0; // running read position inside data_vector
    for (int n = 0; n < nSlices; n++) {
        uint64_t cnt = 0;
        T *base = nullptr;
        laik_get_map_1d(this->data, n, (void **) &base, &cnt);
        // NOTE(review): the bound is the capacity, not the size, of the vector;
        // kept as in the original - confirm callers resize() beforehand.
        assert(elemOffset + cnt <= data_vector.capacity());
        // skip empty mappings: base may be null and memcpy must not see that
        if (cnt > 0) {
            memcpy(base, data_vector.data() + elemOffset, cnt * sizeof(T));
        }
        elemOffset += cnt;
    }
}

/**
 * @brief Resize an STL vector to the cube of the first mapping's element count.
 *
 * Requires that the partitioning active on `data` has exactly one local
 * mapping (checked by assertion). The new size is `cnt * cnt * cnt`, where
 * `cnt` is the element count reported for mapping 0.
 * NOTE(review): presumably the mapping covers one edge of a cubic domain -
 * confirm against the callers.
 *
 * @param data_vector vector to resize
 */
template <typename T>
void laik_vector<T>::resizeVector(std::vector<T> &data_vector) {// resize vector
    uint64_t cnt = 0;
    T* base = nullptr; // only needed as out-parameter of laik_get_map_1d
    assert(laik_my_mapcount(laik_data_get_partitioning(this->data)) == 1);
    laik_get_map_1d(this->data, 0, (void **)&base, &cnt);
    (void) base;
    // keep the product in 64 bit; squeezing it into an int truncated large sizes
    const uint64_t newSize = cnt * cnt * cnt;
    data_vector.resize(static_cast<typename std::vector<T>::size_type>(newSize));
}

/**
 * @brief Resize an STL vector to the number of elements this process owns.
 *
 * Sums the sizes of all local slices of the partitioning currently active on
 * `data`, logs the change and resizes `data_vector` to that total.
 *
 * @param data_vector vector to resize
 */
template <typename T>
void laik_vector<T>::resizeVectorToLaikData(std::vector<T> &data_vector) {// resize vector
    auto *activePartitioning = laik_data_get_partitioning(data);
    const int sliceCount = laik_my_slicecount(activePartitioning);

    uint64_t totalElements = 0;
    int sliceNo = 0;
    while (sliceNo < sliceCount) {
        auto *taskSlice = laik_my_slice(activePartitioning, sliceNo);
        totalElements += laik_slice_size(laik_taskslice_get_slice(taskSlice));
        ++sliceNo;
    }

    laik_log(LAIK_LL_Info, "Resizing vector from %zu to %" PRIu64, data_vector.capacity(), totalElements);
    data_vector.resize(totalElements);
}

/**
 * @brief Bring the LAIK container into partitioning p1 ahead of a migration.
 *
 * @param suppressDataSwitchToP1 when true nothing happens; when false `data`
 *        is switched to `p1` with LAIK_DF_None / LAIK_RO_None
 */
template <typename T>
void laik_vector<T>::prepareMigration(bool suppressDataSwitchToP1) {
    if (suppressDataSwitchToP1) {
        return; // caller asked to leave the current partitioning untouched
    }
    laik_switchto_partitioning(this->data, this->p1, LAIK_DF_None, LAIK_RO_None);
}

#ifdef FAULT_TOLERANCE
/**
 * @brief Create a LAIK checkpoint of this vector's data container.
 *
 * Forwards to laik_checkpoint_create using the laik_Master partitioner, the
 * group `data` belongs to and LAIK_RO_Min.
 *
 * @param redundancyCount  passed through to laik_checkpoint_create
 * @param rotationDistance passed through to laik_checkpoint_create
 * @return the checkpoint handle returned by laik_checkpoint_create
 */
template<typename T>
Laik_Checkpoint * laik_vector<T>::checkpoint(int redundancyCount, int rotationDistance) {
    Laik_Group *dataGroup = laik_data_get_group(data);
    Laik_Checkpoint *created = laik_checkpoint_create(data, laik_Master, redundancyCount,
                                                      rotationDistance, dataGroup, LAIK_RO_Min);
    return created;
}

/**
 * @brief Restore this vector's data container from a checkpoint.
 *
 * Creates a new laik_Master partitioning of `indexSpace` over `newGroup`,
 * switches `data` to it (LAIK_DF_None, so no values are carried over) and
 * then lets laik_checkpoint_restore fill `data` from the checkpoint.
 *
 * @param checkpoint checkpoint to restore from; must be non-null and carry
 *                   data with an active partitioning (checked by assertion)
 * @param newGroup   group the restored data is partitioned over; must be non-null
 */
template <typename T>
void laik_vector<T>::restore(Laik_Checkpoint *checkpoint, Laik_Group *newGroup) {
    // check the handles themselves before looking inside the checkpoint
    assert(checkpoint != nullptr);
    assert(newGroup != nullptr);
    assert(checkpoint->data != nullptr);
    assert(laik_data_get_partitioning(checkpoint->data) != nullptr);

    // Set partitioning to backup partitioning so that it can be migrated later
    // NOTE(review): the partitioning created here is never freed in this
    // function - confirm who owns it.
    Laik_Partitioning* newPartitioning = laik_new_partitioning(laik_Master, newGroup, indexSpace, nullptr);
    laik_switchto_partitioning(data, newPartitioning, LAIK_DF_None, LAIK_RO_None);
    laik_checkpoint_restore(checkpoint, data);
}

#endif

template class laik_vector<double>;

18 changes: 17 additions & 1 deletion laik_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ extern "C"{
template <typename T>
class laik_vector
{

public:
/**
* @brief laik_vector constructor
Expand Down Expand Up @@ -67,13 +68,26 @@ class laik_vector
* @param t_new_1 transition to p_new_1
* @param t_new_2 transition to p_new_2
*/
virtual void migrate(Laik_Group* new_group, Laik_Partitioning* p_new_1, Laik_Partitioning* p_new_2, Laik_Transition* t_new_1, Laik_Transition* t_new_2) = 0;
virtual void
migrate(Laik_Group *new_group, Laik_Partitioning *p_new_1, Laik_Partitioning *p_new_2, Laik_Transition *t_new_1,
Laik_Transition *t_new_2, bool suppressSwitchToP1) = 0;

/**
* @brief clearing laik_vectors
*/
void clear();

void copyLaikDataToVector(std::vector<T> &data_vector);
void copyVectorToLaikData(std::vector<T> &data_vector);

void resizeVector(std::vector<T>&);
void resizeVectorToLaikData(std::vector<T>&);

#ifdef FAULT_TOLERANCE
virtual Laik_Checkpoint *checkpoint(int redundancyCount, int rotationDistance);
virtual void restore(Laik_Checkpoint *checkpoint, Laik_Group *newGroup);
#endif

protected:
// members from laik
Laik_Instance* inst; // laik context
Expand Down Expand Up @@ -111,6 +125,8 @@ class laik_vector
* @brief test_print printing laik_vector for debug
*/
void test_print();

void prepareMigration(bool suppressDataSwitchToP1);
};

#endif // LAIK_VECTOR
9 changes: 5 additions & 4 deletions laik_vector_comm_exclusive_halo.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include"laik_vector_comm_exclusive_halo.h"
#include <laik_partitioners.h>
#include <lulesh.h>
#include "laik_partitioners.h"
#include "lulesh.h"
#include <limits.h>
#include <type_traits>
#include <string.h>
Expand Down Expand Up @@ -248,12 +248,13 @@ void laik_vector_comm_exclusive_halo<T>::switch_to_p2(){
}

template <typename T>
void laik_vector_comm_exclusive_halo<T>::migrate(Laik_Group* new_group, Laik_Partitioning* p_new_1, Laik_Partitioning* p_new_2, Laik_Transition* t_new_1, Laik_Transition* t_new_2){
void laik_vector_comm_exclusive_halo<T>::migrate(Laik_Group* new_group, Laik_Partitioning* p_new_1, Laik_Partitioning* p_new_2, Laik_Transition* t_new_1, Laik_Transition* t_new_2,
bool suppressSwitchToP1){
uint64_t cnt;
int* base;
//int slice = 0;

laik_switchto_partitioning(this->data, this->p1, LAIK_DF_None, LAIK_RO_None);
this->prepareMigration(suppressSwitchToP1);

// use the reservation API to precalculate the pointers
Laik_Reservation* reservation = laik_reservation_new(this->data);
Expand Down
3 changes: 2 additions & 1 deletion laik_vector_comm_exclusive_halo.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ class laik_vector_comm_exclusive_halo:public laik_vector<T>
void resize(int count) override;
void switch_to_p1() override;
void switch_to_p2() override;
void migrate(Laik_Group* new_group, Laik_Partitioning* p_new_1, Laik_Partitioning* p_new_2, Laik_Transition* t_new_1, Laik_Transition* t_new_2) override;
void migrate(Laik_Group* new_group, Laik_Partitioning* p_new_1, Laik_Partitioning* p_new_2, Laik_Transition* t_new_1, Laik_Transition* t_new_2,
bool suppressSwitchToP1) override;
};

template <typename T>
Expand Down
10 changes: 6 additions & 4 deletions laik_vector_comm_overlapping_overlapping.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include"laik_vector_comm_overlapping_overlapping.h"
#include <laik_partitioners.h>
#include <lulesh.h>
#include "laik_partitioners.h"
#include "lulesh.h"
#include <limits.h>
#include <type_traits>
#include <string.h>
Expand Down Expand Up @@ -99,12 +99,14 @@ void laik_vector_comm_overlapping_overlapping<T>::switch_to_p2(){
}

template <typename T>
void laik_vector_comm_overlapping_overlapping<T>::migrate(Laik_Group* new_group, Laik_Partitioning* p_new_1, Laik_Partitioning* p_new_2, Laik_Transition* t_new_1, Laik_Transition* t_new_2){
void laik_vector_comm_overlapping_overlapping<T>::migrate(Laik_Group *new_group, Laik_Partitioning *p_new_1,
Laik_Partitioning *p_new_2, Laik_Transition *t_new_1,
Laik_Transition *t_new_2, bool suppressSwitchToP1) {
uint64_t cnt;
int* base;
//int slice = 0;

laik_switchto_partitioning(this->data, this->p1, LAIK_DF_None, LAIK_RO_Min);
this->prepareMigration(suppressSwitchToP1);

Laik_Reservation* reservation = laik_reservation_new(this->data);
laik_reservation_add(reservation, p_new_1);
Expand Down
4 changes: 3 additions & 1 deletion laik_vector_comm_overlapping_overlapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ class laik_vector_comm_overlapping_overlapping:public laik_vector<T>
void resize(int count) override;
void switch_to_p1() override;
void switch_to_p2() override;
void migrate(Laik_Group* new_group, Laik_Partitioning* p_new_1, Laik_Partitioning* p_new_2, Laik_Transition* t_new_1, Laik_Transition* t_new_2) override;
void migrate(Laik_Group *new_group, Laik_Partitioning *p_new_1,
Laik_Partitioning *p_new_2, Laik_Transition *t_new_1,
Laik_Transition *t_new_2, bool suppressSwitchToP1) override;
};

template <typename T>
Expand Down
Loading