Merge remote-tracking branch 'origin/repo-refactor' into r-refactor
Bob-Chen222 committed Jun 5, 2024
2 parents a2a3dd8 + 6fe5dba commit d785ece
Showing 27 changed files with 134 additions and 126 deletions.
61 changes: 32 additions & 29 deletions lib/kernels/src/hip/loss_function_kernels.cpp
@@ -13,14 +13,12 @@
* limitations under the License.
*/

#include "flexflow/model.h"
#include "utils/hip_helper.h"
#include "kernels/loss_function_kernels.h"
#include "device.h"
#include <hip/hip_runtime.h>

namespace FlexFlow {

using namespace Legion;

__global__ void
sparse_categorical_crossentropy_loss_backward(float *logit_grad,
int const *label,
@@ -59,7 +57,8 @@ __global__ void identity_loss_backward(float *loss_grad,
}
}

void Loss::sparse_categorical_crossentropy_loss_backward_kernel_wrapper(
void sparse_categorical_crossentropy_loss_backward_kernel(
hipStream_t stream,
float *logit_grad_ptr,
float const *logit_ptr,
int const *label_ptr,
@@ -69,12 +68,14 @@ void Loss::sparse_categorical_crossentropy_loss_backward_kernel_wrapper(
int num_classes,
int k,
float scale_factor) {
hipStream_t stream;
// hipStream_t stream;
checkCUDA(get_legion_stream(&stream));
checkCUDA(hipMemcpy(logit_grad_ptr,
logit_ptr,
logit_volume * sizeof(float),
hipMemcpyDeviceToDevice));

// launch kernel in hip
hipLaunchKernelGGL(sparse_categorical_crossentropy_loss_backward,
GET_BLOCKS(num_samples),
CUDA_NUM_THREADS,
@@ -94,17 +95,17 @@ void Loss::sparse_categorical_crossentropy_loss_backward_kernel_wrapper(
logit_grad_ptr,
logit_grad_volume,
0,
scale_factor * k);
scale_factor);
}

void Loss::categorical_crossentropy_loss_backward_kernel_wrapper(
float *logit_grad_ptr,
float const *logit_ptr,
float const *label_ptr,
size_t logit_volume,
size_t logit_grad_volume,
float scale_factor) {
hipStream_t stream;
void categorical_crossentropy_loss_backward_kernel(hipStream_t stream,
float *logit_grad_ptr,
float const *logit_ptr,
float const *label_ptr,
size_t logit_volume,
size_t logit_grad_volume,
float scale_factor) {
// hipStream_t stream;
checkCUDA(get_legion_stream(&stream));
hipLaunchKernelGGL(categorical_crossentropy_loss_backward,
GET_BLOCKS(logit_volume),
@@ -115,6 +116,7 @@ void Loss::categorical_crossentropy_loss_backward_kernel_wrapper(
logit_ptr,
label_ptr,
logit_volume);

// Scale logit gradients by loss->scale_factor
hipLaunchKernelGGL(scale_kernel,
GET_BLOCKS(logit_grad_volume),
@@ -127,14 +129,14 @@ void Loss::categorical_crossentropy_loss_backward_kernel_wrapper(
scale_factor);
}

void Loss::mean_squared_error_avg_loss_backward_kernel_wrapper(
float *logit_grad_ptr,
float const *logit_ptr,
float const *label_ptr,
size_t logit_volume,
size_t logit_grad_volume,
float scale_factor) {
hipStream_t stream;
void mean_squared_error_avg_loss_backward_kernel(hipStream_t stream,
float *logit_grad_ptr,
float const *logit_ptr,
float const *label_ptr,
size_t logit_volume,
size_t logit_grad_volume,
float scale_factor) {
// hipStream_t stream;
checkCUDA(get_legion_stream(&stream));
hipLaunchKernelGGL(mean_squared_error_avg_loss_backward,
GET_BLOCKS(logit_volume),
@@ -157,12 +159,13 @@ void Loss::mean_squared_error_avg_loss_backward_kernel_wrapper(
scale_factor);
}

void Loss::identity_loss_backward_kernel_wrapper(float *loss_grad_ptr,
float const *loss_ptr,
size_t loss_volume,
size_t loss_grad_volume,
float scale_factor) {
hipStream_t stream;
void identity_loss_backward_kernel(hipStream_t stream,
float *loss_grad_ptr,
float const *loss_ptr,
size_t loss_volume,
size_t loss_grad_volume,
float scale_factor) {
// hipStream_t stream;
checkCUDA(get_legion_stream(&stream));
hipLaunchKernelGGL(identity_loss_backward,
GET_BLOCKS(loss_volume),
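For reference, this refactor turns the former Loss:: wrapper methods into free functions that take the HIP stream as an explicit first argument instead of resolving it internally. Below is a minimal, hypothetical caller sketch (not part of this commit), assuming the refactored declarations live in kernels/loss_function_kernels.h under the FlexFlow namespace; the helper name and stream handling are illustrative only.

#include <hip/hip_runtime.h>
#include "kernels/loss_function_kernels.h"

// Hypothetical helper: drives the refactored categorical cross-entropy
// backward kernel on a caller-supplied stream. All pointers are device
// pointers; here the gradient buffer covers the same volume as the logits.
void run_cce_backward(float *logit_grad_d,
                      float const *logit_d,
                      float const *label_d,
                      size_t logit_volume,
                      float scale_factor) {
  hipStream_t stream;
  hipStreamCreate(&stream); // FlexFlow would normally supply its Legion-managed stream
  FlexFlow::categorical_crossentropy_loss_backward_kernel(stream,
                                                          logit_grad_d,
                                                          logit_d,
                                                          label_d,
                                                          logit_volume,
                                                          /*logit_grad_volume=*/logit_volume,
                                                          scale_factor);
  hipStreamSynchronize(stream);
  hipStreamDestroy(stream);
}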
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -14,14 +14,15 @@
*/

#include "kernels/dropout_kernels.h"
#include "kernels/hip_helper.h"
#include "device.h"
#include "kernels/ff_handle.h"
#include <hip/hip_runtime.h>

namespace FlexFlow {
namespace Kernels {
namespace Dropout {

DropoutPerDeviceState init_kernel(PerDeviceFFHandle handler,
DropoutPerDeviceState init_kernel(PerDeviceFFHandle handle,
float rate,
unsigned long long seed,
ArrayShape output_shape,
@@ -47,8 +48,6 @@ DropoutPerDeviceState init_kernel(PerDeviceFFHandle handler,
dropoutStates = allocator.allocate(totalSize);
reserveSpace = ((char *)dropoutStates) + dropoutStateSize;
}
// checkCUDA(hipMalloc(&dropoutStates, dropoutStateSize));
// checkCUDA(hipMalloc(&reserveSpace, reserveSpaceSize));
checkCUDNN(miopenSetDropoutDescriptor(dropoutDesc,
handle.dnn,
rate,
@@ -74,7 +73,7 @@ void forward_kernel(hipStream_t stream,
DropoutPerDeviceState &m,
float const *input_ptr,
float *output_ptr) {
checkCUDNN(miopenSetStream(m->handle.dnn, stream));
checkCUDNN(miopenSetStream(m.handle.dnn, stream));

checkCUDNN(miopenDropoutForward(m.handle.dnn,
m.dropoutDesc,
@@ -91,7 +90,7 @@ void backward_kernel(hipStream_t stream,
DropoutPerDeviceState &m,
float const *output_grad_ptr,
float *input_grad_ptr) {
checkCUDNN(miopenSetStream(m->handle.dnn, stream));
checkCUDNN(miopenSetStream(m.handle.dnn, stream));

checkCUDNN(miopenDropoutBackward(m.handle.dnn,
m.dropoutDesc,
@@ -14,11 +14,11 @@
*/

#include "kernels/flat_kernels.h"
#include "kernels/hip_helper.h"
#include "device.h"
#include "kernels/accessor.h"
#include <hip/hip_runtime.h>

namespace FlexFlow {

namespace Kernels {
namespace Flat {

@@ -31,7 +31,6 @@ void forward_kernel(hipStream_t stream,
(input.shape.num_elements()) * sizeof(float),
hipMemcpyDeviceToDevice,
stream));
// checkCUDA(hipDeviceSynchronize());
}

void backward_kernel(hipStream_t stream,
@@ -49,10 +48,6 @@ void backward_kernel(hipStream_t stream,
output_grad_ptr,
input.shape.num_elements(),
alpha);
// checkCUDA(hipMemcpyAsync(acc_input_grad.ptr, acc_output_grad.ptr,
// acc_input_grad.rect.volume() * sizeof(float),
// hipMemcpyDeviceToDevice));
// checkCUDA(hipDeviceSynchronize());
}

} // namespace Flat
@@ -14,15 +14,11 @@
*/

#include "kernels/gather_kernels.h"
#include "device.h"
#include "kernels/datatype_dispatch.h"
#include "kernels/hip_helper.h"
#include <hip/hip_runtime.h>

namespace FlexFlow {

GatherPerDeviceState::GatherPerDeviceState(FFHandler handler)
: PerDeviceOpState(handler){};

namespace Kernels {
namespace Gather {

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -14,40 +14,36 @@
*/

#include "kernels/softmax_kernels.h"
#include "kernels/hip_helper.h"
#include "device.h"
#include <hip/hip_runtime.h>

namespace FlexFlow {
// declare Legion names
using Legion::Domain;

SoftmaxPerDeviceState::SoftmaxPerDeviceState(FFHandler handler,
Softmax const *softmax,
Domain const &input_domain)
: PerDeviceOpState(handler) {
checkCUDNN(miopenCreateTensorDescriptor(&inputTensor));
checkCUDNN(cudnnSetTensorDescriptorFromDomain(inputTensor, input_domain));
dim = softmax->dim;
profiling = softmax->profiling;
std::strcpy(op_name, softmax->name);
}

namespace Kernels {
namespace Softmax {

SoftmaxPerDeviceState init_kernel(PerDeviceFFHandle const &handle, int dim) {
ffTensorDescriptor_t inputTensor;

checkCUDNN(miopenCreateTensorDescriptor(&inputTensor));

SoftmaxPerDeviceState per_device_state = {handle, inputTensor, dim};
return per_device_state;
}

void forward_kernel(hipStream_t stream,
SoftmaxPerDeviceState const *m,
SoftmaxPerDeviceState const &m,
float const *input_ptr,
float *output_ptr) {
checkCUDNN(miopenSetStream(m->handle.dnn, stream));
checkCUDNN(miopenSetStream(m.handle.dnn, stream));

float alpha = 1.0f, beta = 0.0f;
checkCUDNN(miopenSoftmaxForward_V2(m->handle.dnn,
checkCUDNN(miopenSoftmaxForward_V2(m.handle.dnn,
&alpha,
m->inputTensor,
m.inputTensor,
input_ptr,
&beta,
m->inputTensor,
m.inputTensor,
output_ptr,
MIOPEN_SOFTMAX_ACCURATE,
MIOPEN_SOFTMAX_MODE_CHANNEL));
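The Softmax kernels follow the same pattern: the FFHandler-based SoftmaxPerDeviceState constructor is replaced by a standalone init_kernel, and the state is passed by const reference rather than by pointer. A hypothetical usage sketch (not part of this commit), assuming the declarations from kernels/softmax_kernels.h and a PerDeviceFFHandle from kernels/ff_handle.h:

#include <hip/hip_runtime.h>
#include "kernels/softmax_kernels.h"

// Hypothetical: build the per-device state once, then reuse it across calls.
// `handle` is assumed to be an initialized PerDeviceFFHandle with a valid
// MIOpen handle (handle.dnn); `dim` is the softmax dimension.
void run_softmax(FlexFlow::PerDeviceFFHandle const &handle,
                 hipStream_t stream,
                 float const *input_d,
                 float *output_d,
                 int dim) {
  auto state = FlexFlow::Kernels::Softmax::init_kernel(handle, dim);
  FlexFlow::Kernels::Softmax::forward_kernel(stream, state, input_d, output_d);
}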
@@ -14,12 +14,10 @@
*/

#include "kernels/split_kernels.h"
#include "kernels/hip_helper.h"
#include "device.h"
#include <hip/hip_runtime.h>

namespace FlexFlow {
// declare Legion names
using Legion::coord_t;

namespace Kernels {
namespace Split {
@@ -14,15 +14,10 @@
*/

#include "kernels/topk_kernels.h"
#include "kernels/hip_helper.h"
#include "device.h"
#include <hip/hip_runtime.h>

namespace FlexFlow {
// declare Legion names
using Legion::coord_t;

TopKPerDeviceState::TopKPerDeviceState(FFHandler handler)
: PerDeviceOpState(handler) {}

namespace Kernels {
namespace TopK {
@@ -36,6 +31,11 @@ struct Entry {
T value;
};

TopKPerDeviceState init_kernel(bool sorted) {
TopKPerDeviceState per_device_state = {sorted};
return per_device_state;
}

template <typename T>
struct LinearData {
typedef Entry<T> Entry;
@@ -371,7 +371,7 @@ __global__ void topk_forward_kernel(T const *__restrict__ input,
}

void forward_kernel(hipStream_t stream,
TopKPerDeviceState const *m,
TopKPerDeviceState const &m,
float const *input_ptr,
float *output_ptr,
int *indices_ptr,
@@ -428,7 +428,7 @@ __global__ void topk_backward_kernel(T const *__restrict__ value_grad_ptr,
}

void backward_kernel(hipStream_t stream,
TopKPerDeviceState const *m,
TopKPerDeviceState const &m,
float const *value_grad_ptr,
int const *indices_ptr,
float *in_grad_ptr,
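TopK gets the same treatment: the per-device state is now a plain value holding the sorted flag, produced by a standalone init_kernel, and forward_kernel/backward_kernel take it by const reference instead of by pointer. A minimal, hypothetical sketch (not part of this commit):

#include "kernels/topk_kernels.h"

// Hypothetical: construct the per-device state from the `sorted` flag alone;
// the old FFHandler-based TopKPerDeviceState constructor is removed by this commit.
inline auto make_topk_state(bool sorted) {
  return FlexFlow::Kernels::TopK::init_kernel(sorted);
}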
