Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ffi for pcg #944

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Fixes to ComputationGraphBuilder functions
lockshaw committed Aug 18, 2023
commit 366d9b77b438150f8f7c7e19b98add96f022633e
2 changes: 1 addition & 1 deletion cmake/nccl.cmake
Original file line number Diff line number Diff line change
@@ -89,7 +89,7 @@ else()
BUILD_BYPRODUCTS ${CMAKE_BINARY_DIR}/deps/nccl/lib/libnccl${LIBEXT}
INSTALL_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND make src.build "${NCCL_BUILD_NVCC_GENCODE}" "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "BUILDDIR=${CMAKE_BINARY_DIR}/deps/nccl" "CXX=${CMAKE_CXX_COMPILER}" CC="${CMAKE_CC_COMPILER}"
BUILD_COMMAND make src.build "${NCCL_BUILD_NVCC_GENCODE}" "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "BUILDDIR=${CMAKE_BINARY_DIR}/deps/nccl" "CXX=${CMAKE_CXX_COMPILER}" CC="${CMAKE_CC_COMPILER}" CXXFLAGS="-w"
BUILD_IN_SOURCE 1
)

1 change: 0 additions & 1 deletion lib/op-attrs/include/op-attrs/ff_dim.h
Original file line number Diff line number Diff line change
@@ -15,6 +15,5 @@ struct ff_dim_t : public numerical_typedef<ff_dim_t, int> {

MAKE_TYPEDEF_HASHABLE(::FlexFlow::ff_dim_t);
MAKE_TYPEDEF_PRINTABLE(::FlexFlow::ff_dim_t, "ff_dim");
static_assert(FlexFlow::is_neq_comparable<::FlexFlow::ff_dim_t>::value);

#endif
72 changes: 46 additions & 26 deletions lib/pcg/src/computation_graph_builder.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include "pcg/computation_graph_builder.h"
#include "op-attrs/datatype.h"
#include "op-attrs/get_op_type.h"
#include "op-attrs/get_output_shapes.h"
#include "op-attrs/operator_attrs.h"
#include "op-attrs/tensor_shape.h"
#include "pcg/computation_graph.h"
#include "pcg/create_grad.h"
#include "pcg/layer_guid_t.h"
@@ -10,9 +13,29 @@
#include "utils/expected.h"
#include "utils/fmt.h"
#include "utils/graph/multidiedge.h"
#include "op-attrs/get_output_shapes.h"

namespace FlexFlow {

static TensorShape get_shape(ComputationGraph const &, tensor_guid_t const &);
static TensorShape get_shape(ComputationGraph const &, std::vector<tensor_guid_t> const &);

template <typename Attrs>
static TensorShape get_output_shape(ComputationGraph const &, Attrs const &, tensor_guid_t const &);

template <typename Attrs>
static TensorShape get_output_shape(ComputationGraph const &, Attrs const &, tensor_guid_t const &, tensor_guid_t const &);

template <typename Attrs>
static std::vector<TensorShape> get_output_shapes(ComputationGraph const &, Attrs const &, tensor_guid_t const &, tensor_guid_t const &);

static TensorShape get_broadcast_target_shape(std::vector<TensorShape> const &);
static TensorShape get_broadcast_target_shape(ComputationGraph const &, std::vector<tensor_guid_t> const &);

static tensor_guid_t broadcast(ComputationGraph &, tensor_guid_t, TensorShape const &);

static DataType get_data_type(ComputationGraph const &, tensor_guid_t const &);

static layer_guid_t add_layer(ComputationGraph &cg,
Layer const &layer,
std::vector<tensor_guid_t> const &inputs,
@@ -110,8 +133,6 @@ static tensor_guid_t insert_layer(
cg, add_layer(cg, layer, inputs, weights, {output_shape}));
}

static TensorShape get_broadcast_target_shape(std::vector<TensorShape> const &);

static tensor_guid_t
element_binary(ComputationGraph &,
OperatorType,
@@ -175,28 +196,28 @@ static tensor_guid_t insert_element_unary_layer(
as_type(cg, x, DataType::FLOAT, name + "input_pre_cast");

Layer layer = {widen<ComputationGraphAttrs>(attrs), name};
TensorShape output_shape = get_output_shape(attrs, input);
TensorShape output_shape = get_output_shape(attrs, get_shape(cg, input));

return insert_layer(cg, layer, {input}, {}, output_shape);
}

static tensor_guid_t
insert_element_unary_layer(ComputationGraph &,
insert_element_unary_layer(ComputationGraph &cg,
OperatorType op_type,
tensor_guid_t const &input,
optional<std::string> const &name) {
ElementUnaryAttrs attrs = {op_type};
return insert_element_unary_layer(attrs, input, name);
return insert_element_unary_layer(cg, attrs, input, name);
}

static tensor_guid_t
insert_element_scalar_unary_layer(ComputationGraph &,
insert_element_scalar_unary_layer(ComputationGraph &cg,
OperatorType op_type,
tensor_guid_t const &input,
float scalar,
optional<std::string> const &name) {
ElementScalarUnaryAttrs attrs = {op_type, scalar};
return insert_element_unary_layer(attrs, input, name);
return insert_element_unary_layer(cg, attrs, input, name);
}

static tensor_guid_t
@@ -207,18 +228,18 @@ static tensor_guid_t
optional<std::string> const &maybe_name) {
std::string name = maybe_name.value_or(get_default_name(op_type));

TensorShape compute_shape = get_broadcast_target_shape({lhs, rhs});
DataType compute_type = std::max(get_data_type(lhs), get_data_type(rhs));
TensorShape compute_shape = get_broadcast_target_shape(cg, {lhs, rhs});
DataType compute_type = std::max(get_data_type(cg, lhs), get_data_type(cg, rhs));

tensor_guid_t const lhs_input = as_type(
broadcast(lhs, compute_shape), compute_type, name + "_inputl_pre_cast");
tensor_guid_t const rhs_input = as_type(
broadcast(rhs, compute_shape), compute_type, name + "_inputr_pre_cast");
tensor_guid_t const lhs_input = as_type(cg,
broadcast(cg, lhs, compute_shape), compute_type, name + "_inputl_pre_cast");
tensor_guid_t const rhs_input = as_type(cg,
broadcast(cg, rhs, compute_shape), compute_type, name + "_inputr_pre_cast");

ElementBinaryAttrs attrs = {op_type, compute_type, false, false};

Layer layer = {attrs, name};
TensorShape output_shape = get_output_shape(attrs, lhs_input, rhs_input);
TensorShape output_shape = get_output_shape(cg, attrs, lhs_input, rhs_input);

return insert_layer(cg, layer, {lhs_input, rhs_input}, {}, output_shape);
}
@@ -394,14 +415,14 @@ tensor_guid_t
as_type(cg, x, DataType::FLOAT, name + "input_pre_cast");

Layer layer = {attrs, name};
TensorShape output_shape = get_output_shape(attrs, input);
TensorShape output_shape = get_output_shape(cg, attrs, input);

std::vector<std::pair<TensorShape, optional<Initializer>>> weights;

weights.push_back({get_kernel_shape(attrs, input), kernel_initializer});
weights.push_back({get_kernel_shape(attrs, get_shape(cg, input)), kernel_initializer});

if (use_bias) {
weights.push_back({get_bias_shape(attrs, input), bias_initializer});
weights.push_back({get_bias_shape(attrs, get_shape(cg, input)), bias_initializer});
}

return insert_layer(cg, layer, {input}, weights, output_shape);
@@ -419,7 +440,7 @@ tensor_guid_t insert_dropout_layer(ComputationGraph &cg,
tensor_guid_t input =
as_type(cg, x, DataType::FLOAT, name + "input_pre_cast");

TensorShape output_shape = get_output_shape(attrs, input);
TensorShape output_shape = get_output_shape(attrs, get_shape(cg, input));

return insert_layer(cg, layer, {input}, {}, output_shape);
}
@@ -437,10 +458,10 @@ tensor_guid_t
std::string name = maybe_name.value_or(get_default_name(attrs));

Layer layer = {attrs, name};
tensor_guid_t input = as_type(x, DataType::FLOAT, name + "input_pre_cast");
tensor_guid_t input = as_type(cg, x, DataType::FLOAT, name + "input_pre_cast");

TensorShape output_shape = get_output_shape(attrs, input);
TensorShape weights_shape = get_weights_shape(attrs, input);
TensorShape output_shape = get_output_shape(cg, attrs, input);
TensorShape weights_shape = get_weights_shape(attrs, get_shape(cg, input));

return insert_layer(
cg, layer, {input}, {{weights_shape, kernel_initializer}}, output_shape);
@@ -455,8 +476,7 @@ std::vector<tensor_guid_t>
GatherAttrs attrs = {dim};
std::string name = maybe_name.value_or(get_default_name(attrs));

Tensor index_tensor = cg.at(index);
DataType index_dt = get_data_type(index_tensor);
DataType index_dt = get_data_type(cg.at(index));

Layer layer = {attrs, name};
if (index_dt != DataType::INT32 && index_dt != DataType::INT64) {
@@ -467,7 +487,7 @@ std::vector<tensor_guid_t>
DataType::INT64);
}
std::vector<TensorShape> output_shapes =
get_output_shapes(attrs, input, index_tensor);
get_output_shapes(cg, attrs, input, index);

return insert_layer(cg, layer, {input, index}, {}, output_shapes);
}
@@ -483,7 +503,7 @@ tensor_guid_t
float lambda_bal,
optional<std::string> const &maybe_name) {
auto get_shape = [&](tensor_guid_t const &t) {
return get_data_type(cg.at(t));
return cg.at(t).shape;
};

AggregateAttrs attrs = {n, lambda_bal};
@@ -512,7 +532,7 @@ tensor_guid_t insert_batch_norm_layer(ComputationGraph &cg,

Layer layer = {attrs, name};

TensorShape output_shape = get_output_shape(attrs, get_shape(input));
TensorShape output_shape = get_output_shape(attrs, get_shape(cg, input));

return insert_layer(cg, layer, {input}, {}, output_shape);
}