diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index b38bfc12b5..fe2f69c500 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -881,6 +881,7 @@ INPUT += $(FF_HOME)/include INPUT += $(FF_HOME)/nmt INPUT += $(FF_HOME)/python INPUT += $(FF_HOME)/src +INPUT += $(FF_HOME)/lib/substitutions/include # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/lib/substitutions/TUTORIAL.md b/lib/substitutions/TUTORIAL.md new file mode 100644 index 0000000000..bcf39da603 --- /dev/null +++ b/lib/substitutions/TUTORIAL.md @@ -0,0 +1,206 @@ +## Tutorial of substitution lib with simple example + +#### Create a pattern + +```c++ +//we should specify both the node pattern and edge pattern when defining a GraphPattern + +//first define an operator pattern for example, specify the node to have a linear +//operator +OperatorPattern operator_pattern_n0{ + std::vector{OperatorAttributeConstraint{ + ConstraintType::EQUAL, OperatorAttributeKey::OP_TYPE, Op::LINEAR}}}; + +//then define a tensor_pattern that restrict the pattern of edge in pcg. 
for example, +//specify that the first dimension (indexed by 0) of a tensor should be 2 +ParallelTensorPattern tensor_pattern_e0{ + std::vector{ + TensorAttributeConstraint{ConstraintType::EQUAL, + ListIndexAccess{ + TensorAttributeKey::DIM_SIZES, 0}, + 2}}}; +/* +remember that both operator_pattern and tensor_pattern are std::vector, meaning that you +can define more than one constraint depending on the context +*/ +``` + + +#### Pack into GraphPattern +```c++ +//create a graph with node label of OperatorPattern and edge label of ParallelTensorPattern +auto ig = + OutputLabelledOpenMultiDiGraph:: + create>(); +//add constraints defined above as argument to create a node +Node n0 = ig.add_node(operator_pattern_n0); +//add port number to distinguish different edges going to the same node +NodePort p0 = ig.add_node_port(); +//create edge +InputMultiDiEdge e0{n0, p0, std::make_pair(p0.value(), p0.value())}; +ig.add_edge(e0); +//add edge constraints above to the edge e0 +ig.add_label(e0, tensor_pattern_e0); + +//a pattern graph with one input edge pointing to a node +/* + n0 (Linear) + ↑ +*/ +RC_ASSERT(get_nodes(ig).size() == 1); +RC_ASSERT(get_edges(ig).size() == 1); +``` + +#### Define OutputGraph +```cpp + +//define a 3-node PCG that can be applied from the input graph ig + +//Partition node that can partition the input into two parts +OperatorAttrAssignment op_ass_n1{ + {{OperatorAttributeKey::OP_TYPE, AttrConstant{Op::REPARTITION}}, + {OperatorAttributeKey::PARALLEL_DIM, AttrConstant{ff_dim_t{0}}}, + {OperatorAttributeKey::PARALLEL_DEGREE, AttrConstant{2}}}}; + +//Linear node +OperatorAttrAssignment op_ass_n2{ + {{OperatorAttributeKey::OP_TYPE, AttrConstant{Op::LINEAR}}, + {OperatorAttributeKey::OUT_CHANNELS, + OperatorAttrAccess{n0, OperatorAttributeKey::OUT_CHANNELS}}, + {OperatorAttributeKey::USE_BIAS, + OperatorAttrAccess{n0, OperatorAttributeKey::USE_BIAS}}, + {OperatorAttributeKey::DATA_TYPE, + OperatorAttrAccess{n0, OperatorAttributeKey::DATA_TYPE}}, + 
{OperatorAttributeKey::ACTIVATION, + OperatorAttrAccess{n0, OperatorAttributeKey::ACTIVATION}}, + {OperatorAttributeKey::REGULARIZER, + OperatorAttrAccess{n0, OperatorAttributeKey::REGULARIZER}}}}; + +//Reduce node that will combine the result of two partitions +OperatorAttrAssignment op_ass_n3{ + {{OperatorAttributeKey::OP_TYPE, AttrConstant{Op::REDUCTION}}, + {OperatorAttributeKey::PARALLEL_DIM, AttrConstant{ff_dim_t{0}}}, + {OperatorAttributeKey::PARALLEL_DEGREE, AttrConstant{2}}}}; + +//notice that these assignments will be evaluated +//into new operators in the apply_substitution function +//and be inserted into the new pcg + +//create outputgraph with 3 nodes and 3 edges +auto og = NodeLabelledOpenMultiDiGraph::create< + UnorderedNodeLabelledOpenMultiDiGraph>(); +Node n1 = og.add_node(op_ass_n1); +Node n2 = og.add_node(op_ass_n2); +Node n3 = og.add_node(op_ass_n3); +NodePort p1 = og.add_node_port(); +NodePort p2 = og.add_node_port(); +NodePort p3 = og.add_node_port(); + +InputMultiDiEdge e1{n1, p1, {p1.value(), p1.value()}}; +MultiDiEdge e2{n2, p2, n1, p1}; +MultiDiEdge e3{n3, p3, n2, p2}; +og.add_edge(e1); +og.add_edge(e2); +og.add_edge(e3); +OutputGraphExpr output_graph_expr{og}; + +/* +The output graph looks like this + n3 (Reduce) + ↑ + n2 (Linear) + ↑ + n1 (Partition) + ↑ +*/ +RC_ASSERT(get_nodes(og).size() == 3); +RC_ASSERT(get_edges(og).size() == 3); +``` + +#### Define substitution +```cpp +//define two dict that specify how the input and output edges are mapped in the substitution +bidict input_mapping; +input_mapping.equate(e0, e1); +bidict output_mapping; + +Substitution substitution{ + input_graph, output_graph_expr, input_mapping, output_mapping}; +``` + +#### Apply substitution +```cpp + +//create the target pcg that we want to apply for substitution +SubParallelComputationGraph pcg = + OutputLabelledOpenMultiDiGraph::create< + UnorderedOutputLabelledOpenMultiDiGraph>(); + +Node n4 = pcg.add_node(Operator{InputAttrs{}, "input"}); +Node n5 = 
pcg.add_node(Operator{ + LinearAttrs{1, false, DataType::FLOAT, Activation::RELU, std::nullopt}, + "linear"}); +NodePort p4 = pcg.add_node_port(); +NodePort p5 = pcg.add_node_port(); + +MultiDiEdge e4{n5, p5, n4, p4}; +pcg.add_edge(e4); +pcg.add_label(e4, + ParallelTensor(ParallelTensorDims({2, 1}), + DataType::FLOAT, + CreateGrad::YES)); + +/* Our target pcg looks like this + n5 (Linear) + ↑ + n4 (input) +*/ + +//create criterion function that will test every predefined edge and node constraint +MatchAdditionalCriterion criterion{ + [&](Node const &pattern_node, Node const &graph_node) { + return operator_satisfies(pcg.at(graph_node), + input_graph.value().at(pattern_node)); + }, + [&](OpenMultiDiEdge const &pattern_edge, + OpenMultiDiEdge const &graph_edge) { + return parallel_tensor_satisfies( + pcg.at(graph_edge), input_graph.value().at(pattern_edge)); + }}; + +RC_ASSERT(criterion.node_criterion(n0, n5)); + + +//find the match point that we can apply the substitution in the target pcg +std::vector matches = + find_pattern_matches(input_graph, pcg, criterion); + +//there is only one match point in the pcg that we defined +RC_ASSERT(matches.size() == 1); + +//apply substitution +//the number of new pcg generated is bounded by O(2^(sn)) where s is the number of +//different substitutions and n is the number of nodes +SubParallelComputationGraph new_pcg = + apply_substitution(pcg, substitution, matches[0]); + +//now the new pcg becomes as follows +/* + n3 (Reduce) + ↑ + n2 (Linear) + ↑ + n1 (Partition) + ↑ + n4 (Input) +*/ +RC_ASSERT(get_nodes(new_pcg).size() == 4); +RC_ASSERT(get_edges(new_pcg).size() == 3); +``` + + + + diff --git a/lib/substitutions/include/substitutions/attribute_expr.h b/lib/substitutions/include/substitutions/attribute_expr.h index 0afd48b431..a5c69649cf 100644 --- a/lib/substitutions/include/substitutions/attribute_expr.h +++ b/lib/substitutions/include/substitutions/attribute_expr.h @@ -7,20 +7,55 @@ namespace FlexFlow { enum class 
ConstraintType { EQUAL }; +/** + * @struct ListIndexAccess + * @brief Given the attribute key, retrieve the specific value stored at index i in the attribute. + * This struct will be used in EvaluateOperatorAttributeExpr and EvaluateTensorAttributeExpr, + * where we evaluate the expression and return the concrete value of the attribute stored at index i + */ template struct ListIndexAccess { T attribute_key; req index; }; +/** + * @struct ListSize + * @brief Given the type of an attribute, retrieve the size of the attribute + * Specifically, for the OperatorAttributeValue, the size of the attribute is always MAX_TENSOR_DIM + * For the TensorAttributeValue, the size of the attribute is the size of the vector that represents + * the specific attribute of tensor in PCG + */ template struct ListSize { req attribute_key; }; +/** + * @struct AttributeExpr + * @brief AttributeExpr is a representation of ways to access the attribute. + * It can be a direct value, or a list index access, or a list size. + * For example, padding of a Conv2D operator will be represented as an int, + * and the dimension of a tensor will be represented as a vector whose + * size we can access with ListSize and whose specific values we can access + * with ListIndexAccess. + */ template using AttributeExpr = std::variant, ListSize>; + +/** + * @struct AttributeConstraint + * @brief AttributeConstraint is an additional constraint imposed when doing pattern matching other than + * just matching graph topology. Specifically, given a pattern and a graph, matching solely the attribute + * type is not enough as there are other factors to consider. For example, if we want to fuse two dense + * layers, we need to match the input shape; given a dense layer, we need to make sure the input shape matches + * the output shape of the previous layer. + * + * Given an attribute expression, attribute_expr should have a relationship with attribute_value defined by + * constraint_type. 
Currently only EQUAL is supported, meaning that the attribute_expr should be equal to + * attribute_value after evaluation. + */ template struct AttributeConstraint { ConstraintType constraint_type; @@ -28,6 +63,11 @@ struct AttributeConstraint { V attribute_value; }; + +/** + * @struct AttributePattern + * @brief AttributePattern is a collection of attribute constraints for pattern matching to satisfy. + */ template struct AttributePattern { std::vector> attribute_constraints; diff --git a/lib/substitutions/include/substitutions/get_attribute.h b/lib/substitutions/include/substitutions/get_attribute.h index 0e6dd4c69b..dbf1560986 100644 --- a/lib/substitutions/include/substitutions/get_attribute.h +++ b/lib/substitutions/include/substitutions/get_attribute.h @@ -5,6 +5,10 @@ #include "operator_pattern.h" #include "utils/optional.h" + +/** + * @brief overloading get_attribute functions for different operator attributes. + */ namespace FlexFlow { std::optional get_attribute(PCGOperatorAttrs const &, diff --git a/lib/substitutions/include/substitutions/graph_pattern.h b/lib/substitutions/include/substitutions/graph_pattern.h index 4f4021203b..6e0f839e28 100644 --- a/lib/substitutions/include/substitutions/graph_pattern.h +++ b/lib/substitutions/include/substitutions/graph_pattern.h @@ -8,6 +8,16 @@ namespace FlexFlow { +/** + * @struct GraphPattern + * @brief A GraphPattern is defined as an open graph with node label OperatorPattern + * and output label ParallelTensorPattern, which is referred to as the pattern graph. + * The graph structure of a GraphPattern instance defines the geometrical property + * of the input graph, while the node labels and output labels define the attribute + * property of that. In detail, the OperatorPattern and ParallelTensorPattern + * contain a set of constraints and the corresponding graph needs to satisfy these + * constraints in order to be considered a match. 
+ */ struct GraphPattern : public strong_typedef< GraphPattern, @@ -16,15 +26,39 @@ struct GraphPattern using strong_typedef::strong_typedef; }; +/** + * @brief Given a pattern, split_pattern is used to split the pattern + * and recursively match the sub-patterns. + */ GraphSplit split_pattern(OpenMultiDiGraphView const &pattern); +/** + * @brief singleton_pattern is defined as a pattern that has only one node. + * A singleton pattern serves as the base case for recursive pattern matching. + */ bool is_singleton_pattern(OpenMultiDiGraphView const &); +/** + * @brief operator_satisfies checks if the operator satisfies the set of constraints + * shown in the pattern. + */ bool operator_satisfies(Operator const &params, OperatorPattern const &pattern); + +/** + * @brief parallel_tensor_satisfies checks if the parallel tensor satisfies the set of + * constraints shown in the pattern. + */ bool parallel_tensor_satisfies(ParallelTensor const &params, ParallelTensorPattern const &pattern); +/** + * @brief assignment_satisfies checks if the provided MultiDiGraphPatternMatch is a valid + * description of how GraphPattern can be mapped to SubParallelComputationGraph. + * + * It checks if the node and edge assignments satisfy the constraints of the pattern and whether + * the graph topology matches. + */ bool assignment_satisfies(SubParallelComputationGraph const &, GraphPattern const &, MultiDiGraphPatternMatch const &); diff --git a/lib/substitutions/include/substitutions/graph_pattern_match.h b/lib/substitutions/include/substitutions/graph_pattern_match.h index bf6d6b6921..baf3eae4c2 100644 --- a/lib/substitutions/include/substitutions/graph_pattern_match.h +++ b/lib/substitutions/include/substitutions/graph_pattern_match.h @@ -6,32 +6,83 @@ namespace FlexFlow { +/** + * @struct MultiDiGraphPatternMatch + * @brief MultiDiGraphPatternMatch describes a specific location in an OpenMultiDiGraph where a given pattern matches. 
+ * + * Given a graph and a pattern there can be zero, one, or multiple locations where it can match. + * + * To provide some intuition, consider matching over strings instead of graphs: given a regex pattern "a.b" and a string "acbfadbga", there are two valid match locations: + * we can either match the "acb" at the beginning of the string, or the "adb" in the middle of the string. + * MultiDiGraphPatternMatch represents the difference between the two possible locations using a bidict which maps between + * objects in the pattern and the corresponding objects in the matched data structure. For example, in the string example above, + * the two matchings would be as follows: + * "acbfadbga" "acbfadbga" + * ^^^ ^^^ + * ||| ||| + * vvv vvv + * "a.b" "a.b" + * Of course in the context of graphs there are two types of objects to be matched: nodes and edges. + * As such our match consists of not one but two bidict mappings: one for nodes (node_assignment) and one for edges (edge_assignment). + */ struct MultiDiGraphPatternMatch { using PatternNode = Node; using PCGNode = Node; + + /** + * @see OpenMultiDiEdge + */ using PatternEdge = OpenMultiDiEdge; using PCGEdge = OpenMultiDiEdge; + /** + * @brief node_assignment describes the mapping between PatternNode and PCGNode as a part of the substitution. + */ bidict node_assignment; + + /** + * @brief edge_assignment describes the mapping between PatternEdge and PCGEdge as a part of the substitution. + */ bidict edge_assignment; }; +/** + * @struct MatchSplit + * @brief MatchSplit is a struct that describes a split of a MultiDiGraphPatternMatch into + * two sub MultiDiGraphPatternMatches by dividing the nodes into half. When applying pattern + * matches, the pattern will be split into two parts and recursively matched against the graph. 
+ */ struct MatchSplit { MultiDiGraphPatternMatch prefix_submatch; MultiDiGraphPatternMatch postfix_submatch; }; +/** + * @struct MatchAdditionalCriterion + * @brief The additional conditions that need to be satisfied beyond the geometric properties of the graph. + * Specifically, as mentioned in attribute_expr.h, other than matching graph topology, we also need to make sure + * the attributes (e.g. the shape of a dense layer) match as well. The additional constraints + * AttributeConstraint will be imposed inside node_criterion and edge_criterion for each potential match. + */ struct MatchAdditionalCriterion { std::function node_criterion; std::function edge_criterion; }; +/** + * @brief pattern_matches checks if the pattern graph matches the graph with additional conditions defined + * by additional_criterion. It is used as the last checking step to see if the pattern matches the graph + * attributewise inside find_pattern_matches. + */ bool pattern_matches(OpenMultiDiGraphView const &pattern, OpenMultiDiGraphView const &graph, MultiDiGraphPatternMatch const &match, MatchAdditionalCriterion const &additional_criterion); +/** + * @brief find_pattern_matches generates all valid matches from pattern to a subgraph of graph. + */ std::vector find_pattern_matches(OpenMultiDiGraphView const &pattern, OpenMultiDiGraphView const &graph, diff --git a/lib/substitutions/include/substitutions/operator_pattern.h b/lib/substitutions/include/substitutions/operator_pattern.h index 8fc4ebefc2..078a66a26d 100644 --- a/lib/substitutions/include/substitutions/operator_pattern.h +++ b/lib/substitutions/include/substitutions/operator_pattern.h @@ -11,6 +11,18 @@ namespace FlexFlow { +/** + * @enum OperatorAttributeKey + * @brief OperatorAttributeKey represents the keys of the attributes of an Operator. + * Specifically, each operator has a set of attributes, and each attribute will have + * a key as its name and a concrete value representation. 
+ * OP_TYPE is a special OperatorAttributeKey that represents the + * type of the Operator and will exist in every Operator. Given the OP_TYPE, the other + * attributes will be determined accordingly. + * + * For example, a batch matrix multiplication Operator will have OP_TYPE BATCH_MATMUL and + * dimensions as A_SEQ_LENGTH_DIM and B_SEQ_LENGTH_DIM. + */ enum class OperatorAttributeKey { OP_TYPE, // AnyOp USE_BIAS, @@ -70,6 +82,12 @@ enum class OperatorAttributeKey { NUM_INPUTS }; + +/** + * @brief OperatorAttributeValue is a representation of the concrete value of an attribute of an Operator. + * The OperatorAttributeValue is evaluated from AttributeExpr. The datatype of the value corresponds to the + * datatype of the attribute key listed in OperatorAttributeKey. + */ using OperatorAttributeValue = std::variant, index); FF_VISITABLE_STRUCT(ListSize, attribute_key); +/** + * @brief OperatorAttributeConstraint is an instance of template struct AttributeConstraint. + */ using OperatorAttributeConstraint = AttributeConstraint; +/** + * @brief OperatorPattern is an instance of template struct AttributePattern. + */ using OperatorPattern = AttributePattern; +/** + * @brief Given a specific attribute of an Operator, evaluate the expression of the attribute + * using one of the three methods: direct value, list index access, or list size, and return the + * value of the attribute. + */ std::optional evaluate_attribute_expr(Operator const &attrs, AttributeExpr const &expr); diff --git a/lib/substitutions/include/substitutions/output_graph.h b/lib/substitutions/include/substitutions/output_graph.h index 4ed90aed06..2d1651dc66 100644 --- a/lib/substitutions/include/substitutions/output_graph.h +++ b/lib/substitutions/include/substitutions/output_graph.h @@ -6,23 +6,53 @@ namespace FlexFlow { // NOTE(@wmdi) I am not sure whether these should be part of attribute expr. 
+ +/** + * @struct OperatorAttrAccess + * @brief OperatorAttrAccess consists of a node and an expression attr_expr + * on the attributes of the operator associated with the node. The value of an + * OperatorAttrAccess instance is the value of attr_expr evaluated on the operator + * associated with the node. + */ struct OperatorAttrAccess { Node node; AttributeExpr attr_expr; }; +/** + * @struct AttrConstant + * @brief AttrConstant is a constant value that is used as an attribute expression. + */ struct AttrConstant { OperatorAttributeValue value; }; + +/** + * @brief OperatorAttributeExpr is an access to the attribute of an operator and can be + * evaluated to a concrete value. OperatorAttributeExpr is used in the substitution phase. + * It will be evaluated and used to create a new operator with the evaluated value. + */ using OperatorAttributeExpr = std::variant; -// NOTE(@wmdi): Not sure if it aligns with other design. Or alternatively we can -// define the assignment for each operator type. +/** + * @brief OperatorAttrAssignment is a collection of OperatorAttributeKey and + * GraphAttributeExpr pairs for a single operator. It defines how the attributes + * of a single operator are calculated from the input graph. A pair + * {operator_attribute_key, graph_attribute_expr} in the collection means the value + * of graph_attribute_expr is assigned to the attribute named operator_attribute_key + * of the operator. + */ struct OperatorAttrAssignment { std::unordered_map assignments; }; +/** + * @brief An OutputGraphExpr is defined as an open graph with node label + * OperatorAttrAssignment and output label ParallelTensorAttrAssignment, which + * defines how the operator attributes and the parallel tensor attributes of the + * output graph are derived from the input graph. 
+ */ struct OutputGraphExpr : public strong_typedef< OutputGraphExpr, diff --git a/lib/substitutions/include/substitutions/parallel_tensor_pattern.h b/lib/substitutions/include/substitutions/parallel_tensor_pattern.h index 741554142f..612486fe09 100644 --- a/lib/substitutions/include/substitutions/parallel_tensor_pattern.h +++ b/lib/substitutions/include/substitutions/parallel_tensor_pattern.h @@ -6,16 +6,41 @@ namespace FlexFlow { +/** + * @brief TensorAttributeKey is an enum class that represents the keys of the + * attributes of a Tensor (matrix). + * DIM_SIZES describes the size of each dimension of the tensor for data parallelism computation. + * DIM_DEGREES describes the number of partitions along each dimension of the tensor for data parallelism computation. + */ enum class TensorAttributeKey { DIM_SIZES, DIM_DEGREES }; + +/** + * @brief DIM_SIZES and DIM_DEGREES are represented by + * a vector of ints indexed by the corresponding dimension. + */ using TensorAttributeValue = std::variant>; +/** + * @brief TensorAttributeConstraint is an instance of AttributeConstraint that + * defines the constraint a tensor should satisfy when doing pattern matching. + */ using TensorAttributeConstraint = AttributeConstraint; +/** + * @brief ParallelTensorPattern is an instance of AttributePattern that represents + * a set of constraints pattern matching should satisfy. + */ using ParallelTensorPattern = AttributePattern; + +/** + * @brief evaluate_attribute_expr evaluates the attribute expression for a given ParallelTensor. + * The ParallelTensor parameter is named tensor_shape because the numerical value will only be used + * at runtime. For the substitution phase, all that matters is the shape of the tensor. 
+ */ std::optional evaluate_attribute_expr(ParallelTensor const &tensor_shape, AttributeExpr const &expr); diff --git a/lib/substitutions/include/substitutions/sub_parallel_computation_graph.h b/lib/substitutions/include/substitutions/sub_parallel_computation_graph.h index 0d6bfe7628..e5940007c8 100644 --- a/lib/substitutions/include/substitutions/sub_parallel_computation_graph.h +++ b/lib/substitutions/include/substitutions/sub_parallel_computation_graph.h @@ -9,6 +9,13 @@ namespace FlexFlow { +/** + * @brief SubParallelComputationGraph is defined as an open graph, which allows nodes and edges + * that are not from the same graph to be added to it. + * This definition is useful when we want to split and merge graphs when doing pattern matching. + * In contrast, the ParallelComputationGraph is defined as a closed graph and all the edges and + * nodes are within that graph. + */ using SubParallelComputationGraph = OutputLabelledOpenMultiDiGraph; diff --git a/lib/substitutions/include/substitutions/substitution.h b/lib/substitutions/include/substitutions/substitution.h index 8dbe4e66cf..630f834e3d 100644 --- a/lib/substitutions/include/substitutions/substitution.h +++ b/lib/substitutions/include/substitutions/substitution.h @@ -7,6 +7,18 @@ namespace FlexFlow { +/** + * @struct Substitution + * @brief A substitution replaces a subgraph of the PCG with a new one. + * We refer to the subgraph to be replaced as the input graph, and the new + * subgraph to replace the input graph as the output graph. + * A Substitution object describes a substitution. It consists of an + * input_graph of type GraphPattern that describes which kind of input graphs + * the substitution can be applied to; an output_graph of type OutputGraphExpr + * that describes how the output graph is computed from the input graph; and + * an input_mapping and output_mapping that describe how the output graph is + * connected to the original PCG. 
+ */ struct Substitution { using InputPatternInput = InputMultiDiEdge; using InputPatternOutput = OutputMultiDiEdge; @@ -19,8 +31,14 @@ struct Substitution { bidict output_mapping; }; +/** + * @brief is_valid_substitution checks if the substitution is valid. + * The implementation will enumerate all the possible substitutions and filter + * out all the invalid ones. + */ bool is_valid_substitution(Substitution const &); + SubParallelComputationGraph apply_substitution(SubParallelComputationGraph const &, Substitution const &,