From 3a99cc1a86cdb447a454308b92fbe6e11f5a2da1 Mon Sep 17 00:00:00 2001 From: Zhang Lei Date: Mon, 26 Aug 2024 13:58:17 +0800 Subject: [PATCH] fix(interactive): Add support for CBO on Interactive and fix some codgen errors (#3660) - Fix codegen error for project with expressions. - Add sample cypher queries for ldbc interactive complex query 1-12. - Add support for CBO on Interactive. But most of the tests are run on RBO. --- .github/workflows/interactive.yml | 53 +------ flex/bin/load_plan_and_gen.sh | 32 ++++- flex/codegen/gen_code_from_plan.cc | 13 +- flex/codegen/src/codegen_utils.h | 4 + flex/codegen/src/graph_types.h | 10 +- flex/codegen/src/hqps/hqps_expr_builder.h | 38 +++-- .../src/hqps/hqps_path_expand_builder.h | 3 +- flex/codegen/src/hqps/hqps_project_builder.h | 3 - flex/codegen/src/hqps_generator.h | 8 +- flex/engines/hqps_db/core/null_record.h | 5 + .../hqps_db/core/operator/edge_expand.h | 8 +- flex/engines/hqps_db/core/params.h | 7 +- flex/engines/hqps_db/core/utils/props.h | 2 +- .../hqps_db/database/mutable_csr_interface.h | 3 +- .../structures/multi_edge_set/flat_edge_set.h | 63 +++----- .../queries/ic/stored_procedure/ic1.cypher | 45 ++++++ .../queries/ic/stored_procedure/ic10.cypher | 26 ++++ .../queries/ic/stored_procedure/ic11.cypher | 22 ++- .../queries/ic/stored_procedure/ic12.cypher | 19 ++- .../queries/ic/stored_procedure/ic2.cypher | 21 ++- .../queries/ic/stored_procedure/ic3.cypher | 26 +++- .../queries/ic/stored_procedure/ic4.cypher | 17 +++ .../queries/ic/stored_procedure/ic5.cypher | 17 ++- .../queries/ic/stored_procedure/ic6.cypher | 15 +- .../queries/ic/stored_procedure/ic7.cypher | 23 +++ .../queries/ic/stored_procedure/ic8.cypher | 16 ++- .../queries/ic/stored_procedure/ic9.cypher | 20 ++- flex/tests/hqps/engine_config_test_cbo.yaml | 50 +++++++ flex/tests/hqps/hqps_codegen_test.sh | 134 ++++++++++++++++++ .../graphscope/common/config/YamlConfigs.java | 9 ++ 30 files changed, 551 insertions(+), 161 deletions(-) create mode 100644 flex/resources/queries/ic/stored_procedure/ic1.cypher create mode 100644 flex/resources/queries/ic/stored_procedure/ic10.cypher create mode 100644 flex/resources/queries/ic/stored_procedure/ic4.cypher create mode 100644 flex/resources/queries/ic/stored_procedure/ic7.cypher create mode 100644 flex/tests/hqps/engine_config_test_cbo.yaml create mode 100644 flex/tests/hqps/hqps_codegen_test.sh diff --git a/.github/workflows/interactive.yml b/.github/workflows/interactive.yml index 9481af75685f..67672dc66859 100644 --- a/.github/workflows/interactive.yml +++ b/.github/workflows/interactive.yml @@ -200,59 +200,10 @@ jobs: - name: Run codegen test. env: GS_TEST_DIR: ${{ github.workspace }}/gstest - HOME : /home/graphscope/ INTERACTIVE_WORKSPACE: /tmp/interactive_workspace run: | - cd ${GITHUB_WORKSPACE}/flex/bin - sed -i 's/default_graph: modern_graph/default_graph: ldbc/g' ../tests/hqps/engine_config_test.yaml - - for i in 1 2 3 4 5 6 7 8 9 10 11 12; - do - cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/ic${i}_adhoc.cypher -w=/tmp/codegen/" - cmd=${cmd}" -o=/tmp/plugin --ir_conf=../tests/hqps/engine_config_test.yaml " - cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" - echo $cmd - eval ${cmd} || exit 1 - done - - for i in 1 2 3 4 5 6 7 8 9 11 12; # 10 is not supported now - do - cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/simple_match_${i}.cypher -w=/tmp/codegen/" - cmd=${cmd}" -o=/tmp/plugin --ir_conf=../tests/hqps/engine_config_test.yaml " - cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" - echo $cmd - eval ${cmd} || exit 1 - done - - # test movie graph, 8,9,10 are not supported now - # change the default_graph config in ../tests/hqps/engine_config_test.yaml to movies - sed -i 's/default_graph: ldbc/default_graph: movies/g' ../tests/hqps/engine_config_test.yaml - for i in 1 2 3 4 5 6 7 11 12 13 14 15; - do - cmd="./load_plan_and_gen.sh -e=hqps -i=../tests/hqps/queries/movie/query${i}.cypher -w=/tmp/codegen/" - cmd=${cmd}" -o=/tmp/plugin --ir_conf=../tests/hqps/engine_config_test.yaml " - cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/movies/graph.yaml" - echo $cmd - eval ${cmd} || exit 1 - done - - # test graph_algo plugin building - sed -i 's/default_graph: movies/default_graph: graph_algo/g' ../tests/hqps/engine_config_test.yaml - cypher_files=$(ls ${GITHUB_WORKSPACE}/flex/interactive/examples/graph_algo/*.cypher) - for cypher_file in ${cypher_files}; - do - cmd="./load_plan_and_gen.sh -e=hqps -i=${cypher_file} -w=/tmp/codegen/" - cmd=${cmd}" -o=/tmp/plugin --ir_conf=../tests/hqps/engine_config_test.yaml " - cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/graph_algo/graph.yaml" - cmd=${cmd}" --procedure_name=$(basename ${cypher_file} .cypher)" - cmd=${cmd}" --procedure_desc=\"This is test procedure, change the description if needed.\"" - echo $cmd - eval ${cmd} || exit 1 - done - - #set back - sed -i 's/default_graph: graph_algo/default_graph: ldbc/g' ../tests/hqps/engine_config_test.yaml - rm -rf /tmp/codegen + cd ${GITHUB_WORKSPACE}/flex/tests/hqps + bash hqps_codegen_test.sh ${INTERACTIVE_WORKSPACE} ./engine_config_test.yaml ./engine_config_test_cbo.yaml - name: Test cypher&cpp procedure generation and loading env: diff --git a/flex/bin/load_plan_and_gen.sh b/flex/bin/load_plan_and_gen.sh index 6fae6a1e5200..e0f2d95ef555 100755 --- a/flex/bin/load_plan_and_gen.sh +++ b/flex/bin/load_plan_and_gen.sh @@ -106,10 +106,10 @@ query: | cypher_to_plan() { - if [ $# -ne 8 ]; then + if [ $# -lt 8 ] || [ $# -gt 9 ]; then echo "Usage: cypher_to_plan " echo " " - echo " " + echo " [statistic_path]" echo " but receive: "$# exit 1 fi @@ -123,6 +123,11 @@ cypher_to_plan() { # get procedure_name and procedure_description procedure_name=$7 procedure_description=$8 + if [ $# -eq 9 ]; then + statistic_path=$9 + else + statistic_path="" + fi # find java executable info "IR compiler properties = ${ir_compiler_properties}" @@ -172,6 +177,9 @@ EOM cmd="java -cp ${COMPILER_LIB_DIR}/*:${COMPILER_JAR}" cmd="${cmd} -Dgraph.schema=${graph_schema_path}" cmd="${cmd} -Djna.library.path=${IR_CORE_LIB_DIR}" + if [ ! -z ${statistic_path} ]; then + cmd="${cmd} -Dgraph.statistics=${statistic_path}" + fi cmd="${cmd} com.alibaba.graphscope.common.ir.tools.GraphPlanner ${ir_compiler_properties} ${real_input_path} ${real_output_path} ${real_output_yaml} ${extra_arg_config_file}" info "running physical plan generation with ${cmd}" eval ${cmd} @@ -192,9 +200,9 @@ EOM compile_hqps_so() { #check input params size eq 2 or 3 - if [ $# -gt 7 ] || [ $# -lt 4 ]; then + if [ $# -gt 8 ] || [ $# -lt 4 ]; then echo "Usage: $0 " - echo " [output_dir] [stored_procedure_name] [stored_procedure_description]" + echo " [statistic_path] [output_dir] [stored_procedure_name] [stored_procedure_description]" exit 1 fi input_path=$1 @@ -219,6 +227,12 @@ compile_hqps_so() { procedure_description="" fi + if [ $# -ge 8 ]; then + statistic_path=$8 + else + statistic_path="" + fi + info "Input path = ${input_path}" info "Work dir = ${work_dir}" info "ir compiler properties = ${ir_compiler_properties}" @@ -226,6 +240,7 @@ compile_hqps_so() { info "Output dir = ${output_dir}" info "Procedure name = ${procedure_name}" info "Procedure description = ${procedure_description}" + info "Statistic path = ${statistic_path}" last_file_name=$(basename ${input_path}) @@ -279,7 +294,7 @@ compile_hqps_so() { output_pb_path="${cur_dir}/${procedure_name}.pb" cypher_to_plan ${procedure_name} ${input_path} ${output_pb_path} \ ${output_yaml_path} ${ir_compiler_properties} ${graph_schema_path} \ - ${procedure_name} "${procedure_description}" + ${procedure_name} "${procedure_description}" ${statistic_path} info "----------------------------" info "Codegen from cypher query done." @@ -554,6 +569,10 @@ run() { PROCEDURE_DESCRIPTION="${i#*=}" shift # past argument=value ;; + --statistic_path=*) + STATISTIC_PATH="${i#*=}" + shift # past argument=value + ;; -* | --*) err "Unknown option $i" exit 1 @@ -571,6 +590,7 @@ run() { echo "Output path ="${OUTPUT_DIR} echo "Procedure name ="${PROCEDURE_NAME} echo "Procedure description ="${PROCEDURE_DESCRIPTION} + echo "Statistic path ="${STATISTIC_PATH} find_resources @@ -598,7 +618,7 @@ run() { PROCEDURE_NAME="${PROCEDURE_NAME%.cc}" PROCEDURE_NAME="${PROCEDURE_NAME%.pb}" fi - compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${OUTPUT_DIR} ${PROCEDURE_NAME} "${PROCEDURE_DESCRIPTION}" + compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${OUTPUT_DIR} ${PROCEDURE_NAME} "${PROCEDURE_DESCRIPTION}" ${STATISTIC_PATH} # else if engine_type equals pegasus elif [ ${ENGINE_TYPE} == "pegasus" ]; then diff --git a/flex/codegen/gen_code_from_plan.cc b/flex/codegen/gen_code_from_plan.cc index 81042e91811e..23394a0f9e5d 100644 --- a/flex/codegen/gen_code_from_plan.cc +++ b/flex/codegen/gen_code_from_plan.cc @@ -73,7 +73,8 @@ void deserialize_plan_and_gen_pegasus(const std::string& input_file_path, void deserialize_plan_and_gen_hqps(const std::string& input_file_path, const std::string& output_file_path, - const std::string& graph_schema_path) { + const std::string& graph_schema_path, + bool dump_json_plan = true) { LOG(INFO) << "Start deserializing from: " << input_file_path; std::string content_str = read_binary_str_from_path(input_file_path); LOG(INFO) << "Deserialized plan size : " << content_str.size() << ", from " @@ -83,6 +84,16 @@ void deserialize_plan_and_gen_hqps(const std::string& input_file_path, CHECK(plan_pb.ParseFromArray(content_str.data(), content_str.size())); LOG(INFO) << "deserialized plan size : " << plan_pb.ByteSizeLong(); VLOG(1) << "deserialized plan : " << plan_pb.DebugString(); + if (dump_json_plan) { + std::string output_path = output_file_path + ".json"; + std::string json_plan; + google::protobuf::util::JsonOptions option; + option.always_print_primitive_fields = true; + google::protobuf::util::MessageToJsonString(plan_pb, &json_plan, option); + std::ofstream out(output_path); + out << json_plan; + out.close(); + } BuildingContext context; std::shared_ptr> query_generator; // load schema diff --git a/flex/codegen/src/codegen_utils.h b/flex/codegen/src/codegen_utils.h index 841549cfb4ea..563cd6544521 100644 --- a/flex/codegen/src/codegen_utils.h +++ b/flex/codegen/src/codegen_utils.h @@ -201,13 +201,16 @@ codegen::ParamConst variable_to_param_const(const common::Variable& var, auto& var_property = var.property(); if (var_property.has_label()) { param_const.var_name = "label"; + param_const.expr_var_name = "label"; param_const.type = codegen::DataType::kLabelId; } else if (var_property.has_key()) { param_const.var_name = var.property().key().name(); + param_const.expr_var_name = var.property().key().name(); param_const.type = common_data_type_pb_2_data_type(var.node_type().data_type()); } else if (var_property.has_id()) { param_const.var_name = ctx.GetNextVarName(); + param_const.expr_var_name = ctx.GetNextVarName(); param_const.type = codegen::DataType::kGlobalVertexId; } else { LOG(FATAL) << "Unexpected property type: " << var_property.DebugString(); @@ -217,6 +220,7 @@ codegen::ParamConst variable_to_param_const(const common::Variable& var, if (var.has_node_type()) { auto node_type = var.node_type(); param_const.var_name = ctx.GetNextVarName(); + param_const.expr_var_name = param_const.var_name; if (node_type.type_case() == common::IrDataType::kDataType) { param_const.type = common_data_type_pb_2_data_type(node_type.data_type()); diff --git a/flex/codegen/src/graph_types.h b/flex/codegen/src/graph_types.h index 7b9027d460fa..48a7630537a2 100644 --- a/flex/codegen/src/graph_types.h +++ b/flex/codegen/src/graph_types.h @@ -51,13 +51,14 @@ enum class DataType { struct ParamConst { DataType type; std::string var_name; + std::string expr_var_name; int32_t id; // unique id for each param const }; // implement operator == for ParamConst inline bool operator==(const ParamConst& lhs, const ParamConst& rhs) { return lhs.type == rhs.type && lhs.var_name == rhs.var_name && - lhs.id == rhs.id; + lhs.expr_var_name == rhs.expr_var_name && lhs.id == rhs.id; } } // namespace codegen @@ -159,7 +160,7 @@ static codegen::ParamConst param_const_pb_to_param_const( CHECK(data_type_pb.type_case() == common::IrDataType::kDataType); return codegen::ParamConst{ common_data_type_pb_2_data_type(data_type_pb.data_type()), - param_const_pb.name(), param_const_pb.index()}; + param_const_pb.name(), param_const_pb.name(), param_const_pb.index()}; } static codegen::ParamConst param_const_pb_to_param_const( @@ -168,7 +169,8 @@ static codegen::ParamConst param_const_pb_to_param_const( if (ir_data_type.type_case() == common::IrDataType::kDataType) { auto primitive_type = ir_data_type.data_type(); return codegen::ParamConst{common_data_type_pb_2_data_type(primitive_type), - param_const_pb.name(), param_const_pb.index()}; + param_const_pb.name(), param_const_pb.name(), + param_const_pb.index()}; } else { throw std::runtime_error("Expect node type in ir_data_type"); } @@ -280,11 +282,13 @@ static void parse_param_const_from_pb( if (data_type.type_case() == common::IrDataType::kDataType) { param_cost.type = common_data_type_pb_2_data_type(data_type.data_type()); param_cost.var_name = param_const_pb.name(); + param_cost.expr_var_name = param_const_pb.name(); param_cost.id = param_const_pb.index(); return; } else if (node_type.type_case() == common::IrDataType::kDataType) { param_cost.type = common_data_type_pb_2_data_type(node_type.data_type()); param_cost.var_name = param_const_pb.name(); + param_cost.expr_var_name = param_const_pb.name(); param_cost.id = param_const_pb.index(); return; } else { diff --git a/flex/codegen/src/hqps/hqps_expr_builder.h b/flex/codegen/src/hqps/hqps_expr_builder.h index febfcdb93a1e..c7b228bed3ad 100644 --- a/flex/codegen/src/hqps/hqps_expr_builder.h +++ b/flex/codegen/src/hqps/hqps_expr_builder.h @@ -358,11 +358,8 @@ class ExprBuilder { // and also set a expr node for it. which is unique. make_var_name_unique(param_const); func_call_vars_.push_back(param_const); - expr_nodes_.emplace_back(param_const.var_name); + expr_nodes_.emplace_back(param_const.expr_var_name); - // expr_nodes_.emplace_back(param_const.var_name); - // convert a variable to a tag property, - // gs::NamedProperty{"prop1"}, saved for later use. tag_selectors_.emplace_back( variable_to_tag_id_property_selector(ctx_, var)); VLOG(10) << "Got var: " << var.DebugString(); @@ -389,7 +386,7 @@ class ExprBuilder { VLOG(10) << "receive param const: " << param_const_pb.DebugString(); make_var_name_unique(param_const); construct_params_.push_back(param_const); - expr_nodes_.emplace_back(param_const.var_name + "_"); + expr_nodes_.emplace_back(param_const.expr_var_name); break; } @@ -413,14 +410,11 @@ class ExprBuilder { // for each variable, we need add the variable to func_call_vars_. // and also set a expr node for it. which is unique. func_call_vars_.push_back(param_const); - ss << param_const.var_name; + ss << param_const.expr_var_name; if (i < vars.keys_size() - 1) { ss << ","; } - // expr_nodes_.emplace_back(param_const.var_name); - // convert a variable to a tag property, - // gs::NamedProperty{"prop1"}, saved for later use. tag_selectors_.emplace_back( variable_to_tag_id_property_selector(ctx_, cur_var)); } @@ -534,7 +528,7 @@ class ExprBuilder { std::stringstream ss; for (size_t i = 0; i < construct_params_.size(); ++i) { ss << data_type_2_string(construct_params_[i].type) << " " - << construct_params_[i].var_name; + << construct_params_[i].expr_var_name; if (i + 1 != construct_params_.size()) { ss << ","; } @@ -548,8 +542,8 @@ class ExprBuilder { ss << ":"; } for (size_t i = 0; i < construct_params_.size(); ++i) { - ss << construct_params_[i].var_name << "_" - << "(" << construct_params_[i].var_name << ")"; + ss << construct_params_[i].expr_var_name << "(" + << construct_params_[i].expr_var_name << ")"; if (i != construct_params_.size() - 1) { ss << ","; } @@ -578,7 +572,7 @@ class ExprBuilder { std::stringstream ss; for (size_t i = 0; i < func_call_vars_.size(); ++i) { ss << data_type_2_string(func_call_vars_[i].type) << " " - << func_call_vars_[i].var_name; + << func_call_vars_[i].expr_var_name; if (i != func_call_vars_.size() - 1) { ss << ","; } @@ -600,7 +594,7 @@ class ExprBuilder { std::stringstream ss; for (size_t i = 0; i < construct_params_.size(); ++i) { ss << data_type_2_string(construct_params_[i].type) << " " - << construct_params_[i].var_name << "_;" << std::endl; + << construct_params_[i].expr_var_name << ";" << std::endl; } return ss.str(); } @@ -608,23 +602,23 @@ class ExprBuilder { void make_var_name_unique(codegen::ParamConst& param_const) { std::unordered_set var_names; for (auto& param : construct_params_) { - auto res = var_names.insert(param.var_name); - CHECK(res.second) << "var name: " << param.var_name + auto res = var_names.insert(param.expr_var_name); + CHECK(res.second) << "var name: " << param.expr_var_name << " already exists, illegal state"; } for (auto& param : func_call_vars_) { - auto res = var_names.insert(param.var_name); - CHECK(res.second) << "var name: " << param.var_name + auto res = var_names.insert(param.expr_var_name); + CHECK(res.second) << "var name: " << param.expr_var_name << " already exists, illegal state"; } - auto cur_var_name = param_const.var_name; + auto cur_var_name = param_const.expr_var_name; int i = 0; while (var_names.find(cur_var_name) != var_names.end()) { - cur_var_name = param_const.var_name + "_" + std::to_string(i); + cur_var_name = param_const.expr_var_name + "_" + std::to_string(i); ++i; } - param_const.var_name = cur_var_name; - VLOG(10) << "make var name unique: " << param_const.var_name; + param_const.expr_var_name = cur_var_name; + VLOG(10) << "make var name unique: " << param_const.expr_var_name; } // this corresponding to the input params. diff --git a/flex/codegen/src/hqps/hqps_path_expand_builder.h b/flex/codegen/src/hqps/hqps_path_expand_builder.h index 7d7534869fae..269ae550f414 100644 --- a/flex/codegen/src/hqps/hqps_path_expand_builder.h +++ b/flex/codegen/src/hqps/hqps_path_expand_builder.h @@ -201,12 +201,11 @@ class PathExpandOpBuilder { if (params.tables().size() < 1) { throw std::runtime_error("no edge labels found"); } else if (params.tables().size() == 1) { - physical::PhysicalOpr::MetaData meta_data; // pass an empty meta_data, since we need no meta_data for std::tie(edge_expand_opt_name_, edge_expand_opt_) = BuildOneLabelEdgeExpandOpt(ctx_, direction_, params, dst_vertex_labels_, expand_opt, - meta_data); + meta_data_pb[0]); } else { // get the first meta_data if (meta_data_pb.size() < 1) { diff --git a/flex/codegen/src/hqps/hqps_project_builder.h b/flex/codegen/src/hqps/hqps_project_builder.h index ca15b7b5ecab..a0b06aab0acd 100644 --- a/flex/codegen/src/hqps/hqps_project_builder.h +++ b/flex/codegen/src/hqps/hqps_project_builder.h @@ -70,9 +70,6 @@ std::tuple concatenate_expr_built_result( } { std::stringstream ss; - if (func_construct_param_const.size() > 0) { - ss << ", "; - } for (size_t i = 0; i < func_construct_param_const.size(); ++i) { ss << func_construct_param_const[i].var_name; if (i != func_construct_param_const.size() - 1) { diff --git a/flex/codegen/src/hqps_generator.h b/flex/codegen/src/hqps_generator.h index 71d682a6a001..3bbe5524e2c5 100644 --- a/flex/codegen/src/hqps_generator.h +++ b/flex/codegen/src/hqps_generator.h @@ -280,7 +280,9 @@ class QueryGenerator { for (size_t i = 0; i < param_vars.size(); ++i) { if (i > 0 && param_vars[i].id == param_vars[i - 1].id) { // found duplicate - CHECK(param_vars[i] == param_vars[i - 1]); + CHECK(param_vars[i].var_name == param_vars[i - 1].var_name) + << " " << param_vars[i].var_name << " " + << param_vars[i - 1].var_name; continue; } else { ss << data_type_2_string(param_vars[i].type) << " " @@ -308,7 +310,9 @@ class QueryGenerator { for (size_t i = 0; i < param_vars.size(); ++i) { if (i > 0 && param_vars[i].id == param_vars[i - 1].id) { - CHECK(param_vars[i] == param_vars[i - 1]); + CHECK(param_vars[i].var_name == param_vars[i - 1].var_name) + << " " << i << " " << param_vars[i].var_name << " " + << param_vars[i - 1].var_name; continue; } else { auto& cur_param_var = param_vars[i]; diff --git a/flex/engines/hqps_db/core/null_record.h b/flex/engines/hqps_db/core/null_record.h index bfb298492cd8..fec59c3094b6 100644 --- a/flex/engines/hqps_db/core/null_record.h +++ b/flex/engines/hqps_db/core/null_record.h @@ -44,6 +44,11 @@ struct NullRecordCreator { static inline std::string_view GetNull() { return ""; } }; +template <> +struct NullRecordCreator { + static inline Direction GetNull() { return Direction::Unknown; } +}; + template <> struct NullRecordCreator { static inline Date GetNull() { diff --git a/flex/engines/hqps_db/core/operator/edge_expand.h b/flex/engines/hqps_db/core/operator/edge_expand.h index 77aaa87c8133..1f3ee240174c 100644 --- a/flex/engines/hqps_db/core/operator/edge_expand.h +++ b/flex/engines/hqps_db/core/operator/edge_expand.h @@ -1565,7 +1565,7 @@ class EdgeExpand { offset.reserve(cur_set.Size() + 1); size_t size = 0; offset.emplace_back(size); - std::vector>> + std::vector, Direction>> prop_tuples; prop_tuples.reserve(cur_set.Size() + 1); using adj_list_array_t = @@ -1590,7 +1590,6 @@ class EdgeExpand { adj_list_array_vec.emplace_back( std::make_pair(std::move(adj_list_array), Direction::In)); } - std::vector directions; auto cur_set_iter = cur_set.begin(); auto end_iter = cur_set.end(); @@ -1610,8 +1609,7 @@ class EdgeExpand { // TODO: better performance if (run_expr_filter(state.edge_filter_.expr_, props)) { prop_tuples.emplace_back( - std::make_tuple(src, edge.neighbor(), props)); - directions.emplace_back(direction); + std::make_tuple(src, edge.neighbor(), props, direction)); } } } @@ -1625,7 +1623,7 @@ class EdgeExpand { state.edge_label_}; SingleLabelEdgeSet> edge_set( std::move(prop_tuples), std::move(label_triplet), - std::vector{array_to_vec(prop_names)}, std::move(directions)); + std::vector{array_to_vec(prop_names)}); CHECK(offset.back() == edge_set.Size()) << "offset: " << offset.back() << ", " << edge_set.Size(); diff --git a/flex/engines/hqps_db/core/params.h b/flex/engines/hqps_db/core/params.h index b27fd8121168..1656048f4aab 100644 --- a/flex/engines/hqps_db/core/params.h +++ b/flex/engines/hqps_db/core/params.h @@ -354,7 +354,12 @@ enum JoinKind { LeftOuterJoin = 3, }; -enum Direction { Out = 0, In = 1, Both = 2 }; +enum Direction { + Out = 0, + In = 1, + Both = 2, + Unknown = 3, +}; enum VOpt { Start = 0, // The start vertex of current expanded edge. End = 1, // the ending vertex of this expanding. diff --git a/flex/engines/hqps_db/core/utils/props.h b/flex/engines/hqps_db/core/utils/props.h index 2ce201f621d8..dac58f4adfa8 100644 --- a/flex/engines/hqps_db/core/utils/props.h +++ b/flex/engines/hqps_db/core/utils/props.h @@ -380,7 +380,7 @@ class EdgeSetInnerIdGetter { auto& tuple = gs::get_from_tuple(all_ele); using cur_tuple_t = std::remove_const_t>; - if constexpr (std::tuple_size_v < cur_tuple_t >> 2) { + if constexpr ((std::tuple_size_v) > 2) { auto src_vid = std::get<0>(tuple); auto dst_vid = std::get<1>(tuple); return Edge(src_vid, dst_vid); diff --git a/flex/engines/hqps_db/database/mutable_csr_interface.h b/flex/engines/hqps_db/database/mutable_csr_interface.h index b98d55ba2d2b..1bb3a237836e 100644 --- a/flex/engines/hqps_db/database/mutable_csr_interface.h +++ b/flex/engines/hqps_db/database/mutable_csr_interface.h @@ -662,12 +662,13 @@ class MutableCSRInterface { edge_label_id); auto oe_csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, edge_label_id); - auto size = 0; + size_t size = 0; for (size_t i = 0; i < vids.size(); ++i) { auto v = vids[i]; size += ie_csr->edge_iter(v)->size(); size += oe_csr->edge_iter(v)->size(); } + LOG(INFO) << "size: " << size; ret_v.reserve(size); ret_offset.reserve(vids.size() + 1); ret_offset.emplace_back(0); diff --git a/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h index 5a9757098d08..2362d6fd0561 100644 --- a/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h +++ b/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h @@ -589,7 +589,7 @@ class FlatEdgeSet { template class SingleLabelEdgeSetBuilder { public: - using ele_tuple_t = std::tuple; + using ele_tuple_t = std::tuple; using index_ele_tuple_t = std::tuple; using result_t = SingleLabelEdgeSet; @@ -599,44 +599,35 @@ class SingleLabelEdgeSetBuilder { static constexpr bool is_general_vertex_set_builder = false; SingleLabelEdgeSetBuilder(const std::array& label_triplet, - std::vector prop_names, - const std::vector& direction) - : label_triplet_(label_triplet), - prop_names_(prop_names), - old_direction_(direction) {} + std::vector prop_names) + : label_triplet_(label_triplet), prop_names_(prop_names) {} // There could be null record. - void Insert(const index_ele_tuple_t& tuple) { - vec_.push_back(std::get<1>(tuple)); - new_directions.push_back(old_direction_[std::get<0>(tuple)]); - } + void Insert(const index_ele_tuple_t& tuple) { Insert(std::get<1>(tuple)); } + + void Insert(const ele_tuple_t& ele_tuple) { vec_.push_back(ele_tuple); } result_t Build() { - return result_t(std::move(vec_), std::move(label_triplet_), prop_names_, - std::move(new_directions)); + return result_t(std::move(vec_), std::move(label_triplet_), prop_names_); } private: std::vector vec_; std::array label_triplet_; std::vector prop_names_; - const std::vector& old_direction_; - std::vector new_directions; }; template class SingleLabelEdgeSetIter { public: - using ele_tuple_t = std::tuple; + using ele_tuple_t = std::tuple; using self_type_t = SingleLabelEdgeSetIter; using index_ele_tuple_t = std::tuple; using data_tuple_t = ele_tuple_t; - SingleLabelEdgeSetIter(const std::vector& dir, - const std::vector& vec, size_t ind, + SingleLabelEdgeSetIter(const std::vector& vec, size_t ind, const std::array& label_triplet, const std::vector& prop_names) - : dir_(dir), - vec_(vec), + : vec_(vec), ind_(ind), label_triplet_(label_triplet), prop_names_(prop_names) {} @@ -722,8 +713,7 @@ class SingleLabelEdgeSetIter { inline const self_type_t* operator->() const { return this; } private: - inline Direction get_direction() const { return dir_[ind_]; } - const std::vector& dir_; + inline Direction get_direction() const { return std::get<3>(vec_[ind_]); } const std::vector& vec_; size_t ind_; const std::array& label_triplet_; @@ -734,7 +724,7 @@ class SingleLabelEdgeSetIter { template class SingleLabelEdgeSet { public: - using ele_tuple_t = std::tuple; + using ele_tuple_t = std::tuple; using index_ele_tuple_t = std::tuple; using iterator = SingleLabelEdgeSetIter; using self_type_t = SingleLabelEdgeSet; @@ -751,29 +741,17 @@ class SingleLabelEdgeSet { SingleLabelEdgeSet(std::vector&& vec, std::array&& label_triplet, - std::vector prop_names, Direction direction) + std::vector prop_names) : vec_(std::move(vec)), label_triplet_(std::move(label_triplet)), - prop_names_(prop_names) { - directions_.resize(vec_.size(), direction); - } - - SingleLabelEdgeSet(std::vector&& vec, - std::array&& label_triplet, - std::vector prop_names, - std::vector&& direction) - : vec_(std::move(vec)), - label_triplet_(std::move(label_triplet)), - prop_names_(prop_names), - directions_(std::move(direction)) {} + prop_names_(prop_names) {} iterator begin() const { - return iterator(directions_, vec_, 0, label_triplet_, prop_names_); + return iterator(vec_, 0, label_triplet_, prop_names_); } iterator end() const { - return iterator(directions_, vec_, vec_.size(), label_triplet_, - prop_names_); + return iterator(vec_, vec_.size(), label_triplet_, prop_names_); } std::vector GetLabelVec() const { @@ -794,8 +772,7 @@ class SingleLabelEdgeSet { auto cur_ind_ele = std::get(index_ele_tuple[i]); res.emplace_back(std::get<1>(cur_ind_ele)); } - return SingleLabelEdgeSet(std::move(res), label_triplet_, prop_names_, - directions_); + return SingleLabelEdgeSet(std::move(res), label_triplet_, prop_names_); } // we assume edata_t is tuple @@ -909,9 +886,8 @@ class SingleLabelEdgeSet { } auto copy_label_triplet = label_triplet_; - auto copied_directions = directions_; return self_type_t(std::move(new_vec), std::move(copy_label_triplet), - prop_names_, std::move(copied_directions)); + prop_names_); } void Repeat(std::vector& cur_offset, @@ -940,14 +916,13 @@ class SingleLabelEdgeSet { } builder_t CreateBuilder() const { - return builder_t(label_triplet_, prop_names_, directions_); + return builder_t(label_triplet_, prop_names_); } private: std::vector vec_; std::array label_triplet_; std::vector prop_names_; - std::vector directions_; }; } // namespace gs diff --git a/flex/resources/queries/ic/stored_procedure/ic1.cypher b/flex/resources/queries/ic/stored_procedure/ic1.cypher new file mode 100644 index 000000000000..d79a8d0e68cd --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic1.cypher @@ -0,0 +1,45 @@ +MATCH (p: PERSON{id: $personId}) -[k:KNOWS*1..4]-(f: PERSON {firstName: $firstName}) +OPTIONAL MATCH (f: PERSON)-[workAt:WORKAT]->(company:ORGANISATION)-[:ISLOCATEDIN]->(country:PLACE) +OPTIONAL MATCH (f: PERSON)-[studyAt:STUDYAT]->(university)-[:ISLOCATEDIN]->(universityCity:PLACE) +MATCH (f:PERSON)-[:ISLOCATEDIN]->(locationCity:PLACE) +WHERE + p <> f +with + f AS f, + company, + university, + workAt, + country, + studyAt, + universityCity, + locationCity, + length(k) as len +with f AS f, company, university, workAt, country, studyAt, universityCity, locationCity, min(len) as distance +ORDER BY distance ASC, f.lastName ASC, f.id ASC +LIMIT 20 + +WITH + f, distance, locationCity, +CASE + WHEN company is null Then null + ELSE [company.name, workAt.workFrom, country.name] +END as companies, +CASE + WHEN university is null Then null + ELSE [university.name, studyAt.classYear, universityCity.name] +END as universities +WITH f, distance, locationCity, collect(companies) as company_info, collect(universities) as university_info + +return f.id AS friendId, + f.lastName AS friendLastName, + distance AS distanceFromPerson, + f.birthday AS friendBirthday, + f.creationDate AS friendCreationDate, + f.gender AS friendGender, + f.browserUsed AS friendBrowserUsed, + f.locationIP AS friendLocationIp, + f.email AS friendEmail, + f.language AS friendLanguage, + locationCity.name AS friendCityName, + university_info AS friendUniversities, + company_info AS friendCompanies; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic10.cypher b/flex/resources/queries/ic/stored_procedure/ic10.cypher new file mode 100644 index 000000000000..7dd97b72f78f --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic10.cypher @@ -0,0 +1,26 @@ +MATCH (person:PERSON {id: $personId})-[:KNOWS*2..3]-(friend: PERSON)-[:ISLOCATEDIN]->(city:PLACE) +OPTIONAL MATCH (friend : PERSON)<-[:HASCREATOR]-(post:POST) +OPTIONAL MATCH (friend)<-[:HASCREATOR]-(post1:POST)-[:HASTAG]->(tag:TAG)<-[:HASINTEREST]-(person: PERSON) +WHERE NOT friend=person + AND NOT (friend:PERSON)-[:KNOWS]-(person :PERSON {id: $personId}) +WITH + person, city, friend, post, post1, friend.birthday as birthday + +WHERE (birthday.month=$month AND birthday.day>=21) OR + (birthday.month=($month + 1) AND birthday.day<22) + +WITH + friend, + city, + count(distinct post) as postCount, + count(distinct post1) as commonPostCount + +RETURN + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + commonPostCount - (postCount - commonPostCount) AS commonInterestScore, + friend.gender AS personGender, + city.name AS personCityName +ORDER BY commonInterestScore DESC, personId ASC +LIMIT 10; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic11.cypher b/flex/resources/queries/ic/stored_procedure/ic11.cypher index f83dbb6e0cab..47881e1c1838 100644 --- a/flex/resources/queries/ic/stored_procedure/ic11.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic11.cypher @@ -1,4 +1,18 @@ -MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON)-[wa:workAt]->(com:organisation)-[:ISLOCATEDIN]->(:PLACE {name: $countryName}) -WHERE p <> friend and wa.workFrom < $workFromYear with distinct friend as friend, com AS com, wa.workFrom as organizationWorkFromYear -ORDER BY organizationWorkFromYear ASC, friend.id ASC, com.name DESC LIMIT 10 return friend.id AS personId, friend.firstName AS personFirstName, -friend.lastName AS personLastName, com.name as organizationName, organizationWorkFromYear as organizationWorkFromYear \ No newline at end of file +MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON)-[wa:WORKAT]->(com:ORGANISATION)-[:ISLOCATEDIN]->(:PLACE {name: $countryName}) +WHERE + p <> friend + AND wa.workFrom < $workFromYear +WITH DISTINCT + friend as friend, + com AS com, + wa.workFrom as organizationWorkFromYear +ORDER BY + organizationWorkFromYear ASC, + friend.id ASC, com.name DESC +LIMIT 10 +return + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + com.name as organizationName, + organizationWorkFromYear as organizationWorkFromYear; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic12.cypher b/flex/resources/queries/ic/stored_procedure/ic12.cypher index b2bf0425b073..a7f9ef3ecc35 100644 --- a/flex/resources/queries/ic/stored_procedure/ic12.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic12.cypher @@ -1,3 +1,16 @@ -MATCH (unused:PERSON {id: $personId})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(comments:COMMENT)-[:REPLYOF]->(:POST)-[:HASTAG]->(tags:TAG)-[:hasType]->(:TAGClass)-[:isSubclassOf*0..10]->(:TAGClass {name: $tagClassName}) - with friend AS friend, collect(DISTINCT tags.name) AS tagNames, count(DISTINCT comments) AS replyCount - ORDER BY replyCount DESC, friend.id ASC LIMIT 20 return friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, tagNames, replyCount \ No newline at end of file +MATCH + (unused:PERSON {id: $personId })-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(comments:COMMENT)-[:REPLYOF]->(:POST)-[:HASTAG]->(tags:TAG)-[:HASTYPE]->(:TAGCLASS)-[:ISSUBCLASSOF*0..10]->(:TAGCLASS {name: $tagClassName}) +WITH + friend AS friend, + collect(DISTINCT tags.name) AS tagNames, + count(DISTINCT comments) AS replyCount +ORDER BY + replyCount DESC, + friend.id ASC +LIMIT 20 +RETURN + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + tagNames, + replyCount \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic2.cypher b/flex/resources/queries/ic/stored_procedure/ic2.cypher index 179ff812a2e8..fe81f1d31385 100644 --- a/flex/resources/queries/ic/stored_procedure/ic2.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic2.cypher @@ -1,4 +1,17 @@ -MATCH (p :PERSON {id: $personId })-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) -WHERE message.creationDate < $maxDate WITH friend, message ORDER BY message.creationDate DESC, message.id ASC -LIMIT 20 return friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, message.id AS postOrCommentId, -message.content AS content,message.imageFile AS imageFile,message.creationDate AS postOrCommentCreationDate \ No newline at end of file +MATCH (p :PERSON {id: $personId})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) +WHERE + message.creationDate < $maxDate +WITH + friend, + message +ORDER BY + message.creationDate DESC, + message.id ASC LIMIT 20 +return + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + message.id AS postOrCommentId, + message.content AS content, + message.imageFile AS imageFile, + message.creationDate AS postOrCommentCreationDate; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic3.cypher b/flex/resources/queries/ic/stored_procedure/ic3.cypher index 475fe2c2a285..84bd2a255918 100644 --- a/flex/resources/queries/ic/stored_procedure/ic3.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic3.cypher @@ -1 +1,25 @@ -MATCH (countryX:PLACE {name: $countryXName})<-[:ISLOCATEDIN]-(messageX : POST | COMMENT)-[:HASCREATOR]->(otherP:PERSON),(countryY:PLACE {name: $countryYName})<-[:ISLOCATEDIN]-(messageY: POST | COMMENT)-[:HASCREATOR]->(otherP:PERSON),(otherP:PERSON)-[:ISLOCATEDIN]->(city:PLACE)-[:ISPARTOF]->(countryCity:PLACE),(p:PERSON {id:$personId})-[:KNOWS*1..3]-(otherP:PERSON) WHERE messageX.creationDate >= $startDate and messageX.creationDate < $endDate AND messageY.creationDate >= $startDate and messageY.creationDate < $endDate AND countryCity.name <> $countryXName and countryCity.name <> $countryYName WITH otherP, count(messageX) as xCount, count(messageY) as yCount RETURN otherP.id as id,otherP.firstName as firstName, otherP.lastName as lastName, xCount, yCount, xCount + yCount as total ORDER BY total DESC, id ASC LIMIT 20 +MATCH + (countryX: PLACE {name: $countryXName })<-[:ISLOCATEDIN]-(messageX : POST | COMMENT)-[:HASCREATOR]->(otherP:PERSON), + (countryY: PLACE {name: $countryYName })<-[:ISLOCATEDIN]-(messageY : POST | COMMENT)-[:HASCREATOR]->(otherP:PERSON), + (otherP:PERSON)-[:ISLOCATEDIN]->(city:PLACE)-[:ISPARTOF]->(countryCity:PLACE), + (p:PERSON {id: $personId})-[:KNOWS*1..3]-(otherP:PERSON) +WHERE + otherP <> p + AND messageX.creationDate >= $startDate + AND messageX.creationDate < $endDate + AND messageY.creationDate >= $startDate + AND messageY.creationDate < $endDate + AND countryCity.name <> $countryXName + AND countryCity.name <> $countryYName +WITH + otherP, + count(messageX) as xCount, + count(messageY) as yCount +RETURN + otherP.id as id, + otherP.firstName as firstName, + otherP.lastName as lastName, + xCount, + yCount, + xCount + yCount as total +ORDER BY total DESC, id ASC LIMIT 20; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic4.cypher b/flex/resources/queries/ic/stored_procedure/ic4.cypher new file mode 100644 index 000000000000..096916c2f243 --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic4.cypher @@ -0,0 +1,17 @@ +MATCH (person:PERSON {id: $personId})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(post:POST)-[:HASTAG]->(tag: TAG) +WITH DISTINCT tag, post +WITH tag, + CASE + WHEN post.creationDate < $endDate AND post.creationDate >= $startDate THEN 1 + ELSE 0 + END AS valid, + CASE + WHEN $startDate > post.creationDate THEN 1 + ELSE 0 + END AS inValid +WITH tag, sum(valid) AS postCount, sum(inValid) AS inValidPostCount +WHERE postCount>0 AND inValidPostCount=0 + +RETURN tag.name AS tagName, postCount +ORDER BY postCount DESC, tagName ASC +LIMIT 10; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic5.cypher b/flex/resources/queries/ic/stored_procedure/ic5.cypher index f30503ba9f4f..e44e08361416 100644 --- a/flex/resources/queries/ic/stored_procedure/ic5.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic5.cypher @@ -1 +1,16 @@ -MATCH (p:PERSON {id: $personId})-[k:KNOWS*1..3]-(other:PERSON)<-[hasMem:HASMEMBER]-(f:FORUM), (f:FORUM)-[:CONTAINEROF]->(po:POST)-[:HASCREATOR]->(other:PERSON) WHERE hasMem.joinDate > $minDate WITH f as f, count(distinct po) AS postCount ORDER BY postCount DESC, f.id ASC LIMIT 20 RETURN f.title as title, postCount \ No newline at end of file +MATCH (person:PERSON { id: $personId })-[:KNOWS*1..2]-(friend) +MATCH (friend)<-[membership:HASMEMBER]-(forum) +WHERE membership.joinDate > $minDate +OPTIONAL MATCH (friend)<-[:HASCREATOR]-(post)<-[:CONTAINEROF]-(forum) +WHERE + NOT person=friend +WITH + forum, + count(distinct post) AS postCount +ORDER BY + postCount DESC, + forum.id ASC +LIMIT 20 +RETURN + forum.title AS forumName, + postCount; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic6.cypher b/flex/resources/queries/ic/stored_procedure/ic6.cypher index 2d91e813afa9..125a4e5c6a55 100644 --- a/flex/resources/queries/ic/stored_procedure/ic6.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic6.cypher @@ -1,2 +1,13 @@ -MATCH (p_:PERSON {id:$personId})-[:KNOWS*1..3]-(other:PERSON)<-[:HASCREATOR]-(p:POST)-[:HASTAG]->(t:TAG {name:$tagName}),(p:POST)-[:HASTAG]->(otherTag:TAG) -WHERE otherTag <> t RETURN otherTag.name as name, count(distinct p) as postCnt ORDER BY postCnt desc, name asc LIMIT 10 \ No newline at end of file +MATCH + (p_:PERSON {id: $personId})-[:KNOWS*1..3]-(other:PERSON)<-[:HASCREATOR]-(p:POST)-[:HASTAG]->(t:TAG {name: $tagName}), + (p:POST)-[:HASTAG]->(otherTag:TAG) +WHERE + p_ <> other AND + otherTag <> t +RETURN + otherTag.name as name, + count(distinct p) as postCnt +ORDER BY + postCnt desc, + name asc +LIMIT 10; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic7.cypher b/flex/resources/queries/ic/stored_procedure/ic7.cypher new file mode 100644 index 000000000000..083f86ab22f4 --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic7.cypher @@ -0,0 +1,23 @@ +MATCH (person:PERSON {id: $personId})<-[:HASCREATOR]-(message: POST | COMMENT)<-[like:LIKES]-(liker:PERSON) +OPTIONAL MATCH (liker: PERSON)-[k:KNOWS]-(person: PERSON {id: $personId}) +WITH liker, message, like.creationDate AS likeTime, person, + CASE + WHEN k is null THEN true + ELSE false + END AS isNew +ORDER BY likeTime DESC, message.id ASC +WITH liker, person, head(collect(message)) as message, head(collect(likeTime)) AS likeTime, isNew +RETURN + liker.id AS personId, + liker.firstName AS personFirstName, + liker.lastName AS personLastName, + likeTime AS likeCreationDate, + message.id AS commentOrPostId, + message.content AS messageContent, + message.imageFile AS messageImageFile, + (likeTime - message.creationDate)/1000/60 AS minutesLatency, + isNew +ORDER BY + likeCreationDate DESC, + personId ASC +LIMIT 20; \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic8.cypher b/flex/resources/queries/ic/stored_procedure/ic8.cypher index 1be53b901ec9..820b3030df13 100644 --- a/flex/resources/queries/ic/stored_procedure/ic8.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic8.cypher @@ -1,2 +1,14 @@ -MATCH(p:PERSON {id: $personId}) <-[:HASCREATOR] -(msg : POST | COMMENT) <- [:REPLYOF] - (cmt: COMMENT) - [:HASCREATOR] -> (author : PERSON) -with p, msg, cmt, author ORDER BY cmt.creationDate DESC, cmt.id ASC limit 20 return author.id, author.firstName, author.lastName, cmt.creationDate, cmt.id, cmt.content \ No newline at end of file +MATCH(p:PERSON {id: $personId}) <-[:HASCREATOR] -(msg : POST | COMMENT) <- [:REPLYOF] - (cmt: COMMENT) - [:HASCREATOR] -> (author : PERSON) +WITH + p, msg, cmt, author +ORDER BY + cmt.creationDate DESC, + cmt.id ASC +LIMIT 20 +RETURN + author.id, + author.firstName, + author.lastName, + cmt.creationDate, + cmt.id, + cmt.content \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic9.cypher b/flex/resources/queries/ic/stored_procedure/ic9.cypher index 099127ad30c0..ea15939a625e 100644 --- a/flex/resources/queries/ic/stored_procedure/ic9.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic9.cypher @@ -1,2 +1,18 @@ -MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) -WHERE friend <> p and message.creationDate < $maxDate with friend,message ORDER BY message.creationDate DESC, message.id ASC LIMIT 20 RETURN friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, message.id AS commentOrPostId, message.content AS messageContent, message.imageFile AS messageImageFile, message.creationDate AS commentOrPostCreationDate \ No newline at end of file +MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) +WHERE + friend <> p + and message.creationDate < $maxDate +with + friend,message +ORDER BY + message.creationDate DESC, + message.id ASC +LIMIT 20 +RETURN + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + message.id AS commentOrPostId, + message.content AS messageContent, + message.imageFile AS messageImageFile, + message.creationDate AS commentOrPostCreationDate \ No newline at end of file diff --git a/flex/tests/hqps/engine_config_test_cbo.yaml b/flex/tests/hqps/engine_config_test_cbo.yaml new file mode 100644 index 000000000000..e180f341d1e0 --- /dev/null +++ b/flex/tests/hqps/engine_config_test_cbo.yaml @@ -0,0 +1,50 @@ +directories: + workspace: /tmp/interactive_workspace + subdirs: + data: data + logs: logs + conf: conf +log_level: INFO +default_graph: ldbc +compute_engine: + type: hiactor + workers: + - localhost:10000 + thread_num_per_worker: 1 + store: + type: cpp-mcsr +compiler: + physical: + opt: + config: proto # default is ffi + planner: + is_on: true + opt: CBO + rules: + - FilterIntoJoinRule + - FilterMatchRule + - NotMatchToAntiJoinRule + - ExtendIntersectRule + - ExpandGetVFusionRule + meta: + reader: + schema: + uri: http://localhost:7777/v1/service/status + interval: 1000 # ms + statistics: + uri: http://localhost:7777/v1/graph/%s/statistics + interval: 86400000 # ms + endpoint: + default_listen_address: localhost + bolt_connector: + disabled: false + port: 7687 + gremlin_connector: + disabled: false + port: 8182 + query_timeout: 40000 + gremlin_script_language_name: antlr_gremlin_calcite +http_service: + default_listen_address: localhost + admin_port: 7777 + query_port: 10000 \ No newline at end of file diff --git a/flex/tests/hqps/hqps_codegen_test.sh b/flex/tests/hqps/hqps_codegen_test.sh new file mode 100644 index 000000000000..c0eedade8655 --- /dev/null +++ b/flex/tests/hqps/hqps_codegen_test.sh @@ -0,0 +1,134 @@ +#!/bin/bash +# Copyright 2020 Alibaba Group Holding Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +set -e +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +FLEX_HOME=${SCRIPT_DIR}/../../ +SERVER_BIN=${FLEX_HOME}/build/bin/interactive_server +GIE_HOME=${FLEX_HOME}/../interactive_engine/ +CODEGEN_SCRIPT=${FLEX_HOME}/bin/load_plan_and_gen.sh +LDBC_STATISTICS=${GIE_HOME}/compiler/src/test/resources/statistics/ldbc1_statistics.json +ADMIN_PORT=7777 +QUERY_PORT=10000 + +if [ ! $# -eq 3 ]; then + echo "only receives: $# args, need 3" + echo "Usage: $0 " + exit 1 +fi + +INTERACTIVE_WORKSPACE=$1 +RBO_ENGINE_CONFIG_PATH=$2 +CBO_ENGINE_CONFIG_PATH=$3 +if [ ! -d ${INTERACTIVE_WORKSPACE} ]; then + echo "INTERACTIVE_WORKSPACE: ${INTERACTIVE_WORKSPACE} not exists" + exit 1 +fi +if [ ! -f ${RBO_ENGINE_CONFIG_PATH} ]; then + echo "ENGINE_CONFIG: ${RBO_ENGINE_CONFIG_PATH} not exists" + exit 1 +fi +if [ ! -f ${CBO_ENGINE_CONFIG_PATH} ]; then + echo "ENGINE_CONFIG: ${CBO_ENGINE_CONFIG_PATH} not exists" + exit 1 +fi + + +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' # No Color +err() { + echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] -ERROR- $* ${NC}" >&2 +} + +info() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] -INFO- $* ${NC}" +} + + +test_codegen_on_ldbc_cbo(){ + # we need to start engine service first for cbo test, since statistics is needed + # failed and reason: + # 1. PathExpand output Path with Both Vertex and Edges + for i in 2 3 4 5 6 8 9 10 11 12; + # 7 is not supported now + do + cmd="${CODEGEN_SCRIPT} -e=hqps -i=${FLEX_HOME}/resources/queries/ic/stored_procedure/ic${i}.cypher -w=/tmp/codegen/" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${CBO_ENGINE_CONFIG_PATH} " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" + cmd=${cmd}" --statistic_path=${LDBC_STATISTICS}" + echo $cmd + eval ${cmd} || exit 1 + done +} + +test_codegen_on_ldbc_rbo(){ + sed -i 's/default_graph: modern_graph/default_graph: ldbc/g' ${RBO_ENGINE_CONFIG_PATH} + for i in 1 2 3 4 5 6 7 8 9 10 11 12; + do + cmd="${CODEGEN_SCRIPT} -e=hqps -i=${FLEX_HOME}/resources/queries/ic/adhoc/ic${i}_adhoc.cypher -w=/tmp/codegen/" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${RBO_ENGINE_CONFIG_PATH} " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" + echo $cmd + eval ${cmd} || exit 1 + done + for i in 1 2 3 4 5 6 7 8 9 11 12; # 10 is not supported now + do + cmd="${CODEGEN_SCRIPT} -e=hqps -i=${FLEX_HOME}/resources/queries/ic/adhoc/simple_match_${i}.cypher -w=/tmp/codegen/" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${RBO_ENGINE_CONFIG_PATH} " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" + echo $cmd + eval ${cmd} || exit 1 + done + sed -i 's/default_graph: ldbc/default_graph: modern_graph/g' ${RBO_ENGINE_CONFIG_PATH} +} + +test_codegen_on_movie_rbo(){ + # test movie graph, 8,9,10 are not supported now + # change the default_graph config in ../tests/hqps/engine_config_test.yaml to movies + sed -i 's/default_graph: modern_graph/default_graph: movies/g' ${RBO_ENGINE_CONFIG_PATH} + for i in 1 2 3 4 5 6 7 11 12 13 14 15; + do + cmd="${CODEGEN_SCRIPT} -e=hqps -i=${FLEX_HOME}/tests/hqps/queries/movie/query${i}.cypher -w=/tmp/codegen/" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${RBO_ENGINE_CONFIG_PATH} " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/movies/graph.yaml" + echo $cmd + eval ${cmd} || exit 1 + done + sed -i 's/default_graph: movies/default_graph: modern_graph/g' ${RBO_ENGINE_CONFIG_PATH} +} + +test_codegen_on_graph_algo(){ + sed -i 's/default_graph: modern_graph/default_graph: graph_algo/g' ${RBO_ENGINE_CONFIG_PATH} + cypher_files=$(ls ${GITHUB_WORKSPACE}/flex/interactive/examples/graph_algo/*.cypher) + for cypher_file in ${cypher_files}; + do + cmd="${CODEGEN_SCRIPT} -e=hqps -i=${cypher_file} -w=/tmp/codegen/" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${RBO_ENGINE_CONFIG_PATH} " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/graph_algo/graph.yaml" + cmd=${cmd}" --procedure_name=$(basename ${cypher_file} .cypher)" + cmd=${cmd}" --procedure_desc=\"This is test procedure, change the description if needed.\"" + echo $cmd + eval ${cmd} || exit 1 + done + sed -i 's/default_graph: graph_algo/default_graph: modern_graph/g' ${RBO_ENGINE_CONFIG_PATH} +} + +test_codegen_on_ldbc_cbo +test_codegen_on_ldbc_rbo +test_codegen_on_movie_rbo +test_codegen_on_graph_algo + +rm -rf /tmp/codegen +rm -rf /tmp/plugin \ No newline at end of file diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/YamlConfigs.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/YamlConfigs.java index 4bffe88c5eb5..e55682a86cdb 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/YamlConfigs.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/YamlConfigs.java @@ -81,6 +81,15 @@ public class YamlConfigs extends Configs { return "cpp-mcsr"; } }) + .put( + "graph.physical.opt", + (Configs configs) -> { + if (configs.get("compiler.physical.opt.config") != null) { + return configs.get("compiler.physical.opt.config"); + } else { + return "ffi"; // default proto + } + }) .put( "pegasus.worker.num", (Configs configs) -> {