Skip to content

Commit

Permalink
fix(interactive): Add support for CBO on Interactive and fix some cod…
Browse files Browse the repository at this point in the history
…gen errors (#3660)

- Fix codegen error for project with expressions.
- Add sample cypher queries for ldbc interactive complex query 1-12.
- Add support for CBO on Interactive. But most of the tests are run on
RBO.
  • Loading branch information
zhanglei1949 authored Aug 26, 2024
1 parent 27c8725 commit 3a99cc1
Show file tree
Hide file tree
Showing 30 changed files with 551 additions and 161 deletions.
53 changes: 2 additions & 51 deletions .github/workflows/interactive.yml
Original file line number Diff line number Diff line change
Expand Up @@ -200,59 +200,10 @@ jobs:
- name: Run codegen test.
env:
GS_TEST_DIR: ${{ github.workspace }}/gstest
HOME : /home/graphscope/
INTERACTIVE_WORKSPACE: /tmp/interactive_workspace
run: |
cd ${GITHUB_WORKSPACE}/flex/bin
sed -i 's/default_graph: modern_graph/default_graph: ldbc/g' ../tests/hqps/engine_config_test.yaml
for i in 1 2 3 4 5 6 7 8 9 10 11 12;
do
cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/ic${i}_adhoc.cypher -w=/tmp/codegen/"
cmd=${cmd}" -o=/tmp/plugin --ir_conf=../tests/hqps/engine_config_test.yaml "
cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml"
echo $cmd
eval ${cmd} || exit 1
done
for i in 1 2 3 4 5 6 7 8 9 11 12; # 10 is not supported now
do
cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/simple_match_${i}.cypher -w=/tmp/codegen/"
cmd=${cmd}" -o=/tmp/plugin --ir_conf=../tests/hqps/engine_config_test.yaml "
cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml"
echo $cmd
eval ${cmd} || exit 1
done
# test movie graph, 8,9,10 are not supported now
# change the default_graph config in ../tests/hqps/engine_config_test.yaml to movies
sed -i 's/default_graph: ldbc/default_graph: movies/g' ../tests/hqps/engine_config_test.yaml
for i in 1 2 3 4 5 6 7 11 12 13 14 15;
do
cmd="./load_plan_and_gen.sh -e=hqps -i=../tests/hqps/queries/movie/query${i}.cypher -w=/tmp/codegen/"
cmd=${cmd}" -o=/tmp/plugin --ir_conf=../tests/hqps/engine_config_test.yaml "
cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/movies/graph.yaml"
echo $cmd
eval ${cmd} || exit 1
done
# test graph_algo plugin building
sed -i 's/default_graph: movies/default_graph: graph_algo/g' ../tests/hqps/engine_config_test.yaml
cypher_files=$(ls ${GITHUB_WORKSPACE}/flex/interactive/examples/graph_algo/*.cypher)
for cypher_file in ${cypher_files};
do
cmd="./load_plan_and_gen.sh -e=hqps -i=${cypher_file} -w=/tmp/codegen/"
cmd=${cmd}" -o=/tmp/plugin --ir_conf=../tests/hqps/engine_config_test.yaml "
cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/graph_algo/graph.yaml"
cmd=${cmd}" --procedure_name=$(basename ${cypher_file} .cypher)"
cmd=${cmd}" --procedure_desc=\"This is test procedure, change the description if needed.\""
echo $cmd
eval ${cmd} || exit 1
done
#set back
sed -i 's/default_graph: graph_algo/default_graph: ldbc/g' ../tests/hqps/engine_config_test.yaml
rm -rf /tmp/codegen
cd ${GITHUB_WORKSPACE}/flex/tests/hqps
bash hqps_codegen_test.sh ${INTERACTIVE_WORKSPACE} ./engine_config_test.yaml ./engine_config_test_cbo.yaml
- name: Test cypher&cpp procedure generation and loading
env:
Expand Down
32 changes: 26 additions & 6 deletions flex/bin/load_plan_and_gen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,10 @@ query: |


cypher_to_plan() {
if [ $# -ne 8 ]; then
if [ $# -lt 8 ] || [ $# -gt 9 ]; then
echo "Usage: cypher_to_plan <query_name> <input_file> <output_plan file>"
echo " <output_yaml_file> <ir_compiler_properties> <graph_schema_path>"
echo " <procedure_name> <procedure_description>"
echo " <procedure_name> <procedure_description> [statistic_path]"
echo " but receive: "$#
exit 1
fi
Expand All @@ -123,6 +123,11 @@ cypher_to_plan() {
# get procedure_name and procedure_description
procedure_name=$7
procedure_description=$8
if [ $# -eq 9 ]; then
statistic_path=$9
else
statistic_path=""
fi

# find java executable
info "IR compiler properties = ${ir_compiler_properties}"
Expand Down Expand Up @@ -172,6 +177,9 @@ EOM
cmd="java -cp ${COMPILER_LIB_DIR}/*:${COMPILER_JAR}"
cmd="${cmd} -Dgraph.schema=${graph_schema_path}"
cmd="${cmd} -Djna.library.path=${IR_CORE_LIB_DIR}"
if [ ! -z ${statistic_path} ]; then
cmd="${cmd} -Dgraph.statistics=${statistic_path}"
fi
cmd="${cmd} com.alibaba.graphscope.common.ir.tools.GraphPlanner ${ir_compiler_properties} ${real_input_path} ${real_output_path} ${real_output_yaml} ${extra_arg_config_file}"
info "running physical plan generation with ${cmd}"
eval ${cmd}
Expand All @@ -192,9 +200,9 @@ EOM

compile_hqps_so() {
#check input params size eq 2 or 3
if [ $# -gt 7 ] || [ $# -lt 4 ]; then
if [ $# -gt 8 ] || [ $# -lt 4 ]; then
echo "Usage: $0 <input_file> <work_dir> <ir_compiler_properties_file> <graph_schema_file> "
echo " [output_dir] [stored_procedure_name] [stored_procedure_description]"
echo " [statistic_path] [output_dir] [stored_procedure_name] [stored_procedure_description]"
exit 1
fi
input_path=$1
Expand All @@ -219,13 +227,20 @@ compile_hqps_so() {
procedure_description=""
fi

if [ $# -ge 8 ]; then
statistic_path=$8
else
statistic_path=""
fi

info "Input path = ${input_path}"
info "Work dir = ${work_dir}"
info "ir compiler properties = ${ir_compiler_properties}"
info "graph schema path = ${graph_schema_path}"
info "Output dir = ${output_dir}"
info "Procedure name = ${procedure_name}"
info "Procedure description = ${procedure_description}"
info "Statistic path = ${statistic_path}"

last_file_name=$(basename ${input_path})

Expand Down Expand Up @@ -279,7 +294,7 @@ compile_hqps_so() {
output_pb_path="${cur_dir}/${procedure_name}.pb"
cypher_to_plan ${procedure_name} ${input_path} ${output_pb_path} \
${output_yaml_path} ${ir_compiler_properties} ${graph_schema_path} \
${procedure_name} "${procedure_description}"
${procedure_name} "${procedure_description}" ${statistic_path}

info "----------------------------"
info "Codegen from cypher query done."
Expand Down Expand Up @@ -554,6 +569,10 @@ run() {
PROCEDURE_DESCRIPTION="${i#*=}"
shift # past argument=value
;;
--statistic_path=*)
STATISTIC_PATH="${i#*=}"
shift # past argument=value
;;
-* | --*)
err "Unknown option $i"
exit 1
Expand All @@ -571,6 +590,7 @@ run() {
echo "Output path ="${OUTPUT_DIR}
echo "Procedure name ="${PROCEDURE_NAME}
echo "Procedure description ="${PROCEDURE_DESCRIPTION}
echo "Statistic path ="${STATISTIC_PATH}

find_resources

Expand Down Expand Up @@ -598,7 +618,7 @@ run() {
PROCEDURE_NAME="${PROCEDURE_NAME%.cc}"
PROCEDURE_NAME="${PROCEDURE_NAME%.pb}"
fi
compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${OUTPUT_DIR} ${PROCEDURE_NAME} "${PROCEDURE_DESCRIPTION}"
compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${OUTPUT_DIR} ${PROCEDURE_NAME} "${PROCEDURE_DESCRIPTION}" ${STATISTIC_PATH}

# else if engine_type equals pegasus
elif [ ${ENGINE_TYPE} == "pegasus" ]; then
Expand Down
13 changes: 12 additions & 1 deletion flex/codegen/gen_code_from_plan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ void deserialize_plan_and_gen_pegasus(const std::string& input_file_path,

void deserialize_plan_and_gen_hqps(const std::string& input_file_path,
const std::string& output_file_path,
const std::string& graph_schema_path) {
const std::string& graph_schema_path,
bool dump_json_plan = true) {
LOG(INFO) << "Start deserializing from: " << input_file_path;
std::string content_str = read_binary_str_from_path(input_file_path);
LOG(INFO) << "Deserialized plan size : " << content_str.size() << ", from "
Expand All @@ -83,6 +84,16 @@ void deserialize_plan_and_gen_hqps(const std::string& input_file_path,
CHECK(plan_pb.ParseFromArray(content_str.data(), content_str.size()));
LOG(INFO) << "deserialized plan size : " << plan_pb.ByteSizeLong();
VLOG(1) << "deserialized plan : " << plan_pb.DebugString();
if (dump_json_plan) {
std::string output_path = output_file_path + ".json";
std::string json_plan;
google::protobuf::util::JsonOptions option;
option.always_print_primitive_fields = true;
google::protobuf::util::MessageToJsonString(plan_pb, &json_plan, option);
std::ofstream out(output_path);
out << json_plan;
out.close();
}
BuildingContext context;
std::shared_ptr<QueryGenerator<uint8_t>> query_generator;
// load schema
Expand Down
4 changes: 4 additions & 0 deletions flex/codegen/src/codegen_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,13 +201,16 @@ codegen::ParamConst variable_to_param_const(const common::Variable& var,
auto& var_property = var.property();
if (var_property.has_label()) {
param_const.var_name = "label";
param_const.expr_var_name = "label";
param_const.type = codegen::DataType::kLabelId;
} else if (var_property.has_key()) {
param_const.var_name = var.property().key().name();
param_const.expr_var_name = var.property().key().name();
param_const.type =
common_data_type_pb_2_data_type(var.node_type().data_type());
} else if (var_property.has_id()) {
param_const.var_name = ctx.GetNextVarName();
param_const.expr_var_name = ctx.GetNextVarName();
param_const.type = codegen::DataType::kGlobalVertexId;
} else {
LOG(FATAL) << "Unexpected property type: " << var_property.DebugString();
Expand All @@ -217,6 +220,7 @@ codegen::ParamConst variable_to_param_const(const common::Variable& var,
if (var.has_node_type()) {
auto node_type = var.node_type();
param_const.var_name = ctx.GetNextVarName();
param_const.expr_var_name = param_const.var_name;
if (node_type.type_case() == common::IrDataType::kDataType) {
param_const.type =
common_data_type_pb_2_data_type(node_type.data_type());
Expand Down
10 changes: 7 additions & 3 deletions flex/codegen/src/graph_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,14 @@ enum class DataType {
struct ParamConst {
DataType type;
std::string var_name;
std::string expr_var_name;
int32_t id; // unique id for each param const
};

// implement operator == for ParamConst
inline bool operator==(const ParamConst& lhs, const ParamConst& rhs) {
return lhs.type == rhs.type && lhs.var_name == rhs.var_name &&
lhs.id == rhs.id;
lhs.expr_var_name == rhs.expr_var_name && lhs.id == rhs.id;
}

} // namespace codegen
Expand Down Expand Up @@ -159,7 +160,7 @@ static codegen::ParamConst param_const_pb_to_param_const(
CHECK(data_type_pb.type_case() == common::IrDataType::kDataType);
return codegen::ParamConst{
common_data_type_pb_2_data_type(data_type_pb.data_type()),
param_const_pb.name(), param_const_pb.index()};
param_const_pb.name(), param_const_pb.name(), param_const_pb.index()};
}

static codegen::ParamConst param_const_pb_to_param_const(
Expand All @@ -168,7 +169,8 @@ static codegen::ParamConst param_const_pb_to_param_const(
if (ir_data_type.type_case() == common::IrDataType::kDataType) {
auto primitive_type = ir_data_type.data_type();
return codegen::ParamConst{common_data_type_pb_2_data_type(primitive_type),
param_const_pb.name(), param_const_pb.index()};
param_const_pb.name(), param_const_pb.name(),
param_const_pb.index()};
} else {
throw std::runtime_error("Expect node type in ir_data_type");
}
Expand Down Expand Up @@ -280,11 +282,13 @@ static void parse_param_const_from_pb(
if (data_type.type_case() == common::IrDataType::kDataType) {
param_cost.type = common_data_type_pb_2_data_type(data_type.data_type());
param_cost.var_name = param_const_pb.name();
param_cost.expr_var_name = param_const_pb.name();
param_cost.id = param_const_pb.index();
return;
} else if (node_type.type_case() == common::IrDataType::kDataType) {
param_cost.type = common_data_type_pb_2_data_type(node_type.data_type());
param_cost.var_name = param_const_pb.name();
param_cost.expr_var_name = param_const_pb.name();
param_cost.id = param_const_pb.index();
return;
} else {
Expand Down
38 changes: 16 additions & 22 deletions flex/codegen/src/hqps/hqps_expr_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,11 +358,8 @@ class ExprBuilder {
// and also set a expr node for it. which is unique.
make_var_name_unique(param_const);
func_call_vars_.push_back(param_const);
expr_nodes_.emplace_back(param_const.var_name);
expr_nodes_.emplace_back(param_const.expr_var_name);

// expr_nodes_.emplace_back(param_const.var_name);
// convert a variable to a tag property,
// gs::NamedProperty<gs::Int64>{"prop1"}, saved for later use.
tag_selectors_.emplace_back(
variable_to_tag_id_property_selector(ctx_, var));
VLOG(10) << "Got var: " << var.DebugString();
Expand All @@ -389,7 +386,7 @@ class ExprBuilder {
VLOG(10) << "receive param const: " << param_const_pb.DebugString();
make_var_name_unique(param_const);
construct_params_.push_back(param_const);
expr_nodes_.emplace_back(param_const.var_name + "_");
expr_nodes_.emplace_back(param_const.expr_var_name);
break;
}

Expand All @@ -413,14 +410,11 @@ class ExprBuilder {
// for each variable, we need add the variable to func_call_vars_.
// and also set a expr node for it. which is unique.
func_call_vars_.push_back(param_const);
ss << param_const.var_name;
ss << param_const.expr_var_name;
if (i < vars.keys_size() - 1) {
ss << ",";
}

// expr_nodes_.emplace_back(param_const.var_name);
// convert a variable to a tag property,
// gs::NamedProperty<gs::Int64>{"prop1"}, saved for later use.
tag_selectors_.emplace_back(
variable_to_tag_id_property_selector(ctx_, cur_var));
}
Expand Down Expand Up @@ -534,7 +528,7 @@ class ExprBuilder {
std::stringstream ss;
for (size_t i = 0; i < construct_params_.size(); ++i) {
ss << data_type_2_string(construct_params_[i].type) << " "
<< construct_params_[i].var_name;
<< construct_params_[i].expr_var_name;
if (i + 1 != construct_params_.size()) {
ss << ",";
}
Expand All @@ -548,8 +542,8 @@ class ExprBuilder {
ss << ":";
}
for (size_t i = 0; i < construct_params_.size(); ++i) {
ss << construct_params_[i].var_name << "_"
<< "(" << construct_params_[i].var_name << ")";
ss << construct_params_[i].expr_var_name << "("
<< construct_params_[i].expr_var_name << ")";
if (i != construct_params_.size() - 1) {
ss << ",";
}
Expand Down Expand Up @@ -578,7 +572,7 @@ class ExprBuilder {
std::stringstream ss;
for (size_t i = 0; i < func_call_vars_.size(); ++i) {
ss << data_type_2_string(func_call_vars_[i].type) << " "
<< func_call_vars_[i].var_name;
<< func_call_vars_[i].expr_var_name;
if (i != func_call_vars_.size() - 1) {
ss << ",";
}
Expand All @@ -600,31 +594,31 @@ class ExprBuilder {
std::stringstream ss;
for (size_t i = 0; i < construct_params_.size(); ++i) {
ss << data_type_2_string(construct_params_[i].type) << " "
<< construct_params_[i].var_name << "_;" << std::endl;
<< construct_params_[i].expr_var_name << ";" << std::endl;
}
return ss.str();
}

void make_var_name_unique(codegen::ParamConst& param_const) {
std::unordered_set<std::string> var_names;
for (auto& param : construct_params_) {
auto res = var_names.insert(param.var_name);
CHECK(res.second) << "var name: " << param.var_name
auto res = var_names.insert(param.expr_var_name);
CHECK(res.second) << "var name: " << param.expr_var_name
<< " already exists, illegal state";
}
for (auto& param : func_call_vars_) {
auto res = var_names.insert(param.var_name);
CHECK(res.second) << "var name: " << param.var_name
auto res = var_names.insert(param.expr_var_name);
CHECK(res.second) << "var name: " << param.expr_var_name
<< " already exists, illegal state";
}
auto cur_var_name = param_const.var_name;
auto cur_var_name = param_const.expr_var_name;
int i = 0;
while (var_names.find(cur_var_name) != var_names.end()) {
cur_var_name = param_const.var_name + "_" + std::to_string(i);
cur_var_name = param_const.expr_var_name + "_" + std::to_string(i);
++i;
}
param_const.var_name = cur_var_name;
VLOG(10) << "make var name unique: " << param_const.var_name;
param_const.expr_var_name = cur_var_name;
VLOG(10) << "make var name unique: " << param_const.expr_var_name;
}

// this corresponding to the input params.
Expand Down
3 changes: 1 addition & 2 deletions flex/codegen/src/hqps/hqps_path_expand_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,11 @@ class PathExpandOpBuilder {
if (params.tables().size() < 1) {
throw std::runtime_error("no edge labels found");
} else if (params.tables().size() == 1) {
physical::PhysicalOpr::MetaData meta_data;
// pass an empty meta_data, since we need no meta_data for
std::tie(edge_expand_opt_name_, edge_expand_opt_) =
BuildOneLabelEdgeExpandOpt(ctx_, direction_, params,
dst_vertex_labels_, expand_opt,
meta_data);
meta_data_pb[0]);
} else {
// get the first meta_data
if (meta_data_pb.size() < 1) {
Expand Down
3 changes: 0 additions & 3 deletions flex/codegen/src/hqps/hqps_project_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ std::tuple<std::string, std::string, std::string> concatenate_expr_built_result(
}
{
std::stringstream ss;
if (func_construct_param_const.size() > 0) {
ss << ", ";
}
for (size_t i = 0; i < func_construct_param_const.size(); ++i) {
ss << func_construct_param_const[i].var_name;
if (i != func_construct_param_const.size() - 1) {
Expand Down
Loading

0 comments on commit 3a99cc1

Please sign in to comment.