diff --git a/.github/workflows/interactive.yml b/.github/workflows/interactive.yml index 279603cbca35..716c79ac2627 100644 --- a/.github/workflows/interactive.yml +++ b/.github/workflows/interactive.yml @@ -605,3 +605,15 @@ jobs: SCHEMA_FILE=${GITHUB_WORKSPACE}/flex/tests/rt_mutable_graph/movie_schema_test.yaml BULK_LOAD_FILE=${GITHUB_WORKSPACE}/flex/tests/rt_mutable_graph/movie_import_test.yaml GLOG_v=10 ./bin/bulk_loader -g ${SCHEMA_FILE} -l ${BULK_LOAD_FILE} -d /tmp/csr-data-dir/ + + - name: Test graph loading with different delimiter + env: + GS_TEST_DIR: ${{ github.workspace }}/gstest/ + FLEX_DATA_DIR: ${{ github.workspace }}/gstest/flex/modern_graph_tab_delimiter/ + run: | + rm -rf /tmp/csr-data-dir/ + cd ${GITHUB_WORKSPACE}/flex/build/ + SCHEMA_FILE=${GITHUB_WORKSPACE}/flex/interactive/examples/modern_graph/graph.yaml + BULK_LOAD_FILE=${GITHUB_WORKSPACE}/flex/interactive/examples/modern_graph/bulk_load.yaml + sed -i 's/|/\\t/g' ${BULK_LOAD_FILE} + GLOG_v=10 ./bin/bulk_loader -g ${SCHEMA_FILE} -l ${BULK_LOAD_FILE} -d /tmp/csr-data-dir/ diff --git a/docs/flex/interactive/data_import.md b/docs/flex/interactive/data_import.md index 7b7b454b5aa6..90d71eb350bc 100644 --- a/docs/flex/interactive/data_import.md +++ b/docs/flex/interactive/data_import.md @@ -227,7 +227,7 @@ The table below offers a detailed breakdown of each configuration item. In this | loading_config.scheme | file | The source of input data. Currently only `file` and `odps` are supported | No | | loading_config.format | N/A | The format of the raw data in CSV | Yes | | loading_config.format.metadata | N/A | Mainly for configuring the options for reading CSV | Yes | -| loading_config.format.metadata.delimiter | '\|' | Delimiter used to split a row of data | Yes | +| loading_config.format.metadata.delimiter | '\|' | Delimiter used to split a row of data; escaped characters are also supported, e.g. 
'\t' | Yes | | loading_config.format.metadata.header_row | true | Indicate if the first row should be used as the header | No | | loading_config.format.metadata.quoting | false | Whether quoting is used | No | | loading_config.format.metadata.quote_char | '\"' | Quoting character (if `quoting` is true) | No | diff --git a/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.cc b/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.cc index ef2f9412b0dc..682a6a460e3b 100644 --- a/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.cc +++ b/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.cc @@ -158,10 +158,25 @@ static std::vector read_header( /* put_delimiter_option, as patched by the '+' lines of this hunk: reads the delimiter string from loading_config.GetDelimiter() and stores a single character in parse_options.delimiter. A string that is neither exactly one character long nor starts with a backslash is reported through LOG(FATAL). A string that starts with a backslash must be exactly two characters long; the only accepted second character is 't', which is stored as a tab character, and any other second character is reported through LOG(FATAL). In every other case the first character of the string is stored unchanged. NOTE(review): a lone backslash (size 1) passes the first check and is then rejected with "Delimiter should be a single character", which is misleading for that input; the first message also says "a escape" where "an escape" is meant. */ static void put_delimiter_option(const LoadingConfig& loading_config, arrow::csv::ParseOptions& parse_options) { auto delimiter_str = loading_config.GetDelimiter(); - if (delimiter_str.size() != 1) { - LOG(FATAL) << "Delimiter should be a single character"; + if (delimiter_str.size() != 1 && delimiter_str[0] != '\\') { + LOG(FATAL) << "Delimiter should be a single character, or a escape " + "character, like '\\t'"; + } + if (delimiter_str[0] == '\\') { + if (delimiter_str.size() != 2) { + LOG(FATAL) << "Delimiter should be a single character"; + } + // escape the special character + switch (delimiter_str[1]) { + case 't': + parse_options.delimiter = '\t'; + break; + default: + LOG(FATAL) << "Unsupported escape character: " << delimiter_str[1]; + } + } else { + parse_options.delimiter = delimiter_str[0]; } - parse_options.delimiter = delimiter_str[0]; } static bool put_skip_rows_option(const LoadingConfig& loading_config,