Skip to content

Commit

Permalink
fix(core): enable date and time types support in vineyard-graph (and …
Browse files Browse the repository at this point in the history
…GIE) (#3498)

Signed-off-by: Tao He <[email protected]>
  • Loading branch information
sighingnow authored Jan 21, 2024
1 parent 6346651 commit 763a1ed
Show file tree
Hide file tree
Showing 31 changed files with 1,202 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-graphscope-wheels-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ jobs:
run: |
. ~/.graphscope_env
python3 -m pip install libclang
git clone --single-branch -b v0.18.2 --depth=1 https://github.com/v6d-io/v6d.git /tmp/v6d
git clone --single-branch -b v0.20.2 --depth=1 https://github.com/v6d-io/v6d.git /tmp/v6d
cd /tmp/v6d
git submodule update --init
cmake . -DCMAKE_INSTALL_PREFIX=/usr/local \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gae.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-20.04
if: ${{ github.repository == 'alibaba/GraphScope' }}
container:
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.18.2
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.20.2
steps:
- uses: actions/checkout@v3

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gaia.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ concurrency:

jobs:
gaia-test:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
steps:
- uses: actions/checkout@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gss.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
# Require the host is able to run docker without sudo and
# can `ssh localhost` without password, which may need to
# be configured manually when a new self-hosted runner is added.
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
- uses: actions/checkout@v3
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/k8s-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ jobs:
fi
build-analytical:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
needs: [changes]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
Expand All @@ -224,7 +224,7 @@ jobs:
retention-days: 5

build-analytical-java:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
needs: [changes]
#if: ${{ github.repository == 'alibaba/GraphScope' }}
if: false
Expand Down Expand Up @@ -259,7 +259,7 @@ jobs:
retention-days: 5

build-interactive:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
needs: [changes]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
Expand Down Expand Up @@ -301,7 +301,7 @@ jobs:
retention-days: 5

build-learning:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
needs: [changes]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
Expand Down Expand Up @@ -616,7 +616,7 @@ jobs:
needs: [changes]
# Require the user id of the self-hosted is 1001, which may need to be
# configured manually when a new self-hosted runner is added.
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
- uses: actions/checkout@v3
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/local-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ jobs:
path: artifacts

- name: Prepare Environment
env:
GS_TEST_DIR: ${{ github.workspace }}/gstest
run: |
# install graphscope-client
cd artifacts
Expand All @@ -300,6 +302,9 @@ jobs:
# install java
sudo apt update -y && sudo apt install openjdk-11-jdk -y
# download dataset
git clone -b master --single-branch --depth=1 https://github.com/7br/gstest.git ${GS_TEST_DIR}
- name: Setup tmate session
uses: mxschmitt/action-tmate@v3
if: false
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/networkx-forward-algo-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
run:
shell: bash --noprofile --norc -eo pipefail {0}
container:
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.18.2
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.20.2
options:
--shm-size 4096m

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
sudo mkdir /opt/graphscope
sudo chown -R $(id -u):$(id -g) /opt/graphscope
python3 -m pip install click
python3 gsctl.py install-deps dev --v6d-version v0.18.2
python3 gsctl.py install-deps dev --v6d-version v0.20.2
- name: Setup tmate session
if: false
Expand Down
72 changes: 72 additions & 0 deletions analytical_engine/core/object/fragment_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,46 @@ gs::rpc::graph::DataTypePb PropertyTypeToPb(vineyard::PropertyType type) {
return gs::rpc::graph::DataTypePb::STRING;
} else if (arrow::large_utf8()->Equals(type)) {
return gs::rpc::graph::DataTypePb::STRING;
} else if (arrow::date32()->Equals(type)) {
return gs::rpc::graph::DataTypePb::DATE32;
} else if (arrow::date64()->Equals(type)) {
return gs::rpc::graph::DataTypePb::DATE64;
} else if (type->id() == arrow::Type::TIME32) {
auto time32_type = std::dynamic_pointer_cast<arrow::Time32Type>(type);
switch (time32_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIME32_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIME32_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIME32_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIME32_NS;
}
} else if (type->id() == arrow::Type::TIME64) {
auto time64_type = std::dynamic_pointer_cast<arrow::Time64Type>(type);
switch (time64_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIME64_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIME64_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIME64_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIME64_NS;
}
} else if (type->id() == arrow::Type::TIMESTAMP) {
auto timestamp_type = std::dynamic_pointer_cast<arrow::TimestampType>(type);
switch (timestamp_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIMESTAMP_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIMESTAMP_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIMESTAMP_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIMESTAMP_NS;
}
} else if (arrow::large_list(arrow::int32())->Equals(type)) {
return gs::rpc::graph::DataTypePb::INT_LIST;
} else if (arrow::large_list(arrow::int64())->Equals(type)) {
Expand Down Expand Up @@ -138,6 +178,38 @@ gs::rpc::graph::DataTypePb PropertyTypeToPb(const std::string& type) {
return gs::rpc::graph::DataTypePb::LONG_LIST;
} else if (type == "float_list") {
return gs::rpc::graph::DataTypePb::FLOAT_LIST;
} else if (type == "date32[day]") {
return gs::rpc::graph::DataTypePb::DATE32;
} else if (type == "date64[ms]") {
return gs::rpc::graph::DataTypePb::DATE64;
} else if (type == "time32[s]") {
return gs::rpc::graph::DataTypePb::TIME32_S;
} else if (type == "time32[ms]") {
return gs::rpc::graph::DataTypePb::TIME32_MS;
} else if (type == "time32[us]") {
return gs::rpc::graph::DataTypePb::TIME32_US;
} else if (type == "time32[ns]") {
return gs::rpc::graph::DataTypePb::TIME32_NS;
} else if (type == "time64[s]") {
return gs::rpc::graph::DataTypePb::TIME64_S;
} else if (type == "time64[ms]") {
return gs::rpc::graph::DataTypePb::TIME64_MS;
} else if (type == "time64[us]") {
return gs::rpc::graph::DataTypePb::TIME64_US;
} else if (type == "time64[ns]") {
return gs::rpc::graph::DataTypePb::TIME64_NS;
} else if (type.substr(0, std::string("timestamp[s]").length()) ==
"timestamp[s]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_S;
} else if (type.substr(0, std::string("timestamp[ms]").length()) ==
"timestamp[ms]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_MS;
} else if (type.substr(0, std::string("timestamp[us]").length()) ==
"timestamp[us]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_US;
} else if (type.substr(0, std::string("timestamp[ns]").length()) ==
"timestamp[ns]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_NS;
} else if (type == "double_list") {
return gs::rpc::graph::DataTypePb::DOUBLE_LIST;
} else if (type == "string_list" || type == "str_list") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ public class DataType {
public static final DataType DOUBLE = new DataType(InternalDataType.DOUBLE);
public static final DataType BYTES = new DataType(InternalDataType.BYTES);
public static final DataType STRING = new DataType(InternalDataType.STRING);

// See also: `Date32` in common.proto.
public static final DataType DATE = new DataType(InternalDataType.DATE);
// See also: `Time32` in common.proto.
public static final DataType TIME = new DataType(InternalDataType.TIME);
// See also: `Timestamp` in common.proto.
public static final DataType TIMESTAMP = new DataType(InternalDataType.TIMESTAMP);

// For LIST, SET and MAP
@JsonProperty private String expression;
Expand All @@ -58,6 +64,15 @@ public static DataType toDataType(int i) {
}

public static DataType valueOf(String typeName) {
if (typeName.startsWith("DATE")) {
return DATE;
}
if (typeName.startsWith("TIME")) {
return TIME;
}
if (typeName.startsWith("TIMESTAMP")) {
return TIMESTAMP;
}
return new DataType(InternalDataType.valueOf(typeName));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,19 @@ public enum InternalDataType {
* STRING data type in InteractiveEngine, map to String in Java
*/
STRING,

/**
* INT data type in InteractiveEngine, map to Integer(int) in Java
* Date data type in InteractiveEngine, map to DateValue in Java
*/
DATE,
/**
* Date data type in InteractiveEngine, map to TimeValue in Java
*/
TIME,
/**
* Date data type in InteractiveEngine, map to DateTimeValue in Java
*/
TIMESTAMP,

/**
* SET data type, Collection Type, can mixed with list and map, example:Set, value: List<Map<List<String>,List<String>>>
Expand Down Expand Up @@ -94,6 +103,8 @@ public enum InternalDataType {
&& value != UNKNOWN
&& value != CHAR
&& value != DATE
&& value != TIME
&& value != TIMESTAMP
&& value != SHORT) {
primitiveTypes.add(value.name());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,16 @@ public static DataType parseFromDataType(
return DataType.DOUBLE;

case STRING:
return DataType.STRING;

case DATE:
return DataType.STRING;
return DataType.DATE;

case TIME:
return DataType.TIME32;

case TIMESTAMP:
return DataType.TIMESTAMP;

case BYTES:
return DataType.BYTES;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.alibaba.graphscope.gaia.proto.Common;
import com.alibaba.graphscope.gaia.proto.IrResult;
import com.alibaba.graphscope.gremlin.exception.GremlinResultParserException;
import com.google.common.base.Preconditions;

import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.Vertex;
Expand All @@ -29,10 +30,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.time.*;
import java.util.*;
import java.util.stream.Collectors;

public class ParserUtils {
Expand Down Expand Up @@ -91,6 +90,51 @@ private static Object parseCommonValue(Common.Value value) {
return value.getF64();
case STR:
return value.getStr();
case DATE:
Preconditions.checkArgument(
value.getDate().getItem() >= 0,
"Date prior to 1970-00-00 is not supported, got %d",
value.getDate().getItem());
return new Date(((long) value.getDate().getItem()) * 24 * 60 * 60 * 1000);
case TIME:
Preconditions.checkArgument(
value.getTime().getItem() >= 0,
"Time of day must be greater than 00:00:00, got %d",
value.getTime().getItem());
// gremlin-python doesn't support local time
//
// see also:
// https://github.com/apache/tinkerpop/blob/master/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py#L105-L107
OffsetTime time =
LocalTime.ofNanoOfDay(((long) value.getTime().getItem()) * 1000_000L)
.atOffset(ZoneOffset.UTC);
// to ISO-8601 formats: HH:mm:ss.SSSSSS
//
// see also:
// - https://docs.oracle.com/javase/8/docs/api/java/time/LocalTime.html#toString--
// - https://docs.oracle.com/javase/8/docs/api/java/time/OffsetTime.html#toString--
return time.toString();
case TIMESTAMP:
Preconditions.checkArgument(
value.getTimestamp().getItem() >= 0,
"Timestamp prior to 1970-00-00 00:00:00 is not supported, got %d",
value.getTimestamp().getItem());
// gremlin-python will convert timestamp to float, that isn't what we want
//
// see also:
// https://github.com/apache/tinkerpop/blob/master/gremlin-python/src/main/python/gremlin_python/statics.py#L48
//
// We use java.util.Instant rather than java.sql.Timestamp for a UTC timestamp value
OffsetDateTime ts =
Instant.ofEpochSecond(
value.getTimestamp().getItem() / 1000L,
value.getTimestamp().getItem() % 1000L * 1000_000L)
.atOffset(ZoneOffset.UTC);
// to ISO-8601 format: uuuu-MM-dd'T'HH:mm:ss.SSSSSSXXXXX
//
// see also:
// https://docs.oracle.com/javase/8/docs/api/java/time/OffsetDateTime.html#toString--
return ts.toString();
case PAIR_ARRAY:
Common.PairArray pairs = value.getPairArray();
Map pairInMap = new HashMap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
use std::fmt;

use ahash::HashMap;
use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
use dyn_type::DateTimeFormats;
use dyn_type::Object;
use dyn_type::Primitives;
use global_query::store_api::prelude::Property;
Expand Down Expand Up @@ -52,6 +54,18 @@ fn encode_runtime_prop_val(prop_val: Property) -> Object {
Property::Double(d) => Object::Primitive(Primitives::Float(d)),
Property::Bytes(v) => Object::Blob(v.into_boxed_slice()),
Property::String(s) => Object::String(s),
Property::Date(s) => match NaiveDate::parse_from_str(&s, "%Y-%m-%d") {
Ok(date) => Object::DateFormat(DateTimeFormats::Date(date)),
Err(_) => match NaiveTime::parse_from_str(&s, "%H:%M:%S.%6f") {
Ok(time) => Object::DateFormat(DateTimeFormats::Time(time)),
Err(_) => match NaiveDateTime::parse_from_str(&s, "%Y-%m-%d %H:%M:%S.%6f") {
Ok(datetime) => Object::DateFormat(DateTimeFormats::DateTime(datetime)),
Err(_) => {
unimplemented!("Failed to parse the datetime/timestamp property value: '{}'", s)
}
},
},
},
_ => unimplemented!(),
}
}
Expand Down
1 change: 1 addition & 0 deletions interactive_engine/executor/store/global_query/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
chrono = "0.4"
log = "0.4"
itertools = "0.10"
byteorder = "1.4.3"
Expand Down
Loading

0 comments on commit 763a1ed

Please sign in to comment.