Skip to content

Commit

Permalink
Allow empty seconds when parsing timestamps strings (#9401)
Browse files Browse the repository at this point in the history
Summary:

Following Presto semantics, seconds are allowed to be empty when
parsing timestamp strings. For example, both formats should be allowed:

> 2024-01-01 00:00:00
> 2024-01-01 00:00

It is an open question whether we want to change it only for Preto string to
timestamp casts, or across the board. This PR changes for all conversions since
this seems pretty harmless (and intuitive).

Also fixing a small bug: "00:00:00." shouldn't be allowed.

Reviewed By: mbasmanova

Differential Revision: D55831722
  • Loading branch information
pedroerp authored and facebook-github-bot committed Apr 11, 2024
1 parent f51e34d commit 4366298
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 31 deletions.
48 changes: 26 additions & 22 deletions velox/type/TimestampConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,15 +338,16 @@ bool tryParseDateString(
return daysSinceEpochFromDate(year, month, day, daysSinceEpoch).ok();
}

// String format is hh:mm:ss.microseconds (microseconds are optional).
// String format is hh:mm:ss.microseconds (seconds and microseconds are
// optional).
// ISO 8601
bool tryParseTimeString(
const char* buf,
size_t len,
size_t& pos,
int64_t& result,
bool strict) {
int32_t hour = -1, min = -1, sec = -1, micros = -1;
int32_t hour = 0, min = 0, sec = 0, micros = 0;
pos = 0;

if (len == 0) {
Expand All @@ -366,6 +367,7 @@ bool tryParseTimeString(
return false;
}

// Read the hours.
if (!parseDoubleDigit(buf, len, pos, hour)) {
return false;
}
Expand All @@ -384,36 +386,38 @@ bool tryParseTimeString(
return false;
}

// Read the minutes.
if (!parseDoubleDigit(buf, len, pos, min)) {
return false;
}
if (min < 0 || min >= 60) {
return false;
}

if (pos >= len) {
return false;
}
// Try to read seconds.
if (pos < len && buf[pos] == sep) {
++pos;
if (!parseDoubleDigit(buf, len, pos, sec)) {
return false;
}
if (sec < 0 || sec > 60) {
return false;
}

if (buf[pos++] != sep) {
return false;
}
// Try to read microseconds.
if (pos < len && buf[pos] == '.') {
++pos;

if (!parseDoubleDigit(buf, len, pos, sec)) {
return false;
}
if (sec < 0 || sec > 60) {
return false;
}
if (pos >= len) {
return false;
}

micros = 0;
if (pos < len && buf[pos] == '.') {
pos++;
// We expect microseconds.
int32_t mult = 100000;
for (; pos < len && characterIsDigit(buf[pos]); pos++, mult /= 10) {
if (mult > 0) {
micros += (buf[pos] - '0') * mult;
// We expect microseconds.
int32_t mult = 100000;
for (; pos < len && characterIsDigit(buf[pos]); pos++, mult /= 10) {
if (mult > 0) {
micros += (buf[pos] - '0') * mult;
}
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions velox/type/TimestampConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ int64_t
fromTime(int32_t hour, int32_t minute, int32_t second, int32_t microseconds);

/// Parses the input string and returns the number of cumulative microseconds,
/// following the "HH:MM:SS[.MS]" format (ISO 8601).
/// following the "HH:MM[:SS[.MS]]" format (ISO 8601).
//
/// Throws VeloxUserError if the format or time is invalid.
int64_t fromTimeString(const char* buf, size_t len);
Expand All @@ -132,7 +132,7 @@ inline int64_t fromTimeString(const StringView& str) {
// Timestamp conversion

/// Parses a full ISO 8601 timestamp string, following the format
/// "YYYY-MM-DD HH:MM:SS[.MS] +00:00"
/// "YYYY-MM-DD HH:MM[:SS[.MS]] +00:00"
Timestamp fromTimestampString(const char* buf, size_t len);

inline Timestamp fromTimestampString(const StringView& str) {
Expand Down
21 changes: 14 additions & 7 deletions velox/type/tests/TimestampConversionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,17 +229,22 @@ TEST(DateTimeUtilTest, fromTimeString) {
}

TEST(DateTimeUtilTest, fromTimeStrInvalid) {
EXPECT_THROW(fromTimeString(""), VeloxUserError);
EXPECT_THROW(fromTimeString("00"), VeloxUserError);
EXPECT_THROW(fromTimeString("00:00"), VeloxUserError);
const std::string errorMsg = "Unable to parse time value: ";
VELOX_ASSERT_THROW(fromTimeString(""), errorMsg);
VELOX_ASSERT_THROW(fromTimeString("00"), errorMsg);
VELOX_ASSERT_THROW(fromTimeString("00:"), errorMsg);
VELOX_ASSERT_THROW(fromTimeString("00:00:"), errorMsg);
VELOX_ASSERT_THROW(fromTimeString("00:00:00."), errorMsg);
VELOX_ASSERT_THROW(fromTimeString("00:00-00"), errorMsg);
VELOX_ASSERT_THROW(fromTimeString("00/00:00"), errorMsg);

// Invalid hour, minutes and seconds.
EXPECT_THROW(fromTimeString("24:00:00"), VeloxUserError);
EXPECT_THROW(fromTimeString("00:61:00"), VeloxUserError);
EXPECT_THROW(fromTimeString("00:00:61"), VeloxUserError);
VELOX_ASSERT_THROW(fromTimeString("24:00:00"), errorMsg);
VELOX_ASSERT_THROW(fromTimeString("00:61:00"), errorMsg);
VELOX_ASSERT_THROW(fromTimeString("00:00:61"), errorMsg);

// Trailing characters.
EXPECT_THROW(fromTimeString("00:00:00 12"), VeloxUserError);
VELOX_ASSERT_THROW(fromTimeString("00:00:00 12"), errorMsg);
}

// bash command to verify:
Expand All @@ -249,7 +254,9 @@ TEST(DateTimeUtilTest, fromTimestampString) {
EXPECT_EQ(Timestamp(0, 0), fromTimestampString("1970-01-01"));
EXPECT_EQ(Timestamp(946684800, 0), fromTimestampString("2000-01-01"));

EXPECT_EQ(Timestamp(0, 0), fromTimestampString("1970-01-01 00:00"));
EXPECT_EQ(Timestamp(0, 0), fromTimestampString("1970-01-01 00:00:00"));

EXPECT_EQ(
Timestamp(946729316, 0), fromTimestampString("2000-01-01 12:21:56"));
EXPECT_EQ(
Expand Down

0 comments on commit 4366298

Please sign in to comment.