diff --git a/velox/connectors/hive/HiveConfig.cpp b/velox/connectors/hive/HiveConfig.cpp index 477f555b5a4aa..dbbcd723af2f3 100644 --- a/velox/connectors/hive/HiveConfig.cpp +++ b/velox/connectors/hive/HiveConfig.cpp @@ -272,6 +272,16 @@ bool HiveConfig::s3UseProxyFromEnv() const { return config_->get(kS3UseProxyFromEnv, false); } +uint8_t HiveConfig::readTimestampUnit(const Config* session) const { + const auto unit = session->get( + kReadTimestampUnitSession, + config_->get(kReadTimestampUnit, 3 /*milli*/)); + VELOX_CHECK( + unit == 3 || unit == 6 /*micro*/ || unit == 9 /*nano*/, + "Invalid timestamp unit."); + return unit; +} + uint8_t HiveConfig::parquetWriteTimestampUnit(const Config* session) const { const auto unit = session->get( kParquetWriteTimestampUnitSession, diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h index 46bf85f797d4c..c93470bf8cfbe 100644 --- a/velox/connectors/hive/HiveConfig.h +++ b/velox/connectors/hive/HiveConfig.h @@ -219,6 +219,12 @@ class HiveConfig { static constexpr const char* kS3UseProxyFromEnv = "hive.s3.use-proxy-from-env"; + // The unit for reading timestamps from files. + static constexpr const char* kReadTimestampUnit = + "hive.reader.timestamp-unit"; + static constexpr const char* kReadTimestampUnitSession = + "hive.reader.timestamp_unit"; + /// Timestamp unit for Parquet write through Arrow bridge. static constexpr const char* kParquetWriteTimestampUnit = "hive.parquet.writer.timestamp-unit"; @@ -317,6 +323,9 @@ class HiveConfig { bool s3UseProxyFromEnv() const; + // Returns the timestamp unit used when reading timestamps from files. + uint8_t readTimestampUnit(const Config* session) const; + /// Returns the timestamp unit used when writing timestamps into Parquet /// through Arrow bridge. 0: second, 3: milli, 6: micro, 9: nano. uint8_t parquetWriteTimestampUnit(const Config* session) const; diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index 57c29a3e1d39a..7a3004db1a383 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -572,7 +572,9 @@ void configureRowReaderOptions( const std::shared_ptr& scanSpec, std::shared_ptr metadataFilter, const RowTypePtr& rowType, - const std::shared_ptr& hiveSplit) { + const std::shared_ptr& hiveSplit, + const std::shared_ptr& hiveConfig, + const Config* sessionProperties) { auto skipRowsIt = tableParameters.find(dwio::common::TableParameter::kSkipHeaderLineCount); if (skipRowsIt != tableParameters.end()) { @@ -582,6 +584,10 @@ void configureRowReaderOptions( rowReaderOptions.setMetadataFilter(std::move(metadataFilter)); rowReaderOptions.setRequestedType(rowType); rowReaderOptions.range(hiveSplit->start, hiveSplit->length); + if (hiveConfig && sessionProperties) { + rowReaderOptions.setTimestampPrecision(static_cast( + hiveConfig->readTimestampUnit(sessionProperties))); + } } namespace { diff --git a/velox/connectors/hive/HiveConnectorUtil.h b/velox/connectors/hive/HiveConnectorUtil.h index 621a8c59aaf33..fd39c9cd1810e 100644 --- a/velox/connectors/hive/HiveConnectorUtil.h +++ b/velox/connectors/hive/HiveConnectorUtil.h @@ -81,7 +81,9 @@ void configureRowReaderOptions( const std::shared_ptr& scanSpec, std::shared_ptr metadataFilter, const RowTypePtr& rowType, - const std::shared_ptr& hiveSplit); + const std::shared_ptr& hiveSplit, + const std::shared_ptr& hiveConfig = nullptr, + const Config* sessionProperties = nullptr); bool testFilters( const common::ScanSpec* scanSpec, diff --git a/velox/connectors/hive/SplitReader.cpp b/velox/connectors/hive/SplitReader.cpp index 617633ff0e44b..8565228f2778b 100644 --- a/velox/connectors/hive/SplitReader.cpp +++ b/velox/connectors/hive/SplitReader.cpp @@ -271,7 +271,9 @@ void SplitReader::createReader( scanSpec_, std::move(metadataFilter), ROW(std::move(columnNames), std::move(columnTypes)), - hiveSplit_); + hiveSplit_, + hiveConfig_, + connectorQueryCtx_->sessionProperties()); } bool SplitReader::checkIfSplitIsEmpty( diff --git a/velox/dwio/common/SelectiveColumnReader.cpp b/velox/dwio/common/SelectiveColumnReader.cpp index 895444b9eebf7..e8cebff97ebf5 100644 --- a/velox/dwio/common/SelectiveColumnReader.cpp +++ b/velox/dwio/common/SelectiveColumnReader.cpp @@ -216,9 +216,6 @@ void SelectiveColumnReader::getIntValues( VELOX_FAIL("Unsupported value size: {}", valueSize_); } break; - case TypeKind::TIMESTAMP: - getFlatValues(rows, result, requestedType); - break; default: VELOX_FAIL( "Not a valid type for integer reader: {}", requestedType->toString()); diff --git a/velox/dwio/parquet/reader/PageReader.cpp b/velox/dwio/parquet/reader/PageReader.cpp index 8372eda1ca0f3..004b0f6b801c7 100644 --- a/velox/dwio/parquet/reader/PageReader.cpp +++ b/velox/dwio/parquet/reader/PageReader.cpp @@ -31,6 +31,11 @@ namespace facebook::velox::parquet { using thrift::Encoding; using thrift::PageHeader; +struct __attribute__((__packed__)) Int96Timestamp { + int32_t days; + uint64_t nanos; +}; + void PageReader::seekToPage(int64_t row) { defineDecoder_.reset(); repeatDecoder_.reset(); diff --git a/velox/dwio/parquet/reader/ParquetData.h b/velox/dwio/parquet/reader/ParquetData.h index 15463b0f2fda4..360fe4febc8d4 100644 --- a/velox/dwio/parquet/reader/ParquetData.h +++ b/velox/dwio/parquet/reader/ParquetData.h @@ -36,17 +36,24 @@ class ParquetParams : public dwio::common::FormatParams { memory::MemoryPool& pool, dwio::common::ColumnReaderStatistics& stats, const FileMetaDataPtr metaData, - const date::time_zone* sessionTimezone) + const date::time_zone* sessionTimezone, + TimestampPrecision timestampPrecision) : FormatParams(pool, stats), metaData_(metaData), - sessionTimezone_(sessionTimezone) {} + sessionTimezone_(sessionTimezone), + timestampPrecision_(timestampPrecision) {} std::unique_ptr toFormatData( const std::shared_ptr& type, const common::ScanSpec& scanSpec) override; + TimestampPrecision timestampPrecision() const { + return timestampPrecision_; + } + private: const FileMetaDataPtr metaData_; const date::time_zone* sessionTimezone_; + const TimestampPrecision timestampPrecision_; }; /// Format-specific data created for each leaf column of a Parquet rowgroup. diff --git a/velox/dwio/parquet/reader/ParquetReader.cpp b/velox/dwio/parquet/reader/ParquetReader.cpp index 987cb8d9f260f..02eb14829166d 100644 --- a/velox/dwio/parquet/reader/ParquetReader.cpp +++ b/velox/dwio/parquet/reader/ParquetReader.cpp @@ -764,7 +764,8 @@ class ParquetRowReader::Impl { pool_, columnReaderStats_, readerBase_->fileMetaData(), - readerBase->sessionTimezone()); + readerBase->sessionTimezone(), + options_.timestampPrecision()); requestedType_ = options_.requestedType() ? options_.requestedType() : readerBase_->schema(); columnReader_ = ParquetColumnReader::build( diff --git a/velox/dwio/parquet/reader/TimestampColumnReader.h b/velox/dwio/parquet/reader/TimestampColumnReader.h index ebf6e0e175b93..23cd6c956b3a8 100644 --- a/velox/dwio/parquet/reader/TimestampColumnReader.h +++ b/velox/dwio/parquet/reader/TimestampColumnReader.h @@ -24,16 +24,44 @@ namespace facebook::velox::parquet { class TimestampColumnReader : public IntegerColumnReader { public: TimestampColumnReader( - const std::shared_ptr& requestedType, + const TypePtr& requestedType, std::shared_ptr fileType, ParquetParams& params, common::ScanSpec& scanSpec) - : IntegerColumnReader(requestedType, fileType, params, scanSpec) {} + : IntegerColumnReader(requestedType, fileType, params, scanSpec), + timestampPrecision_(params.timestampPrecision()) {} bool hasBulkPath() const override { return false; } + void getValues(RowSet rows, VectorPtr* result) override { + getFlatValues(rows, result, requestedType_); + if (allNull_) { + return; + } + + // Adjust timestamp nanos to the requested precision. + VectorPtr resultVector = *result; + auto rawValues = + resultVector->asUnchecked>()->mutableRawValues(); + for (auto i = 0; i < numValues_; ++i) { + const auto timestamp = rawValues[i]; + uint64_t nanos = timestamp.getNanos(); + switch (timestampPrecision_) { + case TimestampPrecision::kMilliseconds: + nanos = nanos / 1'000'000 * 1'000'000; + break; + case TimestampPrecision::kMicroseconds: + nanos = nanos / 1'000 * 1'000; + break; + case TimestampPrecision::kNanoseconds: + break; + } + rawValues[i] = Timestamp(timestamp.getSeconds(), nanos); + } + } + void read( vector_size_t offset, RowSet rows, @@ -44,6 +72,11 @@ class TimestampColumnReader : public IntegerColumnReader { readCommon(rows); readOffset_ += rows.back() + 1; } + + private: + // The requested precision can be specified from HiveConfig to read timestamp + // from Parquet. + TimestampPrecision timestampPrecision_; }; } // namespace facebook::velox::parquet diff --git a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp index f94b5c4ca8db3..52f53fa02e922 100644 --- a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp +++ b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp @@ -256,6 +256,19 @@ TEST_F(E2EFilterTest, integerDictionary) { 20); } +TEST_F(E2EFilterTest, timestampDictionary) { + options_.dataPageSize = 4 * 1024; + options_.writeInt96AsTimestamp = true; + + testWithTypes( + "timestamp_val_0:timestamp," + "timestamp_val_1:timestamp", + [&]() {}, + true, + {"timestamp_val_0", "timestamp_val_1"}, + 20); +} + TEST_F(E2EFilterTest, floatAndDoubleDirect) { options_.enableDictionary = false; options_.dataPageSize = 4 * 1024; diff --git a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp index 2f10d87d6b318..49c7136376f84 100644 --- a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp +++ b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp @@ -27,6 +27,7 @@ #include "velox/type/tests/SubfieldFiltersBuilder.h" #include "velox/connectors/hive/HiveConfig.h" +#include "velox/dwio/parquet/writer/Writer.h" using namespace facebook::velox; using namespace facebook::velox::exec; @@ -94,34 +95,6 @@ class ParquetTableScanTest : public HiveConnectorTestBase { assertQuery(plan, splits_, sql); } - void assertSelectWithFilter( - std::vector&& outputColumnNames, - const std::vector& subfieldFilters, - const std::string& remainingFilter, - const std::string& sql, - bool isFilterPushdownEnabled) { - auto rowType = getRowType(std::move(outputColumnNames)); - parse::ParseOptions options; - options.parseDecimalAsDouble = false; - - auto plan = PlanBuilder(pool_.get()) - .setParseOptions(options) - // Function extractFiltersFromRemainingFilter will extract - // filters to subfield filters, but for some types, filter - // pushdown is not supported. - .tableScan( - "hive_table", - rowType, - {}, - subfieldFilters, - remainingFilter, - nullptr, - isFilterPushdownEnabled) - .planNode(); - - assertQuery(plan, splits_, sql); - } - void assertSelectWithAgg( std::vector&& outputColumnNames, const std::vector& aggregates, @@ -217,6 +190,33 @@ class ParquetTableScanTest : public HiveConnectorTestBase { infoColumns)[0]; } + // Write data to a parquet file on specified path. + // @param writeInt96AsTimestamp Write timestamp as Int96 if enabled. + void writeToParquetFile( + const std::string& path, + const std::vector& data, + bool writeInt96AsTimestamp) { + VELOX_CHECK_GT(data.size(), 0); + + WriterOptions options; + options.writeInt96AsTimestamp = writeInt96AsTimestamp; + + auto writeFile = std::make_unique(path, true, false); + auto sink = std::make_unique( + std::move(writeFile), path); + auto childPool = + rootPool_->addAggregateChild("ParquetTableScanTest.Writer"); + options.memoryPool = childPool.get(); + + auto writer = std::make_unique( + std::move(sink), options, asRowType(data[0]->type())); + + for (const auto& vector : data) { + writer->write(vector); + } + writer->close(); + } + private: RowTypePtr getRowType(std::vector&& outputColumnNames) const { std::vector types; @@ -720,16 +720,16 @@ TEST_F(ParquetTableScanTest, timestampFilter) { // |2007-12-12 04:27:56| // +-------------------+ auto vector = makeFlatVector( - {Timestamp(1433116800, 70496000000000), - Timestamp(1433203200, 70496000000000), - Timestamp(981158400, 12846000000000), - Timestamp(888710400, 28866000000000), - Timestamp(1671753600, 14161000000000), - Timestamp(317520000, 1387000000000), - Timestamp(944611200, 49166000000000), - Timestamp(1682035200, 32974000000000), - Timestamp(968716800, 81389000000000), - Timestamp(1197417600, 16076000000000)}); + {Timestamp(1433187296, 0), + Timestamp(1433273696, 0), + Timestamp(981171246, 0), + Timestamp(888739266, 0), + Timestamp(1671767761, 0), + Timestamp(317521387, 0), + Timestamp(944660366, 0), + Timestamp(1682068174, 0), + Timestamp(968798189, 0), + Timestamp(1197433676, 0)}); loadData( getExampleFilePath("timestamp_int96.parquet"), @@ -740,44 +740,83 @@ TEST_F(ParquetTableScanTest, timestampFilter) { vector, })); - assertSelectWithFilter({"t"}, {}, "", "SELECT t from tmp", false); + assertSelectWithFilter({"t"}, {}, "", "SELECT t from tmp"); assertSelectWithFilter( {"t"}, {}, "t < TIMESTAMP '2000-09-12 22:36:29'", - "SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'", - false); + "SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'"); assertSelectWithFilter( {"t"}, {}, "t <= TIMESTAMP '2000-09-12 22:36:29'", - "SELECT t from tmp where t <= TIMESTAMP '2000-09-12 22:36:29'", - false); + "SELECT t from tmp where t <= TIMESTAMP '2000-09-12 22:36:29'"); assertSelectWithFilter( {"t"}, {}, "t > TIMESTAMP '1980-01-24 00:23:07'", - "SELECT t from tmp where t > TIMESTAMP '1980-01-24 00:23:07'", - false); + "SELECT t from tmp where t > TIMESTAMP '1980-01-24 00:23:07'"); assertSelectWithFilter( {"t"}, {}, "t >= TIMESTAMP '1980-01-24 00:23:07'", - "SELECT t from tmp where t >= TIMESTAMP '1980-01-24 00:23:07'", - false); + "SELECT t from tmp where t >= TIMESTAMP '1980-01-24 00:23:07'"); assertSelectWithFilter( {"t"}, {}, "t == TIMESTAMP '2022-12-23 03:56:01'", - "SELECT t from tmp where t == TIMESTAMP '2022-12-23 03:56:01'", - false); - VELOX_ASSERT_THROW( - assertSelectWithFilter( - {"t"}, - {"t < TIMESTAMP '2000-09-12 22:36:29'"}, - "", - "SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'"), - "testInt128() is not supported"); + "SELECT t from tmp where t == TIMESTAMP '2022-12-23 03:56:01'"); +} + +TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) { + // Write timestamp data into parquet. + constexpr int kSize = 10; + auto vector = makeRowVector({ + makeFlatVector( + kSize, [](auto i) { return Timestamp(i, i * 1'001'001); }), + }); + auto schema = asRowType(vector->type()); + auto file = TempFilePath::create(); + writeToParquetFile(file->getPath(), {vector}, true); + auto plan = PlanBuilder().tableScan(schema).planNode(); + + // Read timestamp data from parquet with microsecond precision. + CursorParameters params; + std::shared_ptr executor = + std::make_shared( + std::thread::hardware_concurrency()); + std::shared_ptr queryCtx = + core::QueryCtx::create(executor.get()); + std::unordered_map session = { + {std::string(connector::hive::HiveConfig::kReadTimestampUnitSession), + "6"}}; + queryCtx->setConnectorSessionOverridesUnsafe( + kHiveConnectorId, std::move(session)); + params.queryCtx = queryCtx; + params.planNode = plan; + const int numSplitsPerFile = 1; + + bool noMoreSplits = false; + auto addSplits = [&](exec::Task* task) { + if (!noMoreSplits) { + auto const splits = HiveConnectorTestBase::makeHiveConnectorSplits( + {file->getPath()}, + numSplitsPerFile, + dwio::common::FileFormat::PARQUET); + for (const auto& split : splits) { + task->addSplit("0", exec::Split(split)); + } + task->noMoreSplits("0"); + } + noMoreSplits = true; + }; + auto result = readCursor(params, addSplits); + ASSERT_TRUE(waitForTaskCompletion(result.first->task().get())); + auto expected = makeRowVector({ + makeFlatVector( + kSize, [](auto i) { return Timestamp(i, i * 1'001'000); }), + }); + assertEqualResults({expected}, result.second); } int main(int argc, char** argv) { diff --git a/velox/dwio/parquet/writer/Writer.cpp b/velox/dwio/parquet/writer/Writer.cpp index a0ba3a6cc6a5d..7bb2505e9ac70 100644 --- a/velox/dwio/parquet/writer/Writer.cpp +++ b/velox/dwio/parquet/writer/Writer.cpp @@ -240,6 +240,7 @@ Writer::Writer( arrowContext_->properties = getArrowParquetWriterOptions(options, flushPolicy_); setMemoryReclaimers(); + writeInt96AsTimestamp_ = options.writeInt96AsTimestamp; } Writer::Writer( @@ -257,7 +258,11 @@ Writer::Writer( void Writer::flush() { if (arrowContext_->stagingRows > 0) { if (!arrowContext_->writer) { - auto arrowProperties = ArrowWriterProperties::Builder().build(); + ArrowWriterProperties::Builder builder; + if (writeInt96AsTimestamp_) { + builder.enable_deprecated_int96_timestamps(); + } + auto arrowProperties = builder.build(); PARQUET_ASSIGN_OR_THROW( arrowContext_->writer, FileWriter::Open( diff --git a/velox/dwio/parquet/writer/Writer.h b/velox/dwio/parquet/writer/Writer.h index 7f1886708a2bd..2d74d1a5d2ba6 100644 --- a/velox/dwio/parquet/writer/Writer.h +++ b/velox/dwio/parquet/writer/Writer.h @@ -105,6 +105,7 @@ struct WriterOptions { columnCompressionsMap; uint8_t parquetWriteTimestampUnit = static_cast(TimestampUnit::kNano); + bool writeInt96AsTimestamp = false; }; // Writes Velox vectors into a DataSink using Arrow Parquet writer. @@ -163,6 +164,9 @@ class Writer : public dwio::common::Writer { const RowTypePtr schema_; ArrowOptions options_{.flattenDictionary = true, .flattenConstant = true}; + + // Whether to write Int96 timestamps in Arrow Parquet write. + bool writeInt96AsTimestamp_; }; class ParquetWriterFactory : public dwio::common::WriterFactory { diff --git a/velox/exec/tests/utils/PlanBuilder.cpp b/velox/exec/tests/utils/PlanBuilder.cpp index 019424f0a9907..a1db6fb235447 100644 --- a/velox/exec/tests/utils/PlanBuilder.cpp +++ b/velox/exec/tests/utils/PlanBuilder.cpp @@ -99,13 +99,11 @@ PlanBuilder& PlanBuilder::tableScan( const RowTypePtr& dataColumns, const std::unordered_map< std::string, - std::shared_ptr>& assignments, - bool isFilterPushdownEnabled) { + std::shared_ptr>& assignments) { return TableScanBuilder(*this) .tableName(tableName) .outputType(outputType) .columnAliases(columnAliases) - .filterPushdown(isFilterPushdownEnabled) .subfieldFilters(subfieldFilters) .remainingFilter(remainingFilter) .dataColumns(dataColumns) @@ -206,7 +204,7 @@ core::PlanNodePtr PlanBuilder::TableScanBuilder::build(core::PlanNodeId id) { tableHandle_ = std::make_shared( connectorId_, tableName_, - isFilterPushdownEnabled_, + true, std::move(filters), remainingFilterExpr, dataColumns_); diff --git a/velox/exec/tests/utils/PlanBuilder.h b/velox/exec/tests/utils/PlanBuilder.h index 352152b786843..471940f6766e9 100644 --- a/velox/exec/tests/utils/PlanBuilder.h +++ b/velox/exec/tests/utils/PlanBuilder.h @@ -151,8 +151,7 @@ class PlanBuilder { const RowTypePtr& dataColumns = nullptr, const std::unordered_map< std::string, - std::shared_ptr>& assignments = {}, - bool isFilterPushdownEnabled = true); + std::shared_ptr>& assignments = {}); /// Add a TableScanNode to scan a TPC-H table. /// @@ -188,12 +187,6 @@ class PlanBuilder { return *this; } - /// @param isFilterPushdownEnabled Whether filter push-down is enabled. - TableScanBuilder& filterPushdown(bool isFilterPushdownEnabled) { - isFilterPushdownEnabled_ = isFilterPushdownEnabled; - return *this; - } - /// @param outputType List of column names and types to read from the table. /// This property is required. TableScanBuilder& outputType(RowTypePtr outputType) { @@ -280,7 +273,6 @@ class PlanBuilder { PlanBuilder& planBuilder_; std::string tableName_{"hive_table"}; std::string connectorId_{"test-hive"}; - bool isFilterPushdownEnabled_ = true; RowTypePtr outputType_; std::vector subfieldFilters_; std::string remainingFilter_; diff --git a/velox/type/Filter.h b/velox/type/Filter.h index 8a079ed0f8756..4b4befbc17331 100644 --- a/velox/type/Filter.h +++ b/velox/type/Filter.h @@ -559,6 +559,10 @@ class IsNotNull final : public Filter { return true; } + bool testInt128(int128_t /* unused */) const final { + return true; + } + bool testInt64Range(int64_t /*min*/, int64_t /*max*/, bool /*hasNull*/) const final { return true; @@ -1819,6 +1823,11 @@ class TimestampRange final : public Filter { nullAllowed_ ? "with nulls" : "no nulls"); } + bool testInt128(int128_t value) const final { + const auto& ts = reinterpret_cast(value); + return ts >= lower_ && ts <= upper_; + } + bool testTimestamp(Timestamp value) const override { return value >= lower_ && value <= upper_; } diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp index ba6c1655815ef..05061eb0c4c1a 100644 --- a/velox/type/Timestamp.cpp +++ b/velox/type/Timestamp.cpp @@ -36,6 +36,18 @@ inline int64_t getPrestoTZOffsetInSeconds(int16_t tzID) { } // namespace +// static +Timestamp Timestamp::fromDaysAndNanos(int32_t days, int64_t nanos) { + int64_t seconds = + (days - kJulianToUnixEpochDays) * kSecondsInDay + nanos / kNanosInSecond; + int64_t remainingNanos = nanos % kNanosInSecond; + if (remainingNanos < 0) { + remainingNanos += kNanosInSecond; + seconds--; + } + return Timestamp(seconds, remainingNanos); +} + // static Timestamp Timestamp::now() { auto now = std::chrono::system_clock::now(); diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h index 08d6df6078a5c..29bdc0b526f8d 100644 --- a/velox/type/Timestamp.h +++ b/velox/type/Timestamp.h @@ -85,6 +85,7 @@ struct Timestamp { static constexpr int64_t kNanosecondsInMillisecond = 1'000'000; static constexpr int64_t kNanosInSecond = kNanosecondsInMillisecond * kMillisecondsInSecond; + // The number of days between the Julian epoch and the Unix epoch. static constexpr int64_t kJulianToUnixEpochDays = 2440588LL; static constexpr int64_t kSecondsInDay = 86400LL; @@ -112,16 +113,9 @@ struct Timestamp { VELOX_USER_DCHECK_LE(nanos, kMaxNanos, "Timestamp nanos out of range"); } - static Timestamp fromDaysAndNanos(int32_t days, int64_t nanos) { - int64_t seconds = (days - kJulianToUnixEpochDays) * kSecondsInDay + - nanos / kNanosInSecond; - int64_t remainingNanos = nanos % kNanosInSecond; - if (remainingNanos < 0) { - remainingNanos += kNanosInSecond; - seconds--; - } - return Timestamp(seconds, remainingNanos); - } + /// Creates a timestamp from the number of days since the Julian epoch + /// and the number of nanoseconds. + static Timestamp fromDaysAndNanos(int32_t days, int64_t nanos); // Returns the current unix timestamp (ms precision). static Timestamp now(); diff --git a/velox/type/Type.h b/velox/type/Type.h index fa88c726e3a0e..8a004dec043a8 100644 --- a/velox/type/Type.h +++ b/velox/type/Type.h @@ -42,11 +42,6 @@ namespace facebook::velox { using int128_t = __int128_t; -struct __attribute__((__packed__)) Int96Timestamp { - int32_t days; - uint64_t nanos; -}; - /// Velox type system supports a small set of SQL-compatible composeable types: /// BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, HUGEINT, REAL, DOUBLE, VARCHAR, /// VARBINARY, TIMESTAMP, ARRAY, MAP, ROW