-
Notifications
You must be signed in to change notification settings - Fork 453
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VL] Add support to write parquet files to GCS #3978
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,9 @@ | |
#include "velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.h" | ||
#include "velox/connectors/hive/storage_adapters/s3fs/S3Util.h" | ||
#endif | ||
#ifdef ENABLE_GCS | ||
#include "velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h" | ||
#endif | ||
#ifdef ENABLE_HDFS | ||
#include "velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.h" | ||
#include "velox/connectors/hive/storage_adapters/hdfs/HdfsUtil.h" | ||
|
@@ -54,12 +57,14 @@ class VeloxParquetDatasource final : public Datasource { | |
const std::string& filePath, | ||
std::shared_ptr<facebook::velox::memory::MemoryPool> veloxPool, | ||
std::shared_ptr<facebook::velox::memory::MemoryPool> s3SinkPool, | ||
std::shared_ptr<facebook::velox::memory::MemoryPool> gcsSinkPool, | ||
std::shared_ptr<arrow::Schema> schema) | ||
: Datasource(filePath, schema), | ||
filePath_(filePath), | ||
schema_(schema), | ||
pool_(std::move(veloxPool)), | ||
s3SinkPool_(std::move(s3SinkPool)) {} | ||
s3SinkPool_(std::move(s3SinkPool)), | ||
gcsSinkPool_(std::move(gcsSinkPool)) {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's extend the class to VeloxParquetDatsourceS3, VeloxParquetDatsourceGCS and VeloxParquetDatsourceABFS, insted of puting all of them in the same file There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that the class needs a refactor, but that goes out of the scope of this change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
void init(const std::unordered_map<std::string, std::string>& sparkConfs) override; | ||
void inspectSchema(struct ArrowSchema* out) override; | ||
|
@@ -92,6 +97,7 @@ class VeloxParquetDatasource final : public Datasource { | |
std::shared_ptr<facebook::velox::parquet::Writer> parquetWriter_; | ||
std::shared_ptr<facebook::velox::memory::MemoryPool> pool_; | ||
std::shared_ptr<facebook::velox::memory::MemoryPool> s3SinkPool_; | ||
std::shared_ptr<facebook::velox::memory::MemoryPool> gcsSinkPool_; | ||
std::unique_ptr<facebook::velox::dwio::common::FileSink> sink_; | ||
}; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same here, Let's move the logic to independent file and compile the file depends on compile flag. Let's avoid to use ifdef ENABLE_S3/GCS etc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree, but please read my previous answer.