Skip to content

Commit

Permalink
Revert "Add file size to FileOptions (facebookincubator#9313)"
Browse files Browse the repository at this point in the history
This reverts commit 387ae3b.
  • Loading branch information
zhli1142015 authored and SurbhiVijayvargeeya committed May 22, 2024
1 parent be896e1 commit 6aad15c
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 71 deletions.
2 changes: 0 additions & 2 deletions velox/common/file/FileSystems.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ struct FileOptions {

std::unordered_map<std::string, std::string> values;
memory::MemoryPool* pool{nullptr};
/// If specified then can be trusted to be the file size.
std::optional<int64_t> fileSize;
};

/// An abstract FileSystem
Expand Down
10 changes: 2 additions & 8 deletions velox/connectors/hive/storage_adapters/abfs/AbfsFileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,6 @@ class AbfsReadFile::Impl {
}

void initialize(const FileOptions& options) {
if (options.fileSize.has_value()) {
VELOX_CHECK_GE(
options.fileSize.value(), 0, "File size must be non-negative");
length_ = options.fileSize.value();
}

if (eTag_.empty() || length_ == -1) {

try {
Expand Down Expand Up @@ -303,8 +297,8 @@ AbfsReadFile::AbfsReadFile(
path, loadQuantum, ioExecutor, abfsEndpoint, passEtagLength);
}

void AbfsReadFile::initialize(const FileOptions& options) {
return impl_->initialize(options);
void AbfsReadFile::initialize() {
return impl_->initialize();
}

std::string_view
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
#include "velox/exec/tests/utils/TempFilePath.h"

using namespace facebook::velox;
using namespace facebook::velox::filesystems;
using namespace facebook::velox::filesystems::abfs;
using namespace Azure::Storage::Blobs;

Expand Down Expand Up @@ -222,34 +221,11 @@ TEST_F(AbfsFileSystemTest, readFile) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
auto readFile = abfs.openFileForRead(fullFilePath);
readData(readFile.get());
}

TEST_F(AbfsFileSystemTest, openFileForReadWithOptions) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{"fs.azure.account.key.test.dfs.core.windows.net",
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
FileOptions options;
options.fileSize = 15 + kOneMB;
auto readFile = abfs.openFileForRead(fullFilePath, options);
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
auto readFile = abfs->openFileForRead(fullFilePath);
readData(readFile.get());
}

TEST_F(AbfsFileSystemTest, openFileForReadWithInvalidOptions) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{"fs.azure.account.key.test.dfs.core.windows.net",
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
FileOptions options;
options.fileSize = -kOneMB;
VELOX_ASSERT_THROW(
abfs.openFileForRead(fullFilePath, options),
"File size must be non-negative");
}

TEST_F(AbfsFileSystemTest, readWasbFile) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
Expand All @@ -265,7 +241,7 @@ TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);

std::vector<std::thread> threads;
std::mt19937 generator(std::random_device{}());
Expand All @@ -280,7 +256,7 @@ TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) {
}
std::this_thread::sleep_for(
std::chrono::microseconds(sleepTimesInMicroseconds[index]));
auto readFile = abfs.openFileForRead(fullFilePath);
auto readFile = abfs->openFileForRead(fullFilePath);
readData(readFile.get());
});
threads.emplace_back(std::move(thread));
Expand All @@ -299,7 +275,7 @@ TEST_F(AbfsFileSystemTest, missingFile) {
azuriteServer->connectionStr()}});
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_RUNTIME_THROW_CODE(
abfs.openFileForRead(abfsFile), error_code::kFileNotFound, "404");
abfs->openFileForRead(abfsFile), error_code::kFileNotFound, "404");
}

TEST_F(AbfsFileSystemTest, OpenABFSFileForWriteTest) {
Expand Down Expand Up @@ -328,49 +304,49 @@ TEST_F(AbfsFileSystemTest, renameNotImplemented) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_THROW(
abfs.rename("text", "text2"), "rename for abfs not implemented");
abfs->rename("text", "text2"), "rename for abfs not implemented");
}

TEST_F(AbfsFileSystemTest, removeNotImplemented) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
VELOX_ASSERT_THROW(abfs.remove("text"), "remove for abfs not implemented");
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_THROW(abfs->remove("text"), "remove for abfs not implemented");
}

TEST_F(AbfsFileSystemTest, existsNotImplemented) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
VELOX_ASSERT_THROW(abfs.exists("text"), "exists for abfs not implemented");
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_THROW(abfs->exists("text"), "exists for abfs not implemented");
}

TEST_F(AbfsFileSystemTest, listNotImplemented) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
VELOX_ASSERT_THROW(abfs.list("dir"), "list for abfs not implemented");
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_THROW(abfs->list("dir"), "list for abfs not implemented");
}

TEST_F(AbfsFileSystemTest, mkdirNotImplemented) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
VELOX_ASSERT_THROW(abfs.mkdir("dir"), "mkdir for abfs not implemented");
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_THROW(abfs->mkdir("dir"), "mkdir for abfs not implemented");
}

TEST_F(AbfsFileSystemTest, rmdirNotImplemented) {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{facebook::velox::filesystems::test::AzuriteSparkConfig,
azuriteServer->connectionStr()}});
AbfsFileSystem abfs{hiveConfig};
VELOX_ASSERT_THROW(abfs.rmdir("dir"), "rmdir for abfs not implemented");
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_THROW(abfs->rmdir("dir"), "rmdir for abfs not implemented");
}

TEST(AbfsReadFileTest, splitRegion) {
Expand Down
12 changes: 3 additions & 9 deletions velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,7 @@ class GCSReadFile final : public ReadFile {

// Gets the length of the file.
// Checks if there are any issues reading the file.
void initialize(const filesystems::FileOptions& options) {
if (options.fileSize.has_value()) {
VELOX_CHECK_GE(
options.fileSize.value(), 0, "File size must be non-negative");
length_ = options.fileSize.value();
}

void initialize() {
// Make it a no-op if invoked twice.
if (length_ != -1) {
return;
Expand Down Expand Up @@ -311,10 +305,10 @@ void GCSFileSystem::initializeClient() {

std::unique_ptr<ReadFile> GCSFileSystem::openFileForRead(
std::string_view path,
const FileOptions& options) {
const FileOptions& /*unused*/) {
const auto gcspath = gcsPath(path);
auto gcsfile = std::make_unique<GCSReadFile>(gcspath, impl_->getClient());
gcsfile->initialize(options);
gcsfile->initialize();
return gcsfile;
}

Expand Down
12 changes: 3 additions & 9 deletions velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,7 @@ class S3ReadFile final : public ReadFile {

// Gets the length of the file.
// Checks if there are any issues reading the file.
void initialize(const filesystems::FileOptions& options) {
if (options.fileSize.has_value()) {
VELOX_CHECK_GE(
options.fileSize.value(), 0, "File size must be non-negative");
length_ = options.fileSize.value();
}

void initialize() {
// Make it a no-op if invoked twice.
if (length_ != -1) {
return;
Expand Down Expand Up @@ -667,10 +661,10 @@ std::string S3FileSystem::getLogLevelName() const {

std::unique_ptr<ReadFile> S3FileSystem::openFileForRead(
std::string_view path,
const FileOptions& options) {
const FileOptions& /*unused*/) {
const auto file = s3Path(path);
auto s3file = std::make_unique<S3ReadFile>(file, impl_->s3Client());
s3file->initialize(options);
s3file->initialize();
return s3file;
}

Expand Down
3 changes: 1 addition & 2 deletions velox/exec/SpillFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ SpillWriteFile::SpillWriteFile(
filesystems::FileOptions{
{{filesystems::FileOptions::kFileCreateConfig.toString(),
fileCreateConfig}},
nullptr,
std::nullopt});
nullptr});
}

void SpillWriteFile::finish() {
Expand Down

0 comments on commit 6aad15c

Please sign in to comment.