diff --git a/velox/connectors/hive/HiveConfig.cpp b/velox/connectors/hive/HiveConfig.cpp index 27ee4cd8c633..48ad95f399f0 100644 --- a/velox/connectors/hive/HiveConfig.cpp +++ b/velox/connectors/hive/HiveConfig.cpp @@ -143,8 +143,8 @@ std::string HiveConfig::gcsScheme() const { return config_->get(kGCSScheme, std::string("https")); } -std::string HiveConfig::gcsCredentials() const { - return config_->get(kGCSCredentials, std::string("")); +std::string HiveConfig::gcsCredentialsPath() const { + return config_->get(kGCSCredentialsPath, std::string("")); } std::optional HiveConfig::gcsMaxRetryCount() const { diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h index 8f572a3dab57..975106788835 100644 --- a/velox/connectors/hive/HiveConfig.h +++ b/velox/connectors/hive/HiveConfig.h @@ -110,8 +110,9 @@ class HiveConfig { /// The GCS storage scheme, https for default credentials. static constexpr const char* kGCSScheme = "hive.gcs.scheme"; - /// The GCS service account configuration as json string - static constexpr const char* kGCSCredentials = "hive.gcs.credentials"; + /// The GCS service account configuration JSON key file. + static constexpr const char* kGCSCredentialsPath = + "hive.gcs.json-key-file-path"; /// The GCS maximum retry counter of transient errors. static constexpr const char* kGCSMaxRetryCount = "hive.gcs.max-retry-count"; @@ -284,7 +285,7 @@ class HiveConfig { std::string gcsScheme() const; - std::string gcsCredentials() const; + std::string gcsCredentialsPath() const; std::optional gcsMaxRetryCount() const; diff --git a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp b/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp index bd4ccfa907ba..583a25c291e8 100644 --- a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp @@ -295,12 +295,21 @@ class GCSFileSystem::Impl { options.set(scheme + "://" + endpointOverride); } - auto cred = hiveConfig_->gcsCredentials(); - if (!cred.empty()) { - auto credentials = gc::MakeServiceAccountCredentials(cred); - options.set(credentials); + auto credFile = hiveConfig_->gcsCredentialsPath(); + if (!credFile.empty() && std::filesystem::exists(credFile)) { + std::ifstream jsonFile(credFile, std::ios::in); + if (!jsonFile.is_open()) { + LOG(WARNING) << "Error opening file " << credFile; + } else { + std::stringstream credsBuffer; + credsBuffer << jsonFile.rdbuf(); + auto creds = credsBuffer.str(); + auto credentials = gc::MakeServiceAccountCredentials(std::move(creds)); + options.set(credentials); + } } else { - LOG(WARNING) << "Config::gcsCredentials is empty"; + LOG(WARNING) + << "Config hive.gcs.json-key-file-path is empty or key file path not found"; } client_ = std::make_shared(options); diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/CMakeLists.txt b/velox/connectors/hive/storage_adapters/gcs/tests/CMakeLists.txt index edf4c69eeffd..1b43a54c4efe 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/CMakeLists.txt +++ b/velox/connectors/hive/storage_adapters/gcs/tests/CMakeLists.txt @@ -16,12 +16,13 @@ add_executable(velox_gcsfile_test GCSUtilTest.cpp GCSFileSystemTest.cpp) add_test(velox_gcsfile_test velox_gcsfile_test) target_link_libraries( velox_gcsfile_test - velox_file - velox_gcs velox_core - velox_hive_connector velox_dwio_common_exception velox_exec + velox_file + velox_gcs + velox_hive_connector + velox_temp_path GTest::gmock GTest::gtest GTest::gtest_main) diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp index 5293901f61f9..69832a1b1896 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp @@ -310,7 +310,7 @@ TEST_F(GCSFileSystemTest, credentialsConfig) { // for authentication because the key has been deactivated on the server-side, // *and* the account(s) involved are deleted *and* they are not the accounts // or projects do not match its contents. - configOverride["hive.gcs.credentials"] = R"""({ + auto creds = R"""({ "type": "service_account", "project_id": "foo-project", "private_key_id": "a1a111aa1111a11a11a11aa111a111a1a1111111", @@ -344,13 +344,17 @@ TEST_F(GCSFileSystemTest, credentialsConfig) { "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/foo-email%40foo-project.iam.gserviceaccount.com" })"""; + auto jsonFile = exec::test::TempFilePath::create(); + std::ofstream credsOut(jsonFile->getPath()); + credsOut << creds; + credsOut.close(); + configOverride["hive.gcs.json-key-file-path"] = jsonFile->getPath(); configOverride["hive.gcs.scheme"] = "http"; configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port(); std::shared_ptr conf = std::make_shared(std::move(configOverride)); filesystems::GCSFileSystem gcfs(conf); - gcfs.initializeClient(); try { const std::string gcsFile = diff --git a/velox/connectors/hive/tests/HiveConfigTest.cpp b/velox/connectors/hive/tests/HiveConfigTest.cpp index b7bbfdbc7904..d40cc3a58293 100644 --- a/velox/connectors/hive/tests/HiveConfigTest.cpp +++ b/velox/connectors/hive/tests/HiveConfigTest.cpp @@ -44,7 +44,7 @@ TEST(HiveConfigTest, defaultConfig) { ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session"); ASSERT_EQ(hiveConfig.gcsEndpoint(), ""); ASSERT_EQ(hiveConfig.gcsScheme(), "https"); - ASSERT_EQ(hiveConfig.gcsCredentials(), ""); + ASSERT_EQ(hiveConfig.gcsCredentialsPath(), ""); ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), false); ASSERT_EQ( hiveConfig.isFileColumnNamesReadAsLowerCase(emptySession.get()), false); @@ -95,7 +95,7 @@ TEST(HiveConfigTest, overrideConfig) { {HiveConfig::kS3IamRoleSessionName, "velox"}, {HiveConfig::kGCSEndpoint, "hey"}, {HiveConfig::kGCSScheme, "http"}, - {HiveConfig::kGCSCredentials, "hey"}, + {HiveConfig::kGCSCredentialsPath, "hey"}, {HiveConfig::kOrcUseColumnNames, "true"}, {HiveConfig::kFileColumnNamesReadAsLowerCase, "true"}, {HiveConfig::kAllowNullPartitionKeys, "false"}, @@ -134,7 +134,7 @@ TEST(HiveConfigTest, overrideConfig) { ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox"); ASSERT_EQ(hiveConfig.gcsEndpoint(), "hey"); ASSERT_EQ(hiveConfig.gcsScheme(), "http"); - ASSERT_EQ(hiveConfig.gcsCredentials(), "hey"); + ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "hey"); ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), true); ASSERT_EQ( hiveConfig.isFileColumnNamesReadAsLowerCase(emptySession.get()), true); @@ -206,7 +206,7 @@ TEST(HiveConfigTest, overrideSession) { ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session"); ASSERT_EQ(hiveConfig.gcsEndpoint(), ""); ASSERT_EQ(hiveConfig.gcsScheme(), "https"); - ASSERT_EQ(hiveConfig.gcsCredentials(), ""); + ASSERT_EQ(hiveConfig.gcsCredentialsPath(), ""); ASSERT_EQ(hiveConfig.isOrcUseColumnNames(session.get()), true); ASSERT_EQ(hiveConfig.isFileColumnNamesReadAsLowerCase(session.get()), true); diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst index 2abf0ba77e62..507fa7685419 100644 --- a/velox/docs/configs.rst +++ b/velox/docs/configs.rst @@ -641,10 +641,10 @@ Each query can override the config by setting corresponding query session proper - string - - The GCS storage scheme, https for default credentials. - * - hive.gcs.credentials + * - hive.gcs.json-key-file-path - string - - - The GCS service account configuration as json string. + - The GCS service account configuration JSON key file. * - hive.gcs.max-retry-count - integer -