diff --git a/fbpcf/io/cloud_util/CloudFileUtil.cpp b/fbpcf/io/cloud_util/CloudFileUtil.cpp index 05a59534..85f63552 100644 --- a/fbpcf/io/cloud_util/CloudFileUtil.cpp +++ b/fbpcf/io/cloud_util/CloudFileUtil.cpp @@ -76,7 +76,7 @@ std::unique_ptr getCloudFileUploader( return std::make_unique( fbpcf::cloudio::S3Client::getInstance( fbpcf::aws::S3ClientOption{.region = ref.region}) - .getS3Client(), + ->getS3Client(), filePath); } else { throw fbpcf::PcfException("Not supported yet."); diff --git a/fbpcf/io/cloud_util/S3Client.cpp b/fbpcf/io/cloud_util/S3Client.cpp index 83fddb9c..4cd43c24 100644 --- a/fbpcf/io/cloud_util/S3Client.cpp +++ b/fbpcf/io/cloud_util/S3Client.cpp @@ -7,12 +7,47 @@ #include "fbpcf/io/cloud_util/S3Client.h" +#include + #include #include +#include +#include + namespace fbpcf::cloudio { -S3Client& S3Client::getInstance(const fbpcf::aws::S3ClientOption& option) { - static S3Client s3Client(option); - return s3Client; +std::shared_ptr S3Client::getInstance(const fbpcf::aws::S3ClientOption& option) { + /* Due to previous problems, we create a Singleton instance of the S3Client, + * but there's a catch: we need a distinct S3Client for each region, or we + * run into other issues. For that reason, we store this map from string to + * S3Client with the assumption that the keys are region names. Since region + * is optional, we also allow for a default empty string region. + * ***************************** NOT THREAD SAFE **************************** + * NOTE: Significant refactoring is required to make this thread safe + * Downstream usage wants a mutable reference, but a folly::Synchronized + * RWLock will return a const ref to a reader, meaning it's hard to refactor. + * Simply trying to use folly::Synchronized around the map isn't sufficient, + * because we'll leak a reference to an object in the map which is unsafe. + * ***************************** NOT THREAD SAFE **************************** + */ + static folly::Synchronized>> m; + + std::string defaultStr{}; + auto region = option.region.value_or(defaultStr); + + m.withWLock([&](auto& clientMap) { + if (clientMap.find(region) == clientMap.end()) { + std::shared_ptr ptr{new S3Client{option}}; + clientMap.at(region) = ptr; + } + }); + + /* You may see this and think, "Hey, the NOT THREAD SAFE warning above is + * outdated, it looks like we fixed it!", but you're wrong. This still does + * not fully solve the problem. Because the downstream consumer takes a + * mutable reference, there's no guarantee that this is thread safe. It's + * better than nothing, but you still shouldn't fully trust this code. + */ + return m.wlock()->at(region); } } // namespace fbpcf::cloudio diff --git a/fbpcf/io/cloud_util/S3Client.h b/fbpcf/io/cloud_util/S3Client.h index 04ada1e1..0a9e909c 100644 --- a/fbpcf/io/cloud_util/S3Client.h +++ b/fbpcf/io/cloud_util/S3Client.h @@ -22,7 +22,7 @@ class S3Client { } public: - static S3Client& getInstance(const fbpcf::aws::S3ClientOption& option); + static std::shared_ptr getInstance(const fbpcf::aws::S3ClientOption& option); std::shared_ptr getS3Client() { return awsS3Client_;