Skip to content

Commit

Permalink
issue-2559: ShardBalancer: blocks vs bytes fix + service_ut_sharding …
Browse files Browse the repository at this point in the history
…testcase
  • Loading branch information
qkrorlqr committed Dec 18, 2024
1 parent b384e3e commit 041ef72
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 46 deletions.
126 changes: 126 additions & 0 deletions cloud/filestore/libs/storage/service/service_ut_sharding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3867,6 +3867,132 @@ Y_UNIT_TEST_SUITE(TStorageServiceShardingTest)
Sort(ids);
UNIT_ASSERT_VALUES_EQUAL(expected, ids);
}

// Creates a 20 MiB filestore with automatic sharding (4 MiB shards), puts one
// file on each of the 5 shards, writes 3 MiB + 4 KiB into the files with
// indices 0, 2 and 4 and then checks that the next 5 files are all placed on
// the two shards whose files were left unwritten.
Y_UNIT_TEST(ShouldBalanceShardsByFreeSpace)
{
    NProto::TStorageConfig config;
    config.SetShardIdSelectionInLeaderEnabled(true);
    config.SetAutomaticShardCreationEnabled(true);
    config.SetShardAllocationUnit(4_MB);
    config.SetAutomaticallyCreatedShardSize(4_MB);
    config.SetShardBalancerMinFreeSpaceReserve(4_KB);
    config.SetShardBalancerDesiredFreeSpaceReserve(1_MB);
    config.SetMultiTabletForwardingEnabled(true);
    TTestEnv env({}, config);
    env.CreateSubDomain("nfs");

    ui32 nodeIdx = env.CreateNode("nfs");

    const TString fsId = "test";

    TServiceClient service(env.GetRuntime(), nodeIdx);
    // the filestore size is passed in 4 KiB blocks
    service.CreateFileStore(fsId, 20_MB / 4_KB);

    // waiting for IndexTablet start after the restart triggered by
    // configureshards
    WaitForTabletStart(service);

    // the main filesystem plus 5 automatically created shards are expected
    TVector<TString> expected = {
        fsId,
        fsId + "_s1",
        fsId + "_s2",
        fsId + "_s3",
        fsId + "_s4",
        fsId + "_s5",
    };
    auto listing = service.ListFileStores();
    auto fsIds = listing->Record.GetFileStores();
    TVector<TString> ids(fsIds.begin(), fsIds.end());
    Sort(ids);
    UNIT_ASSERT_VALUES_EQUAL(expected, ids);

    auto headers = service.InitSession(fsId, "client");

    TVector<ui64> handles;
    TVector<ui64> nodes;
    TSet<ui32> shards;

    // Creates 5 files named file<firstFileNo> .. file<firstFileNo + 4> in the
    // root directory and records node id, handle and shard number of each one.
    const auto createFiles = [&] (ui32 firstFileNo) {
        for (ui32 fileNo = firstFileNo; fileNo < firstFileNo + 5; ++fileNo) {
            auto response = service.CreateHandle(
                headers,
                fsId,
                RootNodeId,
                Sprintf("file%u", fileNo),
                TCreateHandleArgs::CREATE)->Record;

            const auto nodeId = response.GetNodeAttr().GetId();
            shards.insert(ExtractShardNo(nodeId));
            nodes.push_back(nodeId);
            handles.push_back(response.GetHandle());
        }
    };

    createFiles(0);

    // every file of the first batch should have landed on its own shard
    UNIT_ASSERT_VALUES_EQUAL(5, shards.size());

    // filling the shards which host files 0, 2 and 4
    // NOTE(review): presumably this leaves those 4 MiB shards with less free
    // space than the 1 MiB desired reserve - confirm against the balancer
    const ui32 filledFiles[] = {0, 2, 4};
    for (const ui32 idx: filledFiles) {
        service.WriteData(
            headers,
            fsId,
            nodes[idx],
            handles[idx],
            0,
            TString(3_MB + 4_KB, 'a'));
    }

    // waiting for async stats aggregation from shards
    // doing it before triggering another event to avoid DispatchEvents call
    // which does a long busy-wait loop
    env.GetRuntime().AdvanceCurrentTime(TDuration::Seconds(15));
    // just triggering another event chain - doesn't matter which one
    service.StatFileStore(headers, fsId);

    // shards of the files which received no data
    TSet<ui32> emptyShards;
    emptyShards.insert(ExtractShardNo(nodes[1]));
    emptyShards.insert(ExtractShardNo(nodes[3]));

    handles.clear();
    nodes.clear();
    shards.clear();

    createFiles(5);

    // the second batch should use exactly the shards that were left empty
    UNIT_ASSERT_VALUES_EQUAL(2, shards.size());
    auto actual = shards.begin();
    for (auto wanted = emptyShards.begin();
        wanted != emptyShards.end();
        ++wanted, ++actual)
    {
        UNIT_ASSERT_VALUES_EQUAL(*wanted, *actual);
    }
}
}

} // namespace NCloud::NFileStore::NStorage
6 changes: 4 additions & 2 deletions cloud/filestore/libs/storage/tablet/model/shard_balancer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@ struct TShardMetaComp
////////////////////////////////////////////////////////////////////////////////

// Stores the balancer settings in the corresponding members; nothing else is
// recalculated here.
//
// blockSize - kept in BlockSize; SelectShard divides by it, so it is expected
//     to be non-zero.
// desiredFreeSpaceReserve - kept in DesiredFreeSpaceReserve.
// minFreeSpaceReserve - kept in MinFreeSpaceReserve.
void TShardBalancer::SetParameters(
    ui32 blockSize,
    ui64 desiredFreeSpaceReserve,
    ui64 minFreeSpaceReserve)
{
    MinFreeSpaceReserve = minFreeSpaceReserve;
    DesiredFreeSpaceReserve = desiredFreeSpaceReserve;
    BlockSize = blockSize;
}
Expand Down Expand Up @@ -72,13 +74,13 @@ NProto::TError TShardBalancer::SelectShard(ui64 fileSize, TString* shardId)
auto* e = UpperBound(
Metas.begin(),
Metas.end(),
fileSize + DesiredFreeSpaceReserve,
(fileSize + DesiredFreeSpaceReserve) / BlockSize,
TShardMetaComp());
if (e == Metas.begin()) {
e = UpperBound(
Metas.begin(),
Metas.end(),
fileSize + MinFreeSpaceReserve,
(fileSize + MinFreeSpaceReserve) / BlockSize,
TShardMetaComp());
}

Expand Down
8 changes: 6 additions & 2 deletions cloud/filestore/libs/storage/tablet/model/shard_balancer.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,19 @@ class TShardBalancer
};

private:
ui32 BlockSize;
ui64 DesiredFreeSpaceReserve = 0;
ui32 MinFreeSpaceReserve = 0;
ui64 MinFreeSpaceReserve = 0;

TVector<TString> Ids;
TVector<TShardMeta> Metas;
ui32 ShardSelector = 0;

public:
void SetParameters(ui64 desiredFreeSpaceReserve, ui64 minFreeSpaceReserve);
void SetParameters(
ui32 blockSize,
ui64 desiredFreeSpaceReserve,
ui64 minFreeSpaceReserve);
void UpdateShards(TVector<TString> shardIds);
void UpdateShardStats(const TVector<TShardStats>& stats);
NProto::TError SelectShard(ui64 fileSize, TString* shardId);
Expand Down
84 changes: 42 additions & 42 deletions cloud/filestore/libs/storage/tablet/model/shard_balancer_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
Y_UNIT_TEST(ShouldBalanceShards)
{
TShardBalancer balancer;
balancer.SetParameters(1_TB, 1_MB);
balancer.SetParameters(4_KB, 1_TB, 1_MB);
balancer.UpdateShards({"s1", "s2", "s3", "s4", "s5"});
ASSERT_NO_SB_ERROR(0, "s1");
ASSERT_NO_SB_ERROR(0, "s2");
Expand All @@ -46,11 +46,11 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
ASSERT_NO_SB_ERROR(0, "s5");

balancer.UpdateShardStats({
{5_TB, 1_TB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 1_TB, 0, 0},
{5_TB, 1_TB, 0, 0},
{5_TB, 3_TB, 0, 0},
{5_TB / 4_KB, 1_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 1_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 1_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 3_TB / 4_KB, 0, 0},
});

// order changed: s1, s3, s4, s2, s5
Expand All @@ -69,11 +69,11 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
// order changed: s1, s2, s3, s4, s5

balancer.UpdateShardStats({
{5_TB, 1_TB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 4_TB, 0, 0},
{5_TB / 4_KB, 1_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 4_TB / 4_KB, 0, 0},
});

ASSERT_NO_SB_ERROR(0, "s1");
Expand All @@ -91,11 +91,11 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
// order changed: s1, s2, s3, s4

balancer.UpdateShardStats({
{5_TB, 1_TB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 4_TB + 500_GB, 0, 0},
{5_TB / 4_KB, 1_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, (4_TB + 500_GB) / 4_KB, 0, 0},
});

ASSERT_NO_SB_ERROR(0, "s1");
Expand All @@ -112,11 +112,11 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
// tier 2: s3, s5

balancer.UpdateShardStats({
{5_TB, 1_TB, 0, 0},
{5_TB, 5_TB, 0, 0},
{5_TB, 4_TB + 300_GB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 4_TB + 500_GB, 0, 0},
{5_TB / 4_KB, 1_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 5_TB / 4_KB, 0, 0},
{5_TB / 4_KB, (4_TB + 300_GB) / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, (4_TB + 500_GB) / 4_KB, 0, 0},
});

ASSERT_NO_SB_ERROR(0, "s1");
Expand All @@ -128,11 +128,11 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
// order changed: s3, s1, s5

balancer.UpdateShardStats({
{5_TB, 4_TB + 400_GB, 0, 0},
{5_TB, 5_TB + 100_GB, 0, 0},
{5_TB, 4_TB + 300_GB, 0, 0},
{5_TB, 5_TB, 0, 0},
{5_TB, 4_TB + 500_GB, 0, 0},
{5_TB / 4_KB, (4_TB + 400_GB) / 4_KB, 0, 0},
{5_TB / 4_KB, (5_TB + 100_GB) / 4_KB, 0, 0},
{5_TB / 4_KB, (4_TB + 300_GB) / 4_KB, 0, 0},
{5_TB / 4_KB, 5_TB / 4_KB, 0, 0},
{5_TB / 4_KB, (4_TB + 500_GB) / 4_KB, 0, 0},
});

ASSERT_NO_SB_ERROR(0, "s3");
Expand All @@ -145,11 +145,11 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
// 1 close to full shard left: s3

balancer.UpdateShardStats({
{5_TB, 5_TB - 512_KB, 0, 0},
{5_TB, 5_TB + 100_GB, 0, 0},
{5_TB, 4_TB + 300_GB, 0, 0},
{5_TB, 5_TB, 0, 0},
{5_TB, 5_TB, 0, 0},
{5_TB / 4_KB, (5_TB - 512_KB) / 4_KB, 0, 0},
{5_TB / 4_KB, (5_TB + 100_GB) / 4_KB, 0, 0},
{5_TB / 4_KB, (4_TB + 300_GB) / 4_KB, 0, 0},
{5_TB / 4_KB, 5_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 5_TB / 4_KB, 0, 0},
});

ASSERT_NO_SB_ERROR(0, "s3");
Expand All @@ -158,11 +158,11 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
// out of space

balancer.UpdateShardStats({
{5_TB, 5_TB - 512_KB, 0, 0},
{5_TB, 5_TB + 100_GB, 0, 0},
{5_TB, 5_TB + 300_GB, 0, 0},
{5_TB, 5_TB, 0, 0},
{5_TB, 5_TB, 0, 0},
{5_TB / 4_KB, (5_TB - 512_KB) / 4_KB, 0, 0},
{5_TB / 4_KB, (5_TB + 100_GB) / 4_KB, 0, 0},
{5_TB / 4_KB, (5_TB + 300_GB) / 4_KB, 0, 0},
{5_TB / 4_KB, 5_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 5_TB / 4_KB, 0, 0},
});

ASSERT_SB_ERROR(0, E_FS_NOSPC);
Expand All @@ -173,15 +173,15 @@ Y_UNIT_TEST_SUITE(TShardBalancerTest)
Y_UNIT_TEST(ShouldBalanceShardsWithFileSize)
{
TShardBalancer balancer;
balancer.SetParameters(1_TB, 1_MB);
balancer.SetParameters(4_KB, 1_TB, 1_MB);
balancer.UpdateShards({"s1", "s2", "s3", "s4", "s5"});

balancer.UpdateShardStats({
{5_TB, 512_GB, 0, 0},
{5_TB, 2_TB, 0, 0},
{5_TB, 1_TB, 0, 0},
{5_TB, 1_TB, 0, 0},
{5_TB, 3_TB, 0, 0},
{5_TB / 4_KB, 512_GB / 4_KB, 0, 0},
{5_TB / 4_KB, 2_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 1_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 1_TB / 4_KB, 0, 0},
{5_TB / 4_KB, 3_TB / 4_KB, 0, 0},
});

// 1_TB can fit in any shard
Expand Down
1 change: 1 addition & 0 deletions cloud/filestore/libs/storage/tablet/tablet_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ void TIndexTabletState::LoadState(
Impl->OrphanNodeIds.insert(orphanNodeIds.begin(), orphanNodeIds.end());

Impl->ShardBalancer.SetParameters(
GetBlockSize(),
config.GetShardBalancerDesiredFreeSpaceReserve(),
config.GetShardBalancerMinFreeSpaceReserve());
const auto& shardIds = GetFileSystem().GetShardFileSystemIds();
Expand Down

0 comments on commit 041ef72

Please sign in to comment.