diff --git a/cloud/blockstore/libs/diagnostics/critical_events.h b/cloud/blockstore/libs/diagnostics/critical_events.h index ccaa9cb3454..ca6a506c7f2 100644 --- a/cloud/blockstore/libs/diagnostics/critical_events.h +++ b/cloud/blockstore/libs/diagnostics/critical_events.h @@ -64,6 +64,7 @@ namespace NCloud::NBlockStore { xxx(DiskRegistryCleanupAgentConfigError) \ xxx(DiskRegistryOccupiedDeviceConfigurationHasChanged) \ xxx(MirroredDiskChecksumMismatchUponRead) \ + xxx(DiskRegistryWrongMigratedDeviceOwnership) \ // BLOCKSTORE_CRITICAL_EVENTS #define BLOCKSTORE_IMPOSSIBLE_EVENTS(xxx) \ diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp index 1591631abf5..0ee39952938 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp @@ -5032,18 +5032,10 @@ void TDiskRegistryState::ApplyAgentStateChange( continue; } - bool found = false; - for (const auto& d: disk.Devices) { - if (d == deviceId) { - found = true; - break; - } - } - - if (!found) { - // this device is allocated for this disk but it is not among - // this disk's active device set - // NBS-3726 + if (Find(disk.Devices, deviceId) == disk.Devices.end()) { + ReportDiskRegistryWrongMigratedDeviceOwnership( + TStringBuilder() << "ApplyAgentStateChange: device " + << deviceId << " not found"); continue; } @@ -6145,9 +6137,10 @@ NProto::TError TDiskRegistryState::FinishDeviceMigration( auto devIt = Find(disk.Devices, sourceId); if (devIt == disk.Devices.end()) { - // temporarily commented out, see NBS-3726 - // return MakeError(E_INVALID_STATE, TStringBuilder() << - // "device " << sourceId.Quote() << " not found"); + auto message = ReportDiskRegistryWrongMigratedDeviceOwnership( + TStringBuilder() << "FinishDeviceMigration: device " + << sourceId.Quote() << " not found"); + return MakeError(E_INVALID_STATE, std::move(message)); } if (auto it = disk.MigrationTarget2Source.find(targetId); @@ -6174,27 +6167,16 @@ NProto::TError TDiskRegistryState::FinishDeviceMigration( } const ui64 seqNo = AddReallocateRequest(db, diskId); - // this condition is needed because of NBS-3726 - if (devIt != disk.Devices.end()) { - *devIt = targetId; - disk.FinishedMigrations.push_back({ - .DeviceId = sourceId, - .SeqNo = seqNo - }); + *devIt = targetId; + disk.FinishedMigrations.push_back({.DeviceId = sourceId, .SeqNo = seqNo}); - if (disk.MasterDiskId) { - const bool replaced = ReplicaTable.ReplaceDevice( - disk.MasterDiskId, - sourceId, - targetId); - // targetId is actually fully initialized after migration - ReplicaTable.MarkReplacementDevice( - disk.MasterDiskId, - targetId, - false); - - Y_DEBUG_ABORT_UNLESS(replaced); - } + if (disk.MasterDiskId) { + const bool replaced = + ReplicaTable.ReplaceDevice(disk.MasterDiskId, sourceId, targetId); + // targetId is actually fully initialized after migration + ReplicaTable.MarkReplacementDevice(disk.MasterDiskId, targetId, false); + + Y_DEBUG_ABORT_UNLESS(replaced); } *diskStateUpdated = TryUpdateDiskState(db, diskId, disk, timestamp); diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_migration.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_migration.cpp index 1939f31d670..619b180e294 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_migration.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_migration.cpp @@ -314,6 +314,14 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMigrationTest) UNIT_ASSERT_VALUES_EQUAL(0, affectedDisks.size()); }); + NMonitoring::TDynamicCountersPtr counters = + new NMonitoring::TDynamicCounters(); + InitCriticalEventsCounter(counters); + auto configCounter = counters->GetCounter( + "AppCriticalEvents/DiskRegistryWrongMigratedDeviceOwnership", + true); + UNIT_ASSERT_VALUES_EQUAL(0, configCounter->Val()); + executor.WriteTx([&] (TDiskRegistryDatabase db) mutable { auto affectedDisks = ChangeAgentState( state, @@ -331,6 +339,9 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMigrationTest) } UNIT_ASSERT(state.IsMigrationListEmpty()); + // Now bug is fixed, but, if it reproduce in future, we must report + // event. + UNIT_ASSERT_VALUES_EQUAL(1, configCounter->Val()); } Y_UNIT_TEST(ShouldEraseMigrationsForDeletedDisk)