From 804f3d61b935a45524d9b6312a8f176638413ab2 Mon Sep 17 00:00:00 2001 From: Alexey Efimov Date: Thu, 7 Nov 2024 16:28:08 +0100 Subject: [PATCH] add network diagnostics columns to viewer/nodes handler (#11344) --- ydb/core/protos/node_whiteboard.proto | 2 +- ydb/core/viewer/json_handlers_viewer.cpp | 2 +- ydb/core/viewer/json_pipe_req.cpp | 9 + ydb/core/viewer/protos/viewer.proto | 32 +- ydb/core/viewer/viewer_nodes.h | 798 +++++++++++++++++++++-- ydb/core/viewer/viewer_request.cpp | 26 + 6 files changed, 789 insertions(+), 80 deletions(-) diff --git a/ydb/core/protos/node_whiteboard.proto b/ydb/core/protos/node_whiteboard.proto index 2b876bbe25d0..29f75a4a5d06 100644 --- a/ydb/core/protos/node_whiteboard.proto +++ b/ydb/core/protos/node_whiteboard.proto @@ -105,7 +105,7 @@ message TNodeStateInfo { optional uint64 ConnectTime = 7; // changed every time the connection changes it state optional uint32 PeerNodeId = 8 [(DefaultField) = true]; // node if of the peer optional int64 ClockSkewUs = 9; // a positive value means the peer is ahead in time; a negative value means it's behind - optional uint32 PingTimeUs = 10; // RTT for the peer + optional uint64 PingTimeUs = 10; // RTT for the peer optional float Utilization = 11; // network utilization 0-1 optional NActorsInterconnect.TScopeId ScopeId = 12; // scope id of the peer optional uint32 Count = 13; // filled during group count diff --git a/ydb/core/viewer/json_handlers_viewer.cpp b/ydb/core/viewer/json_handlers_viewer.cpp index fb7868033d96..e0d81f326b17 100644 --- a/ydb/core/viewer/json_handlers_viewer.cpp +++ b/ydb/core/viewer/json_handlers_viewer.cpp @@ -244,7 +244,7 @@ void InitViewerHealthCheckJsonHandler(TJsonHandlers& handlers) { } void InitViewerNodesJsonHandler(TJsonHandlers& handlers) { - handlers.AddHandler("/viewer/nodes", new TJsonHandler(TJsonNodes::GetSwagger()), 12); + handlers.AddHandler("/viewer/nodes", new TJsonHandler(TJsonNodes::GetSwagger()), 13); } void InitViewerACLJsonHandler(TJsonHandlers &jsonHandlers) { diff --git a/ydb/core/viewer/json_pipe_req.cpp b/ydb/core/viewer/json_pipe_req.cpp index 52777ea8492c..8d8d6f95cb43 100644 --- a/ydb/core/viewer/json_pipe_req.cpp +++ b/ydb/core/viewer/json_pipe_req.cpp @@ -254,6 +254,15 @@ TViewerPipeClient::TRequestResponse TViewerPipeCli case NKikimrViewer::TEvViewerRequest::kSystemRequest: response.Span.Attribute("request_type", "SystemRequest"); break; + case NKikimrViewer::TEvViewerRequest::kPDiskRequest: + response.Span.Attribute("request_type", "PDiskRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kVDiskRequest: + response.Span.Attribute("request_type", "VDiskRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kNodeRequest: + response.Span.Attribute("request_type", "NodeRequest"); + break; case NKikimrViewer::TEvViewerRequest::kQueryRequest: response.Span.Attribute("request_type", "QueryRequest"); break; diff --git a/ydb/core/viewer/protos/viewer.proto b/ydb/core/viewer/protos/viewer.proto index 6ad06ff4e428..b2da2cddf0de 100644 --- a/ydb/core/viewer/protos/viewer.proto +++ b/ydb/core/viewer/protos/viewer.proto @@ -532,10 +532,27 @@ message TNodeInfo { bool Disconnected = 4; float CpuUsage = 5; float DiskSpaceUsage = 6; - NKikimrWhiteboard.TSystemStateInfo SystemState = 10; - repeated NKikimrWhiteboard.TPDiskStateInfo PDisks = 20; - repeated NKikimrWhiteboard.TVDiskStateInfo VDisks = 30; - repeated TTabletStateInfo Tablets = 40; + optional uint32 Connections = 7; // total number of live connections + EFlag ConnectStatus = 10; // Max + optional uint64 ReceiveThroughput = 11; + optional uint64 SendThroughput = 12; + optional float NetworkUtilization = 20; // Sum + optional float NetworkUtilizationMin = 21; + optional float NetworkUtilizationMax = 22; + optional int64 ClockSkewUs = 30; // Avg + optional int64 ClockSkewMinUs = 31; + optional int64 ClockSkewMaxUs = 32; + optional int64 ReverseClockSkewUs = 33; // Avg + optional uint64 PingTimeUs = 40; // Avg + optional uint64 PingTimeMinUs = 41; + optional uint64 PingTimeMaxUs = 42; + optional uint64 ReversePingTimeUs = 43; // Avg + NKikimrWhiteboard.TSystemStateInfo SystemState = 50; + repeated NKikimrWhiteboard.TPDiskStateInfo PDisks = 51; + repeated NKikimrWhiteboard.TVDiskStateInfo VDisks = 52; + repeated TTabletStateInfo Tablets = 53; + repeated NKikimrWhiteboard.TNodeStateInfo Peers = 54; + repeated NKikimrWhiteboard.TNodeStateInfo ReversePeers = 55; } message TNodesInfo { @@ -668,13 +685,14 @@ message TEvViewerRequest { NKikimrWhiteboard.TEvVDiskStateRequest VDiskRequest = 16; NKikimrWhiteboard.TEvPDiskStateRequest PDiskRequest = 17; NKikimrWhiteboard.TEvBSGroupStateRequest BSGroupRequest = 18; + NKikimrWhiteboard.TEvNodeStateRequest NodeRequest = 19; THttpProxyRequest QueryRequest = 13; THttpProxyRequest RenderRequest = 14; TSchemeCacheRequest AutocompleteRequest = 15; - bytes Reserved19 = 19; bytes Reserved20 = 20; bytes Reserved21 = 21; bytes Reserved22 = 22; + bytes Reserved23 = 23; } } @@ -686,13 +704,14 @@ message TEvViewerResponse { NKikimrWhiteboard.TEvVDiskStateResponse VDiskResponse = 16; NKikimrWhiteboard.TEvPDiskStateResponse PDiskResponse = 17; NKikimrWhiteboard.TEvBSGroupStateResponse BSGroupResponse = 18; + NKikimrWhiteboard.TEvNodeStateResponse NodeResponse = 19; NKikimrKqp.TEvQueryResponse QueryResponse = 13; NKikimrGraph.TEvMetricsResult RenderResponse = 14; TQueryAutocomplete AutocompleteResponse = 15; - bytes Reserved19 = 19; bytes Reserved20 = 20; bytes Reserved21 = 21; bytes Reserved22 = 22; + bytes Reserved23 = 23; } } @@ -789,3 +808,4 @@ message TFeatureFlagsConfig { uint32 Version = 1; repeated TDatabase Databases = 2; } + diff --git a/ydb/core/viewer/viewer_nodes.h b/ydb/core/viewer/viewer_nodes.h index a6f2154ce102..793ec2661f97 100644 --- a/ydb/core/viewer/viewer_nodes.h +++ b/ydb/core/viewer/viewer_nodes.h @@ -15,12 +15,14 @@ using namespace NActors; using namespace NNodeWhiteboard; enum class ENodeFields : ui8 { + NodeId, NodeInfo, SystemState, PDisks, VDisks, Tablets, - NodeId, + Peers, + ReversePeers, HostName, NodeName, DC, @@ -37,6 +39,13 @@ enum class ENodeFields : ui8 { DisconnectTime, Database, HasDisks, + Connections, + ConnectStatus, + SendThroughput, + ReceiveThroughput, + NetworkUtilization, + ClockSkew, + PingTime, COUNT }; @@ -44,6 +53,10 @@ constexpr ui8 operator +(ENodeFields e) { return static_cast(e); } +bool operator ==(const NActorsInterconnect::TScopeId& x, const NActorsInterconnect::TScopeId& y) { + return x.GetX1() == y.GetX1() && x.GetX2() == y.GetX2(); +} + class TJsonNodes : public TViewerPipeClient { using TThis = TJsonNodes; using TBase = TViewerPipeClient; @@ -82,8 +95,12 @@ class TJsonNodes : public TViewerPipeClient { std::unordered_map> VDiskStateResponse; std::unordered_map> PDiskStateResponse; std::unordered_map> TabletStateResponse; + std::unordered_map> PeersStateResponse; std::unordered_map> SystemViewerResponse; + std::unordered_map> VDiskViewerResponse; + std::unordered_map> PDiskViewerResponse; std::unordered_map> TabletViewerResponse; + std::unordered_map> PeersViewerResponse; TJsonSettings JsonSettings; ui32 Timeout = 0; @@ -156,6 +173,8 @@ class TJsonNodes : public TViewerPipeClient { std::vector VDisks; std::vector SysViewVDisks; std::vector Tablets; + std::vector Peers; // information about sessions from this node + std::vector ReversePeers; // information about sessions to this node TSubDomainKey SubDomainKey; TString Database; ui32 MissingDisks = 0; @@ -163,12 +182,27 @@ class TJsonNodes : public TViewerPipeClient { float CpuUsage = 0; // total, normalized float LoadAverage = 0; // normalized bool Problems = false; - bool Connected = false; + NKikimrWhiteboard::TNodeStateInfo NetworkStateInfo; bool Disconnected = false; bool HasDisks = false; bool GotDatabaseFromDatabaseBoardInfo = false; bool GotDatabaseFromResourceBoardInfo = false; int UptimeSeconds = 0; + ui32 Connections = 0; + ui64 SendThroughput = 0; + ui64 ReceiveThroughput = 0; + NKikimrWhiteboard::EFlag ConnectStatus = NKikimrWhiteboard::EFlag::Grey; + float NetworkUtilization = 0; // Sum + float NetworkUtilizationMin = 0; + float NetworkUtilizationMax = 0; + int64 ClockSkewUs = 0; // Avg + int64 ClockSkewMinUs = 0; + int64 ClockSkewMaxUs = 0; + int64 ReverseClockSkewUs = 0; // Avg + uint64 PingTimeUs = 0; // Avg + uint64 PingTimeMinUs = 0; + uint64 PingTimeMaxUs = 0; + uint64 ReversePingTimeUs = 0; // Avg TNodeId GetNodeId() const { return NodeInfo.NodeId; @@ -370,11 +404,18 @@ class TJsonNodes : public TViewerPipeClient { int GetCandidateScore() const { int score = 0; - if (Connected) { - score += 100; + if (NetworkStateInfo.GetConnected() && NetworkStateInfo.GetConnectStatus() != NKikimrWhiteboard::EFlag::Red) { + score += 10000; // because already connected node is always preferable + } + if (NetworkStateInfo.GetConnectStatus() != NKikimrWhiteboard::EFlag::Grey && NetworkStateInfo.GetConnectStatus() != NKikimrWhiteboard::EFlag::Green) { + score -= 3000 * static_cast(NetworkStateInfo.GetConnectStatus()); // connection state is important } + score -= NetworkStateInfo.GetPingTimeUs(); // lower ping is better if (IsStatic()) { - score += 10; + score += 10000; // static nodes are always preferable too + } + if (NetworkStateInfo.GetSessionState() == NKikimrWhiteboard::TNodeStateInfo_ESessionState_PENDING_CONNECTION) { + score -= 100000; // avoid pending connections } return score; } @@ -385,6 +426,12 @@ class TJsonNodes : public TViewerPipeClient { return TStringBuilder() << std::floor(std::clamp(DiskSpaceUsage, 0, 100) / 5) * 5 << '%'; } + TString GetNetworkUtilizationForGroup() const { + //return TStringBuilder() << std::ceil(std::clamp(NetworkUtilization, 0, 100) / 5) * 5 << '%'; + // we want 0%-95% groups instead of 5%-100% groups + return TStringBuilder() << std::floor(std::clamp(NetworkUtilization, 0, 100) / 5) * 5 << '%'; + } + TInstant GetStartTime() const { return TInstant::MilliSeconds(SystemState.GetStartTime()); } @@ -405,6 +452,67 @@ class TJsonNodes : public TViewerPipeClient { UptimeSeconds = GetUptimeSeconds(now); } + void CalcPeers() { + Connections = 0; + SendThroughput = 0; + ReceiveThroughput = 0; + std::array connectStatuses = {}; + ConnectStatus = NKikimrWhiteboard::EFlag::Grey; + NetworkUtilization = 0; + NetworkUtilizationMin = 0; + NetworkUtilizationMax = 0; + ClockSkewUs = 0; + ClockSkewMinUs = 0; + ClockSkewMaxUs = 0; + ReverseClockSkewUs = 0; + PingTimeUs = 0; + PingTimeMinUs = 0; + PingTimeMaxUs = 0; + ReversePingTimeUs = 0; + if (!Peers.empty()) { + NetworkUtilizationMin = NetworkUtilizationMax = Peers.front().GetUtilization(); + ClockSkewMinUs = ClockSkewMaxUs = Peers.front().GetClockSkewUs(); + PingTimeMinUs = PingTimeMaxUs = Peers.front().GetPingTimeUs(); + } + for (const auto& peer : Peers) { + NKikimrWhiteboard::EFlag connectStatus = peer.GetConnected() ? peer.GetConnectStatus() : NKikimrWhiteboard::EFlag::Grey; + connectStatuses[connectStatus]++; + if (peer.GetConnected() && peer.GetConnectStatus() != NKikimrWhiteboard::EFlag::Red && peer.GetSessionState() != NKikimrWhiteboard::TNodeStateInfo_ESessionState_PENDING_CONNECTION) { + ++Connections; + } + SendThroughput += peer.GetWriteThroughput(); + NetworkUtilization += peer.GetUtilization(); + NetworkUtilizationMin = std::min(NetworkUtilizationMin, peer.GetUtilization()); + NetworkUtilizationMax = std::max(NetworkUtilizationMax, peer.GetUtilization()); + ClockSkewUs += peer.GetClockSkewUs(); + ClockSkewMinUs = std::min(ClockSkewMinUs, peer.GetClockSkewUs()); + ClockSkewMaxUs = std::max(ClockSkewMaxUs, peer.GetClockSkewUs()); + PingTimeUs += peer.GetPingTimeUs(); + PingTimeMinUs = std::min(PingTimeMinUs, peer.GetPingTimeUs()); + PingTimeMaxUs = std::max(PingTimeMaxUs, peer.GetPingTimeUs()); + } + if (!Peers.empty()) { + // NetworkUtilization /= Peers.size(); // alexvru suggests to use sum instead of average + ClockSkewUs = ClockSkewUs / static_cast(Peers.size()); + PingTimeUs = PingTimeUs / Peers.size(); + } + int percent5 = Peers.size() / 20; + for (int i = 0; i < NKikimrWhiteboard::EFlag_ARRAYSIZE; ++i) { + if (connectStatuses[i] > percent5) { + ConnectStatus = static_cast(i); + } + } + for (const auto& peer : ReversePeers) { + ReceiveThroughput += peer.GetWriteThroughput(); + ReverseClockSkewUs += peer.GetClockSkewUs(); + ReversePingTimeUs += peer.GetPingTimeUs(); + } + if (!ReversePeers.empty()) { + ReverseClockSkewUs = ReverseClockSkewUs / static_cast(ReversePeers.size()); + ReversePingTimeUs = ReversePingTimeUs / ReversePeers.size(); + } + } + TString GetUptimeForGroup() const { if (!Disconnected && UptimeSeconds >= 0) { if (UptimeSeconds < 60 * 10) { @@ -449,6 +557,27 @@ class TJsonNodes : public TViewerPipeClient { } } + TString GetClockSkewForGroup() const { + auto clockSkew = abs(ClockSkewUs) / 1000; + if (clockSkew < 1) { + return "<1ms"; + } + if (clockSkew < 10) { + return "1ms..10ms"; + } + return "10ms+"; + } + + TString GetPingTimeForGroup() const { + if (PingTimeUs < 1000) { + return "<1ms"; + } + if (PingTimeUs < 10000) { + return "1ms..10ms"; + } + return "10ms+"; + } + bool HasDatabase(const TString& database) const { return Database == database; } @@ -493,6 +622,18 @@ class TJsonNodes : public TViewerPipeClient { case ENodeFields::SystemState: groupName = NKikimrWhiteboard::EFlag_Name(GetOverall()); break; + case ENodeFields::ConnectStatus: + groupName = NKikimrWhiteboard::EFlag_Name(ConnectStatus); + break; + case ENodeFields::NetworkUtilization: + groupName = GetNetworkUtilizationForGroup(); + break; + case ENodeFields::ClockSkew: + groupName = GetClockSkewForGroup(); + break; + case ENodeFields::PingTime: + groupName = GetPingTimeForGroup(); + break; default: break; } @@ -520,6 +661,14 @@ class TJsonNodes : public TViewerPipeClient { return UptimeSeconds; case ENodeFields::SystemState: return static_cast(GetOverall()); + case ENodeFields::ConnectStatus: + return static_cast(ConnectStatus); + case ENodeFields::NetworkUtilization: + return NetworkUtilization; + case ENodeFields::ClockSkew: + return static_cast(abs(ClockSkewUs) / 1000); + case ENodeFields::PingTime: + return PingTimeUs; default: return TString(); } @@ -588,6 +737,17 @@ class TJsonNodes : public TViewerPipeClient { const TFieldsType FieldsHiveNodeStat = TFieldsType().set(+ENodeFields::SubDomainKey) .set(+ENodeFields::DisconnectTime); + const TFieldsType FieldsPeers = TFieldsType().set(+ENodeFields::Peers) + .set(+ENodeFields::SendThroughput) + .set(+ENodeFields::Connections) + .set(+ENodeFields::ConnectStatus) + .set(+ENodeFields::NetworkUtilization) + .set(+ENodeFields::PingTime) + .set(+ENodeFields::ClockSkew); + + const TFieldsType FieldsReversePeers = TFieldsType().set(+ENodeFields::ReversePeers) + .set(+ENodeFields::ReceiveThroughput); + const std::unordered_map DependentFields = { { ENodeFields::DC, TFieldsType().set(+ENodeFields::SystemState) }, { ENodeFields::Rack, TFieldsType().set(+ENodeFields::SystemState) }, @@ -600,6 +760,14 @@ class TJsonNodes : public TViewerPipeClient { { ENodeFields::Database, TFieldsType().set(+ENodeFields::SystemState) }, { ENodeFields::Missing, TFieldsType().set(+ENodeFields::PDisks) }, { ENodeFields::MemoryDetailed, TFieldsType().set(+ENodeFields::SystemState) }, + { ENodeFields::SendThroughput, TFieldsType().set(+ENodeFields::Peers) }, + { ENodeFields::ReceiveThroughput, TFieldsType().set(+ENodeFields::ReversePeers) }, + { ENodeFields::ReversePeers, TFieldsType().set(+ENodeFields::Peers) }, + { ENodeFields::Connections, TFieldsType().set(+ENodeFields::Peers) }, + { ENodeFields::ConnectStatus, TFieldsType().set(+ENodeFields::Peers) }, + { ENodeFields::NetworkUtilization, TFieldsType().set(+ENodeFields::Peers) }, + { ENodeFields::PingTime, TFieldsType().set(+ENodeFields::Peers) }, + { ENodeFields::ClockSkew, TFieldsType().set(+ENodeFields::Peers) }, }; bool FieldsNeeded(TFieldsType fields) const { @@ -672,6 +840,24 @@ class TJsonNodes : public TViewerPipeClient { result = ENodeFields::VDisks; } else if (field == "Tablets") { result = ENodeFields::Tablets; + } else if (field == "Peers") { + result = ENodeFields::Peers; + } else if (field == "Connections") { + result = ENodeFields::Connections; + } else if (field == "SendThroughput") { + result = ENodeFields::SendThroughput; + } else if (field == "ReceiveThroughput") { + result = ENodeFields::ReceiveThroughput; + } else if (field == "ReversePeers") { + result = ENodeFields::ReversePeers; + } else if (field == "ConnectStatus") { + result = ENodeFields::ConnectStatus; + } else if (field == "NetworkUtilization") { + result = ENodeFields::NetworkUtilization; + } else if (field == "PingTime") { + result = ENodeFields::PingTime; + } else if (field == "ClockSkew") { + result = ENodeFields::ClockSkew; } return result; } @@ -811,9 +997,11 @@ class TJsonNodes : public TViewerPipeClient { return; } - if (FieldsNeeded(FieldsNodeInfo)) { - NodesInfoResponse = MakeRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - NodeStateResponse = MakeWhiteboardRequest(TActivationContext::ActorSystem()->NodeId, new TEvWhiteboard::TEvNodeStateRequest()); + NodesInfoResponse = MakeRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); + { + auto request = std::make_unique(); + request->Record.AddFieldsRequired(-1); + NodeStateResponse = MakeWhiteboardRequest(TActivationContext::ActorSystem()->NodeId, request.release()); } if (FilterStoragePool || !FilterGroupIds.empty()) { FilterDatabase = false; // we disable database filter if we're filtering by pool or group @@ -1121,6 +1309,10 @@ class TJsonNodes : public TViewerPipeClient { case ENodeFields::Missing: case ENodeFields::Version: case ENodeFields::SystemState: + case ENodeFields::ConnectStatus: + case ENodeFields::NetworkUtilization: + case ENodeFields::ClockSkew: + case ENodeFields::PingTime: GroupCollection(); SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.SortKey; }, true); NeedGroup = false; @@ -1129,6 +1321,7 @@ class TJsonNodes : public TViewerPipeClient { case ENodeFields::PDisks: case ENodeFields::VDisks: case ENodeFields::Tablets: + case ENodeFields::Peers: case ENodeFields::SubDomainKey: case ENodeFields::COUNT: case ENodeFields::Memory: @@ -1137,6 +1330,10 @@ class TJsonNodes : public TViewerPipeClient { case ENodeFields::LoadAverage: case ENodeFields::DisconnectTime: case ENodeFields::HasDisks: + case ENodeFields::Connections: + case ENodeFields::ReceiveThroughput: + case ENodeFields::SendThroughput: + case ENodeFields::ReversePeers: break; } } @@ -1202,6 +1399,42 @@ class TJsonNodes : public TViewerPipeClient { SortCollection(NodeView, [](const TNode* node) { return static_cast(node->GetOverall()); }, ReverseSort); NeedSort = false; break; + case ENodeFields::Connections: + SortCollection(NodeView, [](const TNode* node) { return node->Connections; }, ReverseSort); + NeedSort = false; + break; + case ENodeFields::SendThroughput: + SortCollection(NodeView, [](const TNode* node) { return node->SendThroughput; }, ReverseSort); + NeedSort = false; + break; + case ENodeFields::ReceiveThroughput: + SortCollection(NodeView, [](const TNode* node) { return node->ReceiveThroughput; }, ReverseSort); + NeedSort = false; + break; + case ENodeFields::NetworkUtilization: + SortCollection(NodeView, [](const TNode* node) { return node->NetworkUtilization; }, ReverseSort); + NeedSort = false; + break; + case ENodeFields::ConnectStatus: + SortCollection(NodeView, [](const TNode* node) { return static_cast(node->ConnectStatus); }, ReverseSort); + NeedSort = false; + break; + case ENodeFields::PingTime: + SortCollection(NodeView, [](const TNode* node) { return node->PingTimeUs; }, ReverseSort); + NeedSort = false; + break; + case ENodeFields::ClockSkew: + SortCollection(NodeView, [](const TNode* node) { return node->ClockSkewUs; }, ReverseSort); + NeedSort = false; + break; + case ENodeFields::Peers: + SortCollection(NodeView, [](const TNode* node) { return node->Peers.size(); }, ReverseSort); + NeedSort = false; + break; + case ENodeFields::ReversePeers: + SortCollection(NodeView, [](const TNode* node) { return node->ReversePeers.size(); }, ReverseSort); + NeedSort = false; + break; case ENodeFields::NodeInfo: case ENodeFields::PDisks: case ENodeFields::VDisks: @@ -1276,12 +1509,12 @@ class TJsonNodes : public TViewerPipeClient { } } - std::vector BatchNodes() { + std::vector BatchNodes(const TNodeView& nodeView) { std::vector batches; if (OffloadMerge) { std::unordered_map batchSubDomain; std::unordered_map batchDataCenters; - for (TNode* node : NodeView) { + for (TNode* node : nodeView) { if (node->IsStatic()) { batchDataCenters[node->GetDataCenter()].NodesToAskAbout.push_back(node); } else { @@ -1301,7 +1534,7 @@ class TJsonNodes : public TViewerPipeClient { } } else { TNodeBatch nodeBatch; - for (TNode* node : NodeView) { + for (TNode* node : nodeView) { nodeBatch.NodesToAskAbout.push_back(node); } SplitBatch(nodeBatch, batches); @@ -1374,8 +1607,11 @@ class TJsonNodes : public TViewerPipeClient { if (PDisksResponse && !PDisksResponse->IsDone()) { return false; } - if (!SystemStateResponse.empty() || !TabletStateResponse.empty() || !PDiskStateResponse.empty() - || !VDiskStateResponse.empty() || !SystemViewerResponse.empty() || !TabletViewerResponse.empty()) { + if (!SystemStateResponse.empty() || !SystemViewerResponse.empty() + || !TabletStateResponse.empty() || !TabletViewerResponse.empty() + || !PDiskStateResponse.empty() || !PDiskViewerResponse.empty() + || !VDiskStateResponse.empty() || !VDiskViewerResponse.empty() + || !PeersStateResponse.empty() || !PeersViewerResponse.empty()) { return false; } return CurrentTimeoutState < TimeoutFinal; @@ -1423,16 +1659,21 @@ class TJsonNodes : public TViewerPipeClient { if (NodeStateResponse && NodeStateResponse->IsDone() && TotalNodes > 0) { if (NodeStateResponse->IsOk()) { for (const auto& nodeStateInfo : NodeStateResponse->Get()->Record.GetNodeStateInfo()) { - if (nodeStateInfo.GetConnected()) { - TNodeId nodeId = FromStringWithDefault(TStringBuf(nodeStateInfo.GetPeerName()).Before(':'), 0); - if (nodeId) { - TNode* node = FindNode(nodeId); - if (node) { - node->Connected = true; - } + TNodeId nodeId = nodeStateInfo.GetPeerNodeId() + ? nodeStateInfo.GetPeerNodeId() + : FromStringWithDefault(TStringBuf(nodeStateInfo.GetPeerName()).Before(':'), 0); + if (nodeId) { + TNode* node = FindNode(nodeId); + if (node) { + node->NetworkStateInfo = nodeStateInfo; } } } + TNode* node = FindNode(TActivationContext::ActorSystem()->NodeId); + if (node) { + node->NetworkStateInfo.MutableScopeId()->SetX1(AppData()->LocalScopeId.GetInterconnectScopeId().first); + node->NetworkStateInfo.MutableScopeId()->SetX2(AppData()->LocalScopeId.GetInterconnectScopeId().second); + } } else { AddProblem("no-node-state-info"); } @@ -1718,7 +1959,7 @@ class TJsonNodes : public TViewerPipeClient { if (FilterDatabase) { FieldsRequired.set(+ENodeFields::SystemState); } - std::vector batches = BatchNodes(); + std::vector batches = BatchNodes(NodeView); SendWhiteboardRequests(batches); } } @@ -1744,11 +1985,32 @@ class TJsonNodes : public TViewerPipeClient { } } - void SendWhiteboardSystemAndTabletsBatch(TNodeBatch& batch) { - TNodeId nodeId = OffloadMerge ? batch.ChooseNodeId() : 0; - if (batch.HasStaticNodes && (FieldsNeeded(FieldsVDisks) || FieldsNeeded(FieldsPDisks))) { - nodeId = 0; // we need to ask for all nodes anyway (for the compatibility with older versions) + template<> + void InitWhiteboardRequest(NKikimrWhiteboard::TEvTabletStateRequest* request) { + if (AllWhiteboardFields) { + request->AddFieldsRequired(-1); + } + request->SetGroupBy("Type,State"); + } + + template<> + void InitWhiteboardRequest(NKikimrWhiteboard::TEvNodeStateRequest* request) { + if (AllWhiteboardFields) { + request->AddFieldsRequired(-1); + } else { + request->MutableFieldsRequired()->CopyFrom(GetDefaultWhiteboardFields()); + request->AddFieldsRequired(NKikimrWhiteboard::TNodeStateInfo::kConnectTimeFieldNumber); + request->AddFieldsRequired(NKikimrWhiteboard::TNodeStateInfo::kClockSkewUsFieldNumber); + request->AddFieldsRequired(NKikimrWhiteboard::TNodeStateInfo::kPingTimeUsFieldNumber); + request->AddFieldsRequired(NKikimrWhiteboard::TNodeStateInfo::kUtilizationFieldNumber); + request->AddFieldsRequired(NKikimrWhiteboard::TNodeStateInfo::kScopeIdFieldNumber); + request->AddFieldsRequired(NKikimrWhiteboard::TNodeStateInfo::kBytesWrittenFieldNumber); + request->AddFieldsRequired(NKikimrWhiteboard::TNodeStateInfo::kWriteThroughputFieldNumber); } + } + + void SendWhiteboardRequest(TNodeBatch& batch) { + TNodeId nodeId = OffloadMerge ? batch.ChooseNodeId() : 0; if (nodeId) { if (FieldsNeeded(FieldsSystemState) && SystemViewerResponse.count(nodeId) == 0) { auto viewerRequest = std::make_unique(); @@ -1763,7 +2025,7 @@ class TJsonNodes : public TViewerPipeClient { } if (FieldsNeeded(FieldsTablets) && TabletViewerResponse.count(nodeId) == 0) { auto viewerRequest = std::make_unique(); - viewerRequest->Record.MutableTabletRequest()->SetGroupBy("Type,State"); + InitWhiteboardRequest(viewerRequest->Record.MutableTabletRequest()); viewerRequest->Record.SetTimeout(Timeout / 2); for (const TNode* node : batch.NodesToAskAbout) { viewerRequest->Record.MutableLocation()->AddNodeId(node->GetNodeId()); @@ -1772,6 +2034,45 @@ class TJsonNodes : public TViewerPipeClient { NodeBatches.emplace(nodeId, batch); // ignore second insert because they are the same ++WhiteboardStateRequestsInFlight; } + if (batch.HasStaticNodes) { + if (FieldsNeeded(FieldsPDisks) && PDiskViewerResponse.count(nodeId) == 0) { + auto viewerRequest = std::make_unique(); + InitWhiteboardRequest(viewerRequest->Record.MutablePDiskRequest()); + viewerRequest->Record.SetTimeout(Timeout / 2); + for (const TNode* node : batch.NodesToAskAbout) { + if (node->IsStatic()) { + viewerRequest->Record.MutableLocation()->AddNodeId(node->GetNodeId()); + } + } + PDiskViewerResponse.emplace(nodeId, MakeViewerRequest(nodeId, viewerRequest.release())); + NodeBatches.emplace(nodeId, batch); // ignore second insert because they are the same + ++WhiteboardStateRequestsInFlight; + } + if (FieldsNeeded(FieldsVDisks) && VDiskViewerResponse.count(nodeId) == 0) { + auto viewerRequest = std::make_unique(); + InitWhiteboardRequest(viewerRequest->Record.MutableVDiskRequest()); + viewerRequest->Record.SetTimeout(Timeout / 2); + for (const TNode* node : batch.NodesToAskAbout) { + if (node->IsStatic()) { + viewerRequest->Record.MutableLocation()->AddNodeId(node->GetNodeId()); + } + } + VDiskViewerResponse.emplace(nodeId, MakeViewerRequest(nodeId, viewerRequest.release())); + NodeBatches.emplace(nodeId, batch); // ignore second insert because they are the same + ++WhiteboardStateRequestsInFlight; + } + } + if (FieldsNeeded(FieldsPeers) && PeersViewerResponse.count(nodeId) == 0) { + auto viewerRequest = std::make_unique(); + InitWhiteboardRequest(viewerRequest->Record.MutableNodeRequest()); + viewerRequest->Record.SetTimeout(Timeout / 2); + for (const TNode* node : batch.NodesToAskAbout) { + viewerRequest->Record.MutableLocation()->AddNodeId(node->GetNodeId()); + } + PeersViewerResponse.emplace(nodeId, MakeViewerRequest(nodeId, viewerRequest.release())); + NodeBatches.emplace(nodeId, batch); // ignore second insert because they are the same + ++WhiteboardStateRequestsInFlight; + } } else { for (const TNode* node : batch.NodesToAskAbout) { if (node->Disconnected) { @@ -1789,35 +2090,35 @@ class TJsonNodes : public TViewerPipeClient { if (FieldsNeeded(FieldsTablets)) { if (TabletStateResponse.count(nodeId) == 0) { auto request = std::make_unique(); - request->Record.SetGroupBy("Type,State"); + InitWhiteboardRequest(&request->Record); TabletStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request.release())); ++WhiteboardStateRequestsInFlight; } } - } - } - } - - void SendWhiteboardRequest(TNodeBatch& batch) { - SendWhiteboardSystemAndTabletsBatch(batch); - for (const TNode* node : batch.NodesToAskAbout) { - TNodeId nodeId = node->GetNodeId(); - - if (node->IsStatic()) { - if (FieldsNeeded(FieldsVDisks)) { - if (VDiskStateResponse.count(nodeId) == 0) { - auto request = new TEvWhiteboard::TEvVDiskStateRequest(); + if (FieldsNeeded(FieldsPeers)) { + if (PeersStateResponse.count(nodeId) == 0) { + auto request = std::make_unique(); InitWhiteboardRequest(&request->Record); - VDiskStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request)); + PeersStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request.release())); ++WhiteboardStateRequestsInFlight; } } - if (FieldsNeeded(FieldsPDisks)) { - if (PDiskStateResponse.count(nodeId) == 0) { - auto request = new TEvWhiteboard::TEvPDiskStateRequest(); - InitWhiteboardRequest(&request->Record); - PDiskStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request)); - ++WhiteboardStateRequestsInFlight; + if (node->IsStatic()) { + if (FieldsNeeded(FieldsVDisks)) { + if (VDiskStateResponse.count(nodeId) == 0) { + auto request = new TEvWhiteboard::TEvVDiskStateRequest(); + InitWhiteboardRequest(&request->Record); + VDiskStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request)); + ++WhiteboardStateRequestsInFlight; + } + } + if (FieldsNeeded(FieldsPDisks)) { + if (PDiskStateResponse.count(nodeId) == 0) { + auto request = new TEvWhiteboard::TEvPDiskStateRequest(); + InitWhiteboardRequest(&request->Record); + PDiskStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request)); + ++WhiteboardStateRequestsInFlight; + } } } } @@ -1830,7 +2131,60 @@ class TJsonNodes : public TViewerPipeClient { } } + int WhiteboardRequestRound = 1; + void ProcessWhiteboard() { + AddEvent("ProcessWhiteboard"); + if (FieldsNeeded(FieldsReversePeers) && WhiteboardRequestRound++ == 1) { + std::unordered_set nodeIds; + std::unordered_set reverseNodeIds; + for (auto& [nodeId, response] : PeersViewerResponse) { + if (response.IsOk()) { + auto& nodeResponse(*(response.Get()->Record.MutableNodeResponse())); + for (const auto& nodeState : nodeResponse.GetNodeStateInfo()) { + if (nodeState.GetNodeId()) { + nodeIds.insert(nodeState.GetNodeId()); + } + if (nodeState.GetConnected() && nodeState.GetPeerNodeId()) { + reverseNodeIds.insert(nodeState.GetPeerNodeId()); + } + } + } + } + for (auto& [nodeId, response] : PeersStateResponse) { + if (response.IsOk()) { + nodeIds.insert(nodeId); + const auto& nodeState(response.Get()->Record); + for (const auto& nodeStateInfo : nodeState.GetNodeStateInfo()) { + if (nodeStateInfo.GetConnected() && nodeStateInfo.GetPeerNodeId()) { + reverseNodeIds.insert(nodeStateInfo.GetPeerNodeId()); + } + } + } + } + for (auto nodeId : nodeIds) { + reverseNodeIds.erase(nodeId); + } + if (!reverseNodeIds.empty()) { + std::unordered_map reverseNodesByNodeId; + TNodeView reverseNodeView; + for (TNode& node : NodeData) { + reverseNodesByNodeId[node.GetNodeId()] = &node; + } + for (TNodeId reverseNodeId : reverseNodeIds) { + auto it = reverseNodesByNodeId.find(reverseNodeId); + if (it != reverseNodesByNodeId.end()) { + reverseNodeView.push_back(it->second); + } + } + AddEvent("ReversePeers"); + std::vector batches = BatchNodes(reverseNodeView); + SendWhiteboardRequests(batches); + if (WhiteboardStateRequestsInFlight > 0) { + return; + } + } + } if (FieldsNeeded(FieldsSystemState)) { TInstant now = TInstant::Now(); bool hasMemoryDetailed = false; @@ -1941,6 +2295,17 @@ class TJsonNodes : public TViewerPipeClient { FieldsAvailable |= FieldsTablets; } if (FieldsNeeded(FieldsVDisks)) { + for (auto& [nodeId, response] : VDiskViewerResponse) { + if (response.IsOk()) { + auto& vDiskResponse(*(response.Get()->Record.MutableVDiskResponse())); + for (const auto& vDiskState : vDiskResponse.GetVDiskStateInfo()) { + TNode* node = FindNode(vDiskState.GetNodeId()); + if (node) { + node->VDisks.emplace_back(vDiskState); + } + } + } + } for (const auto& [nodeId, response] : VDiskStateResponse) { if (response.IsOk()) { const auto& vDiskState(response.Get()->Record); @@ -1955,6 +2320,18 @@ class TJsonNodes : public TViewerPipeClient { FieldsAvailable |= FieldsVDisks; } if (FieldsNeeded(FieldsPDisks)) { + for (auto& [nodeId, response] : PDiskViewerResponse) { + if (response.IsOk()) { + auto& pDiskResponse(*(response.Get()->Record.MutablePDiskResponse())); + for (const auto& pDiskState : pDiskResponse.GetPDiskStateInfo()) { + TNode* node = FindNode(pDiskState.GetNodeId()); + if (node) { + node->PDisks.emplace_back(pDiskState); + node->CalcDisks(); + } + } + } + } for (const auto& [nodeId, response] : PDiskStateResponse) { if (response.IsOk()) { const auto& pDiskState(response.Get()->Record); @@ -1965,13 +2342,72 @@ class TJsonNodes : public TViewerPipeClient { } node->CalcDisks(); } - MaximumDisksPerNode = std::max(MaximumDisksPerNode.value_or(0), pDiskState.PDiskStateInfoSize()); } } FieldsAvailable |= FieldsPDisks; FieldsAvailable.set(+ENodeFields::Missing); FieldsAvailable.set(+ENodeFields::DiskSpaceUsage); } + bool needCalcPeers = false; + if (FieldsNeeded(FieldsPeers)) { + for (auto& [nodeId, response] : PeersViewerResponse) { + if (response.IsOk()) { + auto& nodeResponse(*(response.Get()->Record.MutableNodeResponse())); + for (const auto& nodeState : nodeResponse.GetNodeStateInfo()) { + TNode* node = FindNode(nodeState.GetNodeId()); + if (node) { + node->Peers.emplace_back(nodeState); + } + } + } + } + for (auto& [nodeId, response] : PeersStateResponse) { + if (response.IsOk()) { + const auto& nodeState(response.Get()->Record); + TNode* node = FindNode(nodeId); + if (node) { + for (const auto& protoNodeState : nodeState.GetNodeStateInfo()) { + node->Peers.emplace_back(protoNodeState).SetNodeId(nodeId); + } + } + } + } + FieldsAvailable |= FieldsPeers; + needCalcPeers = true; + } + if (FieldsNeeded(FieldsReversePeers)) { + for (auto& [nodeId, response] : PeersViewerResponse) { + if (response.IsOk()) { + auto& nodeResponse(*(response.Get()->Record.MutableNodeResponse())); + for (const auto& nodeState : nodeResponse.GetNodeStateInfo()) { + if (nodeState.GetPeerNodeId()) { + TNode* reverseNode = FindNode(nodeState.GetPeerNodeId()); + if (reverseNode) { + reverseNode->ReversePeers.emplace_back(nodeState); + } + } + } + } + } + for (auto& [nodeId, response] : PeersStateResponse) { + if (response.IsOk()) { + const auto& nodeState(response.Get()->Record); + for (const auto& protoNodeState : nodeState.GetNodeStateInfo()) { + TNode* reverseNode = FindNode(protoNodeState.GetPeerNodeId()); + if (reverseNode) { + reverseNode->ReversePeers.emplace_back(protoNodeState).SetNodeId(nodeId); + } + } + } + } + FieldsAvailable |= FieldsReversePeers; + needCalcPeers = true; + } + if (needCalcPeers) { + for (TNode* node : NodeView) { + node->CalcPeers(); + } + } ApplyEverything(); } @@ -1982,9 +2418,14 @@ class TJsonNodes : public TViewerPipeClient { } void Handle(TEvWhiteboard::TEvNodeStateResponse::TPtr& ev) { - NodeStateResponse->Set(std::move(ev)); - ProcessResponses(); - RequestDone(); + ui64 nodeId = ev.Get()->Cookie; + if (NodeStateResponse && !NodeStateResponse->IsDone() && nodeId == TActivationContext::ActorSystem()->NodeId) { + NodeStateResponse->Set(std::move(ev)); + ProcessResponses(); + RequestDone(); + } else if (PeersStateResponse[nodeId].Set(std::move(ev))) { + WhiteboardRequestDone(); + } } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { @@ -2067,6 +2508,24 @@ class TJsonNodes : public TViewerPipeClient { WhiteboardRequestDone(); } return; + case NKikimrViewer::TEvViewerResponse::ResponseCase::kPDiskResponse: + if (PDiskViewerResponse[nodeId].Set(std::move(ev))) { + NodeBatches.erase(nodeId); + WhiteboardRequestDone(); + } + return; + case NKikimrViewer::TEvViewerResponse::ResponseCase::kVDiskResponse: + if (VDiskViewerResponse[nodeId].Set(std::move(ev))) { + NodeBatches.erase(nodeId); + WhiteboardRequestDone(); + } + return; + case NKikimrViewer::TEvViewerResponse::ResponseCase::kNodeResponse: + if (PeersViewerResponse[nodeId].Set(std::move(ev))) { + NodeBatches.erase(nodeId); + WhiteboardRequestDone(); + } + return; default: break; } @@ -2076,7 +2535,7 @@ class TJsonNodes : public TViewerPipeClient { if (itSystemViewerResponse != SystemViewerResponse.end()) { if (itSystemViewerResponse->second.Error(error)) { if (NodeBatches.count(nodeId)) { - SendWhiteboardSystemAndTabletsBatch(NodeBatches[nodeId]); + SendWhiteboardRequest(NodeBatches[nodeId]); NodeBatches.erase(nodeId); } WhiteboardRequestDone(); @@ -2088,7 +2547,43 @@ class TJsonNodes : public TViewerPipeClient { if (itTabletViewerResponse != TabletViewerResponse.end()) { if (itTabletViewerResponse->second.Error(error)) { if (NodeBatches.count(nodeId)) { - SendWhiteboardSystemAndTabletsBatch(NodeBatches[nodeId]); + SendWhiteboardRequest(NodeBatches[nodeId]); + NodeBatches.erase(nodeId); + } + WhiteboardRequestDone(); + } + } + } + { + auto itPDiskViewerResponse = PDiskViewerResponse.find(nodeId); + if (itPDiskViewerResponse != PDiskViewerResponse.end()) { + if (itPDiskViewerResponse->second.Error(error)) { + if (NodeBatches.count(nodeId)) { + SendWhiteboardRequest(NodeBatches[nodeId]); + NodeBatches.erase(nodeId); + } + WhiteboardRequestDone(); + } + } + } + { + auto itVDiskViewerResponse = VDiskViewerResponse.find(nodeId); + if (itVDiskViewerResponse != VDiskViewerResponse.end()) { + if (itVDiskViewerResponse->second.Error(error)) { + if (NodeBatches.count(nodeId)) { + SendWhiteboardRequest(NodeBatches[nodeId]); + NodeBatches.erase(nodeId); + } + WhiteboardRequestDone(); + } + } + } + { + auto itPeersViewerResponse = PeersViewerResponse.find(nodeId); + if (itPeersViewerResponse != PeersViewerResponse.end()) { + if (itPeersViewerResponse->second.Error(error)) { + if (NodeBatches.count(nodeId)) { + SendWhiteboardRequest(NodeBatches[nodeId]); NodeBatches.erase(nodeId); } WhiteboardRequestDone(); @@ -2125,6 +2620,69 @@ class TJsonNodes : public TViewerPipeClient { } } + void FailViewerRequestsForNode(TNodeId nodeId, const TString& error) { + { + auto itSystemViewerResponse = SystemViewerResponse.find(nodeId); + if (itSystemViewerResponse != SystemViewerResponse.end()) { + if (itSystemViewerResponse->second.Error(error)) { + if (NodeBatches.count(nodeId)) { + SendWhiteboardRequest(NodeBatches[nodeId]); + NodeBatches.erase(nodeId); + } + WhiteboardRequestDone(); + } + } + } + { + auto itTabletViewerResponse = TabletViewerResponse.find(nodeId); + if (itTabletViewerResponse != TabletViewerResponse.end()) { + if (itTabletViewerResponse->second.Error(error)) { + if (NodeBatches.count(nodeId)) { + SendWhiteboardRequest(NodeBatches[nodeId]); + NodeBatches.erase(nodeId); + } + WhiteboardRequestDone(); + } + } + } + { + auto itPDiskViewerResponse = PDiskViewerResponse.find(nodeId); + if (itPDiskViewerResponse != PDiskViewerResponse.end()) { + if (itPDiskViewerResponse->second.Error(error)) { + if (NodeBatches.count(nodeId)) { + SendWhiteboardRequest(NodeBatches[nodeId]); + NodeBatches.erase(nodeId); + } + WhiteboardRequestDone(); + } + } + } + { + auto itVDiskViewerResponse = VDiskViewerResponse.find(nodeId); + if (itVDiskViewerResponse != VDiskViewerResponse.end()) { + if (itVDiskViewerResponse->second.Error(error)) { + if (NodeBatches.count(nodeId)) { + SendWhiteboardRequest(NodeBatches[nodeId]); + NodeBatches.erase(nodeId); + } + WhiteboardRequestDone(); + } + } + } + { + auto itPeersViewerResponse = PeersViewerResponse.find(nodeId); + if (itPeersViewerResponse != PeersViewerResponse.end()) { + if (itPeersViewerResponse->second.Error(error)) { + if (NodeBatches.count(nodeId)) { + SendWhiteboardRequest(NodeBatches[nodeId]); + NodeBatches.erase(nodeId); + } + WhiteboardRequestDone(); + } + } + } + } + void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { TNodeId nodeId = ev->Get()->NodeId; TNode* node = FindNode(nodeId); @@ -2169,29 +2727,14 @@ class TJsonNodes : public TViewerPipeClient { } } { - auto itSystemViewerResponse = SystemViewerResponse.find(nodeId); - if (itSystemViewerResponse != SystemViewerResponse.end()) { - if (itSystemViewerResponse->second.Error(error)) { - if (NodeBatches.count(nodeId)) { - SendWhiteboardSystemAndTabletsBatch(NodeBatches[nodeId]); - NodeBatches.erase(nodeId); - } - WhiteboardRequestDone(); - } - } - } - { - auto itTabletViewerResponse = TabletViewerResponse.find(nodeId); - if (itTabletViewerResponse != TabletViewerResponse.end()) { - if (itTabletViewerResponse->second.Error(error)) { - if (NodeBatches.count(nodeId)) { - SendWhiteboardSystemAndTabletsBatch(NodeBatches[nodeId]); - NodeBatches.erase(nodeId); - } + auto itPeersStateResponse = PeersStateResponse.find(nodeId); + if (itPeersStateResponse != PeersStateResponse.end()) { + if (itPeersStateResponse->second.Error(error)) { WhiteboardRequestDone(); } } } + FailViewerRequestsForNode(nodeId, error); } bool OnBscError(const TString& error) { @@ -2290,6 +2833,24 @@ class TJsonNodes : public TViewerPipeClient { WhiteboardRequestDone(); } } + for (auto& [nodeId, response] : PDiskViewerResponse) { + if (response.Error(error)) { + AddProblem("wb-incomplete"); + WhiteboardRequestDone(); + } + } + for (auto& [nodeId, response] : VDiskViewerResponse) { + if (response.Error(error)) { + AddProblem("wb-incomplete"); + WhiteboardRequestDone(); + } + } + for (auto& [nodeId, response] : PeersViewerResponse) { + if (response.Error(error)) { + AddProblem("wb-incomplete"); + WhiteboardRequestDone(); + } + } for (auto& [nodeId, response] : SystemStateResponse) { if (response.Error(error)) { AddProblem("wb-incomplete"); @@ -2314,12 +2875,26 @@ class TJsonNodes : public TViewerPipeClient { WhiteboardRequestDone(); } } + for (auto& [nodeId, response] : PeersStateResponse) { + if (response.Error(error)) { + AddProblem("wb-incomplete"); + WhiteboardRequestDone(); + } + } if (WaitingForResponse()) { ReplyAndPassAway(); } } } + void Undelivered(TEvents::TEvUndelivered::TPtr& ev) { + if (ev->Get()->Reason == TEvents::TEvUndelivered::ReasonActorUnknown && ev->Get()->SourceType == TEvViewer::TEvViewerRequest::EventType) { + static const TString error = "Undelivered"; + TNodeId nodeId = ev.Get()->Cookie; + FailViewerRequestsForNode(nodeId, error); + } + } + STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { hFunc(TEvInterconnect::TEvNodesInfo, Handle); @@ -2339,6 +2914,7 @@ class TJsonNodes : public TViewerPipeClient { hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); hFunc(TEvTabletPipe::TEvClientConnected, Handle); hFunc(TEvents::TEvWakeup, HandleTimeout); + hFunc(TEvents::TEvUndelivered, Undelivered); } } @@ -2399,6 +2975,39 @@ class TJsonNodes : public TViewerPipeClient { if (node->DiskSpaceUsage) { jsonNode.SetDiskSpaceUsage(node->DiskSpaceUsage); } + if (FieldsAvailable.test(+ENodeFields::Connections)) { + jsonNode.SetConnections(node->Connections); + } + if (FieldsAvailable.test(+ENodeFields::ConnectStatus)) { + jsonNode.SetConnectStatus(GetViewerFlag(node->ConnectStatus)); + } + if (FieldsAvailable.test(+ENodeFields::NetworkUtilization)) { + jsonNode.SetNetworkUtilization(node->NetworkUtilization); + jsonNode.SetNetworkUtilizationMin(node->NetworkUtilizationMin); + jsonNode.SetNetworkUtilizationMax(node->NetworkUtilizationMax); + } + if (FieldsAvailable.test(+ENodeFields::ClockSkew)) { + jsonNode.SetClockSkewUs(node->ClockSkewUs); + jsonNode.SetClockSkewMinUs(node->ClockSkewMinUs); + jsonNode.SetClockSkewMaxUs(node->ClockSkewMaxUs); + if (FieldsAvailable.test(+ENodeFields::ReversePeers)) { + jsonNode.SetReverseClockSkewUs(node->ReverseClockSkewUs); + } + } + if (FieldsAvailable.test(+ENodeFields::PingTime)) { + jsonNode.SetPingTimeUs(node->PingTimeUs); + jsonNode.SetPingTimeMinUs(node->PingTimeMinUs); + jsonNode.SetPingTimeMaxUs(node->PingTimeMaxUs); + if (FieldsAvailable.test(+ENodeFields::ReversePeers)) { + jsonNode.SetReversePingTimeUs(node->ReversePingTimeUs); + } + } + if (FieldsAvailable.test(+ENodeFields::SendThroughput)) { + jsonNode.SetSendThroughput(node->SendThroughput); + } + if (FieldsAvailable.test(+ENodeFields::ReceiveThroughput)) { + jsonNode.SetReceiveThroughput(node->ReceiveThroughput); + } if (FieldsAvailable.test(+ENodeFields::NodeInfo) || FieldsAvailable.test(+ENodeFields::SystemState)) { *jsonNode.MutableSystemState() = std::move(node->SystemState); } @@ -2426,6 +3035,28 @@ class TJsonNodes : public TViewerPipeClient { (*jsonNode.AddTablets()) = std::move(tablet); } } + if (FieldsAvailable.test(+ENodeFields::SendThroughput)) { + jsonNode.SetSendThroughput(node->SendThroughput); + } + if (FieldsAvailable.test(+ENodeFields::ReceiveThroughput)) { + jsonNode.SetReceiveThroughput(node->ReceiveThroughput); + } + if (FieldsRequired.test(+ENodeFields::Peers)) { + std::sort(node->Peers.begin(), node->Peers.end(), [](const NKikimrWhiteboard::TNodeStateInfo& a, const NKikimrWhiteboard::TNodeStateInfo& b) { + return a.peernodeid() < b.peernodeid(); + }); + for (NKikimrWhiteboard::TNodeStateInfo& peer : node->Peers) { + (*jsonNode.AddPeers()) = std::move(peer); + } + } + if (FieldsRequired.test(+ENodeFields::ReversePeers)) { + std::sort(node->ReversePeers.begin(), node->ReversePeers.end(), [](const NKikimrWhiteboard::TNodeStateInfo& a, const NKikimrWhiteboard::TNodeStateInfo& b) { + return a.nodeid() < b.nodeid(); + }); + for (NKikimrWhiteboard::TNodeStateInfo& peer : node->ReversePeers) { + (*jsonNode.AddReversePeers()) = std::move(peer); + } + } } } else { for (const TNodeGroup& nodeGroup : NodeGroups) { @@ -2536,6 +3167,13 @@ class TJsonNodes : public TViewerPipeClient { * `DiskSpaceUsage` * `Database` * `SystemState` + * `Connections` + * `ConnectStatus` + * `NetworkUtilization` + * `ClockSkew` + * `PingTime` + * `SendThroughput` + * `ReceiveThroughput` required: false type: string - name: group @@ -2553,6 +3191,10 @@ class TJsonNodes : public TViewerPipeClient { * `Uptime` * `Version` * `SystemState` + * `ConnectStatus` + * `NetworkUtilization` + * `ClockSkew` + * `PingTime` required: false type: string - name: filter_group_by @@ -2570,6 +3212,10 @@ class TJsonNodes : public TViewerPipeClient { * `Uptime` * `Version` * `SystemState` + * `ConnectStatus` + * `NetworkUtilization` + * `ClockSkew` + * `PingTime` required: false type: string - name: filter_group @@ -2586,6 +3232,7 @@ class TJsonNodes : public TViewerPipeClient { * `PDisks` * `VDisks` * `Tablets` + * `Peers` * `Host` * `NodeName` * `DC` @@ -2601,6 +3248,13 @@ class TJsonNodes : public TViewerPipeClient { * `SubDomainKey` * `DisconnectTime` * `Database` + * `Connections` + * `ConnectStatus` + * `NetworkUtilization` + * `ClockSkew` + * `PingTime` + * `SendThroughput` + * `ReceiveThroughput` required: false type: string - name: offset diff --git a/ydb/core/viewer/viewer_request.cpp b/ydb/core/viewer/viewer_request.cpp index 7683321d2a4b..1afcbfc69a52 100644 --- a/ydb/core/viewer/viewer_request.cpp +++ b/ydb/core/viewer/viewer_request.cpp @@ -7,6 +7,7 @@ #include "viewer_vdiskinfo.h" #include "viewer_pdiskinfo.h" #include "viewer_bsgroupinfo.h" +#include "viewer_nodeinfo.h" #include "wb_req.h" #include @@ -76,6 +77,11 @@ class TViewerWhiteboardRequest : public TWhiteboardRequestRecord.MutableBSGroupResponse()), perNodeStateInfo, fields); } + template<> + void MergeWhiteboardResponses(TEvViewer::TEvViewerResponse* response, TMap& perNodeStateInfo, const TString& fields) { + NKikimr::NViewer::MergeWhiteboardResponses(*(response->Record.MutableNodeResponse()), perNodeStateInfo, fields); + } + static void Merge(NKikimrViewer::TEvViewerResponse& viewerResponse, TNodeId nodeId, TResponseType& nodeResponse); void ReplyAndPassAway() override { @@ -114,6 +120,8 @@ IActor* CreateViewerRequestHandler(TEvViewer::TEvViewerRequest::TPtr& request) { return new TViewerWhiteboardRequest(request); case NKikimrViewer::TEvViewerRequest::kBSGroupRequest: return new TViewerWhiteboardRequest(request); + case NKikimrViewer::TEvViewerRequest::kNodeRequest: + return new TViewerWhiteboardRequest(request); case NKikimrViewer::TEvViewerRequest::kQueryRequest: return new TJsonQueryOld(request); case NKikimrViewer::TEvViewerRequest::kRenderRequest: @@ -216,6 +224,24 @@ void TViewerWhiteboardRequest +THolder TViewerWhiteboardRequest::BuildRequest() { + auto request = TBase::BuildRequest(); + request->Record.MergeFrom(Event->Get()->Record.GetNodeRequest()); + return request; +} + +template<> +void TViewerWhiteboardRequest::Merge( + NKikimrViewer::TEvViewerResponse& viewerResponse, TNodeId nodeId, NKikimrWhiteboard::TEvNodeStateResponse& nodeResponse) { + auto& target = *viewerResponse.MutableNodeResponse(); + for (auto& info : *nodeResponse.MutableNodeStateInfo()) { + auto& i = *target.AddNodeStateInfo(); + i.MergeFrom(info); + i.SetNodeId(nodeId); + } +} + bool IsPostContent(const NMon::TEvHttpInfo::TPtr& event) { if (event->Get()->Request.GetMethod() == HTTP_METHOD_POST) { const THttpHeaders& headers = event->Get()->Request.GetHeaders();