From 34eade2da9c1a0df29eb13518fcf0537d3d0d120 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Tue, 19 Nov 2024 19:07:10 +0100 Subject: [PATCH] cells: ignore empty core domain uris propagated by zk Motivation: When Zookeeper updates core domain infos, dCache will first kill the existing cell tunnels and then later try to read and parse the new value. If the new value is an empty string (for whatever reason), parsing will fail, but a new connection will not be established. The corresponding error in the log: 18 Nov 2024 08:45:00 (c-dcache-head-xxx03_messageDomain-AAYmVA1LtnA-AAYmVA16phA) [dcache-head-xxx03_messageDomain,9.2.21,CORE] Error while reading from tunnel: java.net.SocketExceptio> 18 Nov 2024 08:45:43 (c-dcache-head-xxx03_messageDomain-AAYnKxn40fA) [] Uncaught exception in thread TunnelConnector-dcache-head-xxx03_messageDomain java.lang.NullPointerException: null at java.base/java.net.Socket.<init>(Socket.java:448) at java.base/java.net.Socket.<init>(Socket.java:264) at java.base/javax.net.DefaultSocketFactory.createSocket(SocketFactory.java:277) at dmg.cells.network.LocationManagerConnector.connect(LocationManagerConnector.java:64) at dmg.cells.network.LocationManagerConnector.run(LocationManagerConnector.java:94) at dmg.cells.nucleus.CellNucleus.lambda$wrapLoggingContext$2(CellNucleus.java:725) at java.base/java.lang.Thread.run(Thread.java:829) Modification: Before killing the existing tunnel, check that ZK didn't propagate empty data. Result: More robust cell communication. NOTE: non-empty invalid data is still accepted!!! 
Fixes: #7696 Acked-by: Lea Morschel Target: master, 10.2, 10.1, 10.0, 9.2 Require-book: no Require-notes: yes (cherry picked from commit 30829c96aa1f14b817d2c2e568d5dadc5d84d530) Signed-off-by: Tigran Mkrtchyan --- .../dmg/cells/services/LocationManager.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/modules/cells/src/main/java/dmg/cells/services/LocationManager.java b/modules/cells/src/main/java/dmg/cells/services/LocationManager.java index a417ddb9561..84900b00790 100644 --- a/modules/cells/src/main/java/dmg/cells/services/LocationManager.java +++ b/modules/cells/src/main/java/dmg/cells/services/LocationManager.java @@ -225,7 +225,7 @@ public CoreDomainInfo(byte[] bytes) { } } catch (IOException ie) { throw new IllegalArgumentException( - "Failed deserializing LocationManager Cores as uri: {}", ie.getCause()); + "Failed deserializing LocationManager Cores as uri", ie); } } @@ -513,6 +513,10 @@ public void close() { public void reset(Mode mode, State state) { } + private boolean hasNoData(ChildData data) { + return data == null || data.getData() == null || data.getData().length == 0; + } + public void update(PathChildrenCacheEvent event) { LOGGER.info("{}", event); String cell; @@ -525,12 +529,22 @@ public void update(PathChildrenCacheEvent event) { } break; case CHILD_UPDATED: - cell = connectors.remove(ZKPaths.getNodeFromPath(event.getData().getPath())); + if (hasNoData(event.getData())) { + LOGGER.warn("Ignoring empty data on UPDATED for {}", event.getData().getPath()); + break; + } + cell = connectors.remove( + ZKPaths.getNodeFromPath(event.getData().getPath())); if (cell != null) { killConnector(cell); } // fall through case CHILD_ADDED: + if (hasNoData(event.getData())) { + LOGGER.warn("Ignoring empty data on ADDED for {}", event.getData().getPath()); + break; + } + //Log if the Core Domain Information received is incompatible with previous CoreDomainInfo info = infoFromZKEvent(event); String domain = 
ZKPaths.getNodeFromPath(event.getData().getPath());