From 4b4b97edf20946ac5dc9b121cd7f1db581d54b49 Mon Sep 17 00:00:00 2001 From: Gil Bregman Date: Thu, 19 Oct 2023 18:10:28 +0300 Subject: [PATCH] Make sure OMAP's get_state() reads the entire file, even if it has more than 1024 keys Fixes #263 Fixes #266 Signed-off-by: Gil Bregman --- .github/workflows/build-container.yml | 29 +++++++++++++++++++++++++++ control/discovery.py | 19 +++++------------- control/state.py | 14 +++++++++---- tests/test_grpc.py | 2 +- 4 files changed, 45 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build-container.yml b/.github/workflows/build-container.yml index 089e6ca0f..07b15db29 100644 --- a/.github/workflows/build-container.yml +++ b/.github/workflows/build-container.yml @@ -87,6 +87,35 @@ jobs: docker load < nvmeof-devel.tar docker load < vstart-cluster.tar + - name: Clear space on disk + run: | + echo "File systems status" + /usr/bin/df -h + if [[ -d /usr/share/dotnet ]]; then + echo "Size of /usr/share/dotnet" + /usr/bin/du -sh /usr/share/dotnet + rm -rf /usr/share/dotnet + fi + if [[ -d /opt/ghc ]]; then + echo "Size of /opt/ghc" + /usr/bin/du -sh /opt/ghc + rm -rf /opt/ghc + fi + if [[ -d /usr/local/share/boost ]]; then + echo "Size of /usr/local/share/boost" + /usr/bin/du -sh /usr/local/share/boost + rm -rf /usr/local/share/boost + fi + if [[ -n "$AGENT_TOOLSDIRECTORY" ]]; then + if [[ -d "$AGENT_TOOLSDIRECTORY" ]]; then + echo "Size of $AGENT_TOOLSDIRECTORY" + /usr/bin/du -sh "$AGENT_TOOLSDIRECTORY" + rm -rf "$AGENT_TOOLSDIRECTORY" + fi + fi + echo "File systems status after clearing space" + /usr/bin/df -h + - name: Start ceph cluster run: | make up SVC=ceph OPTS="--detach" diff --git a/control/discovery.py b/control/discovery.py index d766265f4..d63f24c7b 100644 --- a/control/discovery.py +++ b/control/discovery.py @@ -304,16 +304,11 @@ class DiscoveryService: discovery_port: Discovery controller's listening port """ - BDEV_PREFIX = "bdev_" - NAMESPACE_PREFIX = "namespace_" - SUBSYSTEM_PREFIX = "subsystem_" - 
HOST_PREFIX = "host_" - LISTENER_PREFIX = "listener_" - def __init__(self, config): self.version = 1 self.config = config self.lock = threading.Lock() + self.omap_state = OmapGatewayState(self.config) self.logger = logging.getLogger(__name__) log_level = self.config.getint_with_default("discovery", "debug", 20) @@ -344,10 +339,7 @@ def __init__(self, config): def _read_all(self) -> Dict[str, str]: """Reads OMAP and returns dict of all keys and values.""" - with rados.ReadOpCtx() as read_op: - iter, _ = self.ioctx.get_omap_vals(read_op, "", "", -1) - self.ioctx.operate_read_op(read_op, self.omap_name) - omap_dict = dict(iter) + omap_dict = self.omap_state.get_state() return omap_dict def _get_vals(self, omap_dict, prefix): @@ -675,8 +667,8 @@ def reply_get_log_page(self, conn, data, cmd_id): self.logger.debug("handle get log page request.") self_conn = self.conn_vals[conn.fileno()] my_omap_dict = self._read_all() - listeners = self._get_vals(my_omap_dict, self.LISTENER_PREFIX) - hosts = self._get_vals(my_omap_dict, self.HOST_PREFIX) + listeners = self._get_vals(my_omap_dict, GatewayState.LISTENER_PREFIX) + hosts = self._get_vals(my_omap_dict, GatewayState.HOST_PREFIX) if len(self_conn.nvmeof_connect_data_hostnqn) != 256: self.logger.error("error hostnqn.") return -1 @@ -1030,10 +1022,9 @@ def start_service(self): t = threading.Thread(target=self.handle_timeout) t.start() - omap_state = OmapGatewayState(self.config) local_state = LocalGatewayState() gateway_state = GatewayStateHandler(self.config, local_state, - omap_state, self._state_notify_update) + self.omap_state, self._state_notify_update) gateway_state.start_update() try: diff --git a/control/state.py b/control/state.py index b99664c38..237002166 100644 --- a/control/state.py +++ b/control/state.py @@ -224,10 +224,16 @@ def get_omap_version(self) -> int: def get_state(self) -> Dict[str, str]: """Returns dict of all OMAP keys and values.""" - with rados.ReadOpCtx() as read_op: - i, _ = 
self.ioctx.get_omap_vals(read_op, "", "", -1) - self.ioctx.operate_read_op(read_op, self.omap_name) - omap_dict = dict(i) + omap_list = [("", 0)] # Dummy, non-empty list value, just so we enter the while loop on the first pass + omap_dict = {} + # Ceph limits the number of items returned by a single read, so keep reading, passing the last key read as the starting point, until no more items are returned + while len(omap_list) > 0: + last_key_read = omap_list[-1][0] + with rados.ReadOpCtx() as read_op: + i, _ = self.ioctx.get_omap_vals(read_op, last_key_read, "", -1) + self.ioctx.operate_read_op(read_op, self.omap_name) + omap_list = list(i) + omap_dict.update(dict(omap_list)) return omap_dict def _add_key(self, key: str, val: str): diff --git a/tests/test_grpc.py b/tests/test_grpc.py index 8a7b9039a..e92a11e37 100644 --- a/tests/test_grpc.py +++ b/tests/test_grpc.py @@ -11,7 +11,7 @@ pool = "rbd" bdev_prefix = "Ceph0" subsystem_prefix = "nqn.2016-06.io.spdk:cnode" -created_resource_count = 150 +created_resource_count = 500 get_subsys_count = 100 def create_resource_by_index(i):