From 3f63f6aaa51aaf806de529c169d09b85ace85241 Mon Sep 17 00:00:00 2001 From: Damien Thenot Date: Fri, 19 Apr 2024 14:55:49 +0200 Subject: [PATCH] feat(Linstor): rewrite linstorhostcall logic Try to use host_OpaqueRef to access primary then try on the master host if it doesn't work, then find the primary with linstor API or if no primary, any other host. Signed-off-by: Damien Thenot Co-authored-by: Ronan Abhamon --- drivers/linstorvhdutil.py | 86 ++++++++++++++++++--------------- drivers/linstorvolumemanager.py | 18 +++++++ 2 files changed, 66 insertions(+), 38 deletions(-) diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py index 7f8efa12d..4b7115b68 100644 --- a/drivers/linstorvhdutil.py +++ b/drivers/linstorvhdutil.py @@ -33,18 +33,21 @@ def call_remote_method(session, host_ref, method, device_path, args): + host_rec = session.xenapi.host.get_record(host_ref) + host_uuid = host_rec['uuid'] + try: response = session.xenapi.host.call_plugin( host_ref, MANAGER_PLUGIN, method, args ) except Exception as e: - util.SMlog('call-plugin ({} with {}) exception: {}'.format( - method, args, e + util.SMlog('call-plugin on {} ({} with {}) exception: {}'.format( + host_uuid, method, args, e )) raise util.SMException(str(e)) - util.SMlog('call-plugin ({} with {}) returned: {}'.format( - method, args, response + util.SMlog('call-plugin on {} ({} with {}) returned: {}'.format( + host_uuid, method, args, response )) return response @@ -86,33 +89,6 @@ def wrapper(*args, **kwargs): self._linstor.get_volume_name(vdi_uuid) ) - # A. Try a call using directly the DRBD device to avoid - # remote request. - - # Try to read locally if the device is not in use or if the device - # is up to date and not diskless. - (node_names, in_use_by) = \ - self._linstor.find_up_to_date_diskful_nodes(vdi_uuid) - - local_e = None - try: - if not in_use_by or socket.gethostname() in node_names: - return self._call_local_method(local_method, device_path, *args[2:], **kwargs) - except ErofsLinstorCallException as e: - local_e = e.cmd_err - except Exception as e: - local_e = e - - util.SMlog( - 'unable to execute `{}` locally, retry using a readable host... (cause: {})'.format( - remote_method, local_e if local_e else 'local diskless + in use or not up to date' - ) - ) - - if in_use_by: - node_names = {in_use_by} - - # B. Execute the plugin on master or slave. remote_args = { 'devicePath': device_path, 'groupName': self._linstor.group_name @@ -121,14 +97,48 @@ def wrapper(*args, **kwargs): remote_args = {str(key): str(value) for key, value in remote_args.iteritems()} try: - def remote_call(): - host_ref = self._get_readonly_host(vdi_uuid, device_path, node_names) - return call_remote_method(self._session, host_ref, remote_method, device_path, remote_args) - response = util.retry(remote_call, 5, 2) - except Exception as remote_e: - self._raise_openers_exception(device_path, local_e or remote_e) + host_ref_attached = util.get_hosts_attached_on(self._session, [vdi_uuid])[0] + if host_ref_attached: + response = call_remote_method( + self._session, host_ref_attached, remote_method, device_path, remote_args + ) + return response_parser(self, vdi_uuid, response) + except Exception as e: + util.SMlog( + 'Failed to call method on attached host. Trying local access... (cause: {})'.format(e), + priority=util.LOG_DEBUG + ) + + try: + master_ref = self._session.xenapi.pool.get_all_records().values()[0]['master'] + response = call_remote_method(self._session, master_ref, remote_method, device_path, remote_args) + return response_parser(self, vdi_uuid, response) + except Exception as e: + util.SMlog( + 'Failed to call method on master host. Finding primary node... (cause: {})'.format(e), + priority=util.LOG_DEBUG + ) + + nodes, primary_hostname = self._linstor.find_up_to_date_diskful_nodes(vdi_uuid) + if primary_hostname: + try: + host_ref = self._get_readonly_host(vdi_uuid, device_path, {primary_hostname}) + response = call_remote_method(self._session, host_ref, remote_method, device_path, remote_args) + return response_parser(self, vdi_uuid, response) + except Exception as remote_e: + self._raise_openers_exception(device_path, remote_e) + else: + util.SMlog( + 'Couldn\'t get primary for {}. Trying with another node...'.format(vdi_uuid), + priority=util.LOG_DEBUG + ) + try: + host = self._get_readonly_host(vdi_uuid, device_path, nodes) + response = call_remote_method(self._session, host, remote_method, device_path, remote_args) + return response_parser(self, vdi_uuid, response) + except Exception as remote_e: + self._raise_openers_exception(device_path, remote_e) - return response_parser(self, vdi_uuid, response) return wrapper return decorated diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 8bfd1c1b4..fb47b09be 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -1438,6 +1438,24 @@ def find_up_to_date_diskful_nodes(self, volume_uuid): return (node_names, in_use_by) + def get_primary(self, volume_uuid): + """ + Find the node that opened a volume, i.e. the primary. + :rtype: str + """ + volume_name = self.get_volume_name(volume_uuid) + + resource_states = filter( + lambda resource_state: resource_state.name == volume_name, + self._get_resource_cache().resource_states + ) + + for resource_state in resource_states: + if resource_state.in_use: + return resource_state.node_name + + return None + def invalidate_resource_cache(self): """ If resources are impacted by external commands like vhdutil,