From 86bac177140e7bfe69609de9690db7d27b9263a7 Mon Sep 17 00:00:00 2001
From: Francesco Pantano
Date: Sat, 11 May 2024 15:00:18 +0200
Subject: [PATCH] Allow unmanaging a spec

Even though we explicitly redeploy a given mon using the host:ip format,
it is possible that the orchestrator (which owns the process and the spec)
takes the initiative to redeploy a mon on an arbitrary IP address (if many
are available) between the `ceph orch rm ...` command and the
`ceph orch daemon add ...` one. For this reason, the daemon redeploy
follows this flow:

1. update and unmanage the spec
2. redeploy on the right network
3. manage the spec

The ceph_spec module has been updated to support managing/unmanaging a
spec.

Signed-off-by: Francesco Pantano
---
 tests/plugins/module_utils/ca_common.py    | 18 ++++--
 tests/plugins/module_utils/ceph_spec.py    | 13 ++++-
 tests/plugins/modules/ceph_mkspec.py       | 22 ++++++-
 tests/roles/ceph_migrate/defaults/main.yml |  4 ++
 tests/roles/ceph_migrate/tasks/mon.yaml    | 68 ++++++++++++++++++++++
 tests/roles/ceph_migrate/tasks/rbd.yaml    |  4 +-
 6 files changed, 116 insertions(+), 13 deletions(-)

diff --git a/tests/plugins/module_utils/ca_common.py b/tests/plugins/module_utils/ca_common.py
index c9c07601d..2ec6c4ad0 100644
--- a/tests/plugins/module_utils/ca_common.py
+++ b/tests/plugins/module_utils/ca_common.py
@@ -1,4 +1,5 @@
 #!/usr/bin/python
+
 # -*- coding: utf-8 -*-
 # Copyright (c) 2020 OpenStack Foundation
 # All Rights Reserved.
@@ -15,21 +16,24 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 # Included from: https://github.com/ceph/ceph-ansible/blob/master/module_utils/ca_common.py
+#
+
 import os
 import datetime
-

 def generate_ceph_cmd(sub_cmd, args, spec_path, user_key=None, cluster='ceph',
-                      user='client.admin', container_image=None, interactive=False):
+                      user='client.admin', container_image=None,
+                      interactive=False):
     '''
     Generate 'ceph' command line to execute
     '''
-
+    ceph_config_path = '/etc/ceph'
     if not user_key:
-        user_key = '/etc/ceph/{}.{}.keyring'.format(cluster, user)
-
-    cmd = pre_generate_ceph_cmd(container_image=container_image, interactive=interactive, spec_path=spec_path)
+        user_key = '{}/{}.{}.keyring'.format(ceph_config_path, cluster, user)
+    cmd = pre_generate_ceph_cmd(container_image=container_image,
+                                interactive=interactive,
+                                spec_path=spec_path)

     base_cmd = [
         '-n', user,
@@ -59,6 +63,8 @@ def container_exec(binary, container_image, spec_path=None, interactive=False):
     if 'CEPH_FSID' in os.environ:
         fsid = os.getenv('CEPH_FSID')
     ceph_config_path = '/etc/ceph'
+    if 'CEPH_CONF' in os.environ:
+        ceph_config_path = os.getenv('CEPH_CONF')
     if fsid:
         path = '/var/lib/ceph/{}/config'.format(fsid)
         if os.path.exists(path):
diff --git a/tests/plugins/module_utils/ceph_spec.py b/tests/plugins/module_utils/ceph_spec.py
index 7cba2b4e4..69edff88a 100644
--- a/tests/plugins/module_utils/ceph_spec.py
+++ b/tests/plugins/module_utils/ceph_spec.py
@@ -168,6 +168,7 @@ def __init__(self, daemon_type: str,
                  spec: dict,
                  label: str,
                  count: int,
+                 unmanaged: bool,
                  **kwargs: dict):

         self.daemon_name = daemon_name
@@ -177,6 +178,7 @@ def __init__(self, daemon_type: str,
         self.placement = placement_pattern
         self.label = label
         self.count = count
+        self.unmanaged = unmanaged

         # network list where the current daemon should be bound
         if not networks:
@@ -234,17 +236,22 @@ def make_daemon_spec(self):

         # process extra parameters if present
         if not self.validate_keys(self.extra.keys(), ALLOWED_EXTRA_KEYS):
-            raise Exception("Fatal: the spec should be composed by only allowed keywords")
keywords") + raise Exception("Fatal: the spec should be composed by only \ + allowed keywords") # append the spec if provided if len(self.spec.keys()) > 0: if self.validate_keys(self.spec.keys(), ALLOWED_SPEC_KEYS): sp = {'spec': self.normalize_spec(self.filter_spec(self.spec))} else: - raise Exception("Fatal: the spec should be composed by only allowed keywords") + raise Exception("Fatal: the spec should be composed by only \ + allowed keywords") + unmgd = { + 'unmanaged': self.unmanaged, + } # build the resulting daemon template - spec_template = {**spec_template, **ntw, **self.extra, **pl, **sp} + spec_template = {**spec_template, **ntw, **unmgd, **self.extra, **pl, **sp} return spec_template def normalize_spec(self, spec): diff --git a/tests/plugins/modules/ceph_mkspec.py b/tests/plugins/modules/ceph_mkspec.py index 1727124fb..6222b377f 100644 --- a/tests/plugins/modules/ceph_mkspec.py +++ b/tests/plugins/modules/ceph_mkspec.py @@ -91,6 +91,12 @@ - The total number of instances that should be deployed required: false type: int + unmanaged: + description: + - The unmanaged field is used to avoid cephadm to take over the daemon + type and redeploy them + required: false + type: bool label: description: - The label used to apply the daemon on the Ceph custer nodes @@ -162,6 +168,16 @@ label: "controller" count: 2 apply: true +- name: create the Ceph RGW daemon spec + ceph_mkspec: + service_type: mon + service_id: mon + service_name: mon + render_path: '/home/ceph-admin/specs' + label: "mon" + count: 2 + apply: true + unmanaged: true ''' RETURN = '''# ''' @@ -218,6 +234,7 @@ def run_module(): networks = module.params.get('networks') label = module.params.get('label') count = module.params.get('count') + unmanaged = module.params.get('unmanaged') spec = module.params.get('spec') extra = module.params.get('extra') apply = module.params.get('apply') @@ -264,9 +281,12 @@ def run_module(): if count is None: count = -1 + if unmanaged is None: + unmanaged = False + d = ceph_spec.CephDaemonSpec(service_type, service_id, service_name, hosts, host_pattern, networks, spec, label, - count, **extra) + count, unmanaged, **extra) render('{}/{}'.format(render_path, service_type), d.make_daemon_spec()) if apply: diff --git a/tests/roles/ceph_migrate/defaults/main.yml b/tests/roles/ceph_migrate/defaults/main.yml index 3d062fd64..94d16166b 100644 --- a/tests/roles/ceph_migrate/defaults/main.yml +++ b/tests/roles/ceph_migrate/defaults/main.yml @@ -23,4 +23,8 @@ ceph_rgw_virtual_ips_list: [] ceph_storage_net_prefix: "172.17.3." 
 ceph_client_ip: "172.17.3.254"
 os_net_conf_path: "/etc/os-net-config/config.yaml"
+# a generic timeout common to multiple tasks
 ceph_timeout: 30
+# wait for the mon to be deployed and the orch spec to be
+# updated
+ceph_wait_mon_timeout: 10
diff --git a/tests/roles/ceph_migrate/tasks/mon.yaml b/tests/roles/ceph_migrate/tasks/mon.yaml
index e8e8caa04..60d88cb6a 100644
--- a/tests/roles/ceph_migrate/tasks/mon.yaml
+++ b/tests/roles/ceph_migrate/tasks/mon.yaml
@@ -38,6 +38,8 @@
   until: (monmap.stdout | from_json | community.general.json_query('monmap.num_mons') | int) >= ((decomm_nodes |default([]) | length | int) | default(3))
   loop_control:
     label: "check mons quorum"
+  tags:
+    - ceph_mon_quorum

 - name: Backup data for client purposes
   delegate_to: "{{ cur_mon.split('.')[0] }}.ctlplane"
@@ -189,17 +191,65 @@
     loop_control:
       label: "MON - wait for mon"

+  # Even though we explicitly redeploy a given mon using the host:ip format,
+  # it is possible that the orchestrator (which owns the process and the spec)
+  # takes the initiative to redeploy a mon on an arbitrary IP address (if many)
+  # between the `ceph orch rm ...` command and the `ceph orch daemon add ...`
+  # one. For this reason, the daemon redeploy follows this flow:
+  # 1. update and unmanage the spec
+  # 2. redeploy on the right network as per the official doc:
+  #    https://docs.ceph.com/en/quincy/cephadm/services/mon/
+  # 3. manage the spec
+  # Note: a pause between the steps is required to allow the orchestrator to
+  # process the spec and update the daemon refs
+  - name: Unmanage mons
+    # root privileges required to run cephadm
+    # and apply the new spec
+    become: true
+    ceph_mkspec:
+      service_type: mon
+      cluster: ceph
+      apply: true
+      label: "mon"
+      render_path: "{{ ceph_spec_render_dir }}"
+      unmanaged: true
+    register: spc
+    environment:
+      CEPH_CONTAINER_IMAGE: "{{ ceph_container }}"
+      CEPH_CONTAINER_BINARY: "{{ ceph_container_cli }}"
+      CEPH_CONF: "{{ ceph_config_tmp_client_home }}"
+
+  - name: Print the resulting spec
+    when: debug | default(true)
+    ansible.builtin.debug:
+      msg: "{{ spc }}"
+
   - name: MON - Delete the running mon
     become: true
     ansible.builtin.command: "{{ ceph_cli }} orch daemon rm mon.{{ target_node.split('.')[0] }} --force"
     ignore_errors: true

+  - name: Wait for the spec to be updated
+    pause:
+      seconds: "{{ ceph_wait_mon_timeout }}"
+
+  - name: MON - Redeploy mon on {{ target_node }} - Print command
+    when: debug | default(true)
+    ansible.builtin.debug:
+      msg: "{{ ceph_cli }} orch daemon add mon {{ target_node.split('.')[0] }}:{{ mon_ipaddr }}"
+
   - name: MON - Redeploy mon on {{ target_node }}
     become: true
+    when:
+      - mon_ipaddr | default('')
     ansible.builtin.command: "{{ ceph_cli }} orch daemon add mon {{ target_node.split('.')[0] }}:{{ mon_ipaddr }}"

+  - name: Wait for the spec to be updated
+    pause:
+      seconds: "{{ ceph_wait_mon_timeout }}"
+
   - name: MON - check mons quorum
     become: true
     ansible.builtin.command: "{{ ceph_cli }} -s -f json"
@@ -211,6 +261,8 @@
     until: (monmap.stdout | from_json | community.general.json_query('monmap.num_mons') | int) >= ((decomm_nodes |default([]) | length | int) | default(3))
     loop_control:
       label: "check mons quorum"
+    tags:
+      - ceph_mon_quorum

 # Post actions: refresh mgr data, reconfigure osds
 - name: MON - refresh cephadm info - fail ceph mgr and refresh osd config
@@ -221,6 +273,22 @@
     - name: reconfig osds
       ansible.builtin.command: "{{ ceph_cli }} orch reconfig osd.default_drive_group "

+- name: Manage mons
+  # root privileges required to run cephadm
+  # and apply the new spec
+  become: true
+  ceph_mkspec:
+    service_type: mon
+    cluster: ceph
+    apply: true
+    label: "mon"
+    render_path: "{{ ceph_spec_render_dir }}"
+    unmanaged: false
+  environment:
+    CEPH_CONTAINER_IMAGE: "{{ ceph_container }}"
+    CEPH_CONTAINER_BINARY: "{{ ceph_container_cli }}"
+    CEPH_CONF: "{{ ceph_config_tmp_client_home }}"
+
 # Wait for the redeploy to finish before moving to the next stage
 - ansible.builtin.include_tasks: wait_daemons.yaml
   vars:
diff --git a/tests/roles/ceph_migrate/tasks/rbd.yaml b/tests/roles/ceph_migrate/tasks/rbd.yaml
index e19bba179..c751268ca 100644
--- a/tests/roles/ceph_migrate/tasks/rbd.yaml
+++ b/tests/roles/ceph_migrate/tasks/rbd.yaml
@@ -79,13 +79,11 @@
 - name: MON - Migrate RBD node
   ansible.builtin.include_tasks: mon.yaml
   vars:
-    #cur_mon: controller-2 #.redhat.local
-    #target_node: cephstorage-2 #.redhat.local
     cur_mon: "{{ node.0 }}"
     target_node: "{{ node.1 }}"
   # This condition might be a different one
   loop: "{{ decomm_nodes| zip(target_nodes) }}"
-  ##loop: "{{ decomm_nodes|zip(hostmap.keys() | difference(decomm_nodes) | sort) | list }}"
+  #loop: "{{ decomm_nodes|zip(hostmap.keys() | difference(decomm_nodes) | sort) | list }}"
   loop_control:
     loop_var: node
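
For reference, the unmanage/redeploy/manage flow implemented above maps to the
following orchestrator interactions. The spec below is only an illustrative
sketch of what ceph_mkspec renders and applies during step 1: service_type,
service_name, placement and unmanaged are standard cephadm service spec
fields, while the host name and IP are placeholders, not values taken from
this change.

    # step 1: render and apply an unmanaged mon spec (ceph_mkspec with
    # unmanaged: true), roughly equivalent to `ceph orch apply -i <spec>` with:
    service_type: mon
    service_name: mon
    placement:
      label: mon
    unmanaged: true

    # step 2: move the daemon to the desired address while the spec is frozen:
    #   ceph orch daemon rm mon.<host> --force
    #   ceph orch daemon add mon <host>:<ip>

    # step 3: re-apply the same spec with `unmanaged: false` so cephadm takes
    # ownership of the mon daemons again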