Commit

Allow unmanaging a spec
 Even though we explicitly redeploy a given mon using the host:ip format,
 it is possible that the orchestrator (which owns the process and the spec)
 takes the initiative to redeploy a mon on an arbitrary IP address (if several
 are available) between the `ceph orch rm ...` command and the
 `ceph orch daemon add ...` one.
 For this reason, the daemon redeploy follows this flow:
 1. update and unmanage the spec
 2. redeploy on the right network
 3. manage the spec

The ceph_spec module has been updated to support managing/unmanaging a spec.
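
For illustration only: a rendered mon spec with the daemon type unmanaged could
look roughly like the sketch below (values such as the service id and the
"mon" label are assumptions taken from the examples in this change, not a
literal output of the module):

    service_type: mon
    service_id: mon
    service_name: mon
    placement:
      label: mon
    unmanaged: true

Once the mon is redeployed on the right network, the same spec is re-applied
with unmanaged set back to false so the orchestrator resumes ownership.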

Signed-off-by: Francesco Pantano <[email protected]>
fmount committed May 15, 2024
1 parent ae2c55f commit 86bac17
Showing 6 changed files with 116 additions and 13 deletions.
18 changes: 12 additions & 6 deletions tests/plugins/module_utils/ca_common.py
@@ -1,4 +1,5 @@
#!/usr/bin/python

# -*- coding: utf-8 -*-
# Copyright (c) 2020 OpenStack Foundation
# All Rights Reserved.
@@ -15,21 +16,24 @@
# License for the specific language governing permissions and limitations
# under the License.
# Included from: https://github.com/ceph/ceph-ansible/blob/master/module_utils/ca_common.py
#

import os
import datetime


def generate_ceph_cmd(sub_cmd, args, spec_path, user_key=None, cluster='ceph',
user='client.admin', container_image=None, interactive=False):
user='client.admin', container_image=None,
interactive=False):
'''
Generate 'ceph' command line to execute
'''

ceph_config_path = '/etc/ceph'
if not user_key:
user_key = '/etc/ceph/{}.{}.keyring'.format(cluster, user)

cmd = pre_generate_ceph_cmd(container_image=container_image, interactive=interactive, spec_path=spec_path)
user_key = '{}/{}.{}.keyring'.format(ceph_config_path, cluster, user)

cmd = pre_generate_ceph_cmd(container_image=container_image,
interactive=interactive,
spec_path=spec_path)
base_cmd = [
'-n',
user,
@@ -59,6 +63,8 @@ def container_exec(binary, container_image, spec_path=None, interactive=False):
if 'CEPH_FSID' in os.environ:
fsid = os.getenv('CEPH_FSID')
ceph_config_path = '/etc/ceph'
if 'CEPH_CONF' in os.environ:
ceph_config_path = os.getenv('CEPH_CONF')
if fsid:
path = '/var/lib/ceph/{}/config'.format(fsid)
if os.path.exists(path):
13 changes: 10 additions & 3 deletions tests/plugins/module_utils/ceph_spec.py
@@ -168,6 +168,7 @@ def __init__(self, daemon_type: str,
spec: dict,
label: str,
count: int,
unmanaged: bool,
**kwargs: dict):

self.daemon_name = daemon_name
@@ -177,6 +178,7 @@ def __init__(self, daemon_type: str,
self.placement = placement_pattern
self.label = label
self.count = count
self.unmanaged = unmanaged

# network list where the current daemon should be bound
if not networks:
@@ -234,17 +236,22 @@ def make_daemon_spec(self):

# process extra parameters if present
if not self.validate_keys(self.extra.keys(), ALLOWED_EXTRA_KEYS):
raise Exception("Fatal: the spec should be composed by only allowed keywords")
raise Exception("Fatal: the spec should be composed by only \
allowed keywords")

# append the spec if provided
if len(self.spec.keys()) > 0:
if self.validate_keys(self.spec.keys(), ALLOWED_SPEC_KEYS):
sp = {'spec': self.normalize_spec(self.filter_spec(self.spec))}
else:
raise Exception("Fatal: the spec should be composed by only allowed keywords")
raise Exception("Fatal: the spec should be composed by only \
allowed keywords")

unmgd = {
'unmanaged': self.unmanaged,
}
# build the resulting daemon template
spec_template = {**spec_template, **ntw, **self.extra, **pl, **sp}
spec_template = {**spec_template, **ntw, **unmgd, **self.extra, **pl, **sp}
return spec_template

def normalize_spec(self, spec):
22 changes: 21 additions & 1 deletion tests/plugins/modules/ceph_mkspec.py
@@ -91,6 +91,12 @@
- The total number of instances that should be deployed
required: false
type: int
unmanaged:
description:
- The unmanaged field is used to prevent cephadm from taking over the daemon
type and redeploying it
required: false
type: bool
label:
description:
- The label used to apply the daemon on the Ceph cluster nodes
@@ -162,6 +168,16 @@
label: "controller"
count: 2
apply: true
- name: create an unmanaged Ceph MON daemon spec
ceph_mkspec:
service_type: mon
service_id: mon
service_name: mon
render_path: '/home/ceph-admin/specs'
label: "mon"
count: 2
apply: true
unmanaged: true
'''

RETURN = '''# '''
@@ -218,6 +234,7 @@ def run_module():
networks = module.params.get('networks')
label = module.params.get('label')
count = module.params.get('count')
unmanaged = module.params.get('unmanaged')
spec = module.params.get('spec')
extra = module.params.get('extra')
apply = module.params.get('apply')
@@ -264,9 +281,12 @@ def run_module():
if count is None:
count = -1

if unmanaged is None:
unmanaged = False

d = ceph_spec.CephDaemonSpec(service_type, service_id, service_name,
hosts, host_pattern, networks, spec, label,
count, **extra)
count, unmanaged, **extra)

render('{}/{}'.format(render_path, service_type), d.make_daemon_spec())
if apply:
4 changes: 4 additions & 0 deletions tests/roles/ceph_migrate/defaults/main.yml
@@ -23,4 +23,8 @@ ceph_rgw_virtual_ips_list: []
ceph_storage_net_prefix: "172.17.3."
ceph_client_ip: "172.17.3.254"
os_net_conf_path: "/etc/os-net-config/config.yaml"
# a generic timeout common to multiple tasks
ceph_timeout: 30
# wait for mon to be deployed and the orch spec to be
# updated
ceph_wait_mon_timeout: 10
68 changes: 68 additions & 0 deletions tests/roles/ceph_migrate/tasks/mon.yaml
@@ -38,6 +38,8 @@
until: (monmap.stdout | from_json | community.general.json_query('monmap.num_mons') | int) >= ((decomm_nodes |default([]) | length | int) | default(3))
loop_control:
label: "check mons quorum"
tags:
- ceph_mon_quorum

- name: Backup data for client purposes
delegate_to: "{{ cur_mon.split('.')[0] }}.ctlplane"
@@ -189,17 +191,65 @@
loop_control:
label: "MON - wait for mon"

# Even though we explicitly redeploy a given mon using the host:ip format,
# it is possible that the orchestrator (which owns the process and the spec)
# takes the initiative to redeploy a mon on an arbitrary IP address (if several
# are available) between the `ceph orch rm ...` command and the
# `ceph orch daemon add ...` one. For this reason, the daemon redeploy follows
# this flow:
# 1. update and unmanage the spec
# 2. redeploy on the right network as per the official doc:
#    https://docs.ceph.com/en/quincy/cephadm/services/mon/
# 3. manage the spec
# Note: a pause between the steps is required to allow the orchestrator to
# process the spec and update the daemon references
- name: Unmanage mons
# root privileges required to run cephadm
# and apply the new spec
become: true
ceph_mkspec:
service_type: mon
cluster: ceph
apply: true
label: "mon"
render_path: "{{ ceph_spec_render_dir }}"
unmanaged: true
register: spc
environment:
CEPH_CONTAINER_IMAGE: "{{ ceph_container }}"
CEPH_CONTAINER_BINARY: "{{ ceph_container_cli }}"
CEPH_CONF: "{{ ceph_config_tmp_client_home }}"

- name: Print the resulting spec
when: debug | default(true)
ansible.builtin.debug:
msg: "{{ spc }}"

- name: MON - Delete the running mon
become: true
ansible.builtin.command:
"{{ ceph_cli }} orch daemon rm mon.{{ target_node.split('.')[0] }} --force"
ignore_errors: true

- name: Wait for the spec to be updated
pause:
seconds: "{{ ceph_wait_mon_timeout }}"

- name: MON - Redeploy mon on {{ target_node }} - Print command
when: debug | default(true)
ansible.builtin.debug:
msg: "{{ ceph_cli }} orch daemon add mon {{ target_node.split('.')[0] }}:{{ mon_ipaddr }}"

- name: MON - Redeploy mon on {{ target_node }}
become: true
when:
- mon_ipaddr | default('')
ansible.builtin.command:
"{{ ceph_cli }} orch daemon add mon {{ target_node.split('.')[0] }}:{{ mon_ipaddr }}"

- name: Wait for the spec to be updated
pause:
seconds: "{{ ceph_wait_mon_timeout }}"

- name: MON - check mons quorum
become: true
ansible.builtin.command: "{{ ceph_cli }} -s -f json"
@@ -211,6 +261,8 @@
until: (monmap.stdout | from_json | community.general.json_query('monmap.num_mons') | int) >= ((decomm_nodes |default([]) | length | int) | default(3))
loop_control:
label: "check mons quorum"
tags:
- ceph_mon_quorum

# Post actions: refresh mgr data, reconfigure osds
- name: MON - refresh cephadm info - fail ceph mgr and refresh osd config
@@ -221,6 +273,22 @@
- name: reconfig osds
ansible.builtin.command: "{{ ceph_cli }} orch reconfig osd.default_drive_group "

- name: Manage mons
# root privileges required to run cephadm
# and apply the new spec
become: true
ceph_mkspec:
service_type: mon
cluster: ceph
apply: true
label: "mon"
render_path: "{{ ceph_spec_render_dir }}"
unmanaged: false
environment:
CEPH_CONTAINER_IMAGE: "{{ ceph_container }}"
CEPH_CONTAINER_BINARY: "{{ ceph_container_cli }}"
CEPH_CONF: "{{ ceph_config_tmp_client_home }}"

# Wait for the redeploy to finish before moving to the next stage
- ansible.builtin.include_tasks: wait_daemons.yaml
vars:
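
A possible follow-up (not part of this change, just a sketch): a task like the
one below could double-check that the mon service is reported as managed again
after the final `ceph_mkspec` call. The task name, the `mon_svc` variable and
the exact JSON layout of `ceph orch ls --export` output are assumptions and may
differ between Ceph releases (the `unmanaged` key can be absent when the
service is managed, which the default below accounts for):

    - name: MON - verify the mon spec is managed again
      become: true
      ansible.builtin.command: "{{ ceph_cli }} orch ls mon --export --format json"
      register: mon_svc
      changed_when: false
      # fail if the freshly applied mon spec is still reported as unmanaged
      failed_when: (mon_svc.stdout | from_json | first).get('unmanaged', false) | bool
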
4 changes: 1 addition & 3 deletions tests/roles/ceph_migrate/tasks/rbd.yaml
@@ -79,13 +79,11 @@
- name: MON - Migrate RBD node
ansible.builtin.include_tasks: mon.yaml
vars:
#cur_mon: controller-2 #.redhat.local
#target_node: cephstorage-2 #.redhat.local
cur_mon: "{{ node.0 }}"
target_node: "{{ node.1 }}"
# This condition might be a different one
loop: "{{ decomm_nodes| zip(target_nodes) }}"
##loop: "{{ decomm_nodes|zip(hostmap.keys() | difference(decomm_nodes) | sort) | list }}"
#loop: "{{ decomm_nodes|zip(hostmap.keys() | difference(decomm_nodes) | sort) | list }}"
loop_control:
loop_var: node

