diff --git a/ChangeLog.md b/ChangeLog.md index 695d3ceba42..aa5add69d64 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,50 @@ +# Pacemaker-3.0.0-rc3 (23 Dec 2024) +* 33 commits with 9 files changed, 77 insertions(+), 12 deletions(-) + +## Features added since Pacemaker-3.0.0-rc2 + +* Inkscape is no longer a build dependency for Pacemaker documentation +* The `ocf:pacemaker:controld` agent will now always manage `dlm_controld` + (previously, it would try to manage the long-obsolete `gfs_controld` if the + resource name started with `gfs`) + +## Fixes since Pacemaker-3.0.0-rc2 + +* **agents:** `ocf:pacemaker:ping` now uses `grep -E` instead of the obsolete + `egrep` +* **tools:** when injecting a fail count with `crm_simulate`, use an `INFINITY` + score when the cluster would +* **tools:** validate `stonith_admin --timeout` value + +## Public API changes since Pacemaker-3.0.0-rc2 + +* **Python:** add `PACEMAKER_CONFIG_DIR` to `BuildOptions` + +# Pacemaker-3.0.0-rc2 (11 Dec 2024) +* 57 commits with 44 files changed, 1487 insertions(+), 633 deletions(-) + +## Features added since Pacemaker-3.0.0-rc1 + +* **Pacemaker Remote and CIB manager:** support X.509 (SSL/TLS) certificates + for encrypting Pacemaker Remote connections and remote CIB administration + +## Fixes since Pacemaker-3.0.0-rc1 + +* **libcrmcluster:** restore ability to do rolling upgrades + (regression introduced in 3.0.0-rc1) +* **controller:** avoid memory leak when updating join phase + (regression introduced in 3.0.0-rc1) +* **scheduler:** avoid memory leaks in bundles and when freeing node copies + (regression introduced in 3.0.0-rc1) +* **CIB:** log warnings if CIB upgrade might not preserve behavior exactly +* **CIB:** ensure ACLs remain valid after CIB upgrades, and warn if upgrade + might change ACL effect (regression introduced in 3.0.0-rc1) + +## Public API changes since Pacemaker-3.0.0-rc1 + +* **libcrmcommon:** add `pcmk_common_cleanup()` + + # Pacemaker-3.0.0-rc1 (14 Nov 2024) * 1938 commits with 685 files changed, 26363 insertions(+), 33503 deletions(-) diff --git a/cts/README.md b/cts/README.md index 943ef1bf173..ef7b46117cc 100644 --- a/cts/README.md +++ b/cts/README.md @@ -112,6 +112,10 @@ CTS includes: the log file, then CTS lab will not check for aggregation. * CTS lab does not currently detect systemd journal log aggregation. +* Optionally, if the lab nodes use the systemd journal for logs, create + /etc/systemd/journald.conf.d/cts-lab.conf on each with + `RateLimitIntervalSec=0` or `RateLimitBurst=0`, to avoid issues with log + detection. ### Run diff --git a/cts/cts-exec.in b/cts/cts-exec.in index dceab9ee36c..bcf2dd72bde 100644 --- a/cts/cts-exec.in +++ b/cts/cts-exec.in @@ -304,7 +304,10 @@ class ExecTests(Tests): def setup_environment(self): """Prepare the host before executing any tests.""" + if BuildOptions.REMOTE_ENABLED: + # @TODO Use systemctl when available, and use the subprocess module + # with an argument array instead of os.system() os.system("service pacemaker_remote stop") self.cleanup_environment() @@ -312,7 +315,9 @@ class ExecTests(Tests): authkey = "%s/authkey" % BuildOptions.PACEMAKER_CONFIG_DIR if self.tls and not os.path.isfile(authkey): print("Installing %s ..." % authkey) + # @TODO Use os.mkdir() instead os.system("mkdir -p %s" % BuildOptions.PACEMAKER_CONFIG_DIR) + # @TODO Use the subprocess module with an argument array instead os.system("dd if=/dev/urandom of=%s bs=4096 count=1" % authkey) self._installed_files.append(authkey) diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c index 2af13816cdf..fd70d85dcc1 100644 --- a/daemons/controld/controld_callbacks.c +++ b/daemons/controld/controld_callbacks.c @@ -106,6 +106,7 @@ node_alive(const pcmk__node_status_t *node) #define state_text(state) ((state)? (const char *)(state) : "in unknown state") +// @TODO This is insanely long, and some parts should be functionized void peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) @@ -128,9 +129,8 @@ peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, && pcmk_is_set(node->processes, crm_get_cluster_proc()) && !AM_I_DC && !is_remote) { - /* - * This is a hack until we can send to a nodeid and/or we fix node name lookups - * These messages are ignored in crmd_ha_msg_filter() + /* relay_message() on the recipient ignores these messages, but + * libcrmcluster will have cached the node name by then */ xmlNode *query = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL, CRM_SYSTEM_CRMD, CRM_OP_HELLO, @@ -140,7 +140,6 @@ peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, "node name", node->cluster_layer_id); pcmk__cluster_send_message(node, pcmk_ipc_controld, query); - pcmk__xml_free(query); } diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 36d7a3a7afe..84e4ca2eaa4 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -572,6 +572,7 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) xmlNode * controld_query_executor_state(void) { + // @TODO Ensure all callers handle NULL returns xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; @@ -597,7 +598,7 @@ controld_query_executor_state(void) crm_xml_add(xml_data, PCMK_XA_ID, peer->xml_id); rsc_list = pcmk__xe_create(xml_data, PCMK__XE_LRM_RESOURCES); - /* Build a list of active (not always running) resources */ + // Build a list of active (not necessarily running) resources build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c index 6714801ff03..99aac814240 100644 --- a/daemons/controld/controld_execd_state.c +++ b/daemons/controld/controld_execd_state.c @@ -319,6 +319,9 @@ controld_get_executor_state(const char *node_name, bool create) return state; } +/* @TODO the lone caller just needs to iterate over the values, so replace this + * with a g_hash_table_foreach() wrapper instead + */ GList * lrm_state_get_list(void) { diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c index 561857ef1e4..8075955953d 100644 --- a/daemons/controld/controld_membership.c +++ b/daemons/controld/controld_membership.c @@ -125,6 +125,7 @@ xmlNode * create_node_state_update(pcmk__node_status_t *node, int flags, xmlNode *parent, const char *source) { + // @TODO Ensure all callers handle NULL returns const char *value = NULL; xmlNode *node_state; diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c index 060a231d749..6076fa38427 100644 --- a/daemons/controld/controld_remote_ra.c +++ b/daemons/controld/controld_remote_ra.c @@ -606,6 +606,7 @@ monitor_timeout_cb(gpointer data) free_cmd(cmd); if(lrm_state) { + // @TODO Should we move this before reporting the result above? lrm_state_disconnect(lrm_state); } return FALSE; @@ -950,12 +951,13 @@ handle_remote_ra_exec(gpointer user_data) } else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) { if (pcmk_is_set(ra_data->status, expect_takeover)) { - /* briefly wait on stop for the takeover event to occur. If the - * takeover event does not occur during the wait period, that's fine. - * It just means that the remote-node's lrm_status section is going to get - * cleared which will require all the resources running in the remote-node - * to be explicitly re-detected via probe actions. If the takeover does occur - * successfully, then we can leave the status section intact. */ + /* Briefly wait on stop for an expected takeover to occur. If + * the takeover does not occur during the wait, that's fine; it + * just means that the remote node's resource history will be + * cleared, which will require probing all resources on the + * remote node. If the takeover does occur successfully, then we + * can leave the status section intact. + */ cmd->takeover_timeout_id = pcmk__create_timer((cmd->timeout/2), connection_takeover_timeout_cb, cmd); diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index ece2315e366..0311d070ea7 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1516,6 +1516,15 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, // This is a remote connection from a cluster node's controller ipc_proxy_add_provider(client); + /* @TODO Allowing multiple proxies makes no sense given that clients + * have no way to choose between them. Maybe always use the most + * recent one and switch any existing IPC connections to use it, + * by iterating over ipc_clients here, and if client->id doesn't + * match the client's userdata, replace the userdata with the new + * ID. After the iteration, call lrmd_remote_client_destroy() on any + * of the replaced values in ipc_providers. + */ + /* If this was a register operation, also ask for new schema files but * only if it's supported by the protocol version. */ diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index e6e3d078e3a..c32f6b26aa2 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -139,6 +139,10 @@ lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) return 0; } + /* @TODO functionize some of this to reduce duplication with + * lrmd_remote_client_msg() + */ + if (!client->name) { const char *value = crm_element_value(request, PCMK__XA_LRMD_CLIENTNAME); @@ -297,6 +301,7 @@ exit_executor(void) g_hash_table_destroy(rsc_list); + // @TODO End mainloop instead so all cleanup is done crm_exit(CRM_EX_OK); } diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c index dffecbc6e24..9083a9d9cc1 100644 --- a/daemons/execd/remoted_proxy.c +++ b/daemons/execd/remoted_proxy.c @@ -33,7 +33,13 @@ static qb_ipcs_service_t *pacemakerd_ipcs = NULL; // An IPC provider is a cluster node controller connecting as a client static GList *ipc_providers = NULL; -/* ipc clients == things like cibadmin, crm_resource, connecting locally */ + + +/* ipc clients == things like cibadmin, crm_resource, connecting locally + * + * @TODO This should be unnecessary (pcmk__foreach_ipc_client() should be + * sufficient) + */ static GHashTable *ipc_clients = NULL; /*! diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c index 5fb65fc3fa2..02011f193e8 100644 --- a/daemons/fenced/fenced_cib.c +++ b/daemons/fenced/fenced_cib.c @@ -173,6 +173,7 @@ update_stonith_watchdog_timeout_ms(xmlNode *cib) xmlNode *stonith_watchdog_xml = NULL; const char *value = NULL; + // @TODO An XPath search can't handle multiple instances or rules stonith_watchdog_xml = get_xpath_object(XPATH_WATCHDOG_TIMEOUT, cib, LOG_NEVER); if (stonith_watchdog_xml) { diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 13501bc4b24..38a875c21ca 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -46,6 +46,7 @@ struct device_search_s { /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ + // @TODO This name is misleading now, it's the value of stonith-timeout int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; @@ -3323,7 +3324,10 @@ handle_fence_request(pcmk__request_t *request) crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); + + // @TODO On failure, fail request immediately, or maybe panic pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index a5ac3d7d777..58348eba8ab 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -40,7 +40,9 @@ #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" +// @TODO This should be guint long long stonith_watchdog_timeout_ms = 0; + GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index c4ea58cd74c..04aadedc39e 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -75,6 +75,11 @@ enum st_remap_phase { }; typedef struct remote_fencing_op_s { + /* @TODO Abstract the overlap with async_command_t (some members have + * different names for the same thing), which should allow reducing + * duplication in some functions + */ + /* The unique id associated with this operation */ char *id; /*! The node this operation will fence */ diff --git a/devel/Makefile.am b/devel/Makefile.am index 9c0532b180a..71cb6b02d21 100644 --- a/devel/Makefile.am +++ b/devel/Makefile.am @@ -250,6 +250,8 @@ coverage-clean: coverage-partial-clean # # indent cannot cope with all our exceptions and needs heavy manual editing # +# @TODO investigate using clang-format instead +# # indent target: Limit indent to these directories INDENT_DIRS ?= . diff --git a/doc/sphinx/Pacemaker_Administration/configuring.rst b/doc/sphinx/Pacemaker_Administration/configuring.rst index 70ce349d8a2..6943b245829 100644 --- a/doc/sphinx/Pacemaker_Administration/configuring.rst +++ b/doc/sphinx/Pacemaker_Administration/configuring.rst @@ -229,9 +229,9 @@ To use TLS certificates, the administrator's machine also needs their public/private key pair, signed client certificate, and root CA certificate. Those must additionally be specified with the following environment variables: -* :ref:`CIB_ca_file ` -* :ref:`CIB_cert_file ` -* :ref:`CIB_key_file ` +* :ref:`CIB_ca_file ` +* :ref:`CIB_cert_file ` +* :ref:`CIB_key_file ` As an example, if **node1** is a cluster node, and the CIB is configured with ``remote-tls-port`` set to 1234, the administrator could read the current @@ -246,6 +246,9 @@ the daemon user's password: # export CIB_key_file=/etc/pacemaker/admin.key.pem # cibadmin -Q +Optionally, :ref:`CIB_crl_file ` may be set to the location of a +Certificate Revocation List in PEM format. + .. note:: Pacemaker must have been built with PAM support for remote access to work. diff --git a/doc/sphinx/Pacemaker_Administration/options.rst b/doc/sphinx/Pacemaker_Administration/options.rst index 731d17f288b..776bb3606c6 100644 --- a/doc/sphinx/Pacemaker_Administration/options.rst +++ b/doc/sphinx/Pacemaker_Administration/options.rst @@ -72,6 +72,60 @@ Pacemaker uses several environment variables set on the client side. - The host to connect to. Used with :ref:`CIB_port ` for connecting to a remote CIB instance; ignored if :ref:`CIB_port ` is not set. + * - .. _CIB_ca_file: + + .. index:: + single: CIB_ca_file + single: environment variable; CIB_ca_file + + CIB_ca_file + - + - If this, :ref:`CIB_cert_file `, and + :ref:`CIB_key_file ` are set, remote CIB administration + will be encrypted using X.509 (SSL/TLS) certificates, with this root + certificate for the certificate authority. Used with :ref:`CIB_port + ` for connecting to a remote CIB instance; ignored if + :ref:`CIB_port ` is not set. + * - .. _CIB_cert_file: + + .. index:: + single: CIB_cert_file + single: environment variable; CIB_cert_file + + CIB_cert_file + - + - If this, :ref:`CIB_ca_file `, and + :ref:`CIB_key_file ` are set, remote CIB administration + will be encrypted using X.509 (SSL/TLS) certificates, with this + certificate for the local host. Used with :ref:`CIB_port ` for + connecting to a remote CIB instance; ignored if + :ref:`CIB_port ` is not set. + * - .. _CIB_key_file: + + .. index:: + single: CIB_key_file + single: environment variable; CIB_key_file + + CIB_key_file + - + - If this, :ref:`CIB_ca_file `, and + :ref:`CIB_cert_file ` are set, remote CIB administration + will be encrypted using X.509 (SSL/TLS) certificates, with this + private key for the local host. Used with :ref:`CIB_port ` for + connecting to a remote CIB instance; ignored if + :ref:`CIB_port ` is not set. + * - .. _CIB_crl_file: + + .. index:: + single: CIB_crl_file + single: environment variable; CIB_crl_file + + CIB_crl_file + - + - If this, :ref:`CIB_ca_file `, + :ref:`CIB_cert_file `, and + :ref:`CIB_key_file ` are all set, then certificates listed + in this PEM-format Certificate Revocation List file will be rejected. * - .. _CIB_shadow: .. index:: diff --git a/doc/sphinx/Pacemaker_Development/components.rst b/doc/sphinx/Pacemaker_Development/components.rst index f332410871e..1676f711cac 100644 --- a/doc/sphinx/Pacemaker_Development/components.rst +++ b/doc/sphinx/Pacemaker_Development/components.rst @@ -341,16 +341,13 @@ Working with the scheduler is difficult. Challenges include: later. For example, data unpacked from the CIB can safely be used anytime after ``unpack_cib(),`` but actions may become optional or required anytime before ``pcmk__create_graph()``. There's no easy way to deal with this. -* Many names of struct members, functions, etc., are suboptimal, but are part - of the public API and cannot be changed until an API backward compatibility - break. .. index:: single: pcmk_scheduler_t -Cluster Working Set -___________________ +The Scheduler Object +____________________ The main data object for the scheduler is ``pcmk_scheduler_t``, which contains all information needed about nodes, resources, constraints, etc., both as the @@ -363,18 +360,21 @@ transition graph XML. The variable name is usually ``scheduler``. Resources _________ -``pcmk_resource_t`` is the data object representing cluster resources. A -resource has a variant: :term:`primitive`, group, clone, or :term:`bundle`. +``pcmk_resource_t`` is the data object representing cluster resources. It has a +couple of public members for backward compatibility reasons, but most of the +implementation is in the internal ``pcmk__resource_private_t`` type. -The resource object has members for two sets of methods, -``resource_object_functions_t`` from the ``libpe_status`` public API, and -``resource_alloc_functions_t`` whose implementation is internal to +A resource has a variant: :term:`primitive`, group, clone, or :term:`bundle`. + +The private resource object has members for two sets of methods, +``pcmk__rsc_methods_t`` from ``libcrmcommon``, and +``pcmk__assignment_methods_t`` whose implementation is internal to ``libpacemaker``. The actual functions vary by variant. -The object functions have basic capabilities such as unpacking the resource +The resource methods have basic capabilities such as unpacking the resource XML, and determining the current or planned location of the resource. -The :term:`assignment ` functions have more obscure capabilities needed +The :term:`assignment ` methods have more obscure capabilities needed for scheduling, such as processing location and ordering constraints. For example, ``pcmk__create_internal_constraints()`` simply calls the ``internal_constraints()`` method for each top-level resource in the cluster. @@ -390,25 +390,33 @@ with the highest :term:`score` for a given resource. The scheduler does a bunch of processing to generate the scores, then the actual assignment is straightforward. +The scheduler node implementation is a little confusing. + +``pcmk_node_t`` (``struct pcmk__scored_node``) is the primary object used. + +It contains two sub-structs, ``pcmk__node_private_t *priv`` (which is internal) +and ``struct pcmk__node_details *details`` (which is public for backward +compatibility reasons), that contain all node information that is independent +of resource assignment (the node name, etc.). + +It contains one other (internal) sub-struct, ``struct pcmk__node_assignment +*assign``, which contains information particular to a specific resource being +assigned. + Node lists are frequently used. For example, ``pcmk_scheduler_t`` has a -``nodes`` member which is a list of all nodes in the cluster, and -``pcmk_resource_t`` has a ``running_on`` member which is a list of all nodes on -which the resource is (or might be) active. These are lists of ``pcmk_node_t`` -objects. +``nodes`` member which is a list of all nodes in the cluster, and the internal +resource object has an ``active_nodes`` member which is a list of all nodes on +which the resource is (or might be) active. -The ``pcmk_node_t`` object contains a ``struct pe_node_shared_s *details`` -member with all node information that is independent of resource assignment -(the node name, etc.). +Only the scheduler's ``nodes`` list has the full, original node instances. All +other node lists have shallow copies created by ``pe__copy_node()``, which +share ``details`` and ``priv`` from the main list (but can differ in their +``assign`` member). -The working set's ``nodes`` member contains the original of this information. -All other node lists contain copies of ``pcmk_node_t`` where only the -``details`` member points to the originals in the working set's ``nodes`` list. -In this way, the other members of ``pcmk_node_t`` (such as ``weight``, which is -the node score) may vary by node list, while the common details are shared. .. index:: single: pcmk_action_t - single: pe_action_flags + single: pcmk__action_flags Actions _______ @@ -418,16 +426,16 @@ taken. These could be resource actions, cluster-wide actions such as fencing a node, or "pseudo-actions" which are abstractions used as convenient points for ordering other actions against. -It has a ``flags`` member which is a bitmask of ``enum pe_action_flags``. The -most important of these are ``pe_action_runnable`` (if not set, the action is -"blocked" and cannot be added to the transition graph) and -``pe_action_optional`` (actions with this set will not be added to the -transition graph; actions often start out as optional, and may become required -later). +Its (internal) implementation has a ``flags`` member which is a bitmask of +``enum pcmk__action_flags``. The most important of these are +``pcmk__action_runnable`` (if not set, the action is "blocked" and cannot be +added to the transition graph) and ``pcmk__action_optional`` (actions with this +set will not be added to the transition graph; actions often start out as +optional, and may become required later). .. index:: - single: pe__colocation_t + single: pcmk__colocation_t Colocations ___________ @@ -462,30 +470,45 @@ The resource assignment functions have several methods related to colocations: .. index:: - single: pe__ordering_t - single: pe_ordering + single: pcmk__action_relation_t + single: action; relation -Orderings -_________ +Action Relations +________________ Ordering constraints are simple in concept, but they are one of the most important, powerful, and difficult to follow aspects of the scheduler code. -``pe__ordering_t`` is the data object representing an ordering, better thought -of as a relationship between two actions, since the relation can be more -complex than just "this one runs after that one". +``pcmk__action_relation_t`` is the data object representing an ordering, better +thought of as a relationship between two actions, since the relation can be +more complex than just "this one runs after that one". -For an ordering "A then B", the code generally refers to A as "first" or +For a relation "A then B", the code generally refers to A as "first" or "before", and B as "then" or "after". -Much of the power comes from ``enum pe_ordering``, which are flags that -determine how an ordering behaves. There are many obscure flags with big -effects. A few examples: - -* ``pe_order_none`` means the ordering is disabled and will be ignored. It's 0, - meaning no flags set, so it must be compared with equality rather than - ``pcmk_is_set()``. -* ``pe_order_optional`` means the ordering does not make either action - required, so it only applies if they both become required for other reasons. -* ``pe_order_implies_first`` means that if action B becomes required for any - reason, then action A will become required as well. +Much of the power comes from ``enum pcmk__action_relation_flags``, which are +flags that determine how a relation behaves. There are many obscure flags with +big effects. A few examples: + +* ``pcmk__ar_none`` means the relation is disabled and will be ignored. The + value is 0, meaning no flags set, so it must be compared with equality rather + than ``pcmk_is_set()``. +* ``pcmk__ar_ordered`` without any other flags set means the relation does not + make either action required, so it applies only if they both become required + for other reasons. +* ``pcmk__ar_then_implies_first`` means that if action B becomes required for + any reason, then action A will become required as well. + +Adding a New Scheduler Regression Test +______________________________________ + +#. Choose a test name. +#. Copy the uncompressed input CIB to cts/scheduler/xml/TESTNAME.xml. It's + helpful to add an XML comment at the top describing the essential features of + the test (which configuration and status scenarios are being tested). +#. Edit ``cts/cts-scheduler.in`` and add the test name and description to the + ``TESTS`` array. +#. Run ``cts/cts-scheduler --update --run TESTNAME`` to generate the expected + transition graph, scores, etc. Look over the generated files to make sure + they are as expected. +#. Commit your changes. diff --git a/include/crm/common/scheduler_internal.h b/include/crm/common/scheduler_internal.h index 39e806d92c1..c2ebaf329b9 100644 --- a/include/crm/common/scheduler_internal.h +++ b/include/crm/common/scheduler_internal.h @@ -169,7 +169,10 @@ struct pcmk__scheduler_private { guint priority_fencing_ms; // Priority-based fencing delay (in ms) guint shutdown_lock_ms; // How long to lock resources (in ms) guint node_pending_ms; // Pending join times out after this (in ms) + + // @TODO convert to enum const char *placement_strategy; // Value of placement-strategy property + xmlNode *rsc_defaults; // Configured resource defaults xmlNode *op_defaults; // Configured operation defaults GList *resources; // Resources in cluster diff --git a/include/pcmki/pcmki_transition.h b/include/pcmki/pcmki_transition.h index d66d7e43ce8..63ac6392817 100644 --- a/include/pcmki/pcmki_transition.h +++ b/include/pcmki/pcmki_transition.h @@ -27,6 +27,10 @@ enum pcmk__graph_action_type { pcmk__pseudo_graph_action, pcmk__rsc_graph_action, pcmk__cluster_graph_action, + /* @TODO maybe separate a new pcmk__fencing_graph_action from + * pcmk__cluster_graph_action to make code cleaner (for example, see + * initiate_action()) + */ }; enum pcmk__synapse_flags { diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index c644b9c94cc..6f2d65be2f1 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -674,7 +674,11 @@ pcmk__send_ipc_request(pcmk_ipc_api_t *api, const xmlNode *request) flags = crm_ipc_client_response; } - // The 0 here means a default timeout of 5 seconds + /* The 0 here means a default timeout of 5 seconds + * + * @TODO Maybe add a timeout_ms member to pcmk_ipc_api_t and a + * pcmk_set_ipc_timeout() setter for it, then use it here. + */ rc = crm_ipc_send(api->ipc, request, flags, 0, &reply); if (rc < 0) { diff --git a/lib/common/iso8601.c b/lib/common/iso8601.c index dd2e75de6cd..007a64fff59 100644 --- a/lib/common/iso8601.c +++ b/lib/common/iso8601.c @@ -338,23 +338,23 @@ crm_time_get_seconds(const crm_time_t *dt) return 0; } + // @TODO This is inefficient if dt is already in UTC utc = crm_get_utc_time(dt); if (utc == NULL) { return 0; } + // @TODO We should probably use <= if dt is a duration for (lpc = 1; lpc < utc->years; lpc++) { long long dmax = year_days(lpc); in_seconds += DAY_SECONDS * dmax; } - /* utc->months is an offset that can only be set for a duration. - * By definition, the value is variable depending on the date to - * which it is applied. - * - * Force 30-day months so that something vaguely sane happens - * for anyone that tries to use a month in this way. + /* utc->months can be set only for durations. By definition, the value + * varies depending on the (unknown) start date to which the duration will + * be applied. Assume 30-day months so that something vaguely sane happens + * in this case. */ if (utc->months > 0) { in_seconds += DAY_SECONDS * 30 * (long long) (utc->months); diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index 31ea0227d64..e6cff3e26c8 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -296,7 +296,8 @@ add_resource_details(const pcmk_action_t *action, xmlNode *action_xml) * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC - * supporting this feature). + * supporting this feature). (@TODO The effect of removing this on + * regression tests suggests that it is still needed for unique clones) * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan diff --git a/lib/pacemaker/pcmk_injections.c b/lib/pacemaker/pcmk_injections.c index d728cfea1ea..2f51e5f676b 100644 --- a/lib/pacemaker/pcmk_injections.c +++ b/lib/pacemaker/pcmk_injections.c @@ -31,6 +31,7 @@ #include "libpacemaker_private.h" +// @TODO Replace this with a new scheduler flag bool pcmk__simulate_node_config = false; #define XPATH_NODE_CONFIG "//" PCMK_XE_NODE "[@" PCMK_XA_UNAME "='%s']" diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c index e6ed67c0d9b..60f6ec236ab 100644 --- a/lib/pacemaker/pcmk_sched_actions.c +++ b/lib/pacemaker/pcmk_sched_actions.c @@ -473,6 +473,12 @@ update_action_for_ordering_flags(pcmk_action_t *first, pcmk_action_t *then, && !pcmk_is_set(first->flags, pcmk__action_runnable) && pcmk__str_eq(first->task, PCMK_ACTION_STOP, pcmk__str_none)) { + /* @TODO This seems odd; why wouldn't an unrunnable "first" already + * block "then" before this? Note that the unmanaged-stop-{1,2} + * scheduler regression tests and the test CIB for T209 have tests for + * "stop then stop" relations that would be good for checking any + * changes. + */ if (pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); @@ -1149,6 +1155,11 @@ pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op, * pre-OCF-1.1 resource agent, but we don't know that here, and we should * only ever get results for actions scheduled by us, so we can reasonably * assume any "reload" is actually a pre-1.1 agent reload. + * + * @TODO This remapping can make log messages with task confusing for users + * (for example, an "Initiating reload ..." followed by "... start ... + * confirmed"). Either do this remapping in the scheduler if possible, or + * store the original task in a new XML attribute for later logging. */ if (pcmk__str_any_of(task, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { @@ -1764,6 +1775,9 @@ process_rsc_history(const xmlNode *rsc_entry, pcmk_resource_t *rsc, if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { if (pcmk__is_anonymous_clone(pe__const_top_resource(rsc, false))) { + /* @TODO Should this be done for bundled primitives as well? Added + * by 2ac43ae31 + */ pcmk__rsc_trace(rsc, "Skipping configuration check " "for orphaned clone instance %s", diff --git a/lib/pacemaker/pcmk_sched_location.c b/lib/pacemaker/pcmk_sched_location.c index a3915b54355..22b76940d26 100644 --- a/lib/pacemaker/pcmk_sched_location.c +++ b/lib/pacemaker/pcmk_sched_location.c @@ -454,6 +454,8 @@ unpack_simple_location(xmlNode *xml_obj, pcmk_scheduler_t *scheduler) free(pmatch); } + // @TODO Maybe log a notice if we did not match any resources + regfree(®ex); } } diff --git a/lib/pacemaker/pcmk_sched_ordering.c b/lib/pacemaker/pcmk_sched_ordering.c index 5bc2ffc919a..2308be795e8 100644 --- a/lib/pacemaker/pcmk_sched_ordering.c +++ b/lib/pacemaker/pcmk_sched_ordering.c @@ -29,6 +29,7 @@ enum ordering_symmetry { ordering_symmetric_inverse, // the inverse relation in a symmetric ordering }; +// @TODO de-functionize this for readability and possibly better log messages #define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ __rsc = pcmk__find_constraint_resource(scheduler->priv->resources, \ __name); \ @@ -1153,6 +1154,8 @@ pcmk__order_stops_before_shutdown(pcmk_node_t *node, pcmk_action_t *shutdown_op) /* Don't touch a resource that is unmanaged or blocked, to avoid * blocking the shutdown (though if another action depends on this one, * we may still end up blocking) + * + * @TODO This "if" looks wrong, create a regression test for these cases */ if (!pcmk_any_flags_set(action->rsc->flags, pcmk__rsc_managed|pcmk__rsc_blocked)) { diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c index ad68c6b49f5..d95eef25a4f 100644 --- a/lib/pacemaker/pcmk_sched_primitive.c +++ b/lib/pacemaker/pcmk_sched_primitive.c @@ -1077,7 +1077,8 @@ pcmk__primitive_internal_constraints(pcmk_resource_t *rsc) PCMK_ACTION_STOP, 0), NULL, pcmk__ar_then_implies_first, scheduler); - if (pcmk_is_set(rsc->flags, pcmk__rsc_remote_nesting_allowed)) { + if (pcmk_is_set(rsc->flags, pcmk__rsc_remote_nesting_allowed) + /* @TODO: && non-bundle Pacemaker Remote nodes exist */) { score = 10000; /* Highly preferred but not essential */ } else { score = PCMK_SCORE_INFINITY; // Force to run on same host diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c index 8960c298f57..de30f4d8f11 100644 --- a/lib/pacemaker/pcmk_scheduler.c +++ b/lib/pacemaker/pcmk_scheduler.c @@ -160,6 +160,9 @@ apply_exclusive_discovery(gpointer data, gpointer user_data) pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; + /* @TODO This checks rsc and the top rsc, but should probably check all + * ancestors (a cloned group could have it set on the group) + */ if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes) || pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pcmk__rsc_exclusive_probes)) { diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c index f19fbd11c04..fc2d1a3d80d 100644 --- a/lib/pengine/bundle.c +++ b/lib/pengine/bundle.c @@ -72,6 +72,9 @@ typedef struct pe__bundle_variant_data_s { GList *ports; // pe__bundle_port_t * GList *mounts; // pe__bundle_mount_t * + /* @TODO Maybe use a more object-oriented design instead, with a set of + * methods that are different per type rather than switching on this + */ enum pe__container_agent agent_type; } pe__bundle_variant_data_t; @@ -1983,6 +1986,9 @@ pe__bundle_is_filtered(const pcmk_resource_t *rsc, GList *only_rsc, GList * pe__bundle_containers(const pcmk_resource_t *bundle) { + /* @TODO It would be more efficient to do this once when unpacking the + * bundle, creating a new GList* in the variant data + */ GList *containers = NULL; const pe__bundle_variant_data_t *data = NULL; diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 24953cd131e..f8db1db221d 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -351,6 +351,8 @@ clone_unpack(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) /* Use number of nodes (but always at least 1, which is handy for crm_verify * for a CIB without nodes) as default, but 0 for minimum and invalid + * + * @TODO Exclude bundle nodes when counting */ clone_data->clone_max = unpack_meta_int(rsc, PCMK_META_CLONE_MAX, NULL, QB_MAX(1, g_list_length(scheduler->nodes))); diff --git a/lib/pengine/pe_actions.c b/lib/pengine/pe_actions.c index 7bc65dcad1c..3497e567797 100644 --- a/lib/pengine/pe_actions.c +++ b/lib/pengine/pe_actions.c @@ -883,6 +883,10 @@ pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, "%s of %s to 'stop' because 'fence' is not " "valid when fencing is disabled", action_name, rsc->id); + /* @TODO This should probably do + g_hash_table_remove(meta, PCMK_META_ON_FAIL); + like the other "Resetting" spots, to avoid repeating the message + */ on_fail = pcmk__on_fail_stop; desc = "stop resource"; } diff --git a/lib/pengine/pe_notif.c b/lib/pengine/pe_notif.c index 4e17cdd5c2e..89d678a7128 100644 --- a/lib/pengine/pe_notif.c +++ b/lib/pengine/pe_notif.c @@ -130,6 +130,10 @@ get_node_names(const GList *list, GString **all_node_names, const pcmk_node_t *node = (const pcmk_node_t *) iter->data; if (node->priv->name == NULL) { + /* @TODO This breaks the comparability of the various notification + * variables and thus any agent relying on it. Maybe add "UNKNOWN" + * or something like that. + */ continue; } @@ -496,7 +500,14 @@ pe__action_notif_pseudo_ops(pcmk_resource_t *rsc, const char *task, pcmk__insert_meta(n_data->post_done, "notify_operation", n_data->action); - // Order original action complete -> "post-" -> "post-" complete + /* Order original action complete -> "post-" -> "post-" complete + * + * @TODO Should we add |pcmk__ar_unrunnable_first_blocks to these? + * Otherwise we might get an invalid transition due to unresolved + * dependencies when "complete" is a fencing op (which can happen at + * least for bundles) but that op is unrunnable (due to lack of quorum, + * for example). + */ order_actions(complete, n_data->post, pcmk__ar_first_implies_then); order_actions(n_data->post, n_data->post_done, pcmk__ar_first_implies_then); diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c index 3d87bdbca4e..04857dfba52 100644 --- a/lib/pengine/pe_output.c +++ b/lib/pengine/pe_output.c @@ -2353,6 +2353,7 @@ node_attribute_list(pcmk__output_t *out, va_list args) { continue; } + // @TODO Maybe skip filtering for XML output g_hash_table_iter_init(&iter, node->priv->attrs); while (g_hash_table_iter_next (&iter, &key, NULL)) { attr_list = filter_attr_list(attr_list, key); diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 1fe58ab9c2d..973e94bdc00 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -520,6 +520,7 @@ expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data const char *remote_allow_migrate=NULL; const char *is_managed = NULL; + // @TODO This doesn't handle rules or id-ref for (attr_set = pcmk__xe_first_child(xml_obj, PCMK_XE_META_ATTRIBUTES, NULL, NULL); attr_set != NULL; @@ -2523,8 +2524,12 @@ process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node, } else if ((rsc->priv->history_id != NULL) && (strchr(rsc->priv->history_id, ':') != NULL)) { - /* Only do this for older status sections that included instance numbers - * Otherwise stopped instances will appear as orphans + /* @COMPAT This is for older (<1.1.8) status sections that included + * instance numbers, otherwise stopped instances are considered orphans. + * + * @TODO We should be able to drop this, but some old regression tests + * will need to be updated. Double-check that this is not still needed + * for unique clones (which may have been later converted to anonymous). */ pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)", rsc->priv->history_id, rsc->id); @@ -3814,6 +3819,11 @@ static void remap_operation(struct action_history *history, enum pcmk__on_fail *on_fail, bool expired) { + /* @TODO It would probably also be a good idea to map an exit status of + * CRM_EX_PROMOTED or CRM_EX_DEGRADED_PROMOTED to CRM_EX_OK for promote + * actions + */ + bool is_probe = false; int orig_exit_status = history->exit_status; int orig_exec_status = history->execution_status; diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index ff4763a20cf..2f8a46d1956 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -977,6 +977,10 @@ action_launch_child(svc_action_t *op) * below) instead of exit status. However, we've already forked, so * exit status is all we have. At least for OCF actions, we can output an * exit reason for the parent to parse. + * + * @TODO It might be better to substitute secrets in the parent before + * forking, so that if it fails, we can give a better message and result, + * and avoid the fork. */ #if PCMK__ENABLE_CIBSECRETS diff --git a/python/pacemaker/_cts/environment.py b/python/pacemaker/_cts/environment.py index 9538da1faf7..7b5f47df336 100644 --- a/python/pacemaker/_cts/environment.py +++ b/python/pacemaker/_cts/environment.py @@ -144,6 +144,8 @@ def __setitem__(self, key, value): # GoodThing(tm). try: n = node.strip() + # @TODO This only handles IPv4, use getaddrinfo() instead + # (here and in _discover()) socket.gethostbyname_ex(n) self._nodes.append(n) except socket.herror: diff --git a/python/pacemaker/_cts/remote.py b/python/pacemaker/_cts/remote.py index ba5b878f133..ac70f8aeb80 100644 --- a/python/pacemaker/_cts/remote.py +++ b/python/pacemaker/_cts/remote.py @@ -222,6 +222,8 @@ def copy(self, source, target, silent=False): Returns the return code of the cp_command. """ + # @TODO Use subprocess module with argument array instead + # (self._cp_command should be an array too) cmd = "%s '%s' '%s'" % (self._cp_command, source, target) rc = os.system(cmd) diff --git a/python/pacemaker/_cts/tests/componentfail.py b/python/pacemaker/_cts/tests/componentfail.py index 0832407fee1..08fae3add87 100644 --- a/python/pacemaker/_cts/tests/componentfail.py +++ b/python/pacemaker/_cts/tests/componentfail.py @@ -21,6 +21,10 @@ # pylint: disable=unsubscriptable-object +# @TODO Separate this into a separate test for each component, so the patterns +# can be made specific to each component, investigating failures is a little +# easier, and specific testing can be done for each component (for example, +# set attributes before and after killing pacemaker-attrd and check values). class ComponentFail(CTSTest): """Kill a random pacemaker daemon and wait for the cluster to recover.""" diff --git a/tools/cibadmin.c b/tools/cibadmin.c index 6fc5aed82c5..a5b5505a99c 100644 --- a/tools/cibadmin.c +++ b/tools/cibadmin.c @@ -119,6 +119,9 @@ report_schema_unchanged(void) static inline bool cib_action_is_dangerous(void) { + /* @TODO Ideally, --upgrade wouldn't be considered dangerous if the CIB + * already uses the latest schema. + */ return options.delete_all || pcmk__str_any_of(options.cib_action, PCMK__CIB_REQUEST_UPGRADE, diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 149ff7737e0..240e1547295 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -601,7 +601,9 @@ use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GE /* *INDENT-OFF* */ static GOptionEntry addl_entries[] = { { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb, - "Update frequency (default is 5 seconds)", + "Update frequency (default is 5 seconds). Note: When run interactively\n" + INDENT "on a live cluster, the display will be updated automatically\n" + INDENT "whenever the cluster configuration or status changes.", "TIMESPEC" }, { "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, diff --git a/tools/crm_resource.c b/tools/crm_resource.c index 5f177799e22..e6e75686bd2 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -642,7 +642,11 @@ static GOptionEntry advanced_entries[] = { NULL }, { "restart", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Tell the cluster to restart this resource and\n" - INDENT "anything that depends on it", + INDENT "anything that depends on it. This temporarily modifies\n" + INDENT "the CIB, and other CIB modifications should be avoided\n" + INDENT "while this is in progress. If a node is fenced because\n" + INDENT "the stop portion of the restart fails, CIB modifications\n" + INDENT "such as target-role may remain.", NULL }, { "wait", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Wait until the cluster settles into a stable state", @@ -717,7 +721,7 @@ static GOptionEntry addl_entries[] = { "Operation to clear instead of all (with -C -r)", "OPERATION" }, { "interval", 'I', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.interval_spec, - "Interval of operation to clear (default 0) (with -C -r -n)", + "Interval of operation to clear (default 0s) (with -C -r -n)", "N" }, { "class", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, cmdline_config_cb, "The standard the resource agent conforms to (for example, ocf).\n" diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 0c1384ddf48..bbe3eb80ed8 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1214,6 +1214,9 @@ check_node_health(resource_checks_t *checks, pcmk_node_t *node) } } +/* @TODO Make this check all resources if rsc is NULL, so it can be called after + * cleanup of all resources + */ int cli_resource_check(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk_node_t *node) { @@ -1636,6 +1639,10 @@ cli_resource_restart(pcmk__output_t *out, pcmk_resource_t *rsc, int lpc = 0; int before = 0; guint step_timeout_s = 0; + + /* @TODO Due to this sleep interval, a timeout <2s will cause problems and + * should be rejected + */ guint sleep_interval = 2U; guint timeout = pcmk__timeout_ms2s(timeout_ms); @@ -2456,6 +2463,11 @@ cli_resource_move(const pcmk_resource_t *rsc, const char *rsc_id, } } + /* @TODO The constraint changes in the following commands should done + * atomically in a single CIB transaction, to avoid the possibility of + * multiple moves + */ + /* Clear any previous prefer constraints across all nodes. */ cli_resource_clear(rsc_id, NULL, scheduler->nodes, cib, false, force);