-
Notifications
You must be signed in to change notification settings - Fork 287
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Restructure valid allowed_operations computation #6253
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -20,14 +20,38 @@ open Record_util | ||||||||||||||||
|
|||||||||||||||||
let finally = Xapi_stdext_pervasives.Pervasiveext.finally | |||||||||||||||||
|
|||||||||||||||||
type blocking_operations = | |||||||||||||||||
[ `apply_updates | |||||||||||||||||
| `cluster_create | |||||||||||||||||
| `configure_repositories | |||||||||||||||||
| `designate_new_master | |||||||||||||||||
| `ha_disable | |||||||||||||||||
| `ha_enable | |||||||||||||||||
| `sync_bundle | |||||||||||||||||
| `sync_updates | |||||||||||||||||
| `tls_verification_enable ] | |||||||||||||||||
|
|||||||||||||||||
type waiting_operations = | |||||||||||||||||
[ `cert_refresh | |||||||||||||||||
| `copy_primary_host_certs | |||||||||||||||||
| `eject | |||||||||||||||||
| `exchange_ca_certificates_on_join | |||||||||||||||||
| `exchange_certificates_on_join | |||||||||||||||||
| `get_updates ] | |||||||||||||||||
|
|||||||||||||||||
type all_operations = [blocking_operations | waiting_operations] | |||||||||||||||||
|
|||||||||||||||||
(* Unused, ensure every API operation is statically partitioned here. *) | |||||||||||||||||
let _id (op : API.pool_allowed_operations) : all_operations = op | |||||||||||||||||
|
|||||||||||||||||
(* psr is not included as a pool op because it can be considered in progress | |||||||||||||||||
in between api calls (i.e. wrapping it inside with_pool_operation won't work) *) | |||||||||||||||||
|
|||||||||||||||||
(* these ops will: | |||||||||||||||||
* a) throw an error if any other blocked op is in progress | |||||||||||||||||
* b) wait if only a wait op is in progress | |||||||||||||||||
*) | |||||||||||||||||
let blocking_ops = | |||||||||||||||||
let blocking_ops_table : (blocking_operations * string) list = | |||||||||||||||||
[ | |||||||||||||||||
(`ha_enable, Api_errors.ha_enable_in_progress) | |||||||||||||||||
; (`ha_disable, Api_errors.ha_disable_in_progress) | |||||||||||||||||
|
@@ -45,7 +69,7 @@ let blocking_ops = | ||||||||||||||||
* | |||||||||||||||||
* waiting is symmetric: if `ha_enable is in progress, and we want to perform | |||||||||||||||||
* `copy_primary_host_certs, then we wait in this case too *) | |||||||||||||||||
let wait_ops = | |||||||||||||||||
let waiting_ops : waiting_operations list = | |||||||||||||||||
[ | |||||||||||||||||
`cert_refresh | |||||||||||||||||
; `exchange_certificates_on_join | |||||||||||||||||
|
@@ -55,115 +79,143 @@ let wait_ops = | ||||||||||||||||
; `get_updates | |||||||||||||||||
] | |||||||||||||||||
|
|||||||||||||||||
let all_operations = blocking_ops |> List.map fst |> List.append wait_ops | |||||||||||||||||
(* Shadow with widening coercions to allow us to query using | |||||||||||||||||
operations from either set, whilst maintaining the static guarantees | |||||||||||||||||
of the original listings. *) | |||||||||||||||||
let blocking_ops_table : (all_operations * string) list = | |||||||||||||||||
List.map (fun (op, v) -> ((op :> all_operations), v)) blocking_ops_table | |||||||||||||||||
|
|||||||||||||||||
let blocking_ops : all_operations list = List.map fst blocking_ops_table | |||||||||||||||||
|
|||||||||||||||||
(* see [Helpers.retry]. this error code causes a 'wait' *) | |||||||||||||||||
let wait_error = Api_errors.other_operation_in_progress | |||||||||||||||||
let waiting_ops = List.map (fun op -> (op :> all_operations)) waiting_ops | |||||||||||||||||
|
|||||||||||||||||
(** Returns a table of operations -> API error options (None if the operation would be ok) *) | |||||||||||||||||
let valid_operations ~__context record (pool : API.ref_pool) = | |||||||||||||||||
let all_operations : all_operations list = blocking_ops @ waiting_ops | |||||||||||||||||
|
|||||||||||||||||
type validity = Unknown | Allowed | Disallowed of string * string list | |||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer |
|||||||||||||||||
|
|||||||||||||||||
(* Computes a function (all_operations -> error option) that maps each | |||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that it is |
|||||||||||||||||
element of all_operations to a reason why it would be invalid for | |||||||||||||||||
it to be executed in the inputted execution context. *) | |||||||||||||||||
let compute_valid_operations ~__context record pool : | |||||||||||||||||
API.pool_allowed_operations -> validity = | |||||||||||||||||
let ref = Ref.string_of pool in | |||||||||||||||||
let current_ops = List.map snd record.Db_actions.pool_current_operations in | |||||||||||||||||
let table = Hashtbl.create 10 in | |||||||||||||||||
all_operations |> List.iter (fun x -> Hashtbl.replace table x None) ; | |||||||||||||||||
let set_errors (code : string) (params : string list) | |||||||||||||||||
(ops : API.pool_allowed_operations_set) = | |||||||||||||||||
List.iter | |||||||||||||||||
(fun op -> | |||||||||||||||||
if Hashtbl.find table op = None then | |||||||||||||||||
Hashtbl.replace table op (Some (code, params)) | |||||||||||||||||
) | |||||||||||||||||
ops | |||||||||||||||||
let table = (Hashtbl.create 32 : (all_operations, validity) Hashtbl.t) in | |||||||||||||||||
let set_validity = Hashtbl.replace table in | |||||||||||||||||
(* Start by assuming all operations are allowed. *) | |||||||||||||||||
List.iter (fun op -> set_validity op Allowed) all_operations ; | |||||||||||||||||
(* Given a list of operations, map each to the given error. If an | |||||||||||||||||
error has already been specified for a given operation, do | |||||||||||||||||
nothing. *) | |||||||||||||||||
let set_errors ops ((error, detail) : string * string list) = | |||||||||||||||||
let populate op = | |||||||||||||||||
match Hashtbl.find table op with | |||||||||||||||||
| Allowed -> | |||||||||||||||||
set_validity op (Disallowed (error, detail)) | |||||||||||||||||
| Disallowed _ | Unknown -> | |||||||||||||||||
(* These cases should be impossible here. *) | |||||||||||||||||
() | |||||||||||||||||
in | |||||||||||||||||
List.iter populate ops | |||||||||||||||||
in | |||||||||||||||||
if current_ops <> [] then ( | |||||||||||||||||
List.iter | |||||||||||||||||
(fun (blocking_op, err) -> | |||||||||||||||||
if List.mem blocking_op current_ops then ( | |||||||||||||||||
set_errors err [] (blocking_ops |> List.map fst) ; | |||||||||||||||||
set_errors Api_errors.other_operation_in_progress | |||||||||||||||||
[Datamodel_common._pool; ref] | |||||||||||||||||
wait_ops | |||||||||||||||||
) | |||||||||||||||||
) | |||||||||||||||||
blocking_ops ; | |||||||||||||||||
List.iter | |||||||||||||||||
(fun wait_op -> | |||||||||||||||||
if List.mem wait_op current_ops then | |||||||||||||||||
set_errors wait_error [Datamodel_common._pool; ref] all_operations | |||||||||||||||||
) | |||||||||||||||||
wait_ops | |||||||||||||||||
) ; | |||||||||||||||||
(* HA disable cannot run if HA is already disabled on a pool *) | |||||||||||||||||
(* HA enable cannot run if HA is already enabled on a pool *) | |||||||||||||||||
let ha_enabled = | |||||||||||||||||
Db.Pool.get_ha_enabled ~__context ~self:(Helpers.get_pool ~__context) | |||||||||||||||||
let other_operation_in_progress = | |||||||||||||||||
(Api_errors.other_operation_in_progress, [Datamodel_common._pool; ref]) | |||||||||||||||||
in | |||||||||||||||||
let current_stack = | |||||||||||||||||
Db.Pool.get_ha_cluster_stack ~__context ~self:(Helpers.get_pool ~__context) | |||||||||||||||||
let is_current_op = Fun.flip List.mem current_ops in | |||||||||||||||||
let blocking = | |||||||||||||||||
List.find_opt (fun (op, _) -> is_current_op op) blocking_ops_table | |||||||||||||||||
in | |||||||||||||||||
if ha_enabled then ( | |||||||||||||||||
set_errors Api_errors.ha_is_enabled [] [`ha_enable] ; | |||||||||||||||||
(* TLS verification is not allowed to run if HA is enabled *) | |||||||||||||||||
set_errors Api_errors.ha_is_enabled [] [`tls_verification_enable] | |||||||||||||||||
) else | |||||||||||||||||
set_errors Api_errors.ha_not_enabled [] [`ha_disable] ; | |||||||||||||||||
(* cluster create cannot run during a rolling pool upgrade *) | |||||||||||||||||
if Helpers.rolling_upgrade_in_progress ~__context then ( | |||||||||||||||||
set_errors Api_errors.not_supported_during_upgrade [] [`cluster_create] ; | |||||||||||||||||
set_errors Api_errors.not_supported_during_upgrade [] | |||||||||||||||||
[`tls_verification_enable] | |||||||||||||||||
) ; | |||||||||||||||||
(* cluster create cannot run if a cluster already exists on the pool *) | |||||||||||||||||
( match Db.Cluster.get_all ~__context with | |||||||||||||||||
| [_] -> | |||||||||||||||||
set_errors Api_errors.cluster_already_exists [] [`cluster_create] | |||||||||||||||||
(* indicates a bug or a need to update this code (if we ever support multiple clusters in the pool *) | |||||||||||||||||
| _ :: _ -> | |||||||||||||||||
failwith "Multiple clusters exist in the pool" | |||||||||||||||||
(* cluster create cannot run if ha is already enabled *) | |||||||||||||||||
| [] -> | |||||||||||||||||
if ha_enabled then | |||||||||||||||||
set_errors Api_errors.incompatible_cluster_stack_active [current_stack] | |||||||||||||||||
[`cluster_create] | |||||||||||||||||
let waiting = List.find_opt is_current_op waiting_ops in | |||||||||||||||||
( match (blocking, waiting) with | |||||||||||||||||
| Some (_, reason), _ -> | |||||||||||||||||
(* Mark all potentially blocking operations as invalid due | |||||||||||||||||
to the specific blocking operation's "in progress" error. *) | |||||||||||||||||
set_errors blocking_ops (reason, []) ; | |||||||||||||||||
(* Mark all waiting operations as invalid for the generic | |||||||||||||||||
"OTHER_OPERATION_IN_PROGRESS" reason. *) | |||||||||||||||||
set_errors waiting_ops other_operation_in_progress | |||||||||||||||||
(* Note that all_operations ⊆ blocking_ops ∪ waiting_ops, so this | |||||||||||||||||
invalidates all operations (with the reason partitioned | |||||||||||||||||
between whether the operation is blocking or waiting). *) | |||||||||||||||||
| None, Some _ -> | |||||||||||||||||
(* If there's no blocking operation in current operations, but | |||||||||||||||||
there is a waiting operation, invalidate all operations for the | |||||||||||||||||
generic reason. Again, this covers every operation. *) | |||||||||||||||||
set_errors all_operations other_operation_in_progress | |||||||||||||||||
| None, None -> ( | |||||||||||||||||
(* If there's no blocking or waiting operation in current | |||||||||||||||||
operations (i.e. current operations is empty), we can report | |||||||||||||||||
more precise reasons why operations would be invalid. *) | |||||||||||||||||
let ha_enabled, current_stack = | |||||||||||||||||
let self = Helpers.get_pool ~__context in | |||||||||||||||||
Db.Pool. | |||||||||||||||||
( get_ha_enabled ~__context ~self | |||||||||||||||||
, get_ha_cluster_stack ~__context ~self | |||||||||||||||||
) | |||||||||||||||||
in | |||||||||||||||||
if ha_enabled then ( | |||||||||||||||||
(* Can't enable HA if it's already enabled. *) | |||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These states are still handled particularly.
I think it's clearer and easy to expand both ops and actions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, it may be worth tabulating the actions. There's an ongoing discussion to consider some kind of incremental computation library, where we try to express how to recompute inputs to the function as declaratively as possible - so this kind of thing would fall out of that effort. |
|||||||||||||||||
let ha_is_enabled = (Api_errors.ha_is_enabled, []) in | |||||||||||||||||
set_errors [`ha_enable] ha_is_enabled ; | |||||||||||||||||
(* TLS verification is not allowed to run if HA is enabled. *) | |||||||||||||||||
set_errors [`tls_verification_enable] ha_is_enabled | |||||||||||||||||
) else (* Can't disable HA if it's not enabled. *) | |||||||||||||||||
set_errors [`ha_disable] (Api_errors.ha_not_enabled, []) ; | |||||||||||||||||
(* Cluster create cannot run during a rolling pool upgrade. *) | |||||||||||||||||
if Helpers.rolling_upgrade_in_progress ~__context then ( | |||||||||||||||||
let not_supported_during_upgrade = | |||||||||||||||||
(Api_errors.not_supported_during_upgrade, []) | |||||||||||||||||
in | |||||||||||||||||
set_errors [`cluster_create] not_supported_during_upgrade ; | |||||||||||||||||
set_errors [`tls_verification_enable] not_supported_during_upgrade | |||||||||||||||||
) ; | |||||||||||||||||
(* Cluster create cannot run if a cluster already exists on the pool. *) | |||||||||||||||||
match Db.Cluster.get_all ~__context with | |||||||||||||||||
| [_] -> | |||||||||||||||||
set_errors [`cluster_create] (Api_errors.cluster_already_exists, []) | |||||||||||||||||
(* Indicates a bug or a need to update this code (if we ever support multiple clusters in the pool). *) | |||||||||||||||||
| _ :: _ -> | |||||||||||||||||
failwith "Multiple clusters exist in the pool" | |||||||||||||||||
(* Cluster create cannot run if HA is already enabled. *) | |||||||||||||||||
| [] -> | |||||||||||||||||
if ha_enabled then | |||||||||||||||||
let error = | |||||||||||||||||
(Api_errors.incompatible_cluster_stack_active, [current_stack]) | |||||||||||||||||
in | |||||||||||||||||
set_errors [`cluster_create] error | |||||||||||||||||
) | |||||||||||||||||
) ; | |||||||||||||||||
table | |||||||||||||||||
|
|||||||||||||||||
let throw_error table op = | |||||||||||||||||
match Hashtbl.find_opt table op with | |||||||||||||||||
| None -> | |||||||||||||||||
raise | |||||||||||||||||
(Api_errors.Server_error | |||||||||||||||||
( Api_errors.internal_error | |||||||||||||||||
, [ | |||||||||||||||||
Printf.sprintf | |||||||||||||||||
"xapi_pool_helpers.assert_operation_valid unknown operation: \ | |||||||||||||||||
%s" | |||||||||||||||||
(pool_allowed_operations_to_string op) | |||||||||||||||||
] | |||||||||||||||||
) | |||||||||||||||||
) | |||||||||||||||||
| Some (Some (code, params)) -> | |||||||||||||||||
raise (Api_errors.Server_error (code, params)) | |||||||||||||||||
| Some None -> | |||||||||||||||||
() | |||||||||||||||||
fun op -> Hashtbl.find_opt table op |> Option.value ~default:Unknown | |||||||||||||||||
|
|||||||||||||||||
let assert_operation_valid ~__context ~self ~(op : API.pool_allowed_operations) | |||||||||||||||||
= | |||||||||||||||||
(* no pool operations allowed during a pending PSR *) | |||||||||||||||||
(* No pool operations allowed during a pending PSR. *) | |||||||||||||||||
if Db.Pool.get_is_psr_pending ~__context ~self:(Helpers.get_pool ~__context) | |||||||||||||||||
then | |||||||||||||||||
raise Api_errors.(Server_error (pool_secret_rotation_pending, [])) ; | |||||||||||||||||
let all = Db.Pool.get_record_internal ~__context ~self in | |||||||||||||||||
let table = valid_operations ~__context all self in | |||||||||||||||||
throw_error table op | |||||||||||||||||
let lookup = compute_valid_operations ~__context all self in | |||||||||||||||||
match lookup op with | |||||||||||||||||
| Allowed -> | |||||||||||||||||
() | |||||||||||||||||
| Disallowed (error, detail) -> | |||||||||||||||||
raise (Api_errors.Server_error (error, detail)) | |||||||||||||||||
| Unknown -> | |||||||||||||||||
(* This should never happen and implies our validity algorithm is incomplete. *) | |||||||||||||||||
let detail = | |||||||||||||||||
let op = pool_allowed_operations_to_string op in | |||||||||||||||||
Printf.sprintf "%s.%s unknown operation: %s" __MODULE__ __FUNCTION__ op | |||||||||||||||||
in | |||||||||||||||||
raise Api_errors.(Server_error (internal_error, [detail])) | |||||||||||||||||
|
|||||||||||||||||
let update_allowed_operations ~__context ~self : unit = | |||||||||||||||||
let all = Db.Pool.get_record_internal ~__context ~self in | |||||||||||||||||
let valid = valid_operations ~__context all self in | |||||||||||||||||
let keys = | |||||||||||||||||
Hashtbl.fold (fun k v acc -> if v = None then k :: acc else acc) valid [] | |||||||||||||||||
let is_allowed_op = | |||||||||||||||||
let lookup = compute_valid_operations ~__context all self in | |||||||||||||||||
fun op -> lookup op = Allowed | |||||||||||||||||
in | |||||||||||||||||
Db.Pool.set_allowed_operations ~__context ~self ~value:keys | |||||||||||||||||
let value = List.filter is_allowed_op all_operations in | |||||||||||||||||
Db.Pool.set_allowed_operations ~__context ~self ~value | |||||||||||||||||
|
|||||||||||||||||
(** Add to the Pool's current operations, call a function and then remove from the | |||||||||||||||||
current operations. Ensure the allowed_operations are kept up to date. *) | |||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find the relationship as explained in the existing comments not super obvious. What does it mean when an operation from one of these lists is in progress and a new operation is coming in - likewise from one of these lists. What happens to the incoming operation in all cases? Does "blocking" mean an operation blocks another one, or does it mean it is itself blocked?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, the terminology should probably be changed (as I'm unsure if the information is ever acted upon in any way that lines up with the naming).
It "blocking" and "waiting" distinction seems to only exist for changing what is the reported reason why an operation would fail. A blocking operation in
current_operations
blocks other operations of its own designation with a specific reason (the current - blocking - operation's "in progress" error). Whereas, a waiting operation incurrent_operations
invalidates every operation with a generic reason.To me, the current semantics are really just that all operations are invalid if there is any operation in
current_operations
(as there doesn't seem to exist an operation which isn't categorised as blocking or waiting). Then, the distinction exists to guide what reason should be reported for each operation. Blocking and waiting operations invalidate everything, so you only get precise errors for an attempted operation if there is no operation ongoing. However, nothing seems to operationally block or wait in this code, so it's not a real concern here (and the naming ought to line up with expectations).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The distinction is how the incoming operation handles the fact that something else is in progress? The reaction does not depend on the operation in progress - is that how it works? So we are computing the reaction of an operation that finds an arbitrary operation in progress.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The distinction is what the error message cites as the reason that an operation cannot be performed. The reaction depends on the current operation because, if there is a current operation that belongs to the blocking set, the error cites a specific "in progress" error for that operation. Whereas, all other responses - in the case of wait - are a generic error relating to the pool. The most specific errors arise when there's no current operations but the intended operation is illogical, like enabling HA if it's already enabled (the responses for blocking and waiting will overrule this though).
So, my understanding is: if HA is already enabled and there is a blocking operation (
foo
) within current operations, then the user tries to enable HA, it will error saying "foo
in progress". Then, iffoo
completes and is removed from current operations (and there's nothing else in current operations), trying again will yield "HA is already enabled" error (so, a more precise error which is not overruled by a more generic reason). So the response is guided by what's already in current operations.