Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

numa: do not fail when loading invalid distance matrices #6242

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 25 additions & 23 deletions ocaml/xenopsd/lib/topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -189,30 +189,32 @@ module NUMA = struct
Array.iteri
(fun i node -> node_cpus.(node) <- CPUSet.add i node_cpus.(node))
cpu_to_node ;
Array.iteri
(fun i row ->
let d = distances.(i).(i) in
if d <> 10 then
invalid_arg
(Printf.sprintf "NUMA distance from node to itself must be 10: %d" d) ;
Array.iteri
(fun _ d ->
if d < 10 then
invalid_arg (Printf.sprintf "NUMA distance must be >= 10: %d" d)
)
row
)
distances ;
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in
let candidates = gen_candidates distances in
{
let numa_matrix_is_reasonable =
distances
; cpu_to_node= Array.map node_of_int cpu_to_node
; node_cpus
; all
; node_usage= Array.map (fun _ -> 0) distances
; candidates
}
|> Array.to_seqi
|> Seq.for_all (fun (i, row) ->
let d = distances.(i).(i) in
d = 10 && Array.for_all (fun d -> d >= 10) row
)
in

if not numa_matrix_is_reasonable then (
D.info
"Not enabling NUMA: the ACPI SLIT table contains values that are \
invalid." ;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The wording could be more in line with the test: the table is implausible.

None
) else
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in
let candidates = gen_candidates distances in
Some
{
distances
; cpu_to_node= Array.map node_of_int cpu_to_node
; node_cpus
; all
; node_usage= Array.map (fun _ -> 0) distances
; candidates
}

let distance t (Node a) (Node b) = t.distances.(a).(b)

Expand Down
2 changes: 1 addition & 1 deletion ocaml/xenopsd/lib/topology.mli
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ module NUMA : sig
(** A NUMA node index. Distinct from an int to avoid mixing with CPU numbers *)
type node = private Node of int

val make : distances:int array array -> cpu_to_node:int array -> t
val make : distances:int array array -> cpu_to_node:int array -> t option
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the [invalid_arg] is not raised any more, should the comment(L100) be updated?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take a look, thanks

(** [make distances cpu_to_node] stores the topology. [distances] is a square
matrix [d] where [d.(i).(j)] is an approximation to how much slower it is
to access memory from node [j] when running on node [i]. Distances are
Expand Down
4 changes: 2 additions & 2 deletions ocaml/xenopsd/test/test_topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ let make_numa ~numa ~cores =
in
let cores_per_numa = cores / numa in
let cpu_to_node = Array.init cores (fun core -> core / cores_per_numa) in
(cores, NUMA.make ~distances ~cpu_to_node)
(cores, Option.get (NUMA.make ~distances ~cpu_to_node))

let make_numa_amd ~cores_per_numa =
(* e.g. AMD Opteron 6272 *)
Expand All @@ -28,7 +28,7 @@ let make_numa_amd ~cores_per_numa =
let cpu_to_node =
Array.init (cores_per_numa * numa) (fun core -> core / cores_per_numa)
in
(cores_per_numa * numa, NUMA.make ~distances ~cpu_to_node)
(cores_per_numa * numa, Option.get (NUMA.make ~distances ~cpu_to_node))

type t = {worst: int; average: float; nodes: NUMA.node list; best: int}

Expand Down
5 changes: 3 additions & 2 deletions ocaml/xenopsd/xc/domain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ let numa_init () =
let host = Lazy.force numa_hierarchy in
let mem = (Xenctrlext.numainfo xcext).memory in
D.debug "Host NUMA information: %s"
(Fmt.to_to_string Topology.NUMA.pp_dump host) ;
(Fmt.to_to_string (Fmt.Dump.option Topology.NUMA.pp_dump) host) ;
Array.iteri
(fun i m ->
let open Xenctrlext in
Expand All @@ -864,8 +864,9 @@ let numa_placement domid ~vcpus ~memory =
let open Topology in
let hint =
with_lock numa_mutex (fun () ->
let ( let* ) = Option.bind in
let xcext = get_handle () in
let host = Lazy.force numa_hierarchy in
let* host = Lazy.force numa_hierarchy in
let numa_meminfo = (numainfo xcext).memory |> Array.to_list in
let nodes =
ListLabels.map2
Expand Down
Loading