Skip to content

Commit

Permalink
numa: do not fail to load invalid distance matrices
Browse files Browse the repository at this point in the history
Instead disable NUMA for the host

Fixes xapi-project#6240

Signed-off-by: Pau Ruiz Safont <[email protected]>
  • Loading branch information
psafont committed Jan 21, 2025
1 parent c0fbb69 commit 73bf03a
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 28 deletions.
48 changes: 25 additions & 23 deletions ocaml/xenopsd/lib/topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -189,30 +189,32 @@ module NUMA = struct
Array.iteri
(fun i node -> node_cpus.(node) <- CPUSet.add i node_cpus.(node))
cpu_to_node ;
Array.iteri
(fun i row ->
let d = distances.(i).(i) in
if d <> 10 then
invalid_arg
(Printf.sprintf "NUMA distance from node to itself must be 10: %d" d) ;
Array.iteri
(fun _ d ->
if d < 10 then
invalid_arg (Printf.sprintf "NUMA distance must be >= 10: %d" d)
)
row
)
distances ;
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in
let candidates = gen_candidates distances in
{
let numa_matrix_is_reasonable =
distances
; cpu_to_node= Array.map node_of_int cpu_to_node
; node_cpus
; all
; node_usage= Array.map (fun _ -> 0) distances
; candidates
}
|> Array.to_seqi
|> Seq.for_all (fun (i, row) ->
let d = distances.(i).(i) in
d = 10 || Array.for_all (fun d -> d >= 10) row
)
in

if not numa_matrix_is_reasonable then (
D.info
"Not enabling NUMA: the ACPI SLIT table contains values that are \
invalid." ;
None
) else
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in
let candidates = gen_candidates distances in
Some
{
distances
; cpu_to_node= Array.map node_of_int cpu_to_node
; node_cpus
; all
; node_usage= Array.map (fun _ -> 0) distances
; candidates
}

let distance t (Node a) (Node b) = t.distances.(a).(b)

Expand Down
2 changes: 1 addition & 1 deletion ocaml/xenopsd/lib/topology.mli
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ module NUMA : sig
(** A NUMA node index. Distinct from an int to avoid mixing with CPU numbers *)
type node = private Node of int

val make : distances:int array array -> cpu_to_node:int array -> t
val make : distances:int array array -> cpu_to_node:int array -> t option
(** [make distances cpu_to_node] stores the topology. [distances] is a square
matrix [d] where [d.(i).(j)] is an approximation to how much slower it is
to access memory from node [j] when running on node [i]. Distances are
Expand Down
4 changes: 2 additions & 2 deletions ocaml/xenopsd/test/test_topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ let make_numa ~numa ~cores =
in
let cores_per_numa = cores / numa in
let cpu_to_node = Array.init cores (fun core -> core / cores_per_numa) in
(cores, NUMA.make ~distances ~cpu_to_node)
(cores, Option.get (NUMA.make ~distances ~cpu_to_node))

let make_numa_amd ~cores_per_numa =
(* e.g. AMD Opteron 6272 *)
Expand All @@ -28,7 +28,7 @@ let make_numa_amd ~cores_per_numa =
let cpu_to_node =
Array.init (cores_per_numa * numa) (fun core -> core / cores_per_numa)
in
(cores_per_numa * numa, NUMA.make ~distances ~cpu_to_node)
(cores_per_numa * numa, Option.get (NUMA.make ~distances ~cpu_to_node))

type t = {worst: int; average: float; nodes: NUMA.node list; best: int}

Expand Down
5 changes: 3 additions & 2 deletions ocaml/xenopsd/xc/domain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ let numa_init () =
let host = Lazy.force numa_hierarchy in
let mem = (Xenctrlext.numainfo xcext).memory in
D.debug "Host NUMA information: %s"
(Fmt.to_to_string Topology.NUMA.pp_dump host) ;
(Fmt.to_to_string (Fmt.Dump.option Topology.NUMA.pp_dump) host) ;
Array.iteri
(fun i m ->
let open Xenctrlext in
Expand All @@ -864,8 +864,9 @@ let numa_placement domid ~vcpus ~memory =
let open Topology in
let hint =
with_lock numa_mutex (fun () ->
let ( let* ) = Option.bind in
let xcext = get_handle () in
let host = Lazy.force numa_hierarchy in
let* host = Lazy.force numa_hierarchy in
let numa_meminfo = (numainfo xcext).memory |> Array.to_list in
let nodes =
ListLabels.map2
Expand Down

0 comments on commit 73bf03a

Please sign in to comment.