Skip to content

Commit

Permalink
CP-53335, topology: do not raise exception when loading invalid dista…
Browse files Browse the repository at this point in the history
…nce matrices

Instead disable NUMA for the host

Fixes xapi-project#6240

Signed-off-by: Pau Ruiz Safont <[email protected]>
  • Loading branch information
psafont committed Jan 27, 2025
1 parent bc62e4c commit 41d7b1e
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 79 deletions.
48 changes: 25 additions & 23 deletions ocaml/xenopsd/lib/topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -189,30 +189,32 @@ module NUMA = struct
Array.iteri
(fun i node -> node_cpus.(node) <- CPUSet.add i node_cpus.(node))
cpu_to_node ;
Array.iteri
(fun i row ->
let d = distances.(i).(i) in
if d <> 10 then
invalid_arg
(Printf.sprintf "NUMA distance from node to itself must be 10: %d" d) ;
Array.iteri
(fun _ d ->
if d < 10 then
invalid_arg (Printf.sprintf "NUMA distance must be >= 10: %d" d)
)
row
)
distances ;
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in
let candidates = gen_candidates distances in
{
let numa_matrix_is_reasonable =
distances
; cpu_to_node= Array.map node_of_int cpu_to_node
; node_cpus
; all
; node_usage= Array.map (fun _ -> 0) distances
; candidates
}
|> Array.to_seqi
|> Seq.for_all (fun (i, row) ->
let d = distances.(i).(i) in
d = 10 && Array.for_all (fun d -> d >= 10) row
)
in

if not numa_matrix_is_reasonable then (
D.info
"Not enabling NUMA: the ACPI SLIT table contains values that are \
invalid." ;
None
) else
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in
let candidates = gen_candidates distances in
Some
{
distances
; cpu_to_node= Array.map node_of_int cpu_to_node
; node_cpus
; all
; node_usage= Array.map (fun _ -> 0) distances
; candidates
}

let distance t (Node a) (Node b) = t.distances.(a).(b)

Expand Down
4 changes: 1 addition & 3 deletions ocaml/xenopsd/lib/topology.mli
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ module NUMA : sig
(** A NUMA node index. Distinct from an int to avoid mixing with CPU numbers *)
type node = private Node of int

val make : distances:int array array -> cpu_to_node:int array -> t
val make : distances:int array array -> cpu_to_node:int array -> t option
(** [make distances cpu_to_node] stores the topology. [distances] is a square
matrix [d] where [d.(i).(j)] is an approximation to how much slower it is
to access memory from node [j] when running on node [i]. Distances are
Expand All @@ -97,8 +97,6 @@ module NUMA : sig
NUMA nodes without any CPUs are accepted (to handle hard affinities).
Raises [invalid_arg] if above constraints are not met
A typical matrix might look like this:
10 21
Expand Down
3 changes: 2 additions & 1 deletion ocaml/xenopsd/test/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1032,4 +1032,5 @@ let _ =
)
in
Debug.log_to_stdout () ;
Alcotest.run "xenops test" ([suite; Test_topology.suite] @ Test_cpuid.tests)
Alcotest.run "xenops test"
(List.concat [[suite]; Test_topology.suite; Test_cpuid.tests])
132 changes: 82 additions & 50 deletions ocaml/xenopsd/test/test_topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ let make_numa ~numa ~cores =
in
let cores_per_numa = cores / numa in
let cpu_to_node = Array.init cores (fun core -> core / cores_per_numa) in
(cores, NUMA.make ~distances ~cpu_to_node)
match NUMA.make ~distances ~cpu_to_node with
| None ->
Alcotest.fail "Synthetic matrix can't fail to load"
| Some d ->
(cores, d)

let make_numa_amd ~cores_per_numa =
(* e.g. AMD Opteron 6272 *)
Expand All @@ -28,7 +32,20 @@ let make_numa_amd ~cores_per_numa =
let cpu_to_node =
Array.init (cores_per_numa * numa) (fun core -> core / cores_per_numa)
in
(cores_per_numa * numa, NUMA.make ~distances ~cpu_to_node)
match NUMA.make ~distances ~cpu_to_node with
| None ->
Alcotest.fail "Synthetic matrix can't fail to load"
| Some d ->
(cores_per_numa * numa, d)

let make_numa_unreachable ~cores_per_numa =
let numa = 2 in
(* 4294967295 is exactly (2ˆ32) - 1, meaning the node is unreachable *)
let distances = [|[|10; 4294967295|]; [|4294967295; 4294967295|]|] in
let cpu_to_node =
Array.init (cores_per_numa * numa) (fun core -> core / cores_per_numa)
in
NUMA.make ~distances ~cpu_to_node

type t = {worst: int; average: float; nodes: NUMA.node list; best: int}

Expand Down Expand Up @@ -185,52 +202,67 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () =

let () = Printexc.record_backtrace true

let distances_spec =
let numa = Alcotest.testable NUMA.pp_dump ( = ) in
let unreachable ~cores_per_numa expected () =
let actual = make_numa_unreachable ~cores_per_numa in
Alcotest.(check @@ option numa) "oops" expected actual
in
[("unreachable_nodes, 1 cpu / node", unreachable ~cores_per_numa:1 None)]

let test_distances (name, fn) = (name, `Quick, fn)

let distances_tests = List.map test_distances distances_spec

let suite =
( "topology test"
, [
( "Allocation of 1 VM on 1 node"
, `Quick
, test_allocate ~vms:1 @@ make_numa ~numa:1 ~cores:2
)
; ( "Allocation of 10 VMs on 1 node"
, `Quick
, test_allocate ~vms:10 @@ make_numa ~numa:1 ~cores:8
)
; ( "Allocation of 1 VM on 2 nodes"
, `Quick
, test_allocate ~vms:1 @@ make_numa ~numa:2 ~cores:4
)
; ( "Allocation of 10 VM on 2 nodes"
, `Quick
, test_allocate ~vms:10 @@ make_numa ~numa:2 ~cores:4
)
; ( "Allocation of 1 VM on 4 nodes"
, `Quick
, test_allocate ~vms:1 @@ make_numa ~numa:4 ~cores:16
)
; ( "Allocation of 10 VM on 4 nodes"
, `Quick
, test_allocate ~vms:10 @@ make_numa ~numa:4 ~cores:16
)
; ( "Allocation of 40 VM on 16 nodes"
, `Quick
, test_allocate ~vms:40 @@ make_numa ~numa:16 ~cores:256
)
; ( "Allocation of 40 VM on 32 nodes"
, `Quick
, test_allocate ~vms:40 @@ make_numa ~numa:32 ~cores:256
)
; ( "Allocation of 40 VM on 64 nodes"
, `Quick
, test_allocate ~vms:80 @@ make_numa ~numa:64 ~cores:256
)
; ( "Allocation of 10 VM on assymetric nodes"
, `Quick
, test_allocate ~vms:10 (make_numa_amd ~cores_per_numa:4)
)
; ( "Allocation of 10 VM on assymetric nodes"
, `Quick
, test_allocate ~vms:6 ~mem:mem3 (make_numa_amd ~cores_per_numa:4)
)
]
)
[
( "Allocation tests"
, [
( "Allocation of 1 VM on 1 node"
, `Quick
, test_allocate ~vms:1 @@ make_numa ~numa:1 ~cores:2
)
; ( "Allocation of 10 VMs on 1 node"
, `Quick
, test_allocate ~vms:10 @@ make_numa ~numa:1 ~cores:8
)
; ( "Allocation of 1 VM on 2 nodes"
, `Quick
, test_allocate ~vms:1 @@ make_numa ~numa:2 ~cores:4
)
; ( "Allocation of 10 VM on 2 nodes"
, `Quick
, test_allocate ~vms:10 @@ make_numa ~numa:2 ~cores:4
)
; ( "Allocation of 1 VM on 4 nodes"
, `Quick
, test_allocate ~vms:1 @@ make_numa ~numa:4 ~cores:16
)
; ( "Allocation of 10 VM on 4 nodes"
, `Quick
, test_allocate ~vms:10 @@ make_numa ~numa:4 ~cores:16
)
; ( "Allocation of 40 VM on 16 nodes"
, `Quick
, test_allocate ~vms:40 @@ make_numa ~numa:16 ~cores:256
)
; ( "Allocation of 40 VM on 32 nodes"
, `Quick
, test_allocate ~vms:40 @@ make_numa ~numa:32 ~cores:256
)
; ( "Allocation of 40 VM on 64 nodes"
, `Quick
, test_allocate ~vms:80 @@ make_numa ~numa:64 ~cores:256
)
; ( "Allocation of 10 VM on assymetric nodes"
, `Quick
, test_allocate ~vms:10 (make_numa_amd ~cores_per_numa:4)
)
; ( "Allocation of 10 VM on assymetric nodes"
, `Quick
, test_allocate ~vms:6 ~mem:mem3 (make_numa_amd ~cores_per_numa:4)
)
]
)
; ("Distance matrix tests", distances_tests)
]
5 changes: 3 additions & 2 deletions ocaml/xenopsd/xc/domain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ let numa_init () =
let host = Lazy.force numa_hierarchy in
let mem = (Xenctrlext.numainfo xcext).memory in
D.debug "Host NUMA information: %s"
(Fmt.to_to_string Topology.NUMA.pp_dump host) ;
(Fmt.to_to_string (Fmt.Dump.option Topology.NUMA.pp_dump) host) ;
Array.iteri
(fun i m ->
let open Xenctrlext in
Expand All @@ -864,8 +864,9 @@ let numa_placement domid ~vcpus ~memory =
let open Topology in
let hint =
with_lock numa_mutex (fun () ->
let ( let* ) = Option.bind in
let xcext = get_handle () in
let host = Lazy.force numa_hierarchy in
let* host = Lazy.force numa_hierarchy in
let numa_meminfo = (numainfo xcext).memory |> Array.to_list in
let nodes =
ListLabels.map2
Expand Down

0 comments on commit 41d7b1e

Please sign in to comment.