Skip to content

Commit

Permalink
CA-399669: Detect a reason for IPMI readings being unavailable
Browse files Browse the repository at this point in the history
When the IPMI devices are missing, this is printed in the error line:

$ /usr/bin/ipmitool -I open dcmi discover
Could not open device at /dev/ipmi0 or /dev/ipmi/0 or /dev/ipmidev/0: No such file or directory

Detect this situation so an appropriate warning can be printed.

This needed changes in the function that executes the command, to be able to
process stderr

Example output:
$ ./disco.exe
[debug] Forking command /usr/bin/ipmitool dcmi discover
[debug] Forked command /usr/bin/ipmitool dcmi discover
[ info] DCMI discover: Could not open device at /dev/ipmi0 or /dev/ipmi/0 or /dev/ipmidev/0: No such file or directory
[debug] Process 423018 exited normally with code 1
[ warn] IPMI DCMI power readings not available, stopping. Reason: IPMI devices are missing

Signed-off-by: Pau Ruiz Safont <[email protected]>
  • Loading branch information
psafont committed Jan 29, 2025
1 parent 2c2ec17 commit e24a5cc
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 72 deletions.
2 changes: 1 addition & 1 deletion ocaml/xcp-rrdd/bin/rrdp-dcmi/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
(modes exe)
(name rrdp_dcmi)
(libraries

rrdd-plugin
rrdd-plugin.base
rrdd_plugins_libs
xapi-idl.rrd
xapi-log
Expand Down
61 changes: 42 additions & 19 deletions ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,29 +29,47 @@ let ipmitool args =
(* we connect to the local /dev/ipmi0 if available to read measurements from local BMC *)
ipmitool_bin :: args |> String.concat " "

type discovery_error = Devices_missing

let discovery_error_to_string = function
| Devices_missing ->
"IPMI devices are missing"

let discover () =
let read_out_line line =
(* this code runs once on startup, logging all the output here will be useful for debugging *)
D.debug "DCMI discover: %s" line ;
let line = String.trim line in
if String.equal line "Power management available" then
Some ()
else
None
in
let read_err_line line =
(* this code runs once on startup, logging all the output here will be useful for debugging *)
D.debug "DCMI discover: %s" line ;
let line = String.trim line in
if String.starts_with ~prefix:"Could not open device at" line then
Some Devices_missing
else
None
in
Utils.exec_cmd
(module Process.D)
~cmdstring:(ipmitool ["dcmi"; "discover"])
~f:(fun line ->
(* this code runs once on startup, logging all the output here will be useful for debugging *)
D.debug "DCMI discover: %s" line ;
if String.trim line = "Power management available" then
Some ()
else
None
)
~read_out_line ~read_err_line

let get_dcmi_power_reading () =
let read_out_line line =
(* example line: ' Instantaneous power reading: 34 Watts' *)
try Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some
with Scanf.Scan_failure _ | End_of_file -> None
in
let read_err_line _ = None in
Utils.exec_cmd
(module Process.D)
~cmdstring:(ipmitool ["dcmi"; "power"; "reading"])
~f:(fun line ->
(* example line: ' Instantaneous power reading: 34 Watts' *)
try
Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some
with Scanf.Scan_failure _ | End_of_file -> None
)
~read_out_line ~read_err_line

let gen_dcmi_power_reading value =
( Rrd.Host
Expand All @@ -63,18 +81,23 @@ let gen_dcmi_power_reading value =

let generate_dss () =
match get_dcmi_power_reading () with
| watts :: _ ->
| watts :: _, _ ->
[gen_dcmi_power_reading watts]
| _ ->
[]

let _ =
initialise () ;
match discover () with
| [] ->
D.warn "IPMI DCMI power readings not available, stopping." ;
exit 0
| _ ->
| () :: _, _ ->
D.info "IPMI DCMI power reading is available" ;
main_loop ~neg_shift:0.5 ~target:(Reporter.Local 1)
~protocol:Rrd_interface.V2 ~dss_f:generate_dss
| [], errs ->
let reason =
List.nth_opt errs 0
|> Option.map discovery_error_to_string
|> Option.value ~default:"unknown"
in
D.warn "IPMI DCMI power readings not available, stopping. Reason: %s"
reason
1 change: 1 addition & 0 deletions ocaml/xcp-rrdd/bin/rrdp-iostat/dune
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
mtime
mtime.clock.os
rrdd-plugin
rrdd-plugin.base
rrdd_plugin_xenctrl
rrdd_plugins_libs
str
Expand Down
16 changes: 11 additions & 5 deletions ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ module Iostat = struct

(* Keep track of how many results headers we've seen so far *)
let parsing_section = ref 0 in
let process_line str =
let read_out_line str =
let res = Utils.cut str in
(* Keep values from the second set of outputs *)
( if !parsing_section = 2 then
Expand All @@ -151,7 +151,10 @@ module Iostat = struct
(* 2 iterations; 1 second between them *)

(* Iterate through each line and populate dev_values_map *)
let _ = Utils.exec_cmd (module Process.D) ~cmdstring ~f:process_line in
let read_err_line _ = None in
let _ =
Utils.exec_cmd (module Process.D) ~cmdstring ~read_out_line ~read_err_line
in

(* Now read the values out of dev_values_map for devices for which we have data *)
List.filter_map
Expand Down Expand Up @@ -341,16 +344,19 @@ let exec_tap_ctl_list () : ((string * string) * int) list =
D.error "Could not find device with physical path %s" phypath ;
None
in
let process_line str =
let read_out_line str =
try Scanf.sscanf str "pid=%d minor=%d state=%s args=%s@:%s" extract_vdis
with Scanf.Scan_failure _ | Failure _ | End_of_file ->
D.warn {|"%s" returned a line that could not be parsed. Ignoring.|}
tap_ctl ;
D.warn "Offending line: %s" str ;
None
in
let pid_and_minor_to_sr_and_vdi =
Utils.exec_cmd (module Process.D) ~cmdstring:tap_ctl ~f:process_line
let read_err_line _ = None in
let pid_and_minor_to_sr_and_vdi, _ =
Utils.exec_cmd
(module Process.D)
~cmdstring:tap_ctl ~read_out_line ~read_err_line
in
let sr_and_vdi_to_minor =
List.map
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xcp-rrdd/bin/rrdp-xenpm/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
(modes exe)
(name rrdp_xenpm)
(libraries

rrdd-plugin
rrdd-plugin.base
rrdd_plugins_libs
str
xapi-idl.rrd
Expand Down
12 changes: 8 additions & 4 deletions ocaml/xcp-rrdd/bin/rrdp-xenpm/rrdp_xenpm.ml
Original file line number Diff line number Diff line change
Expand Up @@ -57,27 +57,30 @@ let gen_pm_cpu_averages cpu_id time =

let get_cpu_averages () : int64 list =
let pattern = Str.regexp "average cpu frequency:[ \t]+\\([0-9]+\\)[ \t]*$" in
let match_fun s =
let read_out_line s =
if Str.string_match pattern s 0 then
Some (Int64.of_string (Str.matched_group 1 s))
else
None
in
let read_err_line _ = None in
Utils.exec_cmd
(module Process.D)
~cmdstring:(Printf.sprintf "%s %s" xenpm_bin "get-cpufreq-average")
~f:match_fun
~read_out_line ~read_err_line
|> fst

let get_states cpu_state : int64 list =
let pattern =
Str.regexp "[ \t]*residency[ \t]+\\[[ \t]*\\([0-9]+\\) ms\\][ \t]*"
in
let match_fun s =
let read_out_line s =
if Str.string_match pattern s 0 then
Some (Int64.of_string (Str.matched_group 1 s))
else
None
in
let read_err_line _ = None in
Utils.exec_cmd
(module Process.D)
~cmdstring:
Expand All @@ -89,7 +92,8 @@ let get_states cpu_state : int64 list =
"get-cpufreq-states"
)
)
~f:match_fun
~read_out_line ~read_err_line
|> fst

(* list_package [1;2;3;4] 2 = [[1;2];[3;4]] *)
let list_package (l : 'a list) (n : int) : 'a list list =
Expand Down
25 changes: 0 additions & 25 deletions ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,6 @@

(** Library to simplify writing an rrdd plugin. *)

(** Utility functions useful for rrdd plugins. *)
module Utils : sig
val now : unit -> float
(** Return the current unix epoch as an int64. *)

val cut : string -> string list
(** Split a string into a list of strings as separated by spaces and/or
tabs. *)

val list_directory_unsafe : string -> string list
(** List the contents of a directory, including . and .. *)

val list_directory_entries_unsafe : string -> string list
(** List the contents of a directory, not including . and .. *)

val exec_cmd :
(module Debug.DEBUG)
-> cmdstring:string
-> f:(string -> 'a option)
-> 'a list
(** [exec_cmd cmd f] executes [cmd], applies [f] on each of the lines which
[cmd] outputs on stdout, and returns a list of resulting values for which
applying [f] returns [Some value]. *)
end

(** Asynchronous interface to create, cancel and query the state of stats
reporting threads. *)
module Reporter : sig
Expand Down
35 changes: 22 additions & 13 deletions ocaml/xcp-rrdd/lib/plugin/utils.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@ let list_directory_entries_unsafe dir =
let dirlist = list_directory_unsafe dir in
List.filter (fun x -> x <> "." && x <> "..") dirlist

let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
let exec_cmd (module D : Debug.DEBUG) ~cmdstring
~(read_out_line : string -> 'a option) ~(read_err_line : string -> 'b option)
=
D.debug "Forking command %s" cmdstring ;
(* create pipe for reading from the command's output *)
(* create pipes for reading from the command's output *)
let out_readme, out_writeme = Unix.pipe () in
let err_readme, err_writeme = Unix.pipe () in
let cmd, args =
match Astring.String.cuts ~empty:false ~sep:" " cmdstring with
| [] ->
Expand All @@ -45,19 +48,25 @@ let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
(h, t)
in
let pid =
Forkhelpers.safe_close_and_exec None (Some out_writeme) None [] cmd args
Forkhelpers.safe_close_and_exec None (Some out_writeme) (Some err_writeme)
[] cmd args
in
Unix.close out_writeme ;
let in_channel = Unix.in_channel_of_descr out_readme in
let vals = ref [] in
let rec loop () =
let line = input_line in_channel in
let ret = f line in
(match ret with None -> () | Some v -> vals := v :: !vals) ;
loop ()
Unix.close err_writeme ;
let read_and_close f fd =
let in_channel = Unix.in_channel_of_descr fd in
let vals = ref [] in
let rec loop () =
let line = input_line in_channel in
let ret = f line in
(match ret with None -> () | Some v -> vals := v :: !vals) ;
loop ()
in
(try loop () with End_of_file -> ()) ;
Unix.close fd ; List.rev !vals
in
(try loop () with End_of_file -> ()) ;
Unix.close out_readme ;
let stdout = read_and_close read_out_line out_readme in
let stderr = read_and_close read_err_line err_readme in
let pid, status = Forkhelpers.waitpid pid in
( match status with
| Unix.WEXITED n ->
Expand All @@ -67,4 +76,4 @@ let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
| Unix.WSTOPPED s ->
D.debug "Process %d was stopped by signal %a" pid Debug.Pp.signal s
) ;
List.rev !vals
(stdout, stderr)
14 changes: 10 additions & 4 deletions ocaml/xcp-rrdd/lib/plugin/utils.mli
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ val list_directory_entries_unsafe : string -> string list
(** List the contents of a directory, not including . and .. *)

val exec_cmd :
(module Debug.DEBUG) -> cmdstring:string -> f:(string -> 'a option) -> 'a list
(** [exec_cmd cmd f] executes [cmd], applies [f] on each of the lines which
[cmd] outputs on stdout, and returns a list of resulting values for which
applying [f] returns [Some value]. *)
(module Debug.DEBUG)
-> cmdstring:string
-> read_out_line:(string -> 'a option)
-> read_err_line:(string -> 'b option)
-> 'a list * 'b list
(** [exec_cmd cmd out_line err_line] executes [cmd], applies [read_out_line] to
each of the lines which [cmd] outputs on stdout, applies [read_err_line] to
each of the lines which [cmd] outputs on stderr, and returns a tuple of
list with each of the values that the [read_out_line] and [read_err_line]
returned [Some value]. *)

0 comments on commit e24a5cc

Please sign in to comment.