From 72ea0d93603db8ec98a1a4e11f2c0b10315b7e4f Mon Sep 17 00:00:00 2001 From: Juro Bystricky Date: Mon, 25 Nov 2019 07:47:00 -0800 Subject: [PATCH] bertprobe: fix failures and logging Fix a reported problem: https://github.com/clearlinux/telemetrics-client/issues/170 This patch fixes the case when there is no error reported via BERT data and yet the probe fails with error such as: "Failed to read payload from: /sys/firmware/acpi/tables/data/BERT" Which then results in journalprobe reporting: "bert-probe.service: Main process exited, code=exited, status=1/FAILURE" The file BERT data file now has root only read permissions: $ ls -al /sys/firmware/acpi/tables/data/BERT -r-------- 1 root root 32768 Nov 25 07:37 /sys/firmware/acpi/tables/data/BERT Hence the user "telemetry" cannot read it anymore. In addition, the file size can be more than max payload size (4k). However, the file can be completely blank. The probe inspects the block_status for any present errors. If there aren't any, probe does not report anything. While in there, for consistency reasons, modified "printf" statements with error messages to telem_log(LOG_ERR, msg) Signed-off-by: Juro Bystricky --- src/data/bert-probe.service.in | 2 +- src/probes/bert_probe.c | 90 +++++++++++++++++++++++++++++++--- 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/src/data/bert-probe.service.in b/src/data/bert-probe.service.in index 12f81a6..fa03089 100644 --- a/src/data/bert-probe.service.in +++ b/src/data/bert-probe.service.in @@ -7,7 +7,7 @@ ConditionPathExists=/sys/firmware/acpi/tables/data/BERT [Service] ExecStart=@bindir@/bertprobe -User=telemetry +User=root [Install] WantedBy=multi-user.target diff --git a/src/probes/bert_probe.c b/src/probes/bert_probe.c index 9831fcf..6c8a38d 100644 --- a/src/probes/bert_probe.c +++ b/src/probes/bert_probe.c @@ -40,6 +40,77 @@ void print_usage(char *prog) printf(" -V, --version Print the program version\n"); } +struct acpi_generic_error_data_entry { + uint8_t section_type[16]; + uint32_t error_severity; + uint16_t revision; + uint8_t validation_bits; + uint8_t flags; + uint32_t error_data_length; + uint8_t fru_ld[16]; + uint8_t fru_text[16]; + uint64_t timestamp; + uint8_t data[1]; // error_data_length +} __attribute__((packed)); + +struct acpi_generic_error_status_block { + // Bit [0] - Uncorrectable Error Valid + // Bit [1] - Correctable Error Valid + // Bit [2] - Multiple Uncorrectable Errors + // Bit [3] - Multiple Correctable Errors + // Bit [13:4] - Error Data Entry Count: Number of Error Data Entries found in the Data section. + // Bit [31:14] - Reserved + uint32_t block_status; + uint32_t raw_data_offset; + // Length in bytes of the generic error data + uint32_t data_length; + uint32_t error_severity; + // The information contained in this field is a collection of zero + // or more Generic Error Data Entrie + struct acpi_generic_error_data_entry data[]; +} __attribute__((packed)); + +static int check_bert_file(const char *filename, char *buff, size_t buff_size) { + + int ret = EXIT_FAILURE; + size_t len; + uint32_t block_status; + FILE *fh = fopen(filename, "rb"); + + if (fh == NULL) { + telem_log(LOG_ERR, "Failed to open: %s\n", filename); + goto done; + } + + len = fread(&block_status, 1, sizeof(block_status), fh); + if (ferror(fh) || (len != sizeof(block_status))){ + telem_log(LOG_ERR, "Error reading block_status: %s\n", filename); + goto done; + } + + /* Check the "Error Data Entry Count", if 0 then nothing + * to report */ + + if ((block_status & 0x3FFF) != 0) { + rewind(fh); + + /* nc_b64enc_file returns 1 in success and 0 in failure */ + if (nc_b64enc_file(fh, buff, buff_size) == 0) { + telem_log(LOG_ERR, "Failed to read payload from: %s\n", filename); + goto done; + } + } + + ret = EXIT_SUCCESS; +done: + if (fh) { + fclose(fh); + } + + return ret; +} + + int main(int argc, char **argv) { struct telem_ref *tm_handle = NULL; @@ -79,29 +150,34 @@ int main(int argc, char **argv) payload = calloc(sizeof(char), MAX_PAYLOAD_LENGTH); if (!payload) { - printf("Unable to allocate more memory.\n"); + telem_log(LOG_ERR, "Unable to allocate more memory.\n"); return -ENOMEM; } - if (!(ret = nc_b64enc_filename(bert_record_file, payload, MAX_PAYLOAD_LENGTH))) { - printf("Failed to read payload from: %s\n", bert_record_file); - ret = EXIT_FAILURE; + ret = check_bert_file(bert_record_file, payload, MAX_PAYLOAD_LENGTH); + if (ret == EXIT_FAILURE) { goto done; + } else { + if (payload[0] == 0) { + /* Nothing to report */ + goto done; + } } + /* Valid payload, send the record */ if ((ret = tm_create_record(&tm_handle, severity, classification, payload_version)) < 0) { - printf("Failed to create record: %s\n", strerror(-ret)); + telem_log(LOG_ERR, "Failed to create record: %s\n", strerror(-ret)); goto done; } if ((ret = tm_set_payload(tm_handle, payload)) < 0) { - printf("Failed to set record payload: %s\n", strerror(-ret)); + telem_log(LOG_ERR, "Failed to set record payload: %s\n", strerror(-ret)); goto done; } if ((ret = tm_send_record(tm_handle)) < 0) { - printf("Failed to send record to daemon: %s\n", strerror(-ret)); + telem_log(LOG_ERR, "Failed to send record to daemon: %s\n", strerror(-ret)); goto done; }