-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Journal probe rework #7
Changes from 8 commits
d4261b5
8780f3c
1f2083b
0adfc63
9dcdc0d
9834e9f
7dee764
4e79ea1
bf3dc47
15bdcf0
35b6ca2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
/* | ||
* This program is part of the Clear Linux Project | ||
* | ||
* Copyright 2015 Intel Corporation | ||
* Copyright 2015-2017 Intel Corporation | ||
* | ||
* This program is free software; you can redistribute it and/or modify it under | ||
* the terms and conditions of the GNU Lesser General Public License, as | ||
|
@@ -61,11 +61,11 @@ static inline void tm_journal_match_err(int ret) | |
static void add_to_payload(const void *data, size_t length) | ||
{ | ||
if (payload != NULL) { | ||
g_string_append_printf(payload, "%.*s", (int)length, | ||
g_string_append_printf(payload, "%.*s\n", (int)length, | ||
(char *)data); | ||
} else { | ||
payload = g_string_new(NULL); | ||
g_string_printf(payload, "%.*s", (int)length, | ||
g_string_printf(payload, "%.*s\n", (int)length, | ||
(char *)data); | ||
} | ||
} | ||
|
@@ -77,7 +77,7 @@ static bool send_data(char *class) | |
|
||
if ((ret = tm_create_record(&handle, severity, class, | ||
payload_version)) < 0) { | ||
telem_log(LOG_ERR, "Failed to create record: %s", | ||
telem_log(LOG_ERR, "Failed to create record: %s\n", | ||
strerror(-ret)); | ||
goto fail; | ||
} | ||
|
@@ -86,7 +86,7 @@ static bool send_data(char *class) | |
payload = NULL; | ||
|
||
if ((ret = tm_set_payload(handle, (char *)payload_str)) < 0) { | ||
telem_log(LOG_ERR, "Failed to set payload: %s", strerror(-ret)); | ||
telem_log(LOG_ERR, "Failed to set payload: %s\n", strerror(-ret)); | ||
free(payload_str); | ||
tm_free_record(handle); | ||
goto fail; | ||
|
@@ -95,7 +95,7 @@ static bool send_data(char *class) | |
free(payload_str); | ||
|
||
if ((ret = tm_send_record(handle)) < 0) { | ||
telem_log(LOG_ERR, "Failed to send record: %s", strerror(-ret)); | ||
telem_log(LOG_ERR, "Failed to send record: %s\n", strerror(-ret)); | ||
tm_free_record(handle); | ||
goto fail; | ||
} | ||
|
@@ -122,6 +122,16 @@ static int read_new_entries(sd_journal *journal) | |
|
||
add_to_payload(data, length); | ||
|
||
// For now, we send one record per log message, in case the | ||
// there is a large backlog of messages and we exceed the | ||
// payload size limit (8KB). And ignore errors, hoping that it's | ||
// a transient problem. | ||
|
||
if (!send_data(error_class)) { | ||
telem_log(LOG_ERR, "Failed to send data. Ignoring.\n"); | ||
return num_entries; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I am a little worried about this as the function doesn't seem to take a last tried value so I'm unsure how well this would handle an error in the middle of sending messages and resuming from that point later on. |
||
} | ||
|
||
num_entries++; | ||
} | ||
|
||
|
@@ -148,19 +158,14 @@ static bool process_existing_entries(sd_journal *journal) | |
return false; | ||
} | ||
|
||
if (!read_new_entries(journal)) { | ||
ret = read_new_entries(journal); | ||
if (ret < 0) { | ||
return false; | ||
} | ||
|
||
if (!payload) { | ||
} else if (ret == 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah if I would have read a little further on I'd have seen it. So I'm wondering if before returning when send_data fails, we could put the data back onto the journal so it can be attempted to be read next time. Also ret could be 0 when send_data fails so the no existing entries log message would be wrong. I'd return -1 in the send_data case and hopefully there is a way to get data back into the journal after pulling it out. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an excellent observation. I wasn't considering how the probe should behave in the event of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, I think that I want to continue ignoring But I will revisit the error handling here later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll remove the LOG_DEBUG you mentioned. |
||
telem_log(LOG_DEBUG, "No existing entries found\n"); | ||
return true; | ||
} | ||
|
||
if (!send_data(error_class)) { | ||
return false; | ||
} | ||
|
||
return true; | ||
} | ||
|
||
|
@@ -185,6 +190,33 @@ static bool get_boot_id(char **data) | |
return true; | ||
} | ||
|
||
#define JOURNAL_MATCH(data) \ | ||
do { \ | ||
r = sd_journal_add_match(journal, data, 0); \ | ||
if (r < 0) { \ | ||
tm_journal_match_err(r); \ | ||
return false; \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Always a little scary when the macro can return in some cases but in this case it seems reasonable enough. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. In this case, |
||
} \ | ||
} while (0); | ||
|
||
#define JOURNAL_AND \ | ||
do { \ | ||
r = sd_journal_add_conjunction(journal); \ | ||
if (r < 0) { \ | ||
tm_journal_match_err(r); \ | ||
return false; \ | ||
} \ | ||
} while (0); | ||
|
||
#define JOURNAL_OR \ | ||
do { \ | ||
r = sd_journal_add_disjunction(journal); \ | ||
if (r < 0) { \ | ||
tm_journal_match_err(r); \ | ||
return false; \ | ||
} \ | ||
} while (0); | ||
|
||
static bool add_filters(sd_journal *journal) | ||
{ | ||
char *data = NULL; | ||
|
@@ -194,24 +226,16 @@ static bool add_filters(sd_journal *journal) | |
return false; | ||
} | ||
|
||
r = sd_journal_add_match(journal, data, 0); | ||
if (r < 0) { | ||
tm_journal_match_err(r); | ||
return false; | ||
} | ||
JOURNAL_MATCH(data); | ||
free(data); | ||
|
||
r = sd_journal_add_match(journal, "SYSLOG_IDENTIFIER=crashprobe", 0); | ||
if (r < 0) { | ||
tm_journal_match_err(r); | ||
return false; | ||
} | ||
|
||
r = sd_journal_add_match(journal, "PRIORITY=3", 0); | ||
if (r < 0) { | ||
tm_journal_match_err(r); | ||
return false; | ||
} | ||
JOURNAL_AND; | ||
// The three highest log levels, all indicating errors | ||
JOURNAL_MATCH("PRIORITY=1"); | ||
JOURNAL_MATCH("PRIORITY=2"); | ||
JOURNAL_MATCH("PRIORITY=3"); | ||
JOURNAL_OR; | ||
// Only set for service-level error conditions | ||
JOURNAL_MATCH("EXIT_CODE=exited"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this confuses me a little. I'd expect exited to be a possible issue assuming non zero exit but looking at: dumped I'd assume we may want to look at the dumped and killed case potentially too. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, interesting. In my testing, I only encountered "exited", but I didn't inspect the source to see what other values it accepts. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Filed an issue for this (#13) |
||
|
||
return true; | ||
} | ||
|
@@ -258,10 +282,6 @@ static bool process_journal(void) | |
} else if (r < 0) { | ||
return false; | ||
} | ||
|
||
if (!send_data(error_class)) { | ||
return false; | ||
} | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is fine as we can try to put things back together server side. Though could we have a message-id + optional message part X/Y type thing? It would really make things easier.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was thinking of using the telemetry machine-id + client record timestamp fields to piece together a series of journal probe records. Would that be acceptable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yea that should work just as well. Drinking from the firehouse of the full log will always be a little tricky.