Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(system_monitor): add a hostname for diagnostic information #1636

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions system/system_monitor/src/cpu_monitor/cpu_monitor_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,12 @@ CPUMonitorBase::CPUMonitorBase(const std::string & node_name, const rclcpp::Node
mpstat_exists_ = (p.empty()) ? false : true;

updater_.setHardwareID(hostname_);
updater_.add("CPU Temperature", this, &CPUMonitorBase::checkTemp);
updater_.add("CPU Usage", this, &CPUMonitorBase::checkUsage);
updater_.add("CPU Load Average", this, &CPUMonitorBase::checkLoad);
updater_.add("CPU Thermal Throttling", this, &CPUMonitorBase::checkThrottling);
updater_.add("CPU Frequency", this, &CPUMonitorBase::checkFrequency);
updater_.add(std::string(hostname_) + ": CPU Temperature", this, &CPUMonitorBase::checkTemp);
updater_.add(std::string(hostname_) + ": CPU Usage", this, &CPUMonitorBase::checkUsage);
updater_.add(std::string(hostname_) + ": CPU Load Average", this, &CPUMonitorBase::checkLoad);
updater_.add(
std::string(hostname_) + ": CPU Thermal Throttling", this, &CPUMonitorBase::checkThrottling);
updater_.add(std::string(hostname_) + ": CPU Frequency", this, &CPUMonitorBase::checkFrequency);

// Publisher
rclcpp::QoS durable_qos{1};
Expand Down
12 changes: 7 additions & 5 deletions system/system_monitor/src/gpu_monitor/gpu_monitor_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,13 @@ GPUMonitorBase::GPUMonitorBase(const std::string & node_name, const rclcpp::Node
gethostname(hostname_, sizeof(hostname_));

updater_.setHardwareID(hostname_);
updater_.add("GPU Temperature", this, &GPUMonitorBase::checkTemp);
updater_.add("GPU Usage", this, &GPUMonitorBase::checkUsage);
updater_.add("GPU Memory Usage", this, &GPUMonitorBase::checkMemoryUsage);
updater_.add("GPU Thermal Throttling", this, &GPUMonitorBase::checkThrottling);
updater_.add("GPU Frequency", this, &GPUMonitorBase::checkFrequency);
updater_.add(std::string(hostname_) + ": GPU Temperature", this, &GPUMonitorBase::checkTemp);
updater_.add(std::string(hostname_) + ": GPU Usage", this, &GPUMonitorBase::checkUsage);
updater_.add(
std::string(hostname_) + ": GPU Memory Usage", this, &GPUMonitorBase::checkMemoryUsage);
updater_.add(
std::string(hostname_) + ": GPU Thermal Throttling", this, &GPUMonitorBase::checkThrottling);
updater_.add(std::string(hostname_) + ": GPU Frequency", this, &GPUMonitorBase::checkFrequency);
}

void GPUMonitorBase::update()
Expand Down
26 changes: 16 additions & 10 deletions system/system_monitor/src/hdd_monitor/hdd_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,22 @@ HddMonitor::HddMonitor(const rclcpp::NodeOptions & options)
getHddParams();

updater_.setHardwareID(hostname_);
updater_.add("HDD Temperature", this, &HddMonitor::checkSmartTemperature);
updater_.add("HDD PowerOnHours", this, &HddMonitor::checkSmartPowerOnHours);
updater_.add("HDD TotalDataWritten", this, &HddMonitor::checkSmartTotalDataWritten);
updater_.add("HDD RecoveredError", this, &HddMonitor::checkSmartRecoveredError);
updater_.add("HDD Usage", this, &HddMonitor::checkUsage);
updater_.add("HDD ReadDataRate", this, &HddMonitor::checkReadDataRate);
updater_.add("HDD WriteDataRate", this, &HddMonitor::checkWriteDataRate);
updater_.add("HDD ReadIOPS", this, &HddMonitor::checkReadIops);
updater_.add("HDD WriteIOPS", this, &HddMonitor::checkWriteIops);
updater_.add("HDD Connection", this, &HddMonitor::checkConnection);
updater_.add(
std::string(hostname_) + ": HDD Temperature", this, &HddMonitor::checkSmartTemperature);
updater_.add(
std::string(hostname_) + ": HDD PowerOnHours", this, &HddMonitor::checkSmartPowerOnHours);
updater_.add(
std::string(hostname_) + ": HDD TotalDataWritten", this,
&HddMonitor::checkSmartTotalDataWritten);
updater_.add(
std::string(hostname_) + ": HDD RecoveredError", this, &HddMonitor::checkSmartRecoveredError);
updater_.add(std::string(hostname_) + ": HDD Usage", this, &HddMonitor::checkUsage);
updater_.add(std::string(hostname_) + ": HDD ReadDataRate", this, &HddMonitor::checkReadDataRate);
updater_.add(
std::string(hostname_) + ": HDD WriteDataRate", this, &HddMonitor::checkWriteDataRate);
updater_.add(std::string(hostname_) + ": HDD ReadIOPS", this, &HddMonitor::checkReadIops);
updater_.add(std::string(hostname_) + ": HDD WriteIOPS", this, &HddMonitor::checkWriteIops);
updater_.add(std::string(hostname_) + ": HDD Connection", this, &HddMonitor::checkConnection);

// get HDD connection status
updateHddConnections();
Expand Down
4 changes: 2 additions & 2 deletions system/system_monitor/src/mem_monitor/mem_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ MemMonitor::MemMonitor(const rclcpp::NodeOptions & options)
{
gethostname(hostname_, sizeof(hostname_));
updater_.setHardwareID(hostname_);
updater_.add("Memory Usage", this, &MemMonitor::checkUsage);
updater_.add(std::string(hostname_) + ": Memory Usage", this, &MemMonitor::checkUsage);

// Enable ECC error detection if edac-utils package is installed
if (!bp::search_path("edac-util").empty()) {
updater_.add("Memory ECC", this, &MemMonitor::checkEcc);
updater_.add(std::string(hostname_) + ": Memory ECC", this, &MemMonitor::checkEcc);
}
}

Expand Down
13 changes: 8 additions & 5 deletions system/system_monitor/src/net_monitor/net_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,14 @@ NetMonitor::NetMonitor(const rclcpp::NodeOptions & options)

gethostname(hostname_, sizeof(hostname_));
updater_.setHardwareID(hostname_);
updater_.add("Network Connection", this, &NetMonitor::check_connection);
updater_.add("Network Usage", this, &NetMonitor::check_usage);
updater_.add("Network Traffic", this, &NetMonitor::monitor_traffic);
updater_.add("Network CRC Error", this, &NetMonitor::check_crc_error);
updater_.add("IP Packet Reassembles Failed", this, &NetMonitor::check_reassembles_failed);
updater_.add(
std::string(hostname_) + ": Network Connection", this, &NetMonitor::check_connection);
updater_.add(std::string(hostname_) + ": Network Usage", this, &NetMonitor::check_usage);
updater_.add(std::string(hostname_) + ": Network Traffic", this, &NetMonitor::monitor_traffic);
updater_.add(std::string(hostname_) + ": Network CRC Error", this, &NetMonitor::check_crc_error);
updater_.add(
std::string(hostname_) + ": IP Packet Reassembles Failed", this,
&NetMonitor::check_reassembles_failed);

nl80211_.init();

Expand Down
2 changes: 1 addition & 1 deletion system/system_monitor/src/ntp_monitor/ntp_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ NTPMonitor::NTPMonitor(const rclcpp::NodeOptions & options)
chronyc_exists_ = (p.empty()) ? false : true;

updater_.setHardwareID(hostname_);
updater_.add("NTP Offset", this, &NTPMonitor::checkOffset);
updater_.add(std::string(hostname_) + ": NTP Offset", this, &NTPMonitor::checkOffset);

// Start timer to execute top command
timer_callback_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,17 @@ ProcessMonitor::ProcessMonitor(const rclcpp::NodeOptions & options)
gethostname(hostname_, sizeof(hostname_));

updater_.setHardwareID(hostname_);
updater_.add("Tasks Summary", this, &ProcessMonitor::monitorProcesses);
updater_.add(std::string(hostname_) + ": Tasks Summary", this, &ProcessMonitor::monitorProcesses);

for (index = 0; index < num_of_procs_; ++index) {
auto task = std::make_shared<DiagTask>(fmt::format("High-load Proc[{}]", index));
auto task = std::make_shared<DiagTask>(
fmt::format(std::string(hostname_) + ": High-load Proc[{}]", index));
load_tasks_.push_back(task);
updater_.add(*task);
}
for (index = 0; index < num_of_procs_; ++index) {
auto task = std::make_shared<DiagTask>(fmt::format("High-mem Proc[{}]", index));
auto task = std::make_shared<DiagTask>(
fmt::format(std::string(hostname_) + ": High-mem Proc[{}]", index));
memory_tasks_.push_back(task);
updater_.add(*task);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ VoltageMonitor::VoltageMonitor(const rclcpp::NodeOptions & options)
}
callback = &VoltageMonitor::checkVoltage;
}
updater_.add("CMOS Battery Status", this, callback);
updater_.add(std::string(hostname_) + ": CMOS Battery Status", this, callback);
}

void VoltageMonitor::checkVoltage(diagnostic_updater::DiagnosticStatusWrapper & stat)
Expand Down
Loading