-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(maker): Monitor and expose status of services connected
Monitor the following services: the coordinator and the orderbook (we don't subscribe to the orderbook feed yet), in order to derive the health status of the maker. Health status is exposed via the HTTP API and also reported as dedicated Prometheus metrics. TODO: Subscribe to the orderbook websocket stream to be able to detect if it is down. TODO: Subscribe to the BitMEX client stream to be able to spot outdated information. Almost there, just exposing the HTTP API; I still need to show quickly how to return different HTTP errors based on health status.
- Loading branch information
1 parent
1df5c45
commit 943dfaa
Showing
5 changed files
with
221 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// TODO: Add health collection metrics | ||
// This can be later used to add health metrics to the health endpoint | ||
|
||
use anyhow::bail; | ||
use anyhow::Context; | ||
use anyhow::Result; | ||
use reqwest::Client; | ||
use reqwest::StatusCode; | ||
use reqwest::Url; | ||
use std::time::Duration; | ||
use tokio::sync::watch; | ||
|
||
/// Health status of a single monitored service.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ServiceStatus {
    /// No status has been published yet; this is the default value.
    #[default]
    Unknown,
    /// The service is reported as reachable.
    Online,
    /// The service is reported as unreachable.
    Offline,
}
|
||
/// Health monitoring for the node | ||
/// | ||
/// Simple endpoint querying is handled by provided configuration, for more complex health checks | ||
/// the transmitters are exposed to be plugged in the services that need to publish their health | ||
/// status. | ||
#[derive(Clone)] | ||
pub struct Health { | ||
/// Coordinator HTTP API status | ||
coordinator_rx: watch::Receiver<ServiceStatus>, | ||
/// Orderbook websocket stream status | ||
orderbook_rx: watch::Receiver<ServiceStatus>, | ||
} | ||
|
||
/// Transmitters that need to be plugged in the services that need to publish their health status. | ||
pub struct Tx { | ||
pub orderbook: watch::Sender<ServiceStatus>, | ||
pub coordinator: watch::Sender<ServiceStatus>, | ||
} | ||
|
||
impl Health { | ||
pub fn new() -> (Self, Tx) { | ||
let (orderbook_tx, orderbook_rx) = watch::channel(ServiceStatus::Unknown); | ||
let (coordinator_tx, coordinator_rx) = watch::channel(ServiceStatus::Unknown); | ||
|
||
( | ||
Self { | ||
coordinator_rx, | ||
orderbook_rx, | ||
}, | ||
Tx { | ||
orderbook: orderbook_tx, | ||
coordinator: coordinator_tx, | ||
}, | ||
) | ||
} | ||
|
||
// TODO: Any ideas of the most useful way of sending this information? | ||
// - perhaps a serialized struct (json) with the status of each service? | ||
pub fn get_health(&self) -> Result<String> { | ||
let mut health_info = String::new(); | ||
health_info.push_str(&format!( | ||
"Coordinator: {:?}\n", | ||
self.get_coordinator_status() | ||
)); | ||
health_info.push_str(&format!("Orderbook: {:?}\n", self.get_orderbook_status())); | ||
|
||
if self.get_coordinator_status() == ServiceStatus::Online | ||
// TODO: Uncomment this line when we retrieve the orderbook status | ||
// && (self.get_orderbook_status() == ServiceStatus::Online) | ||
{ | ||
Ok(health_info) | ||
} else { | ||
bail!("Status: ERROR\n + {health_info}"); | ||
} | ||
} | ||
|
||
pub fn get_coordinator_status(&self) -> ServiceStatus { | ||
*self.coordinator_rx.borrow() | ||
} | ||
|
||
pub fn get_orderbook_status(&self) -> ServiceStatus { | ||
*self.orderbook_rx.borrow() | ||
} | ||
// TODO: Add bitmex status? would anything else be useful here? | ||
} | ||
|
||
/// Simple way of checking if a service is online or offline | ||
pub async fn check_health_endpoint( | ||
client: &Client, | ||
endpoint: Url, | ||
tx: watch::Sender<ServiceStatus>, | ||
interval: Duration, | ||
) { | ||
loop { | ||
let status = if check_endpoint_availability(client, endpoint.clone()) | ||
.await | ||
.is_ok() | ||
{ | ||
ServiceStatus::Online | ||
} else { | ||
ServiceStatus::Offline | ||
}; | ||
|
||
tx.send(status).expect("Receiver not to be dropped"); | ||
tokio::time::sleep(interval).await; | ||
} | ||
} | ||
|
||
async fn check_endpoint_availability(client: &Client, endpoint: Url) -> Result<StatusCode> { | ||
tracing::trace!(%endpoint, "Sending request to check health"); | ||
let response = client | ||
.get(endpoint) | ||
.send() | ||
.await | ||
.context("could not send request")? | ||
.error_for_status()?; | ||
Ok(response.status()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters