Skip to content

Commit

Permalink
feat(pageserver): dump read path on missing key error
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh committed Jan 28, 2025
1 parent c54cd9e commit ce947a2
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 2 deletions.
9 changes: 9 additions & 0 deletions pageserver/src/tenant/storage_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,15 @@ pub(crate) enum LayerId {
InMemoryLayerId(InMemoryLayerFileId),
}

impl std::fmt::Display for LayerId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LayerId::PersitentLayerId(key) => write!(f, "{}", key),
LayerId::InMemoryLayerId(_) => write!(f, "<in-memory layer>"),
}
}
}

/// Uniquely identify a layer visit by the layer
/// and LSN floor (or start LSN) of the reads.
/// The layer itself is not enough since we may
Expand Down
68 changes: 66 additions & 2 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,9 @@ use super::{
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
use super::{
remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError,
storage_layer::ReadableLayer,
remote_timeline_client::RemoteTimelineClient,
remote_timeline_client::WaitCompletionError,
storage_layer::{LayerId, ReadableLayer},
};
use super::{
secondary::heatmap::{HeatMapLayer, HeatMapTimeline},
Expand Down Expand Up @@ -602,13 +603,58 @@ impl From<layer_manager::Shutdown> for GetVectoredError {
}
}

pub struct ReadPath {
keyspace: KeySpace,
lsn: Lsn,
path: Vec<(LayerId, KeySpace, Range<Lsn>)>,
}

impl ReadPath {
pub fn new(keyspace: KeySpace, lsn: Lsn) -> Self {
Self {
keyspace,
lsn,
path: Vec::new(),
}
}

pub fn record_read_op(
&mut self,
layer_to_read: &ReadableLayer,
keyspace_to_read: &KeySpace,
lsn_range: &Range<Lsn>,
) {
self.path.push((
layer_to_read.id(),
keyspace_to_read.clone(),
lsn_range.clone(),
));
}
}

impl std::fmt::Display for ReadPath {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(f, "Read path for {} at lsn {}:", self.keyspace, self.lsn)?;
for (idx, (layer_id, keyspace, lsn_range)) in self.path.iter().enumerate() {
writeln!(
f,
"{}: {} {}..{} {}",
idx, layer_id, lsn_range.start, lsn_range.end, keyspace
)?;
}
Ok(())
}
}

#[derive(thiserror::Error)]
pub struct MissingKeyError {
key: Key,
shard: ShardNumber,
cont_lsn: Lsn,
request_lsn: Lsn,
ancestor_lsn: Option<Lsn>,
/// Debug information about the read path if there's an error
read_path: Option<ReadPath>,
backtrace: Option<std::backtrace::Backtrace>,
}

Expand All @@ -625,10 +671,15 @@ impl std::fmt::Display for MissingKeyError {
"could not find data for key {} (shard {:?}) at LSN {}, request LSN {}",
self.key, self.shard, self.cont_lsn, self.request_lsn
)?;

if let Some(ref ancestor_lsn) = self.ancestor_lsn {
write!(f, ", ancestor {}", ancestor_lsn)?;
}

if let Some(ref read_path) = self.read_path {
write!(f, "\n{}", read_path)?;
}

if let Some(ref backtrace) = self.backtrace {
write!(f, "\n{}", backtrace)?;
}
Expand Down Expand Up @@ -1038,6 +1089,7 @@ impl Timeline {
request_lsn: lsn,
ancestor_lsn: None,
backtrace: None,
read_path: None,
})),
}
}
Expand Down Expand Up @@ -3334,6 +3386,12 @@ impl Timeline {

let mut cont_lsn = Lsn(request_lsn.0 + 1);

let mut read_path = if cfg!(feature = "testing") || cfg!(debug_assertions) {
Some(ReadPath::new(keyspace.clone(), request_lsn))
} else {
None
};

let missing_keyspace = loop {
if self.cancel.is_cancelled() {
return Err(GetVectoredError::Cancelled);
Expand All @@ -3349,6 +3407,7 @@ impl Timeline {
reconstruct_state,
&self.cancel,
ctx,
&mut read_path,
)
.await?;

Expand Down Expand Up @@ -3423,6 +3482,7 @@ impl Timeline {
request_lsn,
ancestor_lsn: Some(timeline.ancestor_lsn),
backtrace: None,
read_path,
}));
}

Expand Down Expand Up @@ -3452,6 +3512,7 @@ impl Timeline {
reconstruct_state: &mut ValuesReconstructState,
cancel: &CancellationToken,
ctx: &RequestContext,
read_path: &mut Option<ReadPath>,
) -> Result<TimelineVisitOutcome, GetVectoredError> {
let mut unmapped_keyspace = keyspace.clone();
let mut fringe = LayerFringe::new();
Expand Down Expand Up @@ -3531,6 +3592,9 @@ impl Timeline {
}

if let Some((layer_to_read, keyspace_to_read, lsn_range)) = fringe.next_layer() {
if let Some(read_path) = read_path {
read_path.record_read_op(&layer_to_read, &keyspace_to_read, &lsn_range);
}
let next_cont_lsn = lsn_range.start;
layer_to_read
.get_values_reconstruct_data(
Expand Down

0 comments on commit ce947a2

Please sign in to comment.