Skip to content

Commit

Permalink
stm32h7-rng: rework driver, add seed error recovery
Browse files Browse the repository at this point in the history
We have a machine in manufacturing that gets periodic seed errors. The
manual specifies a recovery procedure for seed errors... which we
haven't implemented.

This commit makes us more tolerant of seed errors by attempting
recovery, but not twice in a row. Two seed errors in a row will still
return an error.

This also adds a ringbuf so we can monitor frequency. The ringbuf
increases RAM usage enough that it bumps past the 512-byte boundary, so
I removed all the lines that limit it to 512 (life is too short).
  • Loading branch information
cbiffle committed Dec 12, 2024
1 parent 2c8ac9b commit 686a45b
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 64 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion app/demo-stm32h7-nucleo/app-h743.toml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ start = true
features = ["h743"]
priority = 3
name = "drv-stm32h7-rng"
max-sizes = {flash = 8192, ram = 512}
stacksize = 256
start = true
task-slots = ["sys", "user_leds"]
Expand Down
1 change: 0 additions & 1 deletion app/demo-stm32h7-nucleo/app-h753.toml
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ start = true
features = ["h753"]
priority = 3
name = "drv-stm32h7-rng"
max-sizes = {flash = 8192, ram = 512}
stacksize = 256
start = true
task-slots = ["sys", "user_leds"]
Expand Down
1 change: 0 additions & 1 deletion app/gemini-bu/app.toml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ start = true
features = ["h753"]
name = "drv-stm32h7-rng"
priority = 6
max-sizes = {flash = 8192, ram = 512}
uses = ["rng"]
start = true
stacksize = 256
Expand Down
1 change: 0 additions & 1 deletion app/gimletlet/base-gimletlet2.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ start = true
features = ["h753"]
name = "drv-stm32h7-rng"
priority = 6
max-sizes = {flash = 8192, ram = 512}
uses = ["rng"]
start = true
stacksize = 256
Expand Down
1 change: 0 additions & 1 deletion app/sidecar/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ owner = {name = "sprot", notification = "rot_irq"}
features = ["h753"]
name = "drv-stm32h7-rng"
priority = 6
max-sizes = {flash = 8192, ram = 512}
uses = ["rng"]
start = true
stacksize = 256
Expand Down
1 change: 1 addition & 0 deletions drv/stm32h7-rng/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ zerocopy = { workspace = true }
drv-rng-api = { path = "../rng-api" }
drv-stm32xx-sys-api = { path = "../stm32xx-sys-api" }
userlib = { path = "../../sys/userlib", features = ["panic-messages"] }
ringbuf = { path = "../../lib/ringbuf" }

[build-dependencies]
idol = { workspace = true }
Expand Down
203 changes: 144 additions & 59 deletions drv/stm32h7-rng/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,73 +23,134 @@ use userlib::*;

task_slot!(SYS, sys);

ringbuf::ringbuf!(Trace, 32, Trace::Blank);

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum Trace {
Blank,
ClockError,
SeedError,
}

struct Stm32h7Rng {
cr: &'static device::rng::CR,
dr: &'static device::rng::DR,
sr: &'static device::rng::SR,
regs: &'static device::rng::RegisterBlock,
sys: Sys,
}

impl Stm32h7Rng {
fn new() -> Self {
let registers = unsafe { &*device::RNG::ptr() };
Stm32h7Rng {
cr: &registers.cr,
dr: &registers.dr,
sr: &registers.sr,
regs: registers,
sys: Sys::from(SYS.get_task_id()),
}
}

fn init(&mut self) -> Result<(), RngError> {
fn init(&mut self) {
// Reset the RNG, so that if we restart, so does the hardware.
self.sys.enter_reset(Peripheral::Rng);
self.sys.leave_reset(Peripheral::Rng);

// Turn on the RNG's clock and bus connection.
self.sys.enable_clock(Peripheral::Rng);
self.enable_rng();
if self.is_clock_error() {
let err = RngError::ClockError;
return Err(err);
}

Ok(())
// Turn it on. This _starts_ the initialization process, which takes
// time on the order of microseconds. The process ends with setting
// either the DRDY bit or an ERROR bit in SR, which we'll notice when we
// attempt to pull data out of it.
self.regs.cr.modify(|_, w| w.rngen().set_bit());
}

fn read(&mut self) -> Result<u32, RngError> {
let mut retries = 10;
while !self.is_data_ready() && retries > 0 {
for _retry in 0..10 {
// Sample our status register.
let sr = self.regs.sr.read();

// We do not expect clock errors. The clock configuration is static,
// and clock errors can only occur if the RNG kernel clock is
// waaaaay below the AHB clock, which we don't do. But, we're
// interested in finding out they _were happening_ if they happen,
// so:
if sr.ceis().bit_is_set() {
// TODO this should be an ereport
ringbuf::ringbuf_entry!(Trace::ClockError);
// Clear it so we don't repeat ourselves. The two writable bits
// in this register are write-one-to-preserve, so weirdly we
// want to write SEIS to 1 here.
self.regs.sr.modify(|_, w| {
w.seis().set_bit();
w.ceis().clear_bit();
w
});
}

// If an error occurred, return it so that our caller can attempt
// recovery.
if sr.seis().bit_is_set() {
// TODO this should be an ereport
ringbuf::ringbuf_entry!(Trace::SeedError);
// Clear it so we don't repeat ourselves. The two writable bits
// in this register are write-one-to-preserve, so weirdly we
// want to write CEIS to 1 here.
self.regs.sr.modify(|_, w| {
w.ceis().set_bit();
w.seis().clear_bit();
w
});
return Err(RngError::SeedError);
}
// If data is ready, we can yield it.
if sr.drdy().bit_is_set() {
let data = self.regs.dr.read().rndata().bits();
// There's a note in section 34.3.5 of the reference manual that
// reads:
//
// > When data is not ready (DRDY = 0) RNG_DR returns zero. It
// > is recommended to always verify that RNG_DR is different
// > from zero. Because when it is the case a seed error
// > occurred between RNG_SR polling and RND_DR output reading
// > (rare event).
//
// Why it's not sufficient to check SR.SEIS after reading DR, I
// couldn't tell you. But we'll implement their weird
// recommendation.
if data != 0 {
return Ok(data);
}
}

// Otherwise, keep trying after waiting a tick.
hl::sleep_for(1);
retries -= 1;
}
if !self.is_data_ready() {
return Err(RngError::NoData);
}
if self.is_seed_error() {
return Err(RngError::SeedError);
}
Ok(self.dr.read().rndata().bits())
}

fn enable_rng(&self) {
self.cr.modify(|_, w| w.rngen().set_bit());
Err(RngError::NoData)
}

fn is_clock_error_detect(&self) -> bool {
self.cr.read().ced().bits()
}

fn is_clock_error(&self) -> bool {
// if clock error detection is disabled, CECS & CEIS won't be valid
if self.is_clock_error_detect() {
return self.sr.read().cecs().bits()
|| self.sr.read().ceis().bits();
fn attempt_recovery(&mut self) -> Result<(), RngError> {
// The recovery procedure is in section 34.3.7 of the manual.
//
// First, we clear the SEIS flag. The CEIS and SEIS flags are both
// rc_w0, meaning writing 1 has no effect, and 0 clears. So to clear
// SEIS, we have to _set_ CEIS too.
self.regs.sr.write(|w| {
w.seis().clear_bit();
w.ceis().set_bit();
w
});

// Next, we read 12 words to clear the pipeline. The number of words is
// 12. Not 11, not 10.
for _ in 0..12 {
let _ = self.regs.dr.read();
}
false
}

fn is_data_ready(&self) -> bool {
self.sr.read().drdy().bits()
}

fn is_seed_error(&self) -> bool {
self.sr.read().secs().bits() || self.sr.read().seis().bits()
// Finally, we check whether SEIS is clear.
if self.regs.sr.read().seis().bit_is_clear() {
// Yay!
Ok(())
} else {
Err(RngError::SeedError)
}
}
}

Expand All @@ -109,23 +170,47 @@ impl idl::InOrderRngImpl for Stm32h7RngServer {
_: &RecvMessage,
dest: idol_runtime::Leased<idol_runtime::W, [u8]>,
) -> Result<usize, RequestError<RngError>> {
let len = dest.len();
let mut cnt = 0;
for _ in 0..(dest.len() / 4) {
let ent = self.rng.read()?;
dest.write_range(cnt..cnt + 4, &ent.to_ne_bytes()[0..4])
.map_err(|_| RequestError::Fail(ClientError::WentAway))?;
cnt += 4;
}

let remain = dest.len() - cnt;
if remain > 4 {
panic!("RNG state machine bork");
}
if remain > 0 {
let ent = self.rng.read()?;
dest.write_range(cnt..dest.len(), &ent.to_ne_bytes()[0..remain])
.map_err(|_| RequestError::Fail(ClientError::WentAway))?;
cnt += remain;
// Keep track of whether we're making progress for recovery purposes. We
// start this at 'true' so that, if we detect a seed error at the start
// of this function, we'll try to recover at least once.
let mut made_progress = true;

while cnt < len {
match self.rng.read() {
Ok(word) => {
// We have 32 shiny bits of data. Prepare to write that
// much, or less if we're at the tail, to the client.
let word_bytes = &word.to_ne_bytes();
let chunk_size = usize::min(len - cnt, 4);
dest.write_range(
cnt..cnt + chunk_size,
&word_bytes[..chunk_size],
)
.map_err(|_| RequestError::Fail(ClientError::WentAway))?;
cnt += chunk_size;

// Note that we successfully did a thing, for recovery
// purposes.
made_progress = true;
}
Err(RngError::SeedError) if made_progress => {
// It may be worth attempting recovery. Recovery may not
// succeed, in which case this'll just bomb right out of
// here and send an error to the client.
self.rng.attempt_recovery()?;

// Clear the progress flag so that if another seed error
// happens immediately, we'll exit.
made_progress = false;
}
Err(e) => {
// We have no way to recover from the other cases.
return Err(e.into());
}
}
}

Ok(cnt)
Expand All @@ -146,7 +231,7 @@ impl NotificationHandler for Stm32h7RngServer {
#[export_name = "main"]
fn main() -> ! {
let mut rng = Stm32h7Rng::new();
rng.init().expect("init failed");
rng.init();

let mut srv = Stm32h7RngServer::new(rng);
let mut buffer = [0u8; idl::INCOMING_SIZE];
Expand Down

0 comments on commit 686a45b

Please sign in to comment.