-
-
Notifications
You must be signed in to change notification settings - Fork 132
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix
Lexer::clone
leak and UB + tests
`Lexer::clone` shouldn't clone the inner `ManuallyDrop`, because doing so clones the inner value, which is moved out in `Lexer::next`. This causes use-after-free if the lexer is cloned after the last-returned token is dropped, especially if the token contains an overridden implementation of `Clone` (such as `Rc`) that tries to read the dropped data. It causes a memory leak if the token contains a heap-allocated value, because cloning makes a new allocation. This allocation is in the `ManuallyDrop` and it's guaranteed to be overridden before the call to `ManuallyDrop::take`, so it's never freed. Another thing: #263 (make `Lexer` implement `Copy`) probably should be added (referencing here because it looks like the issue has been forgotten).
- Loading branch information
Showing
3 changed files
with
181 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
use std::cell::Cell; | ||
|
||
use logos::Logos as _; | ||
use logos_derive::Logos; | ||
|
||
#[derive(Logos, Clone, Debug, PartialEq)] | ||
pub enum Token { | ||
#[token("a", |_| Evil::default())] | ||
Evil(Evil), | ||
} | ||
|
||
#[derive(Debug, Default, PartialEq)] | ||
pub struct Evil(Box<Cell<u8>>); | ||
|
||
impl Clone for Evil { | ||
fn clone(&self) -> Self { | ||
self.0.set(self.0.get() + 1); | ||
Self::default() | ||
} | ||
} | ||
|
||
#[test] | ||
fn test_it_works_without_cloning() { | ||
let mut lexer = Token::lexer("aaa"); | ||
assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default())))); | ||
assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default())))); | ||
assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default())))); | ||
assert_eq!(lexer.next(), None); | ||
} | ||
|
||
#[test] | ||
fn test_clone_ub() { | ||
let mut lexer = Token::lexer("a"); | ||
assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default())))); | ||
|
||
// In logos 0.14.0, this causes use-after-free (UB), | ||
// because `Clone` dereferences the value returned by the last call to `lexer.next()`, | ||
// which got deallocated. | ||
// A real-life example where this could happen is with `Rc`. | ||
// Note that it may still pass `cargo test`, it will fail with Miri. | ||
let mut lexer2 = lexer.clone(); | ||
|
||
assert_eq!(lexer2.next(), None); | ||
} | ||
|
||
#[test] | ||
fn test_clone_leak() { | ||
let mut lexer = Token::lexer("a"); | ||
let Some(Ok(Token::Evil(evil))) = lexer.next() else { | ||
panic!("Expected Token::Evil"); | ||
}; | ||
assert_eq!(evil.0.get(), 0); | ||
|
||
// In logos 0.14.0, this causes a memory leak because `evil` is cloned with `lexer`. | ||
// This produces `evil.0.get() == 1`. It will fail even on `cargo test`. | ||
let mut lexer2 = lexer.clone(); | ||
assert_eq!(evil.0.get(), 0); | ||
|
||
assert_eq!(lexer2.next(), None); | ||
let _ = evil.clone(); | ||
assert_eq!(evil.0.get(), 1); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
use logos::{Lexer, Logos as _}; | ||
use logos_derive::Logos; | ||
|
||
#[derive(Logos, Clone, Debug, PartialEq)] | ||
#[logos(skip " ")] | ||
pub enum Token { | ||
#[regex(r#""([^"\\]+|\\.)*""#, lex_single_line_string)] | ||
String(String), | ||
} | ||
|
||
#[test] | ||
fn test_it_works_without_cloning() { | ||
let mut lexer = Token::lexer(r#""Hello, world!" "foo😀bar\nbaz \x3F\u{1234}""#); | ||
assert_eq!( | ||
lexer.next(), | ||
Some(Ok(Token::String("Hello, world!".to_string()))) | ||
); | ||
assert_eq!( | ||
lexer.next(), | ||
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) | ||
); | ||
assert_eq!(lexer.next(), None); | ||
} | ||
|
||
#[test] | ||
fn test_it_works_with_cloning() { | ||
let mut lexer = Token::lexer(r#""Hello, world!" "foo😀bar\nbaz \x3F\u{1234}""#); | ||
let mut lexer2 = lexer.clone(); | ||
assert_eq!( | ||
lexer2.next(), | ||
Some(Ok(Token::String("Hello, world!".to_string()))) | ||
); | ||
let mut lexer3 = lexer2.clone(); | ||
let mut lexer4 = lexer3.clone(); | ||
assert_eq!( | ||
lexer3.next(), | ||
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) | ||
); | ||
assert_eq!( | ||
lexer4.next(), | ||
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) | ||
); | ||
assert_eq!(lexer4.next(), None); | ||
let mut lexer5 = lexer.clone(); | ||
assert_eq!( | ||
lexer5.next(), | ||
Some(Ok(Token::String("Hello, world!".to_string()))) | ||
); | ||
assert_eq!( | ||
lexer5.next(), | ||
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) | ||
); | ||
assert_eq!( | ||
lexer.next(), | ||
Some(Ok(Token::String("Hello, world!".to_string()))) | ||
); | ||
assert_eq!(lexer5.next(), None); | ||
assert_eq!( | ||
lexer2.next(), | ||
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) | ||
); | ||
assert_eq!(lexer2.next(), None); | ||
assert_eq!(lexer3.next(), None); | ||
assert_eq!( | ||
lexer.next(), | ||
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) | ||
); | ||
assert_eq!(lexer.next(), None); | ||
} | ||
|
||
// Not important | ||
pub fn lex_single_line_string(lexer: &mut Lexer<Token>) -> Result<String, ()> { | ||
let mut string = String::new(); | ||
let mut chars = lexer.slice()[1..lexer.slice().len() - 1].chars(); | ||
while let Some(c) = chars.next() { | ||
match c { | ||
'\\' => { | ||
let c = chars.next().ok_or(())?; | ||
match c { | ||
'\n' => {} | ||
'n' => string.push('\n'), | ||
'r' => string.push('\r'), | ||
't' => string.push('\t'), | ||
'0' => string.push('\0'), | ||
'\'' | '"' | '\\' => string.push(c), | ||
'x' => { | ||
let mut hex = String::new(); | ||
hex.push(chars.next().ok_or(())?); | ||
hex.push(chars.next().ok_or(())?); | ||
let code = u8::from_str_radix(&hex, 16).map_err(|_| ())?; | ||
if code > 0x7F { | ||
return Err(()); | ||
} | ||
string.push(code as char); | ||
} | ||
'u' => { | ||
if chars.next() != Some('{') { | ||
return Err(()); | ||
} | ||
let mut hex = String::new(); | ||
for _ in 0..6 { | ||
let c = chars.next().ok_or(())?; | ||
if c == '}' { | ||
break; | ||
} | ||
hex.push(c); | ||
} | ||
let code = u32::from_str_radix(&hex, 16).map_err(|_| ())?; | ||
string.push(char::from_u32(code).ok_or(())?); | ||
} | ||
_ => return Err(()), | ||
} | ||
} | ||
_ => string.push(c), | ||
} | ||
} | ||
Ok(string) | ||
} |