Skip to content

Commit

Permalink
Change \\u to parse unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
zyk-mjzs committed Apr 4, 2024
1 parent ca71b5e commit 8e7f06b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
23 changes: 22 additions & 1 deletion src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ impl<'a> Parser<'a> {
let Some(current_painter) = painter_stack.last_mut() else {
return Err(ParserError::MalformedPainterStack);
};
println!("{:?} {:?}", control_word, property);
#[rustfmt::skip] // For now, rustfmt does not support this kind of alignment
match control_word {
ControlWord::ColorNumber => current_painter.color_ref = property.get_value() as ColorRef,
Expand All @@ -147,6 +146,11 @@ impl<'a> Parser<'a> {
ControlWord::SpaceAfter => paragraph.spacing.after = property.get_value(),
ControlWord::SpaceBetweenLine => paragraph.spacing.between_line = SpaceBetweenLine::from(property.get_value()),
ControlWord::SpaceLineMul => paragraph.spacing.line_multiplier = property.get_value(),
ControlWord::Unicode => {
    // `\uN` supplies a single UTF-16 code unit. The `as u16` cast keeps only the low
    // 16 bits of the parameter.
    // NOTE(review): assuming `get_value()` returns a signed integer, a negative
    // parameter wraps to the matching unsigned code unit here — confirm against the
    // RTF specification and the lexer.
    let code_unit = property.get_value() as u16;
    // One code unit is not always valid UTF-16 by itself: a surrogate half (0xD800..=0xDFFF)
    // cannot be decoded alone. Decode lossily so such input produces U+FFFD instead of
    // panicking on document-controlled data. A borrowed array also avoids the heap
    // allocation that `vec![..]` would make for a one-element slice.
    let text = String::from_utf16_lossy(&[code_unit]);
    Self::add_text_to_document(&text, &painter_stack, &paragraph, &mut document)?
}
// Others
_ => {}
};
Expand Down Expand Up @@ -650,4 +654,21 @@ pub mod tests {
assert_eq!(&document.body[0].painter.underline, &true);
assert_eq!(&document.body[1].painter.underline, &false);
}

#[test]
fn parse_unicode() {
    // The sample body is introduced by `\uc0` and then carries `\u21834`
    // (21834 is decimal for U+554A), which must surface in the parsed
    // document as the single character "啊".
    let cocoa_rtf = r#"{\rtf1\ansi\ansicpg936\cocoartf2761
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
{\colortbl;\red255\green255\blue255;}
{\*\expandedcolortbl;;}
\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
\f0\fs24 \cf0 \uc0\u21834 }"#;
    // Lex and parse in one expression; either stage failing aborts the test.
    let document = Parser::new(Lexer::scan(cocoa_rtf).unwrap())
        .parse()
        .unwrap();
    // Only the first body block is inspected: it should hold the decoded text.
    let first_block = &document.body[0];
    assert_eq!(&first_block.text, "啊");
}
}
4 changes: 3 additions & 1 deletion src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ pub enum ControlWord<'a> {
Rtf,
Ansi,

Unicode,

FontTable,
FontCharset,
FontNumber,
Expand Down Expand Up @@ -158,7 +160,7 @@ impl<'a> ControlWord<'a> {
// Format
r"\i" => ControlWord::Italic,
r"\b" => ControlWord::Bold,
r"\u" => ControlWord::Underline,
r"\u" => ControlWord::Unicode,
r"\ul" => ControlWord::Underline,
r"\ulnone" => ControlWord::UnderlineNone,
r"\super" => ControlWord::Superscript,
Expand Down

0 comments on commit 8e7f06b

Please sign in to comment.