Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add color And \\ulnone #10

Merged
merged 8 commits into from
Apr 7, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ repository = "https://github.com/d0rianb/rtf-parser"
version = "0.2.1"
edition = "2021"
license = "MIT"
keywords = ["rtf", "rich" ,"text", "format", "parser"]
keywords = ["rtf", "rich", "text", "format", "parser"]
categories = ["parsing", "parser-implementations"]
exclude = ["*.rtf", ".idea"]

Expand All @@ -15,4 +15,7 @@ opt-level = 3

[dependencies]
derivative = "2.2.0"
serde = { version = "1.0", optional = true, features = ["derive"] }

[features]
serde_support = ["serde"]
11 changes: 11 additions & 0 deletions src/document.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
use crate::header::RtfHeader;
use crate::parser::StyleBlock;

#[cfg(feature="serde_support")]
use serde::{Deserialize, Serialize};

#[cfg(feature="serde_support")]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using #[cfg_attr(feature = "serde_support", derive(Deserialize, Serialize))] would prevent the code duplication

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thx, I got it

/// A parsed document: its `RtfHeader` plus the list of `StyleBlock`s that make up the body.
#[derive(Debug, Default, Clone, PartialEq, Deserialize, Serialize)]
pub struct RtfDocument {
/// Header information of the document.
pub header: RtfHeader,
/// Body of the document, as a sequence of style blocks.
pub body: Vec<StyleBlock>,
}

#[cfg(not(feature="serde_support"))]
#[derive(Debug, Default, Clone, PartialEq)]
pub struct RtfDocument {
pub header: RtfHeader,
Expand Down
73 changes: 73 additions & 0 deletions src/header.rs
Original file line number Diff line number Diff line change
@@ -1,37 +1,95 @@
use std::collections::HashMap;

#[cfg(feature="serde_support")]
use serde::{Deserialize, Serialize};

use crate::paragraph::Paragraph;
use crate::parser::Painter;
use crate::tokens::{ControlWord, Token};

pub type ColorRef = u16;
pub type ColorTable = HashMap<ColorRef, Color>;

pub type FontRef = u16;
pub type FontTable = HashMap<FontRef, Font>;

pub type StyleRef = u16;
pub type StyleSheet = HashMap<StyleRef, Style>;

/// Style for the StyleSheet
///
/// Bundles a `Painter` and a `Paragraph`; `StyleSheet` maps a `StyleRef` to one of these.
///
/// The serde derives are only added when the `serde_support` feature is enabled,
/// so the struct is declared once instead of once per feature state.
#[derive(Hash, Default, Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde_support", derive(Deserialize, Serialize))]
pub struct Style {
    painter: Painter,
    paragraph: Paragraph,
}

/// Information about the document, including references to fonts & styles
///
/// The serde derives are only added when the `serde_support` feature is enabled,
/// so the struct is declared once instead of once per feature state.
#[derive(Default, Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde_support", derive(Deserialize, Serialize))]
pub struct RtfHeader {
    /// Character set of the document.
    pub character_set: CharacterSet,
    /// Fonts, keyed by `FontRef`.
    pub font_table: FontTable,
    /// Colors, keyed by `ColorRef`.
    pub color_table: ColorTable,
    /// Styles, keyed by `StyleRef`.
    pub stylesheet: StyleSheet,
}

/// An entry of the `FontTable` (which maps a `FontRef` to a `Font`).
///
/// The serde derives are only added when the `serde_support` feature is enabled,
/// so the struct is declared once instead of once per feature state.
#[derive(Hash, Default, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde_support", derive(Deserialize, Serialize))]
pub struct Font {
    /// Name of the font.
    pub name: String,
    // NOTE(review): presumably the numeric charset id from the font table — confirm against the parser.
    pub character_set: u8,
    /// Family the font belongs to.
    pub font_family: FontFamily,
}

/// An entry of the `ColorTable` (which maps a `ColorRef` to a `Color`),
/// stored as separate red, green and blue components.
///
/// The serde derives are only added when the `serde_support` feature is enabled,
/// so the struct is declared once instead of once per feature state.
#[derive(Hash, Default, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde_support", derive(Deserialize, Serialize))]
pub struct Color {
    /// Red component.
    pub red: u16,
    /// Green component.
    pub green: u16,
    /// Blue component.
    pub blue: u16,
}

/// Character set stored in `RtfHeader::character_set`; defaults to `Ansi`.
#[cfg(feature="serde_support")]
#[allow(dead_code)]
#[derive(Debug, PartialEq, Default, Clone, Deserialize, Serialize)]
pub enum CharacterSet {
#[default]
Ansi,
Mac,
Pc,
Pca,
// NOTE(review): the u16 is presumably the ANSI code page number — confirm against the parser.
Ansicpg(u16),
}
#[cfg(not(feature="serde_support"))]
#[allow(dead_code)]
#[derive(Debug, PartialEq, Default, Clone)]
pub enum CharacterSet {
Expand All @@ -53,6 +111,21 @@ impl CharacterSet {
}
}

/// Family of a `Font` (see `Font::font_family`); defaults to `Nil`.
#[cfg(feature="serde_support")]
#[allow(dead_code)]
#[derive(Debug, PartialEq, Hash, Clone, Default, Deserialize, Serialize)]
pub enum FontFamily {
#[default]
Nil,
Roman,
Swiss,
Modern,
Script,
Decor,
Tech,
Bidi,
}
#[cfg(not(feature="serde_support"))]
#[allow(dead_code)]
#[derive(Debug, PartialEq, Hash, Clone, Default)]
pub enum FontFamily {
Expand Down
13 changes: 10 additions & 3 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ impl Lexer {
let control_word = ControlWord::from(ident)?;
let mut ret = vec![Token::ControlSymbol(control_word)];
recursive_tokenize!(tail, ret);

// \u1234 \u1234 is ok, but \u1234 \u1234 is lost a space, \u1234 \u1234 lost two spaces, and so on
if control_word.0 == ControlWord::Unicode && tail.len() > 0 {
ret.push(Token::PlainText(tail));
}

return Ok(ret);
}
'*' => Ok(vec![Token::IgnorableDestination]),
Expand All @@ -147,8 +153,9 @@ impl Lexer {

#[cfg(test)]
pub(crate) mod tests {
use crate::header::Color;
use crate::lexer::Lexer;
use crate::tokens::ControlWord::{Ansi, Bold, FontNumber, FontSize, FontTable, Italic, Par, Pard, Rtf, Underline, Unknown};
use crate::tokens::ControlWord::{Ansi, Bold, FontNumber, ColorNumber, FontSize, FontTable, Italic, Par, Pard, Rtf, Underline, Unknown};
use crate::tokens::Property::*;
use crate::tokens::Token::*;

Expand Down Expand Up @@ -202,7 +209,7 @@ if (a == b) \{\
vec![
ControlSymbol((FontNumber, Value(0))),
ControlSymbol((FontSize, Value(24))),
ControlSymbol((Unknown("\\cf"), Value(0))),
ControlSymbol((ColorNumber, Value(0))),
PlainText("test de code "),
CRLF,
PlainText("if (a == b) "),
Expand Down Expand Up @@ -266,7 +273,7 @@ if (a == b) \{\
OpeningBracket,
ControlSymbol((Unknown("\\partightenfactor"), Value(0))),
ControlSymbol((FontSize, Value(24))),
ControlSymbol((Unknown("\\cf"), Value(0))),
ControlSymbol((ColorNumber, Value(0))),
PlainText("Font size 12,"),
ControlSymbol((FontNumber, Value(0))),
ControlSymbol((Bold, None)),
Expand Down
47 changes: 47 additions & 0 deletions src/paragraph.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
// Define the paragraph related structs and enums

#[cfg(feature="serde_support")]
use serde::{Deserialize, Serialize};
use crate::tokens::ControlWord;

/// Paragraph-level formatting: alignment, spacing, indentation and tab width.
#[cfg(feature="serde_support")]
#[derive(Debug, Default, Clone, PartialEq, Hash, Deserialize, Serialize)]
pub struct Paragraph {
/// Horizontal alignment of the paragraph.
pub alignment: Alignment,
/// Vertical spacing around and inside the paragraph.
pub spacing: Spacing,
/// Left, right and first-line indentation.
pub indent: Indentation,
// NOTE(review): unit of the tab width is not visible here — confirm against the parser.
pub tab_width: i32,
}
#[cfg(not(feature="serde_support"))]
#[derive(Debug, Default, Clone, PartialEq, Hash)]
pub struct Paragraph {
pub alignment: Alignment,
Expand All @@ -11,6 +22,16 @@ pub struct Paragraph {
}

/// Alignment of a paragraph (left, right, center, justify); defaults to `LeftAligned`.
#[cfg(feature="serde_support")]
#[derive(Debug, Default, Clone, Copy, PartialEq, Hash, Deserialize, Serialize)]
pub enum Alignment {
#[default]
LeftAligned, // \ql
RightAligned, // \qr
Center, // \qc
Justify, // \qj
}
#[cfg(not(feature="serde_support"))]
#[derive(Debug, Default, Clone, Copy, PartialEq, Hash)]
pub enum Alignment {
#[default]
Expand All @@ -33,6 +54,15 @@ impl From<&ControlWord<'_>> for Alignment {
}

/// The vertical margin before / after a block of text
#[cfg(feature="serde_support")]
#[derive(Debug, Default, Clone, PartialEq, Hash, Deserialize, Serialize)]
pub struct Spacing {
/// Margin before the block.
pub before: i32,
/// Margin after the block.
pub after: i32,
/// Spacing between the lines, see `SpaceBetweenLine`.
pub between_line: SpaceBetweenLine,
// NOTE(review): meaning and unit of the multiplier are not visible here — confirm against the parser.
pub line_multiplier: i32,
}
#[cfg(not(feature="serde_support"))]
#[derive(Debug, Default, Clone, PartialEq, Hash)]
pub struct Spacing {
pub before: i32,
Expand All @@ -41,6 +71,15 @@ pub struct Spacing {
pub line_multiplier: i32,
}

/// Line spacing setting: an explicit `Value`, `Auto` (the default) or `Invalid`.
#[cfg(feature="serde_support")]
#[derive(Default, Debug, Clone, PartialEq, Hash, Deserialize, Serialize)]
pub enum SpaceBetweenLine {
/// Explicit spacing value.
Value(i32),
#[default]
Auto,
Invalid,
}
#[cfg(not(feature="serde_support"))]
#[derive(Default, Debug, Clone, PartialEq, Hash)]
pub enum SpaceBetweenLine {
Value(i32),
Expand All @@ -64,6 +103,14 @@ impl From<i32> for SpaceBetweenLine {
}

// This struct cannot be an enum because left-indent and right-indent can both be defined at the same time
/// Indentation of a paragraph: left, right and first-line values.
#[cfg(feature="serde_support")]
#[derive(Default, Debug, Clone, PartialEq, Hash, Deserialize, Serialize)]
pub struct Indentation {
pub left: i32,
pub right: i32,
pub first_line: i32,
}
#[cfg(not(feature="serde_support"))]
#[derive(Default, Debug, Clone, PartialEq, Hash)]
pub struct Indentation {
pub left: i32,
Expand Down
Loading
Loading