diff --git a/Cargo.toml b/Cargo.toml index 54d4450..3a19fee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rusty-tesseract" -version = "1.1.3" +version = "1.1.4" edition = "2021" authors = ["thomasgruebl"] description = "A Rust wrapper for Google Tesseract" diff --git a/README.md b/README.md index 60a6405..94a69d1 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ A Rust wrapper for Google Tesseract Add the following line to your Cargo.toml file: ```rust -rusty-tesseract = "1.1.3" +rusty-tesseract = "1.1.4" ``` ## Description @@ -94,7 +94,10 @@ Choose either string, bounding box or data output: // define parameters let mut my_args = Args { lang: "eng", - config_variables: "'tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'", + config_variables: HashMap::from([( + "tessedit_char_whitelist".into(), + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".into(), + )]), dpi: 150, psm: 6, oem: 3 diff --git a/src/tesseract.rs b/src/tesseract.rs index 9b8ed56..586de8c 100644 --- a/src/tesseract.rs +++ b/src/tesseract.rs @@ -11,3 +11,6 @@ pub use input::*; pub use output_boxes::*; pub use output_config_parameters::*; pub use output_data::*; + +mod parse_line_util; +use parse_line_util::*; diff --git a/src/tesseract/error.rs b/src/tesseract/error.rs index 383fb6c..d29c838 100644 --- a/src/tesseract/error.rs +++ b/src/tesseract/error.rs @@ -1,22 +1,28 @@ use thiserror::Error; -#[derive(Error, Debug)] +#[derive(Error, Debug, PartialEq)] pub enum TessError { #[error("Tesseract not found. Please check installation path!")] TesseractNotFoundError, - #[error("Invalid tesseract version!\n{0}")] + + #[error("Invalid Tesseract version!\n{0}")] VersionError(String), + #[error( "Image format not within the list of allowed image formats:\n\ - ['JPEG','JPG','PNG','PBM','PGM','PPM','TIFF','BMP','GIF','WEBP']" + ['JPEG','JPG','PNG','PBM','PGM','PPM','TIFF','BMP','GIF','WEBP']" )] ImageFormatError, + #[error("Please assign a valid image path.")] ImageNotFoundError, - #[error("Data could not be parsed.")] - ParseError, + + #[error("Could not parse {0}.")] + ParseError(String), + #[error("Could not create tempfile.\n{0}")] TempfileError(String), + #[error("Could not save dynamic image to tempfile.\n{0}")] DynamicImageError(String), } diff --git a/src/tesseract/input.rs b/src/tesseract/input.rs index f4bd798..52c19e2 100644 --- a/src/tesseract/input.rs +++ b/src/tesseract/input.rs @@ -33,12 +33,13 @@ impl Args { if self.config_variables.is_empty() { return None; } - let parameter = self - .config_variables - .iter() - .map(|(key, value)| format!("{}={}", key, value)) - .fold(String::new(), |acc, x| format!("{} {}", acc, x)); - Some(parameter) + Some( + self.config_variables + .iter() + .map(|(key, value)| format!("{}={}", key, value)) + .collect::>() + .join(" "), + ) } } @@ -56,10 +57,6 @@ impl Image { }) } - const FORMATS: [&'static str; 10] = [ - "JPEG", "JPG", "PNG", "PBM", "PGM", "PPM", "TIFF", "BMP", "GIF", "WEBP", - ]; - fn check_image_format(path: &Path) -> TessResult<()> { let binding = path .extension() @@ -67,7 +64,10 @@ impl Image { .to_str() .ok_or(TessError::ImageFormatError)? .to_uppercase(); - if Self::FORMATS.contains(&binding.as_str()) { + if matches!( + binding.as_str(), + "JPEG" | "JPG" | "PNG" | "PBM" | "PGM" | "PPM" | "TIFF" | "BMP" | "GIF" | "WEBP" + ) { Ok(()) } else { Err(TessError::ImageFormatError) diff --git a/src/tesseract/output_boxes.rs b/src/tesseract/output_boxes.rs index 695ccd9..04ac2ec 100644 --- a/src/tesseract/output_boxes.rs +++ b/src/tesseract/output_boxes.rs @@ -1,6 +1,5 @@ -use core::fmt; - use super::*; +use core::fmt; #[derive(Debug, PartialEq)] pub struct BoxOutput { @@ -34,23 +33,17 @@ impl fmt::Display for Box { } } -impl Box { - fn parse(line: &str) -> Option { +impl FromLine for Box { + fn from_line(line: &str) -> Option { let mut x = line.split_whitespace(); - let symbol = x.next()?.to_string(); - let left = str::parse::(x.next()?).ok()?; - let bottom = str::parse::(x.next()?).ok()?; - let right = str::parse::(x.next()?).ok()?; - let top = str::parse::(x.next()?).ok()?; - let page = str::parse::(x.next()?).ok()?; Some(Box { - symbol, - left, - bottom, - right, - top, - page, + symbol: x.next()?.to_string(), + left: parse_next(&mut x)?, + bottom: parse_next(&mut x)?, + right: parse_next(&mut x)?, + top: parse_next(&mut x)?, + page: parse_next(&mut x)?, }) } } @@ -69,8 +62,7 @@ fn string_to_boxes(output: &str) -> TessResult> { .lines() .into_iter() .map(|line| Box::parse(line.into())) - .collect::>>() - .ok_or(TessError::ParseError) + .collect::<_>() } #[cfg(test)] @@ -129,4 +121,15 @@ mod tests { .unwrap() ); } + + #[test] + fn test_string_to_boxes_parse_error() { + let result = string_to_boxes("L 18 X 36 59 0"); + assert_eq!( + result, + Err(TessError::ParseError( + "invalid line 'L 18 X 36 59 0'".into() + )) + ) + } } diff --git a/src/tesseract/output_config_parameters.rs b/src/tesseract/output_config_parameters.rs index 07d1226..1a6c975 100644 --- a/src/tesseract/output_config_parameters.rs +++ b/src/tesseract/output_config_parameters.rs @@ -1,6 +1,5 @@ -use core::fmt; - use super::*; +use core::fmt; #[derive(Debug, PartialEq)] pub struct ConfigParameterOutput { @@ -31,10 +30,10 @@ impl fmt::Display for ConfigParameter { } } -impl ConfigParameter { - fn parse(line: &str) -> Option { - let (name, x) = line.split_once("\t").unwrap(); - let (default_value, description) = x.split_once("\t").unwrap(); +impl FromLine for ConfigParameter { + fn from_line(line: &str) -> Option { + let (name, x) = line.split_once("\t")?; + let (default_value, description) = x.split_once("\t")?; Some(ConfigParameter { name: name.into(), @@ -63,8 +62,7 @@ fn string_to_config_parameter_output(output: &str) -> TessResult>>() - .ok_or(TessError::ParseError) + .collect::<_>() } #[cfg(test)] @@ -108,4 +106,19 @@ mod tests { assert_eq!(*x, expected); } + + #[test] + fn test_string_to_config_parameter_output_parse_error() { + let result = string_to_config_parameter_output( + "Tesseract parameters:\n\ + log_level\t2147483647\tLogging level\n\ + Test\n\ + textord_debug_block\t0\tBlock to do debug on\n\ + textord_pitch_range\t2\tMax range test on pitch", + ); + assert_eq!( + result, + Err(TessError::ParseError("invalid line 'Test'".into())) + ) + } } diff --git a/src/tesseract/output_data.rs b/src/tesseract/output_data.rs index e825881..60a787e 100644 --- a/src/tesseract/output_data.rs +++ b/src/tesseract/output_data.rs @@ -1,6 +1,5 @@ -use core::fmt; - use super::*; +use core::fmt; #[derive(Debug, PartialEq)] pub struct DataOutput { @@ -51,35 +50,22 @@ impl fmt::Display for Data { } } -impl Data { - fn parse(line: &str) -> Option { +impl FromLine for Data { + fn from_line(line: &str) -> Option { let mut x = line.split_whitespace(); - let level = str::parse::(x.next()?).ok()?; - let page_num = str::parse::(x.next()?).ok()?; - let block_num = str::parse::(x.next()?).ok()?; - let par_num = str::parse::(x.next()?).ok()?; - let line_num = str::parse::(x.next()?).ok()?; - let word_num = str::parse::(x.next()?).ok()?; - let left = str::parse::(x.next()?).ok()?; - let top = str::parse::(x.next()?).ok()?; - let width = str::parse::(x.next()?).ok()?; - let height = str::parse::(x.next()?).ok()?; - let conf = str::parse::(x.next()?).ok()?; - let text = x.next().unwrap_or("").to_string(); - Some(Data { - level, - page_num, - block_num, - par_num, - line_num, - word_num, - left, - top, - width, - height, - conf, - text, + level: parse_next(&mut x)?, + page_num: parse_next(&mut x)?, + block_num: parse_next(&mut x)?, + par_num: parse_next(&mut x)?, + line_num: parse_next(&mut x)?, + word_num: parse_next(&mut x)?, + left: parse_next(&mut x)?, + top: parse_next(&mut x)?, + width: parse_next(&mut x)?, + height: parse_next(&mut x)?, + conf: parse_next(&mut x)?, + text: x.next().unwrap_or("").to_string(), }) } } @@ -101,8 +87,7 @@ fn string_to_data(output: &str) -> TessResult> { .into_iter() .skip(1) .map(|line| Data::parse(line.into())) - .collect::>>() - .ok_or(TessError::ParseError) + .collect::<_>() } #[cfg(test)] @@ -156,4 +141,14 @@ mod tests { .unwrap() ); } + + #[test] + fn test_string_to_data_parse_error() { + let result = string_to_data("level page_num block_num par_num line_num word_num left top width height conf text\n\ + Test"); + assert_eq!( + result, + Err(TessError::ParseError("invalid line 'Test'".into())) + ) + } } diff --git a/src/tesseract/parse_line_util.rs b/src/tesseract/parse_line_util.rs new file mode 100644 index 0000000..d70a305 --- /dev/null +++ b/src/tesseract/parse_line_util.rs @@ -0,0 +1,15 @@ +use crate::{TessError, TessResult}; + +pub(crate) fn parse_next( + iter: &mut std::str::SplitWhitespace<'_>, +) -> Option { + iter.next()?.parse::().ok() +} + +pub(crate) trait FromLine: Sized { + fn from_line(line: &str) -> Option; + + fn parse(line: &str) -> TessResult { + Self::from_line(line).ok_or(TessError::ParseError(format!("invalid line '{}'", line))) + } +}