Skip to content

Commit

Permalink
Merge pull request #8 from Icekey/main
Browse files Browse the repository at this point in the history
Improved Error Messages
  • Loading branch information
thomasgruebl authored Mar 30, 2023
2 parents f4b3556 + 45e4323 commit 49d1cc7
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 76 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty-tesseract"
version = "1.1.3"
version = "1.1.4"
edition = "2021"
authors = ["thomasgruebl"]
description = "A Rust wrapper for Google Tesseract"
Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ A Rust wrapper for Google Tesseract
Add the following line to your <b>Cargo.toml</b> file:

```rust
rusty-tesseract = "1.1.3"
rusty-tesseract = "1.1.4"
```

## Description
Expand Down Expand Up @@ -94,7 +94,10 @@ Choose either string, bounding box or data output:
// define parameters
let mut my_args = Args {
lang: "eng",
config_variables: "'tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'",
config_variables: HashMap::from([(
"tessedit_char_whitelist".into(),
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".into(),
)]),
dpi: 150,
psm: 6,
oem: 3
Expand Down
3 changes: 3 additions & 0 deletions src/tesseract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ pub use input::*;
pub use output_boxes::*;
pub use output_config_parameters::*;
pub use output_data::*;

mod parse_line_util;
use parse_line_util::*;
16 changes: 11 additions & 5 deletions src/tesseract/error.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
use thiserror::Error;

#[derive(Error, Debug)]
#[derive(Error, Debug, PartialEq)]
pub enum TessError {
#[error("Tesseract not found. Please check installation path!")]
TesseractNotFoundError,
#[error("Invalid tesseract version!\n{0}")]

#[error("Invalid Tesseract version!\n{0}")]
VersionError(String),

#[error(
"Image format not within the list of allowed image formats:\n\
['JPEG','JPG','PNG','PBM','PGM','PPM','TIFF','BMP','GIF','WEBP']"
['JPEG','JPG','PNG','PBM','PGM','PPM','TIFF','BMP','GIF','WEBP']"
)]
ImageFormatError,

#[error("Please assign a valid image path.")]
ImageNotFoundError,
#[error("Data could not be parsed.")]
ParseError,

#[error("Could not parse {0}.")]
ParseError(String),

#[error("Could not create tempfile.\n{0}")]
TempfileError(String),

#[error("Could not save dynamic image to tempfile.\n{0}")]
DynamicImageError(String),
}
Expand Down
22 changes: 11 additions & 11 deletions src/tesseract/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@ impl Args {
if self.config_variables.is_empty() {
return None;
}
let parameter = self
.config_variables
.iter()
.map(|(key, value)| format!("{}={}", key, value))
.fold(String::new(), |acc, x| format!("{} {}", acc, x));
Some(parameter)
Some(
self.config_variables
.iter()
.map(|(key, value)| format!("{}={}", key, value))
.collect::<Vec<_>>()
.join(" "),
)
}
}

Expand All @@ -56,18 +57,17 @@ impl Image {
})
}

const FORMATS: [&'static str; 10] = [
"JPEG", "JPG", "PNG", "PBM", "PGM", "PPM", "TIFF", "BMP", "GIF", "WEBP",
];

fn check_image_format(path: &Path) -> TessResult<()> {
let binding = path
.extension()
.ok_or(TessError::ImageFormatError)?
.to_str()
.ok_or(TessError::ImageFormatError)?
.to_uppercase();
if Self::FORMATS.contains(&binding.as_str()) {
if matches!(
binding.as_str(),
"JPEG" | "JPG" | "PNG" | "PBM" | "PGM" | "PPM" | "TIFF" | "BMP" | "GIF" | "WEBP"
) {
Ok(())
} else {
Err(TessError::ImageFormatError)
Expand Down
39 changes: 21 additions & 18 deletions src/tesseract/output_boxes.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use core::fmt;

use super::*;
use core::fmt;

#[derive(Debug, PartialEq)]
pub struct BoxOutput {
Expand Down Expand Up @@ -34,23 +33,17 @@ impl fmt::Display for Box {
}
}

impl Box {
fn parse(line: &str) -> Option<Self> {
impl FromLine for Box {
fn from_line(line: &str) -> Option<Self> {
let mut x = line.split_whitespace();
let symbol = x.next()?.to_string();
let left = str::parse::<i32>(x.next()?).ok()?;
let bottom = str::parse::<i32>(x.next()?).ok()?;
let right = str::parse::<i32>(x.next()?).ok()?;
let top = str::parse::<i32>(x.next()?).ok()?;
let page = str::parse::<i32>(x.next()?).ok()?;

Some(Box {
symbol,
left,
bottom,
right,
top,
page,
symbol: x.next()?.to_string(),
left: parse_next(&mut x)?,
bottom: parse_next(&mut x)?,
right: parse_next(&mut x)?,
top: parse_next(&mut x)?,
page: parse_next(&mut x)?,
})
}
}
Expand All @@ -69,8 +62,7 @@ fn string_to_boxes(output: &str) -> TessResult<Vec<Box>> {
.lines()
.into_iter()
.map(|line| Box::parse(line.into()))
.collect::<Option<Vec<Box>>>()
.ok_or(TessError::ParseError)
.collect::<_>()
}

#[cfg(test)]
Expand Down Expand Up @@ -129,4 +121,15 @@ mod tests {
.unwrap()
);
}

#[test]
fn test_string_to_boxes_parse_error() {
let result = string_to_boxes("L 18 X 36 59 0");
assert_eq!(
result,
Err(TessError::ParseError(
"invalid line 'L 18 X 36 59 0'".into()
))
)
}
}
29 changes: 21 additions & 8 deletions src/tesseract/output_config_parameters.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use core::fmt;

use super::*;
use core::fmt;

#[derive(Debug, PartialEq)]
pub struct ConfigParameterOutput {
Expand Down Expand Up @@ -31,10 +30,10 @@ impl fmt::Display for ConfigParameter {
}
}

impl ConfigParameter {
fn parse(line: &str) -> Option<Self> {
let (name, x) = line.split_once("\t").unwrap();
let (default_value, description) = x.split_once("\t").unwrap();
impl FromLine for ConfigParameter {
fn from_line(line: &str) -> Option<Self> {
let (name, x) = line.split_once("\t")?;
let (default_value, description) = x.split_once("\t")?;

Some(ConfigParameter {
name: name.into(),
Expand Down Expand Up @@ -63,8 +62,7 @@ fn string_to_config_parameter_output(output: &str) -> TessResult<Vec<ConfigParam
.lines()
.skip(1)
.map(|line| ConfigParameter::parse(line))
.collect::<Option<Vec<ConfigParameter>>>()
.ok_or(TessError::ParseError)
.collect::<_>()
}

#[cfg(test)]
Expand Down Expand Up @@ -108,4 +106,19 @@ mod tests {

assert_eq!(*x, expected);
}

#[test]
fn test_string_to_config_parameter_output_parse_error() {
let result = string_to_config_parameter_output(
"Tesseract parameters:\n\
log_level\t2147483647\tLogging level\n\
Test\n\
textord_debug_block\t0\tBlock to do debug on\n\
textord_pitch_range\t2\tMax range test on pitch",
);
assert_eq!(
result,
Err(TessError::ParseError("invalid line 'Test'".into()))
)
}
}
57 changes: 26 additions & 31 deletions src/tesseract/output_data.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use core::fmt;

use super::*;
use core::fmt;

#[derive(Debug, PartialEq)]
pub struct DataOutput {
Expand Down Expand Up @@ -51,35 +50,22 @@ impl fmt::Display for Data {
}
}

impl Data {
fn parse(line: &str) -> Option<Self> {
impl FromLine for Data {
fn from_line(line: &str) -> Option<Self> {
let mut x = line.split_whitespace();
let level = str::parse::<i32>(x.next()?).ok()?;
let page_num = str::parse::<i32>(x.next()?).ok()?;
let block_num = str::parse::<i32>(x.next()?).ok()?;
let par_num = str::parse::<i32>(x.next()?).ok()?;
let line_num = str::parse::<i32>(x.next()?).ok()?;
let word_num = str::parse::<i32>(x.next()?).ok()?;
let left = str::parse::<i32>(x.next()?).ok()?;
let top = str::parse::<i32>(x.next()?).ok()?;
let width = str::parse::<i32>(x.next()?).ok()?;
let height = str::parse::<i32>(x.next()?).ok()?;
let conf = str::parse::<f32>(x.next()?).ok()?;
let text = x.next().unwrap_or("").to_string();

Some(Data {
level,
page_num,
block_num,
par_num,
line_num,
word_num,
left,
top,
width,
height,
conf,
text,
level: parse_next(&mut x)?,
page_num: parse_next(&mut x)?,
block_num: parse_next(&mut x)?,
par_num: parse_next(&mut x)?,
line_num: parse_next(&mut x)?,
word_num: parse_next(&mut x)?,
left: parse_next(&mut x)?,
top: parse_next(&mut x)?,
width: parse_next(&mut x)?,
height: parse_next(&mut x)?,
conf: parse_next(&mut x)?,
text: x.next().unwrap_or("").to_string(),
})
}
}
Expand All @@ -101,8 +87,7 @@ fn string_to_data(output: &str) -> TessResult<Vec<Data>> {
.into_iter()
.skip(1)
.map(|line| Data::parse(line.into()))
.collect::<Option<Vec<Data>>>()
.ok_or(TessError::ParseError)
.collect::<_>()
}

#[cfg(test)]
Expand Down Expand Up @@ -156,4 +141,14 @@ mod tests {
.unwrap()
);
}

#[test]
fn test_string_to_data_parse_error() {
let result = string_to_data("level page_num block_num par_num line_num word_num left top width height conf text\n\
Test");
assert_eq!(
result,
Err(TessError::ParseError("invalid line 'Test'".into()))
)
}
}
15 changes: 15 additions & 0 deletions src/tesseract/parse_line_util.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
use crate::{TessError, TessResult};

/// Takes the next whitespace-separated token from `iter` and parses it as `T`.
///
/// Returns `None` when the iterator is exhausted or when the token does not
/// parse as `T`. The token is consumed from the iterator in either case.
pub(crate) fn parse_next<T>(iter: &mut std::str::SplitWhitespace<'_>) -> Option<T>
where
    T: std::str::FromStr,
{
    let token = iter.next()?;
    token.parse().ok()
}

/// Line-oriented parsing for types that are built from a single line of text.
///
/// Implementors supply [`FromLine::from_line`]; [`FromLine::parse`] is provided
/// and converts a failed parse into a [`TessError::ParseError`] that quotes the
/// offending line.
pub(crate) trait FromLine: Sized {
    /// Attempts to build `Self` from `line`, returning `None` if the line is
    /// not valid for this type.
    fn from_line(line: &str) -> Option<Self>;

    /// Parses `line` via [`FromLine::from_line`].
    ///
    /// # Errors
    ///
    /// Returns `TessError::ParseError` carrying `invalid line '<line>'` when
    /// `from_line` yields `None`.
    fn parse(line: &str) -> TessResult<Self> {
        // `ok_or_else` defers building the message, so lines that parse
        // successfully do not pay for a `format!` allocation.
        Self::from_line(line)
            .ok_or_else(|| TessError::ParseError(format!("invalid line '{}'", line)))
    }
}

0 comments on commit 49d1cc7

Please sign in to comment.