From b24056329ed7d0c5175fcf2e2f0965bceac40e9a Mon Sep 17 00:00:00 2001 From: "JuniFruit@github.com" Date: Fri, 14 Jul 2023 18:38:46 +0300 Subject: [PATCH] added output_page fn --- src/lib.rs | 1342 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 868 insertions(+), 474 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f732088..dade88f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,54 +1,52 @@ extern crate lopdf; +use euclid::*; use lopdf::content::Content; use lopdf::*; -use euclid::*; use std::fmt::{Debug, Formatter}; +extern crate adobe_cmap_parser; extern crate encoding; extern crate euclid; -extern crate adobe_cmap_parser; extern crate type1_encoding_parser; extern crate unicode_normalization; -use euclid::vec2; -use encoding::{Encoding, DecoderTrap}; use encoding::all::UTF_16BE; +use encoding::{DecoderTrap, Encoding}; +use euclid::vec2; +use std::collections::hash_map::Entry; +use std::collections::HashMap; use std::fmt; -use std::str; use std::fs::File; -use std::slice::Iter; -use std::collections::HashMap; -use std::collections::hash_map::Entry; -use std::rc::Rc; use std::marker::PhantomData; +use std::rc::Rc; use std::result::Result; +use std::slice::Iter; +use std::str; mod core_fonts; +mod encodings; mod glyphnames; mod zapfglyphnames; -mod encodings; pub struct Space; pub type Transform = Transform2D; #[derive(Debug)] -pub enum OutputError -{ +pub enum OutputError { FormatError(std::fmt::Error), IoError(std::io::Error), - PdfError(lopdf::Error) + PdfError(lopdf::Error), } -impl std::fmt::Display for OutputError -{ +impl std::fmt::Display for OutputError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { match self { OutputError::FormatError(e) => write!(f, "Formating error: {}", e), OutputError::IoError(e) => write!(f, "IO error: {}", e), - OutputError::PdfError(e) => write!(f, "PDF error: {}", e) + OutputError::PdfError(e) => write!(f, "PDF error: {}", e), } } } -impl std::error::Error for OutputError{} +impl std::error::Error for OutputError {} impl From for OutputError { fn from(e: std::fmt::Error) -> Self { @@ -75,12 +73,12 @@ macro_rules! dlog { fn get_info(doc: &Document) -> Option<&Dictionary> { match doc.trailer.get(b"Info") { - Ok(&Object::Reference(ref id)) => { - match doc.get_object(*id) { - Ok(&Object::Dictionary(ref info)) => { return Some(info); } - _ => {} + Ok(&Object::Reference(ref id)) => match doc.get_object(*id) { + Ok(&Object::Dictionary(ref info)) => { + return Some(info); } - } + _ => {} + }, _ => {} } None @@ -88,12 +86,12 @@ fn get_info(doc: &Document) -> Option<&Dictionary> { fn get_catalog(doc: &Document) -> &Dictionary { match doc.trailer.get(b"Root").unwrap() { - &Object::Reference(ref id) => { - match doc.get_object(*id) { - Ok(&Object::Dictionary(ref catalog)) => { return catalog; } - _ => {} + &Object::Reference(ref id) => match doc.get_object(*id) { + Ok(&Object::Dictionary(ref catalog)) => { + return catalog; } - } + _ => {} + }, _ => {} } panic!(); @@ -102,13 +100,17 @@ fn get_catalog(doc: &Document) -> &Dictionary { fn get_pages(doc: &Document) -> &Dictionary { let catalog = get_catalog(doc); match catalog.get(b"Pages").unwrap() { - &Object::Reference(ref id) => { - match doc.get_object(*id) { - Ok(&Object::Dictionary(ref pages)) => { return pages; } - other => {dlog!("pages: {:?}", other)} + &Object::Reference(ref id) => match doc.get_object(*id) { + Ok(&Object::Dictionary(ref pages)) => { + return pages; } + other => { + dlog!("pages: {:?}", other) + } + }, + other => { + dlog!("pages: {:?}", other) } - other => { dlog!("pages: {:?}", other)} } dlog!("catalog {:?}", catalog); panic!(); @@ -116,63 +118,66 @@ fn get_pages(doc: &Document) -> &Dictionary { #[allow(non_upper_case_globals)] const PDFDocEncoding: &'static [u16] = &[ - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, - 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, - 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x02d8, 0x02c7, 0x02c6, - 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, 0x0020, 0x0021, 0x0022, 0x0023, - 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, - 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, - 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, - 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, - 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, - 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, - 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, - 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, - 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, - 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, - 0x007e, 0x0000, 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, - 0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, - 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, 0x0178, - 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000, 0x20ac, 0x00a1, - 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, - 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, - 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, - 0x00bd, 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, - 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, - 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, - 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 0x00e0, - 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, - 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, - 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, - 0x00fc, 0x00fd, 0x00fe, 0x00ff]; + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, + 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, 0x0020, 0x0021, 0x0022, 0x0023, + 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, + 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, + 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, + 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, 0x2022, 0x2020, 0x2021, 0x2026, + 0x2014, 0x2013, 0x0192, 0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, + 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, 0x0178, 0x017d, 0x0131, 0x0142, + 0x0153, 0x0161, 0x017e, 0x0000, 0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, + 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, + 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 0x00e0, 0x00e1, 0x00e2, 0x00e3, + 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, + 0x00fc, 0x00fd, 0x00fe, 0x00ff, +]; fn pdf_to_utf8(s: &[u8]) -> String { if s.len() > 2 && s[0] == 0xfe && s[1] == 0xff { - return UTF_16BE.decode(&s[2..], DecoderTrap::Strict).unwrap() + return UTF_16BE.decode(&s[2..], DecoderTrap::Strict).unwrap(); } else { - let r : Vec = s.iter().map(|x| *x).flat_map(|x| { - let k = PDFDocEncoding[x as usize]; - vec![(k>>8) as u8, k as u8].into_iter()}).collect(); - return UTF_16BE.decode(&r, DecoderTrap::Strict).unwrap() + let r: Vec = s + .iter() + .map(|x| *x) + .flat_map(|x| { + let k = PDFDocEncoding[x as usize]; + vec![(k >> 8) as u8, k as u8].into_iter() + }) + .collect(); + return UTF_16BE.decode(&r, DecoderTrap::Strict).unwrap(); } } fn to_utf8(encoding: &[u16], s: &[u8]) -> String { if s.len() > 2 && s[0] == 0xfe && s[1] == 0xff { - return UTF_16BE.decode(&s[2..], DecoderTrap::Strict).unwrap() + return UTF_16BE.decode(&s[2..], DecoderTrap::Strict).unwrap(); } else { - let r : Vec = s.iter().map(|x| *x).flat_map(|x| { - let k = encoding[x as usize]; - vec![(k>>8) as u8, k as u8].into_iter()}).collect(); - return UTF_16BE.decode(&r, DecoderTrap::Strict).unwrap() + let r: Vec = s + .iter() + .map(|x| *x) + .flat_map(|x| { + let k = encoding[x as usize]; + vec![(k >> 8) as u8, k as u8].into_iter() + }) + .collect(); + return UTF_16BE.decode(&r, DecoderTrap::Strict).unwrap(); } } - fn maybe_deref<'a>(doc: &'a Document, o: &'a Object) -> &'a Object { match o { &Object::Reference(r) => doc.get_object(r).expect("missing object reference"), - _ => o + _ => o, } } @@ -186,13 +191,16 @@ trait FromOptObj<'a> { } // conditionally convert to Self returns None if the conversion failed -trait FromObj<'a> where Self: std::marker::Sized { +trait FromObj<'a> +where + Self: std::marker::Sized, +{ fn from_obj(doc: &'a Document, obj: &'a Object) -> Option; } impl<'a, T: FromObj<'a>> FromOptObj<'a> for Option { fn from_opt_obj(doc: &'a Document, obj: Option<&'a Object>, _key: &[u8]) -> Self { - obj.and_then(|x| T::from_obj(doc,x)) + obj.and_then(|x| T::from_obj(doc, x)) } } @@ -206,9 +214,14 @@ impl<'a, T: FromObj<'a>> FromOptObj<'a> for T { // on arrays, streams and dicts impl<'a, T: FromObj<'a>> FromObj<'a> for Vec { fn from_obj(doc: &'a Document, obj: &'a Object) -> Option { - maybe_deref(doc, obj).as_array().map(|x| x.iter() - .map(|x| T::from_obj(doc, x).expect("wrong type")) - .collect()).ok() + maybe_deref(doc, obj) + .as_array() + .map(|x| { + x.iter() + .map(|x| T::from_obj(doc, x).expect("wrong type")) + .collect() + }) + .ok() } } @@ -216,21 +229,34 @@ impl<'a, T: FromObj<'a>> FromObj<'a> for Vec { // we don't want to do that impl<'a, T: FromObj<'a>> FromObj<'a> for [T; 4] { fn from_obj(doc: &'a Document, obj: &'a Object) -> Option { - maybe_deref(doc, obj).as_array().map(|x| { - let mut all = x.iter() - .map(|x| T::from_obj(doc, x).expect("wrong type")); - [all.next().unwrap(), all.next().unwrap(), all.next().unwrap(), all.next().unwrap()] - }).ok() + maybe_deref(doc, obj) + .as_array() + .map(|x| { + let mut all = x.iter().map(|x| T::from_obj(doc, x).expect("wrong type")); + [ + all.next().unwrap(), + all.next().unwrap(), + all.next().unwrap(), + all.next().unwrap(), + ] + }) + .ok() } } impl<'a, T: FromObj<'a>> FromObj<'a> for [T; 3] { fn from_obj(doc: &'a Document, obj: &'a Object) -> Option { - maybe_deref(doc, obj).as_array().map(|x| { - let mut all = x.iter() - .map(|x| T::from_obj(doc, x).expect("wrong type")); - [all.next().unwrap(), all.next().unwrap(), all.next().unwrap()] - }).ok() + maybe_deref(doc, obj) + .as_array() + .map(|x| { + let mut all = x.iter().map(|x| T::from_obj(doc, x).expect("wrong type")); + [ + all.next().unwrap(), + all.next().unwrap(), + all.next().unwrap(), + ] + }) + .ok() } } @@ -239,7 +265,7 @@ impl<'a> FromObj<'a> for f64 { match obj { &Object::Integer(i) => Some(i as f64), &Object::Real(f) => Some(f.into()), - _ => None + _ => None, } } } @@ -248,7 +274,7 @@ impl<'a> FromObj<'a> for i64 { fn from_obj(_doc: &Document, obj: &Object) -> Option { match obj { &Object::Integer(i) => Some(i), - _ => None + _ => None, } } } @@ -280,19 +306,35 @@ fn maybe_get<'a, T: FromObj<'a>>(doc: &'a Document, dict: &'a Dictionary, key: & } fn get_name_string<'a>(doc: &'a Document, dict: &'a Dictionary, key: &[u8]) -> String { - pdf_to_utf8(dict.get(key).map(|o| maybe_deref(doc, o)).unwrap_or_else(|_| panic!("deref")).as_name().expect("name")) + pdf_to_utf8( + dict.get(key) + .map(|o| maybe_deref(doc, o)) + .unwrap_or_else(|_| panic!("deref")) + .as_name() + .expect("name"), + ) } #[allow(dead_code)] -fn maybe_get_name_string<'a>(doc: &'a Document, dict: &'a Dictionary, key: &[u8]) -> Option { - maybe_get_obj(doc, dict, key).and_then(|n| n.as_name().ok()).map(|n| pdf_to_utf8(n)) +fn maybe_get_name_string<'a>( + doc: &'a Document, + dict: &'a Dictionary, + key: &[u8], +) -> Option { + maybe_get_obj(doc, dict, key) + .and_then(|n| n.as_name().ok()) + .map(|n| pdf_to_utf8(n)) } fn maybe_get_name<'a>(doc: &'a Document, dict: &'a Dictionary, key: &[u8]) -> Option<&'a [u8]> { maybe_get_obj(doc, dict, key).and_then(|n| n.as_name().ok()) } -fn maybe_get_array<'a>(doc: &'a Document, dict: &'a Dictionary, key: &[u8]) -> Option<&'a Vec> { +fn maybe_get_array<'a>( + doc: &'a Document, + dict: &'a Dictionary, + key: &[u8], +) -> Option<&'a Vec> { maybe_get_obj(doc, dict, key).and_then(|n| n.as_array().ok()) } @@ -315,7 +357,6 @@ struct PdfType3Font<'a> { widths: HashMap, // should probably just use i32 here } - fn make_font<'a>(doc: &'a Document, font: &'a Dictionary) -> Rc { let subtype = get_name_string(doc, font, b"Subtype"); dlog!("MakeFont({})", subtype); @@ -330,20 +371,20 @@ fn make_font<'a>(doc: &'a Document, font: &'a Dictionary) -> Rc bool { match name { - "Courier-Bold" | - "Courier-BoldOblique" | - "Courier-Oblique" | - "Courier" | - "Helvetica-Bold" | - "Helvetica-BoldOblique" | - "Helvetica-Oblique" | - "Helvetica" | - "Symbol" | - "Times-Bold" | - "Times-BoldItalic" | - "Times-Italic" | - "Times-Roman" | - "ZapfDingbats" => true, + "Courier-Bold" + | "Courier-BoldOblique" + | "Courier-Oblique" + | "Courier" + | "Helvetica-Bold" + | "Helvetica-BoldOblique" + | "Helvetica-Oblique" + | "Helvetica" + | "Symbol" + | "Times-Bold" + | "Times-BoldItalic" + | "Times-Italic" + | "Times-Roman" + | "ZapfDingbats" => true, _ => false, } } @@ -353,10 +394,17 @@ fn encoding_to_unicode_table(name: &[u8]) -> Vec { b"MacRomanEncoding" => encodings::MAC_ROMAN_ENCODING, b"MacExpertEncoding" => encodings::MAC_EXPERT_ENCODING, b"WinAnsiEncoding" => encodings::WIN_ANSI_ENCODING, - _ => panic!("unexpected encoding {:?}", pdf_to_utf8(name)) + _ => panic!("unexpected encoding {:?}", pdf_to_utf8(name)), }; - let encoding_table = encoding.iter() - .map(|x| if let &Some(x) = x { glyphnames::name_to_unicode(x).unwrap() } else { 0 }) + let encoding_table = encoding + .iter() + .map(|x| { + if let &Some(x) = x { + glyphnames::name_to_unicode(x).unwrap() + } else { + 0 + } + }) .collect(); encoding_table } @@ -373,7 +421,13 @@ impl<'a> PdfSimpleFont<'a> { let subtype = get_name_string(doc, font, b"Subtype"); let encoding: Option<&Object> = get(doc, font, b"Encoding"); - dlog!("base_name {} {} enc:{:?} {:?}", base_name, subtype, encoding, font); + dlog!( + "base_name {} {} enc:{:?} {:?}", + base_name, + subtype, + encoding, + font + ); let descriptor: Option<&Dictionary> = get(doc, font, b"FontDescriptor"); let mut type1_encoding = None; if let Some(descriptor) = descriptor { @@ -384,10 +438,12 @@ impl<'a> PdfSimpleFont<'a> { Some(&Object::Stream(ref s)) => { let s = get_contents(s); //dlog!("font contents {:?}", pdf_to_utf8(&s)); - type1_encoding = Some(type1_encoding_parser::get_encoding_map(&s).expect("encoding")); - + type1_encoding = + Some(type1_encoding_parser::get_encoding_map(&s).expect("encoding")); + } + _ => { + dlog!("font file {:?}", file) } - _ => { dlog!("font file {:?}", file) } } } else if subtype == "TrueType" { let file = maybe_get_obj(doc, descriptor, b"FontFile2"); @@ -396,7 +452,9 @@ impl<'a> PdfSimpleFont<'a> { let _s = get_contents(s); //File::create(format!("/tmp/{}", base_name)).unwrap().write_all(&s); } - _ => { dlog!("font file {:?}", file) } + _ => { + dlog!("font file {:?}", file) + } } } @@ -406,13 +464,15 @@ impl<'a> PdfSimpleFont<'a> { dlog!("font file {:?}", s); } None => {} - _ => { dlog!("unexpected") } + _ => { + dlog!("unexpected") + } } let charset = maybe_get_obj(doc, descriptor, b"CharSet"); let _charset = match charset { - Some(&Object::String(ref s, _)) => { Some(pdf_to_utf8(&s)) } - _ => { None } + Some(&Object::String(ref s, _)) => Some(pdf_to_utf8(&s)), + _ => None, }; //dlog!("charset {:?}", charset); } @@ -427,12 +487,13 @@ impl<'a> PdfSimpleFont<'a> { } Some(&Object::Dictionary(ref encoding)) => { //dlog!("Encoding {:?}", encoding); - let mut table = if let Some(base_encoding) = maybe_get_name(doc, encoding, b"BaseEncoding") { - dlog!("BaseEncoding {:?}", base_encoding); - encoding_to_unicode_table(base_encoding) - } else { - Vec::from(PDFDocEncoding) - }; + let mut table = + if let Some(base_encoding) = maybe_get_name(doc, encoding, b"BaseEncoding") { + dlog!("BaseEncoding {:?}", base_encoding); + encoding_to_unicode_table(base_encoding) + } else { + Vec::from(PDFDocEncoding) + }; let differences = maybe_get_array(doc, encoding, b"Differences"); if let Some(differences) = differences { dlog!("Differences"); @@ -440,19 +501,23 @@ impl<'a> PdfSimpleFont<'a> { for o in differences { let o = maybe_deref(doc, o); match o { - &Object::Integer(i) => { code = i; }, + &Object::Integer(i) => { + code = i; + } &Object::Name(ref n) => { let name = pdf_to_utf8(&n); // XXX: names of Type1 fonts can map to arbitrary strings instead of real // unicode names, so we should probably handle this differently let unicode = glyphnames::name_to_unicode(&name); - if let Some(unicode) = unicode{ + if let Some(unicode) = unicode { table[code as usize] = unicode; if let Some(ref mut unicode_map) = unicode_map { let be = [unicode]; match unicode_map.entry(code as u32) { // If there's a unicode table entry missing use one based on the name - Entry::Vacant(v) => { v.insert(String::from_utf16(&be).unwrap()); } + Entry::Vacant(v) => { + v.insert(String::from_utf16(&be).unwrap()); + } Entry::Occupied(e) => { if e.get() != &String::from_utf16(&be).unwrap() { println!("Unicode mismatch"); @@ -467,7 +532,9 @@ impl<'a> PdfSimpleFont<'a> { } code += 1; } - _ => { panic!("wrong type {:?}", o); } + _ => { + panic!("wrong type {:?}", o); + } } } } @@ -490,27 +557,48 @@ impl<'a> PdfSimpleFont<'a> { } encoding_table = Some(table) } else if subtype == "TrueType" { - encoding_table = Some(encodings::WIN_ANSI_ENCODING.iter() - .map(|x| if let &Some(x) = x { glyphnames::name_to_unicode(x).unwrap() } else { 0 }) - .collect()); + encoding_table = Some( + encodings::WIN_ANSI_ENCODING + .iter() + .map(|x| { + if let &Some(x) = x { + glyphnames::name_to_unicode(x).unwrap() + } else { + 0 + } + }) + .collect(), + ); } } - _ => { panic!() } + _ => { + panic!() + } } let mut width_map = HashMap::new(); /* "Ordinarily, a font dictionary that refers to one of the standard fonts - should omit the FirstChar, LastChar, Widths, and FontDescriptor entries. - However, it is permissible to override a standard font by including these - entries and embedding the font program in the PDF file." + should omit the FirstChar, LastChar, Widths, and FontDescriptor entries. + However, it is permissible to override a standard font by including these + entries and embedding the font program in the PDF file." - Note: some PDFs include a descriptor but still don't include these entries */ + Note: some PDFs include a descriptor but still don't include these entries */ // If we have widths prefer them over the core font widths. Needed for https://dkp.de/wp-content/uploads/parteitage/Sozialismusvorstellungen-der-DKP.pdf - if let (Some(first_char), Some(last_char), Some(widths)) = (maybe_get::(doc, font, b"FirstChar"), maybe_get::(doc, font, b"LastChar"), maybe_get::>(doc, font, b"Widths")) { + if let (Some(first_char), Some(last_char), Some(widths)) = ( + maybe_get::(doc, font, b"FirstChar"), + maybe_get::(doc, font, b"LastChar"), + maybe_get::>(doc, font, b"Widths"), + ) { // Some PDF's don't have these like fips-197.pdf let mut i: i64 = 0; - dlog!("first_char {:?}, last_char: {:?}, widths: {} {:?}", first_char, last_char, widths.len(), widths); + dlog!( + "first_char {:?}, last_char: {:?}, widths: {} {:?}", + first_char, + last_char, + widths.len(), + widths + ); for w in widths { width_map.insert((first_char + i) as CharCode, w); @@ -541,7 +629,8 @@ impl<'a> PdfSimpleFont<'a> { // -1 is "not encoded" if w.0 != -1 { table[w.0 as usize] = if base_name == "ZapfDingbats" { - zapfglyphnames::zapfdigbats_names_to_unicode(w.2).unwrap_or_else(|| panic!("bad name {:?}", w)) + zapfglyphnames::zapfdigbats_names_to_unicode(w.2) + .unwrap_or_else(|| panic!("bad name {:?}", w)) } else { glyphnames::name_to_unicode(w.2).unwrap() } @@ -556,11 +645,11 @@ impl<'a> PdfSimpleFont<'a> { encoding_table = Some(encoding.to_vec()); } /* "Ordinarily, a font dictionary that refers to one of the standard fonts - should omit the FirstChar, LastChar, Widths, and FontDescriptor entries. - However, it is permissible to override a standard font by including these - entries and embedding the font program in the PDF file." + should omit the FirstChar, LastChar, Widths, and FontDescriptor entries. + However, it is permissible to override a standard font by including these + entries and embedding the font program in the PDF file." - Note: some PDFs include a descriptor but still don't include these entries */ + Note: some PDFs include a descriptor but still don't include these entries */ // assert!(maybe_get_obj(doc, font, b"FirstChar").is_none()); // assert!(maybe_get_obj(doc, font, b"LastChar").is_none()); // assert!(maybe_get_obj(doc, font, b"Widths").is_none()); @@ -570,7 +659,14 @@ impl<'a> PdfSimpleFont<'a> { panic!("no widths"); } - PdfSimpleFont {doc, font, widths: width_map, encoding: encoding_table, default_width: None, unicode_map} + PdfSimpleFont { + doc, + font, + widths: width_map, + encoding: encoding_table, + default_width: None, + unicode_map, + } } #[allow(dead_code)] @@ -587,7 +683,8 @@ impl<'a> PdfSimpleFont<'a> { } #[allow(dead_code)] fn get_widths(&self) -> Option<&Vec> { - maybe_get_obj(self.doc, self.font, b"Widths").map(|widths| widths.as_array().expect("Widths should be an array")) + maybe_get_obj(self.doc, self.font, b"Widths") + .map(|widths| widths.as_array().expect("Widths should be an array")) } /* For type1: This entry is obsolescent and its use is no longer recommended. (See * implementation note 42 in Appendix H.) */ @@ -598,15 +695,17 @@ impl<'a> PdfSimpleFont<'a> { #[allow(dead_code)] fn get_descriptor(&self) -> Option { - maybe_get_obj(self.doc, self.font, b"FontDescriptor").and_then(|desc| desc.as_dict().ok()).map(|desc| PdfFontDescriptor{desc: desc, doc: self.doc}) + maybe_get_obj(self.doc, self.font, b"FontDescriptor") + .and_then(|desc| desc.as_dict().ok()) + .map(|desc| PdfFontDescriptor { + desc: desc, + doc: self.doc, + }) } } - - impl<'a> PdfType3Font<'a> { fn new(doc: &'a Document, font: &'a Dictionary) -> PdfType3Font<'a> { - let unicode_map = get_unicode_map(doc, font); let encoding: Option<&Object> = get(doc, font, b"Encoding"); @@ -618,25 +717,28 @@ impl<'a> PdfType3Font<'a> { } Some(&Object::Dictionary(ref encoding)) => { //dlog!("Encoding {:?}", encoding); - let mut table = if let Some(base_encoding) = maybe_get_name(doc, encoding, b"BaseEncoding") { - dlog!("BaseEncoding {:?}", base_encoding); - encoding_to_unicode_table(base_encoding) - } else { - Vec::from(PDFDocEncoding) - }; + let mut table = + if let Some(base_encoding) = maybe_get_name(doc, encoding, b"BaseEncoding") { + dlog!("BaseEncoding {:?}", base_encoding); + encoding_to_unicode_table(base_encoding) + } else { + Vec::from(PDFDocEncoding) + }; let differences = maybe_get_array(doc, encoding, b"Differences"); if let Some(differences) = differences { dlog!("Differences"); let mut code = 0; for o in differences { match o { - &Object::Integer(i) => { code = i; }, + &Object::Integer(i) => { + code = i; + } &Object::Name(ref n) => { let name = pdf_to_utf8(&n); // XXX: names of Type1 fonts can map to arbitrary strings instead of real // unicode names, so we should probably handle this differently let unicode = glyphnames::name_to_unicode(&name); - if let Some(unicode) = unicode{ + if let Some(unicode) = unicode { table[code as usize] = unicode; } dlog!("{} = {} ({:?})", code, name, unicode); @@ -645,7 +747,9 @@ impl<'a> PdfType3Font<'a> { } code += 1; } - _ => { panic!("wrong type"); } + _ => { + panic!("wrong type"); + } } } } @@ -658,7 +762,9 @@ impl<'a> PdfType3Font<'a> { encoding_table = Some(table); } - _ => { panic!() } + _ => { + panic!() + } } let first_char: i64 = get(doc, font, b"FirstChar"); @@ -668,21 +774,32 @@ impl<'a> PdfType3Font<'a> { let mut width_map = HashMap::new(); let mut i = 0; - dlog!("first_char {:?}, last_char: {:?}, widths: {} {:?}", first_char, last_char, widths.len(), widths); + dlog!( + "first_char {:?}, last_char: {:?}, widths: {} {:?}", + first_char, + last_char, + widths.len(), + widths + ); for w in widths { width_map.insert((first_char + i) as CharCode, w); i += 1; } assert_eq!(first_char + i - 1, last_char); - PdfType3Font {doc, font, widths: width_map, encoding: encoding_table, unicode_map} + PdfType3Font { + doc, + font, + widths: width_map, + encoding: encoding_table, + unicode_map, + } } } type CharCode = u32; -struct PdfFontIter<'a> -{ +struct PdfFontIter<'a> { i: Iter<'a, u8>, font: &'a dyn PdfFont, } @@ -694,30 +811,33 @@ impl<'a> Iterator for PdfFontIter<'a> { } } -trait PdfFont : Debug { +trait PdfFont: Debug { fn get_width(&self, id: CharCode) -> f64; fn next_char(&self, iter: &mut Iter) -> Option<(CharCode, u8)>; fn decode_char(&self, char: CharCode) -> String; - /*fn char_codes<'a>(&'a self, chars: &'a [u8]) -> PdfFontIter { - let p = self; - PdfFontIter{i: chars.iter(), font: p as &PdfFont} - }*/ - + /*fn char_codes<'a>(&'a self, chars: &'a [u8]) -> PdfFontIter { + let p = self; + PdfFontIter{i: chars.iter(), font: p as &PdfFont} + }*/ } impl<'a> dyn PdfFont + 'a { fn char_codes(&'a self, chars: &'a [u8]) -> PdfFontIter { - PdfFontIter{i: chars.iter(), font: self} + PdfFontIter { + i: chars.iter(), + font: self, + } } fn decode(&self, chars: &[u8]) -> String { - let strings = self.char_codes(chars).map(|x| self.decode_char(x.0)).collect::>(); + let strings = self + .char_codes(chars) + .map(|x| self.decode_char(x.0)) + .collect::>(); strings.join("") } - } - impl<'a> PdfFont for PdfSimpleFont<'a> { fn get_width(&self, id: CharCode) -> f64 { let width = self.widths.get(&id); @@ -726,7 +846,13 @@ impl<'a> PdfFont for PdfSimpleFont<'a> { } else { let mut widths = self.widths.iter().collect::>(); widths.sort_by_key(|x| x.0); - dlog!("missing width for {} len(widths) = {}, {:?} falling back to default_width {:?}", id, self.widths.len(), widths, self.font); + dlog!( + "missing width for {} len(widths) = {}, {:?} falling back to default_width {:?}", + id, + self.widths.len(), + widths, + self.font + ); return self.default_width.unwrap(); } } @@ -743,20 +869,27 @@ impl<'a> PdfFont for PdfSimpleFont<'a> { if let Some(ref unicode_map) = self.unicode_map { let s = unicode_map.get(&char); let s = match s { - None => { panic!("missing char {:?} in map {:?} for {:?}", char, unicode_map, self.font)} - Some(s) => { s.clone() } + None => { + panic!( + "missing char {:?} in map {:?} for {:?}", + char, unicode_map, self.font + ) + } + Some(s) => s.clone(), }; - return s + return s; } - let encoding = self.encoding.as_ref().map(|x| &x[..]).unwrap_or(&PDFDocEncoding); + let encoding = self + .encoding + .as_ref() + .map(|x| &x[..]) + .unwrap_or(&PDFDocEncoding); //dlog!("char_code {:?} {:?}", char, self.encoding); let s = to_utf8(encoding, &slice); s } } - - impl<'a> fmt::Debug for PdfSimpleFont<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.font.fmt(f) @@ -785,20 +918,24 @@ impl<'a> PdfFont for PdfType3Font<'a> { if let Some(ref unicode_map) = self.unicode_map { let s = unicode_map.get(&char); let s = match s { - None => { panic!("missing char {:?} in map {:?}", char, unicode_map)} - Some(s) => { s.clone() } + None => { + panic!("missing char {:?} in map {:?}", char, unicode_map) + } + Some(s) => s.clone(), }; - return s + return s; } - let encoding = self.encoding.as_ref().map(|x| &x[..]).unwrap_or(&PDFDocEncoding); + let encoding = self + .encoding + .as_ref() + .map(|x| &x[..]) + .unwrap_or(&PDFDocEncoding); //dlog!("char_code {:?} {:?}", char, self.encoding); let s = to_utf8(encoding, &slice); s } } - - impl<'a> fmt::Debug for PdfType3Font<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.font.fmt(f) @@ -835,11 +972,11 @@ fn get_unicode_map<'a>(doc: &'a Document, font: &'a Dictionary) -> Option { + [0xd800..=0xdfff] => { // this range is not specified as not being encoded // we ignore them so we don't an error from from_utt16 continue; @@ -854,23 +991,28 @@ fn get_unicode_map<'a>(doc: &'a Document, font: &'a Dictionary) -> Option { } + None => {} Some(&Object::Name(ref name)) => { let name = pdf_to_utf8(name); assert!(name == "Identity-H"); } - _ => { panic!("unsupported cmap {:?}", to_unicode)} + _ => { + panic!("unsupported cmap {:?}", to_unicode) + } } unicode_map } - impl<'a> PdfCIDFont<'a> { fn new(doc: &'a Document, font: &'a Dictionary) -> PdfCIDFont<'a> { let base_name = get_name_string(doc, font, b"BaseFont"); - let descendants = maybe_get_array(doc, font, b"DescendantFonts").expect("Descendant fonts required"); - let ciddict = maybe_deref(doc, &descendants[0]).as_dict().expect("should be CID dict"); - let encoding = maybe_get_obj(doc, font, b"Encoding").expect("Encoding required in type0 fonts"); + let descendants = + maybe_get_array(doc, font, b"DescendantFonts").expect("Descendant fonts required"); + let ciddict = maybe_deref(doc, &descendants[0]) + .as_dict() + .expect("should be CID dict"); + let encoding = + maybe_get_obj(doc, font, b"Encoding").expect("Encoding required in type0 fonts"); dlog!("base_name {} {:?}", base_name, font); match encoding { @@ -883,7 +1025,9 @@ impl<'a> PdfCIDFont<'a> { let contents = get_contents(stream); dlog!("Stream: {}", String::from_utf8(contents.clone()).unwrap()); } - _ => { panic!("unsupported encoding {:?}", encoding)} + _ => { + panic!("unsupported encoding {:?}", encoding) + } } // Sometimes a Type0 font might refer to the same underlying data as regular font. In this case we may be able to extract some encoding @@ -905,12 +1049,12 @@ impl<'a> PdfCIDFont<'a> { let mut i = 0; if let Some(w) = w { while i < w.len() { - if let &Object::Array(ref wa) = w[i+1] { + if let &Object::Array(ref wa) = w[i + 1] { let cid = w[i].as_i64().expect("id should be num"); let mut j = 0; dlog!("wa: {:?} -> {:?}", cid, wa); for w in wa { - widths.insert((cid + j) as CharCode, as_num(w) ); + widths.insert((cid + j) as CharCode, as_num(w)); j += 1; } i += 2; @@ -925,7 +1069,14 @@ impl<'a> PdfCIDFont<'a> { } } } - PdfCIDFont{doc, font, widths, to_unicode: unicode_map, encoding: None, default_width: Some(default_width as f64) } + PdfCIDFont { + doc, + font, + widths, + to_unicode: unicode_map, + encoding: None, + default_width: Some(default_width as f64), + } } } @@ -939,21 +1090,21 @@ impl<'a> PdfFont for PdfCIDFont<'a> { dlog!("missing width for {} falling back to default_width", id); return self.default_width.unwrap(); } - }/* - fn decode(&self, chars: &[u8]) -> String { - self.char_codes(chars); + } /* + fn decode(&self, chars: &[u8]) -> String { + self.char_codes(chars); - //let utf16 = Vec::new(); + //let utf16 = Vec::new(); - let encoding = self.encoding.as_ref().map(|x| &x[..]).unwrap_or(&PDFDocEncoding); - to_utf8(encoding, chars) - }*/ + let encoding = self.encoding.as_ref().map(|x| &x[..]).unwrap_or(&PDFDocEncoding); + to_utf8(encoding, chars) + }*/ fn next_char(&self, iter: &mut Iter) -> Option<(CharCode, u8)> { let p = iter.next(); if let Some(&c) = p { let next = *iter.next().unwrap(); - Some((((c as u32) << 8) | next as u32, 2)) + Some((((c as u32) << 8) | next as u32, 2)) } else { None } @@ -963,7 +1114,12 @@ impl<'a> PdfFont for PdfCIDFont<'a> { if let Some(s) = s { s.clone() } else { - dlog!("Unknown character {:?} in {:?} {:?}", char, self.font, self.to_unicode); + dlog!( + "Unknown character {:?} in {:?} {:?}", + char, + self.font, + self.to_unicode + ); "".to_string() } } @@ -975,12 +1131,10 @@ impl<'a> fmt::Debug for PdfCIDFont<'a> { } } - - #[derive(Copy, Clone)] struct PdfFontDescriptor<'a> { desc: &'a Dictionary, - doc: &'a Document + doc: &'a Document, } impl<'a> PdfFontDescriptor<'a> { @@ -1024,7 +1178,6 @@ impl Type0Func { fn eval(&self, _input: &[f64], _output: &mut [f64]) { let _n_inputs = self.domain.len() / 2; let _n_ouputs = self.range.len() / 2; - } } @@ -1042,7 +1195,7 @@ enum Function { #[allow(dead_code)] Type3, #[allow(dead_code)] - Type4 + Type4, } impl Function { @@ -1050,14 +1203,14 @@ impl Function { let dict = match obj { &Object::Dictionary(ref dict) => dict, &Object::Stream(ref stream) => &stream.dict, - _ => panic!() + _ => panic!(), }; let function_type: i64 = get(doc, dict, b"FunctionType"); let f = match function_type { 0 => { let stream = match obj { &Object::Stream(ref stream) => stream, - _ => panic!() + _ => panic!(), }; let range: Vec = get(doc, dict, b"Range"); let domain: Vec = get(doc, dict, b"Domain"); @@ -1075,17 +1228,28 @@ impl Function { } default }); - let decode = get::>>(doc, dict, b"Decode").unwrap_or_else(|| range.clone()); - - Function::Type0(Type0Func { domain, range, size, contents, bits_per_sample, encode, decode }) + let decode = + get::>>(doc, dict, b"Decode").unwrap_or_else(|| range.clone()); + + Function::Type0(Type0Func { + domain, + range, + size, + contents, + bits_per_sample, + encode, + decode, + }) } 2 => { let c0 = get::>>(doc, dict, b"C0"); let c1 = get::>>(doc, dict, b"C1"); let n = get::(doc, dict, b"N"); - Function::Type2(Type2Func { c0, c1, n}) + Function::Type2(Type2Func { c0, c1, n }) + } + _ => { + panic!("unhandled function type {}", function_type) } - _ => { panic!("unhandled function type {}", function_type) } }; f } @@ -1093,15 +1257,16 @@ impl Function { fn as_num(o: &Object) -> f64 { match o { - &Object::Integer(i) => { i as f64 } - &Object::Real(f) => { f.into() } - _ => { panic!("not a number") } + &Object::Integer(i) => i as f64, + &Object::Real(f) => f.into(), + _ => { + panic!("not a number") + } } } #[derive(Clone)] -struct TextState<'a> -{ +struct TextState<'a> { font: Option>, font_size: f64, character_spacing: f64, @@ -1115,15 +1280,16 @@ struct TextState<'a> // XXX: We'd ideally implement this without having to copy the uncompressed data fn get_contents(contents: &Stream) -> Vec { if contents.filter().is_ok() { - contents.decompressed_content().unwrap_or_else(|_|contents.content.clone()) + contents + .decompressed_content() + .unwrap_or_else(|_| contents.content.clone()) } else { contents.content.clone() } } #[derive(Clone)] -struct GraphicsState<'a> -{ +struct GraphicsState<'a> { ctm: Transform, ts: TextState<'a>, smask: Option<&'a Dictionary>, @@ -1134,10 +1300,13 @@ struct GraphicsState<'a> line_width: f64, } -fn show_text(gs: &mut GraphicsState, s: &[u8], - _tlm: &Transform, - _flip_ctm: &Transform, - output: &mut dyn OutputDev) -> Result<(), OutputError> { +fn show_text( + gs: &mut GraphicsState, + s: &[u8], + _tlm: &Transform, + _flip_ctm: &Transform, + output: &mut dyn OutputDev, +) -> Result<(), OutputError> { let ts = &mut gs.ts; let font = ts.font.as_ref().unwrap(); //let encoding = font.encoding.as_ref().map(|x| &x[..]).unwrap_or(&PDFDocEncoding); @@ -1147,19 +1316,13 @@ fn show_text(gs: &mut GraphicsState, s: &[u8], output.begin_word()?; for (c, length) in font.char_codes(s) { - let tsm = Transform2D::row_major(ts.horizontal_scaling, - 0., - 0., - 1.0, - 0., - ts.rise); + let tsm = Transform2D::row_major(ts.horizontal_scaling, 0., 0., 1.0, 0., ts.rise); let trm = ts.tm.pre_transform(&gs.ctm); let trm = trm.post_transform(&tsm); //dlog!("ctm: {:?} tm {:?}", gs.ctm, tm); //dlog!("current pos: {:?}", position); // 5.9 Extraction of Text Content - //dlog!("w: {}", font.widths[&(*c as i64)]); let w0 = font.get_width(c) / 1000.; @@ -1169,18 +1332,28 @@ fn show_text(gs: &mut GraphicsState, s: &[u8], // single-byte code. It does not apply to occurrences of the byte value 32 in // multiple-byte codes." let is_space = c == 32 && length == 1; - if is_space { spacing += ts.word_spacing } + if is_space { + spacing += ts.word_spacing + } output.output_character(&trm, w0, spacing, ts.font_size, &font.decode_char(c))?; let tj = 0.; let ty = 0.; - let tx = ts.horizontal_scaling * ((w0 - tj/1000.)* ts.font_size + spacing); - dlog!("horizontal {} adjust {} {} {} {}", ts.horizontal_scaling, tx, w0, ts.font_size, spacing); + let tx = ts.horizontal_scaling * ((w0 - tj / 1000.) * ts.font_size + spacing); + dlog!( + "horizontal {} adjust {} {} {} {}", + ts.horizontal_scaling, + tx, + w0, + ts.font_size, + spacing + ); // dlog!("w0: {}, tx: {}", w0, tx); - ts.tm = ts.tm.pre_transform(&Transform2D::create_translation(tx, ty)); + ts.tm = ts + .tm + .pre_transform(&Transform2D::create_translation(tx, ty)); let _trm = ts.tm.pre_transform(&gs.ctm); //dlog!("post pos: {:?}", trm); - } output.end_word()?; Ok(()) @@ -1191,14 +1364,14 @@ pub struct MediaBox { pub llx: f64, pub lly: f64, pub urx: f64, - pub ury: f64 + pub ury: f64, } fn apply_state(gs: &mut GraphicsState, state: &Dictionary) { for (k, v) in state.iter() { - let k : &[u8] = k.as_ref(); + let k: &[u8] = k.as_ref(); match k { - b"SMask" => { match v { + b"SMask" => match v { &Object::Name(ref name) => { if name == b"None" { gs.smask = None; @@ -1206,18 +1379,23 @@ fn apply_state(gs: &mut GraphicsState, state: &Dictionary) { panic!("unexpected smask name") } } - _ => { panic!("unexpected smask type {:?}", v) } - }} - b"Type" => { match v { + _ => { + panic!("unexpected smask type {:?}", v) + } + }, + b"Type" => match v { &Object::Name(ref name) => { assert_eq!(name, b"ExtGState") } - _ => { panic!("unexpected type") } - }} - _ => { dlog!("unapplied state: {:?} {:?}", k, v); } + _ => { + panic!("unexpected type") + } + }, + _ => { + dlog!("unapplied state: {:?} {:?}", k, v); + } } } - } #[derive(Debug)] @@ -1232,7 +1410,7 @@ pub enum PathOp { #[derive(Debug)] pub struct Path { - pub ops: Vec + pub ops: Vec, } impl Path { @@ -1241,10 +1419,12 @@ impl Path { } fn current_point(&self) -> (f64, f64) { match self.ops.last().unwrap() { - &PathOp::MoveTo(x, y) => { (x, y) } - &PathOp::LineTo(x, y) => { (x, y) } - &PathOp::CurveTo(_, _, _, _, x, y) => { (x, y) } - _ => { panic!() } + &PathOp::MoveTo(x, y) => (x, y), + &PathOp::LineTo(x, y) => (x, y), + &PathOp::CurveTo(_, _, _, _, x, y) => (x, y), + _ => { + panic!() + } } } } @@ -1261,7 +1441,7 @@ pub struct CalRGB { white_point: [f64; 3], black_point: Option<[f64; 3]>, gamma: Option<[f64; 3]>, - matrix: Option> + matrix: Option>, } #[derive(Clone, Debug)] @@ -1279,7 +1459,7 @@ pub enum AlternateColorSpace { CalRGB(CalRGB), CalGray(CalGray), Lab(Lab), - ICCBased(Vec) + ICCBased(Vec), } #[derive(Clone)] @@ -1299,7 +1479,7 @@ pub enum ColorSpace { CalGray(CalGray), Lab(Lab), Separation(Separation), - ICCBased(Vec) + ICCBased(Vec), } fn make_colorspace<'a>(doc: &'a Document, name: &[u8], resources: &'a Dictionary) -> ColorSpace { @@ -1310,22 +1490,22 @@ fn make_colorspace<'a>(doc: &'a Document, name: &[u8], resources: &'a Dictionary b"Pattern" => ColorSpace::Pattern, _ => { let colorspaces: &Dictionary = get(&doc, resources, b"ColorSpace"); - let cs = maybe_get_array(doc, colorspaces, &name[..]).unwrap_or_else(|| panic!("missing colorspace {:?}", &name[..])); + let cs = maybe_get_array(doc, colorspaces, &name[..]) + .unwrap_or_else(|| panic!("missing colorspace {:?}", &name[..])); let cs_name = pdf_to_utf8(cs[0].as_name().expect("first arg must be a name")); match cs_name.as_ref() { "Separation" => { let name = pdf_to_utf8(cs[1].as_name().expect("second arg must be a name")); let alternate_space = match &maybe_deref(doc, &cs[2]) { - Object::Name(name) => { - match &name[..] { - b"DeviceGray" => AlternateColorSpace::DeviceGray, - b"DeviceRGB" => AlternateColorSpace::DeviceRGB, - b"DeviceCMYK" => AlternateColorSpace::DeviceCMYK, - _ => panic!("unexpected color space name") - } - } + Object::Name(name) => match &name[..] { + b"DeviceGray" => AlternateColorSpace::DeviceGray, + b"DeviceRGB" => AlternateColorSpace::DeviceRGB, + b"DeviceCMYK" => AlternateColorSpace::DeviceCMYK, + _ => panic!("unexpected color space name"), + }, Object::Array(cs) => { - let cs_name = pdf_to_utf8(cs[0].as_name().expect("first arg must be a name")); + let cs_name = + pdf_to_utf8(cs[0].as_name().expect("first arg must be a name")); match cs_name.as_ref() { "ICCBased" => { let stream = maybe_deref(doc, &cs[1]).as_stream().unwrap(); @@ -1358,15 +1538,19 @@ fn make_colorspace<'a>(doc: &'a Document, name: &[u8], resources: &'a Dictionary range: get(&doc, dict, b"Range"), }) } - _ => panic!("Unexpected color space name") + _ => panic!("Unexpected color space name"), } } - _ => panic!("Alternate space should be name or array {:?}", cs[2]) + _ => panic!("Alternate space should be name or array {:?}", cs[2]), }; let tint_transform = Box::new(Function::new(doc, maybe_deref(doc, &cs[3]))); dlog!("{:?} {:?} {:?}", name, alternate_space, tint_transform); - ColorSpace::Separation(Separation{ name, alternate_space, tint_transform}) + ColorSpace::Separation(Separation { + name, + alternate_space, + tint_transform, + }) } "ICCBased" => { let stream = maybe_deref(doc, &cs[1]).as_stream().unwrap(); @@ -1399,9 +1583,7 @@ fn make_colorspace<'a>(doc: &'a Document, name: &[u8], resources: &'a Dictionary range: get(&doc, dict, b"Range"), }) } - "Pattern" => { - ColorSpace::Pattern - } + "Pattern" => ColorSpace::Pattern, _ => { panic!("color_space {:?} {:?} {:?}", name, cs_name, cs) } @@ -1411,7 +1593,7 @@ fn make_colorspace<'a>(doc: &'a Document, name: &[u8], resources: &'a Dictionary } struct Processor<'a> { - _none: PhantomData<&'a ()> + _none: PhantomData<&'a ()>, } impl<'a> Processor<'a> { @@ -1419,7 +1601,15 @@ impl<'a> Processor<'a> { Processor { _none: PhantomData } } - fn process_stream(&mut self, doc: &'a Document, content: Vec, resources: &'a Dictionary, media_box: &MediaBox, output: &mut dyn OutputDev, page_num: u32) -> Result<(), OutputError> { + fn process_stream( + &mut self, + doc: &'a Document, + content: Vec, + resources: &'a Dictionary, + media_box: &MediaBox, + output: &mut dyn OutputDev, + page_num: u32, + ) -> Result<(), OutputError> { let content = Content::decode(&content).unwrap(); let mut font_table = HashMap::new(); let mut gs: GraphicsState = GraphicsState { @@ -1439,7 +1629,7 @@ impl<'a> Processor<'a> { stroke_colorspace: ColorSpace::DeviceGray, line_width: 1., ctm: Transform2D::identity(), - smask: None + smask: None, }; //let mut ts = &mut gs.ts; let mut gs_stack = Vec::new(); @@ -1463,12 +1653,14 @@ impl<'a> Processor<'a> { } "cm" => { assert!(operation.operands.len() == 6); - let m = Transform2D::row_major(as_num(&operation.operands[0]), - as_num(&operation.operands[1]), - as_num(&operation.operands[2]), - as_num(&operation.operands[3]), - as_num(&operation.operands[4]), - as_num(&operation.operands[5])); + let m = Transform2D::row_major( + as_num(&operation.operands[0]), + as_num(&operation.operands[1]), + as_num(&operation.operands[2]), + as_num(&operation.operands[3]), + as_num(&operation.operands[4]), + as_num(&operation.operands[5]), + ); gs.ctm = gs.ctm.pre_transform(&m); dlog!("matrix {:?}", gs.ctm); } @@ -1482,60 +1674,72 @@ impl<'a> Processor<'a> { } "SC" | "SCN" => { gs.stroke_color = match gs.stroke_colorspace { - ColorSpace::Pattern => { dlog!("unhandled pattern color"); Vec::new() } - _ => { operation.operands.iter().map(|x| as_num(x)).collect() } + ColorSpace::Pattern => { + dlog!("unhandled pattern color"); + Vec::new() + } + _ => operation.operands.iter().map(|x| as_num(x)).collect(), }; } "sc" | "scn" => { gs.fill_color = match gs.fill_colorspace { - ColorSpace::Pattern => { dlog!("unhandled pattern color"); Vec::new() } - _ => { operation.operands.iter().map(|x| as_num(x)).collect() } + ColorSpace::Pattern => { + dlog!("unhandled pattern color"); + Vec::new() + } + _ => operation.operands.iter().map(|x| as_num(x)).collect(), }; } "G" | "g" | "RG" | "rg" | "K" | "k" => { dlog!("unhandled color operation {:?}", operation); } - "TJ" => { - match operation.operands[0] { - Object::Array(ref array) => { - for e in array { - match e { - &Object::String(ref s, _) => { - show_text(&mut gs, s, &tlm, &flip_ctm, output)?; - } - &Object::Integer(i) => { - let ts = &mut gs.ts; - let w0 = 0.; - let tj = i as f64; - let ty = 0.; - let tx = ts.horizontal_scaling * ((w0 - tj / 1000.) * ts.font_size); - ts.tm = ts.tm.pre_transform(&Transform2D::create_translation(tx, ty)); - dlog!("adjust text by: {} {:?}", i, ts.tm); - } - &Object::Real(i) => { - let ts = &mut gs.ts; - let w0 = 0.; - let tj = i as f64; - let ty = 0.; - let tx = ts.horizontal_scaling * ((w0 - tj / 1000.) * ts.font_size); - ts.tm = ts.tm.pre_transform(&Transform2D::create_translation(tx, ty)); - dlog!("adjust text by: {} {:?}", i, ts.tm); - } - _ => { dlog!("kind of {:?}", e); } + "TJ" => match operation.operands[0] { + Object::Array(ref array) => { + for e in array { + match e { + &Object::String(ref s, _) => { + show_text(&mut gs, s, &tlm, &flip_ctm, output)?; + } + &Object::Integer(i) => { + let ts = &mut gs.ts; + let w0 = 0.; + let tj = i as f64; + let ty = 0.; + let tx = + ts.horizontal_scaling * ((w0 - tj / 1000.) * ts.font_size); + ts.tm = ts + .tm + .pre_transform(&Transform2D::create_translation(tx, ty)); + dlog!("adjust text by: {} {:?}", i, ts.tm); + } + &Object::Real(i) => { + let ts = &mut gs.ts; + let w0 = 0.; + let tj = i as f64; + let ty = 0.; + let tx = + ts.horizontal_scaling * ((w0 - tj / 1000.) * ts.font_size); + ts.tm = ts + .tm + .pre_transform(&Transform2D::create_translation(tx, ty)); + dlog!("adjust text by: {} {:?}", i, ts.tm); + } + _ => { + dlog!("kind of {:?}", e); } } } - _ => {} } - } - "Tj" => { - match operation.operands[0] { - Object::String(ref s, _) => { - show_text(&mut gs, s, &tlm, &flip_ctm, output)?; - } - _ => { panic!("unexpected Tj operand {:?}", operation) } + _ => {} + }, + "Tj" => match operation.operands[0] { + Object::String(ref s, _) => { + show_text(&mut gs, s, &tlm, &flip_ctm, output)?; } - } + _ => { + panic!("unexpected Tj operand {:?}", operation) + } + }, "Tc" => { gs.ts.character_spacing = as_num(&operation.operands[0]); } @@ -1551,41 +1755,51 @@ impl<'a> Processor<'a> { "Tf" => { let fonts: &Dictionary = get(&doc, resources, b"Font"); let name = operation.operands[0].as_name().unwrap(); - let font = font_table.entry(name.to_owned()).or_insert_with(|| make_font(doc, get::<&Dictionary>(doc, fonts, name))).clone(); + let font = font_table + .entry(name.to_owned()) + .or_insert_with(|| make_font(doc, get::<&Dictionary>(doc, fonts, name))) + .clone(); { /*let file = font.get_descriptor().and_then(|desc| desc.get_file()); - if let Some(file) = file { - let file_contents = filter_data(file.as_stream().unwrap()); - let mut cursor = Cursor::new(&file_contents[..]); - //let f = Font::read(&mut cursor); - //dlog!("font file: {:?}", f); - }*/ + if let Some(file) = file { + let file_contents = filter_data(file.as_stream().unwrap()); + let mut cursor = Cursor::new(&file_contents[..]); + //let f = Font::read(&mut cursor); + //dlog!("font file: {:?}", f); + }*/ } gs.ts.font = Some(font); gs.ts.font_size = as_num(&operation.operands[1]); - dlog!("font {} size: {} {:?}", pdf_to_utf8(name), gs.ts.font_size, operation); + dlog!( + "font {} size: {} {:?}", + pdf_to_utf8(name), + gs.ts.font_size, + operation + ); } "Ts" => { gs.ts.rise = as_num(&operation.operands[0]); } "Tm" => { assert!(operation.operands.len() == 6); - tlm = Transform2D::row_major(as_num(&operation.operands[0]), - as_num(&operation.operands[1]), - as_num(&operation.operands[2]), - as_num(&operation.operands[3]), - as_num(&operation.operands[4]), - as_num(&operation.operands[5])); + tlm = Transform2D::row_major( + as_num(&operation.operands[0]), + as_num(&operation.operands[1]), + as_num(&operation.operands[2]), + as_num(&operation.operands[3]), + as_num(&operation.operands[4]), + as_num(&operation.operands[5]), + ); gs.ts.tm = tlm; dlog!("Tm: matrix {:?}", gs.ts.tm); output.end_line()?; } "Td" => { /* Move to the start of the next line, offset from the start of the current line by (tx , ty ). - tx and ty are numbers expressed in unscaled text space units. - More precisely, this operator performs the following assignments: - */ + tx and ty are numbers expressed in unscaled text space units. + More precisely, this operator performs the following assignments: + */ assert!(operation.operands.len() == 2); let tx = as_num(&operation.operands[0]); let ty = as_num(&operation.operands[1]); @@ -1599,8 +1813,8 @@ impl<'a> Processor<'a> { "TD" => { /* Move to the start of the next line, offset from the start of the current line by (tx , ty ). - As a side effect, this operator sets the leading parameter in the text state. - */ + As a side effect, this operator sets the leading parameter in the text state. + */ assert!(operation.operands.len() == 2); let tx = as_num(&operation.operands[0]); let ty = as_num(&operation.operands[1]); @@ -1622,7 +1836,9 @@ impl<'a> Processor<'a> { dlog!("T* matrix {:?}", gs.ts.tm); output.end_line()?; } - "q" => { gs_stack.push(gs.clone()); } + "q" => { + gs_stack.push(gs.clone()); + } "Q" => { let s = gs_stack.pop(); if let Some(s) = s { @@ -1637,46 +1853,60 @@ impl<'a> Processor<'a> { let state: &Dictionary = get(doc, ext_gstate, name); apply_state(&mut gs, state); } - "i" => { dlog!("unhandled graphics state flattness operator {:?}", operation); } - "w" => { gs.line_width = as_num(&operation.operands[0]); } - "J" | "j" | "M" | "d" | "ri" => { dlog!("unknown graphics state operator {:?}", operation); } - "m" => { path.ops.push(PathOp::MoveTo(as_num(&operation.operands[0]), as_num(&operation.operands[1]))) } - "l" => { path.ops.push(PathOp::LineTo(as_num(&operation.operands[0]), as_num(&operation.operands[1]))) } - "c" => { - path.ops.push(PathOp::CurveTo( - as_num(&operation.operands[0]), - as_num(&operation.operands[1]), - as_num(&operation.operands[2]), - as_num(&operation.operands[3]), - as_num(&operation.operands[4]), - as_num(&operation.operands[5]))) + "i" => { + dlog!( + "unhandled graphics state flattness operator {:?}", + operation + ); + } + "w" => { + gs.line_width = as_num(&operation.operands[0]); + } + "J" | "j" | "M" | "d" | "ri" => { + dlog!("unknown graphics state operator {:?}", operation); } + "m" => path.ops.push(PathOp::MoveTo( + as_num(&operation.operands[0]), + as_num(&operation.operands[1]), + )), + "l" => path.ops.push(PathOp::LineTo( + as_num(&operation.operands[0]), + as_num(&operation.operands[1]), + )), + "c" => path.ops.push(PathOp::CurveTo( + as_num(&operation.operands[0]), + as_num(&operation.operands[1]), + as_num(&operation.operands[2]), + as_num(&operation.operands[3]), + as_num(&operation.operands[4]), + as_num(&operation.operands[5]), + )), "v" => { let (x, y) = path.current_point(); path.ops.push(PathOp::CurveTo( x, y, - as_num(&operation.operands[0]), - as_num(&operation.operands[1]), - as_num(&operation.operands[2]), - as_num(&operation.operands[3]))) - } - "y" => { - path.ops.push(PathOp::CurveTo( as_num(&operation.operands[0]), as_num(&operation.operands[1]), as_num(&operation.operands[2]), as_num(&operation.operands[3]), - as_num(&operation.operands[2]), - as_num(&operation.operands[3]))) - } - "h" => { path.ops.push(PathOp::Close) } - "re" => { - path.ops.push(PathOp::Rect(as_num(&operation.operands[0]), - as_num(&operation.operands[1]), - as_num(&operation.operands[2]), - as_num(&operation.operands[3]))) + )) } + "y" => path.ops.push(PathOp::CurveTo( + as_num(&operation.operands[0]), + as_num(&operation.operands[1]), + as_num(&operation.operands[2]), + as_num(&operation.operands[3]), + as_num(&operation.operands[2]), + as_num(&operation.operands[3]), + )), + "h" => path.ops.push(PathOp::Close), + "re" => path.ops.push(PathOp::Rect( + as_num(&operation.operands[0]), + as_num(&operation.operands[1]), + as_num(&operation.operands[2]), + as_num(&operation.operands[3]), + )), "s" | "f*" | "B" | "B*" | "b" => { dlog!("unhandled path op {:?}", operation); } @@ -1688,7 +1918,9 @@ impl<'a> Processor<'a> { output.fill(&gs.ctm, &gs.fill_colorspace, &gs.fill_color, &path)?; path.ops.clear(); } - "W" | "w*" => { dlog!("unhandled clipping operation {:?}", operation); } + "W" | "w*" => { + dlog!("unhandled clipping operation {:?}", operation); + } "n" => { dlog!("discard {:?}", path); path.ops.clear(); @@ -1705,38 +1937,67 @@ impl<'a> Processor<'a> { let xobject: &Dictionary = get(&doc, resources, b"XObject"); let name = operation.operands[0].as_name().unwrap(); let xf: &Stream = get(&doc, xobject, name); - let resources = maybe_get_obj(&doc, &xf.dict, b"Resources").and_then(|n| n.as_dict().ok()).unwrap_or(resources); + let resources = maybe_get_obj(&doc, &xf.dict, b"Resources") + .and_then(|n| n.as_dict().ok()) + .unwrap_or(resources); let contents = get_contents(xf); self.process_stream(&doc, contents, resources, &media_box, output, page_num)?; } - _ => { dlog!("unknown operation {:?}", operation); } - + _ => { + dlog!("unknown operation {:?}", operation); + } } } Ok(()) } } - pub trait OutputDev { - fn begin_page(&mut self, page_num: u32, media_box: &MediaBox, art_box: Option<(f64, f64, f64, f64)>)-> Result<(), OutputError>; - fn end_page(&mut self)-> Result<(), OutputError>; - fn output_character(&mut self, trm: &Transform, width: f64, spacing: f64, font_size: f64, char: &str) -> Result<(), OutputError>; - fn begin_word(&mut self)-> Result<(), OutputError>; - fn end_word(&mut self)-> Result<(), OutputError>; - fn end_line(&mut self)-> Result<(), OutputError>; - fn stroke(&mut self, _ctm: &Transform, _colorspace: &ColorSpace, _color: &[f64], _path: &Path)-> Result<(), OutputError> {Ok(())} - fn fill(&mut self, _ctm: &Transform, _colorspace: &ColorSpace, _color: &[f64], _path: &Path)-> Result<(), OutputError> {Ok(())} + fn begin_page( + &mut self, + page_num: u32, + media_box: &MediaBox, + art_box: Option<(f64, f64, f64, f64)>, + ) -> Result<(), OutputError>; + fn end_page(&mut self) -> Result<(), OutputError>; + fn output_character( + &mut self, + trm: &Transform, + width: f64, + spacing: f64, + font_size: f64, + char: &str, + ) -> Result<(), OutputError>; + fn begin_word(&mut self) -> Result<(), OutputError>; + fn end_word(&mut self) -> Result<(), OutputError>; + fn end_line(&mut self) -> Result<(), OutputError>; + fn stroke( + &mut self, + _ctm: &Transform, + _colorspace: &ColorSpace, + _color: &[f64], + _path: &Path, + ) -> Result<(), OutputError> { + Ok(()) + } + fn fill( + &mut self, + _ctm: &Transform, + _colorspace: &ColorSpace, + _color: &[f64], + _path: &Path, + ) -> Result<(), OutputError> { + Ok(()) + } } - -pub struct HTMLOutput<'a> { +pub struct HTMLOutput<'a> { file: &'a mut dyn std::io::Write, flip_ctm: Transform, last_ctm: Transform, buf_ctm: Transform, buf_font_size: f64, - buf: String + buf: String, } fn insert_nbsp(input: &str) -> String { @@ -1770,15 +2031,17 @@ impl<'a> HTMLOutput<'a> { buf_font_size: 0., } } - fn flush_string(&mut self) -> Result<(), OutputError>{ + fn flush_string(&mut self) -> Result<(), OutputError> { if self.buf.len() != 0 { - let position = self.buf_ctm.post_transform(&self.flip_ctm); - let transformed_font_size_vec = self.buf_ctm.transform_vector(vec2(self.buf_font_size, self.buf_font_size)); + let transformed_font_size_vec = self + .buf_ctm + .transform_vector(vec2(self.buf_font_size, self.buf_font_size)); // get the length of one sized of the square with the same area with a rectangle of size (x, y) - let transformed_font_size = (transformed_font_size_vec.x * transformed_font_size_vec.y).sqrt(); + let transformed_font_size = + (transformed_font_size_vec.x * transformed_font_size_vec.y).sqrt(); let (x, y) = (position.m31, position.m32); - println!("flush {} {:?}", self.buf, (x,y)); + println!("flush {} {:?}", self.buf, (x, y)); write!(self.file, "
{}
\n", x, y, transformed_font_size, insert_nbsp(&self.buf))?; @@ -1790,7 +2053,12 @@ impl<'a> HTMLOutput<'a> { type ArtBox = (f64, f64, f64, f64); impl<'a> OutputDev for HTMLOutput<'a> { - fn begin_page(&mut self, page_num: u32, media_box: &MediaBox, _: Option) -> Result<(), OutputError> { + fn begin_page( + &mut self, + page_num: u32, + media_box: &MediaBox, + _: Option, + ) -> Result<(), OutputError> { write!(self.file, " ")?; write!(self.file, "", page_num)?; write!(self.file, "
", page_num, media_box.ury - media_box.lly, media_box.urx - media_box.llx)?; @@ -1804,15 +2072,25 @@ impl<'a> OutputDev for HTMLOutput<'a> { write!(self.file, "
")?; Ok(()) } - fn output_character(&mut self, trm: &Transform, width: f64, spacing: f64, font_size: f64, char: &str) -> Result<(), OutputError>{ + fn output_character( + &mut self, + trm: &Transform, + width: f64, + spacing: f64, + font_size: f64, + char: &str, + ) -> Result<(), OutputError> { if trm.approx_eq(&self.last_ctm) { let position = trm.post_transform(&self.flip_ctm); let (x, y) = (position.m31, position.m32); - println!("accum {} {:?}", char, (x,y)); + println!("accum {} {:?}", char, (x, y)); self.buf += char; } else { - println!("flush {} {:?} {:?} {} {} {}", char, trm, self.last_ctm, width, font_size, spacing); + println!( + "flush {} {:?} {:?} {} {} {}", + char, trm, self.last_ctm, width, font_size, spacing + ); self.flush_string()?; self.buf = char.to_owned(); self.buf_font_size = font_size; @@ -1821,36 +2099,57 @@ impl<'a> OutputDev for HTMLOutput<'a> { let position = trm.post_transform(&self.flip_ctm); let transformed_font_size_vec = trm.transform_vector(vec2(font_size, font_size)); // get the length of one sized of the square with the same area with a rectangle of size (x, y) - let transformed_font_size = (transformed_font_size_vec.x * transformed_font_size_vec.y).sqrt(); + let transformed_font_size = + (transformed_font_size_vec.x * transformed_font_size_vec.y).sqrt(); let (x, y) = (position.m31, position.m32); write!(self.file, "
{}
", x, y, transformed_font_size, char)?; - self.last_ctm = trm.pre_transform(&Transform2D::create_translation(width * font_size + spacing, 0.)); + self.last_ctm = trm.pre_transform(&Transform2D::create_translation( + width * font_size + spacing, + 0., + )); Ok(()) } - fn begin_word(&mut self) -> Result<(), OutputError> {Ok(())} - fn end_word(&mut self) -> Result<(), OutputError> {Ok(())} - fn end_line(&mut self) -> Result<(), OutputError> {Ok(())} + fn begin_word(&mut self) -> Result<(), OutputError> { + Ok(()) + } + fn end_word(&mut self) -> Result<(), OutputError> { + Ok(()) + } + fn end_line(&mut self) -> Result<(), OutputError> { + Ok(()) + } } -pub struct SVGOutput<'a> { - file: &'a mut dyn std::io::Write +pub struct SVGOutput<'a> { + file: &'a mut dyn std::io::Write, } impl<'a> SVGOutput<'a> { pub fn new(file: &mut dyn std::io::Write) -> SVGOutput { - SVGOutput{file} + SVGOutput { file } } } impl<'a> OutputDev for SVGOutput<'a> { - fn begin_page(&mut self, _page_num: u32, media_box: &MediaBox, art_box: Option<(f64, f64, f64, f64)>) -> Result<(), OutputError> { + fn begin_page( + &mut self, + _page_num: u32, + media_box: &MediaBox, + art_box: Option<(f64, f64, f64, f64)>, + ) -> Result<(), OutputError> { let ver = 1.1; write!(self.file, "\n")?; if ver == 1.1 { - write!(self.file, r#""#)?; + write!( + self.file, + r#""# + )?; } else { - write!(self.file, r#""#)?; + write!( + self.file, + r#""# + )?; } if let Some(art_box) = art_box { let width = art_box.2 - art_box.0; @@ -1866,35 +2165,48 @@ impl<'a> OutputDev for SVGOutput<'a> { type Mat = Transform; let ctm = Mat::create_scale(1., -1.).post_translate(vec2(0., media_box.ury)); - write!(self.file, "\n", - ctm.m11, - ctm.m12, - ctm.m21, - ctm.m22, - ctm.m31, - ctm.m32, + write!( + self.file, + "\n", + ctm.m11, ctm.m12, ctm.m21, ctm.m22, ctm.m31, ctm.m32, )?; Ok(()) } - fn end_page(&mut self) -> Result<(), OutputError>{ + fn end_page(&mut self) -> Result<(), OutputError> { write!(self.file, "\n")?; write!(self.file, "")?; Ok(()) } - fn output_character(&mut self, _trm: &Transform, _width: f64, _spacing: f64, _font_size: f64, _char: &str) -> Result<(), OutputError>{ + fn output_character( + &mut self, + _trm: &Transform, + _width: f64, + _spacing: f64, + _font_size: f64, + _char: &str, + ) -> Result<(), OutputError> { + Ok(()) + } + fn begin_word(&mut self) -> Result<(), OutputError> { + Ok(()) + } + fn end_word(&mut self) -> Result<(), OutputError> { Ok(()) } - fn begin_word(&mut self) -> Result<(), OutputError> {Ok(())} - fn end_word(&mut self) -> Result<(), OutputError> {Ok(())} - fn end_line(&mut self) -> Result<(), OutputError> {Ok(())} - fn fill(&mut self, ctm: &Transform, _colorspace: &ColorSpace, _color: &[f64], path: &Path) -> Result<(), OutputError>{ - write!(self.file, "", - ctm.m11, - ctm.m12, - ctm.m21, - ctm.m22, - ctm.m31, - ctm.m32, + fn end_line(&mut self) -> Result<(), OutputError> { + Ok(()) + } + fn fill( + &mut self, + ctm: &Transform, + _colorspace: &ColorSpace, + _color: &[f64], + path: &Path, + ) -> Result<(), OutputError> { + write!( + self.file, + "", + ctm.m11, ctm.m12, ctm.m21, ctm.m22, ctm.m31, ctm.m32, )?; /*if path.ops.len() == 1 { @@ -1907,10 +2219,12 @@ impl<'a> OutputDev for SVGOutput<'a> { let mut d = Vec::new(); for op in &path.ops { match op { - &PathOp::MoveTo(x, y) => { d.push(format!("M{} {}", x, y))} - &PathOp::LineTo(x, y) => { d.push(format!("L{} {}", x, y))}, - &PathOp::CurveTo(x1, y1, x2, y2, x, y) => { d.push(format!("C{} {} {} {} {} {}", x1, y1, x2, y2, x, y))}, - &PathOp::Close => { d.push(format!("Z"))}, + &PathOp::MoveTo(x, y) => d.push(format!("M{} {}", x, y)), + &PathOp::LineTo(x, y) => d.push(format!("L{} {}", x, y)), + &PathOp::CurveTo(x1, y1, x2, y2, x, y) => { + d.push(format!("C{} {} {} {} {} {}", x1, y1, x2, y2, x, y)) + } + &PathOp::Close => d.push(format!("Z")), &PathOp::Rect(x, y, width, height) => { d.push(format!("M{} {}", x, y)); d.push(format!("L{} {}", x + width, y)); @@ -1918,7 +2232,6 @@ impl<'a> OutputDev for SVGOutput<'a> { d.push(format!("L{} {}", x, y + height)); d.push(format!("Z")); } - } } write!(self.file, "", d.join(" "))?; @@ -1970,7 +2283,7 @@ impl<'a> ConvertToFmt for &'a mut File { } } -pub struct PlainTextOutput { +pub struct PlainTextOutput { writer: W::Writer, last_end: f64, last_y: f64, @@ -1980,7 +2293,7 @@ pub struct PlainTextOutput { impl PlainTextOutput { pub fn new(writer: W) -> PlainTextOutput { - PlainTextOutput{ + PlainTextOutput { writer: writer.convert(), last_end: 100000., first_char: false, @@ -1993,18 +2306,31 @@ impl PlainTextOutput { /* There are some structural hints that PDFs can use to signal word and line endings: * however relying on these is not likely to be sufficient. */ impl OutputDev for PlainTextOutput { - fn begin_page(&mut self, _page_num: u32, media_box: &MediaBox, _: Option) -> Result<(), OutputError> { + fn begin_page( + &mut self, + _page_num: u32, + media_box: &MediaBox, + _: Option, + ) -> Result<(), OutputError> { self.flip_ctm = Transform2D::row_major(1., 0., 0., -1., 0., media_box.ury - media_box.lly); Ok(()) } fn end_page(&mut self) -> Result<(), OutputError> { Ok(()) } - fn output_character(&mut self, trm: &Transform, width: f64, _spacing: f64, font_size: f64, char: &str) -> Result<(), OutputError> { + fn output_character( + &mut self, + trm: &Transform, + width: f64, + _spacing: f64, + font_size: f64, + char: &str, + ) -> Result<(), OutputError> { let position = trm.post_transform(&self.flip_ctm); let transformed_font_size_vec = trm.transform_vector(vec2(font_size, font_size)); // get the length of one sized of the square with the same area with a rectangle of size (x, y) - let transformed_font_size = (transformed_font_size_vec.x*transformed_font_size_vec.y).sqrt(); + let transformed_font_size = + (transformed_font_size_vec.x * transformed_font_size_vec.y).sqrt(); let (x, y) = (position.m31, position.m32); use std::fmt::Write; //dlog!("last_end: {} x: {}, width: {}", self.last_end, x, width); @@ -2019,7 +2345,12 @@ impl OutputDev for PlainTextOutput { } if x > self.last_end + transformed_font_size * 0.1 { - dlog!("width: {}, space: {}, thresh: {}", width, x - self.last_end, transformed_font_size * 0.1); + dlog!( + "width: {}, space: {}, thresh: {}", + width, + x - self.last_end, + transformed_font_size * 0.1 + ); write!(self.writer, " ")?; } } @@ -2034,31 +2365,45 @@ impl OutputDev for PlainTextOutput { self.first_char = true; Ok(()) } - fn end_word(&mut self) -> Result<(), OutputError> {Ok(())} - fn end_line(&mut self) -> Result<(), OutputError>{ + fn end_word(&mut self) -> Result<(), OutputError> { + Ok(()) + } + fn end_line(&mut self) -> Result<(), OutputError> { //write!(self.file, "\n"); Ok(()) } } - pub fn print_metadata(doc: &Document) { dlog!("Version: {}", doc.version); if let Some(ref info) = get_info(&doc) { for (k, v) in *info { match v { - &Object::String(ref s, StringFormat::Literal) => { dlog!("{}: {}", pdf_to_utf8(k), pdf_to_utf8(s)); } + &Object::String(ref s, StringFormat::Literal) => { + dlog!("{}: {}", pdf_to_utf8(k), pdf_to_utf8(s)); + } _ => {} } } } - dlog!("Page count: {}", get::(&doc, &get_pages(&doc), b"Count")); + dlog!( + "Page count: {}", + get::(&doc, &get_pages(&doc), b"Count") + ); dlog!("Pages: {:?}", get_pages(&doc)); - dlog!("Type: {:?}", get_pages(&doc).get(b"Type").and_then(|x| x.as_name()).unwrap()); + dlog!( + "Type: {:?}", + get_pages(&doc) + .get(b"Type") + .and_then(|x| x.as_name()) + .unwrap() + ); } /// Extract the text from a pdf at `path` and return a `String` with the results -pub fn extract_text>(path: P) -> Result { +pub fn extract_text>( + path: P, +) -> Result { let mut s = String::new(); { let mut output = PlainTextOutput::new(&mut s); @@ -2078,19 +2423,70 @@ pub fn extract_text_from_mem(buffer: &[u8]) -> Result { return Ok(s); } -fn get_inherited<'a, T: FromObj<'a>>(doc: &'a Document, dict: &'a Dictionary, key: &[u8]) -> Option { +fn get_inherited<'a, T: FromObj<'a>>( + doc: &'a Document, + dict: &'a Dictionary, + key: &[u8], +) -> Option { let o: Option = get(doc, dict, key); if let Some(o) = o { Some(o) } else { - let parent = dict.get(b"Parent") + let parent = dict + .get(b"Parent") .and_then(|parent| parent.as_reference()) - .and_then(|id| doc.get_dictionary(id)).ok()?; + .and_then(|id| doc.get_dictionary(id)) + .ok()?; get_inherited(doc, parent, key) } } -/// Parse a given document and output it to `output` -pub fn output_doc(doc: &Document, output: &mut dyn OutputDev) -> Result<(), OutputError> { +fn process_page<'a>( + doc: &'a Document, + dict: (u32, (u32, u16)), + output: &mut dyn OutputDev, + empty_resources: &'a Dictionary, +) -> Result<(), OutputError> { + let mut p = Processor::new(); + let page_num = dict.0; + let page_dict = doc.get_object(dict.1).unwrap().as_dict().unwrap(); + dlog!("page {} {:?}", page_num, page_dict); + // XXX: Some pdfs lack a Resources directory + let resources = get_inherited(doc, page_dict, b"Resources").unwrap_or(empty_resources); + dlog!("resources {:?}", resources); + + // pdfium searches up the page tree for MediaBoxes as needed + let media_box: Vec = get_inherited(doc, page_dict, b"MediaBox").expect("MediaBox"); + let media_box = MediaBox { + llx: media_box[0], + lly: media_box[1], + urx: media_box[2], + ury: media_box[3], + }; + + let art_box = + get::>>(&doc, page_dict, b"ArtBox").map(|x| (x[0], x[1], x[2], x[3])); + + output.begin_page(page_num, &media_box, art_box)?; + + p.process_stream( + &doc, + doc.get_page_content(dict.1).unwrap(), + resources, + &media_box, + output, + page_num, + )?; + + output.end_page()?; + Ok(()) +} + +// Parse given page and output it to `output` +pub fn output_page( + doc: &Document, + output: &mut dyn OutputDev, + page_num: &u32, +) -> Result<(), OutputError> { if let Ok(_) = doc.trailer.get(b"Encrypt") { eprintln!("Encrypted documents are not currently supported: See https://github.com/J-F-Liu/lopdf/issues/168") } @@ -2098,26 +2494,24 @@ pub fn output_doc(doc: &Document, output: &mut dyn OutputDev) -> Result<(), Outp let pages = doc.get_pages(); let mut p = Processor::new(); - for dict in pages { - let page_num = dict.0; - let page_dict = doc.get_object(dict.1).unwrap().as_dict().unwrap(); - dlog!("page {} {:?}", page_num, page_dict); - // XXX: Some pdfs lack a Resources directory - let resources = get_inherited(doc, page_dict, b"Resources").unwrap_or(empty_resources); - dlog!("resources {:?}", resources); - - // pdfium searches up the page tree for MediaBoxes as needed - let media_box: Vec = get_inherited(doc, page_dict, b"MediaBox").expect("MediaBox"); - let media_box = MediaBox { llx: media_box[0], lly: media_box[1], urx: media_box[2], ury: media_box[3] }; - - let art_box = get::>>(&doc, page_dict, b"ArtBox") - .map(|x| (x[0], x[1], x[2], x[3])); - - output.begin_page(page_num, &media_box, art_box)?; + let target_page = pages.into_iter().find(|p| p.0 == *page_num); + if target_page.is_none() { + return Err(OutputError::PdfError(Error::PageNumberNotFound(*page_num))); + } + process_page(&doc, target_page.unwrap(), output, empty_resources)?; + Ok(()) +} - p.process_stream(&doc, doc.get_page_content(dict.1).unwrap(), resources,&media_box, output, page_num)?; +/// Parse a given document and output it to `output` +pub fn output_doc(doc: &Document, output: &mut dyn OutputDev) -> Result<(), OutputError> { + if let Ok(_) = doc.trailer.get(b"Encrypt") { + eprintln!("Encrypted documents are not currently supported: See https://github.com/J-F-Liu/lopdf/issues/168") + } + let empty_resources = &Dictionary::new(); - output.end_page()?; + let pages = doc.get_pages(); + for dict in pages { + process_page(&doc, dict, output, empty_resources)?; } Ok(()) }