Skip to content

Commit

Permalink
Fall back to the encoding if the Unicode map is missing characters.
Browse files Browse the repository at this point in the history
  • Loading branch information
jrmuizel committed Dec 5, 2023
1 parent 379647e commit aeb9a9d
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,6 @@ impl<'a> PdfSimpleFont<'a> {
let s = get_contents(s);
//dlog!("font contents {:?}", pdf_to_utf8(&s));
type1_encoding = Some(type1_encoding_parser::get_encoding_map(&s).expect("encoding"));

}
_ => { dlog!("font file {:?}", file) }
}
Expand Down Expand Up @@ -747,7 +746,15 @@ impl<'a> PdfFont for PdfSimpleFont<'a> {
if let Some(ref unicode_map) = self.unicode_map {
let s = unicode_map.get(&char);
let s = match s {
None => { panic!("missing char {:?} in map {:?} for {:?}", char, unicode_map, self.font)}
None => {
println!("missing char {:?} in unicode map {:?} for {:?}", char, unicode_map, self.font);
// Some PDFs, like http://arxiv.org/pdf/2312.00064v1, are missing entries in their unicode map but do have
// entries in the encoding.
let encoding = self.encoding.as_ref().map(|x| &x[..]).expect("missing unicode map and encoding");
let s = to_utf8(encoding, &slice);
println!("falling back to encoding {} -> {:?}", char, s);
s
}
Some(s) => { s.clone() }
};
return s
Expand Down

0 comments on commit aeb9a9d

Please sign in to comment.