Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added missing HEIC box names and handling of a TIFF header inside HEIC #173

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions exifread/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@ def _find_tiff_exif(fh: BinaryIO) -> tuple:
offset = 0
return offset, endian

def _find_heic_tiff(fh: BinaryIO) -> tuple:
    """Locate a bare TIFF header at the current position of a HEIC file.

    In some HEIC files the Exif offset is 0 and a plain TIFF header (without
    the usual ``Exif\\x00\\x00`` prefix) sits near the end of the file.

    :param fh: binary file handle positioned where the TIFF header is expected.
    :return: ``(offset, endian)`` where ``offset`` is the file position of the
        TIFF header and ``endian`` is the two-byte order mark
        (``b'II'`` or ``b'MM'``). On success the handle is left at ``offset``.
    :raises InvalidExif: if the next four bytes are not a TIFF header
        (including the case where fewer than four bytes could be read).
    """
    data = fh.read(4)

    # The magic number 42 is stored in the byte order announced by the first
    # two bytes: little-endian 'II' -> 2A 00, big-endian 'MM' -> 00 2A.
    # Comparing the whole slice also copes with short reads, where indexing
    # individual bytes would raise IndexError.
    if data not in (b'II\x2a\x00', b'MM\x00\x2a'):
        raise InvalidExif(
            "Exif pointer to zeros, but found " + str(data) + " instead of a TIFF header."
        )

    # Rewind to the start of the header so the caller can parse from there.
    offset = fh.tell() - len(data)
    fh.seek(offset)
    endian = data[0:2]
    logger.debug('Found TIFF header in Exif, offset = %0xH', offset)

    return offset, endian

def _find_webp_exif(fh: BinaryIO) -> tuple:
logger.debug("WebP format recognized in data[0:4], data[8:12]")
Expand Down Expand Up @@ -108,6 +124,9 @@ def _determine_type(fh: BinaryIO) -> tuple:
fh.seek(0)
heic = HEICExifFinder(fh)
offset, endian = heic.find_exif()
if offset == 0:
offset, endian = _find_heic_tiff(fh)
# It's a HEIC file with a TIFF header
elif data[0:4] == b'RIFF' and data[8:12] == b'WEBP':
offset, endian = _find_webp_exif(fh)
elif data[0:2] == b'\xFF\xD8':
Expand Down
54 changes: 50 additions & 4 deletions exifread/heic.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,12 @@ def get_parser(self, box: Box) -> Optional[Callable[[Box], Any]]:
'infe': self._parse_infe,
'iinf': self._parse_iinf,
'iloc': self._parse_iloc,
'hdlr': self._parse_hdlr, # HEIC/AVIF hdlr = Handler
'pitm': self._parse_pitm, # HEIC/AVIF pitm = Primary Item
'iref': self._parse_iref, # HEIC/AVIF iref = Item Reference
'idat': self._parse_idat, # HEIC/AVIF idat = Item Data Box
'dinf': self._parse_dinf, # HEIC/AVIF dinf = Data Information Box
'iprp': self._parse_iprp, # HEIC/AVIF iprp = Item Properties Box
}
return defs.get(box.name)

Expand Down Expand Up @@ -257,6 +263,37 @@ def _parse_iloc(self, box: Box):
extents.append((extent_offset, extent_length))
box.locs[item_id] = extents

# Added a few box names which, when left unhandled, aborted data extraction:
# hdlr, pitm, dinf, iprp, idat, iref
#
# Their handlers are initially no-ops: each box is only logged and skipped.
# They were found in .heif photo files produced by Nokia 8.3 5G.
#
# They are part of the standard, referring to:
# - ISO/IEC 14496-12 fifth edition 2015-02-20 (chapter 8.10 Metadata)
# found in:
# https://mpeg.chiariglione.org/standards/mpeg-4/iso-base-media-file-format/text-isoiec-14496-12-5th-edition
# (The newest is ISO/IEC 14496-12:2022, but would cost 208 Swiss Francs at iso.org)
# - A C++ example: https://exiv2.org/book/#BMFF

def _parse_hdlr(self, box: Box):
    """Log that a 'hdlr' (Handler) box was seen; its contents are not parsed here."""
    seen_name = box.name
    logger.debug("HEIC: found 'hdlr' Box %s, skipped", seen_name)

def _parse_pitm(self, box: Box):
    """Log that a 'pitm' (Primary Item) box was seen; its contents are not parsed here."""
    seen_name = box.name
    logger.debug("HEIC: found 'pitm' Box %s, skipped", seen_name)

def _parse_dinf(self, box: Box):
    """Log that a 'dinf' (Data Information) box was seen; its contents are not parsed here."""
    seen_name = box.name
    logger.debug("HEIC: found 'dinf' Box %s, skipped", seen_name)

def _parse_iprp(self, box: Box):
    """Log that an 'iprp' box was seen; its contents are not parsed here."""
    seen_name = box.name
    logger.debug("HEIC: found 'iprp' Box %s, skipped", seen_name)

def _parse_idat(self, box: Box):
    """Log that an 'idat' (Item Data) box was seen; its contents are not parsed here."""
    seen_name = box.name
    logger.debug("HEIC: found 'idat' Box %s, skipped", seen_name)

def _parse_iref(self, box: Box):
    """Log that an 'iref' (Item Reference) box was seen; its contents are not parsed here."""
    seen_name = box.name
    logger.debug("HEIC: found 'iref' Box %s, skipped", seen_name)

def find_exif(self) -> tuple:
ftyp = self.expect_parse('ftyp')
assert ftyp.major_brand == b'heic'
Expand All @@ -277,8 +314,17 @@ def find_exif(self) -> tuple:
# b'Exif\x00\x00' (without APP1 marker, e.g. iOS)
# according to "ISO/IEC 23008-12, 2017-12", both of them are legal
exif_tiff_header_offset = self.get32()
assert exif_tiff_header_offset >= 6
assert self.get(exif_tiff_header_offset)[-6:] == b'Exif\x00\x00'
offset = self.file_handle.tell()
endian = self.file_handle.read(1)

if exif_tiff_header_offset == 0:
# This case was found in HMD Nokia 8.3 5G heic photos.
# The TIFF header just sits there without any 'Exif'.

offset = 0
endian = '?' # Haven't got Endian info yet
else:
assert exif_tiff_header_offset >= 6
assert self.get(exif_tiff_header_offset)[-6:] == b'Exif\x00\x00'
offset = self.file_handle.tell()
endian = str(self.file_handle.read(1))

return offset, endian