diff --git a/index.bs b/index.bs index fad5a05..1d23843 100755 --- a/index.bs +++ b/index.bs @@ -79,118 +79,137 @@ Metadata Order: This version, !*, *
 url: https://www.iso.org/standard/66067.html; spec: HEIF; type: dfn;
-    text: colr
+    text: aux_type
+    text: auxC
+    text: AuxiliaryTypeInfoBox
+    text: AuxiliaryTypeProperty
+    text: auxl
+    text: bits_per_channel
+    text: cdsc
+    text: cmex
+    text: cmin
+    text: derived image item
+    text: dimg
+    text: grid
+    text: image_height
+    text: image_width
+    text: imir
+    text: irot
+    text: ispe
+    text: layer_id
+    text: lsel
     text: mif1
     text: msf1
-    text: pasp
+    text: ndwt
     text: pict
+    text: PixelInformationProperty
     text: pixi
-    text: ispe
-    text: lsel
-    text: irot
-    text: imir
-    text: clap
-    text: cclv
-    text: clli
-    text: mdcv
+    text: prem
     text: reve
-    text: amve
-    text: ndwt
-    text: cmin
-    text: cmex
-    text: dimg
-    text: layer_id
-    text: image_width
-    text: image_height
     text: ster
+    text: thmb
     text: tmap
-    text: derived image item
-    text: aux_type
-    text: AuxiliaryTypeInfoBox
-    text: AuxiliaryTypeProperty
-    text: bits_per_channel
-    text: PixelInformationProperty
 
 url: https://www.iso.org/standard/68960.html; spec: ISOBMFF; type: dfn;
+    text: altr
+    text: amve
+    text: cclv
+    text: clap
+    text: clli
+    text: colour_type
+    text: ColourInformationBox
+    text: colr
     text: compatible_brands
+    text: ContentLightLevelBox
+    text: dinf
+    text: dref
     text: FileTypeBox
-    text: major_brand
-    text: SingleItemTypeReferenceBox
-    text: SingleItemTypeReferenceBoxLarge
-    text: ItemReferenceBox
-    text: reference_count
+    text: free
     text: from_item_ID
-    text: to_item_ID
-    text: nclx
-    text: sync
-    text: iloc
-    text: mdat
+    text: ftyp
+    text: full_range_flag
+    text: GroupsListBox
+    text: grpl
+    text: hdlr
     text: idat
-    text: altr
+    text: iinf
+    text: iloc
+    text: infe
+    text: ipco
+    text: ipma
+    text: iprp
+    text: iref
+    text: ItemReferenceBox
+    text: major_brand
+    text: MasteringDisplayColourVolumeBox
     text: matrix_coefficients
-    text: full_range_flag
-    text: colour_type
+    text: mdat
+    text: mdcv
     text: meta
-    text: free
+    text: nclx
+    text: pasp
+    text: pitm
+    text: reference_count
+    text: SingleItemTypeReferenceBox
+    text: SingleItemTypeReferenceBoxLarge
     text: skip
-    text: ItemPropertyContainerBox
-    text: MasteringDisplayColourVolumeBox
-    text: ContentLightLevelBox
+    text: sync
+    text: to_item_ID
 
 url: https://www.iso.org/standard/74417.html; spec: MIAF; type: dfn;
+    text: edit-lists
+    text: grid-limit
+    text: matched-duration
     text: miaf
-    text: primary image item
-    text: MIAF image item
-    text: MIAF image sequence
     text: MIAF auxiliary image item
     text: MIAF auxiliary image sequence
+    text: MIAF image item
+    text: MIAF image sequence
+    text: primary image item
     text: self-containment
-    text: grid-limit
     text: single-track
-    text: edit-lists
-    text: matched-duration
 
 url: https://aomediacodec.github.io/av1-isobmff/; spec: AV1-ISOBMFF; type: dfn;
-    text: AV1CodecConfigurationBox
     text: AV1 Sample
     text: AV1 Track
+    text: AV1CodecConfigurationBox
 
 url: https://aomediacodec.github.io/av1-spec/av1-spec.pdf; spec: AV1; type: dfn;
     text: AV1 bitstream
     text: AV1 Frame
-    text: Sequence Header OBU
-    text: Metadata OBU
-    text: Temporal Unit
-    text: Operating Point
+    text: choose_operating_point
+    text: color_range
+    text: FrameHeight
     text: Intra Frame
+    text: max_frame_height_minus1
+    text: max_frame_width_minus1
+    text: Metadata OBU
     text: mono_chrome
-    text: color_range
-    text: still_picture
-    text: reduced_still_picture_header
+    text: Operating Point
     text: operating_points_cnt_minus_1
-    text: choose_operating_point
-    text: spatial_id
-    text: seq_level_idx
-    text: render_width_minus1
+    text: reduced_still_picture_header
     text: render_height_minus1
+    text: render_width_minus1
+    text: seq_level_idx
+    text: Sequence Header OBU
+    text: spatial_id
+    text: still_picture
+    text: Temporal Unit
     text: UpscaledWidth
-    text: FrameHeight
-    text: max_frame_width_minus1
-    text: max_frame_height_minus1
 

Scope

-[[!AV1]] defines the syntax and semantics of an [=AV1 bitstream=]. The AV1 Image File Format (AVIF) defined in this document supports the storage of a subset of the syntax and semantics of an [=AV1 bitstream=] in a [[!HEIF]] file. +[[!AV1]] defines the syntax and semantics of an [=AV1 bitstream=]. The AV1 Image File Format (AVIF) defined in this document supports the storage of a subset of the syntax and semantics of an [=AV1 bitstream=] in a [[!HEIF]] file. The [=AV1 Image File Format=] defines multiple profiles, which restrict the allowed syntax and semantics of the [=AV1 bitstream=] with the goal to improve interoperability, especially for hardware implementations. The profiles defined in this specification follow the conventions of the [[!MIAF]] specification. -Images encoded with [[!AV1]] and not meeting the restrictions of the defined profiles may still be compliant to this [=AV1 Image File Format=] if they adhere to the general AVIF requirements. +Images encoded with [[!AV1]] and not meeting the restrictions of the defined profiles may still be compliant to this [=AV1 Image File Format=] if they adhere to the general [=/AVIF=] requirements. The [=AV1 Image File Format=] supports High Dynamic Range (HDR) and Wide Color Gamut (WCG) images as well as Standard Dynamic Range (SDR). It supports monochrome images as well as multi-channel images with all the bit depths and color spaces specified in [[!AV1]], and other bit depths with [=Sample Transform Derived Image Items=]. The [=AV1 Image File Format=] also supports transparency (alpha) and other types of data such as depth maps through auxiliary [=AV1 bitstreams=]. The [=AV1 Image File Format=] also supports multi-layer images as specified in [[!AV1]] to be stored both in image items and image sequences. The [=AV1 Image File Format=] supports progressive image decoding through layered images. -An AVIF file is designed to be a conformant [[!HEIF]] file for both image items and image sequences. 
Specifically, this specification follows the recommendations given in "Annex I: Guidelines On Defining New Image Formats and Brands" of [[!HEIF]]. +An AVIF file is designed to be a conformant [[!HEIF]] file for both image items and image sequences. Specifically, this specification follows the recommendations given in "Annex I: Guidelines On Defining New Image Formats and Brands" of [[!HEIF]]. This specification reuses syntax and semantics used in [[!AV1-ISOBMFF]]. @@ -224,8 +243,8 @@ The syntax and semantics of the AV1ItemConfigurationProperty a - [=Sequence Header OBUs=] should not be present in the [=AV1ItemConfigurationProperty=]. - If a [=Sequence Header OBU=] is present in the [=AV1ItemConfigurationProperty=], it shall match the [=Sequence Header OBU=] in the [=AV1 Image Item Data=]. - The values of the fields in the [=AV1ItemConfigurationProperty=] shall match those of the [=Sequence Header OBU=] in the [=AV1 Image Item Data=]. - - The values of the bit depth and the number of channels derived from the [=AV1ItemConfigurationProperty=] shall match the [=PixelInformationProperty=] if present. - - [=Metadata OBUs=], if present, shall match the values given in other item properties, such as the [=MasteringDisplayColourVolumeBox=] or [=ContentLightLevelBox=]. + - The values of the bit depth and the number of channels derived from the [=AV1ItemConfigurationProperty=] shall match the [=PixelInformationProperty=] ('[=pixi=]') if present. + - [=Metadata OBUs=], if present, shall match the values given in other item properties, such as the [=MasteringDisplayColourVolumeBox=] ('[=mdcv=]') or [=ContentLightLevelBox=] ('[=clli=]'). This property should be marked as essential. @@ -234,17 +253,17 @@ The syntax and semantics of the AV1ItemConfigurationProperty a The semantics of the '[=ispe=]' property as defined in [[!HEIF]] apply. 
More specifically, for [[!AV1]] images, the values of [=image_width=] and [=image_height=] shall respectively equal the values of [=UpscaledWidth=] and [=FrameHeight=] as defined in [[!AV1]] but for a specific frame in the item payload. The exact frame depends on the presence and content of the '[=lsel=]' and [=OperatingPointSelectorProperty=] properties as follows: - In the absence of a '[=lsel=]' property associated with the item, or if it is present and its [=layer_id=] value is set to 0xFFFF: - - If no [=OperatingPointSelectorProperty=] is associated with the item, the '[=ispe=]' property shall document the dimensions of the last frame decoded when processing the operating point whose index is 0. + - If no [=OperatingPointSelectorProperty=] is associated with the item, the '[=ispe=]' property shall document the dimensions of the last frame decoded when processing the [=operating point=] whose index is 0. - - If an [=OperatingPointSelectorProperty=] is associated with the item, the '[=ispe=]' property shall document the dimensions of the last frame decoded when processing the corresponding operating point. + - If an [=OperatingPointSelectorProperty=] is associated with the item, the '[=ispe=]' property shall document the dimensions of the last frame decoded when processing the corresponding [=operating point=]. - NOTE: The dimensions of possible intermediate output images might not match the ones given in the '[=ispe=]' property. If they display these intermediate images, renderers are expected to scale the output image to match the '[=ispe=]' property. + NOTE: The dimensions of possible intermediate output images might not match the ones given in the '[=ispe=]' property. If renderers display these intermediate images, they are expected to scale the output image to match the '[=ispe=]' property. 
- If a '[=lsel=]' property is associated with an item and its [=layer_id=] is different from 0xFFFF, the '[=ispe=]' property documents the dimensions of the output frame produced by decoding the corresponding layer. NOTE: The dimensions indicated in the '[=ispe=]' property might not match the values [=max_frame_width_minus1=]+1 and [=max_frame_height_minus1=]+1 indicated in the AV1 bitstream. -NOTE: The values of [=render_width_minus1=] and [=render_height_minus1=] possibly present in the AV1 bistream are not exposed at the AVIF container level. +NOTE: The values of [=render_width_minus1=] and [=render_height_minus1=] possibly present in the AV1 bitstream are not exposed at the [=/AVIF=] container level.

Clean Aperture Property

@@ -254,22 +273,9 @@ The semantics of the clean aperture property ('[=clap=]') as define

Other Item Properties

-In addition to the Image Properties defined in this document, [=AV1 image items=] MAY also be associated with item properties defined in other specifications such as [[!HEIF]] and [[!MIAF]]. Examples of commonly used item properties are: - - '[=colr=]' - - '[=pixi=]' - - '[=pasp=]' - - '[=irot=]' - - '[=imir=]' - - '[=clli=]' - - '[=cclv=]' - - '[=mdcv=]' - - '[=amve=]' - - '[=reve=]' - - '[=ndwt=]' - - '[=cmin=]' - - '[=cmex=]' - -In general, it is recommended to use properties instead of [=Metadata OBUs=] in the [=AV1ItemConfigurationProperty=]. +In addition to the Image Properties defined in this document, [=AV1 image items=] may also be associated with item properties defined in other specifications such as [[!HEIF]] and [[!MIAF]]. Commonly used item properties can be found in [[#avif-required-boxes]] and [[#avif-required-boxes-additional]]. + +In general, it is recommended to use item properties instead of [=Metadata OBUs=] in the [=AV1ItemConfigurationProperty=].

AV1 Layered Image Items

@@ -277,11 +283,11 @@ In general, it is recommended to use properties instead of [=Metadata OBUs=] in [[!AV1]] supports encoding a frame using multiple spatial layers. A spatial layer may improve the resolution or quality of the image decoded based on one or more of the previous layers. A layer may also provide an image that does not depend on the previous layers. Additionally, not all layers are expected to produce an image meant to be rendered. Some decoded images may be used only as intermediate decodes. Finally, layers are grouped into one or more [=Operating Points=]. The [=Sequence Header OBU=] defines the list of [=Operating Points=], provides required decoding capabilities, and indicates which layers form each [=Operating Point=]. -[[!AV1]] delegates the selection of which [=Operating Point=] to process to the application, by means of a function called choose_operating_point(). AVIF defines the [=OperatingPointSelectorProperty=] to control this selection. In the absence of an [=OperatingPointSelectorProperty=] associated with an [=AV1 Image Item=], the AVIF renderer is free to process any [=Operating Point=] present in the [=AV1 Image Item Data=]. In particular, when the [=AV1 Image Item=] is composed of a unique [=Operating Point=], the [=OperatingPointSelectorProperty=] should not be present. If an [=OperatingPointSelectorProperty=] is associated with an [=AV1 Image Item=], the [=op_index=] field indicates which [=Operating Point=] is expected to be processed for this item. +[[!AV1]] delegates the selection of which [=Operating Point=] to process to the application, by means of a function called choose_operating_point(). [=/AVIF=] defines the [=OperatingPointSelectorProperty=] to control this selection. In the absence of an [=OperatingPointSelectorProperty=] associated with an [=AV1 Image Item=], the [=/AVIF=] renderer is free to process any [=Operating Point=] present in the [=AV1 Image Item Data=]. 
In particular, when the [=AV1 Image Item=] is composed of a unique [=Operating Point=], the [=OperatingPointSelectorProperty=] should not be present. If an [=OperatingPointSelectorProperty=] is associated with an [=AV1 Image Item=], the [=op_index=] field indicates which [=Operating Point=] is expected to be processed for this item. -NOTE: When an author wants to offer the ability to render multiple [=Operating Points=] from the same AV1 image (e.g. in the case of multi-view images), multiple [=AV1 Image Items=] can be created that share the same [=AV1 Image Item Data=] but have different [=OperatingPointSelectorProperty=]s. +NOTE: When an author wants to offer the ability to render multiple [=Operating Points=] from the same AV1 image (e.g. in the case of multi-view images), multiple [=AV1 Image Items=] can be created that share the same [=AV1 Image Item Data=] but have different [=OperatingPointSelectorProperties=]. -[[!AV1]] expects the renderer to display only one frame within the selected [=Operating Point=], which should be the highest spatial layer that is both within the [=Operating Point=] and present within the temporal unit, but [[!AV1]] leaves the option for other applications to set their own policy about which frames are output, as defined in the general output process. AVIF sets a different policy, and defines how the '[=lsel=]' property (mandated by [[!HEIF]] for layered images) is used to control which layer is rendered. According to [[!HEIF]], the interpretation of the [=layer_id=] field in the '[=lsel=]' property is codec specific. In this specification, the value 0xFFFF is reserved for a special meaning. If a '[=lsel=]' property is associated with an [=AV1 Image Item=] but its [=layer_id=] value is set to 0xFFFF, the renderer is free to render either only the output image of the highest spatial layer, or to render all output images of all the intermediate layers and the highest spatial layer, resulting in a form of progressive decoding. 
If a '[=lsel=]' property is associated with an [=AV1 Image Item=] and the value of [=layer_id=] is not 0xFFFF, the renderer is expected to render only the output image for that layer. +[[!AV1]] expects the renderer to display only one frame within the selected [=Operating Point=], which should be the highest spatial layer that is both within the [=Operating Point=] and present within the temporal unit, but [[!AV1]] leaves the option for other applications to set their own policy about which frames are output, as defined in the general output process. [=/AVIF=] sets a different policy, and defines how the '[=lsel=]' property (mandated by [[!HEIF]] for layered images) is used to control which layer is rendered. According to [[!HEIF]], the interpretation of the [=layer_id=] field in the '[=lsel=]' property is codec specific. In this specification, the value 0xFFFF is reserved for a special meaning. If a '[=lsel=]' property is associated with an [=AV1 Image Item=] but its [=layer_id=] value is set to 0xFFFF, the renderer is free to render either only the output image of the highest spatial layer, or to render all output images of all the intermediate layers and the highest spatial layer, resulting in a form of progressive decoding. If a '[=lsel=]' property is associated with an [=AV1 Image Item=] and the value of [=layer_id=] is not 0xFFFF, the renderer is expected to render only the output image for that layer. NOTE: When such a progressive decoding of the layers within an [=Operating Point=] is not desired or when an author wants to expose each layer as a specific item, multiple [=AV1 Image Items=] sharing the same [=AV1 Image Item Data=] can be created and associated with different '[=lsel=]' properties, each with a different value of [=layer_id=]. @@ -301,7 +307,7 @@ NOTE: When such a progressive decoding of the layers within an [=Operating Point
Description
-An OperatingPointSelectorProperty may be associated with an [=AV1 Image Item=] to provide the index of the operating point to be processed for this item. If associated, it shall be marked as essential. +An OperatingPointSelectorProperty may be associated with an [=AV1 Image Item=] to provide the index of the [=operating point=] to be processed for this item. If associated, it shall be marked as essential.
Syntax
@@ -313,7 +319,7 @@ class OperatingPointSelectorProperty extends ItemProperty('a1op') {
Semantics
-op_index indicates the index of the operating point to be processed for this item. Its value shall be between 0 and [=operating_points_cnt_minus_1=] inclusive. +op_index indicates the index of the [=operating point=] to be processed for this item. Its value shall be between 0 and [=operating_points_cnt_minus_1=] inclusive.
Layer Selector Property
@@ -376,7 +382,7 @@ NOTE: The size of the last layer can be determined by subtracting the sum of the

An AV1 Alpha Image Item (respectively an AV1 Alpha Image Sequence) is an [=AV1 Auxiliary Image Item=] (respectively an [=AV1 Auxiliary Image Sequence=]), and as defined in [[!MIAF]], with the [=aux_type=] field of the [=AuxiliaryTypeProperty=] (respectively [=AuxiliaryTypeInfoBox=]) set to urn:mpeg:mpegB:cicp:systems:auxiliary:alpha. An [=AV1 Alpha Image Item=] (respectively an [=AV1 Alpha Image Sequence=]) shall be encoded with the same bit depth as the associated master [=AV1 Image Item=] (respectively [=AV1 Image Sequence=]).

-

For [=AV1 Alpha Image Item=] and [=AV1 Alpha Image Sequence=], the ColourInformationBox should be omitted. If present, readers shall ignore it.

+

For [=AV1 Alpha Image Items=] and [=AV1 Alpha Image Sequences=], the [=ColourInformationBox=] ('[=colr=]') should be omitted. If present, readers shall ignore it.

An AV1 Depth Image Item (respectively an AV1 Depth Image Sequence) is an [=AV1 Auxiliary Image Item=] (respectively an [=AV1 Auxiliary Image Sequence=]), and as defined in [[!MIAF]], with the [=aux_type=] field of the [=AuxiliaryTypeProperty=] (respectively [=AuxiliaryTypeInfoBox=]) set to urn:mpeg:mpegB:cicp:systems:auxiliary:depth.

@@ -384,15 +390,19 @@ NOTE: [[!AV1]] supports encoding either 3-component images (whose semantics are

Derived Image Items

+

Grid Derived Image Item

+ +A grid derived image item ('[=grid=]') as defined in [[!HEIF]] may be used in an [=AVIF file=]. +

Tone Map Derived Image Item

-A tone map derived image item ('[=tmap=]') as defined in [[!HEIF]] may be used in an [=AVIF=] file. When present, the base image item and the '[=tmap=]' image item should be grouped together by an '[=AVIF/altr=]' entity group as recommended in [[!HEIF]]. +A tone map derived image item ('[=tmap=]') as defined in [[!HEIF]] may be used in an [=AVIF file=]. When present, the base image item and the '[=tmap=]' image item should be grouped together by an '[=altr=]' (see [[#altr-group]]) entity group as recommended in [[!HEIF]].

Sample Transform Derived Image Item

-In these sections, a "sample" refers to the value of a pixel for a given channel. +With a [=Sample Transform Derived Image Item=], pixels at the same position in multiple input image items can be combined into a single output pixel using basic mathematical operations. This can for example be used to work around codec limitations or for storing alterations to an image as non-destructive residuals. With a [=Sample Transform Derived Image Item=] it is possible for [=/AVIF=] to support 16 or more bits of precision per sample, while still offering backward compatibility through a regular 8 to 12-bit [=AV1 Image Item=] containing the most significant bits of each sample. -With a Sample Transform Derived Image Item, pixels at the same position in multiple input image items can be combined into a single output pixel using basic mathematical operations. This makes it possible for [=AVIF=] to support 16 or more bits of precision per sample, while still offering backward compatibility through a regular 8 to 12-bit [=AV1 Image Item=] containing the most significant bits of each sample. +In these sections, a "sample" refers to the value of a pixel for a given channel.
Definition
@@ -401,7 +411,7 @@ When a [=derived image item=] is of type reserved shall be equal to 0. The value of [=sato/reserved=] shall be ignored by readers. -bit_depth determines the precision (from 8 to 64 bits, see Table 1) of the signed integer temporary variable supporting the intermediate results of the operations. It also determines the precision of the stack elements and the field size of the [=sato/constant=] fields. This intermediate precision shall be high enough so that all input sample values fit into that signed bit depth. +bit_depth determines the precision (from 8 to 64 bits, see Table 1) of the signed integer temporary variable supporting the intermediate results of the operations. It also determines the precision of the stack elements and the field size of the [=sato/constant=] fields. This intermediate precision shall be high enough so that all input sample values fit into that signed bit depth. - +
@@ -474,11 +484,11 @@ aligned(8) class SampleTransform { The result of any computation underflowing or overflowing the intermediate bit depth is replaced by -2[=sato/num_bits=]-1 and 2[=sato/num_bits=]-1-1, respectively. Encoder implementations should not create files leading to potential computation underflow or overflow. Decoder implementations shall check for computation underflow or overflow and clamp the results accordingly. Computations with [=sato/operands=] of negative values use the two’s-complement representation. -token_count is the expected number of [=sato/tokens=] to read. +token_count is the expected number of [=sato/tokens=] to read. The value of [=sato/token_count=] shall be greater than 0. -token determines the type of the operand ([=sato/constant=] or input image item sample) or the operator (how to transform one or two [=sato/operands=] into the result). See Table 2. Readers shall ignore a [=Sample Transform Derived Image Item=] with a reserved [=sato/token=] value. +token determines the type of the operand ([=sato/constant=] or input image item sample) or the operator (how to transform one or two [=sato/operands=] into the result). See Table 2. Readers shall ignore a [=Sample Transform Derived Image Item=] with a reserved [=sato/token=] value. -
 - Table 1 - Mapping from [=sato/bit_depth=] to the intermediate bit depth. + Table 1 - Mapping from [=sato/bit_depth=] to the intermediate bit depth ([=sato/num_bits=]).
+
@@ -679,15 +689,15 @@ Note: Because each [=sato/operator=] pops one or two elements and then pushes on

Entity groups

-The GroupsListBox ('grpl') defined in [[!ISOBMFF]] may be used to group multiple image items in a file together. The type of the group describes how the image items are related. Decoders should ignore groups of unknown type. +The [=GroupsListBox=] ('[=grpl=]') defined in [[!ISOBMFF]] may be used to group multiple image items or tracks in a file together. The type of the group describes how the image items or tracks are related. Decoders should ignore groups of unknown type. -

'[=/altr=]' group

+

'[=altr=]' group

-The 'altr' entity group as defined in [[!ISOBMFF]] may be used to mark multiple items as alternatives to each other. Only one item in the '[=AVIF/altr=]' group should be played or processed. This grouping is useful for defining a fallback for parsers when new types of items or essential item properties are introduced. +The '[=altr=]' entity group as defined in [[!ISOBMFF]] may be used to mark multiple items or tracks as alternatives to each other. Only one item or track in the '[=altr=]' group should be played or processed. This grouping is useful for defining a fallback for parsers when new types of items or essential item properties are introduced. -

'[=/ster=]' group

+

'[=ster=]' group

-The 'ster' entity group as defined in [[!HEIF]] may be used to indicate that two image items form a stereo pair suitable for stereoscopic viewing. +The '[=ster=]' entity group as defined in [[!HEIF]] may be used to indicate that two image items form a stereo pair suitable for stereoscopic viewing.

Brands, Internet media types and file extensions

@@ -706,12 +716,12 @@ Files that indicate this brand in the [=compatible_brands=] field o - The [=primary image item=] shall be an [=AV1 Image Item=] or be a derived image that references directly or indirectly one or more items that all are [=AV1 Image Items=]. - [=AV1 auxiliary image items=] may be present in the file. -Files that conform with these constraints should include the brand [=avif=] in the [=compatible_brands=] field of the [=FileTypeBox=]. +Files that conform with these constraints should include the brand [=AVIF Image brand/avif=] in the [=compatible_brands=] field of the [=FileTypeBox=]. Additionally, the brand avio is defined. If the file indicates the brand [=avio=] in the [=compatible_brands=] field of the [=FileTypeBox=], then the [=primary image item=] or all the items referenced by the [=primary image item=] shall be [=AV1 image items=] made only of [=Intra Frames=]. Conversely, if the previous constraint applies, the brand [=avio=] should be used in the [=compatible_brands=] field of the [=FileTypeBox=].

AVIF image sequence brands

-The brand to identify AVIF image sequences is avis. +The brand to identify [=AV1 image sequences=] is avis. Files that indicate this brand in the [=compatible_brands=] field of the [=FileTypeBox=] shall comply with the following: - they shall contain one or more [=AV1 image sequences=]. @@ -727,8 +737,8 @@ NOTE: As defined in [[!MIAF]], a file that is primarily an image sequence still The following constraints are common to files compliant with this specification: - The file shall be compliant with the [[!MIAF]] specification and list '[=miaf=]' in the [=compatible_brands=] field of the [=FileTypeBox=]. - - The file shall list '[=avif=]' or '[=avis=]' in the [=compatible_brands=] field of the [=FileTypeBox=]. - - Transformative properties shall not be associated with items in a derivation chain (as defined in [[!MIAF]]) that serves as an input to a grid item. For example, if a file contains a grid item and its referenced coded image items, cropping, mirroring or rotation transformations are only permitted on the grid item itself. + - The file shall list '[=AVIF Image brand/avif=]' or '[=avis=]' in the [=compatible_brands=] field of the [=FileTypeBox=]. + - Transformative properties shall not be associated with items in a derivation chain (as defined in [[!MIAF]]) that serves as an input to a [=grid derived image item=]. For example, if a file contains a grid item and its referenced coded image items, cropping, mirroring or rotation transformations are only permitted on the grid item itself. NOTE: This constraint further restricts files compared to [[!MIAF]]. 
@@ -742,13 +752,13 @@ The profiles defined in this section are for enabling interoperability between [ If '[=avis=]' is declared in the [=FileTypeBox=] and a profile is declared in the [=FileTypeBox=], the profile shall also enable decoding of at least one image sequence track. The profile should allow decoding of any associated auxiliary image sequence tracks, unless it is acceptable to decode the image sequence without its auxiliary image sequence tracks. -It is possible for a file compliant to this [=AV1 Image File Format=] to not be able to declare an AVIF profile, if the corresponding AV1 encoding characteristics do not match any of the defined profiles. +It is possible for a file compliant to this [=AV1 Image File Format=] to not be able to declare an [=/AVIF=] profile, if the corresponding AV1 encoding characteristics do not match any of the defined profiles. NOTE: [[!AV1]] supports 3 bit depths: 8, 10 and 12 bits, and the maximum dimensions of a coded image are 65536x65536, when [=seq_level_idx=] is set to 31 (maximum parameters level). -
If an image is encoded with dimensions (respectively a bit depth) that exceed the maximum dimensions (respectively bit depth) required by the AV1 profile and level of the AVIF profiles defined in this specification, the file will only signal general AVIF brands.
+
If an image is encoded with dimensions (respectively a bit depth) that exceed the maximum dimensions (respectively bit depth) required by the AV1 profile and level of the [=/AVIF=] profiles defined in this specification, the file will only signal general [=/AVIF=] brands.
-

AVIF Baseline Profile

+

AVIF Baseline Profile

This section defines the MIAF AV1 Baseline profile of [[!HEIF]], specifically for [[!AV1]] bitstreams, based on the constraints specified in [[!MIAF]] and identified by the brand MA1B. @@ -768,7 +778,7 @@ The following additional constraints apply to all [=AV1 Image Items=] and all [= NOTE: AV1 tiers are not constrained because timing is optional in image sequences and are not relevant in image items or collections. -NOTE: Level 5.1 is chosen for the Baseline profile to ensure that no single coded image exceeds 4k resolution, as some decoder may not be able to handle larger images. More precisely, following [[!AV1]] level definitions, coded image items compliant to the AVIF Baseline profile may not have a number of pixels greater than 8912896, a width greater than 8192 or a height greater than 4352. It is still possible to use the Baseline profile to create larger images using grid derivation. +NOTE: Level 5.1 is chosen for the Baseline profile to ensure that no single coded image exceeds 4k resolution, as some decoders may not be able to handle larger images. More precisely, following [[!AV1]] level definitions, coded image items compliant to the [=AVIF Baseline profile=] may not have a number of pixels greater than 8912896, a width greater than 8192 or a height greater than 4352. It is still possible to use the Baseline profile to create larger images using a [=grid derived image item=].
A file containing items compliant with this profile is expected to list the following brands, in any order, in the [=compatible_brands=] of the [=FileTypeBox=]: @@ -784,7 +794,7 @@ A file containing a '[=pict=]' track compliant with this profile an avis, avio, msf1, miaf, MA1B
-

AVIF Advanced Profile

+

AVIF Advanced Profile

This section defines the MIAF AV1 Advanced profile of [[!HEIF]], specifically for [[!AV1]] bitstreams, based on the constraints specified in [[!MIAF]] and identified by the brand MA1A. @@ -802,7 +812,7 @@ The following shared conditions and requirements from [[!MIAF]] should apply: The following additional constraints apply to all [=AV1 Image Items=]: - The AV1 profile shall be the High Profile and the level shall be 6.0 or lower. -NOTE: Following [[!AV1]] level definitions, coded image items compliant to the AVIF Advanced profile may not have a number of pixels greater than 35651584, a width greater than 16384 or a height greater than 8704. It is still possible to use the Advanced profile to create larger images using grid derivation. +NOTE: Following [[!AV1]] level definitions, coded image items compliant to the [=AVIF Advanced profile=] may not have a number of pixels greater than 35651584, a width greater than 16384 or a height greater than 8704. It is still possible to use the Advanced profile to create larger images using a [=grid derived image item=]. The following additional constraints apply only to [=AV1 Image Sequences=]: - The AV1 profile shall be either Main Profile or High Profile. @@ -822,12 +832,12 @@ A file containing a '[=pict=]' track compliant with this profile is

Box requirements

Image item boxes

-This section discusses the box requirements for an AVIF file containing only image items. +This section discusses the box requirements for an [=AVIF file=] containing only image items.

Minimum set of boxes

-

As indicated in [[#file-constraints]], an AVIF file is a compliant [[!MIAF]] file. As a consequence, some [[!ISOBMFF]] or [[!HEIF]] boxes are required, as indicated in the following table. The order of the boxes is indicative in the table. The specifications listed in the "Specification" -column may require a specific order for the box or for its children and shall be respected. For example, per [[!ISOBMFF]], the [=FileTypeBox=] is required to appear first in an AVIF file. +

As indicated in [[#file-constraints]], an [=AVIF file=] is a compliant [[!MIAF]] file. As a consequence, some [[!ISOBMFF]] or [[!HEIF]] boxes are required, as indicated in the following table. The order of the boxes is indicative in the table. The specifications listed in the "Specification" +column may require a specific order for the box or for its children and shall be respected. For example, per [[!ISOBMFF]], the [=FileTypeBox=] is required to appear first in an [=AVIF file=]. The "Version(s)" column in the following table lists the version(s) of the boxes allowed by this brand. With the exception of item properties marked as non-essential, other versions of the boxes shall not be used. "-" means that the box does not have a version.

Table 2 - Meaning of the value of [=sato/token=].
@@ -844,129 +854,129 @@ The "Version(s)" column in the following table lists the version(s) of the boxes - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -974,7 +984,7 @@ The "Version(s)" column in the following table lists the version(s) of the boxes

Requirements on additional image item related boxes

-

The boxes indicated in the following table may be present in an AVIF file to provide additional signaling for image items. The boxes may be present inside the box indicated in the "Containing box" column. If present, they shall use the version indicated in the table unless the box is an item property marked as non-essential. AVIF readers are expected to understand the boxes and versions listed in this table. The order of the boxes is indicative in the table. Specifications may require specific order and shall be respected. Additionally, the '[=free=]' and '[=skip=]' boxes may be present at any level in the hierarchy. AVIF readers are expected to ignore them. Additional boxes in the '[=meta=]' hierarchy not listed in the following table may also be present and may be ignored by AVIF readers.

+

The boxes indicated in the following table may be present in an [=AVIF file=] to provide additional signaling for image items. The boxes may be present inside the box indicated in the "Containing Box" column. If present, they shall use the version indicated in the table unless the box is an item property marked as non-essential. [=/AVIF=] readers are expected to understand the boxes and versions listed in this table. The order of the boxes in the table is indicative. Specifications may require a specific order, which shall be respected. Additionally, the '[=free=]' and '[=skip=]' boxes may be present at any level in the hierarchy and [=/AVIF=] readers are expected to ignore them. Additional boxes in the '[=meta=]' hierarchy not listed in the following table may also be present and may be ignored by [=/AVIF=] readers.

ftyp[=ftyp=]       -ISOBMFF[[!ISOBMFF]]  
meta[=meta=]       0ISOBMFF[[!ISOBMFF]]  
 hdlr[=hdlr=]     0ISOBMFF[[!ISOBMFF]]  
 pitm[=pitm=]     0, 1ISOBMFF[[!ISOBMFF]]  
 iloc[=iloc=]     0, 1, 2ISOBMFF[[!ISOBMFF]]  
 iinf[=iinf=]   0, 1ISOBMFF[[!ISOBMFF]]  
   infe[=infe=]   2, 3ISOBMFF[[!ISOBMFF]]  
 iprp[=iprp=]     -ISOBMFF[[!ISOBMFF]]  
   ipco[=ipco=]   -ISOBMFF[[!ISOBMFF]]  
     av1C[=/av1C=] -AVIF[=/AVIF=]  
     ispe[=ispe=] 0HEIF[[!HEIF]]  
     pixi[=pixi=] 0HEIF[[!HEIF]]  
   ipma[=ipma=]   0, 1ISOBMFF[[!ISOBMFF]]  
mdat[=mdat=]       -ISOBMFF[[!ISOBMFF]] The coded payload may be placed in '[=idat=]' rather than '[=mdat=]', in which case '[=mdat=]' is not required.
@@ -988,197 +998,237 @@ The "Version(s)" column in the following table lists the version(s) of the boxes - + - - + + - + - + - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + + - + - - + + - + - + - + - + - + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - - + + + - + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - + - - + + + + + + + + + + + + + + + + + +
dinf[=dinf=]   -ISOBMFFmeta[[!ISOBMFF]][=meta=] Used to indicate the location of the media information in a track
 dref[=dref=] 0ISOBMFF[[!ISOBMFF]]    
iref[=iref=]   0, 1ISOBMFFmeta[[!ISOBMFF]][=meta=] Used to indicate directional relationships between images or metadata
 auxl[=auxl=] -HEIF[[!HEIF]]   Used when an image is auxiliary to another image
 thmb[=thmb=] -HEIF[[!HEIF]]   Used when an image is a thumbnail of another image
 dimg[=dimg=] -HEIF[[!HEIF]]  Used when an image is derived from another imageUsed when an image is [[#derived-images|derived from another image]]
 prem[=prem=] -HEIF[[!HEIF]]  Used when when an alpha image contains premultiplied color values from another imageUsed when the color values in an image have been premultiplied with alpha values
 cdsc[=cdsc=] -HEIF[[!HEIF]]   Used to link metadata with an image
idat[=idat=]   -ISOBMFFmeta[[!ISOBMFF]][=meta=] Used to store derived image definitions
[=AVIF/grpl=][[#groups|grpl]]   -ISOBMFFmeta[[!ISOBMFF]][=meta=] Used to indicate that multiple images are semantically grouped
 [=AVIF/altr=][[#altr-group|altr]] 0ISOBMFF[[!ISOBMFF]]  Used when images in a group are alternative to each otherUsed when images in a group are alternatives to each other
 [=AVIF/ster=][[#ster-group|ster]] 0HEIF[[!HEIF]]   Used when images in a group form a stereo pair
pasp[=pasp=]   -ISOBMFFipco[[!ISOBMFF]][=ipco=] Used to signal pixel aspect ratio. If present, shall indicate a pixel aspect ratio of 1:1
colr[=colr=]   -ISOBMFFipco[[!ISOBMFF]][=ipco=] Used to signal information such as color primaries.
auxC[=auxC=]   0HEIFipco[[!HEIF]][=ipco=] Used to signal the type of an auxiliary image (e.g. alpha, depth).
clap[[#clean-aperture-property|clap]]   -ISOBMFFipco[[!ISOBMFF]][=ipco=] Used to signal cropping applied to an image
irot[=irot=]   -HEIFipco[[!HEIF]][=ipco=] Used to signal a rotation applied to an image
imir[=imir=]   -HEIFipco[[!HEIF]][=ipco=] Used to signal a mirroring applied to an image
clli[=clli=]   -ISOBMFFipco[[!ISOBMFF]][=ipco=] Used to signal HDR light level information for an image
cclv[=cclv=]   -ISOBMFFipco[[!ISOBMFF]][=ipco=] Used to signal HDR color volume for an image
mdcv[=mdcv=]   -ISOBMFFipco[[!ISOBMFF]][=ipco=] Used to signal HDR mastering information for an image
a1op[=amve=]   -AVIFipcoUsed to configure rendering of a multiple operating points image[[!ISOBMFF]][=ipco=]Used to signal the nominal ambient viewing environment for the display of the content
lsel[=reve=] 0[[!HEIF]][=ipco=]Used to signal the viewing environment in which the image was mastered
[=ndwt=] 0[[!HEIF]][=ipco=]Used to signal the nominal diffuse white luminance of the content
[=a1op=] -[=/AVIF=][=ipco=]Used to configure which operating point to select when there are multiple choices
[=lsel=]   -HEIFipco[[!HEIF]][=ipco=] Used to configure rendering of a multilayered image
a1lx[=a1lx=]   -AVIFipco[=/AVIF=][=ipco=] Used to assist reader in parsing a multilayered image
[=cmin=] 0[[!HEIF]][=ipco=]Used to signal the camera intrinsic matrix
[=cmex=] 0[[!HEIF]][=ipco=]Used to signal the camera extrinsic matrix
@@ -1207,10 +1257,11 @@ The "Version(s)" column in the following table lists the version(s) of the boxes - Clarify required versions of non-essential item properties - EDITORIAL: Add refs, fix wording and format in clauses 1,2 - EDITORIAL: Add sato, alpha, depth, progressive in Scope + - Clean up linking to base specs and duplicated information -

Appendix A: Sample Transform Derived Image Item Examples

+

Appendix A: (informative) Sample Transform Derived Image Item Examples

-This informative appendix contains example recipes for extending base AVIF features with [=Sample Transform Derived Image Items=]. +This informative appendix contains example recipes for extending base [=/AVIF=] features with [=Sample Transform Derived Image Items=].

Bit depth extension

@@ -1235,7 +1286,7 @@ Consider the following: This is equivalent to the following postfix notation (parentheses for clarity): -$sample_{output} = (256 \; sample_{1} \; \times) \; sample_{2} \; +$ +$sample_{output} = (256 \; sample_{1} \; \times) \; sample_{2} \; +$ This is equivalent to the following infix notation: @@ -1243,7 +1294,7 @@ This is equivalent to the following infix notation: Each output sample is equal to the sum of a sample of the first input image item shifted to the left by 8 and of a sample of the second input image item. This can be viewed as a bit depth extension of the first input image item by the second input image item. The first input image item contains the 8 most significant bits and the second input image item contains the 8 least significant bits of the output reconstructed image item which has a bit depth of 16, something that is impossible to achieve with a single [=AV1 image item=]. -NOTE: If the first input image item is the [=primary image item=] and is enclosed in an '[=AVIF/altr=]' group with the [=Sample Transform Derived Image Item=], the first input image item is also a backward-compatible 8-bit regular coded image item that can be used by readers that do not support [=Sample Transform Derived Image Items=] or do not need extra precision. +NOTE: If the first input image item is the [=primary image item=] and is enclosed in an '[=altr=]' group (see [[#altr-group]]) with the [=Sample Transform Derived Image Item=], the first input image item is also a backward-compatible 8-bit regular coded image item that can be used by readers that do not support [=Sample Transform Derived Image Items=] or do not need extra precision. NOTE: The second input image item loses its meaning of least significant part if any of the most significant bits changes, so the first input image item has to be losslessly encoded. The second input image item supports reasonable loss during encoding. 
@@ -1277,7 +1328,7 @@ Consider the following: This is equivalent to the following postfix notation (parentheses for clarity): -$sample_{output} = ((16 \; sample_{1} \; \times) \; sample_{2} \; +) \; 128 \; -$ +$sample_{output} = ((16 \; sample_{1} \; \times) \; sample_{2} \; +) \; 128 \; -$ This is equivalent to the following infix notation: @@ -1285,7 +1336,7 @@ This is equivalent to the following infix notation: Each output sample is equal to the sum of a sample of the first input image item shifted to the left by 4 and of a sample of the second input image item offset by -128. This can be viewed as a bit depth extension of the first input image item by the second input image item which contains the residuals to correct the precision loss of the first input image item. -NOTE: If the first input image item is the [=primary image item=] and is enclosed in an '[=AVIF/altr=]' group with the derived image item, the first input image item is also a backward-compatible 12-bit regular coded image item that can be used by decoding contexts that do not support [=Sample Transform Derived Image Items=] or do not need extra precision. +NOTE: If the first input image item is the [=primary image item=] and is enclosed in an '[=altr=]' group (see [[#altr-group]]) with the derived image item, the first input image item is also a backward-compatible 12-bit regular coded image item that can be used by decoding contexts that do not support [=Sample Transform Derived Image Items=] or do not need extra precision. NOTE: The first input image item supports reasonable loss during encoding because the second input image item "overlaps" by 4 bits to correct the loss. The second input image item supports reasonable loss during encoding.