Add ONNX export optimization support for ModernBERT #2177

amas0 · 2025-02-03T23:57:43Z

Feature request

Release v1.24.0 successfully supports exporting a ModernBERT model to ONNX; however, this support does not extend to enabling optimizations via the --optimize flag in optimum-cli.

I'm not sure how much work is needed to enable this in a more formal capacity, but a very brief local attempt of mine at simply adding "modernbert" to:

optimum/optimum/onnxruntime/utils.py

Lines 101 to 149 in afff2fa

    
           class ORTConfigManager: 
        
               """ 
        
               A class that contains all the information needed by ONNX Runtime optimization for a given model type. 
        
               Attributes: 
        
                   _conf (`Dict[str]`): 
        
                       A dictionary mapping each supported model type to the corresponding ONNX Runtime model type. 
        
               """ 
        
               # Contribution note: Please add new models in alphabetical order 
        
               # TODO: for encoder-decoder models, validate if bert or gpt2 optimization is better 
        
               _conf = { 
        
                   "albert": "bert", 
        
                   "bart": "bart", 
        
                   "bert": "bert", 
        
                   "big-bird": "bert", 
        
                   "bigbird-pegasus": "bart", 
        
                   "blenderbot": "bert", 
        
                   "bloom": "gpt2", 
        
                   "camembert": "bert", 
        
                   "codegen": "gpt2", 
        
                   "deberta": "bert", 
        
                   "deberta-v2": "bert", 
        
                   "distilbert": "bert", 
        
                   "electra": "bert", 
        
                   "gpt2": "gpt2", 
        
                   "gpt-bigcode": "gpt2", 
        
                   "gpt-neo": "gpt2", 
        
                   "gpt-neox": "gpt2", 
        
                   "gptj": "gpt2", 
        
                   "granite": "gpt2", 
        
                   "longt5": "bert", 
        
                   "llama": "gpt2", 
        
                   "marian": "bart", 
        
                   "mbart": "bart", 
        
                   "mistral": "gpt2", 
        
                   "mpnet": "bert", 
        
                   "mt5": "bart", 
        
                   "m2m-100": "bart", 
        
                   "nystromformer": "bert", 
        
                   "pegasus": "bert", 
        
                   "roberta": "bert", 
        
                   "segformer": "vit", 
        
                   "t5": "bert", 
        
                   "vit": "vit", 
        
                   "whisper": "bart", 
        
                   "xlm-roberta": "bert", 
        
                   "pix2struct": "vit", 
        
               }

and

optimum/optimum/utils/normalized_config.py

Lines 233 to 294 in afff2fa

    
           _conf = { 
        
               "albert": NormalizedTextConfig, 
        
               "bart": BartLikeNormalizedTextConfig, 
        
               "bert": NormalizedTextConfig, 
        
               "big-bird": NormalizedTextConfig, 
        
               "bigbird-pegasus": BartLikeNormalizedTextConfig, 
        
               "blenderbot": BartLikeNormalizedTextConfig, 
        
               "blenderbot-small": BartLikeNormalizedTextConfig, 
        
               "bloom": NormalizedTextConfig.with_args(num_layers="n_layer"), 
        
               "falcon": NormalizedTextConfig, 
        
               "camembert": NormalizedTextConfig, 
        
               "codegen": GPT2LikeNormalizedTextConfig, 
        
               "cvt": NormalizedVisionConfig, 
        
               "deberta": NormalizedTextConfig, 
        
               "deberta-v2": NormalizedTextConfig, 
        
               "deit": NormalizedVisionConfig, 
        
               "distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"), 
        
               "donut-swin": NormalizedVisionConfig, 
        
               "electra": NormalizedTextConfig, 
        
               "encoder-decoder": NormalizedEncoderDecoderConfig, 
        
               "gemma": NormalizedTextConfigWithGQA, 
        
               "gpt2": GPT2LikeNormalizedTextConfig, 
        
               "gpt-bigcode": GPTBigCodeNormalizedTextConfig, 
        
               "gpt-neo": NormalizedTextConfig.with_args(num_attention_heads="num_heads"), 
        
               "gpt-neox": NormalizedTextConfig, 
        
               "gptj": GPT2LikeNormalizedTextConfig, 
        
               "imagegpt": GPT2LikeNormalizedTextConfig, 
        
               "llama": NormalizedTextConfigWithGQA, 
        
               "longt5": T5LikeNormalizedTextConfig, 
        
               "marian": BartLikeNormalizedTextConfig, 
        
               "markuplm": NormalizedTextConfig, 
        
               "mbart": BartLikeNormalizedTextConfig, 
        
               "mistral": NormalizedTextConfigWithGQA, 
        
               "mixtral": NormalizedTextConfigWithGQA, 
        
               "mpnet": NormalizedTextConfig, 
        
               "mpt": MPTNormalizedTextConfig, 
        
               "mt5": T5LikeNormalizedTextConfig, 
        
               "m2m-100": BartLikeNormalizedTextConfig, 
        
               "nystromformer": NormalizedTextConfig, 
        
               "opt": NormalizedTextConfig, 
        
               "pegasus": BartLikeNormalizedTextConfig, 
        
               "pix2struct": Pix2StructNormalizedTextConfig, 
        
               "phi": NormalizedTextConfig, 
        
               "phi3": NormalizedTextConfigWithGQA, 
        
               "phi3small": NormalizedTextConfigWithGQA, 
        
               "poolformer": NormalizedVisionConfig, 
        
               "regnet": NormalizedVisionConfig, 
        
               "resnet": NormalizedVisionConfig, 
        
               "roberta": NormalizedTextConfig, 
        
               "segformer": NormalizedSegformerConfig, 
        
               "speech-to-text": SpeechToTextLikeNormalizedTextConfig, 
        
               "splinter": NormalizedTextConfig, 
        
               "t5": T5LikeNormalizedTextConfig, 
        
               "trocr": TrOCRLikeNormalizedTextConfig, 
        
               "vision-encoder-decoder": NormalizedEncoderDecoderConfig, 
        
               "vit": NormalizedVisionConfig, 
        
               "whisper": WhisperLikeNormalizedTextConfig, 
        
               "xlm-roberta": NormalizedTextConfig, 
        
               "yolos": NormalizedVisionConfig, 
        
               "qwen2": NormalizedTextConfig, 
        
               "granite": NormalizedTextConfigWithGQA, 
        
           }

with the former mapping to "bert" and the latter mapping to NormalizedTextConfig, seemed to allow me to export the model with optimizations. In my brief testing after that, I didn't notice any glaring issues with the output and observed some expected speedups.

Motivation

I would like to export an optimized ONNX version of my ModernBERT model.

Your contribution

I'd be happy to submit a PR if given more information on how this support is typically added.

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add ONNX export optimization support for ModernBERT #2177

Add ONNX export optimization support for ModernBERT #2177

amas0 commented Feb 3, 2025

Add ONNX export optimization support for ModernBERT #2177

Add ONNX export optimization support for ModernBERT #2177

Comments

amas0 commented Feb 3, 2025

Feature request

Motivation

Your contribution