diff --git a/nemo/collections/llm/quantization/quantizer.py b/nemo/collections/llm/quantization/quantizer.py
index c8f235cf7f3f..6605c12b1b54 100644
--- a/nemo/collections/llm/quantization/quantizer.py
+++ b/nemo/collections/llm/quantization/quantizer.py
@@ -147,9 +147,8 @@ def _generate_sample(model: MegatronParallel):
 
         mcore_tokenizer = MCoreTokenizerWrappper(model.tokenizer)
         mcore_inference = model.get_inference_wrapper(
-            params_dtype=torch.bfloat16,
-            inference_batch_times_seqlen_threshold=30
-            )
+            params_dtype=torch.bfloat16, inference_batch_times_seqlen_threshold=30
+        )
 
         generated = [r.generated_text for r in generate(mcore_inference, mcore_tokenizer, prompts)]
         outputs = [prompt + generation for prompt, generation in zip(prompts, generated)]
diff --git a/nemo/collections/llm/quantization/utils.py b/nemo/collections/llm/quantization/utils.py
index e233eb02b1af..20739c872e80 100644
--- a/nemo/collections/llm/quantization/utils.py
+++ b/nemo/collections/llm/quantization/utils.py
@@ -23,7 +23,6 @@
 from nemo.utils import logging
 
 
-
 def get_modelopt_decoder_type(model: llm.GPTModel) -> str:
     """Infers the modelopt decoder type from GPTModel subclass."""
     mapping = [
@@ -36,7 +35,7 @@ def get_modelopt_decoder_type(model: llm.GPTModel) -> str:
         (llm.MixtralModel, "llama"),
         (llm.NemotronModel, "gptnext"),
         (llm.Qwen2Model, "qwen"),
-        (llm.StarcoderModel,   "gptnext"),
+        (llm.StarcoderModel, "gptnext"),
         (llm.Starcoder2Model, "gptnext"),
         (llm.Phi3Model, "phi3"),
     ]