diff --git a/llama.cpp b/llama.cpp
index 6673718932868..0e9345c8608f2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6053,7 +6053,7 @@ static bool llm_load_tensors(
     model.n_gpu_layers = n_gpu_layers;
 
     const int n_layer = hparams.n_layer;
-    const int i_gpu_start = std::max((int) hparams.n_layer - n_gpu_layers, (int) 0);
+    int i_gpu_start = std::max((int) hparams.n_layer - n_gpu_layers, (int) 0);
     bool use_mmap_buffer = true;
 
 #if defined(GGML_USE_CLBLAST)
@@ -6061,7 +6061,7 @@ static bool llm_load_tensors(
     {
         printf("\nOpenCL GPU Offload Fallback...");
         clblast_offload_fallback_layers = n_gpu_layers;
-        i_gpu_start = std::max((int64_t) hparams.n_layer, (int64_t) 0);
+        i_gpu_start = std::max((int) hparams.n_layer, (int) 0);
     }
 #endif
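
For context, a minimal standalone sketch (not part of the patch; the hard-coded values are hypothetical, the variable names mirror llm_load_tensors) of the two issues this change addresses: i_gpu_start must be non-const so the CLBlast fallback path can reassign it, and casting the std::max arguments to int keeps the deduced result type equal to the type of i_gpu_start, whereas the old (int64_t) casts produced an int64_t that narrowed on assignment.

// sketch.cpp -- illustrative only, not repository code
#include <algorithm>
#include <cstdio>

int main() {
    const int n_layer      = 32;  // hypothetical layer count
    const int n_gpu_layers = 8;   // hypothetical offload request

    // Before the patch this was declared const, so the fallback
    // reassignment below would not compile.
    int i_gpu_start = std::max((int) n_layer - n_gpu_layers, (int) 0);

    // CLBlast fallback: treat every layer as CPU-resident by pushing the
    // GPU start index past the last layer. With (int) casts, std::max
    // returns int, matching i_gpu_start exactly.
    i_gpu_start = std::max((int) n_layer, (int) 0);

    printf("i_gpu_start = %d\n", i_gpu_start);
    return 0;
}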