diff --git a/llama.cpp b/llama.cpp
index 6673718932868..0e9345c8608f2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6053,7 +6053,7 @@ static bool llm_load_tensors(
     model.n_gpu_layers = n_gpu_layers;
 
     const int n_layer = hparams.n_layer;
-    const int i_gpu_start = std::max((int) hparams.n_layer - n_gpu_layers, (int) 0);
+    int i_gpu_start = std::max((int) hparams.n_layer - n_gpu_layers, (int) 0);
     bool use_mmap_buffer = true;
 
 #if defined(GGML_USE_CLBLAST)
@@ -6061,7 +6061,7 @@ static bool llm_load_tensors(
     {
         printf("\nOpenCL GPU Offload Fallback...");
         clblast_offload_fallback_layers = n_gpu_layers;
-        i_gpu_start = std::max((int64_t) hparams.n_layer, (int64_t) 0);
+        i_gpu_start = std::max((int) hparams.n_layer, (int) 0);
     }
 #endif
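
For context, a minimal standalone sketch (not part of the patch; the hard-coded values are hypothetical, the variable names mirror llm_load_tensors) of the two issues this change addresses: i_gpu_start must be non-const so the CLBlast fallback path can reassign it, and casting the std::max arguments to int keeps the deduced result type equal to the type of i_gpu_start, whereas the old (int64_t) casts produced an int64_t that narrowed on assignment.

// sketch.cpp -- illustrative only, not repository code
#include <algorithm>
#include <cstdio>

int main() {
    const int n_layer      = 32;  // hypothetical layer count
    const int n_gpu_layers = 8;   // hypothetical offload request

    // Before the patch this was declared const, so the fallback
    // reassignment below would not compile.
    int i_gpu_start = std::max((int) n_layer - n_gpu_layers, (int) 0);

    // CLBlast fallback: treat every layer as CPU-resident by pushing the
    // GPU start index past the last layer. With (int) casts, std::max
    // returns int, matching i_gpu_start exactly.
    i_gpu_start = std::max((int) n_layer, (int) 0);

    printf("i_gpu_start = %d\n", i_gpu_start);
    return 0;
}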