From ced5bfeb3362b32d70e043d465ed8e4c8043afb5 Mon Sep 17 00:00:00 2001
From: Herman Semenov <GermanAizek@yandex.ru>
Date: Mon, 13 May 2024 20:07:53 -0500
Subject: [PATCH 1/2] Added const references for std::pair<> and std::tuple<>
 larger than 16 bytes:

- std::pair<llama_ngram, llama_ngram_cache_part> (72 bytes -> 8 bytes)
- std::tuple<std::string, float> (40 bytes -> 8 bytes)
---
 common/common.cpp      | 4 ++--
 common/ngram-cache.cpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index ba1ecf0e59c8b..3faa7c4598b44 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2669,14 +2669,14 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
     }
 
     fprintf(stream, "lora:\n");
-    for (std::tuple<std::string, float> la : params.lora_adapter) {
+    for (const std::tuple<std::string, float> & la : params.lora_adapter) {
         if (std::get<1>(la) != 1.0f) {
             continue;
         }
         fprintf(stream, "  - %s\n", std::get<0>(la).c_str());
     }
     fprintf(stream, "lora_scaled:\n");
-    for (std::tuple<std::string, float> la : params.lora_adapter) {
+    for (const std::tuple<std::string, float> & la : params.lora_adapter) {
         if (std::get<1>(la) == 1.0f) {
             continue;
         }
diff --git a/common/ngram-cache.cpp b/common/ngram-cache.cpp
index 3ca112ef1613d..29d5b344aada9 100644
--- a/common/ngram-cache.cpp
+++ b/common/ngram-cache.cpp
@@ -195,7 +195,7 @@ void llama_ngram_cache_draft(
 
 void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename) {
     std::ofstream file_out(filename, std::ios::binary);
-    for (std::pair<llama_ngram, llama_ngram_cache_part> item : ngram_cache) {
+    for (const std::pair<llama_ngram, llama_ngram_cache_part> & item : ngram_cache) {
         const llama_ngram      ngram        = item.first;
         llama_ngram_cache_part token_counts = item.second;
         GGML_ASSERT(!token_counts.empty());
@@ -255,7 +255,7 @@ llama_ngram_cache llama_ngram_cache_load(std::string & filename) {
 }
 
 void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add) {
-    for (std::pair<llama_ngram, llama_ngram_cache_part> ngram_part : ngram_cache_add) {
+    for (const std::pair<llama_ngram, llama_ngram_cache_part> & ngram_part : ngram_cache_add) {
         const llama_ngram      ngram = ngram_part.first;
         llama_ngram_cache_part  part = ngram_part.second;
 

From f2e4d92528b6e85e7d5d29ba717ae0d9725192de Mon Sep 17 00:00:00 2001
From: Herman Semenov <GermanAizek@yandex.ru>
Date: Sun, 19 May 2024 21:34:42 -0500
Subject: [PATCH 2/2] Added const references for std::pair<> and std::tuple<>
 larger than 16 bytes:

- std::pair<llama_ngram, llama_ngram_cache_part> (72 bytes -> 8 bytes)
- std::tuple<std::string, float> (40 bytes -> 8 bytes)
---
 common/common.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 3eb71ed67120c..c4bccd6c49b3b 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2679,14 +2679,14 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
     }
 
     fprintf(stream, "lora:\n");
-    for (const std::tuple<std::string, float> & la : params.lora_adapter) {
+    for (std::tuple<std::string, float> & la : params.lora_adapter) {
         if (std::get<1>(la) != 1.0f) {
             continue;
         }
         fprintf(stream, "  - %s\n", std::get<0>(la).c_str());
     }
     fprintf(stream, "lora_scaled:\n");
-    for (const std::tuple<std::string, float> & la : params.lora_adapter) {
+    for (std::tuple<std::string, float> & la : params.lora_adapter) {
         if (std::get<1>(la) == 1.0f) {
             continue;
         }