Merge branch 'master' into pr/8836

ggerganov · Aug 12, 2024 · 3e2eb6d · 3e2eb6d
2 parents df9e6fd + df5478f
commit 3e2eb6d
Show file tree

Hide file tree

Showing 7 changed files with 1,224 additions and 13 deletions.
diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml
@@ -6,15 +6,13 @@ on:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
   pull_request:
     paths:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

diff --git a/examples/llava/requirements.txt b/examples/llava/requirements.txt
@@ -2,4 +2,4 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 pillow~=10.2.0
 torch~=2.2.1
-torchvision==0.17.1
+torchvision~=0.17.1
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -631,6 +631,7 @@ struct server_context {
 
     bool clean_kv_cache = true;
     bool add_bos_token  = true;
+    bool has_eos_token  = false;
 
     int32_t n_ctx; // total context for all clients / slots
 
@@ -693,7 +694,7 @@ struct server_context {
         n_ctx = llama_n_ctx(ctx);
 
         add_bos_token = llama_should_add_bos_token(model);
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        has_eos_token = llama_add_eos_token(model) != 1;
 
         return true;
     }
@@ -1031,7 +1032,7 @@ struct server_context {
         {
             slot.sparams.logit_bias.clear();
 
-            if (json_value(data, "ignore_eos", false)) {
+            if (json_value(data, "ignore_eos", false) && has_eos_token) {
                 slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
             }
 

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -21129,7 +21129,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 (int64_t) info->ne[2] *
                 (int64_t) info->ne[3];
 
-            if (ne % ggml_blck_size(info->type) != 0) {
+            if (ggml_blck_size(info->type) == 0 || ne % ggml_blck_size(info->type) != 0) {
                 fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
                         __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
                 fclose(file);