update example to show full output

danbev · Jan 1, 2025 · 5d69d19 · 5d69d19
1 parent bfcd9c1
commit 5d69d19
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 19 deletions.
diff --git a/examples/simple-vision-mllama/README.md b/examples/simple-vision-mllama/README.md
@@ -48,11 +48,11 @@ $ ./build/bin/llama-simple-vision-mllama -m models/llama-3-2-11b-Q4_K.gguf -ngl
 ```
 
 ```
-This image is a photograph of the New York City skyline, with the Empire State
-Building prominently visible in the center.
+This image shows a cityscape of New York City. In the center of the image is the Empire State Building, a skyscraper in Midtown Manhattan, New York City. It is known as "The Empire State" and stands at a height of 1,454 feet (443 meters). It
 
-main: decoded 23 tokens in 2.24 s, speed: 10.28 t/s
+main: decoded 60 tokens in 5.79 s, speed: 10.37 t/s
 ```
+(Note that the example is set to generate only 60 tokens, which is why the output above is cut off mid-sentence.)
 
 <details><summary>Detailed output</summary>
 
@@ -902,9 +902,6 @@ Processing tile 1
 Processing tile 2
 Processing tile 3
 aspect_ratio=6
-ggml_backend_sched_alloc_splits: failed to allocate graph, reserving (backend_ids_changed = 1)
-ggml_gallocr_reserve_n: reallocating Metal buffer from size 0.00 MiB to 2839.12 MiB
-ggml_gallocr_reserve_n: reallocating CPU buffer from size 0.00 MiB to 201.98 MiB
 
 Tile 0 first 10 values:
   [0] = -1.558688
@@ -964,7 +961,6 @@ vision encoder output[6] = -2.124158
 vision encoder output[7] = 3.160614
 vision encoder output[8] = -7.931821
 vision encoder output[9] = -4.416915
-ggml_backend_sched_alloc_splits: failed to allocate graph, reserving (backend_ids_changed = 1)
 n_img_tokens = 1
 --------- use ca_patch_embd for K and V and store in kv_cache.layer[3] ------
 --------- use ca_patch_embd for K and V and store in kv_cache.layer[8] ------
@@ -974,8 +970,6 @@ n_img_tokens = 1
 --------- use ca_patch_embd for K and V and store in kv_cache.layer[28] ------
 --------- use ca_patch_embd for K and V and store in kv_cache.layer[33] ------
 --------- use ca_patch_embd for K and V and store in kv_cache.layer[38] ------
-ggml_backend_sched_alloc_splits: failed to allocate graph, reserving (backend_ids_changed = 1)
-ggml_gallocr_reserve_n: reallocating CPU buffer from size 16.01 MiB to 100.08 MiB
 ca_patch_emd[0] = 9.585714
 ca_patch_emd[1] = 14.321547
 ca_patch_emd[2] = -3.193105
@@ -986,17 +980,14 @@ ca_patch_emd[6] = -2.124158
 ca_patch_emd[7] = 3.160614
 ca_patch_emd[8] = -7.931821
 ca_patch_emd[9] = -4.416915
-ggml_backend_sched_alloc_splits: failed to allocate graph, reserving (backend_ids_changed = 1)
-This image is a photograph of the New York City skyline, with the Empire Stateggml_backend_sched_alloc_splits: failed to allocate graph, reserving (backend_ids_changed = 0)
- Building prominently visible in the center.
+This image shows a cityscape of New York City. In the center of the image is the Empire State Building, a skyscraper in Midtown Manhattan, New York City. It is known as "The Empire State" and stands at a height of 1,454 feet (443 meters). It
+main: decoded 60 tokens in 5.79 s, speed: 10.37 t/s
 
-main: decoded 23 tokens in 2.24 s, speed: 10.28 t/s
 
-
-llama_perf_context_print:        load time =   83149.26 ms
-llama_perf_context_print: prompt eval time =    1483.60 ms /    17 tokens (   87.27 ms per token,    11.46 tokens per second)
-llama_perf_context_print:        eval time =    2225.36 ms /    23 runs   (   96.75 ms per token,    10.34 tokens per second)
-llama_perf_context_print:       total time =   85386.82 ms /    40 tokens
+llama_perf_context_print:        load time =   77683.33 ms
+llama_perf_context_print: prompt eval time =    1311.75 ms /    17 tokens (   77.16 ms per token,    12.96 tokens per second)
+llama_perf_context_print:        eval time =    5683.89 ms /    59 runs   (   96.34 ms per token,    10.38 tokens per second)
+llama_perf_context_print:       total time =   83469.91 ms /    76 tokens
 ggml_metal_free: deallocating
 ```
 

diff --git a/src/llama-vision.cpp b/src/llama-vision.cpp
@@ -1215,7 +1215,6 @@ static int32_t encode_image_with_ca_vision(ca_context & ctx,
         struct ggml_tensor * inp_raw = ggml_graph_get_tensor(gf, "inp_raw");
         ggml_backend_tensor_set(inp_raw, img_batch[0].img->data, 0, ggml_nbytes(inp_raw));
 
-        const char * name = inp_raw->name;
         float buf[10];
         ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(ctx.sched, inp_raw);
         ggml_backend_tensor_get_async(backend, inp_raw, buf, 0, sizeof(buf));