-
Notifications
You must be signed in to change notification settings - Fork 10.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
eval-callback: Example how to use eval callback for debugging (#6576)
* gguf-debug: Example how to use ggml callback for debugging * gguf-debug: no mutex, verify type, fix stride. * llama: cv eval: move cb eval field in common gpt_params * ggml_debug: use common gpt_params to pass cb eval. Fix get tensor SIGV random. * ggml_debug: ci: add tests * ggml_debug: EOL in CMakeLists.txt * ggml_debug: Remove unused param n_batch, no batching here * ggml_debug: fix trailing spaces * ggml_debug: fix trailing spaces * common: fix cb_eval and user data not initialized * ci: build revert label * ggml_debug: add main test label * doc: add a model: add a link to ggml-debug * ggml-debug: add to make toolchain * ggml-debug: tests add the main label * ggml-debug: ci add test curl label * common: allow the warmup to be disabled in llama_init_from_gpt_params * ci: add curl test * ggml-debug: better tensor type support * gitignore : ggml-debug * ggml-debug: printing also the sum of each tensor * ggml-debug: remove block size * eval-callback: renamed from ggml-debug * eval-callback: fix make toolchain --------- Co-authored-by: slaren <[email protected]> Co-authored-by: Georgi Gerganov <[email protected]>
- Loading branch information
1 parent
8228b66
commit b804b1e
Showing
12 changed files
with
320 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Build script for the eval-callback example.
#
# Defines one executable built from eval-callback.cpp, installs it as a
# runtime artifact, and registers a CTest entry that runs the binary.
set(TARGET eval-callback)

add_executable(${TARGET}
    eval-callback.cpp
)

# The example is compiled as (at least) C++11.
target_compile_features(${TARGET}
    PRIVATE
        cxx_std_11
)

# `common` and `llama` are targets defined elsewhere in the project.
# NOTE(review): CMAKE_THREAD_LIBS_INIT is presumably populated by a
# find_package(Threads) call in a parent directory - confirm.
target_link_libraries(${TARGET}
    PRIVATE
        common
        llama
        ${CMAKE_THREAD_LIBS_INIT}
)

install(TARGETS ${TARGET} RUNTIME)

# Test: run the example with a fixed prompt and seed.
# The --hf-repo / --hf-file arguments name the model source and --model the
# local file name. NOTE(review): the `curl` label suggests the run fetches
# the model over the network - confirm before enabling on offline runners.
set(TEST_TARGET test-eval-callback)

add_test(
    NAME ${TEST_TARGET}
    COMMAND eval-callback
        --hf-repo ggml-org/models
        --hf-file tinyllamas/stories260K.gguf
        --model stories260K.gguf
        --prompt hello
        --seed 42
)

# Two labels so the test can be selected with `ctest -L eval-callback`
# or `ctest -L curl`.
set_tests_properties(${TEST_TARGET}
    PROPERTIES
        LABELS "eval-callback;curl"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# llama.cpp/examples/eval-callback | ||
|
||
A simple example that demonstrates how to use a callback during inference. | ||
It prints every operation and its tensor data to the console. | ||
|
||
Usage: | ||
|
||
```shell | ||
eval-callback \ | ||
--hf-repo ggml-org/models \ | ||
--hf-file phi-2/ggml-model-q4_0.gguf \ | ||
--model phi-2-q4_0.gguf \ | ||
--prompt hello \ | ||
--seed 42 \ | ||
-ngl 33 | ||
``` | ||
|
||
Will print: | ||
|
||
```shell | ||
llm_load_tensors: offloaded 33/33 layers to GPU | ||
... | ||
llama_new_context_with_model: n_ctx = 512 | ||
... | ||
llama_new_context_with_model: CUDA0 compute buffer size = 105.00 MiB | ||
llama_new_context_with_model: CUDA_Host compute buffer size = 6.01 MiB | ||
llama_new_context_with_model: graph nodes = 1225 | ||
llama_new_context_with_model: graph splits = 2 | ||
ggml_debug: inp_embd = (f32) GET_ROWS(token_embd.weight{2560, 51200, 1, 1}, inp_tokens{1, 1, 1, 1}}) = {2560, 1, 1, 1} | ||
[ | ||
[ | ||
[ -0.0181, 0.0272, 0.0272, ...], | ||
], | ||
] | ||
ggml_debug: norm-0 = (f32) NORM(CUDA0#inp_embd#0{2560, 1, 1, 1}, }) = {2560, 1, 1, 1} | ||
[ | ||
[ | ||
[ -0.6989, 1.0636, 1.0636, ...], | ||
], | ||
] | ||
ggml_debug: norm_w-0 = (f32) MUL(norm-0{2560, 1, 1, 1}, blk.0.attn_norm.weight{2560, 1, 1, 1}}) = {2560, 1, 1, 1} | ||
[ | ||
[ | ||
[ -0.1800, 0.2817, 0.2632, ...], | ||
], | ||
] | ||
ggml_debug: attn_norm-0 = (f32) ADD(norm_w-0{2560, 1, 1, 1}, blk.0.attn_norm.bias{2560, 1, 1, 1}}) = {2560, 1, 1, 1} | ||
[ | ||
[ | ||
[ -0.1863, 0.2970, 0.2604, ...], | ||
], | ||
] | ||
ggml_debug: wqkv-0 = (f32) MUL_MAT(blk.0.attn_qkv.weight{2560, 7680, 1, 1}, attn_norm-0{2560, 1, 1, 1}}) = {7680, 1, 1, 1} | ||
[ | ||
[ | ||
[ -1.1238, 1.2876, -1.8086, ...], | ||
], | ||
] | ||
ggml_debug: bqkv-0 = (f32) ADD(wqkv-0{7680, 1, 1, 1}, blk.0.attn_qkv.bias{7680, 1, 1, 1}}) = {7680, 1, 1, 1} | ||
[ | ||
[ | ||
[ -1.1135, 1.4604, -1.9226, ...], | ||
], | ||
] | ||
ggml_debug: bqkv-0 (view) = (f32) VIEW(bqkv-0{7680, 1, 1, 1}, }) = {2560, 1, 1, 1} | ||
[ | ||
[ | ||
[ -1.1135, 1.4604, -1.9226, ...], | ||
], | ||
] | ||
ggml_debug: Qcur-0 = (f32) CONT(bqkv-0 (view){2560, 1, 1, 1}, }) = {2560, 1, 1, 1} | ||
[ | ||
[ | ||
[ -1.1135, 1.4604, -1.9226, ...], | ||
], | ||
] | ||
ggml_debug: Qcur-0 (reshaped) = (f32) RESHAPE(Qcur-0{2560, 1, 1, 1}, }) = {80, 32, 1, 1} | ||
[ | ||
[ | ||
[ -1.1135, 1.4604, -1.9226, ...], | ||
[ -0.3608, 0.5076, -1.8866, ...], | ||
[ 1.7643, 0.0273, -2.1065, ...], | ||
... | ||
], | ||
] | ||
ggml_debug: Qcur-0 = (f32) ROPE(Qcur-0 (reshaped){80, 32, 1, 1}, CUDA0#inp_pos#0{1, 1, 1, 1}}) = {80, 32, 1, 1} | ||
[ | ||
[ | ||
[ -1.1135, 1.4604, -1.9226, ...], | ||
[ -0.3608, 0.5076, -1.8866, ...], | ||
[ 1.7643, 0.0273, -2.1065, ...], | ||
... | ||
], | ||
] | ||
``` |
Oops, something went wrong.