Merge branch 'master' into xsn/server_more_tests
ngxson committed Nov 29, 2024
2 parents 879c5eb + 266b851 commit 9864e0d
Showing 15 changed files with 523 additions and 81 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/build.yml
@@ -904,6 +904,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Install Cuda Toolkit 11.7
if: ${{ matrix.cuda == '11.7' }}
@@ -1139,6 +1141,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Install
id: depends
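
Note: `fetch-depth: 0` tells `actions/checkout` to fetch the full git history and tags instead of the default shallow, single-commit clone; presumably this is so the affected CI jobs can derive build number and commit information from the git history during the build.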
186 changes: 185 additions & 1 deletion AUTHORS

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions common/arg.cpp
@@ -1370,8 +1370,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, int value) {
params.n_gpu_layers = value;
if (!llama_supports_gpu_offload()) {
fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers option will be ignored\n");
fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
fprintf(stderr, "warning: no usable GPU found, --gpu-layers option will be ignored\n");
fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n");
fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n");
}
}
).set_env("LLAMA_ARG_N_GPU_LAYERS"));
@@ -2104,8 +2105,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, int value) {
params.speculative.n_gpu_layers = value;
if (!llama_supports_gpu_offload()) {
fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers-draft option will be ignored\n");
fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
fprintf(stderr, "warning: no usable GPU found, --gpu-layers-draft option will be ignored\n");
fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n");
fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n");
}
}
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
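
For context, here is a minimal standalone sketch (not part of this commit; the model path and layer count are placeholders) of how a program using the public `llama.h` API can apply the same `llama_supports_gpu_offload()` guard before requesting GPU offload:

```cpp
// Sketch only: guard GPU offload behind llama_supports_gpu_offload().
#include <cstdio>
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 99; // request full offload

    if (!llama_supports_gpu_offload()) {
        // same situation the new warnings describe: no usable GPU backend
        fprintf(stderr, "warning: no usable GPU found, n_gpu_layers will be ignored\n");
        mparams.n_gpu_layers = 0;
    }

    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "error: failed to load model\n");
        llama_backend_free();
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```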
4 changes: 2 additions & 2 deletions docs/android.md
@@ -23,10 +23,10 @@ $ curl -L {model-url} -o ~/{model}.gguf
Then, if you are not already in the repo directory, `cd` into `llama.cpp` and:

```
$ ./build/bin/llama-simple -m ~/{model}.gguf -c {context-size} -p "{your-prompt}"
$ ./build/bin/llama-cli -m ~/{model}.gguf -c {context-size} -p "{your-prompt}"
```

Here, we show `llama-simple`, but any of the executables under `examples` should work, in theory. Be sure to set `context-size` to a reasonable number (say, 4096) to start with; otherwise, memory could spike and kill your terminal.
Here, we show `llama-cli`, but any of the executables under `examples` should work, in theory. Be sure to set `context-size` to a reasonable number (say, 4096) to start with; otherwise, memory could spike and kill your terminal.

To see what it might look like visually, here's an old demo of an interactive session running on a Pixel 5 phone:

15 changes: 11 additions & 4 deletions examples/llava/clip.cpp
@@ -40,10 +40,17 @@
#include <cinttypes>
#include <limits>

#define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#define LOG_DBG(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#if defined(LLAVA_LOG_OFF)
# define LOG_INF(...)
# define LOG_WRN(...)
# define LOG_ERR(...)
# define LOG_DBG(...)
#else // defined(LLAVA_LOG_OFF)
# define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
# define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
# define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
# define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#endif // defined(LLAVA_LOG_OFF)

//#define CLIP_DEBUG_FUNCTIONS

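As an aside, a self-contained sketch (not from the commit) of the same compile-time switch in isolation: when `LLAVA_LOG_OFF` is defined at build time (for example with `-DLLAVA_LOG_OFF`), each `LOG_*` call expands to nothing, so neither the format string nor its arguments are evaluated:

```cpp
// Sketch only: compile-time log silencing via empty variadic macros.
#include <cstdio>

#if defined(LLAVA_LOG_OFF)
#  define LOG_INF(...)
#  define LOG_ERR(...)
#else
#  define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#  define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#endif

int main() {
    LOG_INF("encoded image in %d ms\n", 128);   // silent when built with -DLLAVA_LOG_OFF
    LOG_ERR("this goes to stderr unless logging is compiled out\n");
    return 0;
}
```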
28 changes: 19 additions & 9 deletions examples/llava/llava.cpp
@@ -11,13 +11,17 @@
#include <limits>
#include <vector>

#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0)
#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)

#define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#if defined(LLAVA_LOG_OFF)
# define LOG_INF(...)
# define LOG_WRN(...)
# define LOG_ERR(...)
# define LOG_DBG(...)
#else // defined(LLAVA_LOG_OFF)
# define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
# define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
# define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
# define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#endif // defined(LLAVA_LOG_OFF)

// RGB uint8 image
struct clip_image_u8 {
@@ -498,10 +502,16 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long
errno = 0;
size_t ret = fread(buffer, 1, fileSize, file); // Read the file into the buffer
if (ferror(file)) {
die_fmt("read error: %s", strerror(errno));
LOG_ERR("read error: %s", strerror(errno));
free(buffer);
fclose(file);
return false;
}
if (ret != (size_t) fileSize) {
die("unexpectedly reached end of file");
LOG_ERR("unexpectedly reached end of file");
free(buffer);
fclose(file);
return false;
}
fclose(file); // Close the file

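Here is an illustrative, self-contained sketch (not the actual helper) of the error-handling shape this hunk moves to: on a failed or short read, the function frees its buffer, closes the file, and reports failure to the caller instead of terminating the process with `die`/`die_fmt`:

```cpp
// Sketch only: clean-up-and-return-false instead of exiting on read errors.
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>

static bool read_whole_file(const char * path, unsigned char ** bytes_out, long * size_out) {
    FILE * file = fopen(path, "rb");
    if (file == NULL) {
        fprintf(stderr, "error: %s: %s\n", path, strerror(errno));
        return false;
    }
    fseek(file, 0, SEEK_END);
    long size = ftell(file);
    fseek(file, 0, SEEK_SET);

    unsigned char * buffer = (unsigned char *) malloc(size);
    if (buffer == NULL) {
        fclose(file);
        return false;
    }

    errno = 0;
    size_t ret = fread(buffer, 1, size, file);
    if (ferror(file) || ret != (size_t) size) {
        fprintf(stderr, "error: read failed: %s\n", strerror(errno));
        free(buffer);  // release the partially filled buffer
        fclose(file);
        return false;  // the caller decides how to recover
    }

    fclose(file);
    *bytes_out = buffer;
    *size_out  = size;
    return true;
}

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }
    unsigned char * data = NULL;
    long size = 0;
    if (!read_whole_file(argv[1], &data, &size)) {
        return 1; // error already reported; no exit() inside the helper
    }
    printf("read %ld bytes\n", size);
    free(data);
    return 0;
}
```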
2 changes: 1 addition & 1 deletion examples/server/tests/requirements.txt
@@ -2,6 +2,6 @@ aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
numpy~=1.26.4
openai~=1.30.3
openai~=1.55.3
prometheus-client~=0.20.0
requests~=2.32.3
19 changes: 2 additions & 17 deletions examples/server/tests/utils.py
@@ -8,7 +8,6 @@
import re
import json
import sys
import threading
import requests
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -170,26 +169,12 @@ def start(self, timeout_seconds: int = 10) -> None:
self.process = subprocess.Popen(
[str(arg) for arg in [server_path, *server_args]],
creationflags=flags,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdout=sys.stdout,
stderr=sys.stdout,
env={**os.environ, "LLAMA_CACHE": "tmp"},
)
server_instances.add(self)

def server_log(in_stream, out_stream):
for line in iter(in_stream.readline, b""):
print(line.decode("utf-8"), end="", file=out_stream)

thread_stdout = threading.Thread(
target=server_log, args=(self.process.stdout, sys.stdout), daemon=True
)
thread_stdout.start()

thread_stderr = threading.Thread(
target=server_log, args=(self.process.stderr, sys.stderr), daemon=True
)
thread_stderr.start()

print(f"server pid={self.process.pid}, pytest pid={os.getpid()}")

# wait for server to start
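
In effect, the server process now writes its stdout and stderr directly to the test runner's output stream, which is what makes the `server_log` helper, the two reader threads, and the `threading` import removed above unnecessary.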
2 changes: 1 addition & 1 deletion examples/simple/README.md
@@ -3,7 +3,7 @@
The purpose of this example is to demonstrate a minimal usage of llama.cpp for generating text with a given prompt.

```bash
./llama-simple -m ./models/llama-7b-v2/ggml-model-f16.gguf -p "Hello my name is"
./llama-simple -m ./models/llama-7b-v2/ggml-model-f16.gguf "Hello my name is"

...
