Skip to content

Commit

Permalink
Merge branch 'ggerganov:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
sealad886 authored Jun 13, 2024
2 parents 13dfb68 + 172c825 commit 4313e50
Show file tree
Hide file tree
Showing 144 changed files with 1,407 additions and 968 deletions.
2 changes: 1 addition & 1 deletion .devops/cloud-v-pipeline
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ node('x86_runner1'){ // Running on x86 runner containing latest vecto
stage('Running llama.cpp'){
sh'''#!/bin/bash
module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./main -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
cat llama_log.txt # Printing results
'''
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1

RUN make -j$(nproc) main
RUN make -j$(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/main /main
COPY --from=build /app/llama-cli /llama-cli

ENTRYPOINT [ "/main" ]
ENTRYPOINT [ "/llama-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
fi && \
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
cmake --build build --config Release --target main
cmake --build build --config Release --target llama-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

COPY --from=build /app/build/bin/main /main
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
ENTRYPOINT [ "/llama-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,6 @@ ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j$(nproc) main
RUN make -j$(nproc) llama-cli

ENTRYPOINT [ "/app/main" ]
ENTRYPOINT [ "/app/llama-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
WORKDIR /app
COPY . .
RUN cmake -B build -DLLAMA_VULKAN=1 && \
cmake --build build --config Release --target main
cmake --build build --config Release --target llama-cli

# Clean up
WORKDIR /
RUN cp /app/build/bin/main /main && \
RUN cp /app/build/bin/llama-cli /llama-cli && \
rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
ENTRYPOINT [ "/llama-cli" ]
6 changes: 3 additions & 3 deletions .devops/main.Dockerfile → .devops/llama-cli.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ WORKDIR /app

COPY . .

RUN make -j$(nproc) main
RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/main /main
COPY --from=build /app/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
ENTRYPOINT [ "/llama-cli" ]
14 changes: 7 additions & 7 deletions .devops/llama-cpp-clblast.srpm.spec
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ make -j LLAMA_CLBLAST=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llamaclblast
cp -p server %{buildroot}%{_bindir}/llamaclblastserver
cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple
cp -p llama-cli %{buildroot}%{_bindir}/llama-clblast-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-clblast-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-clblast-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
Expand All @@ -49,7 +49,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS
ExecStart=/usr/bin/llama-clblast-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

Expand All @@ -67,9 +67,9 @@ rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llamaclblast
%{_bindir}/llamaclblastserver
%{_bindir}/llamaclblastsimple
%{_bindir}/llama-clblast-cli
%{_bindir}/llama-clblast-server
%{_bindir}/llama-clblast-simple
/usr/lib/systemd/system/llamaclblast.service
%config /etc/sysconfig/llama

Expand Down
14 changes: 7 additions & 7 deletions .devops/llama-cpp-cuda.srpm.spec
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ make -j LLAMA_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llamacppcuda
cp -p server %{buildroot}%{_bindir}/llamacppcudaserver
cp -p simple %{buildroot}%{_bindir}/llamacppcudasimple
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
Expand All @@ -49,7 +49,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamacppcudaserver $LLAMA_ARGS
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

Expand All @@ -67,9 +67,9 @@ rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llamacppcuda
%{_bindir}/llamacppcudaserver
%{_bindir}/llamacppcudasimple
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama

Expand Down
14 changes: 7 additions & 7 deletions .devops/llama-cpp.srpm.spec
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llama
cp -p server %{buildroot}%{_bindir}/llamaserver
cp -p simple %{buildroot}%{_bindir}/llamasimple
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
Expand All @@ -51,7 +51,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamaserver $LLAMA_ARGS
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

Expand All @@ -69,9 +69,9 @@ rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama
%{_bindir}/llamaserver
%{_bindir}/llamasimple
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ ENV LLAMA_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc) server
RUN make -j$(nproc) llama-server

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1

COPY --from=build /app/server /server
COPY --from=build /app/llama-server /llama-server

ENTRYPOINT [ "/server" ]
ENTRYPOINT [ "/llama-server" ]
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
fi && \
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
cmake --build build --config Release --target server
cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

COPY --from=build /app/build/bin/server /server
COPY --from=build /app/build/bin/llama-server /llama-server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
ENTRYPOINT [ "/llama-server" ]
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ ENV LLAMA_CURL=1
RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

RUN make -j$(nproc)
RUN make -j$(nproc) llama-server

ENTRYPOINT [ "/app/server" ]
ENTRYPOINT [ "/app/llama-server" ]
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ RUN apt-get update && \
WORKDIR /app
COPY . .
RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
cmake --build build --config Release --target server
cmake --build build --config Release --target llama-server

# Clean up
WORKDIR /
RUN cp /app/build/bin/server /server && \
RUN cp /app/build/bin/llama-server /llama-server && \
rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
ENTRYPOINT [ "/llama-server" ]
6 changes: 3 additions & 3 deletions .devops/server.Dockerfile → .devops/llama-server.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc) server
RUN make -j$(nproc) llama-server

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1

COPY --from=build /app/server /server
COPY --from=build /app/llama-server /llama-server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
ENTRYPOINT [ "/llama-server" ]
6 changes: 3 additions & 3 deletions .devops/nix/apps.nix
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
let
inherit (config.packages) default;
binaries = [
"llama"
"llama-cli"
"llama-embedding"
"llama-server"
"quantize"
"train-text-from-scratch"
"llama-quantize"
"llama-train-text-from-scratch"
];
mkApp = name: {
type = "app";
Expand Down
4 changes: 1 addition & 3 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,6 @@ effectiveStdenv.mkDerivation (
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
# if they haven't been added yet.
postInstall = ''
mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
mkdir -p $out/include
cp $src/llama.h $out/include/
'';
Expand Down Expand Up @@ -294,7 +292,7 @@ effectiveStdenv.mkDerivation (
license = lib.licenses.mit;

# Accommodates `nix run` and `lib.getExe`
mainProgram = "llama";
mainProgram = "llama-cli";

# These people might respond, on the best effort basis, if you ping them
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
Expand Down
10 changes: 5 additions & 5 deletions .devops/tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,23 @@ shift
if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
python3 ./convert-hf-to-gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
./quantize "$@"
./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
./main "$@"
./llama-cli "$@"
elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
./finetune "$@"
./llama-finetune "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
echo "Converting PTH to GGML..."
for i in `ls $1/$2/ggml-model-f16.bin*`; do
if [ -f "${i/f16/q4_0}" ]; then
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
else
echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
./quantize "$i" "${i/f16/q4_0}" q4_0
./llama-quantize "$i" "${i/f16/q4_0}" q4_0
fi
done
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
./server "$@"
./llama-server "$@"
else
echo "Unknown command: $arg1"
echo "Available commands: "
Expand Down
4 changes: 2 additions & 2 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ build*/

models/*

/main
/quantize
/llama-cli
/llama-quantize

arm_neon.h
compile_commands.json
Expand Down
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/01-bug-low.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ body:
label: Name and Version
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./main --version
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
Expand Down
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/02-bug-medium.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ body:
label: Name and Version
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./main --version
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
Expand Down
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/03-bug-high.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ body:
label: Name and Version
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./main --version
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
Expand Down
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/04-bug-critical.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ body:
label: Name and Version
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./main --version
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ jobs:
-DLLAMA_FATAL_WARNINGS=OFF \
-DLLAMA_ALL_WARNINGS=OFF \
-DCMAKE_BUILD_TYPE=Release;
cmake --build build --config Release -j $(nproc) --target server
cmake --build build --config Release -j $(nproc) --target llama-server
- name: Download the dataset
id: download_dataset
Expand Down
Loading

0 comments on commit 4313e50

Please sign in to comment.