Commit

Merge branch 'Mozilla-Ocho:main' into main

mofosyne authored Apr 5, 2024
2 parents cf07bec + cb92b32 commit 6080f36
Showing 162 changed files with 55,172 additions and 23,496 deletions.
7 changes: 7 additions & 0 deletions Makefile
@@ -23,27 +23,34 @@ o/$(MODE)/: o/$(MODE)/llama.cpp o/$(MODE)/llamafile
.PHONY: install
install: llamafile/zipalign.1 \
llama.cpp/main/main.1 \
llama.cpp/imatrix/imatrix.1 \
llama.cpp/quantize/quantize.1 \
llama.cpp/perplexity/perplexity.1 \
llama.cpp/llava/llava-quantize.1 \
o/$(MODE)/llamafile/zipalign \
o/$(MODE)/llama.cpp/main/main \
o/$(MODE)/llama.cpp/imatrix/imatrix \
o/$(MODE)/llama.cpp/quantize/quantize \
o/$(MODE)/llama.cpp/perplexity/perplexity \
o/$(MODE)/llama.cpp/llava/llava-quantize
mkdir -p $(PREFIX)/bin
$(INSTALL) o/$(MODE)/llamafile/zipalign $(PREFIX)/bin/zipalign
$(INSTALL) o/$(MODE)/llama.cpp/main/main $(PREFIX)/bin/llamafile
$(INSTALL) o/$(MODE)/llama.cpp/imatrix/imatrix $(PREFIX)/bin/llamafile-imatrix
$(INSTALL) o/$(MODE)/llama.cpp/quantize/quantize $(PREFIX)/bin/llamafile-quantize
$(INSTALL) build/llamafile-convert $(PREFIX)/bin/llamafile-convert
$(INSTALL) o/$(MODE)/llama.cpp/perplexity/perplexity $(PREFIX)/bin/llamafile-perplexity
$(INSTALL) o/$(MODE)/llama.cpp/llava/llava-quantize $(PREFIX)/bin/llava-quantize
mkdir -p $(PREFIX)/share/man/man1
$(INSTALL) -m 0644 llamafile/zipalign.1 $(PREFIX)/share/man/man1/zipalign.1
$(INSTALL) -m 0644 llama.cpp/main/main.1 $(PREFIX)/share/man/man1/llamafile.1
$(INSTALL) -m 0644 llama.cpp/imatrix/imatrix.1 $(PREFIX)/share/man/man1/llamafile-imatrix.1
$(INSTALL) -m 0644 llama.cpp/quantize/quantize.1 $(PREFIX)/share/man/man1/llamafile-quantize.1
$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1

.PHONY: check
check: o/$(MODE)/llamafile/check

include build/deps.mk
include build/tags.mk
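
The Makefile hunk above registers the new imatrix tool's binary and man page with the install target. A minimal usage sketch, assuming a completed build (the -j value is illustrative; PREFIX defaults to /usr/local per build/config.mk):

    # build, then stage binaries and man pages under PREFIX
    make -j8
    sudo make install PREFIX=/usr/local
    # the importance-matrix tool is installed as llamafile-imatrix
    man llamafile-imatrix
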
160 changes: 99 additions & 61 deletions README.md

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions build/config.mk
@@ -2,7 +2,7 @@
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘

PREFIX = /usr/local
COSMOCC = .cosmocc/3.2.4
COSMOCC = .cosmocc/3.3.3
TOOLCHAIN = $(COSMOCC)/bin/cosmo

AR = $(TOOLCHAIN)ar
@@ -14,8 +14,8 @@ INSTALL = install

ARFLAGS = rcsD
CCFLAGS = -g -O3 -fexceptions
TARGET_ARCH = -Xx86_64-mssse3
CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM
CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes
TARGET_ARCH = -Xx86_64-mavx -Xx86_64-mtune=alderlake

TMPDIR = o//tmp
IGNORE := $(shell mkdir -p $(TMPDIR))
@@ -48,7 +48,7 @@ all: o/$(MODE)/
clean:; rm -rf o

.PHONY: distclean
distclean:; rm -rf o cosmocc
distclean:; rm -rf o .cosmocc

.cosmocc/3.2.4:
build/download-cosmocc.sh $@ 3.2.4 d2fa6dbf6f987310494581deff5b915dbdc5ca701f20f7613bb0dcf1de2ee511
.cosmocc/3.3.3:
build/download-cosmocc.sh $@ 3.3.3 e4d0fa63cd79cc3bfff6c2d015f1776db081409907625aea8ad40cefc1996d08
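
The toolchain bump pins cosmocc 3.3.3 by version and SHA-256, and distclean now removes the dot-prefixed .cosmocc directory the rule unpacks into. A sketch of fetching the pinned toolchain by hand, passing the same arguments the make rule does:

    build/download-cosmocc.sh .cosmocc/3.3.3 3.3.3 \
        e4d0fa63cd79cc3bfff6c2d015f1776db081409907625aea8ad40cefc1996d08
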
53 changes: 53 additions & 0 deletions build/cudacc
@@ -0,0 +1,53 @@
#!/bin/sh

find_nvcc() {
CC=$(command -v nvcc 2>/dev/null) && return
CC="$CUDA_PATH/bin/nvcc"
[ -x "$CC" ] && return
CC="/opt/cuda/bin/nvcc"
[ -x "$CC" ] && return
CC="/usr/local/cuda/bin/nvcc"
[ -x "$CC" ] && return
return 1
}

find_hipcc() {
CC=$(command -v hipcc 2>/dev/null) && return
CC="$HIP_PATH/bin/hipcc"
[ -x "$CC" ] && return
CC="/opt/rocm/bin/hipcc"
[ -x "$CC" ] && return
CC="/usr/local/rocm/bin/hipcc"
[ -x "$CC" ] && return
return 1
}

if find_hipcc; then
VENDOR=AMD
FLAGS=
elif find_nvcc; then
VENDOR=NVIDIA
FLAGS="--forward-unknown-to-host-compiler"
else
echo 'error: need either hipcc (AMD) or nvcc (NVIDIA) on $PATH' >&2
exit 1
fi

FIRST=1
for x; do
if [ $FIRST -eq 1 ]; then
set --
FIRST=0
fi
if [ $VENDOR = AMD ]; then
if [ x"$x" = x"-lcublas" ]; then
set -- "$@" -lhipblas -lrocblas
continue
elif [ x"$x" = x"--use_fast_math" ]; then
continue
fi
fi
set -- "$@" "$x"
done

exec "$CC" $FLAGS "$@"
134 changes: 81 additions & 53 deletions build/llamafile-convert
@@ -1,64 +1,92 @@
#!/bin/sh
FILE=$1
SCRIPTNAME=${0##*/}
BIN=${0%/*}
PROG=${0##*/}

if [ -z "$FILE" ]; then
echo "Usage: $SCRIPTNAME <gguf file or url> [cli|server|both]"
if [ x"$1" = x"--help" ]; then
echo "Usage: $PROG <gguf file or url>"
echo
echo "This program converts GGUF weights into a llamafile."
echo "Your .llamafile is outputted to the current directory."
echo
echo "You can supply either a .gguf filename, or the URL to"
echo "download one from an online service like Hugging Face."
echo
echo "When you run this program, it's recommended that you've"
echo "downloaded or installed an official llamafile-VERSION.zip"
echo "from https://github.com/Mozilla-Ocho/llamafile/releases"
echo "because they include prebuilt DLLs for CUDA and ROCm."
echo "You can verify your llamafile has them w/ unzip -vl"
exit 0
fi

abort() {
echo "conversion terminated." >&2
exit 1
}

# find paths of golden llamafile binaries
#
# 1. if user downloaded `llamafile-VERSION.zip`, extracted it, and ran
# `./llamafile-VERSION/bin/llamafile-convert` directly, then we can
# support that by looking for a `llamafile` in the same bin folder.
#
# 2. otherwise, perform a $PATH lookup for llamafile
#
LLAMAFILE="$BIN/llamafile"
if [ ! -x "$LLAMAFILE" ]; then
LLAMAFILE=$(command -v llamafile) || abort
fi
ZIPALIGN="$BIN/zipalign"
if [ ! -x "$ZIPALIGN" ]; then
ZIPALIGN=$(command -v zipalign) || abort
fi

# get path of downloader program
if WGET=$(command -v wget 2>/dev/null); then
DOWNLOAD=$WGET
DOWNLOAD_ARGS=-O
elif CURL=$(command -v curl 2>/dev/null); then
DOWNLOAD=$CURL
DOWNLOAD_ARGS=-fLo
else
echo "$PROG: fatal error: you need to install either wget or curl" >&2
echo "please download https://cosmo.zip/pub/cosmos/bin/wget and put it on the system path" >&2
abort
fi

# get first program argument
FILE=$1
if [ -z "$FILE" ]; then
echo "$PROG: missing operand (pass --help for help)" >&2
abort
fi

# if the file starts with http
SHOULD_DELETE=0
if [ x"$FILE" != x"${FILE#http*}" ]; then
# download the file
# if the filename contains ?download=true, remove it
FILE=$(echo $FILE | sed 's/?download=true//g')
# get the filename
FILENAME=$(echo $FILE | sed 's/.*\///g')
echo "Downloading $FILENAME" >&2
if WGET=$(command -v wget 2>/dev/null); then
DOWNLOAD=$WGET
DOWNLOAD_ARGS=-O
elif CURL=$(command -v curl 2>/dev/null); then
DOWNLOAD=$CURL
DOWNLOAD_ARGS=-fLo
else
printf '%s\n' "$0: fatal error: you need to install either wget or curl" >&2
printf '%s\n' "please download https://cosmo.zip/pub/cosmos/bin/wget and put it on the system path" >&2
abort
fi
"${DOWNLOAD}" ${DOWNLOAD_ARGS} $FILENAME $FILE
# get the filename
FILE=$FILENAME
URL=$FILE
URL=${URL%?download=true} # strip "?download=true" suffix
FILE=${URL##*/} # local file is basename of url
echo "Downloading $FILE" >&2
"${DOWNLOAD}" ${DOWNLOAD_ARGS} "$FILE" "$URL" || abort
SHOULD_DELETE=1
fi

# replace .gguf with .llamafile
LLAMAFILE_NAME=$(echo $FILE | sed 's/.gguf/.llamafile/g')
LLAMAFILE_PATH=$(command -v llamafile)
CLI_ARGS="-m
$FILE
# create output in current directory
echo "Using $LLAMAFILE as golden llamafile binary" >&2
OUTPUT=${FILE##*/} # basename
OUTPUT="${OUTPUT%.gguf}.llamafile"
echo "Converting $FILE to $OUTPUT" >&2
cp -f "$LLAMAFILE" "$OUTPUT" || abort
printf %s "-m
${FILE##*/}
...
"

convert() {
echo "Converting $FILE to $LLAMAFILE_NAME"
# print CLI args to .args
printf %s "$CLI_ARGS" > .args
cp $LLAMAFILE_PATH $LLAMAFILE_NAME
zipalign -j0 $LLAMAFILE_NAME $FILE .args
}
" > .args
"$ZIPALIGN" -j0 "$OUTPUT" "$FILE" .args || abort

cleanup() {
echo "Cleaning up"
rm -f .args
# remove the downloaded file
rm -f $FILE
echo "Done"
}

abort() {
printf '%s\n' "conversion terminated." >&2
exit 1
}

convert || abort
cleanup
# cleanup
rm -f .args
if [ $SHOULD_DELETE -eq 1 ]; then
rm -f "$FILE"
fi
echo "Success. You may now run ./$OUTPUT" >&2
2 changes: 1 addition & 1 deletion build/objdump
@@ -1,6 +1,6 @@
#!/bin/sh
if printf '%s\n' "$*" | grep aarch64 >/dev/null 2>&1; then
exec aarch64-unknown-cosmo-objdump "$@"
exec aarch64-unknown-cosmo-objdump $1 ${2%/*}/.aarch64/${2##*/}
else
exec x86_64-unknown-cosmo-objdump "$@"
fi
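
The aarch64 branch no longer forwards its arguments verbatim: assuming $1 is a flag and $2 an object path, it rewrites the path to the .aarch64/ sibling directory used by cosmocc fat builds. A hypothetical call (the path is illustrative):

    build/objdump -d o/aarch64/llamafile/zipalign
    # execs: aarch64-unknown-cosmo-objdump -d o/aarch64/llamafile/.aarch64/zipalign
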
7 changes: 7 additions & 0 deletions build/rules.mk
@@ -23,6 +23,13 @@ o/$(MODE)/%.o: %.cpp $(COSMOCC)
o/$(MODE)/%: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@

o/$(MODE)/%.com: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@

%.runs: %
$<
@touch $@

.PRECIOUS: %.1.asc
%.1.asc: %.1
-MANWIDTH=80 MAN_KEEP_FORMATTING=1 man $< >$@.tmp && mv -f $@.tmp $@
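
The new %.runs pattern rule executes a freshly built program and records success in a stamp file, so the run is repeated only when the binary is rebuilt. An illustrative invocation against the existing check binary:

    # runs o//llamafile/check once, then touches o//llamafile/check.runs
    make o//llamafile/check.runs
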
16 changes: 16 additions & 0 deletions llama.cpp/BUILD.mk
@@ -21,15 +21,31 @@ o/$(MODE)/llama.cpp/llama.cpp.a: $(LLAMA_CPP_OBJS)
include llama.cpp/llava/BUILD.mk
include llama.cpp/server/BUILD.mk
include llama.cpp/main/BUILD.mk
include llama.cpp/imatrix/BUILD.mk
include llama.cpp/quantize/BUILD.mk
include llama.cpp/perplexity/BUILD.mk

$(LLAMA_CPP_OBJS): private CCFLAGS += -DGGML_MULTIPLATFORM

o/$(MODE)/llama.cpp/ggml-alloc.o \
o/$(MODE)/llama.cpp/ggml-backend.o \
o/$(MODE)/llama.cpp/grammar-parser.o \
o/$(MODE)/llama.cpp/json-schema-to-grammar.o \
o/$(MODE)/llama.cpp/llama.o \
o/$(MODE)/llama.cpp/stb_image.o \
o/$(MODE)/llama.cpp/unicode.o \
o/$(MODE)/llama.cpp/sampling.o \
o/$(MODE)/llama.cpp/ggml-alloc.o \
o/$(MODE)/llama.cpp/common.o: private \
CCFLAGS += -Os

$(LLAMA_CPP_OBJS): llama.cpp/BUILD.mk

.PHONY: o/$(MODE)/llama.cpp
o/$(MODE)/llama.cpp: \
o/$(MODE)/llama.cpp/main \
o/$(MODE)/llama.cpp/llava \
o/$(MODE)/llama.cpp/server \
o/$(MODE)/llama.cpp/imatrix \
o/$(MODE)/llama.cpp/quantize \
o/$(MODE)/llama.cpp/perplexity
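
This wires the imatrix, quantize, and perplexity subdirectories into the build and compiles several large translation units with -Os to trade speed for size. Individual tools can presumably be built directly, e.g.:

    # build only the new importance-matrix tool (empty MODE = default build)
    make o//llama.cpp/imatrix/imatrix
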
1 change: 1 addition & 0 deletions llama.cpp/LICENSE
@@ -1,6 +1,7 @@
MIT License

Copyright (c) 2023 Georgi Gerganov
Copyright (c) 2023 Iwan Kawrakow
Copyright (c) 2023 Jeffrey Quesnelle and Bowen Peng.
Copyright (c) 2023 Yuji Hirose
Copyright (c) 2022 Niels Lohmann <https://nlohmann.me>
12 changes: 5 additions & 7 deletions llama.cpp/README.llamafile
@@ -9,26 +9,24 @@ LICENSE
ORIGIN

https://github.com/ggerganov/llama.cpp/pull/4406/
4f56458d34cb13dcbf69aca650e9bf77d5497e6f
2024-01-10
fa046eafbc70bf97dcf39843af0323f19a8c9ac3
2024-03-22

LOCAL MODIFICATIONS

- Count the number of cores correctly on Intel's Alderlake architecture
- Remove MAP_POPULATE because it makes mmap(tinyllama) block for 100ms
- Refactor ggml.c, llama.cpp, and llava to use llamafile_open() APIs
- Unify main, server, and llava-cli into single llamafile program
- Make cuBLAS / hipBLAS optional by introducing tinyBLAS library
- Use Microsoft ABI on CUDA module and ggml-backend interfaces
- Add support to main() programs for Cosmo /zip/.args files
- Introduce pledge() SECCOMP sandboxing to improve security
- Call exit() rather than abort() when GGML_ASSERT() fails
- Fix OpenAI server sampling w.r.t. temperature and seed
- Remove log callback pointer API from Metal GPU module
- Make GPU logger callback API safer and less generic
- Write log to /dev/null when main.log fails to open
- Use _rand64() rather than time() as default seed
- Make main and llava-cli print timings on ctrl-c
- Avoid bind() conflicts on port 8080 w/ server
- Allow --grammar to be used on --image prompts
- Use runtime dispatching for matmul quants
- Remove operating system #ifdef statements
- Introduce --silent-prompt flag to main
- Remove stdout logging from LLaVA
4 changes: 2 additions & 2 deletions llama.cpp/base64.h
@@ -235,7 +235,7 @@ class base64
++in_begin;

if (c != '=') {
throw std::runtime_error("invalid base64 character.");
throw base64_error("invalid base64 character.");
}
}
}
@@ -385,7 +385,7 @@
}
}

throw std::runtime_error("invalid base64 character.");
throw base64_error("invalid base64 character.");
}
};

