Commit

Merge branch 'Mozilla-Ocho:main' into main

mofosyne authored Apr 5, 2024
2 parents cf07bec + cb92b32 commit 6080f36
Showing 162 changed files with 55,172 additions and 23,496 deletions.
7 changes: 7 additions & 0 deletions Makefile
@@ -23,27 +23,34 @@ o/$(MODE)/: o/$(MODE)/llama.cpp o/$(MODE)/llamafile
.PHONY: install
install: llamafile/zipalign.1 \
llama.cpp/main/main.1 \
llama.cpp/imatrix/imatrix.1 \
llama.cpp/quantize/quantize.1 \
llama.cpp/perplexity/perplexity.1 \
llama.cpp/llava/llava-quantize.1 \
o/$(MODE)/llamafile/zipalign \
o/$(MODE)/llama.cpp/main/main \
o/$(MODE)/llama.cpp/imatrix/imatrix \
o/$(MODE)/llama.cpp/quantize/quantize \
o/$(MODE)/llama.cpp/perplexity/perplexity \
o/$(MODE)/llama.cpp/llava/llava-quantize
mkdir -p $(PREFIX)/bin
$(INSTALL) o/$(MODE)/llamafile/zipalign $(PREFIX)/bin/zipalign
$(INSTALL) o/$(MODE)/llama.cpp/main/main $(PREFIX)/bin/llamafile
$(INSTALL) o/$(MODE)/llama.cpp/imatrix/imatrix $(PREFIX)/bin/llamafile-imatrix
$(INSTALL) o/$(MODE)/llama.cpp/quantize/quantize $(PREFIX)/bin/llamafile-quantize
$(INSTALL) build/llamafile-convert $(PREFIX)/bin/llamafile-convert
$(INSTALL) o/$(MODE)/llama.cpp/perplexity/perplexity $(PREFIX)/bin/llamafile-perplexity
$(INSTALL) o/$(MODE)/llama.cpp/llava/llava-quantize $(PREFIX)/bin/llava-quantize
mkdir -p $(PREFIX)/share/man/man1
$(INSTALL) -m 0644 llamafile/zipalign.1 $(PREFIX)/share/man/man1/zipalign.1
$(INSTALL) -m 0644 llama.cpp/main/main.1 $(PREFIX)/share/man/man1/llamafile.1
$(INSTALL) -m 0644 llama.cpp/imatrix/imatrix.1 $(PREFIX)/share/man/man1/llamafile-imatrix.1
$(INSTALL) -m 0644 llama.cpp/quantize/quantize.1 $(PREFIX)/share/man/man1/llamafile-quantize.1
$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1

.PHONY: check
check: o/$(MODE)/llamafile/check

include build/deps.mk
include build/tags.mk
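
The Makefile hunk above registers the new imatrix tool's binary and man page with the install target. A minimal usage sketch, assuming a completed build (the -j value is illustrative; PREFIX defaults to /usr/local per build/config.mk):

    # build, then stage binaries and man pages under PREFIX
    make -j8
    sudo make install PREFIX=/usr/local
    # the importance-matrix tool is installed as llamafile-imatrix
    man llamafile-imatrix
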
160 changes: 99 additions & 61 deletions README.md

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions build/config.mk
@@ -2,7 +2,7 @@
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘

PREFIX = /usr/local
COSMOCC = .cosmocc/3.2.4
COSMOCC = .cosmocc/3.3.3
TOOLCHAIN = $(COSMOCC)/bin/cosmo

AR = $(TOOLCHAIN)ar
@@ -14,8 +14,8 @@ INSTALL = install

ARFLAGS = rcsD
CCFLAGS = -g -O3 -fexceptions
TARGET_ARCH = -Xx86_64-mssse3
CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM
CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes
TARGET_ARCH = -Xx86_64-mavx -Xx86_64-mtune=alderlake

TMPDIR = o//tmp
IGNORE := $(shell mkdir -p $(TMPDIR))
@@ -48,7 +48,7 @@ all: o/$(MODE)/
clean:; rm -rf o

.PHONY: distclean
distclean:; rm -rf o cosmocc
distclean:; rm -rf o .cosmocc

.cosmocc/3.2.4:
build/download-cosmocc.sh $@ 3.2.4 d2fa6dbf6f987310494581deff5b915dbdc5ca701f20f7613bb0dcf1de2ee511
.cosmocc/3.3.3:
build/download-cosmocc.sh $@ 3.3.3 e4d0fa63cd79cc3bfff6c2d015f1776db081409907625aea8ad40cefc1996d08
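
The toolchain bump pins cosmocc 3.3.3 by version and SHA-256, and distclean now removes the dot-prefixed .cosmocc directory the rule unpacks into. A sketch of fetching the pinned toolchain by hand, passing the same arguments the make rule does:

    build/download-cosmocc.sh .cosmocc/3.3.3 3.3.3 \
        e4d0fa63cd79cc3bfff6c2d015f1776db081409907625aea8ad40cefc1996d08
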
53 changes: 53 additions & 0 deletions build/cudacc
@@ -0,0 +1,53 @@
#!/bin/sh

find_nvcc() {
CC=$(command -v nvcc 2>/dev/null) && return
CC="$CUDA_PATH/bin/nvcc"
[ -x "$CC" ] && return
CC="/opt/cuda/bin/nvcc"
[ -x "$CC" ] && return
CC="/usr/local/cuda/bin/nvcc"
[ -x "$CC" ] && return
return 1
}

find_hipcc() {
CC=$(command -v hipcc 2>/dev/null) && return
CC="$HIP_PATH/bin/hipcc"
[ -x "$CC" ] && return
CC="/opt/rocm/bin/hipcc"
[ -x "$CC" ] && return
CC="/usr/local/rocm/bin/hipcc"
[ -x "$CC" ] && return
return 1
}

if find_hipcc; then
VENDOR=AMD
FLAGS=
elif find_nvcc; then
VENDOR=NVIDIA
FLAGS="--forward-unknown-to-host-compiler"
else
echo 'error: need either hipcc (AMD) or nvcc (NVIDIA) on $PATH' >&2
exit 1
fi

FIRST=1
for x; do
if [ $FIRST -eq 1 ]; then
set --
FIRST=0
fi
if [ $VENDOR = AMD ]; then
if [ x"$x" = x"-lcublas" ]; then
set -- "$@" -lhipblas -lrocblas
continue
elif [ x"$x" = x"--use_fast_math" ]; then
continue
fi
fi
set -- "$@" "$x"
done

exec "$CC" $FLAGS "$@"
134 changes: 81 additions & 53 deletions build/llamafile-convert
@@ -1,64 +1,92 @@
#!/bin/sh
FILE=$1
SCRIPTNAME=${0##*/}
BIN=${0%/*}
PROG=${0##*/}

if [ -z "$FILE" ]; then
echo "Usage: $SCRIPTNAME <gguf file or url> [cli|server|both]"
if [ x"$1" = x"--help" ]; then
echo "Usage: $PROG <gguf file or url>"
echo
echo "This program converts GGUF weights into a llamafile."
echo "Your .llamafile is outputted to the current directory."
echo
echo "You can supply either a .gguf filename, or the URL to"
echo "download one from an online service like Hugging Face."
echo
echo "When you run this program, it's recommended that you've"
echo "downloaded or installed an official llamafile-VERSION.zip"
echo "from https://github.com/Mozilla-Ocho/llamafile/releases"
echo "because they include prebuilt DLLs for CUDA and ROCm."
echo "You can verify your llamafile has them w/ unzip -vl"
exit 0
fi

abort() {
echo "conversion terminated." >&2
exit 1
}

# find paths of golden llamafile binaries
#
# 1. if user downloaded `llamafile-VERSION.zip`, extracted it, and ran
# `./llamafile-VERSION/bin/llamafile-convert` directly, then we can
# support that by looking for a `llamafile` in the same bin folder.
#
# 2. otherwise, perform a $PATH lookup for llamafile
#
LLAMAFILE="$BIN/llamafile"
if [ ! -x "$LLAMAFILE" ]; then
LLAMAFILE=$(command -v llamafile) || abort
fi
ZIPALIGN="$BIN/zipalign"
if [ ! -x "$ZIPALIGN" ]; then
ZIPALIGN=$(command -v zipalign) || abort
fi

# get path of downloader program
if WGET=$(command -v wget 2>/dev/null); then
DOWNLOAD=$WGET
DOWNLOAD_ARGS=-O
elif CURL=$(command -v curl 2>/dev/null); then
DOWNLOAD=$CURL
DOWNLOAD_ARGS=-fLo
else
echo "$PROG: fatal error: you need to install either wget or curl" >&2
echo "please download https://cosmo.zip/pub/cosmos/bin/wget and put it on the system path" >&2
abort
fi

# get first program argument
FILE=$1
if [ -z "$FILE" ]; then
echo "$PROG: missing operand (pass --help for help)" >&2
abort
fi

# if the file starts with http
SHOULD_DELETE=0
if [ x"$FILE" != x"${FILE#http*}" ]; then
# download the file
# if the filename contains ?download=true, remove it
FILE=$(echo $FILE | sed 's/?download=true//g')
# get the filename
FILENAME=$(echo $FILE | sed 's/.*\///g')
echo "Downloading $FILENAME" >&2
if WGET=$(command -v wget 2>/dev/null); then
DOWNLOAD=$WGET
DOWNLOAD_ARGS=-O
elif CURL=$(command -v curl 2>/dev/null); then
DOWNLOAD=$CURL
DOWNLOAD_ARGS=-fLo
else
printf '%s\n' "$0: fatal error: you need to install either wget or curl" >&2
printf '%s\n' "please download https://cosmo.zip/pub/cosmos/bin/wget and put it on the system path" >&2
abort
fi
"${DOWNLOAD}" ${DOWNLOAD_ARGS} $FILENAME $FILE
# get the filename
FILE=$FILENAME
URL=$FILE
URL=${URL%?download=true} # strip "?download=true" suffix
FILE=${URL##*/} # local file is basename of url
echo "Downloading $FILE" >&2
"${DOWNLOAD}" ${DOWNLOAD_ARGS} "$FILE" "$URL" || abort
SHOULD_DELETE=1
fi

# replace .gguf with .llamafile
LLAMAFILE_NAME=$(echo $FILE | sed 's/.gguf/.llamafile/g')
LLAMAFILE_PATH=$(command -v llamafile)
CLI_ARGS="-m
$FILE
# create output in current directory
echo "Using $LLAMAFILE as golden llamafile binary" >&2
OUTPUT=${FILE##*/} # basename
OUTPUT="${OUTPUT%.gguf}.llamafile"
echo "Converting $FILE to $OUTPUT" >&2
cp -f "$LLAMAFILE" "$OUTPUT" || abort
printf %s "-m
${FILE##*/}
...
"

convert() {
echo "Converting $FILE to $LLAMAFILE_NAME"
# print CLI args to .args
printf %s "$CLI_ARGS" > .args
cp $LLAMAFILE_PATH $LLAMAFILE_NAME
zipalign -j0 $LLAMAFILE_NAME $FILE .args
}
" > .args
"$ZIPALIGN" -j0 "$OUTPUT" "$FILE" .args || abort

cleanup() {
echo "Cleaning up"
rm -f .args
# remove the downloaded file
rm -f $FILE
echo "Done"
}

abort() {
printf '%s\n' "conversion terminated." >&2
exit 1
}

convert || abort
cleanup
# cleanup
rm -f .args
if [ $SHOULD_DELETE -eq 1 ]; then
rm -f "$FILE"
fi
echo "Success. You may now run ./$OUTPUT" >&2
2 changes: 1 addition & 1 deletion build/objdump
@@ -1,6 +1,6 @@
#!/bin/sh
if printf '%s\n' "$*" | grep aarch64 >/dev/null 2>&1; then
exec aarch64-unknown-cosmo-objdump "$@"
exec aarch64-unknown-cosmo-objdump $1 ${2%/*}/.aarch64/${2##*/}
else
exec x86_64-unknown-cosmo-objdump "$@"
fi
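
The aarch64 branch no longer forwards its arguments verbatim: assuming $1 is a flag and $2 an object path, it rewrites the path to the .aarch64/ sibling directory used by cosmocc fat builds. A hypothetical call (the path is illustrative):

    build/objdump -d o/aarch64/llamafile/zipalign
    # execs: aarch64-unknown-cosmo-objdump -d o/aarch64/llamafile/.aarch64/zipalign
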
7 changes: 7 additions & 0 deletions build/rules.mk
@@ -23,6 +23,13 @@ o/$(MODE)/%.o: %.cpp $(COSMOCC)
o/$(MODE)/%: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@

o/$(MODE)/%.com: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@

%.runs: %
$<
@touch $@

.PRECIOUS: %.1.asc
%.1.asc: %.1
-MANWIDTH=80 MAN_KEEP_FORMATTING=1 man $< >$@.tmp && mv -f $@.tmp $@
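
The new %.runs pattern rule executes a freshly built program and records success in a stamp file, so the run is repeated only when the binary is rebuilt. An illustrative invocation against the existing check binary:

    # runs o//llamafile/check once, then touches o//llamafile/check.runs
    make o//llamafile/check.runs
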
16 changes: 16 additions & 0 deletions llama.cpp/BUILD.mk
@@ -21,15 +21,31 @@ o/$(MODE)/llama.cpp/llama.cpp.a: $(LLAMA_CPP_OBJS)
include llama.cpp/llava/BUILD.mk
include llama.cpp/server/BUILD.mk
include llama.cpp/main/BUILD.mk
include llama.cpp/imatrix/BUILD.mk
include llama.cpp/quantize/BUILD.mk
include llama.cpp/perplexity/BUILD.mk

$(LLAMA_CPP_OBJS): private CCFLAGS += -DGGML_MULTIPLATFORM

o/$(MODE)/llama.cpp/ggml-alloc.o \
o/$(MODE)/llama.cpp/ggml-backend.o \
o/$(MODE)/llama.cpp/grammar-parser.o \
o/$(MODE)/llama.cpp/json-schema-to-grammar.o \
o/$(MODE)/llama.cpp/llama.o \
o/$(MODE)/llama.cpp/stb_image.o \
o/$(MODE)/llama.cpp/unicode.o \
o/$(MODE)/llama.cpp/sampling.o \
o/$(MODE)/llama.cpp/ggml-alloc.o \
o/$(MODE)/llama.cpp/common.o: private \
CCFLAGS += -Os

$(LLAMA_CPP_OBJS): llama.cpp/BUILD.mk

.PHONY: o/$(MODE)/llama.cpp
o/$(MODE)/llama.cpp: \
o/$(MODE)/llama.cpp/main \
o/$(MODE)/llama.cpp/llava \
o/$(MODE)/llama.cpp/server \
o/$(MODE)/llama.cpp/imatrix \
o/$(MODE)/llama.cpp/quantize \
o/$(MODE)/llama.cpp/perplexity
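
This wires the imatrix, quantize, and perplexity subdirectories into the build and compiles several large translation units with -Os to trade speed for size. Individual tools can presumably be built directly, e.g.:

    # build only the new importance-matrix tool (empty MODE = default build)
    make o//llama.cpp/imatrix/imatrix
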
1 change: 1 addition & 0 deletions llama.cpp/LICENSE
@@ -1,6 +1,7 @@
MIT License

Copyright (c) 2023 Georgi Gerganov
Copyright (c) 2023 Iwan Kawrakow
Copyright (c) 2023 Jeffrey Quesnelle and Bowen Peng.
Copyright (c) 2023 Yuji Hirose
Copyright (c) 2022 Niels Lohmann <https://nlohmann.me>
12 changes: 5 additions & 7 deletions llama.cpp/README.llamafile
@@ -9,26 +9,24 @@ LICENSE
ORIGIN

https://github.com/ggerganov/llama.cpp/pull/4406/
4f56458d34cb13dcbf69aca650e9bf77d5497e6f
2024-01-10
fa046eafbc70bf97dcf39843af0323f19a8c9ac3
2024-03-22

LOCAL MODIFICATIONS

- Count the number of cores correctly on Intel's Alderlake architecture
- Remove MAP_POPULATE because it makes mmap(tinyllama) block for 100ms
- Refactor ggml.c, llama.cpp, and llava to use llamafile_open() APIs
- Unify main, server, and llava-cli into single llamafile program
- Make cuBLAS / hipBLAS optional by introducing tinyBLAS library
- Use Microsoft ABI on CUDA module and ggml-backend interfaces
- Add support to main() programs for Cosmo /zip/.args files
- Introduce pledge() SECCOMP sandboxing to improve security
- Call exit() rather than abort() when GGML_ASSERT() fails
- Fix OpenAI server sampling w.r.t. temperature and seed
- Remove log callback pointer API from Metal GPU module
- Make GPU logger callback API safer and less generic
- Write log to /dev/null when main.log fails to open
- Use _rand64() rather than time() as default seed
- Make main and llava-cli print timings on ctrl-c
- Avoid bind() conflicts on port 8080 w/ server
- Allow --grammar to be used on --image prompts
- Use runtime dispatching for matmul quants
- Remove operating system #ifdef statements
- Introduce --silent-prompt flag to main
- Remove stdout logging from LLaVA
4 changes: 2 additions & 2 deletions llama.cpp/base64.h
@@ -235,7 +235,7 @@ class base64
++in_begin;

if (c != '=') {
throw std::runtime_error("invalid base64 character.");
throw base64_error("invalid base64 character.");
}
}
}
@@ -385,7 +385,7 @@
}
}

throw std::runtime_error("invalid base64 character.");
throw base64_error("invalid base64 character.");
}
};

