github: add ci (Mozilla-Ocho#454)

mofosyne authored May 29, 2024
1 parent 397175e commit 31419d0
Showing 7 changed files with 143 additions and 36 deletions.
28 changes: 28 additions & 0 deletions .github/ISSUE_TEMPLATE/07-refactor.yml
@@ -0,0 +1,28 @@
name: Refactor (Maintainers)
description: Used to track refactoring opportunities
title: "Refactor: "
labels: ["refactor"]
body:
  - type: markdown
    attributes:
      value: |
        Don't forget to [check for existing refactor issue tickets](https://github.com/Mozilla-Ocho/llamafile/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
        You may also want to check the [pull requests with the refactor label](https://github.com/Mozilla-Ocho/llamafile/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
  - type: textarea
    id: background-description
    attributes:
      label: Background Description
      description: Please provide a detailed written description of the pain points you are trying to solve.
      placeholder: Detailed description of the motivation behind your refactor request
    validations:
      required: true

  - type: textarea
    id: possible-approaches
    attributes:
      label: Possible Refactor Approaches
      description: If you have some idea of possible approaches to solve this problem, list them here. You may want to make it a todo list.
      placeholder: Your idea of possible refactoring opportunities/approaches
    validations:
      required: false
63 changes: 63 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,63 @@
name: CI
on:
  push:
    branches: [ master, main, fix ]
  pull_request:
    branches: [ master, main, fix ]

jobs:
  ubuntu-focal-make:
    timeout-minutes: 60
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install make
      - name: Cache cosmocc toolchain
        id: cache-cosmocc-toolchain
        uses: actions/cache@v4
        env:
          cache-name: cache-cosmocc-toolchain
        with:
          path: |
            .cosmocc
            o/depend
            o/depend.test
          key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/config.mk') }}
          restore-keys: |
            ${{ runner.os }}-build-${{ env.cache-name }}
      - name: Setup cosmocc and ape loader
        run: |
          sudo make cosmocc-ci PREFIX=/usr
      - name: Build
        run: |
          sudo make -j $(nproc)
      - name: Make Llamafile
        run: |
          cp ./models/TinyLLama-v0.1-5M-F16.gguf tinyllama.gguf
          cat << EoF > .args
          -m
          tinyllama.gguf
          ...
          EoF
          cp o//llama.cpp/main/main \
            tinyllama.llamafile
          o//llamafile/zipalign -j0 \
            tinyllama.llamafile \
            tinyllama.gguf \
            .args
      - name: Execute LLM CLI CPU # GA doesn't have "support_simdgroup_reduction" for RMS_NORM :'(
        run: |
          ./tinyllama.llamafile -e -p '## Famous Speech\n\nFour score and seven' -n 50 -ngl 0
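
The `Make Llamafile` step above is the packaging recipe in miniature: `zipalign -j0` appends the GGUF weights and a `.args` file (one argument per line, with the literal `...` line marking where arguments supplied at run time get spliced in) onto the bare `main` executable. A rough local sketch with hypothetical filenames, assuming the Build step's `make` has already populated `o//` and you have a GGUF file on hand:

```sh
# Package a GGUF model into a self-contained llamafile, mirroring the CI step.
cp o//llama.cpp/main/main my-model.llamafile
cat << EoF > .args
-m
my-model.gguf
...
EoF
o//llamafile/zipalign -j0 my-model.llamafile my-model.gguf .args

# CPU-only smoke test, same shape as the workflow's final step.
./my-model.llamafile -e -p 'Four score and seven' -n 16 -ngl 0
```
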
6 changes: 6 additions & 0 deletions Makefile
@@ -57,5 +57,11 @@ install: llamafile/zipalign.1 \
.PHONY: check
check: o/$(MODE)/llamafile/check

.PHONY: cosmocc
cosmocc: $(COSMOCC) # cosmocc toolchain setup

.PHONY: cosmocc-ci
cosmocc-ci: $(COSMOCC) $(PREFIX)/bin/ape # cosmocc toolchain setup in ci context

include build/deps.mk
include build/tags.mk
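
For local work, the two new targets can be driven directly; a minimal sketch of the intended usage (the `PREFIX=/usr` value simply mirrors the ci.yml invocation):

```sh
# Download/unpack the cosmocc toolchain only.
make cosmocc

# CI-style setup: also installs the ape loader to $PREFIX/bin/ape and registers
# binfmt_misc, which is why it needs root.
sudo make cosmocc-ci PREFIX=/usr
```
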
74 changes: 38 additions & 36 deletions README.md
@@ -1,36 +1,38 @@
# llamafile

[![ci status](https://github.com/Mozilla-Ocho/llamafile/actions/workflows/ci.yml/badge.svg)](https://github.com/Mozilla-Ocho/llamafile/actions/workflows/ci.yml)

<img src="llamafile/llamafile-640x640.png" width="320" height="320"
alt="[line drawing of llama animal head in front of slightly open manilla folder filled with files]">

**llamafile lets you distribute and run LLMs with a single file. ([announcement blog post](https://hacks.mozilla.org/2023/11/introducing-llamafile/))**

[![](https://dcbadge.vercel.app/api/server/teDuGYVTB2)](https://discord.gg/teDuGYVTB2)

Our goal is to make open LLMs much more
accessible to both developers and end users. We're doing that by
combining [llama.cpp](https://github.com/ggerganov/llama.cpp) with [Cosmopolitan Libc](https://github.com/jart/cosmopolitan) into one
framework that collapses all the complexity of LLMs down to
a single-file executable (called a "llamafile") that runs
locally on most computers, with no installation.

[Join us on our Discord](https://discord.gg/teDuGYVTB2)

## Quickstart

The easiest way to try it for yourself is to download our example
llamafile for the [LLaVA](https://llava-vl.github.io/) model (license: [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/),
[OpenAI](https://openai.com/policies/terms-of-use)). LLaVA is a new LLM that can do more
than just chat; you can also upload images and ask it questions
about them. With llamafile, this all happens locally; no data
ever leaves your computer.

1. Download [llava-v1.5-7b-q4.llamafile](https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile/resolve/main/llava-v1.5-7b-q4.llamafile?download=true) (4.29 GB).

2. Open your computer's terminal.

3. If you're using macOS, Linux, or BSD, you'll need to grant permission
for your computer to execute this new file. (You only need to do this
once.)

```sh
@@ -45,7 +47,7 @@ chmod +x llava-v1.5-7b-q4.llamafile
./llava-v1.5-7b-q4.llamafile
```

6. Your browser should open automatically and display a chat interface.
(If it doesn't, just open your browser and point it at http://localhost:8080)

7. When you're done chatting, return to your terminal and hit
@@ -217,13 +219,13 @@ later in this document.

## How llamafile works

A llamafile is an executable LLM that you can run on your own
computer. It contains the weights for a given open LLM, as well
as everything needed to actually run that model on your computer.
There's nothing to install or configure (with a few caveats, discussed
in subsequent sections of this document).

This is all accomplished by combining llama.cpp with Cosmopolitan Libc,
which provides some useful capabilities:

1. llamafiles can run on multiple CPU microarchitectures. We
@@ -237,10 +239,10 @@ and most UNIX shells. It's also able to be easily converted (by either
you or your users) to the platform-native format, whenever required.

3. llamafiles can run on six OSes (macOS, Windows, Linux,
FreeBSD, OpenBSD, and NetBSD). If you make your own llama files, you'll
only need to build your code once, using a Linux-style toolchain. The
GCC-based compiler we provide is itself an Actually Portable Executable,
so you can build your software for all six OSes from the comfort of
whichever one you prefer most for development.

4. The weights for an LLM can be embedded within the llamafile.
@@ -250,19 +252,19 @@ archive. It enables quantized weights distributed online to be prefixed
with a compatible version of the llama.cpp software, thereby ensuring
its originally observed behaviors can be reproduced indefinitely (see the
zip inspection sketch just after this list).

5. Finally, with the tools included in this project you can create your
*own* llamafiles, using any compatible model weights you want. You can
then distribute these llamafiles to other people, who can easily make
use of them regardless of what kind of computer they have.
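
Since a llamafile doubles as an ordinary PKZIP archive (point 4 above), any standard zip tool can inspect what's inside. A minimal sketch, assuming `unzip` is installed and using the example llamafile from the Quickstart:

```sh
# The GGUF weights and the .args file (default command-line arguments)
# show up as regular zip entries.
unzip -l llava-v1.5-7b-q4.llamafile
```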

## Using llamafile with external weights

Even though our example llamafiles have the weights built-in, you don't
*have* to use llamafile that way. Instead, you can download *just* the
llamafile software (without any weights included) from our releases page.
You can then use it alongside any external weights you may have on hand.
External weights are particularly useful for Windows users because they
enable you to work around Windows' 4GB executable file size limit.
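
As a rough illustration for Linux or macOS (hypothetical filenames; `-m` is the same flag the CI workflow bakes into its `.args`), pairing a weights-free llamafile with an external GGUF might look like this, with the Windows example following below:

```sh
# Hypothetical example: bare llamafile engine plus external GGUF weights.
chmod +x ./llamafile                        # engine downloaded from the releases page
./llamafile -m ./mistral-7b-instruct.gguf   # point it at whatever weights you have on hand
```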

For Windows users, here's an example for the Mistral LLM:

@@ -297,13 +299,13 @@ sudo sh -c "echo ':APE:M::MZqFpD::/usr/bin/ape:' >/proc/sys/fs/binfmt_misc/register"
sudo sh -c "echo ':APE-jart:M::jartsr::/usr/bin/ape:' >/proc/sys/fs/binfmt_misc/register"
```

As mentioned above, on Windows you may need to rename your llamafile by
adding `.exe` to the filename.

Also as mentioned above, Windows also has a maximum file size limit of 4GB
for executables. The LLaVA server executable above is just 30MB shy of
that limit, so it'll work on Windows, but with larger models like
WizardCoder 13B, you need to store the weights in a separate file. An
example is provided above; see "Using llamafile with external weights."

On WSL, it's recommended that the WIN32 interop feature be disabled:
@@ -328,7 +330,7 @@ if you have CrowdStrike and then ask to be whitelisted.

## Supported OSes

llamafile supports the following operating systems, which require a minimum
stock install:

- Linux 2.6.18+ (i.e. every distro since RHEL5 c. 2007)
@@ -702,8 +704,8 @@ for further details.

## A note about models

The example llamafiles provided above should not be interpreted as
endorsements or recommendations of specific models, licenses, or data
sets on the part of Mozilla.

## Security
1 change: 1 addition & 0 deletions build/config.mk
@@ -19,6 +19,7 @@ TARGET_ARCH = -Xx86_64-mavx -Xx86_64-mtune=znver4

TMPDIR = o//tmp
IGNORE := $(shell mkdir -p $(TMPDIR))
ARCH := $(shell uname -m)

# apple still distributes a 17 year old version of gnu make
ifeq ($(MAKE_VERSION), 3.81)
7 changes: 7 additions & 0 deletions build/rules.mk
@@ -39,3 +39,10 @@ o/$(MODE)/%.zip.o: % $(COSMOCC)
	@mkdir -p $(dir $@)/.aarch64
	$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a x86_64 -o $@ $<
	$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a aarch64 -o $(dir $@)/.aarch64/$(notdir $@) $<

$(PREFIX)/bin/ape: $(COSMOCC) # cosmocc toolchain setup in restricted ci context
	# Install ape loader
	$(INSTALL) $(COSMOCC)/bin/ape-$(ARCH).elf $(PREFIX)/bin/ape

	# Config binfmt_misc to use ape loader for ape.elf files
	echo ':APE:M::MZqFpD::/usr/bin/ape:' > /proc/sys/fs/binfmt_misc/register
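
This rule reproduces, inside make, the same registration steps the README shows, and it is what `make cosmocc-ci PREFIX=/usr` pulls in on the CI runner. A quick sanity check on a Linux host (assuming `binfmt_misc` is mounted; these paths are standard kernel interfaces rather than anything defined in this diff):

```sh
# After `sudo make cosmocc-ci PREFIX=/usr`:
test -x /usr/bin/ape && echo "ape loader installed"
# Registering ':APE:...' creates an entry file named APE whose first line is "enabled".
grep -q '^enabled' /proc/sys/fs/binfmt_misc/APE && echo "APE binfmt registered"
```
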
Binary file added models/TinyLLama-v0.1-5M-F16.gguf
Binary file not shown.
