From 76c7f3cfa0c1232da0c7e20b995a7f89e57b8ee4 Mon Sep 17 00:00:00 2001
From: okada
Date: Sun, 26 Nov 2023 15:56:44 +0900
Subject: [PATCH] release

---
 .github/workflows/build.yaml | 58 +++++++++++++++++++++++++++++++++---
 flatline_lsp.py              | 22 ++++++++++++--
 prepare_codegen25_model.sh   |  2 +-
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 6a15a84..8edcb24 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -25,9 +25,59 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install -y build-essential cmake libpython3-dev
+          pip3 install torch --index-url https://download.pytorch.org/whl/cpu
+          pip3 install transformers tiktoken pyinstaller
 
-      - name: Build
-        id: cmake_build
+      - name: Build backend server
+        id: cmake_build_backend_server
         run: |
-          cmake -B build -S .
-          cmake --build build --config Release
+          cmake -B build_backend_server -S .
+          cmake --build build_backend_server --config Release
+
+      - name: Build lsp server
+        id: pyinstaller_build_lsp_server
+        run: |
+          pyinstaller flatline_lsp.py \
+            --copy-metadata tqdm \
+            --copy-metadata regex \
+            --copy-metadata requests \
+            --copy-metadata packaging \
+            --copy-metadata filelock \
+            --copy-metadata numpy \
+            --copy-metadata tokenizers \
+            --copy-metadata huggingface-hub \
+            --copy-metadata safetensors \
+            --copy-metadata pyyaml \
+            --copy-metadata torch \
+            --hidden-import=tiktoken_ext.openai_public \
+            --hidden-import=tiktoken_ext \
+            --add-binary build_backend_server/bin/flatline-server:flatline/backend_server \
+            ;
+          mkdir -p ./dist/flatline/license
+          find build_backend_server/bin -name \*.LICENSE.txt | xargs -I{} cp {} ./dist/flatline/license/
+          zip -r flatline_lsp.zip dist/flatline_lsp
+
+      - name: Create release
+        id: create_release
+        uses: actions/create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ github.ref }}
+          release_name: Release ${{ github.ref }}
+          body: |
+            Release test
+          draft: true
+          prerelease: true
+
+      - name: Upload Release Asset
+        id: upload_release_asset
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ steps.create_release.outputs.upload_url }}
+          asset_path: ./flatline_lsp.zip
+          asset_name: flatline_lsp.zip
+          asset_content_type: application/zip
+
diff --git a/flatline_lsp.py b/flatline_lsp.py
index 3a431bd..cfd6161 100644
--- a/flatline_lsp.py
+++ b/flatline_lsp.py
@@ -1,4 +1,6 @@
 import argparse
+import sys
+import os
 import requests
 from typing import Any, Dict, Optional, List
 import threading
@@ -27,6 +29,7 @@ class LlamaCppCausalLM(PreTrainedModel):
     def __init__(
         self,
         config: LlamaCppConfig,
+        backend_server_bin: str,
         backend_server_host: str,
         backend_server_port: int,
         model_name: str,
@@ -35,6 +38,7 @@ def __init__(
     ):
         super().__init__(config)
 
+        self.baseckend_server_bin = backend_server_bin
         self.baseckend_server_host = backend_server_host
         self.baseckend_server_port = backend_server_port
         try:
@@ -43,7 +47,7 @@ def __init__(
             )
         except Exception:
             subprocess.Popen(
-                f"/home/okada/flatline2/build/bin/flatline-server --model-path {model_name} --n-gpu_layers {n_gpu_layers}".split()
+                f"{self.baseckend_server_bin} --model-path {model_name} --n-gpu_layers {n_gpu_layers}".split()
             )
 
     @property
@@ -123,6 +127,7 @@ def __init__(
         self,
         lang_server: LanguageServer,
         max_new_tokens: int,
+        backend_server_bin: str,
         backend_server_host: str,
         backend_server_port: int,
         model_name: str,
@@ -130,6 +135,7 @@ def __init__(
         n_gpu_layers: int,
     ):
         self.lang_server = lang_server
+        self.max_new_tokens = max_new_tokens
 
         assert model_name.endswith(".gguf")
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -137,13 +143,13 @@ def __init__(
         )
         self.model = LlamaCppCausalLM(
             config=LlamaCppConfig(),
+            backend_server_bin=backend_server_bin,
             backend_server_host=backend_server_host,
             backend_server_port=backend_server_port,
             model_name=model_name,
             n_threads=n_threads,
             n_gpu_layers=n_gpu_layers,
         )
-        self.max_new_tokens = max_new_tokens
 
         self.latest_completion_id_lock = threading.Lock()
         self.computing_resource_lock = threading.Lock()
@@ -221,14 +227,23 @@ def completions(
     )
 
 
+def resource_path(relative_path: str):
+    try:
+        base_path = sys._MEIPASS
+    except Exception:
+        base_path = os.path.dirname(__file__)
+    return os.path.join(base_path, relative_path)
+
+
 def main() -> None:
     parser = argparse.ArgumentParser()
+    parser.add_argument("--backend-server-bin", type=str, default=resource_path("./flatline/backend_server/flatline-server"))
     parser.add_argument("--backend-server-host", type=str, default="localhost")
     parser.add_argument("--backend-server-port", type=int, default=5000)
     parser.add_argument(
         "--model-name",
         type=str,
-        default="/home/okada/flatline2/codegen25-7b-multi/ggml-model-Q4_K.gguf",
+        default=resource_path("./flatline/model_data/codegen25-7b-multi/ggml-model-Q4_K.gguf"),
     )
     parser.add_argument("--max-new-tokens", type=int, default=256)
     parser.add_argument("--n-threads", type=int, default=8)
@@ -239,6 +254,7 @@ def main() -> None:
     lm_for_completion = LanguageModelForCompletion(
         lang_server=server,
         max_new_tokens=args.max_new_tokens,
+        backend_server_bin=args.backend_server_bin,
        backend_server_host=args.backend_server_host,
         backend_server_port=args.backend_server_port,
         model_name=args.model_name,
diff --git a/prepare_codegen25_model.sh b/prepare_codegen25_model.sh
index 4d5459d..60a105a 100644
--- a/prepare_codegen25_model.sh
+++ b/prepare_codegen25_model.sh
@@ -1,3 +1,3 @@
 #!/usr/bin/bash
 head -c 30M dummy_file
-python3 make_dummy_tokenizer.py 2>&1 | grep -v 'The corpus must be encoded in utf-8.'
+python3 make_dummy_tokenizer.py