From 9702b64e8ad947c0ed03340a04f0c3c06f157803 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 19 Jun 2024 02:15:23 -0400 Subject: [PATCH 1/2] Dusting the project off --- .typos.toml | 1 + README.md | 7 ------- kernels/build.rs | 30 +++++++++++++++--------------- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/.typos.toml b/.typos.toml index fe50f81..f015473 100644 --- a/.typos.toml +++ b/.typos.toml @@ -1,4 +1,5 @@ [default] extend-ignore-identifiers-re = [ "mmaped", + "arange", ] \ No newline at end of file diff --git a/README.md b/README.md index 78d5681..592db04 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,9 @@

[![Continuous integration](https://github.com/EricLBuehler/candle-vllm/actions/workflows/ci.yml/badge.svg)](https://github.com/EricLBuehler/candle-vllm/actions/workflows/ci.yml) -[![Discord server](https://dcbadge.vercel.app/api/server/FAeJRRJ8)](https://discord.gg/FAeJRRJ8) Efficient, easy-to-use platform for inference and serving local LLMs including an OpenAI compatible API server. -PPlease see [mistral.rs](https://github.com/EricLBuehler/mistral.rs), efficient inference platform for many models, including quantized support. Additionally, it implements X-LoRA, recently released method [here](https://github.com/EricLBuehler/xlora). X-LoRA introduces a MoE inspired method to densely gate LoRA adapters powered by a model self-reflection forward pass. - -**candle-vllm is flux, in breaking development and as such is currently unstable.** - ## Features - OpenAI compatible API server provided for serving LLMs. - Highly extensible trait-based system to allow rapid implementation of new module pipelines, @@ -23,8 +18,6 @@ PPlease see [mistral.rs](https://github.com/EricLBuehler/mistral.rs), efficient - 7b - 13b - 70b -- Mistral - - 7b ## Examples See [this folder](examples/) for some examples. diff --git a/kernels/build.rs b/kernels/build.rs index 300d47c..38bc2fd 100644 --- a/kernels/build.rs +++ b/kernels/build.rs @@ -1,8 +1,8 @@ -use std::path::PathBuf; +use anyhow::Result; +use std::fs::read_to_string; use std::fs::OpenOptions; use std::io::prelude::*; -use anyhow::{Result}; -use std::fs::read_to_string; +use std::path::PathBuf; fn read_lines(filename: &str) -> Vec { let mut result = Vec::new(); @@ -29,27 +29,27 @@ fn main() -> Result<()> { let kernel_dir = PathBuf::from("../kernels/"); let absolute_kernel_dir = std::fs::canonicalize(&kernel_dir).unwrap(); - println!("cargo:rustc-link-search=native={}", absolute_kernel_dir.display()); - println!("cargo:rustc-link-lib=pagedattention"); + println!( + "cargo:rustc-link-search=native={}", + absolute_kernel_dir.display() + ); + println!("cargo:rustc-link-lib=pagedattention"); println!("cargo:rustc-link-lib=dylib=cudart"); - + let contents = read_lines("src/lib.rs"); for line in contents { if line == "pub mod ffi;" { - return Ok(()) + return Ok(()); } } let mut file = OpenOptions::new() - .write(true) - .append(true) - .open("src/lib.rs") - .unwrap(); + .write(true) + .append(true) + .open("src/lib.rs") + .unwrap(); //Expose paged attention interface to Rust if let Err(e) = writeln!(file, "pub mod ffi;") { - anyhow::bail!( - "error while building dependencies: {:?}\n", - e, - ) + anyhow::bail!("error while building dependencies: {:?}\n", e,) } else { Ok(()) } From 695ec3bfd53b01684548b5b2b3eb6a3022c4c034 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 19 Jun 2024 02:17:56 -0400 Subject: [PATCH 2/2] Format --- kernels/src/ffi.rs | 2 +- kernels/src/lib.rs | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/kernels/src/ffi.rs b/kernels/src/ffi.rs index 0b0e6ee..423db2c 100644 --- a/kernels/src/ffi.rs +++ b/kernels/src/ffi.rs @@ -67,4 +67,4 @@ extern "C" { dtype: u32, ); -} \ No newline at end of file +} diff --git a/kernels/src/lib.rs b/kernels/src/lib.rs index c43d980..54dce19 100644 --- a/kernels/src/lib.rs +++ b/kernels/src/lib.rs @@ -1,4 +1,6 @@ -pub const COPY_BLOCKS_KERNEL: &str = include_str!(concat!(env!("OUT_DIR"), "/copy_blocks_kernel.ptx")); +pub const COPY_BLOCKS_KERNEL: &str = + include_str!(concat!(env!("OUT_DIR"), "/copy_blocks_kernel.ptx")); pub const PAGEDATTENTION: &str = include_str!(concat!(env!("OUT_DIR"), "/pagedattention.ptx")); -pub const RESHAPE_AND_CACHE_KERNEL: &str = include_str!(concat!(env!("OUT_DIR"), "/reshape_and_cache_kernel.ptx")); +pub const RESHAPE_AND_CACHE_KERNEL: &str = + include_str!(concat!(env!("OUT_DIR"), "/reshape_and_cache_kernel.ptx")); pub mod ffi;