From 9702b64e8ad947c0ed03340a04f0c3c06f157803 Mon Sep 17 00:00:00 2001
From: EricLBuehler
Date: Wed, 19 Jun 2024 02:15:23 -0400
Subject: [PATCH 1/2] Dusting the project off
---
.typos.toml | 1 +
README.md | 7 -------
kernels/build.rs | 30 +++++++++++++++---------------
3 files changed, 16 insertions(+), 22 deletions(-)
diff --git a/.typos.toml b/.typos.toml
index fe50f81..f015473 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -1,4 +1,5 @@
[default]
extend-ignore-identifiers-re = [
"mmaped",
+ "arange",
]
\ No newline at end of file
diff --git a/README.md b/README.md
index 78d5681..592db04 100644
--- a/README.md
+++ b/README.md
@@ -3,14 +3,9 @@
[![Continuous integration](https://github.com/EricLBuehler/candle-vllm/actions/workflows/ci.yml/badge.svg)](https://github.com/EricLBuehler/candle-vllm/actions/workflows/ci.yml)
-[![Discord server](https://dcbadge.vercel.app/api/server/FAeJRRJ8)](https://discord.gg/FAeJRRJ8)
Efficient, easy-to-use platform for inference and serving local LLMs including an OpenAI compatible API server.
-PPlease see [mistral.rs](https://github.com/EricLBuehler/mistral.rs), efficient inference platform for many models, including quantized support. Additionally, it implements X-LoRA, recently released method [here](https://github.com/EricLBuehler/xlora). X-LoRA introduces a MoE inspired method to densely gate LoRA adapters powered by a model self-reflection forward pass.
-
-**candle-vllm is flux, in breaking development and as such is currently unstable.**
-
## Features
- OpenAI compatible API server provided for serving LLMs.
- Highly extensible trait-based system to allow rapid implementation of new module pipelines,
@@ -23,8 +18,6 @@ PPlease see [mistral.rs](https://github.com/EricLBuehler/mistral.rs), efficient
- 7b
- 13b
- 70b
-- Mistral
- - 7b
## Examples
See [this folder](examples/) for some examples.
diff --git a/kernels/build.rs b/kernels/build.rs
index 300d47c..38bc2fd 100644
--- a/kernels/build.rs
+++ b/kernels/build.rs
@@ -1,8 +1,8 @@
-use std::path::PathBuf;
+use anyhow::Result;
+use std::fs::read_to_string;
use std::fs::OpenOptions;
use std::io::prelude::*;
-use anyhow::{Result};
-use std::fs::read_to_string;
+use std::path::PathBuf;
fn read_lines(filename: &str) -> Vec {
let mut result = Vec::new();
@@ -29,27 +29,27 @@ fn main() -> Result<()> {
let kernel_dir = PathBuf::from("../kernels/");
let absolute_kernel_dir = std::fs::canonicalize(&kernel_dir).unwrap();
- println!("cargo:rustc-link-search=native={}", absolute_kernel_dir.display());
- println!("cargo:rustc-link-lib=pagedattention");
+ println!(
+ "cargo:rustc-link-search=native={}",
+ absolute_kernel_dir.display()
+ );
+ println!("cargo:rustc-link-lib=pagedattention");
println!("cargo:rustc-link-lib=dylib=cudart");
-
+
let contents = read_lines("src/lib.rs");
for line in contents {
if line == "pub mod ffi;" {
- return Ok(())
+ return Ok(());
}
}
let mut file = OpenOptions::new()
- .write(true)
- .append(true)
- .open("src/lib.rs")
- .unwrap();
+ .write(true)
+ .append(true)
+ .open("src/lib.rs")
+ .unwrap();
//Expose paged attention interface to Rust
if let Err(e) = writeln!(file, "pub mod ffi;") {
- anyhow::bail!(
- "error while building dependencies: {:?}\n",
- e,
- )
+ anyhow::bail!("error while building dependencies: {:?}\n", e,)
} else {
Ok(())
}
From 695ec3bfd53b01684548b5b2b3eb6a3022c4c034 Mon Sep 17 00:00:00 2001
From: EricLBuehler
Date: Wed, 19 Jun 2024 02:17:56 -0400
Subject: [PATCH 2/2] Format
---
kernels/src/ffi.rs | 2 +-
kernels/src/lib.rs | 6 ++++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/kernels/src/ffi.rs b/kernels/src/ffi.rs
index 0b0e6ee..423db2c 100644
--- a/kernels/src/ffi.rs
+++ b/kernels/src/ffi.rs
@@ -67,4 +67,4 @@ extern "C" {
dtype: u32,
);
-}
\ No newline at end of file
+}
diff --git a/kernels/src/lib.rs b/kernels/src/lib.rs
index c43d980..54dce19 100644
--- a/kernels/src/lib.rs
+++ b/kernels/src/lib.rs
@@ -1,4 +1,6 @@
-pub const COPY_BLOCKS_KERNEL: &str = include_str!(concat!(env!("OUT_DIR"), "/copy_blocks_kernel.ptx"));
+pub const COPY_BLOCKS_KERNEL: &str =
+ include_str!(concat!(env!("OUT_DIR"), "/copy_blocks_kernel.ptx"));
pub const PAGEDATTENTION: &str = include_str!(concat!(env!("OUT_DIR"), "/pagedattention.ptx"));
-pub const RESHAPE_AND_CACHE_KERNEL: &str = include_str!(concat!(env!("OUT_DIR"), "/reshape_and_cache_kernel.ptx"));
+pub const RESHAPE_AND_CACHE_KERNEL: &str =
+ include_str!(concat!(env!("OUT_DIR"), "/reshape_and_cache_kernel.ptx"));
pub mod ffi;