Unified pipeline for models & support phi3 model (#45)
* Optional logprobs & fix LLaMA eos/stop token
* Cargo fmt
* Mention other options for chat completion request
* Configurable KV cache & fix repeated chat history
* Improve readability
* Instructions for ChatUI & add demo chat video
* Optimization for decoding stage & try to fix block-table issue
* Support stream response for chat completion
* Update README & demo video
* Reduce demo video size
* Fix stream generation hang in release mode
* Reduce the buffer size & update README
* Fix LLaMA-2 prompt instruction (for long conversation)
* Cargo fmt
* Padding to avoid block allocation issue & revision for prompt instruction
* Unified pipeline for models & support phi3 model
* Fix padding strategy
* Cargo fmt
* Update README for supported models
1 parent: ae35a3a · Commit: 743a8b2 · 14 changed files with 652 additions and 203 deletions.
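Of the 14 changed files, the hunk preserved below is the one that replaces the per-model ConfigLike trait with a single Config struct shared across model families. As a rough sketch of what that unification buys (the ModelKind enum, the unified_config function, and the numeric values are illustrative assumptions, not the project's actual API), one loader path can produce the same config type for either LLaMA or Phi-3, so the KV-cache and attention code downstream never needs to know which model it is serving:

```rust
// Hypothetical sketch of a unified loader path; not code from the repository.

// Trimmed-down mirror of the Config struct introduced in this commit.
#[derive(Debug, Clone)]
struct Config {
    hidden_size: usize,
    num_hidden_layers: usize,
    num_attention_heads: usize,
    num_key_value_heads: usize,
    vocab_size: usize,
    max_seq_len: usize,
    sliding_window: Option<usize>,
}

// Assumed model selector; the real project presumably keys off the HF config.json.
enum ModelKind {
    Llama2,
    Phi3Mini,
}

// Build the same Config for either family. Values are in the ballpark of the
// published LLaMA-2 7B and Phi-3-mini model cards and are illustrative only.
fn unified_config(kind: ModelKind) -> Config {
    match kind {
        ModelKind::Llama2 => Config {
            hidden_size: 4096,
            num_hidden_layers: 32,
            num_attention_heads: 32,
            num_key_value_heads: 32,
            vocab_size: 32000,
            max_seq_len: 4096,
            sliding_window: None,
        },
        ModelKind::Phi3Mini => Config {
            hidden_size: 3072,
            num_hidden_layers: 32,
            num_attention_heads: 32,
            num_key_value_heads: 32,
            vocab_size: 32064,
            max_seq_len: 4096,
            sliding_window: Some(2047), // the 4k variant uses a sliding window; value illustrative
        },
    }
}

fn main() {
    for kind in [ModelKind::Llama2, ModelKind::Phi3Mini] {
        // Downstream code (KV-cache sizing, block tables, attention) only ever
        // sees `Config`, regardless of which model produced it.
        let cfg = unified_config(kind);
        println!("{cfg:?}");
    }
}
```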
@@ -1,13 +1,26 @@
 pub mod llama;
+pub mod phi3;
 
-pub trait ConfigLike {
-    fn get_num_kv_heads(&self) -> usize;
-    fn get_hidden_size(&self) -> usize;
-    fn get_num_hidden_layers(&self) -> usize;
-    fn get_num_attention_heads(&self) -> usize;
-    fn get_vocab_size(&self) -> usize;
-    fn get_sliding_window(&self) -> Option<usize>;
-    fn get_head_size(&self) -> usize {
-        self.get_hidden_size() / self.get_num_attention_heads()
+#[derive(Debug, Clone)]
+pub struct Config {
+    pub hidden_size: usize,
+    pub intermediate_size: usize,
+    pub vocab_size: usize,
+    pub num_hidden_layers: usize,
+    pub num_attention_heads: usize,
+    pub num_key_value_heads: usize,
+    pub use_flash_attn: bool,
+    pub rms_norm_eps: f64,
+    pub rope_theta: f32,
+    pub bos_token_id: Option<u32>,
+    pub eos_token_id: Option<u32>,
+    pub max_seq_len: usize,
+    pub sliding_window: Option<usize>,
+    pub hidden_act: Option<candle_nn::Activation>,
+}
+
+impl Config {
+    pub fn get_head_size(&self) -> usize {
+        self.hidden_size / self.num_attention_heads
     }
 }
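A quick standalone check of the new helper (this snippet is not from the repository; it only mirrors the two fields get_head_size reads): the head size is simply hidden_size divided by num_attention_heads, so a 4096-wide model with 32 attention heads gets 128-dimensional heads. In vLLM-style engines this per-head size typically feeds KV-cache block sizing, which is presumably why the helper lives on Config.

```rust
// Standalone illustration of the get_head_size arithmetic added in this commit;
// the Config construction below is illustrative, not taken from the repo.
struct Config {
    hidden_size: usize,
    num_attention_heads: usize,
}

impl Config {
    // Same arithmetic as the commit's Config::get_head_size.
    fn get_head_size(&self) -> usize {
        self.hidden_size / self.num_attention_heads
    }
}

fn main() {
    let cfg = Config { hidden_size: 4096, num_attention_heads: 32 };
    assert_eq!(cfg.get_head_size(), 128); // 4096 / 32
    println!("head size = {}", cfg.get_head_size());
}
```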