
Commit 6e05c53

Merge pull request #17 from Finfalter/main
fixing 'missing argument' and 'blck_size'
santiagomed authored Feb 12, 2024
2 parents 03613aa + 9ecc387 commit 6e05c53
Showing 3 changed files with 11 additions and 9 deletions.
8 changes: 4 additions & 4 deletions orca-core/src/llm/quantized.rs
@@ -198,22 +198,22 @@ impl Quantized {
                 let mut total_size_in_bytes = 0;
                 for (_, tensor) in model.tensor_infos.iter() {
                     let elem_count = tensor.shape.elem_count();
-                    total_size_in_bytes += elem_count * tensor.ggml_dtype.type_size() / tensor.ggml_dtype.blck_size();
+                    total_size_in_bytes += elem_count * tensor.ggml_dtype.type_size() / tensor.ggml_dtype.block_size();
                 }
                 log::info!(
                     "loaded {:?} tensors ({}) in {:.2}s",
                     model.tensor_infos.len(),
                     &format_size(total_size_in_bytes),
                     start.elapsed().as_secs_f32(),
                 );
-                Some(ModelWeights::from_gguf(model, &mut file)?)
+                Some(ModelWeights::from_gguf(model, &mut file, &Device::Cpu)?)
             }
             Some("ggml" | "bin") | Some(_) | None => {
-                let model = ggml_file::Content::read(&mut file)?;
+                let model = ggml_file::Content::read(&mut file, &Device::Cpu)?;
                 let mut total_size_in_bytes = 0;
                 for (_, tensor) in model.tensors.iter() {
                     let elem_count = tensor.shape().elem_count();
-                    total_size_in_bytes += elem_count * tensor.dtype().type_size() / tensor.dtype().blck_size();
+                    total_size_in_bytes += elem_count * tensor.dtype().type_size() / tensor.dtype().block_size();
                 }
                 log::info!(
                     "loaded {:?} tensors ({}) in {:.2}s",
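For orientation, a minimal standalone sketch (not part of this commit) of the GGUF loading path after the change: `block_size()` replaces the removed `blck_size()`, and `ModelWeights::from_gguf` gains the device argument that was previously missing. The `load_gguf` helper name and the local file handling are assumptions for illustration; the crate alias `candle` and the use of `anyhow` and `log` follow the repository.

```rust
use candle::quantized::gguf_file;
use candle::Device;
use candle_transformers::models::quantized_llama::ModelWeights;

// Hypothetical helper mirroring the "gguf" branch in the hunk above.
fn load_gguf(path: &str) -> anyhow::Result<ModelWeights> {
    let mut file = std::fs::File::open(path)?;
    let content = gguf_file::Content::read(&mut file)?;

    // Per-tensor size is elem_count * type_size / block_size
    // (block_size() is the renamed blck_size()).
    let total_size_in_bytes: usize = content
        .tensor_infos
        .iter()
        .map(|(_, t)| t.shape.elem_count() * t.ggml_dtype.type_size() / t.ggml_dtype.block_size())
        .sum();
    log::info!("about to load {} bytes of tensors", total_size_in_bytes);

    // The trailing &Device::Cpu is the "missing argument" this commit supplies.
    Ok(ModelWeights::from_gguf(content, &mut file, &Device::Cpu)?)
}
```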
7 changes: 4 additions & 3 deletions orca-models/src/mistral.rs
@@ -1,3 +1,4 @@
+use candle::Device;
 use crate::utils::text_generation::{Model, TextGeneration};
 use candle_transformers::models::mistral;
 use candle_transformers::models::quantized_mistral;
@@ -72,7 +73,7 @@ impl Mistral {
         P: AsRef<std::path::Path>,
     {
         let cfg = mistral::Config::config_7b_v0_1(config.flash_attn);
-        let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(weights)?;
+        let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(weights, &Device::Cpu)?;
         let model = quantized_mistral::Model::new(&cfg, vb)?;
         let tokenizer = tokenizers::Tokenizer::from_file(tokenizer).map_err(|m| anyhow::anyhow!(m))?;
         Ok(Self {
@@ -88,7 +89,7 @@ impl Mistral {

     pub fn from_stream(weights: Vec<u8>, tokenizer: Vec<u8>, config: Config) -> anyhow::Result<Self> {
         let cfg = mistral::Config::config_7b_v0_1(config.flash_attn);
-        let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf_buffer(&weights)?;
+        let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf_buffer(&weights, &Device::Cpu)?;
         let model = quantized_mistral::Model::new(&cfg, vb)?;
         let tokenizer = tokenizers::Tokenizer::from_bytes(tokenizer).map_err(|m| anyhow::anyhow!(m))?;
         Ok(Self {
@@ -112,7 +113,7 @@ impl Mistral {
         ));
         let tokenizer = repo.get("tokenizer.json").await?;
         let model_path = repo.get("model-q4k.gguf").await?;
-        let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(model_path)?;
+        let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(model_path, &Device::Cpu)?;
         let model = quantized_mistral::Model::new(&mistral::Config::config_7b_v0_1(config.flash_attn), vb)?;
         let tokenizer = tokenizers::Tokenizer::from_file(tokenizer).map_err(anyhow::Error::msg)?;
         Ok(Self {
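As a companion to the hunks above, a minimal sketch (not from this PR) of how the device-aware `VarBuilder::from_gguf` call is used to build the quantized Mistral model. The `build_quantized_mistral` helper and its parameters are illustrative assumptions; everything stays on `Device::Cpu`, as in the patch.

```rust
use candle::Device;
use candle_transformers::models::{mistral, quantized_mistral};
use candle_transformers::quantized_var_builder::VarBuilder;

// Hypothetical helper following the from_files path above.
fn build_quantized_mistral(weights_path: &str, flash_attn: bool) -> anyhow::Result<quantized_mistral::Model> {
    let cfg = mistral::Config::config_7b_v0_1(flash_attn);
    // from_gguf (and from_gguf_buffer for in-memory weights) now takes the
    // target device as its second argument.
    let vb = VarBuilder::from_gguf(weights_path, &Device::Cpu)?;
    let model = quantized_mistral::Model::new(&cfg, vb)?;
    Ok(model)
}
```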
5 changes: 3 additions & 2 deletions orca-models/src/quantized.rs
@@ -3,6 +3,7 @@
 // #![allow(unused_variables)]
 // #![allow(unused_imports)]

+use candle::Device;
 use candle::quantized::{ggml_file, gguf_file};
 use candle_transformers::models::quantized_llama::ModelWeights;

@@ -64,7 +65,7 @@ impl Quantized {
     pub fn from_gguf_stream(model: Vec<u8>, tokenizer: Vec<u8>, config: Config) -> anyhow::Result<Self> {
         let mut model_reader = std::io::Cursor::new(model);
         let model_content = gguf_file::Content::read(&mut model_reader)?;
-        let model = ModelWeights::from_gguf(model_content, &mut model_reader)?;
+        let model = ModelWeights::from_gguf(model_content, &mut model_reader, &Device::Cpu)?;
         let tokenizer = tokenizers::Tokenizer::from_bytes(tokenizer).map_err(|m| anyhow::anyhow!(m))?;
         Ok(Self {
             model,
@@ -79,7 +80,7 @@ impl Quantized {

     pub fn from_ggml_stream(model: Vec<u8>, tokenizer: Vec<u8>, config: Config) -> anyhow::Result<Self> {
         let mut model_reader = std::io::Cursor::new(model);
-        let model_content = ggml_file::Content::read(&mut model_reader)?;
+        let model_content = ggml_file::Content::read(&mut model_reader, &Device::Cpu)?;
         let model = ModelWeights::from_ggml(model_content, 1)?;
         let tokenizer = tokenizers::Tokenizer::from_bytes(tokenizer).map_err(|m| anyhow::anyhow!(m))?;
         Ok(Self {
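For completeness, a minimal sketch (not part of this commit) of the two stream-loading paths with the device argument in its new positions: `gguf_file::Content::read` keeps its single reader argument while `ModelWeights::from_gguf` takes the device, and for legacy GGML files the device moves into `ggml_file::Content::read`. The helper names and the assumption that the byte buffers hold complete model files are illustrative.

```rust
use candle::quantized::{ggml_file, gguf_file};
use candle::Device;
use candle_transformers::models::quantized_llama::ModelWeights;

// Mirrors from_gguf_stream: Content::read keeps its single reader argument,
// while ModelWeights::from_gguf now also wants the device.
fn weights_from_gguf_bytes(model_bytes: Vec<u8>) -> anyhow::Result<ModelWeights> {
    let mut reader = std::io::Cursor::new(model_bytes);
    let content = gguf_file::Content::read(&mut reader)?;
    Ok(ModelWeights::from_gguf(content, &mut reader, &Device::Cpu)?)
}

// Mirrors from_ggml_stream: for the legacy GGML format the device is passed
// to Content::read itself; 1 is the GQA factor used by the code above.
fn weights_from_ggml_bytes(model_bytes: Vec<u8>) -> anyhow::Result<ModelWeights> {
    let mut reader = std::io::Cursor::new(model_bytes);
    let content = ggml_file::Content::read(&mut reader, &Device::Cpu)?;
    Ok(ModelWeights::from_ggml(content, 1)?)
}
```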
