Commit

compiler error
santiagomed committed Mar 14, 2024
1 parent 6e05c53 commit 2bc3dcf
Showing 4 changed files with 28 additions and 12 deletions.
21 changes: 19 additions & 2 deletions orca-core/src/llm/openai.rs
@@ -46,6 +46,21 @@ pub struct Response {
choices: Vec<Choice>,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct QuotaError {
message: String,
#[serde(rename = "type")]
_type: String,
param: String,
code: String,
}

#[derive(Serialize, Deserialize, Debug)]
pub enum OpenAIResponse {
Response(Response),
QuotaError(QuotaError),
}

#[derive(Serialize, Deserialize, Debug, Default, Clone)]
pub struct OpenAIEmbeddingResponse {
object: String,
@@ -292,8 +307,10 @@ impl LLM for OpenAI {
let messages = prompt.to_chat()?;
let req = self.generate_request(messages.to_vec_ref())?;
let res = self.client.execute(req).await?;
let res = res.json::<Response>().await?;
Ok(res.into())
match res.json::<OpenAIResponse>().await? {
OpenAIResponse::Response(response) => Ok(response.into()),
OpenAIResponse::QuotaError(e) => Err(anyhow::anyhow!("Quota error: {}", e.message)),
}
}
}

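Note on the pattern in this hunk: the new OpenAIResponse enum lets a single json::<...>() call accept either a normal completion body or a quota-error body. Below is a minimal, self-contained sketch of that idea, not the crate's actual code: the type and field names are simplified stand-ins, and it assumes #[serde(untagged)] so serde picks the variant from the JSON shape alone (serde's default enum representation is externally tagged).

// Sketch only: deserializing a body that is either a success payload or an error
// payload. Field names are illustrative, not the exact OpenAI schema.
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct Choice {
    text: String,
}

#[derive(Deserialize, Debug)]
struct Response {
    choices: Vec<Choice>,
}

#[derive(Deserialize, Debug)]
struct QuotaError {
    message: String,
    code: String,
}

// Untagged: serde tries each variant in order and keeps the first that matches,
// so the JSON shape alone decides whether this is a Response or a QuotaError.
#[derive(Deserialize, Debug)]
#[serde(untagged)]
enum ApiResponse {
    Ok(Response),
    Err(QuotaError),
}

fn main() -> Result<(), serde_json::Error> {
    let ok: ApiResponse = serde_json::from_str(r#"{"choices":[{"text":"hi"}]}"#)?;
    let err: ApiResponse =
        serde_json::from_str(r#"{"message":"quota exceeded","code":"insufficient_quota"}"#)?;
    println!("{ok:?}");
    println!("{err:?}");
    Ok(())
}

Only serde and serde_json are assumed as dependencies for this sketch.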
8 changes: 4 additions & 4 deletions orca-core/src/llm/quantized.rs
@@ -198,22 +198,22 @@ impl Quantized {
let mut total_size_in_bytes = 0;
for (_, tensor) in model.tensor_infos.iter() {
let elem_count = tensor.shape.elem_count();
total_size_in_bytes += elem_count * tensor.ggml_dtype.type_size() / tensor.ggml_dtype.block_size();
total_size_in_bytes += elem_count * tensor.ggml_dtype.type_size() / tensor.ggml_dtype.blck_size();
}
log::info!(
"loaded {:?} tensors ({}) in {:.2}s",
model.tensor_infos.len(),
&format_size(total_size_in_bytes),
start.elapsed().as_secs_f32(),
);
Some(ModelWeights::from_gguf(model, &mut file, &Device::Cpu)?)
Some(ModelWeights::from_gguf(model, &mut file)?)
}
Some("ggml" | "bin") | Some(_) | None => {
let model = ggml_file::Content::read(&mut file, &Device::Cpu)?;
let model = ggml_file::Content::read(&mut file)?;
let mut total_size_in_bytes = 0;
for (_, tensor) in model.tensors.iter() {
let elem_count = tensor.shape().elem_count();
total_size_in_bytes += elem_count * tensor.dtype().type_size() / tensor.dtype().block_size();
total_size_in_bytes += elem_count * tensor.dtype().type_size() / tensor.dtype().blck_size();
}
log::info!(
"loaded {:?} tensors ({}) in {:.2}s",
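For context on the size bookkeeping in this hunk: quantized GGML/GGUF dtypes store a whole block of elements in type_size() bytes, so the byte count is elem_count * type_size / block_size (spelled blck_size in the candle version this commit builds against). A standalone sketch of that arithmetic, using illustrative Q4_0-style numbers (32 elements per block, 18 bytes per block) rather than values read from the crate:

// Sketch: estimating the in-memory size of a blocked, quantized tensor.
fn quantized_size_in_bytes(elem_count: usize, type_size: usize, block_size: usize) -> usize {
    // Each block of `block_size` elements occupies `type_size` bytes.
    elem_count * type_size / block_size
}

fn main() {
    let elem_count = 4096 * 4096; // a 4096 x 4096 weight matrix
    let bytes = quantized_size_in_bytes(elem_count, 18, 32);
    println!(
        "{} elements -> {} bytes (~{:.1} MiB)",
        elem_count,
        bytes,
        bytes as f64 / (1024.0 * 1024.0)
    );
}

With these assumed numbers the 16,777,216-element tensor comes to 9,437,184 bytes, roughly 9 MiB.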
5 changes: 2 additions & 3 deletions orca-models/src/mistral.rs
@@ -1,4 +1,3 @@
use candle::Device;
use crate::utils::text_generation::{Model, TextGeneration};
use candle_transformers::models::mistral;
use candle_transformers::models::quantized_mistral;
@@ -73,7 +72,7 @@ impl Mistral {
P: AsRef<std::path::Path>,
{
let cfg = mistral::Config::config_7b_v0_1(config.flash_attn);
let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(weights, &Device::Cpu)?;
let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(weights)?;
let model = quantized_mistral::Model::new(&cfg, vb)?;
let tokenizer = tokenizers::Tokenizer::from_file(tokenizer).map_err(|m| anyhow::anyhow!(m))?;
Ok(Self {
@@ -89,7 +88,7 @@ impl Mistral {

pub fn from_stream(weights: Vec<u8>, tokenizer: Vec<u8>, config: Config) -> anyhow::Result<Self> {
let cfg = mistral::Config::config_7b_v0_1(config.flash_attn);
let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf_buffer(&weights, &Device::Cpu)?;
let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf_buffer(&weights)?;
let model = quantized_mistral::Model::new(&cfg, vb)?;
let tokenizer = tokenizers::Tokenizer::from_bytes(tokenizer).map_err(|m| anyhow::anyhow!(m))?;
Ok(Self {
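Aside on the tokenizer loading that surrounds these hunks: tokenizers returns a boxed error, which the code folds into anyhow via map_err(|m| anyhow::anyhow!(m)). A small self-contained sketch of that conversion pattern; load_tokenizer here is a hypothetical stand-in, and only anyhow is assumed as a dependency:

// Sketch: turning a boxed (or Display-only) error into an anyhow::Error so it
// composes with `?` inside a function returning anyhow::Result.
fn load_tokenizer(path: &str) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
    // Hypothetical loader standing in for tokenizers::Tokenizer::from_file.
    Err(format!("no tokenizer at {path}").into())
}

fn build(path: &str) -> anyhow::Result<String> {
    // map_err + anyhow! wraps the boxed error into an anyhow::Error.
    let tok = load_tokenizer(path).map_err(|m| anyhow::anyhow!(m))?;
    Ok(tok)
}

fn main() {
    match build("missing.json") {
        Ok(t) => println!("loaded: {t}"),
        Err(e) => eprintln!("error: {e}"),
    }
}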
6 changes: 3 additions & 3 deletions orca-models/src/quantized.rs
@@ -3,8 +3,8 @@
// #![allow(unused_variables)]
// #![allow(unused_imports)]

use candle::Device;
use candle::quantized::{ggml_file, gguf_file};
use candle::Device;
use candle_transformers::models::quantized_llama::ModelWeights;

use crate::utils::text_generation::{Model, TextGeneration};
@@ -65,7 +65,7 @@ impl Quantized {
pub fn from_gguf_stream(model: Vec<u8>, tokenizer: Vec<u8>, config: Config) -> anyhow::Result<Self> {
let mut model_reader = std::io::Cursor::new(model);
let model_content = gguf_file::Content::read(&mut model_reader)?;
let model = ModelWeights::from_gguf(model_content, &mut model_reader, &Device::Cpu)?;
let model = ModelWeights::from_gguf(model_content, &mut model_reader)?;
let tokenizer = tokenizers::Tokenizer::from_bytes(tokenizer).map_err(|m| anyhow::anyhow!(m))?;
Ok(Self {
model,
@@ -80,7 +80,7 @@ impl Quantized {

pub fn from_ggml_stream(model: Vec<u8>, tokenizer: Vec<u8>, config: Config) -> anyhow::Result<Self> {
let mut model_reader = std::io::Cursor::new(model);
let model_content = ggml_file::Content::read(&mut model_reader, &Device::Cpu)?;
let model_content = ggml_file::Content::read(&mut model_reader)?;
let model = ModelWeights::from_ggml(model_content, 1)?;
let tokenizer = tokenizers::Tokenizer::from_bytes(tokenizer).map_err(|m| anyhow::anyhow!(m))?;
Ok(Self {
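The from_gguf_stream and from_ggml_stream constructors in this file work because std::io::Cursor turns an in-memory Vec<u8> into a Read + Seek source that the loaders can consume like a file. A std-only sketch of that trick; the byte buffer and magic check are illustrative, not the loaders' actual parsing:

// Sketch: treating an in-memory byte buffer as a seekable reader, the same trick
// the from_gguf_stream / from_ggml_stream constructors rely on.
use std::io::{Cursor, Read, Seek, SeekFrom};

fn read_magic<R: Read + Seek>(reader: &mut R) -> std::io::Result<[u8; 4]> {
    reader.seek(SeekFrom::Start(0))?;
    let mut magic = [0u8; 4];
    reader.read_exact(&mut magic)?;
    Ok(magic)
}

fn main() -> std::io::Result<()> {
    // Stand-in for model bytes downloaded or embedded at build time.
    let model: Vec<u8> = b"GGUFrest-of-the-file...".to_vec();
    let mut model_reader = Cursor::new(model);
    let magic = read_magic(&mut model_reader)?;
    println!("magic bytes: {:?}", std::str::from_utf8(&magic).unwrap_or("<non-utf8>"));
    Ok(())
}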
