Skip to content

Commit

Permalink
integration: Add Hugging Face local models; ST for embeddings (#402)
Browse files Browse the repository at this point in the history
---------
Co-authored-by: Marcus Schiesser <[email protected]>
  • Loading branch information
tomaarsen authored Nov 4, 2024
1 parent 1fe21f8 commit 0b0ed11
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 35 deletions.
5 changes: 5 additions & 0 deletions .changeset/plenty-pumpkins-fold.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add local models via Hugging Face; use Sentence Transformers with ONNX instead of FastEmbed (adds support for more models, etc.)
14 changes: 14 additions & 0 deletions helpers/env-variables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,20 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
},
]
: []),
...(modelConfig.provider === "huggingface"
? [
{
name: "EMBEDDING_BACKEND",
description:
"The backend to use for the Sentence Transformers embedding model, either 'torch', 'onnx', or 'openvino'. Defaults to 'onnx'.",
},
{
name: "EMBEDDING_TRUST_REMOTE_CODE",
description:
"Whether to trust remote code for the embedding model, required for some models with custom code.",
},
]
: []),
...(modelConfig.provider === "t-systems"
? [
{
Expand Down
61 changes: 61 additions & 0 deletions helpers/providers/huggingface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import prompts from "prompts";
import { ModelConfigParams } from ".";
import { questionHandlers, toChoice } from "../../questions/utils";

// LLMs runnable locally through llama-index-llms-huggingface.
const MODELS = ["HuggingFaceH4/zephyr-7b-alpha"];
type ModelData = {
  // Size of the embedding vectors the model produces (used by the vector store).
  dimensions: number;
};
// Sentence Transformers embedding models selectable by the user, keyed by model name.
const EMBEDDING_MODELS: Record<string, ModelData> = {
  "all-MiniLM-L6-v2": { dimensions: 384 },
};

const DEFAULT_MODEL = MODELS[0];
const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0];
// Derive the default dimensions from the default embedding model's entry so the
// two defaults cannot drift apart when EMBEDDING_MODELS is reordered or extended.
const DEFAULT_DIMENSIONS = EMBEDDING_MODELS[DEFAULT_EMBEDDING_MODEL].dimensions;

type HuggingfaceQuestionsParams = {
  // Whether to interactively ask the user for model choices (vs. use defaults).
  askModels: boolean;
};

export async function askHuggingfaceQuestions({
askModels,
}: HuggingfaceQuestionsParams): Promise<ModelConfigParams> {
const config: ModelConfigParams = {
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: DEFAULT_DIMENSIONS,
isConfigured(): boolean {
return true;
},
};

if (askModels) {
const { model } = await prompts(
{
type: "select",
name: "model",
message: "Which Hugging Face model would you like to use?",
choices: MODELS.map(toChoice),
initial: 0,
},
questionHandlers,
);
config.model = model;

const { embeddingModel } = await prompts(
{
type: "select",
name: "embeddingModel",
message: "Which embedding model would you like to use?",
choices: Object.keys(EMBEDDING_MODELS).map(toChoice),
initial: 0,
},
questionHandlers,
);
config.embeddingModel = embeddingModel;
config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions;
}

return config;
}
5 changes: 5 additions & 0 deletions helpers/providers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { askAnthropicQuestions } from "./anthropic";
import { askAzureQuestions } from "./azure";
import { askGeminiQuestions } from "./gemini";
import { askGroqQuestions } from "./groq";
import { askHuggingfaceQuestions } from "./huggingface";
import { askLLMHubQuestions } from "./llmhub";
import { askMistralQuestions } from "./mistral";
import { askOllamaQuestions } from "./ollama";
Expand Down Expand Up @@ -39,6 +40,7 @@ export async function askModelConfig({

if (framework === "fastapi") {
choices.push({ title: "T-Systems", value: "t-systems" });
choices.push({ title: "Huggingface", value: "huggingface" });
}
const { provider } = await prompts(
{
Expand Down Expand Up @@ -76,6 +78,9 @@ export async function askModelConfig({
case "t-systems":
modelConfig = await askLLMHubQuestions({ askModels });
break;
case "huggingface":
modelConfig = await askHuggingfaceQuestions({ askModels });
break;
default:
modelConfig = await askOpenAIQuestions({
openAiKey,
Expand Down
30 changes: 14 additions & 16 deletions helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,35 +173,23 @@ const getAdditionalDependencies = (
}
break;
case "groq":
// Fastembed==0.2.0 does not support python3.13 at the moment
// Fixed the python version less than 3.13
dependencies.push({
name: "python",
version: "^3.11,<3.13",
});
dependencies.push({
name: "llama-index-llms-groq",
version: "0.2.0",
});
dependencies.push({
name: "llama-index-embeddings-fastembed",
version: "^0.2.0",
name: "llama-index-embeddings-huggingface",
version: "^0.3.1",
});
break;
case "anthropic":
// Fastembed==0.2.0 does not support python3.13 at the moment
// Fixed the python version less than 3.13
dependencies.push({
name: "python",
version: "^3.11,<3.13",
});
dependencies.push({
name: "llama-index-llms-anthropic",
version: "0.3.0",
});
dependencies.push({
name: "llama-index-embeddings-fastembed",
version: "^0.2.0",
name: "llama-index-embeddings-huggingface",
version: "^0.3.1",
});
break;
case "gemini":
Expand Down Expand Up @@ -234,6 +222,16 @@ const getAdditionalDependencies = (
version: "0.2.4",
});
break;
case "huggingface":
dependencies.push({
name: "llama-index-llms-huggingface",
version: "^0.3.5",
});
dependencies.push({
name: "llama-index-embeddings-huggingface",
version: "^0.3.1",
});
break;
case "t-systems":
dependencies.push({
name: "llama-index-agent-openai",
Expand Down
1 change: 1 addition & 0 deletions helpers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export type ModelProvider =
| "gemini"
| "mistral"
| "azure-openai"
| "huggingface"
| "t-systems";
export type ModelConfig = {
provider: ModelProvider;
Expand Down
51 changes: 32 additions & 19 deletions templates/components/settings/python/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def init_settings():
init_mistral()
case "azure-openai":
init_azure_openai()
case "huggingface":
init_huggingface()
case "t-systems":
from .llmhub import init_llmhub

Expand Down Expand Up @@ -113,29 +115,40 @@ def init_azure_openai():
)


def init_fastembed():
def init_huggingface_embedding():
try:
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
except ImportError:
raise ImportError(
"FastEmbed support is not installed. Please install it with `poetry add llama-index-embeddings-fastembed`"
"Hugging Face support is not installed. Please install it with `poetry add llama-index-embeddings-huggingface`"
)

embed_model_map: Dict[str, str] = {
# Small and multilingual
"all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
# Large and multilingual
"paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
}
embedding_model = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
backend = os.getenv("EMBEDDING_BACKEND", "onnx") # "torch", "onnx", or "openvino"
trust_remote_code = (
os.getenv("EMBEDDING_TRUST_REMOTE_CODE", "false").lower() == "true"
)

Settings.embed_model = HuggingFaceEmbedding(
model_name=embedding_model,
trust_remote_code=trust_remote_code,
backend=backend,
)


embedding_model = os.getenv("EMBEDDING_MODEL")
if embedding_model is None:
raise ValueError("EMBEDDING_MODEL environment variable is not set")
def init_huggingface():
try:
from llama_index.llms.huggingface import HuggingFaceLLM
except ImportError:
raise ImportError(
"Hugging Face support is not installed. Please install it with `poetry add llama-index-llms-huggingface` and `poetry add llama-index-embeddings-huggingface`"
)

# This will download the model automatically if it is not already downloaded
Settings.embed_model = FastEmbedEmbedding(
model_name=embed_model_map[embedding_model]
Settings.llm = HuggingFaceLLM(
model_name=os.getenv("MODEL"),
tokenizer_name=os.getenv("MODEL"),
)
init_huggingface_embedding()


def init_groq():
Expand All @@ -147,8 +160,8 @@ def init_groq():
)

Settings.llm = Groq(model=os.getenv("MODEL"))
# Groq does not provide embeddings, so we use FastEmbed instead
init_fastembed()
# Groq does not provide embeddings, so we use open Sentence Transformer models instead
init_huggingface_embedding()


def init_anthropic():
Expand All @@ -168,8 +181,8 @@ def init_anthropic():
}

Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
# Anthropic does not provide embeddings, so we use FastEmbed instead
init_fastembed()
# Anthropic does not provide embeddings, so we use open Sentence Transformer models instead
init_huggingface_embedding()


def init_gemini():
Expand Down

0 comments on commit 0b0ed11

Please sign in to comment.