Commit: Merge branch 'llmware-ai:main' into patch-2
Showing 13 changed files with 3,193 additions and 143 deletions.

@@ -0,0 +1,144 @@

""" This example shows how to add a custom or private OpenVino or ONNX model to the llmware model catalog.

    Over the next few releases, we will be expanding the default ModelCatalog considerably, but for the time
    being, please feel free to follow the steps below to build your own custom catalog.

    We show templates below for the model card dictionaries - most of the fields are straightforward to fill
    in for a given model.

    We highlight the main step - a simple one-liner to register the model - and then provide more detail on
    three potential troubleshooting items:

        1 - using a model from a custom/private path - and 'inserting' directly into the model_repo lookup
        2 - identifying the prompt wrapper template
        3 - customizing a new prompt wrapper

"""

from llmware.models import ModelCatalog
from llmware.prompts import Prompt
from llmware.configs import LLMWareConfig


# Create model card and register in the ModelCatalog
""" Sample OpenVino Model Card template | ||
model_card_dict = {"model_name": "phi-3-ov", "model_family": "OVGenerativeModel", | ||
"model_category": "generative_local", "display_name": "phi-3-ov", | ||
"model_location": "llmware_repo", | ||
"context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3", | ||
"temperature": 0.0, "sample_default": False, "trailing_space": "", | ||
"tokenizer_local": "tokenizer_phi3.json", | ||
"hf_repo": "llmware/phi-3-ov", | ||
"custom_model_files": [], "custom_model_repo": "", | ||
"fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"}, | ||
"validation_files": ["openvino_model.xml"], | ||
"link": "https://huggingface.co/llmware/phi-3-ov"}, | ||
""" | ||

""" Sample ONNX Model Card template

    model_card_dict = {"model_name": "phi-3-onnx", "model_family": "ONNXGenerativeModel",
                       "model_category": "generative_local", "display_name": "phi-3-onnx",
                       "model_location": "llmware_repo",
                       "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                       "temperature": 0.0, "sample_default": False, "trailing_space": "",
                       "tokenizer_local": "tokenizer_phi3.json",
                       "hf_repo": "llmware/phi-3-onnx",
                       "custom_model_files": [], "custom_model_repo": "",
                       "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                       "validation_files": ["model.onnx", "model.onnx.data"],
                       "link": "https://huggingface.co/llmware/phi-3-onnx"}
"""

# create the model card dictionary manually using the templates above as guides, e.g.,
model_card_dict = {"model_name": "my_model", "insert other params from above...": []}
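
# for a fuller (purely illustrative) picture, a complete custom card might look like the OpenVino template
# above with your own values substituted - the model_name, display_name, hf_repo and link below are
# hypothetical placeholders, not real catalog entries
model_card_dict = {"model_name": "my_model", "model_family": "OVGenerativeModel",
                   "model_category": "generative_local", "display_name": "my_model",
                   "model_location": "llmware_repo",
                   "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                   "temperature": 0.0, "sample_default": False, "trailing_space": "",
                   "tokenizer_local": "tokenizer_phi3.json",
                   "hf_repo": "my-org/my_model",
                   "custom_model_files": [], "custom_model_repo": "",
                   "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                   "validation_files": ["openvino_model.xml"],
                   "link": "https://huggingface.co/my-org/my_model"}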

# this is the key step - registering the model card - add as a first line in any script/example
ModelCatalog().register_new_model_card(model_card_dict)

# once the model is registered in the catalog, it can then be accessed anytime by name, e.g.,
model = ModelCatalog().load_model("my_model")
response = model.inference("What is ...")

# or if using in conjunction with building a RAG prompt
prompter = Prompt().load_model("my_model")

""" Issue # 1 - Models in local/custom path

    If you have the model in a local/custom path, the easiest approach is to manually copy or move the model
    components to /llmware_data/model_repo/{model_name}/ so that they are found at that path.
"""

# lookup model repo path
model_path = LLMWareConfig().get_model_repo_path()
print("local model path: ", model_path)

# You can manually put the model components in a folder called "model_name" at the model repo path, and
# 'lookups' will all work - a short sketch follows below.
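
# minimal sketch of the manual copy step, assuming your model files live in a local folder of your
# choosing - both paths below are hypothetical placeholders; the target folder name should match the
# "model_name" in the registered model card so that catalog lookups resolve to it
import os
import shutil

local_model_folder = "/path/to/my/local/model"        # hypothetical - replace with your own local path
target_folder = os.path.join(model_path, "my_model")  # folder named after the registered model_name

if os.path.exists(local_model_folder) and not os.path.exists(target_folder):
    shutil.copytree(local_model_folder, target_folder)
    print("copied model components into: ", target_folder)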

""" Issue # 2 - How do I figure out the prompt template?

    Below is a list of the prompt wrapper lookups that covers most of the common models:

    # standard used in most llmware models - bling, dragon and slim
    "human_bot": {"main_start": "<human>: ", "main_stop": "\n", "start_llm_response": "<bot>:"},

    # commonly used by llama2 and mistral
    "<INST>": {"main_start": "<INST>", "main_stop": "</INST>", "start_llm_response": ""},

    "hf_chat": {"system_start": "<|im_start|>system\n", "system_stop": "<|im_end|>\n",
                "main_start": "<|im_start|>user", "main_stop": "<|im_end|>\n",
                "start_llm_response": "<|im_start|>assistant"},

    "open_chat": {"main_start": "GPT4 User: ", "main_stop": "<|endofturn|>",
                  "start_llm_response": "GPT4 Assistant:"},

    "alpaca": {"main_start": "### Instruction: ", "main_stop": "\n",
               "start_llm_response": "### Response: "},

    "chat_ml": {"system_start": "<|im_start|>system", "system_stop": "<|im_end|>\n",
                "main_start": "<|im_start|>user", "main_stop": "<|im_end|>\n",
                "start_llm_response": "<|im_start|>assistant"},

    "phi_3": {"system_start": "<|system|>\n", "system_stop": "<|end|>\n",
              "main_start": "<|user|>\n", "main_stop": "<|end|>\n", "start_llm_response": "<|assistant|>"},

    "llama_3_chat": {"system_start": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
                     "system_stop": "<|eot_id|>",
                     "main_start": "<|start_header_id|>user<|end_header_id|>\n",
                     "main_stop": "<|eot_id|>",
                     "start_llm_response": "<|start_header_id|>assistant<|end_header_id|>\n"},

    "tiny_llama_chat": {"system_start": "<|system|>", "system_stop": "</s>",
                        "main_start": "<|user|>", "main_stop": "</s>",
                        "start_llm_response": "<|assistant|>"},

    "stablelm_zephyr_chat": {"system_start": "", "system_stop": "",
                             "main_start": "<|user|>", "main_stop": "<|endoftext|>\n",
                             "start_llm_response": "<|assistant|>"},

    "google_gemma_chat": {"system_start": "", "system_stop": "",
                          "main_start": "<bos><start_of_turn>user\n",
                          "main_stop": "<end_of_turn>\n",
                          "start_llm_response": "<start_of_turn>model"},

    "vicuna_chat": {"system_start": "", "system_stop": "",
                    "main_start": "USER: ", "main_stop": "",
                    "start_llm_response": " ASSISTANT:"}
"""

# if none of these templates work, then you can also register a new prompt template
ModelCatalog().register_new_finetune_wrapper("my_new_template",
                                             main_start="<user starts here>",
                                             main_stop="<user ends here>",
                                             llm_start="<model starts here>",
                                             system_start="<you are a helpful assistant...>",
                                             system_stop="<end system stuff>")

# once registered, this new prompt wrapper can also be invoked directly by "my_new_template", and it will be
# picked up in the lookup at the time of instantiating the model
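
# for example (sketch - only the relevant field is shown, with the other fields following the model card
# templates at the top of this example), a custom model card can reference the new wrapper by name:
#
#   model_card_dict = {"model_name": "my_model", "prompt_wrapper": "my_new_template", ...}
#   ModelCatalog().register_new_model_card(model_card_dict)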

@@ -0,0 +1,84 @@

""" Starting with llmware 0.3.7, we have integrated support for ONNX Runtime Generative models.

    To get started:  `pip install onnxruntime_genai`

    Please note that onnxruntime_genai is supported on a wide range of Windows and Linux x86 platforms,
    but it does not build for Mac Metal - so it will not work on Macs.
"""

from llmware.models import ModelCatalog

from importlib import util
if not util.find_spec("onnxruntime_genai"):
    print("\nto run this example, you need to install onnxruntime_genai first, e.g., pip3 install onnxruntime_genai")

# we will be adding more ONNX models to the default catalog, but we currently support:
# -- bling-tiny-llama-onnx
# -- bling-phi-3-onnx
# -- phi-3-onnx

# please see the example 'adding_openvino_or_onnx_model.py' to add your own ONNX and OpenVino models
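
# optional sanity check (a minimal sketch, not part of the original example) - assuming that
# ModelCatalog().list_all_models() returns the registered model card dictionaries (with "model_name"
# and "model_family" keys, as in the model card templates), this prints the ONNX generative models
# currently available in the catalog
for model_card in ModelCatalog().list_all_models():
    if model_card.get("model_family") == "ONNXGenerativeModel":
        print("onnx model in catalog: ", model_card.get("model_name"))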


def getting_started():

    """ Simple 'hello world' example. """

    model = ModelCatalog().load_model("bling-tiny-llama-onnx", temperature=0.0, sample=False,
                                      max_output=100)

    query = "What was Microsoft's revenue in the 3rd quarter?"

    context = ("Microsoft Cloud Strength Drives Third Quarter Results \nREDMOND, Wash. — April 25, 2023 — "
               "Microsoft Corp. today announced the following results for the quarter ended March 31, 2023,"
               " as compared to the corresponding period of last fiscal year:\n· Revenue was $52.9 billion"
               " and increased 7% (up 10% in constant currency)\n· Operating income was $22.4 billion "
               "and increased 10% (up 15% in constant currency)\n· Net income was $18.3 billion and "
               "increased 9% (up 14% in constant currency)\n· Diluted earnings per share was $2.45 "
               "and increased 10% (up 14% in constant currency).\n")

    response = model.inference(query, add_context=context)

    print(f"\ngetting_started example - query - {query}")
    print("getting_started example - response: ", response)

    return response


def streaming_example():

    prompt = "What are the benefits of small specialized LLMs?"

    print(f"\nstreaming_example - prompt: {prompt}")

    # since model.stream provides a generator, use it as follows to consume the streamed tokens
    model = ModelCatalog().load_model("phi-3-onnx", max_output=500)
    text_out = ""
    token_count = 0

    for streamed_token in model.stream(prompt):

        text_out += streamed_token
        if text_out.strip():
            print(streamed_token, end="")

        token_count += 1

    print("total text: ", text_out)
    print("total tokens: ", token_count)

    return text_out


if __name__ == "__main__":

    getting_started()

    streaming_example()

@@ -0,0 +1,107 @@

""" Starting with llmware 0.3.7, we have integrated support for OpenVino Generative models.

    To get started:

        `pip install openvino`
        `pip install openvino_genai`

    OpenVino is supported on a wide range of platforms (including Windows, Linux and Mac OS), and is highly
    optimized for Intel x86 architectures - both CPU and GPU.

    The intent is for OpenVino models to be "drop-in" replacements for Pytorch or GGUF models by simply
    replacing the model with the OpenVino equivalent - usually indicated by an 'ov' at the end of the model
    name (see the short sketch below the supported model list).
"""

from llmware.models import ModelCatalog

from importlib import util
if not util.find_spec("openvino"):
    print("\nto run this example, you need to install openvino first, e.g., pip3 install openvino")

if not util.find_spec("openvino_genai"):
    print("\nto run this example, you need to install openvino_genai first, e.g., pip3 install openvino_genai")


# we will be adding more OpenVino models to the default catalog, but we currently support:
# -- bling-tiny-llama-ov
# -- bling-phi-3-ov
# -- phi-3-ov
# -- qwen2.5-1.5b-ov
# -- qwen2.5-3b-ov
# -- qwen2.5-0.5b-ov
# -- dragon-llama2-ov
# -- dragon-mistral-ov
# -- dragon-yi-9b-ov
# -- slim-extract-tiny-ov
# -- slim-extract-phi-3-ov
# -- slim-sentiment-ov

# to add your own OpenVino models, please see the example 'adding_openvino_or_onnx_model.py'
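
# the 'drop-in' idea sketched (illustrative only): the same loading and inference pattern used for a
# Pytorch or GGUF model works unchanged for its OpenVino build - only the model name changes, e.g.,
#
#   model = ModelCatalog().load_model("bling-tiny-llama-ov")   # OpenVino build from the list above
#   response = model.inference("What is ...", add_context="...")
#
# the same two lines would run against the non-OV catalog entry for the model, if one is installed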


def getting_started():

    model = ModelCatalog().load_model("bling-tiny-llama-ov", temperature=0.0, sample=False,
                                      max_output=100)

    query = "What was Microsoft's revenue in the 3rd quarter?"

    context = ("Microsoft Cloud Strength Drives Third Quarter Results \nREDMOND, Wash. — April 25, 2023 — "
               "Microsoft Corp. today announced the following results for the quarter ended March 31, 2023,"
               " as compared to the corresponding period of last fiscal year:\n· Revenue was $52.9 billion"
               " and increased 7% (up 10% in constant currency)\n· Operating income was $22.4 billion "
               "and increased 10% (up 15% in constant currency)\n· Net income was $18.3 billion and "
               "increased 9% (up 14% in constant currency)\n· Diluted earnings per share was $2.45 "
               "and increased 10% (up 14% in constant currency).\n")

    response = model.inference(query, add_context=context)

    print(f"\ngetting_started example - query - {query}")
    print("getting_started example - response: ", response)

    return response


def sentiment_analysis():

    model = ModelCatalog().load_model("slim-sentiment-ov", temperature=0.0, sample=False)

    text = ("The poor earnings results along with the worrisome guidance on the future has dampened "
            "expectations and put a lot of pressure on the share price.")

    response = model.function_call(text)

    print(f"\nsentiment_analysis - {response}")

    return response


def extract_info():

    model = ModelCatalog().load_model("slim-extract-tiny-ov", temperature=0.0, sample=False)

    text = ("Adobe shares tumbled as much as 11% in extended trading Thursday after the design software maker "
            "issued strong fiscal first-quarter results but came up slightly short on quarterly revenue guidance. "
            "Here’s how the company did, compared with estimates from analysts polled by LSEG, formerly known as Refinitiv: "
            "Earnings per share: $4.48 adjusted vs. $4.38 expected Revenue: $5.18 billion vs. $5.14 billion expected "
            "Adobe’s revenue grew 11% year over year in the quarter, which ended March 1, according to a statement. "
            "Net income decreased to $620 million, or $1.36 per share, from $1.25 billion, or $2.71 per share, "
            "in the same quarter a year ago. During the quarter, Adobe abandoned its $20 billion acquisition of "
            "design software startup Figma after U.K. regulators found competitive concerns. The company paid "
            "Figma a $1 billion termination fee.")

    response = model.function_call(text, function="extract", params=["termination fee"])

    print(f"\nextract_info - {response}")

    return response


if __name__ == "__main__":

    getting_started()
    sentiment_analysis()
    extract_info()