Commit: Merge branch 'llmware-ai:main' into patch-2
Showing 13 changed files with 3,193 additions and 143 deletions.

@@ -0,0 +1,144 @@

""" This example shows how to add a custom or private OpenVino or ONNX model to the llmware model catalog.

    Over the next few releases, we will be expanding the default ModelCatalog considerably, but for the time
    being, please feel free to follow the steps below to build your own custom catalog.

    We show templates below for the model card dictionaries - most of the fields are straightforward to fill
    in for a given model.

    We highlight the main step - a simple one-liner to register the model - and then provide more detail on
    three potential troubleshooting items:

        1 - using a model from a custom/private path - and 'inserting' directly into the model_repo lookup
        2 - identifying the prompt wrapper template
        3 - customizing a new prompt wrapper

"""

from llmware.models import ModelCatalog
from llmware.prompts import Prompt
from llmware.configs import LLMWareConfig


# Create model card and register in the ModelCatalog
""" Sample OpenVino Model Card template | ||
model_card_dict = {"model_name": "phi-3-ov", "model_family": "OVGenerativeModel", | ||
"model_category": "generative_local", "display_name": "phi-3-ov", | ||
"model_location": "llmware_repo", | ||
"context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3", | ||
"temperature": 0.0, "sample_default": False, "trailing_space": "", | ||
"tokenizer_local": "tokenizer_phi3.json", | ||
"hf_repo": "llmware/phi-3-ov", | ||
"custom_model_files": [], "custom_model_repo": "", | ||
"fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"}, | ||
"validation_files": ["openvino_model.xml"], | ||
"link": "https://huggingface.co/llmware/phi-3-ov"}, | ||
""" | ||

""" Sample ONNX Model Card template

    model_card_dict = {"model_name": "phi-3-onnx", "model_family": "ONNXGenerativeModel",
                       "model_category": "generative_local", "display_name": "phi-3-onnx",
                       "model_location": "llmware_repo",
                       "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                       "temperature": 0.0, "sample_default": False, "trailing_space": "",
                       "tokenizer_local": "tokenizer_phi3.json",
                       "hf_repo": "llmware/phi-3-onnx",
                       "custom_model_files": [], "custom_model_repo": "",
                       "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                       "validation_files": ["model.onnx", "model.onnx.data"],
                       "link": "https://huggingface.co/llmware/phi-3-onnx"}
"""

# create the model card dictionary manually using the templates above as guides, e.g.,
model_card_dict = {"model_name": "my_model", "insert other params from above...": []}
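
# for a fuller (purely illustrative) picture, a complete custom card might look like the OpenVino template
# above with your own values substituted - the model_name, display_name, hf_repo and link below are
# hypothetical placeholders, not real catalog entries
model_card_dict = {"model_name": "my_model", "model_family": "OVGenerativeModel",
                   "model_category": "generative_local", "display_name": "my_model",
                   "model_location": "llmware_repo",
                   "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
                   "temperature": 0.0, "sample_default": False, "trailing_space": "",
                   "tokenizer_local": "tokenizer_phi3.json",
                   "hf_repo": "my-org/my_model",
                   "custom_model_files": [], "custom_model_repo": "",
                   "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
                   "validation_files": ["openvino_model.xml"],
                   "link": "https://huggingface.co/my-org/my_model"}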

# this is the key step - registering the model card - add as a first line in any script/example
ModelCatalog().register_new_model_card(model_card_dict)

# once the model is registered in the catalog, it can then be accessed anytime by name, e.g.,
model = ModelCatalog().load_model("my_model")
response = model.inference("What is ...")

# or if using in conjunction with building a RAG prompt
prompter = Prompt().load_model("my_model")

""" Issue # 1 - Models in local/custom path

    If you have the model in a local/custom path, the easiest approach is to manually copy or move the model
    components to /llmware_data/model_repo/{model_name}/ so that they are found at that path.
"""

# lookup model repo path
model_path = LLMWareConfig().get_model_repo_path()
print("local model path: ", model_path)

# You can manually put the model components in a folder called "model_name" at the model repo path, and
# 'lookups' will all work - a short sketch follows below.
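
# minimal sketch of the manual copy step, assuming your model files live in a local folder of your
# choosing - both paths below are hypothetical placeholders; the target folder name should match the
# "model_name" in the registered model card so that catalog lookups resolve to it
import os
import shutil

local_model_folder = "/path/to/my/local/model"        # hypothetical - replace with your own local path
target_folder = os.path.join(model_path, "my_model")  # folder named after the registered model_name

if os.path.exists(local_model_folder) and not os.path.exists(target_folder):
    shutil.copytree(local_model_folder, target_folder)
    print("copied model components into: ", target_folder)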

""" Issue # 2 - How do I figure out the prompt template?

    Below is a list of the prompt wrapper lookups that covers most of the common models:

    # standard used in most llmware models - bling, dragon and slim
    "human_bot": {"main_start": "<human>: ", "main_stop": "\n", "start_llm_response": "<bot>:"},

    # commonly used by llama2 and mistral
    "<INST>": {"main_start": "<INST>", "main_stop": "</INST>", "start_llm_response": ""},

    "hf_chat": {"system_start": "<|im_start|>system\n", "system_stop": "<|im_end|>\n",
                "main_start": "<|im_start|>user", "main_stop": "<|im_end|>\n",
                "start_llm_response": "<|im_start|>assistant"},

    "open_chat": {"main_start": "GPT4 User: ", "main_stop": "<|endofturn|>",
                  "start_llm_response": "GPT4 Assistant:"},

    "alpaca": {"main_start": "### Instruction: ", "main_stop": "\n",
               "start_llm_response": "### Response: "},

    "chat_ml": {"system_start": "<|im_start|>system", "system_stop": "<|im_end|>\n",
                "main_start": "<|im_start|>user", "main_stop": "<|im_end|>\n",
                "start_llm_response": "<|im_start|>assistant"},

    "phi_3": {"system_start": "<|system|>\n", "system_stop": "<|end|>\n",
              "main_start": "<|user|>\n", "main_stop": "<|end|>\n", "start_llm_response": "<|assistant|>"},

    "llama_3_chat": {"system_start": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
                     "system_stop": "<|eot_id|>",
                     "main_start": "<|start_header_id|>user<|end_header_id|>\n",
                     "main_stop": "<|eot_id|>",
                     "start_llm_response": "<|start_header_id|>assistant<|end_header_id|>\n"},

    "tiny_llama_chat": {"system_start": "<|system|>", "system_stop": "</s>",
                        "main_start": "<|user|>", "main_stop": "</s>",
                        "start_llm_response": "<|assistant|>"},

    "stablelm_zephyr_chat": {"system_start": "", "system_stop": "",
                             "main_start": "<|user|>", "main_stop": "<|endoftext|>\n",
                             "start_llm_response": "<|assistant|>"},

    "google_gemma_chat": {"system_start": "", "system_stop": "",
                          "main_start": "<bos><start_of_turn>user\n",
                          "main_stop": "<end_of_turn>\n",
                          "start_llm_response": "<start_of_turn>model"},

    "vicuna_chat": {"system_start": "", "system_stop": "",
                    "main_start": "USER: ", "main_stop": "",
                    "start_llm_response": " ASSISTANT:"}
"""

# if none of these templates work, then you can also register a new prompt template
ModelCatalog().register_new_finetune_wrapper("my_new_template",
                                             main_start="<user starts here>",
                                             main_stop="<user ends here>",
                                             llm_start="<model starts here>",
                                             system_start="<you are a helpful assistant...>",
                                             system_stop="<end system stuff>")

# once registered, this new prompt wrapper can also be invoked directly by "my_new_template", and it will be
# picked up in the lookup at the time of instantiating the model
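
# for example (sketch - only the relevant field is shown, with the other fields following the model card
# templates at the top of this example), a custom model card can reference the new wrapper by name:
#
#   model_card_dict = {"model_name": "my_model", "prompt_wrapper": "my_new_template", ...}
#   ModelCatalog().register_new_model_card(model_card_dict)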

@@ -0,0 +1,84 @@

""" Starting with llmware 0.3.7, we have integrated support for ONNX Runtime Generative models.

    To get started:  `pip install onnxruntime_genai`

    Please note that onnxruntime_genai is supported on a wide range of Windows and Linux x86 platforms,
    but it does not build for Mac Metal - so it will not work on Macs.
"""

from llmware.models import ModelCatalog

from importlib import util
if not util.find_spec("onnxruntime_genai"):
    print("\nto run this example, you need to install onnxruntime_genai first, e.g., pip3 install onnxruntime_genai")

# we will be adding more ONNX models to the default catalog, but we currently support:
# -- bling-tiny-llama-onnx
# -- bling-phi-3-onnx
# -- phi-3-onnx

# please see the example 'adding_openvino_or_onnx_model.py' to add your own ONNX and OpenVino models
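
# optional sanity check (a minimal sketch, not part of the original example) - assuming that
# ModelCatalog().list_all_models() returns the registered model card dictionaries (with "model_name"
# and "model_family" keys, as in the model card templates), this prints the ONNX generative models
# currently available in the catalog
for model_card in ModelCatalog().list_all_models():
    if model_card.get("model_family") == "ONNXGenerativeModel":
        print("onnx model in catalog: ", model_card.get("model_name"))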


def getting_started():

    """ Simple 'hello world' example. """

    model = ModelCatalog().load_model("bling-tiny-llama-onnx", temperature=0.0, sample=False,
                                      max_output=100)

    query = "What was Microsoft's revenue in the 3rd quarter?"

    context = ("Microsoft Cloud Strength Drives Third Quarter Results \nREDMOND, Wash. — April 25, 2023 — "
               "Microsoft Corp. today announced the following results for the quarter ended March 31, 2023,"
               " as compared to the corresponding period of last fiscal year:\n· Revenue was $52.9 billion"
               " and increased 7% (up 10% in constant currency)\n· Operating income was $22.4 billion "
               "and increased 10% (up 15% in constant currency)\n· Net income was $18.3 billion and "
               "increased 9% (up 14% in constant currency)\n· Diluted earnings per share was $2.45 "
               "and increased 10% (up 14% in constant currency).\n")

    response = model.inference(query, add_context=context)

    print(f"\ngetting_started example - query - {query}")
    print("getting_started example - response: ", response)

    return response


def streaming_example():

    prompt = "What are the benefits of small specialized LLMs?"

    print(f"\nstreaming_example - prompt: {prompt}")

    # since model.stream provides a generator, use it as follows to consume the streamed tokens
    model = ModelCatalog().load_model("phi-3-onnx", max_output=500)
    text_out = ""
    token_count = 0

    for streamed_token in model.stream(prompt):

        text_out += streamed_token
        if text_out.strip():
            print(streamed_token, end="")

        token_count += 1

    print("total text: ", text_out)
    print("total tokens: ", token_count)

    return text_out


if __name__ == "__main__":

    getting_started()

    streaming_example()

@@ -0,0 +1,107 @@

""" Starting with llmware 0.3.7, we have integrated support for OpenVino Generative models.

    To get started:

        `pip install openvino`
        `pip install openvino_genai`

    OpenVino is supported on a wide range of platforms (including Windows, Linux and Mac OS), and is highly
    optimized for Intel x86 architectures - both CPU and GPU.

    The intent is for OpenVino models to be "drop-in" replacements for Pytorch or GGUF models by simply
    replacing the model with the OpenVino equivalent - usually indicated by an 'ov' at the end of the model
    name (see the short sketch below the supported model list).
"""

from llmware.models import ModelCatalog

from importlib import util
if not util.find_spec("openvino"):
    print("\nto run this example, you need to install openvino first, e.g., pip3 install openvino")

if not util.find_spec("openvino_genai"):
    print("\nto run this example, you need to install openvino_genai first, e.g., pip3 install openvino_genai")


# we will be adding more OpenVino models to the default catalog, but we currently support:
# -- bling-tiny-llama-ov
# -- bling-phi-3-ov
# -- phi-3-ov
# -- qwen2.5-1.5b-ov
# -- qwen2.5-3b-ov
# -- qwen2.5-0.5b-ov
# -- dragon-llama2-ov
# -- dragon-mistral-ov
# -- dragon-yi-9b-ov
# -- slim-extract-tiny-ov
# -- slim-extract-phi-3-ov
# -- slim-sentiment-ov

# to add your own OpenVino models, please see the example 'adding_openvino_or_onnx_model.py'
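
# the 'drop-in' idea sketched (illustrative only): the same loading and inference pattern used for a
# Pytorch or GGUF model works unchanged for its OpenVino build - only the model name changes, e.g.,
#
#   model = ModelCatalog().load_model("bling-tiny-llama-ov")   # OpenVino build from the list above
#   response = model.inference("What is ...", add_context="...")
#
# the same two lines would run against the non-OV catalog entry for the model, if one is installed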


def getting_started():

    model = ModelCatalog().load_model("bling-tiny-llama-ov", temperature=0.0, sample=False,
                                      max_output=100)

    query = "What was Microsoft's revenue in the 3rd quarter?"

    context = ("Microsoft Cloud Strength Drives Third Quarter Results \nREDMOND, Wash. — April 25, 2023 — "
               "Microsoft Corp. today announced the following results for the quarter ended March 31, 2023,"
               " as compared to the corresponding period of last fiscal year:\n· Revenue was $52.9 billion"
               " and increased 7% (up 10% in constant currency)\n· Operating income was $22.4 billion "
               "and increased 10% (up 15% in constant currency)\n· Net income was $18.3 billion and "
               "increased 9% (up 14% in constant currency)\n· Diluted earnings per share was $2.45 "
               "and increased 10% (up 14% in constant currency).\n")

    response = model.inference(query, add_context=context)

    print(f"\ngetting_started example - query - {query}")
    print("getting_started example - response: ", response)

    return response


def sentiment_analysis():

    model = ModelCatalog().load_model("slim-sentiment-ov", temperature=0.0, sample=False)

    text = ("The poor earnings results along with the worrisome guidance on the future has dampened "
            "expectations and put a lot of pressure on the share price.")

    response = model.function_call(text)

    print(f"\nsentiment_analysis - {response}")

    return response


def extract_info():

    model = ModelCatalog().load_model("slim-extract-tiny-ov", temperature=0.0, sample=False)

    text = ("Adobe shares tumbled as much as 11% in extended trading Thursday after the design software maker "
            "issued strong fiscal first-quarter results but came up slightly short on quarterly revenue guidance. "
            "Here’s how the company did, compared with estimates from analysts polled by LSEG, formerly known as Refinitiv: "
            "Earnings per share: $4.48 adjusted vs. $4.38 expected Revenue: $5.18 billion vs. $5.14 billion expected "
            "Adobe’s revenue grew 11% year over year in the quarter, which ended March 1, according to a statement. "
            "Net income decreased to $620 million, or $1.36 per share, from $1.25 billion, or $2.71 per share, "
            "in the same quarter a year ago. During the quarter, Adobe abandoned its $20 billion acquisition of "
            "design software startup Figma after U.K. regulators found competitive concerns. The company paid "
            "Figma a $1 billion termination fee.")

    response = model.function_call(text, function="extract", params=["termination fee"])

    print(f"\nextract_info - {response}")

    return response


if __name__ == "__main__":

    getting_started()
    sentiment_analysis()
    extract_info()