diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 00000000..8b3a87af --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,35 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + packages_dir: dist diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..8540c660 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,36 @@ +name: Test PRs + +on: + pull_request: + branches: + - main + +jobs: + test: + runs-on: macos-14 + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + run: | + brew install python@3.10 + python3 -m venv env + source env/bin/activate + + + - name: Run style checks + run: | + pip install pre-commit + pre-commit run --all + if ! git diff --quiet; then echo 'Style checks failed, please install pre-commit and run pre-commit run --all and push the change'; exit 1; fi + + - name: Install dependencies + run: | + pip install pytest + pip install -e . + + - name: Run Python tests + run: | + pytest -s . 
diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..67cfefe3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +__pycache__ +*.egg-info \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..04427a14 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.2.0 + hooks: + - id: black +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + args: + - --profile=black \ No newline at end of file diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 00000000..dffae7a0 --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,13 @@ +======= +Credits +======= + +Development Lead +---------------- + +* Prince Canuma + +Contributors +------------ + +None yet. Why not be the first? diff --git a/LICENSE b/LICENSE index 261eeb9e..b6cd1934 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,16 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ +Apache Software License 2.0 - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +Copyright (c) 2024, Prince Canuma - 1. Definitions. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. +http://www.apache.org/licenses/LICENSE-2.0 - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..89411aa6 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,7 @@ +include LICENSE +include README.md +include requirements.txt + +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + diff --git a/README.md b/README.md index 1227361b..835d42b7 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,109 @@ -# FastMLX +# fastmlX -FastMLX is a high performance production ready API to host MLX models. 
+[![image](https://img.shields.io/pypi/v/fastmlx.svg)](https://pypi.python.org/pypi/fastmlx) +[![image](https://img.shields.io/conda/vn/conda-forge/fastmlx.svg)](https://anaconda.org/conda-forge/fastmlx) +[![image](https://pyup.io/repos/github/Blaizzy/fastmlx/shield.svg)](https://pyup.io/repos/github/Blaizzy/fastmlx) + +**FastMLX is a high performance production ready API to host MLX models, including Vision Language Models (VLMs) and Language Models (LMs).** + +- Free software: Apache Software License 2.0 +- Documentation: https://Blaizzy.github.io/fastmlx + +## Features + +- **OpenAI-compatible API**: Easily integrate with existing applications that use OpenAI's API. +- **Dynamic Model Loading**: Load MLX models on-the-fly or use pre-loaded models for better performance. +- **Support for Multiple Model Types**: Compatible with various MLX model architectures. +- **Image Processing Capabilities**: Handle both text and image inputs for versatile model interactions. +- **Efficient Resource Management**: Optimized for high-performance and scalability. +- **Error Handling**: Robust error management for production environments. +- **Customizable**: Easily extendable to accommodate specific use cases and model types. + +## Usage + +1. **Installation** + + ```bash + pip install fastmlx + ``` + +2. **Running the Server** + + Start the FastMLX server: + ```bash + fastmlx + ``` + or + + ```bash + uvicorn fastmlx:app --reload + ``` + +3. 
**Making API Calls** + + Use the API similar to OpenAI's chat completions: + + **Vision Language Model** + + ```python + import requests + import json + + url = "http://localhost:8000/v1/chat/completions" + headers = {"Content-Type": "application/json"} + data = { + "model": "mlx-community/nanoLLaVA-1.5-4bit", + "image": "http://images.cocodataset.org/val2017/000000039769.jpg", + "messages": [{"role": "user", "content": "What are these"}], + "max_tokens": 100 + } + + response = requests.post(url, headers=headers, data=json.dumps(data)) + print(response.json()) + ``` + **Language Model** + ```python + import requests + import json + + url = "http://localhost:8000/v1/chat/completions" + headers = {"Content-Type": "application/json"} + data = { + "model": "mlx-community/gemma-2-9b-it-4bit", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 100 + } + + response = requests.post(url, headers=headers, data=json.dumps(data)) + print(response.json()) + ``` + +4. **Adding a New Model** + + You can add new models to the API: + + ```python + import requests + + url = "http://localhost:8000/v1/models" + params = { + "model_name": "hf-repo-or-path", + } + + response = requests.post(url, params=params) + print(response.json()) + ``` + +5. **Listing Available Models** + + To see all available models: + + ```python + import requests + + url = "http://localhost:8000/v1/models" + response = requests.get(url) + print(response.json()) + ``` + +For more detailed usage instructions and API documentation, please refer to the [full documentation](https://Blaizzy.github.io/fastmlx). \ No newline at end of file diff --git a/docs/authors.rst b/docs/authors.rst new file mode 100644 index 00000000..e122f914 --- /dev/null +++ b/docs/authors.rst @@ -0,0 +1 @@ +.. 
include:: ../AUTHORS.rst diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 00000000..289e2c52 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,11 @@ +# Changelog + +## v0.0.1 - Date + +**Improvement**: + +- TBD + +**New Features**: + +- TBD diff --git a/docs/common.md b/docs/common.md new file mode 100644 index 00000000..8d5152a8 --- /dev/null +++ b/docs/common.md @@ -0,0 +1,3 @@ +# common module + +::: fastmlx.common \ No newline at end of file diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 00000000..52aa69a8 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,108 @@ +# Contributing + +Contributions are welcome, and they are greatly appreciated! Every +little bit helps, and credit will always be given. + +You can contribute in many ways: + +## Types of Contributions + +### Report Bugs + +Report bugs at . + +If you are reporting a bug, please include: + +- Your operating system name and version. +- Any details about your local setup that might be helpful in troubleshooting. +- Detailed steps to reproduce the bug. + +### Fix Bugs + +Look through the GitHub issues for bugs. Anything tagged with `bug` and +`help wanted` is open to whoever wants to implement it. + +### Implement Features + +Look through the GitHub issues for features. Anything tagged with +`enhancement` and `help wanted` is open to whoever wants to implement it. + +### Write Documentation + +fastmlx could always use more documentation, +whether as part of the official fastmlx docs, +in docstrings, or even on the web in blog posts, articles, and such. + +### Submit Feedback + +The best way to send feedback is to file an issue at +. + +If you are proposing a feature: + +- Explain in detail how it would work. +- Keep the scope as narrow as possible, to make it easier to implement. +- Remember that this is a volunteer-driven project, and that contributions are welcome :) + +## Get Started! + +Ready to contribute? 
Here's how to set up fastmlx for local development. + +1. Fork the fastmlx repo on GitHub. + +2. Clone your fork locally: + + ```shell + $ git clone git@github.com:your_name_here/fastmlx.git + ``` + +3. Install your local copy into a virtualenv. Assuming you have + virtualenvwrapper installed, this is how you set up your fork for + local development: + + ```shell + $ mkvirtualenv fastmlx + $ cd fastmlx/ + $ python setup.py develop + ``` + +4. Create a branch for local development: + + ```shell + $ git checkout -b name-of-your-bugfix-or-feature + ``` + + Now you can make your changes locally. + +5. When you're done making changes, check that your changes pass flake8 + and the tests, including testing other Python versions with tox: + + ```shell + $ flake8 fastmlx tests + $ python setup.py test or pytest + $ tox + ``` + + To get flake8 and tox, just pip install them into your virtualenv. + +6. Commit your changes and push your branch to GitHub: + + ```shell + $ git add . + $ git commit -m "Your detailed description of your changes." + $ git push origin name-of-your-bugfix-or-feature + ``` + +7. Submit a pull request through the GitHub website. + +## Pull Request Guidelines + +Before you submit a pull request, check that it meets these guidelines: + +1. The pull request should include tests. +2. If the pull request adds functionality, the docs should be updated. + Put your new functionality into a function with a docstring, and add + the feature to the list in README.rst. +3. The pull request should work for Python 3.8 and later, and + for PyPy. Check and make sure that the tests pass for all + supported Python versions. 
diff --git a/docs/examples/intro.ipynb b/docs/examples/intro.ipynb new file mode 100644 index 00000000..ebd171a6 --- /dev/null +++ b/docs/examples/intro.ipynb @@ -0,0 +1,21 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('Hello World!')" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/faq.md b/docs/faq.md new file mode 100644 index 00000000..4514b4c1 --- /dev/null +++ b/docs/faq.md @@ -0,0 +1 @@ +# FAQ diff --git a/docs/fastmlx.md b/docs/fastmlx.md new file mode 100644 index 00000000..f2a80faf --- /dev/null +++ b/docs/fastmlx.md @@ -0,0 +1,4 @@ + +# fastmlx module + +::: fastmlx.fastmlx \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..18c5f86d --- /dev/null +++ b/docs/index.md @@ -0,0 +1,18 @@ +# Welcome to fastmlx + + +[![image](https://img.shields.io/pypi/v/fastmlx.svg)](https://pypi.python.org/pypi/fastmlx) + +[![image](https://pyup.io/repos/github/Blaizzy/fastmlx/shield.svg)](https://pyup.io/repos/github/Blaizzy/fastmlx) + + +**FastMLX is a high performance production ready API to host MLX models.** + + +- Free software: Apache Software License 2.0 +- Documentation: + + +## Features + +- TODO diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 00000000..981f8f17 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,21 @@ +# Installation + +## Stable release + +To install fastmlx, run this command in your terminal: + +``` +pip install fastmlx +``` + +This is the preferred method to install fastmlx, as it will always install the most recent stable release. + +If you don't have [pip](https://pip.pypa.io) installed, this [Python installation guide](http://docs.python-guide.org/en/latest/starting/installation/) can guide you through the process. 
+ +## From sources + +To install fastmlx from sources, run this command in your terminal: + +``` +pip install git+https://github.com/Blaizzy/fastmlx +``` diff --git a/docs/overrides/main.html b/docs/overrides/main.html new file mode 100644 index 00000000..702c96bf --- /dev/null +++ b/docs/overrides/main.html @@ -0,0 +1,11 @@ +{% extends "base.html" %} + +{% block content %} +{% if page.nb_url %} + + {% include ".icons/material/download.svg" %} + +{% endif %} + +{{ super() }} +{% endblock content %} diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..7f6d34e2 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,7 @@ +# Usage + +To use fastmlx in a project: + +``` +import fastmlx +``` diff --git a/fastmlx/__init__.py b/fastmlx/__init__.py new file mode 100644 index 00000000..ed220226 --- /dev/null +++ b/fastmlx/__init__.py @@ -0,0 +1,7 @@ +"""Top-level package for fastmlx.""" + +__author__ = """Prince Canuma""" +__email__ = "prince.gdt@gmail.com" +__version__ = "0.0.1" + +from .fastmlx import * diff --git a/fastmlx/fastmlx.py b/fastmlx/fastmlx.py new file mode 100644 index 00000000..fd878760 --- /dev/null +++ b/fastmlx/fastmlx.py @@ -0,0 +1,181 @@ +"""Main module.""" + +import os +import time +from typing import List, Optional + +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field + +try: + import mlx.core as mx + from mlx_lm import generate as lm_generate + from mlx_vlm import generate as vlm_generate + from mlx_vlm.prompt_utils import get_message_json + from mlx_vlm.utils import load_config + + from .utils import MODEL_REMAPPING, MODELS, load_lm_model, load_vlm_model + + MLX_AVAILABLE = True +except ImportError: + print("Warning: mlx or mlx_lm not available. 
Some functionality will be limited.") + MLX_AVAILABLE = False + + +class ModelProvider: + def __init__(self): + self.models = {} + + def load_model(self, model_name: str): + if model_name not in self.models: + config = load_config(model_name) + model_type = MODEL_REMAPPING.get(config["model_type"], config["model_type"]) + if model_type in MODELS["vlm"]: + self.models[model_name] = load_vlm_model(model_name, config) + else: + self.models[model_name] = load_lm_model(model_name, config) + + return self.models[model_name] + + def get_available_models(self): + return list(self.models.keys()) + + +class ChatMessage(BaseModel): + role: str + content: str + + +class ChatCompletionRequest(BaseModel): + model: str + messages: List[ChatMessage] + image: Optional[str] = Field(default=None) + max_tokens: Optional[int] = Field(default=100) + temperature: Optional[float] = Field(default=0.7) + + +class ChatCompletionResponse(BaseModel): + id: str + object: str = "chat.completion" + created: int + model: str + choices: List[dict] + + +app = FastAPI() + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Initialize the ModelProvider +model_provider = ModelProvider() + + +@app.post("/v1/chat/completions", response_model=ChatCompletionResponse) +async def chat_completion(request: ChatCompletionRequest): + if not MLX_AVAILABLE: + raise HTTPException(status_code=500, detail="MLX library not available") + + model_data = model_provider.load_model(request.model) + model = model_data["model"] + config = model_data["config"] + model_type = MODEL_REMAPPING.get(config["model_type"], config["model_type"]) + + if model_type in MODELS["vlm"]: + processor = model_data["processor"] + image_processor = model_data["image_processor"] + + image = request.image + + chat_messages = [] + + for msg in request.messages: + if msg.role == "user": + chat_messages.append( + 
get_message_json(config["model_type"], msg.content) + ) + else: + chat_messages.append({"role": msg.role, "content": msg.content}) + + prompt = "" + if "chat_template" in processor.__dict__.keys(): + prompt = processor.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + + elif "tokenizer" in processor.__dict__.keys(): + if model.config.model_type != "paligemma": + prompt = processor.tokenizer.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + else: + prompt = request.messages[-1].content + + # Generate the response + output = vlm_generate( + model, processor, image, prompt, image_processor, verbose=False + ) + + else: + tokenizer = model_data["tokenizer"] + chat_messages = [ + {"role": msg.role, "content": msg.content} for msg in request.messages + ] + if "chat_template" in tokenizer.__dict__.keys(): + prompt = tokenizer.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + else: + prompt = request.messages[-1].content + + output = lm_generate(model, tokenizer, prompt, verbose=False) + + # Prepare the response + response = ChatCompletionResponse( + id=f"chatcmpl-{os.urandom(4).hex()}", + created=int(time.time()), + model=request.model, + choices=[ + { + "index": 0, + "message": {"role": "assistant", "content": output}, + "finish_reason": "stop", + } + ], + ) + + return response + + +@app.get("/v1/models") +async def list_models(): + return {"models": model_provider.get_available_models()} + + +@app.post("/v1/models") +async def add_model(model_name: str): + model_provider.load_model(model_name) + return {"status": "success", "message": f"Model {model_name} added successfully"} + + +def run(): + import uvicorn + + uvicorn.run("fastmlx:app", host="127.0.0.1", port=8000, reload=True) + + +if __name__ == "__main__": + run() diff --git a/fastmlx/utils.py b/fastmlx/utils.py new file mode 100644 index 00000000..6f75205f --- /dev/null +++ 
b/fastmlx/utils.py @@ -0,0 +1,57 @@ +import os +from typing import Any, Dict + +# MLX Imports +try: + from mlx_lm import load as lm_load + from mlx_lm import models as lm_models + from mlx_vlm import load as vlm_load + from mlx_vlm import models as vlm_models + from mlx_vlm.utils import load_image_processor +except ImportError: + print("Warning: mlx or mlx_lm not available. Some functionality will be limited.") + + +def get_model_type_list(models, type="vlm"): + + # Get the directory path of the models package + models_dir = os.path.dirname(models.__file__) + + # List all items in the models directory + all_items = os.listdir(models_dir) + + if type == "vlm": + submodules = [ + item + for item in all_items + if os.path.isdir(os.path.join(models_dir, item)) + and not item.startswith(".") + and item != "__pycache__" + ] + return submodules + else: + return all_items + + +MODELS = { + "vlm": get_model_type_list(vlm_models), + "lm": get_model_type_list(lm_models, "lm"), +} +MODEL_REMAPPING = {"llava-qwen2": "llava_bunny", "bunny-llama": "llava_bunny"} + + +# Model Loading and Generation Functions +def load_vlm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]: + model, processor = vlm_load(model_name, {"trust_remote_code": True}) + image_processor = load_image_processor(model_name) + return { + "model": model, + "processor": processor, + "image_processor": image_processor, + "config": config, + } + + +def load_lm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]: + model, tokenizer = lm_load(model_name) + return {"model": model, "tokenizer": tokenizer, "config": config} diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..9599fa35 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,86 @@ +site_name: fastmlx +site_description: FastMLX is a high performance production ready API to host MLX models. 
+site_author: Blaizzy +site_url: https://Blaizzy.github.io/fastmlx +repo_url: https://github.com/Blaizzy/fastmlx + +copyright: "Copyright © 2024 - 2024 Prince Canuma" + +theme: + palette: + - scheme: default + # primary: blue + # accent: indigo + toggle: + icon: material/toggle-switch-off-outline + name: Switch to dark mode + - scheme: slate + primary: indigo + accent: indigo + toggle: + icon: material/toggle-switch + name: Switch to light mode + name: material + icon: + repo: fontawesome/brands/github + # logo: assets/logo.png + # favicon: assets/favicon.png + features: + - navigation.instant + - navigation.tracking + - navigation.top + - search.highlight + - search.share + custom_dir: "docs/overrides" + font: + text: Google Sans + code: Regular + +plugins: + - search + - mkdocstrings + - git-revision-date + - git-revision-date-localized: + enable_creation_date: true + type: timeago + # - pdf-export + - mkdocs-jupyter: + include_source: True + ignore_h1_titles: True + execute: True + allow_errors: false + ignore: ["conf.py"] + execute_ignore: ["*ignore.ipynb"] + +markdown_extensions: + - admonition + - abbr + - attr_list + - def_list + - footnotes + - meta + - md_in_html + - pymdownx.superfences + - pymdownx.highlight: + linenums: true + - toc: + permalink: true + +# extra: +# analytics: +# provider: google +# property: UA-XXXXXXXXX-X + +nav: + - Home: index.md + - Installation: installation.md + - Usage: usage.md + - Contributing: contributing.md + - FAQ: faq.md + - Changelog: changelog.md + - Report Issues: https://github.com/Blaizzy/fastmlx/issues + - Examples: + - examples/intro.ipynb + - API Reference: + - fastmlx module: fastmlx.md + - common module: common.md diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..9aa1edb9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,82 @@ +[project] +name = "fastmlx" +version = "0.0.1" +dynamic = [ + "dependencies", +] +description = "FastMLX is a high performance production ready API to host MLX 
models." +readme = "README.md" +requires-python = ">=3.8" +keywords = [ +    "fastmlx", +] +license = {text = "Apache Software License 2.0"} +authors = [ +    {name = "Prince Canuma", email = "prince.gdt@gmail.com"}, +] +classifiers = [ +    "Intended Audience :: Developers", +    "License :: OSI Approved :: Apache Software License", +    "Natural Language :: English", +    "Programming Language :: Python :: 3.8", +    "Programming Language :: Python :: 3.9", +    "Programming Language :: Python :: 3.10", +    "Programming Language :: Python :: 3.11", +    "Programming Language :: Python :: 3.12", +] + +[project.entry-points."console_scripts"] +fastmlx = "fastmlx.fastmlx:run" + +[project.optional-dependencies] +all = [ +    "fastmlx[extra]", +] + +extra = [ +    "pandas", +] + + +[tool] +[tool.setuptools.packages.find] +include = ["fastmlx*"] +exclude = ["docs*"] + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} + + +[tool.distutils.bdist_wheel] +universal = true + + +[tool.bumpversion] +current_version = "0.0.1" +commit = true +tag = true + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = 'version = "{current_version}"' +replace = 'version = "{new_version}"' + +[[tool.bumpversion.files]] +filename = "fastmlx/__init__.py" +search = '__version__ = "{current_version}"' +replace = '__version__ = "{new_version}"' + + +[tool.flake8] +exclude = [ +    "docs", +] +max-line-length = 88 + + +[project.urls] +Homepage = "https://github.com/Blaizzy/fastmlx" + +[build-system] +requires = ["setuptools>=64", "setuptools_scm>=8"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..c7d4791c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +mlx>=0.15 +mlx-lm>=0.15.2 +mlx-vlm>=0.0.11 +fastapi>=0.111.0 +jinja2 \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..1e1ee754 --- /dev/null +++ b/tests/__init__.py 
#!/usr/bin/env python

"""Tests for the `fastmlx` package (tests/test_fastmlx.py).

The FastAPI app is exercised through a TestClient while the model-loading
and generation functions are replaced with lightweight mocks, so no real
MLX models are ever downloaded or executed.
"""

import json
from unittest.mock import MagicMock, patch

import pytest
from fastapi.testclient import TestClient

# Import the actual classes and functions under test.
from fastmlx import ChatCompletionRequest, ChatMessage, ModelProvider, app


class MockModelProvider(ModelProvider):
    """Provider subclass that fabricates model bundles instead of loading them.

    A model name containing "llava" is treated as a vision-language model
    ("vlm"); any other name is treated as a plain language model ("lm").
    """

    def __init__(self):
        super().__init__()
        self.models = {}  # model_name -> fake model bundle

    def load_model(self, model_name: str):
        # Lazily create a fake bundle on first request; subsequent calls
        # return the cached entry, mirroring the real provider's caching.
        if model_name not in self.models:
            model_type = "vlm" if "llava" in model_name.lower() else "lm"
            self.models[model_name] = {
                "model": MagicMock(),
                "processor": MagicMock(),
                "tokenizer": MagicMock(),
                # Only vision-language models carry an image processor.
                "image_processor": MagicMock() if model_type == "vlm" else None,
                "config": {"model_type": model_type},
            }
        return self.models[model_name]

    def get_available_models(self):
        return list(self.models.keys())


# Mock MODELS registry: model family -> name fragments the app recognizes.
MODELS = {"vlm": ["llava"], "lm": ["phi"]}


def mock_generate(*args, **kwargs):
    """Stand-in for both vlm_generate and lm_generate; returns a fixed string."""
    return "generated response"


@pytest.fixture(scope="module")
def client():
    """TestClient with provider and generators patched for the whole module.

    NOTE: module scope means every test in this file shares one provider
    instance, so models loaded by one test remain visible to later tests.
    """
    with patch("fastmlx.fastmlx.model_provider", MockModelProvider()), patch(
        "fastmlx.fastmlx.vlm_generate", mock_generate
    ), patch("fastmlx.fastmlx.lm_generate", mock_generate), patch(
        "fastmlx.fastmlx.MODELS", MODELS
    ):
        yield TestClient(app)


def test_chat_completion_vlm(client):
    """A chat completion carrying an image routes to the VLM path."""
    request = ChatCompletionRequest(
        model="test_llava_model",
        messages=[ChatMessage(role="user", content="Hello")],
        image="test_image",
    )
    response = client.post(
        "/v1/chat/completions", json=json.loads(request.model_dump_json())
    )

    assert response.status_code == 200
    assert "generated response" in response.json()["choices"][0]["message"]["content"]


def test_chat_completion_lm(client):
    """A text-only chat completion routes to the LM path."""
    request = ChatCompletionRequest(
        model="test_phi_model", messages=[ChatMessage(role="user", content="Hello")]
    )
    response = client.post(
        "/v1/chat/completions", json=json.loads(request.model_dump_json())
    )

    assert response.status_code == 200
    assert "generated response" in response.json()["choices"][0]["message"]["content"]


def test_list_models(client):
    """Models registered via POST /v1/models appear in GET /v1/models."""
    client.post("/v1/models?model_name=test_llava_model")
    client.post("/v1/models?model_name=test_phi_model")

    response = client.get("/v1/models")

    assert response.status_code == 200
    # FIX: use a subset check instead of strict set equality. The
    # module-scoped client shares one provider across all tests, so models
    # loaded by sibling tests (e.g. test_add_model) may also be present
    # depending on execution order; strict equality made this test
    # order-dependent and flaky under test randomization.
    assert {"test_llava_model", "test_phi_model"} <= set(response.json()["models"])


def test_add_model(client):
    """POST /v1/models registers a new model and reports success."""
    response = client.post("/v1/models?model_name=new_llava_model")

    assert response.status_code == 200
    assert response.json() == {
        "status": "success",
        "message": "Model new_llava_model added successfully",
    }


if __name__ == "__main__":
    pytest.main(["-v", __file__])