From c10428999202ffad8d303739abaea354c42e5cdb Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Mon, 1 Apr 2024 10:46:53 +0200 Subject: [PATCH 01/16] Initial commit --- .gitignore | 1 + AUTHORS.rst | 13 +++ LICENSE | 207 ++------------------------------------ MANIFEST.in | 7 ++ README.md | 20 +++- docs/authors.rst | 1 + docs/changelog.md | 11 ++ docs/common.md | 3 + docs/contributing.md | 108 ++++++++++++++++++++ docs/examples/intro.ipynb | 21 ++++ docs/faq.md | 1 + docs/fastmlx.md | 4 + docs/index.md | 18 ++++ docs/installation.md | 21 ++++ docs/overrides/main.html | 11 ++ docs/usage.md | 7 ++ fastmlx/__init__.py | 5 + fastmlx/cli.py | 19 ++++ fastmlx/common.py | 7 ++ fastmlx/fastmlx.py | 1 + mkdocs.yml | 86 ++++++++++++++++ pyproject.toml | 82 +++++++++++++++ requirements.txt | 6 ++ requirements_dev.txt | 29 ++++++ tests/__init__.py | 1 + tests/test_fastmlx.py | 24 +++++ 26 files changed, 516 insertions(+), 198 deletions(-) create mode 100644 .gitignore create mode 100644 AUTHORS.rst create mode 100644 MANIFEST.in create mode 100644 docs/authors.rst create mode 100644 docs/changelog.md create mode 100644 docs/common.md create mode 100644 docs/contributing.md create mode 100644 docs/examples/intro.ipynb create mode 100644 docs/faq.md create mode 100644 docs/fastmlx.md create mode 100644 docs/index.md create mode 100644 docs/installation.md create mode 100644 docs/overrides/main.html create mode 100644 docs/usage.md create mode 100644 fastmlx/__init__.py create mode 100644 fastmlx/cli.py create mode 100644 fastmlx/common.py create mode 100644 fastmlx/fastmlx.py create mode 100644 mkdocs.yml create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 requirements_dev.txt create mode 100644 tests/__init__.py create mode 100644 tests/test_fastmlx.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..e43b0f98 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.DS_Store diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 00000000..dffae7a0 --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,13 @@ +======= +Credits +======= + +Development Lead +---------------- + +* Prince Canuma + +Contributors +------------ + +None yet. Why not be the first? diff --git a/LICENSE b/LICENSE index 261eeb9e..b6cd1934 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,16 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ +Apache Software License 2.0 - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +Copyright (c) 2024, Prince Canuma - 1. Definitions. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. +http://www.apache.org/licenses/LICENSE-2.0 - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..89411aa6
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,7 @@
+include LICENSE
+include README.md
+include requirements.txt
+
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+
diff --git a/README.md b/README.md
index 1227361b..985279b3 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,19 @@
-# FastMLX
+# fastmlx
 
-FastMLX is a high performance production ready API to host MLX models.
+
+[![image](https://img.shields.io/pypi/v/fastmlx.svg)](https://pypi.python.org/pypi/fastmlx)
+[![image](https://img.shields.io/conda/vn/conda-forge/fastmlx.svg)](https://anaconda.org/conda-forge/fastmlx)
+
+[![image](https://pyup.io/repos/github/Blaizzy/fastmlx/shield.svg)](https://pyup.io/repos/github/Blaizzy/fastmlx)
+
+
+**FastMLX is a high performance production ready API to host MLX models.**
+
+
+- Free software: Apache Software License 2.0
+- Documentation: https://Blaizzy.github.io/fastmlx
+
+
+## Features
+
+- TODO
diff --git a/docs/authors.rst b/docs/authors.rst
new file mode 100644
index 00000000..e122f914
--- /dev/null
+++ b/docs/authors.rst
@@ -0,0 +1 @@
+.. include:: ../AUTHORS.rst
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 00000000..289e2c52
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1,11 @@
+# Changelog
+
+## v0.0.1 - Date
+
+**Improvement**:
+
+- TBD
+
+**New Features**:
+
+- TBD
diff --git a/docs/common.md b/docs/common.md
new file mode 100644
index 00000000..8d5152a8
--- /dev/null
+++ b/docs/common.md
@@ -0,0 +1,3 @@
+# common module
+
+::: fastmlx.common
\ No newline at end of file
diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 00000000..52aa69a8
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1,108 @@
+# Contributing
+
+Contributions are welcome, and they are greatly appreciated! Every
+little bit helps, and credit will always be given.
+
+You can contribute in many ways:
+
+## Types of Contributions
+
+### Report Bugs
+
+Report bugs at <https://github.com/Blaizzy/fastmlx/issues>.
+
+If you are reporting a bug, please include:
+
+- Your operating system name and version.
+- Any details about your local setup that might be helpful in troubleshooting.
+- Detailed steps to reproduce the bug.
+
+### Fix Bugs
+
+Look through the GitHub issues for bugs. Anything tagged with `bug` and
+`help wanted` is open to whoever wants to implement it.
+
+### Implement Features
+
+Look through the GitHub issues for features. Anything tagged with
+`enhancement` and `help wanted` is open to whoever wants to implement it.
+
+### Write Documentation
+
+fastmlx could always use more documentation,
+whether as part of the official fastmlx docs,
+in docstrings, or even on the web in blog posts, articles, and such.
+
+### Submit Feedback
+
+The best way to send feedback is to file an issue at
+<https://github.com/Blaizzy/fastmlx/issues>.
+
+If you are proposing a feature:
+
+- Explain in detail how it would work.
+- Keep the scope as narrow as possible, to make it easier to implement.
+- Remember that this is a volunteer-driven project, and that contributions are welcome :)
+
+## Get Started!
+
+Ready to contribute? Here's how to set up fastmlx for local development.
+
+1. Fork the fastmlx repo on GitHub.
+
+2. Clone your fork locally:
+
+    ```shell
+    $ git clone git@github.com:your_name_here/fastmlx.git
+    ```
+
+3. Install your local copy into a virtualenv. 
Assuming you have
+   virtualenvwrapper installed, this is how you set up your fork for
+   local development:
+
+    ```shell
+    $ mkvirtualenv fastmlx
+    $ cd fastmlx/
+    $ pip install -e .
+    ```
+
+4. Create a branch for local development:
+
+    ```shell
+    $ git checkout -b name-of-your-bugfix-or-feature
+    ```
+
+    Now you can make your changes locally.
+
+5. When you're done making changes, check that your changes pass flake8
+   and the tests, including testing other Python versions with tox:
+
+    ```shell
+    $ flake8 fastmlx tests
+    $ pytest
+    $ tox
+    ```
+
+    To get flake8 and tox, just pip install them into your virtualenv.
+
+6. Commit your changes and push your branch to GitHub:
+
+    ```shell
+    $ git add .
+    $ git commit -m "Your detailed description of your changes."
+    $ git push origin name-of-your-bugfix-or-feature
+    ```
+
+7. Submit a pull request through the GitHub website.
+
+## Pull Request Guidelines
+
+Before you submit a pull request, check that it meets these guidelines:
+
+1. The pull request should include tests.
+2. If the pull request adds functionality, the docs should be updated.
+   Put your new functionality into a function with a docstring, and add
+   the feature to the list in README.md.
+3. The pull request should work for Python 3.8 and later, and
+   for PyPy. Check and make sure that the tests pass for all
+   supported Python versions.
diff --git a/docs/examples/intro.ipynb b/docs/examples/intro.ipynb
new file mode 100644
index 00000000..ebd171a6
--- /dev/null
+++ b/docs/examples/intro.ipynb
@@ -0,0 +1,21 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print('Hello World!')"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/faq.md b/docs/faq.md
new file mode 100644
index 00000000..4514b4c1
--- /dev/null
+++ b/docs/faq.md
@@ -0,0 +1 @@
+# FAQ
diff --git a/docs/fastmlx.md b/docs/fastmlx.md
new file mode 100644
index 00000000..f2a80faf
--- /dev/null
+++ b/docs/fastmlx.md
@@ -0,0 +1,4 @@
+
+# fastmlx module
+
+::: fastmlx.fastmlx
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 00000000..18c5f86d
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,18 @@
+# Welcome to fastmlx
+
+
+[![image](https://img.shields.io/pypi/v/fastmlx.svg)](https://pypi.python.org/pypi/fastmlx)
+
+[![image](https://pyup.io/repos/github/Blaizzy/fastmlx/shield.svg)](https://pyup.io/repos/github/Blaizzy/fastmlx)
+
+
+**FastMLX is a high performance production ready API to host MLX models.**
+
+
+- Free software: Apache Software License 2.0
+- Documentation: <https://Blaizzy.github.io/fastmlx>
+
+
+## Features
+
+- TODO
diff --git a/docs/installation.md b/docs/installation.md
new file mode 100644
index 00000000..981f8f17
--- /dev/null
+++ b/docs/installation.md
@@ -0,0 +1,27 @@
+# Installation
+
+## Stable release
+
+To install fastmlx, run this command in your terminal:
+
+```
+pip install fastmlx
+```
+
+This is the preferred method to install fastmlx, as it will always install the most recent stable release.
+
+If you don't have [pip](https://pip.pypa.io) installed, this [Python installation guide](http://docs.python-guide.org/en/latest/starting/installation/) can guide you through the process.
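+
+To verify the install, a quick smoke test (this only imports the package and prints the version string set in `fastmlx/__init__.py`):
+
+```
+python -c "import fastmlx; print(fastmlx.__version__)"
+```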
+
+## From sources
+
+To install fastmlx from sources, run this command in your terminal:
+
+```
+pip install git+https://github.com/Blaizzy/fastmlx
+```
diff --git a/docs/overrides/main.html b/docs/overrides/main.html
new file mode 100644
index 00000000..702c96bf
--- /dev/null
+++ b/docs/overrides/main.html
@@ -0,0 +1,11 @@
+{% extends "base.html" %}
+
+{% block content %}
+{% if page.nb_url %}
+    <a href="{{ page.nb_url }}" title="Download notebook" class="md-content__button md-icon">
+        {% include ".icons/material/download.svg" %}
+    </a>
+{% endif %}
+
+{{ super() }}
+{% endblock content %}
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 00000000..7f6d34e2
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,7 @@
+# Usage
+
+To use fastmlx in a project:
+
+```
+import fastmlx
+```
diff --git a/fastmlx/__init__.py b/fastmlx/__init__.py
new file mode 100644
index 00000000..41e84b78
--- /dev/null
+++ b/fastmlx/__init__.py
@@ -0,0 +1,5 @@
+"""Top-level package for fastmlx."""
+
+__author__ = """Prince Canuma"""
+__email__ = "prince.gdt@gmail.com"
+__version__ = "0.0.1"
diff --git a/fastmlx/cli.py b/fastmlx/cli.py
new file mode 100644
index 00000000..b0060eb3
--- /dev/null
+++ b/fastmlx/cli.py
@@ -0,0 +1,19 @@
+"""Console script for fastmlx."""
+import argparse
+import sys
+
+
+def main():
+    """Console script for fastmlx."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('_', nargs='*')
+    args = parser.parse_args()
+
+    print("Arguments: " + str(args._))
+    print("Replace this message by putting your code into "
+          "fastmlx.cli.main")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # pragma: no cover
diff --git a/fastmlx/common.py b/fastmlx/common.py
new file mode 100644
index 00000000..6967be7d
--- /dev/null
+++ b/fastmlx/common.py
@@ -0,0 +1,7 @@
+"""The common module contains common functions and classes used by the other modules.
+"""
+
+def hello_world():
+    """Prints "Hello World!" to the console.
+    """
+    print("Hello World!")
\ No newline at end of file
diff --git a/fastmlx/fastmlx.py b/fastmlx/fastmlx.py
new file mode 100644
index 00000000..dd0b80ed
--- /dev/null
+++ b/fastmlx/fastmlx.py
@@ -0,0 +1 @@
+"""Main module."""
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 00000000..9599fa35
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,86 @@
+site_name: fastmlx
+site_description: FastMLX is a high performance production ready API to host MLX models.
+site_author: Blaizzy +site_url: https://Blaizzy.github.io/fastmlx +repo_url: https://github.com/Blaizzy/fastmlx + +copyright: "Copyright © 2024 - 2024 Prince Canuma" + +theme: + palette: + - scheme: default + # primary: blue + # accent: indigo + toggle: + icon: material/toggle-switch-off-outline + name: Switch to dark mode + - scheme: slate + primary: indigo + accent: indigo + toggle: + icon: material/toggle-switch + name: Switch to light mode + name: material + icon: + repo: fontawesome/brands/github + # logo: assets/logo.png + # favicon: assets/favicon.png + features: + - navigation.instant + - navigation.tracking + - navigation.top + - search.highlight + - search.share + custom_dir: "docs/overrides" + font: + text: Google Sans + code: Regular + +plugins: + - search + - mkdocstrings + - git-revision-date + - git-revision-date-localized: + enable_creation_date: true + type: timeago + # - pdf-export + - mkdocs-jupyter: + include_source: True + ignore_h1_titles: True + execute: True + allow_errors: false + ignore: ["conf.py"] + execute_ignore: ["*ignore.ipynb"] + +markdown_extensions: + - admonition + - abbr + - attr_list + - def_list + - footnotes + - meta + - md_in_html + - pymdownx.superfences + - pymdownx.highlight: + linenums: true + - toc: + permalink: true + +# extra: +# analytics: +# provider: google +# property: UA-XXXXXXXXX-X + +nav: + - Home: index.md + - Installation: installation.md + - Usage: usage.md + - Contributing: contributing.md + - FAQ: faq.md + - Changelog: changelog.md + - Report Issues: https://github.com/Blaizzy/fastmlx/issues + - Examples: + - examples/intro.ipynb + - API Reference: + - fastmlx module: fastmlx.md + - common module: common.md diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..2d71a932 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,82 @@ +[project] +name = "fastmlx" +version = "0.0.1" +dynamic = [ + "dependencies", +] +description = "FastMLX is a high performance production ready API to host MLX models." 
+readme = "README.md" +requires-python = ">=3.8" +keywords = [ + "fastmlx", +] +license = {text = "Apache Software License 2.0"} +authors = [ + {name = "Prince Canuma", email = "prince.gdt@gmail.com"}, +] +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License 2.0", + "Natural Language :: English", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + +[project.entry-points."console_scripts"] +fastmlx = "fastmlx.cli:main" + +[project.optional-dependencies] +all = [ + "fastmlx[extra]", +] + +extra = [ + "pandas", +] + + +[tool] +[tool.setuptools.packages.find] +include = ["fastmlx*"] +exclude = ["docs*"] + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} + + +[tool.distutils.bdist_wheel] +universal = true + + +[tool.bumpversion] +current_version = "0.0.1" +commit = true +tag = true + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = 'version = "{current_version}"' +replace = 'version = "{new_version}"' + +[[tool.bumpversion.files]] +filename = "fastmlx/__init__.py" +search = '__version__ = "{current_version}"' +replace = '__version__ = "{new_version}"' + + +[tool.flake8] +exclude = [ + "docs", +] +max-line-length = 88 + + +[project.urls] +Homepage = "https://github.com/Blaizzy/fastmlx" + +[build-system] +requires = ["setuptools>=64", "setuptools_scm>=8"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..a9cb9ede --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +numpy +mlx>=0.8 +mlx-lm>=0.4.0 +fastapi>=0.110.0 +transformers>=4.38.0 +jinja2 \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 00000000..16d70930 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,29 @@ +black +black[jupyter] +build +bump-my-version +codespell +wheel +flake8 +twine +click +pytest +pytest-runner + +ipykernel +livereload +nbconvert +nbformat +sphinx +watchdog +mkdocs +mkdocs-git-revision-date-plugin +mkdocs-git-revision-date-localized-plugin +mkdocs-jupyter>=0.24.0 +mkdocs-material>=9.1.3 +mkdocs-pdf-export-plugin +mkdocstrings +mkdocstrings-crystal +mkdocstrings-python-legacy +pygments +pymdown-extensions diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..1e1ee754 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Unit test package for fastmlx.""" diff --git a/tests/test_fastmlx.py b/tests/test_fastmlx.py new file mode 100644 index 00000000..29319c6f --- /dev/null +++ b/tests/test_fastmlx.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +"""Tests for `fastmlx` package.""" + +import pytest + + +from fastmlx import fastmlx + + +@pytest.fixture +def response(): + """Sample pytest fixture. 
+ + See more at: http://doc.pytest.org/en/latest/fixture.html + """ + # import requests + # return requests.get('https://github.com/audreyr/cookiecutter-pypackage') + + +def test_content(response): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string From 63b14e0ce0ff5d200f591f0c3abf7a4a0816848c Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 14:21:54 +0200 Subject: [PATCH 02/16] add tests --- tests/test_fastmlx.py | 135 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 122 insertions(+), 13 deletions(-) diff --git a/tests/test_fastmlx.py b/tests/test_fastmlx.py index 29319c6f..b0ccb56f 100644 --- a/tests/test_fastmlx.py +++ b/tests/test_fastmlx.py @@ -2,23 +2,132 @@ """Tests for `fastmlx` package.""" -import pytest +import sys +from unittest.mock import MagicMock +# Detailed mock for mlx_vlm +class MockMLXVLM: + class prompt_utils: + @staticmethod + def get_message_json(*args, **kwargs): + return {"role": "user", "content": "mocked content"} -from fastmlx import fastmlx + class utils: + @staticmethod + def load_image_processor(*args, **kwargs): + return MagicMock() + @staticmethod + def load_config(*args, **kwargs): + return {"model_type": "test_model"} -@pytest.fixture -def response(): - """Sample pytest fixture. + @staticmethod + def load(*args, **kwargs): + return (MagicMock(), MagicMock()) - See more at: http://doc.pytest.org/en/latest/fixture.html - """ - # import requests - # return requests.get('https://github.com/audreyr/cookiecutter-pypackage') + @staticmethod + def generate(*args, **kwargs): + return "This is a test response." +# Detailed mock for mlx +class MockMLX: + class core: + @staticmethod + def array(*args, **kwargs): + return MagicMock() -def test_content(response): - """Sample pytest test function with the pytest fixture as an argument.""" - # from bs4 import BeautifulSoup - # assert 'GitHub' in BeautifulSoup(response.content).title.string + @staticmethod + def concatenate(*args, **kwargs): + return MagicMock() + + class nn: + Module = type('Module', (), {}) + Linear = type('Linear', (), {'__call__': lambda *args, **kwargs: MagicMock()}) + + @staticmethod + def optimizer(*args, **kwargs): + return MagicMock() + +# Detailed mock for huggingface_hub +class MockHuggingFaceHub: + class utils: + class _errors: + RepositoryNotFoundError = type('RepositoryNotFoundError', (Exception,), {}) + + @staticmethod + def snapshot_download(*args, **kwargs): + return "/mocked/path/to/model" + +# Apply mocks +sys.modules['mlx_vlm'] = MockMLXVLM() +sys.modules['mlx_vlm.prompt_utils'] = MockMLXVLM.prompt_utils +sys.modules['mlx_vlm.utils'] = MockMLXVLM.utils +sys.modules['mlx'] = MockMLX() +sys.modules['mlx.core'] = MockMLX.core +sys.modules['mlx.nn'] = MockMLX.nn +sys.modules['huggingface_hub'] = MockHuggingFaceHub() +sys.modules['huggingface_hub.utils'] = MockHuggingFaceHub.utils +sys.modules['huggingface_hub.utils._errors'] = MockHuggingFaceHub.utils._errors + +from fastapi.testclient import TestClient +from fastapi import HTTPException +from unittest.mock import patch +from fastmlx import app + +client = TestClient(app) + +def test_chat_completion(): + response = client.post( + "/v1/chat/completions", + json={ + "model": "test-model", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 100, + "temperature": 0.7 + } + ) + assert response.status_code == 200 + assert "choices" in response.json() + assert 
response.json()["choices"][0]["message"]["content"] == "This is a test response." + +def test_chat_completion_with_image(): + response = client.post( + "/v1/chat/completions", + json={ + "model": "test-model", + "messages": [{"role": "user", "content": "What's in this image?"}], + "image": "base64_encoded_image_data", + "max_tokens": 100, + "temperature": 0.7 + } + ) + assert response.status_code == 200 + assert "choices" in response.json() + assert response.json()["choices"][0]["message"]["content"] == "This is a test response." + +def test_list_models(): + with patch('fastmlx.ModelProvider.get_available_models', return_value=["model1", "model2"]): + response = client.get("/v1/models") + assert response.status_code == 200 + assert response.json() == {"models": ["model1", "model2"]} + +def test_add_model(): + with patch('fastmlx.ModelProvider.add_model_path') as mock_add_model_path: + response = client.post("/v1/models?model_name=new-model&model_path=/path/to/model") + assert response.status_code == 200 + assert response.json() == {"status": "success", "message": "Model new-model added successfully"} + mock_add_model_path.assert_called_once_with("new-model", "/path/to/model") + + +def test_chat_completion_invalid_model(): + with patch('fastmlx.ModelProvider.load_model', side_effect=HTTPException(status_code=404, detail="Model not found")): + response = client.post( + "/v1/chat/completions", + json={ + "model": "invalid-model", + "messages": [{"role": "user", "content": "Hello"}] + } + ) + + assert response.status_code == 404 + assert "Model not found" in response.json()["detail"] \ No newline at end of file From e7acad865936ba0e6a837486fbae8429c4983db5 Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 14:26:31 +0200 Subject: [PATCH 03/16] remove unused files --- fastmlx/cli.py | 19 ------------------- fastmlx/common.py | 7 ------- requirements_dev.txt | 29 ----------------------------- 3 files changed, 55 deletions(-) delete mode 100644 fastmlx/cli.py delete mode 100644 fastmlx/common.py delete mode 100644 requirements_dev.txt diff --git a/fastmlx/cli.py b/fastmlx/cli.py deleted file mode 100644 index b0060eb3..00000000 --- a/fastmlx/cli.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Console script for fastmlx.""" -import argparse -import sys - - -def main(): - """Console script for fastmlx.""" - parser = argparse.ArgumentParser() - parser.add_argument('_', nargs='*') - args = parser.parse_args() - - print("Arguments: " + str(args._)) - print("Replace this message by putting your code into " - "fastmlx.cli.main") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) # pragma: no cover diff --git a/fastmlx/common.py b/fastmlx/common.py deleted file mode 100644 index 6967be7d..00000000 --- a/fastmlx/common.py +++ /dev/null @@ -1,7 +0,0 @@ -"""The common module contains common functions and classes used by the other modules. -""" - -def hello_world(): - """Prints "Hello World!" to the console. 
- """ - print("Hello World!") \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 16d70930..00000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,29 +0,0 @@ -black -black[jupyter] -build -bump-my-version -codespell -wheel -flake8 -twine -click -pytest -pytest-runner - -ipykernel -livereload -nbconvert -nbformat -sphinx -watchdog -mkdocs -mkdocs-git-revision-date-plugin -mkdocs-git-revision-date-localized-plugin -mkdocs-jupyter>=0.24.0 -mkdocs-material>=9.1.3 -mkdocs-pdf-export-plugin -mkdocstrings -mkdocstrings-crystal -mkdocstrings-python-legacy -pygments -pymdown-extensions From 68db9a78687a7d976340caa62aada591c02afab0 Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 14:27:34 +0200 Subject: [PATCH 04/16] remove transformers and pin vlm and lm --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index a9cb9ede..191c296a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy -mlx>=0.8 -mlx-lm>=0.4.0 +mlx>=0.15 +mlx-lm>=0.15.0 +mlx-vlm>=0.0.11 fastapi>=0.110.0 -transformers>=4.38.0 jinja2 \ No newline at end of file From bc787baf61253bb5889e0fea368425c6732670f9 Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 14:27:45 +0200 Subject: [PATCH 05/16] update gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index e43b0f98..67cfefe3 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ .DS_Store +__pycache__ +*.egg-info \ No newline at end of file From 4381a8b3afa5bce5f1cc93c5a8dc5f7e38e10152 Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 14:28:21 +0200 Subject: [PATCH 06/16] add server --- fastmlx/__init__.py | 2 + fastmlx/fastmlx.py | 135 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) diff --git a/fastmlx/__init__.py b/fastmlx/__init__.py index 41e84b78..7b80935e 100644 --- a/fastmlx/__init__.py +++ b/fastmlx/__init__.py @@ -3,3 +3,5 @@ __author__ = """Prince Canuma""" __email__ = "prince.gdt@gmail.com" __version__ = "0.0.1" + +from .fastmlx import * \ No newline at end of file diff --git a/fastmlx/fastmlx.py b/fastmlx/fastmlx.py index dd0b80ed..c1c5f6e4 100644 --- a/fastmlx/fastmlx.py +++ b/fastmlx/fastmlx.py @@ -1 +1,136 @@ """Main module.""" +import os +import time +from typing import List, Optional +from pydantic import BaseModel, Field +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +import mlx.core as mx +from mlx_vlm import load, generate +from mlx_vlm.prompt_utils import get_message_json +from mlx_vlm.utils import load_image_processor, load_config + +class ModelProvider: + def __init__(self): + self.models = {} + + + def load_model(self, model_name: str): + if model_name not in self.models: + model, processor = load(model_name, {"trust_remote_code":True}) + image_processor = load_image_processor(model_name) + config = load_config(model_name) + self.models[model_name] = { + "model": model, + "processor": processor, + "image_processor": image_processor, + "config": config + } + + return self.models[model_name] + + def add_model_path(self, model_name: str, model_path: str): + self.model_paths[model_name] = model_path + + def get_available_models(self): + return list(self.model_paths.keys()) + +class ChatMessage(BaseModel): + role: str + content: str + +class ChatCompletionRequest(BaseModel): + model: str + messages: List[ChatMessage] + image: 
Optional[str] = Field(default=None) + max_tokens: Optional[int] = Field(default=100) + temperature: Optional[float] = Field(default=0.7) + +class ChatCompletionResponse(BaseModel): + id: str + object: str = "chat.completion" + created: int + model: str + choices: List[dict] + +app = FastAPI() + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Initialize the ModelProvider +model_provider = ModelProvider() + +@app.post("/v1/chat/completions", response_model=ChatCompletionResponse) +async def chat_completion(request: ChatCompletionRequest): + model_data = model_provider.load_model(request.model) + model = model_data["model"] + processor = model_data["processor"] + image_processor = model_data["image_processor"] + config = model_data["config"] + image = request.image + + chat_messages = [] + + for msg in request.messages: + if msg.role == "user": + chat_messages.append(get_message_json(config["model_type"], msg.content)) + else: + chat_messages.append({"role": msg.role, "content": msg.content}) + + prompt = "" + if "chat_template" in processor.__dict__.keys(): + prompt = processor.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + + elif "tokenizer" in processor.__dict__.keys(): + if model.config.model_type != "paligemma": + prompt = processor.tokenizer.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + else: + prompt = request.messages[-1].content + + + # Generate the response + output = generate(model, processor, image, prompt, image_processor, verbose=False) + + # Prepare the response + response = ChatCompletionResponse( + id=f"chatcmpl-{os.urandom(4).hex()}", + created=int(time.time()), + model=request.model, + choices=[ + { + "index": 0, + "message": {"role": "assistant", "content": output}, + "finish_reason": "stop", + } + ], + ) + + return response + +@app.get("/v1/models") +async def list_models(): + return {"models": model_provider.get_available_models()} + +@app.post("/v1/models") +async def add_model(model_name: str, model_path: str): + model_provider.add_model_path(model_name, model_path) + return {"status": "success", "message": f"Model {model_name} added successfully"} + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file From 06f7af68b548fc89d85d804ebdd3b4a8dbd99ddf Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 19:58:02 +0200 Subject: [PATCH 07/16] refactor model loading --- fastmlx/fastmlx.py | 107 ++++++++++++++++++++++++++++----------------- fastmlx/utils.py | 51 +++++++++++++++++++++ 2 files changed, 117 insertions(+), 41 deletions(-) create mode 100644 fastmlx/utils.py diff --git a/fastmlx/fastmlx.py b/fastmlx/fastmlx.py index c1c5f6e4..ca0529c3 100644 --- a/fastmlx/fastmlx.py +++ b/fastmlx/fastmlx.py @@ -5,35 +5,38 @@ from pydantic import BaseModel, Field from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware -import mlx.core as mx -from mlx_vlm import load, generate -from mlx_vlm.prompt_utils import get_message_json -from mlx_vlm.utils import load_image_processor, load_config + +try: + import mlx.core as mx + from mlx_lm import generate as lm_generate + from mlx_vlm import generate as vlm_generate + from mlx_vlm.prompt_utils import get_message_json + from mlx_vlm.utils import load_config + from .utils import load_lm_model, load_vlm_model, 
MODEL_REMAPPING, MODELS + MLX_AVAILABLE = True +except ImportError: + print("Warning: mlx or mlx_lm not available. Some functionality will be limited.") + MLX_AVAILABLE = False + class ModelProvider: def __init__(self): self.models = {} - def load_model(self, model_name: str): if model_name not in self.models: - model, processor = load(model_name, {"trust_remote_code":True}) - image_processor = load_image_processor(model_name) config = load_config(model_name) - self.models[model_name] = { - "model": model, - "processor": processor, - "image_processor": image_processor, - "config": config - } + model_type = MODEL_REMAPPING.get(config["model_type"], config["model_type"]) + if model_type in MODELS["vlm"]: + self.models[model_name] = load_vlm_model(model_name, config) + else: + self.models[model_name] = load_lm_model(model_name, config) return self.models[model_name] - def add_model_path(self, model_name: str, model_path: str): - self.model_paths[model_name] = model_path def get_available_models(self): - return list(self.model_paths.keys()) + return list(self.models.keys()) class ChatMessage(BaseModel): role: str @@ -69,32 +72,55 @@ class ChatCompletionResponse(BaseModel): @app.post("/v1/chat/completions", response_model=ChatCompletionResponse) async def chat_completion(request: ChatCompletionRequest): + if not MLX_AVAILABLE: + raise HTTPException(status_code=500, detail="MLX library not available") + model_data = model_provider.load_model(request.model) model = model_data["model"] - processor = model_data["processor"] - image_processor = model_data["image_processor"] config = model_data["config"] - image = request.image + model_type = MODEL_REMAPPING.get(config["model_type"], config["model_type"]) - chat_messages = [] + if model_type in MODELS["vlm"]: + processor = model_data["processor"] + image_processor = model_data["image_processor"] - for msg in request.messages: - if msg.role == "user": - chat_messages.append(get_message_json(config["model_type"], msg.content)) - else: - chat_messages.append({"role": msg.role, "content": msg.content}) - - prompt = "" - if "chat_template" in processor.__dict__.keys(): - prompt = processor.apply_chat_template( - chat_messages, - tokenize=False, - add_generation_prompt=True, - ) - - elif "tokenizer" in processor.__dict__.keys(): - if model.config.model_type != "paligemma": - prompt = processor.tokenizer.apply_chat_template( + image = request.image + + chat_messages = [] + + for msg in request.messages: + if msg.role == "user": + chat_messages.append(get_message_json(config["model_type"], msg.content)) + else: + chat_messages.append({"role": msg.role, "content": msg.content}) + + prompt = "" + if "chat_template" in processor.__dict__.keys(): + prompt = processor.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + + elif "tokenizer" in processor.__dict__.keys(): + if model.config.model_type != "paligemma": + prompt = processor.tokenizer.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + else: + prompt = request.messages[-1].content + + + # Generate the response + output = vlm_generate(model, processor, image, prompt, image_processor, verbose=False) + + else: + tokenizer = model_data["tokenizer"] + chat_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages] + if "chat_template" in tokenizer.__dict__.keys(): + prompt = tokenizer.apply_chat_template( chat_messages, tokenize=False, add_generation_prompt=True, @@ -102,9 +128,8 @@ async def 
chat_completion(request: ChatCompletionRequest): else: prompt = request.messages[-1].content + output = lm_generate(model, tokenizer, prompt, verbose=False) - # Generate the response - output = generate(model, processor, image, prompt, image_processor, verbose=False) # Prepare the response response = ChatCompletionResponse( @@ -127,8 +152,8 @@ async def list_models(): return {"models": model_provider.get_available_models()} @app.post("/v1/models") -async def add_model(model_name: str, model_path: str): - model_provider.add_model_path(model_name, model_path) +async def add_model(model_name: str): + model_provider.load_model(model_name) return {"status": "success", "message": f"Model {model_name} added successfully"} if __name__ == "__main__": diff --git a/fastmlx/utils.py b/fastmlx/utils.py new file mode 100644 index 00000000..4a94c028 --- /dev/null +++ b/fastmlx/utils.py @@ -0,0 +1,51 @@ + +from typing import Dict, Any +import os + +# MLX Imports +try: + from mlx_lm import load as lm_load, models as lm_models + from mlx_vlm import load as vlm_load, models as vlm_models + from mlx_vlm.utils import load_image_processor +except ImportError: + print("Warning: mlx or mlx_lm not available. Some functionality will be limited.") + + +def get_model_type_list(models, type="vlm"): + + # Get the directory path of the models package + models_dir = os.path.dirname(models.__file__) + + # List all items in the models directory + all_items = os.listdir(models_dir) + + if type == "vlm": + submodules = [item for item in all_items + if os.path.isdir(os.path.join(models_dir, item)) + and not item.startswith('.') + and item != '__pycache__'] + return submodules + else: + return all_items + +MODELS = {"vlm": get_model_type_list(vlm_models), "lm": get_model_type_list(lm_models, "lm")} +MODEL_REMAPPING = {"llava-qwen2": "llava_bunny", "bunny-llama": "llava_bunny"} + +# Model Loading and Generation Functions +def load_vlm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]: + model, processor = vlm_load(model_name, {"trust_remote_code": True}) + image_processor = load_image_processor(model_name) + return { + "model": model, + "processor": processor, + "image_processor": image_processor, + "config": config + } + +def load_lm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]: + model, tokenizer = lm_load(model_name) + return { + "model": model, + "tokenizer": tokenizer, + "config": config + } \ No newline at end of file From 9d2e132b4fd89625620d6f24c16c62805f0574ca Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 19:58:19 +0200 Subject: [PATCH 08/16] update requirements --- requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 191c296a..c7d4791c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ -numpy mlx>=0.15 -mlx-lm>=0.15.0 +mlx-lm>=0.15.2 mlx-vlm>=0.0.11 -fastapi>=0.110.0 +fastapi>=0.111.0 jinja2 \ No newline at end of file From f3c8555be1f117f9667c50112448d96c8d207c68 Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 19:58:30 +0200 Subject: [PATCH 09/16] fix tests --- tests/test_fastmlx.py | 190 ++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 118 deletions(-) diff --git a/tests/test_fastmlx.py b/tests/test_fastmlx.py index b0ccb56f..dc4154a5 100644 --- a/tests/test_fastmlx.py +++ b/tests/test_fastmlx.py @@ -2,132 +2,86 @@ """Tests for `fastmlx` package.""" -import sys -from unittest.mock import MagicMock - -# Detailed mock for 
mlx_vlm -class MockMLXVLM: - class prompt_utils: - @staticmethod - def get_message_json(*args, **kwargs): - return {"role": "user", "content": "mocked content"} - - class utils: - @staticmethod - def load_image_processor(*args, **kwargs): - return MagicMock() - - @staticmethod - def load_config(*args, **kwargs): - return {"model_type": "test_model"} - - @staticmethod - def load(*args, **kwargs): - return (MagicMock(), MagicMock()) - - @staticmethod - def generate(*args, **kwargs): - return "This is a test response." - -# Detailed mock for mlx -class MockMLX: - class core: - @staticmethod - def array(*args, **kwargs): - return MagicMock() - - @staticmethod - def concatenate(*args, **kwargs): - return MagicMock() - - class nn: - Module = type('Module', (), {}) - Linear = type('Linear', (), {'__call__': lambda *args, **kwargs: MagicMock()}) - - @staticmethod - def optimizer(*args, **kwargs): - return MagicMock() - -# Detailed mock for huggingface_hub -class MockHuggingFaceHub: - class utils: - class _errors: - RepositoryNotFoundError = type('RepositoryNotFoundError', (Exception,), {}) - - @staticmethod - def snapshot_download(*args, **kwargs): - return "/mocked/path/to/model" - -# Apply mocks -sys.modules['mlx_vlm'] = MockMLXVLM() -sys.modules['mlx_vlm.prompt_utils'] = MockMLXVLM.prompt_utils -sys.modules['mlx_vlm.utils'] = MockMLXVLM.utils -sys.modules['mlx'] = MockMLX() -sys.modules['mlx.core'] = MockMLX.core -sys.modules['mlx.nn'] = MockMLX.nn -sys.modules['huggingface_hub'] = MockHuggingFaceHub() -sys.modules['huggingface_hub.utils'] = MockHuggingFaceHub.utils -sys.modules['huggingface_hub.utils._errors'] = MockHuggingFaceHub.utils._errors - +import pytest from fastapi.testclient import TestClient -from fastapi import HTTPException -from unittest.mock import patch -from fastmlx import app - -client = TestClient(app) - -def test_chat_completion(): - response = client.post( - "/v1/chat/completions", - json={ - "model": "test-model", - "messages": [{"role": "user", "content": "Hello"}], - "max_tokens": 100, - "temperature": 0.7 - } +from unittest.mock import patch, MagicMock +import json + +# Import the actual classes and functions +from fastmlx import app, ModelProvider, ChatCompletionRequest, ChatMessage + +# Create mock classes that inherit from the original classes +class MockModelProvider(ModelProvider): + def __init__(self): + super().__init__() + self.models = {} + + def load_model(self, model_name: str): + if model_name not in self.models: + model_type = "vlm" if "llava" in model_name.lower() else "lm" + self.models[model_name] = { + "model": MagicMock(), + "processor": MagicMock(), + "tokenizer": MagicMock(), + "image_processor": MagicMock() if model_type == "vlm" else None, + "config": {"model_type": model_type} + } + return self.models[model_name] + + def get_available_models(self): + return list(self.models.keys()) + +# Mock MODELS dictionary +MODELS = {"vlm": ["llava"], "lm": ["phi"]} + +# Mock functions +def mock_generate(*args, **kwargs): + return "generated response" + +@pytest.fixture(scope="module") +def client(): + # Apply patches + with patch('fastmlx.fastmlx.model_provider', MockModelProvider()), \ + patch('fastmlx.fastmlx.vlm_generate', mock_generate), \ + patch('fastmlx.fastmlx.lm_generate', mock_generate), \ + patch('fastmlx.fastmlx.MODELS', MODELS): + yield TestClient(app) + +def test_chat_completion_vlm(client): + request = ChatCompletionRequest( + model="test_llava_model", + messages=[ChatMessage(role="user", content="Hello")], + image="test_image" ) + response = 
client.post("/v1/chat/completions", json=json.loads(request.model_dump_json())) + assert response.status_code == 200 - assert "choices" in response.json() - assert response.json()["choices"][0]["message"]["content"] == "This is a test response." - -def test_chat_completion_with_image(): - response = client.post( - "/v1/chat/completions", - json={ - "model": "test-model", - "messages": [{"role": "user", "content": "What's in this image?"}], - "image": "base64_encoded_image_data", - "max_tokens": 100, - "temperature": 0.7 - } + assert "generated response" in response.json()["choices"][0]["message"]["content"] + +def test_chat_completion_lm(client): + request = ChatCompletionRequest( + model="test_phi_model", + messages=[ChatMessage(role="user", content="Hello")] ) - assert response.status_code == 200 - assert "choices" in response.json() - assert response.json()["choices"][0]["message"]["content"] == "This is a test response." + response = client.post("/v1/chat/completions", json=json.loads(request.model_dump_json())) -def test_list_models(): - with patch('fastmlx.ModelProvider.get_available_models', return_value=["model1", "model2"]): - response = client.get("/v1/models") assert response.status_code == 200 - assert response.json() == {"models": ["model1", "model2"]} + assert "generated response" in response.json()["choices"][0]["message"]["content"] + +def test_list_models(client): + client.post("/v1/models?model_name=test_llava_model") + client.post("/v1/models?model_name=test_phi_model") + + response = client.get("/v1/models") -def test_add_model(): - with patch('fastmlx.ModelProvider.add_model_path') as mock_add_model_path: - response = client.post("/v1/models?model_name=new-model&model_path=/path/to/model") assert response.status_code == 200 - assert response.json() == {"status": "success", "message": "Model new-model added successfully"} - mock_add_model_path.assert_called_once_with("new-model", "/path/to/model") + assert set(response.json()["models"]) == {"test_llava_model", "test_phi_model"} +def test_add_model(client): + response = client.post("/v1/models?model_name=new_llava_model") -def test_chat_completion_invalid_model(): - with patch('fastmlx.ModelProvider.load_model', side_effect=HTTPException(status_code=404, detail="Model not found")): - response = client.post( - "/v1/chat/completions", - json={ - "model": "invalid-model", - "messages": [{"role": "user", "content": "Hello"}] - } - ) + assert response.status_code == 200 + assert response.json() == {"status": "success", "message": "Model new_llava_model added successfully"} - assert response.status_code == 404 - assert "Model not found" in response.json()["detail"] \ No newline at end of file +if __name__ == "__main__": + pytest.main(["-v", __file__]) \ No newline at end of file From ac6e119ee71c51686e1477af2756a2f58e43f886 Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 20:15:39 +0200 Subject: [PATCH 10/16] add new start command --- fastmlx/fastmlx.py | 7 +++++-- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fastmlx/fastmlx.py b/fastmlx/fastmlx.py index ca0529c3..77f54359 100644 --- a/fastmlx/fastmlx.py +++ b/fastmlx/fastmlx.py @@ -156,6 +156,9 @@ async def add_model(model_name: str): model_provider.load_model(model_name) return {"status": "success", "message": f"Model {model_name} added successfully"} -if __name__ == "__main__": +def run(): import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + uvicorn.run("fastmlx:app", 
host="127.0.0.1", port=8000, reload=True) + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 2d71a932..9aa1edb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] [project.entry-points."console_scripts"] -fastmlx = "fastmlx.cli:main" +fastmlx = "fastmlx.fastmlx:run" [project.optional-dependencies] all = [ From bff1372125a7c7439876eb4e6acaf16dd449d68b Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 20:27:52 +0200 Subject: [PATCH 11/16] update readme --- README.md | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 97 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 985279b3..d99e71b5 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,109 @@ -# fastmlx - +# fastmlX [![image](https://img.shields.io/pypi/v/fastmlx.svg)](https://pypi.python.org/pypi/fastmlx) [![image](https://img.shields.io/conda/vn/conda-forge/fastmlx.svg)](https://anaconda.org/conda-forge/fastmlx) - [![image](https://pyup.io/repos/github/Blaizzy/fastmlx/shield.svg)](https://pyup.io/repos/github/Blaizzy/fastmlx) - **FastMLX is a high performance production ready API to host MLX models.** - - Free software: Apache Software License 2.0 - Documentation: https://Blaizzy.github.io/fastmlx - ## Features -- TODO +- **OpenAI-compatible API**: Easily integrate with existing applications that use OpenAI's API. +- **Dynamic Model Loading**: Load MLX models on-the-fly or use pre-loaded models for better performance. +- **Support for Multiple Model Types**: Compatible with various MLX model architectures. +- **Image Processing Capabilities**: Handle both text and image inputs for versatile model interactions. +- **Efficient Resource Management**: Optimized for high-performance and scalability. +- **Error Handling**: Robust error management for production environments. +- **Customizable**: Easily extendable to accommodate specific use cases and model types. + +## Usage + +1. **Installation** + + ```bash + pip install fastmlx + ``` + +2. **Running the Server** + + Start the FastMLX server: + ```bash + fastmlx + ``` + or + + ```bash + uvicorn fastmlx:app --reload + ``` + +3. **Making API Calls** + + Use the API similar to OpenAI's chat completions: + + **Vision Language Model** + + ```python + import requests + import json + + url = "http://localhost:8000/v1/chat/completions" + headers = {"Content-Type": "application/json"} + data = { + "model": "mlx-community/nanoLLaVA-1.5-4bit", + "image": "http://images.cocodataset.org/,val2017/000000039769.jpg" + "messages": [{"role": "user", "content": "What are these"}], + "max_tokens": 100 + } + + response = requests.post(url, headers=headers, data=json.dumps(data)) + print(response.json()) + ``` + **Language Model** + ```python + import requests + import json + + url = "http://localhost:8000/v1/chat/completions" + headers = {"Content-Type": "application/json"} + data = { + "model": "mlx-community/gemma-2-9b-it-4bit", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 100 + } + + response = requests.post(url, headers=headers, data=json.dumps(data)) + print(response.json()) + ``` + +4. **Adding a New Model** + + You can add new models to the API: + + ```python + import requests + + url = "http://localhost:8000/v1/models" + params = { + "model_name": "hf-repo-or-path", + } + + response = requests.post(url, params=params) + print(response.json()) + ``` + +5. 
From 33abf910b1500a1610f819960fd76dae2982f572 Mon Sep 17 00:00:00 2001
From: Prince Canuma
Date: Tue, 9 Jul 2024 20:28:20 +0200
Subject: [PATCH 12/16] add ci/cd

---
 .github/workflows/python-publish.yml | 35 ++++++++++++++++++++++++++
 .github/workflows/tests.yml          | 37 ++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 .github/workflows/python-publish.yml
 create mode 100644 .github/workflows/tests.yml

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 00000000..8b3a87af
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,35 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.10'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN }}
+        packages_dir: dist
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 00000000..989aedc2
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,37 @@
+name: Test PRs
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: macos-14
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        run: |
+          brew install python@3.10
+          python3 -m venv env
+          source env/bin/activate
+
+
+      - name: Run style checks
+        run: |
+          pip install pre-commit
+          pre-commit run --all
+          if ! git diff --quiet; then echo 'Style checks failed, please install pre-commit and run pre-commit run --all and push the change'; exit 1; fi
+
+      - name: Install dependencies
+        run: |
+          pip install pytest
+          pip install -e .
+
+      - name: Run Python tests
+        run: |
+          cd fastmlx/
+          pytest -s ./tests
From cd344271e244a84a5ac3bb97e71a36ef00517794 Mon Sep 17 00:00:00 2001
From: Prince Canuma
Date: Tue, 9 Jul 2024 20:44:25 +0200
Subject: [PATCH 13/16] add pre-commit

---
 .pre-commit-config.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..04427a14
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,11 @@
+repos:
+- repo: https://github.com/psf/black-pre-commit-mirror
+  rev: 24.2.0
+  hooks:
+  - id: black
+- repo: https://github.com/pycqa/isort
+  rev: 5.13.2
+  hooks:
+  - id: isort
+    args:
+      - --profile=black
\ No newline at end of file
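The `--profile=black` argument keeps isort's sorting compatible with black's wrapping rules, which is what produces the parenthesized, trailing-comma import blocks in the formatting patch below. For illustration, this is how the black profile wraps a long import (names taken from this repo's `fastmlx/utils.py`):

```python
# Illustration: isort --profile=black output for a long import line
# (parentheses, one name per line, trailing comma).
from fastmlx.utils import (
    MODEL_REMAPPING,
    MODELS,
    load_lm_model,
    load_vlm_model,
)
```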
From b407afc2cf42646a5203ff2383fe6f7e52bc488e Mon Sep 17 00:00:00 2001
From: Prince Canuma
Date: Tue, 9 Jul 2024 20:46:28 +0200
Subject: [PATCH 14/16] format code

---
 fastmlx/__init__.py   |  2 +-
 fastmlx/fastmlx.py    | 35 +++++++++++++++++++++---------
 fastmlx/utils.py      | 36 +++++++++++++++++++--------------
 tests/test_fastmlx.py | 47 +++++++++++++++++++++++++++--------------
 4 files changed, 80 insertions(+), 40 deletions(-)

diff --git a/fastmlx/__init__.py b/fastmlx/__init__.py
index 7b80935e..ed220226 100644
--- a/fastmlx/__init__.py
+++ b/fastmlx/__init__.py
@@ -4,4 +4,4 @@
 __email__ = "prince.gdt@gmail.com"
 __version__ = "0.0.1"
 
-from .fastmlx import *
\ No newline at end of file
+from .fastmlx import *
diff --git a/fastmlx/fastmlx.py b/fastmlx/fastmlx.py
index 77f54359..fd878760 100644
--- a/fastmlx/fastmlx.py
+++ b/fastmlx/fastmlx.py
@@ -1,10 +1,12 @@
 """Main module."""
+
 import os
 import time
 from typing import List, Optional
-from pydantic import BaseModel, Field
+
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
 
 try:
     import mlx.core as mx
@@ -12,7 +14,9 @@
     from mlx_vlm import generate as vlm_generate
     from mlx_vlm.prompt_utils import get_message_json
     from mlx_vlm.utils import load_config
-    from .utils import load_lm_model, load_vlm_model, MODEL_REMAPPING, MODELS
+
+    from .utils import MODEL_REMAPPING, MODELS, load_lm_model, load_vlm_model
+
     MLX_AVAILABLE = True
 except ImportError:
     print("Warning: mlx or mlx_lm not available. Some functionality will be limited.")
@@ -34,14 +38,15 @@ def load_model(self, model_name: str):
 
         return self.models[model_name]
 
-
     def get_available_models(self):
         return list(self.models.keys())
 
+
 class ChatMessage(BaseModel):
     role: str
     content: str
 
+
 class ChatCompletionRequest(BaseModel):
     model: str
     messages: List[ChatMessage]
@@ -49,6 +54,7 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: Optional[int] = Field(default=100)
     temperature: Optional[float] = Field(default=0.7)
 
+
 class ChatCompletionResponse(BaseModel):
     id: str
     object: str = "chat.completion"
@@ -56,6 +62,7 @@ class ChatCompletionResponse(BaseModel):
     model: str
     choices: List[dict]
 
+
 app = FastAPI()
 
 # Add CORS middleware
@@ -70,6 +77,7 @@ class ChatCompletionResponse(BaseModel):
 # Initialize the ModelProvider
 model_provider = ModelProvider()
 
+
 @app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
 async def chat_completion(request: ChatCompletionRequest):
     if not MLX_AVAILABLE:
@@ -90,7 +98,9 @@ async def chat_completion(request: ChatCompletionRequest):
 
         for msg in request.messages:
             if msg.role == "user":
-                chat_messages.append(get_message_json(config["model_type"], msg.content))
+                chat_messages.append(
+                    get_message_json(config["model_type"], msg.content)
+                )
             else:
                 chat_messages.append({"role": msg.role, "content": msg.content})
 
@@ -112,13 +122,16 @@ async def chat_completion(request: ChatCompletionRequest):
         else:
             prompt = request.messages[-1].content
 
-
         # Generate the response
-        output = vlm_generate(model, processor, image, prompt, image_processor, verbose=False)
+        output = vlm_generate(
+            model, processor, image, prompt, image_processor, verbose=False
+        )
 
     else:
         tokenizer = model_data["tokenizer"]
 
-        chat_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
+        chat_messages = [
+            {"role": msg.role, "content": msg.content} for msg in request.messages
+        ]
         if "chat_template" in tokenizer.__dict__.keys():
             prompt = tokenizer.apply_chat_template(
                 chat_messages,
@@ -130,7 +143,6 @@ async def chat_completion(request: ChatCompletionRequest):
 
         output = lm_generate(model, tokenizer, prompt, verbose=False)
 
-
     # Prepare the response
     response = ChatCompletionResponse(
         id=f"chatcmpl-{os.urandom(4).hex()}",
@@ -147,18 +159,23 @@ async def chat_completion(request: ChatCompletionRequest):
 
     return response
 
+
 @app.get("/v1/models")
 async def list_models():
     return {"models": model_provider.get_available_models()}
 
+
 @app.post("/v1/models")
 async def add_model(model_name: str):
     model_provider.load_model(model_name)
     return {"status": "success", "message": f"Model {model_name} added successfully"}
 
+
 def run():
     import uvicorn
+
     uvicorn.run("fastmlx:app", host="127.0.0.1", port=8000, reload=True)
 
+
 if __name__ == "__main__":
-    run()
\ No newline at end of file
+    run()
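In the reformatted `chat_completion` handler above, the language-model branch builds its prompt with the tokenizer's chat template whenever one is present. A standalone sketch of that step with a Hugging Face tokenizer (the checkpoint name is only an example; any chat-tuned tokenizer works):

```python
# Sketch: apply_chat_template turns a message list into a prompt string.
# The checkpoint name below is illustrative, not mandated by the patch.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mlx-community/gemma-2-9b-it-4bit")
messages = [{"role": "user", "content": "What is the capital of France?"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
```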
diff --git a/fastmlx/utils.py b/fastmlx/utils.py
index 4a94c028..6f75205f 100644
--- a/fastmlx/utils.py
+++ b/fastmlx/utils.py
@@ -1,11 +1,12 @@
-
-from typing import Dict, Any
 import os
+from typing import Any, Dict
 
 # MLX Imports
 try:
-    from mlx_lm import load as lm_load, models as lm_models
-    from mlx_vlm import load as vlm_load, models as vlm_models
+    from mlx_lm import load as lm_load
+    from mlx_lm import models as lm_models
+    from mlx_vlm import load as vlm_load
+    from mlx_vlm import models as vlm_models
     from mlx_vlm.utils import load_image_processor
 except ImportError:
     print("Warning: mlx or mlx_lm not available. Some functionality will be limited.")
@@ -20,17 +21,25 @@ def get_model_type_list(models, type="vlm"):
     all_items = os.listdir(models_dir)
 
     if type == "vlm":
-        submodules = [item for item in all_items
-                      if os.path.isdir(os.path.join(models_dir, item))
-                      and not item.startswith('.')
-                      and item != '__pycache__']
+        submodules = [
+            item
+            for item in all_items
+            if os.path.isdir(os.path.join(models_dir, item))
+            and not item.startswith(".")
+            and item != "__pycache__"
+        ]
         return submodules
     else:
         return all_items
 
-MODELS = {"vlm": get_model_type_list(vlm_models), "lm": get_model_type_list(lm_models, "lm")}
+
+MODELS = {
+    "vlm": get_model_type_list(vlm_models),
+    "lm": get_model_type_list(lm_models, "lm"),
+}
 MODEL_REMAPPING = {"llava-qwen2": "llava_bunny", "bunny-llama": "llava_bunny"}
 
+
 # Model Loading and Generation Functions
 def load_vlm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]:
     model, processor = vlm_load(model_name, {"trust_remote_code": True})
@@ -39,13 +48,10 @@ def load_vlm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]:
         "model": model,
         "processor": processor,
         "image_processor": image_processor,
-        "config": config
+        "config": config,
     }
 
+
 def load_lm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]:
     model, tokenizer = lm_load(model_name)
-    return {
-        "model": model,
-        "tokenizer": tokenizer,
-        "config": config
-    }
\ No newline at end of file
+    return {"model": model, "tokenizer": tokenizer, "config": config}
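`MODEL_REMAPPING` above folds checkpoint architecture aliases onto the module names that `get_model_type_list` discovers in mlx-vlm. A hypothetical lookup sketch follows; the membership check is an assumption about how callers consume these tables, not code from this patch:

```python
# Hypothetical sketch: resolving an architecture alias before a support check.
from fastmlx.utils import MODEL_REMAPPING, MODELS

model_type = "llava-qwen2"
resolved = MODEL_REMAPPING.get(model_type, model_type)  # -> "llava_bunny"
print(resolved in MODELS["vlm"])  # True when mlx-vlm ships a llava_bunny module
```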
client.post("/v1/chat/completions", json=json.loads(request.model_dump_json())) assert response.status_code == 200 assert "generated response" in response.json()["choices"][0]["message"]["content"] + def test_chat_completion_lm(client): request = ChatCompletionRequest( - model="test_phi_model", - messages=[ChatMessage(role="user", content="Hello")] + model="test_phi_model", messages=[ChatMessage(role="user", content="Hello")] + ) + response = client.post( + "/v1/chat/completions", json=json.loads(request.model_dump_json()) ) - response = client.post("/v1/chat/completions", json=json.loads(request.model_dump_json())) assert response.status_code == 200 assert "generated response" in response.json()["choices"][0]["message"]["content"] + def test_list_models(client): client.post("/v1/models?model_name=test_llava_model") client.post("/v1/models?model_name=test_phi_model") @@ -77,11 +89,16 @@ def test_list_models(client): assert response.status_code == 200 assert set(response.json()["models"]) == {"test_llava_model", "test_phi_model"} + def test_add_model(client): response = client.post("/v1/models?model_name=new_llava_model") assert response.status_code == 200 - assert response.json() == {"status": "success", "message": "Model new_llava_model added successfully"} + assert response.json() == { + "status": "success", + "message": "Model new_llava_model added successfully", + } + if __name__ == "__main__": - pytest.main(["-v", __file__]) \ No newline at end of file + pytest.main(["-v", __file__]) From f79c0e61828c0ae1346f83a3d2a5bb3583f4694a Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 20:50:53 +0200 Subject: [PATCH 15/16] fix tests --- .github/workflows/tests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 989aedc2..8540c660 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -33,5 +33,4 @@ jobs: - name: Run Python tests run: | - cd fastmlx/ - pytest -s ./tests + pytest -s . From 540b3a6b60a706dd60f5ee82e8e35dc64a0b6163 Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Tue, 9 Jul 2024 20:56:49 +0200 Subject: [PATCH 16/16] update readme description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d99e71b5..835d42b7 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![image](https://img.shields.io/conda/vn/conda-forge/fastmlx.svg)](https://anaconda.org/conda-forge/fastmlx) [![image](https://pyup.io/repos/github/Blaizzy/fastmlx/shield.svg)](https://pyup.io/repos/github/Blaizzy/fastmlx) -**FastMLX is a high performance production ready API to host MLX models.** +**FastMLX is a high performance production ready API to host MLX models, including Vision Language Models (VLMs) and Language Models (LMs).** - Free software: Apache Software License 2.0 - Documentation: https://Blaizzy.github.io/fastmlx