diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 00000000..8b3a87af --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,35 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + packages_dir: dist diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..8540c660 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,36 @@ +name: Test PRs + +on: + pull_request: + branches: + - main + +jobs: + test: + runs-on: macos-14 + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + run: | + brew install python@3.10 + python3 -m venv env + source env/bin/activate + + + - name: Run style checks + run: | + pip install pre-commit + pre-commit run --all + if ! git diff --quiet; then echo 'Style checks failed, please install pre-commit and run pre-commit run --all and push the change'; exit 1; fi + + - name: Install dependencies + run: | + pip install pytest + pip install -e . + + - name: Run Python tests + run: | + pytest -s . 
diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..67cfefe3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +__pycache__ +*.egg-info \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..04427a14 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.2.0 + hooks: + - id: black +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + args: + - --profile=black \ No newline at end of file diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 00000000..dffae7a0 --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,13 @@ +======= +Credits +======= + +Development Lead +---------------- + +* Prince Canuma + +Contributors +------------ + +None yet. Why not be the first? diff --git a/LICENSE b/LICENSE index 261eeb9e..b6cd1934 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,16 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ +Apache Software License 2.0 - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +Copyright (c) 2024, Prince Canuma - 1. Definitions. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. +http://www.apache.org/licenses/LICENSE-2.0 - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..89411aa6 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,7 @@ +include LICENSE +include README.md +include requirements.txt + +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + diff --git a/README.md b/README.md index 1227361b..835d42b7 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,109 @@ -# FastMLX +# fastmlX -FastMLX is a high performance production ready API to host MLX models. 
+[![image](https://img.shields.io/pypi/v/fastmlx.svg)](https://pypi.python.org/pypi/fastmlx) +[![image](https://img.shields.io/conda/vn/conda-forge/fastmlx.svg)](https://anaconda.org/conda-forge/fastmlx) +[![image](https://pyup.io/repos/github/Blaizzy/fastmlx/shield.svg)](https://pyup.io/repos/github/Blaizzy/fastmlx) + +**FastMLX is a high performance production ready API to host MLX models, including Vision Language Models (VLMs) and Language Models (LMs).** + +- Free software: Apache Software License 2.0 +- Documentation: https://Blaizzy.github.io/fastmlx + +## Features + +- **OpenAI-compatible API**: Easily integrate with existing applications that use OpenAI's API. +- **Dynamic Model Loading**: Load MLX models on-the-fly or use pre-loaded models for better performance. +- **Support for Multiple Model Types**: Compatible with various MLX model architectures. +- **Image Processing Capabilities**: Handle both text and image inputs for versatile model interactions. +- **Efficient Resource Management**: Optimized for high-performance and scalability. +- **Error Handling**: Robust error management for production environments. +- **Customizable**: Easily extendable to accommodate specific use cases and model types. + +## Usage + +1. **Installation** + + ```bash + pip install fastmlx + ``` + +2. **Running the Server** + + Start the FastMLX server: + ```bash + fastmlx + ``` + or + + ```bash + uvicorn fastmlx:app --reload + ``` + +3. 
**Making API Calls** + + Use the API similar to OpenAI's chat completions: + + **Vision Language Model** + + ```python + import requests + import json + + url = "http://localhost:8000/v1/chat/completions" + headers = {"Content-Type": "application/json"} + data = { + "model": "mlx-community/nanoLLaVA-1.5-4bit", + "image": "http://images.cocodataset.org/val2017/000000039769.jpg", + "messages": [{"role": "user", "content": "What are these"}], + "max_tokens": 100 + } + + response = requests.post(url, headers=headers, data=json.dumps(data)) + print(response.json()) + ``` + **Language Model** + ```python + import requests + import json + + url = "http://localhost:8000/v1/chat/completions" + headers = {"Content-Type": "application/json"} + data = { + "model": "mlx-community/gemma-2-9b-it-4bit", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 100 + } + + response = requests.post(url, headers=headers, data=json.dumps(data)) + print(response.json()) + ``` + +4. **Adding a New Model** + + You can add new models to the API: + + ```python + import requests + + url = "http://localhost:8000/v1/models" + params = { + "model_name": "hf-repo-or-path", + } + + response = requests.post(url, params=params) + print(response.json()) + ``` + +5. **Listing Available Models** + + To see all available models: + + ```python + import requests + + url = "http://localhost:8000/v1/models" + response = requests.get(url) + print(response.json()) + ``` + +For more detailed usage instructions and API documentation, please refer to the [full documentation](https://Blaizzy.github.io/fastmlx). \ No newline at end of file diff --git a/docs/authors.rst b/docs/authors.rst new file mode 100644 index 00000000..e122f914 --- /dev/null +++ b/docs/authors.rst @@ -0,0 +1 @@ +.. 
include:: ../AUTHORS.rst diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 00000000..289e2c52 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,11 @@ +# Changelog + +## v0.0.1 - Date + +**Improvement**: + +- TBD + +**New Features**: + +- TBD diff --git a/docs/common.md b/docs/common.md new file mode 100644 index 00000000..8d5152a8 --- /dev/null +++ b/docs/common.md @@ -0,0 +1,3 @@ +# common module + +::: fastmlx.common \ No newline at end of file diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 00000000..52aa69a8 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,108 @@ +# Contributing + +Contributions are welcome, and they are greatly appreciated! Every +little bit helps, and credit will always be given. + +You can contribute in many ways: + +## Types of Contributions + +### Report Bugs + +Report bugs at . + +If you are reporting a bug, please include: + +- Your operating system name and version. +- Any details about your local setup that might be helpful in troubleshooting. +- Detailed steps to reproduce the bug. + +### Fix Bugs + +Look through the GitHub issues for bugs. Anything tagged with `bug` and +`help wanted` is open to whoever wants to implement it. + +### Implement Features + +Look through the GitHub issues for features. Anything tagged with +`enhancement` and `help wanted` is open to whoever wants to implement it. + +### Write Documentation + +fastmlx could always use more documentation, +whether as part of the official fastmlx docs, +in docstrings, or even on the web in blog posts, articles, and such. + +### Submit Feedback + +The best way to send feedback is to file an issue at +. + +If you are proposing a feature: + +- Explain in detail how it would work. +- Keep the scope as narrow as possible, to make it easier to implement. +- Remember that this is a volunteer-driven project, and that contributions are welcome :) + +## Get Started! + +Ready to contribute? 
Here's how to set up fastmlx for local development. + +1. Fork the fastmlx repo on GitHub. + +2. Clone your fork locally: + + ```shell + $ git clone git@github.com:your_name_here/fastmlx.git + ``` + +3. Install your local copy into a virtualenv. Assuming you have + virtualenvwrapper installed, this is how you set up your fork for + local development: + + ```shell + $ mkvirtualenv fastmlx + $ cd fastmlx/ + $ python setup.py develop + ``` + +4. Create a branch for local development: + + ```shell + $ git checkout -b name-of-your-bugfix-or-feature + ``` + + Now you can make your changes locally. + +5. When you're done making changes, check that your changes pass flake8 + and the tests, including testing other Python versions with tox: + + ```shell + $ flake8 fastmlx tests + $ python setup.py test or pytest + $ tox + ``` + + To get flake8 and tox, just pip install them into your virtualenv. + +6. Commit your changes and push your branch to GitHub: + + ```shell + $ git add . + $ git commit -m "Your detailed description of your changes." + $ git push origin name-of-your-bugfix-or-feature + ``` + +7. Submit a pull request through the GitHub website. + +## Pull Request Guidelines + +Before you submit a pull request, check that it meets these guidelines: + +1. The pull request should include tests. +2. If the pull request adds functionality, the docs should be updated. + Put your new functionality into a function with a docstring, and add + the feature to the list in README.rst. +3. The pull request should work for Python 3.8 and later, and + for PyPy. Check and make sure that the tests pass for all + supported Python versions. 
diff --git a/docs/examples/intro.ipynb b/docs/examples/intro.ipynb new file mode 100644 index 00000000..ebd171a6 --- /dev/null +++ b/docs/examples/intro.ipynb @@ -0,0 +1,21 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('Hello World!')" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/faq.md b/docs/faq.md new file mode 100644 index 00000000..4514b4c1 --- /dev/null +++ b/docs/faq.md @@ -0,0 +1 @@ +# FAQ diff --git a/docs/fastmlx.md b/docs/fastmlx.md new file mode 100644 index 00000000..f2a80faf --- /dev/null +++ b/docs/fastmlx.md @@ -0,0 +1,4 @@ + +# fastmlx module + +::: fastmlx.fastmlx \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..18c5f86d --- /dev/null +++ b/docs/index.md @@ -0,0 +1,18 @@ +# Welcome to fastmlx + + +[![image](https://img.shields.io/pypi/v/fastmlx.svg)](https://pypi.python.org/pypi/fastmlx) + +[![image](https://pyup.io/repos/github/Blaizzy/fastmlx/shield.svg)](https://pyup.io/repos/github/Blaizzy/fastmlx) + + +**FastMLX is a high performance production ready API to host MLX models.** + + +- Free software: Apache Software License 2.0 +- Documentation: + + +## Features + +- TODO diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 00000000..981f8f17 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,21 @@ +# Installation + +## Stable release + +To install fastmlx, run this command in your terminal: + +``` +pip install fastmlx +``` + +This is the preferred method to install fastmlx, as it will always install the most recent stable release. + +If you don't have [pip](https://pip.pypa.io) installed, this [Python installation guide](http://docs.python-guide.org/en/latest/starting/installation/) can guide you through the process. 
+ +## From sources + +To install fastmlx from sources, run this command in your terminal: + +``` +pip install git+https://github.com/Blaizzy/fastmlx +``` diff --git a/docs/overrides/main.html b/docs/overrides/main.html new file mode 100644 index 00000000..702c96bf --- /dev/null +++ b/docs/overrides/main.html @@ -0,0 +1,11 @@ +{% extends "base.html" %} + +{% block content %} +{% if page.nb_url %} + + {% include ".icons/material/download.svg" %} + +{% endif %} + +{{ super() }} +{% endblock content %} diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..7f6d34e2 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,7 @@ +# Usage + +To use fastmlx in a project: + +``` +import fastmlx +``` diff --git a/fastmlx/__init__.py b/fastmlx/__init__.py new file mode 100644 index 00000000..ed220226 --- /dev/null +++ b/fastmlx/__init__.py @@ -0,0 +1,7 @@ +"""Top-level package for fastmlx.""" + +__author__ = """Prince Canuma""" +__email__ = "prince.gdt@gmail.com" +__version__ = "0.0.1" + +from .fastmlx import * diff --git a/fastmlx/fastmlx.py b/fastmlx/fastmlx.py new file mode 100644 index 00000000..fd878760 --- /dev/null +++ b/fastmlx/fastmlx.py @@ -0,0 +1,181 @@ +"""Main module.""" + +import os +import time +from typing import List, Optional + +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field + +try: + import mlx.core as mx + from mlx_lm import generate as lm_generate + from mlx_vlm import generate as vlm_generate + from mlx_vlm.prompt_utils import get_message_json + from mlx_vlm.utils import load_config + + from .utils import MODEL_REMAPPING, MODELS, load_lm_model, load_vlm_model + + MLX_AVAILABLE = True +except ImportError: + print("Warning: mlx or mlx_lm not available. 
Some functionality will be limited.") + MLX_AVAILABLE = False + + +class ModelProvider: + def __init__(self): + self.models = {} + + def load_model(self, model_name: str): + if model_name not in self.models: + config = load_config(model_name) + model_type = MODEL_REMAPPING.get(config["model_type"], config["model_type"]) + if model_type in MODELS["vlm"]: + self.models[model_name] = load_vlm_model(model_name, config) + else: + self.models[model_name] = load_lm_model(model_name, config) + + return self.models[model_name] + + def get_available_models(self): + return list(self.models.keys()) + + +class ChatMessage(BaseModel): + role: str + content: str + + +class ChatCompletionRequest(BaseModel): + model: str + messages: List[ChatMessage] + image: Optional[str] = Field(default=None) + max_tokens: Optional[int] = Field(default=100) + temperature: Optional[float] = Field(default=0.7) + + +class ChatCompletionResponse(BaseModel): + id: str + object: str = "chat.completion" + created: int + model: str + choices: List[dict] + + +app = FastAPI() + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Initialize the ModelProvider +model_provider = ModelProvider() + + +@app.post("/v1/chat/completions", response_model=ChatCompletionResponse) +async def chat_completion(request: ChatCompletionRequest): + if not MLX_AVAILABLE: + raise HTTPException(status_code=500, detail="MLX library not available") + + model_data = model_provider.load_model(request.model) + model = model_data["model"] + config = model_data["config"] + model_type = MODEL_REMAPPING.get(config["model_type"], config["model_type"]) + + if model_type in MODELS["vlm"]: + processor = model_data["processor"] + image_processor = model_data["image_processor"] + + image = request.image + + chat_messages = [] + + for msg in request.messages: + if msg.role == "user": + chat_messages.append( + 
get_message_json(config["model_type"], msg.content) + ) + else: + chat_messages.append({"role": msg.role, "content": msg.content}) + + prompt = "" + if "chat_template" in processor.__dict__.keys(): + prompt = processor.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + + elif "tokenizer" in processor.__dict__.keys(): + if model.config.model_type != "paligemma": + prompt = processor.tokenizer.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + else: + prompt = request.messages[-1].content + + # Generate the response + output = vlm_generate( + model, processor, image, prompt, image_processor, verbose=False + ) + + else: + tokenizer = model_data["tokenizer"] + chat_messages = [ + {"role": msg.role, "content": msg.content} for msg in request.messages + ] + if "chat_template" in tokenizer.__dict__.keys(): + prompt = tokenizer.apply_chat_template( + chat_messages, + tokenize=False, + add_generation_prompt=True, + ) + else: + prompt = request.messages[-1].content + + output = lm_generate(model, tokenizer, prompt, verbose=False) + + # Prepare the response + response = ChatCompletionResponse( + id=f"chatcmpl-{os.urandom(4).hex()}", + created=int(time.time()), + model=request.model, + choices=[ + { + "index": 0, + "message": {"role": "assistant", "content": output}, + "finish_reason": "stop", + } + ], + ) + + return response + + +@app.get("/v1/models") +async def list_models(): + return {"models": model_provider.get_available_models()} + + +@app.post("/v1/models") +async def add_model(model_name: str): + model_provider.load_model(model_name) + return {"status": "success", "message": f"Model {model_name} added successfully"} + + +def run(): + import uvicorn + + uvicorn.run("fastmlx:app", host="127.0.0.1", port=8000, reload=True) + + +if __name__ == "__main__": + run() diff --git a/fastmlx/utils.py b/fastmlx/utils.py new file mode 100644 index 00000000..6f75205f --- /dev/null +++ 
b/fastmlx/utils.py @@ -0,0 +1,57 @@ +import os +from typing import Any, Dict + +# MLX Imports +try: + from mlx_lm import load as lm_load + from mlx_lm import models as lm_models + from mlx_vlm import load as vlm_load + from mlx_vlm import models as vlm_models + from mlx_vlm.utils import load_image_processor +except ImportError: + print("Warning: mlx or mlx_lm not available. Some functionality will be limited.") + + +def get_model_type_list(models, type="vlm"): + + # Get the directory path of the models package + models_dir = os.path.dirname(models.__file__) + + # List all items in the models directory + all_items = os.listdir(models_dir) + + if type == "vlm": + submodules = [ + item + for item in all_items + if os.path.isdir(os.path.join(models_dir, item)) + and not item.startswith(".") + and item != "__pycache__" + ] + return submodules + else: + return all_items + + +MODELS = { + "vlm": get_model_type_list(vlm_models), + "lm": get_model_type_list(lm_models, "lm"), +} +MODEL_REMAPPING = {"llava-qwen2": "llava_bunny", "bunny-llama": "llava_bunny"} + + +# Model Loading and Generation Functions +def load_vlm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]: + model, processor = vlm_load(model_name, {"trust_remote_code": True}) + image_processor = load_image_processor(model_name) + return { + "model": model, + "processor": processor, + "image_processor": image_processor, + "config": config, + } + + +def load_lm_model(model_name: str, config: Dict[str, Any]) -> Dict[str, Any]: + model, tokenizer = lm_load(model_name) + return {"model": model, "tokenizer": tokenizer, "config": config} diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..9599fa35 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,86 @@ +site_name: fastmlx +site_description: FastMLX is a high performance production ready API to host MLX models. 
+site_author: Blaizzy +site_url: https://Blaizzy.github.io/fastmlx +repo_url: https://github.com/Blaizzy/fastmlx + +copyright: "Copyright © 2024 - 2024 Prince Canuma" + +theme: + palette: + - scheme: default + # primary: blue + # accent: indigo + toggle: + icon: material/toggle-switch-off-outline + name: Switch to dark mode + - scheme: slate + primary: indigo + accent: indigo + toggle: + icon: material/toggle-switch + name: Switch to light mode + name: material + icon: + repo: fontawesome/brands/github + # logo: assets/logo.png + # favicon: assets/favicon.png + features: + - navigation.instant + - navigation.tracking + - navigation.top + - search.highlight + - search.share + custom_dir: "docs/overrides" + font: + text: Google Sans + code: Regular + +plugins: + - search + - mkdocstrings + - git-revision-date + - git-revision-date-localized: + enable_creation_date: true + type: timeago + # - pdf-export + - mkdocs-jupyter: + include_source: True + ignore_h1_titles: True + execute: True + allow_errors: false + ignore: ["conf.py"] + execute_ignore: ["*ignore.ipynb"] + +markdown_extensions: + - admonition + - abbr + - attr_list + - def_list + - footnotes + - meta + - md_in_html + - pymdownx.superfences + - pymdownx.highlight: + linenums: true + - toc: + permalink: true + +# extra: +# analytics: +# provider: google +# property: UA-XXXXXXXXX-X + +nav: + - Home: index.md + - Installation: installation.md + - Usage: usage.md + - Contributing: contributing.md + - FAQ: faq.md + - Changelog: changelog.md + - Report Issues: https://github.com/Blaizzy/fastmlx/issues + - Examples: + - examples/intro.ipynb + - API Reference: + - fastmlx module: fastmlx.md + - common module: common.md diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..9aa1edb9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,82 @@ +[project] +name = "fastmlx" +version = "0.0.1" +dynamic = [ + "dependencies", +] +description = "FastMLX is a high performance production ready API to host MLX 
models." +readme = "README.md" +requires-python = ">=3.8" +keywords = [ +    "fastmlx", +] +license = {text = "Apache Software License 2.0"} +authors = [ +    {name = "Prince Canuma", email = "prince.gdt@gmail.com"}, +] +classifiers = [ +    "Intended Audience :: Developers", +    "License :: OSI Approved :: Apache Software License", +    "Natural Language :: English", +    "Programming Language :: Python :: 3.8", +    "Programming Language :: Python :: 3.9", +    "Programming Language :: Python :: 3.10", +    "Programming Language :: Python :: 3.11", +    "Programming Language :: Python :: 3.12", +] + +[project.entry-points."console_scripts"] +fastmlx = "fastmlx.fastmlx:run" + +[project.optional-dependencies] +all = [ +    "fastmlx[extra]", +] + +extra = [ +    "pandas", +] + + +[tool] +[tool.setuptools.packages.find] +include = ["fastmlx*"] +exclude = ["docs*"] + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} + + +[tool.distutils.bdist_wheel] +universal = true + + +[tool.bumpversion] +current_version = "0.0.1" +commit = true +tag = true + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = 'version = "{current_version}"' +replace = 'version = "{new_version}"' + +[[tool.bumpversion.files]] +filename = "fastmlx/__init__.py" +search = '__version__ = "{current_version}"' +replace = '__version__ = "{new_version}"' + + +[tool.flake8] +exclude = [ +    "docs", +] +max-line-length = 88 + + +[project.urls] +Homepage = "https://github.com/Blaizzy/fastmlx" + +[build-system] +requires = ["setuptools>=64", "setuptools_scm>=8"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..c7d4791c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +mlx>=0.15 +mlx-lm>=0.15.2 +mlx-vlm>=0.0.11 +fastapi>=0.111.0 +jinja2 \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..1e1ee754 --- /dev/null +++ b/tests/__init__.py 
#!/usr/bin/env python

"""Tests for the `fastmlx` package (tests/test_fastmlx.py).

The FastAPI app is exercised through a TestClient while the model-loading
and generation functions are replaced with lightweight mocks, so no real
MLX models are ever downloaded or executed.
"""

import json
from unittest.mock import MagicMock, patch

import pytest
from fastapi.testclient import TestClient

# Import the actual classes and functions under test.
from fastmlx import ChatCompletionRequest, ChatMessage, ModelProvider, app


class MockModelProvider(ModelProvider):
    """Provider subclass that fabricates model bundles instead of loading them.

    A model name containing "llava" is treated as a vision-language model
    ("vlm"); any other name is treated as a plain language model ("lm").
    """

    def __init__(self):
        super().__init__()
        self.models = {}  # model_name -> fake model bundle

    def load_model(self, model_name: str):
        # Lazily create a fake bundle on first request; subsequent calls
        # return the cached entry, mirroring the real provider's caching.
        if model_name not in self.models:
            model_type = "vlm" if "llava" in model_name.lower() else "lm"
            self.models[model_name] = {
                "model": MagicMock(),
                "processor": MagicMock(),
                "tokenizer": MagicMock(),
                # Only vision-language models carry an image processor.
                "image_processor": MagicMock() if model_type == "vlm" else None,
                "config": {"model_type": model_type},
            }
        return self.models[model_name]

    def get_available_models(self):
        return list(self.models.keys())


# Mock MODELS registry: model family -> name fragments the app recognizes.
MODELS = {"vlm": ["llava"], "lm": ["phi"]}


def mock_generate(*args, **kwargs):
    """Stand-in for both vlm_generate and lm_generate; returns a fixed string."""
    return "generated response"


@pytest.fixture(scope="module")
def client():
    """TestClient with provider and generators patched for the whole module.

    NOTE: module scope means every test in this file shares one provider
    instance, so models loaded by one test remain visible to later tests.
    """
    with patch("fastmlx.fastmlx.model_provider", MockModelProvider()), patch(
        "fastmlx.fastmlx.vlm_generate", mock_generate
    ), patch("fastmlx.fastmlx.lm_generate", mock_generate), patch(
        "fastmlx.fastmlx.MODELS", MODELS
    ):
        yield TestClient(app)


def test_chat_completion_vlm(client):
    """A chat completion carrying an image routes to the VLM path."""
    request = ChatCompletionRequest(
        model="test_llava_model",
        messages=[ChatMessage(role="user", content="Hello")],
        image="test_image",
    )
    response = client.post(
        "/v1/chat/completions", json=json.loads(request.model_dump_json())
    )

    assert response.status_code == 200
    assert "generated response" in response.json()["choices"][0]["message"]["content"]


def test_chat_completion_lm(client):
    """A text-only chat completion routes to the LM path."""
    request = ChatCompletionRequest(
        model="test_phi_model", messages=[ChatMessage(role="user", content="Hello")]
    )
    response = client.post(
        "/v1/chat/completions", json=json.loads(request.model_dump_json())
    )

    assert response.status_code == 200
    assert "generated response" in response.json()["choices"][0]["message"]["content"]


def test_list_models(client):
    """Models registered via POST /v1/models appear in GET /v1/models."""
    client.post("/v1/models?model_name=test_llava_model")
    client.post("/v1/models?model_name=test_phi_model")

    response = client.get("/v1/models")

    assert response.status_code == 200
    # FIX: use a subset check instead of strict set equality. The
    # module-scoped client shares one provider across all tests, so models
    # loaded by sibling tests (e.g. test_add_model) may also be present
    # depending on execution order; strict equality made this test
    # order-dependent and flaky under test randomization.
    assert {"test_llava_model", "test_phi_model"} <= set(response.json()["models"])


def test_add_model(client):
    """POST /v1/models registers a new model and reports success."""
    response = client.post("/v1/models?model_name=new_llava_model")

    assert response.status_code == 200
    assert response.json() == {
        "status": "success",
        "message": "Model new_llava_model added successfully",
    }


if __name__ == "__main__":
    pytest.main(["-v", __file__])