diff --git a/.github/workflows/code-check.yml b/.github/workflows/code-check.yml index 1ac760d..246f097 100644 --- a/.github/workflows/code-check.yml +++ b/.github/workflows/code-check.yml @@ -6,21 +6,21 @@ jobs: pre-commit: strategy: matrix: - python-version: ['3.10', '3.11', '3.12'] + python-version: ["3.10", "3.11"] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: eifinger/setup-rye@v1 - with: - enable-cache: true - cache-prefix: 'venv-funcchain' - - name: pin version - run: rye pin ${{ matrix.python-version }} - - name: Sync rye - run: rye sync - - name: Run pre-commit - run: rye run pre-commit run --all-files - - name: Run tests - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: rye run pytest -m "not skip_on_actions" + - uses: actions/checkout@v2 + - uses: eifinger/setup-rye@v1 + with: + enable-cache: true + cache-prefix: "venv-funcchain" + - name: pin version + run: rye pin ${{ matrix.python-version }} + - name: Sync rye + run: rye sync + - name: Run pre-commit + run: rye run pre-commit run --all-files + - name: Run tests + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: rye run pytest -m "not skip_on_actions" diff --git a/.gitignore b/.gitignore index 8cb8a54..e326d92 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,4 @@ cython_debug/ vscext .models +.python-version diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 25b11eb..3cd30bf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,23 +1,14 @@ repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 - hooks: - - id: check-yaml - - id: end-of-file-fixer - - id: trailing-whitespace - -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.7.1 - hooks: - - id: mypy - args: [--ignore-missing-imports, --follow-imports=skip] - additional_dependencies: [types-requests] - -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.7 - hooks: - - id: ruff - args: [ --fix ] - - id: ruff-format - types_or: [ python, pyi, jupyter ] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.7 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format + types_or: [python, pyi, jupyter] diff --git a/.python-version b/.python-version deleted file mode 100644 index eb07499..0000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -cpython@3.12.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..b059829 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,11 @@ +# Contributing + +To contribute, clone the repo and run: + +```bash +./dev_setup.sh +``` + +You should not run unstrusted scripts so ask ChatGPT to explain what the contents of this script do! + +This will install and setup your development environment using [rye](https://rye-up.com) or pip. diff --git a/MODELS.md b/MODELS.md index f865603..5958f32 100644 --- a/MODELS.md +++ b/MODELS.md @@ -2,10 +2,10 @@ ## LangChain Chat Models -You can set the `settings.llm` with any ChatModel the LangChain library. +You can set the `settings.llm` with any LangChain ChatModel. ```python -from langchain.chat_models import AzureChatOpenAI +from langchain_openai.chat_models import AzureChatOpenAI settings.llm = AzureChatOpenAI(...) 
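+
+# any other LangChain chat model can be assigned the same way, for example (sketch):
+from langchain_openai.chat_models import ChatOpenAI
+settings.llm = ChatOpenAI(model="gpt-4-1106-preview")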
``` @@ -16,23 +16,28 @@ You can also set the `settings.llm` with a string identifier of a ChatModel incl ### Schema -`/:` +`/:` ### Providers - `openai`: OpenAI Chat Models -- `gguf`: Huggingface GGUF Models from TheBloke using LlamaCpp -- `local` | `thebloke` | `huggingface`: alias for `gguf` +- `llamacpp`: Run local models directly using llamacpp (alias: `thebloke`, `gguf`) +- `ollama`: Run local models through Ollama (wrapper for llamacpp) +- `azure`: Azure Chat Models +- `anthropic`: Anthropic Chat Models +- `google`: Google Chat Models ### Examples -- `openai/gpt-3.5-turbo`: Classic ChatGPT -- `gguf/deepseek-llm-7b-chat`: DeepSeek LLM 7B Chat -- `gguf/OpenHermes-2.5-7B`: OpenHermes 2.5 -- `TheBloke/deepseek-llm-7B-chat-GGUF:Q3_K_M`: (eg thebloke huggingface identifier) -- `local/neural-chat-7B-v3-1`: Neural Chat 7B (local as alias for gguf) +- `openai/gpt-3.5-turbo`: ChatGPT Classic +- `openai/gpt-4-1106-preview`: GPT-4-Turbo +- `ollama/openchat`: OpenChat3.5-1210 +- `ollama/openhermes2.5-mistral`: OpenHermes 2.5 +- `llamacpp/openchat-3.5-1210`: OpenChat3.5-1210 +- `TheBloke/Nous-Hermes-2-SOLAR-10.7B-GGUF`: alias for `llamacpp/...` +- `TheBloke/openchat-3.5-0106-GGUF:Q3_K_L`: with Q label ### additional notes -Checkout the file `src/funcchain/utils/model_defaults.py` for the code that parses the string identifier. +Checkout the file `src/funcchain/model/defaults.py` for the code that parses the string identifier. Feel free to create a PR to add more models to the defaults. Or tell me how wrong I am and create a better system. diff --git a/README.md b/README.md index 968e0cc..e14573f 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,14 @@ [![Twitter Follow](https://img.shields.io/twitter/follow/shroominic?style=social)](https://x.com/shroominic) ```bash -> pip install "funcchain[all]" +pip install funcchain ``` ## Introduction `funcchain` is the *most pythonic* way of writing cognitive systems. Leveraging pydantic models as output schemas combined with langchain in the backend allows for a seamless integration of llms into your apps. -It works perfect with OpenAI Functions or LlamaCpp grammars (json-schema-mode). +It utilizes perfect with OpenAI Functions or LlamaCpp grammars (json-schema-mode) for efficient structured output. +In the backend it compiles the funcchain syntax into langchain runnables so you can easily invoke, stream or batch process your pipelines. 
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/ricklamers/funcchain-demo) @@ -94,7 +95,7 @@ match lst: ## Vision Models ```python -from PIL import Image +from funcchain import Image from pydantic import BaseModel, Field from funcchain import chain, settings @@ -132,7 +133,7 @@ from pydantic import BaseModel, Field from funcchain import chain, settings # auto-download the model from huggingface -settings.llm = "gguf/openhermes-2.5-mistral-7b" +settings.llm = "ollama/openchat" class SentimentAnalysis(BaseModel): analysis: str @@ -153,32 +154,38 @@ print(poem.analysis) ## Features -- minimalistic and easy to use -- easy swap between openai and local models -- write prompts as python functions -- pydantic models for output schemas -- langchain core in the backend -- fstrings or jinja templates for prompts -- fully utilises OpenAI Functions or LlamaCpp Grammars +- pythonic +- easy swap between openai or local models +- dynamic output types (pydantic models, or primitives) +- vision llm support +- langchain_core as backend +- jinja templating for prompts +- reliable structured output +- auto retry parsing - langsmith support -- async and pythonic -- auto gguf model download from huggingface -- streaming support +- sync, async, streaming, parallel, fallbacks +- gguf download from huggingface +- type hints for all functions and mypy support +- chat router component +- composable with langchain LCEL +- easy error handling +- enums and literal support +- custom parsing types ## Documentation -Highly recommend to try out the examples in the `./examples` folder. +[Checkout the docs here](https://shroominic.github.io/funcchain/) 👈 -Coming soon... feel free to add helpful .md files :) +Also highly recommend to try and run the examples in the `./examples` folder. ## Contribution -You want to contribute? That's great! Please run the dev setup to get started: +You want to contribute? Thanks, that's great! +For more information checkout the [Contributing Guide](docs/contributing/dev-setup.md). +Please run the dev setup to get started: ```bash -> git clone https://github.com/shroominic/funcchain.git && cd funcchain +git clone https://github.com/shroominic/funcchain.git && cd funcchain -> ./dev_setup.sh +./dev_setup.sh ``` - -Thanks! diff --git a/dev-setup.sh b/dev-setup.sh index fee5319..ebd1b1d 100755 --- a/dev-setup.sh +++ b/dev-setup.sh @@ -3,16 +3,42 @@ # check if rye is installed if ! command -v rye &> /dev/null then - echo "rye could not be found: installing now ..." - curl -sSf https://rye-up.com/get | bash - echo "Check the rye docs for more info: https://rye-up.com/" + echo "rye could not be found" + echo "Would you like to install via rye or pip? Enter 'rye' or 'pip':" + read install_method + clear + + if [ "$install_method" = "rye" ] + then + echo "Installing via rye now ..." + curl -sSf https://rye-up.com/get | bash + echo "Check the rye docs for more info: https://rye-up.com/" + + elif [ "$install_method" = "pip" ] + then + echo "Installing via pip now ..." + python3 -m venv .venv + source .venv/bin/activate + pip install -r requirements.lock + + else + echo "Invalid option. Please run the script again and enter 'rye' or 'pip'." 
+ exit 1 + fi + + clear fi -echo "SYNC: setup .venv" -rye sync +if [ "$install_method" = "rye" ] +then + echo "SYNC: setup .venv" + rye sync + + echo "ACTIVATE: activate .venv" + rye shell -echo "ACTIVATE: activate .venv" -rye shell + clear +fi echo "SETUP: install pre-commit hooks" pre-commit install diff --git a/docs/advanced/async.md b/docs/advanced/async.md new file mode 100644 index 0000000..c24a23d --- /dev/null +++ b/docs/advanced/async.md @@ -0,0 +1,39 @@ +# Async + +## Why and how to use using async? + +Asyncronous promgramming is a way to easily parallelize processes in python. +This is very useful when dealing with LLMs because every request takes a long time and the python interpreter should do alot of other things in the meantime instead of waiting for the request. + +Checkout [this brillian async tutorial](https://fastapi.tiangolo.com/async/) if you never coded in an asyncronous way. + +## Async in FuncChain + +You can use async in funcchain by creating your functions using `achain()` instead of the normal `chain()`. +It would then look like this: + +```python +from funcchain import achain + +async def generate_poem(topic: str) -> str: + """ + Generate a poem inspired by the given topic. + """ + return await achain() +``` + +You can then `await` the async `generate_poem` function inside another async funtion or directly call it using `asyncio.run(generate_poem("birds"))`. + +## Async in LangChain + +When converting your funcchains into a langchain runnable you can use the native langchain way of async. +This would be `.ainvoke(...)`, `.astream(...)` or `.abatch(...)` . + +## Async Streaming + +You can use langchains async streaming interface but also use the `stream_to(...)` wrapper (explained [here](../concepts/streaming.md#strem_to-wrapper)) as an async context manager. + +```python +async with stream_to(...): + await ... 
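+
+# a more concrete sketch (generate_poem is the async function from above,
+# print_token is an illustrative handler name, not part of the funcchain API):
+# async def print_token(token: str) -> None:
+#     print(token, end="", flush=True)
+#
+# async with stream_to(print_token):
+#     await generate_poem("birds")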
+``` diff --git a/docs/advanced/codebase-scaling.md b/docs/advanced/codebase-scaling.md new file mode 100644 index 0000000..ce79a89 --- /dev/null +++ b/docs/advanced/codebase-scaling.md @@ -0,0 +1,9 @@ +# Codebase Scaling + +## Multi file projects + +### TODO + +## Structure + +### TODO diff --git a/docs/advanced/custom-parser-types.md b/docs/advanced/custom-parser-types.md new file mode 100644 index 0000000..a0168b5 --- /dev/null +++ b/docs/advanced/custom-parser-types.md @@ -0,0 +1,21 @@ +# Custom Parsers + +## Example + +### TODO + +## Grammars + +### TODO + +## Format Instructions + +### TODO + +## parse() Function + +### TODO + +## Write your own Parser + +### TODO diff --git a/docs/advanced/customization.md b/docs/advanced/customization.md new file mode 100644 index 0000000..790fb18 --- /dev/null +++ b/docs/advanced/customization.md @@ -0,0 +1,17 @@ +# Customization + +## extra args inside chain + +### TODO + +## low level langchain + +### TODO + +## extra args inside @runnable + +### TODO + +## custom ll models + +### TODO diff --git a/docs/advanced/runnables.md b/docs/advanced/runnables.md new file mode 100644 index 0000000..9e6176c --- /dev/null +++ b/docs/advanced/runnables.md @@ -0,0 +1,9 @@ +# runnables + +## LangChain Expression Language (LCEL) + +### TODO + +## Streaming, Parallel, Async and + +### TODO diff --git a/docs/advanced/signature.md b/docs/advanced/signature.md new file mode 100644 index 0000000..97a30cd --- /dev/null +++ b/docs/advanced/signature.md @@ -0,0 +1,9 @@ +# Signature + +## Compilation + +### TODO + +## Schema + +### TODO diff --git a/docs/advanced/stream-parsing.md b/docs/advanced/stream-parsing.md new file mode 100644 index 0000000..6f06e58 --- /dev/null +++ b/docs/advanced/stream-parsing.md @@ -0,0 +1,9 @@ +# Stream Parsing + +## Transform Output Parsing + +### TODO + +## Composing Runnables + +### TODO diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 0000000..766195f --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,55 @@ +# Changelog + +## [0.2.0] - 2024-01-28 + +### Added + +- todo write about new features + +### Changed + +- todo write about changes + +### Deprecated + +- todo write about deprecations + +### Removed + +- todo write about removals + +### Fixed + +- todo write about fixes + +### Security + +- Updated dependencies to mitigate known vulnerabilities. + +## [0.1.10] - 2023-12-12 + +- universal model loader +- improved configuration +- SettingOverrides +- auto gguf model download +- optional dependencies +- improve examples +- other small improvements + +## [0.1.0] - [0.1.9] - 2023-12-01 + +- pydantic v2 +- llamacpp support +- auto retry parsing +- jinja templates +- multiple tiny improvements +- codebase refactor +- bug fixes + +## [0.0.1] - [0.1.7] - 2023-10-08 + +- Undocumented Experimental Releases + +## [0.0.1] - 2023-08-31 + +- Initial Release diff --git a/docs/concepts/chain.md b/docs/concepts/chain.md index e69de29..bf7f5c4 100644 --- a/docs/concepts/chain.md +++ b/docs/concepts/chain.md @@ -0,0 +1,55 @@ +# Chain + +## `chain()` + +The chain function abstracts away all the magic happening in the funcchain backend. It extracts the docstring, input arguments and return type of the function and compiles everything into a langchain prompt. + +```python +from funcchain import chain + +def ask(question: str) -> str: + """ + Answer the given question. + """ + return chain() + +ask("What is the capital of Germany?") +# => "The capital of Germany is Berlin." 
+``` + +## `achain()` + +Async version of the `chain()` function. + +```python +import asyncio +from funcchain import achain + +async def ask(question: str) -> str: + """ + Answer the given question. + """ + return await achain() + +asyncio.run(ask("What is the capital of Germany?")) +# => "The capital of Germany is Berlin." +``` + +## `@runnable` + +The `@runnable` decorator is used to compile a chain function into a langchain runnable object. +You just write a normal funcchain function using chain() and then decorate it with `@runnable`. + +```python +from funcchain import chain, runnable + +@runnable +def ask(question: str) -> str: + """ + Answer the given question. + """ + return chain() + +ask.invoke(input={"question": "What is the capital of Germany?"}) +# => "The capital of Germany is Berlin." +``` diff --git a/docs/concepts/errors.md b/docs/concepts/errors.md index e69de29..a9d6661 100644 --- a/docs/concepts/errors.md +++ b/docs/concepts/errors.md @@ -0,0 +1,50 @@ +# Errors + +## Example + +```python +from funcchain import BaseModel, Error, chain +from rich import print + +class User(BaseModel): + name: str + email: str | None + +def extract_user_info(text: str) -> User | Error: + """ + Extract the user information from the given text. + In case you do not have enough infos, return an error. + """ + return chain() + +print(extract_user_info("hey what's up?")) +# => Error(title='Invalid Input', description='The input text does not contain user information.') + +print(extract_user_info("I'm John and my email is john@mail.com")) +# => User(name='John', email='john@mail.com') +``` + +## Error Type + +(currently only supported for union output types e.g. `Answer | Error` so only openai models) + +The Error type is a special type that can be used to return an error from a chain function. +It is just a pydantic model with a title and description field. + +```python +class Error(BaseModel): + """ + Fallback function for invalid input. + If you are unsure on what function to call, use this error function as fallback. + This will tell the user that the input is not valid. + """ + + title: str = Field(description="CamelCase Name titeling the error") + description: str = Field(..., description="Short description of the unexpected situation") + + def __raise__(self) -> None: + raise Exception(self.description) +``` + +You can also create your own error types by inheriting from the Error type. +Or just do it similar to the example above. diff --git a/docs/concepts/input.md b/docs/concepts/input.md index e69de29..bc3b39d 100644 --- a/docs/concepts/input.md +++ b/docs/concepts/input.md @@ -0,0 +1,111 @@ +# Input Arguments + +Funcchain utilises your function's input arguments including type hints to compile your prompt. +You can utilise the following types: + +## Strings + +All string inputs serve as classic prompt placeholders and are replaced with the input value. +You can insert anything as long as you cast it to a string and the language model will see its as text. + +```python +def create_username(full_name: str, email: str) -> str: + """ + Create a creative username from the given full name and email. + """ + return chain() +``` + +All strings that are not mentioned in the instructions are automatically added to the beginning of the prompt. + +When calling `create_username("John Doe", "john.doe@gmail.com")` the compiled prompt will look like this: + +```html + + FULL_NAME: + John Doe + + EMAIL: + john.doe@gmail.com + + Create a creative username from the given full name and email. 
+ +``` + +The language model will then be able to use the input values to generate a good username. + +You can also manually format your instructions if you want to have more control over the prompt. +Use jinja2 syntax to insert the input values. + +```python +def create_username(full_name: str, email: str) -> str: + """ + Create a creative username for {{ full_name }} with the mail {{ email }}. + """ + return chain() +``` + +Compiles to: + +```html + + Create a creative username for John Doe with the mail john.doe@gmail.com. + +``` + +## Pydantic Models + +You can also use pydantic models as input arguments. +This is useful if you already have complex data structures that you want to use as input. + +```python +class User(BaseModel): + full_name: str + email: str + +def create_username(user: User) -> str: + """ + Create a creative username from the given user. + """ + return chain() +``` + +By default, the pydantic model is converted to a string using the `__str__` method +and then added to the prompt. + +```html + + USER: + full_name='Herbert Geier' email='hello@bert.com' + + Create a creative username from the given user. + +``` + +If you want more control you can override the `__str__` method of your pydantic model. +Or use jinja2 syntax to manually unpack the model. + +```python +class User(BaseModel): + full_name: str + email: str + +def create_username(user: User) -> str: + """ + Create a creative username for {{ user.full_name }} with the mail {{ user.email }}. + """ + return chain() +``` + +## Images + +todo: write + +## Other Types + +More special types are coming soon. + +## Important Notes + +You need to use type hints for all your input arguments. +Otherwise, funcchain will just ignore them. diff --git a/docs/concepts/langchain.md b/docs/concepts/langchain.md new file mode 100644 index 0000000..26b5e18 --- /dev/null +++ b/docs/concepts/langchain.md @@ -0,0 +1,60 @@ +# LangChain + +## What is LangChain? + +[LangChain](https://python.langchain.com/docs/get_started/introduction) is the most advanced library for building applications using large language models. +Funcchain is built on top of `langchain_core` which inculdes [LangChain Expression Language (LCEL)](https://python.langchain.com/docs/expression_language/get_started) and alot more powerful core abstractions for building cognitive architectures. + +## Why building on top of it? + +We have been looking into alot of different libraries and wanted to start a llm framework from scratch. +But langchain already provides all of the fundamental abstractions we need to use and it is the most advanced library we have found so far. + +## Compatibility + +Funcchain is compatible with all langchain chat models, memory stores and runnables. +It's using langchain output parsers and the templating system. +On the other hand langchain is compatible with funcchain by using the `@runnable` decorator. +This will convert your function into a runnable that can be used in LCEL so you can build your own complex cognitive architectures. + +## LCEL Example (RAG) + +```python +from funcchain import chain, runnable +from langchain_community.vectorstores.faiss import FAISS +from langchain_core.runnables import Runnable, RunnablePassthrough +from langchain_openai.embeddings import OpenAIEmbeddings + +@runnable +def generate_poem(topic: str, context: str) -> str: + """ + Generate a poem about the topic with the given context. 
+ """ + return chain() + +vectorstore = FAISS.from_texts( + [ + "cold showers are good for your immune system", + "i dont like when people are mean to me", + "japanese tea is full of heart warming flavors", + ], + embedding=OpenAIEmbeddings(), +) +retriever = vectorstore.as_retriever(search_kwargs={"k": 1}) + +retrieval_chain: Runnable = { + "context": retriever, + "topic": RunnablePassthrough(), +} | generate_poem + +print(retrieval_chain.invoke("love")) +``` + +The chain will then retrieve ´japanese tea is full of heart warming flavors` as context since it's the most similar to the topic "love". + +```bash +# => In a cup of tea, love's warmth unfurls +# Japanese flavors, heartwarming pearls +# A sip of love, in every swirl +# In Japanese tea, love's essence twirls +``` diff --git a/docs/concepts/local-models.md b/docs/concepts/local-models.md new file mode 100644 index 0000000..ff95c36 --- /dev/null +++ b/docs/concepts/local-models.md @@ -0,0 +1,21 @@ +# Local Models + +Funcchain supports local models through the [llama.cpp](https://github.com/ggerganov/llama.cpp) project using the [llama_cpp_python](https://llama-cpp-python.readthedocs.io/en/latest/) bindings. + +## LlamaCPP + +Written in highly optimized C++ code, LlamaCPP is a library for running large language models locally. +It uses GGUF files which are a binary format for storing quantized versions of large language models. +You can download alot of GGUF models from TheBloke on huggingface. + +## Grammars + +Context Free Grammars are a powerful abstraction for a deterministic shape of a string. +Funcchain utilizes this by forcing local models to respond in a structured way. + +For example you can create a grammar that forces the model to always respond with a json object. +This is useful if you want to use the output of the model in your code. + +Going one step further you can also create a grammar that forces the model to respond with a specific pydantic model. + +This is how funcchain is able to use local models in a structured way. diff --git a/docs/concepts/overview.md b/docs/concepts/overview.md index 48b7a95..d168e8e 100644 --- a/docs/concepts/overview.md +++ b/docs/concepts/overview.md @@ -1,115 +1,17 @@ # Concepts -## Concepts Overview - -| name | description | -|-|-| -| chain | Main funcchain to get responses from the assistant | -| achain | Async version of chain | -| settings | Global settings object | -| BaseModel | Pydantic model base class | - -## chain - -The `chain` function is the main interface to get responses from the assistant. It handles creating the prompt, querying the model, and parsing the response. - -Key things: - -- Takes instruction and system prompt strings to create the prompt -- Automatically extracts docstring of calling function as instruction -- Gets output parser based on return type annotation -- Supports OpenAI Functions under the hood -- Retries on parser failure -- Logs tokens usage - -Usage: - -```python -from funcchain import chain - - -def get_weather(city: str) -> str: - """ - Get the weather for {city}. - """ - return chain() - -print(get_weather("Barcelona")) -``` - -## achain - -The `achain` function is an async version of `chain` that can be awaited. - -Usage: - -```python -from funcchain import achain -import asyncio - - -async def get_weather(city: str) -> str: - """ - Get the weather for {city}. - """ - return await achain() - - -print(asyncio.run(get_weather("Barcelona"))) -``` - -## settings - -The `settings` object contains global settings for funcchain. 
- -Key attributes: - -- `llm`: Configures the default llm -- `max_tokens`: Max tokens per request -- `default_system_prompt`: Default system prompt -- `openai_api_key`: OpenAI API key -- `model_kwargs()`: kwargs for model like temperature - -Usage: - -```python -from funcchain import settings - -settings.llm = MyCustomChatModel() -settings.max_tokens = 2048 -``` - -## BaseModel - -`BaseModel` is the Pydantic model base class used to define output schemas. - -Funcchain can automatically parse responses to Pydantic models. - -Usage: - -```python -from funcchain import chain -from pydantic import BaseModel, Field - - -class Article(BaseModel): - title: str = Field(description="Title of the article") - description: str = Field(description="Description of the content of the article") - - -def summarize(text: str) -> Article: - """ - Summarize the text into an Article: - {text} - """ - return chain() - - -print( - summarize( - """ - AI has the potential to revolutionize education, offering personalized and individualized teaching, and improved learning outcomes. AI can analyze student data and provide real-time feedback to teachers and students, allowing them to adjust their teaching and learning strategies accordingly. One of the biggest benefits of AI in education is the ability to provide personalized and individualized teaching. AI can analyze student data and create a personalized learning plan for each individual student, taking into account their strengths, weaknesses, and learning styles. This approach has the potential to dramatically improve learning outcomes and engagement. The potential of AI in education is enormous, and it is expected to revolutionize the way we approach degree and diploma programs in the future. AI-powered technologies can provide students with real-time feedback, help them to stay on track with their studies, and offer a more personalized and engaging learning experience. - """ - ) -) -``` +## Overview + +| name | description | +| ------------------------------------------------------------------- | ---------------------------------------------------- | +| [chain()](chain.md) | Core funcchain syntax component to write chains | +| [Input Args](input.md) | prompt placeholders and input for your chains | +| [BaseModel](pydantic.md) | Core component to create pydantic models/classes | +| [Settings](../getting-started/config.md#set-global-settings) | Global settings object with for all your chains | +| [SettingsOverride](../getting-started/config.md#set-local-settings) | Local settings dict for a specific chain | +| [OutputParser](parser.md) | Parses the llm output into your desired shape | +| [Prompting](prompting.md) | Templating system and techniques for writing prompts | +| [Vision](vision.md) | LLM that can also takes images as input/context | +| [Streaming](streaming.md) | Token by token streaming of llm output | +| [Unions](unions.md) | Pydantic union types for your models | +| [LangChain](langchain.md) | Library for building cognitive systems | diff --git a/docs/concepts/parser.md b/docs/concepts/parser.md index e69de29..b90caa7 100644 --- a/docs/concepts/parser.md +++ b/docs/concepts/parser.md @@ -0,0 +1,111 @@ +# Output Parser + +## Output Type Hints + +Funcchain recognises the output type hint you put on your function to automatically attach +a fitting output parser to the end of your chain. This makes it really to code because you just use normal python typing syntax and funcchain handles everything for your. 
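+
+To make this concrete, here is a minimal sketch (the function name and instruction are only illustrative):
+
+```python
+from funcchain import chain
+
+def extract_keywords(text: str) -> list[str]:
+    """
+    Extract the most important keywords from the text.
+    """
+    return chain()  # the list[str] return hint selects a matching output parser
+```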
+ +## Strings + +The simplest output type is a string. +The output parser will return the content of the AI response just as it is. + +## Pydantic Models + +To force the model to respond in a certain way you can use pydantic models. +This gives your alot of flexibility and control over the output because you can define the exact types of your fields and even add custom validation logic. Everything of your defined model will be part of the prompt including model_name, class_docstring, field_names, field_types and field_descriptions. +This gives you alot of room for prompt engineering and optimisation. + +```python +from funcchain import chain +from pydantic import BaseModel, Field + +class GroceryList(BaseModel): + recipie: str = Field(description="Goal of what to cook with all items.") + items: list[str] = Field(description="Items to buy") + +def create_grocerylist(customer_request: str) -> GroceryList + """ + Come up with a grocery list based on what the customer wants. + """ + return chain() +``` + +When calling this function with +e.g. `create_grocerylist("I want a cheap, protein rich and vegan meal.")` +the model is then forced to respond using the model as a json_schema +and the unterlying conversation would look like the following: + +```html + + CUSTOMER_REQUEST: + I want a cheap, protein rich and vegan meal. + + Come up with a grocery list based on what the customer wants. + + + + { + "recipie": "lentil soup" + "items" [ + "todo", + "insert", + "ingredients" + ] + } + +``` + +This json is then automatically validated and parsed into the pydantic model. +When a validation fails the model automatically recieves the error as followup message and tries again. + +## Primitive Types + +You can also use other primitive types like int, float, bool, list, Literals, Enums, etc.
+Funcchain will then create a temporary pydantic model with the type as a single field and use that as the output parser. + +```python +def create_grocerylist(customer_request: str) -> list[str] + """ + Come up with a grocery list based on what the customer wants. + """ + return chain() +``` + +This time when calling this function with +e.g. `create_grocerylist("I want a cheap, protein rich and vegan meal.")` +funcchain automatically creates a temporary pydantic model in the background like this: + +```python +class Extract(BaseModel): + value: list[str] +``` + +The model then understands the desired shape and will output like here: + +```html + + { + "value": [ + "todo", + "insert", + "ingredients" + ] + } + +``` + +## Union Types + +You can also use mupliple PydanticModels at once as the output type using Unions. +The LLM will then select one of the models that fits best your inputs. +Checkout the seperate page for [UnionTypes](unions.md) for more info. + +## Streaming + +You can stream everything with a `str` output type. + +Since pydantic models need to be fully constructed before they can be returned, you can't use them for streaming. +There is one approach to stream pydantic models but it works only if all fields are Optional, which is not the case for most models and they still come field by field. + +This is not implemented yet but will be added in the future. diff --git a/docs/concepts/prompting.md b/docs/concepts/prompting.md new file mode 100644 index 0000000..84c87f3 --- /dev/null +++ b/docs/concepts/prompting.md @@ -0,0 +1,52 @@ +# Prompting + +Prompting involes every text you write that the LLM recieves and interprets. It often involves `prompt engineering` which is optimizing and finetuning your wordings so the LLM understands what you wants and responds correctly. +Everything from the input argument names, output type and docstring are part of the prompt and are visible to the model to evaluate. Make sure your choose your terms well and try different wordings if you encounter problems. + +## Jinja2 Templating + +Often you can write your funcchains without templating but for special cases it is useful to do custom things. +Funcchain allows jinja2 as templating syntax for writing more complex prompts. +All function input areguments that are either `str` or a subclass of a `Pydantic Model` are awailable in the jinja environment and can be used. + +```python +class GroceryList(BaseModel): + recipie: str + items: list[str] + +def create_recipie(glist: GroceryList) -> str: + """ + I want to cook {{ glist.recipie }}. + Create a step by step recipie based on these ingridients I just bought: + {% for i in glist.items %} + - {{ i }} + {% endfor %} + """ + return chain() +``` + +The LLM will then recieve a formatted prompt based on what you input is. + +## Input Argument Placement + +If you do not specify a place in your prompt for your input arguments using jinja, +all unused arguments (`str` and `PydanticModels`) will then get automatically appended +to the beginning of your instruction. + +E.g. if you just provide `Create a step by step recipie based on the grocery list.`, +the prompt template would look like this: + +```html + + GLIST: + {{ glist }} + + Create a step by step recipie based on the grocery list. + +``` + +When inserting the instance of `GroceryList` into the template, the `__str__` method is called for converting the model into text. Keep this in mind if you want to customise apperence to the LLM. 
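+
+For example, a minimal sketch of a custom `__str__` (reusing the `GroceryList` model from above; how you render the text is up to you):
+
+```python
+from pydantic import BaseModel
+
+class GroceryList(BaseModel):
+    recipie: str
+    items: list[str]
+
+    def __str__(self) -> str:
+        # control exactly what the LLM sees when this model is inserted into the prompt
+        return f"Planned dish: {self.recipie}\nItems:\n" + "\n".join(f"- {i}" for i in self.items)
+```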
+ +## ChatModel Behavior + +Keep in mind that funcchain in always using instructional chat prompting, so instrution is made the perspective of a Human <-> AI conversation. If you process input from your users its good to talk of them as `customers` so the model understands the perspective. diff --git a/docs/concepts/pydantic.md b/docs/concepts/pydantic.md new file mode 100644 index 0000000..a205df4 --- /dev/null +++ b/docs/concepts/pydantic.md @@ -0,0 +1,68 @@ +# Pydantic + +`pydantic` is a python library for creating data structures with strict typing and automatic type validation. +When dealing with LLMs this is very useful because it exposes a precise `json-schema` that can be used with grammars or function-calling to force the LLM to respond in the desired way. +Additionally validation especially using custom validators can be used to automatically retry if the output does not match your requirements. + +## BaseModel + +When `from pydantic import BaseModel` this is imports the core class of Pydantic which can be used to construct your data structures. + +```python +from pydantic import BaseModel + +class User(BaseModel): + id: int + name: str + email: str + items: list[str] +``` + +This model can then be initiated: + +```python +user = User( + id=1943, + name="Albert Hofmann", + email="hofmann.albert@sandoz.ch", + items=["lab coat", "safety glasses", "a bicycle"] +) +``` + +## Field Descriptions + +To give the LLM more context about what you mean with the stucture you can provide field descriptions: + +```python +from pydantic import Field + +class User(BaseModel): + id: int + name: str = Field(description="FullName of the user.") + email: str + items: list[str] = Field(description="Everyday items of the user.") +``` + +These descriptions are included in the json-schema and are passed as format instructions into the prompt from the output parser. + +## Custom Validators + +You can also write custom validators if you want to check for specific information beyond just the type. + +```python +from pydantic import field_validator + +class User(BaseModel): + id: int + name: str = Field(description="FullName of the user.") + email: str + items: list[str] = Field(description="Everyday items of the user.") + + @field_validator("email") + def keywords_must_be_unique(cls, v: str) -> str: + if not v.endswith("@sandoz.ch"): + raise ValueError("User has to work at Sandoz to register!") + return v +``` + +In this example the validator makes sure every user has an email ending with `@sandoz.ch`. diff --git a/docs/concepts/streaming.md b/docs/concepts/streaming.md new file mode 100644 index 0000000..feb4497 --- /dev/null +++ b/docs/concepts/streaming.md @@ -0,0 +1,46 @@ +# Streaming + +Streaming is important if you want to do things with your LLM generation while the LLM is still generating. +This can enhance the user experience by already showing part of the response but you could also stop a generation early if it does not match certain requirements. + +## Console Log Streaming + +If you want to stream all the tokens generated quickly to your console output, +you can use the `settings.console_stream = True` setting. + +## `strem_to()` wrapper + +For streaming with non runnable funcchains you can wrap the LLM generation call into the `stream_to()` context manager. This would look like this: + +```python +def summarize(text: str) -> str: + """Summarize the text.""" + return chain() + +text = "... 
a large text" + +with stream_to(print): + summarize(text) +``` + +This will call token by token the print function so it will show up in your console. +But you can also insert any function that accepts a string to create your custom stream handlers. + +You can also use `async with stream_to(your_async_handler):` for async streaming. +Make sure summarize is then created using `await achain()`. + +## LangChain runnable streaming + +If you can compile every funcchain into a langchain runnable and then use the native langchain syntax for streaming: + +```python +@runnable +def summarize(text: str) -> str: + """Summarize the text.""" + return chain() + +text = "... a large text" + +for chunk in summarize.stream(input={"text": text}): + print(chunk, end="", flush=True) +``` diff --git a/docs/concepts/unions.md b/docs/concepts/unions.md new file mode 100644 index 0000000..4631039 --- /dev/null +++ b/docs/concepts/unions.md @@ -0,0 +1,18 @@ +# Union Types + +You can use union types in funcchain to make the model select one of multiple PydanticModels for the response. +You may have seen this in the [Complex Example](../index.md#complex-example). + +## Errors + +One good usecase for this is to always give the LLM the chance to raise an Error if the input is strange or not suited. You can check this in more detail [here](errors.md). + +## Agents + +Another usecase is to create an Agent like chain that selects one of multiple tools. +Every PydanticModel then represents the input schema of your function and you can even override the `__call__` method of your models to directly execute the tool if you need so. + +## Function Calling + +Under the hood the union type featur uses openai tool_calling, especially the functionallity to give the LLM multiple tools to choose from. +All pydantic models then get injected as available tools and the LLM is forced to call one of them. diff --git a/docs/concepts/vision.md b/docs/concepts/vision.md new file mode 100644 index 0000000..7c2124d --- /dev/null +++ b/docs/concepts/vision.md @@ -0,0 +1,19 @@ +# Vision Models + +Funcchain supports working with vision models so you can use images as input arguments of your prompts. +This only works if you also choose the correct model. +Currently known supported models: + +- `openai/gpt-4-vision-preview` +- `ollama/llava` or `ollama/bakllava` + +You need to set these using `settings.llm` (checkout the [Funcchain Settings](../getting-started/config.md)). + +## Image Type + +`from funcchain import Image` + +Funcchain introuces a special type for Images to quickly recognise image arguments and format them correctly into the prompt. +This type also exposes a variaty of classmethods for creating but also methods for converting Image instances. + +Checkout the [Vision Example](../features/vision.md) for more details. 
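+
+A rough usage sketch (the classmethod `Image.from_file` and the file path are assumptions for illustration - check the `Image` class and the linked example for the actual API):
+
+```python
+from funcchain import Image, chain, settings
+from pydantic import BaseModel, Field
+
+settings.llm = "openai/gpt-4-vision-preview"
+
+class AnalysisResult(BaseModel):
+    theme: str = Field(description="The theme of the image")
+    objects: list[str] = Field(description="A list of objects found in the image")
+
+def analyse_image(image: Image) -> AnalysisResult:
+    """
+    Analyse the image and extract its theme and objects.
+    """
+    return chain()
+
+# assumption: Image.from_file(...) constructs an Image from a local path
+result = analyse_image(Image.from_file("path/to/image.jpg"))
+```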
diff --git a/docs/contributing/codebase-structure.md b/docs/contributing/codebase-structure.md new file mode 100644 index 0000000..2ca11eb --- /dev/null +++ b/docs/contributing/codebase-structure.md @@ -0,0 +1,3 @@ +# Codebase Structure + +## TODO: explain structure of codebase to easier contribute diff --git a/docs/contributing/contributors.md b/docs/contributing/contributors.md new file mode 100644 index 0000000..fbbe9e4 --- /dev/null +++ b/docs/contributing/contributors.md @@ -0,0 +1,11 @@ +# Contributors + +We would like to acknowledge the contributions of the following people: + +| Name | Contribution | +| ---- | ------------ | +| | + +## How to Contribute + +If you would like to contribute to this project, please follow the guidelines in our [Contributing Guide](dev-setup.md). diff --git a/docs/contributing/dev-setup.md b/docs/contributing/dev-setup.md new file mode 100644 index 0000000..32e1eb8 --- /dev/null +++ b/docs/contributing/dev-setup.md @@ -0,0 +1,11 @@ +# Development Setup + +To contribute, clone the repo and run: + +```bash +./dev_setup.sh +``` + +You should not run unstrusted scripts so ask ChatGPT to explain what the contents of this script do! + +This will install and setup your development environment using [rye](https://rye-up.com) or pip. diff --git a/docs/contributing/license.md b/docs/contributing/license.md new file mode 100644 index 0000000..19c0f00 --- /dev/null +++ b/docs/contributing/license.md @@ -0,0 +1,6 @@ +# License + +## MIT License + +All contributions are made under the MIT License. +See LICENSE.md for more information. diff --git a/docs/contributing/roadmap.md b/docs/contributing/roadmap.md new file mode 100644 index 0000000..2b819eb --- /dev/null +++ b/docs/contributing/roadmap.md @@ -0,0 +1,9 @@ +# TODOs for writing the documentation + +- [ ] write out all todos in advanced + +- [ ] look more into other repos for mkdocs tricks and inspiration + +- [ ] make this file a general todo list + roadmap for contributors + +- [ ] maybe rename features to examples diff --git a/docs/contributing/security.md b/docs/contributing/security.md new file mode 100644 index 0000000..c64f264 --- /dev/null +++ b/docs/contributing/security.md @@ -0,0 +1,3 @@ +# Security + +If you notice any security risks please immidiatly email `contact@shroominic.com` and for major risks you will recieve a bounty of 100$. 
diff --git a/docs/css/custom.css b/docs/css/custom.css new file mode 100644 index 0000000..7776378 --- /dev/null +++ b/docs/css/custom.css @@ -0,0 +1,52 @@ +.termynal-comment { + color: #4a968f; + font-style: italic; + display: block; +} + +.termy [data-termynal] { + white-space: pre-wrap; +} + +a.external-link::after { + /* \00A0 is a non-breaking space + to make the mark be on the same line as the link + */ + content: "\00A0[↪]"; +} + +a.internal-link::after { + /* \00A0 is a non-breaking space + to make the mark be on the same line as the link + */ + content: "\00A0↪"; +} + +.shadow { + box-shadow: 5px 5px 10px #999; +} + +pre { + position: relative; +} + +.copy-code-button { + position: absolute; + right: 5px; + top: 5px; + cursor: pointer; + padding: 0.5em; + margin-bottom: 0.5em; + background-color: rgba( + 247, + 247, + 247, + 0.4 + ); /* Light grey background with slight transparency */ + border: 1px solid #dcdcdc; /* Slightly darker border for definition */ + border-radius: 3px; /* Rounded corners */ + font-family: monospace; /* Monospace font similar to code blocks */ + font-size: 0.85em; /* Slightly smaller font size */ + color: #333; /* Dark grey text for contrast */ + outline: none; /* Remove outline to maintain minimal style on focus */ +} diff --git a/docs/css/termynal.css b/docs/css/termynal.css new file mode 100644 index 0000000..50f81fb --- /dev/null +++ b/docs/css/termynal.css @@ -0,0 +1,113 @@ +/** + * termynal.js + * + * @author Ines Montani + * @version 0.0.1 + * @license MIT + */ + +:root { + --color-bg: #252a33; + --color-text: #eee; + --color-text-subtle: #a2a2a2; +} + +[data-termynal] { + width: 750px; + max-width: 100%; + background: var(--color-bg); + color: var(--color-text); + /* font-size: 18px; */ + font-size: 15px; + /* font-family: 'Fira Mono', Consolas, Menlo, Monaco, 'Courier New', Courier, monospace; */ + font-family: "Roboto Mono", "Fira Mono", Consolas, Menlo, Monaco, + "Courier New", Courier, monospace; + border-radius: 4px; + padding: 75px 45px 35px; + position: relative; + -webkit-box-sizing: border-box; + box-sizing: border-box; +} + +[data-termynal]:before { + content: ""; + position: absolute; + top: 15px; + left: 15px; + display: inline-block; + width: 15px; + height: 15px; + border-radius: 50%; + /* A little hack to display the window buttons in one pseudo element. */ + background: #d9515d; + -webkit-box-shadow: + 25px 0 0 #f4c025, + 50px 0 0 #3ec930; + box-shadow: + 25px 0 0 #f4c025, + 50px 0 0 #3ec930; +} + +[data-termynal]:after { + content: "bash"; + position: absolute; + color: var(--color-text-subtle); + top: 5px; + left: 0; + width: 100%; + text-align: center; +} + +a[data-terminal-control] { + text-align: right; + display: block; + color: #aebbff; +} + +[data-ty] { + display: block; + line-height: 2; +} + +[data-ty]:before { + /* Set up defaults and ensure empty lines are displayed. 
*/ + content: ""; + display: inline-block; + vertical-align: middle; +} + +[data-ty="input"]:before, +[data-ty-prompt]:before { + margin-right: 0.75em; + color: var(--color-text-subtle); +} + +[data-ty="input"]:before { + content: "$"; +} + +[data-ty][data-ty-prompt]:before { + content: attr(data-ty-prompt); +} + +[data-ty-cursor]:after { + content: attr(data-ty-cursor); + font-family: monospace; + margin-left: 0.5em; + -webkit-animation: blink 1s infinite; + animation: blink 1s infinite; +} + +/* Cursor animation */ + +@-webkit-keyframes blink { + 50% { + opacity: 0; + } +} + +@keyframes blink { + 50% { + opacity: 0; + } +} diff --git a/docs/examples.md b/docs/examples.md deleted file mode 100644 index 6ee0a62..0000000 --- a/docs/examples.md +++ /dev/null @@ -1,67 +0,0 @@ -# Examples - -## Basic Usage - -The `chain()` function allows you to call a prompt like a regular Python function. The docstring serves as the instructions and the return type annotation determines the output parsing. - -```python -from funcchain import chain - -def hello_world() -> str: - """ - Generate a friendly hello world message. - """ - return chain() - -print(hello_world()) -``` - -This will send the docstring to the AI assistant and parse the response as a string. - -## Pydantic Models - -You can use Pydantic models to validate the response. - -```python -from funcchain import chain -from pydantic import BaseModel - - -class Message(BaseModel): - text: str - - -def hello_message() -> Message: - """ - Generate a message object that says hello. - """ - return chain() - - -print(hello_message()) -``` - -Now the response will be parsed as a Message object. - -## Asynchronous Support - -Async functions are also supported with `achain()`: - -```python -import asyncio -from funcchain import achain - -async def async_hello() -> str: - """Say hello asynchronously""" - return await achain() - -print(asyncio.run(async_hello())) -``` - -This allows you to easily call AI functions from async code. - -The funcchain project makes it really simple to leverage large language models in your Python code! Check out the source code for more examples. - -## Advanced Examples - -For advanced examples, checkout the examples directory [here](https://github.com/shroominic/funcchain/tree/main/examples) diff --git a/docs/features/dynamic_router.md b/docs/features/dynamic_router.md new file mode 100644 index 0000000..5bdc421 --- /dev/null +++ b/docs/features/dynamic_router.md @@ -0,0 +1,244 @@ + +# Dynamic Chat Router with Funcchain + +!!! Example + dynamic_router.py [Example](https://github.com/shroominic/funcchain/blob/main/examples/dynamic_router.py) + +In this example we will use funcchain to build a LLM routing pipeline. +This is a very useful LLM task and can be used in a variety of applications. +You can abstract this for your own usage. +This should serve as an example of how to archive complex structures using funcchain. + +A dynamic chat router that selects the appropriate handler for user queries based on predefined routes. 
+ +## Full Code Example + +```python +from enum import Enum +from typing import Any, Callable, TypedDict + +from funcchain.syntax.executable import compile_runnable +from pydantic import BaseModel, Field + + +class Route(TypedDict): + handler: Callable + description: str + + +class DynamicChatRouter(BaseModel): + routes: dict[str, Route] + + def _routes_repr(self) -> str: + return "\n".join([f"{route_name}: {route['description']}" for route_name, route in self.routes.items()]) + + def invoke_route(self, user_query: str, /, **kwargs: Any) -> Any: + RouteChoices = Enum( # type: ignore + "RouteChoices", + {r: r for r in self.routes.keys()}, + type=str, + ) + + class RouterModel(BaseModel): + selector: RouteChoices = Field( + default="default", + description="Enum of the available routes.", + ) + + route_query = compile_runnable( + instruction="Given the user query select the best query handler for it.", + input_args=["user_query", "query_handlers"], + output_type=RouterModel, + ) + + selected_route = route_query.invoke( + input={ + "user_query": user_query, + "query_handlers": self._routes_repr(), + } + ).selector + assert isinstance(selected_route, str) + + return self.routes[selected_route]["handler"](user_query, **kwargs) + + +def handle_pdf_requests(user_query: str) -> str: + return "Handling PDF requests with user query: " + user_query + + +def handle_csv_requests(user_query: str) -> str: + return "Handling CSV requests with user query: " + user_query + + +def handle_default_requests(user_query: str) -> str: + return "Handling DEFAULT requests with user query: " + user_query + + +router = DynamicChatRouter( + routes={ + "pdf": { + "handler": handle_pdf_requests, + "description": "Call this for requests including PDF Files.", + }, + "csv": { + "handler": handle_csv_requests, + "description": "Call this for requests including CSV Files.", + }, + "default": { + "handler": handle_default_requests, + "description": "Call this for all other requests.", + }, + }, +) + + +router.invoke_route("Can you summarize this csv?") +``` + +## Demo + +
+ ```python + $ router.invoke_route("Can you summarize this csv?") + Handling CSV requests with user query: Can you summarize this csv? + ``` +
+ +## Instructions + +!!! Step-by-Step + + **Nececary imports** + + ```python + from enum import Enum + from typing import Any, Callable, TypedDict + + from funcchain.syntax.executable import compile_runnable + from pydantic import BaseModel, Field + ``` + + **Define Route Type** + + ```python + class Route(TypedDict): + handler: Callable + description: str + ``` + + Create a `TypedDict` to define the structure of a route with a handler function and a description. Just leave this unchanged if not intentionally experimenting. + + **Implement Route Representation** + + Establish a Router class + + ```python + class DynamicChatRouter(BaseModel): + routes: dict[str, Route] + ``` + + **_routes_repr():** + + Returns a string representation of all routes and their descriptions, used to help the language model understand the available routes. + + ```python + def _routes_repr(self) -> str: + return "\n".join([f"{route_name}: {route['description']}" for route_name, route in self.routes.items()]) + ``` + + **invoke_route(user_query: str, **kwargs: Any) -> Any: ** + + This method takes a user query and additional keyword arguments. Inside invoke_route, an Enum named RouteChoices is dynamically created with keys corresponding to the route names. This Enum is used to validate the selected route. + + ```python + def invoke_route(self, user_query: str, /, **kwargs: Any) -> Any: + RouteChoices = Enum( # type: ignore + "RouteChoices", + {r: r for r in self.routes.keys()}, + type=str, + ) + ``` + + **Compile the Route Selection Logic** + + The `RouterModel` class in this example is used for defining the expected output structure that the `compile_runnable` function will use to determine the best route for a given user query. + + + ```python + class RouterModel(BaseModel): + selector: RouteChoices = Field( + default="default", + description="Enum of the available routes.", + ) + + route_query = compile_runnable( + instruction="Given the user query select the best query handler for it.", + input_args=["user_query", "query_handlers"], + output_type=RouterModel, + ) + + selected_route = route_query.invoke( + input={ + "user_query": user_query, + "query_handlers": self._routes_repr(), + } + ).selector + assert isinstance(selected_route, str) + + return self.routes[selected_route]["handler"](user_query, **kwargs) + ``` + + - `RouterModel`: Holds the route selection with a default option, ready for you to play around with. + - `RouteChoices`: An Enum built from route names, ensuring you only get valid route selections. + - `compile_runnable`: Sets up the decision-making logic for route selection, guided by the provided instruction and inputs. + - `route_query`: Calls the decision logic with the user's query and a string of route descriptions. + - `selected_route`: The outcome of the decision logic, representing the route to take. + - `assert`: A safety check to confirm the route is a string, as expected by the routes dictionary. + - `handler invocation`: Runs the chosen route's handler with the provided query and additional arguments. + + **Define route functions** + + Now you can use the structured output to execute programatically based on a natural language input. + Establish functions tailored to your needs. 
+ + ```python + def handle_pdf_requests(user_query: str) -> str: + return "Handling PDF requests with user query: " + user_query + + def handle_csv_requests(user_query: str) -> str: + return "Handling CSV requests with user query: " + user_query + + def handle_default_requests(user_query: str) -> str: + return "Handling DEFAULT requests with user query: " + user_query + ``` + + **Define the routes** + + And bind the previous established functions. + + ```python + router = DynamicChatRouter( + routes={ + "pdf": { + "handler": handle_pdf_requests, + "description": "Call this for requests including PDF Files.", + }, + "csv": { + "handler": handle_csv_requests, + "description": "Call this for requests including CSV Files.", + }, + "default": { + "handler": handle_default_requests, + "description": "Call this for all other requests.", + }, + }, + ) + ``` + + **Get output** + + Use the router.invoke_route method to process the user query and obtain the appropriate response. + + ```python + router.invoke_route("Can you summarize this csv?") + ``` diff --git a/docs/features/enums.md b/docs/features/enums.md new file mode 100644 index 0000000..d1817ad --- /dev/null +++ b/docs/features/enums.md @@ -0,0 +1,99 @@ + +# Decision Making with Enums and Funcchain + +!!! Example + See [enums.py](https://github.com/shroominic/funcchain/blob/main/examples/enums.py) + + In this example, we will use the enum module and funcchain library to build a decision-making system. + This is a useful task for creating applications that require predefined choices or responses. + You can adapt this for your own usage. + This serves as an example of how to implement decision-making logic using enums and the funcchain library. + +## Full Code Example + +A simple system that takes a question and decides a 'yes' or 'no' answer based on the input. + +

+```python
+from enum import Enum
+from funcchain import chain
+from pydantic import BaseModel
+
+class Answer(str, Enum):
+    yes = "yes"
+    no = "no"
+
+class Decision(BaseModel):
+    answer: Answer
+
+def make_decision(question: str) -> Decision:
+    """
+    Based on the question, decide yes or no.
+    """
+    return chain()
+
+print(make_decision("Do you like apples?"))
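+# => answer=<Answer.yes: 'yes'>  (example output, the exact value depends on the model)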
+```
+
+ +# Demo + +
+ ```terminal + $ make_decision("Do you like apples?") + + answer= + ``` +
+ +## Instructions + +!!! Step-by-Step + **Necessary Imports** + + ```python + from enum import Enum + from funcchain import chain + from pydantic import BaseModel + ``` + + **Define the Answer Enum** + + The Answer enum defines possible answers as 'yes' and 'no', which are the only valid responses for the decision-making system. Experiment by using and describing other enums. + + ```python + class Answer(str, Enum): + yes = "yes" + no = "no" + ``` + + **Create the Decision Model** + + The Decision class uses Pydantic to model a decision, ensuring that the answer is always an instance of the Answer enum. + + ```python + class Decision(BaseModel): + answer: Answer + ``` + + **Implement the Decision Function** + + The make_decision function is where the decision logic will be implemented, using `chain()` to process the question and return a decision. + When using your own enums you want to edit this accordingly. + + ```python + def make_decision(question: str) -> Decision: + """ + Based on the question decide yes or no. + """ + return chain() + ``` + + **Run the Decision System** + + This block runs the decision-making system, printing out the decision for a given question when the script is executed directly. + + + ```python + print(make_decision("Do you like apples?")) + ``` diff --git a/docs/features/error_output.md b/docs/features/error_output.md new file mode 100644 index 0000000..1c98102 --- /dev/null +++ b/docs/features/error_output.md @@ -0,0 +1,107 @@ + +# Example of raising an error + +!!! Example + error_output.py [Example](https://github.com/shroominic/funcchain/blob/main/examples/error_output.py) + + In this example, we will use the funcchain library to build a system that extracts user information from text. + Most importantly we will be able to raise an error thats programmatically usable. + You can adapt this for your own usage. + + The main functionality is to take a string of text and attempt to extract user information, such as name and email, and return a User object. If the information is insufficient, an Error is returned instead. + +## Full Code Example + +

+```python
+from funcchain import BaseModel, Error, chain
+from rich import print
+
+class User(BaseModel):
+    name: str
+    email: str | None
+
+def extract_user_info(text: str) -> User | Error:
+    """
+    Extract the user information from the given text.
+    In case you do not have enough infos, raise.
+    """
+    return chain()
+
+print(extract_user_info("hey"))
+# => returns Error
+
+print(extract_user_info("I'm John and my mail is john@gmail.com"))
+# => returns a User object
+
+```
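+
+Because the return annotation is a union of `User` and `Error`, the result can be handled programmatically, for example with structural pattern matching. A minimal sketch, assuming the definitions above (the `Error` field names follow the demo output below):
+
+```python
+result = extract_user_info("I'm John and my mail is john@gmail.com")
+
+match result:
+    case User(name=name, email=email):
+        print(f"Extracted user {name} ({email})")
+    case Error(title=title, description=description):
+        print(f"Extraction failed: {title} - {description}")
+```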
+
+ +Demo + +
+ ```python + $ extract_user_info("hey") + + Error( + title='Invalid Input', + description='The input text does not contain user information.' + ) + + $ extract_user_info("I'm John and my mail is john@gmail.com") + + User( + name='John', + email='john@gmail.com' + ) + ``` + +
+ +## Instructions + +!!! Step-by-Step + + **Necessary Imports** + + ```python + from funcchain import BaseModel, Error, chain + from rich import print + ``` + + **Define the User Model** + + ```python + class User(BaseModel): + name: str + email: str | None + ``` + The User class is a Pydantic model that defines the structure of the user information to be extracted, with fields for `name` and an email. + Change the fields to experiment and alignment with your project. + + **Implement the Extraction Function** + + The `extract_user_info` function is intended to process the input text and return either a User object with extracted information or an Error if the information is not sufficient. + + ```python + def extract_user_info(text: str) -> User | Error: + """ + Extract the user information from the given text. + In case you do not have enough infos, raise. + """ + return chain() + ``` + For experiments and adoptions also change the `str` that will be used in chain() to identify what you defined earlier in the `User(BaseModel)` + + + **Run the Extraction System** + + This conditional block is used to execute the extraction function and print the results when the script is run directly. + + ```python + print(extract_user_info("hey")) + # => returns Error + + print(extract_user_info("I'm John and my mail is john@gmail.com")) + # => returns a User object + ``` diff --git a/docs/features/literals.md b/docs/features/literals.md new file mode 100644 index 0000000..c7d372d --- /dev/null +++ b/docs/features/literals.md @@ -0,0 +1,92 @@ + +# Literal Type Enforcement in Funcchain + +!!! Example + literals.py [Example](https://github.com/shroominic/funcchain/blob/main/examples/literals.py) + + This is a useful task for scenarios where you want to ensure that certain outputs strictly conform to a predefined set of values. + This serves as an example of how to implement strict type checks on outputs using the Literal type from the typing module and the funcchain library. + + You can adapt this for your own usage. + +## Full Code Example + +

+```python
+from typing import Literal
+from funcchain import chain
+from pydantic import BaseModel
+
+class Ranking(BaseModel):
+    analysis: str
+    score: Literal[11, 22, 33, 44, 55]
+    error: Literal["no_input", "all_good", "invalid"]
+
+def rank_output(output: str) -> Ranking:
+    """
+    Analyze and rank the output.
+    """
+    return chain()
+
+rank = rank_output("The quick brown fox jumps over the lazy dog.")
+print(rank)
+```
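+
+The `Literal` annotations are enforced by pydantic at parse time, so values outside the allowed sets are rejected and funcchain retries the generation (see `retry_parse` in the settings). A small sketch of the validation behaviour, assuming the `Ranking` model above:
+
+```python
+from pydantic import ValidationError
+
+try:
+    Ranking(analysis="manual check", score=12, error="all_good")
+except ValidationError as exc:
+    print(exc)  # 12 is not one of the allowed literal values
+```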
+
+ +Demo + +
+ ```python + rank = rank_output("The quick brown fox jumps over the lazy dog.") + print(rank) + $ ........ + Ranking(analysis='...', score=33, error='all_good') + ``` +
+
+## Instructions
+
+!!! Step-by-Step
+
+    **Necessary Imports**
+
+    ```python
+    from typing import Literal
+    from funcchain import chain
+    from pydantic import BaseModel
+    ```
+
+    **Define the Ranking Model**
+
+    The Ranking class is a Pydantic model that uses the Literal type to ensure that the score and error fields can only contain certain predefined values.
+    Experiment with changing those values while keeping the structure of the class.
+    The LLM is forced to deliver one of the defined outputs.
+
+    ```python
+    class Ranking(BaseModel):
+        analysis: str
+        score: Literal[11, 22, 33, 44, 55]
+        error: Literal["no_input", "all_good", "invalid"]
+    ```
+
+    **Implement the Ranking Function**
+
+    Use `chain()` to process a user input, which must be a string.
+    Adjust the docstring and fields based on the class you defined above.
+
+    ```python
+    def rank_output(output: str) -> Ranking:
+        """
+        Analyze and rank the output.
+        """
+        return chain()
+    ```
+
+    **Execute the Ranking System**
+
+    This block executes the ranking function and prints the result.
+
+    ```python
+    rank = rank_output("The quick brown fox jumps over the lazy dog.")
+    print(rank)
+    ```
diff --git a/docs/features/llamacpp.md b/docs/features/llamacpp.md
new file mode 100644
index 0000000..b9969f9
--- /dev/null
+++ b/docs/features/llamacpp.md
@@ -0,0 +1,111 @@
+
+# Different LLMs with Funcchain (LlamaCpp)
+
+!!! Example
+    See [llamacpp.py](https://github.com/shroominic/funcchain/blob/main/examples/llamacpp.py)
+    Also see supported [MODELS.md](https://github.com/shroominic/funcchain/blob/main/MODELS.md)
+
+    In this example, we will use the funcchain library to perform sentiment analysis on a piece of text. It showcases how funcchain can seamlessly use different language models (LLMs), here local llamacpp models, with only minimal code changes.
+
+    This is particularly useful for developers looking to integrate different models in a single application or just experimenting with different models.
+
+## Full Code Example
+

+```python
+from funcchain import chain, settings
+from pydantic import BaseModel, Field
+from rich import print
+
+# define your model
+class SentimentAnalysis(BaseModel):
+    analysis: str = Field(description="A description of the analysis")
+    sentiment: bool = Field(description="True for Happy, False for Sad")
+
+# define your prompt
+def analyze(text: str) -> SentimentAnalysis:
+    """
+    Determines the sentiment of the text.
+    """
+    return chain()
+
+# set global llm
+settings.llm = "llamacpp/openchat-3.5-0106:Q3_K_M"
+
+# log tokens as stream to console
+settings.console_stream = True
+
+# run prompt
+poem = analyze("I really like when my dog does a trick!")
+
+# show final parsed output
+print(poem)
+```
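+
+The same function works with any other supported model identifier; only the settings line changes. A small sketch with identifiers taken from [MODELS.md](https://github.com/shroominic/funcchain/blob/main/MODELS.md):
+
+```python
+# switch to another local llamacpp model (quantization label optional)
+settings.llm = "llamacpp/openchat-3.5-1210"
+print(analyze("The rain ruined our picnic."))
+
+# or hand the same pipeline to an OpenAI model instead
+settings.llm = "openai/gpt-3.5-turbo"
+print(analyze("The rain ruined our picnic."))
+```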
+
+ +# Demo + +
+ ``` + poem = analyze("I really like when my dog does a trick!") + + $ {"analysis": "A dog trick", "sentiment": true} + + SentimentAnalysis(analysis='A dog trick', sentiment=True) + + ``` +
+
+## Instructions
+
+!!! Step-by-Step
+
+    **Necessary Imports**
+
+    ```python
+    from funcchain import chain, settings
+    from pydantic import BaseModel, Field
+    ```
+
+    **Define the Data Model**
+
+    Here, we define a `SentimentAnalysis` model with a description of the sentiment analysis and a boolean field indicating the sentiment.
+
+    ```python
+    class SentimentAnalysis(BaseModel):
+        analysis: str = Field(description="A description of the analysis")
+        sentiment: bool = Field(description="True for Happy, False for Sad")
+    ```
+
+    **Create the Analysis Function**
+
+    The `analyze` function takes a string as input and is expected to return a `SentimentAnalysis` object by calling the `chain()` function from the `funcchain` library.
+
+    ```python
+    def analyze(text: str) -> SentimentAnalysis:
+        """
+        Determines the sentiment of the text.
+        """
+        return chain()
+    ```
+
+    **Execution Configuration**
+
+    Configure the global settings to select the preferred LLM, enable console streaming, and run the `analyze` function with sample text.
+
+    ```python
+    # set global llm
+    settings.llm = "llamacpp/openchat-3.5-0106:Q3_K_M"
+
+    # log tokens as stream to console
+    settings.console_stream = True
+
+    # run prompt
+    poem = analyze("I really like when my dog does a trick!")
+
+    # show final parsed output
+    print(poem)
+    ```
+
+    !!! Important
+        `settings.llm` can be set to any model listed in [MODELS.md](https://github.com/shroominic/funcchain/blob/main/MODELS.md) and your funcchain code will still work; `chain()` does everything in the background for you.
diff --git a/docs/features/ollama.md b/docs/features/ollama.md
new file mode 100644
index 0000000..f19829a
--- /dev/null
+++ b/docs/features/ollama.md
@@ -0,0 +1,103 @@
+
+# Different LLMs with Funcchain (Ollama)
+
+!!! Example
+    See [ollama.py](https://github.com/shroominic/funcchain/blob/main/examples/ollama.py)
+    Also see supported [MODELS.md](https://github.com/shroominic/funcchain/blob/main/MODELS.md)
+
+    In this example, we will use the funcchain library to perform sentiment analysis on a piece of text. It showcases how funcchain can seamlessly use different language models (LLMs) served through Ollama, without unnecessary code changes.
+
+    This is particularly useful for developers looking to integrate different models in a single application or just experimenting with different models.
+
+## Full Code Example
+

+```python
+from funcchain import chain, settings
+from pydantic import BaseModel, Field
+
+# define your model
+class SentimentAnalysis(BaseModel):
+    analysis: str = Field(description="A description of the analysis")
+    sentiment: bool = Field(description="True for Happy, False for Sad")
+
+# define your prompt
+def analyze(text: str) -> SentimentAnalysis:
+    """
+    Determines the sentiment of the text.
+    """
+    return chain()
+
+if __name__ == "__main__":
+    # set global llm
+    settings.llm = "ollama/openchat"
+
+    # log tokens as stream to console
+    settings.console_stream = True
+
+    # run prompt
+    poem = analyze("I really like when my dog does a trick!")
+
+    # show final parsed output
+    print(poem)
+```
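+
+If you only want to switch models for a single call instead of globally, the configuration docs describe a `settings_override` argument for `chain()`. A hedged sketch combining that with this example (import path and override keys as described in the configuration section of these docs):
+
+```python
+from funcchain.settings import SettingsOverride
+
+def analyze_with(text: str, settings: SettingsOverride = {}) -> SentimentAnalysis:
+    """
+    Determines the sentiment of the text.
+    """
+    return chain(settings_override=settings)
+
+print(analyze_with("What a wonderful day!", settings={"llm": "ollama/openchat"}))
+```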
+
+ +# Demo + +
+ ``` + poem = analyze("I really like when my dog does a trick!") + + $ {"analysis": "A dog trick", "sentiment": true} + + SentimentAnalysis(analysis='A dog trick', sentiment=True) + + ``` +
+
+## Instructions
+
+!!! Step-by-Step
+
+    **Necessary Imports**
+
+    ```python
+    from funcchain import chain, settings
+    from pydantic import BaseModel, Field
+    ```
+
+    **Define the Data Model**
+
+    Here, we define a `SentimentAnalysis` model with a description of the sentiment analysis and a boolean field indicating the sentiment.
+
+    ```python
+    class SentimentAnalysis(BaseModel):
+        analysis: str = Field(description="A description of the analysis")
+        sentiment: bool = Field(description="True for Happy, False for Sad")
+    ```
+
+    **Create the Analysis Function**
+
+    The `analyze` function takes a string as input and is expected to return a `SentimentAnalysis` object by calling the `chain()` function from the `funcchain` library.
+
+    ```python
+    def analyze(text: str) -> SentimentAnalysis:
+        """
+        Determines the sentiment of the text.
+        """
+        return chain()
+    ```
+
+    **Execution Configuration**
+
+    In the main block, configure the global settings to select the preferred LLM, enable console streaming, and run the `analyze` function with sample text.
+
+    ```python
+    settings.llm = "ollama/openchat"
+    settings.console_stream = True
+    poem = analyze("I really like when my dog does a trick!")
+    print(poem)
+    ```
+
+    !!! Important
+        `settings.llm` can be set to any model listed in [MODELS.md](https://github.com/shroominic/funcchain/blob/main/MODELS.md) and your funcchain code will still work; `chain()` does everything in the background for you.
diff --git a/docs/features/openai_json_mode.md b/docs/features/openai_json_mode.md
new file mode 100644
index 0000000..4cd6132
--- /dev/null
+++ b/docs/features/openai_json_mode.md
@@ -0,0 +1,76 @@
+
+# JSON Structured Output using Funcchain with OpenAI
+
+!!! Example
+    See [openai_json_mode.py](https://github.com/shroominic/funcchain/blob/main/examples/openai_json_mode.py)
+
+    This example showcases how funcchain gets OpenAI to output even a plain `int` as JSON.
+
+    It uses the funcchain library and pydantic to create a `FruitSalad` model, sum its contents, and return the total as an integer.
+
+## Full Code Example
+

+```python
+from funcchain import chain
+from pydantic import BaseModel
+
+class FruitSalad(BaseModel):
+    bananas: int = 0
+    apples: int = 0
+
+def sum_fruits(fruit_salad: FruitSalad) -> int:
+    """
+    Sum the number of fruits in a fruit salad.
+    """
+    return chain()
+
+if __name__ == "__main__":
+    fruit_salad = FruitSalad(bananas=3, apples=5)
+    assert sum_fruits(fruit_salad) == 8
+```
+
+
+## Instructions
+
+!!! Step-by-Step
+
+    **Necessary Imports**
+
+    `funcchain` for the chaining functionality and `pydantic` for the data model.
+
+    ```python
+    from funcchain import chain
+    from pydantic import BaseModel
+    ```
+
+    **Defining the Data Model**
+
+    We define the `FruitSalad` Pydantic model with integer fields for the number of bananas and apples.
+    Feel free to change this class according to your needs, but the use of `pydantic` is required.
+
+    ```python
+    class FruitSalad(BaseModel):
+        bananas: int = 0
+        apples: int = 0
+    ```
+
+    **Summing Function**
+
+    The `sum_fruits` function takes a `FruitSalad` object and uses `chain()` to solve this task with an LLM. The result is then returned as an integer.
+
+    ```python
+    def sum_fruits(fruit_salad: FruitSalad) -> int:
+        """
+        Sum the number of fruits in a fruit salad.
+        """
+        return chain()
+    ```
+
+    **Execution Block**
+
+    ```python
+    fruit_salad = FruitSalad(bananas=3, apples=5)
+    assert sum_fruits(fruit_salad) == 8
+    ```
+
+    In the main section of the script, we instantiate a `FruitSalad` object with predefined quantities of bananas and apples and verify that `sum_fruits` calculates the total count of fruits, which should be 8 in this case.
diff --git a/docs/features/retry_parsing.md b/docs/features/retry_parsing.md
new file mode 100644
index 0000000..95f1f4a
--- /dev/null
+++ b/docs/features/retry_parsing.md
@@ -0,0 +1,123 @@
+
+# Retry Parsing
+
+!!! Example
+    See [pydantic_validation.py](https://github.com/shroominic/funcchain/blob/main/examples/pydantic_validation.py)
+
+    You can adapt this for your own usage.
+    This serves as an example of how to implement data validation and task creation using pydantic for data models and funcchain for processing natural language input.
+
+    The main functionality is to parse a user description, validate the task details, and create a new Task object with unique keywords and a difficulty level within a specified range.
+
+## Full Code Example
+

+```python
+from funcchain import chain, settings
+from pydantic import BaseModel, field_validator
+
+# settings.llm = "ollama/openchat"
+settings.console_stream = True
+
+class Task(BaseModel):
+    name: str
+    difficulty: int
+    keywords: list[str]
+
+    @field_validator("keywords")
+    def keywords_must_be_unique(cls, v: list[str]) -> list[str]:
+        if len(v) != len(set(v)):
+            raise ValueError("keywords must be unique")
+        return v
+
+    @field_validator("difficulty")
+    def difficulty_must_be_between_1_and_10(cls, v: int) -> int:
+        if v < 10 or v > 100:
+            raise ValueError("difficulty must be between 10 and 100")
+        return v
+
+def gather_infos(user_description: str) -> Task:
+    """
+    Based on the user description,
+    create a new task to put on the todo list.
+    """
+    return chain()
+
+if __name__ == "__main__":
+    task = gather_infos("cleanup the kitchen")
+    print(f"{task=}")
+```
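+
+When the model's first answer fails one of these validators, funcchain automatically retries the generation; the number of attempts and the pause between them are controlled by the `retry_parse` and `retry_parse_sleep` settings (see the configuration docs). For example:
+
+```python
+from funcchain import settings
+
+settings.retry_parse = 5          # allow up to 5 attempts (default is 3)
+settings.retry_parse_sleep = 0.5  # wait half a second between attempts
+```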
+
+ +Demo + +
+ ```python + User: + $ cleanup the kitchen + + task=Task + name='cleanup', + difficulty=30, + keywords=['kitchen', 'cleanup'] + ``` + +
+ +## Instructions + +!!! Step-by-Step + **Necessary Imports** + + ```python + from funcchain import chain, settings + from pydantic import BaseModel, field_validator + ``` + + **Define the Task Model with Validators** + The `Task` class is a Pydantic model with fields: `name`, `difficulty`, and `keywords`. Validators ensure data integrity: + + - `keywords_must_be_unique`: Checks that all keywords are distinct. + - `difficulty_must_be_between_1_and_10`: Ensures difficulty is within 10 to 100. + + ```python + class Task(BaseModel): + name: str # Task name. + difficulty: int # Difficulty level (10-100). + keywords: list[str] # Unique keywords. + + @field_validator("keywords") + def keywords_must_be_unique(cls, v: list[str]) -> list[str]: + # Ensure keyword uniqueness. + if len(v) != len(set(v)): + raise ValueError("keywords must be unique") + return v + + @field_validator("difficulty") + def difficulty_must_be_between_1_and_10(cls, v: int) -> int: + # Validate difficulty range. + if v < 10 or v > 100: + raise ValueError("difficulty must be between 10 and 100") + return v + ``` + + **Implement the Information Gathering Function** + The gather_infos function is designed to take a user description and use the chain function to process and validate the input, returning a new Task object. + Adjust the string description to match your purposes when changing the code above. + + ```python + def gather_infos(user_description: str) -> Task: + """ + Based on the user description, + create a new task to put on the todo list. + """ + return chain() + ``` + + **Execute the Script** + Runs gather_infos with a sample and prints the Task. + ```python + if __name__ == "__main__": + task = gather_infos("cleanup the kitchen") + print(f"{task=}") + ``` diff --git a/docs/features/static_router.md b/docs/features/static_router.md new file mode 100644 index 0000000..b46037d --- /dev/null +++ b/docs/features/static_router.md @@ -0,0 +1,139 @@ + +# Static Routing with Funcchain and Pydantic + +!!! Example + See [static_router.py](https://github.com/shroominic/funcchain/blob/main/examples/static_router.py) + + This serves as an example of how to implement static routing using funcchain for decision-making and Enum for route selection. + This is a useful task for applications that need to route user requests to specific handlers based on the content of the request. + You can adapt this for your own usage. + +## Full Code Example + +

+```python
+from enum import Enum
+from typing import Any
+
+from funcchain import chain, settings
+from pydantic import BaseModel, Field
+
+settings.console_stream = True
+
+def handle_pdf_requests(user_query: str) -> None:
+    print("Handling PDF requests with user query: ", user_query)
+
+def handle_csv_requests(user_query: str) -> None:
+    print("Handling CSV requests with user query: ", user_query)
+
+def handle_default_requests(user_query: str) -> Any:
+    print("Handling DEFAULT requests with user query: ", user_query)
+
+class RouteChoices(str, Enum):
+    pdf = "pdf"
+    csv = "csv"
+    default = "default"
+
+class Router(BaseModel):
+    selector: RouteChoices = Field(description="Enum of the available routes.")
+
+    def invoke_route(self, user_query: str) -> Any:
+        match self.selector.value:
+            case RouteChoices.pdf:
+                return handle_pdf_requests(user_query)
+            case RouteChoices.csv:
+                return handle_csv_requests(user_query)
+            case RouteChoices.default:
+                return handle_default_requests(user_query)
+
+def route_query(user_query: str) -> Router:
+    return chain()
+
+user_query = input("Enter your query: ")
+routed_chain = route_query(user_query)
+routed_chain.invoke_route(user_query)
+```
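+
+You do not have to go through `input()`; the router can be invoked with any string. A small usage sketch, assuming the definitions above:
+
+```python
+query = "Please merge these two PDF documents"
+routed = route_query(query)
+routed.invoke_route(query)
+# expected: Handling PDF requests with user query:  Please merge these two PDF documents
+```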
+
+ +Demo + +
+ + ```python + Enter your query: + $ I need to process a CSV file + + Handling CSV requests with user query: I need to process a CSV file + ``` +
+ +## Instructions + +!!! Step-by-Step + We will implement a script with the functionality to take a user query, determine the type of request (PDF, CSV, or default), and invoke the appropriate handler function. + + **Necessary Imports** + + ```python + from enum import Enum + from typing import Any + from funcchain import chain, settings + from pydantic import BaseModel, Field + ``` + + **Define Route Handlers** + + These functions are the specific handlers for different types of user queries. + + ```python + def handle_pdf_requests(user_query: str) -> None: + print("Handling PDF requests with user query: ", user_query) + + def handle_csv_requests(user_query: str) -> None: + print("Handling CSV requests with user query: ", user_query) + + def handle_default_requests(user_query: str) -> Any: + print("Handling DEFAULT requests with user query: ", user_query) + ``` + + **Create RouteChoices Enum and Router Model** + + RouteChoices is an Enum that defines the possible routes. Router is a Pydantic model that selects and invokes the appropriate handler based on the route. + + ```python + class RouteChoices(str, Enum): + pdf = "pdf" + csv = "csv" + default = "default" + + class Router(BaseModel): + selector: RouteChoices = Field(description="Enum of the available routes.") + + def invoke_route(self, user_query: str) -> Any: + match self.selector.value: + case RouteChoices.pdf: + return handle_pdf_requests(user_query) + case RouteChoices.csv: + return handle_csv_requests(user_query) + case RouteChoices.default: + return handle_default_requests(user_query) + ``` + + **Implement Routing Logic** + + The route_query function is intended to determine the best route for a given user query using the `chain()` function. + + ```python + def route_query(user_query: str) -> Router: + return chain() + ``` + + **Execute the Routing System** + + This block runs the routing system, asking the user for a query and then processing it through the defined routing logic. + + ```python + user_query = input("Enter your query: ") + routed_chain = route_query(user_query) + routed_chain.invoke_route(user_query) + ``` diff --git a/docs/features/stream.md b/docs/features/stream.md new file mode 100644 index 0000000..bdaa4dd --- /dev/null +++ b/docs/features/stream.md @@ -0,0 +1,80 @@ + +# Streaming with Funcchain + +!!! Example + See [stream.py](https://github.com/shroominic/funcchain/blob/main/examples/stream.py) + + This serves as an example of how to implement streaming output for text generation tasks using funcchain. + +## Full Code Example + +

+```python
+from funcchain import chain, settings
+from funcchain.backend.streaming import stream_to
+
+settings.temperature = 1
+
+def generate_story_of(topic: str) -> str:
+    """
+    Write a short story based on the topic.
+    """
+    return chain()
+
+with stream_to(print):
+    generate_story_of("a space cat")
+```
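+
+`stream_to` takes a callable, so you are not limited to `print`. A hedged sketch that collects the streamed tokens into a list instead, assuming `stream_to` simply forwards each token chunk to the given callable:
+
+```python
+tokens: list[str] = []
+
+with stream_to(tokens.append):
+    story = generate_story_of("a lighthouse keeper")
+
+print(f"received {len(tokens)} chunks")
+print(story)
+```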
+
+ +Demo + +
+ ```python + with stream_to(print): + generate_story_of("a space cat") + + $ Once upon a time in a galaxy far, far away, there was a space cat named Whiskertron... + ``` +
+ +## Instructions + +!!! Step-by-Step + + **Necessary Imports** + + ```python + from funcchain import chain, settings + from funcchain.backend.streaming import stream_to + ``` + + **Configure Settings** + + The settings are configured to set the temperature, which controls the creativity of the language model's output. + Experiment with different values. + + ```python + settings.temperature = 1 + ``` + + **Define the Story Generation Function** + + The generate_story_of function is designed to take a topic and use the chain function to generate a story. + + ```python + def generate_story_of(topic: str) -> str: + """ + Write a short story based on the topic. + """ + return chain() + ``` + + **Execute the Streaming Generation** + + This block uses the stream_to context manager to print the output of the story generation function as it is being streamed. + This is how you stream the story while it is being generated. + + ```python + with stream_to(print): + generate_story_of("a space cat") + ``` diff --git a/docs/features/vision.md b/docs/features/vision.md new file mode 100644 index 0000000..0fe57e0 --- /dev/null +++ b/docs/features/vision.md @@ -0,0 +1,112 @@ + +# Image Analysis with Funcchain and Pydantic + +!!! Example + [vision.py](https://github.com/shroominic/funcchain/blob/main/examples/vision.py) + + This is a useful task for applications that need to extract structured information from images. + You can adapt this for your own usage. + This serves as an example of how to implement image analysis using the funcchain library's integration with openai/gpt-4-vision-preview. + +## Full Code Example + +

+```python
+from funcchain import Image, chain, settings
+from pydantic import BaseModel, Field
+
+settings.llm = "openai/gpt-4-vision-preview"
+settings.console_stream = True
+
+class AnalysisResult(BaseModel):
+    """The result of an image analysis."""
+
+    theme: str = Field(description="The theme of the image")
+    description: str = Field(description="A description of the image")
+    objects: list[str] = Field(description="A list of objects found in the image")
+
+def analyse_image(image: Image) -> AnalysisResult:
+    """
+    Analyse the image and extract its
+    theme, description and objects.
+    """
+    return chain()
+
+example_image = Image.from_file("examples/assets/old_chinese_temple.jpg")
+
+result = analyse_image(example_image)
+
+print("Theme:", result.theme)
+print("Description:", result.description)
+for obj in result.objects:
+    print("Found this object:", obj)
+```
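+
+Since `analyse_image` is an ordinary function, it can be reused across several files, for example in a simple loop. A minimal sketch, assuming the definitions above (the additional file path is a placeholder):
+
+```python
+image_paths = [
+    "examples/assets/old_chinese_temple.jpg",
+    "path/to/another_image.jpg",  # placeholder
+]
+
+for path in image_paths:
+    analysis = analyse_image(Image.from_file(path))
+    print(path, "->", analysis.theme)
+```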
+
+ +Example Output + +```txt +Theme: Traditional Japanese architecture and nature during rainfall +Description: The image depicts a serene rainy scene with traditional Japanese buildings. The warm glow of lights from the windows contrasts with the cool tones of the rain. A cherry blossom tree in bloom adds a splash of color to the otherwise muted scene. Stone lanterns and stepping stones create a path leading to the building, while hanging lanterns with a skull motif suggest a cultural or festive significance. +Found this object: traditional Japanese building +Found this object: cherry blossom tree +Found this object: rain +Found this object: stepping stones +Found this object: stone lantern +Found this object: hanging lanterns with skull motif +Found this object: glowing windows +``` + +## Instructions + +!!! Step-by-Step + Oiur goal is the functionality is to analyze an image and extract its theme, a description, and a list of objects found within it. + + **Necessary Imports** + + ```python + from funcchain import Image, chain, settings + from pydantic import BaseModel, Field + ``` + + **Configure Settings** + + The settings are configured to use a specific language model capable of image analysis and to enable console streaming for immediate output. + + ```python + settings.llm = "openai/gpt-4-vision-preview" + settings.console_stream = True + ``` + + **Define the AnalysisResult Model** + + The AnalysisResult class models the expected output of the image analysis, including the theme, description, and objects detected in the image. + + ```python + class AnalysisResult(BaseModel): + theme: str = Field(description="The theme of the image") + description: str = Field(description="A description of the image") + objects: list[str] = Field(description="A list of objects found in the image") + ``` + + **Implement the Image Analysis Function** + + The analyse_image function is designed to take an Image object and use the chain function to process the image and return an AnalysisResult object for later usage (here printing). + + ```python + def analyse_image(image: Image) -> AnalysisResult: + return chain() + ``` + + **Execute the Analysis** + + This block runs the image analysis on an example image and prints the results when the script is executed directly. + + ```python + example_image = Image.from_file("examples/assets/old_chinese_temple.jpg") + result = analyse_image(example_image) + print("Theme:", result.theme) + print("Description:", result.description) + for obj in result.objects: + print("Found this object:", obj) + ``` diff --git a/docs/getting-started/config.md b/docs/getting-started/config.md new file mode 100644 index 0000000..7e55aa6 --- /dev/null +++ b/docs/getting-started/config.md @@ -0,0 +1,107 @@ +# Funcchain Configuration + +## Set Global Settings + +In every project you use funcchain in you can specify global settings. This is done by importing the `settings` object from the `funcchain` package. + +```python +from funcchain import settings +``` + +You can then change the settings like here: + +```python +settings.llm = "openai/gpt-4-vision-preview" +``` + +## Set Local Settings + +If you want to set local settings only applied to a specific funcchain function you +can set them using the SettingsOverride class. + +```python +from funcchain import chain +from funcchain.settings import SettingsOverride + +def analyse_output( + goal: str + output: str, + settings: SettingsOverride = {}, +) -> OutputAnalysis: + """ + Analyse the output and determine if the goal is reached. 
+ """ + return chain(settings_override=settings) + +result = analyse_output( + "healthy outpout", + "Hello World!", + settings_override={"llm": "openai/gpt-4-vision-preview"}, +) +``` + +The `settings_override` argument is a `SettingsOverride` object which is a dict-like object that can be used to override the global settings. +You will get suggestions from your IDE on what settings you can override due to the type hints. + +## Settings Class Overview + +The configuration settings for Funcchain are encapsulated within the `FuncchainSettings` class. This class inherits from Pydantic's `BaseSettings`. + +`funcchain/backend/settings.py` + +```python +class FuncchainSettings(BaseSettings): + ... +``` + +## Setting Descriptions + +### General Settings + +- `debug: bool = True` + Enables or disables debug mode. + +- `llm: BaseChatModel | str = "openai/gpt-3.5-turbo-1106"` + Defines the language learning model to be used. It can be a type of `BaseChatModel` or `str` (model_name). + Checkout the [MODELS.md](https://github.com/shroominic/funcchain/blob/main/MODELS.md) file for a list and schema of supported models. + +- `console_stream: bool = False` + Enables or disables token streaming to the console. + +- `system_prompt: str = ""` + System prompt used as first message in the chat to instruct the model. + +- `retry_parse: int = 3` + Number of retries for auto fixing pydantic validation errors. + +- `retry_parse_sleep: float = 0.1` + Sleep time between retries. + +### Model Keyword Arguments + +- `verbose: bool = False` + Enables or disables verbose logging for the model. + +- `streaming: bool = False` + Enables or disables streaming for the model. + +- `max_tokens: int = 2048` + Specifies the maximum number of output tokens for chat models. + +- `temperature: float = 0.1` + Controls the randomness in the model's output. + +### LlamaCPP Keyword Arguments + +- `context_lenght: int = 8196` + Specifies the context length for the LlamaCPP model. + +- `n_gpu_layers: int = 42` + Specifies the number of GPU layers for the LlamaCPP model. + Choose 0 for CPU only. + +- `keep_loaded: bool = False` + Determines whether to keep the LlamaCPP model loaded in memory. + +- `local_models_path: str = "./.models"` + Specifies the local path for storing models. diff --git a/docs/getting-started/demos.md b/docs/getting-started/demos.md new file mode 100644 index 0000000..b49081f --- /dev/null +++ b/docs/getting-started/demos.md @@ -0,0 +1,383 @@ + +# Demos + +## Simple Structured Output + +```python +from funcchain import chain +from pydantic import BaseModel + +# define your output shape +class Recipe(BaseModel): + ingredients: list[str] + instructions: list[str] + duration: int + +# write prompts utilising all native python features +def generate_recipe(topic: str) -> Recipe: + """ + Generate a recipe for a given topic. + """ + return chain() # <- this is doing all the magic + +# generate llm response +recipe = generate_recipe("christmas dinner") + +# recipe is automatically converted as pydantic model +print(recipe.ingredients) +``` + +!!! Step-by-Step + + ```python + class Recipe(BaseModel): + ingredients: list[str] + instructions: list[str] + duration: int + ``` + + A Recipe class is defined, inheriting from BaseModel (pydantic library). This class + specifies the structure of the output data, which you can customize. 
+ In the example it includes a list of ingredients, a list of instructions, and an integer + representing the duration + + ```python + def generate_recipe(topic: str) -> Recipe: + """ + Generate a recipe for a given topic. + """ + return chain() + ``` + In this example the `generate_recipe` function takes a topic string and returns a `Recipe` instance for that topic. + # Understanding chain() Functionality + Chain() is the backend magic of funcchain. Behind the szenes it creates the llm function from the function signature and docstring. + Meaning it will turn your function into usable LLM input. + + The `chain()` function is the core component of funcchain. It takes the docstring, input arguments and return type of the function and compiles everything into a langchain runnable . It then executes the prompt with your input arguments if you call the function and returns the parsed result. + + # Print your response + ```python + recipe = generate_recipe("christmas dinner") + + print(recipe.ingredients) + ``` + +### Demo + +
+ ``` + $ print(generate_recipe("christmas dinner").ingredients + + ['turkey', 'potatoes', 'carrots', 'brussels sprouts', 'cranberry sauce', 'gravy','butter', 'salt', 'pepper', 'rosemary'] + + ``` +
+ +## Complex Structured Output + +([full code](../index.md#complex-example)) + +!!! Step-by-Step + **Nececary Imports** + + ```python + from pydantic import BaseModel, Field + from funcchain import chain + ``` + + **Data Structures and Model Definitions** + ```python + # define nested models + class Item(BaseModel): + name: str = Field(description="Name of the item") + description: str = Field(description="Description of the item") + keywords: list[str] = Field(description="Keywords for the item") + + class ShoppingList(BaseModel): + items: list[Item] + store: str = Field(description="The store to buy the items from") + + class TodoList(BaseModel): + todos: list[Item] + urgency: int = Field(description="The urgency of all tasks (1-10)") + + ``` + + In this example, we create a more complex data structure with nested models. + The Item model defines the attributes of a single item, such as its name, description, and keywords. + ShoppingList and TodoList models define the attributes of a shopping list and a todo list, utilizing the Item model as a nested model. + + You can define new Pydantic models or extend existing ones by adding additional fields or methods. The general approach is to identify the data attributes relevant to your application and create corresponding model classes with these attributes. + + The Field descriptions serve as prompts for the language model to understand the data structure. + Additionally you can include a docstring for each model class to provide further information to the LLM. + + !!! Important + Everything including class names, argument names, doc string and field descriptions are part of the prompt and can be optimised using prompting techniques. + + + **Union types** + ```python + # support for union types + def extract_list(user_input: str) -> TodoList | ShoppingList: + """ + The user input is either a shopping List or a todo list. + """ + return chain() + ``` + The extract_list function uses the chain function to analyze user input and return a structured list: + In the example: + - Union Types: It can return either a TodoList or a ShoppingList, depending on the input. + - Usage of chain: chain simplifies the process, deciding the type of list to return. + + For your application this is going to serve as a router to route between your previously defined models. + + **Get a list from the user** (here as "lst") + ```python + # the model will choose the output type automatically + lst = extract_list( + input("Enter your list: ") + ) + + ``` + + **Define your custom handlers** + + And now its time to define what happens with the result. + You can then use the lst (list) variable to access the attributes of the list. + It utilizes pattern matching to determine the type of list and print the corresponding output. + + ```python + # custom handler based on type + match lst: + case ShoppingList(items=items, store=store): + print("Here is your Shopping List: ") + for item in items: + print(f"{item.name}: {item.description}") + print(f"You need to go to: {store}") + + case TodoList(todos=todos, urgency=urgency): + print("Here is your Todo List: ") + for item in todos: + print(f"{item.name}: {item.description}") + print(f"Urgency: {urgency}") + + ``` + +
+ ``` + lst = extract_list( + input("Enter your list: ") + ) + + User: + $ Complete project report, Prepare for meeting, Respond to emails; + $ if I don't respond I will be fired + + Output: + $ ............... + Here is your Todo List: + Complete your buisness tasks: project report, Prepare for meeting, Respond to emails + Urgency: 10 + //add real output + ``` +
+ +## Vision Models + +```python +from PIL import Image +from pydantic import BaseModel, Field +from funcchain import chain, settings + +# set global llm using model identifiers (see MODELS.md) +settings.llm = "openai/gpt-4-vision-preview" + +# everything defined is part of the prompt +class AnalysisResult(BaseModel): + """The result of an image analysis.""" + + theme: str = Field(description="The theme of the image") + description: str = Field(description="A description of the image") + objects: list[str] = Field(description="A list of objects found in the image") + +# easy use of images as input with structured output +def analyse_image(image: Image.Image) -> AnalysisResult: + """ + Analyse the image and extract its + theme, description and objects. + """ + return chain() + +result = analyse_image(Image.open("examples/assets/old_chinese_temple.jpg")) + +print("Theme:", result.theme) +print("Description:", result.description) +for obj in result.objects: + print("Found this object:", obj) +``` + +!!! Step-by-Step + **Nececary Imports** + + ```python + from PIL import Image + from pydantic import BaseModel, Field + from funcchain import chain, settings + ``` + + **Define Model** + set global llm using model identifiers see [MODELS.md](https://github.com/shroominic/funcchain/blob/main/MODELS.md) + ```python + settings.llm = "openai/gpt-4-vision-preview" + ``` + Funcchains modularity allows for all kinds of models including local models + + + **Analize Image** + Get structured output from an image in our example `theme`, `description` and `objects` + ```python + # everything defined is part of the prompt + class AnalysisResult(BaseModel): + """The result of an image analysis.""" + + theme: str = Field(description="The theme of the image") + description: str = Field(description="A description of the image") + objects: list[str] = Field(description="A list of objects found in the image") + ``` + Adjsut the fields as needed. Play around with the example, feel free to experiment. + You can customize the analysis by modifying the fields of the `AnalysisResult` model. + + **Function to start the analysis** + + ```python + # easy use of images as input with structured output + def analyse_image(image: Image.Image) -> AnalysisResult: + """ + Analyse the image and extract its + theme, description and objects. + """ + return chain() + ``` + Chain() will handle the image input. + We here define again the fields from before `theme`, `description` and `objects` + + give an image as input `image: Image.Image` + + Its important that the fields defined earlier are mentioned here with the prompt + `Analyse the image and extract its`... + +
+ ``` + result = analyse_image( + Image.from_file("examples/assets/old_chinese_temple.jpg") + ) + + print("Theme:", result.theme) + print("Description:", result.description) + for obj in result.objects: + print("Found this object:", obj) + + $ .................. + + Theme: Traditional Japanese architecture and nature during rainfall + Description: The image depicts a serene rainy scene at night in a traditional Japanese setting. A two-story wooden building with glowing green lanterns is the focal point, surrounded by a cobblestone path, a blooming pink cherry blossom tree, and a stone lantern partially obscured by the rain. The atmosphere is tranquil and slightly mysterious. + Found this object: building + Found this object: green lanterns + Found this object: cherry blossom tree + Found this object: rain + Found this object: cobblestone path + Found this object: stone lantern + Found this object: wooden structure + + ``` + +
+ +## Seamless local model support + +Yes you can use funcchain without internet connection. +Start heating up your device. + +```python +from pydantic import BaseModel, Field +from funcchain import chain, settings + +# auto-download the model from huggingface +settings.llm = "ollama/openchat" + +class SentimentAnalysis(BaseModel): + analysis: str + sentiment: bool = Field(description="True for Happy, False for Sad") + +def analyze(text: str) -> SentimentAnalysis: + """ + Determines the sentiment of the text. + """ + return chain() + +# generates using the local model +poem = analyze("I really like when my dog does a trick!") + +# promised structured output (for local models!) +print(poem.analysis) +``` + +!!! Step-by-Step + **Nececary Imports** + + ```python + from pydantic import BaseModel, Field + from funcchain import chain, settings + ``` + + **Choose and enjoy** + ```python + # auto-download the model from huggingface + settings.llm = "llamacpp/openchat-3.5-0106:Q3_K_M" + ``` + + **Structured output definition** + With an input `str` a description can be added to return a boolean `true` or `false` + ```python + class SentimentAnalysis(BaseModel): + analysis: str + sentiment: bool = Field(description="True for Happy, False for Sad") + ``` + Experiment yourself by adding different descriptions for the true and false case. + + **Use `chain()` to analize** + Defines with natural language the analysis + ```python + def analyze(text: str) -> SentimentAnalysis: + """ + Determines the sentiment of the text. + """ + return chain() + ``` + For your own usage adjust the str. Be precise and reference your classes again. + + **Generate and print the output** + ```python + **Use the analyze function and print output** + + # generates using the local model + poem = analyze("I really like when my dog does a trick!") + + # promised structured output (for local models!) + print(poem.analysis) + ``` + +!!! Useful + + For seeing whats going on inside the LLM you should try the Langsmith integration: + Add those lines to .env and funcchain will use langsmith tracing. + + ```bash + LANGCHAIN_TRACING_V2=true + LANGCHAIN_API_KEY="ls__api_key" + LANGCHAIN_PROJECT="PROJECT_NAME" + ``` + + Langsmith is used to understand what happens under the hood of your LLM generations. + When multiple LLM calls are used for an output they can be logged for debugging. diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index e69de29..d6c3b9f 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -0,0 +1,37 @@ + +# Installation + +
+ +```bash +$ pip install funcchain +---> 100% +``` + +
+ +For additional features you can also install: + +- `funcchain` (langchain_core + openai) +- `funcchain[ollama]` (you need to install this [ollama fork](https://github.com/ollama/ollama/pull/1606) for grammar support) +- `funcchain[llamacpp]` (using llama-cpp-python) +- `funcchain[pillow]` (for vision model features) +- `funcchain[all]` (includes everything) + +To enter this in your terminal you need to write it like this: +`pip install "funcchain[all]"` + +## Environment + +Make sure to have an OpenAI API key in your environment variables. For example, + +
+ +```bash +export OPENAI_API_KEY="sk-rnUPxirSQ4bmz2He4qyaiKShdXJcsOsTg" +``` + +
+ +But you can also create a `.env` file in your current working directory and include the key there. +The dot env file will load automatically. diff --git a/docs/getting-started/models.md b/docs/getting-started/models.md new file mode 100644 index 0000000..f9c21a4 --- /dev/null +++ b/docs/getting-started/models.md @@ -0,0 +1,52 @@ +# Supported Models + +## LangChain Chat Models + +You can set the `settings.llm` with any LangChain ChatModel. + +```python +from funcchain import settings +from langchain_openai.chat_models import AzureChatOpenAI + +settings.llm = AzureChatOpenAI(...) +``` + +## String Model Identifiers + +You can also set the `settings.llm` with a string identifier of a ChatModel including local models. + +```python +from funcchain import settings + +settings.llm = "llamacpp/openchat-3.5-1210" + +# ... +``` + +### Schema + +`/:` + +### Providers + +- `openai`: OpenAI Chat Models +- `llamacpp`: Run local models directly using llamacpp (alias: `thebloke`, `gguf`) +- `ollama`: Run local models through Ollama (wrapper for llamacpp) +- `azure`: Azure Chat Models +- `anthropic`: Anthropic Chat Models +- `google`: Google Chat Models + +### Examples + +- `openai/gpt-3.5-turbo`: ChatGPT Classic +- `openai/gpt-4-1106-preview`: GPT-4-Turbo +- `ollama/openchat`: OpenChat3.5-1210 +- `ollama/openhermes2.5-mistral`: OpenHermes 2.5 +- `llamacpp/openchat-3.5-1210`: OpenChat3.5-1210 +- `TheBloke/Nous-Hermes-2-SOLAR-10.7B-GGUF`: alias for `llamacpp/...` +- `TheBloke/openchat-3.5-0106-GGUF:Q3_K_L`: with Q label + +### additional notes + +Checkout the file `src/funcchain/model/defaults.py` for the code that parses the string identifier. +Feel free to create a PR to add more models to the defaults. Or tell me how wrong I am and create a better system. diff --git a/docs/getting-started/usage.md b/docs/getting-started/usage.md index e69de29..ca67c01 100644 --- a/docs/getting-started/usage.md +++ b/docs/getting-started/usage.md @@ -0,0 +1,44 @@ +# Usage + +To write your cognitive architectures with the funcchain syntax you need to import the `chain` function from the `funcchain` package. + +```python +from funcchain import chain +``` + +This chain function it the core component of funcchain. +It takes the docstring, input arguments and return type of the function and compiles everything into a langchain prompt. +It then executes the prompt with your input arguments if you call the function and returns the parsed result. + +```python +def hello(lang1: str, lang2: str, lang3: str) -> list[str]: + """ + Say hello in these 3 languages. + """ + return chain() + +hello("German", "French", "Spanish") +``` + +The underlying chat in the background will look like this: + +```html + +LANG1: German +LANG2: French +LANG3: Spanish + +Say hello in these 3 languages. + + + +{ + "value": ["Hallo", "Bonjour", "Hola"] +} + +``` + +Funcchain is handling all the redundant and complicated structuring of your prompts so you can focus on the important parts of your code. + +All input arguments are automatically added to the prompt so the model has context about what you insert. +The return type is used to force the model using a json-schema to always return a json object in the desired shape. 
diff --git a/docs/index.md b/docs/index.md index dd00a0a..7a289dc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,66 +1,127 @@ -# Getting Started + +# Introduction [![Version](https://badge.fury.io/py/funcchain.svg)](https://badge.fury.io/py/funcchain) [![code-check](https://github.com/shroominic/funcchain/actions/workflows/code-check.yml/badge.svg)](https://github.com/shroominic/funcchain/actions/workflows/code-check.yml) ![Downloads](https://img.shields.io/pypi/dm/funcchain) -![License](https://img.shields.io/pypi/l/funcchain) +[![Discord](https://img.shields.io/discord/1192334452110659664?label=discord)](https://discord.gg/TrwWWMXdtR) ![PyVersion](https://img.shields.io/pypi/pyversions/funcchain) -## Welcome - -funcchain is the *most pythonic* way of writing cognitive systems. Leveraging pydantic models as output schemas combined with langchain in the backend allows for a seamless integration of llms into your apps. -It works perfect with OpenAI Functions and soon with other models using JSONFormer. - -Key features: - -- increased productivity -- prompts as Python functions -- pydantic models as output schemas -- langchain schemas in the backend -- fstrings or jinja templates for prompts -- fully utilises OpenAI Functions -- minimalistic and easy to use +`funcchain` is the _most pythonic_ way of writing cognitive systems. Leveraging pydantic models as output schemas combined with langchain in the backend allows for a seamless integration of llms into your apps. +It utilizes perfect with OpenAI Functions or LlamaCpp grammars (json-schema-mode) for efficient structured output. +In the backend it compiles the funcchain syntax into langchain runnables so you can easily invoke, stream or batch process your pipelines. ## Installation -```bash -pip install funcchain -``` - -Make sure to have an OpenAI API key in your environment variables. For example, +
```bash -export OPENAI_API_KEY=sk-********** +$ pip install funcchain +---> 100% ``` +
+ +!!! Important + Make sure to have an OpenAI API key in your environment variables: + + ```bash + export OPENAI_API_KEY="sk-rnUPxirSQ4bmz2He4qyaiKShdXJcsOsTg" + ``` + (not needed for local models of course) + +## Key Features + +- pythonic +- easy swap between openai or local models +- dynamic output types (pydantic models, or primitives) +- vision llm support +- langchain_core as backend +- jinja templating for prompts +- reliable structured output +- auto retry parsing +- langsmith support +- sync, async, streaming, parallel, fallbacks +- gguf download from huggingface +- type hints for all functions and mypy support +- chat router component +- composable with langchain LCEL +- easy error handling +- enums and literal support +- custom parsing types + ## Usage ```python from funcchain import chain def hello() -> str: - """Say hello in 3 languages""" + """ + Say hello in 3 languages. + """ return chain() -print(hello()) # -> Hello, Bonjour, Hola +print(hello()) # -> "Hallo, Bonjour, Hola" ``` This will call the OpenAI API and return the response. +Its using OpenAI since we did not specify a model and it will use the default model from the global settings of funcchain. -The `chain` function extracts the docstring as the prompt and the return type for parsing the response. +The underlying chat will look like this: -## Contributing +- User: "Say hello in 3 languages." +- AI: "Hallo, Bonjour, Hola" -To contribute, clone the repo and run: +The `chain()` function does all the magic in the background. It extracts the docstring, input arguments and return type of the function and compiles everything into a langchain prompt. -```bash -./dev_setup.sh -``` +## Complex Example -This will install pre-commit hooks, dependencies and set up the environment. +Here a more complex example of what is possible. We create nested pydantic models and use union types to let the model choose the best shape to parse your given list into. -To activate the virtual environment managed by poetry, you can use the following command: +```python +from pydantic import BaseModel, Field +from funcchain import chain -```bash -poetry shell +# define nested models +class Item(BaseModel): + name: str = Field(description="Name of the item") + description: str = Field(description="Description of the item") + keywords: list[str] = Field(description="Keywords for the item") + +class ShoppingList(BaseModel): + items: list[Item] + store: str = Field(description="The store to buy the items from") + +class TodoList(BaseModel): + todos: list[Item] + urgency: int = Field(description="The urgency of all tasks (1-10)") + +# support for union types +def extract_list(user_input: str) -> TodoList | ShoppingList: + """ + The user input is either a shopping List or a todo list. + """ + return chain() + +# the model will choose the output type automatically +lst = extract_list( + input("Enter your list: ") +) + +# custom handler based on type +match lst: + case ShoppingList(items=items, store=store): + print("Here is your Shopping List: ") + for item in items: + print(f"{item.name}: {item.description}") + print(f"You need to go to: {store}") + + case TodoList(todos=todos, urgency=urgency): + print("Here is your Todo List: ") + for item in todos: + print(f"{item.name}: {item.description}") + print(f"Urgency: {urgency}") ``` + +The pydantic models force the language model to output only in the specified format. The actual ouput is a json string which is parsed into the pydantic model. 
This allows for a seamless integration of the language model into your app. +The union type selection works by listing every pydantic model as seperate function call to the model. So the LLM will select the best fitting pydantic model based on the prompt and inputs. diff --git a/docs/js/custom.js b/docs/js/custom.js new file mode 100644 index 0000000..862ad25 --- /dev/null +++ b/docs/js/custom.js @@ -0,0 +1,147 @@ +function setupTermynal() { + document.querySelectorAll(".use-termynal").forEach((node) => { + node.style.display = "block"; + new Termynal(node, { + lineDelay: 500, + }); + }); + const progressLiteralStart = "---> 100%"; + const promptLiteralStart = "$ "; + const customPromptLiteralStart = "# "; + const termynalActivateClass = "termy"; + let termynals = []; + + function createTermynals() { + document + .querySelectorAll(`.${termynalActivateClass} .highlight`) + .forEach((node) => { + const text = node.textContent; + const lines = text.split("\n"); + const useLines = []; + let buffer = []; + function saveBuffer() { + if (buffer.length) { + let isBlankSpace = true; + buffer.forEach((line) => { + if (line) { + isBlankSpace = false; + } + }); + dataValue = {}; + if (isBlankSpace) { + dataValue["delay"] = 0; + } + if (buffer[buffer.length - 1] === "") { + // A last single
won't have effect + // so put an additional one + buffer.push(""); + } + const bufferValue = buffer.join("
"); + dataValue["value"] = bufferValue; + useLines.push(dataValue); + buffer = []; + } + } + for (let line of lines) { + if (line === progressLiteralStart) { + saveBuffer(); + useLines.push({ + type: "progress", + }); + } else if (line.startsWith(promptLiteralStart)) { + saveBuffer(); + const value = line.replace(promptLiteralStart, "").trimEnd(); + useLines.push({ + type: "input", + value: value, + }); + } else if (line.startsWith("// ")) { + saveBuffer(); + const value = "💬 " + line.replace("// ", "").trimEnd(); + useLines.push({ + value: value, + class: "termynal-comment", + delay: 0, + }); + } else if (line.startsWith(customPromptLiteralStart)) { + saveBuffer(); + const promptStart = line.indexOf(promptLiteralStart); + if (promptStart === -1) { + console.error("Custom prompt found but no end delimiter", line); + } + const prompt = line + .slice(0, promptStart) + .replace(customPromptLiteralStart, ""); + let value = line.slice(promptStart + promptLiteralStart.length); + useLines.push({ + type: "input", + value: value, + prompt: prompt, + }); + } else { + buffer.push(line); + } + } + saveBuffer(); + const div = document.createElement("div"); + node.replaceWith(div); + const termynal = new Termynal(div, { + lineData: useLines, + noInit: true, + lineDelay: 500, + }); + termynals.push(termynal); + }); + } + + function loadVisibleTermynals() { + termynals = termynals.filter((termynal) => { + if (termynal.container.getBoundingClientRect().top - innerHeight <= 0) { + termynal.init(); + return false; + } + return true; + }); + } + window.addEventListener("scroll", loadVisibleTermynals); + createTermynals(); + loadVisibleTermynals(); +} + +function addCopyButtons() { + document.querySelectorAll("pre code").forEach(function (codeBlock) { + var button = document.createElement("button"); + button.className = "copy-code-button"; + button.type = "button"; + button.innerText = "Copy"; + button.addEventListener("click", function () { + navigator.clipboard.writeText(codeBlock.innerText).then( + function () { + /* clipboard successfully set */ + button.innerText = "Copied!"; + setTimeout(function () { + button.innerText = "Copy"; + }, 2000); + }, + function () { + /* clipboard write failed */ + button.innerText = "Failed to copy"; + }, + ); + }); + + var pre = codeBlock.parentNode; + if (pre.parentNode.classList.contains("highlight")) { + var highlight = pre.parentNode; + highlight.parentNode.insertBefore(button, highlight); + } + }); +} + +// Call addCopyButtons in your main function or after the DOM content is fully loaded +async function main() { + setupTermynal(); + addCopyButtons(); // Add this line to your existing main function +} + +main(); diff --git a/docs/js/termynal.js b/docs/js/termynal.js new file mode 100644 index 0000000..6c54353 --- /dev/null +++ b/docs/js/termynal.js @@ -0,0 +1,282 @@ +/** + * termynal.js + * A lightweight, modern and extensible animated terminal window, using + * async/await. + * + * @author Ines Montani + * @version 0.0.1 + * @license MIT + */ + +"use strict"; + +/** Generate a terminal widget. */ +class Termynal { + /** + * Construct the widget's settings. + * @param {(string|Node)=} container - Query selector or container element. + * @param {Object=} options - Custom settings. + * @param {string} options.prefix - Prefix to use for data attributes. + * @param {number} options.startDelay - Delay before animation, in ms. + * @param {number} options.typeDelay - Delay between each typed character, in ms. 
+ * @param {number} options.lineDelay - Delay between each line, in ms. + * @param {number} options.progressLength - Number of characters displayed as progress bar. + * @param {string} options.progressChar – Character to use for progress bar, defaults to █. + * @param {number} options.progressPercent - Max percent of progress. + * @param {string} options.cursor – Character to use for cursor, defaults to ▋. + * @param {Object[]} lineData - Dynamically loaded line data objects. + * @param {boolean} options.noInit - Don't initialise the animation. + */ + constructor(container = "#termynal", options = {}) { + this.container = + typeof container === "string" + ? document.querySelector(container) + : container; + this.pfx = `data-${options.prefix || "ty"}`; + this.originalStartDelay = this.startDelay = + options.startDelay || + parseFloat(this.container.getAttribute(`${this.pfx}-startDelay`)) || + 600; + this.originalTypeDelay = this.typeDelay = + options.typeDelay || + parseFloat(this.container.getAttribute(`${this.pfx}-typeDelay`)) || + 90; + this.originalLineDelay = this.lineDelay = + options.lineDelay || + parseFloat(this.container.getAttribute(`${this.pfx}-lineDelay`)) || + 1500; + this.progressLength = + options.progressLength || + parseFloat(this.container.getAttribute(`${this.pfx}-progressLength`)) || + 40; + this.progressChar = + options.progressChar || + this.container.getAttribute(`${this.pfx}-progressChar`) || + "█"; + this.progressPercent = + options.progressPercent || + parseFloat(this.container.getAttribute(`${this.pfx}-progressPercent`)) || + 100; + this.cursor = + options.cursor || + this.container.getAttribute(`${this.pfx}-cursor`) || + "▋"; + this.lineData = this.lineDataToElements(options.lineData || []); + this.loadLines(); + if (!options.noInit) this.init(); + } + + loadLines() { + // Load all the lines and create the container so that the size is fixed + // Otherwise it would be changing and the user viewport would be constantly + // moving as she/he scrolls + const finish = this.generateFinish(); + finish.style.visibility = "hidden"; + this.container.appendChild(finish); + // Appends dynamically loaded lines to existing line elements. + this.lines = [...this.container.querySelectorAll(`[${this.pfx}]`)].concat( + this.lineData, + ); + for (let line of this.lines) { + line.style.visibility = "hidden"; + this.container.appendChild(line); + } + const restart = this.generateRestart(); + restart.style.visibility = "hidden"; + this.container.appendChild(restart); + this.container.setAttribute("data-termynal", ""); + } + + /** + * Initialise the widget, get lines, clear container and start animation. + */ + init() { + /** + * Calculates width and height of Termynal container. + * If container is empty and lines are dynamically loaded, defaults to browser `auto` or CSS. + */ + const containerStyle = getComputedStyle(this.container); + this.container.style.width = + containerStyle.width !== "0px" ? containerStyle.width : undefined; + this.container.style.minHeight = + containerStyle.height !== "0px" ? containerStyle.height : undefined; + + this.container.setAttribute("data-termynal", ""); + this.container.innerHTML = ""; + for (let line of this.lines) { + line.style.visibility = "visible"; + } + this.start(); + } + + /** + * Start the animation and rener the lines depending on their data attributes. 
+ */ + async start() { + this.addFinish(); + await this._wait(this.startDelay); + + for (let line of this.lines) { + const type = line.getAttribute(this.pfx); + const delay = line.getAttribute(`${this.pfx}-delay`) || this.lineDelay; + + if (type == "input") { + line.setAttribute(`${this.pfx}-cursor`, this.cursor); + await this.type(line); + await this._wait(delay); + } else if (type == "progress") { + await this.progress(line); + await this._wait(delay); + } else { + this.container.appendChild(line); + await this._wait(delay); + } + + line.removeAttribute(`${this.pfx}-cursor`); + } + this.addRestart(); + this.finishElement.style.visibility = "hidden"; + this.lineDelay = this.originalLineDelay; + this.typeDelay = this.originalTypeDelay; + this.startDelay = this.originalStartDelay; + } + + generateRestart() { + const restart = document.createElement("a"); + restart.onclick = (e) => { + e.preventDefault(); + this.container.innerHTML = ""; + this.init(); + }; + restart.href = "#"; + restart.setAttribute("data-terminal-control", ""); + restart.innerHTML = "restart ↻"; + return restart; + } + + generateFinish() { + const finish = document.createElement("a"); + finish.onclick = (e) => { + e.preventDefault(); + this.lineDelay = 0; + this.typeDelay = 0; + this.startDelay = 0; + }; + finish.href = "#"; + finish.setAttribute("data-terminal-control", ""); + finish.innerHTML = "fast →"; + this.finishElement = finish; + return finish; + } + + addRestart() { + const restart = this.generateRestart(); + this.container.appendChild(restart); + } + + addFinish() { + const finish = this.generateFinish(); + this.container.appendChild(finish); + } + + /** + * Animate a typed line. + * @param {Node} line - The line element to render. + */ + async type(line) { + const chars = [...line.textContent]; + line.textContent = ""; + this.container.appendChild(line); + + for (let char of chars) { + const delay = + line.getAttribute(`${this.pfx}-typeDelay`) || this.typeDelay; + await this._wait(delay); + line.textContent += char; + } + } + + /** + * Animate a progress bar. + * @param {Node} line - The line element to render. + */ + async progress(line) { + const progressLength = + line.getAttribute(`${this.pfx}-progressLength`) || this.progressLength; + const progressChar = + line.getAttribute(`${this.pfx}-progressChar`) || this.progressChar; + const chars = progressChar.repeat(progressLength); + const progressPercent = + line.getAttribute(`${this.pfx}-progressPercent`) || this.progressPercent; + line.textContent = ""; + this.container.appendChild(line); + + for (let i = 1; i < chars.length + 1; i++) { + await this._wait(this.typeDelay); + const percent = Math.round((i / chars.length) * 100); + line.textContent = `${chars.slice(0, i)} ${percent}%`; + if (percent > progressPercent) { + break; + } + } + } + + /** + * Helper function for animation delays, called with `await`. + * @param {number} time - Timeout, in ms. + */ + _wait(time) { + return new Promise((resolve) => setTimeout(resolve, time)); + } + + /** + * Converts line data objects into line elements. + * + * @param {Object[]} lineData - Dynamically loaded lines. + * @param {Object} line - Line data object. + * @returns {Element[]} - Array of line elements. + */ + lineDataToElements(lineData) { + return lineData.map((line) => { + let div = document.createElement("div"); + div.innerHTML = `${ + line.value || "" + }`; + + return div.firstElementChild; + }); + } + + /** + * Helper function for generating attributes string. 
+ * + * @param {Object} line - Line data object. + * @returns {string} - String of attributes. + */ + _attributes(line) { + let attrs = ""; + for (let prop in line) { + // Custom add class + if (prop === "class") { + attrs += ` class=${line[prop]} `; + continue; + } + if (prop === "type") { + attrs += `${this.pfx}="${line[prop]}" `; + } else if (prop !== "value") { + attrs += `${this.pfx}-${prop}="${line[prop]}" `; + } + } + return attrs; + } +} + +/** + * HTML API: If current script has container(s) specified, initialise Termynal. + */ +if (document.currentScript.hasAttribute("data-termynal-container")) { + const containers = document.currentScript.getAttribute( + "data-termynal-container", + ); + containers.split("|").forEach((container) => new Termynal(container)); +} diff --git a/docs/settings.md b/docs/settings.md deleted file mode 100644 index a9c26a9..0000000 --- a/docs/settings.md +++ /dev/null @@ -1,74 +0,0 @@ -# Settings - -## Settings Class Overview - -The configuration settings for Funcchain are encapsulated within the `FuncchainSettings` class. This class inherits from Pydantic's `BaseSettings`. - -`funcchain/config.py` - -```python -class FuncchainSettings(BaseSettings): - ... -``` - -## Setting Descriptions - -### General Settings - -- `llm: BaseChatModel | RunnableWithFallbacks | str = "openai/gpt-3.5-turbo"` - Defines the language learning model to be used. It can be a type of `BaseChatModel`, `RunnableWithFallbacks`, or `str` (model_name). - -- `verbose: bool = True` - Enables or disables verbose logging. - -### Prompt Settings - -- `max_tokens: int = 4096` - Specifies the maximum number of tokens for chat models. - -- `default_system_prompt: str = "You are a professional assistant solving tasks."` - Default prompt used for initializing the system. - -### API Keys - -- `openai_api_key: Optional[str] = None` - API key for the OpenAI service. - -- `azure_api_key: Optional[str] = None` - API key for the Azure service. - -- `anthropic_api_key: Optional[str] = None` - API key for the Anthropic service. - -- `google_api_key: Optional[str] = None` - API key for the Google service. - -- `JINACHAT_API_KEY: Optional[str] = None` - API key for the JinaChat service. - -### Azure Settings - -- `AZURE_API_BASE: Optional[str] = None` - Base URL for the Azure API. - -- `AZURE_DEPLOYMENT_NAME: str = "gpt-4"` - Deployment name for the Azure service. - -- `AZURE_DEPLOYMENT_NAME_LONG: Optional[str] = None` - Extended deployment name for the Azure service, if applicable. - -- `AZURE_API_VERSION: str = "2023-07-01-preview"` - API version for the Azure service. - -### Model Keyword Arguments - -- `temperature: float = 0.1` - Controls the randomness in the model's output. - -- `verbose: bool = False` - Enables or disables verbose logging for the model. - -### Additional Methods - -- `model_kwargs(self) -> dict[str, Any]` - Method that returns a dictionary of keyword arguments for the model initialization based on the settings. 
diff --git a/examples/async/expert_answer.py b/examples/async/expert_answer.py index 09a9d93..faa7852 100644 --- a/examples/async/expert_answer.py +++ b/examples/async/expert_answer.py @@ -2,12 +2,10 @@ from asyncio import run as _await from random import shuffle -from pydantic import BaseModel - from funcchain import achain, settings +from pydantic import BaseModel settings.temperature = 1 -settings.llm = "openai/gpt-3.5-turbo-1106" async def generate_answer(question: str) -> str: @@ -33,14 +31,10 @@ async def expert_answer(question: str) -> str: # Shuffle the answers to ensure randomness enum_answers = list(enumerate(answers)) shuffle(enum_answers) - ranked_answers = await gather( - *(rank_answers(question, enum_answers) for _ in range(3)) - ) + ranked_answers = await gather(*(rank_answers(question, enum_answers) for _ in range(3))) highest_ranked_answer = max( ranked_answers, - key=lambda x: sum( - 1 for ans in ranked_answers if ans.selected_answer == x.selected_answer - ), + key=lambda x: sum(1 for ans in ranked_answers if ans.selected_answer == x.selected_answer), ).selected_answer return answers[highest_ranked_answer] @@ -51,3 +45,5 @@ async def expert_answer(question: str) -> str: answer = _await(expert_answer(question)) print(answer) + + assert isinstance(answer, str) diff --git a/examples/async/startup_names.py b/examples/async/startup_names.py index a0be125..6f5f2c3 100644 --- a/examples/async/startup_names.py +++ b/examples/async/startup_names.py @@ -1,8 +1,7 @@ import asyncio -from pydantic import BaseModel - from funcchain import achain, settings +from pydantic import BaseModel settings.temperature = 1 @@ -31,4 +30,6 @@ async def generate_random_startups(topic: str, amount: int = 3) -> list[StartupC for startup in startups: print("name:", startup.name) + assert isinstance(startup.name, str) print("concept:", startup.description) + assert isinstance(startup.description, str) diff --git a/examples/chatgpt.py b/examples/chatgpt.py index c82953d..aa09320 100644 --- a/examples/chatgpt.py +++ b/examples/chatgpt.py @@ -1,12 +1,11 @@ """ Simple chatgpt rebuild with memory/history. """ -from langchain.memory import ChatMessageHistory - from funcchain import chain, settings -from funcchain.streaming import stream_to +from funcchain.utils.memory import ChatMessageHistory settings.llm = "openai/gpt-4" +settings.console_stream = True history = ChatMessageHistory() @@ -32,8 +31,7 @@ def chat_loop() -> None: print("\033c") continue - with stream_to(print): - ask(query) + ask(query) if __name__ == "__main__": diff --git a/examples/custom_model_display.py b/examples/custom_model_display.py new file mode 100644 index 0000000..1bfe69a --- /dev/null +++ b/examples/custom_model_display.py @@ -0,0 +1,27 @@ +from funcchain import chain +from pydantic import BaseModel + + +class Task(BaseModel): + name: str + description: str + difficulty: int + + def __str__(self) -> str: + return f"{self.name}\n - {self.description}\n - Difficulty: {self.difficulty}" + + +def plan_task(task: Task) -> str: + """ + Based on the task infos, plan the task step by step. 
+ """ + return chain() + + +if __name__ == "__main__": + task = Task( + name="Do Laundry", + description="Collect and wash all the dirty clothes.", + difficulty=4, + ) + print(plan_task(task)) diff --git a/examples/decorator.py b/examples/decorator.py new file mode 100644 index 0000000..7a2cef9 --- /dev/null +++ b/examples/decorator.py @@ -0,0 +1,31 @@ +from funcchain.syntax import chain, runnable +from langchain_community.vectorstores.faiss import FAISS +from langchain_core.runnables import Runnable, RunnablePassthrough +from langchain_openai.embeddings import OpenAIEmbeddings + + +@runnable +def generate_poem(topic: str, context: str) -> str: + """ + Generate a short poem about the topic with the given context. + """ + return chain() + + +vectorstore = FAISS.from_texts( + [ + "japanese tea is full of heart warming flavors", + "in the morning you should take a walk", + "cold showers are good for your health", + ], + embedding=OpenAIEmbeddings(), +) +retriever = vectorstore.as_retriever(search_kwargs={"k": 1}) + +retrieval_chain: Runnable = { + "context": retriever, + "topic": RunnablePassthrough(), +} | generate_poem + + +print(retrieval_chain.invoke("love")) diff --git a/examples/dynamic_router.py b/examples/dynamic_router.py new file mode 100644 index 0000000..cbff138 --- /dev/null +++ b/examples/dynamic_router.py @@ -0,0 +1,84 @@ +from enum import Enum +from typing import Any, Callable, TypedDict + +from funcchain.syntax.executable import compile_runnable +from pydantic import BaseModel, Field + +# Dynamic Router Definition: + + +class Route(TypedDict): + handler: Callable + description: str + + +class DynamicChatRouter(BaseModel): + routes: dict[str, Route] + + def _routes_repr(self) -> str: + return "\n".join([f"{route_name}: {route['description']}" for route_name, route in self.routes.items()]) + + def invoke_route(self, user_query: str, /, **kwargs: Any) -> Any: + RouteChoices = Enum( # type: ignore + "RouteChoices", + {r: r for r in self.routes.keys()}, + type=str, + ) + + class RouterModel(BaseModel): + selector: RouteChoices = Field( + default="default", + description="Enum of the available routes.", + ) + + route_query = compile_runnable( + instruction="Given the user query select the best query handler for it.", + input_args=["user_query", "query_handlers"], + output_types=[RouterModel], + ) + + selected_route = route_query.invoke( + input={ + "user_query": user_query, + "query_handlers": self._routes_repr(), + } + ).selector + assert isinstance(selected_route, str) + + return self.routes[selected_route]["handler"](user_query, **kwargs) + + +# Example Usage: + + +def handle_pdf_requests(user_query: str) -> str: + return "Handling PDF requests with user query: " + user_query + + +def handle_csv_requests(user_query: str) -> str: + return "Handling CSV requests with user query: " + user_query + + +def handle_default_requests(user_query: str) -> str: + return "Handling DEFAULT requests with user query: " + user_query + + +router = DynamicChatRouter( + routes={ + "pdf": { + "handler": handle_pdf_requests, + "description": "Call this for requests including PDF Files.", + }, + "csv": { + "handler": handle_csv_requests, + "description": "Call this for requests including CSV Files.", + }, + "default": { + "handler": handle_default_requests, + "description": "Call this for all other requests.", + }, + }, +) + + +router.invoke_route("Can you summarize this csv?") diff --git a/examples/enums.py b/examples/enums.py index 3efb97e..46dcb48 100644 --- a/examples/enums.py +++ 
b/examples/enums.py @@ -1,7 +1,8 @@ -from funcchain import chain, settings -from pydantic import BaseModel from enum import Enum +from funcchain import chain +from pydantic import BaseModel + class Answer(str, Enum): yes = "yes" @@ -20,6 +21,4 @@ def make_decision(question: str) -> Decision: if __name__ == "__main__": - settings.llm = "gguf/phi-2" - print(make_decision("Do you like apples?")) diff --git a/examples/error_output.py b/examples/error_output.py index 45d8ebe..92d77e7 100644 --- a/examples/error_output.py +++ b/examples/error_output.py @@ -1,6 +1,5 @@ -from rich import print - from funcchain import BaseModel, Error, chain +from rich import print class User(BaseModel): @@ -11,7 +10,7 @@ class User(BaseModel): def extract_user_info(text: str) -> User | Error: """ Extract the user information from the given text. - If you do not have enough infos, raise. + In case you do not have enough infos, raise. """ return chain() @@ -19,6 +18,4 @@ def extract_user_info(text: str) -> User | Error: if __name__ == "__main__": print(extract_user_info("hey")) # returns Error - print( - extract_user_info("I'm John and my mail is john@gmail.com") - ) # returns a User object + print(extract_user_info("I'm John and my mail is john@gmail.com")) # returns a User object diff --git a/examples/experiments/dynamic_model_generation.py b/examples/experiments/dynamic_model_generation.py deleted file mode 100644 index 4b0ce4b..0000000 --- a/examples/experiments/dynamic_model_generation.py +++ /dev/null @@ -1,58 +0,0 @@ -from langchain.document_loaders import WebBaseLoader -from pydantic import BaseModel -from rich import print - -from funcchain import chain, settings -from funcchain.parser import CodeBlock - -settings.llm = "gpt-4-1106-preview" -settings.context_lenght = 4096 * 8 - - -def create_model(web_page: str) -> CodeBlock: - """ - Based on the pure web page, create a Pydantic to extract the core contents of the page. - Create now a Pydantic model to represent this structure. - Only include imports and the model class. - Always name the class "StructuredOutput". The user can change it later. - """ - return chain() - - -def fix_imports(error: str) -> CodeBlock: - """ - Write proper import statements for the given error. - """ - return chain() - - -if __name__ == "__main__": - url = input("Give me a link and I scrape your page!\n> Url: ") - - page = WebBaseLoader(url).load() - - model = create_model(page.__str__()) - - print("Model:\n", model.code) - - try: - exec(model.code) - except Exception as e: - imports = fix_imports(str(e)) - exec(imports.code) - exec(model.code) - - class StructuredOutput(BaseModel): - ... - - def scrape_page( - page: str, - ) -> StructuredOutput: - """ - Scrape the unstructured data into the given shape. 
- """ - return chain() - - output = scrape_page(str(page)) - - print(output) diff --git a/examples/experiments/email_answering.py b/examples/experiments/email_answering.py index d7b1071..a1d2416 100644 --- a/examples/experiments/email_answering.py +++ b/examples/experiments/email_answering.py @@ -20,9 +20,7 @@ def get_emails_from_inbox() -> List[Tuple[str, str]]: """ # Run AppleScript and collect output - process = subprocess.Popen( - ["osascript", "-e", apple_script], stdout=subprocess.PIPE - ) + process = subprocess.Popen(["osascript", "-e", apple_script], stdout=subprocess.PIPE) out, _ = process.communicate() raw_output = out.decode("utf-8").strip() diff --git a/examples/experiments/generate_pp_and_tos.py b/examples/experiments/generate_pp_and_tos.py index 32dc84d..b739521 100644 --- a/examples/experiments/generate_pp_and_tos.py +++ b/examples/experiments/generate_pp_and_tos.py @@ -56,18 +56,14 @@ def generate_pp(answered_questions: list[str]) -> str: if __name__ == "__main__": - print( - "Please answer the following questions to generate a Terms of Service and Privacy Policy." - ) + print("Please answer the following questions to generate a Terms of Service and Privacy Policy.") print("To skip a question, press enter without typing anything.") legal_questions = example_legal_questions.copy() # or from scratch using generate_legal_questions() for i, question in enumerate(legal_questions): - answer = ( - input(f"{i+1}/{len(legal_questions)}: {question} ") or "No answer provided." - ) + answer = input(f"{i+1}/{len(legal_questions)}: {question} ") or "No answer provided." legal_questions[i] = f"Q: {question}\nA: {answer}\n" diff --git a/examples/experiments/local_codeblock.py b/examples/experiments/local_codeblock.py new file mode 100644 index 0000000..03eadc7 --- /dev/null +++ b/examples/experiments/local_codeblock.py @@ -0,0 +1,16 @@ +from funcchain import chain, settings +from funcchain.syntax.output_types import CodeBlock + + +def generate_code(instruction: str) -> CodeBlock: + return chain(instruction=instruction) + + +if __name__ == "__main__": + settings.llm = "ollama/openhermes-2.5-mistral-7b" + settings.console_stream = True + + block = generate_code("Write a script that generates a sin wave.") + + print("\033c") + print(block.code) diff --git a/examples/experiments/console_log_testing.py b/examples/experiments/parallel_console_streaming.py similarity index 86% rename from examples/experiments/console_log_testing.py rename to examples/experiments/parallel_console_streaming.py index 10dc354..693ee65 100644 --- a/examples/experiments/console_log_testing.py +++ b/examples/experiments/parallel_console_streaming.py @@ -3,15 +3,14 @@ from typing import AsyncGenerator from uuid import uuid4 +from funcchain import achain, settings +from funcchain.backend.streaming import astream_to +from funcchain.utils.token_counter import count_tokens from rich.console import Console from rich.layout import Layout from rich.live import Live from rich.panel import Panel -from funcchain import achain, settings -from funcchain.streaming import astream_to -from funcchain.utils import count_tokens - class RenderChain: def __init__(self, renderer: "Renderer", name: str) -> None: @@ -39,9 +38,7 @@ def add_chain(self, chain: RenderChain) -> None: if not self.live.is_started: self.live.start() self.console.height = (len(self.layout.children) + 1) * self.column_height - self.layout.split_column( - *self.layout.children, Layout(name=chain.id, size=self.column_height) - ) + 
self.layout.split_column(*self.layout.children, Layout(name=chain.id, size=self.column_height)) self.chains.append(chain) def render_stream(self, token: str, chain: RenderChain) -> None: @@ -49,9 +46,7 @@ def render_stream(self, token: str, chain: RenderChain) -> None: tokens: int = 0 max_width: int = self.console.width content_width: int = 0 - if isinstance(panel := self.layout[chain.id]._renderable, Panel) and isinstance( - panel.renderable, str - ): + if isinstance(panel := self.layout[chain.id]._renderable, Panel) and isinstance(panel.renderable, str): content_width = self.console.measure(panel.renderable).maximum if isinstance(panel.title, str) and " " in panel.title: tokens = int(panel.title.split(" ")[1]) @@ -63,16 +58,12 @@ def render_stream(self, token: str, chain: RenderChain) -> None: prev += token else: prev += token - self.layout[chain.id].update( - Panel(prev, title=f"({chain.name}) {tokens} tokens") - ) + self.layout[chain.id].update(Panel(prev, title=f"({chain.name}) {tokens} tokens")) self.live.update(self.layout) def remove(self, chain: RenderChain) -> None: self.chains.remove(chain) - self.layout.split_column( - *(child for child in self.layout.children if child.name != chain.id) - ) + self.layout.split_column(*(child for child in self.layout.children if child.name != chain.id)) self.console.height = (len(self.layout.children)) * self.column_height self.live.update(self.layout) if not self.chains: diff --git a/examples/jinja.py b/examples/jinja.py new file mode 100644 index 0000000..a82fbc7 --- /dev/null +++ b/examples/jinja.py @@ -0,0 +1,30 @@ +from funcchain import chain, settings +from pydantic import BaseModel + +settings.console_stream = True + + +class Cart(BaseModel): + items: list[str] + price: float + + +def shopping_analysis(cart: Cart, f_instructions: bool) -> str: + """ + Shopping List: + {% for item in cart.items %} - {{ item }} + {% endfor %} + + Determine if the cart is healthy or not and if the price is good. + {% if f_instructions %} format the output as json! {% endif %} + """ + return chain() + + +example_cart = Cart( + items=["apple", "banana", "orange", "mango", "pineapple"], + price=2.99, +) + +print(shopping_analysis(example_cart, True)) +print(shopping_analysis(example_cart, False)) diff --git a/examples/literals.py b/examples/literals.py new file mode 100644 index 0000000..5948e5e --- /dev/null +++ b/examples/literals.py @@ -0,0 +1,22 @@ +from typing import Literal + +from funcchain import chain +from pydantic import BaseModel + + +# just a silly example to schowcase the Literal type +class Ranking(BaseModel): + score: Literal[11, 22, 33, 44, 55] + error: Literal["no_input", "all_good", "invalid"] + + +def rank_output(output: str) -> Ranking: + """ + Analyze and rank the output. 
+ """ + return chain() + + +rank = rank_output("The quick brown fox jumps over the lazy dog.") + +print(rank) diff --git a/examples/llamacpp.py b/examples/llamacpp.py index 77d382e..4b7e796 100644 --- a/examples/llamacpp.py +++ b/examples/llamacpp.py @@ -1,7 +1,6 @@ -from pydantic import BaseModel, Field - from funcchain import chain, settings -from funcchain.streaming import stream_to +from pydantic import BaseModel, Field +from rich import print # define your model @@ -20,14 +19,12 @@ def analyze(text: str) -> SentimentAnalysis: if __name__ == "__main__": # set global llm - settings.llm = "gguf/openhermes-2.5-mistral-7b" - + settings.llm = "llamacpp/Nous-Hermes-2-SOLAR-10.7B" # log tokens as stream to console - with stream_to(print): - # run prompt - poem = analyze("I really like when my dog does a trick!") + settings.console_stream = True - # print final parsed output - from rich import print + # run prompt + poem = analyze("I really like when my dog does a trick!") + # show final parsed output print(poem) diff --git a/examples/ollama.py b/examples/ollama.py new file mode 100644 index 0000000..76328c8 --- /dev/null +++ b/examples/ollama.py @@ -0,0 +1,30 @@ +from funcchain import chain, settings +from pydantic import BaseModel, Field +from rich import print + + +# define your model +class SentimentAnalysis(BaseModel): + analysis: str = Field(description="A description of the analysis") + sentiment: bool = Field(description="True for Happy, False for Sad") + + +# define your prompt +def analyze(text: str) -> SentimentAnalysis: + """ + Determines the sentiment of the text. + """ + return chain() + + +if __name__ == "__main__": + # set global llm + settings.llm = "ollama/openchat" + # log tokens as stream to console + settings.console_stream = True + + # run prompt + poem = analyze("I really like when my dog does a trick!") + + # show final parsed output + print(poem) diff --git a/examples/openai_json_mode.py b/examples/openai_json_mode.py new file mode 100644 index 0000000..135887e --- /dev/null +++ b/examples/openai_json_mode.py @@ -0,0 +1,21 @@ +from funcchain import chain, settings +from pydantic import BaseModel + +settings.console_stream = True + + +class FruitSalad(BaseModel): + bananas: int = 0 + apples: int = 0 + + +def sum_fruits(fruit_salad: FruitSalad) -> int: + """ + Sum the number of fruits in a fruit salad. + """ + return chain() + + +if __name__ == "__main__": + fruit_salad = FruitSalad(bananas=3, apples=5) + assert sum_fruits(fruit_salad) == 8 diff --git a/examples/primitive_types.py b/examples/primitive_types.py new file mode 100644 index 0000000..1296f9e --- /dev/null +++ b/examples/primitive_types.py @@ -0,0 +1,17 @@ +from typing import Literal + +from funcchain import chain, settings + +settings.console_stream = True + + +def evaluate(sentence: str) -> tuple[Literal["good", "bad"], float, str]: + """ + Evaluate the given sentence based on grammatical correctness and give it a score. + """ + return chain() + + +result = evaluate("Hello, I am new to english language. 
Let's see how well I can write.") + +print(type(result)) diff --git a/examples/pydantic_validation.py b/examples/pydantic_validation.py index 7ddda1e..cfc104e 100644 --- a/examples/pydantic_validation.py +++ b/examples/pydantic_validation.py @@ -1,9 +1,8 @@ -from pydantic import BaseModel, field_validator - from funcchain import chain, settings -from funcchain.streaming import stream_to +from pydantic import BaseModel, field_validator -settings.llm = "gguf/dolphin-2.5-mixtral-8x7b:Q3_K_M" +# settings.llm = "ollama/openchat" +settings.console_stream = True class Task(BaseModel): @@ -33,6 +32,5 @@ def gather_infos(user_description: str) -> Task: if __name__ == "__main__": - with stream_to(print): - task = gather_infos("cleanup the kitchen") + task = gather_infos("cleanup the kitchen") print(f"{task=}") diff --git a/examples/router_component.py b/examples/router_component.py deleted file mode 100644 index 1e3b618..0000000 --- a/examples/router_component.py +++ /dev/null @@ -1,31 +0,0 @@ -from funcchain.components import ChatRouter - - -def handle_pdf_requests(user_query: str) -> None: - print("Handling PDF requests with user query: ", user_query) - - -def handle_csv_requests(user_query: str) -> None: - print("Handling CSV requests with user query: ", user_query) - - -def handle_default_requests(user_query: str) -> None: - print("Handling DEFAULT requests with user query: ", user_query) - - -router = ChatRouter( - routes={ - "pdf": { - "handler": handle_pdf_requests, - "description": "Call this for requests including PDF Files.", - }, - "csv": { - "handler": handle_csv_requests, - "description": "Call this for requests including CSV Files.", - }, - "default": handle_default_requests, - }, -) - - -router.invoke_route("Can you summarize this csv?") diff --git a/examples/simple/gather_infos.py b/examples/simple/gather_infos.py index 461d6f4..d8234f1 100644 --- a/examples/simple/gather_infos.py +++ b/examples/simple/gather_infos.py @@ -1,6 +1,8 @@ +from funcchain import chain, settings from pydantic import BaseModel -from funcchain import chain +# settings.llm = "ollama/openchat" +settings.console_stream = True class Task(BaseModel): @@ -25,7 +27,7 @@ def plan_task(task: Task) -> str: def main() -> None: - task_input = input("\nEnter task input: ") + task_input = "I need to buy apples, oranges and bananas from whole foods" task = gather_infos(task_input) diff --git a/examples/simple/task_comparison.py b/examples/simple/task_comparison.py index 972e463..b3611dd 100644 --- a/examples/simple/task_comparison.py +++ b/examples/simple/task_comparison.py @@ -1,8 +1,7 @@ import asyncio -from pydantic import BaseModel - from funcchain import achain, chain +from pydantic import BaseModel class Task(BaseModel): diff --git a/examples/simple/tutorial.py b/examples/simple/tutorial.py index bb39f68..6eb35f7 100644 --- a/examples/simple/tutorial.py +++ b/examples/simple/tutorial.py @@ -1,7 +1,6 @@ -from pydantic import BaseModel, Field, validator - # %% from funcchain import chain +from pydantic import BaseModel, Field, validator # %% diff --git a/examples/router_chain.py b/examples/static_router.py similarity index 94% rename from examples/router_chain.py rename to examples/static_router.py index d01a1a3..5af3576 100644 --- a/examples/router_chain.py +++ b/examples/static_router.py @@ -1,11 +1,11 @@ from enum import Enum from typing import Any -from pydantic import BaseModel, Field - from funcchain import chain, settings +from pydantic import BaseModel, Field -settings.llm = "gguf/openhermes-2.5-mistral-7b" 
+settings.console_stream = True +# settings.llm = "ollama/openhermes2.5-mistral" def handle_pdf_requests( diff --git a/examples/stream.py b/examples/stream.py index 9c8be06..c2dac4f 100644 --- a/examples/stream.py +++ b/examples/stream.py @@ -1,5 +1,5 @@ from funcchain import chain, settings -from funcchain.streaming import stream_to +from funcchain.backend.streaming import stream_to settings.temperature = 1 diff --git a/examples/stream_runnables.py b/examples/stream_runnables.py new file mode 100644 index 0000000..b7e0050 --- /dev/null +++ b/examples/stream_runnables.py @@ -0,0 +1,95 @@ +from typing import AsyncIterator, Iterator + +from funcchain import chain +from funcchain.syntax import runnable +from funcchain.syntax.components import RouterChat +from funcchain.syntax.components.handler import BasicChatHandler +from funcchain.utils.msg_tools import msg_to_str +from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.runnables import RunnableGenerator, RunnableSerializable + +# settings.llm = "ollama/openchat" + + +@runnable +def animal_poem(animal: str) -> str: + """ + Write a long poem about the animal. + """ + return chain() + + +def split_into_list( + input: Iterator[str], +) -> Iterator[list[str]]: + buffer = "" + for chunk in input: + buffer += chunk + while "\n" in buffer: + comma_index = buffer.index("\n") + yield [buffer[:comma_index].strip()] + buffer = buffer[comma_index + 1 :] + yield [buffer.strip()] + + +async def asplit_into_list( + input: AsyncIterator[str], +) -> AsyncIterator[list[str]]: + buffer = "" + async for chunk in input: + buffer += chunk + while "\n" in buffer: + comma_index = buffer.index("\n") + yield [buffer[:comma_index].strip()] + buffer = buffer[comma_index + 1 :] + yield [buffer.strip()] + + +animal_list_chain = animal_poem | RunnableGenerator(transform=split_into_list, atransform=asplit_into_list) + + +def convert_to_ai_message(input: Iterator[list[str]]) -> Iterator[AIMessage]: + for chunk in input: + yield AIMessage(content=chunk[0]) + + +async def aconvert_to_ai_message(input: AsyncIterator[list[str]]) -> AsyncIterator[AIMessage]: + async for chunk in input: + yield AIMessage(content=chunk[0]) + + +animal_chat: RunnableSerializable[HumanMessage, AIMessage] = ( + { + "animal": lambda x: msg_to_str(x), # type: ignore + } + | animal_list_chain + | RunnableGenerator(transform=convert_to_ai_message, atransform=aconvert_to_ai_message) # type: ignore +) + + +chat = RouterChat( + { + "animal": { + "handler": animal_chat, + "description": "If the user gives an animal, call this handler.", + }, + "default": { + "handler": BasicChatHandler( + system_message="You are a powerful AI assistant. " + "Always mention that the user should start funcchain on github." + ), + "description": "Any other request.", + }, + } +) + + +def main() -> None: + for chunk in chat.stream(HumanMessage(content="Hey whatsup?"), config={"configurable": {"session_id": ""}}): + if isinstance(chunk, AIMessage): + print(chunk.content, flush=True) + if isinstance(chunk, str): + print(chunk, flush=True, end="") + + +main() diff --git a/examples/todo/smart_question.py b/examples/todo/smart_question.py deleted file mode 100644 index 1070f07..0000000 --- a/examples/todo/smart_question.py +++ /dev/null @@ -1,16 +0,0 @@ -from funcchain import Matrix, achain # type: ignore - -# Matrix is a type annotation that tells the backend -# to run n versions of this prompt in parallel and -# summarizes the results. 
-# This corrects for any errors in the model and improves -# the quality of the answer. - - -# NOT YET WORKING (TODO) -async def generate_answer(question: Matrix[str], context: list[str] = []) -> str: - """ - Generate an answer to the question based on the context. - If no context is provided just use the question. - """ - return await achain() diff --git a/examples/union_types.py b/examples/union_types.py index 5c16b5b..8dc6afc 100644 --- a/examples/union_types.py +++ b/examples/union_types.py @@ -1,6 +1,5 @@ -from pydantic import BaseModel, Field - from funcchain import chain +from pydantic import BaseModel, Field class Item(BaseModel): diff --git a/examples/vision.py b/examples/vision.py index 972c8c5..cf2881d 100644 --- a/examples/vision.py +++ b/examples/vision.py @@ -1,9 +1,9 @@ -from PIL import Image +from funcchain import Image, chain, settings from pydantic import BaseModel, Field -from funcchain import chain, settings - settings.llm = "openai/gpt-4-vision-preview" +# settings.llm = "ollama/bakllava" +settings.console_stream = True class AnalysisResult(BaseModel): @@ -14,7 +14,7 @@ class AnalysisResult(BaseModel): objects: list[str] = Field(description="A list of objects found in the image") -def analyse_image(image: Image.Image) -> AnalysisResult: +def analyse_image(image: Image) -> AnalysisResult: """ Analyse the image and extract its theme, description and objects. @@ -23,11 +23,9 @@ def analyse_image(image: Image.Image) -> AnalysisResult: if __name__ == "__main__": - example_image = Image.open("examples/assets/old_chinese_temple.jpg") - from funcchain.streaming import stream_to + example_image = Image.from_file("examples/assets/old_chinese_temple.jpg") - with stream_to(print): - result = analyse_image(example_image) + result = analyse_image(example_image) print("Theme:", result.theme) print("Description:", result.description) diff --git a/mkdocs.yml b/mkdocs.yml index 0e882bd..93c5b68 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,21 +5,118 @@ repo_name: shroominic/funcchain repo_url: https://github.com/shroominic/funcchain/ nav: - - 'Getting Started': - - 'Welcome': 'index.md' - - 'Installation': 'getting-started/installation.md' - - 'Usage': 'getting-started/usage.md' - - 'Concepts': - - 'Overview': 'overview.md' - - 'Chain': 'chain.md' - - 'Input Args': 'input.md' - - 'Prompt Template': 'prompt.md' - - 'Output Parser': 'parser.md' - - 'Pydantic Models': 'models.md' - - 'Settings': 'settings.md' - - 'Examples': 'examples.md' + - "Funcchain": "index.md" + - "Getting Started": + - "Installation": "getting-started/installation.md" + - "Usage": "getting-started/usage.md" + - "Demos": "getting-started/demos.md" + - "Configuration": "getting-started/config.md" + - "Models": "getting-started/models.md" + - "Concepts": + - "Overview": "concepts/overview.md" + - "Chain": "concepts/chain.md" + - "Input Args": "concepts/input.md" + - "Prompting": "concepts/prompting.md" + - "Output Parsing": "concepts/parser.md" + - "Errors": "concepts/errors.md" + - "Langchain": "concepts/langchain.md" + - "Pydantic": "concepts/pydantic.md" + - "Local Models": "concepts/local-models.md" + - "Streaming": "concepts/streaming.md" + - "Unions": "concepts/unions.md" + - "Vision": "concepts/vision.md" + - "Examples": + # - "ChatGPT": "features/chat.md" + - "Literals": "features/literals.md" + - "Retry Parsing": "features/retry_parsing.md" + - "Structured vision output": "features/vision.md" + - "Enums": "features/enums.md" + - "Dynamic Router": "features/dynamic_router.md" + - "Streaming Output": 
"features/stream.md" + - "LlamaCpp": "features/llamacpp.md" + - "OpenAI JSON Output": "features/openai_json_mode.md" + - "Static Router": "features/static_router.md" + - "Ollama": "features/ollama.md" + - "Error Output": "features/error_output.md" + - "Advanced": + - "Async": "advanced/async.md" + - "Signature": "advanced/signature.md" + - "Runnables": "advanced/runnables.md" + - "Codebase Scaling": "advanced/codebase-scaling.md" + - "Customization": "advanced/customization.md" + - "Stream Parsing": "advanced/stream-parsing.md" + - "Custom Parsers": "advanced/custom-parser-types.md" + - "Contributing": + - "Contributing": "contributing/dev-setup.md" + - "Codebase Structure": "contributing/codebase-structure.md" + # - "Code of Conduct": "contributing/code-of-conduct.md" + - "Contributors": "contributing/contributors.md" + - "Security": "contributing/security.md" + - "Roadmap": "contributing/roadmap.md" + - "License": "contributing/license.md" + - "Changelog": "changelog.md" +# - "API Reference": "api.md" theme: name: material palette: scheme: slate + +# Extensions +markdown_extensions: + - abbr + - admonition + - pymdownx.details + - attr_list + - def_list + - footnotes + - md_in_html + - toc: + permalink: true + - pymdownx.arithmatex: + generic: true + - pymdownx.betterem: + smart_enable: all + - pymdownx.caret + - pymdownx.details + - pymdownx.emoji: + emoji_generator: !!python/name:material.extensions.emoji.to_svg + emoji_index: !!python/name:material.extensions.emoji.twemoji + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.keys + - pymdownx.magiclink: + normalize_issue_symbols: true + repo_url_shorthand: true + user: shroominic + repo: funcchain + - pymdownx.mark + - pymdownx.smartsymbols + - pymdownx.snippets: + auto_append: + - includes/mkdocs.md + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + combine_header_slug: true + slugify: !!python/object/apply:pymdownx.slugs.slugify + kwds: + case: lower + - pymdownx.tasklist: + custom_checkbox: true + - pymdownx.tilde + +extra_css: + - css/termynal.css + - css/custom.css + +extra_javascript: + - js/termynal.js + - js/custom.js diff --git a/pyproject.toml b/pyproject.toml index 6d7b149..7582a0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,22 +1,28 @@ [project] name = "funcchain" -version = "0.2.0-alpha.1" +version = "0.2.0" description = "🔖 write prompts as python functions" -authors = [ - { name = "Shroominic", email = "contact@shroominic.com" } -] +authors = [{ name = "Shroominic", email = "contact@shroominic.com" }] dependencies = [ - "langchain>=0.0.347", - "pydantic-settings>=2.1.0", + "langchain_openai>=0.0.3", + "pydantic-settings>=2", "docstring-parser>=0.15", - "rich>=13.7.0", - "jinja2>=3.1.2", - "pillow>=10.1.0", + "rich>=13", + "jinja2>=3", ] license = "MIT" readme = "README.md" requires-python = ">= 3.10, <3.13" -keywords = ["funcchain", "ai", "llm", "langchain", "pydantic", "pythonic", "cognitive systems", "agent framework"] +keywords = [ + "funcchain", + "ai", + "llm", + "langchain", + "pydantic", + "pythonic", + "cognitive systems", + "agent framework", +] classifiers = [ "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3.10", @@ -36,30 +42,30 @@ build-backend = "hatchling.build" [tool.rye] managed = true dev-dependencies = [ + "funcchain[all]", "ruff", "mypy", 
"isort", "pytest", "ipython", "pre-commit", - "funcchain[all]", + "types-PyYAML>=6", "mkdocs-material>=9.4", - "beautifulsoup4>=4.12", - "python-dotenv>=1", ] [project.optional-dependencies] -local = [ - "llama-cpp-python>=0.2.20", - "huggingface_hub>=0.19.4", -] -openai = [ - "openai>=1.3.4", - "tiktoken>=0.5.1", -] +openai = ["langchain_openai"] +ollama = ["langchain_community"] +llamacpp = ["llama-cpp-python>=0.2.32", "huggingface_hub>=0.20"] +pillow = ["pillow"] +example-extras = ["langchain>=0.1", "faiss-cpu>=1.7.4", "beautifulsoup4>=4.12"] all = [ - "funcchain[local]", + "funcchain[pillow]", "funcchain[openai]", + "funcchain[ollama]", + "funcchain[llamacpp]", + "funcchain[example-extras]", + "langchain", ] [tool.hatch.metadata] @@ -85,3 +91,7 @@ ignore_missing_imports = true disallow_untyped_defs = true disallow_untyped_calls = true disallow_incomplete_defs = true + +[tool.ruff] +select = ["E", "F", "I"] +line-length = 120 diff --git a/requirements-dev.lock b/requirements-dev.lock index 79434b5..7d9f3cd 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -5,106 +5,111 @@ # pre: false # features: [] # all-features: false +# with-sources: false -e file:. -aiohttp==3.9.0 +aiohttp==3.9.3 aiosignal==1.3.1 annotated-types==0.6.0 -anyio==3.7.1 +anyio==4.2.0 asttokens==2.4.1 -attrs==23.1.0 -babel==2.13.1 -beautifulsoup4==4.12.2 -certifi==2023.11.17 +attrs==23.2.0 +babel==2.14.0 +beautifulsoup4==4.12.3 +certifi==2024.2.2 cfgv==3.4.0 charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 -dataclasses-json==0.6.2 +dataclasses-json==0.6.4 decorator==5.1.1 diskcache==5.6.3 -distlib==0.3.7 -distro==1.8.0 +distlib==0.3.8 +distro==1.9.0 docstring-parser==0.15 executing==2.0.1 +faiss-cpu==1.7.4 filelock==3.13.1 -frozenlist==1.4.0 -fsspec==2023.12.1 +frozenlist==1.4.1 +fsspec==2023.12.2 ghp-import==2.1.0 h11==0.14.0 httpcore==1.0.2 -httpx==0.25.1 -huggingface-hub==0.19.4 -identify==2.5.32 -idna==3.4 +httpx==0.26.0 +huggingface-hub==0.20.3 +identify==2.5.33 +idna==3.6 iniconfig==2.0.0 -ipython==8.18.1 +ipython==8.21.0 isort==5.13.2 jedi==0.19.1 -jinja2==3.1.2 +jinja2==3.1.3 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.0.348 -langchain-core==0.0.12 -langsmith==0.0.66 -llama-cpp-python==0.2.20 -markdown==3.5.1 +langchain==0.1.5 +langchain-community==0.0.17 +langchain-core==0.1.18 +langchain-openai==0.0.5 +langsmith==0.0.86 +llama-cpp-python==0.2.38 +markdown==3.5.2 markdown-it-py==3.0.0 -markupsafe==2.1.3 -marshmallow==3.20.1 +markupsafe==2.1.4 +marshmallow==3.20.2 matplotlib-inline==0.1.6 mdurl==0.1.2 mergedeep==1.3.4 mkdocs==1.5.3 -mkdocs-material==9.4.10 -mkdocs-material-extensions==1.3 -multidict==6.0.4 -mypy==1.7.0 +mkdocs-material==9.5.6 +mkdocs-material-extensions==1.3.1 +multidict==6.0.5 +mypy==1.8.0 mypy-extensions==1.0.0 nodeenv==1.8.0 -numpy==1.26.2 -openai==1.3.4 +numpy==1.26.3 +openai==1.10.0 packaging==23.2 paginate==0.5.6 parso==0.8.3 -pathspec==0.11.2 +pathspec==0.12.1 pexpect==4.9.0 -pillow==10.1.0 -platformdirs==4.0.0 -pluggy==1.3.0 -pre-commit==3.5.0 -prompt-toolkit==3.0.41 +pillow==10.2.0 +platformdirs==4.2.0 +pluggy==1.4.0 +pre-commit==3.6.0 +prompt-toolkit==3.0.43 ptyprocess==0.7.0 pure-eval==0.2.2 -pydantic==2.5.2 -pydantic-core==2.14.5 +pydantic==2.6.0 +pydantic-core==2.16.1 pydantic-settings==2.1.0 pygments==2.17.2 -pymdown-extensions==10.4 -pytest==7.4.3 +pymdown-extensions==10.7 +pytest==8.0.0 python-dateutil==2.8.2 -python-dotenv==1.0.0 +python-dotenv==1.0.1 pyyaml==6.0.1 pyyaml-env-tag==0.1 -regex==2023.10.3 +regex==2023.12.25 requests==2.31.0 rich==13.7.0 
-ruff==0.1.6 +ruff==0.2.0 six==1.16.0 sniffio==1.3.0 soupsieve==2.5 -sqlalchemy==2.0.23 +sqlalchemy==2.0.25 stack-data==0.6.3 tenacity==8.2.3 -tiktoken==0.5.1 +tiktoken==0.5.2 tqdm==4.66.1 -traitlets==5.14.0 -typing-extensions==4.8.0 +traitlets==5.14.1 +types-pyyaml==6.0.12.12 +typing-extensions==4.9.0 typing-inspect==0.9.0 -urllib3==2.1.0 -virtualenv==20.24.7 +urllib3==2.2.0 +virtualenv==20.25.0 watchdog==3.0.0 -wcwidth==0.2.12 -yarl==1.9.3 +wcwidth==0.2.13 +yarl==1.9.4 # The following packages are considered to be unsafe in a requirements file: -setuptools==69.0.2 +setuptools==69.0.3 diff --git a/requirements.lock b/requirements.lock index 12c03de..57cdc12 100644 --- a/requirements.lock +++ b/requirements.lock @@ -5,46 +5,43 @@ # pre: false # features: [] # all-features: false +# with-sources: false -e file:. -aiohttp==3.9.0 -aiosignal==1.3.1 annotated-types==0.6.0 -anyio==3.7.1 -attrs==23.1.0 -certifi==2023.11.17 +anyio==4.2.0 +certifi==2024.2.2 charset-normalizer==3.3.2 -dataclasses-json==0.6.2 +distro==1.9.0 docstring-parser==0.15 -frozenlist==1.4.0 -idna==3.4 -jinja2==3.1.2 +h11==0.14.0 +httpcore==1.0.2 +httpx==0.26.0 +idna==3.6 +jinja2==3.1.3 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.0.348 -langchain-core==0.0.12 -langsmith==0.0.66 +langchain-core==0.1.18 +langchain-openai==0.0.5 +langsmith==0.0.86 markdown-it-py==3.0.0 -markupsafe==2.1.3 -marshmallow==3.20.1 +markupsafe==2.1.4 mdurl==0.1.2 -multidict==6.0.4 -mypy-extensions==1.0.0 -numpy==1.26.2 +numpy==1.26.3 +openai==1.10.0 packaging==23.2 -pillow==10.1.0 -pydantic==2.5.2 -pydantic-core==2.14.5 +pydantic==2.6.0 +pydantic-core==2.16.1 pydantic-settings==2.1.0 pygments==2.17.2 -python-dotenv==1.0.0 +python-dotenv==1.0.1 pyyaml==6.0.1 +regex==2023.12.25 requests==2.31.0 rich==13.7.0 sniffio==1.3.0 -sqlalchemy==2.0.23 tenacity==8.2.3 -typing-extensions==4.8.0 -typing-inspect==0.9.0 -urllib3==2.1.0 -yarl==1.9.3 +tiktoken==0.5.2 +tqdm==4.66.1 +typing-extensions==4.9.0 +urllib3==2.2.0 diff --git a/roadmap.todo b/roadmap.todo index dcfbf90..c9197c7 100644 --- a/roadmap.todo +++ b/roadmap.todo @@ -1,56 +1,47 @@ -[ ] - override global settings inside chain() (4h) +V0.2: +[ ] - write docs (8h) -[ ] - depends functionality to create nested chains and compile into runnables (10h) - # add a deps thing to put into funcchain defs that takes another chain and compiles it into a runnable - # so langsmith shows nested chains - # in the chain creation process it just runns all of the depending chains in parallel and feeds the inputs into sub chain - # output of every chain is string when inserted +V0.2.1 +[ ] - pygmalion grammar support -[ ] - vector retrieval type to add file/url context into chains (20h) +V0.3: -[ ] - implement variable compression to inject as much context as possible without overloading (20h) - # Create a Compressable Context Schema where it is compressed in case the context lenght is already filled - # So anything that is additional can be compressed to fit in the context but when other things that are important are not compressed. 
- # Optionally you can define how to compress and where to leave the gaps (default in the middle with [...]) +[ ] - outlines as (optional) backend for guided generation (mostly json-schemas) -[ ] - improve chain(*args interface) (2h) +[ ] - dspy integration for autotuning prompts -[ ] - enable union type without function calling or grammars (8h) +[ ] - pydantic model streaming (6h) + +[ ] - enable union type without function calling (6h) [ ] - enable Error type for non union calls (4h) -[ ] - develop Matrix wrapper idea (10h) +[ ] - convert langchain tools to funcchain agent/router (8h) -[ ] - easy to use Universal Router Class (20h) -[ ] - funcchain Agent Framework with Task Dependencies (30h) +V0.4+: -[ ] - convert langchain tools to funcchain agent/router (8h) +[ ] - cookbooks folder with jupyter notebook tutorials (8h) -[ ] - vscode extension for custom syntax highlighting (30h) +[ ] - depends functionality to create nested chains and compile into runnables (10h) + # add a deps thing to put into funcchain defs that takes another chain and compiles it into a runnable + # so langsmith shows nested chains + # in the chain creation process it just runns all of the depending chains in parallel and feeds the inputs into sub chain + # output of every chain is string when inserted -[ ] - migrate to jinja2 (6h) +[ ] - vector retrieval type to add file/url context into chains (20h) -[ ] - allow images as urls (2h) +[ ] - implement variable compression to inject as much context as possible without overloading (20h) + # Create a Compressable Context Schema where it is compressed in case the context lenght is already filled + # So anything that is additional can be compressed to fit in the context but when other things that are important are not compressed. + # Optionally you can define how to compress and where to leave the gaps (default in the middle with [...]) -[ ] - brainstorm easy async helpers (4h) +[ ] - LLMCompiler written in funcchain example (30h) -[ ] - cookbooks folder with jupyter notebook tutorials (6h) +[ ] - vscode extension for custom syntax highlighting (30h) [ ] - parallel function calling (8h) [ ] - FuncUnion and str output (6h) -[ ] - implement vision over llamacpp (8h) - -[ ] - fix deepseek over llamacpp (6h) - -[ ] - document examples (6h) - -[ ] - split this list into priorities and optional improvements (2h) - -[ ] - check similar frameworks for new ideas - -[ ] - grammar support for enums - -[ ] - split required/optional deps for only local or only openai ... +[ ] - think of new syntax for special dspy modules (COT, FewShot, ...) 
diff --git a/src/funcchain/__init__.py b/src/funcchain/__init__.py index 418a914..e53da3c 100644 --- a/src/funcchain/__init__.py +++ b/src/funcchain/__init__.py @@ -1,14 +1,18 @@ from pydantic import BaseModel -from .chain import achain, chain, runnable -from .settings import settings -from .types import Error +from .backend.settings import settings +from .syntax.decorators import runnable +from .syntax.executable import achain, chain +from .syntax.input_types import Image +from .syntax.output_types import Error __all__ = [ "settings", "chain", "achain", + "runnable", "BaseModel", + "Image", "Error", "runnable", ] diff --git a/src/funcchain/_llms.py b/src/funcchain/_llms.py deleted file mode 100644 index e3b48ea..0000000 --- a/src/funcchain/_llms.py +++ /dev/null @@ -1,368 +0,0 @@ -from __future__ import annotations - -import logging -from pathlib import Path -from typing import Any, Dict, Iterator, List, Optional, Union - -from langchain_core.callbacks.manager import CallbackManagerForLLMRun -from langchain_core.language_models import BaseChatModel, BaseLanguageModel -from langchain_core.messages import ( - AIMessage, - AIMessageChunk, - BaseMessage, - ChatMessage, - HumanMessage, - SystemMessage, -) -from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult -from langchain_core.pydantic_v1 import Field, root_validator -from langchain_core.utils import get_pydantic_field_names -from langchain_core.utils.utils import build_extra_kwargs - -logger = logging.getLogger(__name__) - - -class _LlamaCppCommon(BaseLanguageModel): - client: Any = Field(default=None, exclude=True) #: :meta private: - model_path: str - """The path to the Llama model file.""" - - lora_base: Optional[str] = None - """The path to the Llama LoRA base model.""" - - lora_path: Optional[str] = None - """The path to the Llama LoRA. If None, no LoRa is loaded.""" - - n_ctx: int = Field(4096, alias="n_ctx") - """Token context window.""" - - n_parts: int = Field(-1, alias="n_parts") - """Number of parts to split the model into. - If -1, the number of parts is automatically determined.""" - - seed: int = Field(-1, alias="seed") - """Seed. If -1, a random seed is used.""" - - f16_kv: bool = Field(True, alias="f16_kv") - """Use half-precision for key/value cache.""" - - logits_all: bool = Field(False, alias="logits_all") - """Return logits for all tokens, not just the last token.""" - - vocab_only: bool = Field(False, alias="vocab_only") - """Only load the vocabulary, no weights.""" - - use_mlock: bool = Field(False, alias="use_mlock") - """Force system to keep model in RAM.""" - - n_threads: Optional[int] = Field(None, alias="n_threads") - """Number of threads to use. - If None, the number of threads is automatically determined.""" - - n_batch: Optional[int] = Field(8, alias="n_batch") - """Number of tokens to process in parallel. - Should be a number between 1 and n_ctx.""" - - n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers") - """Number of layers to be loaded into gpu memory. Default None.""" - - suffix: Optional[str] = Field(None) - """A suffix to append to the generated text. If None, no suffix is appended.""" - - max_tokens: Optional[int] = 512 - """The maximum number of tokens to generate.""" - - temperature: Optional[float] = 0.8 - """The temperature to use for sampling.""" - - top_p: Optional[float] = 0.95 - """The top-p value to use for sampling.""" - - logprobs: Optional[int] = Field(None) - """The number of logprobs to return. 
If None, no logprobs are returned.""" - - echo: Optional[bool] = False - """Whether to echo the prompt.""" - - stop: Optional[List[str]] = [] - """A list of strings to stop generation when encountered.""" - - repeat_penalty: Optional[float] = 1.1 - """The penalty to apply to repeated tokens.""" - - top_k: Optional[int] = 40 - """The top-k value to use for sampling.""" - - last_n_tokens_size: Optional[int] = 64 - """The number of tokens to look back when applying the repeat_penalty.""" - - use_mmap: Optional[bool] = True - """Whether to keep the model loaded in RAM""" - - rope_freq_scale: float = 1.0 - """Scale factor for rope sampling.""" - - rope_freq_base: float = 10000.0 - """Base frequency for rope sampling.""" - - model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Any additional parameters to pass to llama_cpp.Llama.""" - - streaming: bool = True - """Whether to stream the results, token by token.""" - - grammar_path: Optional[Union[str, Path]] = None - """ - grammar_path: Path to the .gbnf file that defines formal grammars - for constraining model outputs. For instance, the grammar can be used - to force the model to generate valid JSON or to speak exclusively in emojis. At most - one of grammar_path and grammar should be passed in. - """ - grammar: Optional[str] = None - """ - grammar: formal grammar for constraining model outputs. For instance, the grammar - can be used to force the model to generate valid JSON or to speak exclusively in - emojis. At most one of grammar_path and grammar should be passed in. - """ - - verbose: bool = False - """Print verbose output to stderr.""" - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that llama-cpp-python library is installed.""" - try: - from llama_cpp import Llama, LlamaGrammar - except ImportError: - raise ImportError( - "Could not import llama-cpp-python library. " - "Please install the llama-cpp-python library to " - "use this embedding model: pip install llama-cpp-python" - ) - - model_path = values["model_path"] - model_param_names = [ - "rope_freq_scale", - "rope_freq_base", - "lora_path", - "lora_base", - "n_ctx", - "n_parts", - "seed", - "f16_kv", - "logits_all", - "vocab_only", - "use_mlock", - "n_threads", - "n_batch", - "use_mmap", - "last_n_tokens_size", - "verbose", - ] - model_params = {k: values[k] for k in model_param_names} - # For backwards compatibility, only include if non-null. - if values["n_gpu_layers"] is not None: - model_params["n_gpu_layers"] = values["n_gpu_layers"] - - model_params.update(values["model_kwargs"]) - - try: - values["client"] = Llama(model_path, **model_params) - except Exception as e: - raise ValueError( - f"Could not load Llama model from path: {model_path}. " - f"Received error {e}" - ) - - if values["grammar"] and values["grammar_path"]: - grammar = values["grammar"] - grammar_path = values["grammar_path"] - raise ValueError( - "Can only pass in one of grammar and grammar_path. Received " - f"{grammar=} and {grammar_path=}." 
- ) - elif isinstance(values["grammar"], str): - values["grammar"] = LlamaGrammar.from_string(values["grammar"]) - elif values["grammar_path"]: - values["grammar"] = LlamaGrammar.from_file(values["grammar_path"]) - else: - pass - return values - - @root_validator(pre=True) - def build_model_kwargs(cls, values: Dict[str, Any]) -> Dict[str, Any]: - """Build extra kwargs from additional params that were passed in.""" - all_required_field_names = get_pydantic_field_names(cls) - extra = values.get("model_kwargs", {}) - values["model_kwargs"] = build_extra_kwargs( - extra, values, all_required_field_names - ) - return values - - @property - def _default_params(self) -> Dict[str, Any]: - """Get the default parameters for calling llama_cpp.""" - params = { - "suffix": self.suffix, - "max_tokens": self.max_tokens, - "temperature": self.temperature, - "top_p": self.top_p, - "logprobs": self.logprobs, - "echo": self.echo, - "stop_sequences": self.stop, # key here is convention among LLM classes - "repeat_penalty": self.repeat_penalty, - "top_k": self.top_k, - } - if self.grammar: - params["grammar"] = self.grammar - return params - - @property - def _identifying_params(self) -> Dict[str, Any]: - """Get the identifying parameters.""" - return {**{"model_path": self.model_path}, **self._default_params} - - def _get_parameters(self, stop: Optional[List[str]] = None) -> Dict[str, Any]: - """ - Performs sanity check, preparing parameters in format needed by llama_cpp. - - Args: - stop (Optional[List[str]]): List of stop sequences for llama_cpp. - - Returns: - Dictionary containing the combined parameters. - """ - - # Raise error if stop sequences are in both input and default params - if self.stop and stop is not None: - raise ValueError("`stop` found in both the input and default params.") - - params = self._default_params - - # llama_cpp expects the "stop" key not this, so we remove it: - params.pop("stop_sequences") - - # then sets it as configured, or default to an empty list: - params["stop"] = self.stop or stop or [] - - return params - - def get_num_tokens(self, text: str) -> int: - tokenized_text = self.client.tokenize(text.encode("utf-8")) - return len(tokenized_text) - - -class ChatLlamaCpp(BaseChatModel, _LlamaCppCommon): - """llama.cpp chat model. - - To use, you should have the llama-cpp-python library installed, and provide the - path to the Llama model as a named parameter to the constructor. - Check out: https://github.com/abetlen/llama-cpp-python - - Example: - .. 
code-block:: python - - from funcchain._llms import ChatLlamaCpp - llm = ChatLlamaCpp(model_path="./path/to/model.gguf") - """ - - @property - def _llm_type(self) -> str: - """Return type of chat model.""" - return "llamacpp-chat" - - def _format_message_as_text(self, message: BaseMessage) -> str: - if isinstance(message, ChatMessage): - message_text = f"\n\n{message.role.capitalize()}: {message.content}" - elif isinstance(message, HumanMessage): - message_text = f"[INST] {message.content} [/INST]" - elif isinstance(message, AIMessage): - message_text = f"{message.content}" - elif isinstance(message, SystemMessage): - message_text = f"<> {message.content} <>" - else: - raise ValueError(f"Got unknown type {message}") - return message_text - - def _format_messages_as_text(self, messages: List[BaseMessage]) -> str: - return "\n".join( - [self._format_message_as_text(message) for message in messages] - ) - - def _stream_with_aggregation( - self, - messages: List[BaseMessage], - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - verbose: bool = False, - **kwargs: Any, - ) -> ChatGenerationChunk: - final_chunk: Optional[ChatGenerationChunk] = None - for chunk in self._stream(messages, stop, **kwargs): - if final_chunk is None: - final_chunk = chunk - else: - final_chunk += chunk - if run_manager: - run_manager.on_llm_new_token( - chunk.text, - verbose=verbose, - ) - if final_chunk is None: - raise ValueError("No data received from llamacpp stream.") - - return final_chunk - - def _generate( - self, - messages: List[BaseMessage], - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> ChatResult: - """Call out to LlamaCpp's generation endpoint. - - Args: - messages: The list of base messages to pass into the model. - stop: Optional list of stop words to use when generating. - - Returns: - Chat generations from the model - - Example: - .. 
code-block:: python - - response = llamacpp([ - HumanMessage(content="Tell me about the history of AI") - ]) - """ - final_chunk = self._stream_with_aggregation( - messages, stop=stop, run_manager=run_manager, verbose=self.verbose, **kwargs - ) - chat_generation = ChatGeneration( - message=AIMessage(content=final_chunk.text), - generation_info=final_chunk.generation_info, - ) - return ChatResult(generations=[chat_generation]) - - def _stream( - self, - messages: List[BaseMessage], - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> Iterator[ChatGenerationChunk]: - params = {**self._get_parameters(stop), **kwargs} - prompt = self._format_messages_as_text(messages) - result = self.client(prompt=prompt, stream=True, **params) - for part in result: - logprobs = part["choices"][0].get("logprobs", None) - chunk = ChatGenerationChunk( - message=AIMessageChunk(content=part["choices"][0]["text"]), - generation_info={"logprobs": logprobs}, - ) - yield chunk - if run_manager: - run_manager.on_llm_new_token( - token=chunk.text, verbose=self.verbose, log_probs=logprobs - ) diff --git a/docs/concepts/async.md b/src/funcchain/backend/__init__.py similarity index 100% rename from docs/concepts/async.md rename to src/funcchain/backend/__init__.py diff --git a/src/funcchain/backend/compiler.py b/src/funcchain/backend/compiler.py new file mode 100644 index 0000000..f900a9f --- /dev/null +++ b/src/funcchain/backend/compiler.py @@ -0,0 +1,388 @@ +from typing import Any, TypeVar + +from langchain_core.callbacks import Callbacks +from langchain_core.chat_history import BaseChatMessageHistory +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage +from langchain_core.output_parsers import BaseGenerationOutputParser, BaseOutputParser +from langchain_core.runnables import Runnable +from pydantic import BaseModel + +from ..model.abilities import is_openai_function_model, is_vision_model +from ..model.defaults import univeral_model_selector +from ..parser.json_schema import RetryJsonPydanticParser +from ..parser.openai_functions import RetryOpenAIFunctionPydanticParser, RetryOpenAIFunctionPydanticUnionParser +from ..parser.primitive_types import RetryJsonPrimitiveTypeParser +from ..parser.schema_converter import pydantic_to_grammar +from ..parser.selector import parser_for +from ..schema.signature import Signature +from ..syntax.input_types import Image +from ..syntax.output_types import ParserBaseModel +from ..utils.msg_tools import msg_to_str +from ..utils.pydantic import multi_pydantic_to_functions, pydantic_to_functions +from ..utils.token_counter import count_tokens +from .prompt import ( + HumanImageMessagePromptTemplate, + create_chat_prompt, + create_instruction_prompt, +) +from .settings import FuncchainSettings +from .streaming import stream_handler + +ChainOutput = TypeVar("ChainOutput") + + +# TODO: do patch instead of seperate creation +def create_union_chain( + output_types: list[type], + instruction_prompt: HumanImageMessagePromptTemplate, + system: str, + memory: BaseChatMessageHistory, + context: list[BaseMessage], + llm: BaseChatModel, + input_kwargs: dict[str, Any], +) -> Runnable[dict[str, str], Any]: + """ + Compile a langchain runnable chain from the funcchain syntax. 
+ """ + if not all(issubclass(t, BaseModel) for t in output_types): + raise RuntimeError("Funcchain union types are currently only supported for pydantic models.") + + output_type_names = [t.__name__ for t in output_types] + + input_kwargs["format_instructions"] = f"Extract to one of these output types: {output_type_names}." + + functions = multi_pydantic_to_functions(output_types) + + _llm = llm + llm = _llm.bind(**functions) # type: ignore + + prompt = create_chat_prompt( + system, + instruction_prompt, + context=[ + *context, + HumanMessage(content="Can you use a function call for the next response?"), + AIMessage(content="Yeah I can do that, just tell me what you need!"), + ], + memory=memory, + ) + + return prompt | llm | RetryOpenAIFunctionPydanticUnionParser(output_types=output_types, retry=3, retry_llm=_llm) + + +def patch_openai_function_to_pydantic( + llm: BaseChatModel, + output_type: type[BaseModel], + input_kwargs: dict[str, str], +) -> tuple[BaseChatModel, BaseGenerationOutputParser]: + input_kwargs["format_instructions"] = f"Extract to {output_type.__name__}." + functions = pydantic_to_functions(output_type) + + _llm = llm + llm = llm.bind(**functions) # type: ignore + + return llm, RetryOpenAIFunctionPydanticParser(pydantic_schema=output_type, retry=3, retry_llm=_llm) + + +def create_chain( + system: str, + instruction: str, + output_types: list[type[ChainOutput]], + context: list[BaseMessage], + memory: BaseChatMessageHistory, + settings: FuncchainSettings, + input_args: list[tuple[str, type]], + temp_images: list[Image] = [], +) -> Runnable[dict[str, str], ChainOutput]: + """ + Compile a langchain runnable chain from the funcchain syntax. + """ + # large language model + _llm = _gather_llm(settings) + llm = _add_custom_callbacks(_llm, settings) + + parser = parser_for(output_types, retry=settings.retry_parse, llm=llm) + + # TODO collect types from input_args + # -> this would allow special prompt templating based on certain types + # -> e.g. BaseChatMessageHistory adds a history placeholder + # -> e.g. BaseChatModel overrides the default language model + # -> e.g. SettingsOverride overrides the default settings + # -> e.g. Callbacks adds custom callbacks + # -> e.g. 
SystemMessage adds a system message + + # handle input arguments + prompt_args: list[str] = [] + pydantic_args: list[str] = [] + special_args: list[tuple[str, type]] = [] + + for i in input_args: + if i[1] is str: + prompt_args.append(i[0]) + if issubclass(i[1], BaseModel): + pydantic_args.append(i[0]) + else: + special_args.append(i) + + # TODO: change this into input_args + input_kwargs = {k: "" for k in (prompt_args + pydantic_args)} + + # add format instructions for parser + f_instructions = None + if parser and (settings.streaming or not is_openai_function_model(llm)): + # streaming behavior is not supported for function models + # but for normal function models we do not need to add format instructions + if not isinstance(parser, BaseOutputParser): + raise NotImplementedError("Fix this") + instruction, f_instructions = _add_format_instructions( + parser, + instruction, + input_kwargs, + ) + + # patch inputs + _crop_large_inputs( + system, + instruction, + input_kwargs, + settings, + ) + + # for vision models + images = _handle_images(llm, memory, input_kwargs) + images.extend(temp_images) + + # create prompts + instruction_prompt = create_instruction_prompt( + instruction, + images, + input_kwargs, + format_instructions=f_instructions, + ) + chat_prompt = create_chat_prompt(system, instruction_prompt, context, memory) + + # TODO: think why this was needed + # # add formatted instruction to chat history + # memory.add_message(instruction_prompt.format(**input_kwargs)) + + _inject_grammar_for_local_models(llm, output_types, parser) + + # function model patches + if is_openai_function_model(llm): + if len(output_types) > 1: + return create_union_chain( + output_types, + instruction_prompt, + system, + memory, + context, + llm, + input_kwargs, + ) + if isinstance(parser, RetryJsonPydanticParser) or isinstance(parser, RetryJsonPrimitiveTypeParser): + output_type = parser.pydantic_object + if issubclass(output_type, BaseModel) and not issubclass(output_type, ParserBaseModel): + if settings.streaming and hasattr(llm, "model_kwargs"): + llm.model_kwargs = {"response_format": {"type": "json_object"}} + else: + assert isinstance(parser, RetryJsonPydanticParser) + llm, parser = patch_openai_function_to_pydantic(llm, output_type, input_kwargs) + + assert parser is not None + return chat_prompt | llm | parser + + +def compile_chain(signature: Signature, temp_images: list[Image] = []) -> Runnable[dict[str, str], ChainOutput]: + """ + Compile a signature to a runnable chain. + """ + system = ( + [msg for msg in signature.history if isinstance(msg, SystemMessage)] or [None] # type: ignore + )[0] + + from ..utils.memory import ChatMessageHistory + + memory = ChatMessageHistory(messages=signature.history) + + return create_chain( + msg_to_str(system) if system else "", + signature.instruction, + signature.output_types, + signature.history, + memory, + signature.settings, + signature.input_args, + temp_images, + ) + + +def _add_format_instructions( + parser: BaseOutputParser, + instruction: str, + input_kwargs: dict[str, str], +) -> tuple[str, str | None]: + """ + Add parsing format instructions + to the instruction message and input_kwargs + if the output parser supports it. 
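As a rough usage sketch (assumed, not taken from this diff), the union path above serves functions whose return annotation is a union of pydantic models; `create_chain` dispatches them to `create_union_chain`, which converts each model into an OpenAI function, binds them to the llm, and parses the function call back into the matching type. The `Answer`/`BugReport` models and the `chain()` body below are illustrative assumptions about the funcchain calling convention (docstring as instruction, return annotation as output type):

```python
# Assumed usage sketch: a union return type resolved via the
# create_union_chain path (one OpenAI function per pydantic model).
from pydantic import BaseModel, Field

from funcchain import chain


class Answer(BaseModel):
    answer: str


class BugReport(BaseModel):
    title: str
    severity: int = Field(ge=1, le=5)


def triage(user_message: str) -> Answer | BugReport:
    """
    Either answer the user directly or file a bug report for their message.
    """
    return chain()
```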
+ """ + try: + if format_instructions := parser.get_format_instructions(): + instruction += "\n{format_instructions}" + input_kwargs["format_instructions"] = format_instructions + return instruction, format_instructions + except NotImplementedError: + return instruction, None + + +def _crop_large_inputs( + system: str, + instruction: str, + input_kwargs: dict, + settings: FuncchainSettings, +) -> None: + """ + Crop large inputs to avoid exceeding the maximum number of tokens. + """ + base_tokens = count_tokens(instruction + system) + for k, v in input_kwargs.copy().items(): + if isinstance(v, str): + content_tokens = count_tokens(v) + if base_tokens + content_tokens > settings.context_lenght: + input_kwargs[k] = v[: (settings.context_lenght - base_tokens) * 2 // 3] + print("Truncated: ", len(input_kwargs[k])) + + +def _handle_images( + llm: BaseChatModel, + memory: BaseChatMessageHistory, + input_kwargs: dict[str, Any], +) -> list[Image]: + """ + Handle images for vision models. + """ + images = [v for v in input_kwargs.values() if isinstance(v, Image)] + if is_vision_model(llm): + for k in list(input_kwargs.keys()): + if isinstance(input_kwargs[k], Image): + del input_kwargs[k] + elif images: + raise RuntimeError("Images as input are only supported for vision models.") + elif _history_contains_images(memory): + print("Warning: Images in chat history are ignored for non-vision models.") + memory.messages = _clear_images_from_history(memory.messages) + + return images + + +def _inject_grammar_for_local_models( + llm: BaseChatModel, + output_types: list[type], + parser: BaseOutputParser | BaseGenerationOutputParser, +) -> None: + """ + Inject GBNF grammar into local models. + """ + try: + from funcchain.model.patches.ollama import ChatOllama + except: # noqa + pass + else: + if isinstance(llm, ChatOllama): + if len(output_types) > 1: + raise NotImplementedError("Union types are not yet supported for LlamaCpp models.") # TODO: implement + output_type = output_types[0] + if issubclass(output_type, BaseModel) and not issubclass(output_type, ParserBaseModel): + assert isinstance(parser, RetryJsonPydanticParser) + output_type = parser.pydantic_object + llm.grammar = pydantic_to_grammar(output_type) + if issubclass(output_type, ParserBaseModel): + llm.grammar = output_type.custom_grammar() + try: + from llama_cpp import LlamaGrammar + + from ..model.patches.llamacpp import ChatLlamaCpp + except: # noqa + pass + else: + if isinstance(llm, ChatLlamaCpp): + if len(output_types) > 1: # TODO: implement + raise NotImplementedError("Union types are not yet supported for LlamaCpp models.") + + output_type = output_types[0] + if isinstance(parser, RetryJsonPydanticParser) or isinstance(parser, RetryJsonPrimitiveTypeParser): + output_type = parser.pydantic_object + + if issubclass(output_type, BaseModel) and not issubclass(output_type, ParserBaseModel): + assert isinstance(parser, RetryJsonPydanticParser) + output_type = parser.pydantic_object + grammar: str | None = pydantic_to_grammar(output_type) + if issubclass(output_type, ParserBaseModel): + grammar = output_type.custom_grammar() + if grammar: + setattr( + llm, + "grammar", + LlamaGrammar.from_string(grammar, verbose=False), + ) + + +def _gather_llm(settings: FuncchainSettings) -> BaseChatModel: + if isinstance(settings.llm, BaseChatModel): + llm = settings.llm + else: + llm = univeral_model_selector(settings) + + if not llm: + raise RuntimeError( + "No language model provided. 
Either set the llm environment variable or " + "pass a model to the `chain` function." + ) + return llm + + +def _add_custom_callbacks(llm: BaseChatModel, settings: FuncchainSettings) -> BaseChatModel: + callbacks: Callbacks = [] + + if handler := stream_handler.get(): + callbacks = [handler] + + if settings.console_stream: + from .streaming import AsyncStreamHandler + + callbacks = [ + AsyncStreamHandler(print, {"end": "", "flush": True}), + ] + + if callbacks: + settings.streaming = True + if hasattr(llm, "streaming"): + llm.streaming = True + llm.callbacks = callbacks + + return llm + + +def _history_contains_images(history: BaseChatMessageHistory) -> bool: + """ + Check if the chat history contains images. + """ + for message in history.messages: + if isinstance(message.content, list): + for content in message.content: + if isinstance(content, dict) and content.get("type") == "image_url": + return True + return False + + +def _clear_images_from_history(history: list[BaseMessage]) -> list[BaseMessage]: + """ + Remove images from the chat history. + """ + for message in history: + if isinstance(message.content, list): + for content in message.content: + if isinstance(content, dict) and content.get("type") == "image_url": + message.content.remove(content) + return history diff --git a/src/funcchain/backend/meta_inspect.py b/src/funcchain/backend/meta_inspect.py new file mode 100644 index 0000000..15f265f --- /dev/null +++ b/src/funcchain/backend/meta_inspect.py @@ -0,0 +1,80 @@ +from inspect import FrameInfo, currentframe, getouterframes +from types import FunctionType, UnionType +from typing import Optional + +FUNC_DEPTH = 4 + + +def get_parent_frame(depth: int = FUNC_DEPTH) -> FrameInfo: + """ + Get the dep'th parent function information. + """ + return getouterframes(currentframe())[depth] + + +def get_func_obj() -> FunctionType: + """ + Get the parent caller function. + """ + func_name = get_parent_frame().function + if func_name == "": + raise RuntimeError("Cannot get function object from module") + if func_name == "": + raise RuntimeError("Cannot get function object from lambda") + + try: + func = get_parent_frame().frame.f_globals[func_name] + except KeyError: + func = get_parent_frame(FUNC_DEPTH + 1).frame.f_locals[func_name] + return func + + +def from_docstring(f: Optional[FunctionType] = None) -> str: + """ + Get the docstring of the parent caller function. + """ + if doc_str := (f or get_func_obj()).__doc__: + return "\n".join([line.lstrip() for line in doc_str.split("\n")]) + raise ValueError(f"The funcchain ({get_parent_frame().function}) must have a docstring") + + +def get_output_types(f: Optional[FunctionType] = None) -> list[type]: + """ + Get the output type annotation of the parent caller function. + Returns a list of types in case of a union, otherwise a list with one type. + """ + try: + return_type = (f or get_func_obj()).__annotations__["return"] + if isinstance(return_type, UnionType): + return return_type.__args__ # type: ignore + else: + return [return_type] + except KeyError: + raise ValueError("The funcchain must have a return type annotation") + + +def kwargs_from_parent() -> dict[str, str]: + """ + Get the kwargs from the parent function. 
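The helpers in `meta_inspect.py` read metadata from the calling function: the docstring becomes the instruction, the return annotation the output type(s), and the caller's locals the prompt inputs. A small sketch (assumed example, not from this diff) of what the two helpers shown above return for a funcchain-style function:

```python
# Assumed example for the inspection helpers defined above.
from pydantic import BaseModel

from funcchain.backend.meta_inspect import from_docstring, get_output_types


class Summary(BaseModel):
    text: str


def summarize(article: str) -> Summary:
    """
    Summarize the article in one paragraph.
    """
    ...


print(from_docstring(summarize))    # "\nSummarize the article in one paragraph.\n"
print(get_output_types(summarize))  # [Summary]
```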
+ """ + return get_parent_frame(FUNC_DEPTH - 1).frame.f_locals + + +def args_from_parent() -> list[tuple[str, type]]: + """ + Get input args with type hints from parent function + """ + return [(arg, t) for arg, t in get_func_obj().__annotations__.items() if arg != "return" and arg != "self"] + + +def gather_signature( + f: FunctionType, +) -> dict[str, str | list[tuple[str, type]] | list[type]]: + """ + Gather the signature of the parent caller function. + """ + return { + "instruction": from_docstring(f), + "input_args": [(arg, f.__annotations__[arg]) for arg in f.__code__.co_varnames[: f.__code__.co_argcount]], + "output_types": get_output_types(f), + } diff --git a/src/funcchain/chain/prompt.py b/src/funcchain/backend/prompt.py similarity index 77% rename from src/funcchain/chain/prompt.py rename to src/funcchain/backend/prompt.py index 5a887b7..4cb2131 100644 --- a/src/funcchain/chain/prompt.py +++ b/src/funcchain/backend/prompt.py @@ -1,6 +1,7 @@ from string import Formatter from typing import Any, Optional, Type +from jinja2 import Environment, meta from langchain_core.chat_history import BaseChatMessageHistory from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from langchain_core.prompts import ChatPromptTemplate @@ -9,37 +10,35 @@ MessagePromptTemplateT, ) from langchain_core.prompts.prompt import PromptTemplate -from PIL import Image from pydantic import BaseModel -from ..utils import image_to_base64_url +from ..syntax.input_types import Image def create_instruction_prompt( instruction: str, - images: list[Image.Image], + images: list[Image], input_kwargs: dict[str, Any], + format_instructions: Optional[str] = None, ) -> "HumanImageMessagePromptTemplate": template_format = _determine_format(instruction) - required_f_str_vars = _extract_fstring_vars(instruction) + required_f_str_vars = _extract_template_vars(instruction, template_format) _filter_fstring_vars(input_kwargs) - inject_vars = [ - f"{var.upper()}:\n{{{var}}}\n" - for var, _ in input_kwargs.items() - if var not in required_f_str_vars - ] + inject_vars = [f"{var.upper()}:\n{{{var}}}\n" for var, _ in input_kwargs.items() if var not in required_f_str_vars] + added_instruction = "\n".join(inject_vars) instruction = added_instruction + instruction - images = [image_to_base64_url(image) for image in images] + _images = [image.url for image in images] return HumanImageMessagePromptTemplate.from_template( template=instruction, template_format=template_format, - images=images, + images=_images, + partial_variables={"format_instructions": format_instructions} if format_instructions else None, ) @@ -47,7 +46,7 @@ def create_chat_prompt( system: str, instruction_template: "HumanImageMessagePromptTemplate", context: list[BaseMessage], - memory: BaseChatMessageHistory, + memory: BaseChatMessageHistory, # TODO: remove and do memory placeholder ) -> ChatPromptTemplate: """ Compose a chat prompt from a system message, @@ -57,7 +56,9 @@ def create_chat_prompt( if system and memory.messages and isinstance(memory.messages[0], SystemMessage): memory.messages.pop(0) + # TODO: fix union type problem if memory.messages and isinstance(memory.messages[-1], HumanMessage): + print("specialchatprompt") return ChatPromptTemplate.from_messages( [ *([SystemMessage(content=system)] if system else []), @@ -65,7 +66,6 @@ def create_chat_prompt( *context, ] ) - return ChatPromptTemplate.from_messages( [ *([SystemMessage(content=system)] if system else []), @@ -82,9 +82,20 @@ def _determine_format( return "jinja2" if "{{" in 
instruction or "{%" in instruction else "f-string" +def _extract_template_vars( + template: str, + template_format: str, +) -> list[str]: + """ + Function to extract variables from a string template. + """ + if template_format == "jinja2": + return _extract_jinja_vars(template) + return _extract_fstring_vars(template) + + def _extract_fstring_vars(template: str) -> list[str]: """ - TODO: enable jinja2 check Function to extract f-string variables from a string. """ return [ @@ -94,6 +105,15 @@ def _extract_fstring_vars(template: str) -> list[str]: ] +def _extract_jinja_vars(template: str) -> list[str]: + """ + Function to extract variables from a Jinja2 template. + """ + env = Environment() + parsed_content = env.parse(template) + return list(meta.find_undeclared_variables(parsed_content)) + + def _filter_fstring_vars( input_kwargs: dict[str, Any], ) -> None: @@ -101,9 +121,7 @@ def _filter_fstring_vars( keys_to_remove = [ key for key, value in input_kwargs.items() - if not ( - isinstance(value, str) or isinstance(value, BaseModel) - ) # TODO: remove BaseModel + if not (isinstance(value, str) or isinstance(value, BaseModel)) # TODO: remove BaseModel ] for key in keys_to_remove: del input_kwargs[key] diff --git a/src/funcchain/settings.py b/src/funcchain/backend/settings.py similarity index 74% rename from src/funcchain/settings.py rename to src/funcchain/backend/settings.py index e1e9dd2..6557a46 100644 --- a/src/funcchain/settings.py +++ b/src/funcchain/backend/settings.py @@ -2,31 +2,27 @@ Funcchain Settings: Automatically loads environment variables from .env file """ -from typing import Optional, TypedDict +from typing import Optional -from langchain.cache import InMemoryCache -from langchain_core.globals import set_llm_cache from langchain_core.language_models import BaseChatModel -from langchain_core.runnables import RunnableWithFallbacks from pydantic import Field from pydantic_settings import BaseSettings - -set_llm_cache(InMemoryCache()) +from typing_extensions import TypedDict class FuncchainSettings(BaseSettings): debug: bool = True - llm: BaseChatModel | RunnableWithFallbacks | str = Field( + llm: BaseChatModel | str = Field( default="openai/gpt-3.5-turbo-1106", validate_default=False, ) - local_models_path: str = "./.models" + console_stream: bool = False - default_system_prompt: str = "" + system_prompt: str = "" - retry_parse: int = 5 + retry_parse: int = 3 retry_parse_sleep: float = 0.1 # KEYS @@ -41,10 +37,11 @@ class FuncchainSettings(BaseSettings): max_tokens: int = 2048 temperature: float = 0.1 - # LLAMA KWARGS + # LLAMACPP KWARGS context_lenght: int = 8196 n_gpu_layers: int = 50 keep_loaded: bool = False + local_models_path: str = "./.models" def model_kwargs(self) -> dict: return { @@ -59,7 +56,10 @@ def openai_kwargs(self) -> dict: "openai_api_key": self.openai_api_key, } - def llama_kwargs(self) -> dict: + def ollama_kwargs(self) -> dict: + return {} + + def llamacpp_kwargs(self) -> dict: return { "n_ctx": self.context_lenght, "use_mlock": self.keep_loaded, @@ -71,17 +71,21 @@ def llama_kwargs(self) -> dict: class SettingsOverride(TypedDict, total=False): - llm: BaseChatModel | RunnableWithFallbacks | str + llm: BaseChatModel | str | None verbose: bool temperature: float max_tokens: int streaming: bool - # TODO: context_length: int + retry_parse: int + context_lenght: int + system_prompt: str -def get_settings(override: Optional[SettingsOverride] = None) -> FuncchainSettings: +def create_local_settings(override: Optional[SettingsOverride] = None) -> 
FuncchainSettings: if override: + if override["llm"] is None: + override["llm"] = settings.llm return settings.model_copy(update=dict(override)) return settings diff --git a/src/funcchain/streaming.py b/src/funcchain/backend/streaming.py similarity index 92% rename from src/funcchain/streaming.py rename to src/funcchain/backend/streaming.py index 259ac31..494b0f5 100644 --- a/src/funcchain/streaming.py +++ b/src/funcchain/backend/streaming.py @@ -11,9 +11,7 @@ class AsyncStreamHandler(AsyncCallbackHandler): """Async callback handler that can be used to handle callbacks from langchain_core.""" - def __init__( - self, fn: Callable[[str], Awaitable[None] | None], default_kwargs: dict - ) -> None: + def __init__(self, fn: Callable[[str], Awaitable[None] | None], default_kwargs: dict) -> None: self.fn = fn self.default_kwargs = default_kwargs self.cost: float = 0.0 @@ -69,15 +67,11 @@ async def on_llm_end( print("\n") -stream_handler: ContextVar[AsyncStreamHandler | None] = ContextVar( - "stream_handler", default=None -) +stream_handler: ContextVar[AsyncStreamHandler | None] = ContextVar("stream_handler", default=None) @contextmanager -def stream_to( - fn: Callable[[str], None], **kwargs: Any -) -> Generator[AsyncStreamHandler, None, None]: +def stream_to(fn: Callable[[str], None], **kwargs: Any) -> Generator[AsyncStreamHandler, None, None]: """ Stream the llm tokens to a given function. diff --git a/src/funcchain/chain/__init__.py b/src/funcchain/chain/__init__.py deleted file mode 100644 index bf3e470..0000000 --- a/src/funcchain/chain/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .interface import achain, chain -from .runnables import runnable - -__all__ = ["chain", "achain", "runnable"] diff --git a/src/funcchain/chain/creation.py b/src/funcchain/chain/creation.py deleted file mode 100644 index afdefb6..0000000 --- a/src/funcchain/chain/creation.py +++ /dev/null @@ -1,282 +0,0 @@ -from types import UnionType -from typing import TypeVar, Type - -from langchain_core.language_models import BaseChatModel -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage -from langchain_core.output_parsers import BaseOutputParser -from langchain_core.chat_history import BaseChatMessageHistory -from langchain_core.runnables import ( - RunnableSerializable, - RunnableWithFallbacks, -) -from PIL import Image -from pydantic import BaseModel - -from funcchain._llms import ChatLlamaCpp - -from ..parser import MultiToolParser, ParserBaseModel, PydanticFuncParser -from ..settings import FuncchainSettings -from ..streaming import stream_handler -from ..utils import ( - parser_for, - count_tokens, - is_function_model, - is_vision_model, - multi_pydantic_to_functions, - pydantic_to_functions, - pydantic_to_grammar, - univeral_model_selector, -) -from .prompt import ( - HumanImageMessagePromptTemplate, - create_chat_prompt, - create_instruction_prompt, -) - -ChainOutput = TypeVar("ChainOutput") - - -# TODO: do patch instead of seperate creation -def create_union_chain( - output_type: UnionType, - instruction_prompt: HumanImageMessagePromptTemplate, - system: str, - memory: BaseChatMessageHistory, - context: list[BaseMessage], - llm: BaseChatModel | RunnableWithFallbacks, - input_kwargs: dict[str, str], -) -> RunnableSerializable[dict[str, str], BaseModel]: - """ - Compile a langchain runnable chain from the funcchain syntax. 
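`FuncchainSettings` is exposed as a module-level `settings` object (re-exported from the package root), and `SettingsOverride` lets individual calls merge a partial override through `create_local_settings`. A minimal sketch of adjusting the global defaults, using only fields defined above:

```python
# Assumed usage sketch: mutating the global settings object.
from funcchain import settings

settings.console_stream = True  # stream tokens to the console while generating
settings.temperature = 0.3      # default sampling temperature
settings.retry_parse = 3        # retries when output parsing fails
```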
- """ - if not all(issubclass(t, BaseModel) for t in output_type.__args__): - raise RuntimeError( - "Funcchain union types are currently only supported for pydantic models." - ) - - output_types: list[Type[BaseModel]] = output_type.__args__ # type: ignore - output_type_names = [t.__name__ for t in output_types] - - input_kwargs[ - "format_instructions" - ] = f"Extract to one of these output types: {output_type_names}." - - functions = multi_pydantic_to_functions(output_types) - - if isinstance(llm, RunnableWithFallbacks): - llm = llm.runnable.bind(**functions).with_fallbacks( - [ - fallback.bind(**functions) - for fallback in llm.fallbacks - if hasattr(llm, "fallbacks") - ] - ) - else: - llm = llm.bind(**functions) # type: ignore - - prompt = create_chat_prompt( - system, - instruction_prompt, - context=[ - *context, - HumanMessage(content="Can you use a function call for the next response?"), - AIMessage(content="Yeah I can do that, just tell me what you need!"), - ], - memory=memory, - ) - - return prompt | llm | MultiToolParser(output_types=output_types) - - -# TODO: do patch instead of seperate creation -def create_pydanctic_chain( - output_type: type[BaseModel], - prompt: ChatPromptTemplate, - llm: BaseChatModel | RunnableWithFallbacks, - input_kwargs: dict[str, str], -) -> RunnableSerializable[dict[str, str], BaseModel]: - # TODO: check these format_instructions - input_kwargs["format_instructions"] = f"Extract to {output_type.__name__}." - functions = pydantic_to_functions(output_type) - - llm = ( - llm.runnable.bind(**functions).with_fallbacks( # type: ignore - [ - fallback.bind(**functions) - for fallback in llm.fallbacks - if hasattr(llm, "fallbacks") - ] - ) - if isinstance(llm, RunnableWithFallbacks) - else llm.bind(**functions) - ) - return prompt | llm | PydanticFuncParser(pydantic_schema=output_type) - - -def create_chain( - system: str, - instruction: str, - output_type: Type[ChainOutput], - context: list[BaseMessage], - memory: BaseChatMessageHistory, - settings: FuncchainSettings, - input_kwargs: dict[str, str], -) -> RunnableSerializable[dict[str, str], ChainOutput]: - """ - Compile a langchain runnable chain from the funcchain syntax. - """ - # large language model - llm = _gather_llm(settings) - - parser = parser_for(output_type) - - # add format instructions for parser - if parser and not is_function_model(llm): - instruction = _add_format_instructions( - parser, - instruction, - input_kwargs, - ) - - # patch inputs - _crop_large_inputs( - system, - instruction, - input_kwargs, - settings, - ) - - # for vision models - images = _handle_images(llm, input_kwargs) - - # create prompts - instruction_prompt = create_instruction_prompt(instruction, images, input_kwargs) - chat_prompt = create_chat_prompt(system, instruction_prompt, context, memory) - - # add formatted instruction to chat history - memory.add_message(instruction_prompt.format(**input_kwargs)) - - if isinstance(llm, ChatLlamaCpp): - if isinstance(output_type, UnionType): - # TODO: implement Union Type grammar - raise NotImplementedError( - "Union types are not yet supported for LlamaCpp models." 
- ) - if issubclass(output_type, BaseModel) and not issubclass( - output_type, ParserBaseModel - ): - from llama_cpp import LlamaGrammar - - grammar = pydantic_to_grammar(output_type) - setattr( - llm, - "grammar", - LlamaGrammar.from_string(grammar, verbose=False), - ) - - # function model patches - if is_function_model(llm): - if isinstance(output_type, UnionType): - return create_union_chain( - output_type, - instruction_prompt, - system, - memory, - context, - llm, - input_kwargs, - ) - - if issubclass(output_type, BaseModel) and not issubclass( - output_type, ParserBaseModel - ): - return create_pydanctic_chain( # type: ignore - output_type, - chat_prompt, - llm, - input_kwargs, - ) - - return chat_prompt | llm | parser - - -def _add_format_instructions( - parser: BaseOutputParser, - instruction: str, - input_kwargs: dict[str, str], -) -> str: - """ - Add parsing format instructions - to the instruction message and input_kwargs - if the output parser supports it. - """ - try: - if format_instructions := parser.get_format_instructions(): - instruction += "\n{format_instructions}" - input_kwargs["format_instructions"] = format_instructions - return instruction - except NotImplementedError: - return instruction - - -def _crop_large_inputs( - system: str, - instruction: str, - input_kwargs: dict, - settings: FuncchainSettings, -) -> None: - """ - Crop large inputs to avoid exceeding the maximum number of tokens. - """ - base_tokens = count_tokens(instruction + system) - for k, v in input_kwargs.copy().items(): - if isinstance(v, str): - content_tokens = count_tokens(v) - if base_tokens + content_tokens > settings.context_lenght: - input_kwargs[k] = v[: (settings.context_lenght - base_tokens) * 2 // 3] - print("Truncated: ", len(input_kwargs[k])) - - -def _handle_images( - llm: BaseChatModel | RunnableWithFallbacks, - input_kwargs: dict[str, str], -) -> list[Image.Image]: - """ - Handle images for vision models. - """ - images = [v for v in input_kwargs.values() if isinstance(v, Image.Image)] - if is_vision_model(llm): - for k in list(input_kwargs.keys()): - if isinstance(input_kwargs[k], Image.Image): - del input_kwargs[k] - elif images: - raise RuntimeError("Images as input are only supported for vision models.") - - return images - - -def _gather_llm( - settings: FuncchainSettings, -) -> BaseChatModel | RunnableWithFallbacks: - if isinstance(settings.llm, RunnableWithFallbacks) or isinstance( - settings.llm, BaseChatModel - ): - llm = settings.llm - else: - llm = univeral_model_selector(settings) - - if not llm: - raise RuntimeError( - "No language model provided. Either set the llm environment variable or " - "pass a model to the `chain` function." 
- ) - if handler := stream_handler.get(): - settings.streaming = True - if isinstance(llm, RunnableWithFallbacks) and isinstance( - llm.runnable, BaseChatModel - ): - llm.runnable.callbacks = [handler] - elif isinstance(llm, BaseChatModel): - llm.callbacks = [handler] - return llm diff --git a/src/funcchain/chain/interface.py b/src/funcchain/chain/interface.py deleted file mode 100644 index 82d73f8..0000000 --- a/src/funcchain/chain/interface.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import TypeVar - -from langchain.memory import ChatMessageHistory -from langchain_core.chat_history import BaseChatMessageHistory -from langchain_core.messages import BaseMessage - -from ..settings import SettingsOverride, get_settings -from .invoke import ainvoke, invoke - -ChainOutput = TypeVar("ChainOutput") - - -def chain( - system: str | None = None, - instruction: str | None = None, - context: list[BaseMessage] = [], - memory: BaseChatMessageHistory | None = None, - settings_override: SettingsOverride | None = None, - **input_kwargs: str, -) -> ChainOutput: # type: ignore - """ - Generate response of llm for provided instructions. - """ - return invoke( - system, - instruction, - context, - memory or ChatMessageHistory(), - get_settings(settings_override), - input_kwargs, - ) - - -async def achain( - system: str | None = None, - instruction: str | None = None, - context: list[BaseMessage] = [], - memory: BaseChatMessageHistory | None = None, - settings_override: SettingsOverride | None = None, - **input_kwargs: str, -) -> ChainOutput: - """ - Asyncronously generate response of llm for provided instructions. - """ - return await ainvoke( - system, - instruction, - context, - memory or ChatMessageHistory(), - get_settings(settings_override), - input_kwargs, - ) diff --git a/src/funcchain/chain/invoke.py b/src/funcchain/chain/invoke.py deleted file mode 100644 index f1b1454..0000000 --- a/src/funcchain/chain/invoke.py +++ /dev/null @@ -1,93 +0,0 @@ -from typing import TypeVar, Any - -from langchain_core.callbacks.base import Callbacks -from langchain_core.chat_history import BaseChatMessageHistory -from langchain_core.messages import BaseMessage -from langchain_core.runnables import RunnableSerializable - -from ..settings import FuncchainSettings -from .creation import create_chain -from ..utils import ( - from_docstring, - get_output_type, - kwargs_from_parent, - get_parent_frame, - log_openai_callback, - retry_parse, -) - -T = TypeVar("T") - - -@retry_parse -@log_openai_callback -def invoke( - system: str | None, - instruction: str | None, - context: list[BaseMessage], - memory: BaseChatMessageHistory, - settings: FuncchainSettings, - input_kw: dict[str, str] = {}, - callbacks: Callbacks = None, -) -> Any: # type: ignore - # default values - output_type = get_output_type() - input_kw.update(kwargs_from_parent()) - system = system or settings.default_system_prompt - instruction = instruction or from_docstring() - - chain: RunnableSerializable[dict[str, str], Any] = create_chain( - system, - instruction, - output_type, - context, - memory, - settings, - input_kw, - ) - result = chain.invoke( - input_kw, {"run_name": get_parent_frame(5).function, "callbacks": callbacks} - ) - - if isinstance(result, str): - # TODO: function calls? 
- memory.add_ai_message(result) - - return result - - -@retry_parse -@log_openai_callback -async def ainvoke( - system: str | None, - instruction: str | None, - context: list[BaseMessage], - memory: BaseChatMessageHistory, - settings: FuncchainSettings, - input_kw: dict[str, str] = {}, - callbacks: Callbacks = None, -) -> Any: - # default values - output_type = get_output_type() - input_kw.update(kwargs_from_parent()) - system = system or settings.default_system_prompt - instruction = instruction or from_docstring() - - chain: RunnableSerializable[dict[str, str], Any] = create_chain( - system, - instruction, - output_type, - context, - memory, - settings, - input_kw, - ) - result = await chain.ainvoke( - input_kw, {"run_name": get_parent_frame(5).function, "callbacks": callbacks} - ) - - if isinstance(result, str): - # TODO: function calls? - memory.add_ai_message(result) - - return result diff --git a/src/funcchain/chain/runnables.py b/src/funcchain/chain/runnables.py deleted file mode 100644 index 40689b4..0000000 --- a/src/funcchain/chain/runnables.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import TypeVar, Type -from langchain_core.runnables import RunnableSerializable -from langchain.memory import ChatMessageHistory -from .creation import create_chain -from ..settings import SettingsOverride, get_settings - -T = TypeVar("T") - - -def runnable( - instruction: str, - output_type: Type[T], - input_args: list[str] = [], - settings_override: SettingsOverride | None = None, -) -> RunnableSerializable[dict[str, str], T]: - """ - Experimental replacement for using the funcchain syntax. - """ - instruction = "\n" + instruction - chain: RunnableSerializable[dict[str, str], T] = create_chain( - "", - instruction, - output_type, - [], - ChatMessageHistory(), - settings=get_settings(settings_override), - input_kwargs={k: "" for k in input_args}, - ) - - # TODO: rewrite without original chain creation - # gather llm - # evaluate model capabilities - # get - # create prompt template - - return chain diff --git a/src/funcchain/components.py b/src/funcchain/components.py deleted file mode 100644 index 4654dcf..0000000 --- a/src/funcchain/components.py +++ /dev/null @@ -1,95 +0,0 @@ -from enum import Enum -from typing import Union, Callable, TypedDict, Any, Coroutine -from pydantic import BaseModel, Field, field_validator -from funcchain import runnable - - -class Route(TypedDict): - handler: Union[Callable, Coroutine] - description: str - - -Routes = dict[str, Union[Route, Callable, Coroutine]] - - -class ChatRouter(BaseModel): - routes: Routes - - class Config: - arbitrary_types_allowed = True - - @field_validator("routes") - def validate_routes(cls, v: Routes) -> Routes: - if "default" not in v.keys(): - raise ValueError("`default` route is missing") - return v - - def create_route(self) -> Any: - RouteChoices = Enum( # type: ignore - "RouteChoices", - {r: r for r in self.routes.keys()}, - type=str, - ) - - class RouterModel(BaseModel): - selector: RouteChoices = Field( - default="default", - description="Enum of the available routes.", - ) - - return runnable( - instruction="Given the user query select the best query handler for it.", - input_args=["user_query", "query_handlers"], - output_type=RouterModel, - ) - - def show_routes(self) -> str: - return "\n".join( - [ - f"{route_name}: {route['description']}" - if isinstance(route, dict) - else f"{route_name}: {route.__name__}" - for route_name, route in self.routes.items() - ] - ) - - def invoke_route(self, user_query: str, /, **kwargs: Any) -> 
Any: - route_query = self.create_route() - - selected_route = route_query.invoke( - input={ - "user_query": user_query, - "query_handlers": self.show_routes(), - } - ).selector - assert isinstance(selected_route, str) - - if isinstance(self.routes[selected_route], dict): - return self.routes[selected_route]["handler"](user_query, **kwargs) # type: ignore - return self.routes[selected_route](user_query, **kwargs) # type: ignore - - async def ainvoke_route(self, user_query: str, /, **kwargs: Any) -> Any: - import asyncio - - if not all( - [ - asyncio.iscoroutinefunction(route["handler"]) - if isinstance(route, dict) - else asyncio.iscoroutinefunction(route) - for route in self.routes.values() - ] - ): - raise ValueError("All routes must be awaitable when using `ainvoke_route`") - - route_query = self.create_route() - selected_route = route_query.invoke( - input={ - "user_query": user_query, - "query_handlers": self.show_routes(), - } - ).selector - assert isinstance(selected_route, str) - - if isinstance(self.routes[selected_route], dict): - return await self.routes[selected_route]["handler"](user_query, **kwargs) # type: ignore - return await self.routes[selected_route](user_query, **kwargs) # type: ignore diff --git a/src/funcchain/exceptions.py b/src/funcchain/exceptions.py deleted file mode 100644 index c463839..0000000 --- a/src/funcchain/exceptions.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import Any - -from langchain_core.exceptions import OutputParserException -from langchain_core.messages import BaseMessage - - -class ParsingRetryException(OutputParserException): - """Exception raised when parsing fails.""" - - def __init__(self, *args: Any, message: BaseMessage, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.message = message diff --git a/docs/concepts/models.md b/src/funcchain/model/__init__.py similarity index 100% rename from docs/concepts/models.md rename to src/funcchain/model/__init__.py diff --git a/src/funcchain/model/abilities.py b/src/funcchain/model/abilities.py new file mode 100644 index 0000000..e291a7a --- /dev/null +++ b/src/funcchain/model/abilities.py @@ -0,0 +1,81 @@ +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import HumanMessage, SystemMessage + +from .patches.ollama import ChatOllama + +verified_openai_function_models = [ + "gpt-4", + "gpt-4-0613", + "gpt-4-1106-preview", + "gpt-4-32k", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-16k-0613", +] + +verified_openai_vision_models = [ + "gpt-4-vision-preview", +] + +verified_ollama_vision_models = [ + "llava", + "bakllava", +] # TODO: llamacpp + + +def gather_llm_type(llm: BaseChatModel, func_check: bool = True) -> str: + from langchain_openai.chat_models import ChatOpenAI + + if not isinstance(llm, BaseChatModel): + return "base_model" + if isinstance(llm, ChatOpenAI): + if llm.model_name in verified_openai_vision_models: + return "vision_model" + if llm.model_name in verified_openai_function_models: + return "function_model" + try: + if func_check: + llm.predict_messages( + [ + SystemMessage(content=("This is a test message to see " "if the model can run functions.")), + HumanMessage(content="Hello!"), + ], + functions=[ + { + "name": "print", + "description": "show the input", + "parameters": { + "properties": { + "__arg1": {"title": "__arg1", "type": "string"}, + }, + "required": ["__arg1"], + "type": "object", + }, + } + ], + ) + except Exception: + return 
"chat_model" + else: + return "function_model" + elif isinstance(llm, ChatOllama): + for model in verified_ollama_vision_models: + if llm.model in model: + return "vision_model" + + return "chat_model" + + +def is_openai_function_model( + llm: BaseChatModel, +) -> bool: + return gather_llm_type(llm) == "function_model" + + +def is_vision_model( + llm: BaseChatModel, +) -> bool: + return gather_llm_type(llm) == "vision_model" diff --git a/src/funcchain/utils/model_defaults.py b/src/funcchain/model/defaults.py similarity index 61% rename from src/funcchain/utils/model_defaults.py rename to src/funcchain/model/defaults.py index 32bc80e..046d593 100644 --- a/src/funcchain/utils/model_defaults.py +++ b/src/funcchain/model/defaults.py @@ -1,16 +1,10 @@ from pathlib import Path from typing import Any -from langchain.chat_models import ( - AzureChatOpenAI, - ChatAnthropic, - ChatGooglePalm, - ChatOpenAI, -) from langchain_core.language_models import BaseChatModel -from .._llms import ChatLlamaCpp -from ..settings import FuncchainSettings +from ..backend.settings import FuncchainSettings +from ..model.patches.llamacpp import ChatLlamaCpp def get_gguf_model( @@ -39,26 +33,22 @@ def get_gguf_model( if (p := model_path / f"{name.lower()}.{label}.gguf").exists(): return p - # check if available on huggingface - try: - # check local cache + repo_id = f"TheBloke/{name}-GGUF" + filename = f"{name.lower()}.{label}.gguf" - input( - f"Do you want to download this model from huggingface.co/TheBloke/{name}-GGUF ?\n" - "Press enter to continue." - ) + try: + # todo make setting to turn prints off print("\033c") - print("Downloading model from huggingface...") + print("Downloading model from huggingface... (Ctrl+C to cancel)") p = hf_hub_download( - repo_id=f"TheBloke/{name}-GGUF", - filename=f"{name.lower()}.{label}.gguf", + repo_id, + filename, local_dir=model_path, local_dir_use_symlinks=True, ) print("\033c") return Path(p) - except Exception as e: - print(e) + except Exception: raise ValueError(f"ModelNotFound: {name}.{label}") @@ -69,16 +59,10 @@ def default_model_fallback( """ Give user multiple options for local models to download. """ - if ( - input("ModelNotFound: Do you want to download a local model instead?") - .lower() - .startswith("y") - ): - model_kwargs.update(settings.llama_kwargs()) + if input("ModelNotFound: Do you want to download a local model instead?").lower().startswith("y"): + model_kwargs.update(settings.llamacpp_kwargs()) return ChatLlamaCpp( - model_path=get_gguf_model( - "neural-chat-7b-v3-1", "Q4_K_M", settings - ).as_posix(), + model_path=get_gguf_model("neural-chat-7b-v3-1", "Q4_K_M", settings).as_posix(), **model_kwargs, ) print("Please select a model to use funcchain!") @@ -101,12 +85,11 @@ def univeral_model_selector( Examples: - "openai/gpt-3.5-turbo" - "anthropic/claude-2" - - "thebloke/deepseek-llm-7b-chat" - - (gguf models from huggingface.co/TheBloke) + - "llamacpp/openchat-3.5-0106" (theblock gguf models) + - "ollama/deepseek-llm-7b-chat" Supported: - [ openai, anthropic, google, llamacpp ] + [ openai, anthropic, google, ollama ] Raises: - ModelNotFoundError, when the model is not found. 
@@ -115,12 +98,7 @@ def univeral_model_selector( model_kwargs.update(settings.model_kwargs()) if model_name: - mtype, name_lable = ( - model_name.split("/") if "/" in model_name else ("", model_name) - ) - name, label = ( - name_lable.split(":") if ":" in name_lable else (name_lable, "latest") - ) + mtype, name = model_name.split("/") if "/" in model_name else ("", model_name) mtype = mtype.lower() model_kwargs["model_name"] = name @@ -128,42 +106,80 @@ def univeral_model_selector( try: match mtype: case "openai": + from langchain_openai.chat_models import ChatOpenAI + model_kwargs.update(settings.openai_kwargs()) return ChatOpenAI(**model_kwargs) + case "anthropic": + from langchain_community.chat_models import ChatAnthropic + return ChatAnthropic(**model_kwargs) + case "google": + from langchain_community.chat_models import ChatGooglePalm + return ChatGooglePalm(**model_kwargs) - case "llamacpp" | "thebloke" | "huggingface" | "local" | "gguf": + + case "ollama": + from .patches.ollama import ChatOllama + + model = model_kwargs.pop("model_name") + model_kwargs.update(settings.ollama_kwargs()) + return ChatOllama(model=model, **model_kwargs) + + case "llamacpp" | "thebloke" | "gguf": + from .patches.llamacpp import ChatLlamaCpp + model_kwargs.pop("model_name") + name, label = name.split(":") if ":" in name else (name, "latest") model_path = get_gguf_model(name, label, settings).as_posix() print("Using model:", model_path) - model_kwargs.update(settings.llama_kwargs()) + model_kwargs.update(settings.llamacpp_kwargs()) return ChatLlamaCpp( model_path=model_path, **model_kwargs, ) + except Exception as e: print("ERROR:", e) raise e try: if "gpt-4" in name or "gpt-3.5" in name: + from langchain_openai.chat_models import ChatOpenAI + model_kwargs.update(settings.openai_kwargs()) return ChatOpenAI(**model_kwargs) + except Exception as e: print(e) model_kwargs.pop("model_name") if settings.openai_api_key: + from langchain_openai.chat_models import ChatOpenAI + model_kwargs.update(settings.openai_kwargs()) return ChatOpenAI(**model_kwargs) + if settings.azure_api_key: + from langchain_openai.chat_models import AzureChatOpenAI + return AzureChatOpenAI(**model_kwargs) + if settings.anthropic_api_key: + from langchain_community.chat_models import ChatAnthropic + return ChatAnthropic(**model_kwargs) + if settings.google_api_key: + from langchain_community.chat_models import ChatGooglePalm + return ChatGooglePalm(**model_kwargs) - return default_model_fallback(**model_kwargs) + raise ValueError( + "Could not read llm selector string. Please check " + "[here](https://github.com/shroominic/funcchain/blob/main/MODELS.md) " + "for more info." 
+ ) diff --git a/docs/concepts/prompt.md b/src/funcchain/model/patches/__init__.py similarity index 100% rename from docs/concepts/prompt.md rename to src/funcchain/model/patches/__init__.py diff --git a/src/funcchain/model/patches/llamacpp.py b/src/funcchain/model/patches/llamacpp.py new file mode 100644 index 0000000..6e8fd94 --- /dev/null +++ b/src/funcchain/model/patches/llamacpp.py @@ -0,0 +1,364 @@ +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any, Dict, Iterator, List, Optional, Union + +from langchain_core.callbacks.manager import CallbackManagerForLLMRun +from langchain_core.language_models import BaseChatModel, BaseLanguageModel +from langchain_core.messages import ( + AIMessage, + AIMessageChunk, + BaseMessage, + ChatMessage, + HumanMessage, + SystemMessage, +) +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from langchain_core.pydantic_v1 import Field, root_validator +from langchain_core.utils import get_pydantic_field_names +from langchain_core.utils.utils import build_extra_kwargs + +logger = logging.getLogger(__name__) + + +try: + + class _LlamaCppCommon(BaseLanguageModel): + client: Any = Field(default=None, exclude=True) #: :meta private: + model_path: str + """The path to the Llama model file.""" + + lora_base: Optional[str] = None + """The path to the Llama LoRA base model.""" + + lora_path: Optional[str] = None + """The path to the Llama LoRA. If None, no LoRa is loaded.""" + + n_ctx: int = Field(4096, alias="n_ctx") + """Token context window.""" + + n_parts: int = Field(-1, alias="n_parts") + """Number of parts to split the model into. + If -1, the number of parts is automatically determined.""" + + seed: int = Field(-1, alias="seed") + """Seed. If -1, a random seed is used.""" + + f16_kv: bool = Field(True, alias="f16_kv") + """Use half-precision for key/value cache.""" + + logits_all: bool = Field(False, alias="logits_all") + """Return logits for all tokens, not just the last token.""" + + vocab_only: bool = Field(False, alias="vocab_only") + """Only load the vocabulary, no weights.""" + + use_mlock: bool = Field(False, alias="use_mlock") + """Force system to keep model in RAM.""" + + n_threads: Optional[int] = Field(None, alias="n_threads") + """Number of threads to use. + If None, the number of threads is automatically determined.""" + + n_batch: Optional[int] = Field(8, alias="n_batch") + """Number of tokens to process in parallel. + Should be a number between 1 and n_ctx.""" + + n_gpu_layers: Optional[int] = Field(42, alias="n_gpu_layers") + """Number of layers to be loaded into gpu memory. Default 42.""" + + suffix: Optional[str] = Field(None) + """A suffix to append to the generated text. If None, no suffix is appended.""" + + max_tokens: Optional[int] = 1024 + """The maximum number of tokens to generate.""" + + temperature: Optional[float] = 0.3 + """The temperature to use for sampling.""" + + top_p: Optional[float] = 0.95 + """The top-p value to use for sampling.""" + + logprobs: Optional[int] = Field(None) + """The number of logprobs to return. 
If None, no logprobs are returned.""" + + echo: Optional[bool] = False + """Whether to echo the prompt.""" + + stop: Optional[List[str]] = [] + """A list of strings to stop generation when encountered.""" + + repeat_penalty: Optional[float] = 1.1 + """The penalty to apply to repeated tokens.""" + + top_k: Optional[int] = 40 + """The top-k value to use for sampling.""" + + last_n_tokens_size: Optional[int] = 64 + """The number of tokens to look back when applying the repeat_penalty.""" + + use_mmap: Optional[bool] = True + """Whether to keep the model loaded in RAM""" + + rope_freq_scale: float = 1.0 + """Scale factor for rope sampling.""" + + rope_freq_base: float = 10000.0 + """Base frequency for rope sampling.""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Any additional parameters to pass to llama_cpp.Llama.""" + + streaming: bool = True + """Whether to stream the results, token by token.""" + + grammar_path: Optional[Union[str, Path]] = None + """ + grammar_path: Path to the .gbnf file that defines formal grammars + for constraining model outputs. For instance, the grammar can be used + to force the model to generate valid JSON or to speak exclusively in emojis. At most + one of grammar_path and grammar should be passed in. + """ + grammar: Optional[str] = None + """ + grammar: formal grammar for constraining model outputs. For instance, the grammar + can be used to force the model to generate valid JSON or to speak exclusively in + emojis. At most one of grammar_path and grammar should be passed in. + """ + + verbose: bool = False + """Print verbose output to stderr.""" + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that llama-cpp-python library is installed.""" + try: + from llama_cpp import Llama, LlamaGrammar + except ImportError: + raise ImportError( + "Could not import llama-cpp-python library. " + "Please install the llama-cpp-python library to " + "use this embedding model: pip install llama-cpp-python" + ) + + model_path = values["model_path"] + model_param_names = [ + "rope_freq_scale", + "rope_freq_base", + "lora_path", + "lora_base", + "n_ctx", + "n_parts", + "seed", + "f16_kv", + "logits_all", + "vocab_only", + "use_mlock", + "n_threads", + "n_batch", + "use_mmap", + "last_n_tokens_size", + "verbose", + ] + model_params = {k: values[k] for k in model_param_names} + # For backwards compatibility, only include if non-null. + if values["n_gpu_layers"] is not None: + model_params["n_gpu_layers"] = values["n_gpu_layers"] + + model_params.update(values["model_kwargs"]) + + try: + values["client"] = Llama(model_path, **model_params) + except Exception as e: + raise ValueError(f"Could not load Llama model from path: {model_path}. " f"Received error {e}") + + if values["grammar"] and values["grammar_path"]: + grammar = values["grammar"] + grammar_path = values["grammar_path"] + raise ValueError( + "Can only pass in one of grammar and grammar_path. Received " f"{grammar=} and {grammar_path=}." 
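`ChatLlamaCpp` is the local llama.cpp chat model patch; within funcchain the compiler normally injects a GBNF grammar derived from the pydantic output type, but the model can also be constructed directly. A hedged sketch with illustrative paths, using only fields defined above (exactly one of `grammar` and `grammar_path` may be set, as the validator enforces):

```python
# Assumed usage sketch; the model and grammar paths are placeholders.
from funcchain.model.patches.llamacpp import ChatLlamaCpp

llm = ChatLlamaCpp(
    model_path="./.models/openchat-3.5-0106.q4_k_m.gguf",
    grammar_path="./json.gbnf",  # or grammar="...", but not both
    n_gpu_layers=42,
    verbose=False,
)
```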
+ ) + elif isinstance(values["grammar"], str): + values["grammar"] = LlamaGrammar.from_string(values["grammar"]) + elif values["grammar_path"]: + values["grammar"] = LlamaGrammar.from_file(values["grammar_path"]) + else: + pass + return values + + @root_validator(pre=True) + def build_model_kwargs(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + extra = values.get("model_kwargs", {}) + values["model_kwargs"] = build_extra_kwargs(extra, values, all_required_field_names) + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling llama_cpp.""" + params = { + "suffix": self.suffix, + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "top_p": self.top_p, + "logprobs": self.logprobs, + "echo": self.echo, + "stop_sequences": self.stop, # key here is convention among LLM classes + "repeat_penalty": self.repeat_penalty, + "top_k": self.top_k, + } + if self.grammar: + params["grammar"] = self.grammar + return params + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Get the identifying parameters.""" + return {**{"model_path": self.model_path}, **self._default_params} + + def _get_parameters(self, stop: Optional[List[str]] = None) -> Dict[str, Any]: + """ + Performs sanity check, preparing parameters in format needed by llama_cpp. + + Args: + stop (Optional[List[str]]): List of stop sequences for llama_cpp. + + Returns: + Dictionary containing the combined parameters. + """ + + # Raise error if stop sequences are in both input and default params + if self.stop and stop is not None: + raise ValueError("`stop` found in both the input and default params.") + + params = self._default_params + + # llama_cpp expects the "stop" key not this, so we remove it: + params.pop("stop_sequences") + + # then sets it as configured, or default to an empty list: + params["stop"] = self.stop or stop or [] + + return params + + def get_num_tokens(self, text: str) -> int: + tokenized_text = self.client.tokenize(text.encode("utf-8")) + return len(tokenized_text) + + class ChatLlamaCpp(BaseChatModel, _LlamaCppCommon): + """llama.cpp chat model. + + To use, you should have the llama-cpp-python library installed, and provide the + path to the Llama model as a named parameter to the constructor. + Check out: https://github.com/abetlen/llama-cpp-python + + Example: + .. 
code-block:: python + + from funcchain._llms import ChatLlamaCpp + llm = ChatLlamaCpp(model_path="./path/to/model.gguf") + """ + + @property + def _llm_type(self) -> str: + """Return type of chat model.""" + return "llamacpp-chat" + + def _format_message_as_text(self, message: BaseMessage) -> str: + if isinstance(message, ChatMessage): + message_text = f"\n\n{message.role.capitalize()}: {message.content}" + elif isinstance(message, HumanMessage): + message_text = f"[INST] {message.content} [/INST]" + elif isinstance(message, AIMessage): + message_text = f"{message.content}" + elif isinstance(message, SystemMessage): + message_text = f"<> {message.content} <>" + else: + raise ValueError(f"Got unknown type {message}") + return message_text + + def _format_messages_as_text(self, messages: List[BaseMessage]) -> str: + return "\n".join([self._format_message_as_text(message) for message in messages]) + + def _stream_with_aggregation( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + verbose: bool = False, + **kwargs: Any, + ) -> ChatGenerationChunk: + final_chunk: Optional[ChatGenerationChunk] = None + for chunk in self._stream(messages, stop, **kwargs): + if final_chunk is None: + final_chunk = chunk + else: + final_chunk += chunk + if run_manager: + run_manager.on_llm_new_token( + chunk.text, + verbose=verbose, + ) + if final_chunk is None: + raise ValueError("No data received from llamacpp stream.") + + return final_chunk + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + """Call out to LlamaCpp's generation endpoint. + + Args: + messages: The list of base messages to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + Chat generations from the model + + Example: + .. 
code-block:: python + + response = llamacpp([ + HumanMessage(content="Tell me about the history of AI") + ]) + """ + final_chunk = self._stream_with_aggregation( + messages, stop=stop, run_manager=run_manager, verbose=self.verbose, **kwargs + ) + chat_generation = ChatGeneration( + message=AIMessage(content=final_chunk.text), + generation_info=final_chunk.generation_info, + ) + return ChatResult(generations=[chat_generation]) + + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + params = {**self._get_parameters(stop), **kwargs} + prompt = self._format_messages_as_text(messages) + result = self.client(prompt=prompt, stream=True, **params) + for part in result: + logprobs = part["choices"][0].get("logprobs", None) + chunk = ChatGenerationChunk( + message=AIMessageChunk(content=part["choices"][0]["text"]), + generation_info={"logprobs": logprobs}, + ) + yield chunk + if run_manager: + run_manager.on_llm_new_token(token=chunk.text, verbose=self.verbose, log_probs=logprobs) +except ImportError: + + class ChatLlamaCpp: # type: ignore + def __init__(self, *args: Any, **kwargs: Any) -> None: + raise ImportError("Please install langchain_community to use ChatLlamaCpp.") diff --git a/src/funcchain/model/patches/ollama.py b/src/funcchain/model/patches/ollama.py new file mode 100644 index 0000000..31792e7 --- /dev/null +++ b/src/funcchain/model/patches/ollama.py @@ -0,0 +1,132 @@ +import base64 +from typing import Any, Dict, Optional, Union + +import requests # type: ignore +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage +from langchain_core.pydantic_v1 import validator + +try: + from langchain_community.chat_models import ChatOllama as _ChatOllama + + class ChatOllama(_ChatOllama): + grammar: Optional[str] = None + """ + The [GBNF](https://github.com/ggerganov/llama.cpp/tree/master/grammars) grammar used to constrain the output. 
+ """ + + @validator("grammar") + def _validate_grammar(cls, v: Optional[str]) -> Optional[str]: + if v is not None and "root ::=" not in v: + raise ValueError("Grammar must contain a root rule.") + return v + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling Ollama.""" + return { + "model": self.model, + "format": self.format, + "options": { + "mirostat": self.mirostat, + "mirostat_eta": self.mirostat_eta, + "mirostat_tau": self.mirostat_tau, + "num_ctx": self.num_ctx, + "num_gpu": self.num_gpu, + "num_thread": self.num_thread, + "repeat_last_n": self.repeat_last_n, + "repeat_penalty": self.repeat_penalty, + "temperature": self.temperature, + "stop": self.stop, + "tfs_z": self.tfs_z, + "top_k": self.top_k, + "top_p": self.top_p, + "grammar": self.grammar, # added + }, + "system": self.system, + "template": self.template, + } + + def _convert_messages_to_ollama_messages( + self, messages: list[BaseMessage] + ) -> list[dict[str, Union[str, list[str]]]]: + ollama_messages = [] + for message in messages: + role = "" + if isinstance(message, HumanMessage): + role = "user" + elif isinstance(message, AIMessage): + role = "assistant" + elif isinstance(message, SystemMessage): + role = "system" + else: + raise ValueError("Received unsupported message type for Ollama.") + + content = "" + images = [] + if isinstance(message.content, str): + content = message.content + else: + image_urls = [] + for content_part in message.content: + if isinstance(content_part, str): + content += f"\n{content_part}" + elif content_part.get("type") == "text": + content += f"\n{content_part['text']}" + elif content_part.get("type") == "image_url": + if isinstance(content_part.get("image_url"), str): + if content_part["image_url"].startswith("data:"): + image_url_components = content_part["image_url"].split(",") + # Support data:image/jpeg;base64, format + # and base64 strings + if len(image_url_components) > 1: + images.append(image_url_components[1]) + else: + images.append(image_url_components[0]) + else: + image_urls.append(content_part["image_url"]) + else: + if isinstance(content_part.get("image_url"), dict): + if content_part["image_url"]["url"].startswith("data:"): + image_url_components = content_part["image_url"]["url"].split(",") + # Support data:image/jpeg;base64, format + # and base64 strings + if len(image_url_components) > 1: + images.append(image_url_components[1]) + else: + images.append(image_url_components[0]) + else: + image_urls.append(content_part["image_url"]["url"]) + else: + raise ValueError("Unsupported message content type.") + else: + raise ValueError( + "Unsupported message content type. " + "Must either have type 'text' or type 'image_url' " + "with a string 'image_url' field." 
+ ) + # download images and append base64 strings + if image_urls: + for image_url in image_urls: + response = requests.get(image_url) + if response.status_code == 200: + image = response.content + images.append(base64.b64encode(image).decode("utf-8")) + else: + raise ValueError(f"Failed to download image from {image_url}.") + + ollama_messages.append( + { + "role": role, + "content": content, + "images": images, + } + ) + + return ollama_messages # type: ignore + + +except ImportError: + + class ChatOllama: # type: ignore + def __init__(self, *args: Any, **kwargs: Any) -> None: + raise ImportError("Please install langchain_community to use ChatOllama.") diff --git a/src/funcchain/parser.py b/src/funcchain/parser.py deleted file mode 100644 index 89bb445..0000000 --- a/src/funcchain/parser.py +++ /dev/null @@ -1,242 +0,0 @@ -import copy -import json -import re -from typing import Callable, Optional, Type, TypeVar - -from langchain_core.exceptions import OutputParserException -from langchain_core.messages import AIMessage -from langchain_core.output_parsers import BaseGenerationOutputParser, BaseOutputParser -from langchain_core.outputs import ChatGeneration, Generation -from pydantic import BaseModel, ValidationError - -from .exceptions import ParsingRetryException -from .types import CodeBlock as CodeBlock -from .types import ParserBaseModel - -T = TypeVar("T") - - -class LambdaOutputParser(BaseOutputParser[T]): - _parse: Optional[Callable[[str], T]] = None - - def parse(self, text: str) -> T: - if self._parse is None: - raise NotImplementedError( - "LambdaOutputParser.lambda_parse() is not implemented" - ) - return self._parse(text) - - @property - def _type(self) -> str: - return "lambda" - - -class BoolOutputParser(BaseOutputParser[bool]): - def parse(self, text: str) -> bool: - return text.strip()[:1].lower() == "y" - - def get_format_instructions(self) -> str: - return "\nAnswer only with 'Yes' or 'No'." 
- - @property - def _type(self) -> str: - return "bool" - - -M = TypeVar("M", bound=BaseModel) - - -class PydanticFuncParser(BaseGenerationOutputParser[M]): - pydantic_schema: Type[M] - args_only: bool = False - - def parse_result(self, result: list[Generation], *, partial: bool = False) -> M: - generation = result[0] - if not isinstance(generation, ChatGeneration): - raise OutputParserException( - "This output parser can only be used with a chat generation.", - ) - message = generation.message - try: - func_call = copy.deepcopy(message.additional_kwargs["function_call"]) - except KeyError as exc: - raise ParsingRetryException( - f"Could not parse function call: {exc}", - message=message, - ) - - if self.args_only: - _result = func_call["arguments"] - else: - _result = func_call - try: - if self.args_only: - pydantic_args = self.pydantic_schema.model_validate_json(_result) - else: - pydantic_args = self.pydantic_schema.model_validate_json( - _result["arguments"] - ) - except ValidationError as exc: - raise ParsingRetryException( - f"Could not parse function call: {exc}", message=message - ) - return pydantic_args - - -class MultiToolParser(BaseGenerationOutputParser[M]): - output_types: list[Type[M]] - args_only: bool = False - - def parse_result(self, result: list[Generation], *, partial: bool = False) -> M: - function_call = self._pre_parse_function_call(result) - - output_type_names = [t.__name__.lower() for t in self.output_types] - - if function_call["name"] not in output_type_names: - raise OutputParserException("Invalid function call") - - print(function_call["name"]) - - output_type = self._get_output_type(function_call["name"]) - - generation = result[0] - if not isinstance(generation, ChatGeneration): - raise OutputParserException( - "This output parser can only be used with a chat generation." - ) - message = generation.message - try: - func_call = copy.deepcopy(message.additional_kwargs["function_call"]) - except KeyError as exc: - raise ParsingRetryException( - f"Could not parse function call: {exc}", message=message - ) - - if self.args_only: - _result = func_call["arguments"] - else: - _result = func_call - - try: - if self.args_only: - pydantic_args = output_type.model_validate_json(_result) - else: - pydantic_args = output_type.model_validate_json(_result["arguments"]) - except ValidationError as exc: - raise ParsingRetryException( - f"Could not parse function call: {exc}", - message=message, - ) - return pydantic_args - - def _pre_parse_function_call(self, result: list[Generation]) -> dict: - generation = result[0] - if not isinstance(generation, ChatGeneration): - raise OutputParserException( - "This output parser can only be used with a chat generation." 
- ) - message = generation.message - try: - func_call = copy.deepcopy(message.additional_kwargs["function_call"]) - except KeyError: - raise ParsingRetryException( - f"The model refused to respond with a function call:\n{message.content}\n\n", - message=message, - ) - - return func_call - - def _get_output_type(self, function_name: str) -> Type[M]: - output_type_iter = filter( - lambda t: t.__name__.lower() == function_name, self.output_types - ) - if output_type_iter is None: - raise OutputParserException( - f"No parser found for function: {function_name}" - ) - return next(output_type_iter) - - -P = TypeVar("P", bound=ParserBaseModel) - - -class CustomPydanticOutputParser(BaseOutputParser[P]): - pydantic_object: Type[P] - - def parse(self, text: str) -> P: - try: - return self.pydantic_object.parse(text) - except (json.JSONDecodeError, ValidationError) as e: - raise ParsingRetryException( - f"Failed to parse {self.pydantic_object.__name__} from completion {text}. Got: {e}", - message=AIMessage(content=text), - ) - - def get_format_instructions(self) -> str: - reduced_schema = self.pydantic_object.model_json_schema() - if "title" in reduced_schema: - del reduced_schema["title"] - if "type" in reduced_schema: - del reduced_schema["type"] - - return self.pydantic_object.format_instructions().format( - schema=json.dumps(reduced_schema), - ) - - @property - def _type(self) -> str: - return "pydantic" - - -class PydanticOutputParser(BaseOutputParser[M]): - """Parse an output using a pydantic model.""" - - pydantic_object: Type[M] - """The pydantic model to parse.""" - - def parse(self, text: str) -> M: - try: - matches = re.findall( - r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL - ) - if len(matches) > 1: - for match in matches: - try: - json_object = json.loads(match, strict=False) - return self.pydantic_object.model_validate(json_object) - except (json.JSONDecodeError, ValidationError): - continue - elif len(matches) == 1: - json_object = json.loads(matches[0], strict=False) - return self.pydantic_object.model_validate(json_object) - raise ParsingRetryException( - f"Failed to parse {self.pydantic_object.__name__} from completion {text}.", - message=AIMessage(content=text), - ) - except (json.JSONDecodeError, ValidationError) as e: - raise ParsingRetryException( - str(e), - message=AIMessage(content=text), - ) - - def get_format_instructions(self) -> str: - schema = self.pydantic_object.model_json_schema() - - # Remove extraneous fields. - reduced_schema = schema - if "title" in reduced_schema: - del reduced_schema["title"] - if "type" in reduced_schema: - del reduced_schema["type"] - # Ensure json in context is well-formed with double quotes. - schema_str = json.dumps(reduced_schema) - - return ( - "Please respond with a JSON object matching the following schema:" - f"\n\n```json_schema\n{schema_str}\n```" - "Only respond with the object, not the schema." 
- ) - - @property - def _type(self) -> str: - return "pydantic" diff --git a/docs/concepts/types.md b/src/funcchain/parser/__init__.py similarity index 100% rename from docs/concepts/types.md rename to src/funcchain/parser/__init__.py diff --git a/src/funcchain/parser/custom.py b/src/funcchain/parser/custom.py new file mode 100644 index 0000000..dd79a44 --- /dev/null +++ b/src/funcchain/parser/custom.py @@ -0,0 +1,39 @@ +import json +from typing import Type, TypeVar + +from langchain_core.exceptions import OutputParserException +from langchain_core.output_parsers import BaseOutputParser +from pydantic import ValidationError + +from ..syntax.output_types import CodeBlock as CodeBlock +from ..syntax.output_types import ParserBaseModel + +P = TypeVar("P", bound=ParserBaseModel) + + +class CustomPydanticOutputParser(BaseOutputParser[P]): + pydantic_object: Type[P] + + def parse(self, text: str) -> P: + try: + return self.pydantic_object.parse(text) + except (json.JSONDecodeError, ValidationError) as e: + raise OutputParserException( + f"Failed to parse {self.pydantic_object.__name__} " f"from completion {text}. Got: {e}", + llm_output=text, + ) + + def get_format_instructions(self) -> str: + reduced_schema = self.pydantic_object.model_json_schema() + if "title" in reduced_schema: + del reduced_schema["title"] + if "type" in reduced_schema: + del reduced_schema["type"] + + return self.pydantic_object.format_instructions().format( + schema=json.dumps(reduced_schema), + ) + + @property + def _type(self) -> str: + return "pydantic" diff --git a/src/funcchain/parser/json_schema.py b/src/funcchain/parser/json_schema.py new file mode 100644 index 0000000..2e452d4 --- /dev/null +++ b/src/funcchain/parser/json_schema.py @@ -0,0 +1,93 @@ +import json +import re +from typing import Type, TypeVar + +import yaml # type: ignore +from langchain_core.exceptions import OutputParserException +from langchain_core.language_models import BaseChatModel +from langchain_core.output_parsers import BaseOutputParser +from langchain_core.runnables import Runnable +from pydantic import BaseModel, ValidationError + +M = TypeVar("M", bound=BaseModel) + + +class RetryJsonPydanticParser(BaseOutputParser[M]): + """Parse an output using a pydantic model.""" + + pydantic_object: Type[M] + """The pydantic model to parse.""" + + retry: int + retry_llm: BaseChatModel | str | None = None + + def parse(self, text: str) -> M: + try: + matches = re.findall(r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL) + if len(matches) > 1: + for match in matches: + try: + json_object = json.loads(match, strict=False) + return self.pydantic_object.model_validate(json_object) + except (json.JSONDecodeError, ValidationError): + continue + elif len(matches) == 1: + json_object = json.loads(matches[0], strict=False) + return self.pydantic_object.model_validate(json_object) + # no matches + raise OutputParserException( + f"No JSON {self.pydantic_object.__name__} found in completion {text}.", + llm_output=text, + ) + except (json.JSONDecodeError, ValidationError) as e: + if self.retry > 0: + print(f"Retrying parsing {self.pydantic_object.__name__}...") + return self.retry_chain.invoke( + input={"output": text, "error": str(e)}, + config={"run_name": "RetryPydanticOutputParser"}, + ) + # no retries left + raise OutputParserException(str(e), llm_output=text) + + def get_format_instructions(self) -> str: + schema = self.pydantic_object.model_json_schema() + + # Remove extraneous fields. 
+ reduced_schema = schema + if "title" in reduced_schema: + del reduced_schema["title"] + if "type" in reduced_schema: + del reduced_schema["type"] + # Ensure json in context is well-formed with double quotes. + schema_str = yaml.dump(reduced_schema) + + return ( + "Please respond with a json result matching the following schema:" + f"\n\n```schema\n{schema_str}\n```\n" + "Do not repeat the schema. Only respond with the result." + ) + + @property + def _type(self) -> str: + return "pydantic" + + @property + def retry_chain(self) -> Runnable: + from ..syntax.executable import compile_runnable + + return compile_runnable( + instruction="Retry parsing the output by fixing the error.", + input_args=["output", "error"], + output_types=[self.pydantic_object], + llm=self.retry_llm, + settings_override={"retry_parse": self.retry - 1}, + ) + + +class RetryJsonPydanticUnionParser(BaseOutputParser[M]): + """Parse an output using a pydantic model.""" + + output_types: list[Type[M]] + + def parse(self, text: str) -> M: + raise NotImplementedError diff --git a/src/funcchain/parser/openai_functions.py b/src/funcchain/parser/openai_functions.py new file mode 100644 index 0000000..2a259e1 --- /dev/null +++ b/src/funcchain/parser/openai_functions.py @@ -0,0 +1,144 @@ +import copy +from typing import Type, TypeVar + +from langchain_core.exceptions import OutputParserException +from langchain_core.language_models import BaseChatModel +from langchain_core.output_parsers import BaseGenerationOutputParser +from langchain_core.outputs import ChatGeneration, Generation +from langchain_core.runnables import Runnable +from pydantic import BaseModel, ValidationError + +from ..syntax.output_types import CodeBlock as CodeBlock +from ..utils.msg_tools import msg_to_str + +M = TypeVar("M", bound=BaseModel) + + +class RetryOpenAIFunctionPydanticParser(BaseGenerationOutputParser[M]): + pydantic_schema: Type[M] + args_only: bool = False + retry: int + retry_llm: BaseChatModel | str | None = None + + def parse_result(self, result: list[Generation], *, partial: bool = False) -> M: + try: + generation = result[0] + if not isinstance(generation, ChatGeneration): + raise OutputParserException( + "This output parser can only be used with a chat generation.", + ) + message = generation.message + try: + func_call = copy.deepcopy(message.additional_kwargs["function_call"]) + except KeyError as exc: + raise OutputParserException( + f"Could not parse function call: {exc}", + llm_output=msg_to_str(message), + ) + + if self.args_only: + pydantic_args = self.pydantic_schema.model_validate_json(func_call) + else: + pydantic_args = self.pydantic_schema.model_validate_json(func_call["arguments"]) + + return pydantic_args + except ValidationError as e: + if self.retry > 0: + print(f"Retrying parsing {self.pydantic_schema.__name__}...") + return self.retry_chain.invoke( + input={"output": result, "error": str(e)}, + config={"run_name": "RetryOpenAIFunctionPydanticParser"}, + ) + # no retries left + raise OutputParserException(str(e), llm_output=msg_to_str(message)) + + @property + def retry_chain(self) -> Runnable: + from ..syntax.executable import compile_runnable + + return compile_runnable( + instruction="Retry parsing the output by fixing the error.", + input_args=["output", "error"], + output_types=[self.pydantic_schema], + llm=self.retry_llm, + settings_override={"retry_parse": self.retry - 1}, + ) + + +class RetryOpenAIFunctionPydanticUnionParser(BaseGenerationOutputParser[M]): + output_types: list[type[M]] + args_only: bool = False + 
retry: int + retry_llm: BaseChatModel | str | None = None + + def parse_result(self, result: list[Generation], *, partial: bool = False) -> M: + try: + function_call = self._pre_parse_function_call(result) + + output_type_names = [t.__name__.lower() for t in self.output_types] + + if function_call["name"] not in output_type_names: + raise OutputParserException("Invalid function call") + + output_type = self._get_output_type(function_call["name"]) + + generation = result[0] + if not isinstance(generation, ChatGeneration): + raise OutputParserException("This output parser can only be used with a chat generation.") + message = generation.message + try: + func_call = copy.deepcopy(message.additional_kwargs["function_call"]) + except KeyError as exc: + raise OutputParserException( + f"Could not parse function call: {exc}", + llm_output=msg_to_str(message), + ) + + if self.args_only: + pydantic_args = output_type.model_validate_json(func_call["arguments"]) + else: + pydantic_args = output_type.model_validate_json(func_call["arguments"]) + + return pydantic_args + except (ValidationError, OutputParserException) as e: + if self.retry > 0: + print(f"Retrying parsing {output_type.__name__}...") + return self.retry_chain.invoke( + input={"output": result, "error": str(e)}, + config={"run_name": "RetryOpenAIFunctionPydanticUnionParser"}, + ) + # no retries left + raise OutputParserException(str(e), llm_output=msg_to_str(message)) + + def _pre_parse_function_call(self, result: list[Generation]) -> dict: + generation = result[0] + if not isinstance(generation, ChatGeneration): + raise OutputParserException("This output parser can only be used with a chat generation.") + message = generation.message + try: + func_call = copy.deepcopy(message.additional_kwargs["function_call"]) + except KeyError: + raise OutputParserException( + "The model refused to respond with a " f"function call:\n{message.content}\n\n", + llm_output=msg_to_str(message), + ) + + return func_call + + def _get_output_type(self, function_name: str) -> Type[M]: + output_type_iter = filter(lambda t: t.__name__.lower() == function_name, self.output_types) + if output_type_iter is None: + raise OutputParserException(f"No parser found for function: {function_name}") + return next(output_type_iter) + + @property + def retry_chain(self) -> Runnable: + from ..syntax.executable import compile_runnable + + return compile_runnable( + instruction="Retry parsing the output by fixing the error.", + input_args=["output", "error"], + output_types=self.output_types, + llm=self.retry_llm, + settings_override={"retry_parse": self.retry - 1}, + ) diff --git a/src/funcchain/parser/primitive_types.py b/src/funcchain/parser/primitive_types.py new file mode 100644 index 0000000..7757b2f --- /dev/null +++ b/src/funcchain/parser/primitive_types.py @@ -0,0 +1,33 @@ +""" +Primitive Types Parser +""" +from typing import Generic, TypeVar + +from langchain_core.language_models import BaseChatModel +from pydantic import BaseModel, create_model + +from .json_schema import RetryJsonPydanticParser + +M = TypeVar("M", bound=BaseModel) + + +class RetryJsonPrimitiveTypeParser(RetryJsonPydanticParser, Generic[M]): + """ + Parse primitve types by wrapping them in a PydanticModel and parsing them. + Examples: int, float, bool, list[str], dict[str, int], Literal["a", "b", "c"], etc. 
+ """ + + def __init__( + self, + primitive_type: type, + retry: int = 1, + retry_llm: BaseChatModel | str | None = None, + ) -> None: + super().__init__( + pydantic_object=create_model("Extract", value=(primitive_type, ...)), + retry=retry, + retry_llm=retry_llm, + ) + + def parse(self, text: str) -> M: + return super().parse(text).value diff --git a/src/funcchain/utils/grammars.py b/src/funcchain/parser/schema_converter.py similarity index 84% rename from src/funcchain/utils/grammars.py rename to src/funcchain/parser/schema_converter.py index bd41804..47d785d 100644 --- a/src/funcchain/utils/grammars.py +++ b/src/funcchain/parser/schema_converter.py @@ -60,9 +60,16 @@ def visit(self, schema: dict, name: str) -> str: rule = " | ".join( ( self.visit(alt_schema, f'{name}{"-" if name else ""}{i}') - for i, alt_schema in enumerate( - schema.get("oneOf") or schema["anyOf"] - ) + for i, alt_schema in enumerate(schema.get("oneOf") or schema["anyOf"]) + ) + ) + return self._add_rule(rule_name, rule) + + elif "allOf" in schema: + rule = " ".join( + ( + self.visit(sub_schema, f'{name}{"-" if name else ""}{i}') + for i, sub_schema in enumerate(schema["allOf"]) ) ) return self._add_rule(rule_name, rule) @@ -85,9 +92,7 @@ def visit(self, schema: dict, name: str) -> str: rule = '"{" space' for i, (prop_name, prop_schema) in enumerate(prop_pairs): - prop_rule_name = self.visit( - prop_schema, f'{name}{"-" if name else ""}{prop_name}' - ) + prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}') if i > 0: rule += ' "," space' rule += rf' {self._format_literal(prop_name)} space ":" space {prop_rule_name}' @@ -97,12 +102,8 @@ def visit(self, schema: dict, name: str) -> str: elif schema_type == "array" and "items" in schema: # TODO `prefixItems` keyword - item_rule_name = self.visit( - schema["items"], f'{name}{"-" if name else ""}item' - ) - rule = ( - f'"[" space ({item_rule_name} ("," space {item_rule_name})*)? "]" space' - ) + item_rule_name = self.visit(schema["items"], f'{name}{"-" if name else ""}item') + rule = f'"[" space ({item_rule_name} ("," space {item_rule_name})*)? "]" space' return self._add_rule(rule_name, rule) else: @@ -118,7 +119,7 @@ def format_grammar(self) -> str: def schema_to_grammar(json_schema: dict) -> str: schema = json_schema - prop_order = {name: idx for idx, name in enumerate(schema.keys())} + prop_order = {name: idx for idx, name in enumerate(schema["properties"].keys())} defs = schema.get("$defs", {}) converter = SchemaConverter(prop_order, defs) converter.visit(schema, "") diff --git a/src/funcchain/parser/selector.py b/src/funcchain/parser/selector.py new file mode 100644 index 0000000..132b771 --- /dev/null +++ b/src/funcchain/parser/selector.py @@ -0,0 +1,51 @@ +from enum import Enum +from typing import Literal, get_origin + +from langchain_core.language_models import BaseChatModel +from langchain_core.output_parsers import BaseGenerationOutputParser, BaseOutputParser, StrOutputParser +from pydantic import BaseModel + +from ..parser.json_schema import RetryJsonPydanticParser, RetryJsonPydanticUnionParser +from ..parser.primitive_types import RetryJsonPrimitiveTypeParser +from ..syntax.output_types import ParserBaseModel + + +def parser_for( + output_types: list[type], + retry: int, + llm: BaseChatModel | str | None = None, +) -> BaseOutputParser | BaseGenerationOutputParser: + """ + Get the parser from the type annotation of the parent caller function. 
+ """ + if len(output_types) > 1: + return RetryJsonPydanticUnionParser(output_types=output_types) + + output_type = output_types[0] + + if output_type is str: + return StrOutputParser() + + # TODO: write tests for each of these cases + if ( + (output_type is bool) + or (output_type is int) + or (output_type is float) + or ((t := get_origin(output_type)) is list) + or (t is list) + or (t is dict) + or (t is set) + or (t is tuple) + or (t is Literal) + or (t is Enum) + ): + return RetryJsonPrimitiveTypeParser(primitive_type=output_type, retry=retry, retry_llm=llm) + + if issubclass(output_type, ParserBaseModel): + return output_type.output_parser() # type: ignore + + if issubclass(output_type, BaseModel): + return RetryJsonPydanticParser(pydantic_object=output_type, retry=retry, retry_llm=llm) + + else: + raise SyntaxError(f"Output Type is not supported: {output_type}") diff --git a/src/funcchain/py.typed b/src/funcchain/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/funcchain/schema/__init__.py b/src/funcchain/schema/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/funcchain/schema/signature.py b/src/funcchain/schema/signature.py new file mode 100644 index 0000000..778aaba --- /dev/null +++ b/src/funcchain/schema/signature.py @@ -0,0 +1,49 @@ +from typing import Any + +from langchain_core.messages import BaseMessage +from langchain_core.pydantic_v1 import BaseModel, Field + +from ..backend.settings import FuncchainSettings, settings + + +class Signature(BaseModel): + """ + Fundamental structure of an executable prompt. + """ + + instruction: str + """ Prompt instruction to the language model. """ + + input_args: list[tuple[str, type]] = Field(default_factory=list) + """ List of input arguments for the prompt template. """ + + output_types: list[Any] + """ Type to parse the output into. """ + + # todo: is history really needed? maybe this could be a background optimization + history: list[BaseMessage] = Field(default_factory=list) + """ Additional messages that are inserted before the instruction. """ + + # update_history: bool = Field(default=True) + + # todo: should this be defined at compile time? maybe runtime is better + settings: FuncchainSettings = Field(default=settings) + """ Local settings to override global settings. """ + + auto_tune: bool = Field(default=False) + """ Whether to auto tune the prompt using dspy. 
""" + + class Config: + arbitrary_types_allowed = True + + def __hash__(self) -> int: + """Hash for caching keys.""" + return hash( + ( + self.instruction, + tuple(self.input_args), + tuple(self.output_types), + tuple(self.history), + self.settings, + ) + ) diff --git a/src/funcchain/syntax/__init__.py b/src/funcchain/syntax/__init__.py new file mode 100644 index 0000000..c10254d --- /dev/null +++ b/src/funcchain/syntax/__init__.py @@ -0,0 +1,13 @@ +""" Syntax -> Signature +""" +from .decorators import runnable +from .executable import achain, chain +from .output_types import CodeBlock, Error + +__all__ = [ + "chain", + "achain", + "runnable", + "CodeBlock", + "Error", +] diff --git a/src/funcchain/syntax/components/__init__.py b/src/funcchain/syntax/components/__init__.py new file mode 100644 index 0000000..848903b --- /dev/null +++ b/src/funcchain/syntax/components/__init__.py @@ -0,0 +1,5 @@ +from .router import RouterChat + +__all__ = [ + "RouterChat", +] diff --git a/src/funcchain/syntax/components/handler.py b/src/funcchain/syntax/components/handler.py new file mode 100644 index 0000000..e6e8ef5 --- /dev/null +++ b/src/funcchain/syntax/components/handler.py @@ -0,0 +1,63 @@ +from typing import Union + +from langchain_core.chat_history import BaseChatMessageHistory +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder +from langchain_core.runnables import Runnable +from langchain_core.runnables.history import RunnableWithMessageHistory + +from ...backend.settings import settings +from ...model.defaults import univeral_model_selector +from ...utils.msg_tools import msg_to_str + +UniversalLLM = Union[BaseChatModel, str, None] + + +def load_universal_llm(llm: UniversalLLM) -> BaseChatModel: + if isinstance(llm, str): + settings.llm = llm + llm = None + if not llm: + llm = univeral_model_selector(settings) + return llm + + +# def history_handler(input: Iterator[Any]) -> Iterator[Any]: + +# for chunk in input: +# yield chunk + + +def BasicChatHandler( + *, + llm: UniversalLLM = None, + chat_history: BaseChatMessageHistory | None = None, + system_message: str = "", +) -> Runnable[HumanMessage, AIMessage]: + if chat_history is None: + from ...utils.memory import ChatMessageHistory + + chat_history = ChatMessageHistory() + + llm = load_universal_llm(llm) + + handler_chain = ( + ChatPromptTemplate.from_messages( + [ + *(("system", system_message) if system_message else []), + MessagesPlaceholder(variable_name="history"), + ("human", "{user_msg}"), + ] + ) + | llm + ) + return { + # todo handle images + "user_msg": lambda x: msg_to_str(x), + } | RunnableWithMessageHistory( + handler_chain, # type: ignore + get_session_history=lambda _: chat_history, + input_messages_key="user_msg", + history_messages_key="history", + ) diff --git a/src/funcchain/syntax/components/router.py b/src/funcchain/syntax/components/router.py new file mode 100644 index 0000000..a5fc1a7 --- /dev/null +++ b/src/funcchain/syntax/components/router.py @@ -0,0 +1,132 @@ +from enum import Enum +from typing import Any, AsyncIterator, Callable, Iterator, Optional + +from langchain_core.chat_history import BaseChatMessageHistory +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.runnables import ( + RouterRunnable, + Runnable, + RunnableConfig, + RunnableLambda, + RunnablePassthrough, + 
RunnableSerializable, +) +from typing_extensions import TypedDict + +from ...utils.msg_tools import msg_to_str +from ..executable import compile_runnable + + +class Route(TypedDict): + handler: Callable | Runnable + description: str + + +Routes = dict[str, Route] + + +class RouterChat(Runnable[HumanMessage, AIMessage]): + """ + A router component that can be used to route user requests to different handlers. + """ + + def __init__( + self, + routes: Routes, + llm: Optional[BaseChatModel | str] = None, + history: Optional[BaseChatMessageHistory] = None, + add_default_handler: bool = True, + ) -> None: + self.routes = routes + self.llm = llm + self.history = history + + if add_default_handler: + self._add_default_handler() + + @property + def runnable(self) -> RunnableSerializable[HumanMessage, AIMessage]: + # TODO: update history somewhere + return { + "input": RunnablePassthrough(), + "key": { + # todo "images": x.images, + "user_request": msg_to_str, + "routes": lambda _: self._routes_repr(), + } + # route selection + | self._selector() + | (lambda x: x.selector.value), + } | RouterRunnable( + runnables={name: run["handler"] for name, run in self.routes.items()}, + ) # maybe add auto conversion of strings to AI Messages/Chunks + + def _selector(self) -> Runnable[dict[str, str], Any]: + RouteChoices = Enum( # type: ignore + "RouteChoices", + {r: r for r in self.routes.keys()}, + type=str, + ) + from pydantic import BaseModel, Field + + class RouterModel(BaseModel): + selector: RouteChoices = Field( + default="default", + description="Enum of the available routes.", + ) + + return compile_runnable( + instruction="Given the user request select the appropriate route.", + input_args=["user_request", "routes"], # todo: optional images + output_types=[RouterModel], + context=self.history.messages if self.history else [], + llm=self.llm, + ) + + def _add_default_handler(self) -> None: + if "default" not in self.routes.keys(): + self.routes["default"] = { + "handler": ( + {"user_request": lambda x: msg_to_str(x)} + | compile_runnable( + instruction="{user_request}", + input_args=["user_request"], + output_types=[str], + llm=self.llm, + ) + | RunnableLambda(lambda x: AIMessage(content=x)) + ), + "description": ( + "Choose this for everything else like " + "normal questions or random things.\n" + "As example: 'How does this work?' 
or " + "'Whatsup' or 'What is the meaning of life?'" + ), + } + + def _routes_repr(self) -> str: + return "\n".join([f"{route_name}: {route['description']}" for route_name, route in self.routes.items()]) + + def invoke(self, input: HumanMessage, config: RunnableConfig | None = None) -> AIMessage: + return self.runnable.invoke(input, config=config) + + async def ainvoke(self, input: HumanMessage, config: RunnableConfig | None = None, **kwargs: Any) -> AIMessage: + return await self.runnable.ainvoke(input, config, **kwargs) + + def stream( + self, + input: HumanMessage, + config: RunnableConfig | None = None, + **kwargs: Any | None, + ) -> Iterator[AIMessage]: + yield from self.runnable.stream(input, config, **kwargs) + + async def astream( + self, + input: HumanMessage, + config: RunnableConfig | None = None, + **kwargs: Any | None, + ) -> AsyncIterator[AIMessage]: + async for msg in self.runnable.astream(input, config, **kwargs): + yield msg diff --git a/src/funcchain/syntax/decorators.py b/src/funcchain/syntax/decorators.py new file mode 100644 index 0000000..180fe0c --- /dev/null +++ b/src/funcchain/syntax/decorators.py @@ -0,0 +1,59 @@ +from types import FunctionType +from typing import Callable, Optional, TypeVar, Union, overload + +from langchain_core.language_models import BaseChatModel +from langchain_core.runnables import Runnable + +from ..backend.compiler import compile_chain +from ..backend.meta_inspect import gather_signature +from ..backend.settings import SettingsOverride, create_local_settings +from ..schema.signature import Signature + +OutputT = TypeVar("OutputT") + + +@overload +def runnable( + f: Callable[..., OutputT], +) -> Runnable[dict[str, str], OutputT]: + ... + + +@overload +def runnable( + *, + llm: BaseChatModel | str | None = None, + settings: SettingsOverride = {}, + auto_tune: bool = False, +) -> Callable[[Callable], Runnable[dict[str, str], OutputT]]: + ... + + +def runnable( + f: Optional[Callable[..., OutputT]] = None, + *, + llm: BaseChatModel | str | None = None, + settings: SettingsOverride = {}, + auto_tune: bool = False, +) -> Union[Callable, Runnable]: + """Decorator for funcchain syntax. + Compiles the function into a runnable. 
+ """ + if llm: + settings["llm"] = llm + + def decorator(f: Callable) -> Runnable: + if not isinstance(f, FunctionType): + raise ValueError("funcchain can only be used on functions") + + _signature: dict = gather_signature(f) + _signature["settings"] = create_local_settings(override=settings) + _signature["auto_tune"] = auto_tune + + sig: Signature = Signature(**_signature) + return compile_chain(sig) + + if callable(f): + return decorator(f) + else: + return decorator diff --git a/src/funcchain/syntax/executable.py b/src/funcchain/syntax/executable.py new file mode 100644 index 0000000..4bab684 --- /dev/null +++ b/src/funcchain/syntax/executable.py @@ -0,0 +1,150 @@ +from typing import Any, TypeVar + +from langchain_core.callbacks.base import Callbacks +from langchain_core.chat_history import BaseChatMessageHistory +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import BaseMessage, SystemMessage +from langchain_core.runnables import Runnable + +from ..backend.compiler import compile_chain +from ..backend.meta_inspect import ( + args_from_parent, + from_docstring, + get_output_types, + get_parent_frame, + kwargs_from_parent, +) +from ..backend.settings import SettingsOverride, create_local_settings +from ..schema.signature import Signature +from ..utils.memory import ChatMessageHistory +from .input_types import Image + + +def chain( + *, + system: str | None = None, + instruction: str | None = None, + context: list[BaseMessage] = [], + memory: BaseChatMessageHistory | None = None, + settings_override: SettingsOverride = {}, + **input_kwargs: Any, +) -> Any: + """ + Generate response of llm for provided instructions. + """ + settings = create_local_settings(settings_override) + callbacks: Callbacks = None + output_types = get_output_types() + input_args: list[tuple[str, type]] = args_from_parent() + + memory = memory or ChatMessageHistory() + input_kwargs.update(kwargs_from_parent()) + + # todo maybe this should be done in the prompt processor? + system = system or settings.system_prompt + instruction = instruction or from_docstring() + + # temp image handling + temp_images: list[Image] = [] + for k, v in input_kwargs.copy().items(): + if isinstance(v, Image): + temp_images.append(v) + input_kwargs.pop(k) + + sig: Signature = Signature( + instruction=instruction, + input_args=input_args, + output_types=output_types, + history=context, + settings=settings, + ) + chain: Runnable[dict[str, Any], Any] = compile_chain(sig, temp_images) + result = chain.invoke(input_kwargs, {"run_name": get_parent_frame(2).function, "callbacks": callbacks}) + + if memory and isinstance(result, str): + # TODO: function calls? + memory.add_ai_message(result) + + return result + + +async def achain( + *, + system: str | None = None, + instruction: str | None = None, + context: list[BaseMessage] = [], + memory: BaseChatMessageHistory | None = None, + settings_override: SettingsOverride = {}, + **input_kwargs: Any, +) -> Any: + """ + Asyncronously generate response of llm for provided instructions. + """ + settings = create_local_settings(settings_override) + callbacks: Callbacks = None + output_types = get_output_types() + input_args: list[tuple[str, type]] = args_from_parent() + + memory = memory or ChatMessageHistory() + input_kwargs.update(kwargs_from_parent()) + + # todo maybe this should be done in the prompt processor? 
+    system = system or settings.system_prompt
+    instruction = instruction or from_docstring()
+
+    # temp image handling
+    temp_images: list[Image] = []
+    for k, v in input_kwargs.copy().items():
+        if isinstance(v, Image):
+            temp_images.append(v)
+            input_kwargs.pop(k)
+
+    sig: Signature = Signature(
+        instruction=instruction,
+        input_args=input_args,
+        output_types=output_types,
+        history=context,
+        settings=settings,
+    )
+    chain: Runnable[dict[str, str], Any] = compile_chain(sig, temp_images)
+    result = await chain.ainvoke(input_kwargs, {"run_name": get_parent_frame(2).function, "callbacks": callbacks})
+
+    if memory and isinstance(result, str):
+        # TODO: function calls?
+        memory.add_ai_message(result)
+
+    return result
+
+
+ChainOut = TypeVar("ChainOut")
+
+
+def compile_runnable(
+    *,
+    instruction: str,
+    output_types: list[type[ChainOut]],
+    input_args: list[str] = [],
+    context: list = [],
+    llm: BaseChatModel | str | None = None,
+    system: str = "",
+    settings_override: SettingsOverride = {},
+) -> Runnable[dict[str, str], ChainOut]:
+    """
+    On the fly compilation of the funcchain syntax.
+    """
+    if settings_override and llm:
+        settings_override["llm"] = llm
+    instruction = "\n" + instruction
+    settings = create_local_settings(settings_override)
+    context = [SystemMessage(content=system)] + context
+    _input_args: list[tuple[str, type]] = [(arg, str) for arg in input_args]
+
+    sig: Signature = Signature(
+        instruction=instruction,
+        input_args=_input_args,
+        output_types=output_types,
+        history=context,
+        settings=settings,
+    )
+
+    return compile_chain(sig, temp_images=[])
diff --git a/src/funcchain/syntax/input_types.py b/src/funcchain/syntax/input_types.py
new file mode 100644
index 0000000..9833718
--- /dev/null
+++ b/src/funcchain/syntax/input_types.py
@@ -0,0 +1,82 @@
+import base64
+from typing import TYPE_CHECKING
+
+from langchain_core.chat_history import BaseChatMessageHistory
+from langchain_core.messages import BaseMessage
+
+from ..utils.msg_tools import msg_images
+
+if TYPE_CHECKING:
+    from PIL.Image import Image as PImage
+else:
+    PImage = type("PImage")
+
+
+class Image:
+    """
+    Funcchain type for passing an image.
+    Supports multiple input and output formats.
+ (base64, bytes, pillow, file, web_url) + """ + + __slots__ = ("url",) + + def __init__(self, base64_url: str) -> None: + self.url = base64_url + + def from_bytes(self, data: bytes) -> "Image": + encoded_string = base64.b64encode(data).decode() + return self.from_base64(encoded_string) + + @classmethod + def from_message(cls, message: BaseMessage) -> list["Image"]: + return [cls(i) for i in images] if (images := msg_images(message)) else [] + + @classmethod + def from_base64(cls, base64: str) -> "Image": + return cls("data:image/png;base64," + base64) + + @classmethod + def from_file(cls, path: str) -> "Image": + with open(path, "rb") as file: + encoded_string = base64.b64encode(file.read()).decode() + return cls("data:image/png;base64," + encoded_string) + + @classmethod + def from_pillow(cls, image: PImage) -> "Image": + encoded_string = base64.b64encode(image.tobytes()).decode() + return cls("data:image/png;base64," + encoded_string) + + @classmethod + def from_url(cls, url: str) -> "Image": + from requests import get # type: ignore + + response_content = get(url).content + encoded_string = base64.b64encode(response_content).decode() + return cls("data:image/png;base64," + encoded_string) + + def to_base64(self) -> str: + return self.url.split(",")[1] + + def to_bytes(self) -> bytes: + base64_str = self.to_base64() + return base64.b64decode(base64_str) + + def to_pillow(self) -> PImage: + from io import BytesIO # type: ignore + + image_bytes = self.to_bytes() + return PImage.open(BytesIO(image_bytes)) + + def to_file(self, path: str) -> None: + open(path, "wb").write(self.to_bytes()) + + def __str__(self) -> str: + return self.url + + +# TODO: implement +class ChatHistory(BaseChatMessageHistory): + """Funcchain Type Wrapper for detecting ChatHistorys.""" + + ... diff --git a/src/funcchain/types.py b/src/funcchain/syntax/output_types.py similarity index 66% rename from src/funcchain/types.py rename to src/funcchain/syntax/output_types.py index ac94ea3..829a3e0 100644 --- a/src/funcchain/types.py +++ b/src/funcchain/syntax/output_types.py @@ -2,35 +2,40 @@ import re from typing import Optional -from langchain.output_parsers.format_instructions import PYDANTIC_FORMAT_INSTRUCTIONS from langchain_core.exceptions import OutputParserException -from langchain_core.output_parsers import BaseOutputParser +from langchain_core.output_parsers import BaseLLMOutputParser from pydantic import BaseModel, Field from typing_extensions import Self class ParserBaseModel(BaseModel): @classmethod - def output_parser(cls) -> BaseOutputParser[Self]: - from .parser import CustomPydanticOutputParser + def output_parser(cls) -> BaseLLMOutputParser[Self]: + from ..parser.custom import CustomPydanticOutputParser return CustomPydanticOutputParser(pydantic_object=cls) @classmethod def parse(cls, text: str) -> Self: """Override for custom parsing.""" - match = re.search( - r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL - ) + match = re.search(r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL) json_str = "" if match: json_str = match.group() json_object = json.loads(json_str, strict=False) - return cls.parse_obj(json_object) + return cls.model_validate(json_object) @staticmethod def format_instructions() -> str: - return PYDANTIC_FORMAT_INSTRUCTIONS + return ( + "Please respond with a json result matching the following schema:" + "\n\n```schema\n{schema}\n```\n" + "Do not repeat the schema. Only respond with the result." 
+        )
+
+    @staticmethod
+    def custom_grammar() -> str | None:
+        return None
 
 
 class CodeBlock(ParserBaseModel):
@@ -39,9 +44,7 @@ class CodeBlock(ParserBaseModel):
 
     @classmethod
     def parse(cls, text: str) -> "CodeBlock":
-        matches = re.finditer(
-            r"```(?P<language>\w+)?\n?(?P<code>.*?)```", text, re.DOTALL
-        )
+        matches = re.finditer(r"```(?P<language>\w+)?\n?(?P<code>.*?)```", text, re.DOTALL)
         for match in matches:
             groupdict = match.groupdict()
             groupdict["language"] = groupdict.get("language", None)
@@ -63,6 +66,10 @@ def parse(cls, text: str) -> "CodeBlock":
     def format_instructions() -> str:
         return "Answer with a codeblock."
 
+    @staticmethod
+    def custom_grammar() -> str | None:
+        return 'root ::= "```" ([^`] | "`" [^`] | "``" [^`])* "```"'
+
     def __str__(self) -> str:
         return self.code
 
@@ -75,9 +82,7 @@ class Error(BaseModel):
     """
 
     title: str = Field(description="CamelCase Name titeling the error")
-    description: str = Field(
-        ..., description="Short description of the unexpected situation"
-    )
+    description: str = Field(..., description="Short description of the unexpected situation")
 
     def __raise__(self) -> None:
         raise Exception(self.description)
diff --git a/src/funcchain/utils/__init__.py b/src/funcchain/utils/__init__.py
index 20c7876..6505144 100644
--- a/src/funcchain/utils/__init__.py
+++ b/src/funcchain/utils/__init__.py
@@ -1,7 +1,5 @@
-from .decorators import *  # noqa: F401, F403
-from .function_frame import *  # noqa: F401, F403
-from .grammars import pydantic_to_grammar  # noqa: F401, F403
-from .grammars import schema_to_grammar  # noqa: F401, F403
-from .helpers import *  # noqa: F401, F403
-from .image import *  # noqa: F401, F403
-from .model_defaults import *  # noqa: F401, F403
+from typing import NoReturn
+
+
+def raiser(e: Exception | str) -> NoReturn:
+    raise e if isinstance(e, Exception) else Exception(e)
diff --git a/src/funcchain/utils/decorators.py b/src/funcchain/utils/decorators.py
deleted file mode 100644
index 5286bf3..0000000
--- a/src/funcchain/utils/decorators.py
+++ /dev/null
@@ -1,148 +0,0 @@
-from asyncio import iscoroutinefunction
-from asyncio import sleep as asleep
-from functools import wraps
-from time import sleep
-from typing import Any
-
-from langchain.callbacks import get_openai_callback
-from langchain.callbacks.openai_info import OpenAICallbackHandler
-from langchain_core.chat_history import BaseChatMessageHistory
-from langchain_core.exceptions import OutputParserException
-from langchain_core.messages import AIMessage
-from rich import print
-
-from ..exceptions import ParsingRetryException
-from ..settings import FuncchainSettings
-from .function_frame import get_parent_frame
-
-
-def retry_parse(fn: Any) -> Any:
-    """
-    Retry parsing the output for a given number of times.
-
-    Raises:
-    - OutputParserException: If the output cannot be parsed.
- """ - if iscoroutinefunction(fn): - - @wraps(fn) - async def async_wrapper(*args: Any, **kwargs: Any) -> Any: - memory: BaseChatMessageHistory = args[3] - settings: FuncchainSettings = args[4] - retry = settings.retry_parse - for r in range(retry): - try: - return await fn(*args, **kwargs) - except ParsingRetryException as e: - _handle_error(e, r, retry, memory) - await asleep(settings.retry_parse_sleep + r) - except OutputParserException as e: - if e.llm_output: - _handle_error( - ParsingRetryException( - e.observation, - e.llm_output, - e.send_to_llm, - message=AIMessage(content=e.llm_output), - ), - r, - retry, - memory, - ) - sleep(settings.retry_parse_sleep + r) - else: - raise e - - return async_wrapper - - else: - - @wraps(fn) - def sync_wrapper(*args: Any, **kwargs: Any) -> Any: - memory: BaseChatMessageHistory = args[3] - settings: FuncchainSettings = args[4] - retry = settings.retry_parse - for r in range(retry): - try: - return fn(*args, **kwargs) - except ParsingRetryException as e: - _handle_error(e, r, retry, memory) - sleep(settings.retry_parse_sleep + r) - except OutputParserException as e: - if e.llm_output: - _handle_error( - ParsingRetryException( - e.observation, - e.llm_output, - e.send_to_llm, - message=AIMessage(content=e.llm_output), - ), - r, - retry, - memory, - ) - sleep(settings.retry_parse_sleep + r) - else: - raise e - - return sync_wrapper - - -def _handle_error( - e: ParsingRetryException, - r: int, - retry: int, - memory: BaseChatMessageHistory, -) -> None: - """handle output parser exception retry""" - print(f"[bright_black]Retrying due to:\n{e}[/bright_black]") - # remove last retry from memory - if isinstance(m := memory.messages[-1].content, str): - if m.startswith("I got this error:") and m.endswith("Can you retry?"): - memory.messages.pop(), memory.messages.pop() - - memory.add_message(e.message) - memory.add_user_message( - "I got this error when trying to parse your json:" - f"\n```\n{e}\n```\n" - "Can you rewrite it so I do not get this again?" - ) - - if r == retry - 1: - raise e - - -def log_openai_callback(fn: Any) -> Any: - if not iscoroutinefunction(fn): - - @wraps(fn) - def sync_wrapper(*args: Any, **kwargs: Any) -> Any: - with get_openai_callback() as cb: - result = fn(*args, **kwargs) - _log_cost(cb, name=get_parent_frame(4).function) - return result - - return sync_wrapper - - else: - - @wraps(fn) - async def async_wrapper(*args: Any, **kwargs: Any) -> Any: - with get_openai_callback() as cb: - result = await fn(*args, **kwargs) - _log_cost(cb, name=get_parent_frame(4).function) - return result - - return async_wrapper - - -def _log_cost(cb: OpenAICallbackHandler, name: str) -> None: - if cb.total_tokens != 0: - total_cost = f"/ {cb.total_cost:.3f}$ " if cb.total_cost > 0 else "" - if total_cost == "/ 0.000$ ": - total_cost = "/ 0.001$ " - print( - "[bright_black]" - f"{cb.total_tokens:05}T {total_cost}- {name}" - "[/bright_black]" - ) diff --git a/src/funcchain/utils/function_frame.py b/src/funcchain/utils/function_frame.py deleted file mode 100644 index b0f7418..0000000 --- a/src/funcchain/utils/function_frame.py +++ /dev/null @@ -1,88 +0,0 @@ -import types -from inspect import FrameInfo, currentframe, getouterframes -from typing import Union - -from langchain_core.output_parsers import BaseOutputParser, StrOutputParser - -from ..parser import BoolOutputParser, ParserBaseModel, PydanticOutputParser - -FUNC_DEPTH = 7 - - -def get_parent_frame(depth: int = FUNC_DEPTH) -> FrameInfo: - """ - Get the dep'th parent function information. 
- """ - return getouterframes(currentframe())[depth] - - -def get_func_obj() -> types.FunctionType: - """ - Get the parent caller function. - """ - func_name = get_parent_frame().function - if func_name == "": - raise RuntimeError("Cannot get function object from module") - if func_name == "": - raise RuntimeError("Cannot get function object from lambda") - - try: - func = get_parent_frame().frame.f_globals[func_name] - except KeyError: - func = get_parent_frame(FUNC_DEPTH + 1).frame.f_locals[func_name] - return func - - -def from_docstring() -> str: - """ - Get the docstring of the parent caller function. - """ - if doc_str := get_func_obj().__doc__: - return "\n".join([line.lstrip() for line in doc_str.split("\n")]) - raise ValueError( - f"The funcchain ({get_parent_frame().function}) must have a docstring" - ) - - -def get_output_type() -> type: - """ - Get the output type annotation of the parent caller function. - """ - try: - # print(get_parent_frame().frame.f_globals) - return get_func_obj().__annotations__["return"] - except KeyError: - raise ValueError("The funcchain must have a return type annotation") - - -def parser_for(output_type: type) -> BaseOutputParser: - """ - Get the parser from the type annotation of the parent caller function. - """ - if isinstance(output_type, types.UnionType): - return None # type: ignore # TODO: fix - # return MultiPydanticOutputParser(pydantic_objects=output_type.__args__) - if getattr(output_type, "__origin__", None) is Union: - output_type = output_type.__args__[0] # type: ignore - return None # type: ignore # TODO: fix - # return MultiPydanticOutputParser(pydantic_objects=output_type.__args__) - if output_type is str: - return StrOutputParser() - if output_type is bool: - return BoolOutputParser() - if issubclass(output_type, ParserBaseModel): - return output_type.output_parser() # type: ignore - - from pydantic import BaseModel - - if issubclass(output_type, BaseModel): - return PydanticOutputParser(pydantic_object=output_type) - else: - raise RuntimeError(f"Output Type is not supported: {output_type}") - - -def kwargs_from_parent() -> dict[str, str]: - """ - Get the kwargs from the parent function. 
-    """
-    return get_parent_frame(FUNC_DEPTH - 1).frame.f_locals
diff --git a/src/funcchain/utils/helpers.py b/src/funcchain/utils/helpers.py
deleted file mode 100644
index b03ca1b..0000000
--- a/src/funcchain/utils/helpers.py
+++ /dev/null
@@ -1,152 +0,0 @@
-from typing import Any, NoReturn, Type
-
-from docstring_parser import parse
-from langchain.chat_models import ChatOpenAI
-from langchain_core.language_models import BaseChatModel, BaseLanguageModel
-from langchain_core.messages import HumanMessage, SystemMessage
-from langchain_core.runnables import Runnable, RunnableWithFallbacks
-from pydantic import BaseModel
-from tiktoken import encoding_for_model
-
-
-def raiser(e: Exception | str) -> NoReturn:
-    raise e if isinstance(e, Exception) else Exception(e)
-
-
-def count_tokens(text: str, model: str = "gpt-4") -> int:
-    return len(encoding_for_model(model).encode(text))
-
-
-verified_function_models = [
-    "gpt-4",
-    "gpt-4-0613",
-    "gpt-4-1106-preview",
-    "gpt-4-32k",
-    "gpt-4-32k-0613",
-    "gpt-3.5-turbo",
-    "gpt-3.5-turbo-0613",
-    "gpt-3.5-turbo-1106",
-    "gpt-3.5-turbo-16k",
-    "gpt-3.5-turbo-16k-0613",
-]
-
-verified_vision_models = [
-    "gpt-4-vision-preview",
-]
-
-
-def gather_llm_type(llm: BaseLanguageModel | Runnable, func_check: bool = True) -> str:
-    if isinstance(llm, RunnableWithFallbacks):
-        llm = llm.runnable
-    if not isinstance(llm, BaseChatModel):
-        return "base_model"
-    if not isinstance(llm, ChatOpenAI):
-        return "chat_model"
-    if llm.model_name in verified_vision_models:
-        return "vision_model"
-    if llm.model_name in verified_function_models:
-        return "function_model"
-    try:
-        if func_check:
-            llm.predict_messages(
-                [
-                    SystemMessage(
-                        content="This is a test message to see if the model can run functions."
-                    ),
-                    HumanMessage(content="Hello!"),
-                ],
-                functions=[
-                    {
-                        "name": "print",
-                        "description": "show the input",
-                        "parameters": {
-                            "properties": {
-                                "__arg1": {"title": "__arg1", "type": "string"},
-                            },
-                            "required": ["__arg1"],
-                            "type": "object",
-                        },
-                    }
-                ],
-            )
-    except Exception:
-        return "chat_model"
-    else:
-        return "function_model"
-
-
-def is_function_model(
-    llm: BaseLanguageModel | RunnableWithFallbacks,
-) -> bool:
-    return gather_llm_type(llm) == "function_model"
-
-
-def is_vision_model(
-    llm: BaseLanguageModel | RunnableWithFallbacks,
-) -> bool:
-    return gather_llm_type(llm) == "vision_model"
-
-
-def _remove_a_key(d: dict, remove_key: str) -> None:
-    """Remove a key from a dictionary recursively"""
-    if isinstance(d, dict):
-        for key in list(d.keys()):
-            if key == remove_key and "type" in d.keys():
-                del d[key]
-            else:
-                _remove_a_key(d[key], remove_key)
-
-
-def pydantic_to_functions(pydantic_type: Type[BaseModel]) -> dict[str, Any]:
-    schema = pydantic_type.model_json_schema()
-
-    docstring = parse(pydantic_type.__doc__ or "")
-    parameters = {k: v for k, v in schema.items() if k not in ("title", "description")}
-
-    for param in docstring.params:
-        if (name := param.arg_name) in parameters["properties"] and (
-            description := param.description
-        ):
-            if "description" not in parameters["properties"][name]:
-                parameters["properties"][name]["description"] = description
-
-    parameters["type"] = "object"
-
-    if "description" not in schema:
-        if docstring.short_description:
-            schema["description"] = docstring.short_description
-        else:
-            schema["description"] = (
-                f"Correctly extracted `{pydantic_type.__name__.lower()}` with all "
-                f"the required parameters with correct types"
-            )
-
-    _remove_a_key(parameters, "title")
-    _remove_a_key(parameters, "additionalProperties")
-
-    return {
-        "function_call": {
-            "name": pydantic_type.__name__.lower(),
-        },
-        "functions": [
-            {
-                "name": pydantic_type.__name__.lower(),
-                "description": schema["description"],
-                "parameters": parameters,
-            },
-        ],
-    }
-
-
-def multi_pydantic_to_functions(
-    pydantic_types: list[Type[BaseModel]],
-) -> dict[str, Any]:
-    functions: list[dict[str, Any]] = [
-        pydantic_to_functions(pydantic_type)["functions"][0]
-        for pydantic_type in pydantic_types
-    ]
-
-    return {
-        "function_call": "auto",
-        "functions": functions,
-    }
diff --git a/src/funcchain/utils/image.py b/src/funcchain/utils/image.py
index 0bcb109..38f55ea 100644
--- a/src/funcchain/utils/image.py
+++ b/src/funcchain/utils/image.py
@@ -1,11 +1,36 @@
-from base64 import b64encode
+from __future__ import annotations
+
+from base64 import b64decode, b64encode
 from io import BytesIO
+from typing import TYPE_CHECKING
+
+from ..syntax.input_types import Image
+
+if TYPE_CHECKING:
+    from PIL.Image import Image as PImage
+else:
+    PImage = type("PImage")
+
+
+def image_to_base64_url(image: Image) -> str:
+    return image.url
 
-from PIL import Image
 
+def base64_url_to_image(base64_url: str) -> Image:
+    return Image(base64_url)
 
-def image_to_base64_url(image: Image.Image) -> str:
+
+def pillow_image_to_base64_url(image: PImage) -> str:
     with BytesIO() as output:
         image.save(output, format="PNG")
         base64_image = b64encode(output.getvalue()).decode("utf-8")
         return f"data:image/jpeg;base64,{base64_image}"
+
+
+def base64_url_to_pillow_image(base64_url: str) -> PImage:
+    from PIL.Image import Image as PImage
+
+    base64_image = base64_url.split(",")[1]
+    image_bytes = b64decode(base64_image)
+    image = PImage.open(BytesIO(image_bytes))
+    return image
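Side note on the image helpers in the hunk above: a minimal, hypothetical round-trip sketch, assuming Pillow is installed and that the module is importable as `funcchain.utils.image` (the path this diff introduces). Note that `pillow_image_to_base64_url` PNG-encodes the buffer even though the returned data URL is labelled `image/jpeg`.

```python
# Sketch only: round-trip a Pillow image through the helpers shown above.
from PIL import Image as PILImage

from funcchain.utils.image import base64_url_to_image, pillow_image_to_base64_url

pil_img = PILImage.new("RGB", (8, 8), color="red")  # tiny in-memory test image
data_url = pillow_image_to_base64_url(pil_img)      # "data:image/jpeg;base64,..." (PNG bytes inside)
fc_image = base64_url_to_image(data_url)            # wraps the URL in funcchain's Image input type
print(fc_image.url == data_url)                     # presumably True: the wrapper appears to just store the URL
```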
diff --git a/src/funcchain/utils/memory.py b/src/funcchain/utils/memory.py
new file mode 100644
index 0000000..a0c97be
--- /dev/null
+++ b/src/funcchain/utils/memory.py
@@ -0,0 +1,21 @@
+"""langchain_community.chat_message_histories.in_memory.ChatMessageHistory"""
+
+from langchain_core.chat_history import BaseChatMessageHistory
+from langchain_core.messages import BaseMessage
+from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+class ChatMessageHistory(BaseChatMessageHistory, BaseModel):
+    """In memory implementation of chat message history.
+
+    Stores messages in an in memory list.
+    """
+
+    messages: list[BaseMessage] = Field(default_factory=list)
+
+    def add_message(self, message: BaseMessage) -> None:
+        """Add a self-created message to the store"""
+        self.messages.append(message)
+
+    def clear(self) -> None:
+        self.messages = []
diff --git a/tests/msg_tools.py b/tests/msg_tools.py
deleted file mode 0
--- /dev/null
+++ /dev/null
diff --git a/src/funcchain/utils/msg_tools.py b/src/funcchain/utils/msg_tools.py
new file mode 100644
index 0000000..365326f
--- /dev/null
+++ b/src/funcchain/utils/msg_tools.py
@@ -0,0 +1,24 @@
+from typing import Union
+
+from langchain_core.messages import BaseMessage as _BaseMessage
+from langchain_core.messages import BaseMessageChunk
+
+BaseMessage = Union[_BaseMessage, BaseMessageChunk]
+
+
+def msg_images(msg: BaseMessage) -> list[str]:
+    """Return a list of image URLs in the message content."""
+    if isinstance(msg.content, str):
+        return []
+    return [item["image_url"]["url"] for item in msg.content if isinstance(item, dict) and item["type"] == "image_url"]
+
+
+def msg_to_str(msg: BaseMessage) -> str:
+    """Return the message content."""
+    return (
+        msg.content
+        if isinstance(msg.content, str)
+        else msg.content[0]
+        if isinstance(msg.content[0], str)
+        else msg.content[0]["text"]
+    )
diff --git a/src/funcchain/utils/pydantic.py b/src/funcchain/utils/pydantic.py
new file mode 100644
index 0000000..5d2c0b0
--- /dev/null
+++ b/src/funcchain/utils/pydantic.py
@@ -0,0 +1,66 @@
+from typing import Any
+
+from docstring_parser import parse
+from pydantic import BaseModel
+
+
+def _remove_a_key(d: dict, remove_key: str) -> None:
+    """Remove a key from a dictionary recursively"""
+    if isinstance(d, dict):
+        for key in list(d.keys()):
+            if key == remove_key and "type" in d.keys():
+                del d[key]
+            else:
+                _remove_a_key(d[key], remove_key)
+
+
+def pydantic_to_functions(pydantic_type: type[BaseModel]) -> dict[str, Any]:
+    schema = pydantic_type.model_json_schema()
+
+    docstring = parse(pydantic_type.__doc__ or "")
+    parameters = {k: v for k, v in schema.items() if k not in ("title", "description")}
+
+    for param in docstring.params:
+        if (name := param.arg_name) in parameters["properties"] and (description := param.description):
+            if "description" not in parameters["properties"][name]:
+                parameters["properties"][name]["description"] = description
+
+    parameters["type"] = "object"
+
+    if "description" not in schema:
+        if docstring.short_description:
+            schema["description"] = docstring.short_description
+        else:
+            schema["description"] = (
+                f"Correctly extracted `{pydantic_type.__name__.lower()}` with all "
+                f"the required parameters with correct types"
+            )
+
+    _remove_a_key(parameters, "title")
+    _remove_a_key(parameters, "additionalProperties")
+
+    return {
+        "function_call": {
+            "name": pydantic_type.__name__.lower(),
+        },
+        "functions": [
+            {
+                "name": pydantic_type.__name__.lower(),
+                "description": schema["description"],
+                "parameters": parameters,
+            },
+        ],
+    }
+
+
+def multi_pydantic_to_functions(
+    pydantic_types: list[type[BaseModel]],
+) -> dict[str, Any]:
+    functions: list[dict[str, Any]] = [
+        pydantic_to_functions(pydantic_type)["functions"][0] for pydantic_type in pydantic_types
+    ]
+
+    return {
+        "function_call": "auto",
+        "functions": functions,
+    }
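For orientation, a rough sketch of the payload shape `pydantic_to_functions` above produces. The `Weather` model and its fields are invented for illustration, and the import path simply mirrors the new file location `src/funcchain/utils/pydantic.py`.

```python
# Sketch only: build an OpenAI-functions payload from a pydantic model.
from pydantic import BaseModel, Field

from funcchain.utils.pydantic import pydantic_to_functions


class Weather(BaseModel):
    """Weather report for a city."""

    city: str = Field(description="Name of the city")
    temperature: float = Field(description="Temperature in Celsius")


payload = pydantic_to_functions(Weather)
print(payload["function_call"])                           # expected: {'name': 'weather'}
print(payload["functions"][0]["description"])             # expected: 'Weather report for a city.'
print(payload["functions"][0]["parameters"]["required"])  # expected: ['city', 'temperature']
```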
diff --git a/src/funcchain/utils/token_counter.py b/src/funcchain/utils/token_counter.py
new file mode 100644
index 0000000..6c45f28
--- /dev/null
+++ b/src/funcchain/utils/token_counter.py
@@ -0,0 +1,7 @@
+def count_tokens(text: str, model: str = "gpt-4") -> int:
+    if "gpt-4" in model:
+        from tiktoken import encoding_for_model
+
+        return len(encoding_for_model(model).encode(text))
+    else:
+        raise NotImplementedError("Please submit a PR or write an issue with your desired model.")
diff --git a/tests/async_test.py b/tests/async_test.py
index e4cb787..d5e3830 100644
--- a/tests/async_test.py
+++ b/tests/async_test.py
@@ -2,9 +2,8 @@
 from asyncio import run as _await
 from random import shuffle
 
-from pydantic import BaseModel
-
 from funcchain import achain, settings
+from pydantic import BaseModel
 
 settings.temperature = 1
 settings.llm = "openai/gpt-3.5-turbo-1106"
@@ -24,7 +23,7 @@ class RankedAnswer(BaseModel):
 
 async def rank_answers(
     question: str,
-    answers: list[tuple[int, str]],
+    answers: str,
 ) -> RankedAnswer:
     """
     Given the list of answers, select the answer
@@ -40,14 +39,10 @@ async def expert_answer(
     # Shuffle the answers to ensure randomness
     enum_answers = list(enumerate(answers))
     shuffle(enum_answers)
-    ranked_answers = await gather(
-        *(rank_answers(question, enum_answers) for _ in range(3))
-    )
+    ranked_answers = await gather(*(rank_answers(question, str(enum_answers)) for _ in range(3)))
     highest_ranked_answer = max(
         ranked_answers,
-        key=lambda x: sum(
-            1 for ans in ranked_answers if ans.selected_answer == x.selected_answer
-        ),
+        key=lambda x: sum(1 for ans in ranked_answers if ans.selected_answer == x.selected_answer),
     ).selected_answer
     return answers[highest_ranked_answer]
diff --git a/tests/features/jinja_test.py b/tests/features/jinja_test.py
new file mode 100644
index 0000000..15d61cb
--- /dev/null
+++ b/tests/features/jinja_test.py
@@ -0,0 +1 @@
+# TODO: implement tests for jinja2 templates
diff --git a/tests/features/primitive_types_test.py b/tests/features/primitive_types_test.py
new file mode 100644
index 0000000..cc49d71
--- /dev/null
+++ b/tests/features/primitive_types_test.py
@@ -0,0 +1 @@
+# TODO: implement tests for primitive types
diff --git a/tests/features/retry_validation_test.py b/tests/features/retry_validation_test.py
new file mode 100644
index 0000000..7ec78e1
--- /dev/null
+++ b/tests/features/retry_validation_test.py
@@ -0,0 +1 @@
+# TODO: implement tests for retry validation
diff --git a/tests/features/router_component_test.py b/tests/features/router_component_test.py
new file mode 100644
index 0000000..3c52739
--- /dev/null
+++ b/tests/features/router_component_test.py
@@ -0,0 +1 @@
+# TODO: implement tests for router component
diff --git a/tests/features/streaming_test.py b/tests/features/streaming_test.py
new file mode 100644
index 0000000..e978ce3
--- /dev/null
+++ b/tests/features/streaming_test.py
@@ -0,0 +1 @@
+# TODO: implement tests for streaming
diff --git a/tests/llamacpp_test.py b/tests/models/llamacpp_test.py
similarity index 56%
rename from tests/llamacpp_test.py
rename to tests/models/llamacpp_test.py
index adf8c01..cb5f38c 100644
--- a/tests/llamacpp_test.py
+++ b/tests/models/llamacpp_test.py
@@ -1,7 +1,6 @@
 import pytest
-from pydantic import BaseModel
-
-from funcchain import chain, settings
+from funcchain import Image, chain, settings
+from pydantic import BaseModel, Field
 
 
 class Task(BaseModel):
@@ -22,7 +21,7 @@ def todo_list(job_title: str) -> TodoList:
 
 @pytest.mark.skip_on_actions
 def test_openhermes() -> None:
-    settings.llm = "gguf/openhermes-2.5-mistral-7b"
+    settings.llm = "llamacpp/Nous-Hermes-2-SOLAR-10.7B"
 
     assert isinstance(
         todo_list("software engineer"),
@@ -32,7 +31,7 @@ def test_openhermes() -> None:
 
 @pytest.mark.skip_on_actions
 def test_neural_chat() -> None:
-    settings.llm = "gguf/neural-chat-7b-v3-1"
+    settings.llm = "llamacpp/openchat-3.5-0106:Q3_K_M"
 
     assert isinstance(
         todo_list("ai engineer"),
@@ -40,26 +39,29 @@ def test_neural_chat() -> None:
     )
 
 
-# def test_vision() -> None:
-#     from PIL import Image
+class Analysis(BaseModel):
+    description: str = Field(description="A description of the image")
+    objects: list[str] = Field(description="A list of objects found in the image")
 
-#     settings.llm = "mys/ggml_llava-v1.5-13b"
 
-#     class Analysis(BaseModel):
-#         description: str = Field(description="A description of the image")
-#         objects: list[str] = Field(description="A list of objects found in the image")
+def analyse(image: Image) -> Analysis:
+    """
+    Analyse the image and extract its
+    theme, description and objects.
+    """
+    return chain()
+
 
-#     def analyse(image: Image.Image) -> Analysis:
-#         """
-#         Analyse the image and extract its
-#         theme, description and objects.
-#         """
-#         return chain()
+# TODO: vision support
+# @pytest.mark.skip_on_actions
+# def test_vision() -> None:
+#     settings.llm = "llamacpp/bakllava"
 
 #     assert isinstance(
-#         analyse(Image.open("examples/assets/old_chinese_temple.jpg")),
+#         analyse(Image.from_file("examples/assets/old_chinese_temple.jpg")),
 #         Analysis,
-#     )
+#     )  # todo check actual output
+
 
 # TODO: Test union types
 # def test_union_types() -> None:
@@ -67,7 +69,7 @@ def test_neural_chat() -> None:
 
 
 def test_model_search_failure() -> None:
-    settings.llm = "gguf/neural-chat-ultra-mega"
+    settings.llm = "llamacpp/neural-chat-ultra-mega"
 
     try:
         todo_list("software engineer")
diff --git a/tests/models/ollama_test.py b/tests/models/ollama_test.py
new file mode 100644
index 0000000..1cf0546
--- /dev/null
+++ b/tests/models/ollama_test.py
@@ -0,0 +1,85 @@
+import pytest
+from funcchain import Image, chain, settings
+from pydantic import BaseModel, Field
+
+
+class Task(BaseModel):
+    description: str
+    difficulty: int
+
+
+class TodoList(BaseModel):
+    tasks: list[Task]
+
+
+def todo_list(job_title: str) -> TodoList:
+    """
+    Create a todo list for a perfect day for the given job.
+    """
+    return chain()
+
+
+@pytest.mark.skip_on_actions
+def test_openhermes() -> None:
+    settings.llm = "ollama/openhermes2.5-mistral"
+
+    assert isinstance(
+        todo_list("software engineer"),
+        TodoList,
+    )
+
+
+@pytest.mark.skip_on_actions
+def test_neural_chat() -> None:
+    settings.llm = "ollama/openchat"
+
+    assert isinstance(
+        todo_list("ai engineer"),
+        TodoList,
+    )
+
+
+class Analysis(BaseModel):
+    description: str = Field(description="A description of the image")
+    objects: list[str] = Field(description="A list of objects found in the image")
+
+
+def analyse(image: Image) -> Analysis:
+    """
+    Analyse the image and extract its
+    theme, description and objects.
+    """
+    return chain()
+
+
+@pytest.mark.skip_on_actions
+def test_vision() -> None:
+    settings.llm = "ollama/bakllava"
+
+    assert isinstance(
+        analyse(Image.from_file("examples/assets/old_chinese_temple.jpg")),
+        Analysis,
+    )  # todo check actual output
+
+
+# TODO: Test union types
+# def test_union_types() -> None:
+#     ...
+
+
+def test_model_search_failure() -> None:
+    settings.llm = "ollama/neural-chat-ultra-mega"
+
+    try:
+        todo_list("software engineer")
+    except Exception:
+        assert True
+    else:
+        assert False, "Model should not be found"
+
+
+if __name__ == "__main__":
+    test_openhermes()
+    test_neural_chat()
+    # test_vision()
+    test_model_search_failure()
diff --git a/tests/openai_test.py b/tests/models/openai_test.py
similarity index 68%
rename from tests/openai_test.py
rename to tests/models/openai_test.py
index 2927800..efbabed 100644
--- a/tests/openai_test.py
+++ b/tests/models/openai_test.py
@@ -1,6 +1,5 @@
-from pydantic import BaseModel, Field
-
 from funcchain import chain, settings
+from pydantic import BaseModel, Field
 
 
 class Task(BaseModel):
@@ -38,7 +37,7 @@ def test_gpt4() -> None:
 
 
 def test_vision() -> None:
-    from PIL import Image
+    from funcchain import Image
 
     settings.llm = "openai/gpt-4-vision-preview"
 
@@ -46,7 +45,7 @@ class Analysis(BaseModel):
         description: str = Field(description="A description of the image")
         objects: list[str] = Field(description="A list of objects found in the image")
 
-    def analyse(image: Image.Image) -> Analysis:
+    def analyse(image: Image) -> Analysis:
         """
         Analyse the image and extract its
         theme, description and objects.
@@ -54,25 +53,12 @@ def analyse(image: Image.Image) -> Analysis:
         return chain()
 
     assert isinstance(
-        analyse(Image.open("examples/assets/old_chinese_temple.jpg")),
+        analyse(Image.from_file("examples/assets/old_chinese_temple.jpg")),
         Analysis,
     )
 
 
-def test_api_key_failure() -> None:
-    settings.llm = "gpt-3.5-turbo-1106"
-    settings.openai_api_key = "test"
-
-    try:
-        print(todo_list("software engineer"))
-    except Exception:
-        assert True
-    else:
-        assert False, "API Key failure did not occur."
-
-
 if __name__ == "__main__":
-    # test_gpt_35_turbo()
-    # test_gpt4()
-    # test_vision()
-    test_api_key_failure()
+    test_gpt_35_turbo()
+    test_gpt4()
+    test_vision()
diff --git a/tests/router_test.py b/tests/router_test.py
deleted file mode 100644
index 67b38a6..0000000
--- a/tests/router_test.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from funcchain.components import ChatRouter
-
-
-def handle_pdf_requests(user_query: str) -> str:
-    return f"Handling PDF requests with user query: {user_query}"
-
-
-def handle_csv_requests(user_query: str) -> str:
-    return f"Handling CSV requests with user query: {user_query}"
-
-
-def handle_default_requests(user_query: str) -> str:
-    return f"Handling DEFAULT requests with user query: {user_query}"
-
-
-router = ChatRouter(
-    routes={
-        "pdf": {
-            "handler": handle_pdf_requests,
-            "description": "Call this for requests including PDF Files.",
-        },
-        "csv": {
-            "handler": handle_csv_requests,
-            "description": "Call this for requests including CSV Files.",
-        },
-        "default": handle_default_requests,
-    },
-)
-
-
-def test_router() -> None:
-    assert "Handling CSV" in router.invoke_route("Can you summarize this csv?")
-
-    assert "Handling PDF" in router.invoke_route("Can you summarize this pdf?")
-
-    assert "Handling DEFAULT" in router.invoke_route("Hey, whatsup?")
-
-
-if __name__ == "__main__":
-    test_router()
diff --git a/tests/run_examples_test.py b/tests/run_examples_test.py
new file mode 100644
index 0000000..feb8b3a
--- /dev/null
+++ b/tests/run_examples_test.py
@@ -0,0 +1,35 @@
+import asyncio
+import glob
+import subprocess
+
+
+async def run_script(file_path: str) -> tuple[str, int | None, bytes, bytes]:
+    """Run a single script and return the result."""
+    print(f"Running {file_path}...")
+    process = await asyncio.create_subprocess_exec("python", file_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = await process.communicate()
+    print(f"Finished {file_path}.")
+    print(stdout.decode(), stderr.decode())
+    return file_path, process.returncode, stdout, stderr
+
+
+async def main() -> None:
+    files: list[str] = glob.glob("examples/**/*.py", recursive=True)
+    tasks: list = [run_script(file) for file in files]
+    results: list[tuple[str, int | None, bytes, bytes]] = await asyncio.gather(*tasks)
+
+    for file, returncode, stdout, stderr in results:
+        if returncode != 0:
+            print(f"Error in {file}:")
+            print(stderr.decode())
+        else:
+            print(f"{file} executed successfully.")
+
+
+def test_examples() -> None:
+    # asyncio.run(main())
+    ...
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
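Finally, a hedged sketch of how the example runner above could be driven for a single script while `test_examples` stays commented out. The example path is hypothetical and `tests/` being importable as a package is an assumption, not something this diff establishes.

```python
# Sketch only: run one example script through run_script and check its exit code.
import asyncio

from tests.run_examples_test import run_script  # assumes tests/ is importable

file_path, returncode, stdout, stderr = asyncio.run(run_script("examples/simple_chain.py"))
assert returncode == 0, stderr.decode()
print(f"{file_path} exited cleanly")
```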