Commit: Add Deepseek R1

jtlicardo committed Jan 22, 2025
1 parent eaf0d11 commit 4177607
Showing 5 changed files with 45 additions and 14 deletions.
19 changes: 11 additions & 8 deletions README.md
@@ -71,10 +71,15 @@ Note: You can use any combination of the API keys above, but at least one is req

 * GPT-4o mini
 * GPT-4o
-* o1-preview
-* o1-mini
+* o1-preview (reasoning model)
+* o1-mini (reasoning model)
 
-> Note: Both o1-preview and o1-mini are used together with their respective GPT-4o models as they don't support structured outputs yet. While providing enhanced performance, they have longer response times and can incur significant costs.
+> Note: o1-preview and o1-mini are used as reasoning models to enhance the application's capabilities:
+> 1. They "think through" the process before creating BPMN diagrams, leading to more logical and coherent results
+> 2. They analyze and define change requests when editing diagrams, improving accuracy and consistency
+>
+> These models are used in conjunction with their respective GPT-4o counterparts as they don't support structured outputs yet. While providing enhanced performance, they have longer response times and can incur significant costs.
 
 ### Anthropic
@@ -91,6 +96,9 @@ Note: You can use any combination of the API keys above, but at least one is req
 * Llama 3.3 70B Instruct
 * Qwen 2.5 72B Instruct
 * Deepseek V3
+* Deepseek R1 (reasoning model)
+
+> Note: Deepseek R1 is used together with Deepseek V3, similar to the OpenAI model pairings.
 
 ## Screenshots

@@ -124,11 +132,6 @@ The application currently supports a subset of BPMN elements:
 version. Keep this in mind when interacting with the assistant after making manual changes.
 * Pools and lanes are not and will not be supported.
 
-## Future improvements
-
-* Expanded BPMN element support
-* Implementing LLM awareness of manual edits to the diagram
-
 ## Contact
 
 If you have any questions or feedback, please open an issue on this GitHub repository.
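
To make the README note concrete, here is a minimal sketch of the two-step pairing it describes. The `call_llm` and `call_llm_structured` helpers are hypothetical stand-ins (not this repository's API); `is_reasoning_model` and `replace_reasoning_model` are the real helpers this commit adds in `src/bpmn_assistant/utils/utils.py`.

```python
from bpmn_assistant.utils.utils import is_reasoning_model, replace_reasoning_model

# Hypothetical stand-ins for the app's LLM calls, for illustration only:
def call_llm(model: str, prompt: str) -> str: ...
def call_llm_structured(model: str, prompt: str) -> dict: ...

def generate_bpmn(model: str, prompt: str) -> dict:
    if is_reasoning_model(model):
        # Step 1: the reasoning model "thinks through" the process first.
        plan = call_llm(model, prompt)
        # Step 2: reasoning models don't support structured outputs yet, so the
        # paired non-reasoning model turns the plan into structured JSON.
        return call_llm_structured(replace_reasoning_model(model), plan)
    return call_llm_structured(model, prompt)
```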
1 change: 1 addition & 0 deletions src/bpmn_assistant/core/enums/models.py
@@ -22,3 +22,4 @@ class FireworksAIModels(Enum):
     LLAMA_3_3_70B = "fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct"
     QWEN_2_5_72B = "fireworks_ai/accounts/fireworks/models/qwen2p5-72b-instruct"
     DEEPSEEK_V3 = "fireworks_ai/accounts/fireworks/models/deepseek-v3"
+    DEEPSEEK_R1 = "fireworks_ai/accounts/fireworks/models/deepseek-r1"
13 changes: 13 additions & 0 deletions src/bpmn_assistant/core/provider_impl/fireworks_ai_provider.py
@@ -1,5 +1,6 @@
 import json
 import os
+import re
 from typing import Any, Generator
 
 from litellm import completion
@@ -46,6 +47,18 @@ def call(

         raw_output = response.choices[0].message.content
 
+        if model == FireworksAIModels.DEEPSEEK_R1.value:
+            # Extract thinking phase and clean output
+            think_pattern = r"<think>(.*?)</think>"
+            think_match = re.search(think_pattern, raw_output, re.DOTALL)
+
+            if think_match:
+                thinking = think_match.group(1).strip()
+                logger.info(f"Model thinking phase: {thinking}")
+                raw_output = re.sub(
+                    think_pattern, "", raw_output, flags=re.DOTALL
+                ).strip()
+
         return self._process_response(raw_output)
 
     def stream(
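
The `<think>`-tag handling added above can be exercised standalone; here is a minimal demo with a made-up R1-style response (stdlib only, mirroring the regex logic in the diff):

```python
import re

# Made-up sample: Deepseek R1 wraps its reasoning in <think> tags before the answer.
raw_output = '<think>\nTwo tasks, one exclusive gateway...\n</think>\n{"process": []}'

think_pattern = r"<think>(.*?)</think>"
think_match = re.search(think_pattern, raw_output, re.DOTALL)  # DOTALL lets . match newlines

if think_match:
    thinking = think_match.group(1).strip()  # logged by the provider in the real code
    raw_output = re.sub(think_pattern, "", raw_output, flags=re.DOTALL).strip()

print(raw_output)  # -> {"process": []}
```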
20 changes: 14 additions & 6 deletions src/bpmn_assistant/utils/utils.py
@@ -67,26 +67,34 @@ def get_available_providers() -> dict:
     }
 
 
+def is_reasoning_model(model: str) -> bool:
+    return model in [
+        model.value
+        for model in [
+            OpenAIModels.O1,
+            OpenAIModels.O1_MINI,
+            FireworksAIModels.DEEPSEEK_R1,
+        ]
+    ]
+
+
 def replace_reasoning_model(model: str) -> str:
     """
-    Returns GPT-4o if o1-preview is requested, or GPT-4o-mini if o1-mini is requested.
-    Otherwise returns the original model.
+    Replaces reasoning models with non-reasoning models.
     """
     if model == OpenAIModels.O1.value:
         return OpenAIModels.GPT_4O.value
     elif model == OpenAIModels.O1_MINI.value:
         return OpenAIModels.GPT_4O_MINI.value
+    elif model == FireworksAIModels.DEEPSEEK_R1.value:
+        return FireworksAIModels.DEEPSEEK_V3.value
     return model
 
 
 def is_openai_model(model: str) -> bool:
     return model in [model.value for model in OpenAIModels]
 
 
-def is_reasoning_model(model: str) -> bool:
-    return model in [model.value for model in [OpenAIModels.O1, OpenAIModels.O1_MINI]]
-
-
 def is_anthropic_model(model: str) -> bool:
     return model in [model.value for model in AnthropicModels]

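
A quick usage sketch of the two helpers above, assuming the package is importable as `bpmn_assistant` (module paths inferred from this commit's file tree):

```python
from bpmn_assistant.core.enums.models import FireworksAIModels, OpenAIModels
from bpmn_assistant.utils.utils import is_reasoning_model, replace_reasoning_model

model = FireworksAIModels.DEEPSEEK_R1.value
assert is_reasoning_model(model)

# Structured-output work falls back to the paired non-reasoning model.
assert replace_reasoning_model(model) == FireworksAIModels.DEEPSEEK_V3.value

# Non-reasoning models pass through unchanged.
assert replace_reasoning_model(OpenAIModels.GPT_4O.value) == OpenAIModels.GPT_4O.value
```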
6 changes: 6 additions & 0 deletions src/bpmn_frontend/src/components/ModelPicker.vue
@@ -27,6 +27,7 @@ const Models = Object.freeze({
"fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct",
QWEN_2_5_72B: "fireworks_ai/accounts/fireworks/models/qwen2p5-72b-instruct",
DEEPSEEK_V3: "fireworks_ai/accounts/fireworks/models/deepseek-v3",
DEEPSEEK_R1: "fireworks_ai/accounts/fireworks/models/deepseek-r1",
});
const Providers = Object.freeze({
@@ -93,6 +94,11 @@ export default {
title: "Deepseek V3",
provider: Providers.FIREWORKS_AI,
},
{
value: Models.DEEPSEEK_R1,
title: "Deepseek R1",
provider: Providers.FIREWORKS_AI,
},
],
availableProviders: [],
};
