feat: add siliconflow client (#831)

sigoden · Sep 3, 2024 · df13045
1 parent 9654445
commit df13045
Show file tree

Hide file tree

Showing 4 changed files with 81 additions and 12 deletions.
diff --git a/Argcfile.sh b/Argcfile.sh
@@ -96,6 +96,7 @@ OPENAI_COMPATIBLE_PLATFORMS=( \
   ollama,llama3.1:latest,http://localhost:11434/v1 \
   perplexity,llama-3.1-8b-instruct,https://api.perplexity.ai \
   qianwen,qwen-turbo,https://dashscope.aliyuncs.com/compatible-mode/v1 \
+  siliconflow,meta-llama/Meta-Llama-3.1-8B-Instruct,https://api.siliconflow.cn/v1 \
   together,meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo,https://api.together.xyz/v1 \
   zhipuai,glm-4-0520,https://open.bigmodel.cn/api/paas/v4 \
 )

diff --git a/config.example.yaml b/config.example.yaml
@@ -109,7 +109,7 @@ clients:
   # See https://platform.openai.com/docs/quickstart
   - type: openai
     api_base: https://api.openai.com/v1               # Optional
-    api_key: sk-xxx
+    api_key: xxx
     organization_id: org-xxx                          # Optional
 
   # For any platform compatible with OpenAI's API
@@ -149,7 +149,7 @@ clients:
   # See https://docs.anthropic.com/claude/reference/getting-started-with-the-api
   - type: claude
     api_base: https://api.anthropic.com/v1            # Optional
-    api_key: sk-ant-xxx
+    api_key: xxx
 
   # See https://docs.mistral.ai/
   - type: openai-compatible
@@ -172,13 +172,13 @@ clients:
   - type: openai-compatible
     name: perplexity
     api_base: https://api.perplexity.ai
-    api_key: pplx-xxx
+    api_key: xxx
 
   # See https://console.groq.com/docs/quickstart
   - type: openai-compatible
     name: groq
     api_base: https://api.groq.com/openai/v1
-    api_key: gsk_xxx
+    api_key: xxx
 
   # See https://github.com/jmorganca/ollama
   - type: openai-compatible
@@ -233,7 +233,7 @@ clients:
   - type: openai-compatible
     name: huggingface
     api_base: https://api-inference.huggingface.co/v1
-    api_key: hf_xxx
+    api_key: xxx
 
   # See https://replicate.com/docs
   - type: replicate
@@ -242,25 +242,25 @@ clients:
   # See https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html
   - type: ernie
     api_key: xxx
-    secret_key: xxxx
+    secret_key: xxx
 
   # See https://help.aliyun.com/zh/dashscope/
   - type: openai-compatible
     name: qianwen
     api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
-    api_key: sk-xxx
+    api_key: xxx
 
   # See https://platform.moonshot.cn/docs/intro
   - type: openai-compatible
     name: moonshot
     api_base: https://api.moonshot.cn/v1
-    api_key: sk-xxx
+    api_key: xxx
 
   # See https://platform.deepseek.com/api-docs/
   - type: openai-compatible
     name: deepseek
     api_base: https://api.deepseek.com
-    api_key: sk-xxx
+    api_key: xxx
 
   # See https://open.bigmodel.cn/dev/howuse/introduction
   - type: openai-compatible
@@ -304,6 +304,12 @@ clients:
     api_base: https://text.octoai.run/v1
     api_key: xxx
 
+  # See https://docs.siliconflow.cn/docs/getting-started
+  - type: openai-compatible
+    name: siliconflow
+    api_base: https://api.siliconflow.cn/v1
+    api_key: xxx
+
   # See https://docs.together.ai/docs/quickstart
   - type: openai-compatible
     name: together

diff --git a/models.yaml b/models.yaml
@@ -546,7 +546,7 @@
       max_batch_size: 100
 
 # Links:
-#  - https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&other=text-generation-inference&sort=trending
+#  - https://huggingface.co/models?other=text-generation-inference
 #  - https://huggingface.co/docs/text-generation-inference/en/reference/api_reference
 - platform: huggingface
   models:
@@ -1266,6 +1266,67 @@
       default_chunk_size: 1000
       max_batch_size: 100
 
+# Links:
+#  - https://siliconflow.cn/zh-cn/models
+#  - https://siliconflow.cn/zh-cn/maaspricing
+#  - https://docs.siliconflow.cn/reference/chat-completions-3
+- platform: siliconflow
+  models:
+    - name: Qwen/Qwen2-72B-Instruct
+      max_input_tokens: 32768
+      input_price: 0
+      output_price: 0
+    - name: meta-llama/Meta-Llama-3.1-405B-Instruct
+      max_input_tokens: 32768
+      input_price: 2.94
+      output_price: 2.94
+    - name: meta-llama/Meta-Llama-3.1-70B-Instruct
+      max_input_tokens: 32768
+      input_price: 0.578
+      output_price: 0.578
+    - name: meta-llama/Meta-Llama-3.1-8B-Instruct
+      max_input_tokens: 32768
+      input_price: 0
+      output_price: 0
+    - name: google/gemma-2-27b-it
+      max_input_tokens: 8192
+      input_price: 0.176
+      output_price: 0.176
+    - name: google/gemma-2-9b-it
+      max_input_tokens: 8192
+      input_price: 0
+      output_price: 0
+    - name: deepseek-ai/DeepSeek-V2-Chat
+      max_input_tokens: 32768
+      input_price: 0.186
+      output_price: 0.186
+    - name: deepseek-ai/DeepSeek-Coder-V2-Instruct
+      max_input_tokens: 32768
+      input_price: 0.186
+      output_price: 0.186
+    - name: BAAI/bge-large-en-v1.5
+      type: embedding
+      input_price: 0
+      max_tokens_per_chunk: 512
+      default_chunk_size: 1000
+      max_batch_size: 100
+    - name: BAAI/bge-large-zh-v1.5
+      type: embedding
+      input_price: 0
+      max_tokens_per_chunk: 512
+      default_chunk_size: 1000
+      max_batch_size: 100
+    - name: BAAI/bge-m3
+      type: embedding
+      input_price: 0
+      max_tokens_per_chunk: 8192
+      default_chunk_size: 2000
+      max_batch_size: 100
+    - name: BAAI/bge-reranker-v2-m3
+      type: reranker
+      max_input_tokens: 8192
+      input_price: 0
+
 # Links:
 #  - https://docs.together.ai/docs/inference-models
 #  - https://docs.together.ai/docs/embedding-models

diff --git a/src/client/mod.rs b/src/client/mod.rs
@@ -37,7 +37,7 @@ register_client!(
     (ernie, "ernie", ErnieConfig, ErnieClient),
 );
 
-pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 20] = [
+pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 21] = [
     ("ai21", "https://api.ai21.com/studio/v1"),
     ("cloudflare", ""),
     ("deepinfra", "https://api.deepinfra.com/v1/openai"),
@@ -51,12 +51,13 @@ pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 20] = [
     ("moonshot", "https://api.moonshot.cn/v1"),
     ("openrouter", "https://openrouter.ai/api/v1"),
     ("octoai", "https://text.octoai.run/v1"),
-    ("ollama", "http://localhost:11434/v1"),
+    ("ollama", ""),
     ("perplexity", "https://api.perplexity.ai"),
     (
         "qianwen",
         "https://dashscope.aliyuncs.com/compatible-mode/v1",
     ),
+    ("siliconflow", "https://api.siliconflow.cn/v1"),
     ("together", "https://api.together.xyz/v1"),
     ("zhipuai", "https://open.bigmodel.cn/api/paas/v4"),
     // RAG-dedicated