Skip to content

Commit

Permalink
feat: add siliconflow client (#831)
Browse files Browse the repository at this point in the history
  • Loading branch information
sigoden authored Sep 3, 2024
1 parent 9654445 commit df13045
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 12 deletions.
1 change: 1 addition & 0 deletions Argcfile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ OPENAI_COMPATIBLE_PLATFORMS=( \
ollama,llama3.1:latest,http://localhost:11434/v1 \
perplexity,llama-3.1-8b-instruct,https://api.perplexity.ai \
qianwen,qwen-turbo,https://dashscope.aliyuncs.com/compatible-mode/v1 \
siliconflow,meta-llama/Meta-Llama-3.1-8B-Instruct,https://api.siliconflow.cn/v1 \
together,meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo,https://api.together.xyz/v1 \
zhipuai,glm-4-0520,https://open.bigmodel.cn/api/paas/v4 \
)
Expand Down
24 changes: 15 additions & 9 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ clients:
# See https://platform.openai.com/docs/quickstart
- type: openai
api_base: https://api.openai.com/v1 # Optional
api_key: sk-xxx
api_key: xxx
organization_id: org-xxx # Optional

# For any platform compatible with OpenAI's API
Expand Down Expand Up @@ -149,7 +149,7 @@ clients:
# See https://docs.anthropic.com/claude/reference/getting-started-with-the-api
- type: claude
api_base: https://api.anthropic.com/v1 # Optional
api_key: sk-ant-xxx
api_key: xxx

# See https://docs.mistral.ai/
- type: openai-compatible
Expand All @@ -172,13 +172,13 @@ clients:
- type: openai-compatible
name: perplexity
api_base: https://api.perplexity.ai
api_key: pplx-xxx
api_key: xxx

# See https://console.groq.com/docs/quickstart
- type: openai-compatible
name: groq
api_base: https://api.groq.com/openai/v1
api_key: gsk_xxx
api_key: xxx

# See https://github.com/jmorganca/ollama
- type: openai-compatible
Expand Down Expand Up @@ -233,7 +233,7 @@ clients:
- type: openai-compatible
name: huggingface
api_base: https://api-inference.huggingface.co/v1
api_key: hf_xxx
api_key: xxx

# See https://replicate.com/docs
- type: replicate
Expand All @@ -242,25 +242,25 @@ clients:
# See https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html
- type: ernie
api_key: xxx
secret_key: xxxx
secret_key: xxx

# See https://help.aliyun.com/zh/dashscope/
- type: openai-compatible
name: qianwen
api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: sk-xxx
api_key: xxx

# See https://platform.moonshot.cn/docs/intro
- type: openai-compatible
name: moonshot
api_base: https://api.moonshot.cn/v1
api_key: sk-xxx
api_key: xxx

# See https://platform.deepseek.com/api-docs/
- type: openai-compatible
name: deepseek
api_base: https://api.deepseek.com
api_key: sk-xxx
api_key: xxx

# See https://open.bigmodel.cn/dev/howuse/introduction
- type: openai-compatible
Expand Down Expand Up @@ -304,6 +304,12 @@ clients:
api_base: https://text.octoai.run/v1
api_key: xxx

# See https://docs.siliconflow.cn/docs/getting-started
- type: openai-compatible
name: siliconflow
api_base: https://api.siliconflow.cn/v1
api_key: xxx

# See https://docs.together.ai/docs/quickstart
- type: openai-compatible
name: together
Expand Down
63 changes: 62 additions & 1 deletion models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@
max_batch_size: 100

# Links:
# - https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&other=text-generation-inference&sort=trending
# - https://huggingface.co/models?other=text-generation-inference
# - https://huggingface.co/docs/text-generation-inference/en/reference/api_reference
- platform: huggingface
models:
Expand Down Expand Up @@ -1266,6 +1266,67 @@
default_chunk_size: 1000
max_batch_size: 100

# Links:
# - https://siliconflow.cn/zh-cn/models
# - https://siliconflow.cn/zh-cn/maaspricing
# - https://docs.siliconflow.cn/reference/chat-completions-3
# SiliconFlow model catalogue: entries without a `type` field first, then
# embedding entries, then one reranker entry.
# NOTE(review): the unit/currency of `input_price`/`output_price` is not stated
# in this file — confirm against the provider's pricing page before editing.
- platform: siliconflow
models:
# Entries with no `type` field.
# NOTE(review): presumably these default to chat models — confirm against the loader.
- name: Qwen/Qwen2-72B-Instruct
max_input_tokens: 32768
input_price: 0
output_price: 0
- name: meta-llama/Meta-Llama-3.1-405B-Instruct
max_input_tokens: 32768
input_price: 2.94
output_price: 2.94
- name: meta-llama/Meta-Llama-3.1-70B-Instruct
max_input_tokens: 32768
input_price: 0.578
output_price: 0.578
- name: meta-llama/Meta-Llama-3.1-8B-Instruct
max_input_tokens: 32768
input_price: 0
output_price: 0
- name: google/gemma-2-27b-it
max_input_tokens: 8192
input_price: 0.176
output_price: 0.176
- name: google/gemma-2-9b-it
max_input_tokens: 8192
input_price: 0
output_price: 0
- name: deepseek-ai/DeepSeek-V2-Chat
max_input_tokens: 32768
input_price: 0.186
output_price: 0.186
- name: deepseek-ai/DeepSeek-Coder-V2-Instruct
max_input_tokens: 32768
input_price: 0.186
output_price: 0.186
# Embedding models (`type: embedding`); these carry chunking/batch limits
# instead of `max_input_tokens`/`output_price`.
- name: BAAI/bge-large-en-v1.5
type: embedding
input_price: 0
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-large-zh-v1.5
type: embedding
input_price: 0
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-m3
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 100
# Reranker model (`type: reranker`).
- name: BAAI/bge-reranker-v2-m3
type: reranker
max_input_tokens: 8192
input_price: 0

# Links:
# - https://docs.together.ai/docs/inference-models
# - https://docs.together.ai/docs/embedding-models
Expand Down
5 changes: 3 additions & 2 deletions src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ register_client!(
(ernie, "ernie", ErnieConfig, ErnieClient),
);

pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 20] = [
pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 21] = [
("ai21", "https://api.ai21.com/studio/v1"),
("cloudflare", ""),
("deepinfra", "https://api.deepinfra.com/v1/openai"),
Expand All @@ -51,12 +51,13 @@ pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 20] = [
("moonshot", "https://api.moonshot.cn/v1"),
("openrouter", "https://openrouter.ai/api/v1"),
("octoai", "https://text.octoai.run/v1"),
("ollama", "http://localhost:11434/v1"),
("ollama", ""),
("perplexity", "https://api.perplexity.ai"),
(
"qianwen",
"https://dashscope.aliyuncs.com/compatible-mode/v1",
),
("siliconflow", "https://api.siliconflow.cn/v1"),
("together", "https://api.together.xyz/v1"),
("zhipuai", "https://open.bigmodel.cn/api/paas/v4"),
// RAG-dedicated
Expand Down

0 comments on commit df13045

Please sign in to comment.