chore: use better mistral models (#72)
Signed-off-by: Aaron Pham <[email protected]>
aarnphm authored Feb 7, 2025
1 parent 4ff50b2 commit b9eefe9
Showing 1 changed file with 7 additions and 36 deletions.
43 changes: 7 additions & 36 deletions src/recipe.yaml
@@ -11,11 +11,10 @@
     model: microsoft/phi-4
     max_model_len: 4096
     dtype: half
-  chat_template: phi-3
   extra_labels:
     openllm_alias: 14b
     model_name: microsoft/phi-4
-'mistral:7b-instruct':
+'mistral:8b-instruct':
   project: vllm-chat
   service_config:
     name: mistral
@@ -25,17 +24,14 @@
       gpu: 1
       gpu_type: nvidia-l4
   engine_config:
-    model: mistralai/Mistral-7B-Instruct-v0.1
-    max_model_len: 1024
-    enforce_eager: true
+    model: mistralai/Ministral-8B-Instruct-2410
     dtype: half
   extra_envs:
     - name: HF_TOKEN
-  chat_template: mistral-instruct
   extra_labels:
     openllm_alias: 7b,7b-instruct
-    model_name: mistralai/Mistral-7B-Instruct-v0.1
-'mistral:24b-instruct-nemo':
+    model_name: mistralai/Ministral-8B-Instruct-2410
+'mistralai:24b-small-instruct-2501':
   project: vllm-chat
   service_config:
     name: mistral
@@ -45,16 +41,12 @@
       gpu: 1
       gpu_type: nvidia-a100-80gb
   engine_config:
-    model: mistralai/Mistral-Nemo-Instruct-2407
-    max_model_len: 2048
-    enforce_eager: true
-    dtype: half
-  chat_template: mistral-instruct
+    model: mistralai/Mistral-Small-24B-Instruct-2501
   extra_envs:
     - name: HF_TOKEN
   extra_labels:
-    openllm_alias: nemo,nemo-instruct
-    model_name: mistralai/Mistral-Nemo-Instruct-2407
+    openllm_alias: 24b, 24b-instruct-2501
+    model_name: mistralai/Mistral-Small-24B-Instruct-2501
 'mistral-large:123b-instruct':
   project: vllm-chat
   service_config:
@@ -69,7 +61,6 @@
     max_model_len: 2048
     dtype: half
     tensor_parallel_size: 4
-  chat_template: mistral-instruct
   extra_envs:
     - name: HF_TOKEN
   extra_labels:
@@ -88,7 +79,6 @@
     model: casperhansen/mistral-large-instruct-2407-awq
     max_model_len: 2048
     dtype: half
-  chat_template: mistral-instruct
   extra_labels:
     openllm_alias: 123b-4bit,123b-instruct-2407-4bit
     model_name: casperhansen/mistral-large-instruct-2407-awq
@@ -110,24 +100,6 @@
   extra_labels:
     openllm_alias: 8b,8b-instruct
     model_name: meta-llama/Meta-Llama-3.1-8B-Instruct
-'llama3.1:70b-instruct':
-  project: vllm-chat
-  service_config:
-    name: llama3.1
-    traffic:
-      timeout: 300
-    resources:
-      gpu: 2
-      gpu_type: nvidia-a100-80gb
-  engine_config:
-    model: meta-llama/Meta-Llama-3.1-70B-Instruct
-    max_model_len: 2048
-    tensor_parallel_size: 2
-  extra_envs:
-    - name: HF_TOKEN
-  extra_labels:
-    openllm_alias: 70b,70b-instruct
-    model_name: meta-llama/Meta-Llama-3.1-70B-Instruct
 'gemma2:9b-instruct':
   project: vllm-chat
   service_config:
@@ -181,7 +153,6 @@
     model: mistralai/Mixtral-8x7B-Instruct-v0.1
     max_model_len: 2048
     tensor_parallel_size: 2
-  chat_template: mistral-instruct
   extra_envs:
     - name: HF_TOKEN
   extra_labels:
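For orientation, here is a sketch of how the updated 'mistral:8b-instruct' entry in src/recipe.yaml would read after this commit, assembled from the added and unchanged lines in the diff above. The indentation and the traffic block (collapsed in the diff view) are assumptions based on the surrounding entries, and the openllm_alias value appears as unchanged context in the diff, so it is kept as-is here.

'mistral:8b-instruct':
  project: vllm-chat
  service_config:
    name: mistral
    traffic:
      timeout: 300        # collapsed in the diff view; assumed unchanged
    resources:
      gpu: 1
      gpu_type: nvidia-l4
  engine_config:
    model: mistralai/Ministral-8B-Instruct-2410
    dtype: half
  extra_envs:
    - name: HF_TOKEN
  extra_labels:
    openllm_alias: 7b,7b-instruct    # unchanged by this commit
    model_name: mistralai/Ministral-8B-Instruct-2410

The new 'mistralai:24b-small-instruct-2501' entry follows the same shape, with mistralai/Mistral-Small-24B-Instruct-2501 on an nvidia-a100-80gb GPU and no explicit max_model_len or dtype override.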
