Skip to content

Commit

Permalink
Merge branch 'master' into feat/vllm_multimodal
Browse files Browse the repository at this point in the history
  • Loading branch information
mudler authored Oct 4, 2024
2 parents 5717f72 + 648ffdf commit 536434e
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 11 deletions.
6 changes: 1 addition & 5 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
- [Documentation](#documentation)
- [Community and Communication](#community-and-communication)



## Getting Started

### Prerequisites
Expand Down Expand Up @@ -54,7 +52,7 @@ If you find a bug, have a feature request, or encounter any issues, please check

## Coding Guidelines

- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.

## Testing

Expand Down Expand Up @@ -84,5 +82,3 @@ We welcome contributions to the documentation; please open a new PR or create a
- You can reach out via the Github issue tracker.
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)

---
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=a39ab216aa624308fda7fa84439c6b61dc98b87a
CPPLLAMA_VERSION?=d5ed2b929d85bbd7dbeecb690880f07d9d7a6077

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=ede1718f6d45aa3f7ad4a1e169dfbc9d51570c4e
WHISPER_CPP_VERSION?=ccc2547210e09e3a1785817383ab770389bb442b

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
Expand Down
4 changes: 4 additions & 0 deletions core/config/backend_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@ type TemplateConfig struct {
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
// It defaults to \n
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`

Video string `yaml:"video"`
Image string `yaml:"image"`
Audio string `yaml:"audio"`
}

func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
Expand Down
21 changes: 18 additions & 3 deletions core/http/endpoints/openai/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)
Expand Down Expand Up @@ -168,8 +169,13 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
continue CONTENT
}
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff

t := "[vid-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Video != "" {
t = config.TemplateConfig.Video
}
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[vid-%d]", vidIndex) + input.Messages[i].StringContent
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, vidIndex, input.Messages[i].StringContent)
vidIndex++
case "audio_url", "audio":
// Decode content as base64 either if it's an URL or base64 text
Expand All @@ -180,7 +186,11 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
}
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[audio-%d]", audioIndex) + input.Messages[i].StringContent
t := "[audio-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Audio != "" {
t = config.TemplateConfig.Audio
}
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, audioIndex, input.Messages[i].StringContent)
audioIndex++
case "image_url", "image":
// Decode content as base64 either if it's an URL or base64 text
Expand All @@ -189,9 +199,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
}

t := "[img-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Image != "" {
t = config.TemplateConfig.Image
}
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", imgIndex) + input.Messages[i].StringContent
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, imgIndex, input.Messages[i].StringContent)
imgIndex++
}
}
Expand Down
24 changes: 24 additions & 0 deletions gallery/index.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,28 @@
---
- name: "salamandra-7b-instruct"
icon: https://huggingface.co/BSC-LT/salamandra-7b-instruct/resolve/main/images/salamandra_header.png
# Uses chatml
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
license: apache-2.0
urls:
- https://huggingface.co/BSC-LT/salamandra-7b-instruct
- https://huggingface.co/cstr/salamandra-7b-instruct-GGUF
tags:
- llm
- gguf
- gpu
- cpu
- salamandra
description: |
Transformer-based decoder-only language model that has been pre-trained on 7.8 trillion tokens of highly curated data. The pre-training corpus contains text in 35 European languages and code.
Salamandra comes in three different sizes — 2B, 7B and 40B parameters — with their respective base and instruction-tuned variants. This model card corresponds to the 7B instructed version.
overrides:
parameters:
model: salamandra-7b-instruct.Q4_K_M-f32.gguf
files:
- filename: salamandra-7b-instruct.Q4_K_M-f32.gguf
sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d
uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf
## llama3.2
- &llama32
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/initializers.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string

client = NewModel(modelID, serverAddress, process)
} else {
log.Debug().Msg("external backend is uri")
log.Debug().Msg("external backend is a uri")
// address
client = NewModel(modelID, uri, nil)
}
Expand Down
24 changes: 24 additions & 0 deletions pkg/templates/multimodal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package templates

import (
"bytes"
"text/template"
)

// TemplateMultiModal renders templateString for a single multimodal
// placeholder. The template may reference two fields:
//
//   - {{.ID}}:   the integer index of the media item (image, audio or video),
//     supplied as templateID.
//   - {{.Text}}: the accompanying message content, supplied as text.
//
// It returns the rendered string, or an empty string and a non-nil error if
// the template cannot be parsed or executed.
func TemplateMultiModal(templateString string, templateID int, text string) (string, error) {
	// Compile the template. These are small placeholder strings (e.g.
	// "[img-{{.ID}}]{{.Text}}"), so per-call parse cost is negligible.
	tmpl, err := template.New("template").Parse(templateString)
	if err != nil {
		return "", err
	}
	result := bytes.NewBuffer(nil)
	// Execute the template with the ID/Text pair exposed as struct fields.
	if err := tmpl.Execute(result, struct {
		ID   int
		Text string
	}{
		ID:   templateID,
		Text: text,
	}); err != nil {
		// Don't hand back a partially rendered string alongside the error;
		// the result is meaningless when err != nil.
		return "", err
	}
	return result.String(), nil
}
19 changes: 19 additions & 0 deletions pkg/templates/multimodal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package templates_test

import (
	. "github.com/mudler/LocalAI/pkg/templates"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// Verifies that TemplateMultiModal substitutes {{.ID}} and {{.Text}} into a
// multimodal placeholder template (here, the default image placeholder).
var _ = Describe("EvaluateTemplate", func() {
	Context("templating simple strings for multimodal chat", func() {
		It("should template messages correctly", func() {
			result, err := TemplateMultiModal("[img-{{.ID}}]{{.Text}}", 1, "bar")
			Expect(err).NotTo(HaveOccurred())
			Expect(result).To(Equal("[img-1]bar"))
		})
	})
})

0 comments on commit 536434e

Please sign in to comment.