From 7fb081b1f51c32866090ec6dab04218743fecf6d Mon Sep 17 00:00:00 2001 From: Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> Date: Thu, 16 May 2024 09:30:12 +0200 Subject: [PATCH] feat: Support Falcon2-11B --- .../codegpt/completions/HuggingFaceModel.java | 11 +++++++- .../codegpt/completions/llama/LlamaModel.java | 13 ++++++++++ .../completions/llama/PromptTemplate.java | 25 +++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java index 4d6939b45..3b3aae7c9 100644 --- a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java +++ b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java @@ -1,5 +1,6 @@ package ee.carlrobert.codegpt.completions; +import static ee.carlrobert.codegpt.completions.HuggingFaceModel.Model.F11; import static ee.carlrobert.codegpt.completions.HuggingFaceModel.Model.SC3; import static ee.carlrobert.codegpt.completions.llama.LlamaModel.getDownloadedMarker; import static ee.carlrobert.codegpt.completions.llama.LlamaModel.getLlamaModelsPath; @@ -123,10 +124,18 @@ public enum HuggingFaceModel { STABLE_CODE_3B_Q5_K_M(SC3, 5, "stable-code-instruct-3b-Q5_K_M.gguf", 1.99), STABLE_CODE_3B_Q6_K(SC3, 6, "stable-code-instruct-3b-Q6_K.gguf", 2.3), STABLE_CODE_3B_Q8_0(SC3, 8, "stable-code-instruct-3b-Q8_0.gguf", 2.97), + + FALCON2_Q3_K_M(F11, 3, "falcon-11B-Q3_K_M.gguf", 5.44), + FALCON2_Q4_K_M(F11, 4, "falcon-11B-Q4_K_M.gguf", 6.85), + FALCON2_Q5_K_M(F11, 5, "falcon-11B-Q5_K_M.gguf", 8.2), + FALCON2_Q6_K(F11, 6, "falcon-11B-Q6_K.gguf", 9.18), + FALCON2_Q8_0(F11, 8, "falcon-11B-Q8_0.gguf", 11.8), ; enum Model { - SC3("bartowski", 3, "stable-code-instruct-3b-GGUF"); + SC3("bartowski", 3, "stable-code-instruct-3b-GGUF"), + F11("bartowski", 11, "falcon-11B-GGUF") + ; private final String user; private final int parameterSize; diff --git a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java index 6140aeaa9..edbe85a5a 100644 --- a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java +++ b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java @@ -169,6 +169,19 @@ datasets using Direct Preference Optimization (DPO). HuggingFaceModel.STABLE_CODE_3B_Q5_K_M, HuggingFaceModel.STABLE_CODE_3B_Q6_K, HuggingFaceModel.STABLE_CODE_3B_Q8_0)), + FALCON2( + "Falcon2", """ + Falcon2-11B is an 11B parameters causal decoder-only model built by TII and trained on over \ + 5,000B tokens of RefinedWeb enhanced with curated corpora. The model is made available under \ + the TII Falcon License 2.0, the permissive Apache 2.0-based software license which includes \ + an acceptable use policy that promotes the responsible use of AI.""", + PromptTemplate.FALCON2, + List.of( + HuggingFaceModel.FALCON2_Q3_K_M, + HuggingFaceModel.FALCON2_Q4_K_M, + HuggingFaceModel.FALCON2_Q5_K_M, + HuggingFaceModel.FALCON2_Q6_K, + HuggingFaceModel.FALCON2_Q8_0)), ; private final String label; diff --git a/src/main/java/ee/carlrobert/codegpt/completions/llama/PromptTemplate.java b/src/main/java/ee/carlrobert/codegpt/completions/llama/PromptTemplate.java index 24fe77899..9621087d0 100644 --- a/src/main/java/ee/carlrobert/codegpt/completions/llama/PromptTemplate.java +++ b/src/main/java/ee/carlrobert/codegpt/completions/llama/PromptTemplate.java @@ -210,6 +210,31 @@ public String buildPrompt(String systemPrompt, String userPrompt, List .toString(); } }, + FALCON2("Falcon2", List.of("<|endoftext|>")) { + @Override + public String buildPrompt(String systemPrompt, String userPrompt, List history) { + StringBuilder prompt = new StringBuilder(); + + if (systemPrompt != null && !systemPrompt.isBlank()) { + prompt.append("\nSystem:\n") + .append(systemPrompt) + .append("\n"); + } + + for (Message message : history) { + prompt.append("\n\nUser:\n") + .append(message.getPrompt()) + .append("\n\nFalcon:\n") + .append(message.getResponse()) + .append("\n"); + } + + return prompt.append("\nUser:\n") + .append(userPrompt) + .append("\n\nFalcon:\n") + .toString(); + } + }, ALPACA("Alpaca/Vicuna") { @Override public String buildPrompt(String systemPrompt, String userPrompt, List history) {