From 0e35993ec7eb721b162be7146d913b02a70afb78 Mon Sep 17 00:00:00 2001 From: Thomas Vitale Date: Wed, 16 Oct 2024 22:58:13 +0200 Subject: [PATCH] feat: Add example with Ollama and Hugging Face --- 01-chat-models/chat-models-ollama/README.md | 18 ++++++++++-- .../ai/spring/ChatController.java | 29 +++++++++++++------ .../ai/spring/model/ChatModelController.java | 10 ++++++- .../src/main/resources/application.yml | 2 +- .../spring/TestcontainersConfiguration.java | 2 +- 5 files changed, 47 insertions(+), 14 deletions(-) diff --git a/01-chat-models/chat-models-ollama/README.md b/01-chat-models/chat-models-ollama/README.md index ee3272a..493cfec 100644 --- a/01-chat-models/chat-models-ollama/README.md +++ b/01-chat-models/chat-models-ollama/README.md @@ -53,10 +53,10 @@ The application relies on Ollama for providing LLMs. You can either run Ollama l ### Ollama as a native application First, make sure you have [Ollama](https://ollama.ai) installed on your laptop. -Then, use Ollama to pull the _mistral_ large language model. +Then, use Ollama to pull the _llama3.2_ large language model. ```shell -ollama pull mistral +ollama pull llama3.2 ``` Finally, run the Spring Boot application. @@ -105,3 +105,17 @@ The final request returns the model's answer as a stream. ```shell http --stream :8080/chat/stream question=="Why is a raven like a writing desk? Answer in 3 paragraphs." -b ``` + +Ollama lets you run models directly from Hugging Face. Let's try that out. + +First, pull the `hf.co/SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF` model from Hugging Face. + +```shell +ollama pull hf.co/SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF +``` + +Then, send a request. + +```shell +http :8080/chat/huggingface question=="Why is a raven like a writing desk? Give a short answer." 
-b +``` diff --git a/01-chat-models/chat-models-ollama/src/main/java/com/thomasvitale/ai/spring/ChatController.java b/01-chat-models/chat-models-ollama/src/main/java/com/thomasvitale/ai/spring/ChatController.java index e49a3c2..0be1835 100644 --- a/01-chat-models/chat-models-ollama/src/main/java/com/thomasvitale/ai/spring/ChatController.java +++ b/01-chat-models/chat-models-ollama/src/main/java/com/thomasvitale/ai/spring/ChatController.java @@ -22,16 +22,16 @@ class ChatController { @GetMapping("/chat") String chat(@RequestParam(defaultValue = "What did Gandalf say to the Balrog?") String question) { - return chatClient.prompt() - .user(question) + return chatClient + .prompt(question) .call() .content(); } @GetMapping("/chat/generic-options") String chatWithGenericOptions(@RequestParam(defaultValue = "What did Gandalf say to the Balrog?") String question) { - return chatClient.prompt() - .user(question) + return chatClient + .prompt(question) .options(ChatOptionsBuilder.builder() .withTemperature(0.9) .build()) @@ -41,19 +41,30 @@ String chatWithGenericOptions(@RequestParam(defaultValue = "What did Gandalf say @GetMapping("/chat/provider-options") String chatWithProviderOptions(@RequestParam(defaultValue = "What did Gandalf say to the Balrog?") String question) { - return chatClient.prompt() - .user(question) + return chatClient + .prompt(question) .options(OllamaOptions.create() - .withModel("mistral") + .withModel("llama3.2") .withRepeatPenalty(1.5)) .call() .content(); } + @GetMapping("/chat/huggingface") + String chatWithHuggingFace(@RequestParam(defaultValue = "What did Gandalf say to the Balrog?") String question) { + return chatClient + .prompt(question) + .options(ChatOptionsBuilder.builder() + .withModel("hf.co/SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF") + .build()) + .call() + .content(); + } + @GetMapping("/chat/stream") Flux chatStream(@RequestParam(defaultValue = "What did Gandalf say to the Balrog?") String question) { - return chatClient.prompt() - 
.user(question) + return chatClient + .prompt(question) .stream() .content(); } diff --git a/01-chat-models/chat-models-ollama/src/main/java/com/thomasvitale/ai/spring/model/ChatModelController.java b/01-chat-models/chat-models-ollama/src/main/java/com/thomasvitale/ai/spring/model/ChatModelController.java index a3b0710..bc0368e 100644 --- a/01-chat-models/chat-models-ollama/src/main/java/com/thomasvitale/ai/spring/model/ChatModelController.java +++ b/01-chat-models/chat-models-ollama/src/main/java/com/thomasvitale/ai/spring/model/ChatModelController.java @@ -39,11 +39,19 @@ String chatWithGenericOptions(@RequestParam(defaultValue = "What did Gandalf say @GetMapping("/chat/provider-options") String chatWithProviderOptions(@RequestParam(defaultValue = "What did Gandalf say to the Balrog?") String question) { return chatModel.call(new Prompt(question, OllamaOptions.create() - .withModel("mistral") + .withModel("llama3.2") .withRepeatPenalty(1.5))) .getResult().getOutput().getContent(); } + @GetMapping("/chat/huggingface") + String chatWithHuggingFace(@RequestParam(defaultValue = "What did Gandalf say to the Balrog?") String question) { + return chatModel.call(new Prompt(question, ChatOptionsBuilder.builder() + .withModel("hf.co/SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF") + .build())) + .getResult().getOutput().getContent(); + } + @GetMapping("/chat/stream") Flux chatStream(@RequestParam(defaultValue = "What did Gandalf say to the Balrog?") String question) { return chatModel.stream(question); diff --git a/01-chat-models/chat-models-ollama/src/main/resources/application.yml b/01-chat-models/chat-models-ollama/src/main/resources/application.yml index a05c157..9da73b9 100644 --- a/01-chat-models/chat-models-ollama/src/main/resources/application.yml +++ b/01-chat-models/chat-models-ollama/src/main/resources/application.yml @@ -3,5 +3,5 @@ spring: ollama: chat: options: - model: mistral + model: llama3.2 temperature: 0.7 diff --git 
a/01-chat-models/chat-models-ollama/src/test/java/com/thomasvitale/ai/spring/TestcontainersConfiguration.java b/01-chat-models/chat-models-ollama/src/test/java/com/thomasvitale/ai/spring/TestcontainersConfiguration.java index 5b1db67..8b84ba3 100644 --- a/01-chat-models/chat-models-ollama/src/test/java/com/thomasvitale/ai/spring/TestcontainersConfiguration.java +++ b/01-chat-models/chat-models-ollama/src/test/java/com/thomasvitale/ai/spring/TestcontainersConfiguration.java @@ -14,7 +14,7 @@ class TestcontainersConfiguration { @RestartScope @ServiceConnection OllamaContainer ollama() { - return new OllamaContainer(DockerImageName.parse("ghcr.io/thomasvitale/ollama-mistral") + return new OllamaContainer(DockerImageName.parse("ghcr.io/thomasvitale/ollama-llama-3-2") .asCompatibleSubstituteFor("ollama/ollama")); }